mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
work on streaming cell reading, using ugly copy-pasted code for now [ci
skip]
This commit is contained in:
parent
3153600768
commit
6f716c6e89
|
@ -31,6 +31,10 @@
|
||||||
|
|
||||||
#include <xlnt/xlnt_config.hpp>
|
#include <xlnt/xlnt_config.hpp>
|
||||||
|
|
||||||
|
namespace xml {
|
||||||
|
class parser;
|
||||||
|
}
|
||||||
|
|
||||||
namespace xlnt {
|
namespace xlnt {
|
||||||
|
|
||||||
class cell;
|
class cell;
|
||||||
|
@ -73,7 +77,7 @@ public:
|
||||||
/// Begins reading of the next worksheet in the workbook and optionally
|
/// Begins reading of the next worksheet in the workbook and optionally
|
||||||
/// returns its title if the last worksheet has not yet been read.
|
/// returns its title if the last worksheet has not yet been read.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
std::string begin_worksheet();
|
void begin_worksheet();
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Ends reading of the current worksheet in the workbook and optionally
|
/// Ends reading of the current worksheet in the workbook and optionally
|
||||||
|
@ -118,6 +122,11 @@ private:
|
||||||
std::vector<std::string> worksheet_queue_;
|
std::vector<std::string> worksheet_queue_;
|
||||||
std::unique_ptr<detail::xlsx_consumer> consumer_;
|
std::unique_ptr<detail::xlsx_consumer> consumer_;
|
||||||
std::unique_ptr<workbook> workbook_;
|
std::unique_ptr<workbook> workbook_;
|
||||||
|
std::unique_ptr<std::istream> stream_;
|
||||||
|
std::unique_ptr<std::streambuf> stream_buffer_;
|
||||||
|
std::unique_ptr<std::istream> part_stream_;
|
||||||
|
std::unique_ptr<std::streambuf> part_stream_buffer_;
|
||||||
|
std::unique_ptr<xml::parser> parser_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace xlnt
|
} // namespace xlnt
|
||||||
|
|
|
@ -148,14 +148,152 @@ void xlsx_consumer::open(std::istream &source)
|
||||||
|
|
||||||
cell xlsx_consumer::read_cell()
|
cell xlsx_consumer::read_cell()
|
||||||
{
|
{
|
||||||
return cell(nullptr);
|
if (!has_cell())
|
||||||
|
{
|
||||||
|
return cell(nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ws = worksheet(stream_worksheet_);
|
||||||
|
|
||||||
|
if (in_element(qn("spreadsheetml", "sheetData")))
|
||||||
|
{
|
||||||
|
expect_start_element(qn("spreadsheetml", "row"), xml::content::complex); // CT_Row
|
||||||
|
auto row_index = parser().attribute<row_t>("r");
|
||||||
|
|
||||||
|
if (parser().attribute_present("ht"))
|
||||||
|
{
|
||||||
|
ws.row_properties(row_index).height = parser().attribute<double>("ht");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parser().attribute_present("customHeight"))
|
||||||
|
{
|
||||||
|
ws.row_properties(row_index).custom_height = is_true(parser().attribute("customHeight"));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parser().attribute_present("hidden") && is_true(parser().attribute("hidden")))
|
||||||
|
{
|
||||||
|
ws.row_properties(row_index).hidden = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
skip_attributes({ qn("x14ac", "dyDescent") });
|
||||||
|
skip_attributes({ "customFormat", "s", "customFont",
|
||||||
|
"outlineLevel", "collapsed", "thickTop", "thickBot",
|
||||||
|
"ph", "spans" });
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!in_element(qn("spreadsheetml", "row")))
|
||||||
|
{
|
||||||
|
return cell(nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
expect_start_element(qn("spreadsheetml", "c"), xml::content::complex);
|
||||||
|
auto cell = ws.cell(cell_reference(parser().attribute("r")));
|
||||||
|
|
||||||
|
auto has_type = parser().attribute_present("t");
|
||||||
|
auto type = has_type ? parser().attribute("t") : "n";
|
||||||
|
|
||||||
|
auto has_format = parser().attribute_present("s");
|
||||||
|
auto format_id = static_cast<std::size_t>(has_format ? std::stoull(parser().attribute("s")) : 0LL);
|
||||||
|
|
||||||
|
auto has_value = false;
|
||||||
|
auto value_string = std::string();
|
||||||
|
|
||||||
|
auto has_formula = false;
|
||||||
|
auto has_shared_formula = false;
|
||||||
|
auto formula_value_string = std::string();
|
||||||
|
|
||||||
|
while (in_element(qn("spreadsheetml", "c")))
|
||||||
|
{
|
||||||
|
auto current_element = expect_start_element(xml::content::mixed);
|
||||||
|
|
||||||
|
if (current_element == qn("spreadsheetml", "v")) // s:ST_Xstring
|
||||||
|
{
|
||||||
|
has_value = true;
|
||||||
|
value_string = read_text();
|
||||||
|
}
|
||||||
|
else if (current_element == qn("spreadsheetml", "f")) // CT_CellFormula
|
||||||
|
{
|
||||||
|
has_formula = true;
|
||||||
|
|
||||||
|
if (parser().attribute_present("t"))
|
||||||
|
{
|
||||||
|
has_shared_formula = parser().attribute("t") == "shared";
|
||||||
|
}
|
||||||
|
|
||||||
|
skip_attributes(
|
||||||
|
{ "aca", "ref", "dt2D", "dtr", "del1", "del2", "r1", "r2", "ca", "si", "bx" });
|
||||||
|
|
||||||
|
formula_value_string = read_text();
|
||||||
|
}
|
||||||
|
else if (current_element == qn("spreadsheetml", "is")) // CT_Rst
|
||||||
|
{
|
||||||
|
expect_start_element(qn("spreadsheetml", "t"), xml::content::simple);
|
||||||
|
value_string = read_text();
|
||||||
|
expect_end_element(qn("spreadsheetml", "t"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
unexpected_element(current_element);
|
||||||
|
}
|
||||||
|
|
||||||
|
expect_end_element(current_element);
|
||||||
|
}
|
||||||
|
|
||||||
|
expect_end_element(qn("spreadsheetml", "c"));
|
||||||
|
|
||||||
|
if (has_formula && !has_shared_formula)
|
||||||
|
{
|
||||||
|
cell.formula(formula_value_string);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (has_value)
|
||||||
|
{
|
||||||
|
if (type == "str")
|
||||||
|
{
|
||||||
|
cell.d_->value_text_ = value_string;
|
||||||
|
cell.data_type(cell::type::formula_string);
|
||||||
|
}
|
||||||
|
else if (type == "inlineStr")
|
||||||
|
{
|
||||||
|
cell.d_->value_text_ = value_string;
|
||||||
|
cell.data_type(cell::type::inline_string);
|
||||||
|
}
|
||||||
|
else if (type == "s")
|
||||||
|
{
|
||||||
|
cell.d_->value_numeric_ = std::stold(value_string);
|
||||||
|
cell.data_type(cell::type::shared_string);
|
||||||
|
}
|
||||||
|
else if (type == "b") // boolean
|
||||||
|
{
|
||||||
|
cell.value(is_true(value_string));
|
||||||
|
}
|
||||||
|
else if (type == "n") // numeric
|
||||||
|
{
|
||||||
|
cell.value(std::stold(value_string));
|
||||||
|
}
|
||||||
|
else if (!value_string.empty() && value_string[0] == '#')
|
||||||
|
{
|
||||||
|
cell.error(value_string);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (has_format)
|
||||||
|
{
|
||||||
|
cell.format(target_.format(format_id));
|
||||||
|
}
|
||||||
|
|
||||||
|
return cell;
|
||||||
}
|
}
|
||||||
|
|
||||||
void xlsx_consumer::read_worksheet(const std::string &rel_id)
|
void xlsx_consumer::read_worksheet(const std::string &rel_id, bool streaming)
|
||||||
{
|
{
|
||||||
read_worksheet_begin(rel_id);
|
read_worksheet_begin(rel_id);
|
||||||
read_worksheet_sheetdata();
|
|
||||||
read_worksheet_end(rel_id);
|
if (!streaming)
|
||||||
|
{
|
||||||
|
read_worksheet_sheetdata();
|
||||||
|
read_worksheet_end(rel_id);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id)
|
std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id)
|
||||||
|
@ -874,7 +1012,8 @@ xml::parser &xlsx_consumer::parser()
|
||||||
|
|
||||||
bool xlsx_consumer::has_cell()
|
bool xlsx_consumer::has_cell()
|
||||||
{
|
{
|
||||||
return stream_cell_ != nullptr;
|
return in_element(qn("spreadsheetml", "row"))
|
||||||
|
|| in_element(qn("spreadsheetml", "sheetData"));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<relationship> xlsx_consumer::read_relationships(const path &part)
|
std::vector<relationship> xlsx_consumer::read_relationships(const path &part)
|
||||||
|
@ -991,7 +1130,7 @@ void xlsx_consumer::read_part(const std::vector<relationship> &rel_chain, bool s
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case relationship_type::worksheet:
|
case relationship_type::worksheet:
|
||||||
read_worksheet(rel_chain.back().id());
|
read_worksheet(rel_chain.back().id(), streaming);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case relationship_type::thumbnail:
|
case relationship_type::thumbnail:
|
||||||
|
|
|
@ -198,7 +198,7 @@ private:
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// xl/sheets/*.xml
|
/// xl/sheets/*.xml
|
||||||
/// </summary>
|
/// </summary>
|
||||||
void read_worksheet(const std::string &rel_id);
|
void read_worksheet(const std::string &rel_id, bool streaming);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// xl/sheets/*.xml
|
/// xl/sheets/*.xml
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
// Copyright (c) 2014-2017 Thomas Fussell
|
// Copyright (c) 2017 Thomas Fussell
|
||||||
// Copyright (c) 2010-2015 openpyxl
|
|
||||||
//
|
//
|
||||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
// of this software and associated documentation files (the "Software"), to deal
|
// of this software and associated documentation files (the "Software"), to deal
|
||||||
|
@ -27,6 +26,7 @@
|
||||||
#include <detail/serialization/vector_streambuf.hpp>
|
#include <detail/serialization/vector_streambuf.hpp>
|
||||||
#include <detail/serialization/xlsx_consumer.hpp>
|
#include <detail/serialization/xlsx_consumer.hpp>
|
||||||
#include <xlnt/cell/cell.hpp>
|
#include <xlnt/cell/cell.hpp>
|
||||||
|
#include <xlnt/packaging/manifest.hpp>
|
||||||
#include <xlnt/utils/optional.hpp>
|
#include <xlnt/utils/optional.hpp>
|
||||||
#include <xlnt/workbook/streaming_workbook_reader.hpp>
|
#include <xlnt/workbook/streaming_workbook_reader.hpp>
|
||||||
#include <xlnt/workbook/workbook.hpp>
|
#include <xlnt/workbook/workbook.hpp>
|
||||||
|
@ -88,6 +88,7 @@ void streaming_workbook_reader::close()
|
||||||
if (consumer_)
|
if (consumer_)
|
||||||
{
|
{
|
||||||
consumer_.reset(nullptr);
|
consumer_.reset(nullptr);
|
||||||
|
stream_buffer_.reset(nullptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,10 +107,25 @@ bool streaming_workbook_reader::has_worksheet()
|
||||||
return !worksheet_queue_.empty();
|
return !worksheet_queue_.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string streaming_workbook_reader::begin_worksheet()
|
void streaming_workbook_reader::begin_worksheet()
|
||||||
{
|
{
|
||||||
auto next_worksheet_rel = worksheet_queue_.back();
|
const auto next_worksheet_rel = worksheet_queue_.back();
|
||||||
return consumer_->read_worksheet_begin(next_worksheet_rel);
|
const auto workbook_rel = workbook_->manifest()
|
||||||
|
.relationship(path("/"), relationship_type::office_document);
|
||||||
|
const auto worksheet_rel = workbook_->manifest()
|
||||||
|
.relationship(workbook_rel.target().path(), next_worksheet_rel);
|
||||||
|
|
||||||
|
auto rel_chain = std::vector<relationship>{ workbook_rel, worksheet_rel };
|
||||||
|
|
||||||
|
const auto &manifest = consumer_->target_.manifest();
|
||||||
|
const auto part_path = manifest.canonicalize(rel_chain);
|
||||||
|
auto part_stream_buffer = consumer_->archive_->open(part_path);
|
||||||
|
part_stream_buffer_.swap(part_stream_buffer);
|
||||||
|
part_stream_.reset(new std::istream(part_stream_buffer_.get()));
|
||||||
|
parser_.reset(new xml::parser(*part_stream_, part_path.string()));
|
||||||
|
consumer_->parser_ = parser_.get();
|
||||||
|
|
||||||
|
consumer_->read_worksheet_begin(next_worksheet_rel);
|
||||||
}
|
}
|
||||||
|
|
||||||
worksheet streaming_workbook_reader::end_worksheet()
|
worksheet streaming_workbook_reader::end_worksheet()
|
||||||
|
@ -121,29 +137,32 @@ worksheet streaming_workbook_reader::end_worksheet()
|
||||||
|
|
||||||
void streaming_workbook_reader::open(const std::vector<std::uint8_t> &data)
|
void streaming_workbook_reader::open(const std::vector<std::uint8_t> &data)
|
||||||
{
|
{
|
||||||
detail::vector_istreambuf buffer(data);
|
stream_buffer_.reset(new detail::vector_istreambuf(data));
|
||||||
std::istream buffer_stream(&buffer);
|
stream_.reset(new std::istream(stream_buffer_.get()));
|
||||||
open(buffer_stream);
|
open(*stream_);
|
||||||
}
|
}
|
||||||
|
|
||||||
void streaming_workbook_reader::open(const std::string &filename)
|
void streaming_workbook_reader::open(const std::string &filename)
|
||||||
{
|
{
|
||||||
std::ifstream file_stream;
|
stream_.reset(new std::ifstream());
|
||||||
open_stream(file_stream, filename);
|
// stream_ is a std::unique_ptr<std::istream>: dereference it before casting so the
// owned ifstream is passed, not the smart-pointer object reinterpreted as a stream.
open_stream((std::ifstream &)*stream_, filename);
|
||||||
|
open(*stream_);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
void streaming_workbook_reader::open(const std::wstring &filename)
|
void streaming_workbook_reader::open(const std::wstring &filename)
|
||||||
{
|
{
|
||||||
std::ifstream file_stream;
|
stream_.reset(new std::ifstream());
|
||||||
open_stream(file_stream, filename);
|
open_stream((std::ifstream &)*stream_, filename);
|
||||||
|
open(*stream_);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void streaming_workbook_reader::open(const xlnt::path &filename)
|
void streaming_workbook_reader::open(const xlnt::path &filename)
|
||||||
{
|
{
|
||||||
std::ifstream file_stream;
|
stream_.reset(new std::ifstream());
|
||||||
open_stream(file_stream, filename.string());
|
open_stream((std::ifstream &)*stream_, filename.string());
|
||||||
|
open(*stream_);
|
||||||
}
|
}
|
||||||
|
|
||||||
void streaming_workbook_reader::open(std::istream &stream)
|
void streaming_workbook_reader::open(std::istream &stream)
|
||||||
|
@ -151,6 +170,16 @@ void streaming_workbook_reader::open(std::istream &stream)
|
||||||
workbook_.reset(new workbook());
|
workbook_.reset(new workbook());
|
||||||
consumer_.reset(new detail::xlsx_consumer(*workbook_));
|
consumer_.reset(new detail::xlsx_consumer(*workbook_));
|
||||||
consumer_->open(stream);
|
consumer_->open(stream);
|
||||||
|
|
||||||
|
const auto workbook_rel = workbook_->manifest()
|
||||||
|
.relationship(path("/"), relationship_type::office_document);
|
||||||
|
const auto workbook_path = workbook_rel.target().path();
|
||||||
|
|
||||||
|
for (auto worksheet_rel : workbook_->manifest()
|
||||||
|
.relationships(workbook_path, relationship_type::worksheet))
|
||||||
|
{
|
||||||
|
worksheet_queue_.push_back(worksheet_rel.id());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace xlnt
|
} // namespace xlnt
|
||||||
|
|
|
@ -58,6 +58,8 @@ public:
|
||||||
register_test(test_read_custom_properties);
|
register_test(test_read_custom_properties);
|
||||||
register_test(test_round_trip_rw);
|
register_test(test_round_trip_rw);
|
||||||
register_test(test_round_trip_rw_encrypted);
|
register_test(test_round_trip_rw_encrypted);
|
||||||
|
register_test(test_streaming_read);
|
||||||
|
//register_test(test_streaming_write);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool workbook_matches_file(xlnt::workbook &wb, const xlnt::path &file)
|
bool workbook_matches_file(xlnt::workbook &wb, const xlnt::path &file)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user