diff --git a/include/xlnt/workbook/streaming_workbook_reader.hpp b/include/xlnt/workbook/streaming_workbook_reader.hpp
index 59e4baf0..6e975426 100644
--- a/include/xlnt/workbook/streaming_workbook_reader.hpp
+++ b/include/xlnt/workbook/streaming_workbook_reader.hpp
@@ -31,6 +31,10 @@
 
 #include
 
+namespace xml {
+class parser;
+}
+
 namespace xlnt {
 
 class cell;
@@ -73,7 +77,7 @@ public:
-    /// Beings reading of the next worksheet in the workbook and optionally
-    /// returns its title if the last worksheet has not yet been read.
+    /// Begins reading of the next worksheet in the workbook. has_worksheet()
+    /// must be true when this is called; the worksheet title is not returned.
     ///
-    std::string begin_worksheet();
+    void begin_worksheet();
 
     ///
     /// Ends reading of the current worksheet in the workbook and optionally
@@ -118,6 +122,11 @@ private:
     std::vector worksheet_queue_;
     std::unique_ptr consumer_;
     std::unique_ptr workbook_;
+    std::unique_ptr<std::istream> stream_;
+    std::unique_ptr<std::streambuf> stream_buffer_;
+    std::unique_ptr<std::istream> part_stream_;
+    std::unique_ptr<std::streambuf> part_stream_buffer_;
+    std::unique_ptr<xml::parser> parser_;
 };
 
 } // namespace xlnt
diff --git a/source/detail/serialization/xlsx_consumer.cpp b/source/detail/serialization/xlsx_consumer.cpp
index ae51dc49..b719755a 100644
--- a/source/detail/serialization/xlsx_consumer.cpp
+++ b/source/detail/serialization/xlsx_consumer.cpp
@@ -148,14 +148,160 @@ void xlsx_consumer::open(std::istream &source)
 
+// Streaming read of one cell: starts the next <row> when in_element reports
+// <sheetData>, then parses one <c> element into a cell of the worksheet.
+// Returns cell(nullptr) when has_cell() is false or in_element reports no <row>.
 cell xlsx_consumer::read_cell()
 {
-    return cell(nullptr);
+    if (!has_cell())
+    {
+        return cell(nullptr);
+    }
+
+    auto ws = worksheet(stream_worksheet_);
+
+    if (in_element(qn("spreadsheetml", "sheetData")))
+    {
+        expect_start_element(qn("spreadsheetml", "row"), xml::content::complex); // CT_Row
+        auto row_index = parser().attribute("r");
+
+        if (parser().attribute_present("ht"))
+        {
+            ws.row_properties(row_index).height = parser().attribute("ht");
+        }
+
+        if (parser().attribute_present("customHeight"))
+        {
+            ws.row_properties(row_index).custom_height = is_true(parser().attribute("customHeight"));
+        }
+
+        if (parser().attribute_present("hidden") && is_true(parser().attribute("hidden")))
+        {
+            ws.row_properties(row_index).hidden = true;
+        }
+
+        skip_attributes({ qn("x14ac", "dyDescent") });
+        skip_attributes({ "customFormat", "s", "customFont",
+            "outlineLevel", "collapsed", "thickTop", "thickBot",
+            "ph", "spans" });
+    }
+
+    if (!in_element(qn("spreadsheetml", "row")))
+    {
+        return cell(nullptr);
+    }
+
+    expect_start_element(qn("spreadsheetml", "c"), xml::content::complex);
+    auto cell = ws.cell(cell_reference(parser().attribute("r")));
+
+    auto has_type = parser().attribute_present("t");
+    auto type = has_type ? parser().attribute("t") : "n";
+
+    auto has_format = parser().attribute_present("s");
+    auto format_id = static_cast(has_format ? std::stoull(parser().attribute("s")) : 0LL);
+
+    auto has_value = false;
+    auto value_string = std::string();
+
+    auto has_formula = false;
+    auto has_shared_formula = false;
+    auto formula_value_string = std::string();
+
+    while (in_element(qn("spreadsheetml", "c")))
+    {
+        auto current_element = expect_start_element(xml::content::mixed);
+
+        if (current_element == qn("spreadsheetml", "v")) // s:ST_Xstring
+        {
+            has_value = true;
+            value_string = read_text();
+        }
+        else if (current_element == qn("spreadsheetml", "f")) // CT_CellFormula
+        {
+            has_formula = true;
+
+            if (parser().attribute_present("t"))
+            {
+                has_shared_formula = parser().attribute("t") == "shared";
+            }
+
+            skip_attributes(
+                { "aca", "ref", "dt2D", "dtr", "del1", "del2", "r1", "r2", "ca", "si", "bx" });
+
+            formula_value_string = read_text();
+        }
+        else if (current_element == qn("spreadsheetml", "is")) // CT_Rst
+        {
+            expect_start_element(qn("spreadsheetml", "t"), xml::content::simple);
+            // Inline strings carry their text in <is><t> rather than <v>, so
+            // this must count as a value or the text is dropped below.
+            has_value = true;
+            value_string = read_text();
+            expect_end_element(qn("spreadsheetml", "t"));
+        }
+        else
+        {
+            unexpected_element(current_element);
+        }
+
+        expect_end_element(current_element);
+    }
+
+    expect_end_element(qn("spreadsheetml", "c"));
+
+    if (has_formula && !has_shared_formula)
+    {
+        cell.formula(formula_value_string);
+    }
+
+    if (has_value)
+    {
+        if (type == "str")
+        {
+            cell.d_->value_text_ = value_string;
+            cell.data_type(cell::type::formula_string);
+        }
+        else if (type == "inlineStr")
+        {
+            cell.d_->value_text_ = value_string;
+            cell.data_type(cell::type::inline_string);
+        }
+        else if (type == "s")
+        {
+            cell.d_->value_numeric_ = std::stold(value_string);
+            cell.data_type(cell::type::shared_string);
+        }
+        else if (type == "b") // boolean
+        {
+            cell.value(is_true(value_string));
+        }
+        else if (type == "n") // numeric
+        {
+            cell.value(std::stold(value_string));
+        }
+        else if (!value_string.empty() && value_string[0] == '#')
+        {
+            cell.error(value_string);
+        }
+    }
+
+    if (has_format)
+    {
+        cell.format(target_.format(format_id));
+    }
+
+    return cell;
 }
 
+// Reads a worksheet part. When streaming is true only read_worksheet_begin
+// runs here; sheet data and the worksheet end are not read by this call.
-void xlsx_consumer::read_worksheet(const std::string &rel_id)
+void xlsx_consumer::read_worksheet(const std::string &rel_id, bool streaming)
 {
     read_worksheet_begin(rel_id);
-    read_worksheet_sheetdata();
-    read_worksheet_end(rel_id);
+
+    if (!streaming)
+    {
+        read_worksheet_sheetdata();
+        read_worksheet_end(rel_id);
+    }
 }
 
 std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id)
@@ -874,7 +1020,9 @@ xml::parser &xlsx_consumer::parser()
 
 bool xlsx_consumer::has_cell()
 {
-    return stream_cell_ != nullptr;
+    // Cells are only looked for while inside a <row> or inside <sheetData>.
+    return in_element(qn("spreadsheetml", "row"))
+        || in_element(qn("spreadsheetml", "sheetData"));
 }
 
 std::vector xlsx_consumer::read_relationships(const path &part)
@@ -991,7 +1139,7 @@ void xlsx_consumer::read_part(const std::vector &rel_chain, bool s
         break;
 
     case relationship_type::worksheet:
-        read_worksheet(rel_chain.back().id());
+        read_worksheet(rel_chain.back().id(), streaming);
         break;
 
     case relationship_type::thumbnail:
diff --git a/source/detail/serialization/xlsx_consumer.hpp b/source/detail/serialization/xlsx_consumer.hpp
index 31e1047e..164f1b6f 100644
--- a/source/detail/serialization/xlsx_consumer.hpp
+++ b/source/detail/serialization/xlsx_consumer.hpp
@@ -198,7 +198,8 @@ private:
     ///
     /// xl/sheets/*.xml
+    /// When streaming is true, only read_worksheet_begin is run for the part.
     ///
-    void read_worksheet(const std::string &rel_id);
+    void read_worksheet(const std::string &rel_id, bool streaming);
 
     ///
     /// xl/sheets/*.xml
diff --git a/source/workbook/streaming_workbook_reader.cpp b/source/workbook/streaming_workbook_reader.cpp
index a761973f..03c9a1ab 100644
--- a/source/workbook/streaming_workbook_reader.cpp
+++ b/source/workbook/streaming_workbook_reader.cpp
@@ -1,5 +1,4 @@
-// Copyright (c) 2014-2017 Thomas Fussell
-// Copyright (c) 2010-2015 openpyxl
+// Copyright (c) 2017 Thomas Fussell
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
@@ -27,6 +26,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -88,6 +88,13 @@ void streaming_workbook_reader::close()
     if (consumer_)
     {
         consumer_.reset(nullptr);
+        // Drop every stream-related member, dependents first: the parser reads
+        // from part_stream_, and each stream reads from its buffer.
+        parser_.reset(nullptr);
+        part_stream_.reset(nullptr);
+        part_stream_buffer_.reset(nullptr);
+        stream_.reset(nullptr);
+        stream_buffer_.reset(nullptr);
     }
 }
 
@@ -106,10 +113,28 @@ bool streaming_workbook_reader::has_worksheet()
     return !worksheet_queue_.empty();
 }
 
-std::string streaming_workbook_reader::begin_worksheet()
+void streaming_workbook_reader::begin_worksheet()
 {
-    auto next_worksheet_rel = worksheet_queue_.back();
-    return consumer_->read_worksheet_begin(next_worksheet_rel);
+    // Precondition: has_worksheet(); back() on an empty queue is undefined.
+    const auto next_worksheet_rel = worksheet_queue_.back();
+    const auto workbook_rel = workbook_->manifest()
+        .relationship(path("/"), relationship_type::office_document);
+    const auto worksheet_rel = workbook_->manifest()
+        .relationship(workbook_rel.target().path(), next_worksheet_rel);
+
+    auto rel_chain = std::vector{ workbook_rel, worksheet_rel };
+
+    // The consumer only keeps a raw pointer to the parser, so the part's
+    // buffer, stream and parser are owned here as members.
+    const auto &manifest = consumer_->target_.manifest();
+    const auto part_path = manifest.canonicalize(rel_chain);
+    auto part_stream_buffer = consumer_->archive_->open(part_path);
+    part_stream_buffer_.swap(part_stream_buffer);
+    part_stream_.reset(new std::istream(part_stream_buffer_.get()));
+    parser_.reset(new xml::parser(*part_stream_, part_path.string()));
+    consumer_->parser_ = parser_.get();
+
+    consumer_->read_worksheet_begin(next_worksheet_rel);
 }
 
 worksheet streaming_workbook_reader::end_worksheet()
@@ -121,29 +146,33 @@ worksheet streaming_workbook_reader::end_worksheet()
 
 void streaming_workbook_reader::open(const std::vector &data)
 {
-    detail::vector_istreambuf buffer(data);
-    std::istream buffer_stream(&buffer);
-    open(buffer_stream);
+    stream_buffer_.reset(new detail::vector_istreambuf(data));
+    stream_.reset(new std::istream(stream_buffer_.get()));
+    open(*stream_);
 }
 
 void streaming_workbook_reader::open(const std::string &filename)
 {
-    std::ifstream file_stream;
-    open_stream(file_stream, filename);
+    stream_.reset(new std::ifstream());
+    // stream_ is held as std::istream; open_stream needs the concrete ifstream.
+    open_stream(static_cast<std::ifstream &>(*stream_), filename);
+    open(*stream_);
 }
 
 #ifdef _MSC_VER
 void streaming_workbook_reader::open(const std::wstring &filename)
 {
-    std::ifstream file_stream;
-    open_stream(file_stream, filename);
+    stream_.reset(new std::ifstream());
+    open_stream(static_cast<std::ifstream &>(*stream_), filename);
+    open(*stream_);
 }
 #endif
 
 void streaming_workbook_reader::open(const xlnt::path &filename)
 {
-    std::ifstream file_stream;
-    open_stream(file_stream, filename.string());
+    stream_.reset(new std::ifstream());
+    open_stream(static_cast<std::ifstream &>(*stream_), filename.string());
+    open(*stream_);
 }
 
 void streaming_workbook_reader::open(std::istream &stream)
@@ -151,6 +180,19 @@ void streaming_workbook_reader::open(std::istream &stream)
     workbook_.reset(new workbook());
     consumer_.reset(new detail::xlsx_consumer(*workbook_));
     consumer_->open(stream);
+
+    // Start from an empty queue so that reopening does not keep stale entries.
+    worksheet_queue_.clear();
+
+    const auto workbook_rel = workbook_->manifest()
+        .relationship(path("/"), relationship_type::office_document);
+    const auto workbook_path = workbook_rel.target().path();
+
+    for (const auto &worksheet_rel : workbook_->manifest()
+        .relationships(workbook_path, relationship_type::worksheet))
+    {
+        worksheet_queue_.push_back(worksheet_rel.id());
+    }
 }
 
 } // namespace xlnt
diff --git a/tests/workbook/serialization_test_suite.hpp b/tests/workbook/serialization_test_suite.hpp
index 8449d016..034be292 100644
--- a/tests/workbook/serialization_test_suite.hpp
+++ b/tests/workbook/serialization_test_suite.hpp
@@ -58,6 +58,8 @@ public:
         register_test(test_read_custom_properties);
         register_test(test_round_trip_rw);
         register_test(test_round_trip_rw_encrypted);
+        register_test(test_streaming_read);
+        //register_test(test_streaming_write);
     }
 
     bool workbook_matches_file(xlnt::workbook &wb, const xlnt::path &file)