diff --git a/include/xlnt/worksheet/range_reference.hpp b/include/xlnt/worksheet/range_reference.hpp index 652b6380..939cd314 100644 --- a/include/xlnt/worksheet/range_reference.hpp +++ b/include/xlnt/worksheet/range_reference.hpp @@ -115,6 +115,11 @@ public: /// std::string to_string() const; + /// + /// Returns true if the given cell reference is within the bounds of this range reference. + /// + bool contains(const cell_reference &ref) const; + /// /// Returns true if this range is equivalent to the other range. /// diff --git a/include/xlnt/xlnt.hpp b/include/xlnt/xlnt.hpp index 12472c72..514770a4 100644 --- a/include/xlnt/xlnt.hpp +++ b/include/xlnt/xlnt.hpp @@ -86,6 +86,7 @@ #include #include #include +#include #include #include #include diff --git a/source/detail/serialization/xlsx_consumer.cpp b/source/detail/serialization/xlsx_consumer.cpp index 0947bad2..7959372a 100644 --- a/source/detail/serialization/xlsx_consumer.cpp +++ b/source/detail/serialization/xlsx_consumer.cpp @@ -165,7 +165,7 @@ xlnt::cell_type type_from_string(const std::string &str) return xlnt::cell::type::shared_string; } -xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser) +xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser, std::unordered_map &array_formulae, std::unordered_map &shared_formulae) { xlnt::detail::Cell c; for (auto &attr : parser->attribute_map()) @@ -202,6 +202,16 @@ xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser) switch (e) { case xml::parser::start_element: { + if (string_equal(parser->name(), "f") && parser->attribute_present("t")) + { + // Skip shared formulas with a ref attribute because it indicates that this + // is the master cell which will be handled in the xml::parser::characters case. + if (parser->attribute("t") == "shared" && !parser->attribute_present("ref")) + { + auto shared_index = parser->attribute("si"); + c.formula_string = shared_formulae[shared_index]; + } + } ++level; break; } @@ -223,6 +233,21 @@ xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser) else if (string_equal(parser->name(), "f")) { c.formula_string += std::move(parser->value()); + + if (parser->attribute_present("t")) + { + auto formula_ref = parser->attribute("ref"); + auto formula_type = parser->attribute("t"); + if (formula_type == "shared") + { + auto shared_index = parser->attribute("si"); + shared_formulae[shared_index] = c.formula_string; + } + else if (formula_type == "array") + { + array_formulae[formula_ref] = c.formula_string; + } + } } } else if (level == 3) @@ -251,7 +276,7 @@ xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser) } // inside element -std::pair parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector &parsed_cells) +std::pair parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector &parsed_cells, std::unordered_map &array_formulae, std::unordered_map &shared_formulae) { std::pair props; for (auto &attr : parser->attribute_map()) @@ -301,7 +326,7 @@ std::pair parse_row(xml::parser *parser, xlnt::detail switch (e) { case xml::parser::start_element: { - parsed_cells.push_back(parse_cell(static_cast(props.second), parser)); + parsed_cells.push_back(parse_cell(static_cast(props.second), parser, array_formulae, shared_formulae)); break; } case xml::parser::end_element: { @@ -326,7 +351,7 @@ std::pair parse_row(xml::parser *parser, xlnt::detail } // inside element -Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_serialiser &converter) +Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::unordered_map &array_formulae, std::unordered_map &shared_formulae) { Sheet_Data sheet_data; int level = 1; // nesting level @@ -339,7 +364,7 @@ Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_serialiser switch (e) { case xml::parser::start_element: { - sheet_data.parsed_rows.push_back(parse_row(parser, converter, sheet_data.parsed_cells)); + sheet_data.parsed_rows.push_back(parse_row(parser, converter, sheet_data.parsed_cells, array_formulae, shared_formulae)); break; } case xml::parser::end_element: { @@ -429,6 +454,9 @@ std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id) { streaming_cell_.reset(new detail::cell_impl()); } + + array_formulae_.clear(); + shared_formulae_.clear(); auto title = std::find_if(target_.d_->sheet_title_rel_id_map_.begin(), target_.d_->sheet_title_rel_id_map_.end(), @@ -742,7 +770,8 @@ void xlsx_consumer::read_worksheet_sheetdata() { return; } - Sheet_Data ws_data = parse_sheet_data(parser_, converter_); + + auto ws_data = parse_sheet_data(parser_, converter_, array_formulae_, shared_formulae_); // NOTE: parse->construct are seperated here and could easily be threaded // with a SPSC queue for what is likely to be an easy performance win for (auto &row : ws_data.parsed_rows) @@ -803,6 +832,8 @@ void xlsx_consumer::read_worksheet_sheetdata() } } stack_.pop_back(); + + } worksheet xlsx_consumer::read_worksheet_end(const std::string &rel_id) @@ -1258,6 +1289,17 @@ worksheet xlsx_consumer::read_worksheet_end(const std::string &rel_id) manifest.relationship(sheet_path, relationship_type::printer_settings)}); } + + for (auto array_formula : array_formulae_) + { + for (auto row : ws.range(array_formula.first)) + { + for (auto cell : row) + { + cell.formula(array_formula.second); + } + } + } return ws; } @@ -1356,10 +1398,7 @@ bool xlsx_consumer::has_cell() auto has_value = false; auto value_string = std::string(); - - auto has_formula = false; - auto has_shared_formula = false; - auto formula_value_string = std::string(); + auto formula_string = std::string(); while (in_element(qn("spreadsheetml", "c"))) { @@ -1372,17 +1411,56 @@ bool xlsx_consumer::has_cell() } else if (current_element == qn("spreadsheetml", "f")) // CT_CellFormula { - has_formula = true; + auto has_shared_formula = false; + auto has_array_formula = false; + auto is_master_cell = false; + auto shared_formula_index = 0; + auto formula_range = range_reference(); if (parser().attribute_present("t")) { - has_shared_formula = parser().attribute("t") == "shared"; + auto formula_type = parser().attribute("t"); + if (formula_type == "shared") + { + has_shared_formula = true; + shared_formula_index = parser().attribute("si"); + if (parser().attribute_present("ref")) + { + is_master_cell = true; + } + } + else if (formula_type == "array") + { + has_array_formula = true; + formula_range = range_reference(parser().attribute("ref")); + is_master_cell = true; + } } - skip_attributes({"aca", "ref", "dt2D", "dtr", "del1", - "del2", "r1", "r2", "ca", "si", "bx"}); + skip_attributes({"aca", "dt2D", "dtr", "del1", "del2", "r1", + "r2", "ca", "bx"}); - formula_value_string = read_text(); + formula_string = read_text(); + + if (is_master_cell) + { + if (has_shared_formula) + { + shared_formulae_[shared_formula_index] = formula_string; + } + else if (has_array_formula) + { + array_formulae_[formula_range.to_string()] = formula_string; + } + } + else if (has_shared_formula) + { + auto shared_formula = shared_formulae_.find(shared_formula_index); + if (shared_formula != shared_formulae_.end()) + { + formula_string = shared_formula->second; + } + } } else if (current_element == qn("spreadsheetml", "is")) // CT_Rst { @@ -1401,9 +1479,9 @@ bool xlsx_consumer::has_cell() expect_end_element(qn("spreadsheetml", "c")); - if (has_formula && !has_shared_formula) + if (!formula_string.empty()) { - cell.formula(formula_value_string); + cell.formula(formula_string); } if (has_value) diff --git a/source/detail/serialization/xlsx_consumer.hpp b/source/detail/serialization/xlsx_consumer.hpp index e3d6b2aa..914b76e9 100644 --- a/source/detail/serialization/xlsx_consumer.hpp +++ b/source/detail/serialization/xlsx_consumer.hpp @@ -45,6 +45,7 @@ class manifest; template class optional; class path; +class range_reference; class relationship; class streaming_workbook_reader; class variant; @@ -417,6 +418,9 @@ private: bool streaming_ = false; std::unique_ptr streaming_cell_; + + std::unordered_map shared_formulae_; + std::unordered_map array_formulae_; detail::worksheet_impl *current_worksheet_; number_serialiser converter_; diff --git a/source/worksheet/range.cpp b/source/worksheet/range.cpp index f8b9685d..dcccb826 100644 --- a/source/worksheet/range.cpp +++ b/source/worksheet/range.cpp @@ -132,12 +132,9 @@ const cell_vector range::vector(std::size_t vector_index) const return cell_vector(ws_, cursor, ref_, order_, skip_null_, false); } -bool range::contains(const cell_reference &ref) +bool range::contains(const cell_reference &cell_ref) { - return ref_.top_left().column_index() <= ref.column_index() - && ref_.bottom_right().column_index() >= ref.column_index() - && ref_.top_left().row() <= ref.row() - && ref_.bottom_right().row() >= ref.row(); + return ref_.contains(cell_ref); } range range::alignment(const xlnt::alignment &new_alignment) diff --git a/source/worksheet/range_reference.cpp b/source/worksheet/range_reference.cpp index 52914634..63da6a6c 100644 --- a/source/worksheet/range_reference.cpp +++ b/source/worksheet/range_reference.cpp @@ -145,6 +145,14 @@ cell_reference range_reference::bottom_right() const return bottom_right_; } +bool range_reference::contains(const cell_reference &ref) const +{ + return top_left_.column_index() <= ref.column_index() + && bottom_right_.column_index() >= ref.column_index() + && top_left_.row() <= ref.row() + && bottom_right_.row() >= ref.row(); +} + bool range_reference::operator==(const std::string &reference_string) const { return *this == range_reference(reference_string); diff --git a/tests/data/18_formulae.xlsx b/tests/data/18_formulae.xlsx new file mode 100644 index 00000000..a9077d63 Binary files /dev/null and b/tests/data/18_formulae.xlsx differ diff --git a/tests/workbook/serialization_test_suite.cpp b/tests/workbook/serialization_test_suite.cpp index 14d27f24..10dbb4fd 100644 --- a/tests/workbook/serialization_test_suite.cpp +++ b/tests/workbook/serialization_test_suite.cpp @@ -23,31 +23,7 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include #include #include @@ -62,7 +38,7 @@ public: register_test(test_produce_simple_excel); register_test(test_save_after_sheet_deletion); register_test(test_write_comments_hyperlinks_formulae); - register_test(test_save_after_clear_all_formulae); + register_test(test_save_after_clear_formula); register_test(test_load_non_xlsx); register_test(test_decrypt_agile); register_test(test_decrypt_libre_office); @@ -304,21 +280,20 @@ public: xlnt_assert(workbook_matches_file(wb, path)); } - void test_save_after_clear_all_formulae() + void test_save_after_clear_formula() { xlnt::workbook wb; - const auto path = path_helper::test_file("10_comments_hyperlinks_formulae.xlsx"); + const auto path = path_helper::test_file("18_formulae.xlsx"); wb.load(path); auto ws1 = wb.sheet_by_index(0); - xlnt_assert(ws1.cell("C1").has_formula()); - xlnt_assert_equals(ws1.cell("C1").formula(), "CONCATENATE(C2,C3)"); - ws1.cell("C1").clear_formula(); - - auto ws2 = wb.sheet_by_index(1); - xlnt_assert(ws2.cell("C1").has_formula()); - xlnt_assert_equals(ws2.cell("C1").formula(), "C2*C3"); - ws2.cell("C1").clear_formula(); + for (auto row : ws1) + { + for (auto cell : row) + { + cell.clear_formula(); + } + } wb.save("clear_formulae.xlsx"); } @@ -425,20 +400,41 @@ public: void test_read_formulae() { xlnt::workbook wb; - const auto path = path_helper::test_file("10_comments_hyperlinks_formulae.xlsx"); + const auto path = path_helper::test_file("18_formulae.xlsx"); wb.load(path); auto ws1 = wb.sheet_by_index(0); - xlnt_assert(ws1.cell("C1").has_formula()); - xlnt_assert_equals(ws1.cell("C1").formula(), "CONCATENATE(C2,C3)"); - xlnt_assert_equals(ws1.cell("C2").value(), "a"); - xlnt_assert_equals(ws1.cell("C3").value(), "b"); + + // test has_formula + // A1:B3 are plain text cells + // C1:G3,I2,F4 have formulae + for (auto row = 1; row < 4; row++) + { + for (auto column = 1; column < 8; column++) + { + if (column < 3) + { + xlnt_assert(!ws1.cell(column, row).has_formula()); + } + else + { + xlnt_assert(ws1.cell(column, row).has_formula()); + } + } + } - auto ws2 = wb.sheet_by_index(1); - xlnt_assert(ws2.cell("C1").has_formula()); - xlnt_assert_equals(ws2.cell("C1").formula(), "C2*C3"); - xlnt_assert_equals(ws2.cell("C2").value(), 2); - xlnt_assert_equals(ws2.cell("C3").value(), 3); + xlnt_assert(ws1.cell("I2").has_formula()); + xlnt_assert(ws1.cell("F4").has_formula()); + + xlnt_assert(!ws1.cell("C9").has_formula()); // empty cell + xlnt_assert(!ws1.cell("F5").has_formula()); // text cell + + xlnt_assert_equals(ws1.cell("C1").formula(), "B1^2"); // basic math with reference + xlnt_assert_equals(ws1.cell("D1").formula(), "CONCATENATE(A1,B1)"); // concat with ref + xlnt_assert_equals(ws1.cell("E1").formula(), "CONCATENATE($C$1,$D$1)"); // concat with absolute ref + xlnt_assert_equals(ws1.cell("F1").formula(), "1+1"); // basic math + xlnt_assert_equals(ws1.cell("G1").formula(), "PI()"); // constant + xlnt_assert_equals(ws1.cell("I2").formula(), "COS(C2)+IMAGINARY(SIN(B2))"); // fancy math } void test_read_headers_and_footers() @@ -657,7 +653,7 @@ public: { xlnt_assert(round_trip_matches_rw(path_helper::test_file("13_custom_heights_widths.xlsx"))); } - + void test_round_trip_rw_encrypted_agile() { xlnt_assert(round_trip_matches_rw(path_helper::test_file("5_encrypted_agile.xlsx"), "secret"));