diff --git a/include/xlnt/utils/numeric.hpp b/include/xlnt/utils/numeric.hpp index 6022484e..1f6c182a 100644 --- a/include/xlnt/utils/numeric.hpp +++ b/include/xlnt/utils/numeric.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -129,10 +130,12 @@ class number_serialiser public: explicit number_serialiser() - : should_convert_comma(std::use_facet>(std::locale{}).decimal_point() == ',') + : should_convert_comma(localeconv()->decimal_point[0] == ',') { } + // for printing to file. + // This matches the output format of excel irrespective of current locale std::string serialise(double d) const { char buf[30]; @@ -144,29 +147,43 @@ public: return std::string(buf, static_cast(len)); } - double deserialise(std::string &s) const noexcept + // replacement for std::to_string / s*printf("%f", ...) + // behaves same irrespective of locale + std::string serialise_short(double d) const { - assert(!s.empty()); + char buf[30]; + int len = snprintf(buf, sizeof(buf), "%f", d); if (should_convert_comma) { - // s.data() doesn't have a non-const overload until c++17, hence this little dance - convert_pt_to_comma(&s[0], s.size()); + convert_comma_to_pt(buf, len); } - return strtod(s.c_str(), nullptr); + return std::string(buf, static_cast(len)); } - double deserialise(const std::string &s) const + double deserialise(const std::string &s, ptrdiff_t *len_converted) const { assert(!s.empty()); + assert(len_converted != nullptr); + char *end_of_convert; if (!should_convert_comma) { - return strtod(s.c_str(), nullptr); + double d = strtod(s.c_str(), &end_of_convert); + *len_converted = end_of_convert - s.c_str(); + return d; } char buf[30]; assert(s.size() < sizeof(buf)); auto copy_end = std::copy(s.begin(), s.end(), buf); convert_pt_to_comma(buf, static_cast(copy_end - buf)); - return strtod(buf, nullptr); + double d = strtod(buf, &end_of_convert); + *len_converted = end_of_convert - buf; + return d; + } + + double deserialise(const std::string &s) const + { + ptrdiff_t ignore; + return deserialise(s, &ignore); } }; diff --git a/source/cell/cell.cpp b/source/cell/cell.cpp index e47f2a93..ed496c8b 100644 --- a/source/cell/cell.cpp +++ b/source/cell/cell.cpp @@ -57,15 +57,16 @@ #include #include #include +#include namespace { std::pair cast_numeric(const std::string &s) { - auto str_end = static_cast(nullptr); - auto result = std::strtod(s.c_str(), &str_end); - - return (str_end != s.c_str() + s.size()) + xlnt::detail::number_serialiser ser; + ptrdiff_t len_convert; + double result = ser.deserialise(s, &len_convert); + return (len_convert != static_cast(s.size())) ? std::make_pair(false, 0.0) : std::make_pair(true, result); } @@ -108,7 +109,7 @@ std::pair cast_time(const std::string &s) } std::vector numeric_components; - + xlnt::detail::number_serialiser ser; for (auto component : time_components) { if (component.empty() || (component.substr(0, component.find('.')).size() > 2)) @@ -123,9 +124,7 @@ std::pair cast_time(const std::string &s) return {false, result}; } } - - auto without_leading_zero = component.front() == '0' ? component.substr(1) : component; - auto numeric = std::stod(without_leading_zero); + auto numeric = ser.deserialise(component); numeric_components.push_back(numeric); } diff --git a/source/detail/number_format/number_formatter.cpp b/source/detail/number_format/number_formatter.cpp index b116fd0e..7ffcd7c5 100644 --- a/source/detail/number_format/number_formatter.cpp +++ b/source/detail/number_format/number_formatter.cpp @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -622,7 +623,8 @@ void number_format_parser::parse() value = token.string.substr(1); } - section.condition.value = std::stod(value); + detail::number_serialiser ser; + section.condition.value = ser.deserialise(value); break; } @@ -1565,19 +1567,16 @@ std::string number_formatter::fill_placeholders(const format_placeholders &p, do if (p.type == format_placeholders::placeholders_type::general || p.type == format_placeholders::placeholders_type::text) { - result = std::to_string(number); - - while (result.back() == '0') + auto s = serialiser_.serialise_short(number); + while (s.size() > 1 && s.back() == '0') { - result.pop_back(); + s.pop_back(); } - - if (result.back() == '.') + if (s.back() == '.') { - result.pop_back(); + s.pop_back(); } - - return result; + return s; } if (p.percentage) @@ -1636,21 +1635,22 @@ std::string number_formatter::fill_placeholders(const format_placeholders &p, do auto fractional_part = number - integer_part; result = std::fabs(fractional_part) < std::numeric_limits::min() ? std::string(".") - : std::to_string(fractional_part).substr(1); + : serialiser_.serialise_short(fractional_part).substr(1); while (result.back() == '0' || result.size() > (p.num_zeros + p.num_optionals + p.num_spaces + 1)) { result.pop_back(); } - while (result.size() < p.num_zeros + 1) + + if (result.size() < p.num_zeros + 1) { - result.push_back('0'); + result.resize(p.num_zeros + 1, '0'); } - while (result.size() < p.num_zeros + p.num_optionals + p.num_spaces + 1) + if (result.size() < p.num_zeros + p.num_optionals + p.num_spaces + 1) { - result.push_back(' '); + result.resize(p.num_zeros + p.num_optionals + p.num_spaces + 1, ' '); } if (p.percentage) @@ -1689,13 +1689,7 @@ std::string number_formatter::fill_scientific_placeholders(const format_placehol integer_string = std::string(integer_part.num_zeros + integer_part.num_optionals, '0'); } - std::string fractional_string = std::to_string(fraction).substr(1); - - while (fractional_string.size() > fractional_part.num_zeros + fractional_part.num_optionals + 1) - { - fractional_string.pop_back(); - } - + std::string fractional_string = serialiser_.serialise_short(fraction).substr(1, fractional_part.num_zeros + fractional_part.num_optionals + 1); std::string exponent_string = std::to_string(logarithm); while (exponent_string.size() < fractional_part.num_zeros) diff --git a/source/detail/number_format/number_formatter.hpp b/source/detail/number_format/number_formatter.hpp index 6b7a11ea..0f08992e 100644 --- a/source/detail/number_format/number_formatter.hpp +++ b/source/detail/number_format/number_formatter.hpp @@ -28,6 +28,7 @@ #include #include +#include namespace xlnt { namespace detail { @@ -691,6 +692,7 @@ private: number_format_parser parser_; std::vector format_; xlnt::calendar calendar_; + xlnt::detail::number_serialiser serialiser_; }; } // namespace detail diff --git a/source/detail/serialization/serialisation_helpers.hpp b/source/detail/serialization/serialisation_helpers.hpp new file mode 100644 index 00000000..69682b94 --- /dev/null +++ b/source/detail/serialization/serialisation_helpers.hpp @@ -0,0 +1,96 @@ +#ifndef XLNT_DETAIL_SERIALISATION_HELPERS_HPP +#define XLNT_DETAIL_SERIALISATION_HELPERS_HPP + +#include +#include +#include + +namespace xlnt { +namespace detail { + +/// parsing assumptions used by the following functions +/// - on entry, the start element for the element has been consumed by parser->next +/// - on exit, the closing element has been consumed by parser->next +/// using these assumptions, the following functions DO NOT use parser->peek (SLOW!!!) +/// probable further gains from not building an attribute map and using the attribute events instead as the impl just iterates the map + +/// 'r' == cell reference e.g. 'A1' +/// https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oe376/db11a912-b1cb-4dff-b46d-9bedfd10cef0 +/// +/// a lightweight version of xlnt::cell_reference with no extre functionality (absolute/relative, ...) +/// many thousands are created during (de)serialisation, so even minor overhead is noticable +struct Cell_Reference +{ + // the obvious ctor + explicit Cell_Reference(xlnt::row_t row_arg, xlnt::column_t::index_t column_arg) noexcept + : row(row_arg), column(column_arg) + { + } + + // the common case. row # is already known during parsing (from parent element) + // just need to evaluate the column + explicit Cell_Reference(xlnt::row_t row_arg, const std::string &reference) noexcept + : row(row_arg) + { + // only three characters allowed for the column + // assumption: + // - regex pattern match: [A-Z]{1,3}\d{1,7} + const char *iter = reference.c_str(); + int temp = *iter - 'A' + 1; // 'A' == 1 + ++iter; + if (*iter >= 'A') // second char + { + temp *= 26; // LHS values are more significant + temp += *iter - 'A' + 1; // 'A' == 1 + ++iter; + if (*iter >= 'A') // third char + { + temp *= 26; // LHS values are more significant + temp += *iter - 'A' + 1; // 'A' == 1 + } + } + column = static_cast(temp); + } + + // for sorting purposes + bool operator<(const Cell_Reference &rhs) + { + // row first, serialisation is done by row then column + if (row < rhs.row) + { + return true; + } + else if (rhs.row < row) + { + return false; + } + // same row, column comparison + return column < rhs.column; + } + + xlnt::row_t row; // range:[1, 1048576] + xlnt::column_t::index_t column; // range:["A", "ZZZ"] -> [1, 26^3] -> [1, 17576] +}; + +// inside element +// https://docs.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet.cell?view=openxml-2.8.1 +struct Cell +{ + // sort cells by location, row first + bool operator<(const Cell &rhs) + { + return ref < rhs.ref; + } + + bool is_phonetic = false; // 'ph' + xlnt::cell_type type = xlnt::cell_type::number; // 't' + int cell_metatdata_idx = -1; // 'cm' + int style_index = -1; // 's' + Cell_Reference ref{0, 0}; // 'r' + std::string value; // OR + std::string formula_string; // +}; + +} // namespace detail +} // namespace xlnt +#endif \ No newline at end of file diff --git a/source/detail/serialization/xlsx_consumer.cpp b/source/detail/serialization/xlsx_consumer.cpp index bde76349..62d9c418 100644 --- a/source/detail/serialization/xlsx_consumer.cpp +++ b/source/detail/serialization/xlsx_consumer.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -127,74 +128,14 @@ void set_style_by_xfid(const std::vector &styles, } } -/// parsing assumptions used by the following functions -/// - on entry, the start element for the element has been consumed by parser->next -/// - on exit, the closing element has been consumed by parser->next -/// using these assumptions, the following functions DO NOT use parser->peek (SLOW!!!) -/// probable further gains from not building an attribute map and using the attribute events instead as the impl just iterates the map - -/// 'r' == cell reference e.g. 'A1' -/// https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oe376/db11a912-b1cb-4dff-b46d-9bedfd10cef0 -/// -/// a lightweight version of xlnt::cell_reference with no extre functionality (absolute/relative, ...) -/// many thousands are created during parsing, so even minor overhead is noticable -struct Cell_Reference -{ - // not commonly used, added as the obvious ctor - explicit Cell_Reference(xlnt::row_t row_arg, xlnt::column_t::index_t column_arg) noexcept - : row(row_arg), column(column_arg) - { - } - // the common case. row # is already known during parsing (from parent element) - // just need to evaluate the column - explicit Cell_Reference(xlnt::row_t row_arg, const std::string &reference) noexcept - : row(row_arg) - { - // only three characters allowed for the column - // assumption: - // - regex pattern match: [A-Z]{1,3}\d{1,7} - const char *iter = reference.c_str(); - int temp = *iter - 'A' + 1; // 'A' == 1 - ++iter; - if (*iter >= 'A') // second char - { - temp *= 26; // LHS values are more significant - temp += *iter - 'A' + 1; // 'A' == 1 - ++iter; - if (*iter >= 'A') // third char - { - temp *= 26; // LHS values are more significant - temp += *iter - 'A' + 1; // 'A' == 1 - } - } - column = static_cast(temp); - } - - xlnt::row_t row; // range:[1, 1048576] - xlnt::column_t::index_t column; // range:["A", "ZZZ"] -> [1, 26^3] -> [1, 17576] -}; - -// inside element -// https://docs.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet.cell?view=openxml-2.8.1 -struct Cell -{ - bool is_phonetic = false; // 'ph' - xlnt::cell::type type = xlnt::cell::type::number; // 't' - int cell_metatdata_idx = -1; // 'cm' - int style_index = -1; // 's' - Cell_Reference ref{0, 0}; // 'r' - std::string value; // OR - std::string formula_string; // -}; - // element struct Sheet_Data { std::vector> parsed_rows; - std::vector parsed_cells; + std::vector parsed_cells; }; -xlnt::cell::type type_from_string(const std::string &str) +xlnt::cell_type type_from_string(const std::string &str) { if (string_equal(str, "s")) { @@ -223,14 +164,14 @@ xlnt::cell::type type_from_string(const std::string &str) return xlnt::cell::type::shared_string; } -Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser) +xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser) { - Cell c; + xlnt::detail::Cell c; for (auto &attr : parser->attribute_map()) { if (string_equal(attr.first.name(), "r")) { - c.ref = Cell_Reference(row_arg, attr.second.value); + c.ref = xlnt::detail::Cell_Reference(row_arg, attr.second.value); } else if (string_equal(attr.first.name(), "t")) { @@ -251,7 +192,8 @@ Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser) } int level = 1; // nesting level // 1 == - // 2 == // + // 2 == / + // 3 == // exit loop at while (level > 0) { @@ -272,7 +214,6 @@ Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser) if (level == 2) { // -> numeric values - // -> inline string if (string_equal(parser->name(), "v")) { c.value += std::move(parser->value()); @@ -307,7 +248,7 @@ Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser) } // inside element -std::pair parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector &parsed_cells) +std::pair parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector &parsed_cells) { std::pair props; for (auto &attr : parser->attribute_map()) diff --git a/source/detail/serialization/xlsx_producer.cpp b/source/detail/serialization/xlsx_producer.cpp index 1646b70b..e6683a31 100644 --- a/source/detail/serialization/xlsx_producer.cpp +++ b/source/detail/serialization/xlsx_producer.cpp @@ -80,7 +80,9 @@ namespace detail { xlsx_producer::xlsx_producer(const workbook &target) : source_(target), - current_part_stream_(nullptr) + current_part_stream_(nullptr), + current_cell_(nullptr), + current_worksheet_(nullptr) { } @@ -918,8 +920,6 @@ void xlsx_producer::write_shared_string_table(const relationship & /*rel*/) // todo: is there a more elegant way to get this number? std::size_t string_count = 0; -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wrange-loop-analysis" for (const auto ws : source_) { auto dimension = ws.calculate_dimension(); @@ -929,8 +929,8 @@ void xlsx_producer::write_shared_string_table(const relationship & /*rel*/) { while (current_cell.column() <= dimension.bottom_right().column()) { - if (ws.has_cell(current_cell) - && ws.cell(current_cell).data_type() == cell::type::shared_string) + auto c_iter = ws.d_->cell_map_.find(current_cell); + if (c_iter != ws.d_->cell_map_.end() && c_iter->second.type_ == cell_type::shared_string) { ++string_count; } @@ -942,7 +942,6 @@ void xlsx_producer::write_shared_string_table(const relationship & /*rel*/) current_cell.column_index(dimension.top_left().column_index()); } } -#pragma clang diagnostic pop write_attribute("count", string_count); write_attribute("uniqueCount", source_.shared_strings_by_id().size()); @@ -2814,33 +2813,12 @@ void xlsx_producer::write_worksheet(const relationship &rel) { write_start_element(xmlns, "pageMargins"); - // TODO: there must be a better way to do this - auto remove_trailing_zeros = [](const std::string &n) -> std::string { - auto decimal = n.find('.'); - - if (decimal == std::string::npos) return n; - - auto index = n.size() - 1; - - while (index >= decimal && n[index] == '0') - { - index--; - } - - if (index == decimal) - { - return n.substr(0, decimal); - } - - return n.substr(0, index + 1); - }; - - write_attribute("left", remove_trailing_zeros(std::to_string(ws.page_margins().left()))); - write_attribute("right", remove_trailing_zeros(std::to_string(ws.page_margins().right()))); - write_attribute("top", remove_trailing_zeros(std::to_string(ws.page_margins().top()))); - write_attribute("bottom", remove_trailing_zeros(std::to_string(ws.page_margins().bottom()))); - write_attribute("header", remove_trailing_zeros(std::to_string(ws.page_margins().header()))); - write_attribute("footer", remove_trailing_zeros(std::to_string(ws.page_margins().footer()))); + write_attribute("left", ws.page_margins().left()); + write_attribute("right", ws.page_margins().right()); + write_attribute("top", ws.page_margins().top()); + write_attribute("bottom", ws.page_margins().bottom()); + write_attribute("header", ws.page_margins().header()); + write_attribute("footer", ws.page_margins().footer()); write_end_element(xmlns, "pageMargins"); } diff --git a/source/detail/serialization/xlsx_producer.hpp b/source/detail/serialization/xlsx_producer.hpp index 4660f3c2..80afdf01 100644 --- a/source/detail/serialization/xlsx_producer.hpp +++ b/source/detail/serialization/xlsx_producer.hpp @@ -26,11 +26,12 @@ #include #include #include +#include #include +#include #include #include -#include namespace xml { class serializer; @@ -169,19 +170,34 @@ private: void write_namespace(const std::string &ns, const std::string &prefix); - template + // std::string attribute name + // not integer or float type + template ::value>::type> void write_attribute(const std::string &name, T value) { current_part_serializer_->attribute(name, value); } - template + void write_attribute(const std::string &name, double value) + { + current_part_serializer_->attribute(name, converter_.serialise(value)); + } + + // qname attribute name + // not integer or float type + template ::value>::type> void write_attribute(const xml::qname &name, T value) { current_part_serializer_->attribute(name, value); } - template + void write_attribute(const xml::qname &name, double value) + { + current_part_serializer_->attribute(name, converter_.serialise(value)); + } + + + template void write_characters(T characters, bool preserve_whitespace = false) { if (preserve_whitespace)