Merge pull request #467 from Crzyrndm/experimental/serialisation

locale aware double->string conversions
This commit is contained in:
Thomas Fussell 2020-06-08 19:40:08 -04:00 committed by GitHub
commit 8d2a8e161b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 187 additions and 144 deletions

View File

@ -27,6 +27,7 @@
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <cstddef>
#include <limits> #include <limits>
#include <sstream> #include <sstream>
#include <type_traits> #include <type_traits>
@ -129,10 +130,12 @@ class number_serialiser
public: public:
explicit number_serialiser() explicit number_serialiser()
: should_convert_comma(std::use_facet<std::numpunct<char>>(std::locale{}).decimal_point() == ',') : should_convert_comma(localeconv()->decimal_point[0] == ',')
{ {
} }
// for printing to file.
// This matches the output format of excel irrespective of current locale
std::string serialise(double d) const std::string serialise(double d) const
{ {
char buf[30]; char buf[30];
@ -144,29 +147,43 @@ public:
return std::string(buf, static_cast<size_t>(len)); return std::string(buf, static_cast<size_t>(len));
} }
double deserialise(std::string &s) const noexcept // replacement for std::to_string / s*printf("%f", ...)
// behaves same irrespective of locale
std::string serialise_short(double d) const
{ {
assert(!s.empty()); char buf[30];
int len = snprintf(buf, sizeof(buf), "%f", d);
if (should_convert_comma) if (should_convert_comma)
{ {
// s.data() doesn't have a non-const overload until c++17, hence this little dance convert_comma_to_pt(buf, len);
convert_pt_to_comma(&s[0], s.size());
} }
return strtod(s.c_str(), nullptr); return std::string(buf, static_cast<size_t>(len));
} }
double deserialise(const std::string &s) const double deserialise(const std::string &s, ptrdiff_t *len_converted) const
{ {
assert(!s.empty()); assert(!s.empty());
assert(len_converted != nullptr);
char *end_of_convert;
if (!should_convert_comma) if (!should_convert_comma)
{ {
return strtod(s.c_str(), nullptr); double d = strtod(s.c_str(), &end_of_convert);
*len_converted = end_of_convert - s.c_str();
return d;
} }
char buf[30]; char buf[30];
assert(s.size() < sizeof(buf)); assert(s.size() < sizeof(buf));
auto copy_end = std::copy(s.begin(), s.end(), buf); auto copy_end = std::copy(s.begin(), s.end(), buf);
convert_pt_to_comma(buf, static_cast<size_t>(copy_end - buf)); convert_pt_to_comma(buf, static_cast<size_t>(copy_end - buf));
return strtod(buf, nullptr); double d = strtod(buf, &end_of_convert);
*len_converted = end_of_convert - buf;
return d;
}
double deserialise(const std::string &s) const
{
ptrdiff_t ignore;
return deserialise(s, &ignore);
} }
}; };

View File

@ -57,15 +57,16 @@
#include <detail/implementations/hyperlink_impl.hpp> #include <detail/implementations/hyperlink_impl.hpp>
#include <detail/implementations/stylesheet.hpp> #include <detail/implementations/stylesheet.hpp>
#include <detail/implementations/worksheet_impl.hpp> #include <detail/implementations/worksheet_impl.hpp>
#include <xlnt/utils/numeric.hpp>
namespace { namespace {
std::pair<bool, double> cast_numeric(const std::string &s) std::pair<bool, double> cast_numeric(const std::string &s)
{ {
auto str_end = static_cast<char *>(nullptr); xlnt::detail::number_serialiser ser;
auto result = std::strtod(s.c_str(), &str_end); ptrdiff_t len_convert;
double result = ser.deserialise(s, &len_convert);
return (str_end != s.c_str() + s.size()) return (len_convert != static_cast<ptrdiff_t>(s.size()))
? std::make_pair(false, 0.0) ? std::make_pair(false, 0.0)
: std::make_pair(true, result); : std::make_pair(true, result);
} }
@ -108,7 +109,7 @@ std::pair<bool, xlnt::time> cast_time(const std::string &s)
} }
std::vector<double> numeric_components; std::vector<double> numeric_components;
xlnt::detail::number_serialiser ser;
for (auto component : time_components) for (auto component : time_components)
{ {
if (component.empty() || (component.substr(0, component.find('.')).size() > 2)) if (component.empty() || (component.substr(0, component.find('.')).size() > 2))
@ -123,9 +124,7 @@ std::pair<bool, xlnt::time> cast_time(const std::string &s)
return {false, result}; return {false, result};
} }
} }
auto numeric = ser.deserialise(component);
auto without_leading_zero = component.front() == '0' ? component.substr(1) : component;
auto numeric = std::stod(without_leading_zero);
numeric_components.push_back(numeric); numeric_components.push_back(numeric);
} }

View File

@ -26,6 +26,7 @@
#include <cmath> #include <cmath>
#include <xlnt/utils/exceptions.hpp> #include <xlnt/utils/exceptions.hpp>
#include <xlnt/utils/numeric.hpp>
#include <detail/default_case.hpp> #include <detail/default_case.hpp>
#include <detail/number_format/number_formatter.hpp> #include <detail/number_format/number_formatter.hpp>
@ -622,7 +623,8 @@ void number_format_parser::parse()
value = token.string.substr(1); value = token.string.substr(1);
} }
section.condition.value = std::stod(value); detail::number_serialiser ser;
section.condition.value = ser.deserialise(value);
break; break;
} }
@ -1565,19 +1567,16 @@ std::string number_formatter::fill_placeholders(const format_placeholders &p, do
if (p.type == format_placeholders::placeholders_type::general if (p.type == format_placeholders::placeholders_type::general
|| p.type == format_placeholders::placeholders_type::text) || p.type == format_placeholders::placeholders_type::text)
{ {
result = std::to_string(number); auto s = serialiser_.serialise_short(number);
while (s.size() > 1 && s.back() == '0')
while (result.back() == '0')
{ {
result.pop_back(); s.pop_back();
} }
if (s.back() == '.')
if (result.back() == '.')
{ {
result.pop_back(); s.pop_back();
} }
return s;
return result;
} }
if (p.percentage) if (p.percentage)
@ -1636,21 +1635,22 @@ std::string number_formatter::fill_placeholders(const format_placeholders &p, do
auto fractional_part = number - integer_part; auto fractional_part = number - integer_part;
result = std::fabs(fractional_part) < std::numeric_limits<double>::min() result = std::fabs(fractional_part) < std::numeric_limits<double>::min()
? std::string(".") ? std::string(".")
: std::to_string(fractional_part).substr(1); : serialiser_.serialise_short(fractional_part).substr(1);
while (result.back() == '0' || result.size() > (p.num_zeros + p.num_optionals + p.num_spaces + 1)) while (result.back() == '0' || result.size() > (p.num_zeros + p.num_optionals + p.num_spaces + 1))
{ {
result.pop_back(); result.pop_back();
} }
while (result.size() < p.num_zeros + 1)
if (result.size() < p.num_zeros + 1)
{ {
result.push_back('0'); result.resize(p.num_zeros + 1, '0');
} }
while (result.size() < p.num_zeros + p.num_optionals + p.num_spaces + 1) if (result.size() < p.num_zeros + p.num_optionals + p.num_spaces + 1)
{ {
result.push_back(' '); result.resize(p.num_zeros + p.num_optionals + p.num_spaces + 1, ' ');
} }
if (p.percentage) if (p.percentage)
@ -1689,13 +1689,7 @@ std::string number_formatter::fill_scientific_placeholders(const format_placehol
integer_string = std::string(integer_part.num_zeros + integer_part.num_optionals, '0'); integer_string = std::string(integer_part.num_zeros + integer_part.num_optionals, '0');
} }
std::string fractional_string = std::to_string(fraction).substr(1); std::string fractional_string = serialiser_.serialise_short(fraction).substr(1, fractional_part.num_zeros + fractional_part.num_optionals + 1);
while (fractional_string.size() > fractional_part.num_zeros + fractional_part.num_optionals + 1)
{
fractional_string.pop_back();
}
std::string exponent_string = std::to_string(logarithm); std::string exponent_string = std::to_string(logarithm);
while (exponent_string.size() < fractional_part.num_zeros) while (exponent_string.size() < fractional_part.num_zeros)

View File

@ -28,6 +28,7 @@
#include <vector> #include <vector>
#include <xlnt/utils/datetime.hpp> #include <xlnt/utils/datetime.hpp>
#include <xlnt/utils/numeric.hpp>
namespace xlnt { namespace xlnt {
namespace detail { namespace detail {
@ -691,6 +692,7 @@ private:
number_format_parser parser_; number_format_parser parser_;
std::vector<format_code> format_; std::vector<format_code> format_;
xlnt::calendar calendar_; xlnt::calendar calendar_;
xlnt::detail::number_serialiser serialiser_;
}; };
} // namespace detail } // namespace detail

View File

@ -0,0 +1,96 @@
#ifndef XLNT_DETAIL_SERIALISATION_HELPERS_HPP
#define XLNT_DETAIL_SERIALISATION_HELPERS_HPP
#include <xlnt/cell/cell_type.hpp>
#include <xlnt/cell/index_types.hpp>
#include <string>
namespace xlnt {
namespace detail {
/// parsing assumptions used by the following functions
/// - on entry, the start element for the element has been consumed by parser->next
/// - on exit, the closing element has been consumed by parser->next
/// using these assumptions, the following functions DO NOT use parser->peek (SLOW!!!)
/// probable further gains from not building an attribute map and using the attribute events instead as the impl just iterates the map
/// 'r' == cell reference e.g. 'A1'
/// https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oe376/db11a912-b1cb-4dff-b46d-9bedfd10cef0
///
/// a lightweight version of xlnt::cell_reference with no extra functionality (absolute/relative, ...)
/// many thousands are created during (de)serialisation, so even minor overhead is noticeable
struct Cell_Reference
{
    // the obvious ctor: row and (1-based) column index are both already known
    explicit Cell_Reference(xlnt::row_t row_arg, xlnt::column_t::index_t column_arg) noexcept
        : row(row_arg), column(column_arg)
    {
    }

    // the common case. row # is already known during parsing (from parent <row> element)
    // just need to evaluate the column letters at the start of `reference`
    //
    // assumption (NOT validated here):
    // - regex pattern match: [A-Z]{1,3}\d{1,7}
    // input that does not match (empty string, lower case letters, '$' markers, ...)
    // is outside the contract of this constructor
    explicit Cell_Reference(xlnt::row_t row_arg, const std::string &reference) noexcept
        : row(row_arg)
    {
        // only three characters allowed for the column
        // the letters are read as a bijective base-26 number: "A" == 1, "Z" == 26, "AA" == 27
        const char *iter = reference.c_str();
        int temp = *iter - 'A' + 1; // 'A' == 1
        ++iter;
        if (*iter >= 'A') // second char is a letter (in ASCII, digits and '\0' compare below 'A')
        {
            temp *= 26; // LHS values are more significant
            temp += *iter - 'A' + 1; // 'A' == 1
            ++iter;
            if (*iter >= 'A') // third char
            {
                temp *= 26; // LHS values are more significant
                temp += *iter - 'A' + 1; // 'A' == 1
            }
        }
        column = static_cast<xlnt::column_t::index_t>(temp);
    }

    // for sorting purposes
    // const-qualified so it can be called on const objects and through
    // std::less / ordered containers / binary-search algorithms
    bool operator<(const Cell_Reference &rhs) const
    {
        // row first, serialisation is done by row then column
        if (row < rhs.row)
        {
            return true;
        }
        if (rhs.row < row)
        {
            return false;
        }
        // same row, column comparison
        return column < rhs.column;
    }

    xlnt::row_t row; // range:[1, 1048576]
    xlnt::column_t::index_t column; // range:["A", "ZZZ"] -> [1, 26 + 26^2 + 26^3] -> [1, 18278]
};
// <c> inside <row> element
// https://docs.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet.cell?view=openxml-2.8.1
struct Cell
{
    // sort cells by location, row first, then column
    // const-qualified so it can be called on const objects and through
    // std::less / ordered containers / binary-search algorithms.
    // The fields of `ref` are compared directly so that this const overload does not
    // rely on the const-qualification of Cell_Reference::operator<
    bool operator<(const Cell &rhs) const
    {
        if (ref.row < rhs.ref.row)
        {
            return true;
        }
        if (rhs.ref.row < ref.row)
        {
            return false;
        }
        // same row, column comparison
        return ref.column < rhs.ref.column;
    }

    bool is_phonetic = false; // 'ph'
    xlnt::cell_type type = xlnt::cell_type::number; // 't'
    int cell_metatdata_idx = -1; // 'cm'
    int style_index = -1; // 's'
    Cell_Reference ref{0, 0}; // 'r'
    std::string value; // <v> OR <is>
    std::string formula_string; // <f>
};
} // namespace detail
} // namespace xlnt
#endif

View File

@ -40,6 +40,7 @@
#include <detail/header_footer/header_footer_code.hpp> #include <detail/header_footer/header_footer_code.hpp>
#include <detail/implementations/workbook_impl.hpp> #include <detail/implementations/workbook_impl.hpp>
#include <detail/serialization/custom_value_traits.hpp> #include <detail/serialization/custom_value_traits.hpp>
#include <detail/serialization/serialisation_helpers.hpp>
#include <detail/serialization/vector_streambuf.hpp> #include <detail/serialization/vector_streambuf.hpp>
#include <detail/serialization/xlsx_consumer.hpp> #include <detail/serialization/xlsx_consumer.hpp>
#include <detail/serialization/zstream.hpp> #include <detail/serialization/zstream.hpp>
@ -127,74 +128,14 @@ void set_style_by_xfid(const std::vector<style_id_pair> &styles,
} }
} }
/// parsing assumptions used by the following functions
/// - on entry, the start element for the element has been consumed by parser->next
/// - on exit, the closing element has been consumed by parser->next
/// using these assumptions, the following functions DO NOT use parser->peek (SLOW!!!)
/// probable further gains from not building an attribute map and using the attribute events instead as the impl just iterates the map
/// 'r' == cell reference e.g. 'A1'
/// https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oe376/db11a912-b1cb-4dff-b46d-9bedfd10cef0
///
/// a lightweight version of xlnt::cell_reference with no extra functionality (absolute/relative, ...)
/// many thousands are created during parsing, so even minor overhead is noticeable
struct Cell_Reference
{
    // direct construction from a known row and column index (not the common path)
    explicit Cell_Reference(xlnt::row_t row_arg, xlnt::column_t::index_t column_arg) noexcept
        : row(row_arg), column(column_arg)
    {
    }

    // the usual path while parsing: the row comes from the enclosing <row> element,
    // so only the leading column letters of `reference` have to be decoded
    //
    // assumption (not validated):
    // - regex pattern match: [A-Z]{1,3}\d{1,7}
    explicit Cell_Reference(xlnt::row_t row_arg, const std::string &reference) noexcept
        : row(row_arg)
    {
        const char *cursor = reference.c_str();
        // first letter is always consumed, 'A' == 1
        int index = *cursor - 'A' + 1;
        // at most two further letters; each one shifts the earlier letters up by a factor of 26
        for (int extra = 0; extra < 2 && cursor[1] >= 'A'; ++extra)
        {
            ++cursor;
            index = index * 26 + (*cursor - 'A' + 1);
        }
        column = static_cast<xlnt::column_t::index_t>(index);
    }

    xlnt::row_t row; // range:[1, 1048576]
    xlnt::column_t::index_t column; // range:["A", "ZZZ"] -> [1, 26 + 26^2 + 26^3] -> [1, 18278]
};
// <c> inside <row> element
// https://docs.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet.cell?view=openxml-2.8.1
// Plain record of what was read for one <c> element: its attributes plus the
// text of its child elements. Members are default-initialised in place.
struct Cell
{
bool is_phonetic = false; // 'ph' attribute
xlnt::cell::type type = xlnt::cell::type::number; // 't' attribute, defaults to number
int cell_metatdata_idx = -1; // 'cm' attribute; NOTE(review): -1 presumably means "not present" - confirm
int style_index = -1; // 's' attribute; NOTE(review): -1 presumably means "not present" - confirm
Cell_Reference ref{0, 0}; // 'r' attribute, starts as row 0 / column 0 until parsed
std::string value; // text of <v> OR <is>
std::string formula_string; // text of <f>
};
// <sheetData> element // <sheetData> element
struct Sheet_Data struct Sheet_Data
{ {
std::vector<std::pair<xlnt::row_properties, xlnt::row_t>> parsed_rows; std::vector<std::pair<xlnt::row_properties, xlnt::row_t>> parsed_rows;
std::vector<Cell> parsed_cells; std::vector<xlnt::detail::Cell> parsed_cells;
}; };
xlnt::cell::type type_from_string(const std::string &str) xlnt::cell_type type_from_string(const std::string &str)
{ {
if (string_equal(str, "s")) if (string_equal(str, "s"))
{ {
@ -223,14 +164,14 @@ xlnt::cell::type type_from_string(const std::string &str)
return xlnt::cell::type::shared_string; return xlnt::cell::type::shared_string;
} }
Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser) xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
{ {
Cell c; xlnt::detail::Cell c;
for (auto &attr : parser->attribute_map()) for (auto &attr : parser->attribute_map())
{ {
if (string_equal(attr.first.name(), "r")) if (string_equal(attr.first.name(), "r"))
{ {
c.ref = Cell_Reference(row_arg, attr.second.value); c.ref = xlnt::detail::Cell_Reference(row_arg, attr.second.value);
} }
else if (string_equal(attr.first.name(), "t")) else if (string_equal(attr.first.name(), "t"))
{ {
@ -251,7 +192,8 @@ Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
} }
int level = 1; // nesting level int level = 1; // nesting level
// 1 == <c> // 1 == <c>
// 2 == <v>/<is>/<f> // 2 == <v>/<f>
// 3 == <is><t>
// exit loop at </c> // exit loop at </c>
while (level > 0) while (level > 0)
{ {
@ -272,7 +214,6 @@ Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
if (level == 2) if (level == 2)
{ {
// <v> -> numeric values // <v> -> numeric values
// <is><t> -> inline string
if (string_equal(parser->name(), "v")) if (string_equal(parser->name(), "v"))
{ {
c.value += std::move(parser->value()); c.value += std::move(parser->value());
@ -307,7 +248,7 @@ Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
} }
// <row> inside <sheetData> element // <row> inside <sheetData> element
std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector<Cell> &parsed_cells) std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector<xlnt::detail::Cell> &parsed_cells)
{ {
std::pair<xlnt::row_properties, int> props; std::pair<xlnt::row_properties, int> props;
for (auto &attr : parser->attribute_map()) for (auto &attr : parser->attribute_map())

View File

@ -80,7 +80,9 @@ namespace detail {
xlsx_producer::xlsx_producer(const workbook &target) xlsx_producer::xlsx_producer(const workbook &target)
: source_(target), : source_(target),
current_part_stream_(nullptr) current_part_stream_(nullptr),
current_cell_(nullptr),
current_worksheet_(nullptr)
{ {
} }
@ -918,8 +920,6 @@ void xlsx_producer::write_shared_string_table(const relationship & /*rel*/)
// todo: is there a more elegant way to get this number? // todo: is there a more elegant way to get this number?
std::size_t string_count = 0; std::size_t string_count = 0;
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wrange-loop-analysis"
for (const auto ws : source_) for (const auto ws : source_)
{ {
auto dimension = ws.calculate_dimension(); auto dimension = ws.calculate_dimension();
@ -929,8 +929,8 @@ void xlsx_producer::write_shared_string_table(const relationship & /*rel*/)
{ {
while (current_cell.column() <= dimension.bottom_right().column()) while (current_cell.column() <= dimension.bottom_right().column())
{ {
if (ws.has_cell(current_cell) auto c_iter = ws.d_->cell_map_.find(current_cell);
&& ws.cell(current_cell).data_type() == cell::type::shared_string) if (c_iter != ws.d_->cell_map_.end() && c_iter->second.type_ == cell_type::shared_string)
{ {
++string_count; ++string_count;
} }
@ -942,7 +942,6 @@ void xlsx_producer::write_shared_string_table(const relationship & /*rel*/)
current_cell.column_index(dimension.top_left().column_index()); current_cell.column_index(dimension.top_left().column_index());
} }
} }
#pragma clang diagnostic pop
write_attribute("count", string_count); write_attribute("count", string_count);
write_attribute("uniqueCount", source_.shared_strings_by_id().size()); write_attribute("uniqueCount", source_.shared_strings_by_id().size());
@ -2814,33 +2813,12 @@ void xlsx_producer::write_worksheet(const relationship &rel)
{ {
write_start_element(xmlns, "pageMargins"); write_start_element(xmlns, "pageMargins");
// TODO: there must be a better way to do this write_attribute("left", ws.page_margins().left());
auto remove_trailing_zeros = [](const std::string &n) -> std::string { write_attribute("right", ws.page_margins().right());
auto decimal = n.find('.'); write_attribute("top", ws.page_margins().top());
write_attribute("bottom", ws.page_margins().bottom());
if (decimal == std::string::npos) return n; write_attribute("header", ws.page_margins().header());
write_attribute("footer", ws.page_margins().footer());
auto index = n.size() - 1;
while (index >= decimal && n[index] == '0')
{
index--;
}
if (index == decimal)
{
return n.substr(0, decimal);
}
return n.substr(0, index + 1);
};
write_attribute("left", remove_trailing_zeros(std::to_string(ws.page_margins().left())));
write_attribute("right", remove_trailing_zeros(std::to_string(ws.page_margins().right())));
write_attribute("top", remove_trailing_zeros(std::to_string(ws.page_margins().top())));
write_attribute("bottom", remove_trailing_zeros(std::to_string(ws.page_margins().bottom())));
write_attribute("header", remove_trailing_zeros(std::to_string(ws.page_margins().header())));
write_attribute("footer", remove_trailing_zeros(std::to_string(ws.page_margins().footer())));
write_end_element(xmlns, "pageMargins"); write_end_element(xmlns, "pageMargins");
} }

View File

@ -26,11 +26,12 @@
#include <cstdint> #include <cstdint>
#include <iostream> #include <iostream>
#include <memory> #include <memory>
#include <type_traits>
#include <vector> #include <vector>
#include <xlnt/utils/numeric.hpp>
#include <detail/constants.hpp> #include <detail/constants.hpp>
#include <detail/external/include_libstudxml.hpp> #include <detail/external/include_libstudxml.hpp>
#include <xlnt/utils/numeric.hpp>
namespace xml { namespace xml {
class serializer; class serializer;
@ -169,18 +170,33 @@ private:
void write_namespace(const std::string &ns, const std::string &prefix); void write_namespace(const std::string &ns, const std::string &prefix);
template<typename T> // std::string attribute name
// not integer or float type
template <typename T, typename = typename std::enable_if<!std::is_convertible<T, double>::value>::type>
void write_attribute(const std::string &name, T value) void write_attribute(const std::string &name, T value)
{ {
current_part_serializer_->attribute(name, value); current_part_serializer_->attribute(name, value);
} }
template<typename T> void write_attribute(const std::string &name, double value)
{
current_part_serializer_->attribute(name, converter_.serialise(value));
}
// qname attribute name
// not integer or float type
template <typename T, typename = typename std::enable_if<!std::is_convertible<T, double>::value>::type>
void write_attribute(const xml::qname &name, T value) void write_attribute(const xml::qname &name, T value)
{ {
current_part_serializer_->attribute(name, value); current_part_serializer_->attribute(name, value);
} }
void write_attribute(const xml::qname &name, double value)
{
current_part_serializer_->attribute(name, converter_.serialise(value));
}
template <typename T> template <typename T>
void write_characters(T characters, bool preserve_whitespace = false) void write_characters(T characters, bool preserve_whitespace = false)
{ {