Merge pull request #467 from Crzyrndm/experimental/serialisation

locale aware double->string conversions
pull/490/head
Thomas Fussell 2020-06-08 19:40:08 -04:00 committed by GitHub
commit 8d2a8e161b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 187 additions and 144 deletions

View File

@ -27,6 +27,7 @@
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <limits>
#include <sstream>
#include <type_traits>
@ -129,10 +130,12 @@ class number_serialiser
public:
explicit number_serialiser()
: should_convert_comma(std::use_facet<std::numpunct<char>>(std::locale{}).decimal_point() == ',')
: should_convert_comma(localeconv()->decimal_point[0] == ',')
{
}
// for printing to file.
// This matches the output format of excel irrespective of current locale
std::string serialise(double d) const
{
char buf[30];
@ -144,29 +147,43 @@ public:
return std::string(buf, static_cast<size_t>(len));
}
double deserialise(std::string &s) const noexcept
// replacement for std::to_string / s*printf("%f", ...)
// behaves same irrespective of locale
std::string serialise_short(double d) const
{
assert(!s.empty());
char buf[30];
int len = snprintf(buf, sizeof(buf), "%f", d);
if (should_convert_comma)
{
// s.data() doesn't have a non-const overload until c++17, hence this little dance
convert_pt_to_comma(&s[0], s.size());
convert_comma_to_pt(buf, len);
}
return strtod(s.c_str(), nullptr);
return std::string(buf, static_cast<size_t>(len));
}
double deserialise(const std::string &s) const
double deserialise(const std::string &s, ptrdiff_t *len_converted) const
{
assert(!s.empty());
assert(len_converted != nullptr);
char *end_of_convert;
if (!should_convert_comma)
{
return strtod(s.c_str(), nullptr);
double d = strtod(s.c_str(), &end_of_convert);
*len_converted = end_of_convert - s.c_str();
return d;
}
char buf[30];
assert(s.size() < sizeof(buf));
auto copy_end = std::copy(s.begin(), s.end(), buf);
convert_pt_to_comma(buf, static_cast<size_t>(copy_end - buf));
return strtod(buf, nullptr);
double d = strtod(buf, &end_of_convert);
*len_converted = end_of_convert - buf;
return d;
}
// Convenience overload for callers that only need the parsed value.
// Forwards to the two-argument overload and drops the reported
// number of characters that were converted.
double deserialise(const std::string &s) const
{
    ptrdiff_t discarded_length;
    const double parsed = deserialise(s, &discarded_length);
    return parsed;
}
};

View File

@ -57,15 +57,16 @@
#include <detail/implementations/hyperlink_impl.hpp>
#include <detail/implementations/stylesheet.hpp>
#include <detail/implementations/worksheet_impl.hpp>
#include <xlnt/utils/numeric.hpp>
namespace {
std::pair<bool, double> cast_numeric(const std::string &s)
{
auto str_end = static_cast<char *>(nullptr);
auto result = std::strtod(s.c_str(), &str_end);
return (str_end != s.c_str() + s.size())
xlnt::detail::number_serialiser ser;
ptrdiff_t len_convert;
double result = ser.deserialise(s, &len_convert);
return (len_convert != static_cast<ptrdiff_t>(s.size()))
? std::make_pair(false, 0.0)
: std::make_pair(true, result);
}
@ -108,7 +109,7 @@ std::pair<bool, xlnt::time> cast_time(const std::string &s)
}
std::vector<double> numeric_components;
xlnt::detail::number_serialiser ser;
for (auto component : time_components)
{
if (component.empty() || (component.substr(0, component.find('.')).size() > 2))
@ -123,9 +124,7 @@ std::pair<bool, xlnt::time> cast_time(const std::string &s)
return {false, result};
}
}
auto without_leading_zero = component.front() == '0' ? component.substr(1) : component;
auto numeric = std::stod(without_leading_zero);
auto numeric = ser.deserialise(component);
numeric_components.push_back(numeric);
}

View File

@ -26,6 +26,7 @@
#include <cmath>
#include <xlnt/utils/exceptions.hpp>
#include <xlnt/utils/numeric.hpp>
#include <detail/default_case.hpp>
#include <detail/number_format/number_formatter.hpp>
@ -622,7 +623,8 @@ void number_format_parser::parse()
value = token.string.substr(1);
}
section.condition.value = std::stod(value);
detail::number_serialiser ser;
section.condition.value = ser.deserialise(value);
break;
}
@ -1565,19 +1567,16 @@ std::string number_formatter::fill_placeholders(const format_placeholders &p, do
if (p.type == format_placeholders::placeholders_type::general
|| p.type == format_placeholders::placeholders_type::text)
{
result = std::to_string(number);
while (result.back() == '0')
auto s = serialiser_.serialise_short(number);
while (s.size() > 1 && s.back() == '0')
{
result.pop_back();
s.pop_back();
}
if (result.back() == '.')
if (s.back() == '.')
{
result.pop_back();
s.pop_back();
}
return result;
return s;
}
if (p.percentage)
@ -1636,21 +1635,22 @@ std::string number_formatter::fill_placeholders(const format_placeholders &p, do
auto fractional_part = number - integer_part;
result = std::fabs(fractional_part) < std::numeric_limits<double>::min()
? std::string(".")
: std::to_string(fractional_part).substr(1);
: serialiser_.serialise_short(fractional_part).substr(1);
while (result.back() == '0' || result.size() > (p.num_zeros + p.num_optionals + p.num_spaces + 1))
{
result.pop_back();
}
while (result.size() < p.num_zeros + 1)
if (result.size() < p.num_zeros + 1)
{
result.push_back('0');
result.resize(p.num_zeros + 1, '0');
}
while (result.size() < p.num_zeros + p.num_optionals + p.num_spaces + 1)
if (result.size() < p.num_zeros + p.num_optionals + p.num_spaces + 1)
{
result.push_back(' ');
result.resize(p.num_zeros + p.num_optionals + p.num_spaces + 1, ' ');
}
if (p.percentage)
@ -1689,13 +1689,7 @@ std::string number_formatter::fill_scientific_placeholders(const format_placehol
integer_string = std::string(integer_part.num_zeros + integer_part.num_optionals, '0');
}
std::string fractional_string = std::to_string(fraction).substr(1);
while (fractional_string.size() > fractional_part.num_zeros + fractional_part.num_optionals + 1)
{
fractional_string.pop_back();
}
std::string fractional_string = serialiser_.serialise_short(fraction).substr(1, fractional_part.num_zeros + fractional_part.num_optionals + 1);
std::string exponent_string = std::to_string(logarithm);
while (exponent_string.size() < fractional_part.num_zeros)

View File

@ -28,6 +28,7 @@
#include <vector>
#include <xlnt/utils/datetime.hpp>
#include <xlnt/utils/numeric.hpp>
namespace xlnt {
namespace detail {
@ -691,6 +692,7 @@ private:
number_format_parser parser_;
std::vector<format_code> format_;
xlnt::calendar calendar_;
xlnt::detail::number_serialiser serialiser_;
};
} // namespace detail

View File

@ -0,0 +1,96 @@
#ifndef XLNT_DETAIL_SERIALISATION_HELPERS_HPP
#define XLNT_DETAIL_SERIALISATION_HELPERS_HPP
#include <xlnt/cell/cell_type.hpp>
#include <xlnt/cell/index_types.hpp>
#include <string>
namespace xlnt {
namespace detail {
/// parsing assumptions used by the following functions
/// - on entry, the start element for the element has been consumed by parser->next
/// - on exit, the closing element has been consumed by parser->next
/// using these assumptions, the following functions DO NOT use parser->peek (SLOW!!!)
/// probable further gains from not building an attribute map and using the attribute events instead as the impl just iterates the map
/// 'r' == cell reference e.g. 'A1'
/// https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oe376/db11a912-b1cb-4dff-b46d-9bedfd10cef0
///
/// a lightweight version of xlnt::cell_reference with no extra functionality (absolute/relative, ...)
/// many thousands are created during (de)serialisation, so even minor overhead is noticeable
/// Plain (row, column) pair identifying a worksheet cell; ordered by row, then column.
struct Cell_Reference
{
    /// Builds a reference from an already-decoded row number and column index.
    explicit Cell_Reference(xlnt::row_t row_arg, xlnt::column_t::index_t column_arg) noexcept
        : row(row_arg), column(column_arg)
    {
    }

    /// The common case: the row number is already known during parsing
    /// (from the parent <row> element), so only the column letters of
    /// `reference` (e.g. the "AB" of "AB12") are decoded.
    ///
    /// Expected input matches [A-Z]{1,3}\d{1,7}. At most three leading
    /// upper-case letters are consumed; decoding stops at the first
    /// character that is not in 'A'..'Z'. A reference with no leading
    /// letters (including an empty string) yields column 0 instead of
    /// reading past the end of the string.
    explicit Cell_Reference(xlnt::row_t row_arg, const std::string &reference) noexcept
        : row(row_arg)
    {
        int temp = 0;
        const char *iter = reference.c_str();
        // c_str() is NUL terminated and '\0' is outside 'A'..'Z', so the
        // loop can never step beyond the terminator.
        for (int letters = 0; letters < 3 && *iter >= 'A' && *iter <= 'Z'; ++letters, ++iter)
        {
            // base-26 with 'A' == 1; letters further left are more significant
            temp = temp * 26 + (*iter - 'A' + 1);
        }
        column = static_cast<xlnt::column_t::index_t>(temp);
    }

    /// Strict ordering for sorting: by row first, then by column, because
    /// serialisation is done row by row. Declared const so it can be used
    /// on const objects and through std::less / ordered containers.
    bool operator<(const Cell_Reference &rhs) const noexcept
    {
        if (row < rhs.row)
        {
            return true;
        }
        if (rhs.row < row)
        {
            return false;
        }
        // same row, column comparison
        return column < rhs.column;
    }

    xlnt::row_t row; // range:[1, 1048576]
    xlnt::column_t::index_t column; // range:["A", "ZZZ"] -> [1, 26 + 26^2 + 26^3] -> [1, 18278]
};
// <c> inside <row> element
// https://docs.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet.cell?view=openxml-2.8.1
struct Cell
{
    /// Sorts cells by location: row first, then column.
    ///
    /// Declared const so it works on const cells and with std::less.
    /// The reference fields are compared directly rather than through
    /// Cell_Reference's own operator<, so this comparison does not depend
    /// on that operator being callable on a const object.
    bool operator<(const Cell &rhs) const noexcept
    {
        if (ref.row < rhs.ref.row)
        {
            return true;
        }
        if (rhs.ref.row < ref.row)
        {
            return false;
        }
        return ref.column < rhs.ref.column;
    }

    // Each trailing comment names the XML attribute/element the field is read from.
    bool is_phonetic = false; // 'ph'
    xlnt::cell_type type = xlnt::cell_type::number; // 't' (defaults to number)
    int cell_metatdata_idx = -1; // 'cm' (-1 presumably means "not specified" - confirm with the producer/consumer)
    int style_index = -1; // 's' (-1 presumably means "not specified" - confirm with the producer/consumer)
    Cell_Reference ref{0, 0}; // 'r' (0,0 lies outside the valid 1-based row/column ranges)
    std::string value; // <v> OR <is>
    std::string formula_string; // <f>
};
} // namespace detail
} // namespace xlnt
#endif

View File

@ -40,6 +40,7 @@
#include <detail/header_footer/header_footer_code.hpp>
#include <detail/implementations/workbook_impl.hpp>
#include <detail/serialization/custom_value_traits.hpp>
#include <detail/serialization/serialisation_helpers.hpp>
#include <detail/serialization/vector_streambuf.hpp>
#include <detail/serialization/xlsx_consumer.hpp>
#include <detail/serialization/zstream.hpp>
@ -127,74 +128,14 @@ void set_style_by_xfid(const std::vector<style_id_pair> &styles,
}
}
/// parsing assumptions used by the following functions
/// - on entry, the start element for the element has been consumed by parser->next
/// - on exit, the closing element has been consumed by parser->next
/// using these assumptions, the following functions DO NOT use parser->peek (SLOW!!!)
/// probable further gains from not building an attribute map and using the attribute events instead as the impl just iterates the map
/// 'r' == cell reference e.g. 'A1'
/// https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oe376/db11a912-b1cb-4dff-b46d-9bedfd10cef0
///
/// a lightweight version of xlnt::cell_reference with no extra functionality (absolute/relative, ...)
/// many thousands are created during parsing, so even minor overhead is noticeable
/// Plain (row, column) pair identifying a worksheet cell.
struct Cell_Reference
{
    /// Not commonly used, added as the obvious constructor: takes an
    /// already-decoded row number and column index.
    explicit Cell_Reference(xlnt::row_t row_arg, xlnt::column_t::index_t column_arg) noexcept
        : row(row_arg), column(column_arg)
    {
    }

    /// The common case: the row number is already known during parsing
    /// (from the parent <row> element), so only the column letters of
    /// `reference` are decoded.
    ///
    /// Expected input matches [A-Z]{1,3}\d{1,7}. At most three leading
    /// upper-case letters are consumed; decoding stops at the first
    /// character that is not in 'A'..'Z'. A reference with no leading
    /// letters (including an empty string) yields column 0 instead of
    /// reading past the end of the string.
    explicit Cell_Reference(xlnt::row_t row_arg, const std::string &reference) noexcept
        : row(row_arg)
    {
        int temp = 0;
        const char *iter = reference.c_str();
        // c_str() is NUL terminated and '\0' is outside 'A'..'Z', so the
        // loop can never step beyond the terminator.
        for (int letters = 0; letters < 3 && *iter >= 'A' && *iter <= 'Z'; ++letters, ++iter)
        {
            // base-26 with 'A' == 1; letters further left are more significant
            temp = temp * 26 + (*iter - 'A' + 1);
        }
        column = static_cast<xlnt::column_t::index_t>(temp);
    }

    xlnt::row_t row; // range:[1, 1048576]
    xlnt::column_t::index_t column; // range:["A", "ZZZ"] -> [1, 26 + 26^2 + 26^3] -> [1, 18278]
};
// <c> inside <row> element
// https://docs.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet.cell?view=openxml-2.8.1
// Plain data holder for one parsed <c> element.
// Each trailing comment names the XML attribute/element the field corresponds to.
struct Cell
{
bool is_phonetic = false; // 'ph'
xlnt::cell::type type = xlnt::cell::type::number; // 't' (defaults to number)
int cell_metatdata_idx = -1; // 'cm' (-1 presumably means "not specified" - TODO confirm)
int style_index = -1; // 's' (-1 presumably means "not specified" - TODO confirm)
Cell_Reference ref{0, 0}; // 'r' (0,0 lies outside the valid 1-based row/column ranges)
std::string value; // <v> OR <is>
std::string formula_string; // <f>
};
// <sheetData> element
struct Sheet_Data
{
std::vector<std::pair<xlnt::row_properties, xlnt::row_t>> parsed_rows;
std::vector<Cell> parsed_cells;
std::vector<xlnt::detail::Cell> parsed_cells;
};
xlnt::cell::type type_from_string(const std::string &str)
xlnt::cell_type type_from_string(const std::string &str)
{
if (string_equal(str, "s"))
{
@ -223,14 +164,14 @@ xlnt::cell::type type_from_string(const std::string &str)
return xlnt::cell::type::shared_string;
}
Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
{
Cell c;
xlnt::detail::Cell c;
for (auto &attr : parser->attribute_map())
{
if (string_equal(attr.first.name(), "r"))
{
c.ref = Cell_Reference(row_arg, attr.second.value);
c.ref = xlnt::detail::Cell_Reference(row_arg, attr.second.value);
}
else if (string_equal(attr.first.name(), "t"))
{
@ -251,7 +192,8 @@ Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
}
int level = 1; // nesting level
// 1 == <c>
// 2 == <v>/<is>/<f>
// 2 == <v>/<f>
// 3 == <is><t>
// exit loop at </c>
while (level > 0)
{
@ -272,7 +214,6 @@ Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
if (level == 2)
{
// <v> -> numeric values
// <is><t> -> inline string
if (string_equal(parser->name(), "v"))
{
c.value += std::move(parser->value());
@ -307,7 +248,7 @@ Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
}
// <row> inside <sheetData> element
std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector<Cell> &parsed_cells)
std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector<xlnt::detail::Cell> &parsed_cells)
{
std::pair<xlnt::row_properties, int> props;
for (auto &attr : parser->attribute_map())

View File

@ -80,7 +80,9 @@ namespace detail {
xlsx_producer::xlsx_producer(const workbook &target)
: source_(target),
current_part_stream_(nullptr)
current_part_stream_(nullptr),
current_cell_(nullptr),
current_worksheet_(nullptr)
{
}
@ -918,8 +920,6 @@ void xlsx_producer::write_shared_string_table(const relationship & /*rel*/)
// todo: is there a more elegant way to get this number?
std::size_t string_count = 0;
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wrange-loop-analysis"
for (const auto ws : source_)
{
auto dimension = ws.calculate_dimension();
@ -929,8 +929,8 @@ void xlsx_producer::write_shared_string_table(const relationship & /*rel*/)
{
while (current_cell.column() <= dimension.bottom_right().column())
{
if (ws.has_cell(current_cell)
&& ws.cell(current_cell).data_type() == cell::type::shared_string)
auto c_iter = ws.d_->cell_map_.find(current_cell);
if (c_iter != ws.d_->cell_map_.end() && c_iter->second.type_ == cell_type::shared_string)
{
++string_count;
}
@ -942,7 +942,6 @@ void xlsx_producer::write_shared_string_table(const relationship & /*rel*/)
current_cell.column_index(dimension.top_left().column_index());
}
}
#pragma clang diagnostic pop
write_attribute("count", string_count);
write_attribute("uniqueCount", source_.shared_strings_by_id().size());
@ -2814,33 +2813,12 @@ void xlsx_producer::write_worksheet(const relationship &rel)
{
write_start_element(xmlns, "pageMargins");
// TODO: there must be a better way to do this
auto remove_trailing_zeros = [](const std::string &n) -> std::string {
auto decimal = n.find('.');
if (decimal == std::string::npos) return n;
auto index = n.size() - 1;
while (index >= decimal && n[index] == '0')
{
index--;
}
if (index == decimal)
{
return n.substr(0, decimal);
}
return n.substr(0, index + 1);
};
write_attribute("left", remove_trailing_zeros(std::to_string(ws.page_margins().left())));
write_attribute("right", remove_trailing_zeros(std::to_string(ws.page_margins().right())));
write_attribute("top", remove_trailing_zeros(std::to_string(ws.page_margins().top())));
write_attribute("bottom", remove_trailing_zeros(std::to_string(ws.page_margins().bottom())));
write_attribute("header", remove_trailing_zeros(std::to_string(ws.page_margins().header())));
write_attribute("footer", remove_trailing_zeros(std::to_string(ws.page_margins().footer())));
write_attribute("left", ws.page_margins().left());
write_attribute("right", ws.page_margins().right());
write_attribute("top", ws.page_margins().top());
write_attribute("bottom", ws.page_margins().bottom());
write_attribute("header", ws.page_margins().header());
write_attribute("footer", ws.page_margins().footer());
write_end_element(xmlns, "pageMargins");
}

View File

@ -26,11 +26,12 @@
#include <cstdint>
#include <iostream>
#include <memory>
#include <type_traits>
#include <vector>
#include <xlnt/utils/numeric.hpp>
#include <detail/constants.hpp>
#include <detail/external/include_libstudxml.hpp>
#include <xlnt/utils/numeric.hpp>
namespace xml {
class serializer;
@ -169,19 +170,34 @@ private:
void write_namespace(const std::string &ns, const std::string &prefix);
template<typename T>
// std::string attribute name
// not integer or float type
template <typename T, typename = typename std::enable_if<!std::is_convertible<T, double>::value>::type>
void write_attribute(const std::string &name, T value)
{
current_part_serializer_->attribute(name, value);
}
template<typename T>
void write_attribute(const std::string &name, double value)
{
current_part_serializer_->attribute(name, converter_.serialise(value));
}
// qname attribute name
// not integer or float type
template <typename T, typename = typename std::enable_if<!std::is_convertible<T, double>::value>::type>
void write_attribute(const xml::qname &name, T value)
{
current_part_serializer_->attribute(name, value);
}
template<typename T>
void write_attribute(const xml::qname &name, double value)
{
current_part_serializer_->attribute(name, converter_.serialise(value));
}
template <typename T>
void write_characters(T characters, bool preserve_whitespace = false)
{
if (preserve_whitespace)