Merge pull request #447 from Crzyrndm/feature/benchmark

microbenchmarks for double<->string conversion, serialisation improvements
This commit is contained in:
Thomas Fussell 2020-03-20 18:02:37 -04:00 committed by GitHub
commit 2f5934f60e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 670 additions and 163 deletions

View File

@ -41,3 +41,8 @@ foreach(BENCHMARK_SOURCE IN ITEMS ${BENCHMARK_SOURCES})
$<TARGET_FILE_DIR:${BENCHMARK_EXECUTABLE}>) $<TARGET_FILE_DIR:${BENCHMARK_EXECUTABLE}>)
endif() endif()
endforeach() endforeach()
# Opt-in switch (default OFF): the microbenchmarks subdirectory is only added to the build when this is ON
option(XLNT_MICROBENCH_ENABLED "Enable small benchmarks typically used for development" OFF)
if (XLNT_MICROBENCH_ENABLED)
add_subdirectory(microbenchmarks)
endif()

View File

@ -0,0 +1,36 @@
# FetchContent added in cmake v3.11
# https://cmake.org/cmake/help/v3.11/module/FetchContent.html
# this file is behind a feature flag (XLNT_MICROBENCH_ENABLED) so the primary build is not affected
cmake_minimum_required(VERSION 3.11)
project(xlnt_ubench)

# acquire google benchmark dependency
# The two switches below are option()s inside google benchmark. option() only honours a
# plain (non-cache) variable of the same name when policy CMP0077 is NEW in the subproject,
# so they are set as cache entries, which works for every cmake version. Not FORCEd, so a
# value given on the command line still wins.
# disable generation of the various test projects
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Build google benchmark's own tests")
# gtest not required
set(BENCHMARK_ENABLE_GTEST_TESTS OFF CACHE BOOL "Build google benchmark's gtest based tests")

include(FetchContent)
FetchContent_Declare(
    googlebenchmark
    GIT_REPOSITORY https://github.com/google/benchmark
    GIT_TAG v1.5.0
)

# download if not already present, then add it to the build.
# FetchContent_MakeAvailable would do both steps in one call, but it only exists from
# cmake 3.14 and this file declares 3.11 as its minimum, so the explicit form is used.
FetchContent_GetProperties(googlebenchmark)
if(NOT googlebenchmark_POPULATED)
    FetchContent_Populate(googlebenchmark)
    add_subdirectory(${googlebenchmark_SOURCE_DIR} ${googlebenchmark_BINARY_DIR})
endif()

add_executable(xlnt_ubench)
target_sources(xlnt_ubench
    PRIVATE
        string_to_double.cpp
        double_to_string.cpp
)
# benchmark_main supplies main(); xlnt is the library under test
target_link_libraries(xlnt_ubench benchmark_main xlnt)
target_compile_features(xlnt_ubench PRIVATE cxx_std_17)

View File

@ -0,0 +1,207 @@
// A core part of the xlsx serialisation routine is taking doubles from memory and stringifying them
// this has a few requirements
// - expect strings in the form 1234.56 (i.e. no thousands separator, '.' used for the decimal separator)
// - outputs up to 15 significant figures (excel only serialises numbers up to 15sf)
#include "benchmark/benchmark.h"
#include <locale>
#include <random>
#include <sstream>
namespace {
// Fixture that sets up a large quantity of random doubles to serialise.
// Decimal_Locale == true  -> benchmarks run under the "C" locale ('.' decimal separator)
// Decimal_Locale == false -> benchmarks run under the "de-DE" locale (',' decimal separator)
template <bool Decimal_Locale = true>
class RandomFloats : public benchmark::Fixture
{
    static constexpr size_t Number_of_Elements = 1 << 20;
    static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated");
    static_assert((Number_of_Elements & (Number_of_Elements - 1)) == 0,
        "get_rand() wraps the index with a bit mask, which requires a power of two");
    std::vector<double> inputs;
    size_t index = 0;
    // locales that were active before SetUp, restored by TearDown
    std::string previous_c_locale;
    std::locale previous_cpp_locale;

public:
    // Switches to the locale under test and fills `inputs` with random doubles.
    void SetUp(const ::benchmark::State &state)
    {
        // Remember what to restore. setlocale(category, name) returns the name of the *new*
        // locale, so the current one must be queried before anything is changed, and copied
        // because later setlocale calls may overwrite the returned buffer.
        previous_cpp_locale = std::locale();
        const char *current = setlocale(LC_ALL, nullptr);
        previous_c_locale = (current != nullptr) ? current : "";

        // snprintf follows the C locale, whereas std::locale{} (what the serialisers inspect)
        // is the global C++ locale. Both have to change for the comma path to be exercised;
        // std::locale::global with a named locale updates the two together.
        // The C++ locale is only constructed once the C runtime has accepted the name, as
        // std::locale throws on a name the platform does not know.
        const char *wanted = Decimal_Locale ? "C" : "de-DE";
        if (setlocale(LC_ALL, wanted) != nullptr)
        {
            std::locale::global(std::locale(wanted));
        }

        std::random_device rd; // obtain a seed for the random number engine
        std::mt19937 gen(rd());
        // doing full range is stupid (<double>::min/max()...), it just ends up generating very large numbers
        // uniform is probably not the best distribution to use here, but it will do for now
        std::uniform_real_distribution<double> dis(-1'000, 1'000);
        // generate a large quantity of doubles to serialise
        index = 0;
        inputs.clear();
        inputs.reserve(Number_of_Elements);
        for (size_t i = 0; i < Number_of_Elements; ++i)
        {
            inputs.push_back(dis(gen));
        }
    }

    // Restores the locales saved by SetUp and releases the input data.
    void TearDown(const ::benchmark::State &state)
    {
        // restore the C++ global locale first (for a named locale this also sets the C locale),
        // then put back the exact C locale that was active before SetUp
        std::locale::global(previous_cpp_locale);
        if (!previous_c_locale.empty())
        {
            setlocale(LC_ALL, previous_c_locale.c_str());
        }
        // gbench is keeping the fixtures alive somewhere, need to clear the data after use
        inputs = std::vector<double>{};
    }

    // Returns the next input, wrapping around once every element has been handed out.
    double &get_rand()
    {
        return inputs[++index & (Number_of_Elements - 1)];
    }
};
/// Formats a double as text using at most 15 significant digits.
/// A new output stream is created on every call and is left on its default locale.
std::string serialize_number_to_string(double num)
{
    // more digits and excel won't match
    constexpr int Excel_Digit_Precision = 15; //sf

    std::ostringstream out;
    out.precision(Excel_Digit_Precision);
    out << num;

    std::string text = out.str();
    return text;
}
// Stream based serialiser that keeps a single std::ostringstream alive between calls,
// configured once for 15 significant digits and the "C" locale.
class number_serialiser
{
    static constexpr int Excel_Digit_Precision = 15; //sf
    std::ostringstream stream_;

public:
    explicit number_serialiser()
    {
        const std::locale c_locale("C");
        stream_.imbue(c_locale);
        stream_.precision(Excel_Digit_Precision);
    }

    // Formats d with the cached stream and returns the resulting text.
    std::string serialise(double d)
    {
        stream_.clear(); // reset any error flags
        stream_.str(""); // reset string buffer
        stream_ << d;
        return stream_.str();
    }
};
// snprintf based serialiser. snprintf writes the decimal separator of the active locale,
// so when that separator is ',' it is patched back to '.' afterwards.
class number_serialiser_mk2
{
    static constexpr int Excel_Digit_Precision = 15; //sf
    // "%.15g" can need sign + digit + separator + 14 digits + "e-308" = 22 characters plus
    // the trailing '\0'. A buffer of Excel_Digit_Precision + 1 is too small: snprintf then
    // truncates but still returns the full length, which led to reads past the buffer.
    static constexpr size_t Buffer_Size = 32;
    bool should_convert_comma;

    // Replaces the first ',' within [buf, buf + len) with '.'
    static void convert_comma(char *buf, int len)
    {
        char *buf_end = buf + len;
        char *decimal = std::find(buf, buf_end, ',');
        if (decimal != buf_end)
        {
            *decimal = '.';
        }
    }

public:
    // Decides once, from the global C++ locale at construction time, whether the
    // separator has to be converted.
    explicit number_serialiser_mk2()
        : should_convert_comma(std::use_facet<std::numpunct<char>>(std::locale{}).decimal_point() == ',')
    {
    }

    // Returns d formatted with 15 significant digits and '.' as the decimal separator.
    // An empty string is returned if snprintf reports an encoding error.
    std::string serialise(double d)
    {
        char buf[Buffer_Size];
        int len = snprintf(buf, sizeof(buf), "%.15g", d);
        if (len < 0)
        {
            return std::string();
        }
        // snprintf returns the length the output would have had without truncation,
        // never use more than what was actually written
        if (static_cast<size_t>(len) >= sizeof(buf))
        {
            len = static_cast<int>(sizeof(buf) - 1);
        }
        if (should_convert_comma)
        {
            convert_comma(buf, len);
        }
        return std::string(buf, static_cast<size_t>(len));
    }
};
// fixture running under the "C" locale
using RandFloats = RandomFloats<true>;
// fixture running under the "de-DE" locale
using RandFloatsComma = RandomFloats<false>;
} // namespace
// Baseline: a new string stream is constructed for every conversion.
BENCHMARK_F(RandFloats, string_from_double_sstream)
(benchmark::State &state)
{
    // ranged-for is the loop form google benchmark recommends,
    // it has less per-iteration overhead than KeepRunning()
    for (auto _ : state)
    {
        benchmark::DoNotOptimize(
            serialize_number_to_string(get_rand()));
    }
}
// Stream based conversion where one serialiser (and its stream) is reused for every value.
BENCHMARK_F(RandFloats, string_from_double_sstream_cached)
(benchmark::State &state)
{
    number_serialiser ser;
    // ranged-for is the loop form google benchmark recommends,
    // it has less per-iteration overhead than KeepRunning()
    for (auto _ : state)
    {
        benchmark::DoNotOptimize(
            ser.serialise(get_rand()));
    }
}
// Plain snprintf conversion, no handling of the locale's decimal separator.
BENCHMARK_F(RandFloats, string_from_double_snprintf)
(benchmark::State &state)
{
    // ranged-for is the loop form google benchmark recommends,
    // it has less per-iteration overhead than KeepRunning()
    for (auto _ : state)
    {
        // "%.15g" needs up to 22 characters plus '\0' (e.g. "-123.456789012345" is already 17).
        // With a 16 byte buffer snprintf truncates yet returns the full length, so the
        // std::string below would be built from bytes beyond the end of the buffer.
        char buf[32];
        int len = snprintf(buf, sizeof(buf), "%.15g", get_rand());
        benchmark::DoNotOptimize(
            std::string(buf, static_cast<size_t>(len)));
    }
}
// snprintf conversion through number_serialiser_mk2, which also fixes up the decimal separator.
BENCHMARK_F(RandFloats, string_from_double_snprintf_fixed)
(benchmark::State &state)
{
    number_serialiser_mk2 ser;
    // ranged-for is the loop form google benchmark recommends,
    // it has less per-iteration overhead than KeepRunning()
    for (auto _ : state)
    {
        benchmark::DoNotOptimize(
            ser.serialise(get_rand()));
    }
}
// locale names are different between OS's, and floating-point std::to_chars is only complete in MSVC
#ifdef _MSC_VER
#include <charconv>
// std::to_chars conversion (shortest representation that round-trips, locale independent).
BENCHMARK_F(RandFloats, string_from_double_std_to_chars)
(benchmark::State &state)
{
    // ranged-for is the loop form google benchmark recommends,
    // it has less per-iteration overhead than KeepRunning()
    for (auto _ : state)
    {
        // The shortest round-trip form of a double can take 17 significant digits plus sign,
        // separator and exponent. 16 bytes is not enough for most inputs: to_chars would fail
        // with errc::value_too_large and the benchmark would time the failure path.
        char buf[32];
        std::to_chars_result result = std::to_chars(buf, buf + std::size(buf), get_rand());
        benchmark::DoNotOptimize(
            std::string(buf, result.ptr));
    }
}
// Same as string_from_double_snprintf_fixed, but on the fixture that selects the "de-DE" locale.
BENCHMARK_F(RandFloatsComma, string_from_double_snprintf_fixed_comma)
(benchmark::State &state)
{
    number_serialiser_mk2 ser;
    // ranged-for is the loop form google benchmark recommends,
    // it has less per-iteration overhead than KeepRunning()
    for (auto _ : state)
    {
        benchmark::DoNotOptimize(
            ser.serialise(get_rand()));
    }
}
#endif

View File

@ -0,0 +1,223 @@
// A core part of the xlsx parsing routine is taking strings from the xml parser and parsing these to a double
// this has a few requirements
// - expect strings in the form 1234.56 (i.e. no thousands separator, '.' used for the decimal separator)
// - handles at least 15 significant figures (excel only serialises numbers up to 15sf)
#include <benchmark/benchmark.h>
#include <locale>
#include <random>
#include <sstream>
namespace {
// Fixture that sets up a large quantity of random doubles as strings.
// The strings always use '.' as the decimal separator (the form found in xlsx files).
// Decimal_Locale == true  -> benchmarks run under the "C" locale ('.' decimal separator)
// Decimal_Locale == false -> benchmarks run under the "de-DE" locale (',' decimal separator)
template <bool Decimal_Locale = true>
class RandomFloatStrs : public benchmark::Fixture
{
    static constexpr size_t Number_of_Elements = 1 << 20;
    static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated");
    static_assert((Number_of_Elements & (Number_of_Elements - 1)) == 0,
        "get_rand() wraps the index with a bit mask, which requires a power of two");
    std::vector<std::string> inputs;
    size_t index = 0;
    // locales that were active before SetUp, restored by TearDown
    std::string previous_c_locale;
    std::locale previous_cpp_locale;

public:
    // Generates the input strings, then switches to the locale under test.
    void SetUp(const ::benchmark::State &state)
    {
        // Remember what to restore. setlocale(category, name) returns the name of the *new*
        // locale, so the current one must be queried before anything is changed, and copied
        // because later setlocale calls may overwrite the returned buffer.
        previous_cpp_locale = std::locale();
        const char *current = setlocale(LC_ALL, nullptr);
        previous_c_locale = (current != nullptr) ? current : "";

        // Format the inputs under the "C" locale. Formatting them after switching to the
        // comma locale would produce ',' separated strings, which is not the input the
        // converters have to cope with.
        setlocale(LC_ALL, "C");

        std::random_device rd; // obtain a seed for the random number engine
        std::mt19937 gen(rd());
        // doing full range is stupid (<double>::min/max()...), it just ends up generating very large numbers
        // uniform is probably not the best distribution to use here, but it will do for now
        std::uniform_real_distribution<double> dis(-1'000, 1'000);
        // generate a large quantity of doubles to deserialise
        index = 0;
        inputs.clear();
        inputs.reserve(Number_of_Elements);
        for (size_t i = 0; i < Number_of_Elements; ++i)
        {
            double d = dis(gen);
            // 15 significant figures. "%.15g" needs up to 22 characters plus '\0';
            // "%.15f" into a 16 byte buffer was silently cut short of 15 figures.
            char buf[32];
            snprintf(buf, sizeof(buf), "%.15g", d);
            inputs.push_back(std::string(buf));
        }

        // strtod follows the C locale, whereas std::locale{} (what the converters inspect)
        // is the global C++ locale. Both have to change for the comma path to be exercised;
        // std::locale::global with a named locale updates the two together.
        // The C++ locale is only constructed once the C runtime has accepted the name, as
        // std::locale throws on a name the platform does not know.
        const char *wanted = Decimal_Locale ? "C" : "de-DE";
        if (setlocale(LC_ALL, wanted) != nullptr)
        {
            std::locale::global(std::locale(wanted));
        }
    }

    // Restores the locales saved by SetUp and releases the input data.
    void TearDown(const ::benchmark::State &state)
    {
        // restore the C++ global locale first (for a named locale this also sets the C locale),
        // then put back the exact C locale that was active before SetUp
        std::locale::global(previous_cpp_locale);
        if (!previous_c_locale.empty())
        {
            setlocale(LC_ALL, previous_c_locale.c_str());
        }
        // gbench is keeping the fixtures alive somewhere, need to clear the data after use
        inputs = std::vector<std::string>{};
    }

    // Returns the next input, wrapping around once every element has been handed out.
    std::string &get_rand()
    {
        return inputs[++index & (Number_of_Elements - 1)];
    }
};
// Baseline converter, the approach used by xlsx_consumer.cpp in commit
// ba01de47a7d430764c20ec9ac9600eec0eb38bcf: a reusable std::istringstream imbued with the "C" locale
struct number_converter
{
    number_converter()
    {
        const std::locale c_locale("C");
        stream.imbue(c_locale);
    }

    // Extracts a double from s using the cached stream.
    // The extracted value is stored in `result` and returned.
    double stold(const std::string &s)
    {
        stream.clear(); // drop error/eof flags left by the previous extraction
        stream.str(s);
        stream >> result;
        return result;
    }

    std::istringstream stream;
    double result;
};
// strtod parses with the decimal separator of the active locale, so a little preprocessing
// of the input is required: when that separator is ',' the first '.' is swapped for ','
struct number_converter_mk2
{
    // Decides once, from the global C++ locale at construction time, whether input needs rewriting.
    explicit number_converter_mk2()
        : should_convert_to_comma(std::use_facet<std::numpunct<char>>(std::locale{}).decimal_point() == ',')
    {
    }

    // In-place variant: s itself is rewritten when conversion is required.
    double stold(std::string &s) const noexcept
    {
        assert(!s.empty());
        if (should_convert_to_comma)
        {
            point_to_comma(s);
        }
        return strtod(s.c_str(), nullptr);
    }

    // Read-only variant: works on a copy of s when conversion is required.
    double stold(const std::string &s) const
    {
        assert(!s.empty());
        if (should_convert_to_comma)
        {
            std::string localised(s);
            point_to_comma(localised);
            return strtod(localised.c_str(), nullptr);
        }
        return strtod(s.c_str(), nullptr);
    }

private:
    // Replaces the first '.' in text (if there is one) with ','
    static void point_to_comma(std::string &text) noexcept
    {
        const std::string::size_type pos = text.find('.');
        if (pos != std::string::npos)
        {
            text[pos] = ',';
        }
    }

    bool should_convert_to_comma = false;
};
// fixture running under the "C" locale
using RandFloatStrs = RandomFloatStrs<true>;
// german locale uses ',' as the separator
using RandFloatCommaStrs = RandomFloatStrs<false>;
} // namespace
// Baseline: parsing through a cached std::istringstream (number_converter).
BENCHMARK_F(RandFloatStrs, double_from_string_sstream)
(benchmark::State &state)
{
    number_converter converter;
    // ranged-for is the loop form google benchmark recommends,
    // it has less per-iteration overhead than KeepRunning()
    for (auto _ : state)
    {
        benchmark::DoNotOptimize(
            converter.stold(get_rand()));
    }
}
// using strtod
// https://en.cppreference.com/w/cpp/string/byte/strtof
// this naive usage is broken in the face of locales (fails condition 1)
#include <cstdlib>
// Direct strtod call, no handling of the locale's decimal separator.
BENCHMARK_F(RandFloatStrs, double_from_string_strtod)
(benchmark::State &state)
{
    // ranged-for is the loop form google benchmark recommends,
    // it has less per-iteration overhead than KeepRunning()
    for (auto _ : state)
    {
        benchmark::DoNotOptimize(
            strtod(get_rand().c_str(), nullptr));
    }
}
// strtod through number_converter_mk2; get_rand() yields a non-const reference,
// so this selects the overload that may rewrite the input in place.
BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed)
(benchmark::State &state)
{
    number_converter_mk2 converter;
    // ranged-for is the loop form google benchmark recommends,
    // it has less per-iteration overhead than KeepRunning()
    for (auto _ : state)
    {
        benchmark::DoNotOptimize(
            converter.stold(get_rand()));
    }
}
// strtod through number_converter_mk2; binding the input to a const reference
// selects the overload that leaves the input untouched.
BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed_const_ref)
(benchmark::State &state)
{
    number_converter_mk2 converter;
    // ranged-for is the loop form google benchmark recommends,
    // it has less per-iteration overhead than KeepRunning()
    for (auto _ : state)
    {
        const std::string &inp = get_rand();
        benchmark::DoNotOptimize(
            converter.stold(inp));
    }
}
// locale names are different between OS's, and std::from_chars is only complete in MSVC
#ifdef _MSC_VER
#include <charconv>
// std::from_chars parsing (locale independent).
BENCHMARK_F(RandFloatStrs, double_from_string_std_from_chars)
(benchmark::State &state)
{
    // ranged-for is the loop form google benchmark recommends,
    // it has less per-iteration overhead than KeepRunning()
    for (auto _ : state)
    {
        const std::string &input = get_rand();
        double output;
        benchmark::DoNotOptimize(
            std::from_chars(input.data(), input.data() + input.size(), output));
    }
}
// not using the standard "C" locale with '.' separator
// (non-const reference input, so the overload that may rewrite the input in place is selected)
BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_ref)
(benchmark::State &state)
{
    number_converter_mk2 converter;
    // ranged-for is the loop form google benchmark recommends,
    // it has less per-iteration overhead than KeepRunning()
    for (auto _ : state)
    {
        benchmark::DoNotOptimize(
            converter.stold(get_rand()));
    }
}
// Comma locale fixture with the input bound to a const reference, which selects the
// number_converter_mk2 overload that leaves the input untouched.
BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_const_ref)
(benchmark::State &state)
{
    number_converter_mk2 converter;
    // ranged-for is the loop form google benchmark recommends,
    // it has less per-iteration overhead than KeepRunning()
    for (auto _ : state)
    {
        const std::string &inp = get_rand();
        benchmark::DoNotOptimize(
            converter.stold(inp));
    }
}
#endif

View File

@ -5,7 +5,7 @@
namespace { namespace {
using milliseconds_d = std::chrono::duration<double, std::milli>; using milliseconds_d = std::chrono::duration<double, std::milli>;
void run_test(const xlnt::path &file, int runs = 10) void run_load_test(const xlnt::path &file, int runs = 10)
{ {
std::cout << file.string() << "\n\n"; std::cout << file.string() << "\n\n";
@ -24,10 +24,35 @@ void run_test(const xlnt::path &file, int runs = 10)
std::cout << milliseconds_d(test_timings.back()).count() << " ms\n"; std::cout << milliseconds_d(test_timings.back()).count() << " ms\n";
} }
} }
// Loads `file` once, then saves the workbook `runs` times to a file of the same name
// (filename component only), printing how long each save took in milliseconds.
void run_save_test(const xlnt::path &file, int runs = 10)
{
    using clock = std::chrono::steady_clock;

    std::cout << file.string() << "\n\n";

    xlnt::workbook wb;
    wb.load(file);
    const xlnt::path save_path(file.filename());

    std::vector<clock::duration> test_timings;
    for (int run = 0; run < runs; ++run)
    {
        const auto started = clock::now();
        wb.save(save_path);
        const auto finished = clock::now();

        test_timings.push_back(finished - started);
        std::cout << milliseconds_d(test_timings.back()).count() << " ms\n";
    }
}
} // namespace } // namespace
int main() int main()
{ {
run_test(path_helper::benchmark_file("large.xlsx")); run_load_test(path_helper::benchmark_file("large.xlsx"));
run_test(path_helper::benchmark_file("very_large.xlsx")); run_load_test(path_helper::benchmark_file("very_large.xlsx"));
run_save_test(path_helper::benchmark_file("large.xlsx"));
run_save_test(path_helper::benchmark_file("very_large.xlsx"));
} }

View File

@ -92,26 +92,11 @@ public:
/// </summary> /// </summary>
column_t(const char *column_string); column_t(const char *column_string);
/// <summary>
/// Copy constructor. Constructs a column by copying it from other.
/// </summary>
column_t(const column_t &other);
/// <summary>
/// Move constructor. Constructs a column by moving it from other.
/// </summary>
column_t(column_t &&other);
/// <summary> /// <summary>
/// Returns a string representation of this column index. /// Returns a string representation of this column index.
/// </summary> /// </summary>
std::string column_string() const; std::string column_string() const;
/// <summary>
/// Sets this column to be the same as rhs's and return reference to self.
/// </summary>
column_t &operator=(column_t rhs);
/// <summary> /// <summary>
/// Sets this column to be equal to rhs and return reference to self. /// Sets this column to be equal to rhs and return reference to self.
/// </summary> /// </summary>

View File

@ -34,21 +34,6 @@
namespace xlnt { namespace xlnt {
namespace detail { namespace detail {
/// <summary>
/// Takes in any number and outputs a string form of that number which will
/// serialise and deserialise without loss of precision
/// </summary>
template <typename Number>
std::string serialize_number_to_string(Number num)
{
// more digits and excel won't match
constexpr int Excel_Digit_Precision = 15; //sf
std::stringstream ss;
ss.precision(Excel_Digit_Precision);
ss << num;
return ss.str();
}
/// <summary> /// <summary>
/// constexpr abs /// constexpr abs
/// </summary> /// </summary>
@ -117,45 +102,72 @@ bool float_equals(const LNumber &lhs, const RNumber &rhs,
return ((lhs + scaled_fuzz) >= rhs) && ((rhs + scaled_fuzz) >= lhs); return ((lhs + scaled_fuzz) >= rhs) && ((rhs + scaled_fuzz) >= lhs);
} }
struct number_converter class number_serialiser
{ {
explicit number_converter() static constexpr int Excel_Digit_Precision = 15; //sf
: should_convert_to_comma(std::use_facet<std::numpunct<char>>(std::locale{}).decimal_point() == ',') bool should_convert_comma;
static void convert_comma_to_pt(char *buf, int len)
{
char *buf_end = buf + len;
char *decimal = std::find(buf, buf_end, ',');
if (decimal != buf_end)
{
*decimal = '.';
}
}
static void convert_pt_to_comma(char *buf, size_t len)
{
char *buf_end = buf + len;
char *decimal = std::find(buf, buf_end, '.');
if (decimal != buf_end)
{
*decimal = ',';
}
}
public:
explicit number_serialiser()
: should_convert_comma(std::use_facet<std::numpunct<char>>(std::locale{}).decimal_point() == ',')
{ {
} }
double stold(std::string &s) const noexcept std::string serialise(double d) const
{
char buf[30];
int len = snprintf(buf, sizeof(buf), "%.15g", d);
if (should_convert_comma)
{
convert_comma_to_pt(buf, len);
}
return std::string(buf, static_cast<size_t>(len));
}
double deserialise(std::string &s) const noexcept
{ {
assert(!s.empty()); assert(!s.empty());
if (should_convert_to_comma) if (should_convert_comma)
{ {
auto decimal_pt = std::find(s.begin(), s.end(), '.'); // s.data() doesn't have a non-const overload until c++17, hence this little dance
if (decimal_pt != s.end()) convert_pt_to_comma(&s[0], s.size());
{
*decimal_pt = ',';
}
} }
return strtod(s.c_str(), nullptr); return strtod(s.c_str(), nullptr);
} }
double stold(const std::string &s) const double deserialise(const std::string &s) const
{ {
assert(!s.empty()); assert(!s.empty());
if (!should_convert_to_comma) if (!should_convert_comma)
{ {
return strtod(s.c_str(), nullptr); return strtod(s.c_str(), nullptr);
} }
std::string copy(s); char buf[30];
auto decimal_pt = std::find(copy.begin(), copy.end(), '.'); assert(s.size() < sizeof(buf));
if (decimal_pt != copy.end()) auto copy_end = std::copy(s.begin(), s.end(), buf);
{ convert_pt_to_comma(buf, static_cast<size_t>(copy_end - buf));
*decimal_pt = ','; return strtod(buf, nullptr);
}
return strtod(copy.c_str(), nullptr);
} }
private:
bool should_convert_to_comma = false;
}; };
} // namespace detail } // namespace detail

View File

@ -69,8 +69,6 @@ public:
range_reference(column_t column_index_start, row_t row_index_start, range_reference(column_t column_index_start, row_t row_index_start,
column_t column_index_end, row_t row_index_end); column_t column_index_end, row_t row_index_end);
range_reference(const range_reference &ref);
/// <summary> /// <summary>
/// Returns true if the range has a width and height of 1 cell. /// Returns true if the range has a width and height of 1 cell.
/// </summary> /// </summary>
@ -151,11 +149,6 @@ public:
/// </summary> /// </summary>
bool operator!=(const char *reference_string) const; bool operator!=(const char *reference_string) const;
/// <summary>
/// Assigns the extents of the provided range to this range.
/// </summary>
range_reference &operator=(const range_reference &ref);
private: private:
/// <summary> /// <summary>
/// The top left cell in the range /// The top left cell in the range

View File

@ -199,7 +199,7 @@ cell::cell(detail::cell_impl *d)
bool cell::garbage_collectible() const bool cell::garbage_collectible() const
{ {
return !(has_value() || is_merged() || phonetics_visible() || has_formula() || has_format() || has_hyperlink()); return d_->is_garbage_collectible();
} }
void cell::value(std::nullptr_t) void cell::value(std::nullptr_t)

View File

@ -109,27 +109,11 @@ column_t::column_t(const char *column_string)
{ {
} }
column_t::column_t(const column_t &other)
: column_t(other.index)
{
}
column_t::column_t(column_t &&other)
{
swap(*this, other);
}
std::string column_t::column_string() const std::string column_t::column_string() const
{ {
return column_string_from_index(index); return column_string_from_index(index);
} }
column_t &column_t::operator=(column_t rhs)
{
swap(*this, rhs);
return *this;
}
column_t &column_t::operator=(const std::string &rhs) column_t &column_t::operator=(const std::string &rhs)
{ {
return *this = column_t(rhs); return *this = column_t(rhs);

View File

@ -27,7 +27,7 @@
namespace xlnt { namespace xlnt {
namespace detail { namespace detail {
std::array<xlnt::optional<xlnt::rich_text>, 3> decode_header_footer(const std::string &hf_string) std::array<xlnt::optional<xlnt::rich_text>, 3> decode_header_footer(const std::string &hf_string, const number_serialiser &serialiser)
{ {
std::array<xlnt::optional<xlnt::rich_text>, 3> result; std::array<xlnt::optional<xlnt::rich_text>, 3> result;
@ -216,7 +216,7 @@ std::array<xlnt::optional<xlnt::rich_text>, 3> decode_header_footer(const std::s
tokens.push_back(token); tokens.push_back(token);
} }
const auto parse_section = [&tokens, &result](hf_code code) { const auto parse_section = [&tokens, &result, &serialiser](hf_code code) {
std::vector<hf_code> end_codes{hf_code::left_section, hf_code::center_section, hf_code::right_section}; std::vector<hf_code> end_codes{hf_code::left_section, hf_code::center_section, hf_code::right_section};
end_codes.erase(std::find(end_codes.begin(), end_codes.end(), code)); end_codes.erase(std::find(end_codes.begin(), end_codes.end(), code));
@ -297,7 +297,7 @@ std::array<xlnt::optional<xlnt::rich_text>, 3> decode_header_footer(const std::s
current_run.second = xlnt::font(); current_run.second = xlnt::font();
} }
current_run.second.get().size(std::stod(current_token.value)); current_run.second.get().size(serialiser.deserialise(current_token.value));
break; break;
} }
@ -460,7 +460,7 @@ std::array<xlnt::optional<xlnt::rich_text>, 3> decode_header_footer(const std::s
return result; return result;
} }
std::string encode_header_footer(const rich_text &t, header_footer::location where) std::string encode_header_footer(const rich_text &t, header_footer::location where, const number_serialiser &serialiser)
{ {
const auto location_code_map = const auto location_code_map =
std::unordered_map<header_footer::location, std::unordered_map<header_footer::location,
@ -505,7 +505,7 @@ std::string encode_header_footer(const rich_text &t, header_footer::location whe
if (run.second.get().has_size()) if (run.second.get().has_size())
{ {
encoded.push_back('&'); encoded.push_back('&');
encoded.append(serialize_number_to_string(run.second.get().size())); encoded.append(serialiser.serialise(run.second.get().size()));
} }
if (run.second.get().underlined()) if (run.second.get().underlined())
{ {

View File

@ -27,12 +27,13 @@
#include <xlnt/cell/rich_text.hpp> #include <xlnt/cell/rich_text.hpp>
#include <xlnt/utils/optional.hpp> #include <xlnt/utils/optional.hpp>
#include <xlnt/worksheet/header_footer.hpp> #include <xlnt/worksheet/header_footer.hpp>
#include <xlnt/utils/numeric.hpp>
namespace xlnt { namespace xlnt {
namespace detail { namespace detail {
std::array<xlnt::optional<xlnt::rich_text>, 3> decode_header_footer(const std::string &hf_string); std::array<xlnt::optional<xlnt::rich_text>, 3> decode_header_footer(const std::string &hf_string, const number_serialiser &serialiser);
std::string encode_header_footer(const rich_text &t, header_footer::location where); std::string encode_header_footer(const rich_text &t, header_footer::location where, const number_serialiser& serialiser);
} // namespace detail } // namespace detail
} // namespace xlnt } // namespace xlnt

View File

@ -65,6 +65,11 @@ struct cell_impl
optional<hyperlink_impl> hyperlink_; optional<hyperlink_impl> hyperlink_;
optional<format_impl *> format_; optional<format_impl *> format_;
optional<comment *> comment_; optional<comment *> comment_;
// True only when type_ is cell_type::empty, neither is_merged_ nor phonetics_visible_
// holds, and none of formula_, format_ and hyperlink_ is set.
bool is_garbage_collectible() const
{
    return type_ == cell_type::empty
        && !is_merged_
        && !phonetics_visible_
        && !formula_.is_set()
        && !format_.is_set()
        && !hyperlink_.is_set();
}
}; };
inline bool operator==(const cell_impl &lhs, const cell_impl &rhs) inline bool operator==(const cell_impl &lhs, const cell_impl &rhs)

View File

@ -307,14 +307,14 @@ Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
} }
// <row> inside <sheetData> element // <row> inside <sheetData> element
std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail::number_converter &converter, std::vector<Cell> &parsed_cells) std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector<Cell> &parsed_cells)
{ {
std::pair<xlnt::row_properties, int> props; std::pair<xlnt::row_properties, int> props;
for (auto &attr : parser->attribute_map()) for (auto &attr : parser->attribute_map())
{ {
if (string_equal(attr.first.name(), "dyDescent")) if (string_equal(attr.first.name(), "dyDescent"))
{ {
props.first.dy_descent = converter.stold(attr.second.value); props.first.dy_descent = converter.deserialise(attr.second.value);
} }
else if (string_equal(attr.first.name(), "spans")) else if (string_equal(attr.first.name(), "spans"))
{ {
@ -322,7 +322,7 @@ std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail
} }
else if (string_equal(attr.first.name(), "ht")) else if (string_equal(attr.first.name(), "ht"))
{ {
props.first.height = converter.stold(attr.second.value); props.first.height = converter.deserialise(attr.second.value);
} }
else if (string_equal(attr.first.name(), "s")) else if (string_equal(attr.first.name(), "s"))
{ {
@ -382,7 +382,7 @@ std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail
} }
// <sheetData> inside <worksheet> element // <sheetData> inside <worksheet> element
Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_converter &converter) Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_serialiser &converter)
{ {
Sheet_Data sheet_data; Sheet_Data sheet_data;
int level = 1; // nesting level int level = 1; // nesting level
@ -480,7 +480,7 @@ cell xlsx_consumer::read_cell()
if (parser().attribute_present("ht")) if (parser().attribute_present("ht"))
{ {
row_properties.height = converter_.stold(parser().attribute("ht")); row_properties.height = converter_.deserialise(parser().attribute("ht"));
} }
if (parser().attribute_present("customHeight")) if (parser().attribute_present("customHeight"))
@ -495,7 +495,7 @@ cell xlsx_consumer::read_cell()
if (parser().attribute_present(qn("x14ac", "dyDescent"))) if (parser().attribute_present(qn("x14ac", "dyDescent")))
{ {
row_properties.dy_descent = converter_.stold(parser().attribute(qn("x14ac", "dyDescent"))); row_properties.dy_descent = converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent")));
} }
if (parser().attribute_present("spans")) if (parser().attribute_present("spans"))
@ -602,7 +602,7 @@ cell xlsx_consumer::read_cell()
} }
else if (type == "s") else if (type == "s")
{ {
cell.d_->value_numeric_ = converter_.stold(value_string); cell.d_->value_numeric_ = converter_.deserialise(value_string);
cell.data_type(cell::type::shared_string); cell.data_type(cell::type::shared_string);
} }
else if (type == "b") // boolean else if (type == "b") // boolean
@ -611,7 +611,7 @@ cell xlsx_consumer::read_cell()
} }
else if (type == "n") // numeric else if (type == "n") // numeric
{ {
cell.value(converter_.stold(value_string)); cell.value(converter_.deserialise(value_string));
} }
else if (!value_string.empty() && value_string[0] == '#') else if (!value_string.empty() && value_string[0] == '#')
{ {
@ -863,23 +863,23 @@ std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id)
if (parser().attribute_present("baseColWidth")) if (parser().attribute_present("baseColWidth"))
{ {
ws.d_->format_properties_.base_col_width = ws.d_->format_properties_.base_col_width =
converter_.stold(parser().attribute("baseColWidth")); converter_.deserialise(parser().attribute("baseColWidth"));
} }
if (parser().attribute_present("defaultColWidth")) if (parser().attribute_present("defaultColWidth"))
{ {
ws.d_->format_properties_.default_column_width = ws.d_->format_properties_.default_column_width =
converter_.stold(parser().attribute("defaultColWidth")); converter_.deserialise(parser().attribute("defaultColWidth"));
} }
if (parser().attribute_present("defaultRowHeight")) if (parser().attribute_present("defaultRowHeight"))
{ {
ws.d_->format_properties_.default_row_height = ws.d_->format_properties_.default_row_height =
converter_.stold(parser().attribute("defaultRowHeight")); converter_.deserialise(parser().attribute("defaultRowHeight"));
} }
if (parser().attribute_present(qn("x14ac", "dyDescent"))) if (parser().attribute_present(qn("x14ac", "dyDescent")))
{ {
ws.d_->format_properties_.dy_descent = ws.d_->format_properties_.dy_descent =
converter_.stold(parser().attribute(qn("x14ac", "dyDescent"))); converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent")));
} }
skip_attributes(); skip_attributes();
@ -899,7 +899,7 @@ std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id)
optional<double> width = [this](xml::parser &p) -> xlnt::optional<double> { optional<double> width = [this](xml::parser &p) -> xlnt::optional<double> {
if (p.attribute_present("width")) if (p.attribute_present("width"))
{ {
return (converter_.stold(p.attribute("width")) * 7 - 5) / 7; return (converter_.deserialise(p.attribute("width")) * 7 - 5) / 7;
} }
return xlnt::optional<double>(); return xlnt::optional<double>();
}(parser()); }(parser());
@ -1000,7 +1000,7 @@ void xlsx_consumer::read_worksheet_sheetdata()
case cell::type::empty: case cell::type::empty:
case cell::type::number: case cell::type::number:
case cell::type::date: { case cell::type::date: {
ws_cell_impl->value_numeric_ = converter_.stold(cell.value); ws_cell_impl->value_numeric_ = converter_.deserialise(cell.value);
break; break;
} }
case cell::type::shared_string: { case cell::type::shared_string: {
@ -1196,12 +1196,12 @@ worksheet xlsx_consumer::read_worksheet_end(const std::string &rel_id)
{ {
page_margins margins; page_margins margins;
margins.top(converter_.stold(parser().attribute("top"))); margins.top(converter_.deserialise(parser().attribute("top")));
margins.bottom(converter_.stold(parser().attribute("bottom"))); margins.bottom(converter_.deserialise(parser().attribute("bottom")));
margins.left(converter_.stold(parser().attribute("left"))); margins.left(converter_.deserialise(parser().attribute("left")));
margins.right(converter_.stold(parser().attribute("right"))); margins.right(converter_.deserialise(parser().attribute("right")));
margins.header(converter_.stold(parser().attribute("header"))); margins.header(converter_.deserialise(parser().attribute("header")));
margins.footer(converter_.stold(parser().attribute("footer"))); margins.footer(converter_.deserialise(parser().attribute("footer")));
ws.page_margins(margins); ws.page_margins(margins);
} }
@ -1251,27 +1251,27 @@ worksheet xlsx_consumer::read_worksheet_end(const std::string &rel_id)
if (current_hf_element == qn("spreadsheetml", "oddHeader")) if (current_hf_element == qn("spreadsheetml", "oddHeader"))
{ {
odd_header = decode_header_footer(read_text()); odd_header = decode_header_footer(read_text(), converter_);
} }
else if (current_hf_element == qn("spreadsheetml", "oddFooter")) else if (current_hf_element == qn("spreadsheetml", "oddFooter"))
{ {
odd_footer = decode_header_footer(read_text()); odd_footer = decode_header_footer(read_text(), converter_);
} }
else if (current_hf_element == qn("spreadsheetml", "evenHeader")) else if (current_hf_element == qn("spreadsheetml", "evenHeader"))
{ {
even_header = decode_header_footer(read_text()); even_header = decode_header_footer(read_text(), converter_);
} }
else if (current_hf_element == qn("spreadsheetml", "evenFooter")) else if (current_hf_element == qn("spreadsheetml", "evenFooter"))
{ {
even_footer = decode_header_footer(read_text()); even_footer = decode_header_footer(read_text(), converter_);
} }
else if (current_hf_element == qn("spreadsheetml", "firstHeader")) else if (current_hf_element == qn("spreadsheetml", "firstHeader"))
{ {
first_header = decode_header_footer(read_text()); first_header = decode_header_footer(read_text(), converter_);
} }
else if (current_hf_element == qn("spreadsheetml", "firstFooter")) else if (current_hf_element == qn("spreadsheetml", "firstFooter"))
{ {
first_footer = decode_header_footer(read_text()); first_footer = decode_header_footer(read_text(), converter_);
} }
else else
{ {
@ -2308,7 +2308,7 @@ void xlsx_consumer::read_stylesheet()
while (in_element(qn("spreadsheetml", "gradientFill"))) while (in_element(qn("spreadsheetml", "gradientFill")))
{ {
expect_start_element(qn("spreadsheetml", "stop"), xml::content::complex); expect_start_element(qn("spreadsheetml", "stop"), xml::content::complex);
auto position = converter_.stold(parser().attribute("position")); auto position = converter_.deserialise(parser().attribute("position"));
expect_start_element(qn("spreadsheetml", "color"), xml::content::complex); expect_start_element(qn("spreadsheetml", "color"), xml::content::complex);
auto color = read_color(); auto color = read_color();
expect_end_element(qn("spreadsheetml", "color")); expect_end_element(qn("spreadsheetml", "color"));
@ -2356,7 +2356,7 @@ void xlsx_consumer::read_stylesheet()
if (font_property_element == qn("spreadsheetml", "sz")) if (font_property_element == qn("spreadsheetml", "sz"))
{ {
new_font.size(converter_.stold(parser().attribute("val"))); new_font.size(converter_.deserialise(parser().attribute("val")));
} }
else if (font_property_element == qn("spreadsheetml", "name")) else if (font_property_element == qn("spreadsheetml", "name"))
{ {
@ -3169,7 +3169,7 @@ rich_text xlsx_consumer::read_rich_text(const xml::qname &parent)
if (current_run_property_element == xml::qname(xmlns, "sz")) if (current_run_property_element == xml::qname(xmlns, "sz"))
{ {
run.second.get().size(converter_.stold(parser().attribute("val"))); run.second.get().size(converter_.deserialise(parser().attribute("val")));
} }
else if (current_run_property_element == xml::qname(xmlns, "rFont")) else if (current_run_property_element == xml::qname(xmlns, "rFont"))
{ {
@ -3307,7 +3307,7 @@ xlnt::color xlsx_consumer::read_color()
if (parser().attribute_present("tint")) if (parser().attribute_present("tint"))
{ {
result.tint(converter_.stold(parser().attribute("tint"))); result.tint(converter_.deserialise(parser().attribute("tint")));
} }
return result; return result;

View File

@ -416,7 +416,7 @@ private:
detail::cell_impl *current_cell_; detail::cell_impl *current_cell_;
detail::worksheet_impl *current_worksheet_; detail::worksheet_impl *current_worksheet_;
number_converter converter_; number_serialiser converter_;
}; };
} // namespace detail } // namespace detail

View File

@ -2267,16 +2267,18 @@ void xlsx_producer::write_worksheet(const relationship &rel)
{ {
write_attribute("enableFormatConditionsCalculation", props.enable_format_condition_calculation.get()); write_attribute("enableFormatConditionsCalculation", props.enable_format_condition_calculation.get());
} }
// outlinePr is optional in the spec but is being written every time?
write_start_element(xmlns, "outlinePr"); write_start_element(xmlns, "outlinePr");
write_attribute("summaryBelow", "1"); write_attribute("summaryBelow", "1");
write_attribute("summaryRight", "1"); write_attribute("summaryRight", "1");
write_end_element(xmlns, "outlinePr"); write_end_element(xmlns, "outlinePr");
write_start_element(xmlns, "pageSetUpPr"); if (ws.has_page_setup())
write_attribute("fitToPage", write_bool(ws.page_setup().fit_to_page())); {
write_end_element(xmlns, "pageSetUpPr"); write_start_element(xmlns, "pageSetUpPr");
write_attribute("fitToPage", write_bool(ws.page_setup().fit_to_page()));
write_end_element(xmlns, "pageSetUpPr");
}
write_end_element(xmlns, "sheetPr"); write_end_element(xmlns, "sheetPr");
} }
@ -2418,7 +2420,7 @@ void xlsx_producer::write_worksheet(const relationship &rel)
if (props.width.is_set()) if (props.width.is_set())
{ {
double width = (props.width.get() * 7 + 5) / 7; double width = (props.width.get() * 7 + 5) / 7;
write_attribute("width", serialize_number_to_string(width)); write_attribute("width", converter_.serialise(width));
} }
if (props.best_fit) if (props.best_fit)
@ -2481,12 +2483,19 @@ void xlsx_producer::write_worksheet(const relationship &rel)
{ {
for (auto column = dimension.top_left().column(); column <= dimension.bottom_right().column(); ++column) for (auto column = dimension.top_left().column(); column <= dimension.bottom_right().column(); ++column)
{ {
if (!ws.has_cell(cell_reference(column, check_row))) continue; auto ref = cell_reference(column, check_row);
auto cell = ws.cell(cell_reference(column, check_row)); auto cell = ws.d_->cell_map_.find(ref);
if (cell.garbage_collectible()) continue; if (cell == ws.d_->cell_map_.end())
{
continue;
}
if (cell->second.is_garbage_collectible())
{
continue;
}
first_block_column = std::min(first_block_column, cell.column()); first_block_column = std::min(first_block_column, cell->second.column_);
last_block_column = std::max(last_block_column, cell.column()); last_block_column = std::max(last_block_column, cell->second.column_);
if (row == check_row) if (row == check_row)
{ {
@ -2520,7 +2529,7 @@ void xlsx_producer::write_worksheet(const relationship &rel)
if (props.height.is_set()) if (props.height.is_set())
{ {
auto height = props.height.get(); auto height = props.height.get();
write_attribute("ht", serialize_number_to_string(height)); write_attribute("ht", converter_.serialise(height));
} }
if (props.hidden) if (props.hidden)
@ -2647,7 +2656,7 @@ void xlsx_producer::write_worksheet(const relationship &rel)
case cell::type::number: case cell::type::number:
write_start_element(xmlns, "v"); write_start_element(xmlns, "v");
write_characters(serialize_number_to_string(cell.value<double>())); write_characters(converter_.serialise(cell.value<double>()));
write_end_element(xmlns, "v"); write_end_element(xmlns, "v");
break; break;
@ -2884,26 +2893,26 @@ void xlsx_producer::write_worksheet(const relationship &rel)
{ {
if (hf.has_odd_even_header(location)) if (hf.has_odd_even_header(location))
{ {
odd_header.append(encode_header_footer(hf.odd_header(location), location)); odd_header.append(encode_header_footer(hf.odd_header(location), location, converter_));
even_header.append(encode_header_footer(hf.even_header(location), location)); even_header.append(encode_header_footer(hf.even_header(location), location, converter_));
} }
if (hf.has_odd_even_footer(location)) if (hf.has_odd_even_footer(location))
{ {
odd_footer.append(encode_header_footer(hf.odd_footer(location), location)); odd_footer.append(encode_header_footer(hf.odd_footer(location), location, converter_));
even_footer.append(encode_header_footer(hf.even_footer(location), location)); even_footer.append(encode_header_footer(hf.even_footer(location), location, converter_));
} }
} }
else else
{ {
if (hf.has_header(location)) if (hf.has_header(location))
{ {
odd_header.append(encode_header_footer(hf.header(location), location)); odd_header.append(encode_header_footer(hf.header(location), location, converter_));
} }
if (hf.has_footer(location)) if (hf.has_footer(location))
{ {
odd_footer.append(encode_header_footer(hf.footer(location), location)); odd_footer.append(encode_header_footer(hf.footer(location), location, converter_));
} }
} }
@ -2911,12 +2920,12 @@ void xlsx_producer::write_worksheet(const relationship &rel)
{ {
if (hf.has_first_page_header(location)) if (hf.has_first_page_header(location))
{ {
first_header.append(encode_header_footer(hf.first_page_header(location), location)); first_header.append(encode_header_footer(hf.first_page_header(location), location, converter_));
} }
if (hf.has_first_page_footer(location)) if (hf.has_first_page_footer(location))
{ {
first_footer.append(encode_header_footer(hf.first_page_footer(location), location)); first_footer.append(encode_header_footer(hf.first_page_footer(location), location, converter_));
} }
} }
} }
@ -3383,7 +3392,7 @@ void xlsx_producer::write_color(const xlnt::color &color)
} }
if (color.has_tint()) if (color.has_tint())
{ {
write_attribute("tint", serialize_number_to_string(color.tint())); write_attribute("tint", converter_.serialise(color.tint()));
} }
} }

View File

@ -30,6 +30,7 @@
#include <detail/constants.hpp> #include <detail/constants.hpp>
#include <detail/external/include_libstudxml.hpp> #include <detail/external/include_libstudxml.hpp>
#include <xlnt/utils/numeric.hpp>
namespace xml { namespace xml {
class serializer; class serializer;
@ -208,6 +209,7 @@ private:
detail::cell_impl *current_cell_; detail::cell_impl *current_cell_;
detail::worksheet_impl *current_worksheet_; detail::worksheet_impl *current_worksheet_;
detail::number_serialiser converter_;
}; };
} // namespace detail } // namespace detail

View File

@ -46,12 +46,6 @@ range_reference::range_reference(const char *range_string)
{ {
} }
range_reference::range_reference(const range_reference &ref)
{
top_left_ = ref.top_left_;
bottom_right_ = ref.bottom_right_;
}
range_reference::range_reference(const std::string &range_string) range_reference::range_reference(const std::string &range_string)
: top_left_("A1"), bottom_right_("A1") : top_left_("A1"), bottom_right_("A1")
{ {
@ -183,11 +177,4 @@ XLNT_API bool operator!=(const char *reference_string, const range_reference &re
return ref != reference_string; return ref != reference_string;
} }
range_reference &range_reference::operator=(const range_reference &ref)
{
top_left_ = ref.top_left_;
bottom_right_ = ref.bottom_right_;
return *this;
}
} // namespace xlnt } // namespace xlnt

View File

@ -579,8 +579,40 @@ column_t worksheet::highest_column_or_props() const
range_reference worksheet::calculate_dimension() const range_reference worksheet::calculate_dimension() const
{ {
return range_reference(lowest_column(), lowest_row_or_props(), // partially optimised version of:
highest_column(), highest_row_or_props()); // return range_reference(lowest_column(), lowest_row_or_props(),
// highest_column(), highest_row_or_props());
//
if (d_->cell_map_.empty() && d_->row_properties_.empty())
{
return range_reference(constants::min_column(), constants::min_row(),
constants::min_column(), constants::min_row());
}
row_t min_row_prop = constants::max_row();
row_t max_row_prop = constants::min_row();
for (const auto &row_prop : d_->row_properties_)
{
min_row_prop = std::min(min_row_prop, row_prop.first);
max_row_prop = std::max(max_row_prop, row_prop.first);
}
if (d_->cell_map_.empty())
{
return range_reference(constants::min_column(), min_row_prop,
constants::min_column(), max_row_prop);
}
// find min and max row/column in cell map
column_t min_col = constants::max_column();
column_t max_col = constants::min_column();
row_t min_row = min_row_prop;
row_t max_row = max_row_prop;
for (auto &c : d_->cell_map_)
{
min_col = std::min(min_col, c.second.column_);
max_col = std::max(max_col, c.second.column_);
min_row = std::min(min_row, c.second.row_);
max_row = std::max(max_row, c.second.row_);
}
return range_reference(min_col, min_row, max_col, max_row);
} }
range worksheet::range(const std::string &reference_string) range worksheet::range(const std::string &reference_string)

View File

@ -40,17 +40,18 @@ public:
void test_serialise_number() void test_serialise_number()
{ {
xlnt::detail::number_serialiser serialiser;
// excel serialises numbers as floating point values with <= 15 digits of precision // excel serialises numbers as floating point values with <= 15 digits of precision
xlnt_assert(xlnt::detail::serialize_number_to_string(1) == "1"); xlnt_assert(serialiser.serialise(1) == "1");
// trailing zeroes are ignored // trailing zeroes are ignored
xlnt_assert(xlnt::detail::serialize_number_to_string(1.0) == "1"); xlnt_assert(serialiser.serialise(1.0) == "1");
xlnt_assert(xlnt::detail::serialize_number_to_string(1.0f) == "1"); xlnt_assert(serialiser.serialise(1.0f) == "1");
// one to 1 relation // one to 1 relation
xlnt_assert(xlnt::detail::serialize_number_to_string(1.23456) == "1.23456"); xlnt_assert(serialiser.serialise(1.23456) == "1.23456");
xlnt_assert(xlnt::detail::serialize_number_to_string(1.23456789012345) == "1.23456789012345"); xlnt_assert(serialiser.serialise(1.23456789012345) == "1.23456789012345");
xlnt_assert(xlnt::detail::serialize_number_to_string(123456.789012345) == "123456.789012345"); xlnt_assert(serialiser.serialise(123456.789012345) == "123456.789012345");
xlnt_assert(xlnt::detail::serialize_number_to_string(1.23456789012345e+67) == "1.23456789012345e+67"); xlnt_assert(serialiser.serialise(1.23456789012345e+67) == "1.23456789012345e+67");
xlnt_assert(xlnt::detail::serialize_number_to_string(1.23456789012345e-67) == "1.23456789012345e-67"); xlnt_assert(serialiser.serialise(1.23456789012345e-67) == "1.23456789012345e-67");
} }
void test_float_equals_zero() void test_float_equals_zero()