From 0adb8a69b1a1c6f9ebe9cb285be16a7caef62467 Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Sat, 29 Feb 2020 22:11:31 +1300 Subject: [PATCH 01/13] add micro benchmarking project, setup number parsing benchmark relating to previous work https://github.com/tfussell/xlnt/issues/422 Results are matching what was observed at the time ^^ was being worked on std::from_chars is included as the target to beat, but since only MSVC has it for floating point it's not hugely useful yet uniform real distribution is probably a horrible choice, and it might be good to randomise the number of sf in each string also (currently the y all end up at max length) Run on (4 X 3500 MHz CPU s) CPU Caches: L1 Data 32K (x4) L1 Instruction 32K (x4) L2 Unified 262K (x4) L3 Unified 6291K (x1) ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- RandFloats/double_from_string_sstream 804 ns 820 ns 896000 RandFloats/double_from_string_strtod 163 ns 162 ns 5973333 RandFloats/double_from_string_strtod_fixed 175 ns 172 ns 5352107 RandFloats/double_from_string_strtod_fixed_const_ref 150 ns 152 ns 5352107 RandFloats/double_from_string_std_from_chars 87.1 ns 88.3 ns 9557333 RandFloatsComma/double_from_string_strtod_fixed_comma_ref 172 ns 173 ns 5146257 RandFloatsComma/double_from_string_strtod_fixed_comma_const_ref 180 ns 175 ns 5352107 --- benchmarks/CMakeLists.txt | 5 + benchmarks/microbenchmarks/CMakeLists.txt | 36 +++ .../microbenchmarks/double_to_string.cpp | 2 + .../microbenchmarks/string_to_double.cpp | 219 ++++++++++++++++++ 4 files changed, 262 insertions(+) create mode 100644 benchmarks/microbenchmarks/CMakeLists.txt create mode 100644 benchmarks/microbenchmarks/double_to_string.cpp create mode 100644 benchmarks/microbenchmarks/string_to_double.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 4ab93935..3c00ccda 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -41,3 +41,8 @@ foreach(BENCHMARK_SOURCE IN ITEMS ${BENCHMARK_SOURCES}) $) endif() endforeach() + +option(XLNT_MICROBENCH_ENABLED "Enable small benchmarks typically used for development" OFF) +if (XLNT_MICROBENCH_ENABLED) + add_subdirectory(microbenchmarks) +endif() \ No newline at end of file diff --git a/benchmarks/microbenchmarks/CMakeLists.txt b/benchmarks/microbenchmarks/CMakeLists.txt new file mode 100644 index 00000000..745df1b8 --- /dev/null +++ b/benchmarks/microbenchmarks/CMakeLists.txt @@ -0,0 +1,36 @@ +# FetchContent added in cmake v3.11 +# https://cmake.org/cmake/help/v3.11/module/FetchContent.html +# this file is behind a feature flag (XLNT_MICROBENCH_ENABLED) so the primary build is not affected +cmake_minimum_required(VERSION 3.11) +project(xlnt_ubench) + +# acquire google benchmark dependency +# disable generation of the various test projects +set(BENCHMARK_ENABLE_TESTING OFF) +# gtest not required +set(BENCHMARK_ENABLE_GTEST_TESTS OFF) + +include(FetchContent) +FetchContent_Declare( + googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark + GIT_TAG v1.5.0 +) +# download if not already present +FetchContent_GetProperties(googlebenchmark) +if(NOT googlebenchmark_POPULATED) + FetchContent_Populate(googlebenchmark) + add_subdirectory(${googlebenchmark_SOURCE_DIR} ${googlebenchmark_BINARY_DIR}) +endif() +# equivalent of add_subdirectory, now available for use +FetchContent_MakeAvailable(googlebenchmark) + + +add_executable(xlnt_ubench) +target_sources(xlnt_ubench + PRIVATE + string_to_double.cpp + double_to_string.cpp +) +target_link_libraries(xlnt_ubench benchmark_main xlnt) +target_compile_features(xlnt_ubench PRIVATE cxx_std_17) \ No newline at end of file diff --git a/benchmarks/microbenchmarks/double_to_string.cpp b/benchmarks/microbenchmarks/double_to_string.cpp new file mode 100644 index 00000000..7b426ef1 --- /dev/null +++ b/benchmarks/microbenchmarks/double_to_string.cpp @@ -0,0 +1,2 @@ +#include "benchmark/benchmark.h" + diff --git a/benchmarks/microbenchmarks/string_to_double.cpp b/benchmarks/microbenchmarks/string_to_double.cpp new file mode 100644 index 00000000..58736e7a --- /dev/null +++ b/benchmarks/microbenchmarks/string_to_double.cpp @@ -0,0 +1,219 @@ +// A core part of the xlsx parsing routine is taking strings from the xml parser and parsing these to a double +// this has a few requirements +// - expect numbers in the form 1234.56 (i.e. no thousands seperator, '.' used for the decimal seperator) +// - handles atleast 15 significant figures (excel only serialises numbers up to 15sf) + +#include +#include +#include + +// setup a large quantity of random doubles as strings +template +class RandomFloats : public benchmark::Fixture +{ + static constexpr size_t Number_of_Elements = 1 << 20; + static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated"); + + std::vector inputs; + + size_t index = 0; + const char *locale_str; + +public: + void SetUp(const ::benchmark::State &state) + { + if (Decimal_Locale) + { + locale_str = setlocale(LC_ALL, "C"); + } + else + { + locale_str = setlocale(LC_ALL, "de-DE"); + } + std::random_device rd; // obtain a seed for the random number engine + std::mt19937 gen(rd()); + // doing full range is stupid (::min/max()...), it just ends up generating very large numbers + // uniform is probably not the best distribution to use here, but it will do for now + std::uniform_real_distribution dis(-1'000, 1'000); + // generate a large quantity of doubles to deserialise + inputs.reserve(Number_of_Elements); + for (int i = 0; i < Number_of_Elements; ++i) + { + double d = dis(gen); + char buf[16]; + snprintf(buf, 16, "%.15f", d); + inputs.push_back(std::string(buf)); + } + } + + void TearDown(const ::benchmark::State &state) + { + // restore locale + setlocale(LC_ALL, locale_str); + // gbench is keeping the fixtures alive somewhere, need to clear the data... + inputs = std::vector{}; + } + + std::string &get_rand() + { + return inputs[++index & Number_of_Elements]; + } +}; + +// method used by xlsx_consumer.cpp in commit - ba01de47a7d430764c20ec9ac9600eec0eb38bcf +// std::istringstream with the locale set to "C" +#include +struct number_converter +{ + number_converter() + { + stream.imbue(std::locale("C")); + } + + double stold(const std::string &s) + { + stream.str(s); + stream.clear(); + stream >> result; + return result; + } + + std::istringstream stream; + double result; +}; + +using RandFloats = RandomFloats; + +BENCHMARK_F(RandFloats, double_from_string_sstream) +(benchmark::State &state) +{ + number_converter converter; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + converter.stold(get_rand())); + } +} + +// using strotod +// https://en.cppreference.com/w/cpp/string/byte/strtof +// this naive usage is broken in the face of locales (fails condition 1) +#include +BENCHMARK_F(RandFloats, double_from_string_strtod) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + strtod(get_rand().c_str(), nullptr)); + } +} + +// to resolve the locale issue with strtod, a little preprocessing of the input is required +struct number_converter_mk2 +{ + explicit number_converter_mk2() + : should_convert_to_comma(std::use_facet>(std::locale{}).decimal_point() == ',') + { + } + + double stold(std::string &s) const noexcept + { + assert(!s.empty()); + if (should_convert_to_comma) + { + auto decimal_pt = std::find(s.begin(), s.end(), '.'); + if (decimal_pt != s.end()) + { + *decimal_pt = ','; + } + } + return strtod(s.c_str(), nullptr); + } + + double stold(const std::string &s) const + { + assert(!s.empty()); + if (!should_convert_to_comma) + { + return strtod(s.c_str(), nullptr); + } + std::string copy(s); + auto decimal_pt = std::find(copy.begin(), copy.end(), '.'); + if (decimal_pt != copy.end()) + { + *decimal_pt = ','; + } + return strtod(copy.c_str(), nullptr); + } + +private: + bool should_convert_to_comma = false; +}; + +BENCHMARK_F(RandFloats, double_from_string_strtod_fixed) +(benchmark::State &state) +{ + number_converter_mk2 converter; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + converter.stold(get_rand())); + } +} + +BENCHMARK_F(RandFloats, double_from_string_strtod_fixed_const_ref) +(benchmark::State &state) +{ + number_converter_mk2 converter; + while (state.KeepRunning()) + { + const std::string &inp = get_rand(); + benchmark::DoNotOptimize( + converter.stold(inp)); + } +} + +// locale names are different between OS's, and std::from_chars is only complete in MSVC +#ifdef _MSC_VER + +#include +BENCHMARK_F(RandFloats, double_from_string_std_from_chars) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + const std::string &input = get_rand(); + double output; + benchmark::DoNotOptimize( + std::from_chars(input.data(), input.data() + input.size(), output)); + } +} + +// not using the standard "C" locale with '.' seperator +// german locale uses ',' as the seperator +using RandFloatsComma = RandomFloats; +BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_ref) +(benchmark::State &state) +{ + number_converter_mk2 converter; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + converter.stold(get_rand())); + } +} + +BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_const_ref) +(benchmark::State &state) +{ + number_converter_mk2 converter; + while (state.KeepRunning()) + { + const std::string &inp = get_rand(); + benchmark::DoNotOptimize( + converter.stold(inp)); + } +} + +#endif \ No newline at end of file From 7ba36b5e739e424a1aaebff0e80450586f7ff1c8 Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Sat, 29 Feb 2020 22:58:52 +1300 Subject: [PATCH 02/13] fix dumb bug in input randomiser, add basic double->string benchmarks * input randomiser was feeding a constant value previously, now actually randomising * start to_string with the current method (sstream), an faster more correct version (sstream_cached), snprintf, and std::to_chars ** NOTE: only std::to_chars and sstream_cached are correct in the face of locales Run on (4 X 3500 MHz CPU s) CPU Caches: L1 Data 32K (x4) L1 Instruction 32K (x4) L2 Unified 262K (x4) L3 Unified 6291K (x1) ------------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------------------- RandFloatStrs/double_from_string_sstream 1012 ns 1001 ns 640000 RandFloatStrs/double_from_string_strtod 276 ns 276 ns 2488889 RandFloatStrs/double_from_string_strtod_fixed 312 ns 308 ns 2133333 RandFloatStrs/double_from_string_strtod_fixed_const_ref 307 ns 300 ns 2240000 RandFloatStrs/double_from_string_std_from_chars 194 ns 188 ns 3733333 RandFloatCommaStrs/double_from_string_strtod_fixed_comma_ref 315 ns 314 ns 2240000 RandFloatCommaStrs/double_from_string_strtod_fixed_comma_const_ref 306 ns 305 ns 2357895 RandFloats/string_from_double_sstream 1372 ns 1381 ns 497778 RandFloats/string_from_double_sstream_cached 1136 ns 1123 ns 640000 RandFloats/string_from_double_snprintf 536 ns 516 ns 1000000 RandFloats/string_from_double_std_to_chars 116 ns 115 ns 6400000 --- .../microbenchmarks/double_to_string.cpp | 151 +++++++++++++++++- .../microbenchmarks/string_to_double.cpp | 82 +++++----- 2 files changed, 193 insertions(+), 40 deletions(-) diff --git a/benchmarks/microbenchmarks/double_to_string.cpp b/benchmarks/microbenchmarks/double_to_string.cpp index 7b426ef1..01efa9ef 100644 --- a/benchmarks/microbenchmarks/double_to_string.cpp +++ b/benchmarks/microbenchmarks/double_to_string.cpp @@ -1,2 +1,151 @@ -#include "benchmark/benchmark.h" +// A core part of the xlsx serialisation routine is taking doubles from memory and stringifying them +// this has a few requirements +// - expect strings in the form 1234.56 (i.e. no thousands seperator, '.' used for the decimal seperator) +// - outputs up to 15 significant figures (excel only serialises numbers up to 15sf) +#include "benchmark/benchmark.h" +#include +#include +#include + +namespace { + +// setup a large quantity of random doubles as strings +template +class RandomFloats : public benchmark::Fixture +{ + static constexpr size_t Number_of_Elements = 1 << 20; + static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated"); + + std::vector inputs; + + size_t index = 0; + const char *locale_str = nullptr; + +public: + void SetUp(const ::benchmark::State &state) + { + if (Decimal_Locale) + { + locale_str = setlocale(LC_ALL, "C"); + } + else + { + locale_str = setlocale(LC_ALL, "de-DE"); + } + std::random_device rd; // obtain a seed for the random number engine + std::mt19937 gen(rd()); + // doing full range is stupid (::min/max()...), it just ends up generating very large numbers + // uniform is probably not the best distribution to use here, but it will do for now + std::uniform_real_distribution dis(-1'000, 1'000); + // generate a large quantity of doubles to deserialise + inputs.reserve(Number_of_Elements); + for (int i = 0; i < Number_of_Elements; ++i) + { + double d = dis(gen); + inputs.push_back(d); + } + } + + void TearDown(const ::benchmark::State &state) + { + // restore locale + setlocale(LC_ALL, locale_str); + // gbench is keeping the fixtures alive somewhere, need to clear the data after use + inputs = std::vector{}; + } + + double &get_rand() + { + return inputs[++index & (Number_of_Elements - 1)]; + } +}; + +/// Takes in a double and outputs a string form of that number which will +/// serialise and deserialise without loss of precision +std::string serialize_number_to_string(double num) +{ + // more digits and excel won't match + constexpr int Excel_Digit_Precision = 15; //sf + std::stringstream ss; + ss.precision(Excel_Digit_Precision); + ss << num; + return ss.str(); +} + +class number_serialiser +{ + static constexpr int Excel_Digit_Precision = 15; //sf + std::ostringstream ss; + +public: + explicit number_serialiser() + { + ss.precision(Excel_Digit_Precision); + ss.imbue(std::locale("C")); + } + + std::string serialise(double d) + { + ss.str(""); // reset string buffer + ss.clear(); // reset any error flags + ss << d; + return ss.str(); + } +}; + +using RandFloats = RandomFloats; +} // namespace + +BENCHMARK_F(RandFloats, string_from_double_sstream) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + serialize_number_to_string(get_rand())); + } +} + +BENCHMARK_F(RandFloats, string_from_double_sstream_cached) +(benchmark::State &state) +{ + number_serialiser ser; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + ser.serialise(get_rand())); + } +} + +BENCHMARK_F(RandFloats, string_from_double_snprintf) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + char buf[16]; + int len = snprintf(buf, sizeof(buf), "%16f", get_rand()); + + benchmark::DoNotOptimize( + std::string(buf, len)); + } +} + +// locale names are different between OS's, and std::from_chars is only complete in MSVC +#ifdef _MSC_VER + +#include +BENCHMARK_F(RandFloats, string_from_double_std_to_chars) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + char buf[16]; + std::to_chars_result result = std::to_chars(buf, buf + std::size(buf), get_rand()); + + benchmark::DoNotOptimize( + std::string(buf, result.ptr)); + } +} + +#endif \ No newline at end of file diff --git a/benchmarks/microbenchmarks/string_to_double.cpp b/benchmarks/microbenchmarks/string_to_double.cpp index 58736e7a..5392f0a2 100644 --- a/benchmarks/microbenchmarks/string_to_double.cpp +++ b/benchmarks/microbenchmarks/string_to_double.cpp @@ -1,15 +1,18 @@ // A core part of the xlsx parsing routine is taking strings from the xml parser and parsing these to a double // this has a few requirements -// - expect numbers in the form 1234.56 (i.e. no thousands seperator, '.' used for the decimal seperator) +// - expect strings in the form 1234.56 (i.e. no thousands seperator, '.' used for the decimal seperator) // - handles atleast 15 significant figures (excel only serialises numbers up to 15sf) #include #include #include +#include + +namespace { // setup a large quantity of random doubles as strings template -class RandomFloats : public benchmark::Fixture +class RandomFloatStrs : public benchmark::Fixture { static constexpr size_t Number_of_Elements = 1 << 20; static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated"); @@ -17,7 +20,7 @@ class RandomFloats : public benchmark::Fixture std::vector inputs; size_t index = 0; - const char *locale_str; + const char *locale_str = nullptr; public: void SetUp(const ::benchmark::State &state) @@ -50,19 +53,18 @@ public: { // restore locale setlocale(LC_ALL, locale_str); - // gbench is keeping the fixtures alive somewhere, need to clear the data... + // gbench is keeping the fixtures alive somewhere, need to clear the data after use inputs = std::vector{}; } std::string &get_rand() { - return inputs[++index & Number_of_Elements]; + return inputs[++index & (Number_of_Elements - 1)]; } }; // method used by xlsx_consumer.cpp in commit - ba01de47a7d430764c20ec9ac9600eec0eb38bcf // std::istringstream with the locale set to "C" -#include struct number_converter { number_converter() @@ -82,32 +84,6 @@ struct number_converter double result; }; -using RandFloats = RandomFloats; - -BENCHMARK_F(RandFloats, double_from_string_sstream) -(benchmark::State &state) -{ - number_converter converter; - while (state.KeepRunning()) - { - benchmark::DoNotOptimize( - converter.stold(get_rand())); - } -} - -// using strotod -// https://en.cppreference.com/w/cpp/string/byte/strtof -// this naive usage is broken in the face of locales (fails condition 1) -#include -BENCHMARK_F(RandFloats, double_from_string_strtod) -(benchmark::State &state) -{ - while (state.KeepRunning()) - { - benchmark::DoNotOptimize( - strtod(get_rand().c_str(), nullptr)); - } -} // to resolve the locale issue with strtod, a little preprocessing of the input is required struct number_converter_mk2 @@ -151,7 +127,37 @@ private: bool should_convert_to_comma = false; }; -BENCHMARK_F(RandFloats, double_from_string_strtod_fixed) +using RandFloatStrs = RandomFloatStrs; +// german locale uses ',' as the seperator +using RandFloatCommaStrs = RandomFloatStrs; +} // namespace + +BENCHMARK_F(RandFloatStrs, double_from_string_sstream) +(benchmark::State &state) +{ + number_converter converter; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + converter.stold(get_rand())); + } +} + +// using strotod +// https://en.cppreference.com/w/cpp/string/byte/strtof +// this naive usage is broken in the face of locales (fails condition 1) +#include +BENCHMARK_F(RandFloatStrs, double_from_string_strtod) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + strtod(get_rand().c_str(), nullptr)); + } +} + +BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed) (benchmark::State &state) { number_converter_mk2 converter; @@ -162,7 +168,7 @@ BENCHMARK_F(RandFloats, double_from_string_strtod_fixed) } } -BENCHMARK_F(RandFloats, double_from_string_strtod_fixed_const_ref) +BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed_const_ref) (benchmark::State &state) { number_converter_mk2 converter; @@ -178,7 +184,7 @@ BENCHMARK_F(RandFloats, double_from_string_strtod_fixed_const_ref) #ifdef _MSC_VER #include -BENCHMARK_F(RandFloats, double_from_string_std_from_chars) +BENCHMARK_F(RandFloatStrs, double_from_string_std_from_chars) (benchmark::State &state) { while (state.KeepRunning()) @@ -191,9 +197,7 @@ BENCHMARK_F(RandFloats, double_from_string_std_from_chars) } // not using the standard "C" locale with '.' seperator -// german locale uses ',' as the seperator -using RandFloatsComma = RandomFloats; -BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_ref) +BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_ref) (benchmark::State &state) { number_converter_mk2 converter; @@ -204,7 +208,7 @@ BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_ref) } } -BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_const_ref) +BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_const_ref) (benchmark::State &state) { number_converter_mk2 converter; From a5aca5c2121420c603db4fffb560d8673ac7bd9e Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Sat, 29 Feb 2020 23:09:07 +1300 Subject: [PATCH 03/13] add the fixed snprintf serialiser Run on (4 X 3500 MHz CPU s) CPU Caches: L1 Data 32K (x4) L1 Instruction 32K (x4) L2 Unified 262K (x4) L3 Unified 6291K (x1) ------------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------------------- RandFloatStrs/double_from_string_sstream 969 ns 977 ns 640000 RandFloatStrs/double_from_string_strtod 274 ns 270 ns 2488889 RandFloatStrs/double_from_string_strtod_fixed 273 ns 273 ns 2635294 RandFloatStrs/double_from_string_strtod_fixed_const_ref 274 ns 276 ns 2488889 RandFloatStrs/double_from_string_std_from_chars 193 ns 193 ns 3733333 RandFloatCommaStrs/double_from_string_strtod_fixed_comma_ref 273 ns 267 ns 2635294 RandFloatCommaStrs/double_from_string_strtod_fixed_comma_const_ref 273 ns 273 ns 2635294 RandFloats/string_from_double_sstream 1323 ns 1311 ns 560000 RandFloats/string_from_double_sstream_cached 1074 ns 1074 ns 640000 RandFloats/string_from_double_snprintf 519 ns 516 ns 1000000 RandFloats/string_from_double_snprintf_fixed 517 ns 516 ns 1000000 RandFloats/string_from_double_std_to_chars 118 ns 117 ns 5600000 RandFloatsComma/string_from_double_snprintf_fixed_comma 520 ns 516 ns 1120000 --- .../microbenchmarks/double_to_string.cpp | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/benchmarks/microbenchmarks/double_to_string.cpp b/benchmarks/microbenchmarks/double_to_string.cpp index 01efa9ef..1182219c 100644 --- a/benchmarks/microbenchmarks/double_to_string.cpp +++ b/benchmarks/microbenchmarks/double_to_string.cpp @@ -94,7 +94,34 @@ public: } }; +class number_serialiser_mk2 +{ + bool should_convert_to_comma; + +public: + explicit number_serialiser_mk2() + : should_convert_to_comma(std::use_facet>(std::locale{}).decimal_point() == ',') + { + } + + std::string serialise(double d) + { + char buf[16]; + int len = snprintf(buf, sizeof(buf), "%16f", d); + if (should_convert_to_comma) + { + auto decimal = std::find(buf, buf + len, ','); + if (decimal != buf + len) + { + *decimal = '.'; + } + } + return std::string(buf, len); + } +}; + using RandFloats = RandomFloats; +using RandFloatsComma = RandomFloats; } // namespace BENCHMARK_F(RandFloats, string_from_double_sstream) @@ -131,6 +158,17 @@ BENCHMARK_F(RandFloats, string_from_double_snprintf) } } +BENCHMARK_F(RandFloats, string_from_double_snprintf_fixed) +(benchmark::State &state) +{ + number_serialiser_mk2 ser; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + ser.serialise(get_rand())); + } +} + // locale names are different between OS's, and std::from_chars is only complete in MSVC #ifdef _MSC_VER @@ -148,4 +186,15 @@ BENCHMARK_F(RandFloats, string_from_double_std_to_chars) } } +BENCHMARK_F(RandFloatsComma, string_from_double_snprintf_fixed_comma) +(benchmark::State &state) +{ + number_serialiser_mk2 ser; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + ser.serialise(get_rand())); + } +} + #endif \ No newline at end of file From d135f35bd4076f78ff96324ce91d4c931695fc42 Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Sun, 1 Mar 2020 20:24:22 +1300 Subject: [PATCH 04/13] minor cleanup --- .../microbenchmarks/double_to_string.cpp | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/benchmarks/microbenchmarks/double_to_string.cpp b/benchmarks/microbenchmarks/double_to_string.cpp index 1182219c..621c1165 100644 --- a/benchmarks/microbenchmarks/double_to_string.cpp +++ b/benchmarks/microbenchmarks/double_to_string.cpp @@ -96,25 +96,32 @@ public: class number_serialiser_mk2 { - bool should_convert_to_comma; + static constexpr int Excel_Digit_Precision = 15; //sf + bool should_convert_comma; + + void convert_comma(char *buf, int len) + { + char *buf_end = buf + len; + char *decimal = std::find(buf, buf_end, ','); + if (decimal != buf_end) + { + *decimal = '.'; + } + } public: explicit number_serialiser_mk2() - : should_convert_to_comma(std::use_facet>(std::locale{}).decimal_point() == ',') + : should_convert_comma(std::use_facet>(std::locale{}).decimal_point() == ',') { } std::string serialise(double d) { - char buf[16]; + char buf[Excel_Digit_Precision + 1]; // need space for trailing '\0' int len = snprintf(buf, sizeof(buf), "%16f", d); - if (should_convert_to_comma) + if (should_convert_comma) { - auto decimal = std::find(buf, buf + len, ','); - if (decimal != buf + len) - { - *decimal = '.'; - } + convert_comma(buf, len); } return std::string(buf, len); } From 0915fde09048b51dca743b1038259c3ed8131860 Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Sun, 1 Mar 2020 20:43:56 +1300 Subject: [PATCH 05/13] add saving to the spreadsheet-load test, fix a bug in the serialiser --- benchmarks/spreadsheet-load.cpp | 27 ++++++++++++++++++- source/detail/serialization/xlsx_producer.cpp | 12 +++++---- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/benchmarks/spreadsheet-load.cpp b/benchmarks/spreadsheet-load.cpp index 0caadb02..041be63a 100644 --- a/benchmarks/spreadsheet-load.cpp +++ b/benchmarks/spreadsheet-load.cpp @@ -5,7 +5,7 @@ namespace { using milliseconds_d = std::chrono::duration; -void run_test(const xlnt::path &file, int runs = 10) +void run_load_test(const xlnt::path &file, int runs = 10) { std::cout << file.string() << "\n\n"; @@ -24,10 +24,35 @@ void run_test(const xlnt::path &file, int runs = 10) std::cout << milliseconds_d(test_timings.back()).count() << " ms\n"; } } + +void run_save_test(const xlnt::path &file, int runs = 10) +{ + std::cout << file.string() << "\n\n"; + + xlnt::workbook wb; + wb.load(file); + const xlnt::path save_path(file.filename()); + + std::vector test_timings; + + for (int i = 0; i < runs; ++i) + { + auto start = std::chrono::steady_clock::now(); + + wb.save(save_path); + + auto end = std::chrono::steady_clock::now(); + test_timings.push_back(end - start); + std::cout << milliseconds_d(test_timings.back()).count() << " ms\n"; + } +} } // namespace int main() { run_test(path_helper::benchmark_file("large.xlsx")); run_test(path_helper::benchmark_file("very_large.xlsx")); + + run_save_test(path_helper::benchmark_file("large.xlsx")); + run_save_test(path_helper::benchmark_file("very_large.xlsx")); } \ No newline at end of file diff --git a/source/detail/serialization/xlsx_producer.cpp b/source/detail/serialization/xlsx_producer.cpp index e0a52870..743839ac 100644 --- a/source/detail/serialization/xlsx_producer.cpp +++ b/source/detail/serialization/xlsx_producer.cpp @@ -2267,16 +2267,18 @@ void xlsx_producer::write_worksheet(const relationship &rel) { write_attribute("enableFormatConditionsCalculation", props.enable_format_condition_calculation.get()); } - + // outlinePr is optional in the spec but is being written every time? write_start_element(xmlns, "outlinePr"); write_attribute("summaryBelow", "1"); write_attribute("summaryRight", "1"); write_end_element(xmlns, "outlinePr"); - write_start_element(xmlns, "pageSetUpPr"); - write_attribute("fitToPage", write_bool(ws.page_setup().fit_to_page())); - write_end_element(xmlns, "pageSetUpPr"); - + if (ws.has_page_setup()) + { + write_start_element(xmlns, "pageSetUpPr"); + write_attribute("fitToPage", write_bool(ws.page_setup().fit_to_page())); + write_end_element(xmlns, "pageSetUpPr"); + } write_end_element(xmlns, "sheetPr"); } From fbbf7ae767f5af8922947377474c452e6a412d5d Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Sun, 1 Mar 2020 20:46:19 +1300 Subject: [PATCH 06/13] fix git revert error --- benchmarks/spreadsheet-load.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/spreadsheet-load.cpp b/benchmarks/spreadsheet-load.cpp index 041be63a..64318c79 100644 --- a/benchmarks/spreadsheet-load.cpp +++ b/benchmarks/spreadsheet-load.cpp @@ -50,8 +50,8 @@ void run_save_test(const xlnt::path &file, int runs = 10) int main() { - run_test(path_helper::benchmark_file("large.xlsx")); - run_test(path_helper::benchmark_file("very_large.xlsx")); + run_load_test(path_helper::benchmark_file("large.xlsx")); + run_load_test(path_helper::benchmark_file("very_large.xlsx")); run_save_test(path_helper::benchmark_file("large.xlsx")); run_save_test(path_helper::benchmark_file("very_large.xlsx")); From ee593c267358ea662bf21b2b51ebbf14f79930f3 Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Sun, 1 Mar 2020 22:01:14 +1300 Subject: [PATCH 07/13] bug fixes, move the faster serialisation into the numeric header serialisation ends up roughly 2x improvement going from sstream to snprintf Run on (4 X 3500 MHz CPU s) CPU Caches: L1 Data 32K (x4) L1 Instruction 32K (x4) L2 Unified 262K (x4) L3 Unified 6291K (x1) ------------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------------------- RandFloatStrs/double_from_string_sstream 968 ns 977 ns 640000 RandFloatStrs/double_from_string_strtod 272 ns 270 ns 2488889 RandFloatStrs/double_from_string_strtod_fixed 272 ns 270 ns 2488889 RandFloatStrs/double_from_string_strtod_fixed_const_ref 273 ns 270 ns 2488889 RandFloatStrs/double_from_string_std_from_chars 193 ns 195 ns 3446154 RandFloatCommaStrs/double_from_string_strtod_fixed_comma_ref 272 ns 273 ns 2635294 RandFloatCommaStrs/double_from_string_strtod_fixed_comma_const_ref 276 ns 273 ns 2635294 RandFloats/string_from_double_sstream 1311 ns 1318 ns 497778 RandFloats/string_from_double_sstream_cached 1076 ns 1050 ns 640000 RandFloats/string_from_double_snprintf 601 ns 600 ns 1120000 RandFloats/string_from_double_snprintf_fixed 600 ns 600 ns 1120000 RandFloats/string_from_double_std_to_chars 117 ns 117 ns 5600000 RandFloatsComma/string_from_double_snprintf_fixed_comma 600 ns 600 ns 1120000 --- .../microbenchmarks/double_to_string.cpp | 4 +- include/xlnt/utils/numeric.hpp | 86 +++++++++++-------- tests/detail/numeric_util_test_suite.cpp | 17 ++-- 3 files changed, 60 insertions(+), 47 deletions(-) diff --git a/benchmarks/microbenchmarks/double_to_string.cpp b/benchmarks/microbenchmarks/double_to_string.cpp index 621c1165..0b82803f 100644 --- a/benchmarks/microbenchmarks/double_to_string.cpp +++ b/benchmarks/microbenchmarks/double_to_string.cpp @@ -118,7 +118,7 @@ public: std::string serialise(double d) { char buf[Excel_Digit_Precision + 1]; // need space for trailing '\0' - int len = snprintf(buf, sizeof(buf), "%16f", d); + int len = snprintf(buf, sizeof(buf), "%.15g", d); if (should_convert_comma) { convert_comma(buf, len); @@ -158,7 +158,7 @@ BENCHMARK_F(RandFloats, string_from_double_snprintf) while (state.KeepRunning()) { char buf[16]; - int len = snprintf(buf, sizeof(buf), "%16f", get_rand()); + int len = snprintf(buf, sizeof(buf), "%.15g", get_rand()); benchmark::DoNotOptimize( std::string(buf, len)); diff --git a/include/xlnt/utils/numeric.hpp b/include/xlnt/utils/numeric.hpp index 28f37a90..6171ec13 100644 --- a/include/xlnt/utils/numeric.hpp +++ b/include/xlnt/utils/numeric.hpp @@ -34,21 +34,6 @@ namespace xlnt { namespace detail { -/// -/// Takes in any number and outputs a string form of that number which will -/// serialise and deserialise without loss of precision -/// -template -std::string serialize_number_to_string(Number num) -{ - // more digits and excel won't match - constexpr int Excel_Digit_Precision = 15; //sf - std::stringstream ss; - ss.precision(Excel_Digit_Precision); - ss << num; - return ss.str(); -} - /// /// constexpr abs /// @@ -117,45 +102,72 @@ bool float_equals(const LNumber &lhs, const RNumber &rhs, return ((lhs + scaled_fuzz) >= rhs) && ((rhs + scaled_fuzz) >= lhs); } -struct number_converter +class number_serialiser { - explicit number_converter() - : should_convert_to_comma(std::use_facet>(std::locale{}).decimal_point() == ',') + static constexpr int Excel_Digit_Precision = 15; //sf + bool should_convert_comma; + + static void convert_comma_to_pt(char *buf, int len) + { + char *buf_end = buf + len; + char *decimal = std::find(buf, buf_end, ','); + if (decimal != buf_end) + { + *decimal = '.'; + } + } + + static void convert_pt_to_comma(char *buf, size_t len) + { + char *buf_end = buf + len; + char *decimal = std::find(buf, buf_end, '.'); + if (decimal != buf_end) + { + *decimal = ','; + } + } + +public: + explicit number_serialiser() + : should_convert_comma(std::use_facet>(std::locale{}).decimal_point() == ',') { } - double stold(std::string &s) const noexcept + std::string serialise(double d) const + { + char buf[30]; + int len = snprintf(buf, sizeof(buf), "%.15g", d); + if (should_convert_comma) + { + convert_comma_to_pt(buf, len); + } + return std::string(buf, len); + } + + double deserialise(std::string &s) const noexcept { assert(!s.empty()); - if (should_convert_to_comma) + if (should_convert_comma) { - auto decimal_pt = std::find(s.begin(), s.end(), '.'); - if (decimal_pt != s.end()) - { - *decimal_pt = ','; - } + // s.data() doesn't have a non-const overload until c++17, hence this little dance + convert_pt_to_comma(&s[0], s.size()); } return strtod(s.c_str(), nullptr); } - double stold(const std::string &s) const + double deserialise(const std::string &s) const { assert(!s.empty()); - if (!should_convert_to_comma) + if (!should_convert_comma) { return strtod(s.c_str(), nullptr); } - std::string copy(s); - auto decimal_pt = std::find(copy.begin(), copy.end(), '.'); - if (decimal_pt != copy.end()) - { - *decimal_pt = ','; - } - return strtod(copy.c_str(), nullptr); + char buf[30]; + assert(s.size() < std::size(buf)); + auto copy_end = std::copy(s.begin(), s.end(), buf); + convert_pt_to_comma(buf, static_cast(copy_end - buf)); + return strtod(buf, nullptr); } - -private: - bool should_convert_to_comma = false; }; } // namespace detail diff --git a/tests/detail/numeric_util_test_suite.cpp b/tests/detail/numeric_util_test_suite.cpp index 1d8026ba..c2536f84 100644 --- a/tests/detail/numeric_util_test_suite.cpp +++ b/tests/detail/numeric_util_test_suite.cpp @@ -40,17 +40,18 @@ public: void test_serialise_number() { + xlnt::detail::number_serialiser serialiser; // excel serialises numbers as floating point values with <= 15 digits of precision - xlnt_assert(xlnt::detail::serialize_number_to_string(1) == "1"); + xlnt_assert(serialiser.serialise(1) == "1"); // trailing zeroes are ignored - xlnt_assert(xlnt::detail::serialize_number_to_string(1.0) == "1"); - xlnt_assert(xlnt::detail::serialize_number_to_string(1.0f) == "1"); + xlnt_assert(serialiser.serialise(1.0) == "1"); + xlnt_assert(serialiser.serialise(1.0f) == "1"); // one to 1 relation - xlnt_assert(xlnt::detail::serialize_number_to_string(1.23456) == "1.23456"); - xlnt_assert(xlnt::detail::serialize_number_to_string(1.23456789012345) == "1.23456789012345"); - xlnt_assert(xlnt::detail::serialize_number_to_string(123456.789012345) == "123456.789012345"); - xlnt_assert(xlnt::detail::serialize_number_to_string(1.23456789012345e+67) == "1.23456789012345e+67"); - xlnt_assert(xlnt::detail::serialize_number_to_string(1.23456789012345e-67) == "1.23456789012345e-67"); + xlnt_assert(serialiser.serialise(1.23456) == "1.23456"); + xlnt_assert(serialiser.serialise(1.23456789012345) == "1.23456789012345"); + xlnt_assert(serialiser.serialise(123456.789012345) == "123456.789012345"); + xlnt_assert(serialiser.serialise(1.23456789012345e+67) == "1.23456789012345e+67"); + xlnt_assert(serialiser.serialise(1.23456789012345e-67) == "1.23456789012345e-67"); } void test_float_equals_zero() From 39f498f40128850ce6b0d76ae45e65770818656e Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Sun, 1 Mar 2020 22:01:53 +1300 Subject: [PATCH 08/13] use the new faster serialisation everywhere in xlsx_producer --- .../header_footer/header_footer_code.cpp | 10 ++-- .../header_footer/header_footer_code.hpp | 5 +- source/detail/serialization/xlsx_consumer.cpp | 60 +++++++++---------- source/detail/serialization/xlsx_consumer.hpp | 2 +- source/detail/serialization/xlsx_producer.cpp | 24 ++++---- source/detail/serialization/xlsx_producer.hpp | 2 + 6 files changed, 53 insertions(+), 50 deletions(-) diff --git a/source/detail/header_footer/header_footer_code.cpp b/source/detail/header_footer/header_footer_code.cpp index f7560eb8..014234de 100644 --- a/source/detail/header_footer/header_footer_code.cpp +++ b/source/detail/header_footer/header_footer_code.cpp @@ -27,7 +27,7 @@ namespace xlnt { namespace detail { -std::array, 3> decode_header_footer(const std::string &hf_string) +std::array, 3> decode_header_footer(const std::string &hf_string, const number_serialiser &serialiser) { std::array, 3> result; @@ -216,7 +216,7 @@ std::array, 3> decode_header_footer(const std::s tokens.push_back(token); } - const auto parse_section = [&tokens, &result](hf_code code) { + const auto parse_section = [&tokens, &result, &serialiser](hf_code code) { std::vector end_codes{hf_code::left_section, hf_code::center_section, hf_code::right_section}; end_codes.erase(std::find(end_codes.begin(), end_codes.end(), code)); @@ -297,7 +297,7 @@ std::array, 3> decode_header_footer(const std::s current_run.second = xlnt::font(); } - current_run.second.get().size(std::stod(current_token.value)); + current_run.second.get().size(serialiser.deserialise(current_token.value)); break; } @@ -460,7 +460,7 @@ std::array, 3> decode_header_footer(const std::s return result; } -std::string encode_header_footer(const rich_text &t, header_footer::location where) +std::string encode_header_footer(const rich_text &t, header_footer::location where, const number_serialiser &serialiser) { const auto location_code_map = std::unordered_map #include #include +#include namespace xlnt { namespace detail { -std::array, 3> decode_header_footer(const std::string &hf_string); -std::string encode_header_footer(const rich_text &t, header_footer::location where); +std::array, 3> decode_header_footer(const std::string &hf_string, const number_serialiser &serialiser); +std::string encode_header_footer(const rich_text &t, header_footer::location where, const number_serialiser& serialiser); } // namespace detail } // namespace xlnt diff --git a/source/detail/serialization/xlsx_consumer.cpp b/source/detail/serialization/xlsx_consumer.cpp index ea64b131..bde76349 100644 --- a/source/detail/serialization/xlsx_consumer.cpp +++ b/source/detail/serialization/xlsx_consumer.cpp @@ -307,14 +307,14 @@ Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser) } // inside element -std::pair parse_row(xml::parser *parser, xlnt::detail::number_converter &converter, std::vector &parsed_cells) +std::pair parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector &parsed_cells) { std::pair props; for (auto &attr : parser->attribute_map()) { if (string_equal(attr.first.name(), "dyDescent")) { - props.first.dy_descent = converter.stold(attr.second.value); + props.first.dy_descent = converter.deserialise(attr.second.value); } else if (string_equal(attr.first.name(), "spans")) { @@ -322,7 +322,7 @@ std::pair parse_row(xml::parser *parser, xlnt::detail } else if (string_equal(attr.first.name(), "ht")) { - props.first.height = converter.stold(attr.second.value); + props.first.height = converter.deserialise(attr.second.value); } else if (string_equal(attr.first.name(), "s")) { @@ -382,7 +382,7 @@ std::pair parse_row(xml::parser *parser, xlnt::detail } // inside element -Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_converter &converter) +Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_serialiser &converter) { Sheet_Data sheet_data; int level = 1; // nesting level @@ -480,7 +480,7 @@ cell xlsx_consumer::read_cell() if (parser().attribute_present("ht")) { - row_properties.height = converter_.stold(parser().attribute("ht")); + row_properties.height = converter_.deserialise(parser().attribute("ht")); } if (parser().attribute_present("customHeight")) @@ -495,7 +495,7 @@ cell xlsx_consumer::read_cell() if (parser().attribute_present(qn("x14ac", "dyDescent"))) { - row_properties.dy_descent = converter_.stold(parser().attribute(qn("x14ac", "dyDescent"))); + row_properties.dy_descent = converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent"))); } if (parser().attribute_present("spans")) @@ -602,7 +602,7 @@ cell xlsx_consumer::read_cell() } else if (type == "s") { - cell.d_->value_numeric_ = converter_.stold(value_string); + cell.d_->value_numeric_ = converter_.deserialise(value_string); cell.data_type(cell::type::shared_string); } else if (type == "b") // boolean @@ -611,7 +611,7 @@ cell xlsx_consumer::read_cell() } else if (type == "n") // numeric { - cell.value(converter_.stold(value_string)); + cell.value(converter_.deserialise(value_string)); } else if (!value_string.empty() && value_string[0] == '#') { @@ -863,23 +863,23 @@ std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id) if (parser().attribute_present("baseColWidth")) { ws.d_->format_properties_.base_col_width = - converter_.stold(parser().attribute("baseColWidth")); + converter_.deserialise(parser().attribute("baseColWidth")); } if (parser().attribute_present("defaultColWidth")) { ws.d_->format_properties_.default_column_width = - converter_.stold(parser().attribute("defaultColWidth")); + converter_.deserialise(parser().attribute("defaultColWidth")); } if (parser().attribute_present("defaultRowHeight")) { ws.d_->format_properties_.default_row_height = - converter_.stold(parser().attribute("defaultRowHeight")); + converter_.deserialise(parser().attribute("defaultRowHeight")); } if (parser().attribute_present(qn("x14ac", "dyDescent"))) { ws.d_->format_properties_.dy_descent = - converter_.stold(parser().attribute(qn("x14ac", "dyDescent"))); + converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent"))); } skip_attributes(); @@ -899,7 +899,7 @@ std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id) optional width = [this](xml::parser &p) -> xlnt::optional { if (p.attribute_present("width")) { - return (converter_.stold(p.attribute("width")) * 7 - 5) / 7; + return (converter_.deserialise(p.attribute("width")) * 7 - 5) / 7; } return xlnt::optional(); }(parser()); @@ -1000,7 +1000,7 @@ void xlsx_consumer::read_worksheet_sheetdata() case cell::type::empty: case cell::type::number: case cell::type::date: { - ws_cell_impl->value_numeric_ = converter_.stold(cell.value); + ws_cell_impl->value_numeric_ = converter_.deserialise(cell.value); break; } case cell::type::shared_string: { @@ -1196,12 +1196,12 @@ worksheet xlsx_consumer::read_worksheet_end(const std::string &rel_id) { page_margins margins; - margins.top(converter_.stold(parser().attribute("top"))); - margins.bottom(converter_.stold(parser().attribute("bottom"))); - margins.left(converter_.stold(parser().attribute("left"))); - margins.right(converter_.stold(parser().attribute("right"))); - margins.header(converter_.stold(parser().attribute("header"))); - margins.footer(converter_.stold(parser().attribute("footer"))); + margins.top(converter_.deserialise(parser().attribute("top"))); + margins.bottom(converter_.deserialise(parser().attribute("bottom"))); + margins.left(converter_.deserialise(parser().attribute("left"))); + margins.right(converter_.deserialise(parser().attribute("right"))); + margins.header(converter_.deserialise(parser().attribute("header"))); + margins.footer(converter_.deserialise(parser().attribute("footer"))); ws.page_margins(margins); } @@ -1251,27 +1251,27 @@ worksheet xlsx_consumer::read_worksheet_end(const std::string &rel_id) if (current_hf_element == qn("spreadsheetml", "oddHeader")) { - odd_header = decode_header_footer(read_text()); + odd_header = decode_header_footer(read_text(), converter_); } else if (current_hf_element == qn("spreadsheetml", "oddFooter")) { - odd_footer = decode_header_footer(read_text()); + odd_footer = decode_header_footer(read_text(), converter_); } else if (current_hf_element == qn("spreadsheetml", "evenHeader")) { - even_header = decode_header_footer(read_text()); + even_header = decode_header_footer(read_text(), converter_); } else if (current_hf_element == qn("spreadsheetml", "evenFooter")) { - even_footer = decode_header_footer(read_text()); + even_footer = decode_header_footer(read_text(), converter_); } else if (current_hf_element == qn("spreadsheetml", "firstHeader")) { - first_header = decode_header_footer(read_text()); + first_header = decode_header_footer(read_text(), converter_); } else if (current_hf_element == qn("spreadsheetml", "firstFooter")) { - first_footer = decode_header_footer(read_text()); + first_footer = decode_header_footer(read_text(), converter_); } else { @@ -2308,7 +2308,7 @@ void xlsx_consumer::read_stylesheet() while (in_element(qn("spreadsheetml", "gradientFill"))) { expect_start_element(qn("spreadsheetml", "stop"), xml::content::complex); - auto position = converter_.stold(parser().attribute("position")); + auto position = converter_.deserialise(parser().attribute("position")); expect_start_element(qn("spreadsheetml", "color"), xml::content::complex); auto color = read_color(); expect_end_element(qn("spreadsheetml", "color")); @@ -2356,7 +2356,7 @@ void xlsx_consumer::read_stylesheet() if (font_property_element == qn("spreadsheetml", "sz")) { - new_font.size(converter_.stold(parser().attribute("val"))); + new_font.size(converter_.deserialise(parser().attribute("val"))); } else if (font_property_element == qn("spreadsheetml", "name")) { @@ -3169,7 +3169,7 @@ rich_text xlsx_consumer::read_rich_text(const xml::qname &parent) if (current_run_property_element == xml::qname(xmlns, "sz")) { - run.second.get().size(converter_.stold(parser().attribute("val"))); + run.second.get().size(converter_.deserialise(parser().attribute("val"))); } else if (current_run_property_element == xml::qname(xmlns, "rFont")) { @@ -3307,7 +3307,7 @@ xlnt::color xlsx_consumer::read_color() if (parser().attribute_present("tint")) { - result.tint(converter_.stold(parser().attribute("tint"))); + result.tint(converter_.deserialise(parser().attribute("tint"))); } return result; diff --git a/source/detail/serialization/xlsx_consumer.hpp b/source/detail/serialization/xlsx_consumer.hpp index 963e1361..c9a987ee 100644 --- a/source/detail/serialization/xlsx_consumer.hpp +++ b/source/detail/serialization/xlsx_consumer.hpp @@ -416,7 +416,7 @@ private: detail::cell_impl *current_cell_; detail::worksheet_impl *current_worksheet_; - number_converter converter_; + number_serialiser converter_; }; } // namespace detail diff --git a/source/detail/serialization/xlsx_producer.cpp b/source/detail/serialization/xlsx_producer.cpp index 743839ac..a0455cef 100644 --- a/source/detail/serialization/xlsx_producer.cpp +++ b/source/detail/serialization/xlsx_producer.cpp @@ -2420,7 +2420,7 @@ void xlsx_producer::write_worksheet(const relationship &rel) if (props.width.is_set()) { double width = (props.width.get() * 7 + 5) / 7; - write_attribute("width", serialize_number_to_string(width)); + write_attribute("width", converter_.serialise(width)); } if (props.best_fit) @@ -2522,7 +2522,7 @@ void xlsx_producer::write_worksheet(const relationship &rel) if (props.height.is_set()) { auto height = props.height.get(); - write_attribute("ht", serialize_number_to_string(height)); + write_attribute("ht", converter_.serialise(height)); } if (props.hidden) @@ -2649,7 +2649,7 @@ void xlsx_producer::write_worksheet(const relationship &rel) case cell::type::number: write_start_element(xmlns, "v"); - write_characters(serialize_number_to_string(cell.value())); + write_characters(converter_.serialise(cell.value())); write_end_element(xmlns, "v"); break; @@ -2886,26 +2886,26 @@ void xlsx_producer::write_worksheet(const relationship &rel) { if (hf.has_odd_even_header(location)) { - odd_header.append(encode_header_footer(hf.odd_header(location), location)); - even_header.append(encode_header_footer(hf.even_header(location), location)); + odd_header.append(encode_header_footer(hf.odd_header(location), location, converter_)); + even_header.append(encode_header_footer(hf.even_header(location), location, converter_)); } if (hf.has_odd_even_footer(location)) { - odd_footer.append(encode_header_footer(hf.odd_footer(location), location)); - even_footer.append(encode_header_footer(hf.even_footer(location), location)); + odd_footer.append(encode_header_footer(hf.odd_footer(location), location, converter_)); + even_footer.append(encode_header_footer(hf.even_footer(location), location, converter_)); } } else { if (hf.has_header(location)) { - odd_header.append(encode_header_footer(hf.header(location), location)); + odd_header.append(encode_header_footer(hf.header(location), location, converter_)); } if (hf.has_footer(location)) { - odd_footer.append(encode_header_footer(hf.footer(location), location)); + odd_footer.append(encode_header_footer(hf.footer(location), location, converter_)); } } @@ -2913,12 +2913,12 @@ void xlsx_producer::write_worksheet(const relationship &rel) { if (hf.has_first_page_header(location)) { - first_header.append(encode_header_footer(hf.first_page_header(location), location)); + first_header.append(encode_header_footer(hf.first_page_header(location), location, converter_)); } if (hf.has_first_page_footer(location)) { - first_footer.append(encode_header_footer(hf.first_page_footer(location), location)); + first_footer.append(encode_header_footer(hf.first_page_footer(location), location, converter_)); } } } @@ -3385,7 +3385,7 @@ void xlsx_producer::write_color(const xlnt::color &color) } if (color.has_tint()) { - write_attribute("tint", serialize_number_to_string(color.tint())); + write_attribute("tint", converter_.serialise(color.tint())); } } diff --git a/source/detail/serialization/xlsx_producer.hpp b/source/detail/serialization/xlsx_producer.hpp index 296025b9..4660f3c2 100644 --- a/source/detail/serialization/xlsx_producer.hpp +++ b/source/detail/serialization/xlsx_producer.hpp @@ -30,6 +30,7 @@ #include #include +#include namespace xml { class serializer; @@ -208,6 +209,7 @@ private: detail::cell_impl *current_cell_; detail::worksheet_impl *current_worksheet_; + detail::number_serialiser converter_; }; } // namespace detail From 932fc4596f3916c36034447c0454bc7c6fb0c981 Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Sun, 1 Mar 2020 23:16:57 +1300 Subject: [PATCH 09/13] remove declarations of copy/assignment operators that only do default work user defined copy operators suppress compiler creation of move operations, and not having all of copy/move/dtor defined (rule of 0/5) is suspicious. Also happens to be very slightly slower --- include/xlnt/cell/index_types.hpp | 15 --------------- include/xlnt/worksheet/range_reference.hpp | 2 -- source/cell/index_types.cpp | 16 ---------------- source/worksheet/range_reference.cpp | 6 ------ 4 files changed, 39 deletions(-) diff --git a/include/xlnt/cell/index_types.hpp b/include/xlnt/cell/index_types.hpp index 2c14a0e0..1834d92e 100644 --- a/include/xlnt/cell/index_types.hpp +++ b/include/xlnt/cell/index_types.hpp @@ -92,26 +92,11 @@ public: /// column_t(const char *column_string); - /// - /// Copy constructor. Constructs a column by copying it from other. - /// - column_t(const column_t &other); - - /// - /// Move constructor. Constructs a column by moving it from other. - /// - column_t(column_t &&other); - /// /// Returns a string representation of this column index. /// std::string column_string() const; - /// - /// Sets this column to be the same as rhs's and return reference to self. - /// - column_t &operator=(column_t rhs); - /// /// Sets this column to be equal to rhs and return reference to self. /// diff --git a/include/xlnt/worksheet/range_reference.hpp b/include/xlnt/worksheet/range_reference.hpp index 76ba94cf..9e12638d 100644 --- a/include/xlnt/worksheet/range_reference.hpp +++ b/include/xlnt/worksheet/range_reference.hpp @@ -69,8 +69,6 @@ public: range_reference(column_t column_index_start, row_t row_index_start, column_t column_index_end, row_t row_index_end); - range_reference(const range_reference &ref); - /// /// Returns true if the range has a width and height of 1 cell. /// diff --git a/source/cell/index_types.cpp b/source/cell/index_types.cpp index 3c306a00..cecc6b0e 100644 --- a/source/cell/index_types.cpp +++ b/source/cell/index_types.cpp @@ -109,27 +109,11 @@ column_t::column_t(const char *column_string) { } -column_t::column_t(const column_t &other) - : column_t(other.index) -{ -} - -column_t::column_t(column_t &&other) -{ - swap(*this, other); -} - std::string column_t::column_string() const { return column_string_from_index(index); } -column_t &column_t::operator=(column_t rhs) -{ - swap(*this, rhs); - return *this; -} - column_t &column_t::operator=(const std::string &rhs) { return *this = column_t(rhs); diff --git a/source/worksheet/range_reference.cpp b/source/worksheet/range_reference.cpp index c53a33ef..54033b64 100644 --- a/source/worksheet/range_reference.cpp +++ b/source/worksheet/range_reference.cpp @@ -46,12 +46,6 @@ range_reference::range_reference(const char *range_string) { } -range_reference::range_reference(const range_reference &ref) -{ - top_left_ = ref.top_left_; - bottom_right_ = ref.bottom_right_; -} - range_reference::range_reference(const std::string &range_string) : top_left_("A1"), bottom_right_("A1") { From c418c13010a82010ef637174827e5fa9aef7dd1a Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Sun, 1 Mar 2020 23:18:13 +1300 Subject: [PATCH 10/13] remove a double lookup in the cell map during serialisation --- source/cell/cell.cpp | 2 +- source/detail/implementations/cell_impl.hpp | 7 ++++++- source/detail/serialization/xlsx_producer.cpp | 17 ++++++++++++----- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/source/cell/cell.cpp b/source/cell/cell.cpp index ef6e7b24..e47f2a93 100644 --- a/source/cell/cell.cpp +++ b/source/cell/cell.cpp @@ -199,7 +199,7 @@ cell::cell(detail::cell_impl *d) bool cell::garbage_collectible() const { - return !(has_value() || is_merged() || phonetics_visible() || has_formula() || has_format() || has_hyperlink()); + return d_->is_garbage_collectible(); } void cell::value(std::nullptr_t) diff --git a/source/detail/implementations/cell_impl.hpp b/source/detail/implementations/cell_impl.hpp index 52e59444..11097414 100644 --- a/source/detail/implementations/cell_impl.hpp +++ b/source/detail/implementations/cell_impl.hpp @@ -47,7 +47,7 @@ struct cell_impl cell_impl(cell_impl &&other) = default; cell_impl &operator=(const cell_impl &other) = default; cell_impl &operator=(cell_impl &&other) = default; - + cell_type type_; worksheet_impl *parent_; @@ -65,6 +65,11 @@ struct cell_impl optional hyperlink_; optional format_; optional comment_; + + bool is_garbage_collectible() const + { + return !(type_ != cell_type::empty || is_merged_ || phonetics_visible_ || formula_.is_set() || format_.is_set() || hyperlink_.is_set()); + } }; inline bool operator==(const cell_impl &lhs, const cell_impl &rhs) diff --git a/source/detail/serialization/xlsx_producer.cpp b/source/detail/serialization/xlsx_producer.cpp index a0455cef..1646b70b 100644 --- a/source/detail/serialization/xlsx_producer.cpp +++ b/source/detail/serialization/xlsx_producer.cpp @@ -2483,12 +2483,19 @@ void xlsx_producer::write_worksheet(const relationship &rel) { for (auto column = dimension.top_left().column(); column <= dimension.bottom_right().column(); ++column) { - if (!ws.has_cell(cell_reference(column, check_row))) continue; - auto cell = ws.cell(cell_reference(column, check_row)); - if (cell.garbage_collectible()) continue; + auto ref = cell_reference(column, check_row); + auto cell = ws.d_->cell_map_.find(ref); + if (cell == ws.d_->cell_map_.end()) + { + continue; + } + if (cell->second.is_garbage_collectible()) + { + continue; + } - first_block_column = std::min(first_block_column, cell.column()); - last_block_column = std::max(last_block_column, cell.column()); + first_block_column = std::min(first_block_column, cell->second.column_); + last_block_column = std::max(last_block_column, cell->second.column_); if (row == check_row) { From 0ea054026fea178d94d5d36b3fb140833c2a787b Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Sun, 1 Mar 2020 23:20:44 +1300 Subject: [PATCH 11/13] speedup worksheet::calculate dimension by only looping the cell map once --- source/worksheet/worksheet.cpp | 36 ++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/source/worksheet/worksheet.cpp b/source/worksheet/worksheet.cpp index 866f0207..bb7141e2 100644 --- a/source/worksheet/worksheet.cpp +++ b/source/worksheet/worksheet.cpp @@ -579,8 +579,40 @@ column_t worksheet::highest_column_or_props() const range_reference worksheet::calculate_dimension() const { - return range_reference(lowest_column(), lowest_row_or_props(), - highest_column(), highest_row_or_props()); + // partially optimised version of: + // return range_reference(lowest_column(), lowest_row_or_props(), + // highest_column(), highest_row_or_props()); + // + if (d_->cell_map_.empty() && d_->row_properties_.empty()) + { + return range_reference(constants::min_column(), constants::min_row(), + constants::min_column(), constants::min_row()); + } + row_t min_row_prop = constants::max_row(); + row_t max_row_prop = constants::min_row(); + for (const auto &row_prop : d_->row_properties_) + { + min_row_prop = std::min(min_row_prop, row_prop.first); + max_row_prop = std::max(max_row_prop, row_prop.first); + } + if (d_->cell_map_.empty()) + { + return range_reference(constants::min_column(), min_row_prop, + constants::min_column(), max_row_prop); + } + // find min and max row/column in cell map + column_t min_col = constants::max_column(); + column_t max_col = constants::min_column(); + row_t min_row = min_row_prop; + row_t max_row = max_row_prop; + for (auto &c : d_->cell_map_) + { + min_col = std::min(min_col, c.second.column_); + max_col = std::max(max_col, c.second.column_); + min_row = std::min(min_row, c.second.row_); + max_row = std::max(max_row, c.second.row_); + } + return range_reference(min_col, min_row, max_col, max_row); } range worksheet::range(const std::string &reference_string) From e8cb8d9bc6f4ac3ce93c77f63a435f0081eefaf0 Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Sun, 1 Mar 2020 23:23:20 +1300 Subject: [PATCH 12/13] fix compiler error --- include/xlnt/utils/numeric.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xlnt/utils/numeric.hpp b/include/xlnt/utils/numeric.hpp index 6171ec13..7ccb69ae 100644 --- a/include/xlnt/utils/numeric.hpp +++ b/include/xlnt/utils/numeric.hpp @@ -163,7 +163,7 @@ public: return strtod(s.c_str(), nullptr); } char buf[30]; - assert(s.size() < std::size(buf)); + assert(s.size() < sizeof(buf)); auto copy_end = std::copy(s.begin(), s.end(), buf); convert_pt_to_comma(buf, static_cast(copy_end - buf)); return strtod(buf, nullptr); From f4d00acb9f7dd4b738e337ff2c346592cc3f47ea Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Mon, 2 Mar 2020 13:32:39 +1300 Subject: [PATCH 13/13] resolve warnings --- include/xlnt/utils/numeric.hpp | 2 +- include/xlnt/worksheet/range_reference.hpp | 5 ----- source/worksheet/range_reference.cpp | 7 ------- 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/include/xlnt/utils/numeric.hpp b/include/xlnt/utils/numeric.hpp index 7ccb69ae..cbf03094 100644 --- a/include/xlnt/utils/numeric.hpp +++ b/include/xlnt/utils/numeric.hpp @@ -141,7 +141,7 @@ public: { convert_comma_to_pt(buf, len); } - return std::string(buf, len); + return std::string(buf, static_cast(len)); } double deserialise(std::string &s) const noexcept diff --git a/include/xlnt/worksheet/range_reference.hpp b/include/xlnt/worksheet/range_reference.hpp index 9e12638d..907854a8 100644 --- a/include/xlnt/worksheet/range_reference.hpp +++ b/include/xlnt/worksheet/range_reference.hpp @@ -149,11 +149,6 @@ public: /// bool operator!=(const char *reference_string) const; - /// - /// Assigns the extents of the provided range to this range. - /// - range_reference &operator=(const range_reference &ref); - private: /// /// The top left cell in the range diff --git a/source/worksheet/range_reference.cpp b/source/worksheet/range_reference.cpp index 54033b64..490cac55 100644 --- a/source/worksheet/range_reference.cpp +++ b/source/worksheet/range_reference.cpp @@ -177,11 +177,4 @@ XLNT_API bool operator!=(const char *reference_string, const range_reference &re return ref != reference_string; } -range_reference &range_reference::operator=(const range_reference &ref) -{ - top_left_ = ref.top_left_; - bottom_right_ = ref.bottom_right_; - return *this; -} - } // namespace xlnt