diff --git a/benchmarks/microbenchmarks/double_to_string.cpp b/benchmarks/microbenchmarks/double_to_string.cpp index 7b426ef1..01efa9ef 100644 --- a/benchmarks/microbenchmarks/double_to_string.cpp +++ b/benchmarks/microbenchmarks/double_to_string.cpp @@ -1,2 +1,151 @@ -#include "benchmark/benchmark.h" +// A core part of the xlsx serialisation routine is taking doubles from memory and stringifying them +// this has a few requirements +// - expect strings in the form 1234.56 (i.e. no thousands seperator, '.' used for the decimal seperator) +// - outputs up to 15 significant figures (excel only serialises numbers up to 15sf) +#include "benchmark/benchmark.h" +#include +#include +#include + +namespace { + +// setup a large quantity of random doubles as strings +template +class RandomFloats : public benchmark::Fixture +{ + static constexpr size_t Number_of_Elements = 1 << 20; + static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated"); + + std::vector inputs; + + size_t index = 0; + const char *locale_str = nullptr; + +public: + void SetUp(const ::benchmark::State &state) + { + if (Decimal_Locale) + { + locale_str = setlocale(LC_ALL, "C"); + } + else + { + locale_str = setlocale(LC_ALL, "de-DE"); + } + std::random_device rd; // obtain a seed for the random number engine + std::mt19937 gen(rd()); + // doing full range is stupid (::min/max()...), it just ends up generating very large numbers + // uniform is probably not the best distribution to use here, but it will do for now + std::uniform_real_distribution dis(-1'000, 1'000); + // generate a large quantity of doubles to deserialise + inputs.reserve(Number_of_Elements); + for (int i = 0; i < Number_of_Elements; ++i) + { + double d = dis(gen); + inputs.push_back(d); + } + } + + void TearDown(const ::benchmark::State &state) + { + // restore locale + setlocale(LC_ALL, locale_str); + // gbench is keeping the fixtures alive somewhere, need to clear the data after use + inputs = std::vector{}; + } + + double &get_rand() + { + return inputs[++index & (Number_of_Elements - 1)]; + } +}; + +/// Takes in a double and outputs a string form of that number which will +/// serialise and deserialise without loss of precision +std::string serialize_number_to_string(double num) +{ + // more digits and excel won't match + constexpr int Excel_Digit_Precision = 15; //sf + std::stringstream ss; + ss.precision(Excel_Digit_Precision); + ss << num; + return ss.str(); +} + +class number_serialiser +{ + static constexpr int Excel_Digit_Precision = 15; //sf + std::ostringstream ss; + +public: + explicit number_serialiser() + { + ss.precision(Excel_Digit_Precision); + ss.imbue(std::locale("C")); + } + + std::string serialise(double d) + { + ss.str(""); // reset string buffer + ss.clear(); // reset any error flags + ss << d; + return ss.str(); + } +}; + +using RandFloats = RandomFloats; +} // namespace + +BENCHMARK_F(RandFloats, string_from_double_sstream) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + serialize_number_to_string(get_rand())); + } +} + +BENCHMARK_F(RandFloats, string_from_double_sstream_cached) +(benchmark::State &state) +{ + number_serialiser ser; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + ser.serialise(get_rand())); + } +} + +BENCHMARK_F(RandFloats, string_from_double_snprintf) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + char buf[16]; + int len = snprintf(buf, sizeof(buf), "%16f", get_rand()); + + benchmark::DoNotOptimize( + std::string(buf, len)); + } +} + +// locale names are different between OS's, and std::from_chars is only complete in MSVC +#ifdef _MSC_VER + +#include +BENCHMARK_F(RandFloats, string_from_double_std_to_chars) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + char buf[16]; + std::to_chars_result result = std::to_chars(buf, buf + std::size(buf), get_rand()); + + benchmark::DoNotOptimize( + std::string(buf, result.ptr)); + } +} + +#endif \ No newline at end of file diff --git a/benchmarks/microbenchmarks/string_to_double.cpp b/benchmarks/microbenchmarks/string_to_double.cpp index 58736e7a..5392f0a2 100644 --- a/benchmarks/microbenchmarks/string_to_double.cpp +++ b/benchmarks/microbenchmarks/string_to_double.cpp @@ -1,15 +1,18 @@ // A core part of the xlsx parsing routine is taking strings from the xml parser and parsing these to a double // this has a few requirements -// - expect numbers in the form 1234.56 (i.e. no thousands seperator, '.' used for the decimal seperator) +// - expect strings in the form 1234.56 (i.e. no thousands seperator, '.' used for the decimal seperator) // - handles atleast 15 significant figures (excel only serialises numbers up to 15sf) #include #include #include +#include + +namespace { // setup a large quantity of random doubles as strings template -class RandomFloats : public benchmark::Fixture +class RandomFloatStrs : public benchmark::Fixture { static constexpr size_t Number_of_Elements = 1 << 20; static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated"); @@ -17,7 +20,7 @@ class RandomFloats : public benchmark::Fixture std::vector inputs; size_t index = 0; - const char *locale_str; + const char *locale_str = nullptr; public: void SetUp(const ::benchmark::State &state) @@ -50,19 +53,18 @@ public: { // restore locale setlocale(LC_ALL, locale_str); - // gbench is keeping the fixtures alive somewhere, need to clear the data... + // gbench is keeping the fixtures alive somewhere, need to clear the data after use inputs = std::vector{}; } std::string &get_rand() { - return inputs[++index & Number_of_Elements]; + return inputs[++index & (Number_of_Elements - 1)]; } }; // method used by xlsx_consumer.cpp in commit - ba01de47a7d430764c20ec9ac9600eec0eb38bcf // std::istringstream with the locale set to "C" -#include struct number_converter { number_converter() @@ -82,32 +84,6 @@ struct number_converter double result; }; -using RandFloats = RandomFloats; - -BENCHMARK_F(RandFloats, double_from_string_sstream) -(benchmark::State &state) -{ - number_converter converter; - while (state.KeepRunning()) - { - benchmark::DoNotOptimize( - converter.stold(get_rand())); - } -} - -// using strotod -// https://en.cppreference.com/w/cpp/string/byte/strtof -// this naive usage is broken in the face of locales (fails condition 1) -#include -BENCHMARK_F(RandFloats, double_from_string_strtod) -(benchmark::State &state) -{ - while (state.KeepRunning()) - { - benchmark::DoNotOptimize( - strtod(get_rand().c_str(), nullptr)); - } -} // to resolve the locale issue with strtod, a little preprocessing of the input is required struct number_converter_mk2 @@ -151,7 +127,37 @@ private: bool should_convert_to_comma = false; }; -BENCHMARK_F(RandFloats, double_from_string_strtod_fixed) +using RandFloatStrs = RandomFloatStrs; +// german locale uses ',' as the seperator +using RandFloatCommaStrs = RandomFloatStrs; +} // namespace + +BENCHMARK_F(RandFloatStrs, double_from_string_sstream) +(benchmark::State &state) +{ + number_converter converter; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + converter.stold(get_rand())); + } +} + +// using strotod +// https://en.cppreference.com/w/cpp/string/byte/strtof +// this naive usage is broken in the face of locales (fails condition 1) +#include +BENCHMARK_F(RandFloatStrs, double_from_string_strtod) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + strtod(get_rand().c_str(), nullptr)); + } +} + +BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed) (benchmark::State &state) { number_converter_mk2 converter; @@ -162,7 +168,7 @@ BENCHMARK_F(RandFloats, double_from_string_strtod_fixed) } } -BENCHMARK_F(RandFloats, double_from_string_strtod_fixed_const_ref) +BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed_const_ref) (benchmark::State &state) { number_converter_mk2 converter; @@ -178,7 +184,7 @@ BENCHMARK_F(RandFloats, double_from_string_strtod_fixed_const_ref) #ifdef _MSC_VER #include -BENCHMARK_F(RandFloats, double_from_string_std_from_chars) +BENCHMARK_F(RandFloatStrs, double_from_string_std_from_chars) (benchmark::State &state) { while (state.KeepRunning()) @@ -191,9 +197,7 @@ BENCHMARK_F(RandFloats, double_from_string_std_from_chars) } // not using the standard "C" locale with '.' seperator -// german locale uses ',' as the seperator -using RandFloatsComma = RandomFloats; -BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_ref) +BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_ref) (benchmark::State &state) { number_converter_mk2 converter; @@ -204,7 +208,7 @@ BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_ref) } } -BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_const_ref) +BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_const_ref) (benchmark::State &state) { number_converter_mk2 converter;