From 0adb8a69b1a1c6f9ebe9cb285be16a7caef62467 Mon Sep 17 00:00:00 2001 From: JCrawfy Date: Sat, 29 Feb 2020 22:11:31 +1300 Subject: [PATCH] add micro benchmarking project, setup number parsing benchmark relating to previous work https://github.com/tfussell/xlnt/issues/422 Results are matching what was observed at the time ^^ was being worked on std::from_chars is included as the target to beat, but since only MSVC has it for floating point it's not hugely useful yet uniform real distribution is probably a horrible choice, and it might be good to randomise the number of sf in each string also (currently the y all end up at max length) Run on (4 X 3500 MHz CPU s) CPU Caches: L1 Data 32K (x4) L1 Instruction 32K (x4) L2 Unified 262K (x4) L3 Unified 6291K (x1) ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- RandFloats/double_from_string_sstream 804 ns 820 ns 896000 RandFloats/double_from_string_strtod 163 ns 162 ns 5973333 RandFloats/double_from_string_strtod_fixed 175 ns 172 ns 5352107 RandFloats/double_from_string_strtod_fixed_const_ref 150 ns 152 ns 5352107 RandFloats/double_from_string_std_from_chars 87.1 ns 88.3 ns 9557333 RandFloatsComma/double_from_string_strtod_fixed_comma_ref 172 ns 173 ns 5146257 RandFloatsComma/double_from_string_strtod_fixed_comma_const_ref 180 ns 175 ns 5352107 --- benchmarks/CMakeLists.txt | 5 + benchmarks/microbenchmarks/CMakeLists.txt | 36 +++ .../microbenchmarks/double_to_string.cpp | 2 + .../microbenchmarks/string_to_double.cpp | 219 ++++++++++++++++++ 4 files changed, 262 insertions(+) create mode 100644 benchmarks/microbenchmarks/CMakeLists.txt create mode 100644 benchmarks/microbenchmarks/double_to_string.cpp create mode 100644 benchmarks/microbenchmarks/string_to_double.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 4ab93935..3c00ccda 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -41,3 +41,8 @@ foreach(BENCHMARK_SOURCE IN ITEMS ${BENCHMARK_SOURCES}) $) endif() endforeach() + +option(XLNT_MICROBENCH_ENABLED "Enable small benchmarks typically used for development" OFF) +if (XLNT_MICROBENCH_ENABLED) + add_subdirectory(microbenchmarks) +endif() \ No newline at end of file diff --git a/benchmarks/microbenchmarks/CMakeLists.txt b/benchmarks/microbenchmarks/CMakeLists.txt new file mode 100644 index 00000000..745df1b8 --- /dev/null +++ b/benchmarks/microbenchmarks/CMakeLists.txt @@ -0,0 +1,36 @@ +# FetchContent added in cmake v3.11 +# https://cmake.org/cmake/help/v3.11/module/FetchContent.html +# this file is behind a feature flag (XLNT_MICROBENCH_ENABLED) so the primary build is not affected +cmake_minimum_required(VERSION 3.11) +project(xlnt_ubench) + +# acquire google benchmark dependency +# disable generation of the various test projects +set(BENCHMARK_ENABLE_TESTING OFF) +# gtest not required +set(BENCHMARK_ENABLE_GTEST_TESTS OFF) + +include(FetchContent) +FetchContent_Declare( + googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark + GIT_TAG v1.5.0 +) +# download if not already present +FetchContent_GetProperties(googlebenchmark) +if(NOT googlebenchmark_POPULATED) + FetchContent_Populate(googlebenchmark) + add_subdirectory(${googlebenchmark_SOURCE_DIR} ${googlebenchmark_BINARY_DIR}) +endif() +# equivalent of add_subdirectory, now available for use +FetchContent_MakeAvailable(googlebenchmark) + + +add_executable(xlnt_ubench) +target_sources(xlnt_ubench + PRIVATE + string_to_double.cpp + double_to_string.cpp +) +target_link_libraries(xlnt_ubench benchmark_main xlnt) +target_compile_features(xlnt_ubench PRIVATE cxx_std_17) \ No newline at end of file diff --git a/benchmarks/microbenchmarks/double_to_string.cpp b/benchmarks/microbenchmarks/double_to_string.cpp new file mode 100644 index 00000000..7b426ef1 --- /dev/null +++ b/benchmarks/microbenchmarks/double_to_string.cpp @@ -0,0 +1,2 @@ +#include "benchmark/benchmark.h" + diff --git a/benchmarks/microbenchmarks/string_to_double.cpp b/benchmarks/microbenchmarks/string_to_double.cpp new file mode 100644 index 00000000..58736e7a --- /dev/null +++ b/benchmarks/microbenchmarks/string_to_double.cpp @@ -0,0 +1,219 @@ +// A core part of the xlsx parsing routine is taking strings from the xml parser and parsing these to a double +// this has a few requirements +// - expect numbers in the form 1234.56 (i.e. no thousands seperator, '.' used for the decimal seperator) +// - handles atleast 15 significant figures (excel only serialises numbers up to 15sf) + +#include +#include +#include + +// setup a large quantity of random doubles as strings +template +class RandomFloats : public benchmark::Fixture +{ + static constexpr size_t Number_of_Elements = 1 << 20; + static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated"); + + std::vector inputs; + + size_t index = 0; + const char *locale_str; + +public: + void SetUp(const ::benchmark::State &state) + { + if (Decimal_Locale) + { + locale_str = setlocale(LC_ALL, "C"); + } + else + { + locale_str = setlocale(LC_ALL, "de-DE"); + } + std::random_device rd; // obtain a seed for the random number engine + std::mt19937 gen(rd()); + // doing full range is stupid (::min/max()...), it just ends up generating very large numbers + // uniform is probably not the best distribution to use here, but it will do for now + std::uniform_real_distribution dis(-1'000, 1'000); + // generate a large quantity of doubles to deserialise + inputs.reserve(Number_of_Elements); + for (int i = 0; i < Number_of_Elements; ++i) + { + double d = dis(gen); + char buf[16]; + snprintf(buf, 16, "%.15f", d); + inputs.push_back(std::string(buf)); + } + } + + void TearDown(const ::benchmark::State &state) + { + // restore locale + setlocale(LC_ALL, locale_str); + // gbench is keeping the fixtures alive somewhere, need to clear the data... + inputs = std::vector{}; + } + + std::string &get_rand() + { + return inputs[++index & Number_of_Elements]; + } +}; + +// method used by xlsx_consumer.cpp in commit - ba01de47a7d430764c20ec9ac9600eec0eb38bcf +// std::istringstream with the locale set to "C" +#include +struct number_converter +{ + number_converter() + { + stream.imbue(std::locale("C")); + } + + double stold(const std::string &s) + { + stream.str(s); + stream.clear(); + stream >> result; + return result; + } + + std::istringstream stream; + double result; +}; + +using RandFloats = RandomFloats; + +BENCHMARK_F(RandFloats, double_from_string_sstream) +(benchmark::State &state) +{ + number_converter converter; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + converter.stold(get_rand())); + } +} + +// using strotod +// https://en.cppreference.com/w/cpp/string/byte/strtof +// this naive usage is broken in the face of locales (fails condition 1) +#include +BENCHMARK_F(RandFloats, double_from_string_strtod) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + strtod(get_rand().c_str(), nullptr)); + } +} + +// to resolve the locale issue with strtod, a little preprocessing of the input is required +struct number_converter_mk2 +{ + explicit number_converter_mk2() + : should_convert_to_comma(std::use_facet>(std::locale{}).decimal_point() == ',') + { + } + + double stold(std::string &s) const noexcept + { + assert(!s.empty()); + if (should_convert_to_comma) + { + auto decimal_pt = std::find(s.begin(), s.end(), '.'); + if (decimal_pt != s.end()) + { + *decimal_pt = ','; + } + } + return strtod(s.c_str(), nullptr); + } + + double stold(const std::string &s) const + { + assert(!s.empty()); + if (!should_convert_to_comma) + { + return strtod(s.c_str(), nullptr); + } + std::string copy(s); + auto decimal_pt = std::find(copy.begin(), copy.end(), '.'); + if (decimal_pt != copy.end()) + { + *decimal_pt = ','; + } + return strtod(copy.c_str(), nullptr); + } + +private: + bool should_convert_to_comma = false; +}; + +BENCHMARK_F(RandFloats, double_from_string_strtod_fixed) +(benchmark::State &state) +{ + number_converter_mk2 converter; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + converter.stold(get_rand())); + } +} + +BENCHMARK_F(RandFloats, double_from_string_strtod_fixed_const_ref) +(benchmark::State &state) +{ + number_converter_mk2 converter; + while (state.KeepRunning()) + { + const std::string &inp = get_rand(); + benchmark::DoNotOptimize( + converter.stold(inp)); + } +} + +// locale names are different between OS's, and std::from_chars is only complete in MSVC +#ifdef _MSC_VER + +#include +BENCHMARK_F(RandFloats, double_from_string_std_from_chars) +(benchmark::State &state) +{ + while (state.KeepRunning()) + { + const std::string &input = get_rand(); + double output; + benchmark::DoNotOptimize( + std::from_chars(input.data(), input.data() + input.size(), output)); + } +} + +// not using the standard "C" locale with '.' seperator +// german locale uses ',' as the seperator +using RandFloatsComma = RandomFloats; +BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_ref) +(benchmark::State &state) +{ + number_converter_mk2 converter; + while (state.KeepRunning()) + { + benchmark::DoNotOptimize( + converter.stold(get_rand())); + } +} + +BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_const_ref) +(benchmark::State &state) +{ + number_converter_mk2 converter; + while (state.KeepRunning()) + { + const std::string &inp = get_rand(); + benchmark::DoNotOptimize( + converter.stold(inp)); + } +} + +#endif \ No newline at end of file