mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
fix dumb bug in input randomiser, add basic double->string benchmarks
* input randomiser was feeding a constant value previously, now actually randomising * start to_string with the current method (sstream), a faster, more correct version (sstream_cached), snprintf, and std::to_chars ** NOTE: only std::to_chars and sstream_cached are correct in the face of locales Run on (4 X 3500 MHz CPU s) CPU Caches: L1 Data 32K (x4) L1 Instruction 32K (x4) L2 Unified 262K (x4) L3 Unified 6291K (x1) ------------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------------------- RandFloatStrs/double_from_string_sstream 1012 ns 1001 ns 640000 RandFloatStrs/double_from_string_strtod 276 ns 276 ns 2488889 RandFloatStrs/double_from_string_strtod_fixed 312 ns 308 ns 2133333 RandFloatStrs/double_from_string_strtod_fixed_const_ref 307 ns 300 ns 2240000 RandFloatStrs/double_from_string_std_from_chars 194 ns 188 ns 3733333 RandFloatCommaStrs/double_from_string_strtod_fixed_comma_ref 315 ns 314 ns 2240000 RandFloatCommaStrs/double_from_string_strtod_fixed_comma_const_ref 306 ns 305 ns 2357895 RandFloats/string_from_double_sstream 1372 ns 1381 ns 497778 RandFloats/string_from_double_sstream_cached 1136 ns 1123 ns 640000 RandFloats/string_from_double_snprintf 536 ns 516 ns 1000000 RandFloats/string_from_double_std_to_chars 116 ns 115 ns 6400000
This commit is contained in:
parent
0adb8a69b1
commit
7ba36b5e73
|
@ -1,2 +1,151 @@
|
|||
#include "benchmark/benchmark.h"
|
||||
// A core part of the xlsx serialisation routine is taking doubles from memory and stringifying them
|
||||
// this has a few requirements
|
||||
// - output strings in the form 1234.56 (i.e. no thousands separator, '.' used for the decimal separator)
|
||||
// - outputs up to 15 significant figures (excel only serialises numbers up to 15sf)
|
||||
|
||||
#include "benchmark/benchmark.h"
#include <clocale>
#include <cstdio>
#include <locale>
#include <random>
#include <sstream>
#include <string>
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
// setup a large quantity of random doubles as strings
|
||||
template <bool Decimal_Locale = true>
|
||||
class RandomFloats : public benchmark::Fixture
|
||||
{
|
||||
static constexpr size_t Number_of_Elements = 1 << 20;
|
||||
static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated");
|
||||
|
||||
std::vector<double> inputs;
|
||||
|
||||
size_t index = 0;
|
||||
const char *locale_str = nullptr;
|
||||
|
||||
public:
|
||||
void SetUp(const ::benchmark::State &state)
|
||||
{
|
||||
if (Decimal_Locale)
|
||||
{
|
||||
locale_str = setlocale(LC_ALL, "C");
|
||||
}
|
||||
else
|
||||
{
|
||||
locale_str = setlocale(LC_ALL, "de-DE");
|
||||
}
|
||||
std::random_device rd; // obtain a seed for the random number engine
|
||||
std::mt19937 gen(rd());
|
||||
// doing full range is stupid (<double>::min/max()...), it just ends up generating very large numbers
|
||||
// uniform is probably not the best distribution to use here, but it will do for now
|
||||
std::uniform_real_distribution<double> dis(-1'000, 1'000);
|
||||
// generate a large quantity of doubles to deserialise
|
||||
inputs.reserve(Number_of_Elements);
|
||||
for (int i = 0; i < Number_of_Elements; ++i)
|
||||
{
|
||||
double d = dis(gen);
|
||||
inputs.push_back(d);
|
||||
}
|
||||
}
|
||||
|
||||
void TearDown(const ::benchmark::State &state)
|
||||
{
|
||||
// restore locale
|
||||
setlocale(LC_ALL, locale_str);
|
||||
// gbench is keeping the fixtures alive somewhere, need to clear the data after use
|
||||
inputs = std::vector<double>{};
|
||||
}
|
||||
|
||||
double &get_rand()
|
||||
{
|
||||
return inputs[++index & (Number_of_Elements - 1)];
|
||||
}
|
||||
};
|
||||
|
||||
/// Converts a double to its textual form by streaming it into a freshly
/// constructed string stream limited to 15 significant figures.
std::string serialize_number_to_string(double num)
{
    // more significant figures than this and excel won't match
    constexpr int max_significant_figures = 15;

    std::ostringstream formatter;
    formatter.precision(max_significant_figures);
    formatter << num;
    return formatter.str();
}
|
||||
|
||||
/// Reusable double -> string formatter.
/// A single output stream is configured once in the constructor
/// (15 significant figures, "C" locale) and reused by every serialise() call.
class number_serialiser
{
    static constexpr int Excel_Digit_Precision = 15; //sf
    std::ostringstream stream;

public:
    explicit number_serialiser()
    {
        stream.imbue(std::locale("C"));
        stream.precision(Excel_Digit_Precision);
    }

    /// Formats d with the cached stream and returns the resulting text.
    std::string serialise(double d)
    {
        stream.clear(); // drop any error flags left from an earlier call
        stream.str(""); // discard the text written by an earlier call
        stream << d;
        return stream.str();
    }
};
|
||||
|
||||
using RandFloats = RandomFloats<true>;
|
||||
} // namespace
|
||||
|
||||
// Times serialize_number_to_string(), feeding it a new value from the
// fixture on every iteration.
BENCHMARK_F(RandFloats, string_from_double_sstream)(benchmark::State &state)
{
    while (state.KeepRunning())
    {
        // DoNotOptimize stops the compiler from discarding the unused result
        benchmark::DoNotOptimize(serialize_number_to_string(get_rand()));
    }
}
|
||||
|
||||
// Times number_serialiser::serialise(); the serialiser is constructed once,
// outside the timed loop, and reused for every value.
BENCHMARK_F(RandFloats, string_from_double_sstream_cached)(benchmark::State &state)
{
    number_serialiser ser;
    while (state.KeepRunning())
    {
        // DoNotOptimize stops the compiler from discarding the unused result
        benchmark::DoNotOptimize(ser.serialise(get_rand()));
    }
}
|
||||
|
||||
// Times double -> string conversion through snprintf with 15 significant figures.
// NOTE: snprintf formats according to the current C locale.
BENCHMARK_F(RandFloats, string_from_double_snprintf)
(benchmark::State &state)
{
    while (state.KeepRunning())
    {
        // "%.15g" emits at most 15 significant digits; with sign, decimal point and
        // exponent that is well below 32 characters, so nothing is truncated.
        // (The previous "%16f" was a minimum field *width* of 16, which always
        // produced at least 16 characters for a 16 byte buffer.)
        char buf[32];
        const int written = snprintf(buf, sizeof(buf), "%.15g", get_rand());

        // snprintf returns a negative value on error and the untruncated length
        // otherwise; clamp so the string never covers more than was stored.
        size_t len = 0;
        if (written > 0)
        {
            len = static_cast<size_t>(written);
            if (len > sizeof(buf) - 1)
            {
                len = sizeof(buf) - 1;
            }
        }

        benchmark::DoNotOptimize(
            std::string(buf, len));
    }
}
|
||||
|
||||
// locale names are different between OSes, and std::to_chars is only complete in MSVC
|
||||
#ifdef _MSC_VER
|
||||
|
||||
#include <charconv>
|
||||
// Times double -> string conversion through std::to_chars, limited to
// 15 significant figures like the other string_from_double benchmarks.
BENCHMARK_F(RandFloats, string_from_double_std_to_chars)
(benchmark::State &state)
{
    while (state.KeepRunning())
    {
        // 15 significant digits plus sign, decimal point and exponent fit easily in
        // 32 characters; the previous 16 byte buffer was too small for many doubles,
        // in which case to_chars fails and leaves the buffer contents unspecified.
        char buf[32];
        const std::to_chars_result result =
            std::to_chars(buf, buf + sizeof(buf), get_rand(), std::chars_format::general, 15);

        // only use the characters when the conversion succeeded
        const size_t len = (result.ec == std::errc{}) ? static_cast<size_t>(result.ptr - buf) : 0;

        benchmark::DoNotOptimize(
            std::string(buf, len));
    }
}
|
||||
|
||||
#endif
|
|
@ -1,15 +1,18 @@
|
|||
// A core part of the xlsx parsing routine is taking strings from the xml parser and parsing these to a double
|
||||
// this has a few requirements
|
||||
// - expect numbers in the form 1234.56 (i.e. no thousands seperator, '.' used for the decimal seperator)
|
||||
// - expect strings in the form 1234.56 (i.e. no thousands separator, '.' used for the decimal separator)
|
||||
// - handles at least 15 significant figures (excel only serialises numbers up to 15sf)
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <locale>
|
||||
#include <random>
|
||||
#include <sstream>
|
||||
|
||||
namespace {
|
||||
|
||||
// setup a large quantity of random doubles as strings
|
||||
template <bool Decimal_Locale = true>
|
||||
class RandomFloats : public benchmark::Fixture
|
||||
class RandomFloatStrs : public benchmark::Fixture
|
||||
{
|
||||
static constexpr size_t Number_of_Elements = 1 << 20;
|
||||
static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated");
|
||||
|
@ -17,7 +20,7 @@ class RandomFloats : public benchmark::Fixture
|
|||
std::vector<std::string> inputs;
|
||||
|
||||
size_t index = 0;
|
||||
const char *locale_str;
|
||||
const char *locale_str = nullptr;
|
||||
|
||||
public:
|
||||
void SetUp(const ::benchmark::State &state)
|
||||
|
@ -50,19 +53,18 @@ public:
|
|||
{
|
||||
// restore locale
|
||||
setlocale(LC_ALL, locale_str);
|
||||
// gbench is keeping the fixtures alive somewhere, need to clear the data...
|
||||
// gbench is keeping the fixtures alive somewhere, need to clear the data after use
|
||||
inputs = std::vector<std::string>{};
|
||||
}
|
||||
|
||||
std::string &get_rand()
|
||||
{
|
||||
return inputs[++index & Number_of_Elements];
|
||||
return inputs[++index & (Number_of_Elements - 1)];
|
||||
}
|
||||
};
|
||||
|
||||
// method used by xlsx_consumer.cpp in commit - ba01de47a7d430764c20ec9ac9600eec0eb38bcf
|
||||
// std::istringstream with the locale set to "C"
|
||||
#include <sstream>
|
||||
struct number_converter
|
||||
{
|
||||
number_converter()
|
||||
|
@ -82,32 +84,6 @@ struct number_converter
|
|||
double result;
|
||||
};
|
||||
|
||||
using RandFloats = RandomFloats<true>;
|
||||
|
||||
// Times number_converter::stold(); the converter is constructed once, outside
// the timed loop, and reused for every input string.
BENCHMARK_F(RandFloats, double_from_string_sstream)(benchmark::State &state)
{
    number_converter converter;
    while (state.KeepRunning())
    {
        // DoNotOptimize stops the compiler from discarding the unused result
        benchmark::DoNotOptimize(converter.stold(get_rand()));
    }
}
|
||||
|
||||
// using strotod
|
||||
// https://en.cppreference.com/w/cpp/string/byte/strtof
|
||||
// this naive usage is broken in the face of locales (fails condition 1)
|
||||
#include <cstdlib>
|
||||
// Times a direct strtod() call on each input string, with no end pointer requested.
BENCHMARK_F(RandFloats, double_from_string_strtod)(benchmark::State &state)
{
    while (state.KeepRunning())
    {
        // DoNotOptimize stops the compiler from discarding the unused result
        benchmark::DoNotOptimize(strtod(get_rand().c_str(), nullptr));
    }
}
|
||||
|
||||
// to resolve the locale issue with strtod, a little preprocessing of the input is required
|
||||
struct number_converter_mk2
|
||||
|
@ -151,7 +127,37 @@ private:
|
|||
bool should_convert_to_comma = false;
|
||||
};
|
||||
|
||||
BENCHMARK_F(RandFloats, double_from_string_strtod_fixed)
|
||||
using RandFloatStrs = RandomFloatStrs<true>;
|
||||
// german locale uses ',' as the separator
|
||||
using RandFloatCommaStrs = RandomFloatStrs<false>;
|
||||
} // namespace
|
||||
|
||||
// Times number_converter::stold(); the converter is constructed once, outside
// the timed loop, and reused for every input string.
BENCHMARK_F(RandFloatStrs, double_from_string_sstream)(benchmark::State &state)
{
    number_converter converter;
    while (state.KeepRunning())
    {
        // DoNotOptimize stops the compiler from discarding the unused result
        benchmark::DoNotOptimize(converter.stold(get_rand()));
    }
}
|
||||
|
||||
// using strtod
|
||||
// https://en.cppreference.com/w/cpp/string/byte/strtof
|
||||
// this naive usage is broken in the face of locales (fails condition 1)
|
||||
#include <cstdlib>
|
||||
// Times a direct strtod() call on each input string, with no end pointer requested.
BENCHMARK_F(RandFloatStrs, double_from_string_strtod)(benchmark::State &state)
{
    while (state.KeepRunning())
    {
        // DoNotOptimize stops the compiler from discarding the unused result
        benchmark::DoNotOptimize(strtod(get_rand().c_str(), nullptr));
    }
}
|
||||
|
||||
BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed)
|
||||
(benchmark::State &state)
|
||||
{
|
||||
number_converter_mk2 converter;
|
||||
|
@ -162,7 +168,7 @@ BENCHMARK_F(RandFloats, double_from_string_strtod_fixed)
|
|||
}
|
||||
}
|
||||
|
||||
BENCHMARK_F(RandFloats, double_from_string_strtod_fixed_const_ref)
|
||||
BENCHMARK_F(RandFloatStrs, double_from_string_strtod_fixed_const_ref)
|
||||
(benchmark::State &state)
|
||||
{
|
||||
number_converter_mk2 converter;
|
||||
|
@ -178,7 +184,7 @@ BENCHMARK_F(RandFloats, double_from_string_strtod_fixed_const_ref)
|
|||
#ifdef _MSC_VER
|
||||
|
||||
#include <charconv>
|
||||
BENCHMARK_F(RandFloats, double_from_string_std_from_chars)
|
||||
BENCHMARK_F(RandFloatStrs, double_from_string_std_from_chars)
|
||||
(benchmark::State &state)
|
||||
{
|
||||
while (state.KeepRunning())
|
||||
|
@ -191,9 +197,7 @@ BENCHMARK_F(RandFloats, double_from_string_std_from_chars)
|
|||
}
|
||||
|
||||
// not using the standard "C" locale with '.' seperator
|
||||
// german locale uses ',' as the seperator
|
||||
using RandFloatsComma = RandomFloats<false>;
|
||||
BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_ref)
|
||||
BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_ref)
|
||||
(benchmark::State &state)
|
||||
{
|
||||
number_converter_mk2 converter;
|
||||
|
@ -204,7 +208,7 @@ BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_ref)
|
|||
}
|
||||
}
|
||||
|
||||
BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_const_ref)
|
||||
BENCHMARK_F(RandFloatCommaStrs, double_from_string_strtod_fixed_comma_const_ref)
|
||||
(benchmark::State &state)
|
||||
{
|
||||
number_converter_mk2 converter;
|
||||
|
|
Loading…
Reference in New Issue
Block a user