move the simplified cell_reference and cell structs out to a header

The standard xlnt::cell and xlnt::cell_reference have plenty of extra functionality that just slows things down during (de)serialisation.
These intermediate structs can be used to minimise overhead before transforming to the final type.
This commit is contained in:
JCrawfy 2020-03-02 14:28:37 +13:00
parent 1069c17fbe
commit 9136d21845
2 changed files with 103 additions and 66 deletions

View File

@ -0,0 +1,96 @@
#ifndef XLNT_DETAIL_SERIALISATION_HELPERS_HPP
#define XLNT_DETAIL_SERIALISATION_HELPERS_HPP
#include <xlnt/cell/cell_type.hpp>
#include <xlnt/cell/index_types.hpp>
#include <string>
namespace xlnt {
namespace detail {
/// parsing assumptions used by the following functions
/// - on entry, the start element for the element has been consumed by parser->next
/// - on exit, the closing element has been consumed by parser->next
/// using these assumptions, the following functions DO NOT use parser->peek (SLOW!!!)
/// probable further gains from not building an attribute map and using the attribute events instead as the impl just iterates the map
/// 'r' == cell reference e.g. 'A1'
/// https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oe376/db11a912-b1cb-4dff-b46d-9bedfd10cef0
///
/// a lightweight version of xlnt::cell_reference with no extra functionality (absolute/relative, ...)
/// many thousands are created during (de)serialisation, so even minor overhead is noticeable
struct Cell_Reference
{
    /// Builds a reference from an already-decoded row number and column index.
    explicit Cell_Reference(xlnt::row_t row_arg, xlnt::column_t::index_t column_arg) noexcept
        : row(row_arg), column(column_arg)
    {
    }

    /// The common case: the row number is already known during parsing (from the
    /// parent <row> element), so only the column letters of `reference` are decoded.
    ///
    /// `reference` is expected to match [A-Z]{1,3}\d{1,7} (e.g. "AB12"). At most the
    /// first three characters are treated as column letters; decoding stops at the
    /// first character below 'A' (a digit or the terminating '\0').
    /// An empty reference, or one that starts with a digit, yields column 0 instead
    /// of reading past the end of the string.
    explicit Cell_Reference(xlnt::row_t row_arg, const std::string &reference) noexcept
        : row(row_arg), column(0)
    {
        const char *iter = reference.c_str();
        int temp = 0;
        // only three characters allowed for the column
        for (int letters = 0; letters < 3 && *iter >= 'A'; ++letters, ++iter)
        {
            temp *= 26; // LHS values are more significant
            temp += *iter - 'A' + 1; // 'A' == 1
        }
        column = static_cast<xlnt::column_t::index_t>(temp);
    }

    /// Ordering used for sorting: row first, then column, because serialisation
    /// is done by row and then by column within a row.
    /// Declared const so it can be called on const objects and through std::less.
    bool operator<(const Cell_Reference &rhs) const noexcept
    {
        if (row != rhs.row)
        {
            return row < rhs.row;
        }
        // same row, column comparison
        return column < rhs.column;
    }

    xlnt::row_t row; // range:[1, 1048576]
    // 1-based, "A" == 1. Three letters reach "ZZZ" == 26 + 26^2 + 26^3 == 18278;
    // Excel's last column is "XFD" == 16384.
    xlnt::column_t::index_t column;
};
// <c> inside <row> element
// https://docs.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet.cell?view=openxml-2.8.1
/// Plain data holder for one parsed/serialised <c> element; the trailing comment on
/// each member names the XML attribute or child element it corresponds to.
struct Cell
{
    /// Sort cells by location: row first, then column.
    /// Declared const so it can be called on const cells and through std::less.
    /// Compared field-by-field so the ordering does not depend on
    /// Cell_Reference::operator< being callable on const objects.
    bool operator<(const Cell &rhs) const noexcept
    {
        if (ref.row != rhs.ref.row)
        {
            return ref.row < rhs.ref.row;
        }
        return ref.column < rhs.ref.column;
    }

    bool is_phonetic = false; // 'ph'
    xlnt::cell_type type = xlnt::cell_type::number; // 't'
    int cell_metatdata_idx = -1; // 'cm' (member name spelling kept as-is for existing users)
    int style_index = -1; // 's'
    Cell_Reference ref{0, 0}; // 'r'
    std::string value; // <v> OR <is>
    std::string formula_string; // <f>
};
} // namespace detail
} // namespace xlnt
#endif

View File

@ -40,6 +40,7 @@
#include <detail/header_footer/header_footer_code.hpp> #include <detail/header_footer/header_footer_code.hpp>
#include <detail/implementations/workbook_impl.hpp> #include <detail/implementations/workbook_impl.hpp>
#include <detail/serialization/custom_value_traits.hpp> #include <detail/serialization/custom_value_traits.hpp>
#include <detail/serialization/serialisation_helpers.hpp>
#include <detail/serialization/vector_streambuf.hpp> #include <detail/serialization/vector_streambuf.hpp>
#include <detail/serialization/xlsx_consumer.hpp> #include <detail/serialization/xlsx_consumer.hpp>
#include <detail/serialization/zstream.hpp> #include <detail/serialization/zstream.hpp>
@ -127,74 +128,14 @@ void set_style_by_xfid(const std::vector<style_id_pair> &styles,
} }
} }
/// parsing assumptions used by the following functions
/// - on entry, the start element for the element has been consumed by parser->next
/// - on exit, the closing element has been consumed by parser->next
/// using these assumptions, the following functions DO NOT use parser->peek (SLOW!!!)
/// probable further gains from not building an attribute map and using the attribute events instead as the impl just iterates the map
/// 'r' == cell reference e.g. 'A1'
/// https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oe376/db11a912-b1cb-4dff-b46d-9bedfd10cef0
///
/// a lightweight version of xlnt::cell_reference with no extra functionality (absolute/relative, ...)
/// many thousands are created during parsing, so even minor overhead is noticeable
struct Cell_Reference
{
    /// Not commonly used, added as the obvious ctor: takes an already-decoded
    /// row number and column index.
    explicit Cell_Reference(xlnt::row_t row_arg, xlnt::column_t::index_t column_arg) noexcept
        : row(row_arg), column(column_arg)
    {
    }

    /// The common case: the row number is already known during parsing (from the
    /// parent <row> element), so only the column letters of `reference` are decoded.
    ///
    /// `reference` is expected to match [A-Z]{1,3}\d{1,7} (e.g. "AB12"). At most the
    /// first three characters are treated as column letters; decoding stops at the
    /// first character below 'A' (a digit or the terminating '\0').
    /// An empty reference, or one that starts with a digit, yields column 0 instead
    /// of reading past the end of the string.
    explicit Cell_Reference(xlnt::row_t row_arg, const std::string &reference) noexcept
        : row(row_arg), column(0)
    {
        const char *iter = reference.c_str();
        int temp = 0;
        // only three characters allowed for the column
        for (int letters = 0; letters < 3 && *iter >= 'A'; ++letters, ++iter)
        {
            temp *= 26; // LHS values are more significant
            temp += *iter - 'A' + 1; // 'A' == 1
        }
        column = static_cast<xlnt::column_t::index_t>(temp);
    }

    xlnt::row_t row; // range:[1, 1048576]
    // 1-based, "A" == 1. Three letters reach "ZZZ" == 26 + 26^2 + 26^3 == 18278;
    // Excel's last column is "XFD" == 16384.
    xlnt::column_t::index_t column;
};
// <c> inside <row> element
// https://docs.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet.cell?view=openxml-2.8.1
/// Plain data holder for one parsed <c> element; each member's trailing comment
/// names the XML attribute or child element it corresponds to.
struct Cell
{
    bool is_phonetic{false}; // 'ph'
    xlnt::cell::type type{xlnt::cell::type::number}; // 't'
    int cell_metatdata_idx{-1}; // 'cm'
    int style_index{-1}; // 's'
    Cell_Reference ref{0, 0}; // 'r'
    std::string value{}; // <v> OR <is>
    std::string formula_string{}; // <f>
};
// <sheetData> element // <sheetData> element
// Holds everything collected from one <sheetData> element: a list of rows and a flat list of cells.
// NOTE(review): each line below shows the old and new diff columns side by side.
struct Sheet_Data struct Sheet_Data
{ {
// one (row properties, row number) pair per entry -- presumably one per parsed <row>; confirm against parse_row
std::vector<std::pair<xlnt::row_properties, xlnt::row_t>> parsed_rows; std::vector<std::pair<xlnt::row_properties, xlnt::row_t>> parsed_rows;
// lightweight cell records; the element type changes from the file-local Cell to xlnt::detail::Cell
std::vector<Cell> parsed_cells; std::vector<xlnt::detail::Cell> parsed_cells;
}; };
xlnt::cell::type type_from_string(const std::string &str) xlnt::cell_type type_from_string(const std::string &str)
{ {
if (string_equal(str, "s")) if (string_equal(str, "s"))
{ {
@ -223,14 +164,14 @@ xlnt::cell::type type_from_string(const std::string &str)
return xlnt::cell::type::shared_string; return xlnt::cell::type::shared_string;
} }
Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser) xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
{ {
Cell c; xlnt::detail::Cell c;
for (auto &attr : parser->attribute_map()) for (auto &attr : parser->attribute_map())
{ {
if (string_equal(attr.first.name(), "r")) if (string_equal(attr.first.name(), "r"))
{ {
c.ref = Cell_Reference(row_arg, attr.second.value); c.ref = xlnt::detail::Cell_Reference(row_arg, attr.second.value);
} }
else if (string_equal(attr.first.name(), "t")) else if (string_equal(attr.first.name(), "t"))
{ {
@ -307,7 +248,7 @@ Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
} }
// <row> inside <sheetData> element // <row> inside <sheetData> element
std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector<Cell> &parsed_cells) std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector<xlnt::detail::Cell> &parsed_cells)
{ {
std::pair<xlnt::row_properties, int> props; std::pair<xlnt::row_properties, int> props;
for (auto &attr : parser->attribute_map()) for (auto &attr : parser->attribute_map())