mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
xLnt. shared string performance optimization.
This commit is contained in:
parent
cb55735644
commit
403605a536
|
@ -130,4 +130,18 @@ private:
|
||||||
std::vector<rich_text_run> runs_;
|
std::vector<rich_text_run> runs_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class XLNT_API rich_text_hash
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
std::size_t operator()(const rich_text& k) const
|
||||||
|
{
|
||||||
|
std::size_t res = 0;
|
||||||
|
|
||||||
|
for (auto r : k.runs())
|
||||||
|
res ^= std::hash<std::string>()(r.first);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace xlnt
|
} // namespace xlnt
|
||||||
|
|
|
@ -31,8 +31,10 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
#include <xlnt/xlnt_config.hpp>
|
#include <xlnt/xlnt_config.hpp>
|
||||||
|
#include <xlnt/cell/rich_text.hpp>
|
||||||
|
|
||||||
namespace xlnt {
|
namespace xlnt {
|
||||||
|
|
||||||
|
@ -704,17 +706,27 @@ public:
|
||||||
/// </summary>
|
/// </summary>
|
||||||
std::size_t add_shared_string(const rich_text &shared, bool allow_duplicates = false);
|
std::size_t add_shared_string(const rich_text &shared, bool allow_duplicates = false);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Returns a reference to the shared strings being used by cells
|
/// Returns a reference to the shared strings ordered by id
|
||||||
/// in this workbook.
|
/// </summary>
|
||||||
/// </summary>
|
const std::map<std::size_t, rich_text> &shared_strings_by_id() const;
|
||||||
std::vector<rich_text> &shared_strings();
|
|
||||||
|
/// <summary>
|
||||||
|
/// Returns a reference to the shared string related to the specified index
|
||||||
|
/// </summary>
|
||||||
|
const rich_text &shared_strings(std::size_t index) const;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Returns a reference to the shared strings being used by cells
|
/// Returns a reference to the shared strings being used by cells
|
||||||
/// in this workbook.
|
/// in this workbook.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
const std::vector<rich_text> &shared_strings() const;
|
std::unordered_map<rich_text, std::size_t, rich_text_hash> &shared_strings();
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Returns a reference to the shared strings being used by cells
|
||||||
|
/// in this workbook.
|
||||||
|
/// </summary>
|
||||||
|
const std::unordered_map<rich_text, std::size_t, rich_text_hash> &shared_strings() const;
|
||||||
|
|
||||||
// Thumbnail
|
// Thumbnail
|
||||||
|
|
||||||
|
|
|
@ -642,7 +642,7 @@ XLNT_API rich_text cell::value() const
|
||||||
{
|
{
|
||||||
if (data_type() == cell::type::shared_string)
|
if (data_type() == cell::type::shared_string)
|
||||||
{
|
{
|
||||||
return workbook().shared_strings().at(static_cast<std::size_t>(d_->value_numeric_));
|
return workbook().shared_strings(static_cast<std::size_t>(d_->value_numeric_));
|
||||||
}
|
}
|
||||||
|
|
||||||
return d_->value_text_;
|
return d_->value_text_;
|
||||||
|
|
|
@ -27,7 +27,10 @@ namespace xlnt {
|
||||||
|
|
||||||
bool rich_text_run::operator<(const rich_text_run &other) const
|
bool rich_text_run::operator<(const rich_text_run &other) const
|
||||||
{
|
{
|
||||||
return first < other.first && second < other.second;
|
if (first != other.first)
|
||||||
|
return first < other.first;
|
||||||
|
|
||||||
|
return second < other.second;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool rich_text_run::operator==(const rich_text_run &other) const
|
bool rich_text_run::operator==(const rich_text_run &other) const
|
||||||
|
|
|
@ -53,7 +53,8 @@ struct workbook_impl
|
||||||
workbook_impl(const workbook_impl &other)
|
workbook_impl(const workbook_impl &other)
|
||||||
: active_sheet_index_(other.active_sheet_index_),
|
: active_sheet_index_(other.active_sheet_index_),
|
||||||
worksheets_(other.worksheets_),
|
worksheets_(other.worksheets_),
|
||||||
shared_strings_(other.shared_strings_),
|
shared_strings_ids_(other.shared_strings_ids_),
|
||||||
|
shared_strings_values_(other.shared_strings_values_),
|
||||||
stylesheet_(other.stylesheet_),
|
stylesheet_(other.stylesheet_),
|
||||||
manifest_(other.manifest_),
|
manifest_(other.manifest_),
|
||||||
theme_(other.theme_),
|
theme_(other.theme_),
|
||||||
|
@ -71,8 +72,8 @@ struct workbook_impl
|
||||||
active_sheet_index_ = other.active_sheet_index_;
|
active_sheet_index_ = other.active_sheet_index_;
|
||||||
worksheets_.clear();
|
worksheets_.clear();
|
||||||
std::copy(other.worksheets_.begin(), other.worksheets_.end(), back_inserter(worksheets_));
|
std::copy(other.worksheets_.begin(), other.worksheets_.end(), back_inserter(worksheets_));
|
||||||
shared_strings_.clear();
|
shared_strings_ids_ = other.shared_strings_ids_;
|
||||||
std::copy(other.shared_strings_.begin(), other.shared_strings_.end(), std::back_inserter(shared_strings_));
|
shared_strings_values_ = other.shared_strings_values_;
|
||||||
theme_ = other.theme_;
|
theme_ = other.theme_;
|
||||||
manifest_ = other.manifest_;
|
manifest_ = other.manifest_;
|
||||||
|
|
||||||
|
@ -91,7 +92,8 @@ struct workbook_impl
|
||||||
optional<std::size_t> active_sheet_index_;
|
optional<std::size_t> active_sheet_index_;
|
||||||
|
|
||||||
std::list<worksheet_impl> worksheets_;
|
std::list<worksheet_impl> worksheets_;
|
||||||
std::vector<rich_text> shared_strings_;
|
std::unordered_map<rich_text, std::size_t, rich_text_hash> shared_strings_ids_;
|
||||||
|
std::map<std::size_t, rich_text> shared_strings_values_;
|
||||||
|
|
||||||
optional<stylesheet> stylesheet_;
|
optional<stylesheet> stylesheet_;
|
||||||
|
|
||||||
|
|
|
@ -1675,18 +1675,17 @@ void xlsx_consumer::read_shared_string_table()
|
||||||
unique_count = parser().attribute<std::size_t>("uniqueCount");
|
unique_count = parser().attribute<std::size_t>("uniqueCount");
|
||||||
}
|
}
|
||||||
|
|
||||||
auto &strings = target_.shared_strings();
|
|
||||||
|
|
||||||
while (in_element(qn("spreadsheetml", "sst")))
|
while (in_element(qn("spreadsheetml", "sst")))
|
||||||
{
|
{
|
||||||
expect_start_element(qn("spreadsheetml", "si"), xml::content::complex);
|
expect_start_element(qn("spreadsheetml", "si"), xml::content::complex);
|
||||||
strings.push_back(read_rich_text(qn("spreadsheetml", "si")));
|
auto rt = read_rich_text(qn("spreadsheetml", "si"));
|
||||||
|
target_.add_shared_string(rt);
|
||||||
expect_end_element(qn("spreadsheetml", "si"));
|
expect_end_element(qn("spreadsheetml", "si"));
|
||||||
}
|
}
|
||||||
|
|
||||||
expect_end_element(qn("spreadsheetml", "sst"));
|
expect_end_element(qn("spreadsheetml", "sst"));
|
||||||
|
|
||||||
if (has_unique_count && unique_count != strings.size())
|
if (has_unique_count && unique_count != target_.shared_strings().size())
|
||||||
{
|
{
|
||||||
throw invalid_file("sizes don't match");
|
throw invalid_file("sizes don't match");
|
||||||
}
|
}
|
||||||
|
|
|
@ -818,20 +818,20 @@ void xlsx_producer::write_shared_string_table(const relationship & /*rel*/)
|
||||||
#pragma clang diagnostic pop
|
#pragma clang diagnostic pop
|
||||||
|
|
||||||
write_attribute("count", string_count);
|
write_attribute("count", string_count);
|
||||||
write_attribute("uniqueCount", source_.shared_strings().size());
|
write_attribute("uniqueCount", source_.shared_strings_by_id().size());
|
||||||
|
|
||||||
auto has_trailing_whitespace = [](const std::string &s)
|
auto has_trailing_whitespace = [](const std::string &s)
|
||||||
{
|
{
|
||||||
return !s.empty() && (s.front() == ' ' || s.back() == ' ');
|
return !s.empty() && (s.front() == ' ' || s.back() == ' ');
|
||||||
};
|
};
|
||||||
|
|
||||||
for (const auto &string : source_.shared_strings())
|
for (const auto &string : source_.shared_strings_by_id())
|
||||||
{
|
{
|
||||||
if (string.runs().size() == 1 && !string.runs().at(0).second.is_set())
|
if (string.second.runs().size() == 1 && !string.second.runs().at(0).second.is_set())
|
||||||
{
|
{
|
||||||
write_start_element(xmlns, "si");
|
write_start_element(xmlns, "si");
|
||||||
write_start_element(xmlns, "t");
|
write_start_element(xmlns, "t");
|
||||||
write_characters(string.plain_text(), has_trailing_whitespace(string.plain_text()));
|
write_characters(string.second.plain_text(), has_trailing_whitespace(string.second.plain_text()));
|
||||||
write_end_element(xmlns, "t");
|
write_end_element(xmlns, "t");
|
||||||
write_end_element(xmlns, "si");
|
write_end_element(xmlns, "si");
|
||||||
|
|
||||||
|
@ -840,7 +840,7 @@ void xlsx_producer::write_shared_string_table(const relationship & /*rel*/)
|
||||||
|
|
||||||
write_start_element(xmlns, "si");
|
write_start_element(xmlns, "si");
|
||||||
|
|
||||||
for (const auto &run : string.runs())
|
for (const auto &run : string.second.runs())
|
||||||
{
|
{
|
||||||
write_start_element(xmlns, "r");
|
write_start_element(xmlns, "r");
|
||||||
|
|
||||||
|
|
|
@ -1251,39 +1251,47 @@ const manifest &workbook::manifest() const
|
||||||
return d_->manifest_;
|
return d_->manifest_;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<rich_text> &workbook::shared_strings()
|
const std::map<std::size_t, rich_text> &workbook::shared_strings_by_id() const
|
||||||
{
|
{
|
||||||
return d_->shared_strings_;
|
return d_->shared_strings_values_;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<rich_text> &workbook::shared_strings() const
|
const rich_text& workbook::shared_strings(std::size_t index) const
|
||||||
{
|
{
|
||||||
return d_->shared_strings_;
|
auto it = d_->shared_strings_values_.find(index);
|
||||||
|
if (it != d_->shared_strings_values_.end())
|
||||||
|
return it->second;
|
||||||
|
|
||||||
|
static rich_text empty;
|
||||||
|
return empty;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unordered_map<rich_text, std::size_t, rich_text_hash> &workbook::shared_strings()
|
||||||
|
{
|
||||||
|
return d_->shared_strings_ids_;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::unordered_map<rich_text, std::size_t, rich_text_hash> &workbook::shared_strings() const
|
||||||
|
{
|
||||||
|
return d_->shared_strings_ids_;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::size_t workbook::add_shared_string(const rich_text &shared, bool allow_duplicates)
|
std::size_t workbook::add_shared_string(const rich_text &shared, bool allow_duplicates)
|
||||||
{
|
{
|
||||||
register_workbook_part(relationship_type::shared_string_table);
|
register_workbook_part(relationship_type::shared_string_table);
|
||||||
|
|
||||||
auto index = std::size_t(0);
|
|
||||||
|
|
||||||
if (!allow_duplicates)
|
if (!allow_duplicates)
|
||||||
{
|
{
|
||||||
// TODO: inefficient, use a set or something?
|
auto it = d_->shared_strings_ids_.find(shared);
|
||||||
for (auto &s : d_->shared_strings_)
|
if (it != d_->shared_strings_ids_.end())
|
||||||
{
|
return it->second;
|
||||||
if (s == shared)
|
|
||||||
{
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
++index;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
d_->shared_strings_.push_back(shared);
|
auto sz = d_->shared_strings_ids_.size();
|
||||||
|
d_->shared_strings_ids_[shared] = sz;
|
||||||
|
d_->shared_strings_values_[sz] = shared;
|
||||||
|
|
||||||
return index;
|
return sz;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool workbook::contains(const std::string &sheet_title) const
|
bool workbook::contains(const std::string &sheet_title) const
|
||||||
|
|
Loading…
Reference in New Issue
Block a user