mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
xLnt. shared string performance optimization.
This commit is contained in:
parent
cb55735644
commit
403605a536
@ -130,4 +130,18 @@ private:
|
||||
std::vector<rich_text_run> runs_;
|
||||
};
|
||||
|
||||
class XLNT_API rich_text_hash
|
||||
{
|
||||
public:
|
||||
std::size_t operator()(const rich_text& k) const
|
||||
{
|
||||
std::size_t res = 0;
|
||||
|
||||
for (auto r : k.runs())
|
||||
res ^= std::hash<std::string>()(r.first);
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace xlnt
|
||||
|
@ -31,8 +31,10 @@
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#include <xlnt/xlnt_config.hpp>
|
||||
#include <xlnt/cell/rich_text.hpp>
|
||||
|
||||
namespace xlnt {
|
||||
|
||||
@ -704,17 +706,27 @@ public:
|
||||
/// </summary>
|
||||
std::size_t add_shared_string(const rich_text &shared, bool allow_duplicates = false);
|
||||
|
||||
/// <summary>
|
||||
/// Returns a reference to the shared strings being used by cells
|
||||
/// in this workbook.
|
||||
/// </summary>
|
||||
std::vector<rich_text> &shared_strings();
|
||||
/// <summary>
|
||||
/// Returns a reference to the shared string ordered by id
|
||||
/// </summary>
|
||||
const std::map<std::size_t, rich_text> &workbook::shared_strings_by_id() const;
|
||||
|
||||
/// <summary>
|
||||
/// Returns a reference to the shared string related to the specified index
|
||||
/// </summary>
|
||||
const rich_text& workbook::shared_strings(std::size_t index) const;
|
||||
|
||||
/// <summary>
|
||||
/// Returns a reference to the shared strings being used by cells
|
||||
/// in this workbook.
|
||||
/// </summary>
|
||||
const std::vector<rich_text> &shared_strings() const;
|
||||
std::unordered_map<rich_text, std::size_t, rich_text_hash> &shared_strings();
|
||||
|
||||
/// <summary>
|
||||
/// Returns a reference to the shared strings being used by cells
|
||||
/// in this workbook.
|
||||
/// </summary>
|
||||
const std::unordered_map<rich_text, std::size_t, rich_text_hash> &shared_strings() const;
|
||||
|
||||
// Thumbnail
|
||||
|
||||
|
@ -642,7 +642,7 @@ XLNT_API rich_text cell::value() const
|
||||
{
|
||||
if (data_type() == cell::type::shared_string)
|
||||
{
|
||||
return workbook().shared_strings().at(static_cast<std::size_t>(d_->value_numeric_));
|
||||
return workbook().shared_strings(static_cast<std::size_t>(d_->value_numeric_));
|
||||
}
|
||||
|
||||
return d_->value_text_;
|
||||
|
@ -27,7 +27,10 @@ namespace xlnt {
|
||||
|
||||
bool rich_text_run::operator<(const rich_text_run &other) const
|
||||
{
|
||||
return first < other.first && second < other.second;
|
||||
if (first != other.first)
|
||||
return first < other.first;
|
||||
|
||||
return second < other.second;
|
||||
}
|
||||
|
||||
bool rich_text_run::operator==(const rich_text_run &other) const
|
||||
|
@ -53,7 +53,8 @@ struct workbook_impl
|
||||
workbook_impl(const workbook_impl &other)
|
||||
: active_sheet_index_(other.active_sheet_index_),
|
||||
worksheets_(other.worksheets_),
|
||||
shared_strings_(other.shared_strings_),
|
||||
shared_strings_ids_(other.shared_strings_ids_),
|
||||
shared_strings_values_(other.shared_strings_values_),
|
||||
stylesheet_(other.stylesheet_),
|
||||
manifest_(other.manifest_),
|
||||
theme_(other.theme_),
|
||||
@ -71,8 +72,8 @@ struct workbook_impl
|
||||
active_sheet_index_ = other.active_sheet_index_;
|
||||
worksheets_.clear();
|
||||
std::copy(other.worksheets_.begin(), other.worksheets_.end(), back_inserter(worksheets_));
|
||||
shared_strings_.clear();
|
||||
std::copy(other.shared_strings_.begin(), other.shared_strings_.end(), std::back_inserter(shared_strings_));
|
||||
shared_strings_ids_ = other.shared_strings_ids_;
|
||||
shared_strings_values_ = other.shared_strings_values_;
|
||||
theme_ = other.theme_;
|
||||
manifest_ = other.manifest_;
|
||||
|
||||
@ -91,7 +92,8 @@ struct workbook_impl
|
||||
optional<std::size_t> active_sheet_index_;
|
||||
|
||||
std::list<worksheet_impl> worksheets_;
|
||||
std::vector<rich_text> shared_strings_;
|
||||
std::unordered_map<rich_text, std::size_t, rich_text_hash> shared_strings_ids_;
|
||||
std::map<std::size_t, rich_text> shared_strings_values_;
|
||||
|
||||
optional<stylesheet> stylesheet_;
|
||||
|
||||
|
@ -1675,18 +1675,17 @@ void xlsx_consumer::read_shared_string_table()
|
||||
unique_count = parser().attribute<std::size_t>("uniqueCount");
|
||||
}
|
||||
|
||||
auto &strings = target_.shared_strings();
|
||||
|
||||
while (in_element(qn("spreadsheetml", "sst")))
|
||||
{
|
||||
expect_start_element(qn("spreadsheetml", "si"), xml::content::complex);
|
||||
strings.push_back(read_rich_text(qn("spreadsheetml", "si")));
|
||||
auto rt = read_rich_text(qn("spreadsheetml", "si"));
|
||||
target_.add_shared_string(rt);
|
||||
expect_end_element(qn("spreadsheetml", "si"));
|
||||
}
|
||||
|
||||
expect_end_element(qn("spreadsheetml", "sst"));
|
||||
|
||||
if (has_unique_count && unique_count != strings.size())
|
||||
if (has_unique_count && unique_count != target_.shared_strings().size())
|
||||
{
|
||||
throw invalid_file("sizes don't match");
|
||||
}
|
||||
|
@ -818,20 +818,20 @@ void xlsx_producer::write_shared_string_table(const relationship & /*rel*/)
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
write_attribute("count", string_count);
|
||||
write_attribute("uniqueCount", source_.shared_strings().size());
|
||||
write_attribute("uniqueCount", source_.shared_strings_by_id().size());
|
||||
|
||||
auto has_trailing_whitespace = [](const std::string &s)
|
||||
{
|
||||
return !s.empty() && (s.front() == ' ' || s.back() == ' ');
|
||||
};
|
||||
|
||||
for (const auto &string : source_.shared_strings())
|
||||
for (const auto &string : source_.shared_strings_by_id())
|
||||
{
|
||||
if (string.runs().size() == 1 && !string.runs().at(0).second.is_set())
|
||||
if (string.second.runs().size() == 1 && !string.second.runs().at(0).second.is_set())
|
||||
{
|
||||
write_start_element(xmlns, "si");
|
||||
write_start_element(xmlns, "t");
|
||||
write_characters(string.plain_text(), has_trailing_whitespace(string.plain_text()));
|
||||
write_characters(string.second.plain_text(), has_trailing_whitespace(string.second.plain_text()));
|
||||
write_end_element(xmlns, "t");
|
||||
write_end_element(xmlns, "si");
|
||||
|
||||
@ -840,7 +840,7 @@ void xlsx_producer::write_shared_string_table(const relationship & /*rel*/)
|
||||
|
||||
write_start_element(xmlns, "si");
|
||||
|
||||
for (const auto &run : string.runs())
|
||||
for (const auto &run : string.second.runs())
|
||||
{
|
||||
write_start_element(xmlns, "r");
|
||||
|
||||
|
@ -1251,39 +1251,47 @@ const manifest &workbook::manifest() const
|
||||
return d_->manifest_;
|
||||
}
|
||||
|
||||
std::vector<rich_text> &workbook::shared_strings()
|
||||
const std::map<std::size_t, rich_text> &workbook::shared_strings_by_id() const
|
||||
{
|
||||
return d_->shared_strings_;
|
||||
return d_->shared_strings_values_;
|
||||
}
|
||||
|
||||
const std::vector<rich_text> &workbook::shared_strings() const
|
||||
const rich_text& workbook::shared_strings(std::size_t index) const
|
||||
{
|
||||
return d_->shared_strings_;
|
||||
auto it = d_->shared_strings_values_.find(index);
|
||||
if (it != d_->shared_strings_values_.end())
|
||||
return it->second;
|
||||
|
||||
static rich_text empty;
|
||||
return empty;
|
||||
}
|
||||
|
||||
std::unordered_map<rich_text, std::size_t, rich_text_hash> &workbook::shared_strings()
|
||||
{
|
||||
return d_->shared_strings_ids_;
|
||||
}
|
||||
|
||||
const std::unordered_map<rich_text, std::size_t, rich_text_hash> &workbook::shared_strings() const
|
||||
{
|
||||
return d_->shared_strings_ids_;
|
||||
}
|
||||
|
||||
std::size_t workbook::add_shared_string(const rich_text &shared, bool allow_duplicates)
|
||||
{
|
||||
register_workbook_part(relationship_type::shared_string_table);
|
||||
|
||||
auto index = std::size_t(0);
|
||||
|
||||
if (!allow_duplicates)
|
||||
{
|
||||
// TODO: inefficient, use a set or something?
|
||||
for (auto &s : d_->shared_strings_)
|
||||
{
|
||||
if (s == shared)
|
||||
{
|
||||
return index;
|
||||
}
|
||||
|
||||
++index;
|
||||
}
|
||||
auto it = d_->shared_strings_ids_.find(shared);
|
||||
if (it != d_->shared_strings_ids_.end())
|
||||
return it->second;
|
||||
}
|
||||
|
||||
d_->shared_strings_.push_back(shared);
|
||||
auto sz = d_->shared_strings_ids_.size();
|
||||
d_->shared_strings_ids_[shared] = sz;
|
||||
d_->shared_strings_values_[sz] = shared;
|
||||
|
||||
return index;
|
||||
return sz;
|
||||
}
|
||||
|
||||
bool workbook::contains(const std::string &sheet_title) const
|
||||
|
Loading…
x
Reference in New Issue
Block a user