Remove memory growth based on row count

- Nested unordered_map was the cause of a significant memory/cell spike.
- added cell_reference hash, lookup by cell_reference to make memory usage proportional to cell count only
This commit is contained in:
Crzyrndm 2018-07-29 11:31:38 +12:00
parent 138c90883b
commit dd6f338419
3 changed files with 54 additions and 69 deletions

View File

@ -26,7 +26,7 @@
#include <cstdint> #include <cstdint>
#include <string> #include <string>
#include <utility> #include <utility>
#include <tuple>
#include <xlnt/xlnt_config.hpp> #include <xlnt/xlnt_config.hpp>
#include <xlnt/cell/index_types.hpp> #include <xlnt/cell/index_types.hpp>
@ -255,3 +255,17 @@ private:
}; };
} // namespace xlnt } // namespace xlnt
namespace std
{
template <>
struct hash<xlnt::cell_reference>
{
size_t operator()(const xlnt::cell_reference &x) const
{
static_assert(std::is_same<decltype(x.row()), std::uint32_t>::value, "this hash function expects both row and column to be 32-bit numbers");
static_assert(std::is_same<decltype(x.column_index()), std::uint32_t>::value, "this hash function expects both row and column to be 32-bit numbers");
return hash<std::uint64_t>{}(x.row() | static_cast<std::uint64_t>(x.column_index()) << 32);
}
};
}

View File

@ -88,14 +88,11 @@ struct worksheet_impl
sheet_properties_ = other.sheet_properties_; sheet_properties_ = other.sheet_properties_;
print_options_ = other.print_options_; print_options_ = other.print_options_;
for (auto &row : cell_map_) for (auto &cell : cell_map_)
{
for (auto &cell : row.second)
{ {
cell.second.parent_ = this; cell.second.parent_ = this;
} }
} }
}
workbook *parent_; workbook *parent_;
@ -134,7 +131,7 @@ struct worksheet_impl
std::unordered_map<column_t, column_properties> column_properties_; std::unordered_map<column_t, column_properties> column_properties_;
std::unordered_map<row_t, row_properties> row_properties_; std::unordered_map<row_t, row_properties> row_properties_;
std::unordered_map<row_t, std::unordered_map<column_t, cell_impl>> cell_map_; std::unordered_map<cell_reference, cell_impl> cell_map_;
optional<page_setup> page_setup_; optional<page_setup> page_setup_;
optional<range_reference> auto_filter_; optional<range_reference> auto_filter_;

View File

@ -196,32 +196,18 @@ const workbook &worksheet::workbook() const
void worksheet::garbage_collect() void worksheet::garbage_collect()
{ {
auto cell_map_iter = d_->cell_map_.begin(); auto cell_iter = d_->cell_map_.begin();
while (cell_map_iter != d_->cell_map_.end()) while (cell_iter != d_->cell_map_.end())
{ {
auto cell_iter = cell_map_iter->second.begin(); if (xlnt::cell(&cell_iter->second).garbage_collectible())
while (cell_iter != cell_map_iter->second.end())
{ {
class cell current_cell(&cell_iter->second); cell_iter = d_->cell_map_.erase(cell_iter);
if (current_cell.garbage_collectible())
{
cell_iter = cell_map_iter->second.erase(cell_iter);
continue;
} }
else
cell_iter++;
}
if (cell_map_iter->second.empty())
{ {
cell_map_iter = d_->cell_map_.erase(cell_map_iter); ++cell_iter;
continue;
} }
cell_map_iter++;
} }
} }
@ -391,25 +377,22 @@ cell_reference worksheet::active_cell() const
cell worksheet::cell(const cell_reference &reference) cell worksheet::cell(const cell_reference &reference)
{ {
auto &row = d_->cell_map_[reference.row()]; auto match = d_->cell_map_.find(reference);
auto match = row.find(reference.column_index()); if (match == d_->cell_map_.end())
if (match == row.end())
{ {
match = row.emplace(reference.column_index(), detail::cell_impl()).first; auto impl = detail::cell_impl();
auto &impl = match->second;
impl.parent_ = d_; impl.parent_ = d_;
impl.column_ = reference.column_index(); impl.column_ = reference.column_index();
impl.row_ = reference.row(); impl.row_ = reference.row();
}
match = d_->cell_map_.emplace(reference, impl).first;
}
return xlnt::cell(&match->second); return xlnt::cell(&match->second);
} }
const cell worksheet::cell(const cell_reference &reference) const const cell worksheet::cell(const cell_reference &reference) const
{ {
return xlnt::cell(&d_->cell_map_.at(reference.row()).at(reference.column_index())); return xlnt::cell(&d_->cell_map_.at(reference));
} }
cell worksheet::cell(xlnt::column_t column, row_t row) cell worksheet::cell(xlnt::column_t column, row_t row)
@ -424,13 +407,8 @@ const cell worksheet::cell(xlnt::column_t column, row_t row) const
bool worksheet::has_cell(const cell_reference &reference) const bool worksheet::has_cell(const cell_reference &reference) const
{ {
const auto row = d_->cell_map_.find(reference.row()); const auto cell = d_->cell_map_.find(reference);
if (row == d_->cell_map_.cend()) return false; return cell != d_->cell_map_.cend();
const auto col = row->second.find(reference.column_index());
if (col == row->second.cend()) return false;
return true;
} }
bool worksheet::has_row_properties(row_t row) const bool worksheet::has_row_properties(row_t row) const
@ -477,12 +455,9 @@ column_t worksheet::lowest_column() const
auto lowest = constants::max_column(); auto lowest = constants::max_column();
for (auto &row : d_->cell_map_) for (auto &cell : d_->cell_map_)
{ {
for (auto &c : row.second) lowest = std::min(lowest, cell.first.column());
{
lowest = std::min(lowest, c.first);
}
} }
return lowest; return lowest;
@ -514,9 +489,9 @@ row_t worksheet::lowest_row() const
auto lowest = constants::max_row(); auto lowest = constants::max_row();
for (auto &row : d_->cell_map_) for (auto &cell : d_->cell_map_)
{ {
lowest = std::min(lowest, row.first); lowest = std::min(lowest, cell.first.row());
} }
return lowest; return lowest;
@ -543,9 +518,9 @@ row_t worksheet::highest_row() const
{ {
auto highest = constants::min_row(); auto highest = constants::min_row();
for (auto &row : d_->cell_map_) for (auto &cell : d_->cell_map_)
{ {
highest = std::max(highest, row.first); highest = std::max(highest, cell.first.row());
} }
return highest; return highest;
@ -572,12 +547,9 @@ column_t worksheet::highest_column() const
{ {
auto highest = constants::min_column(); auto highest = constants::min_column();
for (auto &row : d_->cell_map_) for (auto &cell : d_->cell_map_)
{ {
for (auto &c : row.second) highest = std::max(highest, cell.first.column());
{
highest = std::max(highest, c.first);
}
} }
return highest; return highest;
@ -744,13 +716,22 @@ const cell_vector worksheet::cells(bool skip_null) const
void worksheet::clear_cell(const cell_reference &ref) void worksheet::clear_cell(const cell_reference &ref)
{ {
d_->cell_map_.at(ref.row()).erase(ref.column()); d_->cell_map_.erase(ref);
// TODO: garbage collect newly unreferenced resources such as styles? // TODO: garbage collect newly unreferenced resources such as styles?
} }
void worksheet::clear_row(row_t row) void worksheet::clear_row(row_t row)
{ {
d_->cell_map_.erase(row); for (auto it = d_->cell_map_.begin(); it != d_->cell_map_.end();)
{
if (it->first.row() == row)
{
it = d_->cell_map_.erase(it);
}
else {
++it;
}
}
d_->row_properties_.erase(row); d_->row_properties_.erase(row);
// TODO: garbage collect newly unreferenced resources such as styles? // TODO: garbage collect newly unreferenced resources such as styles?
} }
@ -769,22 +750,16 @@ bool worksheet::compare(const worksheet &other, bool reference) const
if (d_->parent_ != other.d_->parent_) return false; if (d_->parent_ != other.d_->parent_) return false;
for (auto &row : d_->cell_map_)
{
if (other.d_->cell_map_.find(row.first) == other.d_->cell_map_.end())
{
return false;
}
for (auto &cell : row.second) for (auto &cell : d_->cell_map_)
{ {
if (other.d_->cell_map_[row.first].find(cell.first) == other.d_->cell_map_[row.first].end()) if (other.d_->cell_map_.find(cell.first) == other.d_->cell_map_.end())
{ {
return false; return false;
} }
xlnt::cell this_cell(&cell.second); xlnt::cell this_cell(&cell.second);
xlnt::cell other_cell(&other.d_->cell_map_[row.first][cell.first]); xlnt::cell other_cell(&other.d_->cell_map_[cell.first]);
if (this_cell.data_type() != other_cell.data_type()) if (this_cell.data_type() != other_cell.data_type())
{ {
@ -797,7 +772,6 @@ bool worksheet::compare(const worksheet &other, bool reference) const
return false; return false;
} }
} }
}
// todo: missing some comparisons // todo: missing some comparisons