mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
Streaming: skip empty rows in has_cell()/read_cell()
Previously, an empty row would confuse the parser: when we're positioned in an empty row, our helper methods detect us as being neither in the "row" _nor_ in the "sheetData". So `has_cell()` would return false when it shouldn't. Similarly, `read_cell()` wouldn't skip rows, so it would return an invalid cell when placed in an empty row, causing a segfault when the caller tried to use the cell. Callers must take care to call `has_cell()` before `read_cell()`. In the future, perhaps we can make `read_cell()` return a `std::optional` and remove `has_cell()` altogether? [Closes #492]
This commit is contained in:
parent
8d2a8e161b
commit
319c4197c1
|
@ -21,6 +21,7 @@
|
|||
// @license: http://www.opensource.org/licenses/mit-license.php
|
||||
// @author: see AUTHORS file
|
||||
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <numeric> // for std::accumulate
|
||||
#include <sstream>
|
||||
|
@ -406,171 +407,7 @@ void xlsx_consumer::open(std::istream &source)
|
|||
|
||||
// Returns the current cell of the worksheet being read.
//
// NOTE: this listing is a rendered diff hunk (-406,171 +407,7) in which removed
// and added lines are shown together without +/- markers:
//   * everything from the `if (!has_cell())` guard down to the first
//     `return cell;` is the pre-change body, which parsed the <row> attributes
//     and the <c> element inline;
//   * the post-change body is only the final `return cell(streaming_cell_.get());`,
//     i.e. a wrapper around the cell_impl held in streaming_cell_.
cell xlsx_consumer::read_cell()
|
||||
{
|
||||
// --- pre-change body (removed by this commit) starts here ---
if (!has_cell())
|
||||
{
|
||||
return cell(nullptr);
|
||||
}
|
||||

||||
auto ws = worksheet(current_worksheet_);
|
||||

||||
if (in_element(qn("spreadsheetml", "sheetData")))
|
||||
{
|
||||
expect_start_element(qn("spreadsheetml", "row"), xml::content::complex); // CT_Row
|
||||
auto row_index = static_cast<row_t>(std::stoul(parser().attribute("r")));
|
||||
auto &row_properties = ws.row_properties(row_index);
|
||||

||||
if (parser().attribute_present("ht"))
|
||||
{
|
||||
row_properties.height = converter_.deserialise(parser().attribute("ht"));
|
||||
}
|
||||

||||
if (parser().attribute_present("customHeight"))
|
||||
{
|
||||
row_properties.custom_height = is_true(parser().attribute("customHeight"));
|
||||
}
|
||||

||||
if (parser().attribute_present("hidden") && is_true(parser().attribute("hidden")))
|
||||
{
|
||||
row_properties.hidden = true;
|
||||
}
|
||||

||||
if (parser().attribute_present(qn("x14ac", "dyDescent")))
|
||||
{
|
||||
row_properties.dy_descent = converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent")));
|
||||
}
|
||||

||||
if (parser().attribute_present("spans"))
|
||||
{
|
||||
row_properties.spans = parser().attribute("spans");
|
||||
}
|
||||

||||
skip_attributes({"customFormat", "s", "customFont",
|
||||
"outlineLevel", "collapsed", "thickTop", "thickBot",
|
||||
"ph"});
|
||||
}
|
||||

||||
if (!in_element(qn("spreadsheetml", "row")))
|
||||
{
|
||||
return cell(nullptr);
|
||||
}
|
||||

||||
expect_start_element(qn("spreadsheetml", "c"), xml::content::complex);
|
||||

||||
auto cell = streaming_
|
||||
? xlnt::cell(streaming_cell_.get())
|
||||
: ws.cell(cell_reference(parser().attribute("r")));
|
||||
auto reference = cell_reference(parser().attribute("r"));
|
||||
cell.d_->parent_ = current_worksheet_;
|
||||
cell.d_->column_ = reference.column_index();
|
||||
cell.d_->row_ = reference.row();
|
||||

||||
if (parser().attribute_present("ph"))
|
||||
{
|
||||
cell.d_->phonetics_visible_ = parser().attribute<bool>("ph");
|
||||
}
|
||||

||||
auto has_type = parser().attribute_present("t");
|
||||
auto type = has_type ? parser().attribute("t") : "n";
|
||||

||||
if (parser().attribute_present("s"))
|
||||
{
|
||||
cell.format(target_.format(static_cast<std::size_t>(std::stoull(parser().attribute("s")))));
|
||||
}
|
||||

||||
auto has_value = false;
|
||||
auto value_string = std::string();
|
||||

||||
auto has_formula = false;
|
||||
auto has_shared_formula = false;
|
||||
auto formula_value_string = std::string();
|
||||

||||
while (in_element(qn("spreadsheetml", "c")))
|
||||
{
|
||||
auto current_element = expect_start_element(xml::content::mixed);
|
||||

||||
if (current_element == qn("spreadsheetml", "v")) // s:ST_Xstring
|
||||
{
|
||||
has_value = true;
|
||||
value_string = read_text();
|
||||
}
|
||||
else if (current_element == qn("spreadsheetml", "f")) // CT_CellFormula
|
||||
{
|
||||
has_formula = true;
|
||||

||||
if (parser().attribute_present("t"))
|
||||
{
|
||||
has_shared_formula = parser().attribute("t") == "shared";
|
||||
}
|
||||

||||
skip_attributes({"aca", "ref", "dt2D", "dtr", "del1",
|
||||
"del2", "r1", "r2", "ca", "si", "bx"});
|
||||

||||
formula_value_string = read_text();
|
||||
}
|
||||
else if (current_element == qn("spreadsheetml", "is")) // CT_Rst
|
||||
{
|
||||
expect_start_element(qn("spreadsheetml", "t"), xml::content::simple);
|
||||
has_value = true;
|
||||
value_string = read_text();
|
||||
expect_end_element(qn("spreadsheetml", "t"));
|
||||
}
|
||||
else
|
||||
{
|
||||
unexpected_element(current_element);
|
||||
}
|
||||

||||
expect_end_element(current_element);
|
||||
}
|
||||

||||
expect_end_element(qn("spreadsheetml", "c"));
|
||||

||||
if (has_formula && !has_shared_formula)
|
||||
{
|
||||
cell.formula(formula_value_string);
|
||||
}
|
||||

||||
if (has_value)
|
||||
{
|
||||
if (type == "str")
|
||||
{
|
||||
cell.d_->value_text_ = value_string;
|
||||
cell.data_type(cell::type::formula_string);
|
||||
}
|
||||
else if (type == "inlineStr")
|
||||
{
|
||||
cell.d_->value_text_ = value_string;
|
||||
cell.data_type(cell::type::inline_string);
|
||||
}
|
||||
else if (type == "s")
|
||||
{
|
||||
cell.d_->value_numeric_ = converter_.deserialise(value_string);
|
||||
cell.data_type(cell::type::shared_string);
|
||||
}
|
||||
else if (type == "b") // boolean
|
||||
{
|
||||
cell.value(is_true(value_string));
|
||||
}
|
||||
else if (type == "n") // numeric
|
||||
{
|
||||
cell.value(converter_.deserialise(value_string));
|
||||
}
|
||||
else if (!value_string.empty() && value_string[0] == '#')
|
||||
{
|
||||
cell.error(value_string);
|
||||
}
|
||||
}
|
||||

||||
if (!in_element(qn("spreadsheetml", "row")))
|
||||
{
|
||||
expect_end_element(qn("spreadsheetml", "row"));
|
||||

||||
if (!in_element(qn("spreadsheetml", "sheetData")))
|
||||
{
|
||||
expect_end_element(qn("spreadsheetml", "sheetData"));
|
||||
}
|
||||
}
|
||||

||||
return cell;
|
||||
// --- post-change body (added by this commit): the single statement below ---
return cell(streaming_cell_.get());
|
||||
}
|
||||
|
||||
void xlsx_consumer::read_worksheet(const std::string &rel_id)
|
||||
|
@ -1411,8 +1248,174 @@ xml::parser &xlsx_consumer::parser()
|
|||
|
||||
// Reports whether another cell is available in the worksheet being read.
//
// NOTE: this listing is a rendered diff hunk (-1411,8 +1248,174) in which removed
// and added lines are shown together without +/- markers: the two-line
// `return in_element(...) || in_element(...)` directly below is the pre-change
// body; everything after it is the post-change body.
//
// Post-change body, in order:
//   1. While streaming_cell_ is set and we are not inside a <row>: consume a
//      pending </row>, stop at </sheetData> (resetting streaming_cell_), or open
//      the next <row> and copy its attributes into the worksheet's row_properties.
//      Rows that contain no <c> elements are passed over by this loop.
//   2. If streaming_cell_ was reset, return false.
//   3. Otherwise read one <c> element (reference, format, value/formula) into
//      streaming_cell_ and return true.
//
// NOTE(review): the post-change body reads a <c> element on every call that
// returns true, so calling has_cell() twice without read_cell() in between
// looks like it would skip a cell — confirm against callers.
bool xlsx_consumer::has_cell()
|
||||
{
|
||||
// --- pre-change body (removed by this commit): the two-line return below ---
return in_element(qn("spreadsheetml", "row"))
|
||||
|| in_element(qn("spreadsheetml", "sheetData"));
|
||||
// --- post-change body (added by this commit) starts here ---
auto ws = worksheet(current_worksheet_);
|
||||

||||
while (streaming_cell_ // we're not at the end of the file
|
||||
&& !in_element(qn("spreadsheetml", "row"))) // we're at the end of a row, or between rows
|
||||
{
|
||||
if (parser().peek() == xml::parser::event_type::end_element
|
||||
&& stack_.back() == qn("spreadsheetml", "row"))
|
||||
{
|
||||
// We're at the end of a row.
|
||||
expect_end_element(qn("spreadsheetml", "row"));
|
||||
// ... and keep parsing.
|
||||
}
|
||||

||||
if (parser().peek() == xml::parser::event_type::end_element
|
||||
&& stack_.back() == qn("spreadsheetml", "sheetData"))
|
||||
{
|
||||
// End of sheet. Mark it by setting streaming_cell_ to nullptr, so we never get here again.
|
||||
expect_end_element(qn("spreadsheetml", "sheetData"));
|
||||
streaming_cell_.reset(nullptr);
|
||||
break;
|
||||
}
|
||||

||||
expect_start_element(qn("spreadsheetml", "row"), xml::content::complex); // CT_Row
|
||||
auto row_index = static_cast<row_t>(std::stoul(parser().attribute("r")));
|
||||
auto &row_properties = ws.row_properties(row_index);
|
||||

||||
if (parser().attribute_present("ht"))
|
||||
{
|
||||
row_properties.height = converter_.deserialise(parser().attribute("ht"));
|
||||
}
|
||||

||||
if (parser().attribute_present("customHeight"))
|
||||
{
|
||||
row_properties.custom_height = is_true(parser().attribute("customHeight"));
|
||||
}
|
||||

||||
if (parser().attribute_present("hidden") && is_true(parser().attribute("hidden")))
|
||||
{
|
||||
row_properties.hidden = true;
|
||||
}
|
||||

||||
if (parser().attribute_present(qn("x14ac", "dyDescent")))
|
||||
{
|
||||
row_properties.dy_descent = converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent")));
|
||||
}
|
||||

||||
if (parser().attribute_present("spans"))
|
||||
{
|
||||
row_properties.spans = parser().attribute("spans");
|
||||
}
|
||||

||||
skip_attributes({"customFormat", "s", "customFont",
|
||||
"outlineLevel", "collapsed", "thickTop", "thickBot",
|
||||
"ph"});
|
||||
}
|
||||

||||
if (!streaming_cell_)
|
||||
{
|
||||
// We're at the end of the worksheet
|
||||
return false;
|
||||
}
|
||||

||||
expect_start_element(qn("spreadsheetml", "c"), xml::content::complex);
|
||||

||||
// The cell is always built on top of streaming_cell_, hence the streaming-mode assertion.
assert(streaming_);
|
||||
auto cell = xlnt::cell(streaming_cell_.get());
|
||||
auto reference = cell_reference(parser().attribute("r"));
|
||||
cell.d_->parent_ = current_worksheet_;
|
||||
cell.d_->column_ = reference.column_index();
|
||||
cell.d_->row_ = reference.row();
|
||||

||||
if (parser().attribute_present("ph"))
|
||||
{
|
||||
cell.d_->phonetics_visible_ = parser().attribute<bool>("ph");
|
||||
}
|
||||

||||
auto has_type = parser().attribute_present("t");
|
||||
auto type = has_type ? parser().attribute("t") : "n";
|
||||

||||
if (parser().attribute_present("s"))
|
||||
{
|
||||
cell.format(target_.format(static_cast<std::size_t>(std::stoull(parser().attribute("s")))));
|
||||
}
|
||||

||||
auto has_value = false;
|
||||
auto value_string = std::string();
|
||||

||||
auto has_formula = false;
|
||||
auto has_shared_formula = false;
|
||||
auto formula_value_string = std::string();
|
||||

||||
while (in_element(qn("spreadsheetml", "c")))
|
||||
{
|
||||
auto current_element = expect_start_element(xml::content::mixed);
|
||||

||||
if (current_element == qn("spreadsheetml", "v")) // s:ST_Xstring
|
||||
{
|
||||
has_value = true;
|
||||
value_string = read_text();
|
||||
}
|
||||
else if (current_element == qn("spreadsheetml", "f")) // CT_CellFormula
|
||||
{
|
||||
has_formula = true;
|
||||

||||
if (parser().attribute_present("t"))
|
||||
{
|
||||
has_shared_formula = parser().attribute("t") == "shared";
|
||||
}
|
||||

||||
skip_attributes({"aca", "ref", "dt2D", "dtr", "del1",
|
||||
"del2", "r1", "r2", "ca", "si", "bx"});
|
||||

||||
formula_value_string = read_text();
|
||||
}
|
||||
else if (current_element == qn("spreadsheetml", "is")) // CT_Rst
|
||||
{
|
||||
expect_start_element(qn("spreadsheetml", "t"), xml::content::simple);
|
||||
has_value = true;
|
||||
value_string = read_text();
|
||||
expect_end_element(qn("spreadsheetml", "t"));
|
||||
}
|
||||
else
|
||||
{
|
||||
unexpected_element(current_element);
|
||||
}
|
||||

||||
expect_end_element(current_element);
|
||||
}
|
||||

||||
expect_end_element(qn("spreadsheetml", "c"));
|
||||

||||
if (has_formula && !has_shared_formula)
|
||||
{
|
||||
cell.formula(formula_value_string);
|
||||
}
|
||||

||||
if (has_value)
|
||||
{
|
||||
if (type == "str")
|
||||
{
|
||||
cell.d_->value_text_ = value_string;
|
||||
cell.data_type(cell::type::formula_string);
|
||||
}
|
||||
else if (type == "inlineStr")
|
||||
{
|
||||
cell.d_->value_text_ = value_string;
|
||||
cell.data_type(cell::type::inline_string);
|
||||
}
|
||||
else if (type == "s")
|
||||
{
|
||||
cell.d_->value_numeric_ = converter_.deserialise(value_string);
|
||||
cell.data_type(cell::type::shared_string);
|
||||
}
|
||||
else if (type == "b") // boolean
|
||||
{
|
||||
cell.value(is_true(value_string));
|
||||
}
|
||||
else if (type == "n") // numeric
|
||||
{
|
||||
cell.value(converter_.deserialise(value_string));
|
||||
}
|
||||
else if (!value_string.empty() && value_string[0] == '#')
|
||||
{
|
||||
cell.error(value_string);
|
||||
}
|
||||
}
|
||||

||||
return true;
|
||||
}
|
||||
|
||||
std::vector<relationship> xlsx_consumer::read_relationships(const path &part)
|
||||
|
|
|
@ -413,8 +413,6 @@ private:
|
|||
|
||||
std::unique_ptr<detail::cell_impl> streaming_cell_;
|
||||
|
||||
detail::cell_impl *current_cell_;
|
||||
|
||||
detail::worksheet_impl *current_worksheet_;
|
||||
number_serialiser converter_;
|
||||
};
|
||||
|
|
BIN
tests/data/Issue492_empty_row.xlsx
Normal file
BIN
tests/data/Issue492_empty_row.xlsx
Normal file
Binary file not shown.
|
@ -93,6 +93,7 @@ public:
|
|||
register_test(test_load_save_german_locale);
|
||||
register_test(test_Issue445_inline_str_load);
|
||||
register_test(test_Issue445_inline_str_streaming_read);
|
||||
register_test(test_Issue492_stream_empty_row);
|
||||
}
|
||||
|
||||
bool workbook_matches_file(xlnt::workbook &wb, const xlnt::path &file)
|
||||
|
@ -733,8 +734,25 @@ public:
|
|||
xlnt::streaming_workbook_reader wbr;
|
||||
wbr.open(path_helper::test_file("Issue445_inline_str.xlsx"));
|
||||
wbr.begin_worksheet("Sheet");
|
||||
xlnt_assert(wbr.has_cell());
|
||||
auto cell = wbr.read_cell();
|
||||
xlnt_assert_equals(cell.value<std::string>(), std::string("a"));
|
||||
}
|
||||
|
||||
// Regression test for issue #492: streams the "BLS Data Series" sheet of
// Issue492_empty_row.xlsx and expects has_cell()/read_cell() to yield the cells
// A1, A2, A4 and B4 in that order (nothing is reported between A2 and A4),
// after which has_cell() must return false.
void test_Issue492_stream_empty_row()
|
||||
{
|
||||
xlnt::streaming_workbook_reader wbr;
|
||||
wbr.open(path_helper::test_file("Issue492_empty_row.xlsx"));
|
||||
wbr.begin_worksheet("BLS Data Series");
|
||||
xlnt_assert(wbr.has_cell());
|
||||
xlnt_assert_equals(wbr.read_cell().reference(), "A1");
|
||||
xlnt_assert(wbr.has_cell());
|
||||
xlnt_assert_equals(wbr.read_cell().reference(), "A2");
|
||||
xlnt_assert(wbr.has_cell());
|
||||
xlnt_assert_equals(wbr.read_cell().reference(), "A4");
|
||||
xlnt_assert(wbr.has_cell());
|
||||
xlnt_assert_equals(wbr.read_cell().reference(), "B4");
|
||||
// No further cells: the sheet ends after B4.
xlnt_assert(!wbr.has_cell());
|
||||
}
|
||||
};
|
||||
static serialization_test_suite x;
|
||||
|
|
Loading…
Reference in New Issue
Block a user