mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
Streaming: skip empty rows in has_cell()/read_cell()
Previously, an empty row would mess with the parser: if we're in an empty row, our helper methods don't detect us as being in the "row" _or_ in the "sheetData". So `has_cell()` would return false when it shouldn't. Similarly, `read_cell()` wouldn't skip rows; so `read_cell()` would return an invalid cell when placed in an empty row, causing a segfault when the caller tried to use the cell. Callers must take care to call `has_next()` before `read_next()`. In the future, perhaps we can make `read_next()` return a `std::optional` and nix `has_next()` altogether? [Closes #492]
This commit is contained in:
parent
8d2a8e161b
commit
319c4197c1
|
@ -21,6 +21,7 @@
|
||||||
// @license: http://www.opensource.org/licenses/mit-license.php
|
// @license: http://www.opensource.org/licenses/mit-license.php
|
||||||
// @author: see AUTHORS file
|
// @author: see AUTHORS file
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <numeric> // for std::accumulate
|
#include <numeric> // for std::accumulate
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
@ -406,171 +407,7 @@ void xlsx_consumer::open(std::istream &source)
|
||||||
|
|
||||||
cell xlsx_consumer::read_cell()
|
cell xlsx_consumer::read_cell()
|
||||||
{
|
{
|
||||||
if (!has_cell())
|
return cell(streaming_cell_.get());
|
||||||
{
|
|
||||||
return cell(nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto ws = worksheet(current_worksheet_);
|
|
||||||
|
|
||||||
if (in_element(qn("spreadsheetml", "sheetData")))
|
|
||||||
{
|
|
||||||
expect_start_element(qn("spreadsheetml", "row"), xml::content::complex); // CT_Row
|
|
||||||
auto row_index = static_cast<row_t>(std::stoul(parser().attribute("r")));
|
|
||||||
auto &row_properties = ws.row_properties(row_index);
|
|
||||||
|
|
||||||
if (parser().attribute_present("ht"))
|
|
||||||
{
|
|
||||||
row_properties.height = converter_.deserialise(parser().attribute("ht"));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (parser().attribute_present("customHeight"))
|
|
||||||
{
|
|
||||||
row_properties.custom_height = is_true(parser().attribute("customHeight"));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (parser().attribute_present("hidden") && is_true(parser().attribute("hidden")))
|
|
||||||
{
|
|
||||||
row_properties.hidden = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (parser().attribute_present(qn("x14ac", "dyDescent")))
|
|
||||||
{
|
|
||||||
row_properties.dy_descent = converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent")));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (parser().attribute_present("spans"))
|
|
||||||
{
|
|
||||||
row_properties.spans = parser().attribute("spans");
|
|
||||||
}
|
|
||||||
|
|
||||||
skip_attributes({"customFormat", "s", "customFont",
|
|
||||||
"outlineLevel", "collapsed", "thickTop", "thickBot",
|
|
||||||
"ph"});
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!in_element(qn("spreadsheetml", "row")))
|
|
||||||
{
|
|
||||||
return cell(nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
expect_start_element(qn("spreadsheetml", "c"), xml::content::complex);
|
|
||||||
|
|
||||||
auto cell = streaming_
|
|
||||||
? xlnt::cell(streaming_cell_.get())
|
|
||||||
: ws.cell(cell_reference(parser().attribute("r")));
|
|
||||||
auto reference = cell_reference(parser().attribute("r"));
|
|
||||||
cell.d_->parent_ = current_worksheet_;
|
|
||||||
cell.d_->column_ = reference.column_index();
|
|
||||||
cell.d_->row_ = reference.row();
|
|
||||||
|
|
||||||
if (parser().attribute_present("ph"))
|
|
||||||
{
|
|
||||||
cell.d_->phonetics_visible_ = parser().attribute<bool>("ph");
|
|
||||||
}
|
|
||||||
|
|
||||||
auto has_type = parser().attribute_present("t");
|
|
||||||
auto type = has_type ? parser().attribute("t") : "n";
|
|
||||||
|
|
||||||
if (parser().attribute_present("s"))
|
|
||||||
{
|
|
||||||
cell.format(target_.format(static_cast<std::size_t>(std::stoull(parser().attribute("s")))));
|
|
||||||
}
|
|
||||||
|
|
||||||
auto has_value = false;
|
|
||||||
auto value_string = std::string();
|
|
||||||
|
|
||||||
auto has_formula = false;
|
|
||||||
auto has_shared_formula = false;
|
|
||||||
auto formula_value_string = std::string();
|
|
||||||
|
|
||||||
while (in_element(qn("spreadsheetml", "c")))
|
|
||||||
{
|
|
||||||
auto current_element = expect_start_element(xml::content::mixed);
|
|
||||||
|
|
||||||
if (current_element == qn("spreadsheetml", "v")) // s:ST_Xstring
|
|
||||||
{
|
|
||||||
has_value = true;
|
|
||||||
value_string = read_text();
|
|
||||||
}
|
|
||||||
else if (current_element == qn("spreadsheetml", "f")) // CT_CellFormula
|
|
||||||
{
|
|
||||||
has_formula = true;
|
|
||||||
|
|
||||||
if (parser().attribute_present("t"))
|
|
||||||
{
|
|
||||||
has_shared_formula = parser().attribute("t") == "shared";
|
|
||||||
}
|
|
||||||
|
|
||||||
skip_attributes({"aca", "ref", "dt2D", "dtr", "del1",
|
|
||||||
"del2", "r1", "r2", "ca", "si", "bx"});
|
|
||||||
|
|
||||||
formula_value_string = read_text();
|
|
||||||
}
|
|
||||||
else if (current_element == qn("spreadsheetml", "is")) // CT_Rst
|
|
||||||
{
|
|
||||||
expect_start_element(qn("spreadsheetml", "t"), xml::content::simple);
|
|
||||||
has_value = true;
|
|
||||||
value_string = read_text();
|
|
||||||
expect_end_element(qn("spreadsheetml", "t"));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
unexpected_element(current_element);
|
|
||||||
}
|
|
||||||
|
|
||||||
expect_end_element(current_element);
|
|
||||||
}
|
|
||||||
|
|
||||||
expect_end_element(qn("spreadsheetml", "c"));
|
|
||||||
|
|
||||||
if (has_formula && !has_shared_formula)
|
|
||||||
{
|
|
||||||
cell.formula(formula_value_string);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (has_value)
|
|
||||||
{
|
|
||||||
if (type == "str")
|
|
||||||
{
|
|
||||||
cell.d_->value_text_ = value_string;
|
|
||||||
cell.data_type(cell::type::formula_string);
|
|
||||||
}
|
|
||||||
else if (type == "inlineStr")
|
|
||||||
{
|
|
||||||
cell.d_->value_text_ = value_string;
|
|
||||||
cell.data_type(cell::type::inline_string);
|
|
||||||
}
|
|
||||||
else if (type == "s")
|
|
||||||
{
|
|
||||||
cell.d_->value_numeric_ = converter_.deserialise(value_string);
|
|
||||||
cell.data_type(cell::type::shared_string);
|
|
||||||
}
|
|
||||||
else if (type == "b") // boolean
|
|
||||||
{
|
|
||||||
cell.value(is_true(value_string));
|
|
||||||
}
|
|
||||||
else if (type == "n") // numeric
|
|
||||||
{
|
|
||||||
cell.value(converter_.deserialise(value_string));
|
|
||||||
}
|
|
||||||
else if (!value_string.empty() && value_string[0] == '#')
|
|
||||||
{
|
|
||||||
cell.error(value_string);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!in_element(qn("spreadsheetml", "row")))
|
|
||||||
{
|
|
||||||
expect_end_element(qn("spreadsheetml", "row"));
|
|
||||||
|
|
||||||
if (!in_element(qn("spreadsheetml", "sheetData")))
|
|
||||||
{
|
|
||||||
expect_end_element(qn("spreadsheetml", "sheetData"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return cell;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void xlsx_consumer::read_worksheet(const std::string &rel_id)
|
void xlsx_consumer::read_worksheet(const std::string &rel_id)
|
||||||
|
@ -1411,8 +1248,174 @@ xml::parser &xlsx_consumer::parser()
|
||||||
|
|
||||||
bool xlsx_consumer::has_cell()
|
bool xlsx_consumer::has_cell()
|
||||||
{
|
{
|
||||||
return in_element(qn("spreadsheetml", "row"))
|
auto ws = worksheet(current_worksheet_);
|
||||||
|| in_element(qn("spreadsheetml", "sheetData"));
|
|
||||||
|
while (streaming_cell_ // we're not at the end of the file
|
||||||
|
&& !in_element(qn("spreadsheetml", "row"))) // we're at the end of a row, or between rows
|
||||||
|
{
|
||||||
|
if (parser().peek() == xml::parser::event_type::end_element
|
||||||
|
&& stack_.back() == qn("spreadsheetml", "row"))
|
||||||
|
{
|
||||||
|
// We're at the end of a row.
|
||||||
|
expect_end_element(qn("spreadsheetml", "row"));
|
||||||
|
// ... and keep parsing.
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parser().peek() == xml::parser::event_type::end_element
|
||||||
|
&& stack_.back() == qn("spreadsheetml", "sheetData"))
|
||||||
|
{
|
||||||
|
// End of sheet. Mark it by setting streaming_cell_ to nullptr, so we never get here again.
|
||||||
|
expect_end_element(qn("spreadsheetml", "sheetData"));
|
||||||
|
streaming_cell_.reset(nullptr);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
expect_start_element(qn("spreadsheetml", "row"), xml::content::complex); // CT_Row
|
||||||
|
auto row_index = static_cast<row_t>(std::stoul(parser().attribute("r")));
|
||||||
|
auto &row_properties = ws.row_properties(row_index);
|
||||||
|
|
||||||
|
if (parser().attribute_present("ht"))
|
||||||
|
{
|
||||||
|
row_properties.height = converter_.deserialise(parser().attribute("ht"));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parser().attribute_present("customHeight"))
|
||||||
|
{
|
||||||
|
row_properties.custom_height = is_true(parser().attribute("customHeight"));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parser().attribute_present("hidden") && is_true(parser().attribute("hidden")))
|
||||||
|
{
|
||||||
|
row_properties.hidden = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parser().attribute_present(qn("x14ac", "dyDescent")))
|
||||||
|
{
|
||||||
|
row_properties.dy_descent = converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent")));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parser().attribute_present("spans"))
|
||||||
|
{
|
||||||
|
row_properties.spans = parser().attribute("spans");
|
||||||
|
}
|
||||||
|
|
||||||
|
skip_attributes({"customFormat", "s", "customFont",
|
||||||
|
"outlineLevel", "collapsed", "thickTop", "thickBot",
|
||||||
|
"ph"});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!streaming_cell_)
|
||||||
|
{
|
||||||
|
// We're at the end of the worksheet
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
expect_start_element(qn("spreadsheetml", "c"), xml::content::complex);
|
||||||
|
|
||||||
|
assert(streaming_);
|
||||||
|
auto cell = xlnt::cell(streaming_cell_.get());
|
||||||
|
auto reference = cell_reference(parser().attribute("r"));
|
||||||
|
cell.d_->parent_ = current_worksheet_;
|
||||||
|
cell.d_->column_ = reference.column_index();
|
||||||
|
cell.d_->row_ = reference.row();
|
||||||
|
|
||||||
|
if (parser().attribute_present("ph"))
|
||||||
|
{
|
||||||
|
cell.d_->phonetics_visible_ = parser().attribute<bool>("ph");
|
||||||
|
}
|
||||||
|
|
||||||
|
auto has_type = parser().attribute_present("t");
|
||||||
|
auto type = has_type ? parser().attribute("t") : "n";
|
||||||
|
|
||||||
|
if (parser().attribute_present("s"))
|
||||||
|
{
|
||||||
|
cell.format(target_.format(static_cast<std::size_t>(std::stoull(parser().attribute("s")))));
|
||||||
|
}
|
||||||
|
|
||||||
|
auto has_value = false;
|
||||||
|
auto value_string = std::string();
|
||||||
|
|
||||||
|
auto has_formula = false;
|
||||||
|
auto has_shared_formula = false;
|
||||||
|
auto formula_value_string = std::string();
|
||||||
|
|
||||||
|
while (in_element(qn("spreadsheetml", "c")))
|
||||||
|
{
|
||||||
|
auto current_element = expect_start_element(xml::content::mixed);
|
||||||
|
|
||||||
|
if (current_element == qn("spreadsheetml", "v")) // s:ST_Xstring
|
||||||
|
{
|
||||||
|
has_value = true;
|
||||||
|
value_string = read_text();
|
||||||
|
}
|
||||||
|
else if (current_element == qn("spreadsheetml", "f")) // CT_CellFormula
|
||||||
|
{
|
||||||
|
has_formula = true;
|
||||||
|
|
||||||
|
if (parser().attribute_present("t"))
|
||||||
|
{
|
||||||
|
has_shared_formula = parser().attribute("t") == "shared";
|
||||||
|
}
|
||||||
|
|
||||||
|
skip_attributes({"aca", "ref", "dt2D", "dtr", "del1",
|
||||||
|
"del2", "r1", "r2", "ca", "si", "bx"});
|
||||||
|
|
||||||
|
formula_value_string = read_text();
|
||||||
|
}
|
||||||
|
else if (current_element == qn("spreadsheetml", "is")) // CT_Rst
|
||||||
|
{
|
||||||
|
expect_start_element(qn("spreadsheetml", "t"), xml::content::simple);
|
||||||
|
has_value = true;
|
||||||
|
value_string = read_text();
|
||||||
|
expect_end_element(qn("spreadsheetml", "t"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
unexpected_element(current_element);
|
||||||
|
}
|
||||||
|
|
||||||
|
expect_end_element(current_element);
|
||||||
|
}
|
||||||
|
|
||||||
|
expect_end_element(qn("spreadsheetml", "c"));
|
||||||
|
|
||||||
|
if (has_formula && !has_shared_formula)
|
||||||
|
{
|
||||||
|
cell.formula(formula_value_string);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (has_value)
|
||||||
|
{
|
||||||
|
if (type == "str")
|
||||||
|
{
|
||||||
|
cell.d_->value_text_ = value_string;
|
||||||
|
cell.data_type(cell::type::formula_string);
|
||||||
|
}
|
||||||
|
else if (type == "inlineStr")
|
||||||
|
{
|
||||||
|
cell.d_->value_text_ = value_string;
|
||||||
|
cell.data_type(cell::type::inline_string);
|
||||||
|
}
|
||||||
|
else if (type == "s")
|
||||||
|
{
|
||||||
|
cell.d_->value_numeric_ = converter_.deserialise(value_string);
|
||||||
|
cell.data_type(cell::type::shared_string);
|
||||||
|
}
|
||||||
|
else if (type == "b") // boolean
|
||||||
|
{
|
||||||
|
cell.value(is_true(value_string));
|
||||||
|
}
|
||||||
|
else if (type == "n") // numeric
|
||||||
|
{
|
||||||
|
cell.value(converter_.deserialise(value_string));
|
||||||
|
}
|
||||||
|
else if (!value_string.empty() && value_string[0] == '#')
|
||||||
|
{
|
||||||
|
cell.error(value_string);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<relationship> xlsx_consumer::read_relationships(const path &part)
|
std::vector<relationship> xlsx_consumer::read_relationships(const path &part)
|
||||||
|
|
|
@ -413,8 +413,6 @@ private:
|
||||||
|
|
||||||
std::unique_ptr<detail::cell_impl> streaming_cell_;
|
std::unique_ptr<detail::cell_impl> streaming_cell_;
|
||||||
|
|
||||||
detail::cell_impl *current_cell_;
|
|
||||||
|
|
||||||
detail::worksheet_impl *current_worksheet_;
|
detail::worksheet_impl *current_worksheet_;
|
||||||
number_serialiser converter_;
|
number_serialiser converter_;
|
||||||
};
|
};
|
||||||
|
|
BIN
tests/data/Issue492_empty_row.xlsx
Normal file
BIN
tests/data/Issue492_empty_row.xlsx
Normal file
Binary file not shown.
|
@ -93,6 +93,7 @@ public:
|
||||||
register_test(test_load_save_german_locale);
|
register_test(test_load_save_german_locale);
|
||||||
register_test(test_Issue445_inline_str_load);
|
register_test(test_Issue445_inline_str_load);
|
||||||
register_test(test_Issue445_inline_str_streaming_read);
|
register_test(test_Issue445_inline_str_streaming_read);
|
||||||
|
register_test(test_Issue492_stream_empty_row);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool workbook_matches_file(xlnt::workbook &wb, const xlnt::path &file)
|
bool workbook_matches_file(xlnt::workbook &wb, const xlnt::path &file)
|
||||||
|
@ -733,8 +734,25 @@ public:
|
||||||
xlnt::streaming_workbook_reader wbr;
|
xlnt::streaming_workbook_reader wbr;
|
||||||
wbr.open(path_helper::test_file("Issue445_inline_str.xlsx"));
|
wbr.open(path_helper::test_file("Issue445_inline_str.xlsx"));
|
||||||
wbr.begin_worksheet("Sheet");
|
wbr.begin_worksheet("Sheet");
|
||||||
|
xlnt_assert(wbr.has_cell());
|
||||||
auto cell = wbr.read_cell();
|
auto cell = wbr.read_cell();
|
||||||
xlnt_assert_equals(cell.value<std::string>(), std::string("a"));
|
xlnt_assert_equals(cell.value<std::string>(), std::string("a"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void test_Issue492_stream_empty_row()
|
||||||
|
{
|
||||||
|
xlnt::streaming_workbook_reader wbr;
|
||||||
|
wbr.open(path_helper::test_file("Issue492_empty_row.xlsx"));
|
||||||
|
wbr.begin_worksheet("BLS Data Series");
|
||||||
|
xlnt_assert(wbr.has_cell());
|
||||||
|
xlnt_assert_equals(wbr.read_cell().reference(), "A1");
|
||||||
|
xlnt_assert(wbr.has_cell());
|
||||||
|
xlnt_assert_equals(wbr.read_cell().reference(), "A2");
|
||||||
|
xlnt_assert(wbr.has_cell());
|
||||||
|
xlnt_assert_equals(wbr.read_cell().reference(), "A4");
|
||||||
|
xlnt_assert(wbr.has_cell());
|
||||||
|
xlnt_assert_equals(wbr.read_cell().reference(), "B4");
|
||||||
|
xlnt_assert(!wbr.has_cell());
|
||||||
|
}
|
||||||
};
|
};
|
||||||
static serialization_test_suite x;
|
static serialization_test_suite x;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user