diff --git a/include/xlnt/workbook/streaming_workbook_reader.hpp b/include/xlnt/workbook/streaming_workbook_reader.hpp
index 6e975426..94bccb45 100644
--- a/include/xlnt/workbook/streaming_workbook_reader.hpp
+++ b/include/xlnt/workbook/streaming_workbook_reader.hpp
@@ -71,13 +71,13 @@ public:
///
cell read_cell();
- bool has_worksheet();
+ bool has_worksheet(const std::string &name);
///
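- /// Beings reading of the next worksheet in the workbook and optionally
- /// returns its title if the last worksheet has not yet been read.
+ /// Begins reading of the worksheet with the given title. Throws
+ /// xlnt::exception if the workbook has no worksheet with that title.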
///
- void begin_worksheet();
+ void begin_worksheet(const std::string &name);
///
/// Ends reading of the current worksheet in the workbook and optionally
@@ -118,8 +118,13 @@ public:
///
void open(std::istream &stream);
+ ///
+ /// Returns a vector of the titles of sheets in the workbook in order.
+ ///
+ std::vector sheet_titles();
+
private:
- std::vector worksheet_queue_;
+ std::string worksheet_rel_id_;
std::unique_ptr consumer_;
std::unique_ptr workbook_;
std::unique_ptr stream_;
diff --git a/include/xlnt/workbook/workbook.hpp b/include/xlnt/workbook/workbook.hpp
index f879be08..fd2dcc57 100644
--- a/include/xlnt/workbook/workbook.hpp
+++ b/include/xlnt/workbook/workbook.hpp
@@ -63,6 +63,7 @@ class protection;
class range;
class range_reference;
class relationship;
+class streaming_workbook_reader;
class style;
class style_serializer;
class theme;
@@ -777,6 +778,7 @@ public:
bool operator!=(const workbook &rhs) const;
private:
+ friend class streaming_workbook_reader;
friend class worksheet;
friend class detail::xlsx_consumer;
friend class detail::xlsx_producer;
diff --git a/source/detail/serialization/xlsx_consumer.cpp b/source/detail/serialization/xlsx_consumer.cpp
index 500303e3..cef9cddf 100644
--- a/source/detail/serialization/xlsx_consumer.cpp
+++ b/source/detail/serialization/xlsx_consumer.cpp
@@ -328,16 +328,6 @@ std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id)
return p.second == rel_id;
})->first;
- auto id = sheet_title_id_map_[title];
- auto index = sheet_title_index_map_[title];
-
- auto insertion_iter = target_.d_->worksheets_.begin();
- while (insertion_iter != target_.d_->worksheets_.end() && sheet_title_index_map_[insertion_iter->title_] < index)
- {
- ++insertion_iter;
- }
-
- current_worksheet_ = &*target_.d_->worksheets_.emplace(insertion_iter, &target_, id, title);
auto ws = worksheet(current_worksheet_);
expect_start_element(qn("spreadsheetml", "worksheet"), xml::content::complex); // CT_Worksheet
@@ -1572,14 +1562,29 @@ void xlsx_consumer::read_office_document(const std::string &content_type) // CT_
relationship_type::theme)});
}
- if (streaming_)
- {
- return;
- }
-
for (auto worksheet_rel : manifest().relationships(workbook_path, relationship_type::worksheet))
{
- read_part({workbook_rel, worksheet_rel});
+ // Reverse lookup: find the sheet title whose relationship id matches this
+ // worksheet relationship.
+ // NOTE(review): the find_if result is dereferenced without being compared
+ // to end(); a relationship with no matching title would be undefined
+ // behavior -- confirm the map is always fully populated before this loop.
+ auto title = std::find_if(target_.d_->sheet_title_rel_id_map_.begin(),
+ target_.d_->sheet_title_rel_id_map_.end(),
+ [&](const std::pair &p) {
+ return p.second == worksheet_rel.id();
+ })->first;
+
+ auto id = sheet_title_id_map_[title];
+ auto index = sheet_title_index_map_[title];
+
+ auto insertion_iter = target_.d_->worksheets_.begin();
+ while (insertion_iter != target_.d_->worksheets_.end() && sheet_title_index_map_[insertion_iter->title_] < index)
+ {
+ ++insertion_iter;
+ }
+
+ current_worksheet_ = &*target_.d_->worksheets_.emplace(insertion_iter, &target_, id, title);
+
+ if (!streaming_)
+ {
+ read_part({ workbook_rel, worksheet_rel });
+ }
}
}
diff --git a/source/workbook/streaming_workbook_reader.cpp b/source/workbook/streaming_workbook_reader.cpp
index 83aa90a9..3d7965e5 100644
--- a/source/workbook/streaming_workbook_reader.cpp
+++ b/source/workbook/streaming_workbook_reader.cpp
@@ -23,6 +23,7 @@
#include
+#include
#include
#include
#include
@@ -64,18 +65,24 @@ cell streaming_workbook_reader::read_cell()
return consumer_->read_cell();
}
-bool streaming_workbook_reader::has_worksheet()
+// Reports whether sheet_titles() contains a sheet titled `name`.
+bool streaming_workbook_reader::has_worksheet(const std::string &name)
{
- return !worksheet_queue_.empty();
+ const auto known_titles = sheet_titles();
+ const auto match = std::find(known_titles.begin(), known_titles.end(), name);
+
+ return match != known_titles.end();
}
-void streaming_workbook_reader::begin_worksheet()
+void streaming_workbook_reader::begin_worksheet(const std::string &title)
{
- const auto next_worksheet_rel = worksheet_queue_.back();
+ if (!has_worksheet(title))
+ {
+ throw xlnt::exception("sheet not found");
+ }
+
+ worksheet_rel_id_ = workbook_->impl().sheet_title_rel_id_map_.at(title);
const auto workbook_rel = workbook_->manifest()
.relationship(path("/"), relationship_type::office_document);
const auto worksheet_rel = workbook_->manifest()
- .relationship(workbook_rel.target().path(), next_worksheet_rel);
+ .relationship(workbook_rel.target().path(), worksheet_rel_id_);
auto rel_chain = std::vector{ workbook_rel, worksheet_rel };
@@ -87,14 +94,27 @@ void streaming_workbook_reader::begin_worksheet()
parser_.reset(new xml::parser(*part_stream_, part_path.string()));
consumer_->parser_ = parser_.get();
- consumer_->read_worksheet_begin(next_worksheet_rel);
+ consumer_->current_worksheet_ = nullptr;
+
+ for (auto &impl : workbook_->impl().worksheets_)
+ {
+ if (impl.title_ == title)
+ {
+ consumer_->current_worksheet_ = &impl;
+ }
+ }
+
+ if (consumer_->current_worksheet_ == nullptr)
+ {
+ throw xlnt::exception("sheet not found");
+ }
+
+ consumer_->read_worksheet_begin(worksheet_rel_id_);
}
worksheet streaming_workbook_reader::end_worksheet()
{
- auto next_worksheet_rel = worksheet_queue_.back();
- worksheet_queue_.pop_back();
- return consumer_->read_worksheet_end(next_worksheet_rel);
+ // Close out the sheet whose relationship id was recorded by begin_worksheet().
+ worksheet finished_sheet = consumer_->read_worksheet_end(worksheet_rel_id_);
+ return finished_sheet;
}
void streaming_workbook_reader::open(const std::vector &data)
@@ -136,12 +156,11 @@ void streaming_workbook_reader::open(std::istream &stream)
- const auto workbook_rel = workbook_->manifest()
- .relationship(path("/"), relationship_type::office_document);
- const auto workbook_path = workbook_rel.target().path();
- for (auto worksheet_rel : workbook_->manifest()
- .relationships(workbook_path, relationship_type::worksheet))
- {
- worksheet_queue_.push_back(worksheet_rel.id());
- }
+}
+// Returns the sheet titles reported by the underlying workbook object.
+std::vector streaming_workbook_reader::sheet_titles()
+{
+ auto titles = workbook_->sheet_titles();
+ return titles;
}
} // namespace xlnt
diff --git a/tests/workbook/serialization_test_suite.hpp b/tests/workbook/serialization_test_suite.hpp
index edb3748e..abd6a642 100644
--- a/tests/workbook/serialization_test_suite.hpp
+++ b/tests/workbook/serialization_test_suite.hpp
@@ -473,14 +473,14 @@ public:
reader.open(xlnt::path(path));
- while (reader.has_worksheet())
+ for (const auto &sheet_name : reader.sheet_titles())
{
- reader.begin_worksheet();
+ reader.begin_worksheet(sheet_name);
while (reader.has_cell())
{
const auto cell = reader.read_cell();
//std::cout << cell.reference().to_string() << std::endl;
}
const auto ws = reader.end_worksheet();
diff --git a/xlntpyarrow/methods.cpp b/xlntpyarrow/methods.cpp
index 428479b2..4b8fffe2 100644
--- a/xlntpyarrow/methods.cpp
+++ b/xlntpyarrow/methods.cpp
@@ -34,6 +34,7 @@
#include // must be included after Arrow
#include
+#include
#include
#include
#include
@@ -61,7 +62,7 @@ std::unique_ptr make_array_builder(xlnt::cell::type type)
return std::unique_ptr(new arrow::Date32Builder(arrow::default_memory_pool()));
}
- default_case(std::unique_ptr(nullptr));
+ default_case(std::unique_ptr(nullptr));
}
arrow::Field make_type_field(const std::string &name, xlnt::cell::type type)
@@ -82,7 +83,7 @@ arrow::Field make_type_field(const std::string &name, xlnt::cell::type type)
return arrow::Field(name, arrow::date32());
}
- default_case(arrow::Field("", arrow::null()));
+ default_case(arrow::Field("", arrow::null()));
}
} // namespace xlnt
@@ -114,29 +115,88 @@ extern "C" {
PyObject *xlntpyarrow_xlsx2arrow(PyObject *self, PyObject *args, PyObject *kwargs)
{
- static const char *keywords[] = { "file", NULL };
+ // Keyword names appear to mirror pandas.read_excel (TODO confirm); only
+ // "io" and "sheetname" are consumed in the code below.
+ static const char *keywords[] = { "io", "sheetname", "header", "skiprows",
+ "skip_footer", "index_col", "names", "converters", "dtype", "true_values",
+ "false_values", "parse_cols", "squeeze", "na_values", "thousands",
+ "keep_default_na", "verbose", "convert_float", nullptr };
static auto keywords_nc = const_cast(keywords);
- PyObject *file = NULL;
+ PyObject *io = nullptr;
+ PyObject *sheetname = nullptr;
+ PyObject *header = nullptr;
+ PyObject *skiprows = nullptr;
+ int skip_footer = 0;
+ PyObject *index_col = nullptr;
+ PyObject *names = nullptr;
+ PyObject *converters = nullptr;
+ PyObject *dtype = nullptr;
+ PyObject *true_values = nullptr;
+ PyObject *false_values = nullptr;
+ PyObject *parse_cols = nullptr;
+ // The "p" format unit stores its result through an int *, so these flags
+ // must be int; a bool target would be overrun by the parser.
+ int squeeze = 0;
+ PyObject *na_values = nullptr;
+ const char *thousands = nullptr;
+ int keep_default_na = 0;
+ int verbose = 0;
+ int convert_float = 0;
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", keywords_nc, &file))
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOiOOOOOOOpOzppp", keywords_nc,
+ &io, &sheetname, &header, &skiprows, &skip_footer, &index_col, &names,
+ &converters, &dtype, &true_values, &false_values, &parse_cols, &squeeze,
+ &na_values, &thousands, &keep_default_na, &verbose, &convert_float))
{
- return NULL;
+ // Keep the exception raised by the argument parser set and signal
+ // failure, so the Python caller sees the TypeError.
+ return nullptr;
}
if (!import_pyarrow())
{
Py_RETURN_NONE;
}
- xlnt::python_streambuf file_buffer(file);
+ // arg #1, io
+ xlnt::python_streambuf file_buffer(io);
std::istream file_stream(&file_buffer);
xlnt::streaming_workbook_reader reader;
reader.open(file_stream);
- reader.begin_worksheet();
+
+ // arg #2, sheetname: defaults to the first sheet; an int selects by
+ // position and a str selects by title. Other types keep the default.
+ const auto sheet_titles = reader.sheet_titles();
+
+ if (sheet_titles.empty())
+ {
+ PyErr_SetString(PyExc_ValueError, "workbook contains no worksheets");
+ return nullptr;
+ }
+
+ auto sheet_title = sheet_titles.front();
+
+ if (sheetname != nullptr)
+ {
+ if (PyLong_Check(sheetname))
+ {
+ // handle int sheetname
+ const auto sheet_index = PyLong_AsLong(sheetname);
+
+ // -1 with an exception set means the conversion itself failed.
+ if (sheet_index == -1 && PyErr_Occurred() != nullptr)
+ {
+ return nullptr;
+ }
+
+ // Report a bad index as a Python error; a C++ exception must not
+ // escape from this extern "C" entry point.
+ if (sheet_index < 0 || static_cast<unsigned long>(sheet_index) >= sheet_titles.size())
+ {
+ PyErr_SetString(PyExc_IndexError, "sheet index out of range");
+ return nullptr;
+ }
+
+ sheet_title = sheet_titles[static_cast<unsigned long>(sheet_index)];
+ }
+ else if (PyUnicode_Check(sheetname))
+ {
+ // handle string sheetname
+ // PyUnicode_AsUTF8 works for every str, unlike reading the raw
+ // 1-byte buffer; assumes sheet titles are UTF-8 -- TODO confirm.
+ const char *utf8_title = PyUnicode_AsUTF8(sheetname);
+
+ if (utf8_title == nullptr)
+ {
+ return nullptr;
+ }
+
+ sheet_title = utf8_title;
+ }
+ }
+
+ // begin_worksheet throws for an unknown title, so check first and raise
+ // a Python error instead.
+ if (!reader.has_worksheet(sheet_title))
+ {
+ PyErr_SetString(PyExc_ValueError, "sheet not found");
+ return nullptr;
+ }
+
+ reader.begin_worksheet(sheet_title);
auto column_names = std::vector();
auto columns = std::vector>();
@@ -223,15 +283,15 @@ PyObject *xlntpyarrow_xlsx2arrow(PyObject *self, PyObject *args, PyObject *kwarg
PyObject *xlntpyarrow_arrow2xlsx(PyObject *self, PyObject *args, PyObject *kwargs)
{
- static const char *keywords[] = { "table", "file", NULL };
+ static const char *keywords[] = { "table", "file", nullptr };
static auto keywords_nc = const_cast(keywords);
- PyObject *table = NULL;
- PyObject *file = NULL;
+ PyObject *table = nullptr;
+ PyObject *file = nullptr;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO", keywords_nc, &table, &file))
{
- return NULL;
+ return nullptr;
}
if (!import_pyarrow())