mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
now we're getting somewhere! xlntpyarrow.xlsx2arrow returns an Arrow table that can be converted to a pandas DataFrame now
This commit is contained in:
parent
5b95b3d463
commit
33399a5390
|
@ -10,6 +10,6 @@ class Table;
|
|||
// Declarations of the conversion helpers between XLSX streams and Arrow tables.
namespace xlnt {
|
||||
|
||||
/// Reads an XLSX workbook from the input stream `s` and returns its contents
/// as an Arrow table.
std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s);
|
||||
/// Writes `table` to the output stream `s` as an XLSX file.
/// NOTE(review): this declaration and the one below differ only in the
/// constness of the pointed-to table; they look like the pre- and post-change
/// lines of the same declaration -- confirm which one is current.
void XLNT_API arrow2xlsx(std::shared_ptr<const arrow::Table> &table, std::ostream &s);
|
||||
/// Writes `table` to the output stream `s` as an XLSX file (overload taking a
/// pointer to a mutable table).
void XLNT_API arrow2xlsx(std::shared_ptr<arrow::Table> &table, std::ostream &s);
|
||||
|
||||
} // namespace xlnt
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include <arrow/api.h>
|
||||
#pragma warning(pop)
|
||||
|
||||
#include <detail/default_case.hpp>
|
||||
#include <xlnt/cell/cell.hpp>
|
||||
#include <xlnt/cell/cell_reference.hpp>
|
||||
#include <xlnt/utils/xlntarrow.hpp>
|
||||
|
@ -52,6 +53,8 @@ std::unique_ptr<arrow::ArrayBuilder> make_array_builder(xlnt::cell::type type)
|
|||
case xlnt::cell::type::date:
|
||||
return std::unique_ptr<arrow::Date32Builder>(new arrow::Date32Builder(arrow::default_memory_pool()));
|
||||
}
|
||||
|
||||
default_case(std::unique_ptr<arrow::ArrayBuilder>(nullptr));
|
||||
}
|
||||
|
||||
arrow::Field make_type_field(const std::string &name, xlnt::cell::type type)
|
||||
|
@ -71,6 +74,8 @@ arrow::Field make_type_field(const std::string &name, xlnt::cell::type type)
|
|||
case xlnt::cell::type::date:
|
||||
return arrow::Field(name, arrow::date32());
|
||||
}
|
||||
|
||||
default_case(arrow::Field("", arrow::null()));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -103,6 +108,7 @@ std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s)
|
|||
if (cell.row() == 1)
|
||||
{
|
||||
column_names.push_back(cell.value<std::string>());
|
||||
continue;
|
||||
}
|
||||
else if (cell.row() == 2)
|
||||
{
|
||||
|
@ -111,6 +117,41 @@ std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s)
|
|||
fields.push_back(std::make_shared<arrow::Field>(field));
|
||||
columns.push_back(make_array_builder(cell.data_type()));
|
||||
}
|
||||
|
||||
auto builder = columns.at(cell.column().index - 1).get();
|
||||
|
||||
switch (cell.data_type())
|
||||
{
|
||||
case xlnt::cell::type::number:
|
||||
{
|
||||
auto typed_builder = static_cast<arrow::DoubleBuilder*>(builder);
|
||||
typed_builder->Append(0);
|
||||
break;
|
||||
}
|
||||
case xlnt::cell::type::inline_string:
|
||||
case xlnt::cell::type::shared_string:
|
||||
case xlnt::cell::type::error:
|
||||
case xlnt::cell::type::formula_string:
|
||||
case xlnt::cell::type::empty:
|
||||
{
|
||||
auto typed_builder = static_cast<arrow::StringBuilder*>(builder);
|
||||
typed_builder->Append(cell.value<std::string>());
|
||||
break;
|
||||
}
|
||||
case xlnt::cell::type::boolean:
|
||||
{
|
||||
auto typed_builder = static_cast<arrow::BooleanBuilder*>(builder);
|
||||
typed_builder->Append(cell.value<bool>());
|
||||
break;
|
||||
}
|
||||
case xlnt::cell::type::date:
|
||||
{
|
||||
auto typed_builder = static_cast<arrow::Date32Builder*>(builder);
|
||||
typed_builder->Append(cell.value<int>());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
reader.end_worksheet();
|
||||
|
@ -131,7 +172,7 @@ std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s)
|
|||
return table;
|
||||
}
|
||||
|
||||
void XLNT_API arrow2xlsx(std::shared_ptr<const arrow::Table> &table, std::ostream &s)
|
||||
void XLNT_API arrow2xlsx(std::shared_ptr<arrow::Table> &table, std::ostream &s)
|
||||
{
|
||||
xlnt::streaming_workbook_writer writer;
|
||||
writer.open(s);
|
||||
|
|
|
@ -42,6 +42,7 @@ xlntpyarrow_extension = Extension(
|
|||
include_dirs = include_dirs,
|
||||
libraries = [
|
||||
'arrow',
|
||||
'arrow_python',
|
||||
'xlnt'
|
||||
],
|
||||
library_dirs = library_dirs,
|
||||
|
|
|
@ -3,17 +3,65 @@
|
|||
#include <vector>
|
||||
|
||||
#include <arrow/api.h>
|
||||
#include <arrow/python/pyarrow.h>
|
||||
#include <Python.h> // must be included after Arrow
|
||||
|
||||
#include <python_streambuf.hpp>
|
||||
#include <xlnt/utils/xlntarrow.hpp>
|
||||
|
||||
PyObject *xlsx2arrow(PyObject *file)
|
||||
bool import_pyarrow()
|
||||
{
|
||||
xlnt::python_streambuf buffer(file);
|
||||
static bool imported = false;
|
||||
|
||||
if (!imported)
|
||||
{
|
||||
if (!arrow::py::import_pyarrow())
|
||||
{
|
||||
if (PyErr_Occurred() != nullptr)
|
||||
{
|
||||
PyErr_Print();
|
||||
PyErr_Clear();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
imported = true;
|
||||
}
|
||||
}
|
||||
|
||||
return imported;
|
||||
}
|
||||
|
||||
/// Converts a Python file-like object containing XLSX data into a pyarrow table.
///
/// @param pyfile Python object wrapped by xlnt::python_streambuf and read as
///               an input stream.
/// @return The table produced by xlnt::xlsx2arrow, wrapped as a Python object
///         via arrow::py::wrap_table; Python None when import_pyarrow()
///         reports that pyarrow is unavailable.
PyObject *xlsx2arrow(PyObject *pyfile)
{
    if (import_pyarrow())
    {
        // Adapt the Python object to a std::istream that xlnt can consume.
        xlnt::python_streambuf source(pyfile);
        std::istream input(&source);

        auto parsed = xlnt::xlsx2arrow(input);
        return arrow::py::wrap_table(parsed);
    }

    Py_RETURN_NONE;
}
|
||||
|
||||
/// Python-facing entry point for writing a pyarrow table to a file object as XLSX.
///
/// The conversion itself is currently disabled (kept below as commented-out
/// code), so both arguments are ignored and the function always returns
/// Python None after attempting the pyarrow import.
///
/// @param pytable Python object expected to hold the table (currently unused).
/// @param pyfile  Python object expected to be the output file (currently unused).
/// @return Python None.
PyObject *arrow2xlsx(PyObject *pytable, PyObject *pyfile)
{
    // Silence unused-parameter warnings while the implementation is disabled.
    static_cast<void>(pytable);
    static_cast<void>(pyfile);

    if (import_pyarrow())
    {
        /*
        auto table = arrow::py::unwrap_table(pytable);
        xlnt::python_streambuf buffer(pyfile);
        std::ostream stream(&buffer);
        xlnt::arrow2xlsx(table, stream);
        */
    }

    Py_RETURN_NONE;
}
|
||||
|
||||
|
@ -28,10 +76,11 @@ Returns an arrow table representing the given XLSX file object.");
|
|||
|
||||
PyObject *xlntpyarrow_xlsx2arrow(PyObject *self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
PyObject *file = NULL;
|
||||
static const char *keywords[] = { "file", NULL };
|
||||
static auto keywords_nc = const_cast<char **>(keywords);
|
||||
|
||||
PyObject *file = NULL;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", keywords_nc, &file))
|
||||
{
|
||||
return NULL;
|
||||
|
@ -50,16 +99,18 @@ Writes the given arrow table to out_file as an XLSX file.");
|
|||
|
||||
/// Python binding for `arrow2xlsx(table, file)`.
///
/// Parses two object arguments, accepted positionally or by the keywords
/// "table" and "file", and forwards them to arrow2xlsx().
///
/// @param self   Module object supplied by the interpreter (unused).
/// @param args   Positional arguments tuple.
/// @param kwargs Keyword arguments dict (may be null).
/// @return The result of arrow2xlsx(table, file), or a null pointer when
///         argument parsing fails (PyArg_ParseTupleAndKeywords has then set
///         the Python exception).
PyObject *xlntpyarrow_arrow2xlsx(PyObject *self, PyObject *args, PyObject *kwargs)
{
    static const char *keywords[] = { "table", "file", nullptr };
    // PyArg_ParseTupleAndKeywords takes a non-const char ** for historical
    // reasons; the keyword strings themselves are never modified.
    static auto keywords_nc = const_cast<char **>(keywords);

    PyObject *table = nullptr;
    PyObject *file = nullptr;

    // "OO": two arbitrary Python objects, one output pointer per format unit.
    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO", keywords_nc, &table, &file))
    {
        return nullptr;
    }

    return arrow2xlsx(table, file);
}
|
||||
|
||||
// 2.7/3 compatible based on https://docs.python.org/3/howto/cporting.html
|
||||
|
|
Loading…
Reference in New Issue
Block a user