mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
now we're getting somewhere! xlntpyarrow.xlsx2arrow returns an Arrow table that can be converted to a pandas DataFrame now
This commit is contained in:
parent
5b95b3d463
commit
33399a5390
|
@ -10,6 +10,6 @@ class Table;
|
||||||
namespace xlnt {
|
namespace xlnt {
|
||||||
|
|
||||||
std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s);
|
std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s);
|
||||||
void XLNT_API arrow2xlsx(std::shared_ptr<const arrow::Table> &table, std::ostream &s);
|
void XLNT_API arrow2xlsx(std::shared_ptr<arrow::Table> &table, std::ostream &s);
|
||||||
|
|
||||||
} // namespace xlnt
|
} // namespace xlnt
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
#include <arrow/api.h>
|
#include <arrow/api.h>
|
||||||
#pragma warning(pop)
|
#pragma warning(pop)
|
||||||
|
|
||||||
|
#include <detail/default_case.hpp>
|
||||||
#include <xlnt/cell/cell.hpp>
|
#include <xlnt/cell/cell.hpp>
|
||||||
#include <xlnt/cell/cell_reference.hpp>
|
#include <xlnt/cell/cell_reference.hpp>
|
||||||
#include <xlnt/utils/xlntarrow.hpp>
|
#include <xlnt/utils/xlntarrow.hpp>
|
||||||
|
@ -52,6 +53,8 @@ std::unique_ptr<arrow::ArrayBuilder> make_array_builder(xlnt::cell::type type)
|
||||||
case xlnt::cell::type::date:
|
case xlnt::cell::type::date:
|
||||||
return std::unique_ptr<arrow::Date32Builder>(new arrow::Date32Builder(arrow::default_memory_pool()));
|
return std::unique_ptr<arrow::Date32Builder>(new arrow::Date32Builder(arrow::default_memory_pool()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
default_case(std::unique_ptr<arrow::ArrayBuilder>(nullptr));
|
||||||
}
|
}
|
||||||
|
|
||||||
arrow::Field make_type_field(const std::string &name, xlnt::cell::type type)
|
arrow::Field make_type_field(const std::string &name, xlnt::cell::type type)
|
||||||
|
@ -71,6 +74,8 @@ arrow::Field make_type_field(const std::string &name, xlnt::cell::type type)
|
||||||
case xlnt::cell::type::date:
|
case xlnt::cell::type::date:
|
||||||
return arrow::Field(name, arrow::date32());
|
return arrow::Field(name, arrow::date32());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
default_case(arrow::Field("", arrow::null()));
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -103,6 +108,7 @@ std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s)
|
||||||
if (cell.row() == 1)
|
if (cell.row() == 1)
|
||||||
{
|
{
|
||||||
column_names.push_back(cell.value<std::string>());
|
column_names.push_back(cell.value<std::string>());
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
else if (cell.row() == 2)
|
else if (cell.row() == 2)
|
||||||
{
|
{
|
||||||
|
@ -111,6 +117,41 @@ std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s)
|
||||||
fields.push_back(std::make_shared<arrow::Field>(field));
|
fields.push_back(std::make_shared<arrow::Field>(field));
|
||||||
columns.push_back(make_array_builder(cell.data_type()));
|
columns.push_back(make_array_builder(cell.data_type()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto builder = columns.at(cell.column().index - 1).get();
|
||||||
|
|
||||||
|
switch (cell.data_type())
|
||||||
|
{
|
||||||
|
case xlnt::cell::type::number:
|
||||||
|
{
|
||||||
|
auto typed_builder = static_cast<arrow::DoubleBuilder*>(builder);
|
||||||
|
typed_builder->Append(0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case xlnt::cell::type::inline_string:
|
||||||
|
case xlnt::cell::type::shared_string:
|
||||||
|
case xlnt::cell::type::error:
|
||||||
|
case xlnt::cell::type::formula_string:
|
||||||
|
case xlnt::cell::type::empty:
|
||||||
|
{
|
||||||
|
auto typed_builder = static_cast<arrow::StringBuilder*>(builder);
|
||||||
|
typed_builder->Append(cell.value<std::string>());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case xlnt::cell::type::boolean:
|
||||||
|
{
|
||||||
|
auto typed_builder = static_cast<arrow::BooleanBuilder*>(builder);
|
||||||
|
typed_builder->Append(cell.value<bool>());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case xlnt::cell::type::date:
|
||||||
|
{
|
||||||
|
auto typed_builder = static_cast<arrow::Date32Builder*>(builder);
|
||||||
|
typed_builder->Append(cell.value<int>());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
reader.end_worksheet();
|
reader.end_worksheet();
|
||||||
|
@ -131,7 +172,7 @@ std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s)
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
void XLNT_API arrow2xlsx(std::shared_ptr<const arrow::Table> &table, std::ostream &s)
|
void XLNT_API arrow2xlsx(std::shared_ptr<arrow::Table> &table, std::ostream &s)
|
||||||
{
|
{
|
||||||
xlnt::streaming_workbook_writer writer;
|
xlnt::streaming_workbook_writer writer;
|
||||||
writer.open(s);
|
writer.open(s);
|
||||||
|
|
|
@ -42,6 +42,7 @@ xlntpyarrow_extension = Extension(
|
||||||
include_dirs = include_dirs,
|
include_dirs = include_dirs,
|
||||||
libraries = [
|
libraries = [
|
||||||
'arrow',
|
'arrow',
|
||||||
|
'arrow_python',
|
||||||
'xlnt'
|
'xlnt'
|
||||||
],
|
],
|
||||||
library_dirs = library_dirs,
|
library_dirs = library_dirs,
|
||||||
|
|
|
@ -3,17 +3,65 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <arrow/api.h>
|
#include <arrow/api.h>
|
||||||
|
#include <arrow/python/pyarrow.h>
|
||||||
#include <Python.h> // must be included after Arrow
|
#include <Python.h> // must be included after Arrow
|
||||||
|
|
||||||
#include <python_streambuf.hpp>
|
#include <python_streambuf.hpp>
|
||||||
#include <xlnt/utils/xlntarrow.hpp>
|
#include <xlnt/utils/xlntarrow.hpp>
|
||||||
|
|
||||||
PyObject *xlsx2arrow(PyObject *file)
|
bool import_pyarrow()
|
||||||
{
|
{
|
||||||
xlnt::python_streambuf buffer(file);
|
static bool imported = false;
|
||||||
|
|
||||||
|
if (!imported)
|
||||||
|
{
|
||||||
|
if (!arrow::py::import_pyarrow())
|
||||||
|
{
|
||||||
|
if (PyErr_Occurred() != nullptr)
|
||||||
|
{
|
||||||
|
PyErr_Print();
|
||||||
|
PyErr_Clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
imported = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return imported;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *xlsx2arrow(PyObject *pyfile)
|
||||||
|
{
|
||||||
|
if (!import_pyarrow())
|
||||||
|
{
|
||||||
|
Py_RETURN_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
xlnt::python_streambuf buffer(pyfile);
|
||||||
std::istream stream(&buffer);
|
std::istream stream(&buffer);
|
||||||
auto table = xlnt::xlsx2arrow(stream);
|
auto table = xlnt::xlsx2arrow(stream);
|
||||||
|
|
||||||
|
return arrow::py::wrap_table(table);
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *arrow2xlsx(PyObject *pytable, PyObject *pyfile)
|
||||||
|
{
|
||||||
|
if (!import_pyarrow())
|
||||||
|
{
|
||||||
|
Py_RETURN_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
(void)pytable;
|
||||||
|
(void)pyfile;
|
||||||
|
/*
|
||||||
|
auto table = arrow::py::unwrap_table(pytable);
|
||||||
|
xlnt::python_streambuf buffer(pyfile);
|
||||||
|
std::ostream stream(&buffer);
|
||||||
|
xlnt::arrow2xlsx(table, stream);
|
||||||
|
*/
|
||||||
|
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,10 +76,11 @@ Returns an arrow table representing the given XLSX file object.");
|
||||||
|
|
||||||
PyObject *xlntpyarrow_xlsx2arrow(PyObject *self, PyObject *args, PyObject *kwargs)
|
PyObject *xlntpyarrow_xlsx2arrow(PyObject *self, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
PyObject *file = NULL;
|
|
||||||
static const char *keywords[] = { "file", NULL };
|
static const char *keywords[] = { "file", NULL };
|
||||||
static auto keywords_nc = const_cast<char **>(keywords);
|
static auto keywords_nc = const_cast<char **>(keywords);
|
||||||
|
|
||||||
|
PyObject *file = NULL;
|
||||||
|
|
||||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", keywords_nc, &file))
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", keywords_nc, &file))
|
||||||
{
|
{
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -50,16 +99,18 @@ Writes the given arrow table to out_file as an XLSX file.");
|
||||||
|
|
||||||
PyObject *xlntpyarrow_arrow2xlsx(PyObject *self, PyObject *args, PyObject *kwargs)
|
PyObject *xlntpyarrow_arrow2xlsx(PyObject *self, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
PyObject *obj = NULL;
|
static const char *keywords[] = { "table", "file", NULL };
|
||||||
static const char *keywords[] = { "file", NULL };
|
|
||||||
static auto keywords_nc = const_cast<char **>(keywords);
|
static auto keywords_nc = const_cast<char **>(keywords);
|
||||||
|
|
||||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi", keywords_nc, &obj))
|
PyObject *table = NULL;
|
||||||
|
PyObject *file = NULL;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO", keywords_nc, &table, &file))
|
||||||
{
|
{
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_RETURN_NONE;
|
return arrow2xlsx(table, file);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2.7/3 compatible based on https://docs.python.org/3/howto/cporting.html
|
// 2.7/3 compatible based on https://docs.python.org/3/howto/cporting.html
|
||||||
|
|
Loading…
Reference in New Issue
Block a user