move arrow logic to xlntpyarrow for now

This commit is contained in:
Thomas Fussell 2017-07-18 14:20:46 -07:00
parent 7da9187808
commit abdd1be7c8
7 changed files with 178 additions and 174 deletions

View File

@ -1,15 +0,0 @@
#include <iostream>
#include <memory>
#include <xlnt/xlnt_config.hpp>
// Forward declaration only: no Arrow header is included by this file, so
// arrow::Table is available here solely as an incomplete type (it is used
// behind std::shared_ptr in the declarations that follow).
namespace arrow {
class Table;
}
namespace xlnt {
// Takes an input stream and returns a shared pointer to an arrow::Table.
// NOTE(review): going by the name, this presumably parses XLSX data read from
// `s` into an Arrow table -- the definition is not part of this header; confirm.
std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s);
// Takes a shared pointer to an arrow::Table (by non-const reference) and an
// output stream; returns nothing.
// NOTE(review): going by the name, this presumably serialises `table` as XLSX
// into `s` -- the definition is not part of this header; confirm.
void XLNT_API arrow2xlsx(std::shared_ptr<arrow::Table> &table, std::ostream &s);
} // namespace xlnt

View File

@ -63,10 +63,6 @@ file(GLOB WORKBOOK_SOURCES ${XLNT_SOURCE_DIR}/workbook/*.cpp)
file(GLOB WORKSHEET_HEADERS ${XLNT_INCLUDE_DIR}/xlnt/worksheet/*.hpp)
file(GLOB WORKSHEET_SOURCES ${XLNT_SOURCE_DIR}/worksheet/*.cpp)
if(NOT ARROW)
list(REMOVE_ITEM UTILS_SOURCES ${XLNT_SOURCE_DIR}/utils/xlntarrow.cpp)
endif()
file(GLOB DETAIL_ROOT_HEADERS ${XLNT_SOURCE_DIR}/detail/*.hpp)
file(GLOB DETAIL_ROOT_SOURCES ${XLNT_SOURCE_DIR}/detail/*.cpp)
file(GLOB DETAIL_CRYPTOGRAPHY_HEADERS ${XLNT_SOURCE_DIR}/detail/cryptography/*.hpp)
@ -122,16 +118,6 @@ if(NOT BIN_DEST_DIR)
set(BIN_DEST_DIR ${CMAKE_INSTALL_PREFIX}/bin)
endif()
if(ARROW)
option(CONDA_ROOT "Path to Conda directory." "")
if(NOT CONDA_ROOT)
message(FATAL_ERROR "Missing Conda root directory option (-D CONDA_ROOT).")
endif()
link_directories(${CONDA_ROOT}/lib)
endif()
if(NOT STATIC)
add_library(xlnt SHARED ${XLNT_HEADERS} ${XLNT_SOURCES} $<TARGET_OBJECTS:libstudxml>)
target_compile_definitions(xlnt PRIVATE XLNT_SHARED=1)
@ -169,11 +155,6 @@ target_include_directories(xlnt PUBLIC ${XLNT_INCLUDE_DIR})
target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR})
target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR}/../third-party/libstudxml)
if(ARROW)
target_link_libraries(xlnt PRIVATE arrow)
target_include_directories(xlnt PRIVATE ${CONDA_ROOT}/include)
endif()
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0.0")
target_compile_definitions(xlnt PRIVATE UTFCPP=1)
target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR}/../third-party/utfcpp)

View File

@ -5,6 +5,13 @@ if(NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
endif()
# Root directory of the Conda installation, supplied with -D CONDA_ROOT=<path>.
# Declared as a PATH cache entry instead of with option(): option() defines a
# boolean ON/OFF switch, whereas this value is a directory. A value given on
# the command line is kept; the empty default only applies when none is given.
set(CONDA_ROOT "" CACHE PATH "Path to Conda directory.")
if(NOT CONDA_ROOT)
  message(FATAL_ERROR "Missing Conda root directory option (-D CONDA_ROOT).")
endif()
# Replace backslash with forward slash since Python interprets it as an escaped character.
# The expansion is quoted so the path is always passed as exactly one argument.
string(REGEX REPLACE "\\\\" "/" CONDA_ROOT "${CONDA_ROOT}")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/setup.py.cmake"
@ -13,4 +20,4 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/setup.py.cmake"
add_custom_target(xlntpyarrow ALL
COMMAND python setup.py install --record=record.txt --xlntlib=$<TARGET_FILE:xlnt>
DEPENDS xlnt
SOURCES xlntpyarrow.cpp setup.py.cmake)
SOURCES xlntpyarrow.cpp setup.py.cmake methods.hpp methods.cpp)

View File

@ -21,15 +21,22 @@
// @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file
#include <iostream>
#include <memory>
#include <vector>
#pragma warning(push)
#pragma warning(disable: 4458)
#include <arrow/api.h>
#include <arrow/python/pyarrow.h>
#pragma warning(pop)
#include <Python.h> // must be included after Arrow
#include <detail/default_case.hpp>
#include <python_streambuf.hpp>
#include <xlnt/cell/cell.hpp>
#include <xlnt/cell/cell_reference.hpp>
#include <xlnt/utils/xlntarrow.hpp>
#include <xlnt/workbook/streaming_workbook_reader.hpp>
#include <xlnt/workbook/streaming_workbook_writer.hpp>
#include <xlnt/worksheet/worksheet.hpp>
@ -38,7 +45,7 @@ namespace {
std::unique_ptr<arrow::ArrayBuilder> make_array_builder(xlnt::cell::type type)
{
switch (type)
switch (type)
{
case xlnt::cell::type::number:
return std::unique_ptr<arrow::ArrayBuilder>(new arrow::DoubleBuilder(arrow::default_memory_pool(), arrow::float64()));
@ -78,14 +85,56 @@ arrow::Field make_type_field(const std::string &name, xlnt::cell::type type)
default_case(arrow::Field("", arrow::null()));
}
} // namespace
} // namespace xlnt
namespace xlnt {
std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s)
// Imports the pyarrow C API and remembers a successful import.
//
// arrow::py::import_pyarrow() reports its result as an int: 0 on success and
// a non-zero value (with a Python exception set) on failure. The result is
// therefore compared against 0 explicitly; testing it with `!` would treat a
// successful import as a failure and a failed one as a success.
//
// Returns true once pyarrow has been imported. A failed attempt is not
// remembered, so the import is tried again on the next call.
bool import_pyarrow()
{
    static bool imported = false;

    if (imported)
    {
        return true;
    }

    if (arrow::py::import_pyarrow() != 0)
    {
        // Print the pending exception, if any, and leave no error indicator
        // set: the callers of this function return None on failure instead
        // of propagating a Python exception.
        if (PyErr_Occurred() != nullptr)
        {
            PyErr_Print();
            PyErr_Clear();
        }

        return false;
    }

    imported = true;
    return true;
}
extern "C" {
PyObject *xlntpyarrow_xlsx2arrow(PyObject *self, PyObject *args, PyObject *kwargs)
{
static const char *keywords[] = { "file", NULL };
static auto keywords_nc = const_cast<char **>(keywords);
PyObject *file = NULL;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", keywords_nc, &file))
{
return NULL;
}
if (!import_pyarrow())
{
Py_RETURN_NONE;
}
xlnt::python_streambuf file_buffer(file);
std::istream file_stream(&file_buffer);
xlnt::streaming_workbook_reader reader;
reader.open(s);
reader.open(file_stream);
reader.begin_worksheet();
@ -169,21 +218,45 @@ std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s)
std::shared_ptr<arrow::Table> table;
arrow_check(MakeTable(schema, arrays, &table));
return table;
return arrow::py::wrap_table(table);
}
void XLNT_API arrow2xlsx(std::shared_ptr<arrow::Table> &table, std::ostream &s)
PyObject *xlntpyarrow_arrow2xlsx(PyObject *self, PyObject *args, PyObject *kwargs)
{
xlnt::streaming_workbook_writer writer;
writer.open(s);
static const char *keywords[] = { "table", "file", NULL };
static auto keywords_nc = const_cast<char **>(keywords);
writer.add_worksheet("Sheet1");
PyObject *table = NULL;
PyObject *file = NULL;
for (auto i = 0; i < table->num_columns(); ++i)
{
auto column_name = table->schema()->field(i)->name();
writer.add_cell(xlnt::cell_reference(i + 1, 1)).value(column_name);
}
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO", keywords_nc, &table, &file))
{
return NULL;
}
if (!import_pyarrow())
{
Py_RETURN_NONE;
}
/*
auto table = arrow::py::unwrap_table(pytable);
xlnt::python_streambuf buffer(pyfile);
std::ostream stream(&buffer);
xlnt::streaming_workbook_writer writer;
writer.open(s);
writer.add_worksheet("Sheet1");
for (auto i = 0; i < table->num_columns(); ++i)
{
auto column_name = table->schema()->field(i)->name();
writer.add_cell(xlnt::cell_reference(i + 1, 1)).value(column_name);
}
*/
Py_RETURN_NONE;
}
} // namespace xlnt
} // extern "C"

31
xlntpyarrow/methods.hpp Normal file
View File

@ -0,0 +1,31 @@
// Copyright (c) 2017 Thomas Fussell
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file
#pragma once
extern "C" {
// Declares PyObject as an alias for the incomplete type `struct _object`;
// this header does not include <Python.h> itself.
// NOTE(review): presumably this mirrors CPython's own typedef so the two
// declarations agree when <Python.h> is also included -- confirm.
typedef struct _object PyObject;
// Extension entry point taking the (self, args, kwargs) triple; it is
// registered in the module's method table under the name "xlsx2arrow" and
// parses a single object argument with the keyword name "file".
PyObject *xlntpyarrow_xlsx2arrow(PyObject *self, PyObject *args, PyObject *kwargs);
// Extension entry point taking the (self, args, kwargs) triple; it is
// registered in the module's method table under the name "arrow2xlsx" and
// parses two object arguments with the keyword names "table" and "file".
PyObject *xlntpyarrow_arrow2xlsx(PyObject *self, PyObject *args, PyObject *kwargs);
} // extern "C"

View File

@ -15,21 +15,28 @@ if 'CFLAGS' in cfg_vars:
project_root = '${CMAKE_SOURCE_DIR}'
conda_root = '${CONDA_ROOT}'
def pop_long_option(argv, name):
    """Remove the first ``--name`` / ``--name=value`` entry from ``argv``.

    The option is removed in place so that it is not seen by whatever parses
    ``argv`` afterwards.

    Only the first ``=`` separates the option name from its value, so a value
    that itself contains ``=`` (for example a path) is returned in full.

    Args:
        argv: mutable list of command-line arguments; modified in place.
        name: option name without the leading ``--``.

    Returns:
        The text after the first ``=``, an empty string when the option was
        given without ``=value``, or ``None`` when the option is absent.
    """
    flag = '--' + name
    for index, arg in enumerate(argv):
        key, _, value = arg.partition('=')
        if key == flag:
            del argv[index]
            return value
    return None


# Value of the --xlntlib=<path> command-line option; None when not given.
xlntlib = pop_long_option(sys.argv, 'xlntlib')
include_dirs = [
os.path.join(project_root, 'include'),
os.path.join(project_root, 'source'),
os.path.join(project_root, 'xlntpyarrow'),
os.path.join(conda_root, 'include')
]
subdirectory = ''
library_dir = 'lib'
if os.name == 'nt':
subdirectory = '/Release'
library_dir = 'Lib/site-packages'
library_dirs = [
os.path.join(project_root, 'build/source' + subdirectory),
os.path.dirname(xlntlib),
os.path.join(conda_root, 'lib')
]
@ -37,7 +44,7 @@ compile_args = '${CMAKE_CXX_FLAGS}'.split()
xlntpyarrow_extension = Extension(
'xlntpyarrow',
['${CMAKE_CURRENT_SOURCE_DIR}/xlntpyarrow.cpp'],
['${CMAKE_CURRENT_SOURCE_DIR}/xlntpyarrow.cpp', '${CMAKE_CURRENT_SOURCE_DIR}/methods.cpp'],
language = 'c++',
include_dirs = include_dirs,
libraries = [
@ -71,11 +78,8 @@ classifiers = [
data_files = []
for arg in sys.argv:
if arg[:2] == '--' and arg.split('=')[0][2:] == 'xlntlib':
data_files.append(os.path.relpath(arg.split('=')[1]).replace('\\', '/'))
sys.argv.remove(arg)
break
if xlntlib:
data_files.append((library_dir, [os.path.relpath(xlntlib).replace('\\', '/')]))
setup(
name = 'xlntpyarrow',
@ -86,5 +90,5 @@ setup(
author = 'Thomas Fussell',
author_email = 'thomas.fussell@gmail.com',
url = 'https://github.com/tfussell/xlnt',
data_files = [(library_dir, data_files)]
data_files = data_files
)

View File

@ -1,125 +1,48 @@
#include <iostream>
#include <memory>
#include <vector>
// Copyright (c) 2017 Thomas Fussell
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file
#include <arrow/api.h>
#include <arrow/python/pyarrow.h>
#include <Python.h> // must be included after Arrow
#include <python_streambuf.hpp>
#include <xlnt/utils/xlntarrow.hpp>
// Imports the pyarrow C API and remembers a successful import.
//
// arrow::py::import_pyarrow() reports its result as an int: 0 on success and
// a non-zero value (with a Python exception set) on failure. The result is
// therefore compared against 0 explicitly; testing it with `!` would treat a
// successful import as a failure and a failed one as a success.
//
// Returns true once pyarrow has been imported. A failed attempt is not
// remembered, so the import is tried again on the next call.
bool import_pyarrow()
{
    static bool imported = false;

    if (imported)
    {
        return true;
    }

    if (arrow::py::import_pyarrow() != 0)
    {
        // Print the pending exception, if any, and leave no error indicator
        // set: the callers of this function return None on failure instead
        // of propagating a Python exception.
        if (PyErr_Occurred() != nullptr)
        {
            PyErr_Print();
            PyErr_Clear();
        }

        return false;
    }

    imported = true;
    return true;
}
// Builds a wrapped pyarrow table from a Python file object.
//
// pyfile is handed to xlnt::python_streambuf, which backs the std::istream
// passed to xlnt::xlsx2arrow(); the resulting table is wrapped with
// arrow::py::wrap_table().
//
// Returns None (a new reference) when import_pyarrow() reports failure,
// otherwise whatever arrow::py::wrap_table() returns.
PyObject *xlsx2arrow(PyObject *pyfile)
{
    const bool pyarrow_ready = import_pyarrow();

    if (pyarrow_ready)
    {
        xlnt::python_streambuf source(pyfile);
        std::istream input(&source);

        return arrow::py::wrap_table(xlnt::xlsx2arrow(input));
    }

    Py_RETURN_NONE;
}
// Placeholder for the pyarrow table -> XLSX conversion.
//
// The conversion itself is still commented out below, so both arguments are
// unused and the function returns None (a new reference) on every path.
PyObject *arrow2xlsx(PyObject *pytable, PyObject *pyfile)
{
    // Silence unused-parameter warnings until the conversion is enabled.
    static_cast<void>(pytable);
    static_cast<void>(pyfile);

    // The outcome does not change the return value (None either way); the
    // call is kept because it performs the pyarrow import as a side effect.
    import_pyarrow();

    /*
    auto table = arrow::py::unwrap_table(pytable);
    xlnt::python_streambuf buffer(pyfile);
    std::ostream stream(&buffer);
    xlnt::arrow2xlsx(table, stream);
    */

    Py_RETURN_NONE;
}
#include <Python.h>
#include <methods.hpp>
extern "C" {
/*
* Implements XLSX->pyarrow table function.
*/
// Docstring for xlsx2arrow. The signature line and the description are
// separated by explicit "\n" characters: a backslash-newline inside a string
// literal only splices the source lines and would leave no separator in the
// resulting text. The parameter is shown as `file` because that is the
// keyword name the function accepts.
PyDoc_STRVAR(xlntpyarrow_xlsx2arrow_doc,
    "xlsx2arrow(file)\n"
    "\n"
    "Returns an arrow table representing the given XLSX file object.");
// Python-facing wrapper: parses one object argument (positional, or passed
// with the keyword name "file") and forwards it to xlsx2arrow().
//
// Returns nullptr when argument parsing fails, otherwise the result of
// xlsx2arrow().
PyObject *xlntpyarrow_xlsx2arrow(PyObject *self, PyObject *args, PyObject *kwargs)
{
    // Null-terminated list of accepted keyword names; it is handed to the
    // parser as char **, hence the const_cast at the call site.
    static const char *kwlist[] = { "file", nullptr };

    PyObject *file_object = nullptr;

    const int parsed = PyArg_ParseTupleAndKeywords(
        args, kwargs, "O", const_cast<char **>(kwlist), &file_object);

    return parsed ? xlsx2arrow(file_object) : nullptr;
}
/*
* Implements pyarrow table->XLSX function.
*/
// Docstring for arrow2xlsx. The signature line and the description are
// separated by explicit "\n" characters: a backslash-newline inside a string
// literal only splices the source lines and would leave no separator in the
// resulting text. The parameters are shown as `table` and `file` because
// those are the keyword names the function accepts.
PyDoc_STRVAR(xlntpyarrow_arrow2xlsx_doc,
    "arrow2xlsx(table, file)\n"
    "\n"
    "Writes the given arrow table to file as an XLSX file.");
// Python-facing wrapper: parses two object arguments (positional, or passed
// with the keyword names "table" and "file") and forwards them to
// arrow2xlsx().
//
// Returns nullptr when argument parsing fails, otherwise the result of
// arrow2xlsx().
PyObject *xlntpyarrow_arrow2xlsx(PyObject *self, PyObject *args, PyObject *kwargs)
{
    // Null-terminated list of accepted keyword names; it is handed to the
    // parser as char **, hence the const_cast at the call site.
    static const char *kwlist[] = { "table", "file", nullptr };

    PyObject *table_object = nullptr;
    PyObject *file_object = nullptr;

    const int parsed = PyArg_ParseTupleAndKeywords(
        args, kwargs, "OO", const_cast<char **>(kwlist), &table_object, &file_object);

    return parsed ? arrow2xlsx(table_object, file_object) : nullptr;
}
// 2.7/3 compatible based on https://docs.python.org/3/howto/cporting.html
static PyMethodDef xlntpyarrow_methods[] =
{
{ "xlsx2arrow", (PyCFunction)xlntpyarrow_xlsx2arrow, METH_VARARGS | METH_KEYWORDS, xlntpyarrow_xlsx2arrow_doc },
{ "arrow2xlsx", (PyCFunction)xlntpyarrow_arrow2xlsx, METH_VARARGS | METH_KEYWORDS, xlntpyarrow_arrow2xlsx_doc },
{ NULL, NULL, 0, NULL }
{ "xlsx2arrow", (PyCFunction)xlntpyarrow_xlsx2arrow,
METH_VARARGS | METH_KEYWORDS, xlntpyarrow_xlsx2arrow_doc },
{ "arrow2xlsx", (PyCFunction)xlntpyarrow_arrow2xlsx,
METH_VARARGS | METH_KEYWORDS, xlntpyarrow_arrow2xlsx_doc },
{ nullptr, nullptr, 0, nullptr }
};
#if PY_MAJOR_VERSION >= 3
@ -133,10 +56,10 @@ static PyModuleDef xlntpyarrow_def =
xlntpyarrow_doc, // m_doc
0, // m_size
xlntpyarrow_methods, // m_methods
NULL, // m_slots
NULL, // m_traverse
NULL, // m_clear
NULL, // m_free
nullptr, // m_slots
nullptr, // m_traverse
nullptr, // m_clear
nullptr, // m_free
};
PyMODINIT_FUNC
@ -146,7 +69,7 @@ void
initxlntpyarrow(void)
#endif
{
PyObject *module = NULL;
PyObject *module = nullptr;
#if PY_MAJOR_VERSION >= 3
module = PyModule_Create(&xlntpyarrow_def);