mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
begin implementing xlsx2arrow, fix msvc warnings, other stuff
This commit is contained in:
parent
4367343e15
commit
5b95b3d463
|
@ -1,4 +1,6 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
#include <xlnt/xlnt_config.hpp>
|
#include <xlnt/xlnt_config.hpp>
|
||||||
|
|
||||||
namespace arrow {
|
namespace arrow {
|
||||||
|
@ -6,10 +8,8 @@ class Table;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace xlnt {
|
namespace xlnt {
|
||||||
namespace arrow {
|
|
||||||
|
|
||||||
void XLNT_API xlsx2arrow(std::istream &s, ::arrow::Table &table);
|
std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s);
|
||||||
void XLNT_API arrow2xlsx(const ::arrow::Table &table, std::ostream &s);
|
void XLNT_API arrow2xlsx(std::shared_ptr<const arrow::Table> &table, std::ostream &s);
|
||||||
|
|
||||||
} // namespace arrow
|
|
||||||
} // namespace xlnt
|
} // namespace xlnt
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
cmake_minimum_required(VERSION 3.2)
|
cmake_minimum_required(VERSION 3.2)
|
||||||
project(xlnt VERSION 0.9)
|
project(xlnt VERSION 1.1)
|
||||||
|
|
||||||
set(CMAKE_CXX_STANDARD 14)
|
set(CMAKE_CXX_STANDARD 14)
|
||||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
|
|
@ -1,49 +1,148 @@
|
||||||
|
// Copyright (c) 2017 Thomas Fussell
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files (the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions:
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in
|
||||||
|
// all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
// THE SOFTWARE
|
||||||
|
//
|
||||||
|
// @license: http://www.opensource.org/licenses/mit-license.php
|
||||||
|
// @author: see AUTHORS file
|
||||||
|
|
||||||
|
#pragma warning(push)
|
||||||
|
#pragma warning(disable: 4458)
|
||||||
#include <arrow/api.h>
|
#include <arrow/api.h>
|
||||||
|
#pragma warning(pop)
|
||||||
|
|
||||||
#include <xlnt/cell/cell.hpp>
|
#include <xlnt/cell/cell.hpp>
|
||||||
#include <xlnt/cell/cell_reference.hpp>
|
#include <xlnt/cell/cell_reference.hpp>
|
||||||
|
#include <xlnt/utils/xlntarrow.hpp>
|
||||||
#include <xlnt/workbook/streaming_workbook_reader.hpp>
|
#include <xlnt/workbook/streaming_workbook_reader.hpp>
|
||||||
#include <xlnt/workbook/streaming_workbook_writer.hpp>
|
#include <xlnt/workbook/streaming_workbook_writer.hpp>
|
||||||
#include <xlnt/worksheet/worksheet.hpp>
|
#include <xlnt/worksheet/worksheet.hpp>
|
||||||
#include <xlnt/utils/xlntarrow.hpp>
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
/// Creates an empty Arrow array builder able to hold the values of a
/// worksheet column whose cells have the given xlnt cell type.
///
/// Mapping:
///   number                                  -> arrow::DoubleBuilder (float64)
///   inline_string, shared_string, error,
///   formula_string, empty                   -> arrow::StringBuilder
///   boolean                                 -> arrow::BooleanBuilder
///   date                                    -> arrow::Date32Builder
///
/// Every builder allocates from arrow::default_memory_pool().
///
/// Throws xlnt::exception if `type` is not one of the enumerators above.
std::unique_ptr<arrow::ArrayBuilder> make_array_builder(xlnt::cell::type type)
{
    auto *pool = arrow::default_memory_pool();

    switch (type)
    {
    case xlnt::cell::type::number:
        return std::unique_ptr<arrow::ArrayBuilder>(
            new arrow::DoubleBuilder(pool, arrow::float64()));

    // All textual cell kinds, and empty cells, are stored as strings.
    case xlnt::cell::type::inline_string:
    case xlnt::cell::type::shared_string:
    case xlnt::cell::type::error:
    case xlnt::cell::type::formula_string:
    case xlnt::cell::type::empty:
        return std::unique_ptr<arrow::ArrayBuilder>(
            new arrow::StringBuilder(pool));

    case xlnt::cell::type::boolean:
        return std::unique_ptr<arrow::ArrayBuilder>(
            new arrow::BooleanBuilder(pool, std::make_shared<arrow::BooleanType>()));

    case xlnt::cell::type::date:
        return std::unique_ptr<arrow::ArrayBuilder>(
            new arrow::Date32Builder(pool));
    }

    // An enum object can hold a value that matches no enumerator. Without
    // this statement control would flow off the end of a non-void function,
    // which is undefined behaviour (and MSVC warning C4715).
    throw xlnt::exception("unhandled cell type");
}
|
||||||
|
|
||||||
|
/// Builds the Arrow schema field describing a worksheet column.
///
/// `name` becomes the field name; `type` selects the Arrow data type:
///   number                                  -> float64
///   inline_string, shared_string, error,
///   formula_string, empty                   -> string
///   boolean                                 -> boolean
///   date                                    -> date32
///
/// Throws xlnt::exception if `type` is not one of the enumerators above.
arrow::Field make_type_field(const std::string &name, xlnt::cell::type type)
{
    switch (type)
    {
    case xlnt::cell::type::number:
        return arrow::Field(name, arrow::float64());

    // All textual cell kinds, and empty cells, are described as strings.
    case xlnt::cell::type::inline_string:
    case xlnt::cell::type::shared_string:
    case xlnt::cell::type::error:
    case xlnt::cell::type::formula_string:
    case xlnt::cell::type::empty:
        return arrow::Field(name, std::make_shared<arrow::StringType>());

    case xlnt::cell::type::boolean:
        return arrow::Field(name, arrow::boolean());

    case xlnt::cell::type::date:
        return arrow::Field(name, arrow::date32());
    }

    // An enum object can hold a value that matches no enumerator. Without
    // this statement control would flow off the end of a non-void function,
    // which is undefined behaviour (and MSVC warning C4715).
    throw xlnt::exception("unhandled cell type");
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
namespace xlnt {
|
namespace xlnt {
|
||||||
namespace arrow {
|
|
||||||
|
|
||||||
void XLNT_API xlsx2arrow(std::istream &s, ::arrow::Table &table)
|
std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s)
|
||||||
{
|
{
|
||||||
xlnt::streaming_workbook_reader reader;
|
xlnt::streaming_workbook_reader reader;
|
||||||
reader.open(s);
|
reader.open(s);
|
||||||
|
|
||||||
reader.begin_worksheet();
|
reader.begin_worksheet();
|
||||||
int first_row = 0;
|
|
||||||
|
auto column_names = std::vector<std::string>();
|
||||||
|
auto columns = std::vector<std::unique_ptr<arrow::ArrayBuilder>>();
|
||||||
|
auto fields = std::vector<std::shared_ptr<arrow::Field>>();
|
||||||
|
|
||||||
|
auto arrow_check = [](arrow::Status s)
|
||||||
|
{
|
||||||
|
if (!s.ok())
|
||||||
|
{
|
||||||
|
throw xlnt::exception("conversion error");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
while (reader.has_cell())
|
while (reader.has_cell())
|
||||||
{
|
{
|
||||||
auto cell = reader.read_cell();
|
auto cell = reader.read_cell();
|
||||||
|
|
||||||
if (first_row < 1)
|
if (cell.row() == 1)
|
||||||
{
|
{
|
||||||
first_row = cell.row();
|
column_names.push_back(cell.value<std::string>());
|
||||||
}
|
}
|
||||||
|
else if (cell.row() == 2)
|
||||||
if (cell.reference().row() % 1000 == 1)
|
|
||||||
{
|
{
|
||||||
std::cout << cell.reference().to_string() << std::endl;
|
auto column_name = column_names.at(cell.column().index - 1);
|
||||||
|
auto field = make_type_field(column_name, cell.data_type());
|
||||||
|
fields.push_back(std::make_shared<arrow::Field>(field));
|
||||||
|
columns.push_back(make_array_builder(cell.data_type()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
reader.end_worksheet();
|
reader.end_worksheet();
|
||||||
|
|
||||||
|
auto schema = std::make_shared<arrow::Schema>(fields);
|
||||||
|
auto arrays = std::vector<std::shared_ptr<arrow::Array>>();
|
||||||
|
|
||||||
|
for (size_t i = 0; i != columns.size(); ++i)
|
||||||
|
{
|
||||||
|
std::shared_ptr<arrow::Array> array;
|
||||||
|
columns[i]->Finish(&array);
|
||||||
|
arrays.emplace_back(array);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<arrow::Table> table;
|
||||||
|
arrow_check(MakeTable(schema, arrays, &table));
|
||||||
|
|
||||||
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
void XLNT_API arrow2xlsx(const ::arrow::Table &table, std::ostream &s)
|
void XLNT_API arrow2xlsx(std::shared_ptr<const arrow::Table> &table, std::ostream &s)
|
||||||
{
|
{
|
||||||
xlnt::streaming_workbook_writer writer;
|
xlnt::streaming_workbook_writer writer;
|
||||||
writer.open(s);
|
writer.open(s);
|
||||||
|
|
||||||
writer.add_worksheet("Sheet1");
|
writer.add_worksheet("Sheet1");
|
||||||
writer.add_cell("A1").value("test");
|
|
||||||
|
for (auto i = 0; i < table->num_columns(); ++i)
|
||||||
|
{
|
||||||
|
auto column_name = table->schema()->field(i)->name();
|
||||||
|
writer.add_cell(xlnt::cell_reference(i + 1, 1)).value(column_name);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
} // namespace xlnt
|
||||||
}
|
|
||||||
|
|
|
@ -9,7 +9,6 @@
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
|
|
||||||
namespace xlnt {
|
namespace xlnt {
|
||||||
namespace arrow {
|
|
||||||
|
|
||||||
/// A stream buffer getting data from and putting data into a Python file object
|
/// A stream buffer getting data from and putting data into a Python file object
|
||||||
/** The aims are as follows:
|
/** The aims are as follows:
|
||||||
|
@ -84,7 +83,7 @@ namespace arrow {
|
||||||
Note: references are to the C++ standard (the numbers between parentheses
|
Note: references are to the C++ standard (the numbers between parentheses
|
||||||
at the end of references are margin markers).
|
at the end of references are margin markers).
|
||||||
*/
|
*/
|
||||||
class streambuf : public std::basic_streambuf<char>
|
class python_streambuf : public std::basic_streambuf<char>
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
typedef std::basic_streambuf<char> base_t;
|
typedef std::basic_streambuf<char> base_t;
|
||||||
|
@ -113,7 +112,7 @@ class streambuf : public std::basic_streambuf<char>
|
||||||
/// Construct from a Python file object
|
/// Construct from a Python file object
|
||||||
/** if buffer_size is 0 the current default_buffer_size is used.
|
/** if buffer_size is 0 the current default_buffer_size is used.
|
||||||
*/
|
*/
|
||||||
streambuf(
|
python_streambuf(
|
||||||
PyObject *python_file_obj,
|
PyObject *python_file_obj,
|
||||||
std::size_t buffer_size_ = 0)
|
std::size_t buffer_size_ = 0)
|
||||||
:
|
:
|
||||||
|
@ -162,7 +161,7 @@ class streambuf : public std::basic_streambuf<char>
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Mundane destructor freeing the allocated resources
|
/// Mundane destructor freeing the allocated resources
|
||||||
virtual ~streambuf() {
|
virtual ~python_streambuf() {
|
||||||
if (write_buffer) delete[] write_buffer;
|
if (write_buffer) delete[] write_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -324,7 +323,7 @@ class streambuf : public std::basic_streambuf<char>
|
||||||
std::ios_base::openmode which= std::ios_base::in
|
std::ios_base::openmode which= std::ios_base::in
|
||||||
| std::ios_base::out)
|
| std::ios_base::out)
|
||||||
{
|
{
|
||||||
return streambuf::seekoff(sp, std::ios_base::beg, which);
|
return python_streambuf::seekoff(sp, std::ios_base::beg, which);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -402,8 +401,8 @@ class streambuf : public std::basic_streambuf<char>
|
||||||
if (buf_sought < buf_begin || buf_sought >= upper_bound) return failure;
|
if (buf_sought < buf_begin || buf_sought >= upper_bound) return failure;
|
||||||
|
|
||||||
// we are in wonderland
|
// we are in wonderland
|
||||||
if (which == std::ios_base::in) gbump(buf_sought - buf_cur);
|
if (which == std::ios_base::in) gbump(static_cast<int>(buf_sought - buf_cur));
|
||||||
else if (which == std::ios_base::out) pbump(buf_sought - buf_cur);
|
else if (which == std::ios_base::out) pbump(static_cast<int>(buf_sought - buf_cur));
|
||||||
return pos_of_buffer_end_in_py_file + (buf_sought - buf_end);
|
return pos_of_buffer_end_in_py_file + (buf_sought - buf_end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -415,73 +414,8 @@ class streambuf : public std::basic_streambuf<char>
|
||||||
|
|
||||||
return static_cast<T>(value);
|
return static_cast<T>(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
class istream : public std::istream
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
istream(streambuf& buf) : std::istream(&buf)
|
|
||||||
{
|
|
||||||
exceptions(std::ios_base::badbit);
|
|
||||||
}
|
|
||||||
|
|
||||||
~istream() { if (this->good()) this->sync(); }
|
|
||||||
};
|
|
||||||
|
|
||||||
class ostream : public std::ostream
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
ostream(streambuf& buf) : std::ostream(&buf)
|
|
||||||
{
|
|
||||||
exceptions(std::ios_base::badbit);
|
|
||||||
}
|
|
||||||
|
|
||||||
~ostream() { if (this->good()) this->flush(); }
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
std::size_t streambuf::default_buffer_size = 1024;
|
std::size_t python_streambuf::default_buffer_size = 1024;
|
||||||
|
|
||||||
struct streambuf_capsule
|
} // namespace xlnt
|
||||||
{
|
|
||||||
streambuf python_streambuf;
|
|
||||||
|
|
||||||
streambuf_capsule(
|
|
||||||
PyObject *python_file_obj,
|
|
||||||
std::size_t buffer_size=0)
|
|
||||||
:
|
|
||||||
python_streambuf(python_file_obj, buffer_size)
|
|
||||||
{}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ostream : private streambuf_capsule, streambuf::ostream
|
|
||||||
{
|
|
||||||
ostream(
|
|
||||||
PyObject *python_file_obj,
|
|
||||||
std::size_t buffer_size=0)
|
|
||||||
:
|
|
||||||
streambuf_capsule(python_file_obj, buffer_size),
|
|
||||||
streambuf::ostream(python_streambuf)
|
|
||||||
{}
|
|
||||||
|
|
||||||
~ostream()
|
|
||||||
{
|
|
||||||
if (this->good())
|
|
||||||
{
|
|
||||||
this->flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (PyErr_Occurred() != nullptr)
|
|
||||||
{
|
|
||||||
PyErr_Clear();
|
|
||||||
throw std::runtime_error(
|
|
||||||
"Problem closing python ostream.\n"
|
|
||||||
" Known limitation: the error is unrecoverable. Sorry.\n"
|
|
||||||
" Suggestion for programmer: add ostream.flush() before"
|
|
||||||
" returning.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
}} // namespace xlnt::arrow
|
|
||||||
|
|
|
@ -10,12 +10,9 @@
|
||||||
|
|
||||||
PyObject *xlsx2arrow(PyObject *file)
|
PyObject *xlsx2arrow(PyObject *file)
|
||||||
{
|
{
|
||||||
xlnt::arrow::streambuf buffer(file);
|
xlnt::python_streambuf buffer(file);
|
||||||
std::istream stream(&buffer);
|
std::istream stream(&buffer);
|
||||||
std::shared_ptr<arrow::Schema> schema;
|
auto table = xlnt::xlsx2arrow(stream);
|
||||||
std::vector<std::shared_ptr<arrow::Column>> columns;
|
|
||||||
arrow::Table table(schema, columns);
|
|
||||||
xlnt::arrow::xlsx2arrow(stream, table);
|
|
||||||
|
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user