mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
begin implementing xlsx2arrow, fix msvc warnings, other stuff
This commit is contained in:
parent
4367343e15
commit
5b95b3d463
|
@ -1,4 +1,6 @@
|
|||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
#include <xlnt/xlnt_config.hpp>
|
||||
|
||||
namespace arrow {
|
||||
|
@ -6,10 +8,8 @@ class Table;
|
|||
}
|
||||
|
||||
namespace xlnt {
|
||||
namespace arrow {
|
||||
|
||||
void XLNT_API xlsx2arrow(std::istream &s, ::arrow::Table &table);
|
||||
void XLNT_API arrow2xlsx(const ::arrow::Table &table, std::ostream &s);
|
||||
std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s);
|
||||
void XLNT_API arrow2xlsx(std::shared_ptr<const arrow::Table> &table, std::ostream &s);
|
||||
|
||||
} // namespace arrow
|
||||
} // namespace xlnt
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
cmake_minimum_required(VERSION 3.2)
|
||||
project(xlnt VERSION 0.9)
|
||||
project(xlnt VERSION 1.1)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
|
|
@ -1,49 +1,148 @@
|
|||
// Copyright (c) 2017 Thomas Fussell
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE
|
||||
//
|
||||
// @license: http://www.opensource.org/licenses/mit-license.php
|
||||
// @author: see AUTHORS file
|
||||
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable: 4458)
|
||||
#include <arrow/api.h>
|
||||
#pragma warning(pop)
|
||||
|
||||
#include <xlnt/cell/cell.hpp>
|
||||
#include <xlnt/cell/cell_reference.hpp>
|
||||
#include <xlnt/utils/xlntarrow.hpp>
|
||||
#include <xlnt/workbook/streaming_workbook_reader.hpp>
|
||||
#include <xlnt/workbook/streaming_workbook_writer.hpp>
|
||||
#include <xlnt/worksheet/worksheet.hpp>
|
||||
#include <xlnt/utils/xlntarrow.hpp>
|
||||
|
||||
namespace {
|
||||
|
||||
/// Creates an empty arrow array builder matching an xlnt cell type.
///
/// Mapping: number -> float64 builder, boolean -> boolean builder,
/// date -> date32 builder. All textual cell types (inline_string,
/// shared_string, error, formula_string, empty) map to a string builder.
///
/// \param type the xlnt cell data type to build a column for
/// \return a newly allocated builder using arrow's default memory pool
std::unique_ptr<arrow::ArrayBuilder> make_array_builder(xlnt::cell::type type)
{
    auto pool = arrow::default_memory_pool();

    switch (type)
    {
    case xlnt::cell::type::number:
        return std::unique_ptr<arrow::ArrayBuilder>(new arrow::DoubleBuilder(pool, arrow::float64()));

    case xlnt::cell::type::boolean:
        return std::unique_ptr<arrow::ArrayBuilder>(new arrow::BooleanBuilder(pool, std::make_shared<arrow::BooleanType>()));

    case xlnt::cell::type::date:
        return std::unique_ptr<arrow::ArrayBuilder>(new arrow::Date32Builder(pool));

    // The textual types are listed explicitly (rather than via `default:`) so
    // the compiler can still warn when a new enumerator is left unhandled.
    case xlnt::cell::type::inline_string:
    case xlnt::cell::type::shared_string:
    case xlnt::cell::type::error:
    case xlnt::cell::type::formula_string:
    case xlnt::cell::type::empty:
        break;
    }

    // Returning here (instead of inside the switch) guarantees every path
    // yields a value, so control never flows off the end of the function.
    return std::unique_ptr<arrow::ArrayBuilder>(new arrow::StringBuilder(pool));
}
|
||||
|
||||
/// Builds the arrow schema field describing a column of the given cell type.
///
/// Mapping: number -> float64, boolean -> boolean, date -> date32. All
/// textual cell types (inline_string, shared_string, error, formula_string,
/// empty) map to a string field.
///
/// \param name the column name to give the field
/// \param type the xlnt cell data type stored in the column
/// \return an arrow field named `name` with the corresponding arrow type
arrow::Field make_type_field(const std::string &name, xlnt::cell::type type)
{
    switch (type)
    {
    case xlnt::cell::type::number:
        return arrow::Field(name, arrow::float64());

    case xlnt::cell::type::boolean:
        return arrow::Field(name, arrow::boolean());

    case xlnt::cell::type::date:
        return arrow::Field(name, arrow::date32());

    // The textual types are listed explicitly (rather than via `default:`) so
    // the compiler can still warn when a new enumerator is left unhandled.
    case xlnt::cell::type::inline_string:
    case xlnt::cell::type::shared_string:
    case xlnt::cell::type::error:
    case xlnt::cell::type::formula_string:
    case xlnt::cell::type::empty:
        break;
    }

    // Returning here (instead of inside the switch) guarantees every path
    // yields a value, so control never flows off the end of the function.
    return arrow::Field(name, std::make_shared<arrow::StringType>());
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace xlnt {
|
||||
namespace arrow {
|
||||
|
||||
void XLNT_API xlsx2arrow(std::istream &s, ::arrow::Table &table)
|
||||
std::shared_ptr<arrow::Table> XLNT_API xlsx2arrow(std::istream &s)
|
||||
{
|
||||
xlnt::streaming_workbook_reader reader;
|
||||
reader.open(s);
|
||||
|
||||
reader.begin_worksheet();
|
||||
int first_row = 0;
|
||||
|
||||
auto column_names = std::vector<std::string>();
|
||||
auto columns = std::vector<std::unique_ptr<arrow::ArrayBuilder>>();
|
||||
auto fields = std::vector<std::shared_ptr<arrow::Field>>();
|
||||
|
||||
auto arrow_check = [](arrow::Status s)
|
||||
{
|
||||
if (!s.ok())
|
||||
{
|
||||
throw xlnt::exception("conversion error");
|
||||
}
|
||||
};
|
||||
|
||||
while (reader.has_cell())
|
||||
{
|
||||
auto cell = reader.read_cell();
|
||||
|
||||
if (first_row < 1)
|
||||
if (cell.row() == 1)
|
||||
{
|
||||
first_row = cell.row();
|
||||
column_names.push_back(cell.value<std::string>());
|
||||
}
|
||||
|
||||
if (cell.reference().row() % 1000 == 1)
|
||||
else if (cell.row() == 2)
|
||||
{
|
||||
std::cout << cell.reference().to_string() << std::endl;
|
||||
auto column_name = column_names.at(cell.column().index - 1);
|
||||
auto field = make_type_field(column_name, cell.data_type());
|
||||
fields.push_back(std::make_shared<arrow::Field>(field));
|
||||
columns.push_back(make_array_builder(cell.data_type()));
|
||||
}
|
||||
}
|
||||
|
||||
reader.end_worksheet();
|
||||
|
||||
auto schema = std::make_shared<arrow::Schema>(fields);
|
||||
auto arrays = std::vector<std::shared_ptr<arrow::Array>>();
|
||||
|
||||
for (size_t i = 0; i != columns.size(); ++i)
|
||||
{
|
||||
std::shared_ptr<arrow::Array> array;
|
||||
columns[i]->Finish(&array);
|
||||
arrays.emplace_back(array);
|
||||
}
|
||||
|
||||
std::shared_ptr<arrow::Table> table;
|
||||
arrow_check(MakeTable(schema, arrays, &table));
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
void XLNT_API arrow2xlsx(const ::arrow::Table &table, std::ostream &s)
|
||||
void XLNT_API arrow2xlsx(std::shared_ptr<const arrow::Table> &table, std::ostream &s)
|
||||
{
|
||||
xlnt::streaming_workbook_writer writer;
|
||||
writer.open(s);
|
||||
|
||||
writer.add_worksheet("Sheet1");
|
||||
writer.add_cell("A1").value("test");
|
||||
|
||||
for (auto i = 0; i < table->num_columns(); ++i)
|
||||
{
|
||||
auto column_name = table->schema()->field(i)->name();
|
||||
writer.add_cell(xlnt::cell_reference(i + 1, 1)).value(column_name);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
} // namespace xlnt
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
#include <Python.h>
|
||||
|
||||
namespace xlnt {
|
||||
namespace arrow {
|
||||
|
||||
/// A stream buffer getting data from and putting data into a Python file object
|
||||
/** The aims are as follows:
|
||||
|
@ -84,7 +83,7 @@ namespace arrow {
|
|||
Note: references are to the C++ standard (the numbers between parentheses
|
||||
at the end of references are margin markers).
|
||||
*/
|
||||
class streambuf : public std::basic_streambuf<char>
|
||||
class python_streambuf : public std::basic_streambuf<char>
|
||||
{
|
||||
private:
|
||||
typedef std::basic_streambuf<char> base_t;
|
||||
|
@ -113,7 +112,7 @@ class streambuf : public std::basic_streambuf<char>
|
|||
/// Construct from a Python file object
|
||||
/** if buffer_size is 0 the current default_buffer_size is used.
|
||||
*/
|
||||
streambuf(
|
||||
python_streambuf(
|
||||
PyObject *python_file_obj,
|
||||
std::size_t buffer_size_ = 0)
|
||||
:
|
||||
|
@ -162,7 +161,7 @@ class streambuf : public std::basic_streambuf<char>
|
|||
}
|
||||
|
||||
/// Mundane destructor freeing the allocated resources
|
||||
virtual ~streambuf() {
|
||||
virtual ~python_streambuf() {
|
||||
if (write_buffer) delete[] write_buffer;
|
||||
}
|
||||
|
||||
|
@ -324,7 +323,7 @@ class streambuf : public std::basic_streambuf<char>
|
|||
std::ios_base::openmode which= std::ios_base::in
|
||||
| std::ios_base::out)
|
||||
{
|
||||
return streambuf::seekoff(sp, std::ios_base::beg, which);
|
||||
return python_streambuf::seekoff(sp, std::ios_base::beg, which);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -402,8 +401,8 @@ class streambuf : public std::basic_streambuf<char>
|
|||
if (buf_sought < buf_begin || buf_sought >= upper_bound) return failure;
|
||||
|
||||
// we are in wonderland
|
||||
if (which == std::ios_base::in) gbump(buf_sought - buf_cur);
|
||||
else if (which == std::ios_base::out) pbump(buf_sought - buf_cur);
|
||||
if (which == std::ios_base::in) gbump(static_cast<int>(buf_sought - buf_cur));
|
||||
else if (which == std::ios_base::out) pbump(static_cast<int>(buf_sought - buf_cur));
|
||||
return pos_of_buffer_end_in_py_file + (buf_sought - buf_end);
|
||||
}
|
||||
|
||||
|
@ -415,73 +414,8 @@ class streambuf : public std::basic_streambuf<char>
|
|||
|
||||
return static_cast<T>(value);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
class istream : public std::istream
|
||||
{
|
||||
public:
|
||||
istream(streambuf& buf) : std::istream(&buf)
|
||||
{
|
||||
exceptions(std::ios_base::badbit);
|
||||
}
|
||||
|
||||
~istream() { if (this->good()) this->sync(); }
|
||||
};
|
||||
|
||||
class ostream : public std::ostream
|
||||
{
|
||||
public:
|
||||
ostream(streambuf& buf) : std::ostream(&buf)
|
||||
{
|
||||
exceptions(std::ios_base::badbit);
|
||||
}
|
||||
|
||||
~ostream() { if (this->good()) this->flush(); }
|
||||
};
|
||||
};
|
||||
|
||||
std::size_t streambuf::default_buffer_size = 1024;
|
||||
std::size_t python_streambuf::default_buffer_size = 1024;
|
||||
|
||||
struct streambuf_capsule
|
||||
{
|
||||
streambuf python_streambuf;
|
||||
|
||||
streambuf_capsule(
|
||||
PyObject *python_file_obj,
|
||||
std::size_t buffer_size=0)
|
||||
:
|
||||
python_streambuf(python_file_obj, buffer_size)
|
||||
{}
|
||||
};
|
||||
|
||||
struct ostream : private streambuf_capsule, streambuf::ostream
|
||||
{
|
||||
ostream(
|
||||
PyObject *python_file_obj,
|
||||
std::size_t buffer_size=0)
|
||||
:
|
||||
streambuf_capsule(python_file_obj, buffer_size),
|
||||
streambuf::ostream(python_streambuf)
|
||||
{}
|
||||
|
||||
~ostream()
|
||||
{
|
||||
if (this->good())
|
||||
{
|
||||
this->flush();
|
||||
}
|
||||
|
||||
if (PyErr_Occurred() != nullptr)
|
||||
{
|
||||
PyErr_Clear();
|
||||
throw std::runtime_error(
|
||||
"Problem closing python ostream.\n"
|
||||
" Known limitation: the error is unrecoverable. Sorry.\n"
|
||||
" Suggestion for programmer: add ostream.flush() before"
|
||||
" returning.");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}} // namespace xlnt::arrow
|
||||
} // namespace xlnt
|
||||
|
|
|
@ -10,12 +10,9 @@
|
|||
|
||||
PyObject *xlsx2arrow(PyObject *file)
|
||||
{
|
||||
xlnt::arrow::streambuf buffer(file);
|
||||
xlnt::python_streambuf buffer(file);
|
||||
std::istream stream(&buffer);
|
||||
std::shared_ptr<arrow::Schema> schema;
|
||||
std::vector<std::shared_ptr<arrow::Column>> columns;
|
||||
arrow::Table table(schema, columns);
|
||||
xlnt::arrow::xlsx2arrow(stream, table);
|
||||
auto table = xlnt::xlsx2arrow(stream);
|
||||
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user