From 5b95b3d4639e8475bc08460cf2197830ad9da2de Mon Sep 17 00:00:00 2001 From: Thomas Fussell Date: Fri, 14 Jul 2017 22:18:11 -0700 Subject: [PATCH] begin implementing xlsx2arrow, fix msvc warnings, other stuff --- include/xlnt/utils/xlntarrow.hpp | 8 +- source/CMakeLists.txt | 2 +- .../cryptography/{sha1-fast.c => sha1.c} | 0 source/utils/xlntarrow.cpp | 125 ++++++++++++++++-- xlntpyarrow/python_streambuf.hpp | 82 ++---------- xlntpyarrow/xlntpyarrow.cpp | 7 +- 6 files changed, 127 insertions(+), 97 deletions(-) rename source/detail/cryptography/{sha1-fast.c => sha1.c} (100%) diff --git a/include/xlnt/utils/xlntarrow.hpp b/include/xlnt/utils/xlntarrow.hpp index c42e8a56..87492595 100644 --- a/include/xlnt/utils/xlntarrow.hpp +++ b/include/xlnt/utils/xlntarrow.hpp @@ -1,4 +1,6 @@ #include +#include + #include namespace arrow { @@ -6,10 +8,8 @@ class Table; } namespace xlnt { -namespace arrow { -void XLNT_API xlsx2arrow(std::istream &s, ::arrow::Table &table); -void XLNT_API arrow2xlsx(const ::arrow::Table &table, std::ostream &s); +std::shared_ptr XLNT_API xlsx2arrow(std::istream &s); +void XLNT_API arrow2xlsx(std::shared_ptr &table, std::ostream &s); -} // namespace arrow } // namespace xlnt diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 7985edc1..2fe1c9a5 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.2) -project(xlnt VERSION 0.9) +project(xlnt VERSION 1.1) set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED ON) diff --git a/source/detail/cryptography/sha1-fast.c b/source/detail/cryptography/sha1.c similarity index 100% rename from source/detail/cryptography/sha1-fast.c rename to source/detail/cryptography/sha1.c diff --git a/source/utils/xlntarrow.cpp b/source/utils/xlntarrow.cpp index 0350729d..679fa7be 100644 --- a/source/utils/xlntarrow.cpp +++ b/source/utils/xlntarrow.cpp @@ -1,49 +1,148 @@ +// Copyright (c) 2017 Thomas Fussell +// +// Permission is hereby granted, free 
of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// @license: http://www.opensource.org/licenses/mit-license.php +// @author: see AUTHORS file + +#pragma warning(push) +#pragma warning(disable: 4458) #include +#pragma warning(pop) #include #include +#include #include #include #include -#include + +namespace { + +std::unique_ptr make_array_builder(xlnt::cell::type type) +{ + switch (type) + { + case xlnt::cell::type::number: + return std::unique_ptr(new arrow::DoubleBuilder(arrow::default_memory_pool(), arrow::float64())); + case xlnt::cell::type::inline_string: + case xlnt::cell::type::shared_string: + case xlnt::cell::type::error: + case xlnt::cell::type::formula_string: + case xlnt::cell::type::empty: + return std::unique_ptr(new arrow::StringBuilder(arrow::default_memory_pool())); + case xlnt::cell::type::boolean: + return std::unique_ptr(new arrow::BooleanBuilder(arrow::default_memory_pool(), std::make_shared())); + case xlnt::cell::type::date: + return std::unique_ptr(new 
arrow::Date32Builder(arrow::default_memory_pool())); + } +} + +arrow::Field make_type_field(const std::string &name, xlnt::cell::type type) +{ + switch (type) + { + case xlnt::cell::type::number: + return arrow::Field(name, arrow::float64()); + case xlnt::cell::type::inline_string: + case xlnt::cell::type::shared_string: + case xlnt::cell::type::error: + case xlnt::cell::type::formula_string: + case xlnt::cell::type::empty: + return arrow::Field(name, std::make_shared()); + case xlnt::cell::type::boolean: + return arrow::Field(name, arrow::boolean()); + case xlnt::cell::type::date: + return arrow::Field(name, arrow::date32()); + } +} + +} // namespace namespace xlnt { -namespace arrow { -void XLNT_API xlsx2arrow(std::istream &s, ::arrow::Table &table) +std::shared_ptr XLNT_API xlsx2arrow(std::istream &s) { xlnt::streaming_workbook_reader reader; reader.open(s); reader.begin_worksheet(); - int first_row = 0; + + auto column_names = std::vector(); + auto columns = std::vector>(); + auto fields = std::vector>(); + + auto arrow_check = [](arrow::Status s) + { + if (!s.ok()) + { + throw xlnt::exception("conversion error"); + } + }; while (reader.has_cell()) { auto cell = reader.read_cell(); - if (first_row < 1) + if (cell.row() == 1) { - first_row = cell.row(); + column_names.push_back(cell.value()); } - - if (cell.reference().row() % 1000 == 1) + else if (cell.row() == 2) { - std::cout << cell.reference().to_string() << std::endl; + auto column_name = column_names.at(cell.column().index - 1); + auto field = make_type_field(column_name, cell.data_type()); + fields.push_back(std::make_shared(field)); + columns.push_back(make_array_builder(cell.data_type())); } } reader.end_worksheet(); + + auto schema = std::make_shared(fields); + auto arrays = std::vector>(); + + for (size_t i = 0; i != columns.size(); ++i) + { + std::shared_ptr array; + columns[i]->Finish(&array); + arrays.emplace_back(array); + } + + std::shared_ptr table; + arrow_check(MakeTable(schema, arrays, 
&table)); + + return table; } -void XLNT_API arrow2xlsx(const ::arrow::Table &table, std::ostream &s) +void XLNT_API arrow2xlsx(std::shared_ptr &table, std::ostream &s) { xlnt::streaming_workbook_writer writer; writer.open(s); writer.add_worksheet("Sheet1"); - writer.add_cell("A1").value("test"); + + for (auto i = 0; i < table->num_columns(); ++i) + { + auto column_name = table->schema()->field(i)->name(); + writer.add_cell(xlnt::cell_reference(i + 1, 1)).value(column_name); + } } -} -} +} // namespace xlnt diff --git a/xlntpyarrow/python_streambuf.hpp b/xlntpyarrow/python_streambuf.hpp index 6e8e3d66..4bf9abd1 100644 --- a/xlntpyarrow/python_streambuf.hpp +++ b/xlntpyarrow/python_streambuf.hpp @@ -9,7 +9,6 @@ #include namespace xlnt { -namespace arrow { /// A stream buffer getting data from and putting data into a Python file object /** The aims are as follow: @@ -84,7 +83,7 @@ namespace arrow { Note: references are to the C++ standard (the numbers between parentheses at the end of references are margin markers). */ -class streambuf : public std::basic_streambuf +class python_streambuf : public std::basic_streambuf { private: typedef std::basic_streambuf base_t; @@ -113,7 +112,7 @@ class streambuf : public std::basic_streambuf /// Construct from a Python file object /** if buffer_size is 0 the current default_buffer_size is used. 
*/ - streambuf( + python_streambuf( PyObject *python_file_obj, std::size_t buffer_size_ = 0) : @@ -162,7 +161,7 @@ class streambuf : public std::basic_streambuf } /// Mundane destructor freeing the allocated resources - virtual ~streambuf() { + virtual ~python_streambuf() { if (write_buffer) delete[] write_buffer; } @@ -324,7 +323,7 @@ class streambuf : public std::basic_streambuf std::ios_base::openmode which= std::ios_base::in | std::ios_base::out) { - return streambuf::seekoff(sp, std::ios_base::beg, which); + return python_streambuf::seekoff(sp, std::ios_base::beg, which); } private: @@ -402,8 +401,8 @@ class streambuf : public std::basic_streambuf if (buf_sought < buf_begin || buf_sought >= upper_bound) return failure; // we are in wonderland - if (which == std::ios_base::in) gbump(buf_sought - buf_cur); - else if (which == std::ios_base::out) pbump(buf_sought - buf_cur); + if (which == std::ios_base::in) gbump(static_cast(buf_sought - buf_cur)); + else if (which == std::ios_base::out) pbump(static_cast(buf_sought - buf_cur)); return pos_of_buffer_end_in_py_file + (buf_sought - buf_end); } @@ -415,73 +414,8 @@ class streambuf : public std::basic_streambuf return static_cast(value); } - - public: - - class istream : public std::istream - { - public: - istream(streambuf& buf) : std::istream(&buf) - { - exceptions(std::ios_base::badbit); - } - - ~istream() { if (this->good()) this->sync(); } - }; - - class ostream : public std::ostream - { - public: - ostream(streambuf& buf) : std::ostream(&buf) - { - exceptions(std::ios_base::badbit); - } - - ~ostream() { if (this->good()) this->flush(); } - }; }; -std::size_t streambuf::default_buffer_size = 1024; +std::size_t python_streambuf::default_buffer_size = 1024; -struct streambuf_capsule -{ - streambuf python_streambuf; - - streambuf_capsule( - PyObject *python_file_obj, - std::size_t buffer_size=0) - : - python_streambuf(python_file_obj, buffer_size) - {} -}; - -struct ostream : private streambuf_capsule, 
streambuf::ostream -{ - ostream( - PyObject *python_file_obj, - std::size_t buffer_size=0) - : - streambuf_capsule(python_file_obj, buffer_size), - streambuf::ostream(python_streambuf) - {} - - ~ostream() - { - if (this->good()) - { - this->flush(); - } - - if (PyErr_Occurred() != nullptr) - { - PyErr_Clear(); - throw std::runtime_error( - "Problem closing python ostream.\n" - " Known limitation: the error is unrecoverable. Sorry.\n" - " Suggestion for programmer: add ostream.flush() before" - " returning."); - } - } -}; - -}} // namespace xlnt::arrow +} // namespace xlnt diff --git a/xlntpyarrow/xlntpyarrow.cpp b/xlntpyarrow/xlntpyarrow.cpp index 0e9d51ac..5f025afa 100644 --- a/xlntpyarrow/xlntpyarrow.cpp +++ b/xlntpyarrow/xlntpyarrow.cpp @@ -10,12 +10,9 @@ PyObject *xlsx2arrow(PyObject *file) { - xlnt::arrow::streambuf buffer(file); + xlnt::python_streambuf buffer(file); std::istream stream(&buffer); - std::shared_ptr schema; - std::vector> columns; - arrow::Table table(schema, columns); - xlnt::arrow::xlsx2arrow(stream, table); + auto table = xlnt::xlsx2arrow(stream); Py_RETURN_NONE; }