2018-01-22 09:38:48 -05:00
|
|
|
// Copyright (c) 2017-2018 Thomas Fussell
|
2017-07-18 14:20:46 -07:00
|
|
|
//
|
|
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
|
|
// in the Software without restriction, including without limitation the rights
|
|
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
|
|
// furnished to do so, subject to the following conditions:
|
|
|
|
//
|
|
|
|
// The above copyright notice and this permission notice shall be included in
|
|
|
|
// all copies or substantial portions of the Software.
|
|
|
|
//
|
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
2021-08-22 08:23:18 -04:00
|
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
2017-07-18 14:20:46 -07:00
|
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
// THE SOFTWARE
|
|
|
|
//
|
|
|
|
// @license: http://www.opensource.org/licenses/mit-license.php
|
|
|
|
// @author: see AUTHORS file
|
|
|
|
|
2017-08-06 11:02:13 -07:00
|
|
|
#include <cstdint>
#include <cstring>
#include <exception>
|
2017-07-30 09:48:57 -07:00
|
|
|
#include <arrow/api.h>
|
|
|
|
#include <arrow/python/pyarrow.h>
|
|
|
|
#include <pybind11/pybind11.h>
|
|
|
|
#include <pybind11/stl.h>
|
|
|
|
#include <xlnt/xlnt.hpp>
|
|
|
|
#include <xlnt/workbook/streaming_workbook_reader.hpp>
|
|
|
|
#include <python_streambuf.hpp>
|
2017-06-28 10:08:39 -04:00
|
|
|
|
2017-07-30 09:48:57 -07:00
|
|
|
void import_pyarrow()
|
|
|
|
{
|
|
|
|
static auto imported = false;
|
|
|
|
|
|
|
|
if (!imported)
|
|
|
|
{
|
|
|
|
if (arrow::py::import_pyarrow() != 0)
|
|
|
|
{
|
2017-09-13 08:48:22 -04:00
|
|
|
throw xlnt::exception("Import of pyarrow failed.");
|
2017-07-30 09:48:57 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
imported = true;
|
|
|
|
}
|
|
|
|
}
|
2017-06-28 10:08:39 -04:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
// Creates an Arrow array builder for the given Arrow type id, backed by
// arrow::default_memory_pool().
//
// Returns a pointer obtained from `new`; the caller is responsible for
// taking ownership of it. Returns nullptr for arrow::Type::NA.
// Throws xlnt::exception("not implemented") for every type id that has no
// case below. That currently includes TIMESTAMP, TIME32, TIME64, DECIMAL,
// FIXED_SIZE_BINARY, LIST, STRUCT, UNION and DICTIONARY, whose cases were
// disabled in the original code.
arrow::ArrayBuilder *make_array_builder(arrow::Type::type type)
{
    auto *memory_pool = arrow::default_memory_pool();

    switch (type)
    {
    case arrow::Type::NA:
        // No builder is created for the null type.
        return nullptr;

    // Integer types
    case arrow::Type::UINT8:
        return new arrow::TypeTraits<arrow::UInt8Type>::BuilderType(memory_pool);
    case arrow::Type::INT8:
        return new arrow::TypeTraits<arrow::Int8Type>::BuilderType(memory_pool);
    case arrow::Type::UINT16:
        return new arrow::TypeTraits<arrow::UInt16Type>::BuilderType(memory_pool);
    case arrow::Type::INT16:
        return new arrow::TypeTraits<arrow::Int16Type>::BuilderType(memory_pool);
    case arrow::Type::UINT32:
        return new arrow::TypeTraits<arrow::UInt32Type>::BuilderType(memory_pool);
    case arrow::Type::INT32:
        return new arrow::TypeTraits<arrow::Int32Type>::BuilderType(memory_pool);
    case arrow::Type::UINT64:
        return new arrow::TypeTraits<arrow::UInt64Type>::BuilderType(memory_pool);
    case arrow::Type::INT64:
        return new arrow::TypeTraits<arrow::Int64Type>::BuilderType(memory_pool);

    // Date types
    case arrow::Type::DATE64:
        return new arrow::TypeTraits<arrow::Date64Type>::BuilderType(memory_pool);
    case arrow::Type::DATE32:
        return new arrow::TypeTraits<arrow::Date32Type>::BuilderType(memory_pool);

    // Floating point types
    case arrow::Type::HALF_FLOAT:
        return new arrow::TypeTraits<arrow::HalfFloatType>::BuilderType(memory_pool);
    case arrow::Type::FLOAT:
        return new arrow::TypeTraits<arrow::FloatType>::BuilderType(memory_pool);
    case arrow::Type::DOUBLE:
        return new arrow::TypeTraits<arrow::DoubleType>::BuilderType(memory_pool);

    // Boolean, text and raw bytes
    case arrow::Type::BOOL:
        return new arrow::TypeTraits<arrow::BooleanType>::BuilderType(memory_pool);
    case arrow::Type::STRING:
        return new arrow::TypeTraits<arrow::StringType>::BuilderType(memory_pool);
    case arrow::Type::BINARY:
        return new arrow::TypeTraits<arrow::BinaryType>::BuilderType(memory_pool);

    default:
        throw xlnt::exception("not implemented");
    }
}
|
|
|
|
|
|
|
|
// Opens `reader` on a Python object by wrapping it in an
// xlnt::python_streambuf and passing ownership of that stream buffer to
// reader.open().
void open_file(xlnt::streaming_workbook_reader &reader, pybind11::object file)
{
    std::unique_ptr<std::streambuf> python_buffer(new xlnt::python_streambuf(file));
    reader.open(std::move(python_buffer));
}
|
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
template<typename T>
|
|
|
|
T cell_value(xlnt::cell cell)
|
2017-07-01 10:46:48 -04:00
|
|
|
{
|
2017-09-28 09:05:23 -04:00
|
|
|
return static_cast<T>(cell.value<double>());
|
2017-08-07 07:26:04 -07:00
|
|
|
}
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
// Converts a 32-bit IEEE-754 float to the bit pattern of a 16-bit
// half-precision float.
//
// Based on https://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
// with two corrections:
//  * the float's bits are reinterpreted with memcpy; the previous
//    static_cast<std::uint32_t>(f) converted the numeric value instead;
//  * inputs outside the normal half-precision range are handled explicitly,
//    where the bare bit formula produced unrelated values (0.0f became
//    0x4000, i.e. 2.0).
//
// Behavior:
//  * values in the normal half range: mantissa is truncated (no rounding)
//  * magnitudes too large for half, and infinity: signed infinity
//  * NaN: a quiet NaN with the input's sign bit
//  * zero and magnitudes below the smallest normal half: signed zero
std::uint16_t float_to_half(float f)
{
    static_assert(sizeof(float) == sizeof(std::uint32_t),
        "float_to_half requires a 32-bit float");

    std::uint32_t bits = 0;
    std::memcpy(&bits, &f, sizeof(bits));

    const std::uint32_t sign = (bits >> 16) & 0x8000u;
    const std::uint32_t exponent = (bits >> 23) & 0xffu; // biased by 127
    const std::uint32_t mantissa = bits & 0x007fffffu;

    // Single-precision biased exponents that map onto normal half values:
    // half bias is 15, so half exponent = exponent - 127 + 15 = exponent - 112,
    // which must lie in [1, 30].
    const std::uint32_t min_normal_exponent = 113u;
    const std::uint32_t max_normal_exponent = 142u;

    std::uint32_t half = sign;

    if (exponent == 0xffu)
    {
        // Infinity keeps a zero mantissa; NaN gets the quiet bit set.
        half |= 0x7c00u;

        if (mantissa != 0)
        {
            half |= 0x0200u;
        }
    }
    else if (exponent > max_normal_exponent)
    {
        // Too large to represent: saturate to infinity.
        half |= 0x7c00u;
    }
    else if (exponent >= min_normal_exponent)
    {
        half |= (exponent - 112u) << 10;
        half |= mantissa >> 13;
    }
    // else: zero or too small for a normal half; only the sign bit remains.

    return static_cast<std::uint16_t>(half);
}
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
void append_cell_value(arrow::ArrayBuilder *builder, arrow::Type::type type, xlnt::cell cell)
|
|
|
|
{
|
2017-09-28 09:05:23 -04:00
|
|
|
const status = arrow::Status::OK();
|
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
switch (type)
|
2017-07-30 09:48:57 -07:00
|
|
|
{
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::NA:
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::BOOL:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::BooleanBuilder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell.value<bool>());
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::UINT8:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::UInt8Builder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<std::uint8_t>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::INT8:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::Int8Builder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<std::uint8_t>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::UINT16:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::UInt16Builder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<std::uint16_t>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::INT16:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::Int16Builder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<std::int16_t>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::UINT32:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::UInt32Builder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<std::uint32_t>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::INT32:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::Int32Builder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<std::int32_t>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::UINT64:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::UInt64Builder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<std::uint64_t>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::INT64:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::Int64Builder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<std::int64_t>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::HALF_FLOAT:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::HalfFloatBuilder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(float_to_half(cell_value<float>(cell)));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::FLOAT:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::FloatBuilder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<float>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::DOUBLE:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::DoubleBuilder *>(builder)
|
|
|
|
->Append(cell_value<double>(cell));
|
2017-08-07 07:26:04 -07:00
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::STRING:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::StringBuilder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell.value<std::string>());
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::BINARY:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::BinaryBuilder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell.value<std::string>());
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::FIXED_SIZE_BINARY:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::FixedSizeBinaryBuilder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell.value<std::string>());
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::DATE32:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::Date32Builder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<arrow::Date32Type::c_type>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::DATE64:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::Date64Builder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<arrow::Date64Type::c_type>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::TIMESTAMP:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::TimestampBuilder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<arrow::TimestampType::c_type>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::TIME32:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::Time32Builder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<arrow::Time32Type::c_type>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::TIME64:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::Time64Builder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<arrow::Time64Type::c_type>(cell));
|
|
|
|
break;
|
|
|
|
/*
|
|
|
|
case arrow::Type::INTERVAL:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::IntervalBuilder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell_value<std::int64_t>(cell));
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::DECIMAL:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::DecimalBuilder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell.value<std::string>());
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::LIST:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::ListBuilder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell.value<std::string>());
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::STRUCT:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::StructBuilder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell.value<std::string>());
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::UNION:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::UnionBuilder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell.value<std::string>());
|
|
|
|
break;
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
case arrow::Type::DICTIONARY:
|
2017-09-28 09:05:23 -04:00
|
|
|
status = static_cast<arrow::DictionaryBuilder *>(builder)
|
2017-08-07 07:26:04 -07:00
|
|
|
->Append(cell.value<std::string>());
|
|
|
|
break;
|
|
|
|
*/
|
|
|
|
default:
|
2017-09-13 08:48:22 -04:00
|
|
|
throw xlnt::exception("not implemented");
|
2017-08-07 07:26:04 -07:00
|
|
|
}
|
2017-09-28 09:05:23 -04:00
|
|
|
|
|
|
|
if (status != arrow::Status::OK())
|
|
|
|
{
|
|
|
|
throw xlnt::exception("Append failed");
|
|
|
|
}
|
2017-08-07 07:26:04 -07:00
|
|
|
}
|
2017-07-30 09:48:57 -07:00
|
|
|
|
2017-08-07 07:26:04 -07:00
|
|
|
// Reads up to `max_rows` rows of cells from `reader` into an Arrow record
// batch laid out according to `pyschema` (a pyarrow schema object), and
// returns the batch wrapped as a Python object.
//
// For each row, at most one cell per schema field is read. Each cell is
// appended to the column builder selected by the cell's own one-based
// column index, not by its position within the row. Reading stops early
// when the reader has no more cells.
//
// Throws xlnt::exception when pyarrow cannot be imported, the schema cannot
// be unwrapped, a column type is unsupported, or an Arrow append/finish
// call fails. Throws std::out_of_range (from vector::at) when a cell's
// column index lies outside the schema.
pybind11::handle read_batch(xlnt::streaming_workbook_reader &reader,
    pybind11::object pyschema, int max_rows)
{
    import_pyarrow();

    std::shared_ptr<arrow::Schema> schema;
    const auto unwrap_status = arrow::py::unwrap_schema(pyschema.ptr(), &schema);

    if (!unwrap_status.ok() || !schema)
    {
        throw xlnt::exception("Schema could not be unwrapped.");
    }

    const auto num_fields = schema->num_fields();

    std::vector<arrow::Type::type> column_types;
    column_types.reserve(num_fields > 0 ? num_fields : 0);

    for (auto i = 0; i < num_fields; ++i)
    {
        column_types.push_back(schema->field(i)->type()->id());
    }

    auto builders = std::vector<std::unique_ptr<arrow::ArrayBuilder>>();
    // Reserving first means emplace_back cannot reallocate, so the raw
    // pointer returned by make_array_builder is always adopted.
    builders.reserve(column_types.size());

    for (auto type : column_types)
    {
        builders.emplace_back(make_array_builder(type));

        // make_array_builder returns nullptr for arrow::Type::NA; such a
        // builder could not be finished below.
        if (!builders.back())
        {
            throw xlnt::exception("Unsupported column type in schema.");
        }
    }

    auto row = std::int64_t(0);

    while (row < max_rows && reader.has_cell())
    {
        for (auto column = 0; column < num_fields; ++column)
        {
            if (!reader.has_cell()) break;

            auto cell = reader.read_cell();
            auto zero_indexed_column = cell.column().index - 1;
            auto column_type = column_types.at(zero_indexed_column);
            auto builder = builders.at(zero_indexed_column).get();

            append_cell_value(builder, column_type, cell);
        }

        ++row;
    }

    auto columns = std::vector<std::shared_ptr<arrow::Array>>();
    columns.reserve(builders.size());

    for (auto &builder : builders)
    {
        std::shared_ptr<arrow::Array> column;

        if (!builder->Finish(&column).ok())
        {
            throw xlnt::exception("Finish failed");
        }

        columns.emplace_back(column);
    }

    auto batch_pointer = std::make_shared<arrow::RecordBatch>(schema, row, columns);
    auto batch_object = arrow::py::wrap_record_batch(batch_pointer);

    // NOTE(review): wrap_record_batch presumably returns a new (owned)
    // reference, and pybind11 appears to increment the count again when a
    // bound function returns a handle. If so, one reference is leaked per
    // call; returning pybind11::reinterpret_steal<pybind11::object>(...)
    // would avoid that but changes the return type. Confirm before changing.
    auto batch_handle = pybind11::handle(batch_object);

    return batch_handle;
}
|
|
|
|
|
2017-08-04 10:12:25 -07:00
|
|
|
// Defines the Python extension module `lib`. It registers:
//  * StreamingWorkbookReader - xlnt::streaming_workbook_reader, plus the
//    free functions open_file ("open") and read_batch ("read_batch")
//  * Worksheet - xlnt::worksheet, with no members
//  * Cell - xlnt::cell with typed value accessors, and the nested
//    Cell.Type enum
PYBIND11_MODULE(lib, m)
{
    namespace py = pybind11;
    using reader_type = xlnt::streaming_workbook_reader;

    m.doc() = "streaming read/write interface for C++ XLSX library xlnt";

    // StreamingWorkbookReader
    py::class_<reader_type> reader_class(m, "StreamingWorkbookReader");
    reader_class.def(py::init<>());
    reader_class.def("has_cell", &reader_type::has_cell);
    reader_class.def("read_cell", &reader_type::read_cell);
    reader_class.def("has_worksheet", &reader_type::has_worksheet);
    reader_class.def("begin_worksheet", &reader_type::begin_worksheet);
    reader_class.def("end_worksheet", &reader_type::end_worksheet);
    reader_class.def("sheet_titles", &reader_type::sheet_titles);
    reader_class.def("open", &open_file);
    reader_class.def("read_batch", &read_batch);

    // Worksheet
    py::class_<xlnt::worksheet> worksheet_class(m, "Worksheet");

    // Cell
    py::class_<xlnt::cell> cell(m, "Cell");

    cell.def("value_string", [](xlnt::cell &self)
    {
        return self.value<std::string>();
    });

    cell.def("value_bool", [](xlnt::cell &self)
    {
        return self.value<bool>();
    });

    cell.def("value_unsigned_int", [](xlnt::cell &self)
    {
        return self.value<unsigned int>();
    });

    cell.def("value_double", [](xlnt::cell &self)
    {
        return self.value<double>();
    });

    cell.def("data_type", [](xlnt::cell &self)
    {
        return self.data_type();
    });

    cell.def("row", &xlnt::cell::row);

    // Exposes the numeric column index rather than the column object.
    cell.def("column", [](xlnt::cell &self)
    {
        return self.column().index;
    });

    cell.def("format_is_date", [](xlnt::cell &self)
    {
        return self.has_format() && self.number_format().is_date_format();
    });

    // Cell.Type
    py::enum_<xlnt::cell::type> cell_type(cell, "Type");
    cell_type.value("Empty", xlnt::cell::type::empty);
    cell_type.value("Boolean", xlnt::cell::type::boolean);
    cell_type.value("Date", xlnt::cell::type::date);
    cell_type.value("Error", xlnt::cell::type::error);
    cell_type.value("InlineString", xlnt::cell::type::inline_string);
    cell_type.value("Number", xlnt::cell::type::number);
    cell_type.value("SharedString", xlnt::cell::type::shared_string);
    cell_type.value("FormulaString", xlnt::cell::type::formula_string);
}
|