Merge branch 'feature/arrow' into dev

This commit is contained in:
Thomas Fussell 2017-07-03 08:42:31 -07:00
commit 5e3476f755
28 changed files with 3171 additions and 743 deletions

1
.gitignore vendored
View File

@ -13,3 +13,4 @@ node_modules/
.DS_Store .DS_Store
__pycache__/ __pycache__/
Win32/ Win32/
*.pyd

View File

@ -11,6 +11,7 @@ option(STATIC "Set to ON to build xlnt as a static library instead of a shared l
option(TESTS "Set to OFF to skip building test executable (in ./tests)" ON) option(TESTS "Set to OFF to skip building test executable (in ./tests)" ON)
option(SAMPLES "Set to ON to build executable code samples (in ./samples)" OFF) option(SAMPLES "Set to ON to build executable code samples (in ./samples)" OFF)
option(BENCHMARKS "Set to ON to build performance benchmarks (in ./benchmarks)" OFF) option(BENCHMARKS "Set to ON to build performance benchmarks (in ./benchmarks)" OFF)
# Opt-in build of the Arrow conversion library; its sources are added from arrow/xlntarrow.
option(ARROW "Set to ON to build Arrow conversion functions (in ./arrow/xlntarrow)" OFF)
# Platform specific options # Platform specific options
if(NOT MSVC) if(NOT MSVC)
@ -30,4 +31,8 @@ if(TESTS)
add_subdirectory(tests) add_subdirectory(tests)
endif() endif()
if(ARROW)
add_subdirectory(arrow/xlntarrow)
endif()
add_subdirectory(source) add_subdirectory(source)

View File

@ -1,14 +1,14 @@
<img height="100" src="https://cloud.githubusercontent.com/assets/1735211/24962965/5c1cfc94-1f6b-11e7-8d86-54fe12907a23.png" alt="xlnt"><br/> <img height="100" src="https://cloud.githubusercontent.com/assets/1735211/24962965/5c1cfc94-1f6b-11e7-8d86-54fe12907a23.png" alt="xlnt"><br/>
==== ====
[![Travis Build Status](https://travis-ci.org/tfussell/xlnt.svg)](https://travis-ci.org/tfussell/xlnt) [![Travis Build Status](https://travis-ci.org/tfussell/xlnt.svg?branch=master)](https://travis-ci.org/tfussell/xlnt)
[![AppVeyor Build status](https://ci.appveyor.com/api/projects/status/2hs79a1xoxy16sol?svg=true)](https://ci.appveyor.com/project/tfussell/xlnt) [![AppVeyor Build status](https://ci.appveyor.com/api/projects/status/2hs79a1xoxy16sol?svg=true)](https://ci.appveyor.com/project/tfussell/xlnt)
[![Coverage Status](https://coveralls.io/repos/github/tfussell/xlnt/badge.svg?branch=master)](https://coveralls.io/github/tfussell/xlnt?branch=master) [![Coverage Status](https://coveralls.io/repos/github/tfussell/xlnt/badge.svg?branch=master)](https://coveralls.io/github/tfussell/xlnt?branch=master)
[![ReadTheDocs Documentation Status](https://readthedocs.org/projects/xlnt/badge/?version=latest)](http://xlnt.readthedocs.org/en/latest/?badge=latest) [![ReadTheDocs Documentation Status](https://readthedocs.org/projects/xlnt/badge/?version=latest)](http://xlnt.readthedocs.org/en/latest/?badge=latest)
[![License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](http://opensource.org/licenses/MIT) [![License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](http://opensource.org/licenses/MIT)
## Introduction ## Introduction
xlnt is a modern C++ library for manipulating spreadsheets in memory and reading/writing them from/to XLSX files as described in [ECMA 376 4th edition](http://www.ecma-international.org/publications/standards/Ecma-376.htm). xlnt is currently under active feature development and is on track for the version 1.0 release in the next few weeks. Until then, the API could have significant changes. For a high-level summary of what you can do with this library, see [the feature list](https://tfussell.gitbooks.io/xlnt/content/docs/introduction/Features.html). xlnt is a modern C++ library for manipulating spreadsheets in memory and reading/writing them from/to XLSX files as described in [ECMA 376 4th edition](http://www.ecma-international.org/publications/standards/Ecma-376.htm). The first public release of xlnt version 1.0 was on May 10th, 2017. Current work is focused on increasing compatibility, improving performance, and brainstorming future development goals. For a high-level summary of what you can do with this library, see [the feature list](https://tfussell.gitbooks.io/xlnt/content/docs/introduction/Features.html). Contributions are welcome in the form of pull requests or discussions on [the repository's Issues page](https://github.com/tfussell/xlnt/issues).
## Example ## Example
@ -29,7 +29,7 @@ int main()
wb.save("example.xlsx"); wb.save("example.xlsx");
return 0; return 0;
} }
// compile with -std=c++14 -Ixlnt/include -Lxlnt/lib -lxlnt // compile with -std=c++14 -Ixlnt/include -lxlnt
``` ```
## Documentation ## Documentation

View File

@ -0,0 +1,15 @@
# Build script for xlntarrow, the library made of xlntarrow.hpp and xlntarrow.cpp.
cmake_minimum_required(VERSION 3.2)
project(xlntarrow)
# Require C++14; CMAKE_CXX_STANDARD_REQUIRED forbids falling back to an older standard.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Header and implementation file that make up the library.
set(XLNT_ARROW
${CMAKE_CURRENT_SOURCE_DIR}/xlntarrow.hpp
${CMAKE_CURRENT_SOURCE_DIR}/xlntarrow.cpp)
# The library is always built as a shared library and links privately against xlnt.
add_library(xlntarrow SHARED ${XLNT_ARROW})
target_link_libraries(xlntarrow PRIVATE xlnt)
# Makes this directory searchable so sources can include <xlntarrow.hpp>.
target_include_directories(xlntarrow PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
# NOTE(review): hard-coded relative path to a miniconda3 directory outside the repository;
# presumably where the Arrow headers were installed on the author's machine -- confirm.
target_include_directories(xlntarrow PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../../miniconda3/include)
# Presumably the repository's top-level include directory (xlnt public headers) -- confirm.
target_include_directories(xlntarrow PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)

View File

@ -0,0 +1,39 @@
#include <xlnt/xlnt.hpp>
#include <xlntarrow.hpp>
namespace xlnt {
namespace arrow {
/// Opens the XLSX data in s with a streaming reader and walks every cell of the
/// next worksheet. Nothing is stored in table yet: the only visible effect is a
/// progress line printed to std::cout for each cell whose row number leaves a
/// remainder of 1 when divided by 1000.
void xlsx2arrow(std::istream &s, ::arrow::Table &table)
{
    xlnt::streaming_workbook_reader reader;
    reader.open(s);
    reader.begin_worksheet();

    // Row of the first cell encountered; recorded but not used further.
    int first_row = 0;

    for (; reader.has_cell();)
    {
        auto current = reader.read_cell();

        if (first_row < 1)
        {
            first_row = current.row();
        }

        if (current.reference().row() % 1000 != 1)
        {
            continue;
        }

        std::cout << current.reference().to_string() << std::endl;
    }

    reader.end_worksheet();
}
/// Placeholder for writing table out as XLSX: the body is empty, so neither
/// table nor s is touched.
/// NOTE(review): s is a std::istream although this is the output direction;
/// presumably std::ostream was intended -- confirm before implementing.
void arrow2xlsx(const ::arrow::Table &table, std::istream &s)
{
}
}
}

View File

@ -0,0 +1,11 @@
#include <iostream>
#include <arrow/api.h>
namespace xlnt {
namespace arrow {
/// Reads XLSX data from the stream s; table is the Arrow table passed in by
/// reference (presumably the destination of the converted data -- confirm).
void xlsx2arrow(std::istream &s, ::arrow::Table &table);
/// Counterpart taking a read-only Arrow table and a stream.
/// NOTE(review): the stream parameter is a std::istream even though the name
/// suggests output -- confirm the intended type.
void arrow2xlsx(const ::arrow::Table &table, std::istream &s);
}
}

View File

@ -0,0 +1,487 @@
#pragma once
#include <boost/optional.hpp>
#include <boost/utility/typed_in_place_factory.hpp>
#include <cassert>
#include <stdexcept>
#include <iostream>
#include <Python.h>
namespace xlnt {
namespace arrow {
/// A stream buffer getting data from and putting data into a Python file object
/** The aims are as follows:
- Given a C++ function acting on a standard stream, e.g.
\code
void read_inputs(std::istream& input) {
...
input >> something >> something_else;
}
\endcode
and given a piece of Python code which creates a file-like object,
to be able to pass this file object to that C++ function, e.g.
\code
import gzip
gzip_file_obj = gzip.GzipFile(...)
read_inputs(gzip_file_obj)
\endcode
and have the standard stream pull data from and put data into the Python
file object.
- When Python \c read_inputs() returns, the Python object is able to
continue reading or writing where the C++ code left off.
- Operations in C++ on mere files should be competitively fast compared
to the direct use of \c std::fstream.
\b Motivation
- the standard Python library offer of file-like objects (files,
compressed files and archives, network, ...) is far superior to the
offer of streams in the C++ standard library and Boost C++ libraries.
- i/o code involves a fair amount of text processing which is more
efficiently prototyped in Python but then one may need to rewrite
a time-critical part in C++, in as seamless a manner as possible.
\b Usage
This is 2-step:
- a trivial wrapper function
\code
using boost_adaptbx::python::streambuf;
void read_inputs_wrapper(streambuf& input)
{
streambuf::istream is(input);
read_inputs(is);
}
def("read_inputs", read_inputs_wrapper);
\endcode
which has to be written every time one wants a Python binding for
such a C++ function.
- the Python side
\code
from boost.python import streambuf
read_inputs(streambuf(python_file_obj=obj, buffer_size=1024))
\endcode
\c buffer_size is optional. See also: \c default_buffer_size
Note: references are to the C++ standard (the numbers between parentheses
at the end of references are margin markers).
*/
class streambuf : public std::basic_streambuf<char>
{
private:
typedef std::basic_streambuf<char> base_t;
public:
/* The syntax
using base_t::char_type;
would be nicer but Visual Studio C++ 8 chokes on it
*/
typedef base_t::char_type char_type;
typedef base_t::int_type int_type;
typedef base_t::pos_type pos_type;
typedef base_t::off_type off_type;
typedef base_t::traits_type traits_type;
// Visual C++ 7.1 workaround: expose traits_type::eof() through a plain
// static member function.
static inline int traits_type_eof()
{
    return traits_type::eof();
}
/// The default size of the read and write buffer.
/** They are respectively used to buffer data read from and data written to
the Python file object. It can be modified from Python.
*/
static std::size_t default_buffer_size;
/// Construct from a Python file object
/** if buffer_size is 0 the current default_buffer_size is used.

    The "read", "write", "seek" and "tell" attributes are looked up once.
    An attribute the object does not have is stored as nullptr (the Python
    error raised by the lookup is cleared) and is only reported when the
    corresponding stream operation is attempted.
*/
streambuf(
    PyObject *python_file_obj,
    std::size_t buffer_size_ = 0)
    :
    buffer_size(buffer_size_ != 0 ? buffer_size_ : default_buffer_size),
    write_buffer(nullptr),
    pos_of_read_buffer_end_in_py_file(0),
    pos_of_write_buffer_end_in_py_file(buffer_size),
    farthest_pptr(nullptr)
{
    assert(buffer_size != 0);

    // Look up an optional method. A failed lookup must not leave a Python
    // error pending, otherwise the tell() probe below would mistake it for
    // a failure of tell() itself.
    auto optional_attr = [python_file_obj](const char *name) -> PyObject * {
        if (python_file_obj == nullptr) return nullptr;
        PyObject *attr = PyObject_GetAttrString(python_file_obj, name);
        if (attr == nullptr) PyErr_Clear();
        return attr;
    };

    py_read = optional_attr("read");
    py_write = optional_attr("write");
    py_seek = optional_attr("seek");
    py_tell = optional_attr("tell");

    if (py_write != nullptr) {
        // C-like string to make debugging easier
        write_buffer = new char[buffer_size + 1];
        write_buffer[buffer_size] = '\0';
        setp(write_buffer, write_buffer + buffer_size); // 27.5.2.4.5 (5)
        farthest_pptr = pptr();
    }
    else {
        // The first attempt at output will result in a call to overflow
        setp(nullptr, nullptr);
    }

    /* Some Python file objects (e.g. sys.stdout and sys.stdin)
       have non-functional seek and tell. If so, drop py_tell and py_seek.
       The same call also yields the initial position of both buffers.
    */
    if (py_tell != nullptr) {
        bool usable = false;
        long long py_pos = -1;
        PyObject *tell_result = PyObject_CallFunction(py_tell, nullptr);

        if (tell_result != nullptr) {
            py_pos = PyLong_AsLongLong(tell_result);
            Py_DECREF(tell_result);
            usable = !(py_pos == -1 && PyErr_Occurred() != nullptr);
        }

        if (usable) {
            pos_of_read_buffer_end_in_py_file = static_cast<off_type>(py_pos);
            pos_of_write_buffer_end_in_py_file = static_cast<off_type>(py_pos);
        }
        else {
            PyErr_Clear();
            Py_CLEAR(py_tell);
            Py_CLEAR(py_seek);
        }
    }
}
/// Mundane destructor freeing the allocated resources
virtual ~streambuf() {
if (write_buffer) delete[] write_buffer;
}
/// C.f. C++ standard section 27.5.2.4.3
/** It is essential to override this virtual function for the stream
member function readsome to work correctly (c.f. 27.6.1.3, alinea 30)
*/
virtual std::streamsize showmanyc() {
int_type const failure = traits_type::eof();
int_type status = underflow();
if (status == failure) return -1;
return egptr() - gptr();
}
/// C.f. C++ standard section 27.5.2.4.3
/** Calls read(buffer_size) on the Python file object and makes the returned
    bytes the new get area. Returns the first character read, or eof when
    read() returned no data.
    Throws std::invalid_argument when the object has no 'read' attribute, or
    when the call fails or does not return a bytes object (in which case any
    Python error raised stays set).
*/
virtual int_type underflow() {
    int_type const failure = traits_type::eof();
    if (py_read == nullptr) {
        throw std::invalid_argument(
            "That Python file object has no 'read' attribute");
    }
    // "n" is the format unit for Py_ssize_t; passing a std::size_t for "i"
    // (int) through varargs is undefined.
    PyObject *chunk = PyObject_CallFunction(
        py_read, "n", static_cast<Py_ssize_t>(buffer_size));
    char *read_buffer_data = nullptr;
    Py_ssize_t py_n_read = 0;
    if (chunk == nullptr
        || PyBytes_AsStringAndSize(chunk, &read_buffer_data, &py_n_read) == -1) {
        Py_XDECREF(chunk);
        setg(0, 0, 0);
        throw std::invalid_argument(
            "The method 'read' of the Python file object "
            "did not return a string.");
    }
    // Install the new chunk before releasing the previous one so the get
    // area never refers to freed memory.
    PyObject *previous = read_buffer;
    read_buffer = chunk;
    auto n_read = static_cast<off_type>(py_n_read);
    pos_of_read_buffer_end_in_py_file += n_read;
    setg(read_buffer_data, read_buffer_data, read_buffer_data + n_read);
    // ^^^27.5.2.3.1 (4)
    Py_XDECREF(previous);
    if (n_read == 0) return failure;
    return traits_type::to_int_type(read_buffer_data[0]);
}
/// C.f. C++ standard section 27.5.2.4.5
/** Writes the used part of the write buffer to the Python file object,
    followed by c when c is not eof, then empties the buffer.
    Returns eof when a Python write call fails (the Python error stays set),
    otherwise a value different from eof.
    Throws std::invalid_argument when the object has no 'write' attribute.
*/
virtual int_type overflow(int_type c=traits_type_eof()) {
    if (py_write == nullptr) {
        throw std::invalid_argument(
            "That Python file object has no 'write' attribute");
    }
    farthest_pptr = std::max(farthest_pptr, pptr());

    // Hands size bytes to write(); releases both the temporary bytes object
    // and the call result, and reports whether the call succeeded.
    auto write_bytes = [this](const char *data, off_type size) -> bool {
        PyObject *chunk = PyBytes_FromStringAndSize(
            data, static_cast<Py_ssize_t>(size));
        if (chunk == nullptr) return false;
        PyObject *outcome = PyObject_CallFunction(py_write, "O", chunk);
        Py_DECREF(chunk);
        if (outcome == nullptr) return false;
        Py_DECREF(outcome);
        return true;
    };

    off_type n_written = 0;
    bool ok = true;
    off_type const pending = static_cast<off_type>(farthest_pptr - pbase());
    if (pending > 0) {
        ok = write_bytes(pbase(), pending);
        if (ok) n_written += pending;
    }
    if (ok && !traits_type::eq_int_type(c, traits_type::eof())) {
        char const ch = traits_type::to_char_type(c);
        ok = write_bytes(&ch, 1);
        if (ok) n_written++;
    }
    if (n_written) {
        // Account for what actually reached Python, even if a later write
        // failed, so buffered data is never written twice.
        pos_of_write_buffer_end_in_py_file += n_written;
        setp(pbase(), epptr());
        // ^^^ 27.5.2.4.5 (5)
        farthest_pptr = pptr();
    }
    if (!ok) return traits_type::eof();
    return traits_type::eq_int_type(
        c, traits_type::eof()) ? traits_type::not_eof(c) : c;
}
/// Update the python file to reflect the state of this stream buffer
/** Empty the write buffer into the Python file object and set the seek
position of the latter accordingly (C++ standard section 27.5.2.4.2).
If there is no write buffer or it is empty, but there is a non-empty
read buffer, set the Python file object seek position to the
seek position in that read buffer.
*/
virtual int sync() {
int result = 0;
farthest_pptr = std::max(farthest_pptr, pptr());
if (farthest_pptr && farthest_pptr > pbase()) {
off_type delta = pptr() - farthest_pptr;
int_type status = overflow();
if (traits_type::eq_int_type(status, traits_type::eof())) result = -1;
if (py_seek != nullptr)
{
PyObject_CallFunction(py_seek, "i", delta);
}
}
else if (gptr() && gptr() < egptr()) {
if (py_seek != nullptr)
{
PyObject_CallFunction(py_seek, "ii", gptr() - egptr(), 1);
}
}
return result;
}
/// C.f. C++ standard section 27.5.2.4.2
/** This implementation is optimised to look whether the position is within
    the buffers, so as to avoid calling Python seek or tell. It is
    important for many applications that the overhead of calling into Python
    is avoided as much as possible (e.g. parsers which may do a lot of
    backtracking)

    Returns the new absolute position, or pos_type(off_type(-1)) when the
    read buffer cannot be filled, way is invalid, or a Python seek/tell call
    fails (the Python error stays set).
    Throws std::invalid_argument when the Python object lacks 'seek', or
    lacks 'tell' when Python has to be consulted.
*/
virtual
pos_type seekoff(off_type off, std::ios_base::seekdir way,
                 std::ios_base::openmode which= std::ios_base::in
                                              | std::ios_base::out)
{
    /* In practice, "which" is either std::ios_base::in or out
       since we end up here because either seekp or seekg was called
       on the stream using this buffer. That simplifies the code
       in a few places.
    */
    pos_type const failure = pos_type(off_type(-1));

    if (py_seek == nullptr) {
        throw std::invalid_argument(
            "That Python file object has no 'seek' attribute");
    }

    // we need the read buffer to contain something!
    if (which == std::ios_base::in && !gptr()) {
        if (traits_type::eq_int_type(underflow(), traits_type::eof())) {
            return failure;
        }
    }

    // compute the whence parameter for Python seek
    int whence;
    switch (way) {
        case std::ios_base::beg:
            whence = 0;
            break;
        case std::ios_base::cur:
            whence = 1;
            break;
        case std::ios_base::end:
            whence = 2;
            break;
        default:
            return failure;
    }

    // Let's have a go
    boost::optional<off_type> result = seekoff_without_calling_python(
        off, way, which);
    if (!result) {
        // we need to call Python
        if (py_tell == nullptr) {
            throw std::invalid_argument(
                "That Python file object has no 'tell' attribute");
        }
        if (which == std::ios_base::out) overflow();
        if (way == std::ios_base::cur) {
            if (which == std::ios_base::in) off -= egptr() - gptr();
            else if (which == std::ios_base::out) off += pptr() - pbase();
        }
        // "L" carries the full 64-bit offset; "i" would truncate it to int.
        PyObject *seek_result = PyObject_CallFunction(
            py_seek, "Li", static_cast<long long>(off), whence);
        if (seek_result == nullptr) return failure;
        Py_DECREF(seek_result);

        PyObject *tell_result = PyObject_CallFunction(py_tell, nullptr);
        if (tell_result == nullptr) return failure;
        long long const new_pos = PyLong_AsLongLong(tell_result);
        Py_DECREF(tell_result);
        if (new_pos == -1 && PyErr_Occurred() != nullptr) return failure;
        result = static_cast<off_type>(new_pos);

        // The Python file position moved, so re-base the bookkeeping used by
        // the in-buffer fast path; underflow() then adds what it reads.
        if (which == std::ios_base::in) {
            pos_of_read_buffer_end_in_py_file = *result;
            underflow();
        }
        else if (which == std::ios_base::out) {
            pos_of_write_buffer_end_in_py_file = *result;
        }
    }
    return *result;
}
/// C.f. C++ standard section 27.5.2.4.2
/** Absolute positioning: forwards to seekoff with sp taken as an offset from
    the beginning.
*/
virtual
pos_type seekpos(pos_type sp,
                 std::ios_base::openmode which= std::ios_base::in
                                              | std::ios_base::out)
{
    off_type const offset_from_start = sp;
    return streambuf::seekoff(offset_from_start, std::ios_base::beg, which);
}
private:
PyObject *py_read = nullptr;
PyObject *py_write = nullptr;
PyObject *py_seek = nullptr;
PyObject *py_tell = nullptr;
std::size_t buffer_size;
/* This is actually a Python bytes object and the actual read buffer is
its internal data, i.e. an array of characters. We keep a reference to
the Python object so as to hold on to it: as a result, the actual buffer
can't go away.
*/
PyObject *read_buffer = nullptr;
/* A mere array of char's allocated on the heap at construction time and
de-allocated only at destruction time.
*/
char *write_buffer = nullptr;
off_type pos_of_read_buffer_end_in_py_file,
pos_of_write_buffer_end_in_py_file;
// the farthest place the buffer has been written into
char *farthest_pptr = nullptr;
// Tries to satisfy a seek purely by moving the get or put pointer inside the
// current buffer. Returns the resulting absolute file position on success and
// an empty optional when the target lies outside the buffer or way is
// std::ios_base::end. Throws std::runtime_error("unreachable") when which is
// neither exactly std::ios_base::in nor exactly std::ios_base::out.
boost::optional<off_type> seekoff_without_calling_python(
off_type off,
std::ios_base::seekdir way,
std::ios_base::openmode which)
{
// default-constructed, i.e. empty, optional
boost::optional<off_type> const failure;
// Buffer range and current position
// (buffer pointers are converted to integers so they can be mixed with offsets)
off_type buf_begin, buf_end, buf_cur, upper_bound;
off_type pos_of_buffer_end_in_py_file;
if (which == std::ios_base::in) {
pos_of_buffer_end_in_py_file = pos_of_read_buffer_end_in_py_file;
buf_begin = reinterpret_cast<std::streamsize>(eback());
buf_cur = reinterpret_cast<std::streamsize>(gptr());
buf_end = reinterpret_cast<std::streamsize>(egptr());
upper_bound = buf_end;
}
else if (which == std::ios_base::out) {
pos_of_buffer_end_in_py_file = pos_of_write_buffer_end_in_py_file;
buf_begin = reinterpret_cast<std::streamsize>(pbase());
buf_cur = reinterpret_cast<std::streamsize>(pptr());
buf_end = reinterpret_cast<std::streamsize>(epptr());
farthest_pptr = std::max(farthest_pptr, pptr());
// for output, positions up to and including the farthest written byte are accepted
upper_bound = reinterpret_cast<std::streamsize>(farthest_pptr) + 1;
}
else {
throw std::runtime_error("unreachable");
}
// Sought position in "buffer coordinate"
off_type buf_sought;
if (way == std::ios_base::cur) {
buf_sought = buf_cur + off;
}
else if (way == std::ios_base::beg) {
// translate the absolute file offset relative to the known file position of the buffer end
buf_sought = buf_end + (off - pos_of_buffer_end_in_py_file);
}
else if (way == std::ios_base::end) {
// seeking from the end is never resolved inside the buffer
return failure;
}
else {
throw std::runtime_error("unreachable");
}
// if the sought position is not in the buffer, give up
if (buf_sought < buf_begin || buf_sought >= upper_bound) return failure;
// we are in wonderland
// NOTE(review): gbump/pbump take an int; the difference is an off_type -- confirm buffers stay small enough.
if (which == std::ios_base::in) gbump(buf_sought - buf_cur);
else if (which == std::ios_base::out) pbump(buf_sought - buf_cur);
return pos_of_buffer_end_in_py_file + (buf_sought - buf_end);
}
/// Converts a Python integer object to T and releases the reference to it.
/** o is consumed: the caller must own the reference and must not use it
    afterwards. A null o (e.g. the result of a failed Python call) yields
    T(-1) instead of being dereferenced. When the conversion itself fails,
    PyLong_AsLongLong returns -1 and leaves a Python error set.
*/
template<typename T>
T extract_int(PyObject *o)
{
    if (o == nullptr)
    {
        return static_cast<T>(-1);
    }

    // long long rather than long: long is 32 bits wide on some platforms,
    // which would truncate large file offsets.
    long long const value = PyLong_AsLongLong(o);
    Py_DECREF(o);

    return static_cast<T>(value);
}
public:
/// Input stream reading through a streambuf; badbit raises an exception.
class istream : public std::istream
{
public:
    istream(streambuf& buf) : std::istream(&buf)
    {
        exceptions(std::ios_base::badbit);
    }

    /// Synchronises with the buffer if the stream is still good.
    ~istream()
    {
        // Destructors are implicitly noexcept, and with badbit exceptions
        // enabled sync() may throw; an escaping exception would terminate
        // the program, so it is contained here.
        try
        {
            if (this->good()) this->sync();
        }
        catch (...)
        {
        }
    }
};
/// Output stream writing through a streambuf; badbit raises an exception.
class ostream : public std::ostream
{
public:
    ostream(streambuf& buf) : std::ostream(&buf)
    {
        exceptions(std::ios_base::badbit);
    }

    /// Flushes pending output if the stream is still good.
    ~ostream()
    {
        // Destructors are implicitly noexcept, and with badbit exceptions
        // enabled flush() may throw; an escaping exception would terminate
        // the program, so it is contained here.
        try
        {
            if (this->good()) this->flush();
        }
        catch (...)
        {
        }
    }
};
};
// Definition of the static default buffer size: 1024 characters.
// NOTE(review): this is a non-inline definition inside a header; including the
// header from more than one translation unit would presumably produce
// duplicate-symbol link errors -- confirm how the header is used.
std::size_t streambuf::default_buffer_size = 1024;
/// Holder whose single member is a streambuf built from a Python file object.
/** The ostream struct in this file lists it as its first base class, so the
    buffer is constructed before the stream that is attached to it.
*/
struct streambuf_capsule
{
    streambuf python_streambuf;

    streambuf_capsule(PyObject *python_file_obj, std::size_t buffer_size = 0)
        : python_streambuf(python_file_obj, buffer_size)
    {
    }
};
/// Output stream that owns its Python-backed stream buffer.
/** The private streambuf_capsule base is listed first so python_streambuf
    exists before streambuf::ostream is attached to it.
*/
struct ostream : private streambuf_capsule, streambuf::ostream
{
    ostream(
        PyObject *python_file_obj,
        std::size_t buffer_size=0)
        :
        streambuf_capsule(python_file_obj, buffer_size),
        streambuf::ostream(python_streambuf)
    {}

    /// Flushes pending output and reports a failure without throwing.
    /** A destructor is implicitly noexcept, so throwing from it would call
        std::terminate. A Python error left by the final flush is therefore
        reported through PyErr_WriteUnraisable (which also clears it) and
        the explanatory message is written to std::cerr.
    */
    ~ostream()
    {
        try
        {
            if (this->good())
            {
                this->flush();
            }
        }
        catch (...)
        {
            // the failure is reported below if Python recorded an error
        }

        if (PyErr_Occurred() != nullptr)
        {
            PyErr_WriteUnraisable(nullptr);
            std::cerr <<
                "Problem closing python ostream.\n"
                " Known limitation: the error is unrecoverable. Sorry.\n"
                " Suggestion for programmer: add ostream.flush() before"
                " returning." << '\n';
        }
    }
};
}} // namespace xlnt::arrow

View File

@ -0,0 +1,64 @@
from distutils.core import setup, Extension
from distutils import sysconfig
# Build script for the xlntpyarrow extension module (distutils based).
description = """
xlntpyarrow allows Apache Arrow tables to be written to and read from an XLSX
file efficiently using the C++ library xlnt.
""".strip()
# Drop -Wstrict-prototypes from the compiler flags distutils would pass on;
# presumably because that C-only warning is noisy for C++ sources -- confirm.
cfg_vars = sysconfig.get_config_vars()
if 'CFLAGS' in cfg_vars:
    cfg_vars['CFLAGS'] = cfg_vars['CFLAGS'].replace('-Wstrict-prototypes', '')
# The single C++ extension, built from xlntpyarrow.cpp and linked against
# arrow, xlntarrow and xlnt.
# NOTE(review): every include and library directory below is an absolute path
# under /root, so the build only works on a machine laid out the same way.
# NOTE(review): compiled with -std=c++11 while the xlntarrow CMake project
# requests C++14 -- confirm that the headers included here are C++11 compatible.
xlntpyarrow_extension = Extension(
'xlntpyarrow',
['xlntpyarrow.cpp'],
language = 'c++',
include_dirs = [
'/root/xlnt/arrow/xlntarrow',
'/root/xlnt/arrow/xlntpyarrow',
'/root/miniconda3/include'
],
libraries = [
'arrow',
'xlntarrow',
'xlnt'
],
library_dirs = [
'/root/miniconda3/lib',
'/root/xlnt/build/arrow/xlntarrow',
'/root/xlnt/build/source'
],
extra_compile_args=['-std=c++11']
)
# Trove classifiers published with the package metadata.
# NOTE(review): 'Python :: 2.7' is listed, but xlntpyarrow.cpp initialises the
# module through PyModuleDef_Init, which looks Python 3 only -- confirm.
classifiers = [
'Development Status :: 5 - Production/Stable',
'Environment :: Plugins',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: MIT License',
'Natural Language :: English',
'Operating System :: Microsoft :: Windows',
'Operating System :: MacOS :: MacOS X',
'Operating System :: POSIX :: Linux',
'Programming Language :: C',
'Programming Language :: C++',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: Implementation :: CPython',
'Topic :: Database',
'Topic :: Office/Business :: Financial :: Spreadsheet',
'Topic :: Scientific/Engineering :: Information Analysis',
'Topic :: Software Development :: Libraries :: Python Modules'
]
# Register the package with its one extension module.
# NOTE(review): version is '1.1.0' here while the module sets __version__ to
# "0.9.0" -- confirm which one is intended.
setup(
name = 'xlntpyarrow',
version = '1.1.0',
classifiers = classifiers,
description = description,
ext_modules = [xlntpyarrow_extension],
author = 'Thomas Fussell',
author_email = 'thomas.fussell@gmail.com',
url = 'https://github.com/tfussell/xlnt'
)

View File

@ -0,0 +1,109 @@
#include <iostream>
#include <memory>
#include <vector>
#include <xlntarrow.hpp>
#include <python_streambuf.hpp>
#include <Python.h>
/// Runs the XLSX reader over a Python file object.
/** Wraps file in a stream buffer, builds a table from a null schema and an
    empty column list, and hands both to xlnt::arrow::xlsx2arrow.
    Returns a new reference to None on success. On failure returns nullptr
    with a Python exception set: C++ exceptions must not propagate into the
    interpreter, so they are translated to RuntimeError here unless Python
    already recorded an error of its own.
*/
PyObject *xlsx2arrow(PyObject *file)
{
    try
    {
        xlnt::arrow::streambuf buffer(file);
        std::istream stream(&buffer);
        std::shared_ptr<arrow::Schema> schema;
        std::vector<std::shared_ptr<arrow::Column>> columns;
        arrow::Table table(schema, columns);
        xlnt::arrow::xlsx2arrow(stream, table);
    }
    catch (const std::exception &error)
    {
        if (PyErr_Occurred() == nullptr)
        {
            PyErr_SetString(PyExc_RuntimeError, error.what());
        }
        return nullptr;
    }
    catch (...)
    {
        if (PyErr_Occurred() == nullptr)
        {
            PyErr_SetString(PyExc_RuntimeError, "unknown error while reading XLSX data");
        }
        return nullptr;
    }

    // A Python call made while reading may have failed without a C++
    // exception; returning a value with an error pending is not allowed.
    if (PyErr_Occurred() != nullptr)
    {
        return nullptr;
    }

    Py_RETURN_NONE;
}
extern "C" {
/*
 * Implements XLSX->pyarrow table function.
 *
 * Python signature: xlsx2arrow(file). The single argument may be given
 * positionally or by the keyword "file". Returns whatever xlsx2arrow(file)
 * returns, or nullptr with a Python error set when argument parsing fails.
 */
// Adjacent literals with explicit "\n" are used because a backslash-newline
// inside one literal splices the lines together without any separator.
PyDoc_STRVAR(xlntpyarrow_xlsx2arrow_doc,
    "xlsx2arrow(file)\n"
    "\n"
    "Returns an arrow table representing the given XLSX file object.");
PyObject *xlntpyarrow_xlsx2arrow(PyObject *self, PyObject *args, PyObject *kwargs)
{
    (void)self; // module-level function: self is not used

    PyObject *file = nullptr;
    static const char *keywords[] = { "file", nullptr };
    // PyArg_ParseTupleAndKeywords takes a non-const char ** keyword list
    static auto keywords_nc = const_cast<char **>(keywords);

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", keywords_nc, &file))
    {
        return nullptr;
    }

    return xlsx2arrow(file);
}
/*
 * Implements pyarrow table->XLSX function.
 *
 * Python signature: arrow2xlsx(table, file). Both arguments are taken as
 * generic objects; the conversion itself is not implemented yet, so the
 * function only validates the argument count and returns None.
 * Returns nullptr with a Python error set when argument parsing fails.
 */
// Adjacent literals with explicit "\n" are used because a backslash-newline
// inside one literal splices the lines together without any separator.
PyDoc_STRVAR(xlntpyarrow_arrow2xlsx_doc,
    "arrow2xlsx(table, file)\n"
    "\n"
    "Writes the given arrow table to out_file as an XLSX file.");
PyObject *xlntpyarrow_arrow2xlsx(PyObject *self, PyObject *args, PyObject *kwargs)
{
    (void)self; // module-level function: self is not used

    PyObject *table = nullptr;
    PyObject *file = nullptr;
    // One keyword and one output pointer per format unit: a format with more
    // units than keywords makes PyArg_ParseTupleAndKeywords fail on every call.
    static const char *keywords[] = { "table", "file", nullptr };
    static auto keywords_nc = const_cast<char **>(keywords);

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO", keywords_nc, &table, &file))
    {
        return nullptr;
    }

    (void)table;
    (void)file;

    Py_RETURN_NONE;
}
// Method table of the module: maps the Python-visible names to their C
// implementations and docstrings. Both entries accept positional and keyword
// arguments; the all-null entry terminates the table.
static PyMethodDef xlntpyarrow_functions[] =
{
{ "xlsx2arrow", (PyCFunction)xlntpyarrow_xlsx2arrow, METH_VARARGS | METH_KEYWORDS, xlntpyarrow_xlsx2arrow_doc },
{ "arrow2xlsx", (PyCFunction)xlntpyarrow_arrow2xlsx, METH_VARARGS | METH_KEYWORDS, xlntpyarrow_arrow2xlsx_doc },
{ nullptr, nullptr, 0, nullptr }
};
/// Module execution slot: populates a freshly created module object.
/** Adds the functions of xlntpyarrow_functions and the constants __author__,
    __version__ and year. Returns 0 on success and -1 as soon as one of the
    additions fails; the failing call has then set the Python error.
*/
int exec_xlntpyarrow(PyObject *module)
{
    if (PyModule_AddFunctions(module, xlntpyarrow_functions) != 0)
    {
        return -1;
    }

    if (PyModule_AddStringConstant(module, "__author__", "Thomas Fussell") != 0)
    {
        return -1;
    }

    if (PyModule_AddStringConstant(module, "__version__", "0.9.0") != 0)
    {
        return -1;
    }

    if (PyModule_AddIntConstant(module, "year", 2017) != 0)
    {
        return -1;
    }

    return 0;
}
// Docstring of the module itself.
PyDoc_STRVAR(xlntpyarrow_doc, "The xlntpyarrow module");
// Slot table: the only slot runs exec_xlntpyarrow on the module object;
// the { 0, nullptr } entry terminates the table.
static PyModuleDef_Slot xlntpyarrow_slots[] =
{
{ Py_mod_exec, (void *)exec_xlntpyarrow },
{ 0, nullptr }
};
// Module definition. No method table is given here (m_methods is nullptr)
// because exec_xlntpyarrow adds the functions itself.
static PyModuleDef xlntpyarrow_def =
{
PyModuleDef_HEAD_INIT,
"xlntpyarrow",
xlntpyarrow_doc,
0, /* m_size */
nullptr, /* m_methods */
xlntpyarrow_slots,
nullptr, /* m_traverse */
nullptr, /* m_clear */
nullptr, /* m_free */
};
/// Entry point looked up by the interpreter when the module is imported;
/// hands the module definition to PyModuleDef_Init and returns its result.
PyMODINIT_FUNC PyInit_xlntpyarrow()
{
    PyObject *const initialised = PyModuleDef_Init(&xlntpyarrow_def);
    return initialised;
}
} // extern "C"

View File

@ -0,0 +1,132 @@
// Copyright (c) 2014-2017 Thomas Fussell
// Copyright (c) 2010-2015 openpyxl
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE
//
// @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file
#pragma once
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <xlnt/xlnt_config.hpp>
namespace xml {
class parser;
}
namespace xlnt {
class cell;
template<typename T>
class optional;
class path;
class workbook;
class worksheet;
namespace detail {
class xlsx_consumer;
}
/// <summary>
/// streaming_workbook_reader reads an XLSX document piece by piece: a source is
/// opened with one of the open() overloads, then worksheets and their cells are
/// visited through begin_worksheet(), has_cell(), read_cell() and end_worksheet().
/// </summary>
class XLNT_API streaming_workbook_reader
{
public:
streaming_workbook_reader();
~streaming_workbook_reader();
/// <summary>
/// Closes currently open read stream. This will be called automatically
/// by the destructor if it hasn't already been called manually.
/// </summary>
void close();
/// <summary>
/// Presumably returns true while the current worksheet still has a cell that
/// has not been returned by read_cell() (TODO confirm against the implementation).
/// </summary>
bool has_cell();
/// <summary>
/// Reads the next cell in the current worksheet and returns it.
/// </summary>
cell read_cell();
/// <summary>
/// Presumably returns true while the workbook still has a worksheet that has
/// not been started with begin_worksheet() (TODO confirm against the implementation).
/// </summary>
bool has_worksheet();
/// <summary>
/// Begins reading of the next worksheet in the workbook.
/// </summary>
void begin_worksheet();
/// <summary>
/// Ends reading of the current worksheet in the workbook and returns a
/// worksheet object, presumably corresponding to the worksheet started by
/// the preceding begin_worksheet() call (TODO confirm).
/// </summary>
worksheet end_worksheet();
/// <summary>
/// Interprets byte vector data as an XLSX file and opens it for reading.
/// </summary>
void open(const std::vector<std::uint8_t> &data);
/// <summary>
/// Interprets file with the given filename as an XLSX file and opens it
/// for reading.
/// </summary>
void open(const std::string &filename);
#ifdef _MSC_VER
/// <summary>
/// Interprets file with the given filename as an XLSX file and opens it
/// for reading. Only declared when compiling with MSVC.
/// </summary>
void open(const std::wstring &filename);
#endif
/// <summary>
/// Interprets file with the given filename as an XLSX file and opens it
/// for reading.
/// </summary>
void open(const path &filename);
/// <summary>
/// Interprets data in stream as an XLSX file and opens it for reading.
/// </summary>
void open(std::istream &stream);
private:
// Internal state; the roles below are inferred from the member names only.
// NOTE(review): confirm against the implementation file.
std::vector<std::string> worksheet_queue_;
std::unique_ptr<detail::xlsx_consumer> consumer_;
std::unique_ptr<workbook> workbook_;
std::unique_ptr<std::istream> stream_;
std::unique_ptr<std::streambuf> stream_buffer_;
std::unique_ptr<std::istream> part_stream_;
std::unique_ptr<std::streambuf> part_stream_buffer_;
std::unique_ptr<xml::parser> parser_;
};
} // namespace xlnt

View File

@ -0,0 +1,93 @@
// Copyright (c) 2014-2017 Thomas Fussell
// Copyright (c) 2010-2015 openpyxl
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE
//
// @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file
#pragma once
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <string>
#include <vector>
#include <xlnt/xlnt_config.hpp>
namespace xlnt {
/// <summary>
/// workbook is the container for all other parts of the document.
/// </summary>
class XLNT_API streaming_workbook_writer
{
public:
~streaming_workbook_writer();
/// <summary>
/// Finishes writing of the remaining contents of the workbook and closes
/// currently open write stream. This will be called automatically by the
/// destructor if it hasn't already been called manually.
/// </summary>
void close();
/// <summary>
/// Writes a cell to the currently active worksheet at the position given by
/// ref and with the given value. ref should be to the right of or below
/// the previously written cell.
/// </summary>
cell add_cell(const cell_reference &ref);
/// <summary>
/// Ends writing of data to the current sheet and begins writing a new sheet
/// with the given title.
/// </summary>
worksheet add_sheet(const std::string &title);
/// <summary>
/// Serializes the workbook into an XLSX file and saves the bytes into
/// byte vector data.
/// </summary>
void open(std::vector<std::uint8_t> &data) const;
/// <summary>
/// Serializes the workbook into an XLSX file and saves the data into a file
/// named filename.
/// </summary>
void open(const std::string &filename) const;
#ifdef _MSC_VER
/// <summary>
/// Serializes the workbook into an XLSX file and saves the data into a file
/// named filename.
/// </summary>
void open(const std::wstring &filename) const;
#endif
/// <summary>
/// Serializes the workbook into an XLSX file and saves the data into a file
/// named filename.
/// </summary>
void open(const xlnt::path &filename) const;
/// <summary>
/// Serializes the workbook into an XLSX file and saves the data into stream.
/// </summary>
void open(std::ostream &stream) const;
};
} // namespace xlnt

View File

@ -65,6 +65,7 @@
#include <xlnt/workbook/external_book.hpp> #include <xlnt/workbook/external_book.hpp>
#include <xlnt/workbook/metadata_property.hpp> #include <xlnt/workbook/metadata_property.hpp>
#include <xlnt/workbook/named_range.hpp> #include <xlnt/workbook/named_range.hpp>
#include <xlnt/workbook/streaming_workbook_reader.hpp>
#include <xlnt/workbook/theme.hpp> #include <xlnt/workbook/theme.hpp>
#include <xlnt/workbook/workbook.hpp> #include <xlnt/workbook/workbook.hpp>
#include <xlnt/workbook/worksheet_iterator.hpp> #include <xlnt/workbook/worksheet_iterator.hpp>

View File

@ -28,6 +28,7 @@ endif()
if(MSVC) if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unknown-pragmas") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unknown-pragmas")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-padded -Werror -Wno-documentation-unknown-command") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-padded -Werror -Wno-documentation-unknown-command")
@ -155,6 +156,11 @@ target_include_directories(xlnt PUBLIC ${XLNT_INCLUDE_DIR})
target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR}) target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR})
target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR}/../third-party/libstudxml) target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR}/../third-party/libstudxml)
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0.0")
target_compile_definitions(xlnt PRIVATE UTFCPP=1)
target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR}/../third-party/utfcpp)
endif()
if(MSVC) if(MSVC)
set_target_properties(xlnt PROPERTIES COMPILE_FLAGS "/wd\"4251\" /wd\"4275\" /wd\"4068\" /MP") set_target_properties(xlnt PROPERTIES COMPILE_FLAGS "/wd\"4251\" /wd\"4275\" /wd\"4068\" /MP")
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/detail/serialization/miniz.cpp PROPERTIES COMPILE_FLAGS "/wd\"4244\" /wd\"4334\" /wd\"4127\"") set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/detail/serialization/miniz.cpp PROPERTIES COMPILE_FLAGS "/wd\"4244\" /wd\"4334\" /wd\"4127\"")

View File

@ -108,7 +108,7 @@ std::vector<std::uint8_t> decrypt_xlsx_agile(
++segment; ++segment;
} }
decrypted_package.resize(total_size); decrypted_package.resize(static_cast<std::size_t>(total_size));
return decrypted_package; return decrypted_package;
} }
@ -153,7 +153,8 @@ encryption_info::standard_encryption_info read_standard_encryption_info(std::ist
throw xlnt::exception("invalid header"); throw xlnt::exception("invalid header");
} }
const auto csp_name_length = (header_length - (info_stream.tellg() - index_at_start)) / 2; const auto csp_name_length = static_cast<std::size_t>((header_length
- (info_stream.tellg() - index_at_start)) / 2);
auto csp_name = xlnt::detail::read_string<char16_t>(info_stream, csp_name_length); auto csp_name = xlnt::detail::read_string<char16_t>(info_stream, csp_name_length);
csp_name.pop_back(); // remove extraneous trailing null csp_name.pop_back(); // remove extraneous trailing null
if (csp_name != u"Microsoft Enhanced RSA and AES Cryptographic Provider (Prototype)" if (csp_name != u"Microsoft Enhanced RSA and AES Cryptographic Provider (Prototype)"

File diff suppressed because it is too large Load Diff

View File

@ -28,6 +28,7 @@
#include <functional> #include <functional>
#include <iostream> #include <iostream>
#include <memory> #include <memory>
#include <string>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
@ -36,11 +37,15 @@
namespace xlnt { namespace xlnt {
class cell;
class color; class color;
class rich_text; class rich_text;
class manifest; class manifest;
template<typename T>
class optional;
class path; class path;
class relationship; class relationship;
class streaming_workbook_reader;
class variant; class variant;
class workbook; class workbook;
class worksheet; class worksheet;
@ -48,6 +53,8 @@ class worksheet;
namespace detail { namespace detail {
class izstream; class izstream;
struct cell_impl;
struct worksheet_impl;
/// <summary> /// <summary>
/// Handles writing a workbook into an XLSX file. /// Handles writing a workbook into an XLSX file.
@ -57,16 +64,31 @@ class xlsx_consumer
public: public:
xlsx_consumer(workbook &destination); xlsx_consumer(workbook &destination);
~xlsx_consumer();
void read(std::istream &source); void read(std::istream &source);
void read(std::istream &source, const std::string &password); void read(std::istream &source, const std::string &password);
private: private:
friend class xlnt::streaming_workbook_reader;
void open(std::istream &source);
bool has_cell();
/// <summary>
/// Reads the next cell in the current worksheet and optionally returns it if
/// the last cell in the sheet has not yet been read. An exception will be thrown
/// if this is not open as a streaming consumer.
/// </summary>
cell read_cell();
/// <summary> /// <summary>
/// Read all the files needed from the XLSX archive and initialize all of /// Read all the files needed from the XLSX archive and initialize all of
/// the data in the workbook to match. /// the data in the workbook to match.
/// </summary> /// </summary>
void populate_workbook(); void populate_workbook(bool streaming);
/// <summary> /// <summary>
/// ///
@ -168,17 +190,32 @@ private:
/// <summary> /// <summary>
/// xl/sheets/*.xml /// xl/sheets/*.xml
/// </summary> /// </summary>
void read_chartsheet(const std::string &title); void read_chartsheet(const std::string &rel_id);
/// <summary> /// <summary>
/// xl/sheets/*.xml /// xl/sheets/*.xml
/// </summary> /// </summary>
void read_dialogsheet(const std::string &title); void read_dialogsheet(const std::string &rel_id);
/// <summary> /// <summary>
/// xl/sheets/*.xml /// xl/sheets/*.xml
/// </summary> /// </summary>
void read_worksheet(const std::string &title); void read_worksheet(const std::string &rel_id);
/// <summary>
/// xl/sheets/*.xml
/// </summary>
std::string read_worksheet_begin(const std::string &rel_id);
/// <summary>
/// xl/sheets/*.xml
/// </summary>
void read_worksheet_sheetdata();
/// <summary>
/// xl/sheets/*.xml
/// </summary>
worksheet read_worksheet_end(const std::string &rel_id);
// Sheet Relationship Target Parts // Sheet Relationship Target Parts
@ -370,6 +407,14 @@ private:
std::vector<xml::qname> stack_; std::vector<xml::qname> stack_;
bool preserve_space_ = false; bool preserve_space_ = false;
bool streaming_ = false;
std::unique_ptr<detail::cell_impl> streaming_cell_;
detail::cell_impl *current_cell_;
detail::worksheet_impl *current_worksheet_;
}; };
} // namespace detail } // namespace detail

View File

@ -36,13 +36,12 @@
#include <xlnt/cell/cell.hpp> #include <xlnt/cell/cell.hpp>
#include <xlnt/packaging/manifest.hpp> #include <xlnt/packaging/manifest.hpp>
#include <xlnt/utils/path.hpp> #include <xlnt/utils/path.hpp>
#include <xlnt/utils/scoped_enum_hash.hpp>
#include <xlnt/workbook/workbook.hpp> #include <xlnt/workbook/workbook.hpp>
#include <xlnt/workbook/workbook_view.hpp> #include <xlnt/workbook/workbook_view.hpp>
#include <xlnt/worksheet/header_footer.hpp> #include <xlnt/worksheet/header_footer.hpp>
#include <xlnt/worksheet/worksheet.hpp> #include <xlnt/worksheet/worksheet.hpp>
using namespace std::string_literals;
namespace { namespace {
/// <summary> /// <summary>
@ -169,7 +168,7 @@ void xlsx_producer::write_content_types()
const auto content_types_path = path("[Content_Types].xml"); const auto content_types_path = path("[Content_Types].xml");
begin_part(content_types_path); begin_part(content_types_path);
const auto xmlns = "http://schemas.openxmlformats.org/package/2006/content-types"s; const auto xmlns = "http://schemas.openxmlformats.org/package/2006/content-types";
write_start_element(xmlns, "Types"); write_start_element(xmlns, "Types");
write_namespace(xmlns, ""); write_namespace(xmlns, "");
@ -288,7 +287,7 @@ void xlsx_producer::write_property(const std::string &name, const variant &value
write_start_element(constants::ns("vt"), "vector"); write_start_element(constants::ns("vt"), "vector");
auto vector = value.get<std::vector<variant>>(); auto vector = value.get<std::vector<variant>>();
std::unordered_set<variant::type> types; std::unordered_set<variant::type, scoped_enum_hash<variant::type>> types;
for (const auto &element : vector) for (const auto &element : vector)
{ {

View File

@ -482,7 +482,9 @@ std::unique_ptr<std::streambuf> ozstream::open(const path &filename)
zheader header; zheader header;
header.filename = filename.string(); header.filename = filename.string();
file_headers_.push_back(header); file_headers_.push_back(header);
return std::make_unique<zip_streambuf_compress>(&file_headers_.back(), destination_stream_); auto buffer = new zip_streambuf_compress(&file_headers_.back(), destination_stream_);
return std::unique_ptr<zip_streambuf_compress>(buffer);
} }
izstream::izstream(std::istream &stream) izstream::izstream(std::istream &stream)
@ -595,7 +597,9 @@ std::unique_ptr<std::streambuf> izstream::open(const path &filename) const
auto header = file_headers_.at(filename.string()); auto header = file_headers_.at(filename.string());
source_stream_.seekg(header.header_offset); source_stream_.seekg(header.header_offset);
return std::make_unique<zip_streambuf_decompress>(source_stream_, header); auto buffer = new zip_streambuf_decompress(source_stream_, header);
return std::unique_ptr<zip_streambuf_decompress>(buffer);
} }
std::string izstream::read(const path &filename) const std::string izstream::read(const path &filename) const

View File

@ -21,15 +21,21 @@
// @license: http://www.opensource.org/licenses/mit-license.php // @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file // @author: see AUTHORS file
#include <codecvt>
#include <locale> #include <locale>
#include <string> #include <string>
#include <detail/unicode.hpp> #include <detail/unicode.hpp>
#ifdef UTFCPP
#include <utf8.h>
#else
#include <codecvt>
#endif
namespace xlnt { namespace xlnt {
namespace detail { namespace detail {
#ifndef UTFCPP
#ifdef _MSC_VER #ifdef _MSC_VER
std::u16string utf8_to_utf16(const std::string &utf8_string) std::u16string utf8_to_utf16(const std::string &utf8_string)
{ {
@ -63,6 +69,23 @@ std::string utf16_to_utf8(const std::u16string &utf16_string)
char16_t>{}.to_bytes(utf16_string); char16_t>{}.to_bytes(utf16_string);
} }
#endif #endif
#else
/// <summary>
/// Converts a UTF-8 encoded string to UTF-16 using the bundled utfcpp
/// library (this is the UTFCPP build variant of the function).
/// utf8::utf8to16 is the checked converter, so it throws a utf8 exception
/// when it encounters input it cannot decode.
/// </summary>
std::u16string utf8_to_utf16(const std::string &utf8_string)
{
    std::u16string utf16_units;
    auto sink = std::back_inserter(utf16_units);
    utf8::utf8to16(utf8_string.begin(), utf8_string.end(), sink);

    return utf16_units;
}
/// <summary>
/// Converts a UTF-16 encoded string to UTF-8 using the bundled utfcpp
/// library (this is the UTFCPP build variant of the function).
/// utf8::utf16to8 is the checked converter, so it throws
/// utf8::invalid_utf16 for unpaired surrogates.
/// </summary>
std::string utf16_to_utf8(const std::u16string &utf16_string)
{
    std::string utf8_octets;
    auto sink = std::back_inserter(utf8_octets);
    utf8::utf16to8(utf16_string.begin(), utf16_string.end(), sink);

    return utf8_octets;
}
#endif
std::string latin1_to_utf8(const std::string &latin1) std::string latin1_to_utf8(const std::string &latin1)
{ {

View File

@ -21,7 +21,6 @@
// @license: http://www.opensource.org/licenses/mit-license.php // @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file // @author: see AUTHORS file
#include <codecvt>
#include <fstream> #include <fstream>
#include <sstream> #include <sstream>
#include <sys/stat.h> #include <sys/stat.h>
@ -32,6 +31,8 @@
#include <linux/limits.h> #include <linux/limits.h>
#include <sys/types.h> #include <sys/types.h>
#include <unistd.h> #include <unistd.h>
#elif defined(_MSC_VER)
#include <codecvt>
#endif #endif
#include <detail/external/include_windows.hpp> #include <detail/external/include_windows.hpp>

View File

@ -0,0 +1,185 @@
// Copyright (c) 2017 Thomas Fussell
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE
//
// @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file
#include <fstream>
#include <detail/serialization/vector_streambuf.hpp>
#include <detail/serialization/xlsx_consumer.hpp>
#include <xlnt/cell/cell.hpp>
#include <xlnt/packaging/manifest.hpp>
#include <xlnt/utils/optional.hpp>
#include <xlnt/workbook/streaming_workbook_reader.hpp>
#include <xlnt/workbook/workbook.hpp>
#include <xlnt/worksheet/worksheet.hpp>
namespace {

//TODO: (important) this is duplicated from workbook.cpp, find a common place to keep it

// Only the std::ifstream overloads are kept here: this translation unit
// never writes files, and an unused function with internal linkage triggers
// -Wunused-function (an error when building with -Werror).

#ifdef _MSC_VER
/// <summary>
/// Opens stream in binary mode on the file named by the wide-character path.
/// Only available with MSVC, whose file streams accept std::wstring paths.
/// </summary>
void open_stream(std::ifstream &stream, const std::wstring &path)
{
    stream.open(path, std::ios::binary);
}
#endif

/// <summary>
/// Opens stream in binary mode on the file named by path. Failure is not
/// reported here; callers can inspect the stream state afterwards.
/// </summary>
void open_stream(std::ifstream &stream, const std::string &path)
{
#ifdef _MSC_VER
    // Route through xlnt::path::wstring() and the wide overload above
    // (presumably so that non-ASCII file names work on Windows -- confirm).
    open_stream(stream, xlnt::path(path).wstring());
#else
    stream.open(path, std::ios::binary);
#endif
}

} // namespace
namespace xlnt {
/// <summary>
/// Constructs a reader that is not yet attached to any workbook source.
/// One of the open() overloads must be called before reading.
/// </summary>
streaming_workbook_reader::streaming_workbook_reader() = default;
/// <summary>
/// Closes the reader (see close()) before the members are destroyed.
/// </summary>
streaming_workbook_reader::~streaming_workbook_reader()
{
    this->close();
}
/// <summary>
/// Releases the XLSX consumer and the in-memory stream buffer if a source
/// is currently open. Does nothing when no consumer exists, so it is safe
/// to call repeatedly.
/// </summary>
void streaming_workbook_reader::close()
{
    if (!consumer_)
    {
        // Nothing has been opened (or it was already closed).
        return;
    }

    consumer_.reset();
    stream_buffer_.reset();
}
/// <summary>
/// Asks the underlying consumer whether another cell can be read from the
/// current worksheet. The reader must have been opened first; consumer_
/// is dereferenced without a null check.
/// </summary>
bool streaming_workbook_reader::has_cell()
{
    const bool another_cell_available = consumer_->has_cell();
    return another_cell_available;
}
/// <summary>
/// Reads and returns the next cell of the current worksheet by delegating
/// to the underlying consumer. The reader must have been opened first;
/// consumer_ is dereferenced without a null check.
/// </summary>
cell streaming_workbook_reader::read_cell()
{
    auto &active_consumer = *consumer_;
    return active_consumer.read_cell();
}
/// <summary>
/// Returns true while there are worksheets left in the queue that was
/// filled by open() and is drained by end_worksheet().
/// </summary>
bool streaming_workbook_reader::has_worksheet()
{
    const bool queue_exhausted = worksheet_queue_.empty();
    return !queue_exhausted;
}
void streaming_workbook_reader::begin_worksheet()
{
const auto next_worksheet_rel = worksheet_queue_.back();
const auto workbook_rel = workbook_->manifest()
.relationship(path("/"), relationship_type::office_document);
const auto worksheet_rel = workbook_->manifest()
.relationship(workbook_rel.target().path(), next_worksheet_rel);
auto rel_chain = std::vector<relationship>{ workbook_rel, worksheet_rel };
const auto &manifest = consumer_->target_.manifest();
const auto part_path = manifest.canonicalize(rel_chain);
auto part_stream_buffer = consumer_->archive_->open(part_path);
part_stream_buffer_.swap(part_stream_buffer);
part_stream_.reset(new std::istream(part_stream_buffer_.get()));
parser_.reset(new xml::parser(*part_stream_, part_path.string()));
consumer_->parser_ = parser_.get();
consumer_->read_worksheet_begin(next_worksheet_rel);
}
/// <summary>
/// Finishes the worksheet started by begin_worksheet(): removes its
/// relationship id from the back of the queue and lets the consumer read
/// the remainder of the worksheet, returning the resulting worksheet.
/// The queue must not be empty.
/// </summary>
worksheet streaming_workbook_reader::end_worksheet()
{
    // Copy the id before popping; the consumer still needs it afterwards.
    auto finished_rel_id = worksheet_queue_.back();
    worksheet_queue_.pop_back();

    auto &active_consumer = *consumer_;
    return active_consumer.read_worksheet_end(finished_rel_id);
}
/// <summary>
/// Opens an XLSX package held in memory. A stream buffer over data and an
/// input stream on top of it are created and owned by this reader, then
/// the stream overload of open() does the actual work.
/// NOTE(review): if vector_istreambuf only references data rather than
/// copying it, data must outlive the reader -- confirm.
/// </summary>
void streaming_workbook_reader::open(const std::vector<std::uint8_t> &data)
{
    stream_buffer_.reset(new detail::vector_istreambuf(data));

    auto *const memory_buffer = stream_buffer_.get();
    stream_.reset(new std::istream(memory_buffer));

    open(*stream_);
}
/// <summary>
/// Opens the XLSX file named filename for streaming. A file stream owned by
/// this reader is created and opened in binary mode, then the stream
/// overload of open() does the actual work.
/// </summary>
void streaming_workbook_reader::open(const std::string &filename)
{
    stream_.reset(new std::ifstream());

    // stream_ was just set to a std::ifstream, so the downcast of the pointee
    // is valid. The cast must be applied to *stream_: casting the smart
    // pointer object itself to std::ifstream& is undefined behaviour.
    open_stream(static_cast<std::ifstream &>(*stream_), filename);

    open(*stream_);
}
#ifdef _MSC_VER
/// <summary>
/// Opens the XLSX file named by the wide-character filename for streaming
/// (MSVC only). A file stream owned by this reader is created and opened,
/// then the stream overload of open() does the actual work.
/// </summary>
void streaming_workbook_reader::open(const std::wstring &filename)
{
    stream_.reset(new std::ifstream());

    // stream_ was just set to a std::ifstream, so the downcast is valid.
    auto &file_stream = static_cast<std::ifstream &>(*stream_);
    open_stream(file_stream, filename);

    open(*stream_);
}
#endif
/// <summary>
/// Opens the XLSX file at the given path for streaming. A file stream owned
/// by this reader is created and opened using the path's string form, then
/// the stream overload of open() does the actual work.
/// </summary>
void streaming_workbook_reader::open(const xlnt::path &filename)
{
    stream_.reset(new std::ifstream());

    // stream_ was just set to a std::ifstream, so the downcast is valid.
    auto &file_stream = static_cast<std::ifstream &>(*stream_);
    open_stream(file_stream, filename.string());

    open(*stream_);
}
/// <summary>
/// Opens an XLSX package from stream for streaming. A fresh workbook and
/// consumer are created, the consumer opens the stream, and the relationship
/// ids of all worksheets of the workbook part are queued so that they can be
/// visited with has_worksheet()/begin_worksheet()/end_worksheet().
/// The stream is not owned by this overload.
/// </summary>
void streaming_workbook_reader::open(std::istream &stream)
{
    workbook_.reset(new workbook());
    consumer_.reset(new detail::xlsx_consumer(*workbook_));
    consumer_->open(stream);

    // Drop any ids left over from a previously opened package; they refer to
    // a workbook that no longer exists and must not be mixed with the new ones.
    worksheet_queue_.clear();

    const auto workbook_rel = workbook_->manifest()
        .relationship(path("/"), relationship_type::office_document);
    const auto workbook_path = workbook_rel.target().path();

    // Bind by const reference: only the id is needed, so avoid copying
    // every relationship.
    for (const auto &worksheet_rel : workbook_->manifest()
        .relationships(workbook_path, relationship_type::worksheet))
    {
        worksheet_queue_.push_back(worksheet_rel.id());
    }
}
} // namespace xlnt

View File

@ -1508,14 +1508,14 @@ void workbook::garbage_collect_formulae()
void workbook::update_sheet_properties() void workbook::update_sheet_properties()
{ {
if (has_extended_property(extended_property::titles_of_parts)) if (has_extended_property(xlnt::extended_property::titles_of_parts))
{ {
extended_property(extended_property::titles_of_parts, sheet_titles()); extended_property(xlnt::extended_property::titles_of_parts, sheet_titles());
} }
if (has_extended_property(extended_property::heading_pairs)) if (has_extended_property(xlnt::extended_property::heading_pairs))
{ {
extended_property(extended_property::heading_pairs, extended_property(xlnt::extended_property::heading_pairs,
std::vector<variant>{variant("Worksheets"), variant(static_cast<int>(sheet_count()))}); std::vector<variant>{variant("Worksheets"), variant(static_cast<int>(sheet_count()))});
} }
} }

View File

@ -619,9 +619,9 @@ private:
xlnt_assert_equals(cell.value<long double>(), 3.141592); xlnt_assert_equals(cell.value<long double>(), 3.141592);
auto cell2 = ws.cell("A2"); auto cell2 = ws.cell("A2");
cell2.value(std::string(100'000, 'a')); cell2.value(std::string(100000, 'a'));
cell.value(cell2); cell.value(cell2);
xlnt_assert_equals(cell.value<std::string>(), std::string(32'767, 'a')); xlnt_assert_equals(cell.value<std::string>(), std::string(32767, 'a'));
} }
void test_reference() void test_reference()

View File

@ -31,6 +31,8 @@
#include <helpers/test_suite.hpp> #include <helpers/test_suite.hpp>
#include <helpers/path_helper.hpp> #include <helpers/path_helper.hpp>
#include <helpers/xml_helper.hpp> #include <helpers/xml_helper.hpp>
#include <xlnt/workbook/streaming_workbook_reader.hpp>
#include <xlnt/workbook/streaming_workbook_writer.hpp>
#include <xlnt/workbook/workbook.hpp> #include <xlnt/workbook/workbook.hpp>
class serialization_test_suite : public test_suite class serialization_test_suite : public test_suite
@ -56,6 +58,8 @@ public:
register_test(test_read_custom_properties); register_test(test_read_custom_properties);
register_test(test_round_trip_rw); register_test(test_round_trip_rw);
register_test(test_round_trip_rw_encrypted); register_test(test_round_trip_rw_encrypted);
register_test(test_streaming_read);
//register_test(test_streaming_write);
} }
bool workbook_matches_file(xlnt::workbook &wb, const xlnt::path &file) bool workbook_matches_file(xlnt::workbook &wb, const xlnt::path &file)
@ -461,4 +465,42 @@ public:
xlnt_assert(round_trip_matches_rw(path, password)); xlnt_assert(round_trip_matches_rw(path, password));
} }
} }
// Smoke test for streaming_workbook_reader: walks every worksheet and every
// cell of a sample workbook. There are no assertions; the test passes as
// long as nothing throws.
void test_streaming_read()
{
    const auto fixture = path_helper::test_file("4_every_style.xlsx");

    xlnt::streaming_workbook_reader reader;
    reader.open(xlnt::path(fixture));

    while (reader.has_worksheet())
    {
        reader.begin_worksheet();

        // Drain all cells of the current sheet before finishing it.
        while (reader.has_cell())
        {
            const auto streamed_cell = reader.read_cell();
            //std::cout << streamed_cell.reference().to_string() << std::endl;
        }

        const auto finished_sheet = reader.end_worksheet();
    }
}
// Exercises streaming_workbook_writer by writing two cells to a new sheet.
// B2 is modified again after C3 has been added; the string literal used
// ("should not change") suggests that a cell is expected to be final once
// the next cell has been added -- NOTE(review): nothing is asserted here,
// confirm the intended behaviour.
void test_streaming_write()
{
    const auto output_file = std::string("stream-out.xlsx");

    xlnt::streaming_workbook_writer writer;
    writer.open(output_file);
    writer.add_sheet("stream");

    auto first_cell = writer.add_cell("B2");
    first_cell.value("B2!");

    auto second_cell = writer.add_cell("C3");
    first_cell.value("should not change");
    second_cell.value("C3!");
}
}; };

34
third-party/utfcpp/utf8.h vendored Normal file
View File

@ -0,0 +1,34 @@
// Copyright 2006 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "utf8/checked.h"
#include "utf8/unchecked.h"
#endif // header guard

327
third-party/utfcpp/utf8/checked.h vendored Normal file
View File

@ -0,0 +1,327 @@
// Copyright 2006-2016 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "core.h"
#include <stdexcept>
// Checked (exception-throwing) part of the vendored utfcpp library.
// The code is kept identical to upstream; only comments have been added.
// The helpers in utf8::internal (validate_next, is_trail, mask16, ...) come from core.h.
namespace utf8
{
// Base for the exceptions that may be thrown from the library
class exception : public ::std::exception {
};
// Exceptions that may be thrown from the library functions.
// invalid_code_point: thrown by append() when the value fails
// is_code_point_valid(), and by next() when validation reports
// INVALID_CODE_POINT. Carries the offending code point.
class invalid_code_point : public exception {
uint32_t cp;
public:
invalid_code_point(uint32_t codepoint) : cp(codepoint) {}
virtual const char* what() const throw() { return "Invalid code point"; }
uint32_t code_point() const {return cp;}
};
// invalid_utf8: thrown when an octet sequence cannot be decoded.
// Carries the octet the iterator pointed at when the error was raised.
class invalid_utf8 : public exception {
uint8_t u8;
public:
invalid_utf8 (uint8_t u) : u8(u) {}
virtual const char* what() const throw() { return "Invalid UTF-8"; }
uint8_t utf8_octet() const {return u8;}
};
// invalid_utf16: thrown by utf16to8() for unpaired surrogates.
// Carries the offending 16-bit unit.
class invalid_utf16 : public exception {
uint16_t u16;
public:
invalid_utf16 (uint16_t u) : u16(u) {}
virtual const char* what() const throw() { return "Invalid UTF-16"; }
uint16_t utf16_word() const {return u16;}
};
// not_enough_room: thrown when validation reports NOT_ENOUGH_ROOM, or when
// prior() is called with the iterator already at the start of the range.
class not_enough_room : public exception {
public:
virtual const char* what() const throw() { return "Not enough space"; }
};
/// The library API - functions intended to be called by the users
// Encodes the code point cp as one to four UTF-8 octets written through
// result and returns the advanced output iterator.
// Throws invalid_code_point if cp is not a valid code point.
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result)
{
if (!utf8::internal::is_code_point_valid(cp))
throw invalid_code_point(cp);
if (cp < 0x80) // one octet
*(result++) = static_cast<uint8_t>(cp);
else if (cp < 0x800) { // two octets
*(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
else if (cp < 0x10000) { // three octets
*(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
else { // four octets
*(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
*(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
return result;
}
// Copies [start, end) to out. Sequences that validate as UTF8_OK are copied
// octet by octet; each invalid sequence is replaced by the UTF-8 encoding
// of `replacement`. Throws not_enough_room when validation reports
// NOT_ENOUGH_ROOM. Returns the advanced output iterator.
template <typename octet_iterator, typename output_iterator>
output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
{
while (start != end) {
octet_iterator sequence_start = start;
internal::utf_error err_code = utf8::internal::validate_next(start, end);
switch (err_code) {
case internal::UTF8_OK :
for (octet_iterator it = sequence_start; it != start; ++it)
*out++ = *it;
break;
case internal::NOT_ENOUGH_ROOM:
throw not_enough_room();
case internal::INVALID_LEAD:
out = utf8::append (replacement, out);
++start;
break;
case internal::INCOMPLETE_SEQUENCE:
case internal::OVERLONG_SEQUENCE:
case internal::INVALID_CODE_POINT:
out = utf8::append (replacement, out);
++start;
// just one replacement mark for the sequence
while (start != end && utf8::internal::is_trail(*start))
++start;
break;
}
}
return out;
}
// Convenience overload of replace_invalid() that uses U+FFFD as the
// replacement code point.
template <typename octet_iterator, typename output_iterator>
inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
{
static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
return utf8::replace_invalid(start, end, out, replacement_marker);
}
// Decodes and returns the code point at `it`, which is passed by reference
// to internal::validate_next(). Validation errors are mapped to exceptions:
// not_enough_room, invalid_utf8 or invalid_code_point.
// NOTE(review): validate_next is defined outside this file; presumably it
// advances `it` past the sequence on success only -- confirm in core.h.
template <typename octet_iterator>
uint32_t next(octet_iterator& it, octet_iterator end)
{
uint32_t cp = 0;
internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
switch (err_code) {
case internal::UTF8_OK :
break;
case internal::NOT_ENOUGH_ROOM :
throw not_enough_room();
case internal::INVALID_LEAD :
case internal::INCOMPLETE_SEQUENCE :
case internal::OVERLONG_SEQUENCE :
throw invalid_utf8(*it);
case internal::INVALID_CODE_POINT :
throw invalid_code_point(cp);
}
return cp;
}
// Same as next(), but `it` is taken by value so the caller's iterator is
// left untouched.
template <typename octet_iterator>
uint32_t peek_next(octet_iterator it, octet_iterator end)
{
return utf8::next(it, end);
}
// Moves `it` backwards to the closest preceding non-trail octet and returns
// the code point decoded from there (up to the original position of `it`).
// Throws not_enough_room if `it` equals start, and invalid_utf8 if start is
// reached while still on a trail octet.
template <typename octet_iterator>
uint32_t prior(octet_iterator& it, octet_iterator start)
{
// can't do much if it == start
if (it == start)
throw not_enough_room();
octet_iterator end = it;
// Go back until we hit either a lead octet or start
while (utf8::internal::is_trail(*(--it)))
if (it == start)
throw invalid_utf8(*it); // error - no lead byte in the sequence
return utf8::peek_next(it, end);
}
/// Deprecated in versions that include "prior"
// Like prior(), but without the initial `it == start` check; pass_start is
// compared against the iterator after it has been decremented.
template <typename octet_iterator>
uint32_t previous(octet_iterator& it, octet_iterator pass_start)
{
octet_iterator end = it;
while (utf8::internal::is_trail(*(--it)))
if (it == pass_start)
throw invalid_utf8(*it); // error - no lead byte in the sequence
octet_iterator temp = it;
return utf8::next(temp, end);
}
// Advances `it` by n code points by calling next() n times; any exception
// thrown by next() propagates.
template <typename octet_iterator, typename distance_type>
void advance (octet_iterator& it, distance_type n, octet_iterator end)
{
for (distance_type i = 0; i < n; ++i)
utf8::next(it, end);
}
// Counts the code points in [first, last) by decoding them with next().
// The loop condition uses operator<, so the iterator type must support it.
template <typename octet_iterator>
typename std::iterator_traits<octet_iterator>::difference_type
distance (octet_iterator first, octet_iterator last)
{
typename std::iterator_traits<octet_iterator>::difference_type dist;
for (dist = 0; first < last; ++dist)
utf8::next(first, last);
return dist;
}
// Converts the UTF-16 range [start, end) to UTF-8 written through result.
// Surrogate pairs are combined into a single code point before encoding.
// Throws invalid_utf16 for a lead surrogate that is not followed by a trail
// surrogate and for a lone trail surrogate.
template <typename u16bit_iterator, typename octet_iterator>
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
{
while (start != end) {
uint32_t cp = utf8::internal::mask16(*start++);
// Take care of surrogate pairs first
if (utf8::internal::is_lead_surrogate(cp)) {
if (start != end) {
uint32_t trail_surrogate = utf8::internal::mask16(*start++);
if (utf8::internal::is_trail_surrogate(trail_surrogate))
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
else
throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
}
else
throw invalid_utf16(static_cast<uint16_t>(cp));
}
// Lone trail surrogate
else if (utf8::internal::is_trail_surrogate(cp))
throw invalid_utf16(static_cast<uint16_t>(cp));
result = utf8::append(cp, result);
}
return result;
}
// Converts the UTF-8 range [start, end) to UTF-16 written through result.
// Code points above 0xffff are written as a surrogate pair. Decoding errors
// surface as the exceptions thrown by next(). The loop condition uses
// operator<, so the input iterator type must support it.
template <typename u16bit_iterator, typename octet_iterator>
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
{
while (start < end) {
uint32_t cp = utf8::next(start, end);
if (cp > 0xffff) { //make a surrogate pair
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
*result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
}
else
*result++ = static_cast<uint16_t>(cp);
}
return result;
}
// Encodes every code point of the UTF-32 range [start, end) as UTF-8 via
// append(), so invalid code points raise invalid_code_point.
template <typename octet_iterator, typename u32bit_iterator>
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
{
while (start != end)
result = utf8::append(*(start++), result);
return result;
}
// Decodes the UTF-8 range [start, end) into code points written through
// result. Decoding errors surface as the exceptions thrown by next().
template <typename octet_iterator, typename u32bit_iterator>
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
{
while (start < end)
(*result++) = utf8::next(start, end);
return result;
}
// The iterator class
// Bidirectional iterator adaptor that walks an octet range one code point at
// a time. It remembers the bounds of the range it was created for and uses
// next()/prior() for decoding and movement, so their exceptions can be
// thrown from dereference, increment and decrement.
template <typename octet_iterator>
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
octet_iterator it;
octet_iterator range_start;
octet_iterator range_end;
public:
iterator () {}
// Throws std::out_of_range if octet_it lies outside [rangestart, rangeend].
explicit iterator (const octet_iterator& octet_it,
const octet_iterator& rangestart,
const octet_iterator& rangeend) :
it(octet_it), range_start(rangestart), range_end(rangeend)
{
if (it < range_start || it > range_end)
throw std::out_of_range("Invalid utf-8 iterator position");
}
// the default "big three" are OK
// Returns the underlying octet iterator.
octet_iterator base () const { return it; }
// Decodes the code point at the current position without moving.
uint32_t operator * () const
{
octet_iterator temp = it;
return utf8::next(temp, range_end);
}
// Iterators are only comparable when they were created over the same range;
// otherwise std::logic_error is thrown.
bool operator == (const iterator& rhs) const
{
if (range_start != rhs.range_start || range_end != rhs.range_end)
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
return (it == rhs.it);
}
bool operator != (const iterator& rhs) const
{
return !(operator == (rhs));
}
iterator& operator ++ ()
{
utf8::next(it, range_end);
return *this;
}
iterator operator ++ (int)
{
iterator temp = *this;
utf8::next(it, range_end);
return temp;
}
iterator& operator -- ()
{
utf8::prior(it, range_start);
return *this;
}
iterator operator -- (int)
{
iterator temp = *this;
utf8::prior(it, range_start);
return temp;
}
}; // class iterator
} // namespace utf8
#endif //header guard

332
third-party/utfcpp/utf8/core.h vendored Normal file
View File

@ -0,0 +1,332 @@
// Copyright 2006 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include <iterator>
namespace utf8
{
// The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
// You may need to change them to match your system.
// These typedefs have the same names as ones from cstdint, or boost/cstdint
// They are declared inside namespace utf8, so within this namespace they are
// found before any global typedefs of the same name.
// NOTE(review): assumes unsigned short is 16 bits and unsigned int is
// 32 bits wide on the target platform -- confirm for new platforms.
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
// Helper code - not intended to be directly called by the library users. May be changed at any time
namespace internal
{
// Unicode constants
// Leading (high) surrogates: 0xd800 - 0xdbff
// Trailing (low) surrogates: 0xdc00 - 0xdfff
const uint16_t LEAD_SURROGATE_MIN = 0xd800u;
const uint16_t LEAD_SURROGATE_MAX = 0xdbffu;
const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
// Added to (cp >> 10) when utf8to16 builds the lead surrogate of a code point above 0xffff
const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10);
// Added to (lead << 10) + trail when utf16to8 rebuilds a code point from a surrogate pair
const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
// Maximum valid value for a Unicode code point
const uint32_t CODE_POINT_MAX = 0x0010ffffu;
// Keeps only the low 8 bits of `oc` and returns them as an unsigned octet.
template<typename octet_type>
inline uint8_t mask8(octet_type oc)
{
    const uint8_t low_octet = static_cast<uint8_t>(oc & 0xff);
    return low_octet;
}
// Keeps only the low 16 bits of `oc` and returns them as an unsigned 16-bit value.
template<typename u16_type>
inline uint16_t mask16(u16_type oc)
{
    const uint16_t low_word = static_cast<uint16_t>(oc & 0xffff);
    return low_word;
}
// True when the octet is a continuation (trail) octet, i.e. its two most
// significant bits are 10.
template<typename octet_type>
inline bool is_trail(octet_type oc)
{
    const int top_two_bits = utf8::internal::mask8(oc) >> 6;
    return top_two_bits == 0x2;
}
// True when `cp` lies in the leading (high) surrogate range
// [LEAD_SURROGATE_MIN, LEAD_SURROGATE_MAX].
template <typename u16>
inline bool is_lead_surrogate(u16 cp)
{
    if (cp < LEAD_SURROGATE_MIN)
        return false;
    return cp <= LEAD_SURROGATE_MAX;
}
// True when `cp` lies in the trailing (low) surrogate range
// [TRAIL_SURROGATE_MIN, TRAIL_SURROGATE_MAX].
template <typename u16>
inline bool is_trail_surrogate(u16 cp)
{
    if (cp < TRAIL_SURROGATE_MIN)
        return false;
    return cp <= TRAIL_SURROGATE_MAX;
}
// True when `cp` is any surrogate, lead or trail: the two ranges are
// contiguous, so a single [LEAD_SURROGATE_MIN, TRAIL_SURROGATE_MAX] check suffices.
template <typename u16>
inline bool is_surrogate(u16 cp)
{
    if (cp < LEAD_SURROGATE_MIN)
        return false;
    return cp <= TRAIL_SURROGATE_MAX;
}
// A code point is valid when it does not exceed CODE_POINT_MAX and is not a
// surrogate.
template <typename u32>
inline bool is_code_point_valid(u32 cp)
{
    if (cp > CODE_POINT_MAX)
        return false;
    return !utf8::internal::is_surrogate(cp);
}
// Returns the number of octets (1-4) announced by the lead octet that
// `lead_it` points at, or 0 when that octet is not a valid lead octet.
template <typename octet_iterator>
inline typename std::iterator_traits<octet_iterator>::difference_type
sequence_length(octet_iterator lead_it)
{
    const uint8_t lead = utf8::internal::mask8(*lead_it);
    if (lead < 0x80)            // 0xxxxxxx
        return 1;
    if ((lead >> 5) == 0x6)     // 110xxxxx
        return 2;
    if ((lead >> 4) == 0xe)     // 1110xxxx
        return 3;
    if ((lead >> 3) == 0x1e)    // 11110xxx
        return 4;
    return 0;
}
// Reports whether `cp` was encoded with a different number of octets than the
// one its value range calls for. Code points of 0x10000 and above are never
// reported as overlong.
template <typename octet_difference_type>
inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
{
    if (cp < 0x80)
        return length != 1;
    if (cp < 0x800)
        return length != 2;
    if (cp < 0x10000)
        return length != 3;
    return false;
}
// Result codes of the internal decoding/validation helpers:
//   UTF8_OK             - the sequence was decoded/validated successfully
//   NOT_ENOUGH_ROOM     - the end of the range was reached before the sequence was complete
//   INVALID_LEAD        - sequence_length() returned 0 for the first octet
//   INCOMPLETE_SEQUENCE - an octet that should be a trail octet is not one
//   OVERLONG_SEQUENCE   - the code point was encoded with more octets than necessary
//   INVALID_CODE_POINT  - the decoded value fails is_code_point_valid()
enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
/// Helper for get_sequence_x
/// Steps `it` to the next octet. Yields NOT_ENOUGH_ROOM when that reaches
/// `end`, INCOMPLETE_SEQUENCE when the new octet is not a trail octet, and
/// UTF8_OK otherwise.
template <typename octet_iterator>
utf_error increase_safely(octet_iterator& it, octet_iterator end)
{
    ++it;
    if (it == end)
        return NOT_ENOUGH_ROOM;
    return utf8::internal::is_trail(*it) ? UTF8_OK : INCOMPLETE_SEQUENCE;
}
// Advances IT with increase_safely(IT, END) and makes the enclosing function
// return the resulting error code unless it is UTF8_OK. Only meant for the
// get_sequence_x functions below; it is #undef'd right after them.
#define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
/// get_sequence_x functions decode utf-8 sequences of the length x
/// One-octet case: the code point is simply the octet value. `it` is not moved.
template <typename octet_iterator>
utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
    if (it != end) {
        code_point = utf8::internal::mask8(*it);
        return UTF8_OK;
    }
    return NOT_ENOUGH_ROOM;
}
// Decodes a two-octet sequence starting at `it` into `code_point`.
// Returns NOT_ENOUGH_ROOM / INCOMPLETE_SEQUENCE when the trail octet is
// missing or malformed; in that case `it` has already been advanced and
// `code_point` holds a partial value. On success `it` is left on the last
// octet of the sequence.
template <typename octet_iterator>
utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
if (it == end)
return NOT_ENOUGH_ROOM;
code_point = utf8::internal::mask8(*it);
// Step to the trail octet; returns from this function on error
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
// Low 5 bits of the lead octet go to bits 6-10, low 6 bits of the trail to bits 0-5
code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
return UTF8_OK;
}
// Decodes a three-octet sequence starting at `it` into `code_point`.
// Returns NOT_ENOUGH_ROOM / INCOMPLETE_SEQUENCE when a trail octet is missing
// or malformed; in that case `it` has already been advanced and `code_point`
// holds a partial value. On success `it` is left on the last octet of the
// sequence.
template <typename octet_iterator>
utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
if (it == end)
return NOT_ENOUGH_ROOM;
code_point = utf8::internal::mask8(*it);
// Step to the first trail octet; returns from this function on error
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
// Low 4 bits of the lead octet go to bits 12-15, first trail's low 6 bits to bits 6-11
code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
// Step to the second trail octet; returns from this function on error
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
// Second trail's low 6 bits fill bits 0-5
code_point += (*it) & 0x3f;
return UTF8_OK;
}
// Decodes a four-octet sequence starting at `it` into `code_point`.
// Returns NOT_ENOUGH_ROOM / INCOMPLETE_SEQUENCE when a trail octet is missing
// or malformed; in that case `it` has already been advanced and `code_point`
// holds a partial value. On success `it` is left on the last octet of the
// sequence.
template <typename octet_iterator>
utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
if (it == end)
return NOT_ENOUGH_ROOM;
code_point = utf8::internal::mask8(*it);
// Step to the first trail octet; returns from this function on error
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
// Low 3 bits of the lead octet go to bits 18-20, first trail's low 6 bits to bits 12-17
code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
// Step to the second trail octet; returns from this function on error
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
// Second trail's low 6 bits go to bits 6-11
code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
// Step to the third trail octet; returns from this function on error
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
// Third trail's low 6 bits fill bits 0-5
code_point += (*it) & 0x3f;
return UTF8_OK;
}
#undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
// Decodes and validates the UTF-8 sequence starting at `it`.
// On success stores the code point in `code_point`, moves `it` past the
// sequence and returns UTF8_OK. On any failure `it` keeps (or is reset to) its
// initial position, `code_point` is left untouched and the error is returned.
template <typename octet_iterator>
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
    if (it == end)
        return NOT_ENOUGH_ROOM;

    // Remember where we started so the iterator can be rewound on failure.
    // (Rewinding is of little use with e.g. stream iterators.)
    const octet_iterator sequence_start = it;
    uint32_t decoded = 0;

    // The lead octet announces how many octets the sequence has
    typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
    const octet_difference_type length = utf8::internal::sequence_length(it);

    // Read the trail octets and assemble the code point
    utf_error status = UTF8_OK;
    switch (length) {
        case 0:
            return INVALID_LEAD;
        case 1:
            status = utf8::internal::get_sequence_1(it, end, decoded);
            break;
        case 2:
            status = utf8::internal::get_sequence_2(it, end, decoded);
            break;
        case 3:
            status = utf8::internal::get_sequence_3(it, end, decoded);
            break;
        case 4:
            status = utf8::internal::get_sequence_4(it, end, decoded);
            break;
    }

    if (status == UTF8_OK) {
        // Decoding worked; now apply the security checks
        if (!utf8::internal::is_code_point_valid(decoded))
            status = INVALID_CODE_POINT;
        else if (utf8::internal::is_overlong_sequence(decoded, length))
            status = OVERLONG_SEQUENCE;
        else {
            // All checks passed: publish the result and step past the last octet
            code_point = decoded;
            ++it;
            return UTF8_OK;
        }
    }

    // Failure: rewind the iterator to where it was on entry
    it = sequence_start;
    return status;
}
// Convenience overload for callers that only need the validation result and
// not the decoded code point.
template <typename octet_iterator>
inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
    uint32_t discarded_code_point;
    return utf8::internal::validate_next(it, end, discarded_code_point);
}
} // namespace internal
/// The library API - functions intended to be called by the users
// Byte order mark
// The three octets that starts_with_bom() and is_bom() compare against.
const uint8_t bom[] = {0xef, 0xbb, 0xbf};
// Scans [start, end) and returns an iterator to the beginning of the first
// sequence that fails validation, or `end` when the whole range validates.
template <typename octet_iterator>
octet_iterator find_invalid(octet_iterator start, octet_iterator end)
{
    octet_iterator cursor = start;
    // validate_next moves the cursor past each good sequence and leaves it at
    // the start of a bad one, so the loop body has nothing left to do.
    while (cursor != end && utf8::internal::validate_next(cursor, end) == internal::UTF8_OK) {
    }
    return cursor;
}
// True when find_invalid() finds no invalid sequence in [start, end).
template <typename octet_iterator>
inline bool is_valid(octet_iterator start, octet_iterator end)
{
    const octet_iterator first_invalid = utf8::find_invalid(start, end);
    return first_invalid == end;
}
// True when [it, end) begins with the three octets of the byte order mark.
// The range end is checked before each octet is read.
template <typename octet_iterator>
inline bool starts_with_bom (octet_iterator it, octet_iterator end)
{
    if (it == end || utf8::internal::mask8(*it++) != bom[0])
        return false;
    if (it == end || utf8::internal::mask8(*it++) != bom[1])
        return false;
    return it != end && utf8::internal::mask8(*it) == bom[2];
}
//Deprecated in release 2.3
// True when the three octets starting at `it` equal the byte order mark.
// No range end is taken, so the caller must guarantee that every octet that
// gets read is dereferenceable.
template <typename octet_iterator>
inline bool is_bom (octet_iterator it)
{
    if (utf8::internal::mask8(*it++) != bom[0])
        return false;
    if (utf8::internal::mask8(*it++) != bom[1])
        return false;
    return utf8::internal::mask8(*it) == bom[2];
}
} // namespace utf8
#endif // header guard

228
third-party/utfcpp/utf8/unchecked.h vendored Normal file
View File

@ -0,0 +1,228 @@
// Copyright 2006 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "core.h"
#include <cstddef>
namespace utf8
{
namespace unchecked
{
// Writes the UTF-8 encoding of `cp` through `result` and returns the advanced
// output iterator. The value of `cp` is not validated.
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result)
{
    if (cp < 0x80) {
        // one octet
        *result++ = static_cast<uint8_t>(cp);
        return result;
    }
    if (cp < 0x800) {
        // two octets
        *result++ = static_cast<uint8_t>((cp >> 6) | 0xc0);
        *result++ = static_cast<uint8_t>((cp & 0x3f) | 0x80);
        return result;
    }
    if (cp < 0x10000) {
        // three octets
        *result++ = static_cast<uint8_t>((cp >> 12) | 0xe0);
        *result++ = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
        *result++ = static_cast<uint8_t>((cp & 0x3f) | 0x80);
        return result;
    }
    // four octets
    *result++ = static_cast<uint8_t>((cp >> 18) | 0xf0);
    *result++ = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
    *result++ = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
    *result++ = static_cast<uint8_t>((cp & 0x3f) | 0x80);
    return result;
}
// Decodes the sequence starting at `it` without any validation, moves `it`
// past it and returns the code point. When sequence_length() reports 0 or 1
// the value of the first octet is returned and `it` moves by one.
template <typename octet_iterator>
uint32_t next(octet_iterator& it)
{
    uint32_t cp = utf8::internal::mask8(*it);
    const typename std::iterator_traits<octet_iterator>::difference_type octets =
        utf8::internal::sequence_length(it);

    if (octets == 2) {
        ++it;
        cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
    }
    else if (octets == 3) {
        ++it;
        cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
        ++it;
        cp += (*it) & 0x3f;
    }
    else if (octets == 4) {
        ++it;
        cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
        ++it;
        cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
        ++it;
        cp += (*it) & 0x3f;
    }

    // Step past the last octet that was consumed
    ++it;
    return cp;
}
// Returns the code point starting at `it`. The iterator is taken by value, so
// the caller's iterator is not advanced.
template <typename octet_iterator>
uint32_t peek_next(octet_iterator it)
{
    const uint32_t cp = utf8::unchecked::next(it);
    return cp;
}
// Moves `it` back to the closest preceding octet that is not a trail octet
// and returns the code point decoded from there. No bounds are checked.
template <typename octet_iterator>
uint32_t prior(octet_iterator& it)
{
    do {
        --it;
    } while (utf8::internal::is_trail(*it));

    // Decode through a copy so `it` stays on the first octet of the sequence
    octet_iterator sequence_start = it;
    return utf8::unchecked::next(sequence_start);
}
// Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
// Simply forwards to utf8::unchecked::prior.
template <typename octet_iterator>
inline uint32_t previous(octet_iterator& it)
{
    const uint32_t cp = utf8::unchecked::prior(it);
    return cp;
}
// Moves `it` forward by `n` code points by calling utf8::unchecked::next
// once per step. Does nothing when `n` is not greater than zero.
template <typename octet_iterator, typename distance_type>
void advance (octet_iterator& it, distance_type n)
{
    distance_type steps_done = 0;
    while (steps_done < n) {
        utf8::unchecked::next(it);
        ++steps_done;
    }
}
// Counts how many times utf8::unchecked::next has to be applied to `first`
// until it is no longer less than `last`. The iterators must support operator<.
template <typename octet_iterator>
typename std::iterator_traits<octet_iterator>::difference_type
distance (octet_iterator first, octet_iterator last)
{
    typename std::iterator_traits<octet_iterator>::difference_type code_points = 0;
    while (first < last) {
        utf8::unchecked::next(first);
        ++code_points;
    }
    return code_points;
}
// Converts the UTF-16 range [start, end) to UTF-8 written through `result`
// and returns the advanced output iterator. Nothing is validated: after a lead
// surrogate the following unit is read and combined without any checks.
template <typename u16bit_iterator, typename octet_iterator>
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
{
    while (start != end) {
        uint32_t cp = utf8::internal::mask16(*start);
        ++start;
        // A lead surrogate is combined with the unit that follows it
        if (utf8::internal::is_lead_surrogate(cp)) {
            const uint32_t trail_surrogate = utf8::internal::mask16(*start);
            ++start;
            cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
        }
        result = utf8::unchecked::append(cp, result);
    }
    return result;
}
// Converts the UTF-8 range [start, end) to UTF-16 written through `result`
// and returns the advanced output iterator. Code points above 0xffff are
// written as a surrogate pair. The input is not validated.
template <typename u16bit_iterator, typename octet_iterator>
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
{
    while (start < end) {
        const uint32_t cp = utf8::unchecked::next(start);
        if (cp <= 0xffff) {
            // fits in a single 16-bit unit
            *result++ = static_cast<uint16_t>(cp);
            continue;
        }
        // make a surrogate pair
        *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
        *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
    }
    return result;
}
// Encodes every code point of [start, end) as UTF-8 through `result` and
// returns the advanced output iterator. The code points are not validated.
template <typename octet_iterator, typename u32bit_iterator>
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
{
    for (; start != end; ++start) {
        result = utf8::unchecked::append(*start, result);
    }
    return result;
}
// Decodes the UTF-8 range [start, end) into code points written through
// `result` and returns the advanced output iterator. The input is not validated.
template <typename octet_iterator, typename u32bit_iterator>
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
{
    while (start < end) {
        const uint32_t cp = utf8::unchecked::next(start);
        *result++ = cp;
    }
    return result;
}
// The iterator class
// Bidirectional iterator adaptor that presents an octet sequence as a sequence
// of code points. Nothing is validated and no range bounds are kept, so the
// caller must not move it outside the underlying sequence.
template <typename octet_iterator>
class iterator {
    octet_iterator it;
public:
    // Iterator traits as member typedefs. They are the same types that the
    // former base class std::iterator<std::bidirectional_iterator_tag, uint32_t>
    // provided; that base class is deprecated since C++17.
    typedef uint32_t value_type;
    typedef uint32_t* pointer;
    typedef uint32_t& reference;
    typedef std::ptrdiff_t difference_type;
    typedef std::bidirectional_iterator_tag iterator_category;

    // Value-initialize the wrapped iterator so that a default-constructed
    // object never holds an indeterminate value (e.g. a garbage pointer).
    iterator () : it() {}
    explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
    // the default "big three" are OK

    // Returns the wrapped octet iterator at its current position.
    octet_iterator base () const { return it; }

    // Decodes the code point at the current position; works on a copy so the
    // position does not change.
    uint32_t operator * () const
    {
        octet_iterator temp = it;
        return utf8::unchecked::next(temp);
    }
    bool operator == (const iterator& rhs) const
    {
        return (it == rhs.it);
    }
    bool operator != (const iterator& rhs) const
    {
        return !(operator == (rhs));
    }
    // Pre-increment: skips as many octets as the current lead octet announces.
    // Note: sequence_length() returns 0 for an invalid lead octet, in which
    // case the position does not change.
    iterator& operator ++ ()
    {
        ::std::advance(it, utf8::internal::sequence_length(it));
        return *this;
    }
    // Post-increment: same step as pre-increment, returns the previous state.
    iterator operator ++ (int)
    {
        iterator temp = *this;
        ::std::advance(it, utf8::internal::sequence_length(it));
        return temp;
    }
    // Pre-decrement: moves back to the start of the preceding sequence.
    iterator& operator -- ()
    {
        utf8::unchecked::prior(it);
        return *this;
    }
    // Post-decrement: same step as pre-decrement, returns the previous state.
    iterator operator -- (int)
    {
        iterator temp = *this;
        utf8::unchecked::prior(it);
        return temp;
    }
}; // class iterator
} // namespace utf8::unchecked
} // namespace utf8
#endif // header guard