diff --git a/.gitignore b/.gitignore
index 29b572bf..0844650f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,5 @@ node_modules/
*~
.DS_Store
__pycache__/
-Win32/
\ No newline at end of file
+Win32/
+*.pyd
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f5ba09ec..c99fa633 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,6 +11,7 @@ option(STATIC "Set to ON to build xlnt as a static library instead of a shared l
option(TESTS "Set to OFF to skip building test executable (in ./tests)" ON)
option(SAMPLES "Set to ON to build executable code samples (in ./samples)" OFF)
option(BENCHMARKS "Set to ON to build performance benchmarks (in ./benchmarks)" OFF)
+option(ARROW "Set to ON to build Arrow conversion functions (in ./arrow/xlntarrow)" OFF)
# Platform specific options
if(NOT MSVC)
@@ -30,4 +31,8 @@ if(TESTS)
add_subdirectory(tests)
endif()
+if(ARROW)
+ add_subdirectory(arrow/xlntarrow)
+endif()
+
add_subdirectory(source)
diff --git a/README.md b/README.md
index 62f14399..41800423 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,14 @@
====
-[![Travis Build Status](https://travis-ci.org/tfussell/xlnt.svg)](https://travis-ci.org/tfussell/xlnt)
+[![Travis Build Status](https://travis-ci.org/tfussell/xlnt.svg?branch=master)](https://travis-ci.org/tfussell/xlnt)
[![AppVeyor Build status](https://ci.appveyor.com/api/projects/status/2hs79a1xoxy16sol?svg=true)](https://ci.appveyor.com/project/tfussell/xlnt)
[![Coverage Status](https://coveralls.io/repos/github/tfussell/xlnt/badge.svg?branch=master)](https://coveralls.io/github/tfussell/xlnt?branch=master)
[![ReadTheDocs Documentation Status](https://readthedocs.org/projects/xlnt/badge/?version=latest)](http://xlnt.readthedocs.org/en/latest/?badge=latest)
[![License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](http://opensource.org/licenses/MIT)
## Introduction
-xlnt is a modern C++ library for manipulating spreadsheets in memory and reading/writing them from/to XLSX files as described in [ECMA 376 4th edition](http://www.ecma-international.org/publications/standards/Ecma-376.htm). xlnt is currently under active feature development and is on track for the version 1.0 release in the next few weeks. Until then, the API could have significant changes. For a high-level summary of what you can do with this library, see [the feature list](https://tfussell.gitbooks.io/xlnt/content/docs/introduction/Features.html).
+xlnt is a modern C++ library for manipulating spreadsheets in memory and reading/writing them from/to XLSX files as described in [ECMA 376 4th edition](http://www.ecma-international.org/publications/standards/Ecma-376.htm). The first public release of xlnt version 1.0 was on May 10th, 2017. Current work is focused on increasing compatibility, improving performance, and brainstorming future development goals. For a high-level summary of what you can do with this library, see [the feature list](https://tfussell.gitbooks.io/xlnt/content/docs/introduction/Features.html). Contributions are welcome in the form of pull requests or discussions on [the repository's Issues page](https://github.com/tfussell/xlnt/issues).
## Example
@@ -29,7 +29,7 @@ int main()
wb.save("example.xlsx");
return 0;
}
-// compile with -std=c++14 -Ixlnt/include -Lxlnt/lib -lxlnt
+// compile with -std=c++14 -Ixlnt/include -lxlnt
```
## Documentation
diff --git a/arrow/xlntarrow/CMakeLists.txt b/arrow/xlntarrow/CMakeLists.txt
new file mode 100644
index 00000000..153cefc3
--- /dev/null
+++ b/arrow/xlntarrow/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.2)
+project(xlntarrow)
+# Build with C++14 and treat that standard as a hard requirement.
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+# Sources that make up the xlntarrow library.
+set(XLNT_ARROW
+ ${CMAKE_CURRENT_SOURCE_DIR}/xlntarrow.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/xlntarrow.cpp)
+# Shared library linked against the xlnt target; all include paths below are private to it.
+add_library(xlntarrow SHARED ${XLNT_ARROW})
+target_link_libraries(xlntarrow PRIVATE xlnt)
+target_include_directories(xlntarrow PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories(xlntarrow PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../../miniconda3/include) # NOTE(review): resolves outside the repository; presumably where the Arrow headers live - confirm
+target_include_directories(xlntarrow PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
diff --git a/arrow/xlntarrow/xlntarrow.cpp b/arrow/xlntarrow/xlntarrow.cpp
new file mode 100644
index 00000000..917a4a06
--- /dev/null
+++ b/arrow/xlntarrow/xlntarrow.cpp
@@ -0,0 +1,39 @@
+#include
+#include
+
+namespace xlnt {
+namespace arrow {
+/// Reads one worksheet of the XLSX data in s cell by cell; table is not written to yet.
+void xlsx2arrow(std::istream &s, ::arrow::Table &table)
+{
+    xlnt::streaming_workbook_reader reader;
+    reader.open(s);
+    reader.begin_worksheet();
+    // Row of the first cell read; stays 0 until a cell has been seen.
+    int first_row = 0;
+
+    for (; reader.has_cell();)
+    {
+        auto cell = reader.read_cell();
+        if (first_row <= 0) first_row = cell.row();
+
+        // Progress trace: print the reference of every cell in rows 1, 1001, 2001, ...
+        if (cell.reference().row() % 1000 != 1)
+        {
+            continue;
+        }
+
+        std::cout << cell.reference().to_string() << std::endl;
+    }
+
+    // The worksheet object returned here is not used.
+    reader.end_worksheet();
+}
+/// Placeholder: the body is empty, so neither table nor s is used yet.
+void arrow2xlsx(const ::arrow::Table &table, std::istream &s)
+{
+ // NOTE(review): s is a std::istream although this function is meant to produce XLSX output - confirm the intended stream type.
+}
+
+}
+}
diff --git a/arrow/xlntarrow/xlntarrow.hpp b/arrow/xlntarrow/xlntarrow.hpp
new file mode 100644
index 00000000..8d3ae886
--- /dev/null
+++ b/arrow/xlntarrow/xlntarrow.hpp
@@ -0,0 +1,11 @@
+#include
+#include
+
+namespace xlnt {
+namespace arrow {
+/// Declarations only; in xlntarrow.cpp xlsx2arrow reads one worksheet from s and arrow2xlsx is an empty stub.
+void xlsx2arrow(std::istream &s, ::arrow::Table &table);
+void arrow2xlsx(const ::arrow::Table &table, std::istream &s);
+
+}
+}
diff --git a/arrow/xlntpyarrow/python_streambuf.hpp b/arrow/xlntpyarrow/python_streambuf.hpp
new file mode 100644
index 00000000..6e8e3d66
--- /dev/null
+++ b/arrow/xlntpyarrow/python_streambuf.hpp
@@ -0,0 +1,487 @@
+#pragma once
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+namespace xlnt {
+namespace arrow {
+
+/// A stream buffer getting data from and putting data into a Python file object
+/** The aims are as follows:
+
+ - Given a C++ function acting on a standard stream, e.g.
+
+ \code
+ void read_inputs(std::istream& input) {
+ ...
+ input >> something >> something_else;
+ }
+ \endcode
+
+ and given a piece of Python code which creates a file-like object,
+ to be able to pass this file object to that C++ function, e.g.
+
+ \code
+ import gzip
+ gzip_file_obj = gzip.GzipFile(...)
+ read_inputs(gzip_file_obj)
+ \endcode
+
+ and have the standard stream pull data from and put data into the Python
+ file object.
+
+ - When Python \c read_inputs() returns, the Python object is able to
+ continue reading or writing where the C++ code left off.
+
+ - Operations in C++ on mere files should be competitively fast compared
+ to the direct use of \c std::fstream.
+
+
+ \b Motivation
+
+ - the standard Python library offer of file-like objects (files,
+ compressed files and archives, network, ...) is far superior to the
+ offer of streams in the C++ standard library and Boost C++ libraries.
+
+ - i/o code involves a fair amount of text processing which is more
+ efficiently prototyped in Python but then one may need to rewrite
+ a time-critical part in C++, in as seamless a manner as possible.
+
+ \b Usage
+
+ This is 2-step:
+
+ - a trivial wrapper function
+
+ \code
+ using boost_adaptbx::python::streambuf;
+ void read_inputs_wrapper(streambuf& input)
+ {
+ streambuf::istream is(input);
+ read_inputs(is);
+ }
+
+ def("read_inputs", read_inputs_wrapper);
+ \endcode
+
+ which has to be written every time one wants a Python binding for
+ such a C++ function.
+
+ - the Python side
+
+ \code
+ from boost.python import streambuf
+ read_inputs(streambuf(python_file_obj=obj, buffer_size=1024))
+ \endcode
+
+ \c buffer_size is optional. See also: \c default_buffer_size
+
+ Note: references are to the C++ standard (the numbers between parentheses
+ at the end of references are margin markers).
+*/
+class streambuf : public std::basic_streambuf
+{
+ private:
+ typedef std::basic_streambuf base_t;
+
+ public:
+ /* The syntax
+ using base_t::char_type;
+ would be nicer but Visual Studio C++ 8 chokes on it
+ */
+ typedef base_t::char_type char_type;
+ typedef base_t::int_type int_type;
+ typedef base_t::pos_type pos_type;
+ typedef base_t::off_type off_type;
+ typedef base_t::traits_type traits_type;
+
+ // work around Visual C++ 7.1 problem
+ inline static int
+ traits_type_eof() { return traits_type::eof(); }
+
+ /// The default size of the read and write buffer.
+ /** They are respectively used to buffer data read from and data written to
+ the Python file object. It can be modified from Python.
+ */
+ static std::size_t default_buffer_size;
+
+    /// Construct from a Python file object
+    /** if buffer_size is 0 the current default_buffer_size is used. An attribute the
+        file object does not have is stored as a null pointer. */
+    streambuf(
+        PyObject *python_file_obj,
+        std::size_t buffer_size_ = 0)
+    :
+        py_read (PyObject_GetAttrString(python_file_obj, "read")),
+        py_write(PyObject_GetAttrString(python_file_obj, "write")),
+        py_seek (PyObject_GetAttrString(python_file_obj, "seek")),
+        py_tell (PyObject_GetAttrString(python_file_obj, "tell")),
+        buffer_size(buffer_size_ != 0 ? buffer_size_ : default_buffer_size),
+        write_buffer(0),
+        pos_of_read_buffer_end_in_py_file(0),
+        pos_of_write_buffer_end_in_py_file(buffer_size),
+        farthest_pptr(0)
+    {
+        assert(buffer_size != 0);
+        // A failed attribute lookup above leaves a Python exception set; clear it
+        // so that it is not mistaken for a failure of the tell() probe below.
+        PyErr_Clear();
+        /* Some Python file objects (e.g. sys.stdout and sys.stdin) have
+           non-functional seek and tell. If so, drop py_tell and py_seek. */
+        if (py_tell != nullptr) {
+            Py_XDECREF(PyObject_CallFunction(py_tell, nullptr)); // only success or failure matters
+            if (PyErr_Occurred() != nullptr)
+            {
+                Py_CLEAR(py_tell); // releases the reference and nulls the pointer
+                Py_CLEAR(py_seek);
+                PyErr_Clear();
+            }
+        }
+        if (py_write != nullptr) {
+            // C-like string to make debugging easier
+            write_buffer = new char[buffer_size + 1];
+            write_buffer[buffer_size] = '\0';
+            setp(write_buffer, write_buffer + buffer_size); // 27.5.2.4.5 (5)
+            farthest_pptr = pptr();
+        }
+        else {
+            // The first attempt at output will result in a call to overflow
+            setp(0, 0);
+        }
+
+        if (py_tell != nullptr) {
+            auto py_pos = extract_int<off_type>(PyObject_CallFunction(py_tell, nullptr));
+            pos_of_read_buffer_end_in_py_file = py_pos;
+            pos_of_write_buffer_end_in_py_file = py_pos;
+        }
+    }
+
+    /// Destructor: frees the write buffer and releases every Python reference this object still owns
+    virtual ~streambuf() {
+        delete[] write_buffer; // deleting a null pointer is a no-op
+        Py_XDECREF(read_buffer); Py_XDECREF(py_read); Py_XDECREF(py_write); Py_XDECREF(py_seek); Py_XDECREF(py_tell);
+    }
+    /// C.f. C++ standard section 27.5.2.4.3
+    /** Overridden so that the stream member function readsome works correctly
+        (c.f. 27.6.1.3, paragraph 30): fetches a chunk through underflow() and
+        reports how many characters it made available, or -1 at end of file. */
+    virtual std::streamsize showmanyc() {
+        if (underflow() == traits_type::eof()) {
+            return -1;
+        }
+        return egptr() - gptr();
+    }
+
+    /// C.f. C++ standard section 27.5.2.4.3: refills the get area with the next read() chunk
+    virtual int_type underflow() {
+        int_type const failure = traits_type::eof();
+        if (py_read == nullptr) {
+            throw std::invalid_argument("That Python file object has no 'read' attribute");
+        }
+        // "n" takes a Py_ssize_t; the former "i" read the std::size_t argument as an int.
+        PyObject *chunk = PyObject_CallFunction(py_read, "n", static_cast<Py_ssize_t>(buffer_size));
+        Py_XDECREF(read_buffer); // the chunk backing the previous get area is no longer needed
+        read_buffer = chunk;
+        char *read_buffer_data = nullptr;
+        Py_ssize_t py_n_read = 0;
+        if (chunk == nullptr || PyBytes_AsStringAndSize(chunk, &read_buffer_data, &py_n_read) == -1) { // null: read() raised
+            setg(0, 0, 0);
+            throw std::invalid_argument(
+                "The method 'read' of the Python file object "
+                "did not return a string.");
+        }
+        auto n_read = (off_type)py_n_read;
+        pos_of_read_buffer_end_in_py_file += n_read;
+        setg(read_buffer_data, read_buffer_data, read_buffer_data + n_read); // 27.5.2.3.1 (4)
+        return n_read == 0 ? failure : traits_type::to_int_type(read_buffer_data[0]);
+    }
+
+    /// C.f. C++ standard section 27.5.2.4.5: writes the put area (and c, unless it is eof) to the Python file
+    virtual int_type overflow(int_type c=traits_type_eof()) {
+        if (py_write == nullptr) {
+            throw std::invalid_argument("That Python file object has no 'write' attribute");
+        }
+        farthest_pptr = std::max(farthest_pptr, pptr());
+        auto n_written = (off_type)(farthest_pptr - pbase());
+        auto chunk = PyBytes_FromStringAndSize(pbase(), farthest_pptr - pbase());
+        Py_XDECREF(PyObject_CallFunction(py_write, "O", chunk)); // "O" takes its own reference to chunk
+        Py_XDECREF(chunk);
+        if (!traits_type::eq_int_type(c, traits_type::eof())) {
+            auto ch = traits_type::to_char_type(c);
+            auto extra = PyBytes_FromStringAndSize(&ch, 1); // avoids "y#", whose length type depends on PY_SSIZE_T_CLEAN
+            Py_XDECREF(PyObject_CallFunction(py_write, "O", extra));
+            Py_XDECREF(extra);
+            n_written++;
+        }
+        if (n_written) {
+            pos_of_write_buffer_end_in_py_file += n_written;
+            setp(pbase(), epptr()); // 27.5.2.4.5 (5)
+            farthest_pptr = pptr();
+        }
+        return traits_type::eq_int_type(c, traits_type::eof()) ? traits_type::not_eof(c) : c;
+    }
+
+    /// Update the python file to reflect the state of this stream buffer
+    /** Empty the write buffer into the Python file object and set the seek
+        position of the latter accordingly (C++ standard section 27.5.2.4.2).
+        If there is no write buffer or it is empty, but there is a non-empty
+        read buffer, set the Python file object seek position to the
+        seek position in that read buffer.
+    */
+    virtual int sync() {
+        int result = 0;
+        farthest_pptr = std::max(farthest_pptr, pptr());
+        if (farthest_pptr && farthest_pptr > pbase()) {
+            off_type delta = pptr() - farthest_pptr;
+            int_type status = overflow();
+            if (traits_type::eq_int_type(status, traits_type::eof())) result = -1;
+            if (py_seek != nullptr) {
+                // Relative seek (whence 1); "L" passes the whole offset as a long long.
+                Py_XDECREF(PyObject_CallFunction(py_seek, "Li", static_cast<long long>(delta), 1));
+            }
+        }
+        else if (gptr() && gptr() < egptr()) {
+            if (py_seek != nullptr) {
+                // Step back over the characters that were read ahead but not consumed.
+                Py_XDECREF(PyObject_CallFunction(py_seek, "Li", static_cast<long long>(gptr() - egptr()), 1));
+            }
+        }
+        return result;
+    }
+
+    /// C.f. C++ standard section 27.5.2.4.2
+    /** This implementation is optimised to look whether the position is within
+        the buffers, so as to avoid calling Python seek or tell. It is
+        important for many applications that the overhead of calling into Python
+        is avoided as much as possible (e.g. parsers which may do a lot of
+        backtracking)
+    */
+    virtual
+    pos_type seekoff(off_type off, std::ios_base::seekdir way,
+                     std::ios_base::openmode which= std::ios_base::in
+                                                    | std::ios_base::out)
+    {
+        /* In practice, "which" is either std::ios_base::in or out
+           since we end up here because either seekp or seekg was called
+           on the stream using this buffer. That simplifies the code
+           in a few places.
+        */
+        int const failure = off_type(-1);
+
+        if (py_seek == nullptr) {
+            throw std::invalid_argument(
+                "That Python file object has no 'seek' attribute");
+        }
+
+        // we need the read buffer to contain something!
+        if (which == std::ios_base::in && !gptr()) {
+            if (traits_type::eq_int_type(underflow(), traits_type::eof())) {
+                return failure;
+            }
+        }
+
+        // compute the whence parameter for Python seek
+        int whence;
+        switch (way) {
+            case std::ios_base::beg:
+                whence = 0;
+                break;
+            case std::ios_base::cur:
+                whence = 1;
+                break;
+            case std::ios_base::end:
+                whence = 2;
+                break;
+            default:
+                return failure;
+        }
+
+        // Let's have a go
+        boost::optional<off_type> result = seekoff_without_calling_python(
+            off, way, which);
+        if (!result) {
+            // we need to call Python; "L" carries the whole off_type, which "i" would read as an int
+            if (which == std::ios_base::out) overflow();
+            if (way == std::ios_base::cur) {
+                if (which == std::ios_base::in) off -= egptr() - gptr();
+                else if (which == std::ios_base::out) off += pptr() - pbase();
+            }
+            Py_XDECREF(PyObject_CallFunction(py_seek, "Li", static_cast<long long>(off), whence));
+            result = extract_int<off_type>(PyObject_CallFunction(py_tell, nullptr));
+            if (which == std::ios_base::in) underflow();
+        }
+        return *result;
+    }
+
+    /// C.f. C++ standard section 27.5.2.4.2
+    /// Absolute positioning, expressed as a seekoff() relative to the beginning.
+    virtual pos_type seekpos(
+        pos_type sp,
+        std::ios_base::openmode which = std::ios_base::in | std::ios_base::out)
+    {
+        return streambuf::seekoff(sp, std::ios_base::beg, which);
+    }
+
+ private:
+ PyObject *py_read = nullptr;
+ PyObject *py_write = nullptr;
+ PyObject *py_seek = nullptr;
+ PyObject *py_tell = nullptr;
+
+ std::size_t buffer_size;
+
+ /* This is actually a Python bytes object and the actual read buffer is
+ its internal data, i.e. an array of characters. We keep the reference
+ returned by the read call so as to hold on to it: as a result, the
+ actual buffer can't go away.
+ */
+ PyObject *read_buffer = nullptr;
+
+ /* A mere array of char's allocated on the heap at construction time and
+ de-allocated only at destruction time.
+ */
+ char *write_buffer = nullptr;
+
+ off_type pos_of_read_buffer_end_in_py_file,
+ pos_of_write_buffer_end_in_py_file;
+
+ // the farthest place the buffer has been written into
+ char *farthest_pptr = nullptr;
+
+ /// Tries to perform the seek by moving within the current buffer only; returns an empty optional when Python has to be asked.
+ boost::optional seekoff_without_calling_python(
+ off_type off,
+ std::ios_base::seekdir way,
+ std::ios_base::openmode which)
+ {
+ boost::optional const failure;
+
+ // Buffer range and current position
+ off_type buf_begin, buf_end, buf_cur, upper_bound;
+ off_type pos_of_buffer_end_in_py_file;
+ if (which == std::ios_base::in) {
+ pos_of_buffer_end_in_py_file = pos_of_read_buffer_end_in_py_file;
+ buf_begin = reinterpret_cast(eback());
+ buf_cur = reinterpret_cast(gptr());
+ buf_end = reinterpret_cast(egptr());
+ upper_bound = buf_end;
+ }
+ else if (which == std::ios_base::out) {
+ pos_of_buffer_end_in_py_file = pos_of_write_buffer_end_in_py_file;
+ buf_begin = reinterpret_cast(pbase());
+ buf_cur = reinterpret_cast(pptr());
+ buf_end = reinterpret_cast(epptr());
+ farthest_pptr = std::max(farthest_pptr, pptr());
+ upper_bound = reinterpret_cast(farthest_pptr) + 1; // one past farthest_pptr, so the position right after the written data is accepted
+ }
+ else {
+ throw std::runtime_error("unreachable");
+ }
+
+ // Sought position in "buffer coordinate"
+ off_type buf_sought;
+ if (way == std::ios_base::cur) {
+ buf_sought = buf_cur + off;
+ }
+ else if (way == std::ios_base::beg) {
+ buf_sought = buf_end + (off - pos_of_buffer_end_in_py_file); // off is absolute; rebase it on the file position of the buffer end
+ }
+ else if (way == std::ios_base::end) {
+ return failure; // seeks relative to the end are always left to Python
+ }
+ else {
+ throw std::runtime_error("unreachable");
+ }
+
+ // if the sought position is not in the buffer, give up
+ if (buf_sought < buf_begin || buf_sought >= upper_bound) return failure;
+
+ // we are in wonderland
+ if (which == std::ios_base::in) gbump(buf_sought - buf_cur);
+ else if (which == std::ios_base::out) pbump(buf_sought - buf_cur);
+ return pos_of_buffer_end_in_py_file + (buf_sought - buf_end); // file position matching the new buffer position
+ }
+    /// Converts the Python int o to T and releases the reference; a null o (failed call) yields -1.
+    template <typename T>
+    T extract_int(PyObject *o)
+    {
+        if (o == nullptr) return static_cast<T>(-1);
+        auto value = PyLong_AsLongLong(o); // long long: file offsets can exceed 32 bits where long is 32-bit
+        Py_DECREF(o);
+        return static_cast<T>(value);
+    }
+
+ public:
+ /// Input stream reading through a streambuf; the constructor makes badbit throw.
+ class istream : public std::istream
+ {
+ public:
+ istream(streambuf& buf) : std::istream(&buf)
+ {
+ exceptions(std::ios_base::badbit);
+ }
+ // Synchronise with the buffer on destruction, but only while the stream is still good.
+ ~istream() { if (this->good()) this->sync(); }
+ };
+ /// Output stream writing through a streambuf; the constructor makes badbit throw.
+ class ostream : public std::ostream
+ {
+ public:
+ ostream(streambuf& buf) : std::ostream(&buf)
+ {
+ exceptions(std::ios_base::badbit);
+ }
+ // Flush pending output on destruction, but only while the stream is still good.
+ ~ostream() { if (this->good()) this->flush(); }
+ };
+};
+// NOTE(review): non-inline definition in a header; a second translation unit including this file would define it again - confirm it is included only once.
+std::size_t streambuf::default_buffer_size = 1024;
+/// Holds the streambuf used by the ostream struct below, which lists this type as its first base.
+struct streambuf_capsule
+{
+    /// Stream buffer bound to the Python file object given to the constructor.
+    streambuf python_streambuf;
+
+    /// Forwards both arguments to the streambuf constructor.
+    streambuf_capsule(PyObject *python_file_obj, std::size_t buffer_size = 0)
+        : python_streambuf(python_file_obj, buffer_size)
+    {
+    }
+};
+/// Output stream that owns its buffer: the streambuf_capsule base is listed first and its python_streambuf is handed to streambuf::ostream.
+struct ostream : private streambuf_capsule, streambuf::ostream
+{
+ ostream(
+ PyObject *python_file_obj,
+ std::size_t buffer_size=0)
+ :
+ streambuf_capsule(python_file_obj, buffer_size),
+ streambuf::ostream(python_streambuf)
+ {}
+ // Flushes the stream if it is still good, then turns a pending Python error into a C++ exception.
+ ~ostream()
+ {
+ if (this->good())
+ {
+ this->flush();
+ }
+ // NOTE(review): destructors are noexcept unless declared otherwise, so the throw below would end in std::terminate - confirm intent.
+ if (PyErr_Occurred() != nullptr)
+ {
+ PyErr_Clear();
+ throw std::runtime_error(
+ "Problem closing python ostream.\n"
+ " Known limitation: the error is unrecoverable. Sorry.\n"
+ " Suggestion for programmer: add ostream.flush() before"
+ " returning.");
+ }
+ }
+};
+
+}} // namespace xlnt::arrow
diff --git a/arrow/xlntpyarrow/setup.py b/arrow/xlntpyarrow/setup.py
new file mode 100644
index 00000000..7860ce7d
--- /dev/null
+++ b/arrow/xlntpyarrow/setup.py
@@ -0,0 +1,64 @@
+from distutils.core import setup, Extension
+from distutils import sysconfig
+
+description = """
+xlntpyarrow allows Apache Arrow tables to be written to and read from an XLSX
+file efficiently using the C++ library xlnt.
+""".strip()
+
+cfg_vars = sysconfig.get_config_vars()
+if 'CFLAGS' in cfg_vars:
+ cfg_vars['CFLAGS'] = cfg_vars['CFLAGS'].replace('-Wstrict-prototypes', '')
+# NOTE(review): every directory below is an absolute path under /root - presumably the author's build machine; confirm before building elsewhere.
+xlntpyarrow_extension = Extension(
+ 'xlntpyarrow',
+ ['xlntpyarrow.cpp'],
+ language = 'c++',
+ include_dirs = [
+ '/root/xlnt/arrow/xlntarrow',
+ '/root/xlnt/arrow/xlntpyarrow',
+ '/root/miniconda3/include'
+ ],
+ libraries = [
+ 'arrow',
+ 'xlntarrow',
+ 'xlnt'
+ ],
+ library_dirs = [
+ '/root/miniconda3/lib',
+ '/root/xlnt/build/arrow/xlntarrow',
+ '/root/xlnt/build/source'
+ ],
+ extra_compile_args=['-std=c++11'] # NOTE(review): arrow/xlntarrow/CMakeLists.txt requests C++14 - confirm c++11 is intended here
+)
+
+classifiers = [
+ 'Development Status :: 5 - Production/Stable',
+ 'Environment :: Plugins',
+ 'Intended Audience :: Science/Research',
+ 'License :: OSI Approved :: MIT License',
+ 'Natural Language :: English',
+ 'Operating System :: Microsoft :: Windows',
+ 'Operating System :: MacOS :: MacOS X',
+ 'Operating System :: POSIX :: Linux',
+ 'Programming Language :: C',
+ 'Programming Language :: C++',
+ 'Programming Language :: Python :: 2.7',
+ 'Programming Language :: Python :: 3.6',
+ 'Programming Language :: Python :: Implementation :: CPython',
+ 'Topic :: Database',
+ 'Topic :: Office/Business :: Financial :: Spreadsheet',
+ 'Topic :: Scientific/Engineering :: Information Analysis',
+ 'Topic :: Software Development :: Libraries :: Python Modules'
+]
+
+setup(
+ name = 'xlntpyarrow',
+ version = '1.1.0',
+ classifiers = classifiers,
+ description = description,
+ ext_modules = [xlntpyarrow_extension],
+ author = 'Thomas Fussell',
+ author_email = 'thomas.fussell@gmail.com',
+ url = 'https://github.com/tfussell/xlnt'
+)
diff --git a/arrow/xlntpyarrow/xlntpyarrow.cpp b/arrow/xlntpyarrow/xlntpyarrow.cpp
new file mode 100644
index 00000000..7c948c6e
--- /dev/null
+++ b/arrow/xlntpyarrow/xlntpyarrow.cpp
@@ -0,0 +1,109 @@
+#include
+#include
+#include
+#include
+#include
+#include
+// Wraps the Python file object in a std::istream, runs xlnt::arrow::xlsx2arrow on it with a table built from an empty schema and column list, and returns None.
+PyObject *xlsx2arrow(PyObject *file)
+{
+ xlnt::arrow::streambuf buffer(file);
+ std::istream stream(&buffer);
+ std::shared_ptr schema;
+ std::vector> columns;
+ arrow::Table table(schema, columns);
+ xlnt::arrow::xlsx2arrow(stream, table);
+ // NOTE(review): nothing here catches C++ exceptions (the streambuf throws std::invalid_argument) before control returns to the interpreter - confirm.
+ Py_RETURN_NONE;
+}
+
+extern "C" {
+
+/*
+ * Implements XLSX->pyarrow table function.
+ */
+PyDoc_STRVAR(xlntpyarrow_xlsx2arrow_doc, "xlsx2arrow(in_file)\
+\
+Returns an arrow table representing the given XLSX file object.");
+// Python entry point for xlsx2arrow: parses the single argument and forwards it to the helper above.
+PyObject *xlntpyarrow_xlsx2arrow(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+ PyObject *file = nullptr;
+ static const char *keywords[] = { "file", nullptr };
+ static auto keywords_nc = const_cast(keywords);
+ // "O" stores one object, given positionally or as the keyword "file", without converting it.
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", keywords_nc, &file))
+ {
+ return nullptr;
+ }
+
+ return xlsx2arrow(file);
+}
+
+
+/*
+* Implements pyarrow table->XLSX function.
+*/
+PyDoc_STRVAR(xlntpyarrow_arrow2xlsx_doc, "arrow2xlsx(table, out_file)\
+\
+Writes the given arrow table to out_file as an XLSX file.");
+// Python entry point for arrow2xlsx(table, file): validates the two arguments and returns None; writing is not implemented yet.
+PyObject *xlntpyarrow_arrow2xlsx(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    PyObject *table = nullptr;
+    PyObject *file = nullptr;
+    static const char *keywords[] = { "table", "file", nullptr };
+    static auto keywords_nc = const_cast<char **>(keywords);
+    // One keyword name and one output pointer per "O" unit; the former "Oi" had two units but only one of each.
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO", keywords_nc, &table, &file))
+    {
+        return nullptr;
+    }
+    Py_RETURN_NONE;
+}
+// Method table: both functions take positional and keyword arguments; the all-null entry ends the table.
+static PyMethodDef xlntpyarrow_functions[] =
+{
+ { "xlsx2arrow", (PyCFunction)xlntpyarrow_xlsx2arrow, METH_VARARGS | METH_KEYWORDS, xlntpyarrow_xlsx2arrow_doc },
+ { "arrow2xlsx", (PyCFunction)xlntpyarrow_arrow2xlsx, METH_VARARGS | METH_KEYWORDS, xlntpyarrow_arrow2xlsx_doc },
+ { nullptr, nullptr, 0, nullptr }
+};
+// Module exec slot: registers the functions and constants; returns -1 as soon as one registration fails.
+int exec_xlntpyarrow(PyObject *module)
+{
+    // Every PyModule_Add* call returns -1 with an exception set on failure; that has to
+    // reach the import machinery through this function's return value.
+    if (PyModule_AddFunctions(module, xlntpyarrow_functions) != 0) return -1;
+    if (PyModule_AddStringConstant(module, "__author__", "Thomas Fussell") != 0) return -1;
+    if (PyModule_AddStringConstant(module, "__version__", "0.9.0") != 0) return -1;
+    if (PyModule_AddIntConstant(module, "year", 2017) != 0) return -1;
+    return 0;
+}
+
+PyDoc_STRVAR(xlntpyarrow_doc, "The xlntpyarrow module");
+// Module slots: exec_xlntpyarrow is registered for Py_mod_exec; { 0, nullptr } ends the table.
+static PyModuleDef_Slot xlntpyarrow_slots[] =
+{
+ { Py_mod_exec, (void *)exec_xlntpyarrow },
+ { 0, nullptr }
+};
+// Module definition passed to PyModuleDef_Init; m_methods is null because exec_xlntpyarrow adds the functions.
+static PyModuleDef xlntpyarrow_def =
+{
+ PyModuleDef_HEAD_INIT,
+ "xlntpyarrow",
+ xlntpyarrow_doc,
+ 0, /* m_size */
+ nullptr, /* m_methods */
+ xlntpyarrow_slots,
+ nullptr, /* m_traverse */
+ nullptr, /* m_clear */
+ nullptr, /* m_free */
+};
+// Import entry point of the xlntpyarrow module: hands the module definition to PyModuleDef_Init.
+PyMODINIT_FUNC PyInit_xlntpyarrow()
+{
+ return PyModuleDef_Init(&xlntpyarrow_def);
+}
+
+} // extern "C"
diff --git a/include/xlnt/workbook/streaming_workbook_reader.hpp b/include/xlnt/workbook/streaming_workbook_reader.hpp
new file mode 100644
index 00000000..6e975426
--- /dev/null
+++ b/include/xlnt/workbook/streaming_workbook_reader.hpp
@@ -0,0 +1,132 @@
+// Copyright (c) 2014-2017 Thomas Fussell
+// Copyright (c) 2010-2015 openpyxl
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE
+//
+// @license: http://www.opensource.org/licenses/mit-license.php
+// @author: see AUTHORS file
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace xml {
+class parser;
+}
+
+namespace xlnt {
+
+class cell;
+template
+class optional;
+class path;
+class workbook;
+class worksheet;
+
+namespace detail {
+class xlsx_consumer;
+}
+
+/// <summary>
+/// streaming_workbook_reader reads a workbook worksheet by worksheet and cell by cell through begin_worksheet() and read_cell().
+/// </summary>
+class XLNT_API streaming_workbook_reader
+{
+public:
+ streaming_workbook_reader();
+ ~streaming_workbook_reader();
+
+ /// <summary>
+ /// Closes currently open read stream. This will be called automatically
+ /// by the destructor if it hasn't already been called manually.
+ /// </summary>
+ void close();
+
+ bool has_cell();
+
+ /// <summary>
+ /// Reads the next cell in the current worksheet and returns it.
+ /// NOTE(review): presumably only valid while has_cell() is true - confirm.
+ /// </summary>
+ cell read_cell();
+
+ bool has_worksheet();
+
+ /// <summary>
+ /// Begins reading of the next worksheet in the workbook. Nothing is returned;
+ /// the worksheet object is returned by end_worksheet().
+ /// </summary>
+ void begin_worksheet();
+
+ /// <summary>
+ /// Ends reading of the current worksheet in the workbook and returns a
+ /// worksheet object for it, i.e. for the worksheet whose reading was started
+ /// by begin_worksheet().
+ /// </summary>
+ worksheet end_worksheet();
+
+ /// <summary>
+ /// Interprets byte vector data as an XLSX file and sets the content of this
+ /// workbook to match that file.
+ /// </summary>
+ void open(const std::vector &data);
+
+ /// <summary>
+ /// Interprets file with the given filename as an XLSX file and sets
+ /// the content of this workbook to match that file.
+ /// </summary>
+ void open(const std::string &filename);
+
+#ifdef _MSC_VER
+ /// <summary>
+ /// Interprets file with the given filename as an XLSX file and sets
+ /// the content of this workbook to match that file.
+ /// </summary>
+ void open(const std::wstring &filename);
+#endif
+
+ /// <summary>
+ /// Interprets file with the given filename as an XLSX file and sets the
+ /// content of this workbook to match that file.
+ /// </summary>
+ void open(const path &filename);
+
+ /// <summary>
+ /// Interprets data in stream as an XLSX file and sets the content of this
+ /// workbook to match that file.
+ /// </summary>
+ void open(std::istream &stream);
+
+private:
+ std::vector worksheet_queue_;
+ std::unique_ptr consumer_;
+ std::unique_ptr workbook_;
+ std::unique_ptr stream_;
+ std::unique_ptr stream_buffer_;
+ std::unique_ptr part_stream_;
+ std::unique_ptr part_stream_buffer_;
+ std::unique_ptr parser_;
+};
+
+} // namespace xlnt
diff --git a/include/xlnt/workbook/streaming_workbook_writer.hpp b/include/xlnt/workbook/streaming_workbook_writer.hpp
new file mode 100644
index 00000000..38d27880
--- /dev/null
+++ b/include/xlnt/workbook/streaming_workbook_writer.hpp
@@ -0,0 +1,93 @@
+// Copyright (c) 2014-2017 Thomas Fussell
+// Copyright (c) 2010-2015 openpyxl
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE
+//
+// @license: http://www.opensource.org/licenses/mit-license.php
+// @author: see AUTHORS file
+#pragma once
+
+#include
+#include
+
+#include
+
+namespace xlnt {
+
+/// <summary>
+/// streaming_workbook_writer writes a workbook sheet by sheet and cell by cell through add_sheet() and add_cell().
+/// </summary>
+class XLNT_API streaming_workbook_writer
+{
+public:
+ ~streaming_workbook_writer();
+
+ /// <summary>
+ /// Finishes writing of the remaining contents of the workbook and closes
+ /// currently open write stream. This will be called automatically by the
+ /// destructor if it hasn't already been called manually.
+ /// </summary>
+ void close();
+
+ /// <summary>
+ /// Adds a cell to the currently active worksheet at the position given by
+ /// ref and returns it. ref should be to the right of or below
+ /// the previously written cell.
+ /// </summary>
+ cell add_cell(const cell_reference &ref);
+
+ /// <summary>
+ /// Ends writing of data to the current sheet and begins writing a new sheet
+ /// with the given title.
+ /// </summary>
+ worksheet add_sheet(const std::string &title);
+
+ /// <summary>
+ /// Serializes the workbook into an XLSX file and saves the bytes into
+ /// byte vector data.
+ /// </summary>
+ void open(std::vector &data) const;
+
+ /// <summary>
+ /// Serializes the workbook into an XLSX file and saves the data into a file
+ /// named filename.
+ /// </summary>
+ void open(const std::string &filename) const;
+
+#ifdef _MSC_VER
+ /// <summary>
+ /// Serializes the workbook into an XLSX file and saves the data into a file
+ /// named filename.
+ /// </summary>
+ void open(const std::wstring &filename) const;
+#endif
+
+ /// <summary>
+ /// Serializes the workbook into an XLSX file and saves the data into a file
+ /// named filename.
+ /// </summary>
+ void open(const xlnt::path &filename) const;
+
+ /// <summary>
+ /// Serializes the workbook into an XLSX file and saves the data into stream.
+ /// </summary>
+ void open(std::ostream &stream) const;
+};
+
+} // namespace xlnt
diff --git a/include/xlnt/xlnt.hpp b/include/xlnt/xlnt.hpp
index db839c3a..bf52609c 100644
--- a/include/xlnt/xlnt.hpp
+++ b/include/xlnt/xlnt.hpp
@@ -65,6 +65,7 @@
#include
#include
#include
+#include
#include
#include
#include
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 94a474df..cf71e1e7 100644
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -28,6 +28,7 @@ endif()
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unknown-pragmas")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-padded -Werror -Wno-documentation-unknown-command")
@@ -155,6 +156,11 @@ target_include_directories(xlnt PUBLIC ${XLNT_INCLUDE_DIR})
target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR})
target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR}/../third-party/libstudxml)
+if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0.0")
+ target_compile_definitions(xlnt PRIVATE UTFCPP=1)
+ target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR}/../third-party/utfcpp)
+endif()
+
if(MSVC)
set_target_properties(xlnt PROPERTIES COMPILE_FLAGS "/wd\"4251\" /wd\"4275\" /wd\"4068\" /MP")
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/detail/serialization/miniz.cpp PROPERTIES COMPILE_FLAGS "/wd\"4244\" /wd\"4334\" /wd\"4127\"")
diff --git a/source/detail/cryptography/xlsx_crypto_consumer.cpp b/source/detail/cryptography/xlsx_crypto_consumer.cpp
index 10a319bf..fdc53a96 100644
--- a/source/detail/cryptography/xlsx_crypto_consumer.cpp
+++ b/source/detail/cryptography/xlsx_crypto_consumer.cpp
@@ -108,7 +108,7 @@ std::vector decrypt_xlsx_agile(
++segment;
}
- decrypted_package.resize(total_size);
+ decrypted_package.resize(static_cast(total_size));
return decrypted_package;
}
@@ -153,7 +153,8 @@ encryption_info::standard_encryption_info read_standard_encryption_info(std::ist
throw xlnt::exception("invalid header");
}
- const auto csp_name_length = (header_length - (info_stream.tellg() - index_at_start)) / 2;
+ const auto csp_name_length = static_cast((header_length
+ - (info_stream.tellg() - index_at_start)) / 2);
auto csp_name = xlnt::detail::read_string(info_stream, csp_name_length);
csp_name.pop_back(); // remove extraneous trailing null
if (csp_name != u"Microsoft Enhanced RSA and AES Cryptographic Provider (Prototype)"
diff --git a/source/detail/serialization/xlsx_consumer.cpp b/source/detail/serialization/xlsx_consumer.cpp
index fce83802..500303e3 100644
--- a/source/detail/serialization/xlsx_consumer.cpp
+++ b/source/detail/serialization/xlsx_consumer.cpp
@@ -34,6 +34,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -132,10 +133,898 @@ xlsx_consumer::xlsx_consumer(workbook &target)
{
}
+/// Destructor: performs no work beyond destroying the members.
+xlsx_consumer::~xlsx_consumer() = default;
+
void xlsx_consumer::read(std::istream &source)
{
archive_.reset(new izstream(source));
- populate_workbook();
+ populate_workbook(false); // false: streaming_ is cleared, so worksheets are not deferred to read_cell()
+}
+
+void xlsx_consumer::open(std::istream &source) // streaming counterpart of read()
+{
+ archive_.reset(new izstream(source)); // wrap source as the archive that parts are opened from
+ populate_workbook(true); // true: sets streaming_, so read_office_document returns before its worksheet loop
+}
+
+/// Reads the next "c" element of the worksheet part currently being parsed
+/// and returns it as a cell.
+///
+/// If the parser is positioned directly inside "sheetData", the next "row"
+/// start tag is consumed first and its ht, customHeight and hidden attributes
+/// are stored in the worksheet's row properties. After the cell has been read,
+/// the "row" end tag is consumed when the row has no further content, and the
+/// "sheetData" end tag likewise when no further rows follow.
+///
+/// In streaming mode the returned cell wraps the single streaming_cell_
+/// object, so every call hands back a handle to the same storage; otherwise
+/// the cell is looked up in (or added to) the current worksheet.
+///
+/// Returns cell(nullptr) when has_cell() is false, or when the row that was
+/// just entered contains no "c" element.
+cell xlsx_consumer::read_cell()
+{
+    if (!has_cell())
+    {
+        return cell(nullptr);
+    }
+
+    auto ws = worksheet(current_worksheet_);
+
+    // Between rows: enter the next row and record its properties.
+    if (in_element(qn("spreadsheetml", "sheetData")))
+    {
+        expect_start_element(qn("spreadsheetml", "row"), xml::content::complex); // CT_Row
+        auto row_index = parser().attribute("r");
+
+        if (parser().attribute_present("ht"))
+        {
+            ws.row_properties(row_index).height = parser().attribute("ht");
+        }
+
+        if (parser().attribute_present("customHeight"))
+        {
+            ws.row_properties(row_index).custom_height = is_true(parser().attribute("customHeight"));
+        }
+
+        if (parser().attribute_present("hidden") && is_true(parser().attribute("hidden")))
+        {
+            ws.row_properties(row_index).hidden = true;
+        }
+
+        skip_attributes({ qn("x14ac", "dyDescent") });
+        skip_attributes({ "customFormat", "s", "customFont",
+            "outlineLevel", "collapsed", "thickTop", "thickBot",
+            "ph", "spans" });
+    }
+
+    if (!in_element(qn("spreadsheetml", "row")))
+    {
+        return cell(nullptr);
+    }
+
+    expect_start_element(qn("spreadsheetml", "c"), xml::content::complex);
+
+    // Parse the reference once and use it both for the lookup and for the
+    // position fields written below.
+    auto reference = cell_reference(parser().attribute("r"));
+    // NOTE(review): in streaming mode the same cell_impl is reused for every
+    // call and is not cleared here - confirm that stale state from the
+    // previous cell cannot leak into a cell that has no value or format.
+    auto cell = streaming_ ? xlnt::cell(streaming_cell_.get())
+                           : ws.cell(reference);
+    cell.d_->parent_ = current_worksheet_;
+    cell.d_->column_ = reference.column_index();
+    cell.d_->row_ = reference.row();
+
+    auto has_type = parser().attribute_present("t");
+    auto type = has_type ? parser().attribute("t") : "n";
+
+    auto has_format = parser().attribute_present("s");
+    auto format_id = static_cast(has_format ? std::stoull(parser().attribute("s")) : 0LL);
+
+    auto has_value = false;
+    auto value_string = std::string();
+
+    auto has_formula = false;
+    auto has_shared_formula = false;
+    auto formula_value_string = std::string();
+
+    while (in_element(qn("spreadsheetml", "c")))
+    {
+        auto current_element = expect_start_element(xml::content::mixed);
+
+        if (current_element == qn("spreadsheetml", "v")) // s:ST_Xstring
+        {
+            has_value = true;
+            value_string = read_text();
+        }
+        else if (current_element == qn("spreadsheetml", "f")) // CT_CellFormula
+        {
+            has_formula = true;
+
+            if (parser().attribute_present("t"))
+            {
+                has_shared_formula = parser().attribute("t") == "shared";
+            }
+
+            skip_attributes(
+                { "aca", "ref", "dt2D", "dtr", "del1", "del2", "r1", "r2", "ca", "si", "bx" });
+
+            formula_value_string = read_text();
+        }
+        else if (current_element == qn("spreadsheetml", "is")) // CT_Rst
+        {
+            expect_start_element(qn("spreadsheetml", "t"), xml::content::simple);
+            // An inline string carries the cell value as well; without this
+            // flag the text read here would be discarded below.
+            has_value = true;
+            value_string = read_text();
+            expect_end_element(qn("spreadsheetml", "t"));
+        }
+        else
+        {
+            unexpected_element(current_element);
+        }
+
+        expect_end_element(current_element);
+    }
+
+    expect_end_element(qn("spreadsheetml", "c"));
+
+    // Shared formulas are not applied to the cell.
+    if (has_formula && !has_shared_formula)
+    {
+        cell.formula(formula_value_string);
+    }
+
+    if (has_value)
+    {
+        if (type == "str")
+        {
+            cell.d_->value_text_ = value_string;
+            cell.data_type(cell::type::formula_string);
+        }
+        else if (type == "inlineStr")
+        {
+            cell.d_->value_text_ = value_string;
+            cell.data_type(cell::type::inline_string);
+        }
+        else if (type == "s")
+        {
+            // The text is stored as a number here and the cell is tagged as
+            // a shared string.
+            cell.d_->value_numeric_ = std::stold(value_string);
+            cell.data_type(cell::type::shared_string);
+        }
+        else if (type == "b") // boolean
+        {
+            cell.value(is_true(value_string));
+        }
+        else if (type == "n") // numeric
+        {
+            cell.value(std::stold(value_string));
+        }
+        else if (!value_string.empty() && value_string[0] == '#')
+        {
+            cell.error(value_string);
+        }
+    }
+
+    if (has_format)
+    {
+        cell.format(target_.format(format_id));
+    }
+
+    // Close the row (and the sheet data) as soon as nothing else follows, so
+    // that the next has_cell() call sees the correct parser position.
+    if (!in_element(qn("spreadsheetml", "row")))
+    {
+        expect_end_element(qn("spreadsheetml", "row"));
+
+        if (!in_element(qn("spreadsheetml", "sheetData")))
+        {
+            expect_end_element(qn("spreadsheetml", "sheetData"));
+        }
+    }
+
+    return cell;
+}
+
+/// Reads the worksheet part identified by rel_id. The leading part of the
+/// worksheet is always parsed; in streaming mode the sheet data and trailing
+/// elements are left unread.
+void xlsx_consumer::read_worksheet(const std::string &rel_id)
+{
+    read_worksheet_begin(rel_id);
+
+    if (streaming_)
+    {
+        return;
+    }
+
+    read_worksheet_sheetdata();
+    read_worksheet_end(rel_id);
+}
+
+/// Creates the worksheet that belongs to relationship id rel_id in the target
+/// workbook, makes it the current worksheet, and parses the leading part of
+/// the worksheet XML: sheetPr, dimension, sheetViews, sheetFormatPr and cols.
+///
+/// Parsing stops right after the "sheetData" start tag has been consumed (or
+/// at the end of the "worksheet" children if there is no sheet data), leaving
+/// the parser positioned for read_worksheet_sheetdata() or read_cell().
+///
+/// In streaming mode the shared streaming_cell_ is allocated on first use.
+///
+/// Returns the title of the worksheet.
+/// Throws invalid_file if no sheet title is registered for rel_id.
+std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id)
+{
+    if (streaming_ && streaming_cell_ == nullptr)
+    {
+        streaming_cell_.reset(new detail::cell_impl());
+    }
+
+    const auto title_entry = std::find_if(target_.d_->sheet_title_rel_id_map_.begin(),
+        target_.d_->sheet_title_rel_id_map_.end(),
+        [&](const std::pair &p) {
+            return p.second == rel_id;
+        });
+
+    // Dereferencing the end iterator would be undefined behaviour, so an
+    // unknown relationship id is reported as a malformed file instead.
+    if (title_entry == target_.d_->sheet_title_rel_id_map_.end())
+    {
+        throw invalid_file("no worksheet title is registered for the given relationship id");
+    }
+
+    auto title = title_entry->first;
+
+    auto id = sheet_title_id_map_[title];
+    auto index = sheet_title_index_map_[title];
+
+    // Keep worksheets_ ordered by sheet index: insert before the first sheet
+    // whose index is not smaller than the new one.
+    auto insertion_iter = target_.d_->worksheets_.begin();
+    while (insertion_iter != target_.d_->worksheets_.end() && sheet_title_index_map_[insertion_iter->title_] < index)
+    {
+        ++insertion_iter;
+    }
+
+    current_worksheet_ = &*target_.d_->worksheets_.emplace(insertion_iter, &target_, id, title);
+    auto ws = worksheet(current_worksheet_);
+
+    expect_start_element(qn("spreadsheetml", "worksheet"), xml::content::complex); // CT_Worksheet
+    skip_attributes({ qn("mc", "Ignorable") });
+    read_namespaces();
+
+    while (in_element(qn("spreadsheetml", "worksheet")))
+    {
+        auto current_worksheet_element = expect_start_element(xml::content::complex);
+
+        if (current_worksheet_element == qn("spreadsheetml", "sheetPr")) // CT_SheetPr 0-1
+        {
+            while (in_element(current_worksheet_element))
+            {
+                auto sheet_pr_child_element = expect_start_element(xml::content::simple);
+
+                if (sheet_pr_child_element == qn("spreadsheetml", "tabColor")) // CT_Color 0-1
+                {
+                    read_color();
+                }
+                else if (sheet_pr_child_element == qn("spreadsheetml", "outlinePr")) // CT_OutlinePr 0-1
+                {
+                    skip_attribute("applyStyles"); // optional, boolean, false
+                    skip_attribute("summaryBelow"); // optional, boolean, true
+                    skip_attribute("summaryRight"); // optional, boolean, true
+                    skip_attribute("showOutlineSymbols"); // optional, boolean, true
+                }
+                else if (sheet_pr_child_element == qn("spreadsheetml", "pageSetUpPr")) // CT_PageSetUpPr 0-1
+                {
+                    skip_attribute("autoPageBreaks"); // optional, boolean, true
+                    skip_attribute("fitToPage"); // optional, boolean, false
+                }
+                else
+                {
+                    unexpected_element(sheet_pr_child_element);
+                }
+
+                expect_end_element(sheet_pr_child_element);
+            }
+
+            skip_attribute("syncHorizontal"); // optional, boolean, false
+            skip_attribute("syncVertical"); // optional, boolean, false
+            skip_attribute("syncRef"); // optional, ST_Ref, false
+            skip_attribute("transitionEvaluation"); // optional, boolean, false
+            skip_attribute("transitionEntry"); // optional, boolean, false
+            skip_attribute("published"); // optional, boolean, true
+            skip_attribute("codeName"); // optional, string
+            skip_attribute("filterMode"); // optional, boolean, false
+            skip_attribute("enableFormatConditionsCalculation"); // optional, boolean, true
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "dimension")) // CT_SheetDimension 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "sheetViews")) // CT_SheetViews 0-1
+        {
+            while (in_element(current_worksheet_element))
+            {
+                expect_start_element(qn("spreadsheetml", "sheetView"), xml::content::complex); // CT_SheetView 1+
+
+                sheet_view new_view;
+                new_view.id(parser().attribute("workbookViewId"));
+
+                if (parser().attribute_present("showGridLines")) // default="true"
+                {
+                    new_view.show_grid_lines(is_true(parser().attribute("showGridLines")));
+                }
+
+                if (parser().attribute_present("defaultGridColor")) // default="true"
+                {
+                    new_view.default_grid_color(is_true(parser().attribute("defaultGridColor")));
+                }
+
+                // Any view value other than "normal" or "pageBreakPreview" is
+                // treated as page layout.
+                if (parser().attribute_present("view") && parser().attribute("view") != "normal")
+                {
+                    new_view.type(parser().attribute("view") == "pageBreakPreview" ? sheet_view_type::page_break_preview
+                                                                                   : sheet_view_type::page_layout);
+                }
+
+                skip_attributes({ "windowProtection", "showFormulas", "showRowColHeaders", "showZeros", "rightToLeft",
+                    "tabSelected", "showRuler", "showOutlineSymbols", "showWhiteSpace", "view", "topLeftCell",
+                    "colorId", "zoomScale", "zoomScaleNormal", "zoomScaleSheetLayoutView", "zoomScalePageLayoutView" });
+
+                while (in_element(qn("spreadsheetml", "sheetView")))
+                {
+                    auto sheet_view_child_element = expect_start_element(xml::content::simple);
+
+                    if (sheet_view_child_element == qn("spreadsheetml", "pane")) // CT_Pane 0-1
+                    {
+                        pane new_pane;
+
+                        if (parser().attribute_present("topLeftCell"))
+                        {
+                            new_pane.top_left_cell = cell_reference(parser().attribute("topLeftCell"));
+                        }
+
+                        if (parser().attribute_present("xSplit"))
+                        {
+                            new_pane.x_split = parser().attribute("xSplit");
+                        }
+
+                        if (parser().attribute_present("ySplit"))
+                        {
+                            new_pane.y_split = parser().attribute("ySplit");
+                        }
+
+                        if (parser().attribute_present("activePane"))
+                        {
+                            new_pane.active_pane = parser().attribute("activePane");
+                        }
+
+                        if (parser().attribute_present("state"))
+                        {
+                            new_pane.state = parser().attribute("state");
+                        }
+
+                        new_view.pane(new_pane);
+                    }
+                    else if (sheet_view_child_element == qn("spreadsheetml", "selection")) // CT_Selection 0-4
+                    {
+                        skip_remaining_content(sheet_view_child_element);
+                    }
+                    else if (sheet_view_child_element == qn("spreadsheetml", "pivotSelection")) // CT_PivotSelection 0-4
+                    {
+                        skip_remaining_content(sheet_view_child_element);
+                    }
+                    else if (sheet_view_child_element == qn("spreadsheetml", "extLst")) // CT_ExtensionList 0-1
+                    {
+                        skip_remaining_content(sheet_view_child_element);
+                    }
+                    else
+                    {
+                        unexpected_element(sheet_view_child_element);
+                    }
+
+                    expect_end_element(sheet_view_child_element);
+                }
+
+                expect_end_element(qn("spreadsheetml", "sheetView"));
+
+                ws.d_->views_.push_back(new_view);
+            }
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "sheetFormatPr")) // CT_SheetFormatPr 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "cols")) // CT_Cols 0+
+        {
+            while (in_element(qn("spreadsheetml", "cols")))
+            {
+                expect_start_element(qn("spreadsheetml", "col"), xml::content::simple);
+
+                skip_attributes({ "bestFit", "collapsed", "outlineLevel" });
+
+                auto min = static_cast(std::stoull(parser().attribute("min")));
+                auto max = static_cast(std::stoull(parser().attribute("max")));
+
+                optional width;
+
+                if (parser().attribute_present("width"))
+                {
+                    width = parser().attribute("width");
+                }
+
+                optional column_style;
+
+                if (parser().attribute_present("style"))
+                {
+                    column_style = parser().attribute("style");
+                }
+
+                auto custom = parser().attribute_present("customWidth")
+                    ? is_true(parser().attribute("customWidth")) : false;
+                auto hidden = parser().attribute_present("hidden")
+                    ? is_true(parser().attribute("hidden")) : false;
+
+                expect_end_element(qn("spreadsheetml", "col"));
+
+                // One col element describes the inclusive column range
+                // [min, max]; the same properties are applied to each column.
+                for (auto column = min; column <= max; column++)
+                {
+                    column_properties props;
+
+                    if (width.is_set())
+                    {
+                        props.width = width.get();
+                    }
+
+                    if (column_style.is_set())
+                    {
+                        props.style = column_style.get();
+                    }
+
+                    props.hidden = hidden;
+                    props.custom_width = custom;
+                    ws.add_column_properties(column, props);
+                }
+            }
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "sheetData")) // CT_SheetData 1
+        {
+            // Stop with the sheetData start tag consumed and its end tag
+            // still pending; the cell readers continue from here.
+            return title;
+        }
+
+        expect_end_element(current_worksheet_element);
+    }
+
+    return title;
+}
+
+/// Reads every row and cell of the current worksheet's "sheetData" element
+/// into the worksheet, then consumes the "sheetData" end tag.
+///
+/// Does nothing unless the innermost open element (stack_.back()) is
+/// "sheetData", i.e. unless read_worksheet_begin() stopped at the sheet data.
+void xlsx_consumer::read_worksheet_sheetdata()
+{
+    auto ws = worksheet(current_worksheet_);
+
+    if (stack_.back() != qn("spreadsheetml", "sheetData"))
+    {
+        return;
+    }
+
+    while (in_element(qn("spreadsheetml", "sheetData")))
+    {
+        expect_start_element(qn("spreadsheetml", "row"), xml::content::complex); // CT_Row
+        auto row_index = parser().attribute("r");
+
+        if (parser().attribute_present("ht"))
+        {
+            ws.row_properties(row_index).height = parser().attribute("ht");
+        }
+
+        if (parser().attribute_present("customHeight"))
+        {
+            ws.row_properties(row_index).custom_height = is_true(parser().attribute("customHeight"));
+        }
+
+        if (parser().attribute_present("hidden") && is_true(parser().attribute("hidden")))
+        {
+            ws.row_properties(row_index).hidden = true;
+        }
+
+        skip_attributes({ qn("x14ac", "dyDescent") });
+        skip_attributes({ "customFormat", "s", "customFont",
+            "outlineLevel", "collapsed", "thickTop", "thickBot",
+            "ph", "spans" });
+
+        while (in_element(qn("spreadsheetml", "row")))
+        {
+            expect_start_element(qn("spreadsheetml", "c"), xml::content::complex);
+            auto cell = ws.cell(cell_reference(parser().attribute("r")));
+
+            auto has_type = parser().attribute_present("t");
+            auto type = has_type ? parser().attribute("t") : "n";
+
+            auto has_format = parser().attribute_present("s");
+            auto format_id = static_cast(has_format ? std::stoull(parser().attribute("s")) : 0LL);
+
+            auto has_value = false;
+            auto value_string = std::string();
+
+            auto has_formula = false;
+            auto has_shared_formula = false;
+            auto formula_value_string = std::string();
+
+            while (in_element(qn("spreadsheetml", "c")))
+            {
+                auto current_element = expect_start_element(xml::content::mixed);
+
+                if (current_element == qn("spreadsheetml", "v")) // s:ST_Xstring
+                {
+                    has_value = true;
+                    value_string = read_text();
+                }
+                else if (current_element == qn("spreadsheetml", "f")) // CT_CellFormula
+                {
+                    has_formula = true;
+
+                    if (parser().attribute_present("t"))
+                    {
+                        has_shared_formula = parser().attribute("t") == "shared";
+                    }
+
+                    skip_attributes(
+                        { "aca", "ref", "dt2D", "dtr", "del1", "del2", "r1", "r2", "ca", "si", "bx" });
+
+                    formula_value_string = read_text();
+                }
+                else if (current_element == qn("spreadsheetml", "is")) // CT_Rst
+                {
+                    expect_start_element(qn("spreadsheetml", "t"), xml::content::simple);
+                    // An inline string carries the cell value as well; without
+                    // this flag the text read here would be discarded below.
+                    has_value = true;
+                    value_string = read_text();
+                    expect_end_element(qn("spreadsheetml", "t"));
+                }
+                else
+                {
+                    unexpected_element(current_element);
+                }
+
+                expect_end_element(current_element);
+            }
+
+            expect_end_element(qn("spreadsheetml", "c"));
+
+            // Shared formulas are not applied to the cell.
+            if (has_formula && !has_shared_formula)
+            {
+                cell.formula(formula_value_string);
+            }
+
+            if (has_value)
+            {
+                if (type == "str")
+                {
+                    cell.d_->value_text_ = value_string;
+                    cell.data_type(cell::type::formula_string);
+                }
+                else if (type == "inlineStr")
+                {
+                    cell.d_->value_text_ = value_string;
+                    cell.data_type(cell::type::inline_string);
+                }
+                else if (type == "s")
+                {
+                    // The text is stored as a number here and the cell is
+                    // tagged as a shared string.
+                    cell.d_->value_numeric_ = std::stold(value_string);
+                    cell.data_type(cell::type::shared_string);
+                }
+                else if (type == "b") // boolean
+                {
+                    cell.value(is_true(value_string));
+                }
+                else if (type == "n") // numeric
+                {
+                    cell.value(std::stold(value_string));
+                }
+                else if (!value_string.empty() && value_string[0] == '#')
+                {
+                    cell.error(value_string);
+                }
+            }
+
+            if (has_format)
+            {
+                cell.format(target_.format(format_id));
+            }
+        }
+
+        expect_end_element(qn("spreadsheetml", "row"));
+    }
+
+    expect_end_element(qn("spreadsheetml", "sheetData"));
+}
+
+/// Parses the worksheet children that follow the sheet data (auto filter,
+/// merged cells, hyperlinks, page margins, header/footer, page breaks, ...)
+/// up to and including the "worksheet" end tag, and applies the supported
+/// ones to the current worksheet. Elements that are recognised but not
+/// modelled are skipped; any other element is reported through
+/// unexpected_element().
+///
+/// If the sheet has a comments relationship, the comments part is read
+/// afterwards, followed by the VML drawing part when that relationship
+/// exists too (the VML part is only looked at when comments are present).
+///
+/// rel_id is the relationship id of the worksheet; it is used to resolve the
+/// sheet's path for hyperlink, comments and VML drawing relationships.
+///
+/// Returns the current worksheet.
+/// Throws invalid_file when the mergeCells count attribute does not match
+/// the number of mergeCell children.
+worksheet xlsx_consumer::read_worksheet_end(const std::string &rel_id)
+{
+    auto &manifest = target_.manifest();
+
+    const auto workbook_rel = manifest.relationship(path("/"), relationship_type::office_document);
+    const auto sheet_rel = manifest.relationship(workbook_rel.target().path(), rel_id);
+    path sheet_path(sheet_rel.source().path().parent().append(sheet_rel.target().path()));
+    auto hyperlinks = manifest.relationships(sheet_path, xlnt::relationship_type::hyperlink);
+
+    auto ws = worksheet(current_worksheet_);
+
+    while (in_element(qn("spreadsheetml", "worksheet")))
+    {
+        auto current_worksheet_element = expect_start_element(xml::content::complex);
+
+        if (current_worksheet_element == qn("spreadsheetml", "sheetCalcPr")) // CT_SheetCalcPr 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "sheetProtection")) // CT_SheetProtection 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "protectedRanges")) // CT_ProtectedRanges 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "scenarios")) // CT_Scenarios 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "autoFilter")) // CT_AutoFilter 0-1
+        {
+            ws.auto_filter(xlnt::range_reference(parser().attribute("ref")));
+            // auto filter complex
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "sortState")) // CT_SortState 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "dataConsolidate")) // CT_DataConsolidate 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "customSheetViews")) // CT_CustomSheetViews 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "mergeCells")) // CT_MergeCells 0-1
+        {
+            // count is unsigned; if there are more children than announced it
+            // wraps around and still ends up non-zero, so the check below
+            // catches both too few and too many mergeCell elements.
+            auto count = std::stoull(parser().attribute("count"));
+
+            while (in_element(qn("spreadsheetml", "mergeCells")))
+            {
+                expect_start_element(qn("spreadsheetml", "mergeCell"), xml::content::simple);
+                ws.merge_cells(range_reference(parser().attribute("ref")));
+                expect_end_element(qn("spreadsheetml", "mergeCell"));
+
+                count--;
+            }
+
+            if (count != 0)
+            {
+                throw invalid_file("sizes don't match");
+            }
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "phoneticPr")) // CT_PhoneticPr 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "conditionalFormatting")) // CT_ConditionalFormatting 0+
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "dataValidations")) // CT_DataValidations 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "hyperlinks")) // CT_Hyperlinks 0-1
+        {
+            while (in_element(qn("spreadsheetml", "hyperlinks")))
+            {
+                expect_start_element(qn("spreadsheetml", "hyperlink"), xml::content::simple);
+
+                auto cell = ws.cell(parser().attribute("ref"));
+
+                // Only hyperlinks backed by a relationship of this sheet are
+                // applied; an id with no matching relationship is ignored.
+                if (parser().attribute_present(qn("r", "id")))
+                {
+                    auto hyperlink_rel_id = parser().attribute(qn("r", "id"));
+                    auto hyperlink_rel = std::find_if(hyperlinks.begin(), hyperlinks.end(),
+                        [&](const relationship &r) { return r.id() == hyperlink_rel_id; });
+
+                    if (hyperlink_rel != hyperlinks.end())
+                    {
+                        cell.hyperlink(hyperlink_rel->target().path().string());
+                    }
+                }
+
+                skip_attributes({ "location", "tooltip", "display" });
+                expect_end_element(qn("spreadsheetml", "hyperlink"));
+            }
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "printOptions")) // CT_PrintOptions 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "pageMargins")) // CT_PageMargins 0-1
+        {
+            page_margins margins;
+
+            margins.top(parser().attribute("top"));
+            margins.bottom(parser().attribute("bottom"));
+            margins.left(parser().attribute("left"));
+            margins.right(parser().attribute("right"));
+            margins.header(parser().attribute("header"));
+            margins.footer(parser().attribute("footer"));
+
+            ws.page_margins(margins);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "pageSetup")) // CT_PageSetup 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "headerFooter")) // CT_HeaderFooter 0-1
+        {
+            header_footer hf;
+
+            // Both flags are treated as true when the attribute is absent.
+            hf.align_with_margins(
+                !parser().attribute_present("alignWithMargins") || is_true(parser().attribute("alignWithMargins")));
+            // scale_with_doc is driven by its own attribute, scaleWithDoc,
+            // not by alignWithMargins.
+            hf.scale_with_doc(
+                !parser().attribute_present("scaleWithDoc") || is_true(parser().attribute("scaleWithDoc")));
+            auto different_odd_even =
+                parser().attribute_present("differentOddEven") && is_true(parser().attribute("differentOddEven"));
+            auto different_first =
+                parser().attribute_present("differentFirst") && is_true(parser().attribute("differentFirst"));
+
+            optional, 3>> odd_header;
+            optional, 3>> odd_footer;
+            optional, 3>> even_header;
+            optional, 3>> even_footer;
+            optional, 3>> first_header;
+            optional, 3>> first_footer;
+
+            using xlnt::detail::decode_header_footer;
+
+            while (in_element(current_worksheet_element))
+            {
+                auto current_hf_element = expect_start_element(xml::content::simple);
+
+                if (current_hf_element == qn("spreadsheetml", "oddHeader"))
+                {
+                    odd_header = decode_header_footer(read_text());
+                }
+                else if (current_hf_element == qn("spreadsheetml", "oddFooter"))
+                {
+                    odd_footer = decode_header_footer(read_text());
+                }
+                else if (current_hf_element == qn("spreadsheetml", "evenHeader"))
+                {
+                    even_header = decode_header_footer(read_text());
+                }
+                else if (current_hf_element == qn("spreadsheetml", "evenFooter"))
+                {
+                    even_footer = decode_header_footer(read_text());
+                }
+                else if (current_hf_element == qn("spreadsheetml", "firstHeader"))
+                {
+                    first_header = decode_header_footer(read_text());
+                }
+                else if (current_hf_element == qn("spreadsheetml", "firstFooter"))
+                {
+                    first_footer = decode_header_footer(read_text());
+                }
+                else
+                {
+                    unexpected_element(current_hf_element);
+                }
+
+                expect_end_element(current_hf_element);
+            }
+
+            // Index 0, 1, 2 of each decoded array is the left, center and
+            // right section respectively.
+            for (std::size_t i = 0; i < 3; ++i)
+            {
+                auto loc = i == 0 ? header_footer::location::left
+                                  : i == 1 ? header_footer::location::center : header_footer::location::right;
+
+                if (different_odd_even)
+                {
+                    if (odd_header.is_set() && odd_header.get().at(i).is_set() && even_header.is_set()
+                        && even_header.get().at(i).is_set())
+                    {
+                        hf.odd_even_header(loc, odd_header.get().at(i).get(), even_header.get().at(i).get());
+                    }
+
+                    if (odd_footer.is_set() && odd_footer.get().at(i).is_set() && even_footer.is_set()
+                        && even_footer.get().at(i).is_set())
+                    {
+                        hf.odd_even_footer(loc, odd_footer.get().at(i).get(), even_footer.get().at(i).get());
+                    }
+                }
+                else
+                {
+                    if (odd_header.is_set() && odd_header.get().at(i).is_set())
+                    {
+                        hf.header(loc, odd_header.get().at(i).get());
+                    }
+
+                    if (odd_footer.is_set() && odd_footer.get().at(i).is_set())
+                    {
+                        hf.footer(loc, odd_footer.get().at(i).get());
+                    }
+                }
+
+                // NOTE(review): first_header/first_footer are decoded above
+                // but nothing is applied for differentFirst here.
+                if (different_first)
+                {
+                }
+            }
+
+            ws.header_footer(hf);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "rowBreaks")) // CT_PageBreak 0-1
+        {
+            // The two counters are decremented below but never checked.
+            auto count = parser().attribute_present("count") ? parser().attribute("count") : 0;
+            auto manual_break_count = parser().attribute_present("manualBreakCount")
+                ? parser().attribute("manualBreakCount") : 0;
+
+            while (in_element(qn("spreadsheetml", "rowBreaks")))
+            {
+                expect_start_element(qn("spreadsheetml", "brk"), xml::content::simple);
+
+                if (parser().attribute_present("id"))
+                {
+                    ws.page_break_at_row(parser().attribute("id"));
+                    --count;
+                }
+
+                if (parser().attribute_present("man") && is_true(parser().attribute("man")))
+                {
+                    --manual_break_count;
+                }
+
+                skip_attributes({ "min", "max", "pt" });
+                expect_end_element(qn("spreadsheetml", "brk"));
+            }
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "colBreaks")) // CT_PageBreak 0-1
+        {
+            // The two counters are decremented below but never checked.
+            auto count = parser().attribute_present("count") ? parser().attribute("count") : 0;
+            auto manual_break_count = parser().attribute_present("manualBreakCount")
+                ? parser().attribute("manualBreakCount")
+                : 0;
+
+            while (in_element(qn("spreadsheetml", "colBreaks")))
+            {
+                expect_start_element(qn("spreadsheetml", "brk"), xml::content::simple);
+
+                if (parser().attribute_present("id"))
+                {
+                    ws.page_break_at_column(parser().attribute("id"));
+                    --count;
+                }
+
+                if (parser().attribute_present("man") && is_true(parser().attribute("man")))
+                {
+                    --manual_break_count;
+                }
+
+                skip_attributes({ "min", "max", "pt" });
+                expect_end_element(qn("spreadsheetml", "brk"));
+            }
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "customProperties")) // CT_CustomProperties 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "cellWatches")) // CT_CellWatches 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "ignoredErrors")) // CT_IgnoredErrors 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "smartTags")) // CT_SmartTags 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "drawing")) // CT_Drawing 0-1
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "legacyDrawing"))
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else if (current_worksheet_element == qn("spreadsheetml", "extLst"))
+        {
+            skip_remaining_content(current_worksheet_element);
+        }
+        else
+        {
+            unexpected_element(current_worksheet_element);
+        }
+
+        expect_end_element(current_worksheet_element);
+    }
+
+    expect_end_element(qn("spreadsheetml", "worksheet"));
+
+    if (manifest.has_relationship(sheet_path, xlnt::relationship_type::comments))
+    {
+        auto comments_part = manifest.canonicalize({ workbook_rel, sheet_rel,
+            manifest.relationship(sheet_path, xlnt::relationship_type::comments) });
+
+        // NOTE(review): parser_ is pointed at parsers local to this function
+        // and is not reset before returning - confirm the caller replaces or
+        // clears it before the next use.
+        auto receive = xml::parser::receive_default;
+        auto comments_part_streambuf = archive_->open(comments_part);
+        std::istream comments_part_stream(comments_part_streambuf.get());
+        xml::parser parser(comments_part_stream, comments_part.string(), receive);
+        parser_ = &parser;
+
+        read_comments(ws);
+
+        if (manifest.has_relationship(sheet_path, xlnt::relationship_type::vml_drawing))
+        {
+            auto vml_drawings_part = manifest.canonicalize({ workbook_rel, sheet_rel,
+                manifest.relationship(sheet_path, xlnt::relationship_type::vml_drawing) });
+
+            // Open the VML drawing part itself and parse from its own stream
+            // buffer, rather than re-opening the comments part and reading
+            // from the comments stream that was consumed above.
+            auto vml_drawings_part_streambuf = archive_->open(vml_drawings_part);
+            std::istream vml_drawings_part_stream(vml_drawings_part_streambuf.get());
+            xml::parser vml_parser(vml_drawings_part_stream, vml_drawings_part.string(), receive);
+            parser_ = &vml_parser;
+
+            read_vml_drawings(ws);
+        }
+    }
+
+    return ws;
}
xml::parser &xlsx_consumer::parser()
@@ -143,6 +1032,12 @@ xml::parser &xlsx_consumer::parser()
return *parser_;
}
+/// Reports whether the parser is currently inside a "row" element or, failing
+/// that, inside the "sheetData" element.
+bool xlsx_consumer::has_cell()
+{
+    if (in_element(qn("spreadsheetml", "row")))
+    {
+        return true;
+    }
+
+    return in_element(qn("spreadsheetml", "sheetData"));
+}
+
std::vector xlsx_consumer::read_relationships(const path &part)
{
const auto part_rels_path = part.parent().append("_rels")
@@ -167,7 +1062,7 @@ std::vector xlsx_consumer::read_relationships(const path &part)
: xlnt::target_mode::internal;
auto target = xlnt::uri(parser.attribute("Target"));
- if (target.path().is_absolute() && target_mode == target_mode::internal)
+ if (target.path().is_absolute() && target_mode == xlnt::target_mode::internal)
{
target = uri(target.path().relative_to(path(part.string()).resolve(path("/"))).string());
}
@@ -321,8 +1216,10 @@ void xlsx_consumer::read_part(const std::vector &rel_chain)
parser_ = nullptr;
}
-void xlsx_consumer::populate_workbook()
+void xlsx_consumer::populate_workbook(bool streaming)
{
+ streaming_ = streaming;
+
target_.clear();
read_content_types();
@@ -535,10 +1432,26 @@ void xlsx_consumer::read_office_document(const std::string &content_type) // CT_
"showHorizontalScroll", "showSheetTabs", "showVerticalScroll"});
workbook_view view;
- view.x_window = parser().attribute("xWindow");
- view.y_window = parser().attribute("yWindow");
- view.window_width = parser().attribute("windowWidth");
- view.window_height = parser().attribute("windowHeight");
+
+ if (parser().attribute_present("xWindow"))
+ {
+ view.x_window = parser().attribute("xWindow");
+ }
+
+ if (parser().attribute_present("yWindow"))
+ {
+ view.y_window = parser().attribute("yWindow");
+ }
+
+ if (parser().attribute_present("windowWidth"))
+ {
+ view.window_width = parser().attribute("windowWidth");
+ }
+
+ if (parser().attribute_present("windowHeight"))
+ {
+ view.window_height = parser().attribute("windowHeight");
+ }
if (parser().attribute_present("tabRatio"))
{
@@ -640,17 +1553,28 @@ void xlsx_consumer::read_office_document(const std::string &content_type) // CT_
if (manifest().has_relationship(workbook_path, relationship_type::shared_string_table))
{
- read_part({workbook_rel, manifest().relationship(workbook_path, relationship_type::shared_string_table)});
+ read_part({workbook_rel,
+ manifest().relationship(workbook_path,
+ relationship_type::shared_string_table)});
}
if (manifest().has_relationship(workbook_path, relationship_type::stylesheet))
{
- read_part({workbook_rel, manifest().relationship(workbook_path, relationship_type::stylesheet)});
+ read_part({workbook_rel,
+ manifest().relationship(workbook_path,
+ relationship_type::stylesheet)});
}
if (manifest().has_relationship(workbook_path, relationship_type::theme))
{
- read_part({workbook_rel, manifest().relationship(workbook_path, relationship_type::theme)});
+ read_part({workbook_rel,
+ manifest().relationship(workbook_path,
+ relationship_type::theme)});
+ }
+
+ if (streaming_)
+ {
+ return;
}
for (auto worksheet_rel : manifest().relationships(workbook_path, relationship_type::worksheet))
@@ -1364,15 +2288,19 @@ void xlsx_consumer::read_stylesheet()
void xlsx_consumer::read_theme()
{
- auto workbook_rel = manifest().relationship(path("/"), relationship_type::office_document);
- auto theme_rel = manifest().relationship(workbook_rel.target().path(), relationship_type::theme);
+ auto workbook_rel = manifest().relationship(path("/"),
+ relationship_type::office_document);
+ auto theme_rel = manifest().relationship(workbook_rel.target().path(),
+ relationship_type::theme);
auto theme_path = manifest().canonicalize({workbook_rel, theme_rel});
target_.theme(theme());
if (manifest().has_relationship(theme_path, relationship_type::image))
{
- read_part({workbook_rel, theme_rel, manifest().relationship(theme_path, relationship_type::image)});
+ read_part({workbook_rel, theme_rel,
+ manifest().relationship(theme_path,
+ relationship_type::image)});
}
}
@@ -1380,690 +2308,6 @@ void xlsx_consumer::read_volatile_dependencies()
{
}
-// CT_Worksheet
-void xlsx_consumer::read_worksheet(const std::string &rel_id)
-{
-/*
- static const auto &xmlns = constants::namespace_("spreadsheetml");
- static const auto &xmlns_mc = constants::namespace_("mc");
- static const auto &xmlns_x14ac = constants::namespace_("x14ac");
- static const auto &xmlns_r = constants::namespace_("r");
-*/
- auto title = std::find_if(target_.d_->sheet_title_rel_id_map_.begin(),
- target_.d_->sheet_title_rel_id_map_.end(),
- [&](const std::pair &p) {
- return p.second == rel_id;
- })->first;
-
- auto id = sheet_title_id_map_[title];
- auto index = sheet_title_index_map_[title];
-
- auto insertion_iter = target_.d_->worksheets_.begin();
- while (insertion_iter != target_.d_->worksheets_.end() && sheet_title_index_map_[insertion_iter->title_] < index)
- {
- ++insertion_iter;
- }
-
- target_.d_->worksheets_.emplace(insertion_iter, &target_, id, title);
-
- auto ws = target_.sheet_by_id(id);
-
- expect_start_element(qn("spreadsheetml", "worksheet"), xml::content::complex); // CT_Worksheet
- skip_attributes({qn("mc", "Ignorable")});
- read_namespaces();
-
- xlnt::range_reference full_range;
- auto &manifest = target_.manifest();
-
- const auto workbook_rel = manifest.relationship(path("/"), relationship_type::office_document);
- const auto sheet_rel = manifest.relationship(workbook_rel.target().path(), rel_id);
- path sheet_path(sheet_rel.source().path().parent().append(sheet_rel.target().path()));
- auto hyperlinks = manifest.relationships(sheet_path, xlnt::relationship_type::hyperlink);
-
- while (in_element(qn("spreadsheetml", "worksheet")))
- {
- auto current_worksheet_element = expect_start_element(xml::content::complex);
-
- if (current_worksheet_element == qn("spreadsheetml", "sheetPr")) // CT_SheetPr 0-1
- {
- while (in_element(current_worksheet_element))
- {
- auto sheet_pr_child_element = expect_start_element(xml::content::simple);
-
- if (sheet_pr_child_element == qn("spreadsheetml", "tabColor")) // CT_Color 0-1
- {
- read_color();
- }
- else if (sheet_pr_child_element == qn("spreadsheetml", "outlinePr")) // CT_OutlinePr 0-1
- {
- skip_attribute("applyStyles"); // optional, boolean, false
- skip_attribute("summaryBelow"); // optional, boolean, true
- skip_attribute("summaryRight"); // optional, boolean, true
- skip_attribute("showOutlineSymbols"); // optional, boolean, true
- }
- else if (sheet_pr_child_element == qn("spreadsheetml", "pageSetUpPr")) // CT_PageSetUpPr 0-1
- {
- skip_attribute("autoPageBreaks"); // optional, boolean, true
- skip_attribute("fitToPage"); // optional, boolean, false
- }
- else
- {
- unexpected_element(sheet_pr_child_element);
- }
-
- expect_end_element(sheet_pr_child_element);
- }
-
- skip_attribute("syncHorizontal"); // optional, boolean, false
- skip_attribute("syncVertical"); // optional, boolean, false
- skip_attribute("syncRef"); // optional, ST_Ref, false
- skip_attribute("transitionEvaluation"); // optional, boolean, false
- skip_attribute("transitionEntry"); // optional, boolean, false
- skip_attribute("published"); // optional, boolean, true
- skip_attribute("codeName"); // optional, string
- skip_attribute("filterMode"); // optional, boolean, false
- skip_attribute("enableFormatConditionsCalculation"); // optional, boolean, true
- }
- else if (current_worksheet_element == qn("spreadsheetml", "dimension")) // CT_SheetDimension 0-1
- {
- full_range = xlnt::range_reference(parser().attribute("ref"));
- }
- else if (current_worksheet_element == qn("spreadsheetml", "sheetViews")) // CT_SheetViews 0-1
- {
- while (in_element(current_worksheet_element))
- {
- expect_start_element(qn("spreadsheetml", "sheetView"), xml::content::complex); // CT_SheetView 1+
-
- sheet_view new_view;
- new_view.id(parser().attribute("workbookViewId"));
-
- if (parser().attribute_present("showGridLines")) // default="true"
- {
- new_view.show_grid_lines(is_true(parser().attribute("showGridLines")));
- }
-
- if (parser().attribute_present("defaultGridColor")) // default="true"
- {
- new_view.default_grid_color(is_true(parser().attribute("defaultGridColor")));
- }
-
- if (parser().attribute_present("view") && parser().attribute("view") != "normal")
- {
- new_view.type(parser().attribute("view") == "pageBreakPreview" ? sheet_view_type::page_break_preview
- : sheet_view_type::page_layout);
- }
-
- skip_attributes({"windowProtection", "showFormulas", "showRowColHeaders", "showZeros", "rightToLeft",
- "tabSelected", "showRuler", "showOutlineSymbols", "showWhiteSpace", "view", "topLeftCell",
- "colorId", "zoomScale", "zoomScaleNormal", "zoomScaleSheetLayoutView", "zoomScalePageLayoutView"});
-
- while (in_element(qn("spreadsheetml", "sheetView")))
- {
- auto sheet_view_child_element = expect_start_element(xml::content::simple);
-
- if (sheet_view_child_element == qn("spreadsheetml", "pane")) // CT_Pane 0-1
- {
- pane new_pane;
-
- if (parser().attribute_present("topLeftCell"))
- {
- new_pane.top_left_cell = cell_reference(parser().attribute("topLeftCell"));
- }
-
- if (parser().attribute_present("xSplit"))
- {
- new_pane.x_split = parser().attribute("xSplit");
- }
-
- if (parser().attribute_present("ySplit"))
- {
- new_pane.y_split = parser().attribute("ySplit");
- }
-
- if (parser().attribute_present("activePane"))
- {
- new_pane.active_pane = parser().attribute("activePane");
- }
-
- if (parser().attribute_present("state"))
- {
- new_pane.state = parser().attribute("state");
- }
-
- new_view.pane(new_pane);
- }
- else if (sheet_view_child_element == qn("spreadsheetml", "selection")) // CT_Selection 0-4
- {
- skip_remaining_content(sheet_view_child_element);
- }
- else if (sheet_view_child_element == qn("spreadsheetml", "pivotSelection")) // CT_PivotSelection 0-4
- {
- skip_remaining_content(sheet_view_child_element);
- }
- else if (sheet_view_child_element == qn("spreadsheetml", "extLst")) // CT_ExtensionList 0-1
- {
- skip_remaining_content(sheet_view_child_element);
- }
- else
- {
- unexpected_element(sheet_view_child_element);
- }
-
- expect_end_element(sheet_view_child_element);
- }
-
- expect_end_element(qn("spreadsheetml", "sheetView"));
-
- ws.d_->views_.push_back(new_view);
- }
- }
- else if (current_worksheet_element == qn("spreadsheetml", "sheetFormatPr")) // CT_SheetFormatPr 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "cols")) // CT_Cols 0+
- {
- while (in_element(qn("spreadsheetml", "cols")))
- {
- expect_start_element(qn("spreadsheetml", "col"), xml::content::simple);
-
- skip_attributes({"bestFit", "collapsed", "outlineLevel"});
-
- auto min = static_cast(std::stoull(parser().attribute("min")));
- auto max = static_cast(std::stoull(parser().attribute("max")));
-
- optional width;
-
- if (parser().attribute_present("width"))
- {
- width = parser().attribute("width");
- }
-
- optional column_style;
-
- if (parser().attribute_present("style"))
- {
- column_style = parser().attribute("style");
- }
-
- auto custom =
- parser().attribute_present("customWidth") ? is_true(parser().attribute("customWidth")) : false;
- auto hidden = parser().attribute_present("hidden") ? is_true(parser().attribute("hidden")) : false;
-
- expect_end_element(qn("spreadsheetml", "col"));
-
- for (auto column = min; column <= max; column++)
- {
- column_properties props;
-
- if (width.is_set())
- {
- props.width = width.get();
- }
-
- if (column_style.is_set())
- {
- props.style = column_style.get();
- }
-
- props.hidden = hidden;
- props.custom_width = custom;
- ws.add_column_properties(column, props);
- }
- }
- }
- else if (current_worksheet_element == qn("spreadsheetml", "sheetData")) // CT_SheetData 1
- {
- while (in_element(qn("spreadsheetml", "sheetData")))
- {
- expect_start_element(qn("spreadsheetml", "row"), xml::content::complex); // CT_Row
- auto row_index = parser().attribute("r");
-
- if (parser().attribute_present("ht"))
- {
- ws.row_properties(row_index).height = parser().attribute("ht");
- }
-
- if (parser().attribute_present("customHeight"))
- {
- ws.row_properties(row_index).custom_height = is_true(parser().attribute("customHeight"));
- }
-
- if (parser().attribute_present("hidden") && is_true(parser().attribute("hidden")))
- {
- ws.row_properties(row_index).hidden = true;
- }
-
- skip_attributes({qn("x14ac", "dyDescent")});
- skip_attributes({"customFormat", "s", "customFont",
- "outlineLevel", "collapsed", "thickTop", "thickBot",
- "ph", "spans"});
-
- while (in_element(qn("spreadsheetml", "row")))
- {
- expect_start_element(qn("spreadsheetml", "c"), xml::content::complex);
- auto cell = ws.cell(cell_reference(parser().attribute("r")));
-
- auto has_type = parser().attribute_present("t");
- auto type = has_type ? parser().attribute("t") : "n";
-
- auto has_format = parser().attribute_present("s");
- auto format_id = static_cast(has_format ? std::stoull(parser().attribute("s")) : 0LL);
-
- auto has_value = false;
- auto value_string = std::string();
-
- auto has_formula = false;
- auto has_shared_formula = false;
- auto formula_value_string = std::string();
-
- while (in_element(qn("spreadsheetml", "c")))
- {
- auto current_element = expect_start_element(xml::content::mixed);
-
- if (current_element == qn("spreadsheetml", "v")) // s:ST_Xstring
- {
- has_value = true;
- value_string = read_text();
- }
- else if (current_element == qn("spreadsheetml", "f")) // CT_CellFormula
- {
- has_formula = true;
-
- if (parser().attribute_present("t"))
- {
- has_shared_formula = parser().attribute("t") == "shared";
- }
-
- skip_attributes(
- {"aca", "ref", "dt2D", "dtr", "del1", "del2", "r1", "r2", "ca", "si", "bx"});
-
- formula_value_string = read_text();
- }
- else if (current_element == qn("spreadsheetml", "is")) // CT_Rst
- {
- expect_start_element(qn("spreadsheetml", "t"), xml::content::simple);
- value_string = read_text();
- expect_end_element(qn("spreadsheetml", "t"));
- }
- else
- {
- unexpected_element(current_element);
- }
-
- expect_end_element(current_element);
- }
-
- expect_end_element(qn("spreadsheetml", "c"));
-
- if (has_formula && !has_shared_formula)
- {
- cell.formula(formula_value_string);
- }
-
- if (has_value)
- {
- if (type == "str")
- {
- cell.d_->value_text_ = value_string;
- cell.data_type(cell::type::formula_string);
- }
- else if (type == "inlineStr")
- {
- cell.d_->value_text_ = value_string;
- cell.data_type(cell::type::inline_string);
- }
- else if (type == "s")
- {
- cell.d_->value_numeric_ = std::stold(value_string);
- cell.data_type(cell::type::shared_string);
- }
- else if (type == "b") // boolean
- {
- cell.value(is_true(value_string));
- }
- else if (type == "n") // numeric
- {
- cell.value(std::stold(value_string));
- }
- else if (!value_string.empty() && value_string[0] == '#')
- {
- cell.error(value_string);
- }
- }
-
- if (has_format)
- {
- cell.format(target_.format(format_id));
- }
- }
-
- expect_end_element(qn("spreadsheetml", "row"));
- }
- }
- else if (current_worksheet_element == qn("spreadsheetml", "sheetCalcPr")) // CT_SheetCalcPr 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "sheetProtection")) // CT_SheetProtection 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "protectedRanges")) // CT_ProtectedRanges 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "scenarios")) // CT_Scenarios 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "autoFilter")) // CT_AutoFilter 0-1
- {
- ws.auto_filter(xlnt::range_reference(parser().attribute("ref")));
- // auto filter complex
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "sortState")) // CT_SortState 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "dataConsolidate")) // CT_DataConsolidate 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "customSheetViews")) // CT_CustomSheetViews 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "mergeCells")) // CT_MergeCells 0-1
- {
- auto count = std::stoull(parser().attribute("count"));
-
- while (in_element(qn("spreadsheetml", "mergeCells")))
- {
- expect_start_element(qn("spreadsheetml", "mergeCell"), xml::content::simple);
- ws.merge_cells(range_reference(parser().attribute("ref")));
- expect_end_element(qn("spreadsheetml", "mergeCell"));
-
- count--;
- }
-
- if (count != 0)
- {
- throw invalid_file("sizes don't match");
- }
- }
- else if (current_worksheet_element == qn("spreadsheetml", "phoneticPr")) // CT_PhoneticPr 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "conditionalFormatting")) // CT_ConditionalFormatting 0+
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "dataValidations")) // CT_DataValidations 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "hyperlinks")) // CT_Hyperlinks 0-1
- {
- while (in_element(qn("spreadsheetml", "hyperlinks")))
- {
- expect_start_element(qn("spreadsheetml", "hyperlink"), xml::content::simple);
-
- auto cell = ws.cell(parser().attribute("ref"));
-
- if (parser().attribute_present(qn("r", "id")))
- {
- auto hyperlink_rel_id = parser().attribute(qn("r", "id"));
- auto hyperlink_rel = std::find_if(hyperlinks.begin(), hyperlinks.end(),
- [&](const relationship &r) { return r.id() == hyperlink_rel_id; });
-
- if (hyperlink_rel != hyperlinks.end())
- {
- cell.hyperlink(hyperlink_rel->target().path().string());
- }
- }
-
- skip_attributes({"location", "tooltip", "display"});
- expect_end_element(qn("spreadsheetml", "hyperlink"));
- }
- }
- else if (current_worksheet_element == qn("spreadsheetml", "printOptions")) // CT_PrintOptions 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "pageMargins")) // CT_PageMargins 0-1
- {
- page_margins margins;
-
- margins.top(parser().attribute("top"));
- margins.bottom(parser().attribute("bottom"));
- margins.left(parser().attribute("left"));
- margins.right(parser().attribute("right"));
- margins.header(parser().attribute("header"));
- margins.footer(parser().attribute("footer"));
-
- ws.page_margins(margins);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "pageSetup")) // CT_PageSetup 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "headerFooter")) // CT_HeaderFooter 0-1
- {
- header_footer hf;
-
- hf.align_with_margins(
- !parser().attribute_present("alignWithMargins") || is_true(parser().attribute("alignWithMargins")));
- hf.scale_with_doc(
- !parser().attribute_present("alignWithMargins") || is_true(parser().attribute("alignWithMargins")));
- auto different_odd_even =
- parser().attribute_present("differentOddEven") && is_true(parser().attribute("differentOddEven"));
- auto different_first =
- parser().attribute_present("differentFirst") && is_true(parser().attribute("differentFirst"));
-
- optional, 3>> odd_header;
- optional, 3>> odd_footer;
- optional, 3>> even_header;
- optional, 3>> even_footer;
- optional, 3>> first_header;
- optional, 3>> first_footer;
-
- using xlnt::detail::decode_header_footer;
-
- while (in_element(current_worksheet_element))
- {
- auto current_hf_element = expect_start_element(xml::content::simple);
-
- if (current_hf_element == qn("spreadsheetml", "oddHeader"))
- {
- odd_header = decode_header_footer(read_text());
- }
- else if (current_hf_element == qn("spreadsheetml", "oddFooter"))
- {
- odd_footer = decode_header_footer(read_text());
- }
- else if (current_hf_element == qn("spreadsheetml", "evenHeader"))
- {
- even_header = decode_header_footer(read_text());
- }
- else if (current_hf_element == qn("spreadsheetml", "evenFooter"))
- {
- even_footer = decode_header_footer(read_text());
- }
- else if (current_hf_element == qn("spreadsheetml", "firstHeader"))
- {
- first_header = decode_header_footer(read_text());
- }
- else if (current_hf_element == qn("spreadsheetml", "firstFooter"))
- {
- first_footer = decode_header_footer(read_text());
- }
- else
- {
- unexpected_element(current_hf_element);
- }
-
- expect_end_element(current_hf_element);
- }
-
- for (std::size_t i = 0; i < 3; ++i)
- {
- auto loc = i == 0 ? header_footer::location::left
- : i == 1 ? header_footer::location::center : header_footer::location::right;
-
- if (different_odd_even)
- {
- if (odd_header.is_set() && odd_header.get().at(i).is_set() && even_header.is_set()
- && even_header.get().at(i).is_set())
- {
- hf.odd_even_header(loc, odd_header.get().at(i).get(), even_header.get().at(i).get());
- }
-
- if (odd_footer.is_set() && odd_footer.get().at(i).is_set() && even_footer.is_set()
- && even_footer.get().at(i).is_set())
- {
- hf.odd_even_footer(loc, odd_footer.get().at(i).get(), even_footer.get().at(i).get());
- }
- }
- else
- {
- if (odd_header.is_set() && odd_header.get().at(i).is_set())
- {
- hf.header(loc, odd_header.get().at(i).get());
- }
-
- if (odd_footer.is_set() && odd_footer.get().at(i).is_set())
- {
- hf.footer(loc, odd_footer.get().at(i).get());
- }
- }
-
- if (different_first)
- {
- }
- }
-
- ws.header_footer(hf);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "rowBreaks")) // CT_PageBreak 0-1
- {
- auto count = parser().attribute_present("count") ? parser().attribute("count") : 0;
- auto manual_break_count = parser().attribute_present("manualBreakCount")
- ? parser().attribute("manualBreakCount") : 0;
-
- while (in_element(qn("spreadsheetml", "rowBreaks")))
- {
- expect_start_element(qn("spreadsheetml", "brk"), xml::content::simple);
-
- if (parser().attribute_present("id"))
- {
- ws.page_break_at_row(parser().attribute("id"));
- --count;
- }
-
- if (parser().attribute_present("man") && is_true(parser().attribute("man")))
- {
- --manual_break_count;
- }
-
- skip_attributes({"min", "max", "pt"});
- expect_end_element(qn("spreadsheetml", "brk"));
- }
- }
- else if (current_worksheet_element == qn("spreadsheetml", "colBreaks")) // CT_PageBreak 0-1
- {
- auto count = parser().attribute_present("count") ? parser().attribute("count") : 0;
- auto manual_break_count = parser().attribute_present("manualBreakCount")
- ? parser().attribute("manualBreakCount")
- : 0;
-
- while (in_element(qn("spreadsheetml", "colBreaks")))
- {
- expect_start_element(qn("spreadsheetml", "brk"), xml::content::simple);
-
- if (parser().attribute_present("id"))
- {
- ws.page_break_at_column(parser().attribute("id"));
- --count;
- }
-
- if (parser().attribute_present("man") && is_true(parser().attribute("man")))
- {
- --manual_break_count;
- }
-
- skip_attributes({"min", "max", "pt"});
- expect_end_element(qn("spreadsheetml", "brk"));
- }
- }
- else if (current_worksheet_element == qn("spreadsheetml", "customProperties")) // CT_CustomProperties 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "cellWatches")) // CT_CellWatches 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "ignoredErrors")) // CT_IgnoredErrors 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "smartTags")) // CT_SmartTags 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "drawing")) // CT_Drawing 0-1
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "legacyDrawing"))
- {
- skip_remaining_content(current_worksheet_element);
- }
- else if (current_worksheet_element == qn("spreadsheetml", "extLst"))
- {
- skip_remaining_content(current_worksheet_element);
- }
- else
- {
- unexpected_element(current_worksheet_element);
- }
-
- expect_end_element(current_worksheet_element);
- }
-
- expect_end_element(qn("spreadsheetml", "worksheet"));
-
- if (manifest.has_relationship(sheet_path, xlnt::relationship_type::comments))
- {
- auto comments_part = manifest.canonicalize(
- {workbook_rel, sheet_rel, manifest.relationship(sheet_path, xlnt::relationship_type::comments)});
-
- auto receive = xml::parser::receive_default;
- auto comments_part_streambuf = archive_->open(comments_part);
- std::istream comments_part_stream(comments_part_streambuf.get());
- xml::parser parser(comments_part_stream, comments_part.string(), receive);
- parser_ = &parser;
-
- read_comments(ws);
-
- if (manifest.has_relationship(sheet_path, xlnt::relationship_type::vml_drawing))
- {
- auto vml_drawings_part = manifest.canonicalize(
- {workbook_rel, sheet_rel, manifest.relationship(sheet_path, xlnt::relationship_type::vml_drawing)});
-
- auto vml_drawings_part_streambuf = archive_->open(comments_part);
- std::istream vml_drawings_part_stream(comments_part_streambuf.get());
- xml::parser vml_parser(vml_drawings_part_stream, vml_drawings_part.string(), receive);
- parser_ = &vml_parser;
-
- read_vml_drawings(ws);
- }
- }
-}
-
// Sheet Relationship Target Parts
void xlsx_consumer::read_vml_drawings(worksheet /*ws*/)
diff --git a/source/detail/serialization/xlsx_consumer.hpp b/source/detail/serialization/xlsx_consumer.hpp
index f8d65344..76475e17 100644
--- a/source/detail/serialization/xlsx_consumer.hpp
+++ b/source/detail/serialization/xlsx_consumer.hpp
@@ -28,6 +28,7 @@
#include
#include
#include
+#include
#include
#include
@@ -36,11 +37,15 @@
namespace xlnt {
+class cell;
class color;
class rich_text;
class manifest;
+template
+class optional;
class path;
class relationship;
+class streaming_workbook_reader;
class variant;
class workbook;
class worksheet;
@@ -48,6 +53,8 @@ class worksheet;
namespace detail {
class izstream;
+struct cell_impl;
+struct worksheet_impl;
///
/// Handles writing a workbook into an XLSX file.
@@ -57,16 +64,31 @@ class xlsx_consumer
public:
xlsx_consumer(workbook &destination);
+ ~xlsx_consumer();
+
void read(std::istream &source);
void read(std::istream &source, const std::string &password);
private:
+ friend class xlnt::streaming_workbook_reader;
+
+ void open(std::istream &source);
+
+ bool has_cell();
+
+ ///
+ /// Reads the next cell in the current worksheet and optionally returns it if
+ /// the last cell in the sheet has not yet been read. An exception will be thrown
+ /// if this is not open as a streaming consumer.
+ ///
+ cell read_cell();
+
///
/// Read all the files needed from the XLSX archive and initialize all of
/// the data in the workbook to match.
///
- void populate_workbook();
+ void populate_workbook(bool streaming);
///
///
@@ -106,27 +128,27 @@ private:
void read_calculation_chain();
///
- ///
+ ///
///
void read_connections();
///
- ///
+ ///
///
void read_custom_property();
///
- ///
+ ///
///
void read_custom_xml_mappings();
///
- ///
+ ///
///
void read_external_workbook_references();
///
- ///
+ ///
///
void read_pivot_table();
@@ -136,17 +158,17 @@ private:
void read_shared_string_table();
///
- ///
+ ///
///
void read_shared_workbook_revision_headers();
///
- ///
+ ///
///
void read_shared_workbook();
///
- ///
+ ///
///
void read_shared_workbook_user_data();
@@ -161,56 +183,71 @@ private:
void read_theme();
///
- ///
+ ///
///
void read_volatile_dependencies();
///
/// xl/sheets/*.xml
///
- void read_chartsheet(const std::string &title);
+ void read_chartsheet(const std::string &rel_id);
///
/// xl/sheets/*.xml
///
- void read_dialogsheet(const std::string &title);
+ void read_dialogsheet(const std::string &rel_id);
///
/// xl/sheets/*.xml
///
- void read_worksheet(const std::string &title);
+ void read_worksheet(const std::string &rel_id);
+
+ ///
+ /// xl/sheets/*.xml
+ ///
+ std::string read_worksheet_begin(const std::string &rel_id);
+
+ ///
+ /// xl/sheets/*.xml
+ ///
+ void read_worksheet_sheetdata();
+
+ ///
+ /// xl/sheets/*.xml
+ ///
+ worksheet read_worksheet_end(const std::string &rel_id);
// Sheet Relationship Target Parts
///
- ///
+ ///
///
void read_comments(worksheet ws);
-
+
///
- ///
+ ///
///
void read_vml_drawings(worksheet ws);
///
- ///
+ ///
///
void read_drawings();
// Unknown Parts
///
- ///
+ ///
///
void read_unknown_parts();
///
- ///
+ ///
///
void read_unknown_relationships();
///
- ///
+ ///
///
void read_image(const path &part);
@@ -362,14 +399,22 @@ private:
///
/// This pointer is generally set by instantiating an xml::parser in a function
- /// scope and then calling a read_*() method which uses xlsx_consumer::parser()
+ /// scope and then calling a read_*() method which uses xlsx_consumer::parser()
/// to access the object.
///
xml::parser *parser_;
-
+
std::vector stack_;
bool preserve_space_ = false;
+
+ bool streaming_ = false;
+
+ std::unique_ptr streaming_cell_;
+
+ detail::cell_impl *current_cell_;
+
+ detail::worksheet_impl *current_worksheet_;
};
} // namespace detail
diff --git a/source/detail/serialization/xlsx_producer.cpp b/source/detail/serialization/xlsx_producer.cpp
index 8ab586aa..1c1c1626 100644
--- a/source/detail/serialization/xlsx_producer.cpp
+++ b/source/detail/serialization/xlsx_producer.cpp
@@ -36,13 +36,12 @@
#include
#include
#include
+#include
#include
#include
#include
#include
-using namespace std::string_literals;
-
namespace {
///
@@ -169,7 +168,7 @@ void xlsx_producer::write_content_types()
const auto content_types_path = path("[Content_Types].xml");
begin_part(content_types_path);
- const auto xmlns = "http://schemas.openxmlformats.org/package/2006/content-types"s;
+ const auto xmlns = "http://schemas.openxmlformats.org/package/2006/content-types";
write_start_element(xmlns, "Types");
write_namespace(xmlns, "");
@@ -288,7 +287,7 @@ void xlsx_producer::write_property(const std::string &name, const variant &value
write_start_element(constants::ns("vt"), "vector");
auto vector = value.get>();
- std::unordered_set types;
+ std::unordered_set> types;
for (const auto &element : vector)
{
@@ -2477,9 +2476,9 @@ void xlsx_producer::write_worksheet(const relationship &rel)
auto first_header = std::string();
auto first_footer = std::string();
- const auto locations =
+ const auto locations =
{
- header_footer::location::left,
+ header_footer::location::left,
header_footer::location::center,
header_footer::location::right
};
@@ -2715,7 +2714,7 @@ void xlsx_producer::write_comments(const relationship & /*rel*/, worksheet ws, c
if (run.second.is_set())
{
write_start_element(xmlns, "rPr");
-
+
if (run.second.get().bold())
{
write_start_element(xmlns, "b");
diff --git a/source/detail/serialization/zstream.cpp b/source/detail/serialization/zstream.cpp
index b9aab711..737e0300 100644
--- a/source/detail/serialization/zstream.cpp
+++ b/source/detail/serialization/zstream.cpp
@@ -482,7 +482,9 @@ std::unique_ptr ozstream::open(const path &filename)
zheader header;
header.filename = filename.string();
file_headers_.push_back(header);
- return std::make_unique(&file_headers_.back(), destination_stream_);
+ auto buffer = new zip_streambuf_compress(&file_headers_.back(), destination_stream_);
+
+ return std::unique_ptr(buffer);
}
izstream::izstream(std::istream &stream)
@@ -526,7 +528,7 @@ bool izstream::read_central_header()
}
source_stream_.read(reinterpret_cast(buf.data()), read_start);
-
+
if (buf[0] == 0xd0 && buf[1] == 0xcf && buf[2] == 0x11 && buf[3] == 0xe0
&& buf[4] == 0xa1 && buf[5] == 0xb1 && buf[6] == 0x1a && buf[7] == 0xe1)
{
@@ -595,7 +597,9 @@ std::unique_ptr izstream::open(const path &filename) const
auto header = file_headers_.at(filename.string());
source_stream_.seekg(header.header_offset);
- return std::make_unique(source_stream_, header);
+ auto buffer = new zip_streambuf_decompress(source_stream_, header);
+
+ return std::unique_ptr(buffer);
}
std::string izstream::read(const path &filename) const
diff --git a/source/detail/unicode.cpp b/source/detail/unicode.cpp
index e0e01d6b..722b6800 100644
--- a/source/detail/unicode.cpp
+++ b/source/detail/unicode.cpp
@@ -21,15 +21,21 @@
// @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file
-#include
#include
#include
#include
+#ifdef UTFCPP
+#include
+#else
+#include
+#endif
+
namespace xlnt {
namespace detail {
+#ifndef UTFCPP
#ifdef _MSC_VER
std::u16string utf8_to_utf16(const std::string &utf8_string)
{
@@ -63,6 +69,23 @@ std::string utf16_to_utf8(const std::u16string &utf16_string)
char16_t>{}.to_bytes(utf16_string);
}
#endif
+#else
+std::u16string utf8_to_utf16(const std::string &utf8_string)
+{
+ std::u16string result;
+ utf8::utf8to16(utf8_string.begin(), utf8_string.end(), std::back_inserter(result));
+
+ return result;
+}
+
+std::string utf16_to_utf8(const std::u16string &utf16_string)
+{
+ std::string result;
+ utf8::utf16to8(utf16_string.begin(), utf16_string.end(), std::back_inserter(result));
+
+ return result;
+}
+#endif
std::string latin1_to_utf8(const std::string &latin1)
{
diff --git a/source/utils/path.cpp b/source/utils/path.cpp
index 01da6803..2d22888e 100644
--- a/source/utils/path.cpp
+++ b/source/utils/path.cpp
@@ -21,7 +21,6 @@
// @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file
-#include
#include
#include
#include
@@ -32,6 +31,8 @@
#include
#include
#include
+#elif defined(_MSC_VER)
+#include
#endif
#include
diff --git a/source/workbook/streaming_workbook_reader.cpp b/source/workbook/streaming_workbook_reader.cpp
new file mode 100644
index 00000000..03c9a1ab
--- /dev/null
+++ b/source/workbook/streaming_workbook_reader.cpp
@@ -0,0 +1,185 @@
+// Copyright (c) 2017 Thomas Fussell
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE
+//
+// @license: http://www.opensource.org/licenses/mit-license.php
+// @author: see AUTHORS file
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace {
+
+//TODO: (important) this is duplicated from workbook.cpp, find a common place to keep it
+#ifdef _MSC_VER
+void open_stream(std::ifstream &stream, const std::wstring &path) // MSVC-only overload: opens an input file stream in binary mode from a wide-character path.
+{
+ stream.open(path, std::ios::binary);
+}
+
+void open_stream(std::ofstream &stream, const std::wstring &path) // MSVC-only overload: opens an output file stream in binary mode from a wide-character path; not called anywhere in this file.
+{
+ stream.open(path, std::ios::binary);
+}
+
+void open_stream(std::ifstream &stream, const std::string &path) // MSVC-only overload: converts the narrow path to a wide string through xlnt::path and forwards to the wstring overload.
+{
+ open_stream(stream, xlnt::path(path).wstring());
+}
+
+void open_stream(std::ofstream &stream, const std::string &path) // MSVC-only overload: converts the narrow path to a wide string through xlnt::path and forwards to the wstring overload; not called anywhere in this file.
+{
+ open_stream(stream, xlnt::path(path).wstring());
+}
+#else
+void open_stream(std::ifstream &stream, const std::string &path) // Non-MSVC overload: opens an input file stream in binary mode directly from the narrow path.
+{
+ stream.open(path, std::ios::binary);
+}
+
+void open_stream(std::ofstream &stream, const std::string &path) // Non-MSVC overload: opens an output file stream in binary mode directly from the narrow path; not called anywhere in this file.
+{
+ stream.open(path, std::ios::binary);
+}
+#endif
+
+} // namespace
+
+
+namespace xlnt {
+
+streaming_workbook_reader::streaming_workbook_reader() // Constructs a reader with nothing opened; the body is intentionally empty and an open() overload must be called before reading.
+{
+
+}
+
+streaming_workbook_reader::~streaming_workbook_reader() // Destructor: delegates to close() before the members are destroyed.
+{
+ close();
+}
+
+void streaming_workbook_reader::close() // Releases the consumer and the in-memory stream buffer; does nothing when no consumer exists.
+{
+ if (consumer_) // a consumer only exists after open(std::istream &) has run
+ {
+ consumer_.reset(nullptr);
+ stream_buffer_.reset(nullptr); // in this file only open(const std::vector &) ever sets this buffer
+ } // NOTE(review): stream_, part_stream_, part_stream_buffer_, parser_, workbook_ and worksheet_queue_ are left untouched here - confirm that is intended
+}
+
+bool streaming_workbook_reader::has_cell() // Returns whatever the consumer's has_cell() reports; consumer_ is dereferenced without a null check, so open() must have been called first.
+{
+ return consumer_->has_cell();
+}
+
+cell streaming_workbook_reader::read_cell() // Returns the cell produced by the consumer's read_cell(); consumer_ is dereferenced without a null check, so open() must have been called first.
+{
+ return consumer_->read_cell();
+}
+
+bool streaming_workbook_reader::has_worksheet() // True while worksheet_queue_ still holds relationship ids queued by open() and not yet popped by end_worksheet().
+{
+ return !worksheet_queue_.empty();
+}
+
+void streaming_workbook_reader::begin_worksheet() // Opens the archive part for the worksheet at the back of worksheet_queue_ and starts reading it; the queue entry is not popped here (end_worksheet() does that).
+{
+ const auto next_worksheet_rel = worksheet_queue_.back(); // NOTE(review): assumes the queue is non-empty - callers are expected to check has_worksheet() first
+ const auto workbook_rel = workbook_->manifest()
+ .relationship(path("/"), relationship_type::office_document);
+ const auto worksheet_rel = workbook_->manifest()
+ .relationship(workbook_rel.target().path(), next_worksheet_rel); // looks up the worksheet relationship by id, relative to the workbook part
+
+ auto rel_chain = std::vector{ workbook_rel, worksheet_rel }; // chain from the package root through the workbook to the worksheet
+
+ const auto &manifest = consumer_->target_.manifest();
+ const auto part_path = manifest.canonicalize(rel_chain); // path used below to open the part in the archive
+ auto part_stream_buffer = consumer_->archive_->open(part_path);
+ part_stream_buffer_.swap(part_stream_buffer); // the member now holds the new buffer; the local holds whatever the member held before
+ part_stream_.reset(new std::istream(part_stream_buffer_.get()));
+ parser_.reset(new xml::parser(*part_stream_, part_path.string()));
+ consumer_->parser_ = parser_.get(); // the consumer receives a raw pointer; the parser object itself stays owned by this reader
+
+ consumer_->read_worksheet_begin(next_worksheet_rel);
+}
+
+worksheet streaming_workbook_reader::end_worksheet() // Pops the current worksheet id from the back of the queue and returns the result of the consumer's read_worksheet_end() for it.
+{
+ auto next_worksheet_rel = worksheet_queue_.back(); // copy the id before pop_back() removes it; NOTE(review): assumes the queue is non-empty
+ worksheet_queue_.pop_back();
+ return consumer_->read_worksheet_end(next_worksheet_rel);
+}
+
+void streaming_workbook_reader::open(const std::vector &data) // Opens a workbook held in memory: wraps data in a vector_istreambuf, builds an istream over it and forwards to open(std::istream &).
+{
+ stream_buffer_.reset(new detail::vector_istreambuf(data)); // NOTE(review): presumably the buffer refers to data rather than copying it, so data may need to outlive the reader - confirm
+ stream_.reset(new std::istream(stream_buffer_.get()));
+ open(*stream_);
+}
+
+// Opens the workbook stored in the file named by `filename` for streaming reads.
+// A fresh std::ifstream becomes the reader's stream_, is opened in binary mode
+// through open_stream, and is then handed to open(std::istream &).
+void streaming_workbook_reader::open(const std::string &filename)
+{
+ stream_.reset(new std::ifstream());
+ // Cast the stream object that stream_ owns, not the smart pointer itself:
+ // casting stream_ directly treated the pointer object as if it were an ifstream.
+ open_stream(static_cast<std::ifstream &>(*stream_), filename);
+ open(*stream_);
+}
+
+#ifdef _MSC_VER
+void streaming_workbook_reader::open(const std::wstring &filename) // MSVC-only: opens the workbook file named by a wide-character path and forwards the stream to open(std::istream &).
+{
+ stream_.reset(new std::ifstream());
+ open_stream((std::ifstream &)*stream_, filename); // stream_ was just reset to an ifstream, so the object is viewed as one again to call open_stream
+ open(*stream_);
+}
+#endif
+
+void streaming_workbook_reader::open(const xlnt::path &filename) // Opens the workbook file at the given xlnt::path (via its string form) and forwards the stream to open(std::istream &).
+{
+ stream_.reset(new std::ifstream());
+ open_stream((std::ifstream &)*stream_, filename.string()); // stream_ was just reset to an ifstream, so the object is viewed as one again to call open_stream
+ open(*stream_);
+}
+
+// Prepares the reader to stream worksheets out of the workbook read from `stream`.
+// A new workbook and xlsx_consumer replace any previous ones, the consumer opens
+// the stream, and the relationship id of every worksheet related to the workbook
+// part is queued for begin_worksheet()/end_worksheet().
+void streaming_workbook_reader::open(std::istream &stream)
+{
+ // Drop ids left over from a previously opened workbook so they are never
+ // resolved against the new workbook's manifest.
+ worksheet_queue_.clear();
+
+ workbook_.reset(new workbook());
+ consumer_.reset(new detail::xlsx_consumer(*workbook_));
+ consumer_->open(stream);
+
+ const auto workbook_rel = workbook_->manifest()
+ .relationship(path("/"), relationship_type::office_document);
+ const auto workbook_path = workbook_rel.target().path();
+
+ // Bind by const reference: only the id is read, so copying each relationship is wasted work.
+ // NOTE(review): begin_worksheet() takes ids from the back of the queue, so sheets are
+ // visited in the reverse of the order they are pushed here - confirm that is intended.
+ for (const auto &worksheet_rel : workbook_->manifest()
+ .relationships(workbook_path, relationship_type::worksheet))
+ {
+ worksheet_queue_.push_back(worksheet_rel.id());
+ }
+}
+
+} // namespace xlnt
diff --git a/source/workbook/workbook.cpp b/source/workbook/workbook.cpp
index 60c22538..9dcbc06c 100644
--- a/source/workbook/workbook.cpp
+++ b/source/workbook/workbook.cpp
@@ -1508,14 +1508,14 @@ void workbook::garbage_collect_formulae()
void workbook::update_sheet_properties()
{
- if (has_extended_property(extended_property::titles_of_parts))
+ if (has_extended_property(xlnt::extended_property::titles_of_parts))
{
- extended_property(extended_property::titles_of_parts, sheet_titles());
+ extended_property(xlnt::extended_property::titles_of_parts, sheet_titles());
}
- if (has_extended_property(extended_property::heading_pairs))
+ if (has_extended_property(xlnt::extended_property::heading_pairs))
{
- extended_property(extended_property::heading_pairs,
+ extended_property(xlnt::extended_property::heading_pairs,
std::vector{variant("Worksheets"), variant(static_cast(sheet_count()))});
}
}
diff --git a/tests/cell/cell_test_suite.hpp b/tests/cell/cell_test_suite.hpp
index 7b79d1fe..e7611ab5 100644
--- a/tests/cell/cell_test_suite.hpp
+++ b/tests/cell/cell_test_suite.hpp
@@ -619,9 +619,9 @@ private:
xlnt_assert_equals(cell.value(), 3.141592);
auto cell2 = ws.cell("A2");
- cell2.value(std::string(100'000, 'a'));
+ cell2.value(std::string(100000, 'a'));
cell.value(cell2);
- xlnt_assert_equals(cell.value(), std::string(32'767, 'a'));
+ xlnt_assert_equals(cell.value(), std::string(32767, 'a'));
}
void test_reference()
diff --git a/tests/workbook/serialization_test_suite.hpp b/tests/workbook/serialization_test_suite.hpp
index aa2d6def..f1907a72 100644
--- a/tests/workbook/serialization_test_suite.hpp
+++ b/tests/workbook/serialization_test_suite.hpp
@@ -31,6 +31,8 @@
#include
#include
#include
+#include
+#include
#include
class serialization_test_suite : public test_suite
@@ -56,6 +58,8 @@ public:
register_test(test_read_custom_properties);
register_test(test_round_trip_rw);
register_test(test_round_trip_rw_encrypted);
+ register_test(test_streaming_read);
+ //register_test(test_streaming_write);
}
bool workbook_matches_file(xlnt::workbook &wb, const xlnt::path &file)
@@ -461,4 +465,42 @@ public:
xlnt_assert(round_trip_matches_rw(path, password));
}
}
+
+ void test_streaming_read() // Smoke test: streams every worksheet and cell of a sample workbook; it makes no assertions, so it only fails if something throws or crashes.
+ {
+ const auto path = path_helper::test_file("4_every_style.xlsx");
+ xlnt::streaming_workbook_reader reader;
+
+ reader.open(xlnt::path(path));
+
+ while (reader.has_worksheet())
+ {
+ reader.begin_worksheet();
+
+ while (reader.has_cell())
+ {
+ const auto cell = reader.read_cell(); // read only to drive the reader; the value is not inspected
+ //std::cout << cell.reference().to_string() << std::endl;
+ }
+
+ const auto ws = reader.end_worksheet(); // end_worksheet() pops the sheet from the reader's queue, which lets the outer loop terminate
+ }
+ }
+
+ void test_streaming_write() // Exercises the streaming writer API; currently not run because its register_test call in the constructor is commented out. Makes no assertions.
+ {
+ const auto path = std::string("stream-out.xlsx");
+ xlnt::streaming_workbook_writer writer;
+
+ writer.open(path);
+
+ writer.add_sheet("stream");
+
+ auto b2 = writer.add_cell("B2");
+ b2.value("B2!");
+
+ auto c3 = writer.add_cell("C3");
+ b2.value("should not change"); // NOTE(review): presumably writes to an earlier cell are expected to be ignored once a later cell has been added - confirm; nothing here checks it
+ c3.value("C3!");
+ }
};
diff --git a/third-party/utfcpp/utf8.h b/third-party/utfcpp/utf8.h
new file mode 100644
index 00000000..82b13f59
--- /dev/null
+++ b/third-party/utfcpp/utf8.h
@@ -0,0 +1,34 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "utf8/checked.h"
+#include "utf8/unchecked.h"
+
+#endif // header guard
diff --git a/third-party/utfcpp/utf8/checked.h b/third-party/utfcpp/utf8/checked.h
new file mode 100644
index 00000000..2aef5838
--- /dev/null
+++ b/third-party/utfcpp/utf8/checked.h
@@ -0,0 +1,327 @@
+// Copyright 2006-2016 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+#include
+
+namespace utf8
+{
+ // Base for the exceptions that may be thrown from the library
+ class exception : public ::std::exception { // Common base of the utf8 exception classes below, so callers can catch utf8::exception or std::exception.
+ };
+
+ // Exceptions that may be thrown from the library functions.
+ class invalid_code_point : public exception { // Thrown by append() and next() when a value is rejected as a code point; carries the offending value.
+ uint32_t cp;
+ public:
+ invalid_code_point(uint32_t codepoint) : cp(codepoint) {}
+ virtual const char* what() const throw() { return "Invalid code point"; }
+ uint32_t code_point() const {return cp;} // the value passed to the constructor
+ };
+
+ class invalid_utf8 : public exception { // Thrown by next(), prior() and previous() on a malformed UTF-8 sequence; carries one octet from the point of failure.
+ uint8_t u8;
+ public:
+ invalid_utf8 (uint8_t u) : u8(u) {}
+ virtual const char* what() const throw() { return "Invalid UTF-8"; }
+ uint8_t utf8_octet() const {return u8;} // the octet passed to the constructor
+ };
+
+ class invalid_utf16 : public exception { // Thrown by utf16to8() for a surrogate that is not part of a valid pair; carries the offending 16-bit unit.
+ uint16_t u16;
+ public:
+ invalid_utf16 (uint16_t u) : u16(u) {}
+ virtual const char* what() const throw() { return "Invalid UTF-16"; }
+ uint16_t utf16_word() const {return u16;} // the unit passed to the constructor
+ };
+
+ class not_enough_room : public exception { // Thrown when validate_next reports NOT_ENOUGH_ROOM, and by prior() when it is already at start.
+ public:
+ virtual const char* what() const throw() { return "Not enough space"; }
+ };
+
+ /// The library API - functions intended to be called by the users
+
+ template
+ octet_iterator append(uint32_t cp, octet_iterator result) // Writes code point cp as one to four UTF-8 octets through result and returns the advanced iterator; throws invalid_code_point if cp fails is_code_point_valid.
+ {
+ if (!utf8::internal::is_code_point_valid(cp))
+ throw invalid_code_point(cp);
+
+ if (cp < 0x80) // one octet
+ *(result++) = static_cast(cp);
+ else if (cp < 0x800) { // two octets
+ *(result++) = static_cast((cp >> 6) | 0xc0);
+ *(result++) = static_cast((cp & 0x3f) | 0x80);
+ }
+ else if (cp < 0x10000) { // three octets
+ *(result++) = static_cast((cp >> 12) | 0xe0);
+ *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80);
+ *(result++) = static_cast((cp & 0x3f) | 0x80);
+ }
+ else { // four octets
+ *(result++) = static_cast((cp >> 18) | 0xf0);
+ *(result++) = static_cast(((cp >> 12) & 0x3f) | 0x80);
+ *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80);
+ *(result++) = static_cast((cp & 0x3f) | 0x80);
+ }
+ return result;
+ }
+
+ template
+ output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) // Copies [start, end) to out, writing the UTF-8 encoding of replacement in place of each sequence validate_next rejects; returns the advanced out.
+ {
+ while (start != end) {
+ octet_iterator sequence_start = start; // remembered so an accepted sequence can be copied octet by octet
+ internal::utf_error err_code = utf8::internal::validate_next(start, end);
+ switch (err_code) {
+ case internal::UTF8_OK :
+ for (octet_iterator it = sequence_start; it != start; ++it)
+ *out++ = *it;
+ break;
+ case internal::NOT_ENOUGH_ROOM:
+ throw not_enough_room(); // this error is reported to the caller instead of being replaced
+ case internal::INVALID_LEAD:
+ out = utf8::append (replacement, out);
+ ++start; // skip only the offending octet
+ break;
+ case internal::INCOMPLETE_SEQUENCE:
+ case internal::OVERLONG_SEQUENCE:
+ case internal::INVALID_CODE_POINT:
+ out = utf8::append (replacement, out);
+ ++start;
+ // just one replacement mark for the sequence
+ while (start != end && utf8::internal::is_trail(*start))
+ ++start;
+ break;
+ }
+ }
+ return out;
+ }
+
+ template
+ inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) // Convenience overload: forwards to the four-argument replace_invalid with 0xfffd as the replacement value.
+ {
+ static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
+ return utf8::replace_invalid(start, end, out, replacement_marker);
+ }
+
+ template
+ uint32_t next(octet_iterator& it, octet_iterator end) // Decodes one code point from [it, end) with validate_next and returns it; every error code is turned into an exception. The loops in utf8to16/utf8to32 rely on it being advanced by this call.
+ {
+ uint32_t cp = 0;
+ internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
+ switch (err_code) {
+ case internal::UTF8_OK :
+ break;
+ case internal::NOT_ENOUGH_ROOM :
+ throw not_enough_room();
+ case internal::INVALID_LEAD :
+ case internal::INCOMPLETE_SEQUENCE :
+ case internal::OVERLONG_SEQUENCE :
+ throw invalid_utf8(*it); // reports the octet it currently refers to
+ case internal::INVALID_CODE_POINT :
+ throw invalid_code_point(cp);
+ }
+ return cp;
+ }
+
+ template
+ uint32_t peek_next(octet_iterator it, octet_iterator end) // Same as next(), but it is taken by value, so the caller's iterator is left where it was.
+ {
+ return utf8::next(it, end);
+ }
+
+ template
+ uint32_t prior(octet_iterator& it, octet_iterator start) // Moves it backwards to the nearest non-trail octet and returns the code point decoded from there up to the original position.
+ {
+ // can't do much if it == start
+ if (it == start)
+ throw not_enough_room();
+
+ octet_iterator end = it; // the original position bounds the decode below
+ // Go back until we hit either a lead octet or start
+ while (utf8::internal::is_trail(*(--it)))
+ if (it == start)
+ throw invalid_utf8(*it); // error - no lead byte in the sequence
+ return utf8::peek_next(it, end); // peek so that it stays on the lead octet
+ }
+
+ /// Deprecated in versions that include "prior"
+ template
+ uint32_t previous(octet_iterator& it, octet_iterator pass_start) // Like prior(), but the backwards scan is bounded by pass_start and there is no up-front check that it can be decremented.
+ {
+ octet_iterator end = it;
+ while (utf8::internal::is_trail(*(--it)))
+ if (it == pass_start)
+ throw invalid_utf8(*it); // error - no lead byte in the sequence
+ octet_iterator temp = it; // decode from a copy so that it stays on the lead octet
+ return utf8::next(temp, end);
+ }
+
+ template
+ void advance (octet_iterator& it, distance_type n, octet_iterator end) // Calls next() n times on it, so any exception next() throws propagates to the caller.
+ {
+ for (distance_type i = 0; i < n; ++i)
+ utf8::next(it, end);
+ }
+
+ template
+ typename std::iterator_traits::difference_type
+ distance (octet_iterator first, octet_iterator last) // Counts how many times next() must be called to take first up to last, i.e. the number of code points decoded.
+ {
+ typename std::iterator_traits::difference_type dist;
+ for (dist = 0; first < last; ++dist) // the loop condition uses operator< on the iterators
+ utf8::next(first, last);
+ return dist;
+ }
+
+ template
+ octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) // Converts the UTF-16 units in [start, end) to UTF-8 written through result; throws invalid_utf16 for any surrogate that is not part of a lead/trail pair.
+ {
+ while (start != end) {
+ uint32_t cp = utf8::internal::mask16(*start++);
+ // Take care of surrogate pairs first
+ if (utf8::internal::is_lead_surrogate(cp)) {
+ if (start != end) {
+ uint32_t trail_surrogate = utf8::internal::mask16(*start++);
+ if (utf8::internal::is_trail_surrogate(trail_surrogate))
+ cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; // combine the pair into a single code point
+ else
+ throw invalid_utf16(static_cast(trail_surrogate));
+ }
+ else
+ throw invalid_utf16(static_cast(cp)); // input ended right after a lead surrogate
+
+ }
+ // Lone trail surrogate
+ else if (utf8::internal::is_trail_surrogate(cp))
+ throw invalid_utf16(static_cast(cp));
+
+ result = utf8::append(cp, result);
+ }
+ return result;
+ }
+
+ template
+ u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) // Decodes [start, end) with next() and writes UTF-16 units through result; exceptions thrown by next() propagate to the caller.
+ {
+ while (start < end) { // the loop condition uses operator< on the iterators
+ uint32_t cp = utf8::next(start, end);
+ if (cp > 0xffff) { //make a surrogate pair
+ *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET);
+ *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
+ }
+ else
+ *result++ = static_cast(cp); // values up to 0xffff are written as a single unit
+ }
+ return result;
+ }
+
+ template
+ octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) // Encodes each value in [start, end) with append(), so invalid_code_point propagates for any value append() rejects.
+ {
+ while (start != end)
+ result = utf8::append(*(start++), result);
+
+ return result;
+ }
+
+ template
+ u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) // Decodes [start, end) with next() and writes one code point per output element; exceptions thrown by next() propagate to the caller.
+ {
+ while (start < end) // the loop condition uses operator< on the iterators
+ (*result++) = utf8::next(start, end);
+
+ return result;
+ }
+
+ // The iterator class
+ template