Merge branch 'feature/arrow' into dev

2024-03-22 13:11:17 +08:00 · 2017-07-03 08:42:31 -07:00 · 2017-07-03 08:42:31 -07:00 · 5e3476f755
commit 5e3476f755
parent eb201f5f70 dbe60a5227
28 changed files with 3171 additions and 743 deletions
--- a/.gitignore
+++ b/.gitignore
@ -12,4 +12,5 @@ node_modules/
 *~
 .DS_Store
 __pycache__/
-Win32/
+Win32/
+*.pyd
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -11,6 +11,7 @@ option(STATIC "Set to ON to build xlnt as a static library instead of a shared l
 option(TESTS "Set to OFF to skip building test executable (in ./tests)" ON)
 option(SAMPLES "Set to ON to build executable code samples (in ./samples)" OFF)
 option(BENCHMARKS "Set to ON to build performance benchmarks (in ./benchmarks)" OFF)
+option(ARROW "Set to ON to build Arrow conversion functions (in ./arrow/xlntarrow)" OFF)

 # Platform specific options
 if(NOT MSVC)
@ -30,4 +31,8 @@ if(TESTS)
    add_subdirectory(tests)
 endif()

+if(ARROW)
+    add_subdirectory(arrow/xlntarrow)
+endif()
+
 add_subdirectory(source)
--- a/README.md
+++ b/README.md
@ -1,14 +1,14 @@
 <img height="100" src="https://cloud.githubusercontent.com/assets/1735211/24962965/5c1cfc94-1f6b-11e7-8d86-54fe12907a23.png" alt="xlnt"><br/>
 ====

-[![Travis Build Status](https://travis-ci.org/tfussell/xlnt.svg)](https://travis-ci.org/tfussell/xlnt)
+[![Travis Build Status](https://travis-ci.org/tfussell/xlnt.svg?branch=master)](https://travis-ci.org/tfussell/xlnt)
 [![AppVeyor Build status](https://ci.appveyor.com/api/projects/status/2hs79a1xoxy16sol?svg=true)](https://ci.appveyor.com/project/tfussell/xlnt)
 [![Coverage Status](https://coveralls.io/repos/github/tfussell/xlnt/badge.svg?branch=master)](https://coveralls.io/github/tfussell/xlnt?branch=master)
 [![ReadTheDocs Documentation Status](https://readthedocs.org/projects/xlnt/badge/?version=latest)](http://xlnt.readthedocs.org/en/latest/?badge=latest)
 [![License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](http://opensource.org/licenses/MIT)

 ## Introduction
-xlnt is a modern C++ library for manipulating spreadsheets in memory and reading/writing them from/to XLSX files as described in [ECMA 376 4th edition](http://www.ecma-international.org/publications/standards/Ecma-376.htm). xlnt is currently under active feature development and is on track for the version 1.0 release in the next few weeks. Until then, the API could have significant changes. For a high-level summary of what you can do with this library, see [the feature list](https://tfussell.gitbooks.io/xlnt/content/docs/introduction/Features.html).
+xlnt is a modern C++ library for manipulating spreadsheets in memory and reading/writing them from/to XLSX files as described in [ECMA 376 4th edition](http://www.ecma-international.org/publications/standards/Ecma-376.htm). The first public release of xlnt version 1.0 was on May 10th, 2017. Current work is focused on increasing compatibility, improving performance, and brainstorming future development goals. For a high-level summary of what you can do with this library, see [the feature list](https://tfussell.gitbooks.io/xlnt/content/docs/introduction/Features.html). Contributions are welcome in the form of pull requests or discussions on [the repository's Issues page](https://github.com/tfussell/xlnt/issues).

 ## Example

@ -29,7 +29,7 @@ int main()
    wb.save("example.xlsx");
    return 0;
 }
-// compile with -std=c++14 -Ixlnt/include -Lxlnt/lib -lxlnt
+// compile with -std=c++14 -Ixlnt/include -lxlnt
 ```

 ## Documentation
--- a/arrow/xlntarrow/CMakeLists.txt
+++ b/arrow/xlntarrow/CMakeLists.txt
@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.2)
+project(xlntarrow)
+
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Directory containing the Apache Arrow headers (i.e. arrow/api.h).
+# The default is the previously hard-coded miniconda3 location; override it
+# with -DXLNT_ARROW_INCLUDE_DIR=<path> when Arrow is installed elsewhere.
+set(XLNT_ARROW_INCLUDE_DIR
+    "${CMAKE_CURRENT_SOURCE_DIR}/../../../miniconda3/include"
+    CACHE PATH "Directory containing the Apache Arrow headers")
+
+set(XLNT_ARROW
+    ${CMAKE_CURRENT_SOURCE_DIR}/xlntarrow.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/xlntarrow.cpp)
+
+add_library(xlntarrow SHARED ${XLNT_ARROW})
+target_link_libraries(xlntarrow PRIVATE xlnt)
+# Same three include directories, in the same order, as before.
+target_include_directories(xlntarrow PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${XLNT_ARROW_INCLUDE_DIR}
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
--- a/arrow/xlntarrow/xlntarrow.cpp
+++ b/arrow/xlntarrow/xlntarrow.cpp
@ -0,0 +1,39 @@
+#include <xlnt/xlnt.hpp>
+#include <xlntarrow.hpp>
+
+namespace xlnt {
+namespace arrow {
+
+/// Walks over every cell of the next worksheet of the XLSX data read from `s`.
+/// `table` is not modified by the current implementation; the only visible
+/// effect is that the reference of each cell whose row number leaves a
+/// remainder of 1 when divided by 1000 is printed to std::cout.
+void xlsx2arrow(std::istream &s, ::arrow::Table &table)
+{
+    xlnt::streaming_workbook_reader reader;
+    reader.open(s);
+    reader.begin_worksheet();
+
+    // Row reported by a cell while no positive row has been recorded yet;
+    // it is stored but not consumed anywhere else in this function.
+    int first_row = 0;
+
+    for (; reader.has_cell();)
+    {
+        auto cell = reader.read_cell();
+
+        if (first_row <= 0)
+        {
+            first_row = cell.row();
+        }
+
+        const bool report_progress = (cell.reference().row() % 1000 == 1);
+
+        if (report_progress)
+        {
+            std::cout << cell.reference().to_string() << std::endl;
+        }
+    }
+
+    reader.end_worksheet();
+}
+
+/// Placeholder for writing `table` out as an XLSX document: the body is
+/// currently empty, so neither `table` nor `s` is touched.
+/// NOTE(review): the destination is declared as std::istream; presumably
+/// std::ostream was intended -- confirm before implementing.
+void arrow2xlsx(const ::arrow::Table &table, std::istream &s)
+{
+
+}
+
+}
+}
--- a/arrow/xlntarrow/xlntarrow.hpp
+++ b/arrow/xlntarrow/xlntarrow.hpp
@ -0,0 +1,11 @@
+#pragma once
+
+#include <iostream>
+#include <arrow/api.h>
+
+namespace xlnt {
+namespace arrow {
+
+/// Reads XLSX data from the stream `s`; `table` is the Arrow table the
+/// conversion is meant to target.
+void xlsx2arrow(std::istream &s, ::arrow::Table &table);
+
+/// Counterpart of xlsx2arrow taking the Arrow table as input.
+/// NOTE(review): the stream parameter is an std::istream although this is the
+/// writing direction -- confirm whether std::ostream was intended.
+void arrow2xlsx(const ::arrow::Table &table, std::istream &s);
+
+}
+}
--- a/arrow/xlntpyarrow/python_streambuf.hpp
+++ b/arrow/xlntpyarrow/python_streambuf.hpp
@ -0,0 +1,487 @@
+#pragma once
+
+#include <boost/optional.hpp>
+#include <boost/utility/typed_in_place_factory.hpp>
+
+#include <cassert>
+#include <stdexcept>
+#include <iostream>
+#include <Python.h>
+
+namespace xlnt {
+namespace arrow {
+
+/// A stream buffer getting data from and putting data into a Python file object
+/** The aims are as follow:
+
+    - Given a C++ function acting on a standard stream, e.g.
+
+      \code
+      void read_inputs(std::istream& input) {
+        ...
+        input >> something >> something_else;
+      }
+      \endcode
+
+      and given a piece of Python code which creates a file-like object,
+      to be able to pass this file object to that C++ function, e.g.
+
+      \code
+      import gzip
+      gzip_file_obj = gzip.GzipFile(...)
+      read_inputs(gzip_file_obj)
+      \endcode
+
+      and have the standard stream pull data from and put data into the Python
+      file object.
+
+    - When Python \c read_inputs() returns, the Python object is able to
+      continue reading or writing where the C++ code left off.
+
+    - Operations in C++ on mere files should be competitively fast compared
+      to the direct use of \c std::fstream.
+
+
+    \b Motivation
+
+      - the standard Python library offer of file-like objects (files,
+        compressed files and archives, network, ...) is far superior to the
+        offer of streams in the C++ standard library and Boost C++ libraries.
+
+      - i/o code involves a fair amount of text processing which is more
+        efficiently prototyped in Python but then one may need to rewrite
+        a time-critical part in C++, in as seamless a manner as possible.
+
+    \b Usage
+
+    This is 2-step:
+
+      - a trivial wrapper function
+
+        \code
+          using boost_adaptbx::python::streambuf;
+          void read_inputs_wrapper(streambuf& input)
+          {
+            streambuf::istream is(input);
+            read_inputs(is);
+          }
+
+          def("read_inputs", read_inputs_wrapper);
+        \endcode
+
+        which has to be written every time one wants a Python binding for
+        such a C++ function.
+
+      - the Python side
+
+        \code
+          from boost.python import streambuf
+          read_inputs(streambuf(python_file_obj=obj, buffer_size=1024))
+        \endcode
+
+        \c buffer_size is optional. See also: \c default_buffer_size
+
+  Note: references are to the C++ standard (the numbers between parentheses
+  at the end of references are margin markers).
+*/
+class streambuf : public std::basic_streambuf<char>
+{
+  private:
+    typedef std::basic_streambuf<char> base_t;
+
+  public:
+    /* The syntax
+        using base_t::char_type;
+       would be nicer but Visual Studio C++ 8 chokes on it
+    */
+    typedef base_t::char_type   char_type;
+    typedef base_t::int_type    int_type;
+    typedef base_t::pos_type    pos_type;
+    typedef base_t::off_type    off_type;
+    typedef base_t::traits_type traits_type;
+
+    // work around Visual C++ 7.1 problem
+    inline static int
+    traits_type_eof() { return traits_type::eof(); }
+
+    /// The default size of the read and write buffer.
+    /** They are respectively used to buffer data read from and data written to
+        the Python file object. It can be modified from Python.
+    */
+    static std::size_t default_buffer_size;
+
+    /// Construct from a Python file object
+    /** The "read", "write", "seek" and "tell" attributes of python_file_obj
+        are looked up once; an attribute that does not exist is left as
+        nullptr and the corresponding operation throws when it is first used.
+        If buffer_size_ is 0 the current default_buffer_size is used.
+    */
+    streambuf(
+      PyObject *python_file_obj,
+      std::size_t buffer_size_ = 0)
+    :
+      py_read (PyObject_GetAttrString(python_file_obj, "read")),
+      py_write(PyObject_GetAttrString(python_file_obj, "write")),
+      py_seek (PyObject_GetAttrString(python_file_obj, "seek")),
+      py_tell (PyObject_GetAttrString(python_file_obj, "tell")),
+      buffer_size(buffer_size_ != 0 ? buffer_size_ : default_buffer_size),
+      write_buffer(0),
+      pos_of_read_buffer_end_in_py_file(0),
+      pos_of_write_buffer_end_in_py_file(buffer_size),
+      farthest_pptr(0)
+    {
+      assert(buffer_size != 0);
+
+      // PyObject_GetAttrString leaves an AttributeError pending for every
+      // attribute that is missing. Drop it here, otherwise it would be
+      // mistaken for a failure of the tell() probe below (or would surface
+      // in some unrelated later Python call).
+      PyErr_Clear();
+
+      /* Some Python file objects (e.g. sys.stdout and sys.stdin)
+         have non-functional seek and tell. If so, reset both py_tell and
+         py_seek to nullptr. The position returned by a working tell() is
+         the initial position of both buffers, so it is recorded right away
+         instead of calling tell() a second time.
+       */
+      if (py_tell != nullptr) {
+        bool tell_works = false;
+        PyObject *py_pos = PyObject_CallFunction(py_tell, nullptr);
+        if (py_pos != nullptr) {
+          long long const start = PyLong_AsLongLong(py_pos);
+          Py_DECREF(py_pos);  // the probe result is an owned reference
+          if (PyErr_Occurred() == nullptr) {
+            tell_works = true;
+            pos_of_read_buffer_end_in_py_file = static_cast<off_type>(start);
+            pos_of_write_buffer_end_in_py_file = static_cast<off_type>(start);
+          }
+        }
+        if (!tell_works) {
+          PyErr_Clear();
+          Py_CLEAR(py_tell);
+          Py_CLEAR(py_seek);
+        }
+      }
+
+      if (py_write != nullptr) {
+        // C-like string to make debugging easier
+        write_buffer = new char[buffer_size + 1];
+        write_buffer[buffer_size] = '\0';
+        setp(write_buffer, write_buffer + buffer_size);  // 27.5.2.4.5 (5)
+        farthest_pptr = pptr();
+      }
+      else {
+        // The first attempt at output will result in a call to overflow
+        setp(0, 0);
+      }
+    }
+
+    /// Destructor freeing the allocated resources
+    /** Besides the heap-allocated write buffer, the bound methods looked up
+        in the constructor and the Python object backing the read buffer are
+        owned references and are released here.
+        NOTE(review): releasing references presumably requires the Python
+        interpreter to still be alive and the GIL to be held -- confirm for
+        every place where a streambuf is destroyed.
+    */
+    virtual ~streambuf() {
+      delete[] write_buffer;
+      Py_XDECREF(read_buffer);
+      Py_XDECREF(py_read);
+      Py_XDECREF(py_write);
+      Py_XDECREF(py_seek);
+      Py_XDECREF(py_tell);
+    }
+
+    /// C.f. C++ standard section 27.5.2.4.3
+    /** Overridden so that the stream member function readsome works
+        (c.f. 27.6.1.3, alinea 30). A fresh chunk is requested through
+        underflow(); -1 is returned when that reports end of file, otherwise
+        the number of characters left in the get area.
+     */
+    virtual std::streamsize showmanyc() {
+      if (underflow() == traits_type::eof()) {
+        return -1;
+      }
+      return egptr() - gptr();
+    }
+
+    /// C.f. C++ standard section 27.5.2.4.3
+    /** Asks the Python file object for up to buffer_size bytes and makes the
+        returned bytes object the new get area.
+
+        Returns the first character read, or eof when read() returned an
+        empty result. Throws std::invalid_argument when the file object has
+        no read method or read() did not return a bytes object, and
+        std::runtime_error when read() raised; in the latter two cases the
+        Python error indicator is left set.
+     */
+    virtual int_type underflow() {
+      int_type const failure = traits_type::eof();
+      if (py_read == nullptr) {
+        throw std::invalid_argument(
+          "That Python file object has no 'read' attribute");
+      }
+      // "n" is the format unit for Py_ssize_t. The former "i" announced a C
+      // int while a std::size_t was actually pushed through the varargs.
+      PyObject *fresh_chunk = PyObject_CallFunction(
+        py_read, "n", static_cast<Py_ssize_t>(buffer_size));
+
+      // The get area still points into the previous bytes object: detach it
+      // before dropping the reference that keeps that memory alive.
+      setg(0, 0, 0);
+      Py_XDECREF(read_buffer);
+      read_buffer = fresh_chunk;
+
+      if (read_buffer == nullptr) {
+        throw std::runtime_error(
+          "The method 'read' of the Python file object raised an exception.");
+      }
+
+      char *read_buffer_data = nullptr;
+      Py_ssize_t py_n_read = 0;
+      if (PyBytes_AsStringAndSize(read_buffer, &read_buffer_data, &py_n_read) == -1) {
+        throw std::invalid_argument(
+          "The method 'read' of the Python file object "
+          "did not return a string.");
+      }
+      auto n_read = static_cast<off_type>(py_n_read);
+      pos_of_read_buffer_end_in_py_file += n_read;
+      setg(read_buffer_data, read_buffer_data, read_buffer_data + n_read);
+      // ^^^27.5.2.3.1 (4)
+      if (n_read == 0) return failure;
+      return traits_type::to_int_type(read_buffer_data[0]);
+    }
+
+    /// C.f. C++ standard section 27.5.2.4.5
+    /** Hands the pending content of the write buffer, followed by c unless
+        c is eof, to the write method of the Python file object and empties
+        the buffer.
+
+        Returns eof when Python reported a failure (the Python error
+        indicator is then left set), otherwise c, or a value different from
+        eof when c itself was eof. Throws std::invalid_argument when the
+        file object has no write method.
+     */
+    virtual int_type overflow(int_type c=traits_type_eof()) {
+      if (py_write == nullptr) {
+        throw std::invalid_argument(
+          "That Python file object has no 'write' attribute");
+      }
+
+      // Passes size bytes to Python's write(); false if anything failed.
+      // Both the temporary bytes object and the value returned by write()
+      // are owned references and are released here.
+      auto write_bytes = [this](const char *data, Py_ssize_t size) -> bool {
+        PyObject *bytes = PyBytes_FromStringAndSize(data, size);
+        if (bytes == nullptr) return false;
+        PyObject *outcome = PyObject_CallFunction(py_write, "O", bytes);
+        Py_DECREF(bytes);
+        if (outcome == nullptr) return false;
+        Py_DECREF(outcome);
+        return true;
+      };
+
+      farthest_pptr = std::max(farthest_pptr, pptr());
+      auto n_buffered = static_cast<off_type>(farthest_pptr - pbase());
+      if (!write_bytes(pbase(), farthest_pptr - pbase())) {
+        return traits_type::eof();
+      }
+      if (n_buffered) {
+        pos_of_write_buffer_end_in_py_file += n_buffered;
+        setp(pbase(), epptr());
+        // ^^^ 27.5.2.4.5 (5)
+        farthest_pptr = pptr();
+      }
+
+      if (!traits_type::eq_int_type(c, traits_type::eof())) {
+        char_type ch = traits_type::to_char_type(c);
+        if (!write_bytes(&ch, 1)) {
+          return traits_type::eof();
+        }
+        pos_of_write_buffer_end_in_py_file += 1;
+      }
+
+      return traits_type::eq_int_type(
+        c, traits_type::eof()) ? traits_type::not_eof(c) : c;
+    }
+
+    /// Update the python file to reflect the state of this stream buffer
+    /** Empty the write buffer into the Python file object and set the seek
+        position of the latter accordingly (C++ standard section 27.5.2.4.2).
+        If there is no write buffer or it is empty, but there is a non-empty
+        read buffer, set the Python file object seek position to the
+        seek position in that read buffer.
+
+        Both adjustments are seeks relative to the current position of the
+        Python file (whence = 1). Returns 0 on success and -1 when writing or
+        seeking failed.
+    */
+    virtual int sync() {
+      // Moves the Python file by offset relative to its current position;
+      // false if seek() raised. "L" is the format unit for long long, which
+      // is what the offset is explicitly converted to.
+      auto seek_relative = [this](off_type offset) -> bool {
+        PyObject *outcome = PyObject_CallFunction(
+          py_seek, "Li", static_cast<long long>(offset), 1);
+        if (outcome == nullptr) return false;
+        Py_DECREF(outcome);
+        return true;
+      };
+
+      int result = 0;
+      farthest_pptr = std::max(farthest_pptr, pptr());
+      if (farthest_pptr && farthest_pptr > pbase()) {
+        // overflow() leaves the Python file just after the farthest byte
+        // written; delta (<= 0) brings it back to the current put position.
+        off_type delta = pptr() - farthest_pptr;
+        int_type status = overflow();
+        if (traits_type::eq_int_type(status, traits_type::eof())) result = -1;
+        if (py_seek != nullptr && !seek_relative(delta)) result = -1;
+      }
+      else if (gptr() && gptr() < egptr()) {
+        // Rewind the Python file over the part of the read buffer that has
+        // not been consumed yet.
+        if (py_seek != nullptr && !seek_relative(gptr() - egptr())) result = -1;
+      }
+      return result;
+    }
+
+    /// C.f. C++ standard section 27.5.2.4.2
+    /** This implementation is optimised to look whether the position is within
+        the buffers, so as to avoid calling Python seek or tell. It is
+        important for many applications that the overhead of calling into Python
+        is avoided as much as possible (e.g. parsers which may do a lot of
+        backtracking)
+
+        Returns the new absolute position, or pos_type(off_type(-1)) when the
+        request cannot be honoured (unknown seek direction, end of file while
+        priming the read buffer, no usable tell method, or Python's seek or
+        tell raising). Throws std::invalid_argument when the file object has
+        no seek method.
+    */
+    virtual
+    pos_type seekoff(off_type off, std::ios_base::seekdir way,
+                     std::ios_base::openmode which=  std::ios_base::in
+                                                   | std::ios_base::out)
+    {
+      /* In practice, "which" is either std::ios_base::in or out
+         since we end up here because either seekp or seekg was called
+         on the stream using this buffer. That simplifies the code
+         in a few places.
+      */
+      pos_type const failure = pos_type(off_type(-1));
+
+      if (py_seek == nullptr) {
+        throw std::invalid_argument(
+          "That Python file object has no 'seek' attribute");
+      }
+
+      // we need the read buffer to contain something!
+      if (which == std::ios_base::in && !gptr()) {
+        if (traits_type::eq_int_type(underflow(), traits_type::eof())) {
+          return failure;
+        }
+      }
+
+      // compute the whence parameter for Python seek
+      int whence;
+      switch (way) {
+        case std::ios_base::beg:
+          whence = 0;
+          break;
+        case std::ios_base::cur:
+          whence = 1;
+          break;
+        case std::ios_base::end:
+          whence = 2;
+          break;
+        default:
+          return failure;
+      }
+
+      // Let's have a go
+      boost::optional<off_type> result = seekoff_without_calling_python(
+        off, way, which);
+      if (!result) {
+        // we need to call Python, and tell() is required to learn where the
+        // seek ended up
+        if (py_tell == nullptr) return failure;
+
+        if (which == std::ios_base::out) overflow();
+        if (way == std::ios_base::cur) {
+          if      (which == std::ios_base::in)  off -= egptr() - gptr();
+          else if (which == std::ios_base::out) off += pptr() - pbase();
+        }
+
+        // "L" is the format unit for long long; the former "i" truncated the
+        // offset to a C int and mismatched the type actually passed.
+        PyObject *seek_outcome = PyObject_CallFunction(
+          py_seek, "Li", static_cast<long long>(off), whence);
+        if (seek_outcome == nullptr) return failure;
+        Py_DECREF(seek_outcome);
+
+        PyObject *py_pos = PyObject_CallFunction(py_tell, nullptr);
+        if (py_pos == nullptr) return failure;
+        result = extract_int<off_type>(py_pos);  // releases py_pos
+
+        if (which == std::ios_base::in) {
+          // underflow() adds the number of bytes it reads to this counter,
+          // so it has to start from the position the file was just moved to.
+          pos_of_read_buffer_end_in_py_file = *result;
+          underflow();
+        }
+      }
+      return *result;
+    }
+
+    /// C.f. C++ standard section 27.5.2.4.2
+    /** Absolute positioning: forwards to seekoff with sp taken as an offset
+        from the beginning of the file and returns whatever seekoff returns.
+     */
+    virtual
+    pos_type seekpos(pos_type sp,
+                     std::ios_base::openmode which=  std::ios_base::in
+                                                   | std::ios_base::out)
+    {
+      return streambuf::seekoff(sp, std::ios_base::beg, which);
+    }
+
+  private:
+    PyObject *py_read = nullptr;
+    PyObject *py_write = nullptr;
+    PyObject *py_seek = nullptr;
+    PyObject *py_tell = nullptr;
+
+    std::size_t buffer_size;
+
+    /* This is actually a Python bytes object and the actual read buffer is
+       its internal data, i.e. an array of characters. We keep the PyObject
+       pointer returned by the read method so as to hold on to it: as long as
+       that reference is held, the actual buffer can't go away.
+    */
+    PyObject *read_buffer = nullptr;
+
+    /* A mere array of char's allocated on the heap at construction time and
+       de-allocated only at destruction time.
+    */
+    char *write_buffer = nullptr;
+
+    off_type pos_of_read_buffer_end_in_py_file,
+             pos_of_write_buffer_end_in_py_file;
+
+    // the farthest place the buffer has been written into
+    char *farthest_pptr = nullptr;
+
+
+    /// Try to satisfy a seek by moving inside the current buffer only
+    /** Returns the new absolute position when the sought position lies
+        inside the relevant buffer (get area for in, put area for out), and
+        an empty optional when it does not or when way is end; the caller
+        then has to go through the Python file object.
+        Throws std::runtime_error for any other value of which or way.
+     */
+    boost::optional<off_type> seekoff_without_calling_python(
+      off_type off,
+      std::ios_base::seekdir way,
+      std::ios_base::openmode which)
+    {
+      // default-constructed, i.e. empty: "could not be done in the buffer"
+      boost::optional<off_type> const failure;
+
+      // Buffer range and current position
+      // (the buffer pointers are converted to integers so that the sought
+      // position can be computed with plain offset arithmetic)
+      off_type buf_begin, buf_end, buf_cur, upper_bound;
+      off_type pos_of_buffer_end_in_py_file;
+      if (which == std::ios_base::in) {
+        pos_of_buffer_end_in_py_file = pos_of_read_buffer_end_in_py_file;
+        buf_begin = reinterpret_cast<std::streamsize>(eback());
+        buf_cur = reinterpret_cast<std::streamsize>(gptr());
+        buf_end = reinterpret_cast<std::streamsize>(egptr());
+        upper_bound = buf_end;
+      }
+      else if (which == std::ios_base::out) {
+        pos_of_buffer_end_in_py_file = pos_of_write_buffer_end_in_py_file;
+        buf_begin = reinterpret_cast<std::streamsize>(pbase());
+        buf_cur = reinterpret_cast<std::streamsize>(pptr());
+        buf_end = reinterpret_cast<std::streamsize>(epptr());
+        farthest_pptr = std::max(farthest_pptr, pptr());
+        // one past the farthest byte written is still a valid put position
+        upper_bound = reinterpret_cast<std::streamsize>(farthest_pptr) + 1;
+      }
+      else {
+        throw std::runtime_error("unreachable");
+      }
+
+      // Sought position in "buffer coordinate"
+      off_type buf_sought;
+      if (way == std::ios_base::cur) {
+        buf_sought = buf_cur + off;
+      }
+      else if (way == std::ios_base::beg) {
+        // translate the absolute file offset using the known file position
+        // of the end of the buffer
+        buf_sought = buf_end + (off - pos_of_buffer_end_in_py_file);
+      }
+      else if (way == std::ios_base::end) {
+        // never resolved locally: the caller falls back to Python
+        return failure;
+      }
+      else {
+        throw std::runtime_error("unreachable");
+      }
+
+      // if the sought position is not in the buffer, give up
+      if (buf_sought < buf_begin || buf_sought >= upper_bound) return failure;
+
+      // we are in wonderland
+      // (move the get or put pointer by the distance to the sought position)
+      if      (which == std::ios_base::in)  gbump(buf_sought - buf_cur);
+      else if (which == std::ios_base::out) pbump(buf_sought - buf_cur);
+      return pos_of_buffer_end_in_py_file + (buf_sought - buf_end);
+    }
+
+  /// Convert a Python integer to T and release the reference to it
+  /** o is an owned reference (typically the result of a Python call) that
+      is consumed by this function. A null o, i.e. a Python call that raised,
+      yields static_cast<T>(-1) instead of being dereferenced.
+   */
+  template<typename T>
+  T extract_int(PyObject *o)
+  {
+    if (o == nullptr)
+    {
+      return static_cast<T>(-1);
+    }
+
+    // long long rather than long: long is only 32 bits wide on some
+    // platforms, which would truncate positions in large files.
+    auto value = PyLong_AsLongLong(o);
+    Py_DECREF(o);
+
+    return static_cast<T>(value);
+  }
+
+  public:
+
+    /// Input stream reading through a streambuf
+    /** Enables exceptions on badbit and, on destruction, calls sync() if the
+        stream is still in a good state.
+     */
+    class istream : public std::istream
+    {
+      public:
+        istream(streambuf& buf) : std::istream(&buf)
+        {
+          exceptions(std::ios_base::badbit);
+        }
+
+        ~istream() { if (this->good()) this->sync(); }
+    };
+
+    /// Output stream writing through a streambuf
+    /** Enables exceptions on badbit and, on destruction, calls flush() if the
+        stream is still in a good state.
+     */
+    class ostream : public std::ostream
+    {
+      public:
+        ostream(streambuf& buf) : std::ostream(&buf)
+        {
+          exceptions(std::ios_base::badbit);
+        }
+
+        ~ostream() { if (this->good()) this->flush(); }
+    };
+};
+
+// Definition of the static member, with its initial value of 1024.
+// NOTE(review): this is a non-inline definition placed in a header; including
+// the header from more than one translation unit would presumably lead to
+// duplicate-symbol link errors -- confirm it is only ever included once.
+std::size_t streambuf::default_buffer_size = 1024;
+
+/// Holder of a streambuf built from a Python file object.
+/// Used as the first base class of xlnt::arrow::ostream so that the buffer
+/// exists before the stream base class that is given a reference to it.
+struct streambuf_capsule
+{
+  streambuf python_streambuf;
+
+  streambuf_capsule(
+    PyObject *python_file_obj,
+    std::size_t buffer_size=0)
+  :
+    python_streambuf(python_file_obj, buffer_size)
+  {}
+};
+
+/// Output stream owning its own streambuf over a Python file object.
+/// The buffer lives in the private streambuf_capsule base, which is
+/// constructed before (and destroyed after) the stream base class using it.
+struct ostream : private streambuf_capsule, streambuf::ostream
+{
+  ostream(
+    PyObject *python_file_obj,
+    std::size_t buffer_size=0)
+  :
+    streambuf_capsule(python_file_obj, buffer_size),
+    streambuf::ostream(python_streambuf)
+  {}
+
+  /// Flushes pending output. Destructors are implicitly noexcept, so an
+  /// exception leaving this one would call std::terminate; failures are
+  /// therefore reported on stderr instead of being thrown.
+  ~ostream()
+  {
+    try
+    {
+      if (this->good())
+      {
+        this->flush();
+      }
+    }
+    catch (...)
+    {
+      // flush() rethrows because badbit exceptions are enabled on the
+      // stream; any pending Python error is reported below.
+    }
+
+    if (PyErr_Occurred() != nullptr)
+    {
+      std::cerr <<
+        "Problem closing python ostream.\n"
+        "  Known limitation: the error is unrecoverable. Sorry.\n"
+        "  Suggestion for programmer: add ostream.flush() before"
+        " returning." << std::endl;
+      // Prints the pending Python exception to sys.stderr and clears it.
+      PyErr_WriteUnraisable(nullptr);
+    }
+  }
+};
+
+}} // namespace xlnt::arrow
--- a/arrow/xlntpyarrow/setup.py
+++ b/arrow/xlntpyarrow/setup.py
@ -0,0 +1,64 @@
+from distutils.core import setup, Extension
+from distutils import sysconfig
+
+description = """
+xlntpyarrow allows Apache Arrow tables to be written to and read from an XLSX
+file efficiently using the C++ library xlnt.
+""".strip()
+
+cfg_vars = sysconfig.get_config_vars()
+if 'CFLAGS' in cfg_vars:
+    cfg_vars['CFLAGS'] = cfg_vars['CFLAGS'].replace('-Wstrict-prototypes', '')
+
+xlntpyarrow_extension = Extension(
+    'xlntpyarrow',
+    ['xlntpyarrow.cpp'],
+    language = 'c++',
+    include_dirs = [
+        '/root/xlnt/arrow/xlntarrow',
+        '/root/xlnt/arrow/xlntpyarrow',
+        '/root/miniconda3/include'
+    ],
+    libraries = [
+        'arrow',
+        'xlntarrow',
+        'xlnt'
+    ],
+    library_dirs = [
+        '/root/miniconda3/lib',
+        '/root/xlnt/build/arrow/xlntarrow',
+        '/root/xlnt/build/source'
+    ],
+    extra_compile_args=['-std=c++11']
+)
+
+classifiers = [
+    'Development Status :: 5 - Production/Stable',
+    'Environment :: Plugins',
+    'Intended Audience :: Science/Research',
+    'License :: OSI Approved :: MIT License',
+    'Natural Language :: English',
+    'Operating System :: Microsoft :: Windows',
+    'Operating System :: MacOS :: MacOS X',
+    'Operating System :: POSIX :: Linux',
+    'Programming Language :: C',
+    'Programming Language :: C++',
+    'Programming Language :: Python :: 2.7',
+    'Programming Language :: Python :: 3.6',
+    'Programming Language :: Python :: Implementation :: CPython',
+    'Topic :: Database',
+    'Topic :: Office/Business :: Financial :: Spreadsheet',
+    'Topic :: Scientific/Engineering :: Information Analysis',
+    'Topic :: Software Development :: Libraries :: Python Modules'
+]
+
+setup(
+    name = 'xlntpyarrow',
+    version = '1.1.0',
+    classifiers = classifiers,
+    description = description,
+    ext_modules = [xlntpyarrow_extension],
+    author = 'Thomas Fussell',
+    author_email = 'thomas.fussell@gmail.com',
+    url = 'https://github.com/tfussell/xlnt'
+)
--- a/arrow/xlntpyarrow/xlntpyarrow.cpp
+++ b/arrow/xlntpyarrow/xlntpyarrow.cpp
@ -0,0 +1,109 @@
+#include <iostream>
+#include <memory>
+#include <vector>
+#include <xlntarrow.hpp>
+#include <python_streambuf.hpp>
+#include <Python.h>
+
+/// Runs the XLSX -> Arrow conversion on a Python file object.
+/// Returns None on success. C++ exceptions must not propagate into the
+/// Python interpreter, so they are turned into a Python exception here and
+/// nullptr is returned; a Python error that is already pending is kept.
+PyObject *xlsx2arrow(PyObject *file)
+{
+    try
+    {
+        xlnt::arrow::streambuf buffer(file);
+        std::istream stream(&buffer);
+        std::shared_ptr<arrow::Schema> schema;
+        std::vector<std::shared_ptr<arrow::Column>> columns;
+        arrow::Table table(schema, columns);
+        xlnt::arrow::xlsx2arrow(stream, table);
+    }
+    catch (const std::exception &error)
+    {
+        if (PyErr_Occurred() == nullptr)
+        {
+            PyErr_SetString(PyExc_RuntimeError, error.what());
+        }
+
+        return nullptr;
+    }
+    catch (...)
+    {
+        if (PyErr_Occurred() == nullptr)
+        {
+            PyErr_SetString(PyExc_RuntimeError, "unknown C++ exception");
+        }
+
+        return nullptr;
+    }
+
+    Py_RETURN_NONE;
+}
+
+extern "C" {
+
+/*
+ * Implements XLSX->pyarrow table function.
+ *
+ * The docstring is built from adjacent string literals with explicit "\n":
+ * a backslash at the end of a source line only splices the lines together
+ * and does not put a line break into the string.
+ */
+PyDoc_STRVAR(xlntpyarrow_xlsx2arrow_doc,
+    "xlsx2arrow(in_file)\n"
+    "\n"
+    "Returns an arrow table representing the given XLSX file object.");
+
+/*
+ * Python entry point: accepts one object, positionally or as the keyword
+ * "file", and forwards it to xlsx2arrow(). Returns nullptr with a Python
+ * error set when argument parsing fails.
+ */
+PyObject *xlntpyarrow_xlsx2arrow(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    PyObject *file = nullptr;
+    static const char *keywords[] = { "file", nullptr };
+    static auto keywords_nc = const_cast<char **>(keywords);
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", keywords_nc, &file))
+    {
+        return nullptr;
+    }
+
+    return xlsx2arrow(file);
+}
+
+
+/*
+* Implements pyarrow table->XLSX function.
+*
+* The docstring is built from adjacent string literals with explicit "\n":
+* a backslash at the end of a source line only splices the lines together
+* and does not put a line break into the string.
+*/
+PyDoc_STRVAR(xlntpyarrow_arrow2xlsx_doc,
+    "arrow2xlsx(table, out_file)\n"
+    "\n"
+    "Writes the given arrow table to out_file as an XLSX file.");
+
+/*
+* Python entry point: accepts the table and the output file object,
+* positionally or as the keywords "table" and "file". The number of format
+* units, keywords and output pointers has to agree, otherwise argument
+* parsing itself fails. The conversion is not implemented yet, so None is
+* returned once the arguments have been parsed.
+*/
+PyObject *xlntpyarrow_arrow2xlsx(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    PyObject *table = nullptr;
+    PyObject *file = nullptr;
+    static const char *keywords[] = { "table", "file", nullptr };
+    static auto keywords_nc = const_cast<char **>(keywords);
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO", keywords_nc, &table, &file))
+    {
+        return nullptr;
+    }
+
+    Py_RETURN_NONE;
+}
+
+static PyMethodDef xlntpyarrow_functions[] =
+{
+    { "xlsx2arrow", (PyCFunction)xlntpyarrow_xlsx2arrow, METH_VARARGS | METH_KEYWORDS, xlntpyarrow_xlsx2arrow_doc },
+    { "arrow2xlsx", (PyCFunction)xlntpyarrow_arrow2xlsx, METH_VARARGS | METH_KEYWORDS, xlntpyarrow_arrow2xlsx_doc },
+    { nullptr, nullptr, 0, nullptr }
+};
+
+/*
+ * Module execution slot: registers the module functions and constants.
+ * Returns 0 on success and -1 (with the Python error set by the failing
+ * call) as soon as one registration fails.
+ */
+int exec_xlntpyarrow(PyObject *module)
+{
+    if (PyModule_AddFunctions(module, xlntpyarrow_functions) != 0)
+    {
+        return -1;
+    }
+
+    if (PyModule_AddStringConstant(module, "__author__", "Thomas Fussell") != 0
+        || PyModule_AddStringConstant(module, "__version__", "0.9.0") != 0
+        || PyModule_AddIntConstant(module, "year", 2017) != 0)
+    {
+        return -1;
+    }
+
+    return 0;
+}
+
+PyDoc_STRVAR(xlntpyarrow_doc, "The xlntpyarrow module");
+
+static PyModuleDef_Slot xlntpyarrow_slots[] =
+{
+    { Py_mod_exec, (void *)exec_xlntpyarrow },
+    { 0, nullptr }
+};
+
+static PyModuleDef xlntpyarrow_def =
+{
+    PyModuleDef_HEAD_INIT,
+    "xlntpyarrow",
+    xlntpyarrow_doc,
+    0,              /* m_size */
+    nullptr,           /* m_methods */
+    xlntpyarrow_slots,
+    nullptr,           /* m_traverse */
+    nullptr,           /* m_clear */
+    nullptr,           /* m_free */
+};
+
+/*
+ * Module initialization function looked up by the interpreter when
+ * "xlntpyarrow" is imported; hands the module definition (which refers to
+ * the slot table above) to PyModuleDef_Init.
+ */
+PyMODINIT_FUNC PyInit_xlntpyarrow()
+{
+    return PyModuleDef_Init(&xlntpyarrow_def);
+}
+
+} // extern "C"
--- a/include/xlnt/workbook/streaming_workbook_reader.hpp
+++ b/include/xlnt/workbook/streaming_workbook_reader.hpp
@ -0,0 +1,132 @@
+// Copyright (c) 2014-2017 Thomas Fussell
+// Copyright (c) 2010-2015 openpyxl
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE
+//
+// @license: http://www.opensource.org/licenses/mit-license.php
+// @author: see AUTHORS file
+#pragma once
+
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <xlnt/xlnt_config.hpp>
+
+namespace xml {
+class parser;
+}
+
+namespace xlnt {
+
+class cell;
+template<typename T>
+class optional;
+class path;
+class workbook;
+class worksheet;
+
+namespace detail {
+class xlsx_consumer;
+}
+
+/// <summary>
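+/// streaming_workbook_reader reads an XLSX workbook incrementally: worksheets
+/// are visited one at a time (begin_worksheet/end_worksheet) and the cells of
+/// the current worksheet are read one at a time (has_cell/read_cell).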
+/// </summary>
+class XLNT_API streaming_workbook_reader
+{
+public:
+    streaming_workbook_reader();
+    ~streaming_workbook_reader();
+
+    /// <summary>
+    /// Closes currently open read stream. This will be called automatically
+    /// by the destructor if it hasn't already been called manually.
+    /// </summary>
+    void close();
+
+    bool has_cell();
+
+    /// <summary>
+    /// Reads the next cell in the current worksheet and optionally returns it if
+    /// the last cell in the sheet has not yet been read.
+    /// </summary>
+    cell read_cell();
+
+    bool has_worksheet();
+
+    /// <summary>
+    /// Begins reading of the next worksheet in the workbook.
+    /// NOTE(review): this function is declared to return void, so the
+    /// worksheet title is not returned here.
+    /// </summary>
+    void begin_worksheet();
+
+    /// <summary>
+    /// Ends reading of the current worksheet in the workbook and optionally
+    /// returns a worksheet object corresponding to the worksheet with the title
+    /// returned by begin_worksheet().
+    /// </summary>
+    worksheet end_worksheet();
+
+    /// <summary>
+    /// Interprets byte vector data as an XLSX file and sets the content of this
+    /// workbook to match that file.
+    /// </summary>
+    void open(const std::vector<std::uint8_t> &data);
+
+    /// <summary>
+    /// Interprets file with the given filename as an XLSX file and sets
+    /// the content of this workbook to match that file.
+    /// </summary>
+    void open(const std::string &filename);
+
+#ifdef _MSC_VER
+    /// <summary>
+    /// Interprets file with the given filename as an XLSX file and sets
+    /// the content of this workbook to match that file.
+    /// </summary>
+    void open(const std::wstring &filename);
+#endif
+
+    /// <summary>
+    /// Interprets file with the given filename as an XLSX file and sets the
+    /// content of this workbook to match that file.
+    /// </summary>
+    void open(const path &filename);
+
+    /// <summary>
+    /// Interprets data in stream as an XLSX file and sets the content of this
+    /// workbook to match that file.
+    /// </summary>
+    void open(std::istream &stream);
+
+private:
+    std::vector<std::string> worksheet_queue_;
+    std::unique_ptr<detail::xlsx_consumer> consumer_;
+    std::unique_ptr<workbook> workbook_;
+    std::unique_ptr<std::istream> stream_;
+    std::unique_ptr<std::streambuf> stream_buffer_;
+    std::unique_ptr<std::istream> part_stream_;
+    std::unique_ptr<std::streambuf> part_stream_buffer_;
+    std::unique_ptr<xml::parser> parser_;
+};
+
+} // namespace xlnt
--- a/include/xlnt/workbook/streaming_workbook_writer.hpp
+++ b/include/xlnt/workbook/streaming_workbook_writer.hpp
@ -0,0 +1,93 @@
+// Copyright (c) 2014-2017 Thomas Fussell
+// Copyright (c) 2010-2015 openpyxl
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE
+//
+// @license: http://www.opensource.org/licenses/mit-license.php
+// @author: see AUTHORS file
+#pragma once
+
+#include <cstddef>
+#include <iterator>
+
+#include <xlnt/xlnt_config.hpp>
+
+namespace xlnt {
+
+/// <summary>
+/// streaming_workbook_writer writes an XLSX document one sheet and one cell at a time.
+/// </summary>
+class XLNT_API streaming_workbook_writer
+{
+public:
+    ~streaming_workbook_writer();
+
+    /// <summary>
+    /// Finishes writing of the remaining contents of the workbook and closes
+    /// currently open write stream. This will be called automatically by the
+    /// destructor if it hasn't already been called manually.
+    /// </summary>
+    void close();
+
+    /// <summary>
+    /// Adds a cell to the currently active worksheet at the position given by
+    /// ref and returns it so that its value can be set. ref should be to the
+    /// right of or below the previously written cell.
+    /// </summary>
+    cell add_cell(const cell_reference &ref);
+
+    /// <summary>
+    /// Ends writing of data to the current sheet and begins writing a new sheet
+    /// with the given title.
+    /// </summary>
+    worksheet add_sheet(const std::string &title);
+
+    /// <summary>
+    /// Opens byte vector data as the destination that the XLSX bytes produced
+    /// by this writer are saved into.
+    /// </summary>
+    void open(std::vector<std::uint8_t> &data) const; // NOTE(review): presumably called before add_sheet()/add_cell(); the const qualifier looks unintended - confirm
+
+    /// <summary>
+    /// Opens a file named filename as the destination that the XLSX data
+    /// produced by this writer is saved into.
+    /// </summary>
+    void open(const std::string &filename) const;
+
+#ifdef _MSC_VER
+    /// <summary>
+    /// Opens a file named filename as the destination that the XLSX data
+    /// produced by this writer is saved into.
+    /// </summary>
+    void open(const std::wstring &filename) const;
+#endif
+
+    /// <summary>
+    /// Opens a file named filename as the destination that the XLSX data
+    /// produced by this writer is saved into.
+    /// </summary>
+    void open(const xlnt::path &filename) const;
+
+    /// <summary>
+    /// Opens stream as the destination that the XLSX data is saved into.
+    /// </summary>
+    void open(std::ostream &stream) const;
+};
+
+} // namespace xlnt
--- a/include/xlnt/xlnt.hpp
+++ b/include/xlnt/xlnt.hpp
@ -65,6 +65,7 @@
 #include <xlnt/workbook/external_book.hpp>
 #include <xlnt/workbook/metadata_property.hpp>
 #include <xlnt/workbook/named_range.hpp>
+#include <xlnt/workbook/streaming_workbook_reader.hpp>
 #include <xlnt/workbook/theme.hpp>
 #include <xlnt/workbook/workbook.hpp>
 #include <xlnt/workbook/worksheet_iterator.hpp>
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@ -28,6 +28,7 @@ endif()
 if(MSVC)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
 elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unknown-pragmas")
 elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-padded -Werror -Wno-documentation-unknown-command")
@ -155,6 +156,11 @@ target_include_directories(xlnt PUBLIC ${XLNT_INCLUDE_DIR})
 target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR})
 target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR}/../third-party/libstudxml)

+if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0.0")
+    target_compile_definitions(xlnt PRIVATE UTFCPP=1)
+    target_include_directories(xlnt PRIVATE ${XLNT_SOURCE_DIR}/../third-party/utfcpp)
+endif()
+
 if(MSVC)
    set_target_properties(xlnt PROPERTIES COMPILE_FLAGS "/wd\"4251\" /wd\"4275\" /wd\"4068\" /MP")
    set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/detail/serialization/miniz.cpp PROPERTIES COMPILE_FLAGS "/wd\"4244\" /wd\"4334\" /wd\"4127\"")
--- a/source/detail/cryptography/xlsx_crypto_consumer.cpp
+++ b/source/detail/cryptography/xlsx_crypto_consumer.cpp
@ -108,7 +108,7 @@ std::vector<std::uint8_t> decrypt_xlsx_agile(
        ++segment;
    }

-    decrypted_package.resize(total_size);
+    decrypted_package.resize(static_cast<std::size_t>(total_size));

    return decrypted_package;
 }
@ -153,7 +153,8 @@ encryption_info::standard_encryption_info read_standard_encryption_info(std::ist
        throw xlnt::exception("invalid header");
    }

-    const auto csp_name_length = (header_length - (info_stream.tellg() - index_at_start)) / 2;
+    const auto csp_name_length = static_cast<std::size_t>((header_length 
+        - (info_stream.tellg() - index_at_start)) / 2);
    auto csp_name = xlnt::detail::read_string<char16_t>(info_stream, csp_name_length);
    csp_name.pop_back(); // remove extraneous trailing null
    if (csp_name != u"Microsoft Enhanced RSA and AES Cryptographic Provider (Prototype)"
--- a/source/detail/serialization/xlsx_consumer.cpp
+++ b/source/detail/serialization/xlsx_consumer.cpp
--- a/source/detail/serialization/xlsx_consumer.hpp
+++ b/source/detail/serialization/xlsx_consumer.hpp
@ -28,6 +28,7 @@
 #include <functional>
 #include <iostream>
 #include <memory>
+#include <string>
 #include <unordered_map>
 #include <vector>

@ -36,11 +37,15 @@

 namespace xlnt {

+class cell;
 class color;
 class rich_text;
 class manifest;
+template<typename T>
+class optional;
 class path;
 class relationship;
+class streaming_workbook_reader;
 class variant;
 class workbook;
 class worksheet;
@ -48,6 +53,8 @@ class worksheet;
 namespace detail {

 class izstream;
+struct cell_impl;
+struct worksheet_impl;

 /// <summary>
 /// Handles writing a workbook into an XLSX file.
@ -57,16 +64,31 @@ class xlsx_consumer
 public:
 	xlsx_consumer(workbook &destination);

+	~xlsx_consumer();
+
 	void read(std::istream &source);

 	void read(std::istream &source, const std::string &password);

 private:
+    friend class xlnt::streaming_workbook_reader;
+
+    void open(std::istream &source);
+
+    bool has_cell();
+
+    /// <summary>
+    /// Reads the next cell in the current worksheet and returns it. Use has_cell()
+    /// to check whether another cell is available first. An exception will be thrown
+    /// if this is not open as a streaming consumer.
+    /// </summary>
+    cell read_cell();
+
 	/// <summary>
 	/// Read all the files needed from the XLSX archive and initialize all of
 	/// the data in the workbook to match.
 	/// </summary>
-	void populate_workbook();
+	void populate_workbook(bool streaming);

    /// <summary>
    ///
@ -106,27 +128,27 @@ private:
 	void read_calculation_chain();

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_connections();

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_custom_property();

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_custom_xml_mappings();

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_external_workbook_references();

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_pivot_table();

@ -136,17 +158,17 @@ private:
 	void read_shared_string_table();

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_shared_workbook_revision_headers();

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_shared_workbook();

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_shared_workbook_user_data();

@ -161,56 +183,71 @@ private:
 	void read_theme();

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_volatile_dependencies();

 	/// <summary>
 	/// xl/sheets/*.xml
 	/// </summary>
-	void read_chartsheet(const std::string &title);
+	void read_chartsheet(const std::string &rel_id);

 	/// <summary>
 	/// xl/sheets/*.xml
 	/// </summary>
-	void read_dialogsheet(const std::string &title);
+	void read_dialogsheet(const std::string &rel_id);

 	/// <summary>
 	/// xl/sheets/*.xml
 	/// </summary>
-	void read_worksheet(const std::string &title);
+	void read_worksheet(const std::string &rel_id);
+
+    /// <summary>
+    /// xl/sheets/*.xml
+    /// </summary>
+    std::string read_worksheet_begin(const std::string &rel_id);
+
+    /// <summary>
+    /// xl/sheets/*.xml
+    /// </summary>
+    void read_worksheet_sheetdata();
+
+    /// <summary>
+    /// xl/sheets/*.xml
+    /// </summary>
+    worksheet read_worksheet_end(const std::string &rel_id);

 	// Sheet Relationship Target Parts

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_comments(worksheet ws);
-    
+
 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_vml_drawings(worksheet ws);

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_drawings();

 	// Unknown Parts

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_unknown_parts();

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_unknown_relationships();

 	/// <summary>
-	/// 
+	///
 	/// </summary>
 	void read_image(const path &part);

@ -362,14 +399,22 @@ private:

 	/// <summary>
 	/// This pointer is generally set by instantiating an xml::parser in a function
-	/// scope and then calling a read_*() method which uses xlsx_consumer::parser() 
+	/// scope and then calling a read_*() method which uses xlsx_consumer::parser()
 	/// to access the object.
 	/// </summary>
 	xml::parser *parser_;
-    
+
    std::vector<xml::qname> stack_;

    bool preserve_space_ = false;
+
+    bool streaming_ = false; // NOTE(review): presumably mirrors the streaming flag passed to populate_workbook() - confirm
+
+    std::unique_ptr<detail::cell_impl> streaming_cell_; // owning; empty until assigned
+
+    detail::cell_impl *current_cell_ = nullptr; // non-owning; starts out null instead of indeterminate
+
+    detail::worksheet_impl *current_worksheet_ = nullptr; // non-owning; starts out null instead of indeterminate
 };

 } // namespace detail
--- a/source/detail/serialization/xlsx_producer.cpp
+++ b/source/detail/serialization/xlsx_producer.cpp
@ -36,13 +36,12 @@
 #include <xlnt/cell/cell.hpp>
 #include <xlnt/packaging/manifest.hpp>
 #include <xlnt/utils/path.hpp>
+#include <xlnt/utils/scoped_enum_hash.hpp>
 #include <xlnt/workbook/workbook.hpp>
 #include <xlnt/workbook/workbook_view.hpp>
 #include <xlnt/worksheet/header_footer.hpp>
 #include <xlnt/worksheet/worksheet.hpp>

-using namespace std::string_literals;
-
 namespace {

 /// <summary>
@ -169,7 +168,7 @@ void xlsx_producer::write_content_types()
    const auto content_types_path = path("[Content_Types].xml");
    begin_part(content_types_path);

-    const auto xmlns = "http://schemas.openxmlformats.org/package/2006/content-types"s;
+    const auto xmlns = "http://schemas.openxmlformats.org/package/2006/content-types";

    write_start_element(xmlns, "Types");
    write_namespace(xmlns, "");
@ -288,7 +287,7 @@ void xlsx_producer::write_property(const std::string &name, const variant &value
            write_start_element(constants::ns("vt"), "vector");

            auto vector = value.get<std::vector<variant>>();
-            std::unordered_set<variant::type> types;
+            std::unordered_set<variant::type, scoped_enum_hash<variant::type>> types;

            for (const auto &element : vector)
            {
@ -2477,9 +2476,9 @@ void xlsx_producer::write_worksheet(const relationship &rel)
        auto first_header = std::string();
        auto first_footer = std::string();

-        const auto locations = 
+        const auto locations =
        {
-            header_footer::location::left, 
+            header_footer::location::left,
            header_footer::location::center,
            header_footer::location::right
        };
@ -2715,7 +2714,7 @@ void xlsx_producer::write_comments(const relationship & /*rel*/, worksheet ws, c
                if (run.second.is_set())
                {
                    write_start_element(xmlns, "rPr");
-                    
+
                    if (run.second.get().bold())
                    {
                        write_start_element(xmlns, "b");
--- a/source/detail/serialization/zstream.cpp
+++ b/source/detail/serialization/zstream.cpp
@ -482,7 +482,9 @@ std::unique_ptr<std::streambuf> ozstream::open(const path &filename)
    zheader header;
    header.filename = filename.string();
    file_headers_.push_back(header);
-    return std::make_unique<zip_streambuf_compress>(&file_headers_.back(), destination_stream_);
+    auto buffer = new zip_streambuf_compress(&file_headers_.back(), destination_stream_);
+
+    return std::unique_ptr<zip_streambuf_compress>(buffer);
 }

 izstream::izstream(std::istream &stream)
@ -526,7 +528,7 @@ bool izstream::read_central_header()
    }

    source_stream_.read(reinterpret_cast<char *>(buf.data()), read_start);
-    
+
    if (buf[0] == 0xd0 && buf[1] == 0xcf && buf[2] == 0x11 && buf[3] == 0xe0
        && buf[4] == 0xa1 && buf[5] == 0xb1 && buf[6] == 0x1a && buf[7] == 0xe1)
    {
@ -595,7 +597,9 @@ std::unique_ptr<std::streambuf> izstream::open(const path &filename) const

    auto header = file_headers_.at(filename.string());
    source_stream_.seekg(header.header_offset);
-    return std::make_unique<zip_streambuf_decompress>(source_stream_, header);
+    auto buffer = new zip_streambuf_decompress(source_stream_, header);
+
+    return std::unique_ptr<zip_streambuf_decompress>(buffer);
 }

 std::string izstream::read(const path &filename) const
--- a/source/detail/unicode.cpp
+++ b/source/detail/unicode.cpp
@ -21,15 +21,21 @@
 // @license: http://www.opensource.org/licenses/mit-license.php
 // @author: see AUTHORS file

-#include <codecvt>
 #include <locale>
 #include <string>

 #include <detail/unicode.hpp>

+#ifdef UTFCPP
+#include <utf8.h>
+#else
+#include <codecvt>
+#endif
+
 namespace xlnt {
 namespace detail {

+#ifndef UTFCPP
 #ifdef _MSC_VER
 std::u16string utf8_to_utf16(const std::string &utf8_string)
 {
@ -63,6 +69,23 @@ std::string utf16_to_utf8(const std::u16string &utf16_string)
        char16_t>{}.to_bytes(utf16_string);
 }
 #endif
+#else
+std::u16string utf8_to_utf16(const std::string &utf8_string) // UTFCPP build: converts UTF-8 text to UTF-16 using utfcpp
+{
+    auto utf16_output = std::u16string();
+    auto sink = std::back_inserter(utf16_output); // appends each converted code unit to the output
+    utf8::utf8to16(std::begin(utf8_string), std::end(utf8_string), sink);
+    return utf16_output;
+}
+
+std::string utf16_to_utf8(const std::u16string &utf16_string) // UTFCPP build: converts UTF-16 text to UTF-8 using utfcpp
+{
+    auto utf8_output = std::string();
+    auto sink = std::back_inserter(utf8_output); // appends each converted byte to the output
+    utf8::utf16to8(std::begin(utf16_string), std::end(utf16_string), sink);
+    return utf8_output;
+}
+#endif

 std::string latin1_to_utf8(const std::string &latin1)
 {
--- a/source/utils/path.cpp
+++ b/source/utils/path.cpp
@ -21,7 +21,6 @@
 // @license: http://www.opensource.org/licenses/mit-license.php
 // @author: see AUTHORS file

-#include <codecvt>
 #include <fstream>
 #include <sstream>
 #include <sys/stat.h>
@ -32,6 +31,8 @@
 #include <linux/limits.h>
 #include <sys/types.h>
 #include <unistd.h>
+#elif defined(_MSC_VER)
+#include <codecvt>
 #endif

 #include <detail/external/include_windows.hpp>
--- a/source/workbook/streaming_workbook_reader.cpp
+++ b/source/workbook/streaming_workbook_reader.cpp
@ -0,0 +1,185 @@
+// Copyright (c) 2017 Thomas Fussell
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE
+//
+// @license: http://www.opensource.org/licenses/mit-license.php
+// @author: see AUTHORS file
+
+#include <fstream>
+
+#include <detail/serialization/vector_streambuf.hpp>
+#include <detail/serialization/xlsx_consumer.hpp>
+#include <xlnt/cell/cell.hpp>
+#include <xlnt/packaging/manifest.hpp>
+#include <xlnt/utils/optional.hpp>
+#include <xlnt/workbook/streaming_workbook_reader.hpp>
+#include <xlnt/workbook/workbook.hpp>
+#include <xlnt/worksheet/worksheet.hpp>
+
+
+namespace {
+
+//TODO: (important) this is duplicated from workbook.cpp, find a common place to keep it
+#ifdef _MSC_VER // MSVC build: file streams are opened with wide-character paths
+void open_stream(std::ifstream &stream, const std::wstring &path) // opens stream for binary reading from the wide path
+{
+    stream.open(path, std::ios::binary);
+}
+
+void open_stream(std::ofstream &stream, const std::wstring &path) // NOTE(review): no std::ofstream caller is visible in this file - confirm this overload is needed
+{
+    stream.open(path, std::ios::binary);
+}
+
+void open_stream(std::ifstream &stream, const std::string &path) // converts the narrow path via xlnt::path(...).wstring(), then forwards to the wide overload
+{
+    open_stream(stream, xlnt::path(path).wstring());
+}
+
+void open_stream(std::ofstream &stream, const std::string &path) // NOTE(review): no std::ofstream caller is visible in this file - confirm this overload is needed
+{
+    open_stream(stream, xlnt::path(path).wstring());
+}
+#else // other compilers: the narrow path is passed to the stream unchanged
+void open_stream(std::ifstream &stream, const std::string &path) // opens stream for binary reading from path
+{
+    stream.open(path, std::ios::binary);
+}
+
+void open_stream(std::ofstream &stream, const std::string &path) // NOTE(review): no std::ofstream caller is visible in this file - confirm this overload is needed
+{
+    stream.open(path, std::ios::binary);
+}
+#endif
+
+} // namespace
+
+
+namespace xlnt {
+
+streaming_workbook_reader::streaming_workbook_reader()
+{
+    // Nothing to set up here: the consumer and workbook are only created by open().
+}
+
+streaming_workbook_reader::~streaming_workbook_reader()
+{
+    close(); // close() does nothing when no consumer exists, so this is safe after a manual close()
+}
+
+void streaming_workbook_reader::close() // releases the consumer and the input stream buffer
+{
+    if (consumer_) // only set once open() has created a consumer
+    {
+        consumer_.reset(nullptr);
+        stream_buffer_.reset(nullptr); // NOTE(review): stream_, part_stream_, parser_, workbook_ and worksheet_queue_ are left untouched here - confirm
+    }
+}
+
+bool streaming_workbook_reader::has_cell() // forwards to xlsx_consumer::has_cell()
+{
+    return consumer_->has_cell(); // consumer_ is created by open() and is not null-checked here
+}
+
+cell streaming_workbook_reader::read_cell() // forwards to xlsx_consumer::read_cell()
+{
+    return consumer_->read_cell(); // consumer_ is created by open() and is not null-checked here
+}
+
+bool streaming_workbook_reader::has_worksheet() // true while worksheet relationship ids remain queued
+{
+    return !worksheet_queue_.empty(); // ids are pushed by open() and popped by end_worksheet()
+}
+
+void streaming_workbook_reader::begin_worksheet() // opens the next queued worksheet part and hands a parser for it to the consumer
+{
+    const auto next_worksheet_rel = worksheet_queue_.back(); // relationship id queued by open(); back() requires a non-empty queue, so check has_worksheet() first
+    const auto workbook_rel = workbook_->manifest()
+        .relationship(path("/"), relationship_type::office_document);
+    const auto worksheet_rel = workbook_->manifest()
+        .relationship(workbook_rel.target().path(), next_worksheet_rel);
+    // NOTE(review): ids are taken from the back of the queue, i.e. in reverse of the order open() pushed them - confirm intended
+    auto rel_chain = std::vector<relationship>{ workbook_rel, worksheet_rel }; // workbook -> worksheet chain used to resolve the part path
+
+    const auto &manifest = consumer_->target_.manifest();
+    const auto part_path = manifest.canonicalize(rel_chain);
+    auto part_stream_buffer = consumer_->archive_->open(part_path); // stream buffer for the worksheet part inside the archive
+    part_stream_buffer_.swap(part_stream_buffer); // the member now owns the new buffer; the previous one is destroyed when this function returns
+    part_stream_.reset(new std::istream(part_stream_buffer_.get()));
+    parser_.reset(new xml::parser(*part_stream_, part_path.string()));
+    consumer_->parser_ = parser_.get(); // the consumer borrows the parser; this reader keeps ownership
+
+    consumer_->read_worksheet_begin(next_worksheet_rel); // NOTE(review): the std::string this returns is discarded - confirm
+}
+
+worksheet streaming_workbook_reader::end_worksheet() // finishes the worksheet started by begin_worksheet() and removes it from the queue
+{
+    auto next_worksheet_rel = worksheet_queue_.back(); // same id begin_worksheet() used; back() requires a non-empty queue
+    worksheet_queue_.pop_back(); // lets has_worksheet() eventually return false
+    return consumer_->read_worksheet_end(next_worksheet_rel);
+}
+
+void streaming_workbook_reader::open(const std::vector<std::uint8_t> &data) // starts streaming from an XLSX file held in memory
+{
+    stream_buffer_.reset(new detail::vector_istreambuf(data)); // NOTE(review): presumably reads from data in place, so data would have to outlive reading - confirm
+    stream_.reset(new std::istream(stream_buffer_.get())); // stream_ reads through stream_buffer_, which this reader owns
+    open(*stream_); // continue with the std::istream overload
+}
+
+void streaming_workbook_reader::open(const std::string &filename) // opens the file named filename in binary mode and starts streaming from it
+{
+    stream_.reset(new std::ifstream()); // this reader owns the file stream it reads from
+    open_stream(static_cast<std::ifstream &>(*stream_), filename); // cast the stream object itself, not the unique_ptr that holds it; it was created as an ifstream just above
+    open(*stream_); // continue with the std::istream overload
+}
+
+#ifdef _MSC_VER
+void streaming_workbook_reader::open(const std::wstring &filename) // MSVC only: opens the file at the wide path in binary mode and starts streaming from it
+{
+    stream_.reset(new std::ifstream()); // this reader owns the file stream it reads from
+    open_stream((std::ifstream &)*stream_, filename); // stream_ was created as an ifstream just above
+    open(*stream_); // continue with the std::istream overload
+}
+#endif
+
+void streaming_workbook_reader::open(const xlnt::path &filename) // opens the file at filename in binary mode and starts streaming from it
+{
+    stream_.reset(new std::ifstream()); // this reader owns the file stream it reads from
+    open_stream((std::ifstream &)*stream_, filename.string()); // stream_ was created as an ifstream just above
+    open(*stream_); // continue with the std::istream overload
+}
+
+void streaming_workbook_reader::open(std::istream &stream) // common entry point that every other open() overload ends up calling
+{
+    workbook_.reset(new workbook()); // fresh workbook used as the consumer's destination
+    consumer_.reset(new detail::xlsx_consumer(*workbook_));
+    consumer_->open(stream);
+    // Queue the relationship id of every worksheet related to the workbook part.
+    const auto workbook_rel = workbook_->manifest()
+        .relationship(path("/"), relationship_type::office_document);
+    const auto workbook_path = workbook_rel.target().path();
+
+    for (auto worksheet_rel : workbook_->manifest()
+        .relationships(workbook_path, relationship_type::worksheet))
+    {
+        worksheet_queue_.push_back(worksheet_rel.id()); // NOTE(review): the queue is not cleared first, so ids from an earlier open() would remain - confirm
+    }
+}
+
+} // namespace xlnt
--- a/source/workbook/workbook.cpp
+++ b/source/workbook/workbook.cpp
@ -1508,14 +1508,14 @@ void workbook::garbage_collect_formulae()

 void workbook::update_sheet_properties()
 {
-    if (has_extended_property(extended_property::titles_of_parts))
+    if (has_extended_property(xlnt::extended_property::titles_of_parts))
    {
-        extended_property(extended_property::titles_of_parts, sheet_titles());
+        extended_property(xlnt::extended_property::titles_of_parts, sheet_titles());
    }

-    if (has_extended_property(extended_property::heading_pairs))
+    if (has_extended_property(xlnt::extended_property::heading_pairs))
    {
-        extended_property(extended_property::heading_pairs,
+        extended_property(xlnt::extended_property::heading_pairs,
            std::vector<variant>{variant("Worksheets"), variant(static_cast<int>(sheet_count()))});
    }
 }
--- a/tests/cell/cell_test_suite.hpp
+++ b/tests/cell/cell_test_suite.hpp
@ -619,9 +619,9 @@ private:
        xlnt_assert_equals(cell.value<long double>(), 3.141592);

        auto cell2 = ws.cell("A2");
-        cell2.value(std::string(100'000, 'a'));
+        cell2.value(std::string(100000, 'a'));
        cell.value(cell2);
-        xlnt_assert_equals(cell.value<std::string>(), std::string(32'767, 'a'));
+        xlnt_assert_equals(cell.value<std::string>(), std::string(32767, 'a'));
    }

    void test_reference()
--- a/tests/workbook/serialization_test_suite.hpp
+++ b/tests/workbook/serialization_test_suite.hpp
@ -31,6 +31,8 @@
 #include <helpers/test_suite.hpp>
 #include <helpers/path_helper.hpp>
 #include <helpers/xml_helper.hpp>
+#include <xlnt/workbook/streaming_workbook_reader.hpp>
+#include <xlnt/workbook/streaming_workbook_writer.hpp>
 #include <xlnt/workbook/workbook.hpp>

 class serialization_test_suite : public test_suite
@ -56,6 +58,8 @@ public:
        register_test(test_read_custom_properties);
        register_test(test_round_trip_rw);
        register_test(test_round_trip_rw_encrypted);
+        register_test(test_streaming_read);
+        //register_test(test_streaming_write);
    }

 	bool workbook_matches_file(xlnt::workbook &wb, const xlnt::path &file)
@ -461,4 +465,42 @@ public:
            xlnt_assert(round_trip_matches_rw(path, password));
        }
    }
+
+    void test_streaming_read() // smoke test: streams every cell of every worksheet and asserts nothing about the contents
+    {
+        const auto path = path_helper::test_file("4_every_style.xlsx");
+        xlnt::streaming_workbook_reader reader;
+
+        reader.open(xlnt::path(path));
+
+        while (reader.has_worksheet())
+        {
+            reader.begin_worksheet();
+
+            while (reader.has_cell())
+            {
+                const auto cell = reader.read_cell(); // the returned cell is not inspected
+                //std::cout << cell.reference().to_string() << std::endl;
+            }
+
+            const auto ws = reader.end_worksheet(); // the returned worksheet is not inspected
+        }
+    }
+
+    void test_streaming_write() // not run yet: its register_test call in the constructor is commented out
+    {
+        const auto path = std::string("stream-out.xlsx");
+        xlnt::streaming_workbook_writer writer;
+
+        writer.open(path);
+
+        writer.add_sheet("stream");
+
+        auto b2 = writer.add_cell("B2");
+        b2.value("B2!");
+
+        auto c3 = writer.add_cell("C3");
+        b2.value("should not change"); // set after C3 was added; presumably B2 is expected to keep its earlier value - nothing asserts this yet
+        c3.value("C3!");
+    }
 };
--- a/third-party/utfcpp/utf8.h
+++ b/third-party/utfcpp/utf8.h
@ -0,0 +1,34 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "utf8/checked.h"
+#include "utf8/unchecked.h"
+
+#endif // header guard
--- a/third-party/utfcpp/utf8/checked.h
+++ b/third-party/utfcpp/utf8/checked.h
@ -0,0 +1,327 @@
+// Copyright 2006-2016 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+#include <stdexcept>
+
+namespace utf8
+{
+    // Common base class of the utf8-specific exceptions defined in this header,
+    // so that a single catch (const utf8::exception&) can handle all of them.
+    class exception : public ::std::exception
+    {
+    };
+
+    // Exceptions that may be thrown from the library functions.
+
+    // Reports a value that failed code point validation; the rejected value is
+    // stored in the exception and can be read back with code_point().
+    class invalid_code_point : public exception {
+        uint32_t cp;
+    public:
+        invalid_code_point(uint32_t codepoint) : cp(codepoint) {}
+        // noexcept replaces the dynamic exception specification throw(), which is
+        // deprecated since C++11 and no longer valid in C++20.
+        const char* what() const noexcept override { return "Invalid code point"; }
+        // Returns the value passed to the constructor.
+        uint32_t code_point() const {return cp;}
+    };
+
+    // Reports malformed UTF-8 input; stores one octet taken from the offending
+    // position, retrievable with utf8_octet().
+    class invalid_utf8 : public exception {
+        uint8_t u8;
+    public:
+        invalid_utf8 (uint8_t u) : u8(u) {}
+        // noexcept replaces the dynamic exception specification throw(), which is
+        // deprecated since C++11 and no longer valid in C++20.
+        const char* what() const noexcept override { return "Invalid UTF-8"; }
+        // Returns the octet passed to the constructor.
+        uint8_t utf8_octet() const {return u8;}
+    };
+
+    // Reports malformed UTF-16 input (an unpaired or misplaced surrogate); stores
+    // the offending 16-bit unit, retrievable with utf16_word().
+    class invalid_utf16 : public exception {
+        uint16_t u16;
+    public:
+        invalid_utf16 (uint16_t u) : u16(u) {}
+        // noexcept replaces the dynamic exception specification throw(), which is
+        // deprecated since C++11 and no longer valid in C++20.
+        const char* what() const noexcept override { return "Invalid UTF-16"; }
+        // Returns the 16-bit unit passed to the constructor.
+        uint16_t utf16_word() const {return u16;}
+    };
+
+    // Reports that the input range ended before a complete sequence could be read.
+    class not_enough_room : public exception {
+    public:
+        // noexcept replaces the dynamic exception specification throw(), which is
+        // deprecated since C++11 and no longer valid in C++20.
+        const char* what() const noexcept override { return "Not enough space"; }
+    };
+
+    /// The library API - functions intended to be called by the users
+
+    // Encodes the code point cp as UTF-8 and writes the octets through result.
+    // Returns the iterator positioned after the last octet written.
+    // Throws invalid_code_point when cp does not pass is_code_point_valid.
+    template <typename octet_iterator>
+    octet_iterator append(uint32_t cp, octet_iterator result)
+    {
+        if (!utf8::internal::is_code_point_valid(cp))
+            throw invalid_code_point(cp);
+
+        // Payload of a continuation octet: the lowest six bits, tagged with 10xxxxxx.
+        const uint8_t last_octet = static_cast<uint8_t>((cp & 0x3f) | 0x80);
+
+        if (cp >= 0x10000) {                  // four octets
+            *(result++) = static_cast<uint8_t>((cp >> 18)           | 0xf0);
+            *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)  | 0x80);
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
+            *(result++) = last_octet;
+        }
+        else if (cp >= 0x800) {               // three octets
+            *(result++) = static_cast<uint8_t>((cp >> 12)           | 0xe0);
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
+            *(result++) = last_octet;
+        }
+        else if (cp >= 0x80) {                // two octets
+            *(result++) = static_cast<uint8_t>((cp >> 6)            | 0xc0);
+            *(result++) = last_octet;
+        }
+        else                                  // one octet
+            *(result++) = static_cast<uint8_t>(cp);
+
+        return result;
+    }
+
+    // Copies the octets of [start, end) to out, writing the UTF-8 encoding of
+    // `replacement` in place of every invalid sequence. Returns out positioned
+    // after the last octet written.
+    //
+    // An input that stops in the middle of a multi-octet sequence is sanitized
+    // like any other invalid sequence (one replacement marker) instead of
+    // aborting the whole operation with not_enough_room.
+    //
+    // utf8::append throws invalid_code_point if `replacement` itself is not a
+    // valid code point.
+    template <typename octet_iterator, typename output_iterator>
+    output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
+    {
+        while (start != end) {
+            octet_iterator sequence_start = start;
+            // On success validate_next advances start past the sequence; on failure
+            // it leaves start at the beginning of the offending sequence.
+            internal::utf_error err_code = utf8::internal::validate_next(start, end);
+            switch (err_code) {
+                case internal::UTF8_OK :
+                    for (octet_iterator it = sequence_start; it != start; ++it)
+                        *out++ = *it;
+                    break;
+                case internal::NOT_ENOUGH_ROOM:
+                    // Truncated trailing sequence: emit one marker and consume
+                    // whatever is left of the input.
+                    out = utf8::append (replacement, out);
+                    start = end;
+                    break;
+                case internal::INVALID_LEAD:
+                    out = utf8::append (replacement, out);
+                    ++start;
+                    break;
+                case internal::INCOMPLETE_SEQUENCE:
+                case internal::OVERLONG_SEQUENCE:
+                case internal::INVALID_CODE_POINT:
+                    out = utf8::append (replacement, out);
+                    ++start;
+                    // just one replacement mark for the sequence
+                    while (start != end && utf8::internal::is_trail(*start))
+                        ++start;
+                    break;
+            }
+        }
+        return out;
+    }
+
+    // Convenience overload of replace_invalid that uses 0xfffd as the
+    // replacement code point.
+    template <typename octet_iterator, typename output_iterator>
+    inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
+    {
+        const uint32_t default_marker = utf8::internal::mask16(0xfffd);
+        return utf8::replace_invalid(start, end, out, default_marker);
+    }
+
+    // Decodes the UTF-8 sequence that starts at it and returns its code point.
+    // On success validate_next has moved it past the sequence; on failure it is
+    // left at the start of the sequence and one of the following is thrown:
+    //   not_enough_room    - the range ended before the sequence was complete
+    //   invalid_code_point - the sequence decodes to an invalid code point
+    //   invalid_utf8       - invalid lead, incomplete or overlong sequence
+    template <typename octet_iterator>
+    uint32_t next(octet_iterator& it, octet_iterator end)
+    {
+        uint32_t cp = 0;
+        const internal::utf_error status = utf8::internal::validate_next(it, end, cp);
+
+        if (status == internal::NOT_ENOUGH_ROOM)
+            throw not_enough_room();
+        if (status == internal::INVALID_CODE_POINT)
+            throw invalid_code_point(cp);
+        if (status != internal::UTF8_OK)
+            throw invalid_utf8(*it);
+
+        return cp;
+    }
+
+    // Same as utf8::next, but works on a copy of the iterator, so the caller's
+    // position is not changed. Throws whatever utf8::next throws.
+    template <typename octet_iterator>
+    uint32_t peek_next(octet_iterator it, octet_iterator end)
+    {
+        octet_iterator cursor = it;
+        return utf8::next(cursor, end);
+    }
+
+    // Moves it back to the lead octet of the sequence that precedes it and
+    // returns the code point of that sequence.
+    // Throws not_enough_room when it == start, invalid_utf8 when start is reached
+    // while still on a trail octet, and whatever utf8::peek_next throws.
+    template <typename octet_iterator>
+    uint32_t prior(octet_iterator& it, octet_iterator start)
+    {
+        // can't do much if it == start
+        if (it == start)
+            throw not_enough_room();
+
+        octet_iterator sequence_end = it;
+        // Step back over trail octets until a non-trail octet is found
+        --it;
+        while (utf8::internal::is_trail(*it)) {
+            if (it == start)
+                throw invalid_utf8(*it); // error - no lead byte in the sequence
+            --it;
+        }
+        return utf8::peek_next(it, sequence_end);
+    }
+
+    /// Deprecated in versions that include "prior"
+    // Moves it back to the first non-trail octet before its current position and
+    // returns the code point decoded from there up to the original position.
+    // Throws invalid_utf8 if pass_start is reached while still on a trail octet.
+    template <typename octet_iterator>
+    uint32_t previous(octet_iterator& it, octet_iterator pass_start)
+    {
+        octet_iterator sequence_end = it;
+        --it;
+        while (utf8::internal::is_trail(*it)) {
+            if (it == pass_start)
+                throw invalid_utf8(*it); // error - no lead byte in the sequence
+            --it;
+        }
+        // decode on a copy so that it stays on the lead octet
+        octet_iterator cursor = it;
+        return utf8::next(cursor, sequence_end);
+    }
+
+    // Moves it forward by n code points by calling utf8::next n times; any
+    // exception thrown by utf8::next propagates to the caller.
+    template <typename octet_iterator, typename distance_type>
+    void advance (octet_iterator& it, distance_type n, octet_iterator end)
+    {
+        distance_type done = 0;
+        while (done < n) {
+            utf8::next(it, end);
+            ++done;
+        }
+    }
+
+    // Counts the code points in [first, last) by decoding them one after another
+    // with utf8::next; any exception thrown by utf8::next propagates.
+    template <typename octet_iterator>
+    typename std::iterator_traits<octet_iterator>::difference_type
+    distance (octet_iterator first, octet_iterator last)
+    {
+        typename std::iterator_traits<octet_iterator>::difference_type count = 0;
+        while (first < last) {
+            utf8::next(first, last);
+            ++count;
+        }
+        return count;
+    }
+
+    // Converts the UTF-16 units in [start, end) to UTF-8 written through result
+    // and returns result positioned after the last octet written.
+    // Throws invalid_utf16 for a lead surrogate that is not followed by a trail
+    // surrogate and for a trail surrogate without a preceding lead surrogate.
+    template <typename u16bit_iterator, typename octet_iterator>
+    octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+    {
+        while (start != end) {
+            uint32_t cp = utf8::internal::mask16(*start++);
+
+            if (utf8::internal::is_lead_surrogate(cp)) {
+                // A lead surrogate must be followed by a trail surrogate
+                if (start == end)
+                    throw invalid_utf16(static_cast<uint16_t>(cp));
+
+                uint32_t trail_surrogate = utf8::internal::mask16(*start++);
+                if (!utf8::internal::is_trail_surrogate(trail_surrogate))
+                    throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
+
+                cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
+            }
+            else if (utf8::internal::is_trail_surrogate(cp)) {
+                // Lone trail surrogate
+                throw invalid_utf16(static_cast<uint16_t>(cp));
+            }
+
+            result = utf8::append(cp, result);
+        }
+        return result;
+    }
+
+    // Converts the UTF-8 octets in [start, end) to UTF-16 units written through
+    // result; code points above 0xffff are written as a surrogate pair.
+    // Returns result positioned after the last unit written. Exceptions thrown by
+    // utf8::next propagate.
+    template <typename u16bit_iterator, typename octet_iterator>
+    u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
+    {
+        while (start < end) {
+            const uint32_t cp = utf8::next(start, end);
+            if (cp <= 0xffff) {
+                *result++ = static_cast<uint16_t>(cp);
+                continue;
+            }
+            // outside the 16-bit range: make a surrogate pair
+            *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
+            *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
+        }
+        return result;
+    }
+
+    // Encodes every code point in [start, end) as UTF-8 through result and
+    // returns result positioned after the last octet written. utf8::append throws
+    // invalid_code_point for values that are not valid code points.
+    template <typename octet_iterator, typename u32bit_iterator>
+    octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
+    {
+        for (; start != end; ++start)
+            result = utf8::append(*start, result);
+
+        return result;
+    }
+
+    // Decodes the UTF-8 octets in [start, end) and writes one code point per
+    // sequence through result. Returns result positioned after the last value
+    // written. Exceptions thrown by utf8::next propagate.
+    template <typename octet_iterator, typename u32bit_iterator>
+    u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
+    {
+        while (start < end) {
+            const uint32_t cp = utf8::next(start, end);
+            *result++ = cp;
+        }
+
+        return result;
+    }
+
+    // The iterator class
+    //
+    // Bidirectional iterator adaptor over a range of octets: dereferencing yields
+    // the code point decoded at the current position, ++ and -- move by one whole
+    // UTF-8 sequence. Decoding goes through utf8::next / utf8::prior, so invalid
+    // input is reported with their exceptions.
+    template <typename octet_iterator>
+    class iterator {
+      octet_iterator it;
+      octet_iterator range_start;
+      octet_iterator range_end;
+      public:
+      // Iterator traits are declared directly. They match what the former base
+      // std::iterator<std::bidirectional_iterator_tag, uint32_t> supplied; that
+      // helper is deprecated since C++17.
+      typedef uint32_t value_type;
+      typedef uint32_t* pointer;
+      typedef uint32_t& reference;
+      typedef std::ptrdiff_t difference_type;
+      typedef std::bidirectional_iterator_tag iterator_category;
+
+      // Value-initializes the members, so a default-constructed iterator over raw
+      // pointers holds null pointers instead of indeterminate values.
+      iterator () : it(), range_start(), range_end() {}
+      // Creates an iterator at octet_it inside [rangestart, rangeend].
+      // Throws std::out_of_range when octet_it lies outside that range.
+      explicit iterator (const octet_iterator& octet_it,
+                         const octet_iterator& rangestart,
+                         const octet_iterator& rangeend) :
+               it(octet_it), range_start(rangestart), range_end(rangeend)
+      {
+          if (it < range_start || it > range_end)
+              throw std::out_of_range("Invalid utf-8 iterator position");
+      }
+      // the default "big three" are OK
+      // Returns the underlying octet iterator.
+      octet_iterator base () const { return it; }
+      // Decodes the code point at the current position without moving.
+      uint32_t operator * () const
+      {
+          octet_iterator temp = it;
+          return utf8::next(temp, range_end);
+      }
+      // Compares positions; throws std::logic_error when the two iterators were
+      // created over different ranges.
+      bool operator == (const iterator& rhs) const
+      {
+          if (range_start != rhs.range_start || range_end != rhs.range_end)
+              throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
+          return (it == rhs.it);
+      }
+      bool operator != (const iterator& rhs) const
+      {
+          return !(operator == (rhs));
+      }
+      // Advance by one code point.
+      iterator& operator ++ ()
+      {
+          utf8::next(it, range_end);
+          return *this;
+      }
+      iterator operator ++ (int)
+      {
+          iterator temp = *this;
+          utf8::next(it, range_end);
+          return temp;
+      }
+      // Step back by one code point.
+      iterator& operator -- ()
+      {
+          utf8::prior(it, range_start);
+          return *this;
+      }
+      iterator operator -- (int)
+      {
+          iterator temp = *this;
+          utf8::prior(it, range_start);
+          return temp;
+      }
+    }; // class iterator
+
+} // namespace utf8
+
+#endif //header guard
+
+
--- a/third-party/utfcpp/utf8/core.h
+++ b/third-party/utfcpp/utf8/core.h
@ -0,0 +1,332 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include <cstddef>
+#include <iterator>
+
+namespace utf8
+{
+    // Unsigned integer types of 8, 16 and 32 bits used throughout the library.
+    // The names mirror those from cstdint (or boost/cstdint); the underlying
+    // types may have to be adapted on systems where these widths differ.
+    typedef unsigned char   uint8_t;   // one UTF-8 octet
+    typedef unsigned short  uint16_t;  // one UTF-16 unit
+    typedef unsigned int    uint32_t;  // one code point
+
+// Helper code - not intended to be directly called by the library users. May be changed at any time
+namespace internal
+{
+    // Unicode constants
+
+    // Leading (high) surrogates: 0xd800 - 0xdbff
+    const uint16_t LEAD_SURROGATE_MIN  = 0xd800u;
+    const uint16_t LEAD_SURROGATE_MAX  = 0xdbffu;
+
+    // Trailing (low) surrogates: 0xdc00 - 0xdfff
+    const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
+    const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
+
+    // Added to (code point >> 10) to obtain the lead surrogate of a pair
+    const uint16_t LEAD_OFFSET         = LEAD_SURROGATE_MIN - (0x10000 >> 10);
+    // Added to (lead << 10) + trail to obtain the code point of a pair
+    const uint32_t SURROGATE_OFFSET    = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
+
+    // Maximum valid value for a Unicode code point
+    const uint32_t CODE_POINT_MAX      = 0x0010ffffu;
+
+    // Returns the lowest 8 bits of oc as an unsigned octet, so that signed char
+    // input does not produce negative values.
+    template<typename octet_type>
+    inline uint8_t mask8(octet_type oc)
+    {
+        return static_cast<uint8_t>(oc & 0xff);
+    }
+    // Returns the lowest 16 bits of oc as an unsigned 16-bit value.
+    template<typename u16_type>
+    inline uint16_t mask16(u16_type oc)
+    {
+        return static_cast<uint16_t>(oc & 0xffff);
+    }
+    // True when oc is a trail (continuation) octet, i.e. its two highest bits
+    // are 10.
+    template<typename octet_type>
+    inline bool is_trail(octet_type oc)
+    {
+        return ((utf8::internal::mask8(oc) & 0xc0) == 0x80);
+    }
+
+    // True when cp lies in the lead surrogate range
+    // [LEAD_SURROGATE_MIN, LEAD_SURROGATE_MAX].
+    template <typename u16>
+    inline bool is_lead_surrogate(u16 cp)
+    {
+        return !(cp < LEAD_SURROGATE_MIN || cp > LEAD_SURROGATE_MAX);
+    }
+
+    // True when cp lies in the trail surrogate range
+    // [TRAIL_SURROGATE_MIN, TRAIL_SURROGATE_MAX].
+    template <typename u16>
+    inline bool is_trail_surrogate(u16 cp)
+    {
+        return !(cp < TRAIL_SURROGATE_MIN || cp > TRAIL_SURROGATE_MAX);
+    }
+
+    // True when cp is any surrogate, lead or trail:
+    // [LEAD_SURROGATE_MIN, TRAIL_SURROGATE_MAX].
+    template <typename u16>
+    inline bool is_surrogate(u16 cp)
+    {
+        return !(cp < LEAD_SURROGATE_MIN || cp > TRAIL_SURROGATE_MAX);
+    }
+
+    // True when cp does not exceed CODE_POINT_MAX and is not a surrogate.
+    template <typename u32>
+    inline bool is_code_point_valid(u32 cp)
+    {
+        if (cp > CODE_POINT_MAX)
+            return false;
+        return !utf8::internal::is_surrogate(cp);
+    }
+
+    // Returns the length (1 to 4) of the UTF-8 sequence announced by the lead
+    // octet at lead_it, or 0 when that octet is not a valid lead octet.
+    template <typename octet_iterator>
+    inline typename std::iterator_traits<octet_iterator>::difference_type
+    sequence_length(octet_iterator lead_it)
+    {
+        const uint8_t lead = utf8::internal::mask8(*lead_it);
+
+        if (lead < 0x80)                // 0xxxxxxx
+            return 1;
+        if ((lead & 0xe0) == 0xc0)      // 110xxxxx
+            return 2;
+        if ((lead & 0xf0) == 0xe0)      // 1110xxxx
+            return 3;
+        if ((lead & 0xf8) == 0xf0)      // 11110xxx
+            return 4;
+
+        return 0;
+    }
+
+    // True when cp was encoded with more octets than necessary: code points
+    // below 0x80 need 1 octet, below 0x800 need 2, below 0x10000 need 3.
+    // Code points from 0x10000 upwards are never reported as overlong.
+    template <typename octet_difference_type>
+    inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
+    {
+        if (cp < 0x80)
+            return length != 1;
+        if (cp < 0x800)
+            return length != 2;
+        if (cp < 0x10000)
+            return length != 3;
+
+        return false;
+    }
+
+    // Result codes of the internal decoding and validation helpers.
+    enum utf_error {
+        UTF8_OK,              // sequence decoded and passed all checks
+        NOT_ENOUGH_ROOM,      // the range ended before the sequence was complete
+        INVALID_LEAD,         // first octet is not a valid lead octet
+        INCOMPLETE_SEQUENCE,  // a non-trail octet appeared where a trail octet was expected
+        OVERLONG_SEQUENCE,    // code point encoded with more octets than necessary
+        INVALID_CODE_POINT    // decoded value is not a valid code point
+    };
+
+    /// Helper for get_sequence_x
+    // Advances it by one octet and checks the new position: NOT_ENOUGH_ROOM when
+    // it reached end, INCOMPLETE_SEQUENCE when the octet there is not a trail
+    // octet, UTF8_OK otherwise.
+    template <typename octet_iterator>
+    utf_error increase_safely(octet_iterator& it, octet_iterator end)
+    {
+        ++it;
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        return utf8::internal::is_trail(*it) ? UTF8_OK : INCOMPLETE_SEQUENCE;
+    }
+
+    #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}    
+
+    /// get_sequence_x functions decode utf-8 sequences of the length x
+    // Each of them leaves it on the last octet it consumed (not one past it) and
+    // stores the decoded value in code_point.
+
+    // One-octet sequence: the code point is the octet itself.
+    template <typename octet_iterator>
+    utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it != end) {
+            code_point = utf8::internal::mask8(*it);
+            return UTF8_OK;
+        }
+
+        return NOT_ENOUGH_ROOM;
+    }
+
+    // Two-octet sequence (110xxxxx 10xxxxxx). On success it is left on the
+    // second octet. Returns NOT_ENOUGH_ROOM or INCOMPLETE_SEQUENCE when the trail
+    // octet is missing or malformed; code_point then holds a partial value.
+    template <typename octet_iterator>
+    utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        // lead octet
+        code_point = utf8::internal::mask8(*it);
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // five payload bits from the lead, six from the trail
+        const uint32_t low_bits = (*it) & 0x3f;
+        code_point = ((code_point << 6) & 0x7ff) + low_bits;
+
+        return UTF8_OK;
+    }
+
+    // Three-octet sequence (1110xxxx 10xxxxxx 10xxxxxx). On success it is left on
+    // the third octet. Returns NOT_ENOUGH_ROOM or INCOMPLETE_SEQUENCE when a trail
+    // octet is missing or malformed; code_point then holds a partial value.
+    template <typename octet_iterator>
+    utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        // lead octet
+        code_point = utf8::internal::mask8(*it);
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // four payload bits from the lead, six from the first trail
+        const uint32_t middle_bits = (utf8::internal::mask8(*it) << 6) & 0xfff;
+        code_point = ((code_point << 12) & 0xffff) + middle_bits;
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // six payload bits from the last trail
+        code_point += (*it) & 0x3f;
+
+        return UTF8_OK;
+    }
+
+    // Four-octet sequence (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx). On success it is
+    // left on the fourth octet. Returns NOT_ENOUGH_ROOM or INCOMPLETE_SEQUENCE
+    // when a trail octet is missing or malformed; code_point then holds a
+    // partial value.
+    template <typename octet_iterator>
+    utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        // lead octet
+        code_point = utf8::internal::mask8(*it);
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // three payload bits from the lead, six from the first trail
+        const uint32_t upper_bits = (utf8::internal::mask8(*it) << 12) & 0x3ffff;
+        code_point = ((code_point << 18) & 0x1fffff) + upper_bits;
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // six payload bits from the second trail
+        code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // six payload bits from the last trail
+        code_point += (*it) & 0x3f;
+
+        return UTF8_OK;
+    }
+
+    #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
+
+    // Decodes and validates the UTF-8 sequence starting at it.
+    //
+    // On success returns UTF8_OK, stores the decoded value in code_point and
+    // advances it past the sequence. On failure returns the error code and
+    // restores it to its original position.
+    //
+    // code_point is written on success and on INVALID_CODE_POINT, so that a
+    // caller reporting that error (utf8::next throws invalid_code_point with this
+    // value) sees the rejected value rather than whatever it initialized the
+    // variable with. For every other error code_point is left untouched.
+    template <typename octet_iterator>
+    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        // Save the original value of it so we can go back in case of failure
+        // Of course, it does not make much sense with i.e. stream iterators
+        octet_iterator original_it = it;
+
+        uint32_t cp = 0;
+        // Determine the sequence length based on the lead octet
+        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
+        const octet_difference_type length = utf8::internal::sequence_length(it);
+
+        // Get trail octets and calculate the code point
+        utf_error err = UTF8_OK;
+        switch (length) {
+            case 0:
+                return INVALID_LEAD;
+            case 1:
+                err = utf8::internal::get_sequence_1(it, end, cp);
+                break;
+            case 2:
+                err = utf8::internal::get_sequence_2(it, end, cp);
+                break;
+            case 3:
+                err = utf8::internal::get_sequence_3(it, end, cp);
+                break;
+            case 4:
+                err = utf8::internal::get_sequence_4(it, end, cp);
+                break;
+        }
+
+        if (err == UTF8_OK) {
+            // Decoding succeeded. Now, security checks...
+            if (utf8::internal::is_code_point_valid(cp)) {
+                if (!utf8::internal::is_overlong_sequence(cp, length)){
+                    // Passed! Return here.
+                    code_point = cp;
+                    // get_sequence_x leaves it on the last octet of the sequence
+                    ++it;
+                    return UTF8_OK;
+                }
+                else
+                    err = OVERLONG_SEQUENCE;
+            }
+            else {
+                // Hand the rejected value back so it can be reported
+                code_point = cp;
+                err = INVALID_CODE_POINT;
+            }
+        }
+
+        // Failure branch - restore the original value of the iterator
+        it = original_it;
+        return err;
+    }
+
+    // Overload for callers that only need the validation result: forwards to the
+    // three-argument validate_next and discards the decoded code point.
+    template <typename octet_iterator>
+    inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
+        uint32_t discarded_code_point = 0;
+        return utf8::internal::validate_next(it, end, discarded_code_point);
+    }
+
+} // namespace internal
+
+    /// The library API - functions intended to be called by the users
+
+    // The three octets of the UTF-8 byte order mark, in stream order
+    const uint8_t bom[] = {
+        0xef, 0xbb, 0xbf
+    };
+
+    // Returns an iterator to the start of the first invalid UTF-8 sequence in
+    // [start, end), or end when every sequence validates.
+    template <typename octet_iterator>
+    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
+    {
+        octet_iterator cursor = start;
+        while (cursor != end) {
+            // validate_next moves cursor past a valid sequence and leaves it in
+            // place on an invalid one
+            if (utf8::internal::validate_next(cursor, end) != internal::UTF8_OK)
+                break;
+        }
+        return cursor;
+    }
+
+    // True when [start, end) contains only valid UTF-8 sequences, i.e. when
+    // find_invalid runs to the end of the range.
+    template <typename octet_iterator>
+    inline bool is_valid(octet_iterator start, octet_iterator end)
+    {
+        const octet_iterator first_invalid = utf8::find_invalid(start, end);
+        return (first_invalid == end);
+    }
+
+    // True when [it, end) begins with the three octets of the byte order mark.
+    // The range is checked before every dereference, so inputs shorter than
+    // three octets simply yield false.
+    template <typename octet_iterator>
+    inline bool starts_with_bom (octet_iterator it, octet_iterator end)
+    {
+        if (it == end || utf8::internal::mask8(*it++) != bom[0])
+            return false;
+        if (it == end || utf8::internal::mask8(*it++) != bom[1])
+            return false;
+        return (it != end) && (utf8::internal::mask8(*it) == bom[2]);
+    }
+	
+    //Deprecated in release 2.3
+    // True when the three octets starting at it are the byte order mark. Unlike
+    // starts_with_bom there is no end iterator, so up to three octets are read
+    // without any range check.
+    template <typename octet_iterator>
+    inline bool is_bom (octet_iterator it)
+    {
+        if (utf8::internal::mask8(*it++) != bom[0])
+            return false;
+        if (utf8::internal::mask8(*it++) != bom[1])
+            return false;
+        return utf8::internal::mask8(*it) == bom[2];
+    }
+} // namespace utf8
+
+#endif // header guard
+
+
--- a/third-party/utfcpp/utf8/unchecked.h
+++ b/third-party/utfcpp/utf8/unchecked.h
@ -0,0 +1,228 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+
+namespace utf8
+{
+    namespace unchecked 
+    {
+        // Encodes cp as UTF-8 and writes the octets through result; returns the
+        // iterator positioned after the last octet written. cp is not validated.
+        template <typename octet_iterator>
+        octet_iterator append(uint32_t cp, octet_iterator result)
+        {
+            // Payload of a continuation octet: the lowest six bits, tagged 10xxxxxx.
+            const uint8_t last_octet = static_cast<uint8_t>((cp & 0x3f) | 0x80);
+
+            if (cp >= 0x10000) {                  // four octets
+                *(result++) = static_cast<uint8_t>((cp >> 18)         | 0xf0);
+                *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
+                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
+                *(result++) = last_octet;
+            }
+            else if (cp >= 0x800) {               // three octets
+                *(result++) = static_cast<uint8_t>((cp >> 12)         | 0xe0);
+                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
+                *(result++) = last_octet;
+            }
+            else if (cp >= 0x80) {                // two octets
+                *(result++) = static_cast<uint8_t>((cp >> 6)          | 0xc0);
+                *(result++) = last_octet;
+            }
+            else                                  // one octet
+                *(result++) = static_cast<uint8_t>(cp);
+
+            return result;
+        }
+
+        // Decodes the sequence starting at it, advances it past the octets it
+        // read and returns the code point. No validation: the sequence length is
+        // taken from the lead octet and the trail octets are read unconditionally.
+        template <typename octet_iterator>
+        uint32_t next(octet_iterator& it)
+        {
+            uint32_t cp = utf8::internal::mask8(*it);
+            const typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
+
+            if (length == 2) {
+                ++it;
+                cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
+            }
+            else if (length == 3) {
+                ++it;
+                cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
+                ++it;
+                cp += (*it) & 0x3f;
+            }
+            else if (length == 4) {
+                ++it;
+                cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
+                ++it;
+                cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
+                ++it;
+                cp += (*it) & 0x3f;
+            }
+            // any other length: cp is the octet itself
+
+            // step past the last octet that was read
+            ++it;
+            return cp;
+        }
+
+        // Same as utf8::unchecked::next, but works on a copy of the iterator, so
+        // the caller's position is not changed.
+        template <typename octet_iterator>
+        uint32_t peek_next(octet_iterator it)
+        {
+            octet_iterator cursor = it;
+            return utf8::unchecked::next(cursor);
+        }
+
+        // Moves it back to the nearest preceding non-trail octet and returns the
+        // code point decoded from there. There is no lower bound check.
+        template <typename octet_iterator>
+        uint32_t prior(octet_iterator& it)
+        {
+            do {
+                --it;
+            } while (utf8::internal::is_trail(*it));
+
+            // decode on a copy so that it stays on the lead octet
+            octet_iterator cursor = it;
+            return utf8::unchecked::next(cursor);
+        }
+
+        // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
+        // Plain forwarder to utf8::unchecked::prior.
+        template <typename octet_iterator>
+        inline uint32_t previous(octet_iterator& it)
+        {
+            const uint32_t cp = utf8::unchecked::prior(it);
+            return cp;
+        }
+
+        // Moves it forward by n code points by calling utf8::unchecked::next
+        // n times.
+        template <typename octet_iterator, typename distance_type>
+        void advance (octet_iterator& it, distance_type n)
+        {
+            distance_type done = 0;
+            while (done < n) {
+                utf8::unchecked::next(it);
+                ++done;
+            }
+        }
+
+        // Counts the code points in [first, last) by decoding them one after
+        // another with utf8::unchecked::next.
+        template <typename octet_iterator>
+        typename std::iterator_traits<octet_iterator>::difference_type
+        distance (octet_iterator first, octet_iterator last)
+        {
+            typename std::iterator_traits<octet_iterator>::difference_type count = 0;
+            while (first < last) {
+                utf8::unchecked::next(first);
+                ++count;
+            }
+            return count;
+        }
+
+        // Converts the UTF-16 units in [start, end) to UTF-8 written through
+        // result; returns result positioned after the last octet written.
+        // No validation: the unit after a lead surrogate is read without checking
+        // either the range or that it is a trail surrogate.
+        template <typename u16bit_iterator, typename octet_iterator>
+        octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+        {
+            while (start != end) {
+                uint32_t cp = utf8::internal::mask16(*start++);
+                if (!utf8::internal::is_lead_surrogate(cp)) {
+                    result = utf8::unchecked::append(cp, result);
+                    continue;
+                }
+                // Surrogate pair: combine lead and trail into one code point
+                const uint32_t trail_surrogate = utf8::internal::mask16(*start++);
+                cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
+                result = utf8::unchecked::append(cp, result);
+            }
+            return result;
+        }
+
+        // Converts the UTF-8 octets in [start, end) to UTF-16 units written
+        // through result; code points above 0xffff are written as a surrogate
+        // pair. Returns result positioned after the last unit written.
+        template <typename u16bit_iterator, typename octet_iterator>
+        u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
+        {
+            while (start < end) {
+                const uint32_t cp = utf8::unchecked::next(start);
+                if (cp <= 0xffff) {
+                    *result++ = static_cast<uint16_t>(cp);
+                    continue;
+                }
+                // outside the 16-bit range: make a surrogate pair
+                *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
+                *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
+            }
+            return result;
+        }
+
+        // Encodes every value in [start, end) as UTF-8 through result, without
+        // validating it, and returns result positioned after the last octet
+        // written.
+        template <typename octet_iterator, typename u32bit_iterator>
+        octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
+        {
+            for (; start != end; ++start)
+                result = utf8::unchecked::append(*start, result);
+
+            return result;
+        }
+
+        // Decodes the UTF-8 octets in [start, end) without validation and writes
+        // one code point per sequence through result. Returns result positioned
+        // after the last value written.
+        template <typename octet_iterator, typename u32bit_iterator>
+        u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
+        {
+            while (start < end) {
+                const uint32_t cp = utf8::unchecked::next(start);
+                *result++ = cp;
+            }
+
+            return result;
+        }
+
+        // The iterator class
+        //
+        // Bidirectional iterator adaptor over octets: dereferencing yields the
+        // code point decoded at the current position, ++ and -- move by one whole
+        // UTF-8 sequence. Nothing is validated and no range bounds are kept.
+        template <typename octet_iterator>
+          class iterator {
+            octet_iterator it;
+            public:
+            // Iterator traits are declared directly. They match what the former
+            // base std::iterator<std::bidirectional_iterator_tag, uint32_t>
+            // supplied; that helper is deprecated since C++17.
+            typedef uint32_t value_type;
+            typedef uint32_t* pointer;
+            typedef uint32_t& reference;
+            typedef std::ptrdiff_t difference_type;
+            typedef std::bidirectional_iterator_tag iterator_category;
+
+            // Value-initializes the member, so a default-constructed iterator over
+            // raw pointers holds a null pointer instead of an indeterminate value.
+            iterator () : it() {}
+            explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
+            // the default "big three" are OK
+            // Returns the underlying octet iterator.
+            octet_iterator base () const { return it; }
+            // Decodes the code point at the current position without moving.
+            uint32_t operator * () const
+            {
+                octet_iterator temp = it;
+                return utf8::unchecked::next(temp);
+            }
+            bool operator == (const iterator& rhs) const
+            {
+                return (it == rhs.it);
+            }
+            bool operator != (const iterator& rhs) const
+            {
+                return !(operator == (rhs));
+            }
+            // Advance by the length announced by the current lead octet.
+            iterator& operator ++ ()
+            {
+                ::std::advance(it, utf8::internal::sequence_length(it));
+                return *this;
+            }
+            iterator operator ++ (int)
+            {
+                iterator temp = *this;
+                ::std::advance(it, utf8::internal::sequence_length(it));
+                return temp;
+            }
+            // Step back to the previous lead octet.
+            iterator& operator -- ()
+            {
+                utf8::unchecked::prior(it);
+                return *this;
+            }
+            iterator operator -- (int)
+            {
+                iterator temp = *this;
+                utf8::unchecked::prior(it);
+                return temp;
+            }
+          }; // class iterator
+
+    } // namespace utf8::unchecked
+} // namespace utf8 
+
+
+#endif // header guard
+