start working on streaming write

This commit is contained in:
Thomas Fussell 2017-07-04 17:52:46 -07:00
parent 16bfbf78f5
commit f97ad6c1bd
12 changed files with 332 additions and 95 deletions

View File

@ -30,9 +30,29 @@ void xlsx2arrow(std::istream &s, ::arrow::Table &table)
reader.end_worksheet(); reader.end_worksheet();
} }
void arrow2xlsx(const ::arrow::Table &table, std::istream &s) void arrow2xlsx(const ::arrow::Table &table, std::ostream &s)
{ {
xlnt::streaming_workbook_writer writer;
writer.open(s);
writer.begin_worksheet();
while (reader.has_cell())
{
auto cell = reader.read_cell();
if (first_row < 1)
{
first_row = cell.row();
}
if (cell.reference().row() % 1000 == 1)
{
std::cout << cell.reference().to_string() << std::endl;
}
}
reader.end_worksheet();
} }
} }

View File

@ -6,7 +6,7 @@ namespace xlnt {
namespace arrow { namespace arrow {
void XLNT_API xlsx2arrow(std::istream &s, ::arrow::Table &table); void XLNT_API xlsx2arrow(std::istream &s, ::arrow::Table &table);
void XLNT_API arrow2xlsx(const ::arrow::Table &table, std::istream &s); void XLNT_API arrow2xlsx(const ::arrow::Table &table, std::ostream &s);
} }
} }

View File

@ -28,14 +28,23 @@
#include <xlnt/xlnt_config.hpp> #include <xlnt/xlnt_config.hpp>
namespace xml {
class serializer;
}
namespace xlnt { namespace xlnt {
namespace detail {
class xlsx_producer;
} // namespace detail
/// <summary> /// <summary>
/// workbook is the container for all other parts of the document. /// workbook is the container for all other parts of the document.
/// </summary> /// </summary>
class XLNT_API streaming_workbook_writer class XLNT_API streaming_workbook_writer
{ {
public: public:
streaming_workbook_writer();
~streaming_workbook_writer(); ~streaming_workbook_writer();
/// <summary> /// <summary>
@ -56,38 +65,46 @@ public:
/// Ends writing of data to the current sheet and begins writing a new sheet /// Ends writing of data to the current sheet and begins writing a new sheet
/// with the given title. /// with the given title.
/// </summary> /// </summary>
worksheet add_sheet(const std::string &title); worksheet add_worksheet(const std::string &title);
/// <summary> /// <summary>
/// Serializes the workbook into an XLSX file and saves the bytes into /// Serializes the workbook into an XLSX file and saves the bytes into
/// byte vector data. /// byte vector data.
/// </summary> /// </summary>
void open(std::vector<std::uint8_t> &data) const; void open(std::vector<std::uint8_t> &data);
/// <summary> /// <summary>
/// Serializes the workbook into an XLSX file and saves the data into a file /// Serializes the workbook into an XLSX file and saves the data into a file
/// named filename. /// named filename.
/// </summary> /// </summary>
void open(const std::string &filename) const; void open(const std::string &filename);
#ifdef _MSC_VER #ifdef _MSC_VER
/// <summary> /// <summary>
/// Serializes the workbook into an XLSX file and saves the data into a file /// Serializes the workbook into an XLSX file and saves the data into a file
/// named filename. /// named filename.
/// </summary> /// </summary>
void open(const std::wstring &filename) const; void open(const std::wstring &filename);
#endif #endif
/// <summary> /// <summary>
/// Serializes the workbook into an XLSX file and saves the data into a file /// Serializes the workbook into an XLSX file and saves the data into a file
/// named filename. /// named filename.
/// </summary> /// </summary>
void open(const xlnt::path &filename) const; void open(const xlnt::path &filename);
/// <summary> /// <summary>
/// Serializes the workbook into an XLSX file and saves the data into stream. /// Serializes the workbook into an XLSX file and saves the data into stream.
/// </summary> /// </summary>
void open(std::ostream &stream) const; void open(std::ostream &stream);
std::unique_ptr<xlnt::detail::xlsx_producer> producer_;
std::unique_ptr<workbook> workbook_;
std::unique_ptr<std::ostream> stream_;
std::unique_ptr<std::streambuf> stream_buffer_;
std::unique_ptr<std::ostream> part_stream_;
std::unique_ptr<std::streambuf> part_stream_buffer_;
std::unique_ptr<xml::serializer> serializer_;
}; };
} // namespace xlnt } // namespace xlnt

View File

@ -32,6 +32,7 @@
#include <detail/external/include_libstudxml.hpp> #include <detail/external/include_libstudxml.hpp>
#include <detail/serialization/vector_streambuf.hpp> #include <detail/serialization/vector_streambuf.hpp>
#include <detail/serialization/xlsx_producer.hpp> #include <detail/serialization/xlsx_producer.hpp>
#include <detail/serialization/zstream.hpp>
#include <xlnt/utils/exceptions.hpp> #include <xlnt/utils/exceptions.hpp>
namespace { namespace {
@ -314,6 +315,7 @@ void xlsx_producer::write(std::ostream &destination, const std::string &password
vector_ostreambuf plaintext_buffer(plaintext); vector_ostreambuf plaintext_buffer(plaintext);
std::ostream decrypted_stream(&plaintext_buffer); std::ostream decrypted_stream(&plaintext_buffer);
write(decrypted_stream); write(decrypted_stream);
archive_.reset();
const auto ciphertext = ::encrypt_xlsx(plaintext, utf8_to_utf16(password)); const auto ciphertext = ::encrypt_xlsx(plaintext, utf8_to_utf16(password));
vector_istreambuf encrypted_buffer(ciphertext); vector_istreambuf encrypted_buffer(ciphertext);

View File

@ -0,0 +1,63 @@
// Copyright (c) 2017 Thomas Fussell
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE
//
// @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file
#include <detail/serialization/open_stream.hpp>
#include <xlnt/utils/path.hpp>
namespace xlnt {
namespace detail {

#ifdef _MSC_VER
/// <summary>
/// Opens the input file stream on the wide-character path in binary mode.
/// Only compiled when _MSC_VER is defined.
/// </summary>
void open_stream(std::ifstream &stream, const std::wstring &path)
{
    stream.open(path, std::ios_base::binary);
}

/// <summary>
/// Opens the output file stream on the wide-character path in binary mode.
/// Only compiled when _MSC_VER is defined.
/// </summary>
void open_stream(std::ofstream &stream, const std::wstring &path)
{
    stream.open(path, std::ios_base::binary);
}
#endif

/// <summary>
/// Opens the input file stream on path in binary mode. When _MSC_VER is
/// defined, the path is converted with xlnt::path::wstring() and forwarded
/// to the wide-character overload instead of being opened directly.
/// </summary>
void open_stream(std::ifstream &stream, const std::string &path)
{
#ifdef _MSC_VER
    open_stream(stream, xlnt::path(path).wstring());
#else
    stream.open(path, std::ios_base::binary);
#endif
}

/// <summary>
/// Opens the output file stream on path in binary mode. When _MSC_VER is
/// defined, the path is converted with xlnt::path::wstring() and forwarded
/// to the wide-character overload instead of being opened directly.
/// </summary>
void open_stream(std::ofstream &stream, const std::string &path)
{
#ifdef _MSC_VER
    open_stream(stream, xlnt::path(path).wstring());
#else
    stream.open(path, std::ios_base::binary);
#endif
}

} // namespace detail
} // namespace xlnt

View File

@ -0,0 +1,48 @@
// Copyright (c) 2017 Thomas Fussell
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE
//
// @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file
#pragma once
#include <fstream>
#include <iostream>
#include <string>
namespace xlnt {
namespace detail {

#ifdef _MSC_VER
/// <summary>
/// Wide-character path overloads, declared only when _MSC_VER is defined.
/// </summary>
void open_stream(std::ifstream &stream, const std::wstring &path);
void open_stream(std::ofstream &stream, const std::wstring &path);
#endif

/// <summary>
/// Opens the given input file stream on the file named by path.
/// </summary>
void open_stream(std::ifstream &stream, const std::string &path);

/// <summary>
/// Opens the given output file stream on the file named by path.
/// </summary>
void open_stream(std::ofstream &stream, const std::string &path);

} // namespace detail
} // namespace xlnt

View File

@ -90,17 +90,40 @@ xlsx_producer::xlsx_producer(const workbook &target)
{ {
} }
/// <summary>
/// Finishes output when the producer is destroyed: calls end_part() and then
/// releases the archive stream owned by archive_.
/// </summary>
xlsx_producer::~xlsx_producer()
{
// end_part() runs while archive_ is still alive; presumably the open part
// writes into the archive, so this order must be kept - TODO confirm.
end_part();
archive_.reset();
}
void xlsx_producer::write(std::ostream &destination) void xlsx_producer::write(std::ostream &destination)
{ {
ozstream archive(destination); archive_.reset(new ozstream(destination));
archive_ = &archive; populate_archive(false);
populate_archive(); }
void xlsx_producer::open(std::ostream &destination)
{
archive_.reset(new ozstream(destination));
populate_archive(true);
}
/// <summary>
/// Returns a cell handle constructed from current_cell_.
/// NOTE(review): ref is not used by this implementation, so the returned
/// cell does not reflect the requested reference - looks like
/// work in progress; confirm intended behavior.
/// </summary>
cell xlsx_producer::add_cell(const cell_reference &ref)
{
return cell(current_cell_);
}
worksheet xlsx_producer::add_worksheet(const std::string &title)
{
return worksheet(current_worksheet_);
} }
// Part Writing Methods // Part Writing Methods
void xlsx_producer::populate_archive() void xlsx_producer::populate_archive(bool streaming)
{ {
streaming_ = streaming;
write_content_types(); write_content_types();
const auto root_rels = source_.manifest().relationships(path("/")); const auto root_rels = source_.manifest().relationships(path("/"));

View File

@ -38,12 +38,14 @@ class serializer;
namespace xlnt { namespace xlnt {
class border; class border;
class cell;
class cell_reference; class cell_reference;
class color; class color;
class fill; class fill;
class font; class font;
class path; class path;
class relationship; class relationship;
class streaming_workbook_writer;
class variant; class variant;
class workbook; class workbook;
class worksheet; class worksheet;
@ -51,6 +53,8 @@ class worksheet;
namespace detail { namespace detail {
class ozstream; class ozstream;
struct cell_impl;
struct worksheet_impl;
/// <summary> /// <summary>
/// Handles writing a workbook into an XLSX file. /// Handles writing a workbook into an XLSX file.
@ -60,16 +64,26 @@ class xlsx_producer
public: public:
xlsx_producer(const workbook &target); xlsx_producer(const workbook &target);
~xlsx_producer();
void write(std::ostream &destination); void write(std::ostream &destination);
void write(std::ostream &destination, const std::string &password); void write(std::ostream &destination, const std::string &password);
private: private:
friend class xlnt::streaming_workbook_writer;
void open(std::ostream &destination);
cell add_cell(const cell_reference &ref);
worksheet add_worksheet(const std::string &title);
/// <summary> /// <summary>
/// Write all files needed to create a valid XLSX file which represents all /// Write all files needed to create a valid XLSX file which represents all
/// data contained in workbook. /// data contained in workbook.
/// </summary> /// </summary>
void populate_archive(); void populate_archive(bool streaming);
void begin_part(const path &part); void begin_part(const path &part);
void end_part(); void end_part();
@ -179,10 +193,18 @@ private:
/// </summary> /// </summary>
const workbook &source_; const workbook &source_;
ozstream *archive_; std::unique_ptr<ozstream> archive_;
std::unique_ptr<xml::serializer> current_part_serializer_; std::unique_ptr<xml::serializer> current_part_serializer_;
std::unique_ptr<std::streambuf> current_part_streambuf_; std::unique_ptr<std::streambuf> current_part_streambuf_;
std::ostream current_part_stream_; std::ostream current_part_stream_;
bool streaming_ = false;
std::unique_ptr<detail::cell_impl> streaming_cell_;
detail::cell_impl *current_cell_;
detail::worksheet_impl *current_worksheet_;
}; };
} // namespace detail } // namespace detail

View File

@ -23,6 +23,7 @@
#include <fstream> #include <fstream>
#include <detail/serialization/open_stream.hpp>
#include <detail/serialization/vector_streambuf.hpp> #include <detail/serialization/vector_streambuf.hpp>
#include <detail/serialization/xlsx_consumer.hpp> #include <detail/serialization/xlsx_consumer.hpp>
#include <xlnt/cell/cell.hpp> #include <xlnt/cell/cell.hpp>
@ -32,45 +33,6 @@
#include <xlnt/workbook/workbook.hpp> #include <xlnt/workbook/workbook.hpp>
#include <xlnt/worksheet/worksheet.hpp> #include <xlnt/worksheet/worksheet.hpp>
namespace {
//TODO: (important) this is duplicated from workbook.cpp, find a common place to keep it
#ifdef _MSC_VER
void open_stream(std::ifstream &stream, const std::wstring &path)
{
stream.open(path, std::ios::binary);
}
void open_stream(std::ofstream &stream, const std::wstring &path)
{
stream.open(path, std::ios::binary);
}
void open_stream(std::ifstream &stream, const std::string &path)
{
open_stream(stream, xlnt::path(path).wstring());
}
void open_stream(std::ofstream &stream, const std::string &path)
{
open_stream(stream, xlnt::path(path).wstring());
}
#else
void open_stream(std::ifstream &stream, const std::string &path)
{
stream.open(path, std::ios::binary);
}
void open_stream(std::ofstream &stream, const std::string &path)
{
stream.open(path, std::ios::binary);
}
#endif
} // namespace
namespace xlnt { namespace xlnt {
streaming_workbook_reader::streaming_workbook_reader() streaming_workbook_reader::streaming_workbook_reader()
@ -145,7 +107,7 @@ void streaming_workbook_reader::open(const std::vector<std::uint8_t> &data)
void streaming_workbook_reader::open(const std::string &filename) void streaming_workbook_reader::open(const std::string &filename)
{ {
stream_.reset(new std::ifstream()); stream_.reset(new std::ifstream());
open_stream((std::ifstream &)stream_, filename); xlnt::detail::open_stream((std::ifstream &)stream_, filename);
open(*stream_); open(*stream_);
} }
@ -153,7 +115,7 @@ void streaming_workbook_reader::open(const std::string &filename)
void streaming_workbook_reader::open(const std::wstring &filename) void streaming_workbook_reader::open(const std::wstring &filename)
{ {
stream_.reset(new std::ifstream()); stream_.reset(new std::ifstream());
open_stream((std::ifstream &)*stream_, filename); xlnt::detail::open_stream((std::ifstream &)*stream_, filename);
open(*stream_); open(*stream_);
} }
#endif #endif
@ -161,7 +123,7 @@ void streaming_workbook_reader::open(const std::wstring &filename)
void streaming_workbook_reader::open(const xlnt::path &filename) void streaming_workbook_reader::open(const xlnt::path &filename)
{ {
stream_.reset(new std::ifstream()); stream_.reset(new std::ifstream());
open_stream((std::ifstream &)*stream_, filename.string()); xlnt::detail::open_stream((std::ifstream &)*stream_, filename.string());
open(*stream_); open(*stream_);
} }

View File

@ -0,0 +1,109 @@
// Copyright (c) 2017 Thomas Fussell
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE
//
// @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file
#include <fstream>
#include <detail/implementations/cell_impl.hpp>
#include <detail/implementations/worksheet_impl.hpp>
#include <detail/serialization/open_stream.hpp>
#include <detail/serialization/vector_streambuf.hpp>
#include <detail/serialization/xlsx_producer.hpp>
#include <xlnt/cell/cell.hpp>
#include <xlnt/packaging/manifest.hpp>
#include <xlnt/utils/optional.hpp>
#include <xlnt/workbook/streaming_workbook_writer.hpp>
#include <xlnt/workbook/workbook.hpp>
#include <xlnt/worksheet/worksheet.hpp>
namespace xlnt {
/// <summary>
/// Constructs a writer with no output attached; the body performs no work.
/// One of the open() overloads attaches the destination.
/// </summary>
streaming_workbook_writer::streaming_workbook_writer()
{
}
/// <summary>
/// Closes the writer on destruction, so the producer is released by close()
/// before the remaining members are destroyed.
/// </summary>
streaming_workbook_writer::~streaming_workbook_writer()
{
    this->close();
}
void streaming_workbook_writer::close()
{
if (producer_)
{
producer_.reset(nullptr);
stream_buffer_.reset(nullptr);
}
}
/// <summary>
/// Forwards to the active producer's add_cell for the given reference.
/// producer_ is dereferenced unchecked, so one of the open() overloads must
/// have been called first.
/// </summary>
cell streaming_workbook_writer::add_cell(const cell_reference &ref)
{
    auto &active_producer = *producer_;
    return active_producer.add_cell(ref);
}
/// <summary>
/// Forwards to the active producer's add_worksheet with the given title.
/// producer_ is dereferenced unchecked, so one of the open() overloads must
/// have been called first.
/// </summary>
worksheet streaming_workbook_writer::add_worksheet(const std::string &title)
{
    auto &active_producer = *producer_;
    return active_producer.add_worksheet(title);
}
/// <summary>
/// Begins streaming output into the byte vector data. The writer keeps a
/// stream buffer built over data and an ostream on top of it, then delegates
/// to the std::ostream overload.
/// </summary>
void streaming_workbook_writer::open(std::vector<std::uint8_t> &data)
{
    auto *vector_buffer = new detail::vector_ostreambuf(data);
    stream_buffer_.reset(vector_buffer);

    stream_.reset(new std::ostream(vector_buffer));
    open(*stream_);
}
/// <summary>
/// Begins streaming output into the file named filename. The writer owns the
/// file stream, opens it through detail::open_stream and then delegates to
/// the std::ostream overload.
/// </summary>
void streaming_workbook_writer::open(const std::string &filename)
{
    auto *file_stream = new std::ofstream();
    stream_.reset(file_stream);

    xlnt::detail::open_stream(*file_stream, filename);
    open(*stream_);
}
#ifdef _MSC_VER
/// <summary>
/// Begins streaming output into the file named by the wide-character
/// filename. Only compiled when _MSC_VER is defined. The writer owns the file
/// stream, opens it through detail::open_stream and then delegates to the
/// std::ostream overload.
/// </summary>
void streaming_workbook_writer::open(const std::wstring &filename)
{
    auto *file_stream = new std::ofstream();
    stream_.reset(file_stream);

    xlnt::detail::open_stream(*file_stream, filename);
    open(*stream_);
}
#endif
/// <summary>
/// Begins streaming output into the file identified by filename. The path is
/// converted with string() and opened through detail::open_stream; the writer
/// owns the file stream and then delegates to the std::ostream overload.
/// </summary>
void streaming_workbook_writer::open(const xlnt::path &filename)
{
    auto *file_stream = new std::ofstream();
    stream_.reset(file_stream);

    const auto filename_string = filename.string();
    xlnt::detail::open_stream(*file_stream, filename_string);
    open(*stream_);
}
/// <summary>
/// Begins streaming output into stream. Creates a fresh workbook and a
/// producer bound to it, opens the producer on stream, and installs the
/// worksheet and cell implementation objects that the producer hands out
/// from add_worksheet and add_cell.
/// </summary>
void streaming_workbook_writer::open(std::ostream &stream)
{
    // The producer holds a reference to the workbook it was built from, so a
    // producer left over from an earlier open() must be destroyed before the
    // workbook it refers to is replaced.
    producer_.reset(nullptr);

    workbook_.reset(new workbook());
    producer_.reset(new detail::xlsx_producer(*workbook_));
    producer_->open(stream);

    // NOTE(review): no owner for this worksheet_impl is visible here, so it
    // appears never to be freed - confirm and give it an owning member.
    producer_->current_worksheet_ = new detail::worksheet_impl(workbook_.get(), 1, "Sheet1");

    // Let the producer own the streaming cell through streaming_cell_ so it is
    // destroyed with the producer; current_cell_ stays a non-owning alias.
    producer_->streaming_cell_.reset(new detail::cell_impl());
    producer_->current_cell_ = producer_->streaming_cell_.get();
    producer_->current_cell_->parent_ = producer_->current_worksheet_;
}
} // namespace xlnt

View File

@ -35,6 +35,7 @@
#include <detail/implementations/worksheet_impl.hpp> #include <detail/implementations/worksheet_impl.hpp>
#include <detail/serialization/excel_thumbnail.hpp> #include <detail/serialization/excel_thumbnail.hpp>
#include <detail/serialization/vector_streambuf.hpp> #include <detail/serialization/vector_streambuf.hpp>
#include <detail/serialization/open_stream.hpp>
#include <detail/serialization/xlsx_consumer.hpp> #include <detail/serialization/xlsx_consumer.hpp>
#include <detail/serialization/xlsx_producer.hpp> #include <detail/serialization/xlsx_producer.hpp>
#include <xlnt/cell/cell.hpp> #include <xlnt/cell/cell.hpp>
@ -63,37 +64,7 @@
namespace { namespace {
#ifdef _MSC_VER using xlnt::detail::open_stream;
void open_stream(std::ifstream &stream, const std::wstring &path)
{
stream.open(path, std::ios::binary);
}
void open_stream(std::ofstream &stream, const std::wstring &path)
{
stream.open(path, std::ios::binary);
}
void open_stream(std::ifstream &stream, const std::string &path)
{
open_stream(stream, xlnt::path(path).wstring());
}
void open_stream(std::ofstream &stream, const std::string &path)
{
open_stream(stream, xlnt::path(path).wstring());
}
#else
void open_stream(std::ifstream &stream, const std::string &path)
{
stream.open(path, std::ios::binary);
}
void open_stream(std::ofstream &stream, const std::string &path)
{
stream.open(path, std::ios::binary);
}
#endif
template<typename T> template<typename T>
std::vector<T> keys(const std::vector<std::pair<T, xlnt::variant>> &container) std::vector<T> keys(const std::vector<std::pair<T, xlnt::variant>> &container)

View File

@ -59,7 +59,7 @@ public:
register_test(test_round_trip_rw); register_test(test_round_trip_rw);
register_test(test_round_trip_rw_encrypted); register_test(test_round_trip_rw_encrypted);
register_test(test_streaming_read); register_test(test_streaming_read);
//register_test(test_streaming_write); register_test(test_streaming_write);
} }
bool workbook_matches_file(xlnt::workbook &wb, const xlnt::path &file) bool workbook_matches_file(xlnt::workbook &wb, const xlnt::path &file)
@ -494,7 +494,7 @@ public:
writer.open(path); writer.open(path);
writer.add_sheet("stream"); writer.add_worksheet("stream");
auto b2 = writer.add_cell("B2"); auto b2 = writer.add_cell("B2");
b2.value("B2!"); b2.value("B2!");