xlnt/source/detail/excel_serializer.cpp

387 lines
12 KiB
C++
Raw Normal View History

2015-12-25 06:10:02 +08:00
// Copyright (c) 2014-2016 Thomas Fussell
// Copyright (c) 2010-2015 openpyxl
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE
//
// @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file
2015-10-31 18:54:41 +08:00
#include <algorithm>
2015-10-31 18:52:59 +08:00
#include <iterator>
#include <pugixml.hpp>
2015-10-31 18:52:59 +08:00
#include <detail/constants.hpp>
#include <detail/excel_serializer.hpp>
#include <detail/manifest_serializer.hpp>
#include <detail/relationship_serializer.hpp>
#include <detail/shared_strings_serializer.hpp>
#include <detail/style_serializer.hpp>
#include <detail/stylesheet.hpp>
#include <detail/theme_serializer.hpp>
#include <detail/workbook_impl.hpp>
#include <detail/workbook_serializer.hpp>
#include <detail/worksheet_serializer.hpp>
#include <xlnt/cell/text.hpp>
2015-11-03 22:06:01 +08:00
#include <xlnt/packaging/document_properties.hpp>
#include <xlnt/packaging/manifest.hpp>
2016-05-15 01:57:07 +08:00
#include <xlnt/styles/format.hpp>
#include <xlnt/styles/style.hpp>
#include <xlnt/utils/exceptions.hpp>
2015-10-21 11:30:10 +08:00
#include <xlnt/workbook/workbook.hpp>
2016-01-25 00:15:49 +08:00
#include <xlnt/workbook/worksheet_iterator.hpp>
#include <xlnt/worksheet/range_iterator.hpp>
2015-10-30 07:37:07 +08:00
#include <xlnt/worksheet/worksheet.hpp>
2015-10-21 11:30:10 +08:00
namespace {
// Returns the index of the first occurrence of `substring` inside `string`,
// or std::string::npos when there is no occurrence.
//
// An empty `substring` matches at index 0 (the std::string::find convention);
// the previous hand-rolled scan threw std::out_of_range in that case because
// it unconditionally read substring.at(0).
std::string::size_type find_string_in_string(const std::string &string, const std::string &substring)
{
    // std::string::find performs the same first-match search without
    // allocating a temporary substring for every candidate position.
    return string.find(substring);
}
2016-07-18 02:59:11 +08:00
// Fills `wb` and `stylesheet` from the contents of an already loaded archive.
//
// archive     - package to read from
// guess_types - forwarded to wb.set_guess_types()
// data_only   - forwarded to wb.set_data_only()
// wb          - workbook to populate; it is cleared once the manifest has been
//               accepted
// stylesheet  - receives the parsed styles part and is handed to every
//               worksheet reader
//
// Returns true on completion. Throws xlnt::invalid_file_exception when the
// content types part is missing or the manifest does not describe an "excel"
// document.
bool load_workbook(xlnt::zip_file &archive, bool guess_types, bool data_only, xlnt::workbook &wb, xlnt::detail::stylesheet &stylesheet)
{
    // Reads one archive member and parses its text into `document`.
    const auto parse_member = [&archive](const std::string &member, pugi::xml_document &document)
    {
        document.load(archive.read(member).c_str());
    };

    wb.set_guess_types(guess_types);
    wb.set_data_only(data_only);

    if (!archive.has_file(xlnt::constants::ArcContentTypes()))
    {
        throw xlnt::invalid_file_exception("missing [Content Types].xml");
    }

    // Manifest: decides whether this package is a spreadsheet at all.
    xlnt::manifest_serializer ms(wb.get_manifest());
    pugi::xml_document content_types_doc;
    parse_member(xlnt::constants::ArcContentTypes(), content_types_doc);
    ms.read_manifest(content_types_doc);

    if (ms.determine_document_type() != "excel")
    {
        throw xlnt::invalid_file_exception("package is not an OOXML SpreadsheetML");
    }

    wb.clear();

    // Optional document properties (core, then app).
    if (archive.has_file(xlnt::constants::ArcCore()))
    {
        xlnt::workbook_serializer core_reader(wb);
        pugi::xml_document core_doc;
        parse_member(xlnt::constants::ArcCore(), core_doc);
        core_reader.read_properties_core(core_doc);
    }

    if (archive.has_file(xlnt::constants::ArcApp()))
    {
        xlnt::workbook_serializer app_reader(wb);
        pugi::xml_document app_doc;
        parse_member(xlnt::constants::ArcApp(), app_doc);
        app_reader.read_properties_app(app_doc);
    }

    // Relationships: package level first, then those of the workbook part.
    xlnt::relationship_serializer rel_reader(archive);

    auto package_rels = rel_reader.read_relationships("");

    for (const auto &entry : package_rels)
    {
        wb.create_root_relationship(entry.get_id(), entry.get_target_uri(), entry.get_type());
    }

    auto workbook_rels = rel_reader.read_relationships(xlnt::constants::ArcWorkbook());

    for (const auto &entry : workbook_rels)
    {
        wb.create_relationship(entry.get_id(), entry.get_target_uri(), entry.get_type());
    }

    // Workbook part: date system and (further below) the sheet list.
    pugi::xml_document workbook_doc;
    parse_member(xlnt::constants::ArcWorkbook(), workbook_doc);

    auto workbook_node = workbook_doc.child("workbook");
    auto date1904_attr = workbook_node.child("workbookPr").attribute("date1904");

    // The 1904 calendar is selected whenever the attribute is present with any
    // value other than "0".
    const bool uses_1904_dates = date1904_attr && date1904_attr.value() != std::string("0");

    if (uses_1904_dates)
    {
        wb.get_properties().excel_base_date = xlnt::calendar::mac_1904;
    }
    else
    {
        wb.get_properties().excel_base_date = xlnt::calendar::windows_1900;
    }

    // Shared strings are optional.
    if (archive.has_file(xlnt::constants::ArcSharedString()))
    {
        std::vector<xlnt::text> strings;
        pugi::xml_document strings_doc;
        parse_member(xlnt::constants::ArcSharedString(), strings_doc);
        xlnt::shared_strings_serializer::read_shared_strings(strings_doc, strings);

        for (auto &entry : strings)
        {
            wb.add_shared_string(entry, true);
        }
    }

    // Styles are read unconditionally (no has_file check, as before).
    xlnt::style_serializer styles_reader(stylesheet);
    pugi::xml_document styles_doc;
    parse_member(xlnt::constants::ArcStyles(), styles_doc);
    styles_reader.read_stylesheet(styles_doc);

    auto sheet_list = workbook_node.child("sheets");

    for (auto sheet_node : sheet_list.children())
    {
        auto rel = wb.get_relationship(sheet_node.attribute("r:id").value());

        // A sheet is created for every entry, including ones whose
        // relationship is not a worksheet.
        auto ws = wb.create_sheet(sheet_node.attribute("name").value(), rel);

        // TODO: fragile - prefixes "xl/" onto targets that do not already
        // start with it.
        const std::string target = rel.get_target_uri();
        const std::string ws_filename = (target.substr(0, 3) != "xl/") ? ("xl/" + target) : target;

        // The looked-up type is not used; the call is kept so that whatever
        // the manifest lookup does for an unknown part still happens here.
        static_cast<void>(wb.get_manifest().get_override_type(ws_filename));

        if (rel.get_type() == xlnt::relationship::type::worksheet)
        {
            xlnt::worksheet_serializer sheet_reader(ws);
            pugi::xml_document sheet_doc;
            parse_member(ws_filename, sheet_doc);
            sheet_reader.read_worksheet(sheet_doc, stylesheet);
        }
    }

    // Optional thumbnail, stored on the workbook as raw bytes.
    if (archive.has_file("docProps/thumbnail.jpeg"))
    {
        auto raw_thumbnail = archive.read("docProps/thumbnail.jpeg");
        wb.set_thumbnail(std::vector<std::uint8_t>(raw_thumbnail.begin(), raw_thumbnail.end()));
    }

    return true;
}
2015-10-21 11:30:10 +08:00
} // namespace
namespace xlnt {
// Returns the four bytes 'P' 'K' 0x05 0x06 that repair_central_directory()
// searches for.
const std::string excel_serializer::central_directory_signature()
{
    static const char signature_bytes[] = "\x50\x4b\x05\x06";
    return std::string(signature_bytes);
}
std::string excel_serializer::repair_central_directory(const std::string &original)
2015-10-21 11:30:10 +08:00
{
2015-10-30 07:37:07 +08:00
auto pos = find_string_in_string(original, central_directory_signature());
if (pos != std::string::npos)
2015-10-21 11:30:10 +08:00
{
return original.substr(0, pos + 22);
}
2015-10-21 11:30:10 +08:00
return original;
}
2015-10-30 07:37:07 +08:00
bool excel_serializer::load_stream_workbook(std::istream &stream, bool guess_types, bool data_only)
2015-10-21 11:30:10 +08:00
{
2015-11-02 12:52:19 +08:00
std::vector<std::uint8_t> bytes;
2015-11-03 05:45:05 +08:00
//TODO: inefficient?
2015-11-02 12:52:19 +08:00
while (stream.good())
{
2015-11-03 05:45:05 +08:00
bytes.push_back(static_cast<std::uint8_t>(stream.get()));
2015-11-02 12:52:19 +08:00
}
2015-10-30 07:37:07 +08:00
return load_virtual_workbook(bytes, guess_types, data_only);
2015-10-21 11:30:10 +08:00
}
bool excel_serializer::load_workbook(const std::string &filename, bool guess_types, bool data_only)
2015-10-21 11:30:10 +08:00
{
2015-10-24 02:42:36 +08:00
try
{
2015-10-30 07:37:07 +08:00
archive_.load(filename);
2015-10-24 02:42:36 +08:00
}
catch (std::runtime_error)
2015-10-24 02:42:36 +08:00
{
throw invalid_file_exception(filename);
}
2016-07-18 02:59:11 +08:00
return ::load_workbook(archive_, guess_types, data_only, workbook_, get_stylesheet());
2015-10-24 02:42:36 +08:00
}
2015-10-21 11:30:10 +08:00
2015-10-30 07:37:07 +08:00
bool excel_serializer::load_virtual_workbook(const std::vector<std::uint8_t> &bytes, bool guess_types, bool data_only)
2015-10-24 02:42:36 +08:00
{
2015-10-30 07:37:07 +08:00
archive_.load(bytes);
2016-07-18 02:59:11 +08:00
return ::load_workbook(archive_, guess_types, data_only, workbook_, get_stylesheet());
2015-10-21 11:30:10 +08:00
}
2015-10-31 06:54:04 +08:00
// Binds the serializer to `wb`; every load and save call operates on that
// workbook.
excel_serializer::excel_serializer(workbook &wb)
    : workbook_(wb)
{
}
2015-10-31 06:54:04 +08:00
void excel_serializer::write_data(bool /*as_template*/)
2015-10-30 01:46:56 +08:00
{
2015-11-04 07:26:33 +08:00
relationship_serializer relationship_serializer_(archive_);
relationship_serializer_.write_relationships(workbook_.get_root_relationships(), "");
relationship_serializer_.write_relationships(workbook_.get_relationships(), constants::ArcWorkbook());
pugi::xml_document properties_app_xml;
2015-10-31 06:54:04 +08:00
workbook_serializer workbook_serializer_(workbook_);
workbook_serializer_.write_properties_app(properties_app_xml);
2016-07-15 10:05:39 +08:00
{
std::ostringstream ss;
properties_app_xml.save(ss);
archive_.writestr(constants::ArcApp(), ss.str());
}
pugi::xml_document properties_core_xml;
workbook_serializer_.write_properties_core(properties_core_xml);
2016-07-15 10:05:39 +08:00
{
std::ostringstream ss;
properties_core_xml.save(ss);
archive_.writestr(constants::ArcCore(), ss.str());
}
pugi::xml_document theme_xml;
2015-10-30 07:37:07 +08:00
theme_serializer theme_serializer_;
theme_serializer_.write_theme(workbook_.get_loaded_theme(), theme_xml);
2016-07-15 10:05:39 +08:00
{
std::ostringstream ss;
theme_xml.save(ss);
archive_.writestr(constants::ArcTheme(), ss.str());
}
if (!workbook_.get_shared_strings().empty())
{
const auto &strings = workbook_.get_shared_strings();
pugi::xml_document shared_strings_xml;
2016-07-15 10:05:39 +08:00
shared_strings_serializer::write_shared_strings(strings, shared_strings_xml);
2016-07-15 10:05:39 +08:00
std::ostringstream ss;
shared_strings_xml.save(ss);
archive_.writestr(constants::ArcSharedString(), ss.str());
}
pugi::xml_document workbook_xml;
workbook_serializer_.write_workbook(workbook_xml);
2016-07-15 10:05:39 +08:00
{
std::ostringstream ss;
workbook_xml.save(ss);
archive_.writestr(constants::ArcWorkbook(), ss.str());
}
2016-06-11 01:40:50 +08:00
style_serializer style_serializer(workbook_.d_->stylesheet_);
pugi::xml_document style_xml;
2016-06-11 01:40:50 +08:00
style_serializer.write_stylesheet(style_xml);
2016-07-15 10:05:39 +08:00
{
std::ostringstream ss;
style_xml.save(ss);
archive_.writestr(constants::ArcStyles(), ss.str());
}
2015-10-31 06:54:04 +08:00
manifest_serializer manifest_serializer_(workbook_.get_manifest());
pugi::xml_document manifest_xml;
manifest_serializer_.write_manifest(manifest_xml);
2016-07-15 10:05:39 +08:00
{
std::ostringstream ss;
manifest_xml.save(ss);
archive_.writestr(constants::ArcContentTypes(), ss.str());
}
2015-10-30 07:37:07 +08:00
write_worksheets();
2016-07-15 10:05:39 +08:00
if(!workbook_.get_thumbnail().empty())
{
const auto &thumbnail = workbook_.get_thumbnail();
archive_.writestr("docProps/thumbnail.jpeg", std::string(thumbnail.begin(), thumbnail.end()));
}
2015-10-30 01:46:56 +08:00
}
2015-10-30 07:37:07 +08:00
void excel_serializer::write_worksheets()
2015-10-30 01:46:56 +08:00
{
std::size_t index = 0;
for (auto ws : workbook_)
2015-10-30 01:46:56 +08:00
{
for (auto relationship : workbook_.get_relationships())
2015-10-30 01:46:56 +08:00
{
if (relationship.get_type() == relationship::type::worksheet &&
workbook::index_from_ws_filename(relationship.get_target_uri()) == index)
2015-10-30 01:46:56 +08:00
{
2015-10-30 07:37:07 +08:00
worksheet_serializer serializer_(ws);
std::string ws_filename = (relationship.get_target_uri().substr(0, 3) != "xl/" ? "xl/" : "") + relationship.get_target_uri();
std::ostringstream ss;
pugi::xml_document worksheet_xml;
serializer_.write_worksheet(worksheet_xml);
worksheet_xml.save(ss);
archive_.writestr(ws_filename, ss.str());
2015-10-30 01:46:56 +08:00
break;
}
}
2015-10-30 01:46:56 +08:00
index++;
}
}
2015-10-30 07:37:07 +08:00
// Placeholder: the body is empty, so calling this writes nothing.
void excel_serializer::write_external_links()
{
}
2015-10-30 07:37:07 +08:00
bool excel_serializer::save_stream_workbook(std::ostream &stream, bool as_template)
2015-10-30 01:46:56 +08:00
{
2015-10-30 07:37:07 +08:00
write_data(as_template);
archive_.save(stream);
2015-10-30 07:37:07 +08:00
return true;
2015-10-30 01:46:56 +08:00
}
bool excel_serializer::save_workbook(const std::string &filename, bool as_template)
2015-10-30 01:46:56 +08:00
{
2015-10-30 07:37:07 +08:00
write_data(as_template);
archive_.save(filename);
2015-10-30 01:46:56 +08:00
return true;
}
2015-10-30 07:37:07 +08:00
bool excel_serializer::save_virtual_workbook(std::vector<std::uint8_t> &bytes, bool as_template)
2015-10-30 01:46:56 +08:00
{
2015-10-30 07:37:07 +08:00
write_data(as_template);
archive_.save(bytes);
2015-10-30 07:37:07 +08:00
return true;
2015-10-30 01:46:56 +08:00
}
2016-06-11 01:40:50 +08:00
// Gives mutable access to the stylesheet stored in the bound workbook's
// implementation object.
detail::stylesheet &excel_serializer::get_stylesheet()
{
    auto &workbook_impl_ref = *workbook_.d_;
    return workbook_impl_ref.stylesheet_;
}
2015-10-21 11:30:10 +08:00
} // namespace xlnt