2017-01-02 20:35:18 -05:00
|
|
|
// Copyright (c) 2014-2017 Thomas Fussell
|
2016-08-05 01:52:05 -04:00
|
|
|
// Copyright (c) 2010-2015 openpyxl
|
|
|
|
//
|
|
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
|
|
// in the Software without restriction, including without limitation the rights
|
|
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
|
|
// furnished to do so, subject to the following conditions:
|
|
|
|
//
|
|
|
|
// The above copyright notice and this permission notice shall be included in
|
|
|
|
// all copies or substantial portions of the Software.
|
|
|
|
//
|
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
// THE SOFTWARE.
|
|
|
|
//
|
|
|
|
// @license: http://www.opensource.org/licenses/mit-license.php
|
|
|
|
// @author: see AUTHORS file
|
2017-04-20 14:03:03 -04:00
|
|
|
|
2016-08-05 01:52:05 -04:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <cstdint>
|
2016-11-19 17:03:10 -05:00
|
|
|
#include <functional>
|
2016-08-05 01:52:05 -04:00
|
|
|
#include <iostream>
|
2016-10-30 15:48:40 -04:00
|
|
|
#include <memory>
|
2017-06-15 18:10:27 -04:00
|
|
|
#include <string>
|
2016-08-05 01:52:05 -04:00
|
|
|
#include <unordered_map>
|
|
|
|
#include <vector>
|
|
|
|
|
2017-04-20 14:03:03 -04:00
|
|
|
#include <detail/external/include_libstudxml.hpp>
|
|
|
|
#include <detail/serialization/zstream.hpp>
|
2016-08-05 01:52:05 -04:00
|
|
|
|
|
|
|
namespace xlnt {
|
|
|
|
|
2017-06-15 18:10:27 -04:00
|
|
|
class cell;
|
2016-11-30 00:31:06 +01:00
|
|
|
class color;
|
2016-12-23 06:51:30 -05:00
|
|
|
class rich_text;
|
2016-12-02 14:37:50 +01:00
|
|
|
class manifest;
|
2017-06-15 18:10:27 -04:00
|
|
|
template<typename T>
|
|
|
|
class optional;
|
2016-08-05 01:52:05 -04:00
|
|
|
class path;
|
|
|
|
class relationship;
|
2017-06-17 10:53:37 -04:00
|
|
|
class streaming_workbook_reader;
|
2017-04-11 12:02:35 -04:00
|
|
|
class variant;
|
2016-08-05 01:52:05 -04:00
|
|
|
class workbook;
|
2016-10-29 10:23:04 -04:00
|
|
|
class worksheet;
|
2016-08-05 01:52:05 -04:00
|
|
|
|
|
|
|
namespace detail {
|
|
|
|
|
2017-01-04 19:02:31 -05:00
|
|
|
class izstream;
|
2017-06-17 10:53:37 -04:00
|
|
|
struct cell_impl;
|
|
|
|
struct worksheet_impl;
|
2016-10-31 20:48:43 -04:00
|
|
|
|
2016-08-05 01:52:05 -04:00
|
|
|
/// <summary>
|
|
|
|
/// Handles writing a workbook into an XLSX file.
|
|
|
|
/// </summary>
|
2016-10-30 15:48:40 -04:00
|
|
|
class xlsx_consumer
|
2016-08-05 01:52:05 -04:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
xlsx_consumer(workbook &destination);
|
|
|
|
|
2017-07-02 18:54:32 -07:00
|
|
|
~xlsx_consumer();
|
2017-06-25 09:29:45 -04:00
|
|
|
|
2017-06-17 10:53:37 -04:00
|
|
|
void read(std::istream &source);
|
|
|
|
|
|
|
|
void read(std::istream &source, const std::string &password);
|
|
|
|
|
|
|
|
private:
|
2017-07-02 18:54:32 -07:00
|
|
|
friend class xlnt::streaming_workbook_reader;
|
2017-06-17 10:53:37 -04:00
|
|
|
|
2017-06-15 18:10:27 -04:00
|
|
|
void open(std::istream &source);
|
|
|
|
|
2017-06-17 10:53:37 -04:00
|
|
|
bool has_cell();
|
|
|
|
|
2017-06-15 18:10:27 -04:00
|
|
|
/// <summary>
|
|
|
|
/// Reads the next cell in the current worksheet and optionally returns it if
|
|
|
|
/// the last cell in the sheet has not yet been read. An exception will be thrown
|
|
|
|
/// if this is not open as a streaming consumer.
|
|
|
|
/// </summary>
|
|
|
|
cell read_cell();
|
|
|
|
|
2016-08-05 01:52:05 -04:00
|
|
|
/// <summary>
|
|
|
|
/// Read all the files needed from the XLSX archive and initialize all of
|
|
|
|
/// the data in the workbook to match.
|
|
|
|
/// </summary>
|
2017-06-15 18:10:27 -04:00
|
|
|
void populate_workbook(bool streaming);
|
2016-08-05 01:52:05 -04:00
|
|
|
|
2016-12-02 14:37:50 +01:00
|
|
|
/// <summary>
|
|
|
|
///
|
|
|
|
/// </summary>
|
|
|
|
void read_content_types();
|
2016-08-05 01:52:05 -04:00
|
|
|
|
2017-01-16 13:05:19 -05:00
|
|
|
// Metadata Property Readers
|
2016-10-11 23:16:14 -04:00
|
|
|
|
|
|
|
/// <summary>
|
2016-12-02 14:37:50 +01:00
|
|
|
/// Parse the core properties about the current package.
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
2017-01-16 13:05:19 -05:00
|
|
|
void read_core_properties();
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Parse the core properties about the current package.
|
|
|
|
/// </summary>
|
|
|
|
void read_extended_properties();
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Parse the core properties about the current package.
|
|
|
|
/// </summary>
|
|
|
|
void read_custom_properties();
|
2016-08-05 01:52:05 -04:00
|
|
|
|
2016-12-02 14:37:50 +01:00
|
|
|
// SpreadsheetML-Specific Package Part Readers
|
2016-08-05 01:52:05 -04:00
|
|
|
|
2016-10-11 23:16:14 -04:00
|
|
|
/// <summary>
|
|
|
|
/// Parse the main XML document about the workbook and then all child relationships
|
|
|
|
/// of the workbook (e.g. worksheets).
|
|
|
|
/// </summary>
|
2017-06-25 09:29:45 -04:00
|
|
|
void read_office_document(const std::string &content_type);
|
2016-08-05 01:52:05 -04:00
|
|
|
|
|
|
|
// Workbook Relationship Target Parts
|
|
|
|
|
2016-10-11 23:16:14 -04:00
|
|
|
/// <summary>
|
|
|
|
/// xl/calcChain.xml
|
|
|
|
/// </summary>
|
|
|
|
void read_calculation_chain();
|
|
|
|
|
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
|
|
|
void read_connections();
|
|
|
|
|
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
|
|
|
void read_custom_property();
|
|
|
|
|
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
|
|
|
void read_custom_xml_mappings();
|
|
|
|
|
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
|
|
|
void read_external_workbook_references();
|
|
|
|
|
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
|
|
|
void read_pivot_table();
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// xl/sharedStrings.xml
|
|
|
|
/// </summary>
|
|
|
|
void read_shared_string_table();
|
|
|
|
|
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
|
|
|
void read_shared_workbook_revision_headers();
|
|
|
|
|
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
|
|
|
void read_shared_workbook();
|
|
|
|
|
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
|
|
|
void read_shared_workbook_user_data();
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// xl/styles.xml
|
|
|
|
/// </summary>
|
|
|
|
void read_stylesheet();
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// xl/theme/theme1.xml
|
|
|
|
/// </summary>
|
|
|
|
void read_theme();
|
|
|
|
|
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
|
|
|
void read_volatile_dependencies();
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// xl/sheets/*.xml
|
|
|
|
/// </summary>
|
2017-06-21 09:17:06 -04:00
|
|
|
void read_chartsheet(const std::string &rel_id);
|
2016-10-11 23:16:14 -04:00
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// xl/sheets/*.xml
|
|
|
|
/// </summary>
|
2017-06-21 09:17:06 -04:00
|
|
|
void read_dialogsheet(const std::string &rel_id);
|
2016-10-11 23:16:14 -04:00
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// xl/sheets/*.xml
|
|
|
|
/// </summary>
|
2017-06-25 09:29:45 -04:00
|
|
|
void read_worksheet(const std::string &rel_id);
|
2016-08-05 01:52:05 -04:00
|
|
|
|
2017-06-17 10:53:37 -04:00
|
|
|
/// <summary>
|
|
|
|
/// xl/sheets/*.xml
|
|
|
|
/// </summary>
|
2017-06-21 09:17:06 -04:00
|
|
|
std::string read_worksheet_begin(const std::string &rel_id);
|
2017-06-22 08:56:01 -04:00
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// xl/sheets/*.xml
|
|
|
|
/// </summary>
|
|
|
|
void read_worksheet_sheetdata();
|
2017-06-17 10:53:37 -04:00
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// xl/sheets/*.xml
|
|
|
|
/// </summary>
|
2017-06-21 09:17:06 -04:00
|
|
|
worksheet read_worksheet_end(const std::string &rel_id);
|
2017-06-17 10:53:37 -04:00
|
|
|
|
2016-08-05 01:52:05 -04:00
|
|
|
// Sheet Relationship Target Parts
|
|
|
|
|
2016-10-11 23:16:14 -04:00
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
2016-10-29 10:23:04 -04:00
|
|
|
void read_comments(worksheet ws);
|
2017-07-02 18:54:32 -07:00
|
|
|
|
2016-11-13 11:52:39 -05:00
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-11-13 11:52:39 -05:00
|
|
|
/// </summary>
|
|
|
|
void read_vml_drawings(worksheet ws);
|
2016-10-11 23:16:14 -04:00
|
|
|
|
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
|
|
|
void read_drawings();
|
2016-08-05 01:52:05 -04:00
|
|
|
|
|
|
|
// Unknown Parts
|
|
|
|
|
2016-10-11 23:16:14 -04:00
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
|
|
|
void read_unknown_parts();
|
2016-08-05 01:52:05 -04:00
|
|
|
|
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-10-11 23:16:14 -04:00
|
|
|
/// </summary>
|
|
|
|
void read_unknown_relationships();
|
2016-12-02 14:37:50 +01:00
|
|
|
|
|
|
|
/// <summary>
|
2017-07-02 18:54:32 -07:00
|
|
|
///
|
2016-12-02 14:37:50 +01:00
|
|
|
/// </summary>
|
|
|
|
void read_image(const path &part);
|
|
|
|
|
|
|
|
// Common Section Readers
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Read part from the archive and return a vector of relationships
|
|
|
|
/// based on the content of that part.
|
|
|
|
/// </summary>
|
2016-11-25 13:13:55 +00:00
|
|
|
std::vector<relationship> read_relationships(const path &part);
|
2016-12-02 14:37:50 +01:00
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Read a CT_Color from the document currently being parsed.
|
|
|
|
/// </summary>
|
|
|
|
color read_color();
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Read a rich text CT_RElt from the document currently being parsed.
|
|
|
|
/// </summary>
|
2016-12-23 06:51:30 -05:00
|
|
|
rich_text read_rich_text(const xml::qname &parent);
|
2016-12-02 14:37:50 +01:00
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Returns true if the givent document type represents an XLSX file.
|
|
|
|
/// </summary>
|
|
|
|
bool document_type_is_xlsx(const std::string &document_content_type);
|
|
|
|
|
|
|
|
// SAX Parsing Helpers
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// In mixed content XML elements, whitespace before and after is not ignored.
|
|
|
|
/// Additionally, if PCDATA spans the boundary of the XML read buffer, it will
|
|
|
|
/// be parsed as two separate strings instead of on longer string. This method
|
|
|
|
/// will read character data until non-character data is peek()ed from the parser
|
|
|
|
/// and returns the combined strings. This should be used when parsing mixed
|
|
|
|
/// content to ignore whitespace and whenever character data is expected between
|
|
|
|
/// tags.
|
|
|
|
/// </summary>
|
2016-11-19 16:42:16 -05:00
|
|
|
std::string read_text();
|
2016-12-02 14:37:50 +01:00
|
|
|
|
2017-01-16 13:05:19 -05:00
|
|
|
variant read_variant();
|
|
|
|
|
2016-12-02 14:37:50 +01:00
|
|
|
/// <summary>
|
|
|
|
/// Read the part from the archive and parse it as XML. After this is called,
|
|
|
|
/// xlsx_consumer::parser() will return a reference to the parser that reads
|
|
|
|
/// this part.
|
|
|
|
/// </summary>
|
2017-06-25 09:29:45 -04:00
|
|
|
void read_part(const std::vector<relationship> &rel_chain);
|
2016-10-11 23:16:14 -04:00
|
|
|
|
2016-12-02 14:37:50 +01:00
|
|
|
/// <summary>
|
|
|
|
/// libstudxml will throw an exception if all attributes on an element are not
|
|
|
|
/// read with xml::parser::attribute(const std::string &). This should therefore
|
|
|
|
/// be called if every remaining attribute should be ignored on an element.
|
|
|
|
/// </summary>
|
2016-11-25 13:13:55 +00:00
|
|
|
void skip_attributes();
|
|
|
|
|
2016-12-02 14:37:50 +01:00
|
|
|
/// <summary>
|
|
|
|
/// Skip attribute name if it exists on the currently parsed element in the XML
|
|
|
|
/// parser.
|
|
|
|
/// </summary>
|
|
|
|
void skip_attribute(const std::string &name);
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Skip attribute name if it exists on the currently parsed element in the XML
|
|
|
|
/// parser.
|
|
|
|
/// </summary>
|
|
|
|
void skip_attribute(const xml::qname &name);
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Call skip_attribute on every name in names.
|
|
|
|
/// </summary>
|
2016-11-25 13:13:55 +00:00
|
|
|
void skip_attributes(const std::vector<xml::qname> &names);
|
|
|
|
|
2016-12-02 14:37:50 +01:00
|
|
|
/// <summary>
|
|
|
|
/// Call skip_attribute on every name in names.
|
|
|
|
/// </summary>
|
2016-11-25 13:13:55 +00:00
|
|
|
void skip_attributes(const std::vector<std::string> &names);
|
|
|
|
|
2016-12-02 14:37:50 +01:00
|
|
|
/// <summary>
|
|
|
|
/// Read all content in name until the closing tag is reached.
|
|
|
|
/// The closing tag will not be handled after this is called.
|
|
|
|
/// </summary>
|
2016-11-25 13:13:55 +00:00
|
|
|
void skip_remaining_content(const xml::qname &name);
|
|
|
|
|
2016-12-02 14:37:50 +01:00
|
|
|
/// <summary>
|
|
|
|
/// Handles the next event in the XML parser and throws an exception
|
|
|
|
/// if it is not the start of an element. Additionally sets the content
|
|
|
|
/// type of the element to content.
|
|
|
|
/// </summary>
|
2016-11-25 13:13:55 +00:00
|
|
|
xml::qname expect_start_element(xml::content content);
|
|
|
|
|
2016-12-02 14:37:50 +01:00
|
|
|
/// <summary>
|
|
|
|
/// Handles the next event in the XML parser and throws an exception
|
|
|
|
/// if the next element is not named name. Sets the content type of
|
|
|
|
/// the element to content.
|
|
|
|
/// </summary>
|
2016-11-25 13:13:55 +00:00
|
|
|
void expect_start_element(const xml::qname &name, xml::content content);
|
2016-12-02 14:37:50 +01:00
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Throws an exception if the next event in the XML parser is not
|
|
|
|
/// the end of element called name.
|
|
|
|
/// </summary>
|
2016-11-25 13:13:55 +00:00
|
|
|
void expect_end_element(const xml::qname &name);
|
2016-12-02 14:37:50 +01:00
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Returns true if the top of the parsing stack is called name and
|
|
|
|
/// the end of that element hasn't been reached in the XML document.
|
|
|
|
/// </summary>
|
2016-11-25 13:13:55 +00:00
|
|
|
bool in_element(const xml::qname &name);
|
|
|
|
|
2016-12-02 14:37:50 +01:00
|
|
|
// Properties
|
2016-11-30 00:31:06 +01:00
|
|
|
|
2016-12-02 14:37:50 +01:00
|
|
|
/// <summary>
|
|
|
|
/// Convenience method to dereference the pointer to the current parser to avoid
|
|
|
|
/// having to use "parser_->" constantly.
|
|
|
|
/// </summary>
|
|
|
|
xml::parser &parser();
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Convenience method to access the target workbook's manifest.
|
|
|
|
/// </summary>
|
2016-12-03 16:46:48 +01:00
|
|
|
class manifest &manifest();
|
2016-11-20 18:36:52 -05:00
|
|
|
|
2016-10-11 23:16:14 -04:00
|
|
|
/// <summary>
|
|
|
|
/// The ZIP file containing the files that make up the OOXML package.
|
2016-08-05 01:52:05 -04:00
|
|
|
/// </summary>
|
2017-01-04 19:02:31 -05:00
|
|
|
std::unique_ptr<izstream> archive_;
|
2016-08-05 01:52:05 -04:00
|
|
|
|
2016-10-11 23:16:14 -04:00
|
|
|
/// <summary>
|
|
|
|
/// Map of sheet titles to relationship IDs.
|
|
|
|
/// </summary>
|
2016-08-06 10:40:17 -04:00
|
|
|
std::unordered_map<std::string, std::size_t> sheet_title_id_map_;
|
2016-10-11 23:16:14 -04:00
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Map of sheet titles to indices. Used to ensure sheets are maintained
|
|
|
|
/// in the correct order.
|
|
|
|
/// </summary>
|
2016-08-06 10:40:17 -04:00
|
|
|
std::unordered_map<std::string, std::size_t> sheet_title_index_map_;
|
2016-08-05 01:52:05 -04:00
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// A reference to the workbook which is being read.
|
|
|
|
/// </summary>
|
2016-10-11 23:16:14 -04:00
|
|
|
workbook &target_;
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// This pointer is generally set by instantiating an xml::parser in a function
|
2017-07-02 18:54:32 -07:00
|
|
|
/// scope and then calling a read_*() method which uses xlsx_consumer::parser()
|
2016-10-11 23:16:14 -04:00
|
|
|
/// to access the object.
|
|
|
|
/// </summary>
|
|
|
|
xml::parser *parser_;
|
2017-07-02 18:54:32 -07:00
|
|
|
|
2016-11-25 13:13:55 +00:00
|
|
|
std::vector<xml::qname> stack_;
|
2016-12-14 07:23:49 +00:00
|
|
|
|
|
|
|
bool preserve_space_ = false;
|
2017-06-17 10:53:37 -04:00
|
|
|
|
2017-06-25 09:29:45 -04:00
|
|
|
bool streaming_ = false;
|
|
|
|
|
|
|
|
std::unique_ptr<detail::cell_impl> streaming_cell_;
|
|
|
|
|
|
|
|
detail::cell_impl *current_cell_;
|
2017-06-17 10:53:37 -04:00
|
|
|
|
2017-06-25 09:29:45 -04:00
|
|
|
detail::worksheet_impl *current_worksheet_;
|
2016-08-05 01:52:05 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace detail
|
|
|
|
} // namespace xlnt
|