// Copyright (c) 2014-2016 Thomas Fussell
// Copyright (c) 2010-2015 openpyxl
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE
//
// @license: http://www.opensource.org/licenses/mit-license.php
// @author: see AUTHORS file

#pragma once

#include <cstddef>
#include <istream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

// NOTE(review): this header carried eight #include directives whose targets
// were lost. The standard headers above are the ones the declarations in this
// file need. The project headers still have to be restored by hand: at least
// the libstudxml header(s) that declare xml::parser, xml::qname and
// xml::content, and whatever defines ZipFileReader if a complete type is
// required here.

namespace xlnt {

class color;
class formatted_text;
class manifest;
class path;
class relationship;
class workbook;
class worksheet;

namespace detail {

class ZipFileReader;

/// Handles writing a workbook into an XLSX file. ///

class xlsx_consumer { public: xlsx_consumer(workbook &destination); void read(std::istream &source); void read(std::istream &source, const std::string &password); private: ///

/// Read all the files needed from the XLSX archive and initialize all of /// the data in the workbook to match. ///

void populate_workbook(); ///

/// ///

void read_content_types(); // Metadata Readers ///

/// Read core, extended, and custom properties. ///

void read_metadata_properties(); ///

/// Parse the core properties about the current package. ///

void read_properties(const path &part, const xml::qname &root); // SpreadsheetML-Specific Package Part Readers ///

/// Parse the main XML document about the workbook and then all child relationships /// of the workbook (e.g. worksheets). ///

void read_office_document(const std::string &content_type); // Workbook Relationship Target Parts ///

/// xl/calcChain.xml ///

void read_calculation_chain(); ///

/// ///

void read_connections(); ///

/// ///

void read_custom_property(); ///

/// ///

void read_custom_xml_mappings(); ///

/// ///

void read_external_workbook_references(); ///

/// ///

void read_metadata(); ///

/// ///

void read_pivot_table(); ///

/// xl/sharedStrings.xml ///

void read_shared_string_table(); ///

/// ///

void read_shared_workbook_revision_headers(); ///

/// ///

void read_shared_workbook(); ///

/// ///

void read_shared_workbook_user_data(); ///

/// xl/styles.xml ///

void read_stylesheet(); ///

/// xl/theme/theme1.xml ///

void read_theme(); ///

/// ///

void read_volatile_dependencies(); ///

/// xl/sheets/*.xml ///

void read_chartsheet(const std::string &title); ///

/// xl/sheets/*.xml ///

void read_dialogsheet(const std::string &title); ///

/// xl/sheets/*.xml ///

void read_worksheet(const std::string &title); // Sheet Relationship Target Parts ///

/// ///

void read_comments(worksheet ws); ///

/// ///

void read_vml_drawings(worksheet ws); ///

/// ///

void read_drawings(); // Unknown Parts ///

/// ///

void read_unknown_parts(); ///

/// ///

void read_unknown_relationships(); ///

/// ///

void read_image(const path &part); // Common Section Readers ///

/// Read part from the archive and return a vector of relationships /// based on the content of that part. ///

std::vector read_relationships(const path &part); ///

/// Read a CT_Color from the document currently being parsed. ///

color read_color(); ///

/// Read a rich text CT_RElt from the document currently being parsed. ///

formatted_text read_formatted_text(const xml::qname &parent); ///

/// Returns true if the givent document type represents an XLSX file. ///

bool document_type_is_xlsx(const std::string &document_content_type); // SAX Parsing Helpers ///

/// In mixed content XML elements, whitespace before and after is not ignored. /// Additionally, if PCDATA spans the boundary of the XML read buffer, it will /// be parsed as two separate strings instead of on longer string. This method /// will read character data until non-character data is peek()ed from the parser /// and returns the combined strings. This should be used when parsing mixed /// content to ignore whitespace and whenever character data is expected between /// tags. ///

std::string read_text(); ///

/// Read the part from the archive and parse it as XML. After this is called, /// xlsx_consumer::parser() will return a reference to the parser that reads /// this part. ///

void read_part(const std::vector &rel_chain); ///

/// libstudxml will throw an exception if all attributes on an element are not /// read with xml::parser::attribute(const std::string &). This should therefore /// be called if every remaining attribute should be ignored on an element. ///

void skip_attributes(); ///

/// Skip attribute name if it exists on the currently parsed element in the XML /// parser. ///

void skip_attribute(const std::string &name); ///

/// Skip attribute name if it exists on the currently parsed element in the XML /// parser. ///

void skip_attribute(const xml::qname &name); ///

/// Call skip_attribute on every name in names. ///

void skip_attributes(const std::vector &names); ///

/// Call skip_attribute on every name in names. ///

void skip_attributes(const std::vector &names); ///

/// Read all content in name until the closing tag is reached. /// The closing tag will not be handled after this is called. ///

void skip_remaining_content(const xml::qname &name); ///

/// Handles the next event in the XML parser and throws an exception /// if it is not the start of an element. Additionally sets the content /// type of the element to content. ///

xml::qname expect_start_element(xml::content content); ///

/// Handles the next event in the XML parser and throws an exception /// if the next element is not named name. Sets the content type of /// the element to content. ///

void expect_start_element(const xml::qname &name, xml::content content); ///

/// Throws an exception if the next event in the XML parser is not /// the end of element called name. ///

void expect_end_element(const xml::qname &name); ///

/// Returns true if the top of the parsing stack is called name and /// the end of that element hasn't been reached in the XML document. ///

bool in_element(const xml::qname &name); ///

/// Handles all start and end namespace events from the current parser /// and returns a vector of strings containing the URL for each namespace. ///

std::vector read_namespaces(); // Properties ///

/// Convenience method to dereference the pointer to the current parser to avoid /// having to use "parser_->" constantly. ///

xml::parser &parser(); ///

/// Convenience method to access the target workbook's manifest. ///

class manifest &manifest(); ///

/// The ZIP file containing the files that make up the OOXML package. ///

std::unique_ptr archive_; ///

/// Map of sheet titles to relationship IDs. ///

std::unordered_map sheet_title_id_map_; ///

/// Map of sheet titles to indices. Used to ensure sheets are maintained /// in the correct order. ///

std::unordered_map sheet_title_index_map_; ///

/// A reference to the workbook which is being read. ///

workbook &target_; ///

/// This pointer is generally set by instantiating an xml::parser in a function /// scope and then calling a read_*() method which uses xlsx_consumer::parser() /// to access the object. ///

xml::parser *parser_; std::vector stack_; }; } // namespace detail } // namespace xlnt