// Copyright (c) 2014-2018 Thomas Fussell // Copyright (c) 2010-2015 openpyxl // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, WRISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE // // @license: http://www.opensource.org/licenses/mit-license.php // @author: see AUTHORS file #pragma once #include #include #include #include #include #include #include #include #include namespace xlnt { class cell; class color; class rich_text; class manifest; template class optional; class path; class relationship; class streaming_workbook_reader; class variant; class workbook; class worksheet; namespace detail { class izstream; struct cell_impl; struct worksheet_impl; /// /// Handles writing a workbook into an XLSX file. /// class xlsx_consumer { public: xlsx_consumer(workbook &destination); ~xlsx_consumer(); void read(std::istream &source); void read(std::istream &source, const std::string &password); private: friend class xlnt::streaming_workbook_reader; void open(std::istream &source); bool has_cell(); /// /// Reads the next cell in the current worksheet and optionally returns it if /// the last cell in the sheet has not yet been read. An exception will be thrown /// if this is not open as a streaming consumer. /// cell read_cell(); /// /// Read all the files needed from the XLSX archive and initialize all of /// the data in the workbook to match. /// void populate_workbook(bool streaming); /// /// /// void read_content_types(); // Metadata Property Readers /// /// Parse the core properties about the current package. /// void read_core_properties(); /// /// Parse the core properties about the current package. /// void read_extended_properties(); /// /// Parse the core properties about the current package. /// void read_custom_properties(); // SpreadsheetML-Specific Package Part Readers /// /// Parse the main XML document about the workbook and then all child relationships /// of the workbook (e.g. worksheets). /// void read_office_document(const std::string &content_type); // Workbook Relationship Target Parts /// /// xl/calcChain.xml /// void read_calculation_chain(); /// /// /// void read_connections(); /// /// /// void read_custom_property(); /// /// /// void read_custom_xml_mappings(); /// /// /// void read_external_workbook_references(); /// /// /// void read_pivot_table(); /// /// xl/sharedStrings.xml /// void read_shared_string_table(); /// /// /// void read_shared_workbook_revision_headers(); /// /// /// void read_shared_workbook(); /// /// /// void read_shared_workbook_user_data(); /// /// xl/styles.xml /// void read_stylesheet(); /// /// xl/theme/theme1.xml /// void read_theme(); /// /// /// void read_volatile_dependencies(); /// /// xl/sheets/*.xml /// void read_chartsheet(const std::string &rel_id); /// /// xl/sheets/*.xml /// void read_dialogsheet(const std::string &rel_id); /// /// xl/sheets/*.xml /// void read_worksheet(const std::string &rel_id); /// /// xl/sheets/*.xml /// std::string read_worksheet_begin(const std::string &rel_id); /// /// xl/sheets/*.xml /// void read_worksheet_sheetdata(); /// /// xl/sheets/*.xml /// worksheet read_worksheet_end(const std::string &rel_id); // Sheet Relationship Target Parts /// /// /// void read_comments(worksheet ws); /// /// /// void read_vml_drawings(worksheet ws); /// /// /// void read_drawings(worksheet ws, const path &part); // Unknown Parts /// /// /// void read_unknown_parts(); /// /// /// void read_unknown_relationships(); /// /// /// void read_image(const path &part); // Common Section Readers /// /// Read part from the archive and return a vector of relationships /// based on the content of that part. /// std::vector read_relationships(const path &part); /// /// Read a CT_Color from the document currently being parsed. /// color read_color(); /// /// Read a rich text CT_RElt from the document currently being parsed. /// rich_text read_rich_text(const xml::qname &parent); /// /// Returns true if the givent document type represents an XLSX file. /// bool document_type_is_xlsx(const std::string &document_content_type); // SAX Parsing Helpers /// /// In mixed content XML elements, whitespace before and after is not ignored. /// Additionally, if PCDATA spans the boundary of the XML read buffer, it will /// be parsed as two separate strings instead of on longer string. This method /// will read character data until non-character data is peek()ed from the parser /// and returns the combined strings. This should be used when parsing mixed /// content to ignore whitespace and whenever character data is expected between /// tags. /// std::string read_text(); variant read_variant(); /// /// Read the part from the archive and parse it as XML. After this is called, /// xlsx_consumer::parser() will return a reference to the parser that reads /// this part. /// void read_part(const std::vector &rel_chain); /// /// libstudxml will throw an exception if all attributes on an element are not /// read with xml::parser::attribute(const std::string &). This should therefore /// be called if every remaining attribute should be ignored on an element. /// void skip_attributes(); /// /// Skip attribute name if it exists on the currently parsed element in the XML /// parser. /// void skip_attribute(const std::string &name); /// /// Skip attribute name if it exists on the currently parsed element in the XML /// parser. /// void skip_attribute(const xml::qname &name); /// /// Call skip_attribute on every name in names. /// void skip_attributes(const std::vector &names); /// /// Call skip_attribute on every name in names. /// void skip_attributes(const std::vector &names); /// /// Read all content in name until the closing tag is reached. /// The closing tag will not be handled after this is called. /// void skip_remaining_content(const xml::qname &name); /// /// Handles the next event in the XML parser and throws an exception /// if it is not the start of an element. Additionally sets the content /// type of the element to content. /// xml::qname expect_start_element(xml::content content); /// /// Handles the next event in the XML parser and throws an exception /// if the next element is not named name. Sets the content type of /// the element to content. /// void expect_start_element(const xml::qname &name, xml::content content); /// /// Throws an exception if the next event in the XML parser is not /// the end of element called name. /// void expect_end_element(const xml::qname &name); /// /// Returns true if the top of the parsing stack is called name and /// the end of that element hasn't been reached in the XML document. /// bool in_element(const xml::qname &name); // Properties /// /// Convenience method to dereference the pointer to the current parser to avoid /// having to use "parser_->" constantly. /// xml::parser &parser(); /// /// Convenience method to access the target workbook's manifest. /// class manifest &manifest(); /// /// The ZIP file containing the files that make up the OOXML package. /// std::unique_ptr archive_; /// /// Map of sheet titles to relationship IDs. /// std::unordered_map sheet_title_id_map_; /// /// Map of sheet titles to indices. Used to ensure sheets are maintained /// in the correct order. /// std::unordered_map sheet_title_index_map_; /// /// A reference to the workbook which is being read. /// workbook &target_; /// /// This pointer is generally set by instantiating an xml::parser in a function /// scope and then calling a read_*() method which uses xlsx_consumer::parser() /// to access the object. /// xml::parser *parser_; std::vector stack_; bool preserve_space_ = false; bool streaming_ = false; std::unique_ptr streaming_cell_; detail::cell_impl *current_cell_; detail::worksheet_impl *current_worksheet_; }; } // namespace detail } // namespace xlnt