#include #include #include #include #include #include #include #include #include #include #include #include namespace { bool is_true(const std::string &bool_string) { return bool_string == "1" || bool_string == "true"; } std::size_t string_to_size_t(const std::string &s) { #if ULLONG_MAX == SIZE_MAX return std::stoull(s); #else return std::stoul(s); #endif } xlnt::datetime w3cdtf_to_datetime(const std::string &string) { xlnt::datetime result(1900, 1, 1); auto separator_index = string.find('-'); result.year = std::stoi(string.substr(0, separator_index)); result.month = std::stoi(string.substr(separator_index + 1, string.find('-', separator_index + 1))); separator_index = string.find('-', separator_index + 1); result.day = std::stoi(string.substr(separator_index + 1, string.find('T', separator_index + 1))); separator_index = string.find('T', separator_index + 1); result.hour = std::stoi(string.substr(separator_index + 1, string.find(':', separator_index + 1))); separator_index = string.find(':', separator_index + 1); result.minute = std::stoi(string.substr(separator_index + 1, string.find(':', separator_index + 1))); separator_index = string.find(':', separator_index + 1); result.second = std::stoi(string.substr(separator_index + 1, string.find('Z', separator_index + 1))); return result; } /* struct EnumClassHash { template std::size_t operator()(T t) const { return static_cast(t); } }; */ xlnt::protection read_protection(xml::parser &parser) { parser.next_expect(xml::parser::event_type::start_element, "protection"); xlnt::protection prot; prot.locked(is_true(parser.attribute("locked"))); prot.hidden(is_true(parser.attribute("hidden"))); parser.next_expect(xml::parser::event_type::end_element, "protection"); return prot; } xlnt::alignment read_alignment(xml::parser &parser) { xlnt::alignment align; align.wrap(is_true(parser.attribute("wrapText"))); align.shrink(is_true(parser.attribute("shrinkToFit"))); if (parser.attribute_present("vertical")) { 
align.vertical(parser.attribute("vertical")); } if (parser.attribute_present("horizontal")) { align.horizontal(parser.attribute("horizontal")); } return align; } xlnt::color read_color(xml::parser &parser) { xlnt::color result; if (parser.attribute_present("auto")) { return result; } if (parser.attribute_present("rgb")) { result = xlnt::rgb_color(parser.attribute("rgb")); } else if (parser.attribute_present("theme")) { result = xlnt::theme_color(string_to_size_t(parser.attribute("theme"))); } else if (parser.attribute_present("indexed")) { result = xlnt::indexed_color(string_to_size_t(parser.attribute("indexed"))); } if (parser.attribute_present("tint")) { result.set_tint(parser.attribute("tint", 0.0)); } return result; } xlnt::font read_font(xml::parser &parser) { static const auto xmlns = xlnt::constants::get_namespace("worksheet"); xlnt::font new_font; parser.next_expect(xml::parser::event_type::start_element, xmlns, "font"); parser.content(xml::parser::content_type::complex); while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element); parser.content(xml::parser::content_type::simple); if (parser.name() == "sz") { new_font.size(string_to_size_t(parser.attribute("val"))); } else if (parser.name() == "name") { new_font.name(parser.attribute("val")); } else if (parser.name() == "color") { new_font.color(read_color(parser)); } else if (parser.name() == "family") { new_font.family(string_to_size_t(parser.attribute("val"))); } else if (parser.name() == "scheme") { new_font.scheme(parser.attribute("val")); } else if (parser.name() == "b") { if (parser.attribute_present("val")) { new_font.bold(is_true(parser.attribute("val"))); } else { new_font.bold(true); } } else if (parser.name() == "strike") { if (parser.attribute_present("val")) { new_font.strikethrough(is_true(parser.attribute("val"))); } else { new_font.strikethrough(true); } } else if (parser.name() == "i") { if 
(parser.attribute_present("val")) { new_font.italic(is_true(parser.attribute("val"))); } else { new_font.italic(true); } } else if (parser.name() == "u") { if (parser.attribute_present("val")) { new_font.underline(parser.attribute("val")); } else { new_font.underline(xlnt::font::underline_style::single); } } parser.next_expect(xml::parser::event_type::end_element); } parser.next_expect(xml::parser::event_type::end_element, xmlns, "font"); return new_font; } void read_indexed_colors(xml::parser &parser, std::vector &colors) { colors.clear(); while (true) { if (parser.peek() == xml::parser::event_type::end_element) { break; } colors.push_back(read_color(parser)); } parser.next_expect(xml::parser::event_type::end_element, "indexedColors"); } xlnt::fill read_fill(xml::parser &parser) { static const auto xmlns = xlnt::constants::get_namespace("worksheet"); xlnt::fill new_fill; parser.next_expect(xml::parser::event_type::start_element, xmlns, "fill"); parser.content(xml::parser::content_type::complex); parser.next_expect(xml::parser::event_type::start_element); if (parser.qname() == xml::qname(xmlns, "patternFill")) { xlnt::pattern_fill pattern; if (parser.attribute_present("patternType")) { pattern.type(parser.attribute("patternType")); while (true) { if (parser.peek() == xml::parser::event_type::end_element) { break; } parser.next_expect(xml::parser::event_type::start_element); if (parser.name() == "fgColor") { pattern.foreground(read_color(parser)); } else if (parser.name() == "bgColor") { pattern.background(read_color(parser)); } parser.next_expect(xml::parser::event_type::end_element); } } new_fill = pattern; } else if (parser.qname() == xml::qname(xmlns, "gradientFill")) { xlnt::gradient_fill gradient; if (parser.attribute_present("type")) { gradient.type(parser.attribute("type")); } else { gradient.type(xlnt::gradient_fill_type::linear); } while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; 
parser.next_expect(xml::parser::event_type::start_element, "stop"); auto position = parser.attribute("position"); parser.next_expect(xml::parser::event_type::start_element, "color"); auto color = read_color(parser); parser.next_expect(xml::parser::event_type::end_element, "color"); parser.next_expect(xml::parser::event_type::end_element, "stop"); gradient.add_stop(position, color); } new_fill = gradient; } parser.next_expect(xml::parser::event_type::end_element); // or parser.next_expect(xml::parser::event_type::end_element); // return new_fill; } xlnt::border::border_property read_side(xml::parser &parser) { xlnt::border::border_property new_side; if (parser.attribute_present("style")) { new_side.style(parser.attribute("style")); } if (parser.peek() == xml::parser::event_type::start_element) { parser.next_expect(xml::parser::event_type::start_element, "color"); new_side.color(read_color(parser)); parser.next_expect(xml::parser::event_type::end_element, "color"); } return new_side; } xlnt::border read_border(xml::parser &parser) { xlnt::border new_border; parser.next_expect(xml::parser::event_type::start_element); // parser.content(xml::parser::content_type::complex); while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element); auto side_type = xml::value_traits::parse(parser.name(), parser); auto side = read_side(parser); new_border.side(side_type, side); parser.next_expect(xml::parser::event_type::end_element); } parser.next_expect(xml::parser::event_type::end_element); // return new_border; } std::vector read_relationships(const xlnt::path &part, xlnt::zip_file &archive) { std::vector relationships; if (!archive.has_file(part)) return relationships; std::istringstream rels_stream(archive.read(part)); xml::parser parser(rels_stream, part.string()); xlnt::uri source(part.string()); const auto xmlns = xlnt::constants::get_namespace("relationships"); 
parser.next_expect(xml::parser::event_type::start_element, xmlns, "Relationships"); parser.content(xml::content::complex); while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element, xmlns, "Relationship"); relationships.emplace_back(parser.attribute("Id"), parser.attribute("Type"), source, xlnt::uri(parser.attribute("Target")), xlnt::target_mode::internal); parser.next_expect(xml::parser::event_type::end_element, xlnt::constants::get_namespace("relationships"), "Relationship"); } parser.next_expect(xml::parser::event_type::end_element, xmlns, "Relationships"); return relationships; } void check_document_type(const std::string &document_content_type) { if (document_content_type != "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml" && document_content_type != "application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml ") { throw xlnt::invalid_file(document_content_type); } } } // namespace namespace xlnt { namespace detail { xlsx_consumer::xlsx_consumer(workbook &destination) : destination_(destination) { } void xlsx_consumer::read(const path &source) { destination_.clear(); source_.load(source); populate_workbook(); } void xlsx_consumer::read(std::istream &source) { destination_.clear(); source_.load(source); populate_workbook(); } void xlsx_consumer::read(const std::vector &source) { destination_.clear(); source_.load(source); populate_workbook(); } // Part Writing Methods void xlsx_consumer::populate_workbook() { auto &manifest = destination_.get_manifest(); read_manifest(); for (const auto &rel : manifest.get_relationships(path("/"))) { std::istringstream parser_stream(source_.read(rel.get_target().get_path())); xml::parser parser(parser_stream, rel.get_target().get_path().string()); switch (rel.get_type()) { case relationship::type::core_properties: read_core_properties(parser); break; case relationship::type::extended_properties: 
// populate_workbook, continued: remaining cases of the package-level dispatch.
            read_extended_properties(parser);
            break;
        case relationship::type::custom_properties:
            read_custom_property(parser);
            break;
        case relationship::type::office_document:
            // The main document part must carry a workbook or template content type.
            check_document_type(manifest.get_content_type(rel.get_target().get_path()));
            read_workbook(parser);
            break;
        case relationship::type::connections:
            read_connections(parser);
            break;
        case relationship::type::custom_xml_mappings:
            read_custom_xml_mappings(parser);
            break;
        case relationship::type::external_workbook_references:
            read_external_workbook_references(parser);
            break;
        case relationship::type::metadata:
            read_metadata(parser);
            break;
        case relationship::type::pivot_table:
            read_pivot_table(parser);
            break;
        case relationship::type::shared_workbook_revision_headers:
            read_shared_workbook_revision_headers(parser);
            break;
        case relationship::type::volatile_dependencies:
            read_volatile_dependencies(parser);
            break;
        default:
            // Relationship types without a reader are skipped.
            break;
        }
    }
    const auto workbook_rel = manifest.get_relationship(path("/"), relationship::type::office_document);
    // First pass of workbook relationship parts which must be read before sheets (e.g. shared strings)
    for (const auto &rel : manifest.get_relationships(workbook_rel.get_target().get_path()))
    {
        // Targets are resolved relative to the directory of the relationship's source part.
        path part_path(rel.get_source().get_path().parent().append(rel.get_target().get_path()));
        std::istringstream parser_stream(source_.read(part_path));
        // Only the styles part is parsed with namespace declaration events enabled.
        auto using_namespaces = rel.get_type() == relationship::type::styles;
        auto receive = xml::parser::receive_default | (using_namespaces ?
// populate_workbook, continued: end of the first pass, then the sheet pass.
            xml::parser::receive_namespace_decls : 0);
        xml::parser parser(parser_stream, rel.get_target().get_path().string(), receive);
        switch (rel.get_type())
        {
        case relationship::type::shared_string_table:
            read_shared_string_table(parser);
            break;
        case relationship::type::styles:
            read_stylesheet(parser);
            break;
        case relationship::type::theme:
            read_theme(parser);
            break;
        default:
            break;
        }
    }
    // Second pass, read sheets themselves
    for (const auto &rel : manifest.get_relationships(workbook_rel.get_target().get_path()))
    {
        path part_path(rel.get_source().get_path().parent().append(rel.get_target().get_path()));
        std::istringstream parser_stream(source_.read(part_path));
        // Sheet parts are always parsed with namespace declaration events enabled.
        auto receive = xml::parser::receive_default | xml::parser::receive_namespace_decls;
        xml::parser parser(parser_stream, rel.get_target().get_path().string(), receive);
        switch (rel.get_type())
        {
        case relationship::type::chartsheet:
            read_chartsheet(rel.get_id(), parser);
            break;
        case relationship::type::dialogsheet:
            read_dialogsheet(rel.get_id(), parser);
            break;
        case relationship::type::worksheet:
            read_worksheet(rel.get_id(), parser);
            break;
        default:
            break;
        }
    }
    // Unknown Parts
    // NOTE(review): the two statements below are block-scope function declarations, not
    // calls; nothing is executed here. Confirm whether calls were intended.
    void read_unknown_parts();
    void read_unknown_relationships();
}

// Package Parts

// Registers content types from "[Content_Types].xml" and every relationship found in
// the archive's ".rels" parts with the destination workbook's manifest.
// Throws invalid_file when the package-level "_rels/.rels" part is missing.
void xlsx_consumer::read_manifest()
{
    path package_rels_path("_rels/.rels");
    if (!source_.has_file(package_rels_path)) throw invalid_file("missing package rels");
    auto package_rels = read_relationships(package_rels_path, source_);
    std::istringstream parser_stream(source_.read(path("[Content_Types].xml")));
    xml::parser parser(parser_stream, "[Content_Types].xml");
    auto &manifest = destination_.get_manifest();
    static const auto xmlns = constants::get_namespace("content-types");
    parser.next_expect(xml::parser::event_type::start_element, xmlns, "Types");
    parser.content(xml::content::complex);
    while (true)
    {
        if (parser.peek() == xml::parser::event_type::end_element) break;
        parser.next_expect(xml::parser::event_type::start_element);
        if (parser.name() == "Default")
        {
manifest.register_default_type(parser.attribute("Extension"), parser.attribute("ContentType")); parser.next_expect(xml::parser::event_type::end_element, xmlns, "Default"); } else if (parser.name() == "Override") { manifest.register_override_type(path(parser.attribute("PartName")), parser.attribute("ContentType")); parser.next_expect(xml::parser::event_type::end_element, xmlns, "Override"); } } parser.next_expect(xml::parser::event_type::end_element, xmlns, "Types"); for (const auto &package_rel : package_rels) { manifest.register_relationship(uri("/"), package_rel.get_type(), package_rel.get_target(), package_rel.get_target_mode(), package_rel.get_id()); } for (const auto &relationship_source : source_.infolist()) { if (relationship_source.filename == path("_rels/.rels") || relationship_source.filename.extension() != "rels") continue; path part(relationship_source.filename.parent().parent()); part = part.append(relationship_source.filename.split_extension().first); uri source(part.string()); path source_directory = part.parent(); auto part_rels = read_relationships(relationship_source.filename, source_); for (const auto part_rel : part_rels) { path target_path(source_directory.append(part_rel.get_target().get_path())); manifest.register_relationship(source, part_rel.get_type(), part_rel.get_target(), part_rel.get_target_mode(), part_rel.get_id()); } } } void xlsx_consumer::read_extended_properties(xml::parser &parser) { static const auto xmlns = constants::get_namespace("extended-properties"); static const auto xmlns_vt = constants::get_namespace("vt"); parser.next_expect(xml::parser::event_type::start_element, xmlns, "Properties"); parser.content(xml::parser::content_type::complex); while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element); auto name = parser.name(); auto text = std::string(); while (parser.peek() == xml::parser::event_type::characters) { 
parser.next_expect(xml::parser::event_type::characters); text.append(parser.value()); } if (name == "Application") destination_.set_application(text); else if (name == "DocSecurity") destination_.set_doc_security(std::stoi(text)); else if (name == "ScaleCrop") destination_.set_scale_crop(is_true(text)); else if (name == "Company") destination_.set_company(text); else if (name == "SharedDoc") destination_.set_shared_doc(is_true(text)); else if (name == "HyperlinksChanged") destination_.set_hyperlinks_changed(is_true(text)); else if (name == "AppVersion") destination_.set_app_version(text); else if (name == "Application") destination_.set_application(text); else if (name == "HeadingPairs") { parser.next_expect(xml::parser::event_type::start_element, xmlns_vt, "vector"); parser.content(xml::parser::content_type::complex); parser.attribute("size"); parser.attribute("baseType"); parser.next_expect(xml::parser::event_type::start_element, xmlns_vt, "variant"); parser.content(xml::parser::content_type::complex); parser.next_expect(xml::parser::event_type::start_element, xmlns_vt, "lpstr"); parser.next_expect(xml::parser::event_type::characters); parser.next_expect(xml::parser::event_type::end_element, xmlns_vt, "lpstr"); parser.next_expect(xml::parser::event_type::end_element, xmlns_vt, "variant"); parser.next_expect(xml::parser::event_type::start_element, xmlns_vt, "variant"); parser.content(xml::parser::content_type::complex); parser.next_expect(xml::parser::event_type::start_element, xmlns_vt, "i4"); parser.next_expect(xml::parser::event_type::characters); parser.next_expect(xml::parser::event_type::end_element, xmlns_vt, "i4"); parser.next_expect(xml::parser::event_type::end_element, xmlns_vt, "variant"); parser.next_expect(xml::parser::event_type::end_element, xmlns_vt, "vector"); } else if (name == "TitlesOfParts") { parser.next_expect(xml::parser::event_type::start_element, xmlns_vt, "vector"); parser.content(xml::parser::content_type::complex); 
// read_extended_properties, continued: the "TitlesOfParts" vector holds any number of
// lpstr entries, which are consumed but not stored.
            parser.attribute("size");
            parser.attribute("baseType");
            while (true)
            {
                if (parser.peek() == xml::parser::event_type::end_element) break;
                parser.next_expect(xml::parser::event_type::start_element, xmlns_vt, "lpstr");
                parser.content(xml::parser::content_type::simple);
                parser.next_expect(xml::parser::event_type::characters);
                parser.next_expect(xml::parser::event_type::end_element, xmlns_vt, "lpstr");
            }
            parser.next_expect(xml::parser::event_type::end_element, xmlns_vt, "vector");
        }
        // Skip any character data left before the property's end tag.
        while (parser.peek() == xml::parser::event_type::characters)
        {
            parser.next_expect(xml::parser::event_type::characters);
        }
        parser.next_expect(xml::parser::event_type::end_element);
    }
    // NOTE(review): the closing "Properties" end tag is peeked above but never consumed
    // by this function.
}

// Reads the core properties part: creator, lastModifiedBy, created and modified are
// stored on the destination workbook; other children are consumed and ignored.
void xlsx_consumer::read_core_properties(xml::parser &parser)
{
    static const auto xmlns_cp = constants::get_namespace("core-properties");
    static const auto xmlns_dc = constants::get_namespace("dc");
    static const auto xmlns_dcterms = constants::get_namespace("dcterms");
    static const auto xmlns_dcmitype = constants::get_namespace("dcmitype");
    static const auto xmlns_xsi = constants::get_namespace("xsi");
    parser.next_expect(xml::parser::event_type::start_element, xmlns_cp, "coreProperties");
    parser.content(xml::parser::content_type::complex);
    while (true)
    {
        if (parser.peek() == xml::parser::event_type::end_element) break;
        parser.next_expect(xml::parser::event_type::start_element);
        // Every child is required to contain character data.
        parser.next_expect(xml::parser::event_type::characters);
        if (parser.namespace_() == xmlns_dc && parser.name() == "creator")
        {
            destination_.set_creator(parser.value());
        }
        else if (parser.namespace_() == xmlns_cp && parser.name() == "lastModifiedBy")
        {
            destination_.set_last_modified_by(parser.value());
        }
        else if (parser.namespace_() == xmlns_dcterms && parser.name() == "created")
        {
            // The xsi:type attribute is read and its value discarded.
            parser.attribute(xml::qname(xmlns_xsi, "type"));
            destination_.set_created(w3cdtf_to_datetime(parser.value()));
        }
        else if (parser.namespace_() == xmlns_dcterms && parser.name() == "modified")
        {
            parser.attribute(xml::qname(xmlns_xsi, "type"));
// read_core_properties, continued: store the modification timestamp and close up.
            destination_.set_modified(w3cdtf_to_datetime(parser.value()));
        }
        parser.next_expect(xml::parser::event_type::end_element);
    }
    parser.next_expect(xml::parser::event_type::end_element, xmlns_cp, "coreProperties");
}

// Placeholder: custom file properties are not read.
void xlsx_consumer::read_custom_file_properties(xml::parser &/*parser*/)
{
}

// Write SpreadsheetML-Specific Package Parts

// Reads the main workbook part: file version, absolute path, the first workbook view,
// workbook properties, the sheet list, calculation properties and the extension list.
void xlsx_consumer::read_workbook(xml::parser &parser)
{
    static const auto xmlns = constants::get_namespace("workbook");
    static const auto xmlns_mc = constants::get_namespace("mc");
    static const auto xmlns_mx = constants::get_namespace("mx");
    static const auto xmlns_r = constants::get_namespace("r");
    static const auto xmlns_s = constants::get_namespace("worksheet");
    static const auto xmlns_x15ac = constants::get_namespace("x15ac");
    parser.next_expect(xml::parser::event_type::start_element, xmlns, "workbook");
    parser.content(xml::parser::content_type::complex);
    // A namespace declaration named "x15" on the root element enables x15 on the workbook.
    while (parser.peek() == xml::parser::event_type::start_namespace_decl)
    {
        parser.next_expect(xml::parser::event_type::start_namespace_decl);
        if (parser.name() == "x15") destination_.enable_x15();
        parser.next_expect(xml::parser::event_type::end_namespace_decl);
    }
    // The mc:Ignorable attribute is read and its value discarded.
    if (parser.attribute_present(xml::qname(xmlns_mc, "Ignorable")))
    {
        parser.attribute(xml::qname(xmlns_mc, "Ignorable"));
    }
    while (true)
    {
        if (parser.peek() == xml::parser::event_type::end_element) break;
        parser.next_expect(xml::parser::event_type::start_element);
        parser.content(xml::parser::content_type::complex);
        auto qname = parser.qname();
        if (qname == xml::qname(xmlns, "fileVersion"))
        {
            destination_.d_->has_file_version_ = true;
            destination_.d_->file_version_.app_name = parser.attribute("appName");
            destination_.d_->file_version_.last_edited = string_to_size_t(parser.attribute("lastEdited"));
            destination_.d_->file_version_.lowest_edited = string_to_size_t(parser.attribute("lowestEdited"));
            destination_.d_->file_version_.rup_build = string_to_size_t(parser.attribute("rupBuild"));
// read_workbook, continued: fileVersion end tag, then the remaining top-level children.
            parser.next_expect(xml::parser::event_type::end_element, xmlns, "fileVersion");
        }
        else if (qname == xml::qname(xmlns_mc, "AlternateContent"))
        {
            // Expects exactly one Choice containing one absPath element.
            parser.next_expect(xml::parser::event_type::start_element, xmlns_mc, "Choice");
            parser.content(xml::parser::content_type::complex);
            parser.attribute("Requires");
            parser.next_expect(xml::parser::event_type::start_element, xmlns_x15ac, "absPath");
            destination_.set_absolute_path(path(parser.attribute("url")));
            parser.next_expect(xml::parser::event_type::end_element, xmlns_x15ac, "absPath");
            parser.next_expect(xml::parser::event_type::end_element, xmlns_mc, "Choice");
            parser.next_expect(xml::parser::event_type::end_element, xmlns_mc, "AlternateContent");
        }
        else if (qname == xml::qname(xmlns, "bookViews"))
        {
            // At most one workbookView is read.
            if (parser.peek() == xml::parser::event_type::start_element)
            {
                parser.next_expect(xml::parser::event_type::start_element, xmlns, "workbookView");
                workbook_view view;
                view.x_window = string_to_size_t(parser.attribute("xWindow"));
                view.y_window = string_to_size_t(parser.attribute("yWindow"));
                view.window_width = string_to_size_t(parser.attribute("windowWidth"));
                view.window_height = string_to_size_t(parser.attribute("windowHeight"));
                view.tab_ratio = string_to_size_t(parser.attribute("tabRatio"));
                destination_.set_view(view);
                parser.next_expect(xml::parser::event_type::end_element, xmlns, "workbookView");
            }
            parser.next_expect(xml::parser::event_type::end_element, xmlns, "bookViews");
        }
        else if (qname == xml::qname(xmlns, "workbookPr"))
        {
            destination_.d_->has_properties_ = true;
            if (parser.attribute_present("date1904"))
            {
                // Use the file's shared boolean helper rather than repeating the
                // "1"/"true" comparison inline.
                if (is_true(parser.attribute("date1904")))
                {
                    destination_.set_base_date(xlnt::calendar::mac_1904);
                }
            }
            parser.next_expect(xml::parser::event_type::end_element, xmlns, "workbookPr");
        }
        else if (qname == xml::qname(xmlns, "sheets"))
        {
            // Position of each sheet in document order, starting at 0.
            std::size_t index = 0;
            while (true)
            {
                if (parser.peek() == xml::parser::event_type::end_element) break;
// read_workbook, continued: record title -> id / index / relationship id per sheet.
                parser.next_expect(xml::parser::event_type::start_element, xmlns_s, "sheet");
                std::string rel_id(parser.attribute(xml::qname(xmlns_r, "id")));
                std::string title(parser.attribute("name"));
                auto id = string_to_size_t(parser.attribute("sheetId"));
                sheet_title_id_map_[title] = id;
                sheet_title_index_map_[title] = index++;
                destination_.d_->sheet_title_rel_id_map_[title] = rel_id;
                parser.next_expect(xml::parser::event_type::end_element, xmlns_s, "sheet");
            }
            parser.next_expect(xml::parser::event_type::end_element, xmlns, "sheets");
        }
        else if (qname == xml::qname(xmlns, "calcPr"))
        {
            // Only the presence of calcPr is recorded; attribute values are discarded.
            destination_.d_->has_calculation_properties_ = true;
            parser.attribute("calcId");
            parser.attribute("concurrentCalc");
            parser.next_expect(xml::parser::event_type::end_element, xmlns, "calcPr");
        }
        else if (qname == xml::qname(xmlns, "extLst"))
        {
            // Expects exactly one ext element containing one ArchID element.
            parser.next_expect(xml::parser::event_type::start_element, xmlns, "ext");
            parser.content(xml::parser::content_type::complex);
            parser.attribute("uri");
            parser.next_expect(xml::parser::event_type::start_element, xmlns_mx, "ArchID");
            destination_.d_->has_arch_id_ = true;
            parser.attribute("Flags");
            parser.next_expect(xml::parser::event_type::end_element, xmlns_mx, "ArchID");
            parser.next_expect(xml::parser::event_type::end_element, xmlns, "ext");
            parser.next_expect(xml::parser::event_type::end_element, xmlns, "extLst");
        }
    }
    parser.next_expect(xml::parser::event_type::end_element, xmlns, "workbook");
}

// Write Workbook Relationship Target Parts

// The readers below are empty placeholders: the corresponding parts are not parsed.

void xlsx_consumer::read_calculation_chain(xml::parser &/*parser*/)
{
}

void xlsx_consumer::read_chartsheet(const std::string &/*title*/, xml::parser &/*parser*/)
{
}

void xlsx_consumer::read_connections(xml::parser &/*parser*/)
{
}

void xlsx_consumer::read_custom_property(xml::parser &/*parser*/)
{
}

void xlsx_consumer::read_custom_xml_mappings(xml::parser &/*parser*/)
{
}

void xlsx_consumer::read_dialogsheet(const std::string &/*title*/, xml::parser &/*parser*/)
{
}

void
xlsx_consumer::read_external_workbook_references(xml::parser &/*parser*/)
{
    // Empty placeholder: external workbook references are not parsed.
}

// Empty placeholder: metadata is not parsed.
void xlsx_consumer::read_metadata(xml::parser &/*parser*/)
{
}

// Empty placeholder: pivot tables are not parsed.
void xlsx_consumer::read_pivot_table(xml::parser &/*parser*/)
{
}

// Reads the shared string table ("sst") and appends one text entry per "si" element to
// the destination workbook's shared strings. Throws invalid_file when the number of
// stored strings differs from the "uniqueCount" attribute (0 when absent).
void xlsx_consumer::read_shared_string_table(xml::parser &parser)
{
    static const auto xmlns = constants::get_namespace("shared-strings");
    parser.next_expect(xml::parser::event_type::start_element, xmlns, "sst");
    std::size_t unique_count = 0;
    if (parser.attribute_present("uniqueCount"))
    {
        unique_count = string_to_size_t(parser.attribute("uniqueCount"));
    }
    auto &strings = destination_.get_shared_strings();
    while (true)
    {
        if (parser.peek() == xml::parser::event_type::end_element) break;
        parser.next_expect(xml::parser::event_type::start_element, xmlns, "si");
        parser.next_expect(xml::parser::event_type::start_element);
        text t;
        // NOTE(review): parser.value() is read right after a start_element event here
        // and in the run branch below, and no end_element for "t"/"si" is consumed in
        // this function; confirm this matches the parser's content model.
        if (parser.name() == "t")
        {
            t.set_plain_string(parser.value());
        }
        else if (parser.name() == "r") // possible multiple text entities.
        {
            while (true)
            {
                if (parser.peek() == xml::parser::event_type::end_element) break;
                parser.next_expect(xml::parser::event_type::start_element, xmlns, "t");
                text_run run;
                run.set_string(parser.value());
                // An element following the text is taken to be the run properties.
                if (parser.peek() == xml::parser::event_type::start_element)
                {
                    parser.next_expect(xml::parser::event_type::start_element, xmlns, "rPr");
                    while (true)
                    {
                        if (parser.peek() == xml::parser::event_type::end_element) break;
                        parser.next_expect(xml::parser::event_type::start_element);
                        if (parser.qname() == xml::qname(xmlns, "sz"))
                        {
                            run.set_size(string_to_size_t(parser.attribute("val")));
                        }
                        else if (parser.qname() == xml::qname(xmlns, "rFont"))
                        {
                            run.set_font(parser.attribute("val"));
                        }
                        else if (parser.qname() == xml::qname(xmlns, "color"))
                        {
                            run.set_color(parser.attribute("rgb"));
                        }
                        else if (parser.qname() == xml::qname(xmlns, "family"))
                        {
                            run.set_family(string_to_size_t(parser.attribute("val")));
                        }
                        else if (parser.qname() == xml::qname(xmlns, "scheme"))
                        {
                            run.set_scheme(parser.attribute("val"));
                        }
// read_shared_string_table, continued: close the run property, collect the run, store
// the text, then validate the count.
                        parser.next_expect(xml::parser::event_type::end_element, parser.qname());
                    }
                }
                t.add_run(run);
            }
        }
        strings.push_back(t);
    }
    if (unique_count != strings.size())
    {
        throw invalid_file("sizes don't match");
    }
}

// Empty placeholder: shared workbook revision headers are not parsed.
void xlsx_consumer::read_shared_workbook_revision_headers(xml::parser &/*parser*/)
{
}

// Empty placeholder: shared workbook data is not parsed.
void xlsx_consumer::read_shared_workbook(xml::parser &/*parser*/)
{
}

// Empty placeholder: shared workbook user data is not parsed.
void xlsx_consumer::read_shared_workbook_user_data(xml::parser &/*parser*/)
{
}

// Reads the stylesheet part into the destination workbook's stylesheet.
// NOTE(review): this function continues beyond the portion documented here, and the
// template arguments of togglable, std::pair and the std::vector declarations below
// are missing from this listing; they are left exactly as found.
void xlsx_consumer::read_stylesheet(xml::parser &parser)
{
    static const auto xmlns = constants::get_namespace("worksheet");
    static const auto xmlns_mc = constants::get_namespace("mc");
    static const auto xmlns_x14 = constants::get_namespace("x14");
    static const auto xmlns_x14ac = constants::get_namespace("x14ac");
    auto &stylesheet = destination_.impl().stylesheet_;
    parser.next_expect(xml::parser::event_type::start_element, xmlns, "styleSheet");
    parser.content(xml::parser::content_type::complex);
    // Consume leading namespace declarations; the x14ac namespace enables x15.
    while (true)
    {
        if (parser.peek() != xml::parser::event_type::start_namespace_decl) break;
        parser.next_expect(xml::parser::event_type::start_namespace_decl);
        if (parser.namespace_() == xmlns_x14ac)
        {
            destination_.enable_x15();
        }
    }
    // The mc:Ignorable attribute is read and its value discarded.
    if (parser.attribute_present(xml::qname(xmlns_mc, "Ignorable")))
    {
        parser.attribute(xml::qname(xmlns_mc, "Ignorable"));
    }
    // One parsed "xf" entry: each member pairs a value with a flag (second).
    struct formatting_record
    {
        template using togglable = std::pair;
        togglable alignment = { {}, 0 };
        togglable border_id = { 0, false };
        togglable fill_id = { 0, false };
        togglable font_id = { 0, false };
        togglable number_format_id = { 0, false };
        togglable protection = { {}, false };
        togglable style_id = { 0, false };
    };
    // One parsed "cellStyle" entry.
    struct style_data
    {
        std::string name;
        std::size_t record_id;
        std::size_t builtin_id;
    };
    std::vector style_datas;
    std::vector style_records;
    std::vector format_records;
    while (true)
    {
        if (parser.peek() == xml::parser::event_type::end_element) break;
        parser.next_expect(xml::parser::event_type::start_element);
        parser.content(xml::parser::content_type::complex);
        if (parser.qname() ==
xml::qname(xmlns, "borders")) { stylesheet.borders.clear(); auto count = parser.attribute("count"); while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; stylesheet.borders.push_back(read_border(parser)); } if (count != stylesheet.borders.size()) { throw xlnt::exception("counts don't match"); } } else if (parser.qname() == xml::qname(xmlns, "fills")) { stylesheet.fills.clear(); auto count = parser.attribute("count"); while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; stylesheet.fills.push_back(read_fill(parser)); } if (count != stylesheet.fills.size()) { throw xlnt::exception("counts don't match"); } } else if (parser.qname() == xml::qname(xmlns, "fonts")) { stylesheet.fonts.clear(); auto count = parser.attribute("count"); if (parser.attribute_present(xml::qname(xmlns_x14ac, "knownFonts"))) { parser.attribute(xml::qname(xmlns_x14ac, "knownFonts")); } while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; stylesheet.fonts.push_back(read_font(parser)); } if (count != stylesheet.fonts.size()) { throw xlnt::exception("counts don't match"); } } else if (parser.qname() == xml::qname(xmlns, "numFmts")) { stylesheet.number_formats.clear(); while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element, "numFmt"); auto format_string = parser.attribute("formatCode"); if (format_string == "GENERAL") { format_string = "General"; } xlnt::number_format nf; nf.set_format_string(format_string); nf.set_id(string_to_size_t(parser.attribute("numFmtId"))); stylesheet.number_formats.push_back(nf); } } else if (parser.qname() == xml::qname(xmlns, "colors")) { } else if (parser.qname() == xml::qname(xmlns, "cellStyles")) { auto count = parser.attribute("count"); while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; auto &data = *style_datas.emplace(style_datas.end()); 
parser.next_expect(xml::parser::event_type::start_element, xmlns, "cellStyle"); data.name = parser.attribute("name"); data.record_id = parser.attribute("xfId"); data.builtin_id = parser.attribute("builtinId"); parser.next_expect(xml::parser::event_type::end_element, xmlns, "cellStyle"); } if (count != style_datas.size()) { throw xlnt::exception("counts don't match"); } } else if (parser.qname() == xml::qname(xmlns, "cellStyleXfs") || parser.qname() == xml::qname(xmlns, "cellXfs")) { auto in_style_records = parser.name() == "cellStyleXfs"; auto count = parser.attribute("count"); while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element, xmlns, "xf"); auto &record = *(!in_style_records ? format_records.emplace(format_records.end()) : style_records.emplace(style_records.end())); auto apply_alignment_present = parser.attribute_present("applyAlignment"); auto alignment_applied = apply_alignment_present && is_true(parser.attribute("applyAlignment")); record.alignment.second = alignment_applied; auto border_applied = parser.attribute_present("applyBorder") && is_true(parser.attribute("applyBorder")); auto border_index = parser.attribute_present("borderId") ? string_to_size_t(parser.attribute("borderId")) : 0; record.border_id = { border_index, border_applied }; auto fill_applied = parser.attribute_present("applyFill") && is_true(parser.attribute("applyFill")); auto fill_index = parser.attribute_present("fillId") ? string_to_size_t(parser.attribute("fillId")) : 0; record.fill_id = { fill_index, fill_applied }; auto font_applied = parser.attribute_present("applyFont") && is_true(parser.attribute("applyFont")); auto font_index = parser.attribute_present("fontId") ? 
string_to_size_t(parser.attribute("fontId")) : 0; record.font_id = { font_index, font_applied }; auto number_format_applied = parser.attribute_present("applyNumberFormat") && is_true(parser.attribute("applyNumberFormat")); auto number_format_id = parser.attribute_present("numFmtId") ? string_to_size_t(parser.attribute("numFmtId")) : 0; record.number_format_id = { number_format_id, number_format_applied }; auto apply_protection_present = parser.attribute_present("applyProtection"); auto protection_applied = apply_protection_present && is_true(parser.attribute("applyProtection")); record.protection.second = protection_applied; if (parser.attribute_present("xfId") && parser.name() == "cellXfs") { record.style_id = { parser.attribute("xfId"), true }; } while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element); if (parser.qname() == xml::qname(xmlns, "alignment")) { record.alignment.first = read_alignment(parser); record.alignment.second = !apply_alignment_present || alignment_applied; } else if (parser.qname() == xml::qname(xmlns, "protection")) { record.protection.first = read_protection(parser); record.protection.second = !apply_protection_present || protection_applied; } parser.next_expect(xml::parser::event_type::end_element, parser.qname()); } parser.next_expect(xml::parser::event_type::end_element, xmlns, "xf"); } if ((in_style_records && count != style_records.size()) || (!in_style_records && count != format_records.size())) { throw xlnt::exception("counts don't match"); } } else if (parser.qname() == xml::qname(xmlns, "dxfs")) { auto count = parser.attribute("count"); std::size_t processed = 0; while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element); parser.next_expect(xml::parser::event_type::end_element); } if (count != processed) { throw xlnt::exception("counts don't match"); } } else if 
(parser.qname() == xml::qname(xmlns, "tableStyles")) { auto default_table_style = parser.attribute("defaultTableStyle"); auto default_pivot_style = parser.attribute("defaultPivotStyle"); auto count = parser.attribute("count"); std::size_t processed = 0; while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element); parser.next_expect(xml::parser::event_type::end_element); } if (count != processed) { throw xlnt::exception("counts don't match"); } } else if (parser.qname() == xml::qname(xmlns, "extLst")) { parser.next_expect(xml::parser::event_type::start_element, xmlns, "ext"); parser.content(xml::parser::content_type::complex); parser.attribute("uri"); parser.next_expect(xml::parser::event_type::start_namespace_decl); parser.next_expect(xml::parser::event_type::start_element, xmlns_x14, "slicerStyles"); parser.attribute("defaultSlicerStyle"); parser.next_expect(xml::parser::event_type::end_element, xmlns_x14, "slicerStyles"); parser.next_expect(xml::parser::event_type::end_element, xmlns, "ext"); parser.next_expect(xml::parser::event_type::end_namespace_decl); } parser.next_expect(xml::parser::event_type::end_element); } parser.next_expect(xml::parser::event_type::end_element, xmlns, "styleSheet"); auto lookup_number_format = [&](std::size_t number_format_id) { auto result = number_format::general(); bool is_custom_number_format = false; for (const auto &nf : stylesheet.number_formats) { if (nf.get_id() == number_format_id) { result = nf; is_custom_number_format = true; break; } } if (number_format_id < 164 && !is_custom_number_format) { result = number_format::from_builtin_id(number_format_id); } return result; }; auto style_data_iter = style_datas.begin(); for (const auto &record : style_records) { auto &new_style = stylesheet.create_style(); new_style.name(style_data_iter->name); new_style.builtin_id(style_data_iter->builtin_id); new_style.alignment(record.alignment.first, 
record.alignment.second); new_style.border(stylesheet.borders.at(record.border_id.first), record.border_id.second); new_style.fill(stylesheet.fills.at(record.fill_id.first), record.fill_id.second); new_style.font(stylesheet.fonts.at(record.font_id.first), record.font_id.second); new_style.number_format(lookup_number_format(record.number_format_id.first), record.number_format_id.second); new_style.protection(record.protection.first, record.protection.second); ++style_data_iter; } for (const auto &record : format_records) { auto &new_format = stylesheet.create_format(); new_format.style(stylesheet.styles.at(record.style_id.first).name()); new_format.alignment(record.alignment.first, record.alignment.second); new_format.border(stylesheet.borders.at(record.border_id.first), record.border_id.second); new_format.fill(stylesheet.fills.at(record.fill_id.first), record.fill_id.second); new_format.font(stylesheet.fonts.at(record.font_id.first), record.font_id.second); new_format.number_format(lookup_number_format(record.number_format_id.first), record.number_format_id.second); new_format.protection(record.protection.first, record.protection.second); } } void xlsx_consumer::read_theme(xml::parser &/*parser*/) { destination_.set_theme(theme()); } void xlsx_consumer::read_volatile_dependencies(xml::parser &/*parser*/) { } void xlsx_consumer::read_worksheet(const std::string &rel_id, xml::parser &parser) { static const auto xmlns = constants::get_namespace("worksheet"); static const auto xmlns_mc = constants::get_namespace("mc"); static const auto xmlns_x14ac = constants::get_namespace("x14ac"); auto title = std::find_if(destination_.d_->sheet_title_rel_id_map_.begin(), destination_.d_->sheet_title_rel_id_map_.end(), [&](const std::pair &p) { return p.second == rel_id; })->first; auto id = sheet_title_id_map_[title]; auto index = sheet_title_index_map_[title]; auto insertion_iter = destination_.d_->worksheets_.begin(); while (insertion_iter != destination_.d_->worksheets_.end() 
&& sheet_title_index_map_[insertion_iter->title_] < index) { ++insertion_iter; } destination_.d_->worksheets_.emplace(insertion_iter, &destination_, id, title); auto ws = destination_.get_sheet_by_id(id); parser.next_expect(xml::parser::event_type::start_element, xmlns, "worksheet"); parser.content(xml::parser::content_type::complex); while (parser.peek() == xml::parser::event_type::start_namespace_decl) { parser.next_expect(xml::parser::event_type::start_namespace_decl); if (parser.namespace_() == xmlns_x14ac) { ws.enable_x14ac(); } } if (parser.attribute_present(xml::qname(xmlns_mc, "Ignorable"))) { parser.attribute(xml::qname(xmlns_mc, "Ignorable")); } xlnt::range_reference full_range; while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element); parser.content(xml::parser::content_type::complex); if (parser.qname() == xml::qname(xmlns, "dimension")) { full_range = xlnt::range_reference(parser.attribute("ref")); ws.d_->has_dimension_ = true; parser.next_expect(xml::parser::event_type::end_element, xmlns, "dimension"); } else if (parser.qname() == xml::qname(xmlns, "sheetViews")) { ws.d_->has_view_ = true; while (true) { parser.attribute_map(); if (parser.next() == xml::parser::event_type::end_element && parser.name() == "sheetViews") { break; } } //parser.next_expect(xml::parser::event_type::end_element, xmlns, "sheetViews"); } else if (parser.qname() == xml::qname(xmlns, "sheetFormatPr")) { ws.d_->has_format_properties_ = true; while (true) { parser.attribute_map(); if (parser.next() == xml::parser::event_type::end_element && parser.name() == "sheetFormatPr") { break; } } //parser.next_expect(xml::parser::event_type::end_element, xmlns, "sheetFormatPr"); } else if (parser.qname() == xml::qname(xmlns, "mergeCells")) { auto count = std::stoull(parser.attribute("count")); while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; 
parser.next_expect(xml::parser::event_type::start_element, xmlns, "mergeCell"); ws.merge_cells(range_reference(parser.attribute("ref"))); parser.next_expect(xml::parser::event_type::start_element, xmlns, "mergeCell"); count--; } if (count != 0) { throw invalid_file("sizes don't match"); } parser.next_expect(xml::parser::event_type::end_element, xmlns, "mergeCells"); } else if (parser.qname() == xml::qname(xmlns, "sheetData")) { auto &shared_strings = destination_.get_shared_strings(); while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element, xmlns, "row"); auto row_index = static_cast(std::stoull(parser.attribute("r"))); if (parser.attribute_present("ht")) { ws.get_row_properties(row_index).height = std::stold(parser.attribute("ht")); } std::string span_string = parser.attribute("spans"); auto colon_index = span_string.find(':'); column_t min_column = 0; column_t max_column = 0; if (colon_index != std::string::npos) { min_column = static_cast(std::stoll(span_string.substr(0, colon_index))); max_column = static_cast(std::stoll(span_string.substr(colon_index + 1))); } else { min_column = full_range.get_top_left().get_column_index(); max_column = full_range.get_bottom_right().get_column_index(); } while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element, xmlns, "c"); auto cell = ws.get_cell(cell_reference(parser.attribute("r"))); auto has_type = parser.attribute_present("t"); auto type = has_type ? parser.attribute("t") : ""; auto has_format = parser.attribute_present("s"); auto format_id = static_cast(has_format ? 
std::stoull(parser.attribute("s")) : 0LL); auto has_value = false; auto value_string = std::string(); auto has_formula = false; auto has_shared_formula = false; auto formula_value_string = std::string(); while (true) { if (parser.peek() == xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element); if (parser.qname() == xml::qname(xmlns, "v")) { has_value = true; value_string = parser.value(); } else if (parser.qname() == xml::qname(xmlns, "f")) { has_formula = true; has_shared_formula = parser.attribute_present("t") && parser.attribute("t") == "shared"; formula_value_string = parser.value(); } else if (parser.qname() == xml::qname(xmlns, "is")) { parser.next_expect(xml::parser::event_type::start_element, xmlns, "t"); value_string = parser.value(); parser.next_expect(xml::parser::event_type::end_element, xmlns, "t"); } parser.next_expect(xml::parser::event_type::end_element, parser.qname()); } if (has_formula && !has_shared_formula && !ws.get_workbook().get_data_only()) { cell.set_formula(formula_value_string); } if (has_type && (type == "inlineStr" || type =="str")) { cell.set_value(value_string); } else if (has_type && type == "s" && !has_formula) { auto shared_string_index = static_cast(std::stoull(value_string)); auto shared_string = shared_strings.at(shared_string_index); cell.set_value(shared_string); } else if (has_type && type == "b") // boolean { cell.set_value(value_string != "0"); } else if (has_value && !value_string.empty()) { if (!value_string.empty() && value_string[0] == '#') { cell.set_error(value_string); } else { cell.set_value(std::stold(value_string)); } } if (has_format) { cell.set_format(destination_.get_format(format_id)); } parser.next_expect(xml::parser::event_type::end_element, xmlns, "c"); } } parser.next_expect(xml::parser::event_type::end_element, xmlns, "sheetData"); } else if (parser.qname() == xml::qname(xmlns, "cols")) { while (true) { if (parser.peek() == 
xml::parser::event_type::end_element) break; parser.next_expect(xml::parser::event_type::start_element, xmlns, "col"); auto min = static_cast(std::stoull(parser.attribute("min"))); auto max = static_cast(std::stoull(parser.attribute("max"))); auto width = std::stold(parser.attribute("width")); bool custom = parser.attribute("customWidth") == std::string("1"); auto column_style = static_cast(parser.attribute_present("style") ? std::stoull(parser.attribute("style")) : 0); for (auto column = min; column <= max; column++) { if (!ws.has_column_properties(column)) { ws.add_column_properties(column, column_properties()); } ws.get_column_properties(min).width = width; ws.get_column_properties(min).style = column_style; ws.get_column_properties(min).custom = custom; } parser.next_expect(xml::parser::event_type::end_element, xmlns, "col"); } parser.next_expect(xml::parser::event_type::end_element, xmlns, "cols"); } else if (parser.qname() == xml::qname(xmlns, "autoFilter")) { ws.auto_filter(xlnt::range_reference(parser.attribute("ref"))); parser.next_expect(xml::parser::event_type::end_element, xmlns, "autoFilter"); } else if (parser.qname() == xml::qname(xmlns, "pageMargins")) { page_margins margins; margins.set_top(parser.attribute("top")); margins.set_bottom(parser.attribute("bottom")); margins.set_left(parser.attribute("left")); margins.set_right(parser.attribute("right")); margins.set_header(parser.attribute("header")); margins.set_footer(parser.attribute("footer")); ws.set_page_margins(margins); parser.next_expect(xml::parser::event_type::end_element, xmlns, "pageMargins"); } } parser.next_expect(xml::parser::event_type::end_element, xmlns, "worksheet"); } // Sheet Relationship Target Parts void xlsx_consumer::read_comments(xml::parser &/*parser*/) { } void xlsx_consumer::read_drawings(xml::parser &/*parser*/) { } // Unknown Parts void xlsx_consumer::read_unknown_parts(xml::parser &/*parser*/) { } void xlsx_consumer::read_unknown_relationships(xml::parser &/*parser*/) 
{ } } // namespace detail } // namepsace xlnt