// Copyright (C) 2016-2017 Thomas Fussell // Copyright (C) 2002-2007 Ariya Hidayat (ariya@kde.org). // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include #include #include #include #include #include #include #include #include #include namespace { using xlnt::detail::byte; using xlnt::detail::binary_reader; using xlnt::detail::binary_writer; using directory_id = std::int32_t; using sector_id = std::int32_t; using sector_chain = std::vector; const sector_id FreeSector = -1; const sector_id EndOfChainSector = -2; const sector_id AllocationTableSector = -3; const sector_id MasterAllocationTableSector = -4; sector_chain follow_sector_chain(const sector_chain &table, sector_id start) { auto chain = sector_chain(); auto added = std::unordered_set(); auto last_sector = static_cast(table.size()); if (start >= last_sector) { return chain; } auto current = start; while (current < last_sector && current >= 0) { if (added.find(current) != added.end()) { break; } chain.push_back(current); added.insert(current); //TODO: why would there be a repeat? current = table[current]; } return chain; } struct header { enum class byte_order_type : uint16_t { big_endian = 0xFFFE, little_endian = 0xFEFF }; std::uint64_t file_id = 0xe11ab1a1e011cfd0; std::array ignore1 = {{0}}; std::uint16_t revision = 0x003E; std::uint16_t version = 0x0003; byte_order_type byte_order = byte_order_type::little_endian; std::uint16_t sector_size_power = 9; std::uint16_t short_sector_size_power = 6; std::array ignore2 = {{0}}; std::uint32_t num_sectors = 0; sector_id directory_start = 0; std::array ignore3 = {{0}}; std::uint32_t threshold = 4096; sector_id short_table_start = 0; std::uint32_t num_short_sectors = 0; sector_id sector_table_start = 0; std::uint32_t num_master_alloc_table_sectors = 0; std::array master_sector_alloc_table = {{FreeSector}}; }; bool header_is_valid(const header &h) { if (h.threshold != 4096) { return false; } if (h.num_sectors == 0 || (h.num_sectors > 109 && h.num_sectors > (h.num_master_alloc_table_sectors * 127) + 109) || ((h.num_sectors < 109) && (h.num_master_alloc_table_sectors != 0))) { return false; } if (h.short_sector_size_power > h.sector_size_power || h.sector_size_power <= 6 || h.sector_size_power >= 31) { return false; } return true; } struct directory_entry { enum class entry_type : std::uint8_t { Empty = 0, UserStorage = 1, UserStream = 2, LockBytes = 3, Property = 4, RootStorage = 5 }; enum class entry_color : std::uint8_t { Red = 0, Black = 1 }; std::array name = {{0}}; std::uint16_t name_length = 0; entry_type type; entry_color color; directory_id prev = -1; directory_id next = -1; directory_id child = -1; std::array ignore; sector_id first = 0; std::uint32_t size = 0; std::uint32_t ignore2; }; class directory_tree { public: static void entry_name(directory_entry &entry, std::u16string name) { if (name.size() > 31) { name.resize(31); } std::copy(name.begin(), name.end(), entry.name.begin()); entry.name[name.size()] = 0; entry.name_length = static_cast((name.size() + 1) * 2); } directory_tree() { clear(); } void clear() { entries = { create_root_entry() }; } std::size_t entry_count() const { return entries.size(); } directory_entry &entry(directory_id index) { return entries[static_cast(index)]; } const directory_entry &entry(directory_id index) const { return entries[static_cast(index)]; } const directory_entry &entry(const std::u16string &name) const { return entry(find_entry(name).first); } directory_entry &entry(const std::u16string &name, bool create) { auto find_result = find_entry(name); auto index = find_result.first; auto found = find_result.second; if (!found) { // not found among children if (!create) { throw xlnt::exception("not found"); } // create a new entry auto parent = index; entries.push_back(directory_entry()); index = static_cast(entry_count() - 1); auto &e = entry(index); e.first = 0; entry(parent).prev = index; } return entry(index); } /* directory_id parent(directory_id index) { // brute-force, basically we iterate for each entries, find its children // and check if one of the children is 'index' for (auto j = directory_id(0); j < static_cast(entry_count()); j++) { auto chi = children(j); for (std::size_t i = 0; i < chi.size(); i++) { if (chi[i] == index) { return j; } } } return -1; } */ /* std::u16string path(directory_id index) { // don't use root name ("Root Entry"), just give "/" if (index == 0) return u"/"; auto current_entry = entry(index); auto result = std::u16string(entry(index).name.data()); result.insert(0, u"/"); auto current_parent = parent(index); while (current_parent > 0) { current_entry = entry(current_parent); result.insert(0, std::u16string(current_entry.name.data())); result.insert(0, u"/"); --current_parent; index = current_parent; if (current_parent <= 0) break; } return result; } */ std::vector children(directory_id index) const { auto result = std::vector(); auto &e = entry(index); if (e.child >= 0 && e.child < static_cast(entry_count())) { find_siblings(result, e.child); } return result; } void load(const std::vector &data) { auto reader = binary_reader(data); entries = reader.as_vector_of(); auto is_empty = [](const directory_entry &entry) { return entry.type == directory_entry::entry_type::Empty; }; entries.erase(std::remove_if(entries.begin(), entries.end(), is_empty)); } directory_entry create_root_entry() const { directory_entry root; entry_name(root, u"Root Entry"); root.type = directory_entry::entry_type::RootStorage; root.color = directory_entry::entry_color::Black; root.size = 0; return root; } private: // helper function: recursively find siblings of index void find_siblings(std::vector &result, directory_id index) const { auto e = entry(index); // prevent infinite loop for (std::size_t i = 0; i < result.size(); i++) { if (result[i] == index) return; } // add myself result.push_back(index); // visit previous sibling, don't go infinitely auto prev = e.prev; if ((prev > 0) && (prev < static_cast(entry_count()))) { for (std::size_t i = 0; i < result.size(); i++) { if (result[i] == prev) { prev = 0; } } if (prev) { find_siblings(result, prev); } } // visit next sibling, don't go infinitely auto next = e.next; if ((next > 0) && (next < static_cast(entry_count()))) { for (std::size_t i = 0; i < result.size(); i++) { if (result[i] == next) next = 0; } if (next) { find_siblings(result, next); } } } std::pair find_entry(const std::u16string &name) const { // quick check for "/" (that's root) if (name == u"/Root Entry") { return { 0, true }; } // split the names, e.g "/ObjectPool/_1020961869" will become: // "ObjectPool" and "_1020961869" auto names = std::vector(); auto start = std::size_t(0); auto end = std::size_t(0); if (name[0] == u'/') start++; while (start < name.length()) { end = name.find_first_of('/', start); if (end == std::string::npos) end = name.length(); names.push_back(name.substr(start, end - start)); start = end + 1; } // start from root auto index = directory_id(0); for (auto it = names.begin(); it != names.end(); ++it) { // find among the children of index auto chi = children(index); std::ptrdiff_t child = 0; for (std::size_t i = 0; i < chi.size(); i++) { auto ce = entry(chi[i]); if (std::u16string(ce.name.data()) == *it) { child = static_cast(chi[i]); } } // traverse to the child if (child > 0) { index = static_cast(child); } else { return { index, false }; } } return { index, true }; } std::vector entries; }; } // namespace namespace xlnt { namespace detail { class compound_document_reader_impl { public: compound_document_reader_impl(const std::vector &bytes) : sectors_(bytes.data() + sizeof(header)), sectors_size_(bytes.size()) { auto reader = binary_reader(bytes); header_ = reader.read
(); // Master allocation table const auto sector_size = 1 << header_.sector_size_power; const auto sector_table_sectors = load_master_sector_allocation_table(); const auto sector_table_bytes = read(sector_table_sectors); auto sector_table_reader = binary_reader(sector_table_bytes); sector_table_ = sector_table_reader.as_vector_of(); // Short sector allocation table const auto short_sector_size = 1 << header_.short_sector_size_power; const auto short_table_chain = follow_sector_chain(sector_table_, header_.short_table_start); const auto short_table_bytes = read(short_table_chain); auto short_sector_table_reader = binary_reader(short_table_bytes); short_sector_table_ = short_sector_table_reader.as_vector_of(); // Directory const auto directory_chain = follow_sector_chain(sector_table_, header_.directory_start); const auto directory_sectors = read(directory_chain); directory_.load(directory_sectors); // Short stream container auto first_short_sector = directory_.entry(u"/Root Entry", false).first; short_container_stream_ = follow_sector_chain(sector_table_, first_short_sector); } std::vector read(const sector_chain §ors) const { const auto sector_size = 1 << header_.sector_size_power; auto result = std::vector(); auto writer = binary_writer(result); for (auto sector : sectors) { auto position = static_cast(sector_size * sector); writer.append(sectors_, sectors_size_, position, sector_size); } return result; } std::vector read_short(const sector_chain §ors) const { const auto short_sector_size = 1 << header_.short_sector_size_power; const auto sector_size = 1 << header_.sector_size_power; auto result = std::vector(); auto writer = binary_writer(result); for (auto sector : sectors) { auto position = static_cast(short_sector_size * sector); auto master_allocation_table_index = position / sector_size; auto sector_data = read({ short_container_stream_[master_allocation_table_index] }); auto offset = position % sector_size; writer.append(sector_data, offset, short_sector_size); } return result; } sector_chain load_master_sector_allocation_table() const { auto sectors = sector_chain( header_.master_sector_alloc_table.begin(), header_.master_sector_alloc_table.begin() + std::min(header_.master_sector_alloc_table.size(), static_cast(header_.num_sectors))); if (header_.num_sectors > std::uint32_t(109)) { auto current_sector = header_.sector_table_start; for (auto r = std::uint32_t(0); r < header_.num_master_alloc_table_sectors; ++r) { auto current_sector_data = read({ current_sector }); auto current_sector_reader = binary_reader(current_sector_data); auto current_sector_sectors = current_sector_reader.as_vector_of(); current_sector = current_sector_sectors.back(); current_sector_sectors.pop_back(); sectors.insert( current_sector_sectors.begin(), current_sector_sectors.end(), sectors.end()); } } return sectors; } std::vector read_stream(const std::u16string &name) const { const auto entry = directory_.entry(name); const auto entry_sectors = entry.size < header_.threshold ? follow_sector_chain(short_sector_table_, entry.first) : follow_sector_chain(sector_table_, entry.first); auto result = entry.size < header_.threshold ? read_short(entry_sectors) : read(entry_sectors); result.resize(entry.size); return result; } private: const byte *sectors_; const std::size_t sectors_size_; directory_tree directory_; header header_; std::vector sector_table_; std::vector short_sector_table_; std::vector short_container_stream_; }; class compound_document_writer_impl { public: compound_document_writer_impl(std::vector &bytes) : writer_(bytes), sector_table_(128, FreeSector), short_sector_table_(128, FreeSector) { } void write_sectors(const std::vector &data, directory_entry &/*entry*/) { const auto sector_size = 1 << header_.sector_size_power; const auto num_sectors = data.size() / sector_size; for (auto i = std::size_t(0); i < num_sectors; ++i) { auto position = sector_size * i; auto current_sector_size = data.size() % sector_size; writer_.append(data, position, current_sector_size); } } void write_short_sectors(const std::vector &data, directory_entry &/*entry*/) { const auto sector_size = 1 << header_.sector_size_power; const auto num_sectors = data.size() / sector_size; for (auto i = std::size_t(0); i < num_sectors; ++i) { auto position = sector_size * i; auto current_sector_size = data.size() % sector_size; writer_.append(data, position, current_sector_size); } } void write_stream(const std::u16string &name, const std::vector &data) { auto &entry = directory_.entry(name, true); if (entry.size < header_.threshold) { write_short_sectors(data, entry); } else { write_sectors(data, entry); } } private: binary_writer writer_; directory_tree directory_; header header_; std::vector sector_table_; std::vector short_sector_table_; std::vector short_container_stream_; }; compound_document_reader::compound_document_reader(const std::vector &data) : d_(new compound_document_reader_impl(data)) { } compound_document_reader::~compound_document_reader() { } std::vector compound_document_reader::read_stream(const std::u16string &name) const { return d_->read_stream(name); } compound_document_writer::compound_document_writer(std::vector &data) : d_(new compound_document_writer_impl(data)) { } compound_document_writer::~compound_document_writer() { } void compound_document_writer::write_stream(const std::u16string &name, const std::vector &data) { d_->write_stream(name, data); } } // namespace detail } // namespace xlnt