2017-04-22 09:58:40 +08:00
|
|
|
// Copyright (C) 2016-2017 Thomas Fussell
|
|
|
|
// Copyright (C) 2002-2007 Ariya Hidayat (ariya@kde.org).
|
|
|
|
//
|
|
|
|
// Redistribution and use in source and binary forms, with or without
|
|
|
|
// modification, are permitted provided that the following conditions
|
|
|
|
// are met:
|
|
|
|
//
|
|
|
|
// 1. Redistributions of source code must retain the above copyright
|
|
|
|
// notice, this list of conditions and the following disclaimer.
|
|
|
|
// 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
|
|
// documentation and/or other materials provided with the distribution.
|
|
|
|
//
|
|
|
|
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
|
|
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
|
|
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
|
|
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
|
|
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
|
|
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
|
|
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
2017-04-22 07:52:02 +08:00
|
|
|
#include <array>
|
|
|
|
#include <algorithm>
|
|
|
|
#include <cstring>
|
|
|
|
#include <fstream>
|
|
|
|
#include <iostream>
|
|
|
|
#include <list>
|
|
|
|
#include <string>
|
2017-04-24 08:27:16 +08:00
|
|
|
#include <unordered_set>
|
2017-04-22 07:52:02 +08:00
|
|
|
#include <vector>
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
#include <detail/binary.hpp>
|
2017-04-22 07:52:02 +08:00
|
|
|
#include <detail/cryptography/compound_document.hpp>
|
|
|
|
#include <xlnt/utils/exceptions.hpp>
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
using xlnt::detail::byte;
|
|
|
|
using xlnt::detail::binary_reader;
|
|
|
|
using xlnt::detail::binary_writer;
|
2017-04-23 02:25:27 +08:00
|
|
|
|
|
|
|
// Index of an entry inside the directory table.
using directory_id = std::int32_t;

// Index of a sector; negative values are reserved markers (see below).
using sector_id = std::int32_t;

// Ordered list of sector ids, used both for allocation tables and for chains.
using sector_chain = std::vector<sector_id>;

// Reserved sector ids. The names follow the markers used by the compound
// document allocation tables.
constexpr sector_id FreeSector{-1};
constexpr sector_id EndOfChainSector{-2};
constexpr sector_id AllocationTableSector{-3};
constexpr sector_id MasterAllocationTableSector{-4};
|
|
|
|
|
|
|
|
// Walks an allocation table starting at `start` and returns the ids visited,
// in order. table[i] holds the id that follows sector i. The walk stops at the
// first id that is negative, lies outside the table, or was already visited.
sector_chain follow_sector_chain(const sector_chain &table, sector_id start)
{
    sector_chain visited_in_order;
    std::unordered_set<sector_id> seen;
    const auto limit = static_cast<sector_id>(table.size());

    for (auto id = start; id >= 0 && id < limit; id = table[id])
    {
        // insert() reports whether the id was new; a repeat means a cycle.
        if (!seen.insert(id).second)
        {
            break;
        }

        visited_in_order.push_back(id);
    }

    return visited_in_order;
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
struct header
|
|
|
|
{
|
|
|
|
enum class byte_order_type : uint16_t
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 08:27:16 +08:00
|
|
|
big_endian = 0xFFFE,
|
|
|
|
little_endian = 0xFEFF
|
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
std::uint64_t file_id = 0xe11ab1a1e011cfd0;
|
|
|
|
std::array<std::uint8_t, 16> ignore1 = {{0}};
|
|
|
|
std::uint16_t revision = 0x003E;
|
|
|
|
std::uint16_t version = 0x0003;
|
|
|
|
byte_order_type byte_order = byte_order_type::little_endian;
|
|
|
|
std::uint16_t sector_size_power = 9;
|
|
|
|
std::uint16_t short_sector_size_power = 6;
|
|
|
|
std::array<std::uint8_t, 10> ignore2 = {{0}};
|
|
|
|
std::uint32_t num_sectors = 0;
|
|
|
|
sector_id directory_start = 0;
|
|
|
|
std::array<std::uint8_t, 4> ignore3 = {{0}};
|
|
|
|
std::uint32_t threshold = 4096;
|
|
|
|
sector_id short_table_start = 0;
|
|
|
|
std::uint32_t num_short_sectors = 0;
|
|
|
|
sector_id sector_table_start = 0;
|
|
|
|
std::uint32_t num_master_alloc_table_sectors = 0;
|
|
|
|
std::array<sector_id, 109> master_sector_alloc_table = {{FreeSector}};
|
2017-04-23 02:25:27 +08:00
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
bool header_is_valid(const header &h)
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-24 08:27:16 +08:00
|
|
|
if (h.threshold != 4096)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 08:27:16 +08:00
|
|
|
return false;
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
if (h.num_sectors == 0
|
|
|
|
|| (h.num_sectors > 109 && h.num_sectors > (h.num_master_alloc_table_sectors * 127) + 109)
|
|
|
|
|| ((h.num_sectors < 109) && (h.num_master_alloc_table_sectors != 0)))
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 08:27:16 +08:00
|
|
|
return false;
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
if (h.short_sector_size_power > h.sector_size_power
|
|
|
|
|| h.sector_size_power <= 6
|
|
|
|
|| h.sector_size_power >= 31)
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-24 08:27:16 +08:00
|
|
|
return false;
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
return true;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
struct directory_entry
|
|
|
|
{
|
2017-04-24 08:51:50 +08:00
|
|
|
void name(const std::u16string &new_name)
|
|
|
|
{
|
|
|
|
name_length = std::min(static_cast<std::uint16_t>(new_name.size()), std::uint16_t(31));
|
|
|
|
std::copy(new_name.begin(), new_name.begin() + name_length, name_array.begin());
|
|
|
|
name_array[name_length] = 0;
|
|
|
|
name_length = (name_length + 1) * 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::u16string name() const
|
|
|
|
{
|
|
|
|
return std::u16string(name_array.begin(),
|
|
|
|
name_array.begin() + (name_length - 1) / 2);
|
|
|
|
}
|
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
enum class entry_type : std::uint8_t
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
Empty = 0,
|
|
|
|
UserStorage = 1,
|
|
|
|
UserStream = 2,
|
|
|
|
LockBytes = 3,
|
|
|
|
Property = 4,
|
|
|
|
RootStorage = 5
|
2017-04-24 08:27:16 +08:00
|
|
|
};
|
2017-04-23 02:25:27 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
enum class entry_color : std::uint8_t
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
Red = 0,
|
|
|
|
Black = 1
|
2017-04-24 08:27:16 +08:00
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 08:51:50 +08:00
|
|
|
std::array<char16_t, 32> name_array = {{0}};
|
2017-04-24 08:27:16 +08:00
|
|
|
std::uint16_t name_length = 0;
|
|
|
|
entry_type type;
|
|
|
|
entry_color color;
|
2017-04-23 02:25:27 +08:00
|
|
|
directory_id prev = -1;
|
|
|
|
directory_id next = -1;
|
|
|
|
directory_id child = -1;
|
2017-04-23 08:43:26 +08:00
|
|
|
std::array<std::uint8_t, 36> ignore;
|
2017-04-23 02:25:27 +08:00
|
|
|
sector_id first = 0;
|
|
|
|
std::uint32_t size = 0;
|
2017-04-23 08:43:26 +08:00
|
|
|
std::uint32_t ignore2;
|
2017-04-23 02:25:27 +08:00
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
class directory_tree
|
|
|
|
{
|
|
|
|
public:
|
2017-04-24 08:51:50 +08:00
|
|
|
static const directory_id End;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
directory_tree()
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
clear();
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
void clear()
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
entries = { create_root_entry() };
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
std::size_t entry_count() const
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
return entries.size();
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
directory_entry &entry(directory_id index)
|
|
|
|
{
|
2017-04-23 23:53:52 +08:00
|
|
|
return entries[static_cast<std::size_t>(index)];
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
const directory_entry &entry(directory_id index) const
|
|
|
|
{
|
2017-04-23 23:53:52 +08:00
|
|
|
return entries[static_cast<std::size_t>(index)];
|
2017-04-23 08:43:26 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
const directory_entry &entry(const std::u16string &name) const
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 08:43:26 +08:00
|
|
|
return entry(find_entry(name).first);
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
directory_entry &entry(const std::u16string &name, bool create)
|
|
|
|
{
|
2017-04-23 08:43:26 +08:00
|
|
|
auto find_result = find_entry(name);
|
|
|
|
auto index = find_result.first;
|
|
|
|
auto found = find_result.second;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
if (!found)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-23 08:43:26 +08:00
|
|
|
// not found among children
|
|
|
|
if (!create)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-23 08:43:26 +08:00
|
|
|
throw xlnt::exception("not found");
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
// create a new entry
|
|
|
|
auto parent = index;
|
|
|
|
entries.push_back(directory_entry());
|
|
|
|
index = static_cast<directory_id>(entry_count() - 1);
|
|
|
|
auto &e = entry(index);
|
2017-04-24 08:51:50 +08:00
|
|
|
e.name(name);
|
|
|
|
e.type = directory_entry::entry_type::UserStream;
|
|
|
|
e.size = 0;
|
2017-04-23 08:43:26 +08:00
|
|
|
e.first = 0;
|
2017-04-24 08:51:50 +08:00
|
|
|
e.child = End;
|
|
|
|
e.prev = End;
|
|
|
|
e.next = entry(parent).child;
|
|
|
|
entry(parent).child = index;
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return entry(index);
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
2017-04-23 23:53:52 +08:00
|
|
|
/*
|
2017-04-23 08:43:26 +08:00
|
|
|
directory_id parent(directory_id index)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
// brute-force, basically we iterate for each entries, find its children
|
|
|
|
// and check if one of the children is 'index'
|
|
|
|
for (auto j = directory_id(0); j < static_cast<directory_id>(entry_count()); j++)
|
|
|
|
{
|
|
|
|
auto chi = children(j);
|
|
|
|
|
|
|
|
for (std::size_t i = 0; i < chi.size(); i++)
|
|
|
|
{
|
|
|
|
if (chi[i] == index)
|
|
|
|
{
|
2017-04-23 08:43:26 +08:00
|
|
|
return j;
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
return -1;
|
|
|
|
}
|
2017-04-23 23:53:52 +08:00
|
|
|
*/
|
|
|
|
/*
|
2017-04-23 08:43:26 +08:00
|
|
|
std::u16string path(directory_id index)
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
// don't use root name ("Root Entry"), just give "/"
|
|
|
|
if (index == 0) return u"/";
|
|
|
|
|
|
|
|
auto current_entry = entry(index);
|
|
|
|
|
|
|
|
auto result = std::u16string(entry(index).name.data());
|
|
|
|
result.insert(0, u"/");
|
|
|
|
|
|
|
|
auto current_parent = parent(index);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
while (current_parent > 0)
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
current_entry = entry(current_parent);
|
|
|
|
|
|
|
|
result.insert(0, std::u16string(current_entry.name.data()));
|
|
|
|
result.insert(0, u"/");
|
|
|
|
|
|
|
|
--current_parent;
|
|
|
|
index = current_parent;
|
|
|
|
|
|
|
|
if (current_parent <= 0) break;
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
return result;
|
|
|
|
}
|
2017-04-23 23:53:52 +08:00
|
|
|
*/
|
2017-04-23 08:43:26 +08:00
|
|
|
std::vector<directory_id> children(directory_id index) const
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
auto result = std::vector<directory_id>();
|
|
|
|
auto &e = entry(index);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
if (e.child >= 0 && e.child < static_cast<directory_id>(entry_count()))
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
find_siblings(result, e.child);
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
return result;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
void load(const std::vector<byte> &data)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 06:18:35 +08:00
|
|
|
auto reader = binary_reader(data);
|
|
|
|
entries = reader.as_vector_of<directory_entry>();
|
|
|
|
|
|
|
|
auto is_empty = [](const directory_entry &entry)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 06:18:35 +08:00
|
|
|
return entry.type == directory_entry::entry_type::Empty;
|
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
entries.erase(std::remove_if(entries.begin(), entries.end(), is_empty));
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
directory_entry create_root_entry() const
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
directory_entry root;
|
|
|
|
|
2017-04-24 08:51:50 +08:00
|
|
|
root.name(u"Root Entry");
|
2017-04-23 08:43:26 +08:00
|
|
|
root.type = directory_entry::entry_type::RootStorage;
|
|
|
|
root.color = directory_entry::entry_color::Black;
|
2017-04-23 02:25:27 +08:00
|
|
|
root.size = 0;
|
|
|
|
|
|
|
|
return root;
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
private:
|
|
|
|
// helper function: recursively find siblings of index
|
2017-04-23 08:43:26 +08:00
|
|
|
void find_siblings(std::vector<directory_id> &result, directory_id index) const
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
auto e = entry(index);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
// prevent infinite loop
|
|
|
|
for (std::size_t i = 0; i < result.size(); i++)
|
|
|
|
{
|
|
|
|
if (result[i] == index) return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// add myself
|
|
|
|
result.push_back(index);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
// visit previous sibling, don't go infinitely
|
|
|
|
auto prev = e.prev;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
if ((prev > 0) && (prev < static_cast<directory_id>(entry_count())))
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
for (std::size_t i = 0; i < result.size(); i++)
|
|
|
|
{
|
|
|
|
if (result[i] == prev)
|
|
|
|
{
|
|
|
|
prev = 0;
|
|
|
|
}
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
if (prev)
|
|
|
|
{
|
|
|
|
find_siblings(result, prev);
|
|
|
|
}
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
// visit next sibling, don't go infinitely
|
|
|
|
auto next = e.next;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
if ((next > 0) && (next < static_cast<directory_id>(entry_count())))
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
for (std::size_t i = 0; i < result.size(); i++)
|
|
|
|
{
|
|
|
|
if (result[i] == next) next = 0;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
if (next)
|
|
|
|
{
|
|
|
|
find_siblings(result, next);
|
|
|
|
}
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
2017-04-23 02:25:27 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
std::pair<directory_id, bool> find_entry(const std::u16string &name) const
|
|
|
|
{
|
|
|
|
// quick check for "/" (that's root)
|
|
|
|
if (name == u"/Root Entry")
|
|
|
|
{
|
|
|
|
return { 0, true };
|
|
|
|
}
|
|
|
|
|
|
|
|
// split the names, e.g "/ObjectPool/_1020961869" will become:
|
|
|
|
// "ObjectPool" and "_1020961869"
|
|
|
|
auto names = std::vector<std::u16string>();
|
|
|
|
auto start = std::size_t(0);
|
|
|
|
auto end = std::size_t(0);
|
|
|
|
|
|
|
|
if (name[0] == u'/') start++;
|
|
|
|
|
|
|
|
while (start < name.length())
|
|
|
|
{
|
|
|
|
end = name.find_first_of('/', start);
|
|
|
|
if (end == std::string::npos) end = name.length();
|
|
|
|
names.push_back(name.substr(start, end - start));
|
|
|
|
start = end + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// start from root
|
|
|
|
auto index = directory_id(0);
|
|
|
|
|
2017-04-24 08:51:50 +08:00
|
|
|
for (auto &name : names)
|
2017-04-23 08:43:26 +08:00
|
|
|
{
|
|
|
|
// find among the children of index
|
|
|
|
auto chi = children(index);
|
|
|
|
std::ptrdiff_t child = 0;
|
|
|
|
|
|
|
|
for (std::size_t i = 0; i < chi.size(); i++)
|
|
|
|
{
|
|
|
|
auto ce = entry(chi[i]);
|
|
|
|
|
2017-04-24 08:51:50 +08:00
|
|
|
if (ce.name() == name)
|
2017-04-23 08:43:26 +08:00
|
|
|
{
|
|
|
|
child = static_cast<std::ptrdiff_t>(chi[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// traverse to the child
|
|
|
|
if (child > 0)
|
|
|
|
{
|
|
|
|
index = static_cast<directory_id>(child);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
return { index, false };
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return { index, true };
|
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
std::vector<directory_entry> entries;
|
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 08:51:50 +08:00
|
|
|
// Out-of-class definition of the "no entry" link marker declared in
// directory_tree.
const directory_id directory_tree::End{-1};
|
|
|
|
|
2017-04-22 07:52:02 +08:00
|
|
|
} // namespace
|
|
|
|
|
|
|
|
namespace xlnt {
|
|
|
|
namespace detail {
|
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
class compound_document_reader_impl
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
public:
|
2017-04-24 06:18:35 +08:00
|
|
|
compound_document_reader_impl(const std::vector<byte> &bytes)
|
|
|
|
: sectors_(bytes.data() + sizeof(header)),
|
|
|
|
sectors_size_(bytes.size())
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 06:18:35 +08:00
|
|
|
auto reader = binary_reader(bytes);
|
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
header_ = reader.read<header>();
|
2017-04-24 04:56:01 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
// Master allocation table
|
|
|
|
const auto sector_size = 1 << header_.sector_size_power;
|
|
|
|
const auto sector_table_sectors = load_master_sector_allocation_table();
|
|
|
|
const auto sector_table_bytes = read(sector_table_sectors);
|
|
|
|
auto sector_table_reader = binary_reader(sector_table_bytes);
|
|
|
|
sector_table_ = sector_table_reader.as_vector_of<sector_id>();
|
2017-04-24 04:56:01 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
// Short sector allocation table
|
|
|
|
const auto short_sector_size = 1 << header_.short_sector_size_power;
|
|
|
|
const auto short_table_chain = follow_sector_chain(sector_table_, header_.short_table_start);
|
|
|
|
const auto short_table_bytes = read(short_table_chain);
|
|
|
|
auto short_sector_table_reader = binary_reader(short_table_bytes);
|
|
|
|
short_sector_table_ = short_sector_table_reader.as_vector_of<sector_id>();
|
2017-04-24 04:56:01 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
// Directory
|
|
|
|
const auto directory_chain = follow_sector_chain(sector_table_, header_.directory_start);
|
2017-04-24 06:18:35 +08:00
|
|
|
const auto directory_sectors = read(directory_chain);
|
|
|
|
directory_.load(directory_sectors);
|
2017-04-24 04:56:01 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
// Short stream container
|
2017-04-24 04:56:01 +08:00
|
|
|
auto first_short_sector = directory_.entry(u"/Root Entry", false).first;
|
2017-04-24 08:27:16 +08:00
|
|
|
short_container_stream_ = follow_sector_chain(sector_table_, first_short_sector);
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
std::vector<byte> read(const sector_chain §ors) const
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-24 08:27:16 +08:00
|
|
|
const auto sector_size = 1 << header_.sector_size_power;
|
2017-04-24 06:18:35 +08:00
|
|
|
auto result = std::vector<byte>();
|
|
|
|
auto writer = binary_writer(result);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
for (auto sector : sectors)
|
|
|
|
{
|
2017-04-24 08:27:16 +08:00
|
|
|
auto position = static_cast<std::size_t>(sector_size * sector);
|
2017-04-24 06:18:35 +08:00
|
|
|
writer.append(sectors_, sectors_size_, position, sector_size);
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-24 06:18:35 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
return result;
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
std::vector<byte> read_short(const sector_chain §ors) const
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 08:27:16 +08:00
|
|
|
const auto short_sector_size = 1 << header_.short_sector_size_power;
|
|
|
|
const auto sector_size = 1 << header_.sector_size_power;
|
2017-04-24 06:18:35 +08:00
|
|
|
auto result = std::vector<byte>();
|
|
|
|
auto writer = binary_writer(result);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
for (auto sector : sectors)
|
|
|
|
{
|
2017-04-24 08:27:16 +08:00
|
|
|
auto position = static_cast<std::size_t>(short_sector_size * sector);
|
2017-04-23 02:25:27 +08:00
|
|
|
auto master_allocation_table_index = position / sector_size;
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
auto sector_data = read({ short_container_stream_[master_allocation_table_index] });
|
2017-04-23 02:25:27 +08:00
|
|
|
|
|
|
|
auto offset = position % sector_size;
|
2017-04-24 06:18:35 +08:00
|
|
|
writer.append(sector_data, offset, short_sector_size);
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
sector_chain load_master_sector_allocation_table() const
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-24 08:27:16 +08:00
|
|
|
auto sectors = sector_chain(
|
|
|
|
header_.master_sector_alloc_table.begin(),
|
|
|
|
header_.master_sector_alloc_table.begin()
|
|
|
|
+ std::min(header_.master_sector_alloc_table.size(),
|
|
|
|
static_cast<std::size_t>(header_.num_sectors)));
|
2017-04-22 09:58:40 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
if (header_.num_sectors > std::uint32_t(109))
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 08:27:16 +08:00
|
|
|
auto current_sector = header_.sector_table_start;
|
2017-04-22 09:58:40 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
for (auto r = std::uint32_t(0); r < header_.num_master_alloc_table_sectors; ++r)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 06:18:35 +08:00
|
|
|
auto current_sector_data = read({ current_sector });
|
|
|
|
auto current_sector_reader = binary_reader(current_sector_data);
|
|
|
|
auto current_sector_sectors = current_sector_reader.as_vector_of<sector_id>();
|
|
|
|
|
|
|
|
current_sector = current_sector_sectors.back();
|
|
|
|
current_sector_sectors.pop_back();
|
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
sectors.insert(
|
2017-04-24 06:18:35 +08:00
|
|
|
current_sector_sectors.begin(),
|
|
|
|
current_sector_sectors.end(),
|
2017-04-24 08:27:16 +08:00
|
|
|
sectors.end());
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
return sectors;
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
std::vector<byte> read_stream(const std::u16string &name) const
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 04:56:01 +08:00
|
|
|
const auto entry = directory_.entry(name);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
const auto entry_sectors = entry.size < header_.threshold
|
|
|
|
? follow_sector_chain(short_sector_table_, entry.first)
|
|
|
|
: follow_sector_chain(sector_table_, entry.first);
|
|
|
|
auto result = entry.size < header_.threshold
|
|
|
|
? read_short(entry_sectors)
|
|
|
|
: read(entry_sectors);
|
2017-04-24 06:18:35 +08:00
|
|
|
result.resize(entry.size);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
return result;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
private:
|
2017-04-24 06:18:35 +08:00
|
|
|
const byte *sectors_;
|
|
|
|
const std::size_t sectors_size_;
|
2017-04-24 04:56:01 +08:00
|
|
|
directory_tree directory_;
|
|
|
|
header header_;
|
2017-04-24 08:27:16 +08:00
|
|
|
std::vector<sector_id> sector_table_;
|
|
|
|
std::vector<sector_id> short_sector_table_;
|
2017-04-24 04:56:01 +08:00
|
|
|
std::vector<sector_id> short_container_stream_;
|
|
|
|
};
|
2017-04-23 08:43:26 +08:00
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
class compound_document_writer_impl
|
|
|
|
{
|
|
|
|
public:
|
2017-04-24 06:18:35 +08:00
|
|
|
compound_document_writer_impl(std::vector<byte> &bytes)
|
2017-04-24 08:27:16 +08:00
|
|
|
: writer_(bytes),
|
|
|
|
sector_table_(128, FreeSector),
|
|
|
|
short_sector_table_(128, FreeSector)
|
2017-04-24 04:56:01 +08:00
|
|
|
{
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
void write_sectors(const std::vector<byte> &data, directory_entry &/*entry*/)
|
2017-04-24 04:56:01 +08:00
|
|
|
{
|
2017-04-24 08:27:16 +08:00
|
|
|
const auto sector_size = 1 << header_.sector_size_power;
|
2017-04-24 04:56:01 +08:00
|
|
|
const auto num_sectors = data.size() / sector_size;
|
|
|
|
|
|
|
|
for (auto i = std::size_t(0); i < num_sectors; ++i)
|
|
|
|
{
|
|
|
|
auto position = sector_size * i;
|
|
|
|
auto current_sector_size = data.size() % sector_size;
|
2017-04-24 06:18:35 +08:00
|
|
|
writer_.append(data, position, current_sector_size);
|
2017-04-24 04:56:01 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
void write_short_sectors(const std::vector<byte> &data, directory_entry &/*entry*/)
|
2017-04-24 04:56:01 +08:00
|
|
|
{
|
2017-04-24 08:27:16 +08:00
|
|
|
const auto sector_size = 1 << header_.sector_size_power;
|
2017-04-24 04:56:01 +08:00
|
|
|
const auto num_sectors = data.size() / sector_size;
|
|
|
|
|
|
|
|
for (auto i = std::size_t(0); i < num_sectors; ++i)
|
|
|
|
{
|
|
|
|
auto position = sector_size * i;
|
|
|
|
auto current_sector_size = data.size() % sector_size;
|
2017-04-24 06:18:35 +08:00
|
|
|
writer_.append(data, position, current_sector_size);
|
2017-04-24 04:56:01 +08:00
|
|
|
}
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
void write_stream(const std::u16string &name, const std::vector<byte> &data)
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-24 04:56:01 +08:00
|
|
|
auto &entry = directory_.entry(name, true);
|
2017-04-23 08:43:26 +08:00
|
|
|
|
2017-04-24 08:27:16 +08:00
|
|
|
if (entry.size < header_.threshold)
|
2017-04-23 08:43:26 +08:00
|
|
|
{
|
|
|
|
write_short_sectors(data, entry);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
write_sectors(data, entry);
|
|
|
|
}
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
private:
|
2017-04-24 06:18:35 +08:00
|
|
|
binary_writer writer_;
|
2017-04-23 02:25:27 +08:00
|
|
|
directory_tree directory_;
|
|
|
|
header header_;
|
2017-04-24 08:27:16 +08:00
|
|
|
std::vector<sector_id> sector_table_;
|
|
|
|
std::vector<sector_id> short_sector_table_;
|
2017-04-23 02:25:27 +08:00
|
|
|
std::vector<sector_id> short_container_stream_;
|
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
// Public reader facade: all parsing happens in the private implementation
// object, which is constructed from `data` here.
compound_document_reader::compound_document_reader(const std::vector<std::uint8_t> &data)
    : d_(new compound_document_reader_impl(data))
{
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
// Defined in this file, where compound_document_reader_impl is a complete
// type; the destructor itself does nothing beyond destroying the members.
compound_document_reader::~compound_document_reader() = default;
|
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
std::vector<std::uint8_t> compound_document_reader::read_stream(const std::u16string &name) const
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-24 06:18:35 +08:00
|
|
|
return d_->read_stream(name);
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
// Public writer facade: creates the private implementation object, which
// writes into `data`.
compound_document_writer::compound_document_writer(std::vector<std::uint8_t> &data)
    : d_(new compound_document_writer_impl(data))
{
}
|
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
// Defined in this file, where compound_document_writer_impl is a complete
// type; the destructor itself does nothing beyond destroying the members.
compound_document_writer::~compound_document_writer() = default;
|
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
void compound_document_writer::write_stream(const std::u16string &name, const std::vector<std::uint8_t> &data)
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-24 04:56:01 +08:00
|
|
|
d_->write_stream(name, data);
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
|
|
|
|
|
|
|
} // namespace detail
|
|
|
|
} // namespace xlnt
|