2017-04-22 09:58:40 +08:00
|
|
|
// Copyright (C) 2016-2017 Thomas Fussell
|
|
|
|
// Copyright (C) 2002-2007 Ariya Hidayat (ariya@kde.org).
|
|
|
|
//
|
|
|
|
// Redistribution and use in source and binary forms, with or without
|
|
|
|
// modification, are permitted provided that the following conditions
|
|
|
|
// are met:
|
|
|
|
//
|
|
|
|
// 1. Redistributions of source code must retain the above copyright
|
|
|
|
// notice, this list of conditions and the following disclaimer.
|
|
|
|
// 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
|
|
// documentation and/or other materials provided with the distribution.
|
|
|
|
//
|
|
|
|
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
|
|
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
|
|
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
|
|
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
|
|
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
|
|
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
|
|
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
2017-04-22 07:52:02 +08:00
|
|
|
#include <array>
|
|
|
|
#include <algorithm>
|
|
|
|
#include <cstring>
|
|
|
|
#include <fstream>
|
|
|
|
#include <iostream>
|
|
|
|
#include <list>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
#include <detail/binary.hpp>
|
2017-04-22 07:52:02 +08:00
|
|
|
#include <detail/cryptography/compound_document.hpp>
|
|
|
|
#include <xlnt/utils/exceptions.hpp>
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
using xlnt::detail::byte;
|
|
|
|
using xlnt::detail::binary_reader;
|
|
|
|
using xlnt::detail::binary_writer;
|
2017-04-23 02:25:27 +08:00
|
|
|
|
|
|
|
// Identifier of a sector. Negative values are used as markers by
// allocation_table (FreeSector, EndOfChainSector, ...).
using sector_id = std::int32_t;

// Index of an entry in the directory; directory_entry links default to -1.
using directory_id = std::int32_t;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
|
|
|
class allocation_table
|
|
|
|
{
|
|
|
|
public:
|
2017-04-23 08:43:26 +08:00
|
|
|
static const sector_id FreeSector;
|
|
|
|
static const sector_id EndOfChainSector;
|
|
|
|
static const sector_id AllocationTableSector;
|
|
|
|
static const sector_id MasterAllocationTableSector;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
allocation_table()
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
resize(128);
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
std::size_t count() const
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
return data_.size();
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
void resize(std::size_t newsize)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
data_.resize(newsize, FreeSector);
|
|
|
|
}
|
2017-04-23 23:53:52 +08:00
|
|
|
/*
|
|
|
|
void set(sector_id index, sector_id value)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-23 23:53:52 +08:00
|
|
|
if (static_cast<std::size_t>(index) >= count())
|
|
|
|
{
|
|
|
|
resize(static_cast<std::size_t>(index + 1));
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 23:53:52 +08:00
|
|
|
data_[static_cast<std::size_t>(index)] = value;
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
/*
|
2017-04-23 08:43:26 +08:00
|
|
|
void setChain(std::vector<sector_id> chain)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
if (chain.size())
|
|
|
|
{
|
|
|
|
for (std::size_t i = 0; i < chain.size() - 1; i++)
|
|
|
|
{
|
|
|
|
set(chain[i], chain[i + 1]);
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
set(chain[chain.size() - 1], EndOfChainSector);
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
|
|
|
}
|
2017-04-23 23:53:52 +08:00
|
|
|
*/
|
2017-04-23 08:43:26 +08:00
|
|
|
std::vector<sector_id> follow(sector_id start) const
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
auto chain = std::vector<sector_id>();
|
2017-04-23 08:43:26 +08:00
|
|
|
|
|
|
|
if (start >= static_cast<sector_id>(count()))
|
|
|
|
{
|
|
|
|
return chain;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
auto p = start;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 23:53:52 +08:00
|
|
|
auto already_exists = [](const std::vector<sector_id> &check, sector_id item)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-23 23:53:52 +08:00
|
|
|
for (std::size_t i = 0; i < check.size(); i++)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-23 23:53:52 +08:00
|
|
|
if (check[i] == item) return true;
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
return false;
|
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
while (p < static_cast<sector_id>(count()))
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-23 08:43:26 +08:00
|
|
|
if (p == EndOfChainSector) break;
|
|
|
|
if (p == AllocationTableSector) break;
|
|
|
|
if (p == MasterAllocationTableSector) break;
|
2017-04-23 02:25:27 +08:00
|
|
|
if (already_exists(chain, p)) break;
|
|
|
|
chain.push_back(p);
|
2017-04-23 23:53:52 +08:00
|
|
|
if (data_[static_cast<std::size_t>(p)] >= static_cast<sector_id>(count())) break;
|
|
|
|
p = data_[static_cast<std::size_t>(p)];
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
return chain;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 06:56:31 +08:00
|
|
|
void load(const std::vector<byte> §ors)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 06:18:35 +08:00
|
|
|
auto reader = binary_reader(sectors);
|
|
|
|
data_ = reader.as_vector_of<sector_id>();
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
2017-04-24 04:56:01 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
std::size_t sector_size() const
|
|
|
|
{
|
|
|
|
return sector_size_;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
void sector_size(std::size_t size)
|
|
|
|
{
|
|
|
|
sector_size_ = size;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
private:
|
|
|
|
std::size_t sector_size_ = 4096;
|
|
|
|
std::vector<sector_id> data_;
|
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
const sector_id allocation_table::FreeSector = -1;
|
|
|
|
const sector_id allocation_table::EndOfChainSector = -2;
|
|
|
|
const sector_id allocation_table::AllocationTableSector = -3;
|
|
|
|
const sector_id allocation_table::MasterAllocationTableSector = -4;
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
class header
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
public:
|
|
|
|
header()
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
bool is_valid() const
|
|
|
|
{
|
|
|
|
if (threshold_ != 4096) return false;
|
|
|
|
if (num_sectors_ == 0) return false;
|
|
|
|
if ((num_sectors_ > 109) && (num_sectors_ > (num_master_sectors_ * 127) + 109)) return false;
|
|
|
|
if ((num_sectors_ < 109) && (num_master_sectors_ != 0)) return false;
|
|
|
|
if (short_sector_size_power_ > sector_size_power_) return false;
|
|
|
|
if (sector_size_power_ <= 6) return false;
|
|
|
|
if (sector_size_power_ >= 31) return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
void load(binary_reader &reader)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 06:18:35 +08:00
|
|
|
if (reader.size() < 512)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
throw xlnt::exception("bad header");
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
*this = reader.read<header>();
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
if (file_id_ != 0xe11ab1a1e011cfd0)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
throw xlnt::exception("not ole");
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
if (!is_valid())
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
throw xlnt::exception("bad ole");
|
|
|
|
}
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
std::size_t sector_size() const
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
return std::size_t(1) << sector_size_power_;
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
std::size_t short_sector_size() const
|
|
|
|
{
|
|
|
|
return std::size_t(1) << short_sector_size_power_;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
std::vector<sector_id> sectors() const
|
|
|
|
{
|
2017-04-23 08:43:26 +08:00
|
|
|
const auto num_header_sectors = std::min(num_sectors_, std::uint32_t(109));
|
|
|
|
return std::vector<sector_id>(
|
|
|
|
first_master_table.begin(),
|
|
|
|
first_master_table.begin() + num_header_sectors);
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
std::size_t num_master_sectors() const
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
return static_cast<std::size_t>(num_master_sectors_);
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
sector_id master_table_start() const
|
|
|
|
{
|
|
|
|
return master_start_;
|
|
|
|
}
|
|
|
|
|
|
|
|
sector_id short_table_start() const
|
|
|
|
{
|
|
|
|
return short_start_;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
sector_id directory_start() const
|
|
|
|
{
|
|
|
|
return directory_start_;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
std::size_t threshold() const
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
return threshold_;
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
private:
|
2017-04-23 08:43:26 +08:00
|
|
|
std::uint64_t file_id_ = 0xe11ab1a1e011cfd0;
|
2017-04-23 02:25:27 +08:00
|
|
|
std::array<std::uint8_t, 16> ignore1 = {{0}};
|
|
|
|
std::uint16_t revision_ = 0x003E;
|
|
|
|
std::uint16_t version_ = 0x0003;
|
|
|
|
std::uint16_t byte_order_ = 0xFEFF;
|
|
|
|
std::uint16_t sector_size_power_ = 9;
|
|
|
|
std::uint16_t short_sector_size_power_ = 6;
|
|
|
|
std::array<std::uint8_t, 10> ignore2 = {{0}};
|
|
|
|
std::uint32_t num_sectors_ = 0;
|
|
|
|
sector_id directory_start_ = 0;
|
|
|
|
std::array<std::uint8_t, 4> ignore3 = {{0}};
|
|
|
|
std::uint32_t threshold_ = 4096;
|
|
|
|
sector_id short_start_ = 0;
|
|
|
|
std::uint32_t num_short_sectors_ = 0;
|
|
|
|
sector_id master_start_ = 0;
|
|
|
|
std::uint32_t num_master_sectors_ = 0;
|
2017-04-23 23:53:52 +08:00
|
|
|
std::array<sector_id, 109> first_master_table = {{allocation_table::FreeSector}};
|
2017-04-23 02:25:27 +08:00
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
struct directory_entry
|
|
|
|
{
|
|
|
|
std::array<char16_t, 32> name = {{0}};
|
|
|
|
std::uint16_t name_length = 0;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
enum class entry_type : std::uint8_t
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
Empty = 0,
|
|
|
|
UserStorage = 1,
|
|
|
|
UserStream = 2,
|
|
|
|
LockBytes = 3,
|
|
|
|
Property = 4,
|
|
|
|
RootStorage = 5
|
|
|
|
} type;
|
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
enum class entry_color : std::uint8_t
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
Red = 0,
|
|
|
|
Black = 1
|
|
|
|
} color;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
directory_id prev = -1;
|
|
|
|
directory_id next = -1;
|
|
|
|
directory_id child = -1;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
std::array<std::uint8_t, 36> ignore;
|
2017-04-23 02:25:27 +08:00
|
|
|
|
|
|
|
sector_id first = 0;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
std::uint32_t size = 0;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
std::uint32_t ignore2;
|
2017-04-23 02:25:27 +08:00
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
class directory_tree
|
|
|
|
{
|
|
|
|
public:
|
2017-04-23 23:53:52 +08:00
|
|
|
//static const directory_id End = -1;
|
2017-04-23 02:25:27 +08:00
|
|
|
|
|
|
|
static void entry_name(directory_entry &entry, std::u16string name)
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
if (name.size() > 31)
|
|
|
|
{
|
|
|
|
name.resize(31);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::copy(name.begin(), name.end(), entry.name.begin());
|
|
|
|
entry.name[name.size()] = 0;
|
|
|
|
entry.name_length = static_cast<std::uint16_t>((name.size() + 1) * 2);
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
directory_tree()
|
2017-04-23 02:25:27 +08:00
|
|
|
: entries()
|
|
|
|
{
|
|
|
|
clear();
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
void clear()
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
entries = { create_root_entry() };
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
std::size_t entry_count() const
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
return entries.size();
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
directory_entry &entry(directory_id index)
|
|
|
|
{
|
2017-04-23 23:53:52 +08:00
|
|
|
return entries[static_cast<std::size_t>(index)];
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
const directory_entry &entry(directory_id index) const
|
|
|
|
{
|
2017-04-23 23:53:52 +08:00
|
|
|
return entries[static_cast<std::size_t>(index)];
|
2017-04-23 08:43:26 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
const directory_entry &entry(const std::u16string &name) const
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 08:43:26 +08:00
|
|
|
return entry(find_entry(name).first);
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
directory_entry &entry(const std::u16string &name, bool create)
|
|
|
|
{
|
2017-04-23 08:43:26 +08:00
|
|
|
auto find_result = find_entry(name);
|
|
|
|
auto index = find_result.first;
|
|
|
|
auto found = find_result.second;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
if (!found)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-23 08:43:26 +08:00
|
|
|
// not found among children
|
|
|
|
if (!create)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-23 08:43:26 +08:00
|
|
|
throw xlnt::exception("not found");
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
// create a new entry
|
|
|
|
auto parent = index;
|
|
|
|
entries.push_back(directory_entry());
|
|
|
|
index = static_cast<directory_id>(entry_count() - 1);
|
|
|
|
auto &e = entry(index);
|
|
|
|
e.first = 0;
|
|
|
|
entry(parent).prev = index;
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return entry(index);
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
2017-04-23 23:53:52 +08:00
|
|
|
/*
|
2017-04-23 08:43:26 +08:00
|
|
|
directory_id parent(directory_id index)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
// brute-force, basically we iterate for each entries, find its children
|
|
|
|
// and check if one of the children is 'index'
|
|
|
|
for (auto j = directory_id(0); j < static_cast<directory_id>(entry_count()); j++)
|
|
|
|
{
|
|
|
|
auto chi = children(j);
|
|
|
|
|
|
|
|
for (std::size_t i = 0; i < chi.size(); i++)
|
|
|
|
{
|
|
|
|
if (chi[i] == index)
|
|
|
|
{
|
2017-04-23 08:43:26 +08:00
|
|
|
return j;
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
return -1;
|
|
|
|
}
|
2017-04-23 23:53:52 +08:00
|
|
|
*/
|
|
|
|
/*
|
2017-04-23 08:43:26 +08:00
|
|
|
std::u16string path(directory_id index)
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
// don't use root name ("Root Entry"), just give "/"
|
|
|
|
if (index == 0) return u"/";
|
|
|
|
|
|
|
|
auto current_entry = entry(index);
|
|
|
|
|
|
|
|
auto result = std::u16string(entry(index).name.data());
|
|
|
|
result.insert(0, u"/");
|
|
|
|
|
|
|
|
auto current_parent = parent(index);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
while (current_parent > 0)
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
current_entry = entry(current_parent);
|
|
|
|
|
|
|
|
result.insert(0, std::u16string(current_entry.name.data()));
|
|
|
|
result.insert(0, u"/");
|
|
|
|
|
|
|
|
--current_parent;
|
|
|
|
index = current_parent;
|
|
|
|
|
|
|
|
if (current_parent <= 0) break;
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
return result;
|
|
|
|
}
|
2017-04-23 23:53:52 +08:00
|
|
|
*/
|
2017-04-23 08:43:26 +08:00
|
|
|
std::vector<directory_id> children(directory_id index) const
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
auto result = std::vector<directory_id>();
|
|
|
|
auto &e = entry(index);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
if (e.child >= 0 && e.child < static_cast<directory_id>(entry_count()))
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
find_siblings(result, e.child);
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
return result;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
void load(const std::vector<byte> &data)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 06:18:35 +08:00
|
|
|
auto reader = binary_reader(data);
|
|
|
|
entries = reader.as_vector_of<directory_entry>();
|
|
|
|
|
|
|
|
auto is_empty = [](const directory_entry &entry)
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 06:18:35 +08:00
|
|
|
return entry.type == directory_entry::entry_type::Empty;
|
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
entries.erase(std::remove_if(entries.begin(), entries.end(), is_empty));
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
directory_entry create_root_entry() const
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
directory_entry root;
|
|
|
|
|
|
|
|
entry_name(root, u"Root Entry");
|
2017-04-23 08:43:26 +08:00
|
|
|
root.type = directory_entry::entry_type::RootStorage;
|
|
|
|
root.color = directory_entry::entry_color::Black;
|
2017-04-23 02:25:27 +08:00
|
|
|
root.size = 0;
|
|
|
|
|
|
|
|
return root;
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
private:
|
|
|
|
// helper function: recursively find siblings of index
|
2017-04-23 08:43:26 +08:00
|
|
|
void find_siblings(std::vector<directory_id> &result, directory_id index) const
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
auto e = entry(index);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
// prevent infinite loop
|
|
|
|
for (std::size_t i = 0; i < result.size(); i++)
|
|
|
|
{
|
|
|
|
if (result[i] == index) return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// add myself
|
|
|
|
result.push_back(index);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
// visit previous sibling, don't go infinitely
|
|
|
|
auto prev = e.prev;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
if ((prev > 0) && (prev < static_cast<directory_id>(entry_count())))
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
for (std::size_t i = 0; i < result.size(); i++)
|
|
|
|
{
|
|
|
|
if (result[i] == prev)
|
|
|
|
{
|
|
|
|
prev = 0;
|
|
|
|
}
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
if (prev)
|
|
|
|
{
|
|
|
|
find_siblings(result, prev);
|
|
|
|
}
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
// visit next sibling, don't go infinitely
|
|
|
|
auto next = e.next;
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
if ((next > 0) && (next < static_cast<directory_id>(entry_count())))
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
for (std::size_t i = 0; i < result.size(); i++)
|
|
|
|
{
|
|
|
|
if (result[i] == next) next = 0;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
if (next)
|
|
|
|
{
|
|
|
|
find_siblings(result, next);
|
|
|
|
}
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
2017-04-23 02:25:27 +08:00
|
|
|
|
2017-04-23 08:43:26 +08:00
|
|
|
std::pair<directory_id, bool> find_entry(const std::u16string &name) const
|
|
|
|
{
|
|
|
|
// quick check for "/" (that's root)
|
|
|
|
if (name == u"/Root Entry")
|
|
|
|
{
|
|
|
|
return { 0, true };
|
|
|
|
}
|
|
|
|
|
|
|
|
// split the names, e.g "/ObjectPool/_1020961869" will become:
|
|
|
|
// "ObjectPool" and "_1020961869"
|
|
|
|
auto names = std::vector<std::u16string>();
|
|
|
|
auto start = std::size_t(0);
|
|
|
|
auto end = std::size_t(0);
|
|
|
|
|
|
|
|
if (name[0] == u'/') start++;
|
|
|
|
|
|
|
|
while (start < name.length())
|
|
|
|
{
|
|
|
|
end = name.find_first_of('/', start);
|
|
|
|
if (end == std::string::npos) end = name.length();
|
|
|
|
names.push_back(name.substr(start, end - start));
|
|
|
|
start = end + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// start from root
|
|
|
|
auto index = directory_id(0);
|
|
|
|
|
|
|
|
for (auto it = names.begin(); it != names.end(); ++it)
|
|
|
|
{
|
|
|
|
// find among the children of index
|
|
|
|
auto chi = children(index);
|
|
|
|
std::ptrdiff_t child = 0;
|
|
|
|
|
|
|
|
for (std::size_t i = 0; i < chi.size(); i++)
|
|
|
|
{
|
|
|
|
auto ce = entry(chi[i]);
|
|
|
|
|
|
|
|
if (std::u16string(ce.name.data()) == *it)
|
|
|
|
{
|
|
|
|
child = static_cast<std::ptrdiff_t>(chi[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// traverse to the child
|
|
|
|
if (child > 0)
|
|
|
|
{
|
|
|
|
index = static_cast<directory_id>(child);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
return { index, false };
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return { index, true };
|
|
|
|
}
|
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
std::vector<directory_entry> entries;
|
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
namespace xlnt {
|
|
|
|
namespace detail {
|
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
class compound_document_reader_impl
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
public:
|
2017-04-24 06:18:35 +08:00
|
|
|
compound_document_reader_impl(const std::vector<byte> &bytes)
|
|
|
|
: sectors_(bytes.data() + sizeof(header)),
|
|
|
|
sectors_size_(bytes.size())
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 06:18:35 +08:00
|
|
|
auto reader = binary_reader(bytes);
|
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
header_.load(reader);
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
const auto sector_size = header_.sector_size();
|
2017-04-24 06:56:31 +08:00
|
|
|
sector_table_.sector_size(sector_size);
|
2017-04-24 06:18:35 +08:00
|
|
|
const auto master_table_chain = load_master_table();
|
|
|
|
const auto master_sectors = read(master_table_chain);
|
2017-04-24 06:56:31 +08:00
|
|
|
sector_table_.load(master_sectors);
|
2017-04-24 04:56:01 +08:00
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
const auto short_sector_size = header_.short_sector_size();
|
2017-04-24 06:56:31 +08:00
|
|
|
short_sector_table_.sector_size(short_sector_size);
|
2017-04-24 06:18:35 +08:00
|
|
|
const auto short_start = header_.short_table_start();
|
|
|
|
const auto short_table_chain = sector_table_.follow(short_start);
|
|
|
|
const auto short_sectors = read(short_table_chain);
|
2017-04-24 06:56:31 +08:00
|
|
|
short_sector_table_.load(short_sectors);
|
2017-04-24 04:56:01 +08:00
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
const auto directory_start = header_.directory_start();
|
|
|
|
const auto directory_chain = sector_table_.follow(directory_start);
|
|
|
|
const auto directory_sectors = read(directory_chain);
|
|
|
|
directory_.load(directory_sectors);
|
2017-04-24 04:56:01 +08:00
|
|
|
|
|
|
|
auto first_short_sector = directory_.entry(u"/Root Entry", false).first;
|
|
|
|
short_container_stream_ = sector_table_.follow(first_short_sector);
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
std::vector<byte> read(const std::vector<sector_id> §ors) const
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
const auto sector_size = sector_table_.sector_size();
|
2017-04-24 06:18:35 +08:00
|
|
|
auto result = std::vector<byte>();
|
|
|
|
auto writer = binary_writer(result);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
for (auto sector : sectors)
|
|
|
|
{
|
2017-04-24 06:56:31 +08:00
|
|
|
auto position = sector_size * static_cast<std::size_t>(sector);
|
2017-04-24 06:18:35 +08:00
|
|
|
writer.append(sectors_, sectors_size_, position, sector_size);
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-24 06:18:35 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
return result;
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
std::vector<byte> read_short(const std::vector<sector_id> §ors) const
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
|
|
|
const auto short_sector_size = short_sector_table_.sector_size();
|
|
|
|
const auto sector_size = sector_table_.sector_size();
|
2017-04-24 06:18:35 +08:00
|
|
|
auto result = std::vector<byte>();
|
|
|
|
auto writer = binary_writer(result);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
for (auto sector : sectors)
|
|
|
|
{
|
2017-04-23 23:53:52 +08:00
|
|
|
auto position = short_sector_size * static_cast<std::size_t>(sector);
|
2017-04-23 02:25:27 +08:00
|
|
|
auto master_allocation_table_index = position / sector_size;
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
auto sector_data = read({ short_container_stream_[master_allocation_table_index] });
|
2017-04-23 02:25:27 +08:00
|
|
|
|
|
|
|
auto offset = position % sector_size;
|
2017-04-24 06:18:35 +08:00
|
|
|
writer.append(sector_data, offset, short_sector_size);
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
std::vector<sector_id> load_master_table()
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-23 02:25:27 +08:00
|
|
|
auto master_sectors = header_.sectors();
|
2017-04-22 09:58:40 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
if (header_.num_master_sectors() > 109)
|
|
|
|
{
|
|
|
|
auto current_sector = header_.master_table_start();
|
2017-04-22 09:58:40 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
for (auto r = std::size_t(0); r < header_.num_master_sectors(); ++r)
|
|
|
|
{
|
2017-04-24 06:18:35 +08:00
|
|
|
auto current_sector_data = read({ current_sector });
|
|
|
|
auto current_sector_reader = binary_reader(current_sector_data);
|
|
|
|
auto current_sector_sectors = current_sector_reader.as_vector_of<sector_id>();
|
|
|
|
|
|
|
|
current_sector = current_sector_sectors.back();
|
|
|
|
current_sector_sectors.pop_back();
|
|
|
|
|
|
|
|
master_sectors.insert(
|
|
|
|
current_sector_sectors.begin(),
|
|
|
|
current_sector_sectors.end(),
|
|
|
|
master_sectors.end());
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
return master_sectors;
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
std::vector<byte> read_stream(const std::u16string &name) const
|
2017-04-23 02:25:27 +08:00
|
|
|
{
|
2017-04-24 04:56:01 +08:00
|
|
|
const auto entry = directory_.entry(name);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
auto result = entry.size < header_.threshold()
|
|
|
|
? read_short(short_sector_table_.follow(entry.first))
|
|
|
|
: read(sector_table_.follow(entry.first));
|
|
|
|
result.resize(entry.size);
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
return result;
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
private:
|
2017-04-24 06:18:35 +08:00
|
|
|
const byte *sectors_;
|
|
|
|
const std::size_t sectors_size_;
|
2017-04-24 04:56:01 +08:00
|
|
|
directory_tree directory_;
|
|
|
|
header header_;
|
|
|
|
allocation_table sector_table_;
|
|
|
|
allocation_table short_sector_table_;
|
|
|
|
std::vector<sector_id> short_container_stream_;
|
|
|
|
};
|
2017-04-23 08:43:26 +08:00
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
class compound_document_writer_impl
|
|
|
|
{
|
|
|
|
public:
|
2017-04-24 06:18:35 +08:00
|
|
|
compound_document_writer_impl(std::vector<byte> &bytes)
|
|
|
|
: writer_(bytes)
|
2017-04-24 04:56:01 +08:00
|
|
|
{
|
|
|
|
sector_table_.sector_size(header_.sector_size());
|
|
|
|
short_sector_table_.sector_size(header_.short_sector_size());
|
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
void write_sectors(const std::vector<byte> &data, directory_entry &/*entry*/)
|
2017-04-24 04:56:01 +08:00
|
|
|
{
|
|
|
|
const auto sector_size = sector_table_.sector_size();
|
|
|
|
const auto num_sectors = data.size() / sector_size;
|
|
|
|
|
|
|
|
for (auto i = std::size_t(0); i < num_sectors; ++i)
|
|
|
|
{
|
|
|
|
auto position = sector_size * i;
|
|
|
|
auto current_sector_size = data.size() % sector_size;
|
2017-04-24 06:18:35 +08:00
|
|
|
writer_.append(data, position, current_sector_size);
|
2017-04-24 04:56:01 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
void write_short_sectors(const std::vector<byte> &data, directory_entry &/*entry*/)
|
2017-04-24 04:56:01 +08:00
|
|
|
{
|
|
|
|
const auto sector_size = sector_table_.sector_size();
|
|
|
|
const auto num_sectors = data.size() / sector_size;
|
|
|
|
|
|
|
|
for (auto i = std::size_t(0); i < num_sectors; ++i)
|
|
|
|
{
|
|
|
|
auto position = sector_size * i;
|
|
|
|
auto current_sector_size = data.size() % sector_size;
|
2017-04-24 06:18:35 +08:00
|
|
|
writer_.append(data, position, current_sector_size);
|
2017-04-24 04:56:01 +08:00
|
|
|
}
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 06:18:35 +08:00
|
|
|
void write_stream(const std::u16string &name, const std::vector<byte> &data)
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-24 04:56:01 +08:00
|
|
|
auto &entry = directory_.entry(name, true);
|
2017-04-23 08:43:26 +08:00
|
|
|
|
|
|
|
if (entry.size < header_.threshold())
|
|
|
|
{
|
|
|
|
write_short_sectors(data, entry);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
write_sectors(data, entry);
|
|
|
|
}
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-23 02:25:27 +08:00
|
|
|
private:
|
2017-04-24 06:18:35 +08:00
|
|
|
binary_writer writer_;
|
2017-04-23 02:25:27 +08:00
|
|
|
directory_tree directory_;
|
|
|
|
header header_;
|
|
|
|
allocation_table sector_table_;
|
|
|
|
allocation_table short_sector_table_;
|
|
|
|
std::vector<sector_id> short_container_stream_;
|
|
|
|
};
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
// Builds the implementation object, which parses `data` immediately.
compound_document_reader::compound_document_reader(const std::vector<std::uint8_t> &data)
    : d_(new compound_document_reader_impl(data))
{
    // all work happens in the implementation's constructor
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
// Defined here, where compound_document_reader_impl is a complete type.
compound_document_reader::~compound_document_reader() = default;
|
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
std::vector<std::uint8_t> compound_document_reader::read_stream(const std::u16string &name) const
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-24 06:18:35 +08:00
|
|
|
return d_->read_stream(name);
|
2017-04-22 07:52:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
// Builds the implementation object, which writes into `data`.
compound_document_writer::compound_document_writer(std::vector<std::uint8_t> &data)
    : d_(new compound_document_writer_impl(data))
{
    // nothing else to set up
}
|
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
// Defined here, where compound_document_writer_impl is a complete type.
compound_document_writer::~compound_document_writer() = default;
|
|
|
|
|
2017-04-24 04:56:01 +08:00
|
|
|
void compound_document_writer::write_stream(const std::u16string &name, const std::vector<std::uint8_t> &data)
|
2017-04-22 07:52:02 +08:00
|
|
|
{
|
2017-04-24 04:56:01 +08:00
|
|
|
d_->write_stream(name, data);
|
2017-04-23 02:25:27 +08:00
|
|
|
}
|
2017-04-22 07:52:02 +08:00
|
|
|
|
|
|
|
|
|
|
|
} // namespace detail
|
|
|
|
} // namespace xlnt
|