switch to full in-memory approach, still working

This commit is contained in:
Thomas Fussell 2017-04-26 23:19:28 -04:00
parent bb91308dd4
commit a90b32f386
6 changed files with 725 additions and 853 deletions

View File

@ -34,13 +34,23 @@ namespace detail {
using byte = std::uint8_t;
template<typename T>
class binary_reader
{
public:
binary_reader() = delete;
binary_reader(const std::vector<byte> &bytes)
: bytes_(&bytes)
binary_reader(const std::vector<T> &vector)
: vector_(&vector),
data_(nullptr),
size_(0)
{
}
// Constructs a reader over a raw buffer of `size` elements of type T starting at `source_data`.
// Only the pointer and element count are stored; nothing is copied here.
// NOTE(review): the buffer presumably has to outlive this reader - confirm against callers.
binary_reader(const T *source_data, std::size_t size)
    : vector_{nullptr},
      data_{source_data},
      size_{size}
{
}
@ -48,8 +58,9 @@ public:
binary_reader &operator=(const binary_reader &other)
{
vector_ = other.vector_;
offset_ = other.offset_;
bytes_ = other.bytes_;
data_ = other.data_;
return *this;
}
@ -58,9 +69,9 @@ public:
{
}
const std::vector<std::uint8_t> &data() const
const T *data() const
{
return *bytes_;
return vector_ == nullptr ? data_ : vector_->data();
}
void offset(std::size_t offset)
@ -78,55 +89,69 @@ public:
offset_ = 0;
}
template<typename T>
T read()
template<typename U>
U read()
{
return read_reference<T>();
return read_reference<U>();
}
template<typename T>
const T &read_reference()
template<typename U>
const U *read_pointer()
{
const auto &result = *reinterpret_cast<const T *>(bytes_->data() + offset_);
offset_ += sizeof(T);
const auto result = reinterpret_cast<const U *>(data() + offset_);
offset_ += sizeof(U) / sizeof(T);
return result;
}
template<typename T>
std::vector<T> to_vector() const
template<typename U>
const U &read_reference()
{
auto result = std::vector<T>(size() / sizeof(T), T());
std::memcpy(result.data(), bytes_->data(), size());
return *read_pointer<U>();
}
// Returns a copy of the whole underlying buffer reinterpreted as elements of type U.
// Only complete U elements are produced: trailing bytes that do not fill a whole U are dropped.
// The current read offset is neither consulted nor changed.
template<typename U>
std::vector<U> as_vector() const
{
    // The result must hold U (not T) so that it matches the declared return type.
    auto result = std::vector<U>(bytes() / sizeof(U), U());

    // Copy exactly what the result can hold; copying bytes() would write past the
    // end of the result whenever bytes() is not a multiple of sizeof(U).
    const std::size_t copy_bytes = result.size() * sizeof(U);

    // Skip the copy for an empty result so memcpy is never handed a null pointer.
    if (copy_bytes != 0)
    {
        std::memcpy(result.data(), data(), copy_bytes);
    }

    return result;
}
template<typename T>
std::vector<T> read_vector(std::size_t count)
template<typename U>
std::vector<U> read_vector(std::size_t count)
{
auto result = std::vector<T>(count, T());
std::memcpy(result.data(), bytes_->data() + offset_, count * sizeof(T));
offset_ += count * sizeof(T);
auto result = std::vector<U>(count, U());
std::memcpy(result.data(), data() + offset_, count * sizeof(U));
offset_ += count * sizeof(T) / sizeof(U);
return result;
}
std::size_t size() const
std::size_t count() const
{
return bytes_->size();
return vector_ != nullptr ? vector_->size() : size_;
}
// Size of the data visible to this reader, in bytes: count() elements of sizeof(T) each.
std::size_t bytes() const
{
    const std::size_t element_count = count();
    return element_count * sizeof(T);
}
private:
std::size_t offset_ = 0;
const std::vector<std::uint8_t> *bytes_;
const std::vector<T> *vector_;
const T *data_;
const std::size_t size_;
};
template<typename T>
class binary_writer
{
public:
binary_writer(std::vector<byte> &bytes)
: bytes_(&bytes)
binary_writer(std::vector<T> &bytes)
: data_(&bytes)
{
}
@ -141,29 +166,33 @@ public:
binary_writer &operator=(const binary_writer &other)
{
bytes_ = other.bytes_;
data_ = other.data_;
offset_ = other.offset_;
return *this;
}
std::vector<byte> &data()
std::vector<T> &data()
{
return *bytes_;
return *data_;
}
template<typename T>
void assign(const std::vector<T> &ints)
// Make the bytes of the data pointed to by this writer equivalent to those in the given vector
// sizeof(U) should be a multiple of sizeof(T)
template<typename U>
void assign(const std::vector<U> &ints)
{
resize(ints.size() * sizeof(T));
std::memcpy(bytes_->data(), ints.data(), bytes_->size());
resize(ints.size() * sizeof(U));
std::memcpy(data_->data(), ints.data(), bytes());
}
template<typename T>
void assign(const std::basic_string<T> &string)
// Make the bytes of the data pointed to by this writer equivalent to those in the given string
// sizeof(U) should be a multiple of sizeof(T)
template<typename U>
void assign(const std::basic_string<U> &string)
{
resize(string.size() * sizeof(T));
std::memcpy(bytes_->data(), string.data(), bytes_->size());
resize(string.size() * sizeof(U));
std::memcpy(data_->data(), string.data(), bytes());
}
void offset(std::size_t new_offset)
@ -179,79 +208,78 @@ public:
void reset()
{
offset_ = 0;
bytes_->clear();
data_->clear();
}
template<typename T>
void write(T value)
template<typename U>
void write(U value)
{
const auto num_bytes = sizeof(T);
const auto num_bytes = sizeof(U);
const auto remaining_bytes = bytes() - offset() * sizeof(T);
if (offset() + num_bytes > size())
if (remaining_bytes < num_bytes)
{
extend(offset() + num_bytes - size());
extend((num_bytes - remaining_bytes) / sizeof(T));
}
std::memcpy(bytes_->data() + offset(), &value, num_bytes);
offset_ += num_bytes;
std::memcpy(data_->data() + offset(), &value, num_bytes);
offset_ += num_bytes / sizeof(T);
}
std::size_t size() const
std::size_t count() const
{
return bytes_->size();
return data_->size();
}
// Size of the data currently held by the target of this writer, in bytes:
// count() elements of sizeof(T) each.
std::size_t bytes() const
{
    const std::size_t element_count = count();
    return element_count * sizeof(T);
}
void resize(std::size_t new_size, byte fill = 0)
{
bytes_->resize(new_size, fill);
data_->resize(new_size, fill);
}
void extend(std::size_t amount, byte fill = 0)
{
bytes_->resize(size() + amount, fill);
data_->resize(count() + amount, fill);
}
std::vector<byte>::iterator iterator()
{
return bytes_->begin() + static_cast<std::ptrdiff_t>(offset());
return data_->begin() + static_cast<std::ptrdiff_t>(offset());
}
/*
void append(const byte *data, const std::size_t data_size, std::size_t offset, std::size_t count)
template<typename U>
void append(const std::vector<U> &data)
{
if (offset + count > data_size)
binary_reader<U> reader(data);
append(reader, data.size() * sizeof(U));
}
template<typename U>
void append(binary_reader<U> &reader, std::size_t reader_element_count)
{
const auto num_bytes = sizeof(U) * reader_element_count;
const auto remaining_bytes = bytes() - offset() * sizeof(T);
if (remaining_bytes < num_bytes)
{
throw xlnt::exception("out of bounds read");
extend((num_bytes - remaining_bytes) / sizeof(T));
}
const auto end_index = size();
extend(count);
std::memcpy(bytes_->data() + end_index, data + offset, count);
}
*/
template<typename T>
void append(const std::vector<T> &data)
{
append(data, 0, data.size());
}
template<typename T>
void append(const std::vector<T> &data, std::size_t offset, std::size_t count)
{
const auto byte_count = count * sizeof(T);
if (offset_ + byte_count > size())
if ((reader.offset() + reader_element_count) * sizeof(U) > reader.bytes())
{
extend(offset_ + byte_count - size());
throw xlnt::exception("reading past end");
}
std::memcpy(bytes_->data() + offset_, data.data() + offset, byte_count);
offset_ += byte_count;
std::memcpy(data_->data() + offset_, reader.data() + reader.offset(), reader_element_count * sizeof(U));
offset_ += reader_element_count * sizeof(U) / sizeof(T);
}
private:
std::vector<byte> *bytes_;
std::vector<T> *data_;
std::size_t offset_ = 0;
};
@ -259,7 +287,7 @@ template<typename T>
std::vector<byte> string_to_bytes(const std::basic_string<T> &string)
{
std::vector<byte> bytes;
binary_writer writer(bytes);
binary_writer<byte> writer(bytes);
writer.assign(string);
return bytes;

File diff suppressed because it is too large Load Diff

View File

@ -111,8 +111,6 @@ struct compound_document_entry
std::uint32_t ignore2;
};
class red_black_tree;
class compound_document
{
public:
@ -120,55 +118,70 @@ public:
compound_document(const std::vector<std::uint8_t> &data);
~compound_document();
std::vector<std::uint8_t> read_stream(const std::u16string &filename);
void write_stream(const std::u16string &filename, const std::vector<std::uint8_t> &data);
std::vector<std::uint8_t> read_stream(const std::string &filename);
void write_stream(const std::string &filename, const std::vector<std::uint8_t> &data);
private:
template<typename T>
void read_sector(sector_id id, binary_writer<T> &writer);
template<typename T>
void read_short_sector(sector_id id, binary_writer<T> &writer);
template<typename T>
void write_sector(binary_reader<T> &reader, sector_id id);
template<typename T>
void write_short_sector(binary_reader<T> &reader, sector_id id);
std::size_t sector_size();
std::size_t short_sector_size();
std::size_t sector_data_start();
bool contains_entry(const std::u16string &path);
compound_document_entry &find_entry(const std::u16string &path);
sector_chain follow_sat_chain(sector_id start);
sector_chain follow_ssat_chain(sector_id start);
std::vector<byte> read(sector_id start);
std::vector<byte> read_short(sector_id start);
sector_chain follow_chain(sector_id start);
void read_msat();
void read_sat();
void read_ssat();
void read_header();
void read_directory_tree();
void write(const std::vector<byte> &data, sector_id start);
void write_short(const std::vector<byte> &data, sector_id start);
void write_header();
void write_directory_tree();
sector_id msat(sector_id id);
sector_id sat(sector_id id);
void print_directory();
sector_id allocate_sectors(std::size_t sectors);
void reallocate_sectors(sector_id start, std::size_t sectors);
sector_id allocate_short_sectors(std::size_t sectors);
sector_id allocate_msat_sector();
sector_id allocate_sat_sector();
sector_id allocate_ssat_sector();
compound_document_entry &insert_entry(const std::u16string &path,
sector_id allocate_sector();
sector_chain allocate_sectors(std::size_t sectors);
sector_id allocate_short_sector();
sector_chain allocate_short_sectors(std::size_t sectors);
compound_document_header &header();
bool contains_entry(const std::u16string &path);
directory_id find_entry(const std::u16string &path);
directory_id next_empty_entry();
directory_id insert_entry(const std::u16string &path,
compound_document_entry::entry_type type);
std::unique_ptr<binary_reader> reader_;
std::unique_ptr<binary_writer> writer_;
// Red black tree helper functions
void tree_initialize_parent_maps();
void tree_insert(directory_id new_id, directory_id storage_id);
void tree_insert_fixup(directory_id x);
std::u16string tree_path(directory_id id);
void tree_rotate_left(directory_id x);
void tree_rotate_right(directory_id y);
directory_id &tree_left(directory_id id);
directory_id &tree_right(directory_id id);
directory_id &tree_parent(directory_id id);
directory_id &tree_root(directory_id id);
directory_id &tree_child(directory_id id);
std::u16string tree_key(directory_id id);
compound_document_entry::entry_color &tree_color(directory_id id);
compound_document_header header_;
std::unique_ptr<binary_reader<byte>> reader_;
std::unique_ptr<binary_writer<byte>> writer_;
sector_chain msat_;
sector_chain sat_;
sector_chain ssat_;
std::vector<compound_document_entry> entries_;
std::unique_ptr<red_black_tree> rb_tree_;
std::unordered_map<directory_id, directory_id> parent_storage_;
std::unordered_map<directory_id, directory_id> parent_;
std::unordered_map<directory_id, compound_document_entry *> entry_cache_;
};
} // namespace detail

View File

@ -51,7 +51,7 @@ std::vector<std::uint8_t> decrypt_xlsx_standard(
{
const auto key = info.calculate_key();
auto reader = binary_reader(encrypted_package);
auto reader = binary_reader<byte>(encrypted_package);
auto decrypted_size = reader.read<std::uint64_t>();
auto decrypted = xlnt::detail::aes_ecb_decrypt(encrypted_package, key, reader.offset());
decrypted.resize(static_cast<std::size_t>(decrypted_size));
@ -107,7 +107,7 @@ encryption_info::standard_encryption_info read_standard_encryption_info(const st
{
encryption_info::standard_encryption_info result;
auto reader = binary_reader(info_bytes);
auto reader = binary_reader<byte>(info_bytes);
// skip version info
reader.read<std::uint32_t>();
@ -275,7 +275,7 @@ encryption_info read_encryption_info(const std::vector<std::uint8_t> &info_bytes
info.password = password;
auto reader = binary_reader(info_bytes);
auto reader = binary_reader<byte>(info_bytes);
auto version_major = reader.read<std::uint16_t>();
auto version_minor = reader.read<std::uint16_t>();
@ -333,8 +333,8 @@ std::vector<std::uint8_t> decrypt_xlsx(
xlnt::detail::compound_document document(bytes);
auto encryption_info = read_encryption_info(
document.read_stream(u"EncryptionInfo"), password);
auto encrypted_package = document.read_stream(u"EncryptedPackage");
document.read_stream("EncryptionInfo"), password);
auto encrypted_package = document.read_stream("EncryptedPackage");
return encryption_info.is_agile
? decrypt_xlsx_agile(encryption_info, encrypted_package)

View File

@ -172,7 +172,7 @@ std::vector<std::uint8_t> write_agile_encryption_info(
std::vector<std::uint8_t> write_standard_encryption_info(const encryption_info &info)
{
auto result = std::vector<std::uint8_t>();
auto writer = xlnt::detail::binary_writer(result);
auto writer = xlnt::detail::binary_writer<std::uint8_t>(result);
const auto version_major = std::uint16_t(4);
const auto version_minor = std::uint16_t(2);
@ -252,10 +252,10 @@ std::vector<std::uint8_t> encrypt_xlsx(
auto ciphertext = std::vector<std::uint8_t>();
xlnt::detail::compound_document document(ciphertext);
document.write_stream(u"EncryptionInfo", encryption_info.is_agile
document.write_stream("EncryptionInfo", encryption_info.is_agile
? write_agile_encryption_info(encryption_info)
: write_standard_encryption_info(encryption_info));
document.write_stream(u"EncryptedPackage", encryption_info.is_agile
document.write_stream("EncryptedPackage", encryption_info.is_agile
? encrypt_xlsx_agile(encryption_info, plaintext)
: encrypt_xlsx_standard(encryption_info, plaintext));

View File

@ -73,11 +73,14 @@ void print_summary()
int main()
{
const auto bytes2 = xlnt::detail::to_vector(std::ifstream("C:/Users/Thomas/Development/xlnt/tests/data/6_encrypted_libre.xlsx", std::ios::binary));
xlnt::detail::compound_document doc2(bytes2);
std::vector<std::uint8_t> bytes;
xlnt::detail::compound_document doc(bytes);
doc.write_stream(u"aaa", std::vector<std::uint8_t>(4095, 'a'));
doc.write_stream(u"bbb", std::vector<std::uint8_t>(4095, 'b'));
doc.write_stream(u"ccc", std::vector<std::uint8_t>(4095, 'c'));
doc.write_stream("aaa", std::vector<std::uint8_t>(4095, 'a'));
doc.write_stream("bbb", std::vector<std::uint8_t>(4095, 'b'));
doc.write_stream("ccc", std::vector<std::uint8_t>(4095, 'c'));
xlnt::detail::to_stream(bytes, std::ofstream("cd.xlsx", std::ios::binary));
// cell