switch to full in-memory approach, still working

This commit is contained in:
Thomas Fussell 2017-04-26 23:19:28 -04:00
parent bb91308dd4
commit a90b32f386
6 changed files with 725 additions and 853 deletions

View File

@ -34,13 +34,23 @@ namespace detail {
using byte = std::uint8_t; using byte = std::uint8_t;
template<typename T>
class binary_reader class binary_reader
{ {
public: public:
binary_reader() = delete; binary_reader() = delete;
binary_reader(const std::vector<byte> &bytes) binary_reader(const std::vector<T> &vector)
: bytes_(&bytes) : vector_(&vector),
data_(nullptr),
size_(0)
{
}
binary_reader(const T *source_data, std::size_t size)
: vector_(nullptr),
data_(source_data),
size_(size)
{ {
} }
@ -48,8 +58,9 @@ public:
binary_reader &operator=(const binary_reader &other) binary_reader &operator=(const binary_reader &other)
{ {
vector_ = other.vector_;
offset_ = other.offset_; offset_ = other.offset_;
bytes_ = other.bytes_; data_ = other.data_;
return *this; return *this;
} }
@ -58,9 +69,9 @@ public:
{ {
} }
const std::vector<std::uint8_t> &data() const const T *data() const
{ {
return *bytes_; return vector_ == nullptr ? data_ : vector_->data();
} }
void offset(std::size_t offset) void offset(std::size_t offset)
@ -78,55 +89,69 @@ public:
offset_ = 0; offset_ = 0;
} }
template<typename T> template<typename U>
T read() U read()
{ {
return read_reference<T>(); return read_reference<U>();
} }
template<typename T> template<typename U>
const T &read_reference() const U *read_pointer()
{ {
const auto &result = *reinterpret_cast<const T *>(bytes_->data() + offset_); const auto result = reinterpret_cast<const U *>(data() + offset_);
offset_ += sizeof(T); offset_ += sizeof(U) / sizeof(T);
return result; return result;
} }
template<typename T> template<typename U>
std::vector<T> to_vector() const const U &read_reference()
{ {
auto result = std::vector<T>(size() / sizeof(T), T()); return *read_pointer<U>();
std::memcpy(result.data(), bytes_->data(), size()); }
template<typename U>
std::vector<U> as_vector() const
{
auto result = std::vector<T>(bytes() / sizeof(U), U());
std::memcpy(result.data(), data(), bytes());
return result; return result;
} }
template<typename T> template<typename U>
std::vector<T> read_vector(std::size_t count) std::vector<U> read_vector(std::size_t count)
{ {
auto result = std::vector<T>(count, T()); auto result = std::vector<U>(count, U());
std::memcpy(result.data(), bytes_->data() + offset_, count * sizeof(T)); std::memcpy(result.data(), data() + offset_, count * sizeof(U));
offset_ += count * sizeof(T); offset_ += count * sizeof(T) / sizeof(U);
return result; return result;
} }
std::size_t size() const std::size_t count() const
{ {
return bytes_->size(); return vector_ != nullptr ? vector_->size() : size_;
}
std::size_t bytes() const
{
return count() * sizeof(T);
} }
private: private:
std::size_t offset_ = 0; std::size_t offset_ = 0;
const std::vector<std::uint8_t> *bytes_; const std::vector<T> *vector_;
const T *data_;
const std::size_t size_;
}; };
template<typename T>
class binary_writer class binary_writer
{ {
public: public:
binary_writer(std::vector<byte> &bytes) binary_writer(std::vector<T> &bytes)
: bytes_(&bytes) : data_(&bytes)
{ {
} }
@ -141,29 +166,33 @@ public:
binary_writer &operator=(const binary_writer &other) binary_writer &operator=(const binary_writer &other)
{ {
bytes_ = other.bytes_; data_ = other.data_;
offset_ = other.offset_; offset_ = other.offset_;
return *this; return *this;
} }
std::vector<byte> &data() std::vector<T> &data()
{ {
return *bytes_; return *data_;
} }
template<typename T> // Make the bytes of the data pointed to by this writer equivalent to those in the given vector
void assign(const std::vector<T> &ints) // sizeof(U) should be a multiple of sizeof(T)
template<typename U>
void assign(const std::vector<U> &ints)
{ {
resize(ints.size() * sizeof(T)); resize(ints.size() * sizeof(U));
std::memcpy(bytes_->data(), ints.data(), bytes_->size()); std::memcpy(data_->data(), ints.data(), bytes());
} }
template<typename T> // Make the bytes of the data pointed to by this writer equivalent to those in the given string
void assign(const std::basic_string<T> &string) // sizeof(U) should be a multiple of sizeof(T)
template<typename U>
void assign(const std::basic_string<U> &string)
{ {
resize(string.size() * sizeof(T)); resize(string.size() * sizeof(U));
std::memcpy(bytes_->data(), string.data(), bytes_->size()); std::memcpy(data_->data(), string.data(), bytes());
} }
void offset(std::size_t new_offset) void offset(std::size_t new_offset)
@ -179,79 +208,78 @@ public:
void reset() void reset()
{ {
offset_ = 0; offset_ = 0;
bytes_->clear(); data_->clear();
} }
template<typename T> template<typename U>
void write(T value) void write(U value)
{ {
const auto num_bytes = sizeof(T); const auto num_bytes = sizeof(U);
const auto remaining_bytes = bytes() - offset() * sizeof(T);
if (offset() + num_bytes > size()) if (remaining_bytes < num_bytes)
{ {
extend(offset() + num_bytes - size()); extend((num_bytes - remaining_bytes) / sizeof(T));
} }
std::memcpy(bytes_->data() + offset(), &value, num_bytes); std::memcpy(data_->data() + offset(), &value, num_bytes);
offset_ += num_bytes; offset_ += num_bytes / sizeof(T);
} }
std::size_t size() const std::size_t count() const
{ {
return bytes_->size(); return data_->size();
}
std::size_t bytes() const
{
return count() * sizeof(T);
} }
void resize(std::size_t new_size, byte fill = 0) void resize(std::size_t new_size, byte fill = 0)
{ {
bytes_->resize(new_size, fill); data_->resize(new_size, fill);
} }
void extend(std::size_t amount, byte fill = 0) void extend(std::size_t amount, byte fill = 0)
{ {
bytes_->resize(size() + amount, fill); data_->resize(count() + amount, fill);
} }
std::vector<byte>::iterator iterator() std::vector<byte>::iterator iterator()
{ {
return bytes_->begin() + static_cast<std::ptrdiff_t>(offset()); return data_->begin() + static_cast<std::ptrdiff_t>(offset());
} }
/* template<typename U>
void append(const byte *data, const std::size_t data_size, std::size_t offset, std::size_t count) void append(const std::vector<U> &data)
{ {
if (offset + count > data_size) binary_reader<U> reader(data);
append(reader, data.size() * sizeof(U));
}
template<typename U>
void append(binary_reader<U> &reader, std::size_t reader_element_count)
{
const auto num_bytes = sizeof(U) * reader_element_count;
const auto remaining_bytes = bytes() - offset() * sizeof(T);
if (remaining_bytes < num_bytes)
{ {
throw xlnt::exception("out of bounds read"); extend((num_bytes - remaining_bytes) / sizeof(T));
} }
const auto end_index = size(); if ((reader.offset() + reader_element_count) * sizeof(U) > reader.bytes())
extend(count);
std::memcpy(bytes_->data() + end_index, data + offset, count);
}
*/
template<typename T>
void append(const std::vector<T> &data)
{
append(data, 0, data.size());
}
template<typename T>
void append(const std::vector<T> &data, std::size_t offset, std::size_t count)
{
const auto byte_count = count * sizeof(T);
if (offset_ + byte_count > size())
{ {
extend(offset_ + byte_count - size()); throw xlnt::exception("reading past end");
} }
std::memcpy(bytes_->data() + offset_, data.data() + offset, byte_count); std::memcpy(data_->data() + offset_, reader.data() + reader.offset(), reader_element_count * sizeof(U));
offset_ += byte_count; offset_ += reader_element_count * sizeof(U) / sizeof(T);
} }
private: private:
std::vector<byte> *bytes_; std::vector<T> *data_;
std::size_t offset_ = 0; std::size_t offset_ = 0;
}; };
@ -259,7 +287,7 @@ template<typename T>
std::vector<byte> string_to_bytes(const std::basic_string<T> &string) std::vector<byte> string_to_bytes(const std::basic_string<T> &string)
{ {
std::vector<byte> bytes; std::vector<byte> bytes;
binary_writer writer(bytes); binary_writer<byte> writer(bytes);
writer.assign(string); writer.assign(string);
return bytes; return bytes;

File diff suppressed because it is too large Load Diff

View File

@ -111,8 +111,6 @@ struct compound_document_entry
std::uint32_t ignore2; std::uint32_t ignore2;
}; };
class red_black_tree;
class compound_document class compound_document
{ {
public: public:
@ -120,55 +118,70 @@ public:
compound_document(const std::vector<std::uint8_t> &data); compound_document(const std::vector<std::uint8_t> &data);
~compound_document(); ~compound_document();
std::vector<std::uint8_t> read_stream(const std::u16string &filename); std::vector<std::uint8_t> read_stream(const std::string &filename);
void write_stream(const std::u16string &filename, const std::vector<std::uint8_t> &data); void write_stream(const std::string &filename, const std::vector<std::uint8_t> &data);
private: private:
template<typename T>
void read_sector(sector_id id, binary_writer<T> &writer);
template<typename T>
void read_short_sector(sector_id id, binary_writer<T> &writer);
template<typename T>
void write_sector(binary_reader<T> &reader, sector_id id);
template<typename T>
void write_short_sector(binary_reader<T> &reader, sector_id id);
std::size_t sector_size(); std::size_t sector_size();
std::size_t short_sector_size(); std::size_t short_sector_size();
std::size_t sector_data_start(); std::size_t sector_data_start();
bool contains_entry(const std::u16string &path); sector_chain follow_sat_chain(sector_id start);
compound_document_entry &find_entry(const std::u16string &path); sector_chain follow_ssat_chain(sector_id start);
std::vector<byte> read(sector_id start); sector_id msat(sector_id id);
std::vector<byte> read_short(sector_id start); sector_id sat(sector_id id);
sector_chain follow_chain(sector_id start);
void read_msat();
void read_sat();
void read_ssat();
void read_header();
void read_directory_tree();
void write(const std::vector<byte> &data, sector_id start);
void write_short(const std::vector<byte> &data, sector_id start);
void write_header();
void write_directory_tree();
void print_directory(); void print_directory();
sector_id allocate_sectors(std::size_t sectors); sector_id allocate_msat_sector();
void reallocate_sectors(sector_id start, std::size_t sectors); sector_id allocate_sat_sector();
sector_id allocate_short_sectors(std::size_t sectors); sector_id allocate_ssat_sector();
compound_document_entry &insert_entry(const std::u16string &path, sector_id allocate_sector();
sector_chain allocate_sectors(std::size_t sectors);
sector_id allocate_short_sector();
sector_chain allocate_short_sectors(std::size_t sectors);
compound_document_header &header();
bool contains_entry(const std::u16string &path);
directory_id find_entry(const std::u16string &path);
directory_id next_empty_entry();
directory_id insert_entry(const std::u16string &path,
compound_document_entry::entry_type type); compound_document_entry::entry_type type);
std::unique_ptr<binary_reader> reader_; // Red black tree helper functions
std::unique_ptr<binary_writer> writer_; void tree_initialize_parent_maps();
void tree_insert(directory_id new_id, directory_id storage_id);
void tree_insert_fixup(directory_id x);
std::u16string tree_path(directory_id id);
void tree_rotate_left(directory_id x);
void tree_rotate_right(directory_id y);
directory_id &tree_left(directory_id id);
directory_id &tree_right(directory_id id);
directory_id &tree_parent(directory_id id);
directory_id &tree_root(directory_id id);
directory_id &tree_child(directory_id id);
std::u16string tree_key(directory_id id);
compound_document_entry::entry_color &tree_color(directory_id id);
compound_document_header header_; std::unique_ptr<binary_reader<byte>> reader_;
std::unique_ptr<binary_writer<byte>> writer_;
sector_chain msat_; std::unordered_map<directory_id, directory_id> parent_storage_;
sector_chain sat_; std::unordered_map<directory_id, directory_id> parent_;
sector_chain ssat_; std::unordered_map<directory_id, compound_document_entry *> entry_cache_;
std::vector<compound_document_entry> entries_;
std::unique_ptr<red_black_tree> rb_tree_;
}; };
} // namespace detail } // namespace detail

View File

@ -51,7 +51,7 @@ std::vector<std::uint8_t> decrypt_xlsx_standard(
{ {
const auto key = info.calculate_key(); const auto key = info.calculate_key();
auto reader = binary_reader(encrypted_package); auto reader = binary_reader<byte>(encrypted_package);
auto decrypted_size = reader.read<std::uint64_t>(); auto decrypted_size = reader.read<std::uint64_t>();
auto decrypted = xlnt::detail::aes_ecb_decrypt(encrypted_package, key, reader.offset()); auto decrypted = xlnt::detail::aes_ecb_decrypt(encrypted_package, key, reader.offset());
decrypted.resize(static_cast<std::size_t>(decrypted_size)); decrypted.resize(static_cast<std::size_t>(decrypted_size));
@ -107,7 +107,7 @@ encryption_info::standard_encryption_info read_standard_encryption_info(const st
{ {
encryption_info::standard_encryption_info result; encryption_info::standard_encryption_info result;
auto reader = binary_reader(info_bytes); auto reader = binary_reader<byte>(info_bytes);
// skip version info // skip version info
reader.read<std::uint32_t>(); reader.read<std::uint32_t>();
@ -275,7 +275,7 @@ encryption_info read_encryption_info(const std::vector<std::uint8_t> &info_bytes
info.password = password; info.password = password;
auto reader = binary_reader(info_bytes); auto reader = binary_reader<byte>(info_bytes);
auto version_major = reader.read<std::uint16_t>(); auto version_major = reader.read<std::uint16_t>();
auto version_minor = reader.read<std::uint16_t>(); auto version_minor = reader.read<std::uint16_t>();
@ -333,8 +333,8 @@ std::vector<std::uint8_t> decrypt_xlsx(
xlnt::detail::compound_document document(bytes); xlnt::detail::compound_document document(bytes);
auto encryption_info = read_encryption_info( auto encryption_info = read_encryption_info(
document.read_stream(u"EncryptionInfo"), password); document.read_stream("EncryptionInfo"), password);
auto encrypted_package = document.read_stream(u"EncryptedPackage"); auto encrypted_package = document.read_stream("EncryptedPackage");
return encryption_info.is_agile return encryption_info.is_agile
? decrypt_xlsx_agile(encryption_info, encrypted_package) ? decrypt_xlsx_agile(encryption_info, encrypted_package)

View File

@ -172,7 +172,7 @@ std::vector<std::uint8_t> write_agile_encryption_info(
std::vector<std::uint8_t> write_standard_encryption_info(const encryption_info &info) std::vector<std::uint8_t> write_standard_encryption_info(const encryption_info &info)
{ {
auto result = std::vector<std::uint8_t>(); auto result = std::vector<std::uint8_t>();
auto writer = xlnt::detail::binary_writer(result); auto writer = xlnt::detail::binary_writer<std::uint8_t>(result);
const auto version_major = std::uint16_t(4); const auto version_major = std::uint16_t(4);
const auto version_minor = std::uint16_t(2); const auto version_minor = std::uint16_t(2);
@ -252,10 +252,10 @@ std::vector<std::uint8_t> encrypt_xlsx(
auto ciphertext = std::vector<std::uint8_t>(); auto ciphertext = std::vector<std::uint8_t>();
xlnt::detail::compound_document document(ciphertext); xlnt::detail::compound_document document(ciphertext);
document.write_stream(u"EncryptionInfo", encryption_info.is_agile document.write_stream("EncryptionInfo", encryption_info.is_agile
? write_agile_encryption_info(encryption_info) ? write_agile_encryption_info(encryption_info)
: write_standard_encryption_info(encryption_info)); : write_standard_encryption_info(encryption_info));
document.write_stream(u"EncryptedPackage", encryption_info.is_agile document.write_stream("EncryptedPackage", encryption_info.is_agile
? encrypt_xlsx_agile(encryption_info, plaintext) ? encrypt_xlsx_agile(encryption_info, plaintext)
: encrypt_xlsx_standard(encryption_info, plaintext)); : encrypt_xlsx_standard(encryption_info, plaintext));

View File

@ -73,11 +73,14 @@ void print_summary()
int main() int main()
{ {
const auto bytes2 = xlnt::detail::to_vector(std::ifstream("C:/Users/Thomas/Development/xlnt/tests/data/6_encrypted_libre.xlsx", std::ios::binary));
xlnt::detail::compound_document doc2(bytes2);
std::vector<std::uint8_t> bytes; std::vector<std::uint8_t> bytes;
xlnt::detail::compound_document doc(bytes); xlnt::detail::compound_document doc(bytes);
doc.write_stream(u"aaa", std::vector<std::uint8_t>(4095, 'a')); doc.write_stream("aaa", std::vector<std::uint8_t>(4095, 'a'));
doc.write_stream(u"bbb", std::vector<std::uint8_t>(4095, 'b')); doc.write_stream("bbb", std::vector<std::uint8_t>(4095, 'b'));
doc.write_stream(u"ccc", std::vector<std::uint8_t>(4095, 'c')); doc.write_stream("ccc", std::vector<std::uint8_t>(4095, 'c'));
xlnt::detail::to_stream(bytes, std::ofstream("cd.xlsx", std::ios::binary)); xlnt::detail::to_stream(bytes, std::ofstream("cd.xlsx", std::ios::binary));
// cell // cell