// xlnt/source/detail/serialization/zstream.cpp
// 2019-12-26 12:51:02 -05:00
//
// 632 lines
// 19 KiB
// C++

/*
PARTIO SOFTWARE
Copyright 2010 Disney Enterprises, Inc. All rights reserved
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* The names "Disney", "Walt Disney Pictures", "Walt Disney Animation
Studios" or the names of its contributors may NOT be used to
endorse or promote products derived from this software without
specific prior written permission from Walt Disney Pictures.
Disclaimer: THIS SOFTWARE IS PROVIDED BY WALT DISNEY PICTURES AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE, NONINFRINGEMENT AND TITLE ARE DISCLAIMED.
IN NO EVENT SHALL WALT DISNEY PICTURES, THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND BASED ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
*/
#include <algorithm>
#include <array>
#include <cassert>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator> // for std::back_inserter
#include <stdexcept>
#include <string>
#include <miniz.h>
#include <xlnt/utils/exceptions.hpp>
#include <detail/serialization/vector_streambuf.hpp>
#include <detail/serialization/zstream.hpp>
namespace {
/// Reads sizeof(T) raw bytes from `stream` and returns them reinterpreted as a T.
///
/// The bytes are copied verbatim into the result, so the value is interpreted in
/// host byte order. The result starts out value-initialized (zero for integers),
/// so when the stream cannot supply the requested bytes the caller receives a
/// well-defined value instead of an indeterminate one; bytes that could not be
/// read stay zero. Failure itself is reported through the stream's state flags,
/// as set by std::istream::read.
template <class T>
T read_int(std::istream &stream)
{
    T value{};
    stream.read(reinterpret_cast<char *>(&value), sizeof(T));
    return value;
}
/// Writes the in-memory representation of `value` (sizeof(T) bytes, host byte
/// order) to `stream`. Errors are reported only through the stream's state.
template <class T>
void write_int(std::ostream &stream, T value)
{
    const char *const raw_bytes = reinterpret_cast<const char *>(&value);
    stream.write(raw_bytes, sizeof(T));
}
/// Parses a single ZIP file header starting at the current position of `istream`.
///
/// @param istream stream positioned on the 4-byte header signature
/// @param global  true to parse a central-directory ("global") record, false to
///                parse a local file header
/// @return the parsed header; `header_offset` and `comment` are only filled in
///         for global records
/// @throws xlnt::exception when the signature does not match the requested kind
xlnt::detail::zheader read_header(std::istream &istream, const bool global)
{
    const auto signature = read_int<std::uint32_t>(istream);
    xlnt::detail::zheader parsed;

    if (!global)
    {
        if (signature != 0x04034b50)
        {
            throw xlnt::exception("missing local header signature");
        }
    }
    else
    {
        if (signature != 0x02014b50)
        {
            throw xlnt::exception("missing global header signature");
        }

        // Global records carry one extra 16-bit field ahead of the shared
        // layout; it is consumed here and then overwritten by the read below.
        parsed.version = read_int<std::uint16_t>(istream);
    }

    // Fixed-size fields shared by both header kinds, in on-disk order.
    parsed.version = read_int<std::uint16_t>(istream);
    parsed.flags = read_int<std::uint16_t>(istream);
    parsed.compression_type = read_int<std::uint16_t>(istream);
    parsed.stamp_date = read_int<std::uint16_t>(istream);
    parsed.stamp_time = read_int<std::uint16_t>(istream);
    parsed.crc = read_int<std::uint32_t>(istream);
    parsed.compressed_size = read_int<std::uint32_t>(istream);
    parsed.uncompressed_size = read_int<std::uint32_t>(istream);

    const auto name_size = read_int<std::uint16_t>(istream);
    const auto extra_size = read_int<std::uint16_t>(istream);
    std::uint16_t comment_size = 0;

    if (global)
    {
        comment_size = read_int<std::uint16_t>(istream);
        read_int<std::uint16_t>(istream); // disk number start (ignored)
        read_int<std::uint16_t>(istream); // internal file attributes (ignored)
        read_int<std::uint32_t>(istream); // external file attributes (ignored)
        parsed.header_offset = read_int<std::uint32_t>(istream);
    }

    // Variable-length trailer: file name, extra field and (global only) comment.
    parsed.filename.resize(name_size, '\0');
    istream.read(&parsed.filename[0], name_size);

    parsed.extra.resize(extra_size, 0);
    istream.read(reinterpret_cast<char *>(parsed.extra.data()), extra_size);

    if (global)
    {
        parsed.comment.resize(comment_size, '\0');
        istream.read(&parsed.comment[0], comment_size);
    }

    return parsed;
}
/// Serializes `header` to `ostream` as either a central-directory ("global")
/// record or a local file header.
///
/// The extra field and the file comment are always written with length zero,
/// so only the file name follows the fixed-size part.
///
/// @param header  header fields to emit
/// @param ostream destination stream
/// @param global  true for a central-directory record, false for a local header
void write_header(const xlnt::detail::zheader &header, std::ostream &ostream, const bool global)
{
    const std::uint32_t signature = global ? 0x02014b50 : 0x04034b50;
    write_int(ostream, signature);

    if (global)
    {
        write_int(ostream, static_cast<std::uint16_t>(20)); // version made by
    }

    // Fixed-size fields shared by both header kinds.
    write_int(ostream, header.version);
    write_int(ostream, header.flags);
    write_int(ostream, header.compression_type);
    write_int(ostream, header.stamp_date);
    write_int(ostream, header.stamp_time);
    write_int(ostream, header.crc);
    write_int(ostream, header.compressed_size);
    write_int(ostream, header.uncompressed_size);

    const auto name_size = static_cast<std::uint16_t>(header.filename.length());
    write_int(ostream, name_size);
    write_int(ostream, static_cast<std::uint16_t>(0)); // extra field length

    if (global)
    {
        write_int(ostream, static_cast<std::uint16_t>(0)); // file comment length
        write_int(ostream, static_cast<std::uint16_t>(0)); // disk number start
        write_int(ostream, static_cast<std::uint16_t>(0)); // internal file attributes
        write_int(ostream, static_cast<std::uint32_t>(0)); // external file attributes
        write_int(ostream, static_cast<std::uint32_t>(header.header_offset)); // offset of local header
    }

    // File name, one byte at a time.
    for (auto position = header.filename.begin(); position != header.filename.end(); ++position)
    {
        write_int(ostream, *position);
    }
}
} // namespace
namespace xlnt {
namespace detail {
// Size in bytes of each fixed staging buffer (`in` and `out`) used by the zip
// stream buffers defined below.
static const std::size_t buffer_size = 512;
class zip_streambuf_decompress : public std::streambuf
{
std::istream &istream;
z_stream strm;
std::array<char, buffer_size> in;
std::array<char, buffer_size> out;
zheader header;
std::size_t total_read;
std::size_t total_uncompressed;
bool valid;
bool compressed_data;
static const unsigned short DEFLATE = 8;
static const unsigned short UNCOMPRESSED = 0;
public:
zip_streambuf_decompress(std::istream &stream, zheader central_header)
: istream(stream), header(central_header), total_read(0), total_uncompressed(0), valid(true)
{
in.fill(0);
out.fill(0);
strm.zalloc = nullptr;
strm.zfree = nullptr;
strm.opaque = nullptr;
strm.avail_in = 0;
strm.next_in = nullptr;
setg(in.data(), in.data(), in.data());
setp(nullptr, nullptr);
// skip the header
read_header(istream, false);
if (header.compression_type == DEFLATE)
{
compressed_data = true;
}
else if (header.compression_type == UNCOMPRESSED)
{
compressed_data = false;
}
else
{
compressed_data = false;
throw xlnt::exception("unsupported compression type, should be DEFLATE or uncompressed");
}
// initialize the inflate
if (compressed_data && valid)
{
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"
int result = inflateInit2(&strm, -MAX_WBITS);
#pragma clang diagnostic pop
if (result != Z_OK)
{
throw xlnt::exception("couldn't inflate ZIP, possibly corrupted");
}
}
header = central_header;
}
virtual ~zip_streambuf_decompress()
{
if (compressed_data && valid)
{
inflateEnd(&strm);
}
}
int process()
{
if (!valid) return -1;
if (compressed_data)
{
strm.avail_out = buffer_size - 4;
strm.next_out = reinterpret_cast<Bytef *>(out.data() + 4);
while (strm.avail_out != 0)
{
if (strm.avail_in == 0)
{
// buffer empty, read some more from file
istream.read(in.data(),
static_cast<std::streamsize>(std::min(buffer_size, header.compressed_size - total_read)));
strm.avail_in = static_cast<unsigned int>(istream.gcount());
total_read += strm.avail_in;
strm.next_in = reinterpret_cast<Bytef *>(in.data());
}
const auto ret = inflate(&strm, Z_NO_FLUSH); // decompress
if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT || ret == Z_DATA_ERROR || ret == Z_MEM_ERROR)
{
throw xlnt::exception("couldn't inflate ZIP, possibly corrupted");
}
if (ret == Z_STREAM_END) break;
}
auto unzip_count = buffer_size - strm.avail_out - 4;
total_uncompressed += unzip_count;
return static_cast<int>(unzip_count);
}
// uncompressed, so just read
istream.read(out.data() + 4,
static_cast<std::streamsize>(std::min(buffer_size - 4, header.uncompressed_size - total_read)));
auto count = istream.gcount();
total_read += static_cast<std::size_t>(count);
return static_cast<int>(count);
}
virtual int underflow()
{
if (gptr() && (gptr() < egptr()))
return traits_type::to_int_type(*gptr()); // if we already have data just use it
auto put_back_count = gptr() - eback();
if (put_back_count > 4) put_back_count = 4;
std::memmove(
out.data() + (4 - put_back_count), gptr() - put_back_count, static_cast<std::size_t>(put_back_count));
int num = process();
setg(out.data() + 4 - put_back_count, out.data() + 4, out.data() + 4 + num);
if (num <= 0) return EOF;
return traits_type::to_int_type(*gptr());
}
virtual int overflow(int c = EOF);
};
/// The decompressing buffer has no put area, so any attempt to write through it
/// is rejected unconditionally.
///
/// @throws xlnt::exception always
int zip_streambuf_decompress::overflow(int /*c*/)
{
    throw xlnt::exception("writing to read-only buffer");
}
class zip_streambuf_compress : public std::streambuf
{
std::ostream &ostream; // owned when header==0 (when not part of zip file)
z_stream strm;
std::array<char, buffer_size> in;
std::array<char, buffer_size> out;
zheader *header;
std::uint32_t uncompressed_size;
std::uint32_t crc;
bool valid;
public:
zip_streambuf_compress(zheader *central_header, std::ostream &stream)
: ostream(stream), header(central_header), valid(true)
{
strm.zalloc = nullptr;
strm.zfree = nullptr;
strm.opaque = nullptr;
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"
int ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, 8, Z_DEFAULT_STRATEGY);
#pragma clang diagnostic pop
if (ret != Z_OK)
{
std::cerr << "libz: failed to deflateInit" << std::endl;
valid = false;
return;
}
setg(nullptr, nullptr, nullptr);
setp(in.data(), in.data() + buffer_size - 4); // we want to be 4 aligned
// Write appropriate header
if (header)
{
header->header_offset = static_cast<std::uint32_t>(stream.tellp());
write_header(*header, ostream, false);
}
uncompressed_size = crc = 0;
}
virtual ~zip_streambuf_compress()
{
if (valid)
{
process(true);
deflateEnd(&strm);
if (header)
{
auto final_position = ostream.tellp();
header->uncompressed_size = uncompressed_size;
header->crc = crc;
ostream.seekp(header->header_offset);
write_header(*header, ostream, false);
ostream.seekp(final_position);
}
else
{
write_int(ostream, crc);
write_int(ostream, uncompressed_size);
}
}
if (!header) delete &ostream;
}
protected:
int process(bool flush)
{
if (!valid) return -1;
strm.next_in = reinterpret_cast<Bytef *>(pbase());
strm.avail_in = static_cast<unsigned int>(pptr() - pbase());
while (strm.avail_in != 0 || flush)
{
strm.avail_out = buffer_size;
strm.next_out = reinterpret_cast<Bytef *>(out.data());
int ret = deflate(&strm, flush ? Z_FINISH : Z_NO_FLUSH);
if (!(ret != Z_BUF_ERROR && ret != Z_STREAM_ERROR))
{
valid = false;
std::cerr << "gzip: gzip error " << strm.msg << std::endl;
return -1;
}
auto generated_output = static_cast<int>(strm.next_out - reinterpret_cast<std::uint8_t *>(out.data()));
ostream.write(out.data(), generated_output);
if (header) header->compressed_size += static_cast<std::uint32_t>(generated_output);
if (ret == Z_STREAM_END) break;
}
// update counts, crc's and buffers
auto consumed_input = static_cast<std::uint32_t>(pptr() - pbase());
uncompressed_size += consumed_input;
crc = static_cast<std::uint32_t>(crc32(crc, reinterpret_cast<Bytef *>(in.data()), consumed_input));
setp(pbase(), pbase() + buffer_size - 4);
return 1;
}
virtual int sync()
{
if (pptr() && pptr() > pbase()) return process(false);
return 0;
}
virtual int underflow()
{
throw xlnt::exception("Attempt to read write only ostream");
}
virtual int overflow(int c = EOF);
};
int zip_streambuf_compress::overflow(int c)
{
if (c != EOF)
{
*pptr() = static_cast<char>(c);
pbump(1);
}
if (process(false) == EOF) return EOF;
return c;
}
/// Binds the archive writer to `stream`.
///
/// @throws xlnt::exception if the stream is already in a failed state
ozstream::ozstream(std::ostream &stream)
    : destination_stream_(stream)
{
    const bool stream_unusable = destination_stream_.fail();

    if (stream_unusable)
    {
        throw xlnt::exception("bad zip stream");
    }
}
/// Finalizes the archive: writes one central-directory record per stored
/// header, followed by the end-of-central-directory record.
ozstream::~ozstream()
{
    auto &sink = destination_stream_;

    // Central directory
    const auto directory_begin = sink.tellp();

    std::for_each(file_headers_.begin(), file_headers_.end(),
        [&sink](const zheader &entry) { write_header(entry, sink, true); });

    const auto directory_end = sink.tellp();

    const auto entry_count = static_cast<std::uint16_t>(file_headers_.size());
    const auto directory_size = static_cast<std::uint32_t>(directory_end - directory_begin);
    const auto directory_offset = static_cast<std::uint32_t>(directory_begin);

    // End-of-central-directory record
    write_int(sink, static_cast<std::uint32_t>(0x06054b50)); // signature
    write_int(sink, static_cast<std::uint16_t>(0)); // number of this disk
    write_int(sink, static_cast<std::uint16_t>(0)); // disk where the central directory starts
    write_int(sink, entry_count); // central directory entries on this disk
    write_int(sink, entry_count); // central directory entries in total
    write_int(sink, directory_size); // size of the central directory
    write_int(sink, directory_offset); // offset of the central directory
    write_int(sink, static_cast<std::uint16_t>(0)); // zip comment length
}
std::unique_ptr<std::streambuf> ozstream::open(const path &filename)
{
zheader header;
header.filename = filename.string();
file_headers_.push_back(header);
auto buffer = new zip_streambuf_compress(&file_headers_.back(), destination_stream_);
return std::unique_ptr<zip_streambuf_compress>(buffer);
}
/// Binds the archive reader to `stream` and immediately loads the central
/// directory.
///
/// @throws xlnt::exception if the stream is in a failed state or the central
///         directory cannot be read
izstream::izstream(std::istream &stream)
    : source_stream_(stream)
{
    const bool stream_unusable = stream.fail();

    if (stream_unusable)
    {
        throw xlnt::exception("Invalid file handle");
    }

    read_central_header();
}
/// Nothing to release explicitly; the members clean up after themselves.
izstream::~izstream() = default;
/// Locates the end-of-central-directory (EOCD) record and loads every
/// central-directory header into `file_headers_`, keyed by file name.
///
/// The EOCD record is searched for in the last 22 + 65535 bytes of the stream
/// (fixed record size plus the largest possible archive comment), scanning
/// backwards so that the record closest to the end of the file wins over
/// signature bytes that merely occur inside entry data.
///
/// @return always true; every failure is reported by exception
/// @throws xlnt::exception if the stream is empty, is a compound document
///         (treated as an encrypted workbook), has no EOCD record, or spans
///         several disks
bool izstream::read_central_header()
{
    const std::streamoff max_comment_size = 0xffff;
    const std::streamoff eocd_fixed_size = 22;

    // Find the header
    // NOTE: this assumes the zip file header is the last thing written to file...
    source_stream_.seekg(0, std::ios_base::end);
    const std::streamoff file_size = source_stream_.tellg();

    // Checked before anything is allocated: a failed tellg() yields -1, which
    // must never be turned into a buffer size.
    if (file_size <= 0)
    {
        throw xlnt::exception("file is empty");
    }

    // The compound document magic is located at the very start of the file, so
    // it is read from there instead of from the tail window.
    if (file_size >= 8)
    {
        const std::array<std::uint8_t, 8> compound_document_magic = {
            {0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1}};
        std::array<std::uint8_t, 8> file_start;
        file_start.fill(0);

        source_stream_.seekg(0, std::ios_base::beg);
        source_stream_.read(reinterpret_cast<char *>(file_start.data()), 8);

        if (source_stream_.gcount() == 8 && file_start == compound_document_magic)
        {
            throw xlnt::exception("encrypted xlsx, password required");
        }
    }

    // Load the tail of the file, where the EOCD record has to be.
    const std::streamoff read_start = std::min(file_size, max_comment_size + eocd_fixed_size);
    source_stream_.seekg(file_size - read_start, std::ios_base::beg);

    std::vector<std::uint8_t> buf(static_cast<std::size_t>(read_start), 0);
    source_stream_.read(reinterpret_cast<char *>(buf.data()), read_start);

    auto found_header = false;
    std::streamoff header_index = 0;

    // Only positions that leave room for a complete fixed-size record qualify.
    for (std::streamoff i = read_start - eocd_fixed_size; i >= 0; --i)
    {
        const auto at = static_cast<std::size_t>(i);

        if (buf[at] == 0x50 && buf[at + 1] == 0x4b && buf[at + 2] == 0x05 && buf[at + 3] == 0x06)
        {
            found_header = true;
            header_index = i;
            break;
        }
    }

    if (!found_header)
    {
        throw xlnt::exception("failed to find zip header");
    }

    // seek to the EOCD record and read it
    source_stream_.seekg(file_size - (read_start - header_index), std::ios_base::beg);

    read_int<std::uint32_t>(source_stream_); // signature, already verified above
    const auto disk_number1 = read_int<std::uint16_t>(source_stream_); // number of this disk
    const auto disk_number2 = read_int<std::uint16_t>(source_stream_); // disk holding the central directory

    if (disk_number1 != disk_number2 || disk_number1 != 0)
    {
        throw xlnt::exception("multiple disk zip files are not supported");
    }

    const auto entries_this_disk = read_int<std::uint16_t>(source_stream_);
    const auto entries_total = read_int<std::uint16_t>(source_stream_);

    if (entries_this_disk != entries_total)
    {
        throw xlnt::exception("multi disk zip files are not supported");
    }

    read_int<std::uint32_t>(source_stream_); // size of the central directory (unused)
    const auto header_offset = read_int<std::uint32_t>(source_stream_); // offset of the central directory

    // go to the central directory and read all file headers
    source_stream_.seekg(header_offset);

    for (std::uint16_t i = 0; i < entries_total; ++i)
    {
        auto header = read_header(source_stream_, true);
        file_headers_[header.filename] = header;
    }

    return true;
}
std::unique_ptr<std::streambuf> izstream::open(const path &filename) const
{
if (!has_file(filename))
{
throw xlnt::exception("file not found");
}
auto header = file_headers_.at(filename.string());
source_stream_.seekg(header.header_offset);
auto buffer = new zip_streambuf_decompress(source_stream_, header);
return std::unique_ptr<zip_streambuf_decompress>(buffer);
}
/// Reads the complete contents of the archive entry `filename` into a string.
///
/// @throws xlnt::exception if the entry does not exist or cannot be decoded
std::string izstream::read(const path &filename) const
{
    const auto entry_buffer = open(filename);
    std::istream entry_stream(entry_buffer.get());

    const auto contents = to_vector(entry_stream);

    std::string text;
    text.assign(contents.begin(), contents.end());
    return text;
}
std::vector<path> izstream::files() const
{
std::vector<path> filenames;
std::transform(file_headers_.begin(), file_headers_.end(), std::back_inserter(filenames),
[](const std::pair<std::string, zheader> &h) { return path(h.first); });
return filenames;
}
/// Reports whether the archive's central directory lists an entry named
/// `filename`.
bool izstream::has_file(const path &filename) const
{
    const auto entry = file_headers_.find(filename.string());
    return entry != file_headers_.end();
}
} // namespace detail
} // namespace xlnt