2014-06-05 06:42:17 +08:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <sstream>
|
2015-10-30 11:16:31 +08:00
|
|
|
|
2016-12-10 08:18:50 +08:00
|
|
|
#include <detail/include_libstudxml.hpp>
|
|
|
|
#include <detail/vector_streambuf.hpp>
|
2016-10-31 03:48:40 +08:00
|
|
|
#include <detail/zip.hpp>
|
2016-09-06 10:17:36 +08:00
|
|
|
#include <helpers/path_helper.hpp>
|
|
|
|
#include <xlnt/packaging/manifest.hpp>
|
2014-06-05 06:42:17 +08:00
|
|
|
|
2016-07-21 07:04:44 +08:00
|
|
|
class xml_helper
|
2014-06-05 06:42:17 +08:00
|
|
|
{
|
|
|
|
public:
|
2014-07-20 02:43:48 +08:00
|
|
|
// Classifies the outcome of comparing two XML nodes. Every enumerator except
// `equivalent` names a kind of mismatch; `equivalent` is the value that
// comparison_result::operator bool treats as success.
enum class difference_type
{
    // element names are not the same
    names_differ,
    // an attribute is absent on one side
    missing_attribute,
    // an attribute exists on both sides with different values
    attribute_values_differ,
    // text is absent on one side
    missing_text,
    // text exists on both sides with different values
    text_values_differ,
    // a child element is absent on one side
    missing_child,
    // children are in a different order
    child_order_differs,
    // no difference
    equivalent
};
|
2016-10-31 03:48:40 +08:00
|
|
|
|
2014-07-20 02:43:48 +08:00
|
|
|
struct comparison_result
|
|
|
|
{
|
|
|
|
difference_type difference;
|
2015-11-11 08:46:57 +08:00
|
|
|
std::string value_left;
|
|
|
|
std::string value_right;
|
2016-07-21 07:04:44 +08:00
|
|
|
|
|
|
|
operator bool() const
|
|
|
|
{
|
|
|
|
return difference == difference_type::equivalent;
|
|
|
|
}
|
2014-07-20 02:43:48 +08:00
|
|
|
};
|
2016-09-06 10:17:36 +08:00
|
|
|
|
2016-12-10 08:18:50 +08:00
|
|
|
static bool compare_files(const std::string &left,
|
|
|
|
const std::string &right, const std::string &content_type)
|
2016-09-06 10:17:36 +08:00
|
|
|
{
|
2016-12-26 22:38:26 +08:00
|
|
|
// content types are stored in unordered maps, too complicated to compare
|
|
|
|
if (content_type == "[Content_Types].xml") return true;
|
|
|
|
|
2016-12-10 08:18:50 +08:00
|
|
|
auto is_xml = (content_type.substr(0, 12) == "application/"
|
|
|
|
&& content_type.substr(content_type.size() - 4) == "+xml")
|
|
|
|
|| content_type == "application/xml"
|
|
|
|
|| content_type == "[Content_Types].xml"
|
|
|
|
|| content_type == "application/vnd.openxmlformats-officedocument.vmlDrawing";
|
2016-09-06 10:17:36 +08:00
|
|
|
|
2016-12-10 08:18:50 +08:00
|
|
|
if (is_xml)
|
2016-09-06 10:17:36 +08:00
|
|
|
{
|
2016-12-10 08:18:50 +08:00
|
|
|
return compare_xml_exact(left, right);
|
2016-09-06 10:17:36 +08:00
|
|
|
}
|
|
|
|
|
2016-12-10 08:18:50 +08:00
|
|
|
return left == right;
|
|
|
|
}
|
2016-09-06 10:17:36 +08:00
|
|
|
|
2016-12-10 08:18:50 +08:00
|
|
|
static bool compare_xml_exact(const std::string &left, const std::string &right, bool suppress_debug_info = false)
|
|
|
|
{
|
|
|
|
xml::parser left_parser(left.data(), left.size(), "left");
|
|
|
|
xml::parser right_parser(right.data(), right.size(), "right");
|
|
|
|
|
|
|
|
bool difference = false;
|
|
|
|
auto right_iter = right_parser.begin();
|
|
|
|
|
|
|
|
auto is_whitespace = [](const std::string &v)
|
2016-09-06 10:17:36 +08:00
|
|
|
{
|
2016-12-26 22:38:26 +08:00
|
|
|
return v.find_first_not_of("\n\r\t ") == std::string::npos;
|
2016-12-10 08:18:50 +08:00
|
|
|
};
|
2016-09-06 10:17:36 +08:00
|
|
|
|
2016-12-10 08:18:50 +08:00
|
|
|
for (auto left_event : left_parser)
|
2016-09-06 10:17:36 +08:00
|
|
|
{
|
2016-12-10 08:18:50 +08:00
|
|
|
if (left_event == xml::parser::event_type::characters
|
|
|
|
&& is_whitespace(left_parser.value())) continue;
|
|
|
|
|
|
|
|
if (right_iter == right_parser.end())
|
2016-09-06 10:17:36 +08:00
|
|
|
{
|
2016-12-10 08:18:50 +08:00
|
|
|
difference = true;
|
|
|
|
break;
|
2016-09-06 10:17:36 +08:00
|
|
|
}
|
2016-12-10 08:18:50 +08:00
|
|
|
|
|
|
|
auto right_event = *right_iter;
|
|
|
|
|
|
|
|
while (right_iter != right_parser.end()
|
|
|
|
&& right_event == xml::parser::event_type::characters
|
|
|
|
&& is_whitespace(right_parser.value()))
|
2016-09-06 10:17:36 +08:00
|
|
|
{
|
2016-12-10 08:18:50 +08:00
|
|
|
++right_iter;
|
|
|
|
right_event = *right_iter;
|
2016-09-06 10:17:36 +08:00
|
|
|
}
|
2016-12-10 08:18:50 +08:00
|
|
|
|
|
|
|
if (left_event != right_event)
|
2016-09-06 10:17:36 +08:00
|
|
|
{
|
2016-12-10 08:18:50 +08:00
|
|
|
difference = true;
|
|
|
|
break;
|
2016-09-06 10:17:36 +08:00
|
|
|
}
|
2016-12-10 08:18:50 +08:00
|
|
|
|
|
|
|
if (left_event == xml::parser::event_type::start_element)
|
2016-09-06 10:17:36 +08:00
|
|
|
{
|
2016-12-10 08:18:50 +08:00
|
|
|
auto left_attr_map = left_parser.attribute_map();
|
|
|
|
auto right_attr_map = right_parser.attribute_map();
|
|
|
|
|
|
|
|
for (auto attr : left_attr_map)
|
2016-09-06 10:17:36 +08:00
|
|
|
{
|
2016-12-10 08:18:50 +08:00
|
|
|
if (right_attr_map.find(attr.first) == right_attr_map.end())
|
|
|
|
{
|
|
|
|
difference = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (attr.second.value != right_attr_map.at(attr.first).value)
|
|
|
|
{
|
|
|
|
difference = true;
|
|
|
|
break;
|
|
|
|
}
|
2016-09-06 10:17:36 +08:00
|
|
|
}
|
|
|
|
|
2016-12-10 08:18:50 +08:00
|
|
|
for (auto attr : right_attr_map)
|
|
|
|
{
|
|
|
|
if (left_attr_map.find(attr.first) == left_attr_map.end())
|
|
|
|
{
|
|
|
|
difference = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (attr.second.value != left_attr_map.at(attr.first).value)
|
|
|
|
{
|
|
|
|
difference = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2016-09-06 10:17:36 +08:00
|
|
|
|
2016-12-10 08:18:50 +08:00
|
|
|
if (difference)
|
|
|
|
{
|
|
|
|
break;
|
|
|
|
}
|
2016-09-06 10:17:36 +08:00
|
|
|
|
2016-12-10 08:18:50 +08:00
|
|
|
if (left_parser.qname() != right_parser.qname())
|
2016-09-06 10:17:36 +08:00
|
|
|
{
|
2016-12-10 08:18:50 +08:00
|
|
|
difference = true;
|
2016-09-06 10:17:36 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2016-12-10 08:18:50 +08:00
|
|
|
else if (left_event == xml::parser::event_type::characters)
|
2016-09-06 10:17:36 +08:00
|
|
|
{
|
2016-12-10 08:18:50 +08:00
|
|
|
if (left_parser.value() != right_parser.value())
|
|
|
|
{
|
|
|
|
difference = true;
|
|
|
|
break;
|
|
|
|
}
|
2016-09-06 10:17:36 +08:00
|
|
|
}
|
|
|
|
|
2016-12-10 08:18:50 +08:00
|
|
|
++right_iter;
|
2016-09-06 10:17:36 +08:00
|
|
|
}
|
2016-08-03 12:12:18 +08:00
|
|
|
|
2016-12-10 08:18:50 +08:00
|
|
|
if (difference && !suppress_debug_info)
|
2016-08-03 12:12:18 +08:00
|
|
|
{
|
|
|
|
std::cout << "documents don't match" << std::endl;
|
|
|
|
|
2016-09-06 10:17:36 +08:00
|
|
|
std::cout << "left:" << std::endl;
|
2016-12-24 23:04:57 +08:00
|
|
|
for (auto c : left)
|
|
|
|
{
|
|
|
|
std::cout << c << std::flush;
|
|
|
|
}
|
2016-08-03 12:12:18 +08:00
|
|
|
std::cout << std::endl;
|
|
|
|
|
2016-09-06 10:17:36 +08:00
|
|
|
std::cout << "right:" << std::endl;
|
2016-12-24 23:04:57 +08:00
|
|
|
for (auto c : right)
|
|
|
|
{
|
|
|
|
std::cout << c << std::flush;
|
|
|
|
}
|
2016-08-03 12:12:18 +08:00
|
|
|
std::cout << std::endl;
|
|
|
|
}
|
|
|
|
|
2016-12-10 08:18:50 +08:00
|
|
|
return !difference;
|
2015-10-31 06:54:04 +08:00
|
|
|
}
|
2016-07-04 07:22:08 +08:00
|
|
|
|
2016-08-05 13:52:05 +08:00
|
|
|
static bool string_matches_workbook_part(const std::string &expected,
|
2016-09-06 10:17:36 +08:00
|
|
|
xlnt::workbook &wb, const xlnt::path &part, const std::string &content_type)
|
2016-08-05 13:52:05 +08:00
|
|
|
{
|
|
|
|
std::vector<std::uint8_t> bytes;
|
|
|
|
wb.save(bytes);
|
2016-10-31 03:48:40 +08:00
|
|
|
std::istringstream file_stream(std::string(bytes.begin(), bytes.end()));
|
2016-12-24 23:04:57 +08:00
|
|
|
xlnt::detail::zip_file_reader archive(file_stream);
|
2016-08-05 13:52:05 +08:00
|
|
|
|
2016-09-06 10:17:36 +08:00
|
|
|
return string_matches_archive_member(expected, archive, part, content_type);
|
2016-08-05 13:52:05 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Saves `wb` into an in-memory byte buffer, opens that buffer with
// xlnt::detail::zip_file_reader and returns whatever
// file_matches_archive_member reports for `part`.
//
// expected      - path of the file holding the expected contents
// wb            - workbook to serialize
// part          - archive member to compare
// content_type  - content type forwarded to the comparison
static bool file_matches_workbook_part(const xlnt::path &expected,
    xlnt::workbook &wb, const xlnt::path &part, const std::string &content_type)
{
    std::vector<std::uint8_t> serialized;
    wb.save(serialized);

    // std::istringstream wants a string, so copy the bytes across
    const std::string archive_bytes(serialized.begin(), serialized.end());
    std::istringstream archive_stream(archive_bytes);
    xlnt::detail::zip_file_reader archive(archive_stream);

    return file_matches_archive_member(expected, archive, part, content_type);
}
|
|
|
|
|
|
|
|
static bool string_matches_archive_member(const std::string &expected,
|
2016-12-24 23:04:57 +08:00
|
|
|
xlnt::detail::zip_file_reader &archive,
|
2016-09-06 10:17:36 +08:00
|
|
|
const xlnt::path &member,
|
|
|
|
const std::string &content_type)
|
2016-08-05 13:52:05 +08:00
|
|
|
{
|
2016-12-24 23:04:57 +08:00
|
|
|
auto streambuf = archive.open(member);
|
|
|
|
std::istream stream(streambuf.get());
|
2016-11-01 08:48:43 +08:00
|
|
|
std::string contents((std::istreambuf_iterator<char>(stream)), (std::istreambuf_iterator<char>()));
|
2016-10-31 03:48:40 +08:00
|
|
|
return compare_files(expected, contents, content_type);
|
2016-08-05 13:52:05 +08:00
|
|
|
}
|
2016-10-31 09:44:43 +08:00
|
|
|
|
|
|
|
// Compares the contents of `file` with the archive member `member` using
// compare_files.
//
// file          - path whose read_contents() supplies the expected data
// archive       - archive to read from
// member        - path of the member inside the archive
// content_type  - content type forwarded to compare_files
//
// Returns false when the archive has no such member, otherwise the result of
// compare_files(file.read_contents(), <member contents>, content_type).
static bool file_matches_archive_member(const xlnt::path &file,
    xlnt::detail::zip_file_reader &archive,
    const xlnt::path &member,
    const std::string &content_type)
{
    if (!archive.has_file(member))
    {
        return false;
    }

    // destination: an output stream that appends to `extracted`
    std::vector<std::uint8_t> extracted;
    xlnt::detail::vector_ostreambuf extracted_buffer(extracted);
    std::ostream sink(&extracted_buffer);

    // source: the member's stream buffer, copied into the sink in one go
    auto source_owner = archive.open(member);
    std::streambuf *source = source_owner.get();
    sink << source;

    const std::string actual(extracted.begin(), extracted.end());

    return compare_files(file.read_contents(), actual, content_type);
}
|
|
|
|
|
2016-10-31 03:48:40 +08:00
|
|
|
static bool xlsx_archives_match(const std::vector<std::uint8_t> &left, const std::vector<std::uint8_t> &right)
|
2016-09-06 10:17:36 +08:00
|
|
|
{
|
2016-10-31 03:48:40 +08:00
|
|
|
xlnt::detail::vector_istreambuf left_buffer(left);
|
|
|
|
std::istream left_stream(&left_buffer);
|
2016-12-24 23:04:57 +08:00
|
|
|
xlnt::detail::zip_file_reader left_archive(left_stream);
|
2016-10-31 03:48:40 +08:00
|
|
|
|
2016-11-01 08:48:43 +08:00
|
|
|
const auto left_info = left_archive.files();
|
2016-10-31 03:48:40 +08:00
|
|
|
|
|
|
|
xlnt::detail::vector_istreambuf right_buffer(right);
|
|
|
|
std::istream right_stream(&right_buffer);
|
2016-12-24 23:04:57 +08:00
|
|
|
xlnt::detail::zip_file_reader right_archive(right_stream);
|
2016-10-31 03:48:40 +08:00
|
|
|
|
2016-11-01 08:48:43 +08:00
|
|
|
const auto right_info = right_archive.files();
|
2016-08-06 22:40:17 +08:00
|
|
|
|
2016-08-12 12:22:14 +08:00
|
|
|
if (left_info.size() != right_info.size())
|
|
|
|
{
|
|
|
|
std::cout << "left has a different number of files than right" << std::endl;
|
|
|
|
|
|
|
|
std::cout << "left has: ";
|
|
|
|
for (auto &info : left_info)
|
|
|
|
{
|
2016-12-24 23:04:57 +08:00
|
|
|
std::cout << info.string() << ", ";
|
2016-08-12 12:22:14 +08:00
|
|
|
}
|
|
|
|
std::cout << std::endl;
|
|
|
|
|
|
|
|
std::cout << "right has: ";
|
2016-08-16 12:23:49 +08:00
|
|
|
for (auto &info : right_info)
|
2016-08-12 12:22:14 +08:00
|
|
|
{
|
2016-12-24 23:04:57 +08:00
|
|
|
std::cout << info.string() << ", ";
|
2016-08-12 12:22:14 +08:00
|
|
|
}
|
|
|
|
std::cout << std::endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool match = true;
|
2016-10-31 03:48:40 +08:00
|
|
|
|
|
|
|
xlnt::workbook left_workbook;
|
|
|
|
left_workbook.load(left);
|
2016-12-02 21:37:50 +08:00
|
|
|
|
|
|
|
xlnt::workbook right_workbook;
|
|
|
|
right_workbook.load(right);
|
2016-09-06 10:17:36 +08:00
|
|
|
|
2016-12-02 21:37:50 +08:00
|
|
|
auto &left_manifest = left_workbook.manifest();
|
|
|
|
auto &right_manifest = right_workbook.manifest();
|
2016-08-06 22:40:17 +08:00
|
|
|
|
|
|
|
for (auto left_member : left_info)
|
|
|
|
{
|
2016-11-01 08:48:43 +08:00
|
|
|
if (!right_archive.has_file(left_member))
|
2016-08-12 12:22:14 +08:00
|
|
|
{
|
|
|
|
match = false;
|
2016-12-24 23:04:57 +08:00
|
|
|
std::cout << "right is missing file: " << left_member.string() << std::endl;
|
2016-08-12 12:22:14 +08:00
|
|
|
continue;
|
|
|
|
}
|
2016-08-06 22:40:17 +08:00
|
|
|
|
2016-12-24 23:04:57 +08:00
|
|
|
auto left_member_streambuf = left_archive.open(left_member);
|
|
|
|
std::istream left_member_stream(left_member_streambuf.get());
|
2016-10-31 03:48:40 +08:00
|
|
|
std::vector<std::uint8_t> left_contents_raw;
|
|
|
|
xlnt::detail::vector_ostreambuf left_contents_buffer(left_contents_raw);
|
|
|
|
std::ostream left_contents_stream(&left_contents_buffer);
|
2016-11-01 08:48:43 +08:00
|
|
|
left_contents_stream << left_member_stream.rdbuf();
|
2016-10-31 03:48:40 +08:00
|
|
|
std::string left_member_contents(left_contents_raw.begin(), left_contents_raw.end());
|
|
|
|
|
2017-01-01 08:15:09 +08:00
|
|
|
auto right_member_streambuf = left_archive.open(left_member);
|
2016-12-24 23:04:57 +08:00
|
|
|
std::istream right_member_stream(right_member_streambuf.get());
|
2016-10-31 03:48:40 +08:00
|
|
|
std::vector<std::uint8_t> right_contents_raw;
|
|
|
|
xlnt::detail::vector_ostreambuf right_contents_buffer(right_contents_raw);
|
|
|
|
std::ostream right_contents_stream(&right_contents_buffer);
|
2016-11-01 08:48:43 +08:00
|
|
|
right_contents_stream << right_member_stream.rdbuf();
|
2016-10-31 03:48:40 +08:00
|
|
|
std::string right_member_contents(right_contents_raw.begin(), right_contents_raw.end());
|
|
|
|
|
2016-09-06 10:17:36 +08:00
|
|
|
std::string left_content_type, right_content_type;
|
|
|
|
|
2016-12-24 23:04:57 +08:00
|
|
|
if (left_member.string() != "[Content_Types].xml")
|
2016-09-06 10:17:36 +08:00
|
|
|
{
|
2016-12-24 23:04:57 +08:00
|
|
|
left_content_type = left_manifest.content_type(left_member);
|
|
|
|
right_content_type = right_manifest.content_type(left_member);
|
2016-09-06 10:17:36 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
left_content_type = right_content_type = "[Content_Types].xml";
|
|
|
|
}
|
|
|
|
|
|
|
|
if (left_content_type != right_content_type)
|
|
|
|
{
|
|
|
|
std::cout << "content types differ: "
|
2016-12-24 23:04:57 +08:00
|
|
|
<< left_member.string()
|
2016-09-06 10:17:36 +08:00
|
|
|
<< " "
|
|
|
|
<< left_content_type
|
|
|
|
<< " "
|
|
|
|
<< right_content_type
|
|
|
|
<< std::endl;
|
|
|
|
match = false;
|
|
|
|
}
|
|
|
|
else if (!compare_files(left_member_contents, right_member_contents, left_content_type))
|
2016-08-06 22:40:17 +08:00
|
|
|
{
|
2016-12-24 23:04:57 +08:00
|
|
|
std::cout << left_member.string() << std::endl;
|
2016-08-12 12:22:14 +08:00
|
|
|
match = false;
|
2016-08-06 22:40:17 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-12 12:22:14 +08:00
|
|
|
return match;
|
2016-08-06 22:40:17 +08:00
|
|
|
}
|
2014-06-05 06:42:17 +08:00
|
|
|
};
|