diff --git a/include/xlnt/cell/cell.hpp b/include/xlnt/cell/cell.hpp index fc79ee13..658ab54d 100644 --- a/include/xlnt/cell/cell.hpp +++ b/include/xlnt/cell/cell.hpp @@ -340,6 +340,11 @@ public: /// calendar get_base_date() const; + /// + /// Return to_check after checking encoding, size, and illegal characters. + /// + std::string check_string(const std::string &to_check); + // operators /// diff --git a/include/xlnt/utils/exceptions.hpp b/include/xlnt/utils/exceptions.hpp index cfb4c6f7..924c80f3 100644 --- a/include/xlnt/utils/exceptions.hpp +++ b/include/xlnt/utils/exceptions.hpp @@ -35,4 +35,5 @@ #include #include #include +#include #include diff --git a/include/xlnt/utils/timedelta.hpp b/include/xlnt/utils/timedelta.hpp index 79462084..400c0dec 100644 --- a/include/xlnt/utils/timedelta.hpp +++ b/include/xlnt/utils/timedelta.hpp @@ -36,10 +36,9 @@ struct XLNT_CLASS timedelta { static timedelta from_number(long double number); - timedelta(int days_, int hours_, int minutes_ = 0, int seconds_ = 0, int microseconds_ = 0) - : days(days_), hours(hours_), minutes(minutes_), seconds(seconds_), microseconds(microseconds_) - { - } + timedelta(); + + timedelta(int days_, int hours_, int minutes_, int seconds_, int microseconds_); double to_number() const; diff --git a/include/xlnt/utils/unicode_decode_error.hpp b/include/xlnt/utils/unicode_decode_error.hpp new file mode 100644 index 00000000..c554a723 --- /dev/null +++ b/include/xlnt/utils/unicode_decode_error.hpp @@ -0,0 +1,44 @@ +// Copyright (c) 2014-2015 Thomas Fussell +// Copyright (c) 2010-2015 openpyxl +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to 
do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// @license: http://www.opensource.org/licenses/mit-license.php +// @author: see AUTHORS file +#pragma once + +#include +#include + +#include + +namespace xlnt { + +/// +/// Error for string encoding not matching workbook encoding +/// +class XLNT_CLASS unicode_decode_error : public std::runtime_error +{ + public: + unicode_decode_error(); + unicode_decode_error(char c); + unicode_decode_error(std::uint8_t b); +}; + +} // namespace xlnt diff --git a/include/xlnt/workbook/workbook.hpp b/include/xlnt/workbook/workbook.hpp index 81f561df..ca654fe8 100644 --- a/include/xlnt/workbook/workbook.hpp +++ b/include/xlnt/workbook/workbook.hpp @@ -116,6 +116,8 @@ class XLNT_CLASS workbook friend void swap(workbook &left, workbook &right); + encoding get_encoding() const; + worksheet get_active_sheet(); bool get_guess_types() const; @@ -153,14 +155,8 @@ class XLNT_CLASS workbook iterator begin(); iterator end(); - const_iterator begin() const - { - return cbegin(); - } - const_iterator end() const - { - return cend(); - } + const_iterator begin() const; + const_iterator end() const; const_iterator cbegin() const; const_iterator cend() const; diff --git a/source/cell/cell.cpp b/source/cell/cell.cpp index b7790371..28ed03c2 100644 --- a/source/cell/cell.cpp +++ b/source/cell/cell.cpp @@ -6,7 +6,9 @@ #include
#include #include +#include #include +#include #include #include #include @@ -32,6 +34,159 @@ const std::unordered_map &cell::error_codes() return *codes; }; +std::string cell::check_string(const std::string &to_check) +{ + // so we can modify it + std::string s = to_check; + + if (s.size() == 0) + { + return s; + } + + auto wb_encoding = get_parent().get_parent().get_encoding(); + + //XXX: use utfcpp for this! + switch(wb_encoding) + { + case encoding::latin1: break; // all bytes are valid in latin1 + case encoding::ascii: + for (char c : s) + { + if (c < 0) + { + throw xlnt::unicode_decode_error(c); + } + } + break; + case encoding::utf8: + { + std::vector bytes; + + for (char c : s) + { + auto byte = static_cast(c); + + if (byte < 128) + { + if(!bytes.empty()) + { + throw xlnt::unicode_decode_error(c); + } + } + else + { + if(!bytes.empty()) + { + if(byte >> 6 != 2) + { + throw xlnt::unicode_decode_error(c); + } + } + } + + bytes.push_back(byte); + + auto first_byte = bytes[0]; + auto num_bytes = 0; + + if(first_byte < 128) + { + num_bytes = 1; + } + else if(first_byte >> 5 == 0b110) + { + num_bytes = 2; + } + else if(first_byte >> 4 == 0b1110) + { + num_bytes = 3; + } + else if(first_byte >> 3 == 0b11110) + { + num_bytes = 4; + } + else if(first_byte >> 2 == 0b111110) + { + num_bytes = 5; + } + else if(first_byte >> 1 == 0b1111110) + { + num_bytes = 6; + } + + if(num_bytes > bytes.size()) + { + throw xlnt::unicode_decode_error(c); + } + + if(num_bytes == bytes.size()) + { + bytes.clear(); + } + } + + // Check last code point + if(!bytes.empty()) + { + auto first_byte = bytes[0]; + auto num_bytes = 0; + + if(first_byte < 128) + { + num_bytes = 1; + } + else if(first_byte >> 5 == 0b110) + { + num_bytes = 2; + } + else if(first_byte >> 4 == 0b1110) + { + num_bytes = 3; + } + else if(first_byte >> 3 == 0b11110) + { + num_bytes = 4; + } + else if(first_byte >> 2 == 0b111110) + { + num_bytes = 5; + } + else if(first_byte >> 1 == 0b1111110) + { + num_bytes = 6; + } + 
+ if(num_bytes > bytes.size()) + { + throw xlnt::unicode_decode_error(); + } + } + + break; + } + default: + // other encodings not supported yet + break; + } // switch(wb_encoding) + + // check encoding? + if (s.size() > 32767) + { + s = s.substr(0, 32767); // max string length in Excel + } + + for (char c : s) + { + if (c >= 0 && (c <= 8 || c == 11 || c == 12 || (c >= 14 && c <= 31))) + { + throw xlnt::illegal_character_error(c); + } + } + + return s; +} + cell::cell() : d_(nullptr) { } @@ -169,7 +324,7 @@ XLNT_FUNCTION void cell::set_value(long double d) template <> XLNT_FUNCTION void cell::set_value(std::string s) { - d_->set_string(s, get_parent().get_parent().get_guess_types()); + d_->set_string(check_string(s), get_parent().get_parent().get_guess_types()); if (get_data_type() == type::string && !s.empty()) { @@ -497,7 +652,15 @@ const number_format &cell::get_number_format() const const font &cell::get_font() const { - return get_parent().get_parent().get_font(d_->style_id_); + if (d_->has_style_) + { + auto font_id = get_parent().get_parent().get_styles()[d_->style_id_].get_font_id(); + return get_parent().get_parent().get_font(font_id); + } + else + { + return get_parent().get_parent().get_font(0); + } } const fill &cell::get_fill() const @@ -642,6 +805,12 @@ XLNT_FUNCTION timedelta cell::get_value() const return timedelta::from_number(d_->value_numeric_); } +void cell::set_font(const font &font_) +{ + d_->has_style_ = true; + d_->style_id_ = get_parent().get_parent().set_font(font_, d_->style_id_); +} + void cell::set_number_format(const number_format &number_format_) { d_->has_style_ = true; diff --git a/source/cell/tests/test_cell.hpp b/source/cell/tests/test_cell.hpp index 0dd0655b..a3f8977c 100644 --- a/source/cell/tests/test_cell.hpp +++ b/source/cell/tests/test_cell.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -265,31 +266,14 @@ public: cell.set_value(std::string(1, 13)); // Carriage return cell.set_value(" 
Leading and trailing spaces are legal "); } - /* - values = ( - ('30:33.865633336', [('', '', '', '30', '33', '865633')]), - ('03:40:16', [('03', '40', '16', '', '', '')]), - ('03:40', [('03', '40', '', '', '', '')]), - ('55:72:12', []), - ) - @pytest.mark.parametrize("value, expected", - values) - */ - void test_time_regex() - { - /* - from openpyxl.cell.cell import TIME_REGEX; - m = TIME_REGEX.findall(value); - TS_ASSERT(m == expected; - */ - } void test_timedelta() { auto ws = wb.create_sheet(); auto cell = ws.get_cell(xlnt::cell_reference(1, 1)); - cell.set_value(xlnt::timedelta(1, 3)); + cell.set_value(xlnt::timedelta(1, 3, 0, 0, 0)); + TS_ASSERT(cell.get_value() == 1.125); TS_ASSERT(cell.get_data_type() == xlnt::cell::type::numeric); TS_ASSERT(!cell.is_date()); @@ -352,43 +336,42 @@ public: void test_cell_offset() { - /* auto ws = wb.create_sheet(); auto cell = ws.get_cell(xlnt::cell_reference(1, 1)); - TS_ASSERT(cell.offset(2, 1).get_reference() == "B3"); - */ + TS_ASSERT(cell.offset(1, 2).get_reference() == "B3"); + } + + std::string make_latin1_string() + { + unsigned char pound = 163; + auto test_string = "Compound Value (" + std::string(1, pound) + ")"; + return test_string; } void test_bad_encoding() { - /* - unsigned char pound = 163; - auto test_string = "Compount Value (" + std::string(pound) + ")"; auto ws = wb.create_sheet(); - cell = ws[xlnt::cell_reference("A1")]; - TS_ASSERT_THROWS(cell.check_string(test_string), xlnt::unicode_decode_error); - TS_ASSERT_THROWS(cell.set_value(test_string), xlnt::unicode_decode_error); - */ + auto cell = ws[xlnt::cell_reference("A1")]; + TS_ASSERT_THROWS(cell.check_string(make_latin1_string()), xlnt::unicode_decode_error); + TS_ASSERT_THROWS(cell.set_value(make_latin1_string()), xlnt::unicode_decode_error); } void test_good_encoding() { - /* - auto wb = xlnt::workbook(xlnt::encoding::latin1); - auto ws = wb.get_active_sheet(); + xlnt::workbook latin1_wb(xlnt::encoding::latin1); + auto ws = 
latin1_wb.get_active_sheet(); auto cell = ws[xlnt::cell_reference("A1")]; - cell.set_value(test_string); - */ + cell.check_string(make_latin1_string()); + cell.set_value(make_latin1_string()); } - void _test_font() + void test_font() { xlnt::font font; font.set_bold(true); - auto ws = wb.create_sheet(); - ws.get_parent().add_font(font); - + auto ws = wb.create_sheet(); auto cell = xlnt::cell(ws, "A1"); + cell.set_font(font); TS_ASSERT_EQUALS(cell.get_font(), font); } diff --git a/source/detail/cell_impl.cpp b/source/detail/cell_impl.cpp index 7da4131a..ed60b3fe 100644 --- a/source/detail/cell_impl.cpp +++ b/source/detail/cell_impl.cpp @@ -56,5 +56,59 @@ cell_impl &cell_impl::operator=(const cell_impl &rhs) return *this; } +cell cell_impl::self() +{ + return xlnt::cell(this); +} + +void cell_impl::set_string(const std::string &s, bool guess_types) +{ + value_string_ = s; + type_ = cell::type::string; + + if (value_string_.size() > 1 && value_string_.front() == '=') + { + formula_ = value_string_; + type_ = cell::type::formula; + value_string_.clear(); + } + else if (cell::error_codes().find(s) != cell::error_codes().end()) + { + type_ = cell::type::error; + } + else if (guess_types) + { + auto percentage = cast_percentage(s); + + if (percentage.first) + { + value_numeric_ = percentage.second; + type_ = cell::type::numeric; + self().set_number_format(xlnt::number_format::percentage()); + } + else + { + auto time = cast_time(s); + + if (time.first) + { + type_ = cell::type::numeric; + self().set_number_format(number_format::date_time6()); + value_numeric_ = time.second.to_number(); + } + else + { + auto numeric = cast_numeric(s); + + if (numeric.first) + { + value_numeric_ = numeric.second; + type_ = cell::type::numeric; + } + } + } + } +} + } // namespace detail } // namespace xlnt diff --git a/source/detail/cell_impl.hpp b/source/detail/cell_impl.hpp index 49beea12..bf5cb31d 100644 --- a/source/detail/cell_impl.hpp +++ b/source/detail/cell_impl.hpp @@ -14,32 +14,6 
@@ namespace { -// return s after checking encoding, size, and illegal characters -std::string check_string(std::string s) -{ - if (s.size() == 0) - { - return s; - } - - // check encoding? - - if (s.size() > 32767) - { - s = s.substr(0, 32767); // max string length in Excel - } - - for (char c : s) - { - if (c >= 0 && (c <= 8 || c == 11 || c == 12 || (c >= 14 && c <= 31))) - { - throw xlnt::illegal_character_error(c); - } - } - - return s; -} - std::pair cast_numeric(const std::string &s) { const char *str = s.c_str(); @@ -124,59 +98,9 @@ struct cell_impl cell_impl(const cell_impl &rhs); cell_impl &operator=(const cell_impl &rhs); - cell self() - { - return xlnt::cell(this); - } + cell self(); - void set_string(const std::string &s, bool guess_types) - { - value_string_ = check_string(s); - type_ = cell::type::string; - - if (value_string_.size() > 1 && value_string_.front() == '=') - { - formula_ = value_string_; - type_ = cell::type::formula; - value_string_.clear(); - } - else if (cell::error_codes().find(s) != cell::error_codes().end()) - { - type_ = cell::type::error; - } - else if (guess_types) - { - auto percentage = cast_percentage(s); - - if (percentage.first) - { - value_numeric_ = percentage.second; - type_ = cell::type::numeric; - self().set_number_format(xlnt::number_format::percentage()); - } - else - { - auto time = cast_time(s); - - if (time.first) - { - type_ = cell::type::numeric; - self().set_number_format(number_format::date_time6()); - value_numeric_ = time.second.to_number(); - } - else - { - auto numeric = cast_numeric(s); - - if (numeric.first) - { - value_numeric_ = numeric.second; - type_ = cell::type::numeric; - } - } - } - } - } + void set_string(const std::string &s, bool guess_types); cell::type type_; diff --git a/source/detail/workbook_impl.hpp b/source/detail/workbook_impl.hpp index 81e015f7..6cbf6816 100644 --- a/source/detail/workbook_impl.hpp +++ b/source/detail/workbook_impl.hpp @@ -83,6 +83,8 @@ struct workbook_impl manifest 
manifest_; theme theme_; + + encoding encoding_; }; } // namespace detail diff --git a/source/serialization/tests/test_read.hpp b/source/serialization/tests/test_read.hpp index 92238ec6..7fb21b29 100644 --- a/source/serialization/tests/test_read.hpp +++ b/source/serialization/tests/test_read.hpp @@ -299,8 +299,7 @@ public: { auto path = PathHelper::GetDataDirectory("/reader/formulae.xlsx"); - - xlnt::workbook wb; + xlnt::workbook wb(xlnt::encoding::latin1); xlnt::excel_serializer serializer(wb); serializer.load_workbook(path, false, true); diff --git a/source/utils/exceptions.cpp b/source/utils/exceptions.cpp index 5045c86c..bb372b72 100644 --- a/source/utils/exceptions.cpp +++ b/source/utils/exceptions.cpp @@ -48,6 +48,16 @@ illegal_character_error::illegal_character_error(char c) { } +unicode_decode_error::unicode_decode_error() + : std::runtime_error("unicode decode error") +{ +} + +unicode_decode_error::unicode_decode_error(char) + : std::runtime_error("unicode decode error") +{ +} + value_error::value_error() : std::runtime_error("value error") { diff --git a/source/utils/tests/test_timedelta.hpp b/source/utils/tests/test_timedelta.hpp index 88858c75..5e6ca2f0 100644 --- a/source/utils/tests/test_timedelta.hpp +++ b/source/utils/tests/test_timedelta.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include class test_timedelta : public CxxTest::TestSuite @@ -8,10 +9,25 @@ public: void test_from_number() { auto td = xlnt::timedelta::from_number(1.0); + TS_ASSERT(td.days == 1); TS_ASSERT(td.hours == 0); TS_ASSERT(td.minutes == 0); TS_ASSERT(td.seconds == 0); TS_ASSERT(td.microseconds == 0); } + + void test_round_trip() + { + long double time = 3.14159265; + auto td = xlnt::timedelta::from_number(time); + auto time_rt = td.to_number(); + TS_ASSERT_EQUALS(time, time_rt); + } + + void test_to_number() + { + xlnt::timedelta td(1, 1, 1, 1, 1); + TS_ASSERT_EQUALS(td.to_number(), 1.0423726851999999); + } }; diff --git a/source/utils/timedelta.cpp b/source/utils/timedelta.cpp 
index ddcc976a..edce0d9e 100644 --- a/source/utils/timedelta.cpp +++ b/source/utils/timedelta.cpp @@ -5,28 +5,72 @@ namespace xlnt { -double timedelta::to_number() const +timedelta::timedelta() : timedelta(0, 0, 0, 0, 0) { - return days + hours / 24.0; } -timedelta timedelta::from_number(long double number) +timedelta::timedelta(int days_, int hours_, int minutes_, int seconds_, int microseconds_) + : days(days_), + hours(hours_), + minutes(minutes_), + seconds(seconds_), + microseconds(microseconds_) + { + } + +double timedelta::to_number() const { - int days = static_cast(number); - number -= days; - number *= 24; - int hours = static_cast(number); - number -= hours; - number *= 60; - int minutes = static_cast(number); - number -= minutes; - number *= 60; - int seconds = static_cast(number); - number -= seconds; - number *= 1000000; - int microseconds = static_cast(number + 0.5); + std::uint64_t total_microseconds = static_cast(microseconds); + total_microseconds += static_cast(seconds * 1e6); + total_microseconds += static_cast(minutes * 1e6 * 60); + auto microseconds_per_hour = static_cast(1e6) * 60 * 60; + total_microseconds += static_cast(hours) * microseconds_per_hour; + auto number = total_microseconds / (24.0L * microseconds_per_hour); + auto hundred_billion = static_cast(1e9) * 100; + number = std::floor(number * hundred_billion + 0.5L) / hundred_billion; + number += days; - return timedelta(days, hours, minutes, seconds, microseconds); + return number; +} + +timedelta timedelta::from_number(long double raw_time) +{ + timedelta result; + + double integer_part; + double fractional_part = std::modf((double)raw_time, &integer_part); + + result.days = integer_part; + + fractional_part *= 24; + result.hours = (int)fractional_part; + fractional_part = 60 * (fractional_part - result.hours); + result.minutes = (int)fractional_part; + fractional_part = 60 * (fractional_part - result.minutes); + result.seconds = (int)fractional_part; + fractional_part = 1000000 * 
(fractional_part - result.seconds); + result.microseconds = (int)fractional_part; + + if (result.microseconds == 999999 && fractional_part - result.microseconds > 0.5) + { + result.microseconds = 0; + result.seconds += 1; + + if (result.seconds == 60) + { + result.seconds = 0; + result.minutes += 1; + + //TODO: too much nesting + if (result.minutes == 60) + { + result.minutes = 0; + result.hours += 1; + } + } + } + + return result; } } // namespace xlnt diff --git a/source/workbook/workbook.cpp b/source/workbook/workbook.cpp index 5a9c353f..6fa8998b 100644 --- a/source/workbook/workbook.cpp +++ b/source/workbook/workbook.cpp @@ -60,6 +60,11 @@ workbook::workbook() : d_(new detail::workbook_impl()) d_->manifest_.add_override_type("/docProps/app.xml", "application/vnd.openxmlformats-officedocument.extended-properties+xml"); } +workbook::workbook(encoding e) : workbook() +{ + d_->encoding_ = e; +} + workbook::iterator::iterator(workbook &wb, std::size_t index) : wb_(wb), index_(index) { } @@ -423,6 +428,11 @@ worksheet workbook::create_sheet(const std::string &title) return ws; } +encoding workbook::get_encoding() const +{ + return d_->encoding_; +} + workbook::iterator workbook::begin() { return iterator(*this, 0); @@ -433,6 +443,16 @@ workbook::iterator workbook::end() return iterator(*this, d_->worksheets_.size()); } +workbook::const_iterator workbook::begin() const +{ + return cbegin(); +} + +workbook::const_iterator workbook::end() const +{ + return cend(); +} + workbook::const_iterator workbook::cbegin() const { return const_iterator(*this, 0); @@ -681,39 +701,78 @@ const font &workbook::get_font(std::size_t font_id) const std::size_t workbook::set_font(const font &font_, std::size_t style_id) { auto match = std::find(d_->fonts_.begin(), d_->fonts_.end(), font_); - std::size_t font_index = 0; + std::size_t font_id = 0; if (match == d_->fonts_.end()) { d_->fonts_.push_back(font_); - font_index = d_->fonts_.size() - 1; + font_id = d_->fonts_.size() - 1; } else { - 
font_index = static_cast(match - d_->fonts_.begin()); + font_id = match - d_->fonts_.begin(); } - auto existing_style = d_->styles_[style_id]; + if (d_->styles_.empty()) + { + style new_style; - if (font_index == existing_style.font_id_) + new_style.id_ = 0; + new_style.border_id_ = 0; + new_style.fill_id_ = 0; + new_style.font_id_ = font_id; + new_style.font_apply_ = true; + new_style.number_format_id_ = 0; + + if (d_->borders_.empty()) + { + d_->borders_.push_back(new_style.get_border()); + } + + if (d_->fills_.empty()) + { + d_->fills_.push_back(new_style.get_fill()); + } + + if (d_->number_formats_.empty()) + { + d_->number_formats_.push_back(new_style.get_number_format()); + } + + d_->styles_.push_back(new_style); + + return 0; + } + + // If the style is unchanged, just return it. + auto &existing_style = d_->styles_[style_id]; + existing_style.font_apply_ = true; + + if (font_id == existing_style.font_id_) { // no change return style_id; } + // Make a new style with this format. auto new_style = existing_style; - new_style.font_id_ = font_index; + new_style.font_id_ = font_id; + new_style.font_ = font_; + + // Check if the new style is already applied to a different cell. If so, reuse it. auto style_match = std::find(d_->styles_.begin(), d_->styles_.end(), new_style); if (style_match != d_->styles_.end()) { - return static_cast(style_match - d_->styles_.begin()); + return style_match->get_id(); } + // No match found, so add it. + new_style.id_ = d_->styles_.size(); d_->styles_.push_back(new_style); - return d_->styles_.size() - 1; + return new_style.id_; } const fill &workbook::get_fill(std::size_t fill_id) const @@ -766,6 +825,7 @@ bool workbook::get_quote_prefix(std::size_t style_id) const return d_->styles_[style_id].quote_prefix_; } +//TODO: this is terrible! std::size_t workbook::set_number_format(const xlnt::number_format &format, std::size_t style_id) { auto match = std::find(d_->number_formats_.begin(), d_->number_formats_.end(), format);