diff --git a/docs/source/codecvt.rst b/docs/source/codecvt.rst index e1fb8702..4633f692 100644 --- a/docs/source/codecvt.rst +++ b/docs/source/codecvt.rst @@ -7,7 +7,7 @@ because this is surprisingly hard using standard C++ The ``<codecvt>`` header is no longer used and sol2 now converts utf8, utf16, and utf32 with internal routines. If you have a problem with the transcoding, please `file an issue report`_. -``std::(w)string(u16/u32)`` are assumed to be in the platform's native wide (for ``wstring``) or unicode format. Lua canonically stores its string literals as utf8 and embraces utf8, albeit its storage is simply a sequence of bytes that are also null-terminated (it is also counted and the size is kept around, so embedded nulls can be used in the string). Therefore, if you need to interact with the unicode or wide alternatives of strings, runtime conversions are performed from the (assumed) utf8 string data into other forms. These conversions check for well-formed UTF, and will error if they are not when converting. +``std::(w)string(u16/u32)`` are assumed to be in the platform's native wide (for ``wstring``) or unicode format. Lua canonically stores its string literals as utf8 and embraces utf8, albeit its storage is simply a sequence of bytes that are also null-terminated (it is also counted and the size is kept around, so embedded nulls can be used in the string). Therefore, if you need to interact with the unicode or wide alternatives of strings, runtime conversions are performed from the (assumed) utf8 string data into other forms. These conversions check for well-formed UTF, and will replace ill-formed characters with the unicode replacement codepoint, 0xFFFD. Note that we cannot give you ``string_view``\ s to utf16 or utf32 strings: Lua does not hold them in memory this way. You can perhaps do your own customization to provide for this if need be. 
Remember that Lua stores a counted sequence of bytes: serializing your string as bytes and pushing a string type into Lua's stack will work, though do not expect any complex string routines or printing to behave nicely with your code. diff --git a/docs/source/functions.rst b/docs/source/functions.rst index 6aaac76a..69b00cbb 100644 --- a/docs/source/functions.rst +++ b/docs/source/functions.rst @@ -29,17 +29,7 @@ There are a number of examples dealing with functions and how they can be bound working with callables/lambdas ------------------------------ -To be explicit about wanting a struct to be interpreted as a function, use ``mytable.set_function( key, func_value );``. You can be explicit about wanting a function as well by using the :doc:`sol::as_function<../api/as_function>` call, which will wrap and identify your type as a function. - -.. note:: - - As of sol 2.18.1, the below - -.. note:: - - Function objects ``obj`` -- a struct with a ``return_type operator()( ... )`` member defined on them, like all C++ lambdas -- are not interpreted as functions when you use ``set`` for ``mytable.set( key, value )`` and ``state.create_table(_with)( ... )``. This only happens automagically with ``mytable[key] = obj``. - - Note that this also applies to calling functions, for example: ``my_state["table"]["sort"]( some_table, sorting_object );``. +To be explicit about wanting a struct to be interpreted as a function, use ``mytable.set_function( key, func_value );``. You can also use the :doc:`sol::as_function<../api/as_function>` call, which will wrap and identify your type as a function. Furthermore, it is important to know that lambdas without a specified return type (and a non-const, non-reference-qualified ``auto``) will decay return values. 
To capture or return references explicitly, use ``decltype(auto)`` or specify the return type **exactly** as desired: diff --git a/single/sol/sol.hpp b/single/sol/sol.hpp index 85f158d7..16b49547 100644 --- a/single/sol/sol.hpp +++ b/single/sol/sol.hpp @@ -20,8 +20,8 @@ // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // This file was generated with a script. -// Generated 2018-02-23 21:59:31.750406 UTC -// This header was generated with sol v2.19.4 (revision b60132e) +// Generated 2018-02-24 23:50:10.791344 UTC +// This header was generated with sol v2.19.4 (revision 22c41d9) // https://github.com/ThePhD/sol2 #ifndef SOL_SINGLE_INCLUDE_HPP @@ -8296,7 +8296,7 @@ namespace sol { // Everything here was lifted pretty much straight out of // ogonek, because fuck figuring it out= namespace unicode { - enum error_code { + enum class error_code { ok = 0, invalid_code_point, invalid_code_unit, @@ -8388,6 +8388,7 @@ namespace sol { static constexpr int lead_surrogate_bitmask = 0xFFC00; static constexpr int trail_surrogate_bitmask = 0x3FF; static constexpr int lead_shifted_bits = 10; + static constexpr char32_t replacement = 0xFFFD; static char32_t combine_surrogates(char16_t lead, char16_t trail) { auto hi = lead - first_lead_surrogate; @@ -9022,6 +9023,24 @@ namespace stack { template struct getter> { + template + static void convert(const char* strb, const char* stre, F&& f) { + char32_t cp = 0; + for (const char* strtarget = strb; strtarget < stre;) { + auto dr = unicode::utf8_to_code_point(strtarget, stre); + if (dr.error != unicode::error_code::ok) { + cp = unicode::unicode_detail::replacement; + ++strtarget; + } + else { + cp = dr.codepoint; + strtarget = dr.next; + } + auto er = unicode::code_point_to_utf16(cp); + f(er); + } + } + template static S get_into(lua_State* L, int index, record& tracking) { typedef typename S::value_type Ch; @@ -9033,22 +9052,18 @@ namespace stack { std::size_t needed_size = 0; const char* strb = utf8p; const 
char* stre = utf8p + len; - for (const char* strtarget = strb; strtarget < stre;) { - auto dr = unicode::utf8_to_code_point(strtarget, stre); - auto er = unicode::code_point_to_utf16(dr.codepoint); + auto count_units = [&needed_size](const unicode::encoded_result er) { needed_size += er.code_units_size; - strtarget = dr.next; - } + }; + convert(strb, stre, count_units); S r(needed_size, static_cast(0)); r.resize(needed_size); Ch* target = &r[0]; - for (const char* strtarget = strb; strtarget < stre;) { - auto dr = unicode::utf8_to_code_point(strtarget, stre); - auto er = unicode::code_point_to_utf16(dr.codepoint); + auto copy_units = [&target](const unicode::encoded_result er) { std::memcpy(target, er.code_units.data(), er.code_units_size * sizeof(Ch)); - strtarget = dr.next; target += er.code_units_size; - } + }; + convert(strb, stre, copy_units); return r; } @@ -9059,6 +9074,24 @@ namespace stack { template struct getter> { + template + static void convert(const char* strb, const char* stre, F&& f) { + char32_t cp = 0; + for (const char* strtarget = strb; strtarget < stre;) { + auto dr = unicode::utf8_to_code_point(strtarget, stre); + if (dr.error != unicode::error_code::ok) { + cp = unicode::unicode_detail::replacement; + ++strtarget; + } + else { + cp = dr.codepoint; + strtarget = dr.next; + } + auto er = unicode::code_point_to_utf32(cp); + f(er); + } + } + template static S get_into(lua_State* L, int index, record& tracking) { typedef typename S::value_type Ch; @@ -9070,22 +9103,18 @@ namespace stack { std::size_t needed_size = 0; const char* strb = utf8p; const char* stre = utf8p + len; - for (const char* strtarget = strb; strtarget < stre;) { - auto dr = unicode::utf8_to_code_point(strtarget, stre); - auto er = unicode::code_point_to_utf32(dr.codepoint); + auto count_units = [&needed_size](const unicode::encoded_result er) { needed_size += er.code_units_size; - strtarget = dr.next; - } + }; + convert(strb, stre, count_units); S r(needed_size, 
static_cast(0)); r.resize(needed_size); Ch* target = &r[0]; - for (const char* strtarget = strb; strtarget < stre;) { - auto dr = unicode::utf8_to_code_point(strtarget, stre); - auto er = unicode::code_point_to_utf32(dr.codepoint); + auto copy_units = [&target](const unicode::encoded_result er) { std::memcpy(target, er.code_units.data(), er.code_units_size * sizeof(Ch)); - strtarget = dr.next; target += er.code_units_size; - } + }; + convert(strb, stre, copy_units); return r; } @@ -9100,8 +9129,15 @@ namespace stack { string_view utf8 = stack::get(L, index, tracking); const char* strb = utf8.data(); const char* stre = utf8.data() + utf8.size(); + char32_t cp = 0; auto dr = unicode::utf8_to_code_point(strb, stre); - auto er = unicode::code_point_to_utf16(dr.codepoint); + if (dr.error != unicode::error_code::ok) { + cp = unicode::unicode_detail::replacement; + } + else { + cp = dr.codepoint; + } + auto er = unicode::code_point_to_utf16(cp); return er.code_units[0]; } }; @@ -9112,8 +9148,15 @@ namespace stack { string_view utf8 = stack::get(L, index, tracking); const char* strb = utf8.data(); const char* stre = utf8.data() + utf8.size(); + char32_t cp = 0; auto dr = unicode::utf8_to_code_point(strb, stre); - auto er = unicode::code_point_to_utf32(dr.codepoint); + if (dr.error != unicode::error_code::ok) { + cp = unicode::unicode_detail::replacement; + } + else { + cp = dr.codepoint; + } + auto er = unicode::code_point_to_utf32(cp); return er.code_units[0]; } }; @@ -10197,9 +10240,16 @@ namespace stack { struct pusher { static int convert_into(lua_State* L, char* start, std::size_t, const char16_t* strb, const char16_t* stre) { char* target = start; + char32_t cp = 0; for (const char16_t* strtarget = strb; strtarget < stre;) { auto dr = unicode::utf16_to_code_point(strtarget, stre); - auto er = unicode::code_point_to_utf8(dr.codepoint); + if (dr.error != unicode::error_code::ok) { + cp = unicode::unicode_detail::replacement; + } + else { + cp = dr.codepoint; + } + auto 
er = unicode::code_point_to_utf8(cp); const char* utf8data = er.code_units.data(); std::memcpy(target, utf8data, er.code_units_size); target += er.code_units_size; @@ -10270,9 +10320,16 @@ namespace stack { struct pusher { static int convert_into(lua_State* L, char* start, std::size_t, const char32_t* strb, const char32_t* stre) { char* target = start; + char32_t cp = 0; for (const char32_t* strtarget = strb; strtarget < stre;) { auto dr = unicode::utf32_to_code_point(strtarget, stre); - auto er = unicode::code_point_to_utf8(dr.codepoint); + if (dr.error != unicode::error_code::ok) { + cp = unicode::unicode_detail::replacement; + } + else { + cp = dr.codepoint; + } + auto er = unicode::code_point_to_utf8(cp); const char* data = er.code_units.data(); std::memcpy(target, data, er.code_units_size); target += er.code_units_size; diff --git a/single/sol/sol_forward.hpp b/single/sol/sol_forward.hpp index 7f6b4322..d2acb13d 100644 --- a/single/sol/sol_forward.hpp +++ b/single/sol/sol_forward.hpp @@ -20,8 +20,8 @@ // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // This file was generated with a script. 
-// Generated 2018-02-23 21:59:32.038059 UTC -// This header was generated with sol v2.19.4 (revision b60132e) +// Generated 2018-02-24 23:50:11.000275 UTC +// This header was generated with sol v2.19.4 (revision 22c41d9) // https://github.com/ThePhD/sol2 #ifndef SOL_SINGLE_INCLUDE_FORWARD_HPP diff --git a/sol/stack_get.hpp b/sol/stack_get.hpp index ab2b03d4..32566711 100644 --- a/sol/stack_get.hpp +++ b/sol/stack_get.hpp @@ -467,6 +467,24 @@ namespace stack { template struct getter> { + template + static void convert(const char* strb, const char* stre, F&& f) { + char32_t cp = 0; + for (const char* strtarget = strb; strtarget < stre;) { + auto dr = unicode::utf8_to_code_point(strtarget, stre); + if (dr.error != unicode::error_code::ok) { + cp = unicode::unicode_detail::replacement; + ++strtarget; + } + else { + cp = dr.codepoint; + strtarget = dr.next; + } + auto er = unicode::code_point_to_utf16(cp); + f(er); + } + } + template static S get_into(lua_State* L, int index, record& tracking) { typedef typename S::value_type Ch; @@ -478,22 +496,18 @@ namespace stack { std::size_t needed_size = 0; const char* strb = utf8p; const char* stre = utf8p + len; - for (const char* strtarget = strb; strtarget < stre;) { - auto dr = unicode::utf8_to_code_point(strtarget, stre); - auto er = unicode::code_point_to_utf16(dr.codepoint); + auto count_units = [&needed_size](const unicode::encoded_result er) { needed_size += er.code_units_size; - strtarget = dr.next; - } + }; + convert(strb, stre, count_units); S r(needed_size, static_cast(0)); r.resize(needed_size); Ch* target = &r[0]; - for (const char* strtarget = strb; strtarget < stre;) { - auto dr = unicode::utf8_to_code_point(strtarget, stre); - auto er = unicode::code_point_to_utf16(dr.codepoint); + auto copy_units = [&target](const unicode::encoded_result er) { std::memcpy(target, er.code_units.data(), er.code_units_size * sizeof(Ch)); - strtarget = dr.next; target += er.code_units_size; - } + }; + convert(strb, stre, 
copy_units); return r; } @@ -504,6 +518,24 @@ namespace stack { template struct getter> { + template + static void convert(const char* strb, const char* stre, F&& f) { + char32_t cp = 0; + for (const char* strtarget = strb; strtarget < stre;) { + auto dr = unicode::utf8_to_code_point(strtarget, stre); + if (dr.error != unicode::error_code::ok) { + cp = unicode::unicode_detail::replacement; + ++strtarget; + } + else { + cp = dr.codepoint; + strtarget = dr.next; + } + auto er = unicode::code_point_to_utf32(cp); + f(er); + } + } + template static S get_into(lua_State* L, int index, record& tracking) { typedef typename S::value_type Ch; @@ -515,22 +547,18 @@ namespace stack { std::size_t needed_size = 0; const char* strb = utf8p; const char* stre = utf8p + len; - for (const char* strtarget = strb; strtarget < stre;) { - auto dr = unicode::utf8_to_code_point(strtarget, stre); - auto er = unicode::code_point_to_utf32(dr.codepoint); + auto count_units = [&needed_size](const unicode::encoded_result er) { needed_size += er.code_units_size; - strtarget = dr.next; - } + }; + convert(strb, stre, count_units); S r(needed_size, static_cast(0)); r.resize(needed_size); Ch* target = &r[0]; - for (const char* strtarget = strb; strtarget < stre;) { - auto dr = unicode::utf8_to_code_point(strtarget, stre); - auto er = unicode::code_point_to_utf32(dr.codepoint); + auto copy_units = [&target](const unicode::encoded_result er) { std::memcpy(target, er.code_units.data(), er.code_units_size * sizeof(Ch)); - strtarget = dr.next; target += er.code_units_size; - } + }; + convert(strb, stre, copy_units); return r; } @@ -545,8 +573,15 @@ namespace stack { string_view utf8 = stack::get(L, index, tracking); const char* strb = utf8.data(); const char* stre = utf8.data() + utf8.size(); + char32_t cp = 0; auto dr = unicode::utf8_to_code_point(strb, stre); - auto er = unicode::code_point_to_utf16(dr.codepoint); + if (dr.error != unicode::error_code::ok) { + cp = unicode::unicode_detail::replacement; 
+ } + else { + cp = dr.codepoint; + } + auto er = unicode::code_point_to_utf16(cp); return er.code_units[0]; } }; @@ -557,8 +592,15 @@ namespace stack { string_view utf8 = stack::get(L, index, tracking); const char* strb = utf8.data(); const char* stre = utf8.data() + utf8.size(); + char32_t cp = 0; auto dr = unicode::utf8_to_code_point(strb, stre); - auto er = unicode::code_point_to_utf32(dr.codepoint); + if (dr.error != unicode::error_code::ok) { + cp = unicode::unicode_detail::replacement; + } + else { + cp = dr.codepoint; + } + auto er = unicode::code_point_to_utf32(cp); return er.code_units[0]; } }; diff --git a/sol/stack_push.hpp b/sol/stack_push.hpp index 4e27c23e..e17d56c9 100644 --- a/sol/stack_push.hpp +++ b/sol/stack_push.hpp @@ -692,9 +692,16 @@ namespace stack { struct pusher { static int convert_into(lua_State* L, char* start, std::size_t, const char16_t* strb, const char16_t* stre) { char* target = start; + char32_t cp = 0; for (const char16_t* strtarget = strb; strtarget < stre;) { auto dr = unicode::utf16_to_code_point(strtarget, stre); - auto er = unicode::code_point_to_utf8(dr.codepoint); + if (dr.error != unicode::error_code::ok) { + cp = unicode::unicode_detail::replacement; + } + else { + cp = dr.codepoint; + } + auto er = unicode::code_point_to_utf8(cp); const char* utf8data = er.code_units.data(); std::memcpy(target, utf8data, er.code_units_size); target += er.code_units_size; @@ -765,9 +772,16 @@ namespace stack { struct pusher { static int convert_into(lua_State* L, char* start, std::size_t, const char32_t* strb, const char32_t* stre) { char* target = start; + char32_t cp = 0; for (const char32_t* strtarget = strb; strtarget < stre;) { auto dr = unicode::utf32_to_code_point(strtarget, stre); - auto er = unicode::code_point_to_utf8(dr.codepoint); + if (dr.error != unicode::error_code::ok) { + cp = unicode::unicode_detail::replacement; + } + else { + cp = dr.codepoint; + } + auto er = unicode::code_point_to_utf8(cp); const char* data = 
er.code_units.data(); std::memcpy(target, data, er.code_units_size); target += er.code_units_size; diff --git a/sol/unicode.hpp b/sol/unicode.hpp index 3a549dd3..7d997fd4 100644 --- a/sol/unicode.hpp +++ b/sol/unicode.hpp @@ -8,7 +8,7 @@ namespace sol { // Everything here was lifted pretty much straight out of // ogonek, because fuck figuring it out= namespace unicode { - enum error_code { + enum class error_code { ok = 0, invalid_code_point, invalid_code_unit, @@ -100,6 +100,7 @@ namespace sol { static constexpr int lead_surrogate_bitmask = 0xFFC00; static constexpr int trail_surrogate_bitmask = 0x3FF; static constexpr int lead_shifted_bits = 10; + static constexpr char32_t replacement = 0xFFFD; static char32_t combine_surrogates(char16_t lead, char16_t trail) { auto hi = lead - first_lead_surrogate;