Add shiny new unicode methods to replace shitty codecvt. It needs some work to be safer but this will work for now

string_view to avoid allocations added to a few more places
stack proxy's unused members were removed
container_traits now has the index_adjustment function added onto it.
Fixes #571
Fixes #574
Fixes #575
This commit is contained in:
ThePhD 2018-02-03 01:29:06 -05:00
parent f2d4150934
commit f48ba8b101
13 changed files with 604 additions and 265 deletions

View File

@ -576,7 +576,8 @@ namespace sol {
}
static error_result get_category(std::input_iterator_tag, lua_State* L, T& self, K& key) {
if (key < 1) {
key += deferred_traits::index_adjustment(L, self);
if (key < 0) {
return stack::push(L, lua_nil);
}
auto it = deferred_traits::begin(L, self);
@ -584,7 +585,7 @@ namespace sol {
if (it == e) {
return stack::push(L, lua_nil);
}
while (key > 1) {
while (key > 0) {
--key;
++it;
if (it == e) {
@ -596,10 +597,10 @@ namespace sol {
static error_result get_category(std::random_access_iterator_tag, lua_State* L, T& self, K& key) {
std::ptrdiff_t len = static_cast<std::ptrdiff_t>(size_start(L, self));
if (key < 1 || key > len) {
key += deferred_traits::index_adjustment(L, self);
if (key < 0 || key >= len) {
return stack::push(L, lua_nil);
}
--key;
auto it = std::next(deferred_traits::begin(L, self), key);
return get_associative(is_associative(), L, it);
}
@ -650,14 +651,15 @@ namespace sol {
static error_result set_category(std::input_iterator_tag, lua_State* L, T& self, stack_object okey, stack_object value) {
decltype(auto) key = okey.as<K>();
key += deferred_traits::index_adjustment(L, self);
auto e = deferred_traits::end(L, self);
auto it = deferred_traits::begin(L, self);
auto backit = it;
for (; key > 1 && it != e; --key, ++it) {
for (; key > 0 && it != e; --key, ++it) {
backit = it;
}
if (it == e) {
if (key == 1) {
if (key == 0) {
return add_copyable(is_copyable(), L, self, std::move(value), meta::has_insert_after<T>::value ? backit : it);
}
return error_result("out of bounds (too big) for set on '%s'", detail::demangle<T>().c_str());
@ -667,10 +669,10 @@ namespace sol {
static error_result set_category(std::random_access_iterator_tag, lua_State* L, T& self, stack_object okey, stack_object value) {
decltype(auto) key = okey.as<K>();
if (key < 1) {
if (key <= 0) {
return error_result("sol: out of bounds (too small) for set on '%s'", detail::demangle<T>().c_str());
}
--key;
key += deferred_traits::index_adjustment(L, self);
std::ptrdiff_t len = static_cast<std::ptrdiff_t>(size_start(L, self));
if (key == len) {
return add_copyable(is_copyable(), L, self, std::move(value));
@ -888,7 +890,7 @@ namespace sol {
static error_result insert_lookup(std::false_type, lua_State* L, T& self, stack_object where, stack_object value) {
auto it = deferred_traits::begin(L, self);
auto key = where.as<K>();
--key;
key += deferred_traits::index_adjustment(L, self);
std::advance(it, key);
self.insert(it, value.as<V>());
return {};
@ -898,7 +900,7 @@ namespace sol {
auto key = where.as<K>();
auto backit = self.before_begin();
{
--key;
key += deferred_traits::index_adjustment(L, self);
auto e = deferred_traits::end(L, self);
for (auto it = deferred_traits::begin(L, self); key > 0; ++backit, ++it, --key) {
if (backit == e) {
@ -932,7 +934,7 @@ namespace sol {
static error_result erase_integral(std::true_type, lua_State* L, T& self, K& key) {
auto it = deferred_traits::begin(L, self);
--key;
key += deferred_traits::index_adjustment(L, self);
std::advance(it, key);
self.erase(it);
@ -965,7 +967,7 @@ namespace sol {
static error_result erase_after_has(std::true_type, lua_State* L, T& self, K& key) {
auto backit = self.before_begin();
{
--key;
key += deferred_traits::index_adjustment(L, self);
auto e = deferred_traits::end(L, self);
for (auto it = deferred_traits::begin(L, self); key > 0; ++backit, ++it, --key) {
if (backit == e) {
@ -1182,6 +1184,10 @@ namespace sol {
return stack::push(L, empty_start(L, self));
}
static std::ptrdiff_t index_adjustment(lua_State*, T&) {
return static_cast<std::ptrdiff_t>(-1);
}
static int pairs(lua_State* L) {
typedef meta::any<is_associative, meta::all<is_lookup, meta::neg<is_matched_lookup>>> is_assoc;
return pairs_associative<false>(is_assoc(), L);
@ -1275,10 +1281,10 @@ namespace sol {
static int get(lua_State* L) {
T& self = get_src(L);
std::ptrdiff_t idx = stack::get<std::ptrdiff_t>(L, 2);
if (idx > static_cast<std::ptrdiff_t>(std::extent<T>::value) || idx < 1) {
idx += deferred_traits::index_adjustment(L, self);
if (idx >= static_cast<std::ptrdiff_t>(std::extent<T>::value) || idx < 0) {
return stack::push(L, lua_nil);
}
--idx;
return stack::push_reference(L, detail::deref(self[idx]));
}
@ -1289,13 +1295,13 @@ namespace sol {
static int set(lua_State* L) {
T& self = get_src(L);
std::ptrdiff_t idx = stack::get<std::ptrdiff_t>(L, 2);
if (idx > static_cast<std::ptrdiff_t>(std::extent<T>::value)) {
idx += deferred_traits::index_adjustment(L, self);
if (idx >= static_cast<std::ptrdiff_t>(std::extent<T>::value)) {
return luaL_error(L, "sol: index out of bounds (too big) for set on '%s'", detail::demangle<T>().c_str());
}
if (idx < 1) {
if (idx < 0) {
return luaL_error(L, "sol: index out of bounds (too small) for set on '%s'", detail::demangle<T>().c_str());
}
--idx;
self[idx] = stack::get<value_type>(L, 3);
return 0;
}
@ -1328,6 +1334,10 @@ namespace sol {
return pairs(L);
}
static std::ptrdiff_t index_adjustment(lua_State*, T&) {
return -1;
}
static iterator begin(lua_State*, T& self) {
return std::addressof(self[0]);
}

View File

@ -42,21 +42,6 @@
#endif // compiler-specific checks
#endif // C++17 only
#if defined(_WIN32) || defined(_MSC_VER)
#ifndef SOL_CODECVT_SUPPORT
#define SOL_CODECVT_SUPPORT 1
#endif // sol codecvt support
#elif defined(__GNUC__)
#if __GNUC__ >= 5
#ifndef SOL_CODECVT_SUPPORT
#define SOL_CODECVT_SUPPORT 1
#endif // codecvt support
#endif // g++ 5.x.x (MinGW too)
#else
// Clang sucks and doesn't really utilize codecvt support,
// not without checking the library versions explicitly (and we're not gonna do that)
#endif // Windows/VC++ vs. g++ vs Others
#ifdef _MSC_VER
#if defined(_DEBUG) && !defined(NDEBUG)
@ -187,4 +172,8 @@
#endif // SOL_UNORDERED_MAP_COMPATIBLE_HASH
#endif // Boost has unordered_map with Compatible Key and CompatibleHash
#ifndef SOL_STACK_STRING_OPTIMIZATION_SIZE
#define SOL_STACK_STRING_OPTIMIZATION_SIZE 1024
#endif // Optimized conversion routines using a KB or so off the stack
#endif // SOL_FEATURE_TEST_HPP

View File

@ -29,15 +29,13 @@
#include "inheritance.hpp"
#include "overload.hpp"
#include "error.hpp"
#include "unicode.hpp"
#include <memory>
#include <functional>
#include <utility>
#include <cstdlib>
#include <cmath>
#ifdef SOL_CODECVT_SUPPORT
#include <codecvt>
#include <locale>
#endif // codecvt header support
#ifdef SOL_CXX17_FEATURES
#include <string_view>
#include <variant>
@ -444,111 +442,136 @@ namespace stack {
}
};
template <>
struct getter<string_view> {
template <typename Traits>
struct getter<basic_string_view<char, Traits>> {
static string_view get(lua_State* L, int index, record& tracking) {
tracking.use(1);
size_t sz;
const char* str = lua_tolstring(L, index, &sz);
return string_view(str, sz);
return basic_string_view<char, Traits>(str, sz);
}
};
#ifdef SOL_CODECVT_SUPPORT
template <>
struct getter<std::wstring> {
static std::wstring get(lua_State* L, int index, record& tracking) {
template <typename Traits, typename Al>
struct getter<std::basic_string<wchar_t, Traits, Al>> {
static std::basic_string<wchar_t, Traits, Al> get(lua_State* L, int index, record& tracking) {
typedef std::conditional_t<sizeof(wchar_t) == 2, char16_t, char32_t> Ch;
typedef std::allocator_traits<Al>::rebind_alloc<Ch> ChAl;
typedef std::char_traits<Ch> ChTraits;
getter<std::basic_string<Ch, ChTraits, ChAl>> g;
(void)g;
return g.get_into<std::basic_string<wchar_t, Traits, Al>>(L, index, tracking);
}
};
template <typename Traits, typename Al>
struct getter<std::basic_string<char16_t, Traits, Al>> {
template <typename S>
static S get_into(lua_State* L, int index, record& tracking) {
typedef typename S::value_type Ch;
tracking.use(1);
size_t len;
auto str = lua_tolstring(L, index, &len);
auto utf8p = lua_tolstring(L, index, &len);
if (len < 1)
return std::wstring();
if (sizeof(wchar_t) == 2) {
thread_local std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> convert;
std::wstring r = convert.from_bytes(str, str + len);
#if defined(__MINGW32__) && defined(__GNUC__) && __GNUC__ < 7
// Thanks, MinGW and libstdc++, for introducing this absolutely asinine bug
// https://sourceforge.net/p/mingw-w64/bugs/538/
// http://chat.stackoverflow.com/transcript/message/32271369#32271369
for (auto& c : r) {
uint8_t* b = reinterpret_cast<uint8_t*>(&c);
std::swap(b[0], b[1]);
return S();
std::size_t needed_size = 0;
const char* strb = utf8p;
const char* stre = utf8p + len;
for (const char* strtarget = strb; strtarget < stre;) {
auto dr = unicode::utf8_to_code_point(strtarget, stre);
auto er = unicode::code_point_to_utf16(dr.codepoint);
needed_size += er.code_units_size;
strtarget = dr.next;
}
S r(needed_size, static_cast<Ch>(0));
r.resize(needed_size);
Ch* target = &r[0];
for (const char* strtarget = strb; strtarget < stre;) {
auto dr = unicode::utf8_to_code_point(strtarget, stre);
auto er = unicode::code_point_to_utf16(dr.codepoint);
std::memcpy(target, er.code_units.data(), er.code_units_size * sizeof(Ch));
strtarget = dr.next;
target += er.code_units_size;
}
#endif
return r;
}
thread_local std::wstring_convert<std::codecvt_utf8<wchar_t>> convert;
std::wstring r = convert.from_bytes(str, str + len);
return r;
static std::basic_string<char16_t, Traits, Al> get(lua_State* L, int index, record& tracking) {
return get_into<std::basic_string<char16_t, Traits, Al>>(L, index, tracking);
}
};
template <>
struct getter<std::u16string> {
static std::u16string get(lua_State* L, int index, record& tracking) {
template <typename Traits, typename Al>
struct getter<std::basic_string<char32_t, Traits, Al>> {
template <typename S>
static S get_into(lua_State* L, int index, record& tracking) {
typedef typename S::value_type Ch;
tracking.use(1);
size_t len;
auto str = lua_tolstring(L, index, &len);
auto utf8p = lua_tolstring(L, index, &len);
if (len < 1)
return std::u16string();
#ifdef _MSC_VER
thread_local std::wstring_convert<std::codecvt_utf8_utf16<int16_t>, int16_t> convert;
auto intd = convert.from_bytes(str, str + len);
std::u16string r(intd.size(), '\0');
std::memcpy(&r[0], intd.data(), intd.size() * sizeof(char16_t));
#else
thread_local std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
std::u16string r = convert.from_bytes(str, str + len);
#endif // VC++ is a shit
return S();
std::size_t needed_size = 0;
const char* strb = utf8p;
const char* stre = utf8p + len;
for (const char* strtarget = strb; strtarget < stre;) {
auto dr = unicode::utf8_to_code_point(strtarget, stre);
auto er = unicode::code_point_to_utf32(dr.codepoint);
needed_size += er.code_units_size;
strtarget = dr.next;
}
S r(needed_size, static_cast<Ch>(0));
r.resize(needed_size);
Ch* target = &r[0];
for (const char* strtarget = strb; strtarget < stre;) {
auto dr = unicode::utf8_to_code_point(strtarget, stre);
auto er = unicode::code_point_to_utf32(dr.codepoint);
std::memcpy(target, er.code_units.data(), er.code_units_size * sizeof(Ch));
strtarget = dr.next;
target += er.code_units_size;
}
return r;
}
};
template <>
struct getter<std::u32string> {
static std::u32string get(lua_State* L, int index, record& tracking) {
tracking.use(1);
size_t len;
auto str = lua_tolstring(L, index, &len);
if (len < 1)
return std::u32string();
#ifdef _MSC_VER
thread_local std::wstring_convert<std::codecvt_utf8<int32_t>, int32_t> convert;
auto intd = convert.from_bytes(str, str + len);
std::u32string r(intd.size(), '\0');
std::memcpy(&r[0], intd.data(), r.size() * sizeof(char32_t));
#else
thread_local std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> convert;
std::u32string r = convert.from_bytes(str, str + len);
#endif // VC++ is a shit
return r;
}
};
template <>
struct getter<wchar_t> {
static wchar_t get(lua_State* L, int index, record& tracking) {
auto str = getter<std::wstring>{}.get(L, index, tracking);
return str.size() > 0 ? str[0] : wchar_t(0);
static std::basic_string<char32_t, Traits, Al> get(lua_State* L, int index, record& tracking) {
return get_into<std::basic_string<char32_t, Traits, Al>>(L, index, tracking);
}
};
template <>
struct getter<char16_t> {
static char16_t get(lua_State* L, int index, record& tracking) {
auto str = getter<std::u16string>{}.get(L, index, tracking);
return str.size() > 0 ? str[0] : char16_t(0);
string_view utf8 = stack::get<string_view>(L, index, tracking);
const char* strb = utf8.data();
const char* stre = utf8.data() + utf8.size();
auto dr = unicode::utf8_to_code_point(strb, stre);
auto er = unicode::code_point_to_utf16(dr.codepoint);
return er.code_units[0];
}
};
template <>
struct getter<char32_t> {
static char32_t get(lua_State* L, int index, record& tracking) {
auto str = getter<std::u32string>{}.get(L, index, tracking);
return str.size() > 0 ? str[0] : char32_t(0);
string_view utf8 = stack::get<string_view>(L, index, tracking);
const char* strb = utf8.data();
const char* stre = utf8.data() + utf8.size();
auto dr = unicode::utf8_to_code_point(strb, stre);
auto er = unicode::code_point_to_utf32(dr.codepoint);
return er.code_units[0];
}
};
template <>
struct getter<wchar_t> {
static wchar_t get(lua_State* L, int index, record& tracking) {
typedef std::conditional_t<sizeof(wchar_t) == 2, char16_t, char32_t> Ch;
getter<Ch> g;
(void)g;
auto c = g.get(L, index, tracking);
return static_cast<wchar_t>(c);
}
};
#endif // codecvt header support
template <>
struct getter<meta_function> {

View File

@ -28,10 +28,6 @@
namespace sol {
struct stack_proxy : public stack_proxy_base {
private:
lua_State* L;
int index;
public:
stack_proxy()
: stack_proxy_base() {

View File

@ -29,15 +29,12 @@
#include "optional.hpp"
#include "usertype_traits.hpp"
#include "filters.hpp"
#include "unicode.hpp"
#include <memory>
#include <type_traits>
#include <cassert>
#include <limits>
#ifdef SOL_CODECVT_SUPPORT
#include <codecvt>
#include <locale>
#endif // codecvt support
#ifdef SOL_CXX17_FEATURES
#include <string_view>
#include <variant>
@ -388,6 +385,7 @@ namespace stack {
return 1;
}
};
#ifdef SOL_NOEXCEPT_FUNCTION_TYPE
template <>
struct pusher<std::remove_pointer_t<detail::lua_CFunction_noexcept>> {
@ -590,26 +588,26 @@ namespace stack {
}
};
template <>
struct pusher<std::string> {
static int push(lua_State* L, const std::string& str) {
template <typename Traits, typename Al>
struct pusher<std::basic_string<char, Traits, Al>> {
static int push(lua_State* L, const std::basic_string<char, Traits, Al>& str) {
lua_pushlstring(L, str.c_str(), str.size());
return 1;
}
static int push(lua_State* L, const std::string& str, std::size_t sz) {
static int push(lua_State* L, const std::basic_string<char, Traits, Al>& str, std::size_t sz) {
lua_pushlstring(L, str.c_str(), sz);
return 1;
}
};
template <>
struct pusher<string_view> {
static int push(lua_State* L, const string_view& sv) {
template <typename Ch, typename Traits>
struct pusher<basic_string_view<Ch, Traits>> {
static int push(lua_State* L, const basic_string_view<Ch, Traits>& sv) {
return stack::push(L, sv.data(), sv.length());
}
static int push(lua_State* L, const string_view& sv, std::size_t n) {
static int push(lua_State* L, const basic_string_view<Ch, Traits>& sv, std::size_t n) {
return stack::push(L, sv.data(), n);
}
};
@ -647,7 +645,6 @@ namespace stack {
}
};
#ifdef SOL_CODECVT_SUPPORT
template <>
struct pusher<const wchar_t*> {
static int push(lua_State* L, const wchar_t* wstr) {
@ -660,13 +657,13 @@ namespace stack {
static int push(lua_State* L, const wchar_t* strb, const wchar_t* stre) {
if (sizeof(wchar_t) == 2) {
thread_local std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> convert;
std::string u8str = convert.to_bytes(strb, stre);
return stack::push(L, u8str);
const char16_t* sb = reinterpret_cast<const char16_t*>(strb);
const char16_t* se = reinterpret_cast<const char16_t*>(stre);
return stack::push(L, sb, se);
}
thread_local std::wstring_convert<std::codecvt_utf8<wchar_t>> convert;
std::string u8str = convert.to_bytes(strb, stre);
return stack::push(L, u8str);
const char32_t* sb = reinterpret_cast<const char32_t*>(strb);
const char32_t* se = reinterpret_cast<const char32_t*>(stre);
return stack::push(L, sb, se);
}
};
@ -693,6 +690,20 @@ namespace stack {
template <>
struct pusher<const char16_t*> {
static int convert_into(lua_State* L, char* start, std::size_t, const char16_t* strb, const char16_t* stre) {
char* target = start;
for (const char16_t* strtarget = strb; strtarget < stre;) {
auto dr = unicode::utf16_to_code_point(strtarget, stre);
auto er = unicode::code_point_to_utf8(dr.codepoint);
const char* utf8data = er.code_units.data();
std::memcpy(target, utf8data, er.code_units_size);
target += er.code_units_size;
strtarget = dr.next;
}
return stack::push(L, start, target);
}
static int push(lua_State* L, const char16_t* u16str) {
return push(L, u16str, std::char_traits<char16_t>::length(u16str));
}
@ -702,14 +713,30 @@ namespace stack {
}
static int push(lua_State* L, const char16_t* strb, const char16_t* stre) {
#ifdef _MSC_VER
thread_local std::wstring_convert<std::codecvt_utf8_utf16<int16_t>, int16_t> convert;
std::string u8str = convert.to_bytes(reinterpret_cast<const int16_t*>(strb), reinterpret_cast<const int16_t*>(stre));
#else
thread_local std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
std::string u8str = convert.to_bytes(strb, stre);
#endif // VC++ is a shit
return stack::push(L, u8str);
// TODO: use new unicode methods
// TODO: use new unicode methods
char sbo[SOL_STACK_STRING_OPTIMIZATION_SIZE];
// if our max string space is small enough, use SBO
// right off the bat
std::size_t max_possible_code_units = (stre - strb) * 4;
if (max_possible_code_units <= SOL_STACK_STRING_OPTIMIZATION_SIZE) {
return convert_into(L, sbo, max_possible_code_units, strb, stre);
}
// otherwise, we must manually count/check size
std::size_t needed_size = 0;
for (const char16_t* strtarget = strb; strtarget < stre;) {
auto dr = unicode::utf16_to_code_point(strtarget, stre);
auto er = unicode::code_point_to_utf8(dr.codepoint);
needed_size += er.code_units_size;
strtarget = dr.next;
}
if (needed_size < SOL_STACK_STRING_OPTIMIZATION_SIZE) {
return convert_into(L, sbo, needed_size, strb, stre);
}
std::string u8str("", 0);
u8str.resize(needed_size);
char* target = &u8str[0];
return convert_into(L, target, needed_size, strb, stre);
}
};
@ -736,6 +763,19 @@ namespace stack {
template <>
struct pusher<const char32_t*> {
static int convert_into(lua_State* L, char* start, std::size_t, const char32_t* strb, const char32_t* stre) {
char* target = start;
for (const char32_t* strtarget = strb; strtarget < stre;) {
auto dr = unicode::utf32_to_code_point(strtarget, stre);
auto er = unicode::code_point_to_utf8(dr.codepoint);
const char* data = er.code_units.data();
std::memcpy(target, data, er.code_units_size);
target += er.code_units_size;
strtarget = dr.next;
}
return stack::push(L, start, target);
}
static int push(lua_State* L, const char32_t* u32str) {
return push(L, u32str, u32str + std::char_traits<char32_t>::length(u32str));
}
@ -745,14 +785,29 @@ namespace stack {
}
static int push(lua_State* L, const char32_t* strb, const char32_t* stre) {
#ifdef _MSC_VER
thread_local std::wstring_convert<std::codecvt_utf8<int32_t>, int32_t> convert;
std::string u8str = convert.to_bytes(reinterpret_cast<const int32_t*>(strb), reinterpret_cast<const int32_t*>(stre));
#else
thread_local std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> convert;
std::string u8str = convert.to_bytes(strb, stre);
#endif // VC++ is a shit
return stack::push(L, u8str);
// TODO: use new unicode methods
char sbo[SOL_STACK_STRING_OPTIMIZATION_SIZE];
// if our max string space is small enough, use SBO
// right off the bat
std::size_t max_possible_code_units = (stre - strb) * 4;
if (max_possible_code_units <= SOL_STACK_STRING_OPTIMIZATION_SIZE) {
return convert_into(L, sbo, max_possible_code_units, strb, stre);
}
// otherwise, we must manually count/check size
std::size_t needed_size = 0;
for (const char32_t* strtarget = strb; strtarget < stre;) {
auto dr = unicode::utf32_to_code_point(strtarget, stre);
auto er = unicode::code_point_to_utf8(dr.codepoint);
needed_size += er.code_units_size;
strtarget = dr.next;
}
if (needed_size < SOL_STACK_STRING_OPTIMIZATION_SIZE) {
return convert_into(L, sbo, needed_size, strb, stre);
}
std::string u8str("", 0);
u8str.resize(needed_size);
char* target = &u8str[0];
return convert_into(L, target, needed_size, strb, stre);
}
};
@ -814,7 +869,7 @@ namespace stack {
struct pusher<wchar_t> {
static int push(lua_State* L, wchar_t c) {
const wchar_t str[2] = { c, '\0' };
return stack::push(L, str, 1);
return stack::push(L, &str[0], 1);
}
};
@ -822,7 +877,7 @@ namespace stack {
struct pusher<char16_t> {
static int push(lua_State* L, char16_t c) {
const char16_t str[2] = { c, '\0' };
return stack::push(L, str, 1);
return stack::push(L, &str[0], 1);
}
};
@ -830,77 +885,43 @@ namespace stack {
struct pusher<char32_t> {
static int push(lua_State* L, char32_t c) {
const char32_t str[2] = { c, '\0' };
return stack::push(L, str, 1);
return stack::push(L, &str[0], 1);
}
};
template <>
struct pusher<std::wstring> {
static int push(lua_State* L, const std::wstring& wstr) {
return push(L, wstr.data(), wstr.size());
template <typename Traits, typename Al>
struct pusher<std::basic_string<wchar_t, Traits, Al>> {
static int push(lua_State* L, const std::basic_string<wchar_t, Traits, Al>& wstr) {
return push(L, wstr, wstr.size());
}
static int push(lua_State* L, const std::wstring& wstr, std::size_t sz) {
static int push(lua_State* L, const std::basic_string<wchar_t, Traits, Al>& wstr, std::size_t sz) {
return stack::push(L, wstr.data(), wstr.data() + sz);
}
};
template <>
struct pusher<std::u16string> {
static int push(lua_State* L, const std::u16string& u16str) {
template <typename Traits, typename Al>
struct pusher<std::basic_string<char16_t, Traits, Al>> {
static int push(lua_State* L, const std::basic_string<char16_t, Traits, Al>& u16str) {
return push(L, u16str, u16str.size());
}
static int push(lua_State* L, const std::u16string& u16str, std::size_t sz) {
static int push(lua_State* L, const std::basic_string<char16_t, Traits, Al>& u16str, std::size_t sz) {
return stack::push(L, u16str.data(), u16str.data() + sz);
}
};
template <>
struct pusher<std::u32string> {
static int push(lua_State* L, const std::u32string& u32str) {
template <typename Traits, typename Al>
struct pusher<std::basic_string<char32_t, Traits, Al>> {
static int push(lua_State* L, const std::basic_string<char32_t, Traits, Al>& u32str) {
return push(L, u32str, u32str.size());
}
static int push(lua_State* L, const std::u32string& u32str, std::size_t sz) {
static int push(lua_State* L, const std::basic_string<char32_t, Traits, Al>& u32str, std::size_t sz) {
return stack::push(L, u32str.data(), u32str.data() + sz);
}
};
template <>
struct pusher<wstring_view> {
static int push(lua_State* L, const wstring_view& sv) {
return stack::push(L, sv.data(), sv.length());
}
static int push(lua_State* L, const wstring_view& sv, std::size_t n) {
return stack::push(L, sv.data(), n);
}
};
template <>
struct pusher<u16string_view> {
static int push(lua_State* L, const u16string_view& sv) {
return stack::push(L, sv.data(), sv.length());
}
static int push(lua_State* L, const u16string_view& sv, std::size_t n) {
return stack::push(L, sv.data(), n);
}
};
template <>
struct pusher<u32string_view> {
static int push(lua_State* L, const u32string_view& sv) {
return stack::push(L, sv.data(), sv.length());
}
static int push(lua_State* L, const u32string_view& sv, std::size_t n) {
return stack::push(L, sv.data(), n);
}
};
#endif // codecvt Header Support
template <typename... Args>
struct pusher<std::tuple<Args...>> {
template <std::size_t... I, typename T>

View File

@ -647,13 +647,13 @@ namespace sol {
}
template <bool read_only = true, typename... Args>
state_view& new_enum(const std::string& name, Args&&... args) {
state_view& new_enum(const string_view& name, Args&&... args) {
global.new_enum<read_only>(name, std::forward<Args>(args)...);
return *this;
}
template <typename T, bool read_only = true>
state_view& new_enum(const std::string& name, std::initializer_list<std::pair<string_view, T>> items) {
state_view& new_enum(const string_view& name, std::initializer_list<std::pair<string_view, T>> items) {
global.new_enum<T, read_only>(name, std::move(items));
return *this;
}

View File

@ -37,6 +37,8 @@
namespace sol {
#ifdef SOL_CXX17_FEATURES
template <typename C, typename T = std::char_traits<C>>
using basic_string_view = std::basic_string_view<C, T>;
typedef std::string_view string_view;
typedef std::wstring_view wstring_view;
typedef std::u16string_view u16string_view;

View File

@ -26,6 +26,7 @@
#include "tuple.hpp"
#include "bind_traits.hpp"
#include "string_view.hpp"
#include <type_traits>
#include <cstdint>
@ -33,9 +34,7 @@
#include <functional>
#include <iterator>
#include <iosfwd>
#ifdef SOL_CXX17_FEATURES
#include <string_view>
#endif
namespace sol {
template <std::size_t I>
@ -553,6 +552,13 @@ namespace sol {
template <typename T>
struct is_matched_lookup : meta_detail::is_matched_lookup_impl<T, is_lookup<T>::value> {};
template <typename T>
using is_string_like = any<
is_specialization_of<std::basic_string, meta::unqualified_t<T>>,
is_specialization_of<basic_string_view, meta::unqualified_t<T>>,
meta::all<std::is_array<unqualified_t<T>>, meta::any_same<meta::unqualified_t<std::remove_all_extents_t<meta::unqualified_t<T>>>, char, char16_t, char32_t, wchar_t>>
>;
template <typename T>
using is_string_constructible = any<
meta::all<std::is_array<unqualified_t<T>>, std::is_same<meta::unqualified_t<std::remove_all_extents_t<meta::unqualified_t<T>>>, char>>,

View File

@ -805,35 +805,17 @@ namespace sol {
template <typename T>
struct is_container<std::initializer_list<T>> : std::false_type {};
template <>
struct is_container<std::string> : std::false_type {};
template <typename C, typename T, typename A>
struct is_container<std::basic_string<C, T, A>> : std::false_type {};
template <>
struct is_container<std::wstring> : std::false_type {};
template <>
struct is_container<std::u16string> : std::false_type {};
template <>
struct is_container<std::u32string> : std::false_type {};
#ifdef SOL_CXX17_FEATURES
template <>
struct is_container<std::string_view> : std::false_type {};
template <>
struct is_container<std::wstring_view> : std::false_type {};
template <>
struct is_container<std::u16string_view> : std::false_type {};
template <>
struct is_container<std::u32string_view> : std::false_type {};
#endif // C++ 17
template <typename C, typename T>
struct is_container<basic_string_view<C, T>> : std::false_type {};
template <typename T>
struct is_container<T,
std::enable_if_t<meta::has_begin_end<meta::unqualified_t<T>>::value && !is_initializer_list<meta::unqualified_t<T>>::value>> : std::true_type {};
struct is_container<T, std::enable_if_t<meta::has_begin_end<meta::unqualified_t<T>>::value
&& !is_initializer_list<meta::unqualified_t<T>>::value
&& !meta::is_string_like<meta::unqualified_t<T>>::value
>> : std::true_type {};
template <typename T>
struct is_container<T, std::enable_if_t<std::is_array<meta::unqualified_t<T>>::value && !meta::any_same<std::remove_all_extents_t<meta::unqualified_t<T>>, char, wchar_t, char16_t, char32_t>::value>> : std::true_type {};
@ -849,17 +831,11 @@ namespace sol {
template <typename T, typename = void>
struct lua_type_of : std::integral_constant<type, type::userdata> {};
template <>
struct lua_type_of<std::string> : std::integral_constant<type, type::string> {};
template <typename C, typename T, typename A>
struct lua_type_of<std::basic_string<C, T, A>> : std::integral_constant<type, type::string> {};
template <>
struct lua_type_of<std::wstring> : std::integral_constant<type, type::string> {};
template <>
struct lua_type_of<std::u16string> : std::integral_constant<type, type::string> {};
template <>
struct lua_type_of<std::u32string> : std::integral_constant<type, type::string> {};
template <typename C, typename T>
struct lua_type_of<basic_string_view<C, T>> : std::integral_constant<type, type::string> {};
template <std::size_t N>
struct lua_type_of<char[N]> : std::integral_constant<type, type::string> {};
@ -1023,18 +999,6 @@ namespace sol {
template <>
struct lua_type_of<meta_function> : std::integral_constant<type, type::string> {};
template <>
struct lua_type_of<string_view> : std::integral_constant<type, type::string> {};
template <>
struct lua_type_of<wstring_view> : std::integral_constant<type, type::string> {};
template <>
struct lua_type_of<u16string_view> : std::integral_constant<type, type::string> {};
template <>
struct lua_type_of<u32string_view> : std::integral_constant<type, type::string> {};
#ifdef SOL_CXX17_FEATURES
template <typename... Tn>
struct lua_type_of<std::variant<Tn...>> : std::integral_constant<type, type::poly> {};

289
sol/unicode.hpp Normal file
View File

@ -0,0 +1,289 @@
#pragma once
#include "string_view.hpp"
#include <array>
namespace sol {
// Everything here was lifted pretty much straight out of
// ogonek, because fuck figuring it out=
namespace unicode {
enum error_code {
ok = 0,
invalid_code_point,
invalid_code_unit,
invalid_leading_surrogate,
invalid_trailing_surrogate,
sequence_too_short,
overlong_sequence,
};
inline const string_view& to_string(error_code ec) {
const string_view arr[4] = {
"ok",
"invalid code points",
"invalid code unit",
"overlong sequence"
};
return arr[static_cast<std::size_t>(ec)];
}
template <typename It>
struct decoded_result {
error_code error;
char32_t codepoint;
It next;
};
template <typename C>
struct encoded_result {
error_code error;
std::size_t code_units_size;
std::array<C, 4> code_units;
};
struct unicode_detail {
// codepoint related
static constexpr char32_t last_code_point = 0x10FFFF;
static constexpr char32_t first_lead_surrogate = 0xD800;
static constexpr char32_t last_lead_surrogate = 0xDBFF;
static constexpr char32_t first_trail_surrogate = 0xDC00;
static constexpr char32_t last_trail_surrogate = 0xDFFF;
static constexpr char32_t first_surrogate = first_lead_surrogate;
static constexpr char32_t last_surrogate = last_trail_surrogate;
static constexpr bool is_lead_surrogate(char32_t u) {
return u >= first_lead_surrogate && u <= last_lead_surrogate;
}
static constexpr bool is_trail_surrogate(char32_t u) {
return u >= first_trail_surrogate && u <= last_trail_surrogate;
}
static constexpr bool is_surrogate(char32_t u) {
return u >= first_surrogate && u <= last_surrogate;
}
// utf8 related
static constexpr auto last_1byte_value = 0x7Fu;
static constexpr auto last_2byte_value = 0x7FFu;
static constexpr auto last_3byte_value = 0xFFFFu;
static constexpr auto start_2byte_mask = 0x80u;
static constexpr auto start_3byte_mask = 0xE0u;
static constexpr auto start_4byte_mask = 0xF0u;
static constexpr auto continuation_mask = 0xC0u;
static constexpr auto continuation_signature = 0x80u;
static constexpr int sequence_length(unsigned char b) {
return (b & start_2byte_mask) == 0 ? 1
: (b & start_3byte_mask) != start_3byte_mask ? 2
: (b & start_4byte_mask) != start_4byte_mask ? 3
: 4;
}
static constexpr char32_t decode(unsigned char b0, unsigned char b1) {
return ((b0 & 0x1F) << 6) | (b1 & 0x3F);
}
static constexpr char32_t decode(unsigned char b0, unsigned char b1, unsigned char b2) {
return ((b0 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F);
}
static constexpr char32_t decode(unsigned char b0, unsigned char b1, unsigned char b2, unsigned char b3) {
return ((b0 & 0x07) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
}
// utf16 related
static constexpr char32_t last_bmp_value = 0xFFFF;
static constexpr char32_t normalizing_value = 0x10000;
static constexpr int lead_surrogate_bitmask = 0xFFC00;
static constexpr int trail_surrogate_bitmask = 0x3FF;
static constexpr int lead_shifted_bits = 10;
static char32_t combine_surrogates(char16_t lead, char16_t trail) {
auto hi = lead - first_lead_surrogate;
auto lo = trail - first_trail_surrogate;
return normalizing_value + ((hi << lead_shifted_bits) | lo);
}
};
inline encoded_result<char> code_point_to_utf8(char32_t codepoint) {
encoded_result<char> er;
er.error = error_code::ok;
if (codepoint <= unicode_detail::last_1byte_value) {
er.code_units_size = 1;
er.code_units = std::array<char, 4>{ static_cast<char>(codepoint) };
}
else if (codepoint <= unicode_detail::last_2byte_value) {
er.code_units_size = 2;
er.code_units = std::array<char, 4>{
static_cast<char>(0xC0 | ((codepoint & 0x7C0) >> 6)),
static_cast<char>(0x80 | (codepoint & 0x3F)),
};
}
else if (codepoint <= unicode_detail::last_3byte_value) {
er.code_units_size = 3;
er.code_units = std::array<char, 4>{
static_cast<char>(0xE0 | ((codepoint & 0xF000) >> 12)),
static_cast<char>(0x80 | ((codepoint & 0xFC0) >> 6)),
static_cast<char>(0x80 | (codepoint & 0x3F)),
};
}
else {
er.code_units_size = 4;
er.code_units = std::array<char, 4>{
static_cast<char>(0xF0 | ((codepoint & 0x1C0000) >> 18)),
static_cast<char>(0x80 | ((codepoint & 0x3F000) >> 12)),
static_cast<char>(0x80 | ((codepoint & 0xFC0) >> 6)),
static_cast<char>(0x80 | (codepoint & 0x3F)),
};
}
return er;
}
inline encoded_result<char16_t> code_point_to_utf16(char32_t codepoint) {
encoded_result<char16_t> er;
if (codepoint <= unicode_detail::last_bmp_value) {
er.code_units_size = 1;
er.code_units = std::array<char16_t, 4>{ static_cast<char16_t>(codepoint) };
er.error = error_code::ok;
}
else {
auto normal = codepoint - unicode_detail::normalizing_value;
auto lead = unicode_detail::first_lead_surrogate + ((normal & unicode_detail::lead_surrogate_bitmask) >> unicode_detail::lead_shifted_bits);
auto trail = unicode_detail::first_trail_surrogate + (normal & unicode_detail::trail_surrogate_bitmask);
er.code_units = std::array<char16_t, 4>{
static_cast<char16_t>(lead),
static_cast<char16_t>(trail)
};
er.code_units_size = 2;
er.error = error_code::ok;
}
return er;
}
inline encoded_result<char32_t> code_point_to_utf32(char32_t codepoint) {
encoded_result<char32_t> er;
er.code_units_size = 1;
er.code_units[0] = codepoint;
er.error = error_code::ok;
return er;
}
template <typename It>
inline decoded_result<It> utf8_to_code_point(It it, It) {
decoded_result<It> dr;
unsigned char b0 = *it;
std::size_t length = unicode_detail::sequence_length(b0);
if (length == 1) {
dr.codepoint = static_cast<char32_t>(b0);
dr.error = error_code::ok;
++it;
dr.next = it;
return dr;
}
auto is_invalid = [](unsigned char b) { return b == 0xC0 || b == 0xC1 || b > 0xF4; };
auto is_continuation = [](unsigned char b) {
return (b & unicode_detail::continuation_mask) == unicode_detail::continuation_signature;
};
if (is_invalid(b0) || is_continuation(b0)) {
dr.error = error_code::invalid_code_unit;
dr.next = it;
return dr;
}
++it;
std::array<unsigned char, 4> b;
b[0] = b0;
for (int i = 1; i < length; ++i) {
b[i] = *it;
if (!is_continuation(b[i])) {
dr.error = error_code::invalid_code_unit;
dr.next = it;
return dr;
}
++it;
}
char32_t decoded;
switch (length) {
case 2:
decoded = unicode_detail::decode(b[0], b[1]);
break;
case 3:
decoded = unicode_detail::decode(b[0], b[1], b[2]);
break;
default:
decoded = unicode_detail::decode(b[0], b[1], b[2], b[3]);
break;
}
auto is_overlong = [](char32_t u, std::size_t bytes) {
return u <= unicode_detail::last_1byte_value
|| (u <= unicode_detail::last_2byte_value && bytes > 2)
|| (u <= unicode_detail::last_3byte_value && bytes > 3);
};
if (is_overlong(decoded, length)) {
dr.error = error_code::overlong_sequence;
return dr;
}
if (unicode_detail::is_surrogate(decoded) || decoded > unicode_detail::last_code_point) {
dr.error = error_code::invalid_code_point;
return dr;
}
// then everything is fine
dr.codepoint = decoded;
dr.error = error_code::ok;
dr.next = it;
return dr;
}
template <typename It>
inline decoded_result<It> utf16_to_code_point(It it, It) {
decoded_result<It> dr;
char16_t lead = static_cast<char16_t>(*it);
if (!unicode_detail::is_surrogate(lead)) {
++it;
dr.codepoint = static_cast<char32_t>(lead);
dr.next = it;
dr.error = error_code::ok;
return dr;
}
if (!unicode_detail::is_lead_surrogate(lead)) {
dr.error = error_code::invalid_leading_surrogate;
dr.next = it;
return dr;
}
++it;
auto trail = *it;
if (!unicode_detail::is_trail_surrogate(trail)) {
dr.error = error_code::invalid_trailing_surrogate;
dr.next = it;
return dr;
}
dr.codepoint = unicode_detail::combine_surrogates(lead, trail);
dr.next = ++it;
dr.error = error_code::ok;
return dr;
}
template <typename It>
inline decoded_result<It> utf32_to_code_point(It it, It) {
decoded_result<It> dr;
dr.codepoint = static_cast<char32_t>(*it);
dr.next = ++it;
dr.error = error_code::ok;
return dr;
}
}
}

View File

@ -134,6 +134,30 @@ public:
}
};
struct my_vec : public std::vector<int> {
typedef std::vector<int> base_t;
using base_t::base_t;
};
namespace sol {
template <>
struct is_container<my_vec> : std::true_type {};
template <>
struct container_traits<my_vec> {
static auto begin(lua_State*, my_vec& self) {
return self.begin();
}
static auto end(lua_State*, my_vec& self) {
return self.end();
}
static std::ptrdiff_t index_adjustment(lua_State*, my_vec&) {
return 0;
}
};
} // namespace sol
auto test_table_return_one() {
return sol::as_table(std::vector<int>{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 });
}
@ -1236,3 +1260,17 @@ end
check_unordered_values(src, t1ummap.source);
}
}
TEST_CASE("containers/custom indexing", "allow containers to set a custom indexing offset") {
sol::state lua;
lua.open_libraries(sol::lib::base);
lua["c"] = my_vec{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
auto result1 = lua.safe_script("for i=0,9 do assert(i == c[i]) end", sol::script_pass_on_error);
REQUIRE(result1.valid());
auto result2 = lua.safe_script("assert(c[10] == nil)", sol::script_pass_on_error);
REQUIRE(result2.valid());
auto result3 = lua.safe_script("assert(c[-1] == nil)", sol::script_pass_on_error);
REQUIRE(result3.valid());
}

View File

@ -39,8 +39,6 @@ namespace muh_namespace {
} // namespace
} // namespace muh_namespace
// There isn't a single library roundtripping which codecvt works on. We'll do the nitty-gritty of it later...
#if (defined(_MSC_VER) || defined(__clang__) || defined(__MINGW64__))
TEST_CASE("stack/strings", "test that strings can be roundtripped") {
sol::state lua;
@ -59,7 +57,6 @@ TEST_CASE("stack/strings", "test that strings can be roundtripped") {
static const std::u32string utf32str_s = utf32str;
static const std::wstring widestr_s = widestr;
#ifdef SOL_CODECVT_SUPPORT
INFO("sizeof(wchar_t): " << sizeof(wchar_t));
INFO("sizeof(char16_t): " << sizeof(char16_t));
INFO("sizeof(char32_t): " << sizeof(char32_t));
@ -120,9 +117,17 @@ TEST_CASE("stack/strings", "test that strings can be roundtripped") {
REQUIRE(utf16_to_char32 == utf32str[0]);
REQUIRE(utf32_to_char32 == utf32str[0]);
REQUIRE(wide_to_char32 == utf32str[0]);
#endif // codecvt support
char16_t utf8_to_char16 = lua["utf8"];
char16_t utf16_to_char16 = lua["utf16"];
char16_t utf32_to_char16 = lua["utf32"];
char16_t wide_to_char16 = lua["wide"];
REQUIRE(utf8_to_char16 == utf16str[0]);
REQUIRE(utf16_to_char16 == utf16str[0]);
REQUIRE(utf32_to_char16 == utf16str[0]);
REQUIRE(wide_to_char16 == utf16str[0]);
}
#endif // MinGW fails with codecvt
TEST_CASE("detail/demangling", "test some basic demangling cases") {
std::string teststr = sol::detail::short_demangle<test>();

View File

@ -585,36 +585,32 @@ TEST_CASE("proxy/proper-pushing", "allow proxies to reference other proxies and
REQUIRE(b);
}
#if !(defined(__clang__) && defined(SOL2_CI))
TEST_CASE("proxy/equality", "check to make sure equality tests work") {
sol::state lua;
REQUIRE((lua["a"] == sol::lua_nil));
REQUIRE_FALSE((lua["a"] == nullptr));
REQUIRE_FALSE((lua["a"] == 0));
REQUIRE_FALSE((lua["a"] == 2));
REQUIRE_FALSE((lua["a"] != sol::lua_nil));
REQUIRE((lua["a"] != nullptr));
#ifndef __clang__
REQUIRE_FALSE((lua["a"] == 0));
REQUIRE_FALSE((lua["a"] == 2));
REQUIRE((lua["a"] != 0));
REQUIRE((lua["a"] != 2));
#endif // clang screws up by trying to access int128 types that it doesn't support, even when we don't ask for them
lua["a"] = 2;
REQUIRE_FALSE((lua["a"] == sol::lua_nil));
REQUIRE_FALSE((lua["a"] == nullptr));
REQUIRE_FALSE((lua["a"] == 0));
REQUIRE((lua["a"] == 2));
REQUIRE((lua["a"] != sol::lua_nil));
REQUIRE((lua["a"] != nullptr));
#ifndef __clang__
REQUIRE_FALSE((lua["a"] == 0));
REQUIRE((lua["a"] == 2));
REQUIRE((lua["a"] != 0));
REQUIRE_FALSE((lua["a"] != 2));
#endif // clang screws up by trying to access int128 types that it doesn't support, even when we don't ask for them
}
#endif // Clang and Continuous Integration problems...
TEST_CASE("compilation/const regression", "make sure constness in tables is respected all the way down") {
struct State {
public: