Skip to content

Commit

Permalink
Using 32bit wchar_t by default, uint32_t on Windows
Browse files Browse the repository at this point in the history
  • Loading branch information
jaime-m-p committed Aug 13, 2024
1 parent 50e1b1e commit dcac747
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion src/unicode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -471,8 +471,16 @@ static std::vector<size_t> unicode_regex_split_custom(const std::string & text,
// std::wregex does not support unicode whitespaces \s: 0x85, 0xA0, 0x001680 ... 0x003000.
// std::wregex supports full 32 bit codepoints, not limited to the Unicode maximum codepoint 0x10FFFF.
namespace std {
using codepoint = uint32_t; // codepoint type for all template specializations

// Codepoint type for all template specializations.
// If wchar_t can represent values above 0xFFFF it is used directly as the
// codepoint type. Otherwise (e.g. Windows, where wchar_t is 2 bytes) uint32_t
// is used and CUSTOM_CTYPE_CODEPOINT is defined, which enables the custom
// std::ctype<codepoint> specialization guarded by `#ifdef CUSTOM_CTYPE_CODEPOINT`.
// NOTE(review): WCHAR_MAX must already be defined at this point (it comes from
// <cwchar> / <cstdint>); an undefined identifier evaluates to 0 inside #if,
// which would silently select the uint32_t branch -- confirm the includes.
#if (WCHAR_MAX > 0xFFFF)
using codepoint = wchar_t; // wide wchar_t, typically sizeof(wchar_t) == 4
#else
using codepoint = uint32_t; // Windows: sizeof(wchar_t) == 2
// Signals that the standard library has no usable std::ctype for this type.
#define CUSTOM_CTYPE_CODEPOINT
#endif

#ifdef CUSTOM_CTYPE_CODEPOINT
// Minimal required implementation for std::regex string processing
template<> // custom specialized std::ctype<codepoint>
class ctype<codepoint> {
Expand Down Expand Up @@ -530,6 +538,7 @@ namespace std {
const std::ctype<codepoint> & use_facet<const std::ctype<codepoint>>(const std::locale & loc) {
return use_facet<std::ctype<codepoint>>(loc);
}
#endif

// Minimal required implementation for std::regex string processing
template<> // custom specialized std::regex_traits<codepoint>
Expand Down

0 comments on commit dcac747

Please sign in to comment.