Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add utf8_length in String class #99893

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions core/io/pck_packer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,12 @@ Error PCKPacker::flush(bool p_verbose) {
}

for (int i = 0; i < files.size(); i++) {
int string_len = files[i].path.utf8().length();
CharString utf8_string = files[i].path.utf8();
int string_len = utf8_string.length();
int pad = _get_pad(4, string_len);

fhead->store_32(string_len + pad);
fhead->store_buffer((const uint8_t *)files[i].path.utf8().get_data(), string_len);
fhead->store_buffer((const uint8_t *)utf8_string.get_data(), string_len);
for (int j = 0; j < pad; j++) {
fhead->store_8(0);
}
Expand Down
2 changes: 1 addition & 1 deletion core/io/plist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ size_t PListNode::get_asn1_size(uint8_t p_len_octets) const {

for (const KeyValue<String, Ref<PListNode>> &E : data_dict) {
size += 1 + _asn1_size_len(p_len_octets); // Sequence.
size += 1 + _asn1_size_len(p_len_octets) + E.key.utf8().length(); //Key.
size += 1 + _asn1_size_len(p_len_octets) + E.key.utf8_length(); //Key.
size += 1 + _asn1_size_len(p_len_octets) + E.value->get_asn1_size(p_len_octets); // Value.
}
return size;
Expand Down
65 changes: 39 additions & 26 deletions core/string/ustring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2243,43 +2243,56 @@ Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
}
}

CharString String::utf8() const {
int l = length();
if (!l) {
return CharString();
uint32_t String::unicode_codepoint_as_utf8_length(char32_t c) {
uint32_t utf8_size = 0;
if (c <= 0x7f) { // 7 bits.
utf8_size += 1;
} else if (c <= 0x7ff) { // 11 bits
utf8_size += 2;
} else if (c <= 0xffff) { // 16 bits
utf8_size += 3;
} else if (c <= 0x001fffff) { // 21 bits
utf8_size += 4;
} else if (c <= 0x03ffffff) { // 26 bits
utf8_size += 5;
print_error(vformat("Invalid unicode codepoint (%x)", c));
} else if (c <= 0x7fffffff) { // 31 bits
utf8_size += 6;
print_error(vformat("Invalid unicode codepoint (%x)", c));
} else {
utf8_size += 1;
print_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c));
}

const char32_t *d = &operator[](0);
int fl = 0;
for (int i = 0; i < l; i++) {
uint32_t c = d[i];
if (c <= 0x7f) { // 7 bits.
fl += 1;
} else if (c <= 0x7ff) { // 11 bits
fl += 2;
} else if (c <= 0xffff) { // 16 bits
fl += 3;
} else if (c <= 0x001fffff) { // 21 bits
fl += 4;
} else if (c <= 0x03ffffff) { // 26 bits
fl += 5;
print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
} else if (c <= 0x7fffffff) { // 31 bits
fl += 6;
print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
} else {
fl += 1;
print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c), true);
}
return utf8_size;
}

// Returns the number of bytes this string would occupy when encoded as UTF-8.
// The result is the sum of unicode_codepoint_as_utf8_length() over the first
// length() code points; an empty string yields 0.
uint32_t String::utf8_length() const {
	const int char_count = length();
	const char32_t *chars = ptr();

	uint32_t total_bytes = 0;
	int idx = 0;
	while (idx < char_count) {
		// Per-code-point size (and any diagnostics) is delegated to the helper.
		total_bytes += unicode_codepoint_as_utf8_length(chars[idx]);
		++idx;
	}

	return total_bytes;
}

CharString String::utf8() const {
int fl = utf8_length();

CharString utf8s;
if (fl == 0) {
return utf8s;
}

utf8s.resize(fl + 1);
uint8_t *cdst = (uint8_t *)utf8s.get_data();
const char32_t *d = &operator[](0);
int l = length();

#define APPEND_CHAR(m_c) *(cdst++) = m_c

Expand Down
2 changes: 2 additions & 0 deletions core/string/ustring.h
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,8 @@ class String {

CharString ascii(bool p_allow_extended = false) const;
CharString utf8() const;
uint32_t utf8_length() const;
static uint32_t unicode_codepoint_as_utf8_length(char32_t c);
Error parse_utf8(const char *p_utf8, int p_len = -1, bool p_skip_cr = false);
static String utf8(const char *p_utf8, int p_len = -1);

Expand Down
2 changes: 1 addition & 1 deletion modules/gltf/gltf_document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7167,7 +7167,7 @@ PackedByteArray GLTFDocument::_serialize_glb_buffer(Ref<GLTFState> p_state, Erro
const int32_t header_size = 12;
const int32_t chunk_header_size = 8;

int32_t padding = (chunk_header_size + json.utf8().length()) % 4;
int32_t padding = (chunk_header_size + json.utf8_length()) % 4;
json += String(" ").repeat(padding);

CharString cs = json.utf8();
Expand Down
3 changes: 2 additions & 1 deletion platform/linuxbsd/x11/display_server_x11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1920,7 +1920,8 @@ void DisplayServerX11::window_set_title(const String &p_title, WindowID p_window
Atom _net_wm_name = XInternAtom(x11_display, "_NET_WM_NAME", false);
Atom utf8_string = XInternAtom(x11_display, "UTF8_STRING", false);
if (_net_wm_name != None && utf8_string != None) {
XChangeProperty(x11_display, wd.x11_window, _net_wm_name, utf8_string, 8, PropModeReplace, (unsigned char *)p_title.utf8().get_data(), p_title.utf8().length());
CharString utf8_title = p_title.utf8();
XChangeProperty(x11_display, wd.x11_window, _net_wm_name, utf8_string, 8, PropModeReplace, (unsigned char *)utf8_title.get_data(), utf8_title.length());
}
}

Expand Down
31 changes: 18 additions & 13 deletions platform/windows/windows_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,32 +161,37 @@ Error WindowsUtils::copy_and_rename_pdb(const String &p_dll_path) {
return ERR_SKIP;
}

String new_pdb_base_name = p_dll_path.get_file().get_basename() + "_";
String new_pdb_base_name = p_dll_path.get_file().get_basename();

// Checking the available space for the updated string
// and trying to shorten it if there is not much space.
{
// e.g. 999.pdb
const uint8_t suffix_size = String::num_characters((int64_t)max_pdb_names - 1) + 4;
// e.g. ~lib_ + 1 for the \0
const uint8_t min_base_size = 5 + 1;
int original_path_size = pdb_info.path.utf8().length();
CharString utf8_name = new_pdb_base_name.utf8();
int new_expected_buffer_size = utf8_name.length() + suffix_size;
// e.g. _999.pdb
const uint8_t suffix_size = String::num_characters((int64_t)max_pdb_names - 1) + 5;
int original_path_size = pdb_info.path.utf8_length();
int new_expected_buffer_size = new_pdb_base_name.utf8_length() + suffix_size;

// Since we have limited space inside the DLL to patch the path to the PDB,
// it is necessary to limit the size based on the number of bytes occupied by the string.
if (new_expected_buffer_size > original_path_size) {
// e.g. ~lib + 1 for the \0
const uint8_t min_base_size = 4 + 1;
ERR_FAIL_COND_V_MSG(original_path_size < min_base_size + suffix_size, FAILED, vformat("The original PDB path size in bytes is too small: '%s'. Expected size: %d or more bytes, but available %d.", pdb_info.path, min_base_size + suffix_size, original_path_size));

utf8_name.resize(original_path_size - suffix_size + 1); // +1 for the \0
utf8_name[utf8_name.size() - 1] = '\0';
new_pdb_base_name.parse_utf8(utf8_name);
new_pdb_base_name[new_pdb_base_name.length() - 1] = '_'; // Restore the last '_'
// Drop trailing code points from the base name until enough UTF-8 bytes have
// been freed for "<base><suffix>" to fit into the original path size.
int too_much = new_expected_buffer_size - original_path_size;
const char32_t *ptr = new_pdb_base_name.ptr();
int32_t pdb_utf32_size = new_pdb_base_name.length();
// Step back *before* reading: index length() is one past the last character,
// so reading it first would subtract the size of a character that is not part
// of the name. The lower bound keeps the index from running below zero if the
// byte budget cannot be met.
while (too_much > 0 && pdb_utf32_size > 0) {
	--pdb_utf32_size;
	too_much -= String::unicode_codepoint_as_utf8_length(ptr[pdb_utf32_size]);
}
// Keep exactly the first pdb_utf32_size characters.
// NOTE(review): left() is used so the result does not depend on whether
// String::resize() counts the terminating character - confirm against ustring.h.
new_pdb_base_name = new_pdb_base_name.left(pdb_utf32_size);
WARN_PRINT(vformat("The original path size of '%s' in bytes was too small to fit the new name, so it was shortened to '%s%d.pdb'.", pdb_info.path, new_pdb_base_name, max_pdb_names - 1));
}
}

new_pdb_base_name += '_';

// Delete old PDB files.
for (const String &file : DirAccess::get_files_at(dll_base_dir)) {
if (file.begins_with(new_pdb_base_name) && file.ends_with(".pdb")) {
Expand Down Expand Up @@ -222,7 +227,7 @@ Error WindowsUtils::copy_and_rename_pdb(const String &p_dll_path) {
Ref<FileAccess> file = FileAccess::open(p_dll_path, FileAccess::READ_WRITE, &err);
ERR_FAIL_COND_V_MSG(err != OK, err, vformat("Failed to open '%s' to patch the PDB path.", p_dll_path));

int original_path_size = pdb_info.path.utf8().length();
int original_path_size = pdb_info.path.utf8_length();
// Double-check file bounds.
ERR_FAIL_UNSIGNED_INDEX_V_MSG(pdb_info.address + original_path_size, file->get_length(), FAILED, vformat("Failed to write a new PDB path. Probably '%s' has been changed.", p_dll_path));

Expand Down