Skip to content

Commit

Permalink
Added utf8_length to avoid allocations
Browse files Browse the repository at this point in the history
  • Loading branch information
kiroxas committed Dec 1, 2024
1 parent db66bd3 commit cd4522f
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 44 deletions.
5 changes: 3 additions & 2 deletions core/io/pck_packer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,12 @@ Error PCKPacker::flush(bool p_verbose) {
}

for (int i = 0; i < files.size(); i++) {
int string_len = files[i].path.utf8().length();
CharString utf8_string = files[i].path.utf8();
int string_len = utf8_string.length();
int pad = _get_pad(4, string_len);

fhead->store_32(string_len + pad);
fhead->store_buffer((const uint8_t *)files[i].path.utf8().get_data(), string_len);
fhead->store_buffer((const uint8_t *)utf8_string.get_data(), string_len);
for (int j = 0; j < pad; j++) {
fhead->store_8(0);
}
Expand Down
2 changes: 1 addition & 1 deletion core/io/plist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ size_t PListNode::get_asn1_size(uint8_t p_len_octets) const {

for (const KeyValue<String, Ref<PListNode>> &E : data_dict) {
size += 1 + _asn1_size_len(p_len_octets); // Sequence.
size += 1 + _asn1_size_len(p_len_octets) + E.key.utf8().length(); //Key.
size += 1 + _asn1_size_len(p_len_octets) + E.key.utf8_length(); //Key.
size += 1 + _asn1_size_len(p_len_octets) + E.value->get_asn1_size(p_len_octets); // Value.
}
return size;
Expand Down
66 changes: 40 additions & 26 deletions core/string/ustring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2243,43 +2243,57 @@ Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
}
}

CharString String::utf8() const {
int l = length();
if (!l) {
return CharString();
// Returns how many bytes the single codepoint `c` is counted as when encoded as UTF-8.
//
// c <= 0x7f       -> 1
// c <= 0x7ff      -> 2
// c <= 0xffff     -> 3
// c <= 0x1fffff   -> 4
// c <= 0x3ffffff  -> 5, and an "Invalid unicode codepoint" error is printed
// c <= 0x7fffffff -> 6, and an "Invalid unicode codepoint" error is printed
// anything larger -> 1, and a "cannot represent as UTF-8" error is printed
//
// The function takes no String instance: it only inspects `c`.
uint32_t String::unicode_codepoint_as_utf8_length(char32_t c)
{
uint32_t utf8_size = 0;
if (c <= 0x7f) { // 7 bits.
utf8_size += 1;
} else if (c <= 0x7ff) { // 11 bits
utf8_size += 2;
} else if (c <= 0xffff) { // 16 bits
utf8_size += 3;
} else if (c <= 0x001fffff) { // 21 bits
utf8_size += 4;
} else if (c <= 0x03ffffff) { // 26 bits
utf8_size += 5;
print_error(vformat("Invalid unicode codepoint (%x)", c));
} else if (c <= 0x7fffffff) { // 31 bits
utf8_size += 6;
print_error(vformat("Invalid unicode codepoint (%x)", c));
} else {
// Out-of-range value: still counted as a single byte, with an error report.
utf8_size += 1;
print_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c));
}

// NOTE(review): the statements from here down to the closing brace of the `for` loop
// look like the removed side of the diff (the per-character length loop of the previous
// String::utf8(), which accumulated into `fl` and used print_unicode_error). They
// reference `l`, which is not declared in this function. Presumably they are not part
// of this function in the actual file; confirm against the repository.
const char32_t *d = &operator[](0);
int fl = 0;
for (int i = 0; i < l; i++) {
uint32_t c = d[i];
if (c <= 0x7f) { // 7 bits.
fl += 1;
} else if (c <= 0x7ff) { // 11 bits
fl += 2;
} else if (c <= 0xffff) { // 16 bits
fl += 3;
} else if (c <= 0x001fffff) { // 21 bits
fl += 4;
} else if (c <= 0x03ffffff) { // 26 bits
fl += 5;
print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
} else if (c <= 0x7fffffff) { // 31 bits
fl += 6;
print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
} else {
fl += 1;
print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c), true);
}
}
// The byte count selected by the if/else chain at the top of the function.
return utf8_size;
}

// Adds up unicode_codepoint_as_utf8_length() over the length() characters
// of this string and returns the total. No encoded buffer is produced here;
// only the per-character sizes are summed.
uint32_t String::utf8_length() const {
	const int char_count = length();
	const char32_t *chars = ptr();

	uint32_t total_bytes = 0;
	int idx = 0;
	while (idx < char_count) {
		// Each character contributes the size reported for its codepoint.
		total_bytes += unicode_codepoint_as_utf8_length(chars[idx]);
		idx++;
	}
	return total_bytes;
}

CharString String::utf8() const {
int fl = utf8_length();

CharString utf8s;
if (fl == 0) {
return utf8s;
}

utf8s.resize(fl + 1);
uint8_t *cdst = (uint8_t *)utf8s.get_data();
const char32_t *d = &operator[](0);
int l = length();

#define APPEND_CHAR(m_c) *(cdst++) = m_c

Expand Down
2 changes: 2 additions & 0 deletions core/string/ustring.h
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,8 @@ class String {

CharString ascii(bool p_allow_extended = false) const;
CharString utf8() const;
// Sum of unicode_codepoint_as_utf8_length() over every character of the string.
// utf8() uses this value to size its output buffer.
uint32_t utf8_length() const;
// Byte count used for a single codepoint in UTF-8 (1 to 4 for values up to 0x1fffff).
// Larger values are reported through print_error and counted as 5, 6 or 1 bytes.
static uint32_t unicode_codepoint_as_utf8_length(char32_t c);
Error parse_utf8(const char *p_utf8, int p_len = -1, bool p_skip_cr = false);
static String utf8(const char *p_utf8, int p_len = -1);

Expand Down
2 changes: 1 addition & 1 deletion modules/gltf/gltf_document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7167,7 +7167,7 @@ PackedByteArray GLTFDocument::_serialize_glb_buffer(Ref<GLTFState> p_state, Erro
const int32_t header_size = 12;
const int32_t chunk_header_size = 8;

int32_t padding = (chunk_header_size + json.utf8().length()) % 4;
int32_t padding = (chunk_header_size + json.utf8_length()) % 4;
json += String(" ").repeat(padding);

CharString cs = json.utf8();
Expand Down
3 changes: 2 additions & 1 deletion platform/linuxbsd/x11/display_server_x11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1920,7 +1920,8 @@ void DisplayServerX11::window_set_title(const String &p_title, WindowID p_window
Atom _net_wm_name = XInternAtom(x11_display, "_NET_WM_NAME", false);
Atom utf8_string = XInternAtom(x11_display, "UTF8_STRING", false);
if (_net_wm_name != None && utf8_string != None) {
XChangeProperty(x11_display, wd.x11_window, _net_wm_name, utf8_string, 8, PropModeReplace, (unsigned char *)p_title.utf8().get_data(), p_title.utf8().length());
CharString utf8_title = p_title.utf8();
XChangeProperty(x11_display, wd.x11_window, _net_wm_name, utf8_string, 8, PropModeReplace, (unsigned char *)utf8_title.get_data(), utf8_title.length());
}
}

Expand Down
31 changes: 18 additions & 13 deletions platform/windows/windows_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,32 +161,37 @@ Error WindowsUtils::copy_and_rename_pdb(const String &p_dll_path) {
return ERR_SKIP;
}

String new_pdb_base_name = p_dll_path.get_file().get_basename() + "_";
String new_pdb_base_name = p_dll_path.get_file().get_basename();

// Checking the available space for the updated string
// and trying to shorten it if there is not much space.
{
// e.g. 999.pdb
const uint8_t suffix_size = String::num_characters((int64_t)max_pdb_names - 1) + 4;
// e.g. ~lib_ + 1 for the \0
const uint8_t min_base_size = 5 + 1;
int original_path_size = pdb_info.path.utf8().length();
CharString utf8_name = new_pdb_base_name.utf8();
int new_expected_buffer_size = utf8_name.length() + suffix_size;
// e.g. _999.pdb
const uint8_t suffix_size = String::num_characters((int64_t)max_pdb_names - 1) + 5;
int original_path_size = pdb_info.path.utf8_length();
int new_expected_buffer_size = new_pdb_base_name.utf8_length() + suffix_size;

// Since we have limited space inside the DLL to patch the path to the PDB,
// it is necessary to limit the size based on the number of bytes occupied by the string.
if (new_expected_buffer_size > original_path_size) {
// e.g. ~lib + 1 for the \0
const uint8_t min_base_size = 4 + 1;
ERR_FAIL_COND_V_MSG(original_path_size < min_base_size + suffix_size, FAILED, vformat("The original PDB path size in bytes is too small: '%s'. Expected size: %d or more bytes, but available %d.", pdb_info.path, min_base_size + suffix_size, original_path_size));

utf8_name.resize(original_path_size - suffix_size + 1); // +1 for the \0
utf8_name[utf8_name.size() - 1] = '\0';
new_pdb_base_name.parse_utf8(utf8_name);
new_pdb_base_name[new_pdb_base_name.length() - 1] = '_'; // Restore the last '_'
int too_much = new_expected_buffer_size - original_path_size;
const char32_t *ptr = new_pdb_base_name.ptr();
int32_t pdb_utf32_size = new_pdb_base_name.length();
while (too_much > 0) {
too_much -= String::unicode_codepoint_as_utf8_length(ptr[pdb_utf32_size]);
--pdb_utf32_size;
}
new_pdb_base_name.resize(pdb_utf32_size);
WARN_PRINT(vformat("The original path size of '%s' in bytes was too small to fit the new name, so it was shortened to '%s%d.pdb'.", pdb_info.path, new_pdb_base_name, max_pdb_names - 1));
}
}

new_pdb_base_name += '_';

// Delete old PDB files.
for (const String &file : DirAccess::get_files_at(dll_base_dir)) {
if (file.begins_with(new_pdb_base_name) && file.ends_with(".pdb")) {
Expand Down Expand Up @@ -222,7 +227,7 @@ Error WindowsUtils::copy_and_rename_pdb(const String &p_dll_path) {
Ref<FileAccess> file = FileAccess::open(p_dll_path, FileAccess::READ_WRITE, &err);
ERR_FAIL_COND_V_MSG(err != OK, err, vformat("Failed to open '%s' to patch the PDB path.", p_dll_path));

int original_path_size = pdb_info.path.utf8().length();
int original_path_size = pdb_info.path.utf8_length();
// Double-check file bounds.
ERR_FAIL_UNSIGNED_INDEX_V_MSG(pdb_info.address + original_path_size, file->get_length(), FAILED, vformat("Failed to write a new PDB path. Probably '%s' has been changed.", p_dll_path));

Expand Down

0 comments on commit cd4522f

Please sign in to comment.