diff --git a/core/src/ldml/ldml_markers.cpp b/core/src/ldml/ldml_markers.cpp index 1faed416cdd..13c711d9d06 100644 --- a/core/src/ldml/ldml_markers.cpp +++ b/core/src/ldml/ldml_markers.cpp @@ -58,7 +58,12 @@ bool normalize_nfd(std::u16string &str) { return normalize(nfd, str, status); } -static void add_back_markers(std::u32string &str, const std::u32string &src, marker_map &map, marker_encoding encoding) { +void add_back_markers(std::u32string &str, const std::u32string &src, marker_map &map, marker_encoding encoding) { + if (map.empty()) { + // quick check, nothing to do if no markers + str = src; + return; + } // need to reconstitute. marker_map map2(map); // make a copy of the map // clear the string @@ -110,9 +115,6 @@ bool normalize_nfd_markers_segment(std::u32string &str, marker_map &map, marker_ std::u32string str_unmarked_nfd = str_unmarked; if(!normalize_nfd(str_unmarked_nfd)) { return false; // normalize failed. - } else if (map.size() == 0) { - // no markers. Return the normalized unmarked str - str = str_unmarked_nfd; } else if (str_unmarked_nfd == str_unmarked) { // Normalization produced no change when markers were removed. // So, we'll call this a no-op. diff --git a/core/src/ldml/ldml_markers.hpp b/core/src/ldml/ldml_markers.hpp index 5527c851c7c..ff0aa057b9b 100644 --- a/core/src/ldml/ldml_markers.hpp +++ b/core/src/ldml/ldml_markers.hpp @@ -93,6 +93,8 @@ void prepend_hex_quad(std::u32string &str, KMX_DWORD marker); /** parse 0001...FFFF into a KMX_DWORD. Returns 0 on failure */ KMX_DWORD parse_hex_quad(const km_core_usv hex_str[]); +/** re-add markers */ +void add_back_markers(std::u32string &str, const std::u32string &src, marker_map &map, marker_encoding encoding); // bool normalize_nfc_markers(std::u16string &str, marker_encoding encoding) { // marker_map m; diff --git a/core/src/ldml/ldml_transforms.cpp b/core/src/ldml/ldml_transforms.cpp index 9c0fd1f8d49..e383c2d0258 100644 --- a/core/src/ldml/ldml_transforms.cpp +++ b/core/src/ldml/ldml_transforms.cpp @@ -331,15 +331,19 @@ reorder_group::apply(std::u32string &str) const { /** did we match anything */ bool some_match = false; + // markers need to 'pass through' reorders. remove and re-add if needed + marker_map markers; + std::u32string out = remove_markers(str, markers, plain_sentinel); + // get a baseline sort key - auto sort_keys = reorder_sort_key::from(str); + auto sort_keys = reorder_sort_key::from(out); // apply ALL reorders in the group. for (const auto &r : list) { // work backward from end of string forward // That is, see if "abc" matches "abc" or "ab" or "a" - for (size_t s = str.size(); s > 0; s--) { - size_t submatch = r.match_end(str, 0, s); + for (size_t s = out.size(); s > 0; s--) { + size_t submatch = r.match_end(out, 0, s); if (submatch != 0) { #if KMXPLUS_DEBUG_TRANSFORM DebugTran("Matched: %S (off=%d, len=%d)", str.c_str(), 0, s); @@ -367,18 +371,6 @@ reorder_group::apply(std::u32string &str) const { } #endif - // TODO-LDML: for now, assume matches entire string. - // A needed optimization here would be to detect a common substring - // at the end of the old and new strings, and keep the match_len - // minimal. This could reduce thrash in core's context. - // However, the calling code does check for a common substring with mismatch() - size_t match_len = str.size(); - - // 'prefix' is the unmatched string before the match - // TODO-LDML: right now, this is empty, because match_len is the entire size. - std::u32string prefix = str; - prefix.resize(str.size() - match_len); // just the part before the matched part. - // Now, we need to actually do the sorting, but we must only sort // 'runs' beginning with 0-weight keys. @@ -420,7 +412,7 @@ reorder_group::apply(std::u32string &str) const { } // recombine into a string by pulling out the 'ch' value // that's in each sortkey element. - std::u32string newSuffix; + out.clear(); // will re-add all text signed char q = sort_keys.begin()->quaternary; // start with the first quaternary for (auto e = sort_keys.begin(); e < sort_keys.end(); e++, q++) { if (q != e->quaternary) { @@ -428,13 +420,12 @@ reorder_group::apply(std::u32string &str) const { applied = true; } // collect the characters - newSuffix.append(1, e->ch); + out.append(1, e->ch); } - if (applied) { - str.resize(prefix.size()); - str.append(newSuffix); - } else { + if (!applied) { DebugTran("Skip: sorting caused no reordering"); + // exit early to avoid string copying and possibly marker re-adding. + return false; // no change } #if KMXPLUS_DEBUG_TRANSFORM DebugTran("Sorted sortkey"); @@ -442,7 +433,8 @@ reorder_group::apply(std::u32string &str) const { r.dump(); } #endif - return applied; + add_back_markers(str, out, markers, plain_sentinel); + return true; // updated } transform_entry::transform_entry(const transform_entry &other) diff --git a/core/tests/unit/ldml/keyboards/k_201_reorder_esk-test.xml b/core/tests/unit/ldml/keyboards/k_201_reorder_esk-test.xml index e97ebd35b66..c1510ba1dae 100644 --- a/core/tests/unit/ldml/keyboards/k_201_reorder_esk-test.xml +++ b/core/tests/unit/ldml/keyboards/k_201_reorder_esk-test.xml @@ -21,4 +21,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/core/tests/unit/ldml/keyboards/k_201_reorder_esk.xml b/core/tests/unit/ldml/keyboards/k_201_reorder_esk.xml index 0b49446ab1f..5f3134ee05e 100644 --- a/core/tests/unit/ldml/keyboards/k_201_reorder_esk.xml +++ b/core/tests/unit/ldml/keyboards/k_201_reorder_esk.xml @@ -1,10 +1,5 @@ - - @@ -19,6 +14,10 @@ + + + + @@ -29,6 +28,12 @@ + + + + + +