diff --git a/core/src/ldml/ldml_markers.cpp b/core/src/ldml/ldml_markers.cpp
index 1faed416cdd..13c711d9d06 100644
--- a/core/src/ldml/ldml_markers.cpp
+++ b/core/src/ldml/ldml_markers.cpp
@@ -58,7 +58,12 @@ bool normalize_nfd(std::u16string &str) {
return normalize(nfd, str, status);
}
-static void add_back_markers(std::u32string &str, const std::u32string &src, marker_map &map, marker_encoding encoding) {
+void add_back_markers(std::u32string &str, const std::u32string &src, marker_map &map, marker_encoding encoding) {
+ if (map.empty()) {
+ // quick check, nothing to do if no markers
+ str = src;
+ return;
+ }
// need to reconstitute.
marker_map map2(map); // make a copy of the map
// clear the string
@@ -110,9 +115,6 @@ bool normalize_nfd_markers_segment(std::u32string &str, marker_map &map, marker_
std::u32string str_unmarked_nfd = str_unmarked;
if(!normalize_nfd(str_unmarked_nfd)) {
return false; // normalize failed.
- } else if (map.size() == 0) {
- // no markers. Return the normalized unmarked str
- str = str_unmarked_nfd;
} else if (str_unmarked_nfd == str_unmarked) {
// Normalization produced no change when markers were removed.
// So, we'll call this a no-op.
diff --git a/core/src/ldml/ldml_markers.hpp b/core/src/ldml/ldml_markers.hpp
index 5527c851c7c..ff0aa057b9b 100644
--- a/core/src/ldml/ldml_markers.hpp
+++ b/core/src/ldml/ldml_markers.hpp
@@ -93,6 +93,8 @@ void prepend_hex_quad(std::u32string &str, KMX_DWORD marker);
/** parse 0001...FFFF into a KMX_DWORD. Returns 0 on failure */
KMX_DWORD parse_hex_quad(const km_core_usv hex_str[]);
+/** re-add markers */
+void add_back_markers(std::u32string &str, const std::u32string &src, marker_map &map, marker_encoding encoding);
// bool normalize_nfc_markers(std::u16string &str, marker_encoding encoding) {
// marker_map m;
diff --git a/core/src/ldml/ldml_transforms.cpp b/core/src/ldml/ldml_transforms.cpp
index 9c0fd1f8d49..e383c2d0258 100644
--- a/core/src/ldml/ldml_transforms.cpp
+++ b/core/src/ldml/ldml_transforms.cpp
@@ -331,15 +331,19 @@ reorder_group::apply(std::u32string &str) const {
/** did we match anything */
bool some_match = false;
+ // markers need to 'pass through' reorders. remove and re-add if needed
+ marker_map markers;
+ std::u32string out = remove_markers(str, markers, plain_sentinel);
+
// get a baseline sort key
- auto sort_keys = reorder_sort_key::from(str);
+ auto sort_keys = reorder_sort_key::from(out);
// apply ALL reorders in the group.
for (const auto &r : list) {
// work backward from end of string forward
// That is, see if "abc" matches "abc" or "ab" or "a"
- for (size_t s = str.size(); s > 0; s--) {
- size_t submatch = r.match_end(str, 0, s);
+ for (size_t s = out.size(); s > 0; s--) {
+ size_t submatch = r.match_end(out, 0, s);
if (submatch != 0) {
#if KMXPLUS_DEBUG_TRANSFORM
DebugTran("Matched: %S (off=%d, len=%d)", str.c_str(), 0, s);
@@ -367,18 +371,6 @@ reorder_group::apply(std::u32string &str) const {
}
#endif
- // TODO-LDML: for now, assume matches entire string.
- // A needed optimization here would be to detect a common substring
- // at the end of the old and new strings, and keep the match_len
- // minimal. This could reduce thrash in core's context.
- // However, the calling code does check for a common substring with mismatch()
- size_t match_len = str.size();
-
- // 'prefix' is the unmatched string before the match
- // TODO-LDML: right now, this is empty, because match_len is the entire size.
- std::u32string prefix = str;
- prefix.resize(str.size() - match_len); // just the part before the matched part.
-
// Now, we need to actually do the sorting, but we must only sort
// 'runs' beginning with 0-weight keys.
@@ -420,7 +412,7 @@ reorder_group::apply(std::u32string &str) const {
}
// recombine into a string by pulling out the 'ch' value
// that's in each sortkey element.
- std::u32string newSuffix;
+ out.clear(); // will re-add all text
signed char q = sort_keys.begin()->quaternary; // start with the first quaternary
for (auto e = sort_keys.begin(); e < sort_keys.end(); e++, q++) {
if (q != e->quaternary) {
@@ -428,13 +420,12 @@ reorder_group::apply(std::u32string &str) const {
applied = true;
}
// collect the characters
- newSuffix.append(1, e->ch);
+ out.append(1, e->ch);
}
- if (applied) {
- str.resize(prefix.size());
- str.append(newSuffix);
- } else {
+ if (!applied) {
DebugTran("Skip: sorting caused no reordering");
+ // exit early to avoid string copying and possibly marker re-adding.
+ return false; // no change
}
#if KMXPLUS_DEBUG_TRANSFORM
DebugTran("Sorted sortkey");
@@ -442,7 +433,8 @@ reorder_group::apply(std::u32string &str) const {
r.dump();
}
#endif
- return applied;
+ add_back_markers(str, out, markers, plain_sentinel);
+ return true; // updated
}
transform_entry::transform_entry(const transform_entry &other)
diff --git a/core/tests/unit/ldml/keyboards/k_201_reorder_esk-test.xml b/core/tests/unit/ldml/keyboards/k_201_reorder_esk-test.xml
index e97ebd35b66..c1510ba1dae 100644
--- a/core/tests/unit/ldml/keyboards/k_201_reorder_esk-test.xml
+++ b/core/tests/unit/ldml/keyboards/k_201_reorder_esk-test.xml
@@ -21,4 +21,46 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/core/tests/unit/ldml/keyboards/k_201_reorder_esk.xml b/core/tests/unit/ldml/keyboards/k_201_reorder_esk.xml
index 0b49446ab1f..5f3134ee05e 100644
--- a/core/tests/unit/ldml/keyboards/k_201_reorder_esk.xml
+++ b/core/tests/unit/ldml/keyboards/k_201_reorder_esk.xml
@@ -1,10 +1,5 @@
-
-
@@ -19,6 +14,10 @@
+
+
+
+
@@ -29,6 +28,12 @@
+
+
+
+
+
+