Skip to content

Commit

Permalink
Merge pull request #10539 from keymanapp/feat/core/10516-reorder-norm…
Browse files Browse the repository at this point in the history
…-epic-ldml

feat(core): ldml reorder marker processing 🙀
  • Loading branch information
srl295 authored Feb 1, 2024
2 parents 95c2fdb + ec03a68 commit 2016d5d
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 31 deletions.
10 changes: 6 additions & 4 deletions core/src/ldml/ldml_markers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,12 @@ bool normalize_nfd(std::u16string &str) {
return normalize(nfd, str, status);
}

static void add_back_markers(std::u32string &str, const std::u32string &src, marker_map &map, marker_encoding encoding) {
void add_back_markers(std::u32string &str, const std::u32string &src, marker_map &map, marker_encoding encoding) {
if (map.empty()) {
// quick check, nothing to do if no markers
str = src;
return;
}
// need to reconstitute.
marker_map map2(map); // make a copy of the map
// clear the string
Expand Down Expand Up @@ -110,9 +115,6 @@ bool normalize_nfd_markers_segment(std::u32string &str, marker_map &map, marker_
std::u32string str_unmarked_nfd = str_unmarked;
if(!normalize_nfd(str_unmarked_nfd)) {
return false; // normalize failed.
} else if (map.size() == 0) {
// no markers. Return the normalized unmarked str
str = str_unmarked_nfd;
} else if (str_unmarked_nfd == str_unmarked) {
// Normalization produced no change when markers were removed.
// So, we'll call this a no-op.
Expand Down
2 changes: 2 additions & 0 deletions core/src/ldml/ldml_markers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ void prepend_hex_quad(std::u32string &str, KMX_DWORD marker);
/** parse 0001...FFFF into a KMX_DWORD. Returns 0 on failure */
KMX_DWORD parse_hex_quad(const km_core_usv hex_str[]);

/**
 * Re-add markers to text from which they were previously removed.
 *
 * @param str      receives the result; any previous content is replaced.
 *                 When `map` is empty this is simply a copy of `src`.
 * @param src      the text without markers
 * @param map      the markers to put back (e.g. as collected by remove_markers());
 *                 the implementation works on a copy of this map
 * @param encoding which marker encoding to use when writing markers into `str`
 *                 (NOTE(review): presumably must match the encoding used when
 *                 the markers were removed - confirm)
 */
void add_back_markers(std::u32string &str, const std::u32string &src, marker_map &map, marker_encoding encoding);

// bool normalize_nfc_markers(std::u16string &str, marker_encoding encoding) {
// marker_map m;
Expand Down
36 changes: 14 additions & 22 deletions core/src/ldml/ldml_transforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,15 +331,19 @@ reorder_group::apply(std::u32string &str) const {
/** did we match anything */
bool some_match = false;

// markers need to 'pass through' reorders. remove and re-add if needed
marker_map markers;
std::u32string out = remove_markers(str, markers, plain_sentinel);

// get a baseline sort key
auto sort_keys = reorder_sort_key::from(str);
auto sort_keys = reorder_sort_key::from(out);

// apply ALL reorders in the group.
for (const auto &r : list) {
// work backward from end of string forward
// That is, see if "abc" matches "abc" or "ab" or "a"
for (size_t s = str.size(); s > 0; s--) {
size_t submatch = r.match_end(str, 0, s);
for (size_t s = out.size(); s > 0; s--) {
size_t submatch = r.match_end(out, 0, s);
if (submatch != 0) {
#if KMXPLUS_DEBUG_TRANSFORM
DebugTran("Matched: %S (off=%d, len=%d)", str.c_str(), 0, s);
Expand Down Expand Up @@ -367,18 +371,6 @@ reorder_group::apply(std::u32string &str) const {
}
#endif

// TODO-LDML: for now, assume matches entire string.
// A needed optimization here would be to detect a common substring
// at the end of the old and new strings, and keep the match_len
// minimal. This could reduce thrash in core's context.
// However, the calling code does check for a common substring with mismatch()
size_t match_len = str.size();

// 'prefix' is the unmatched string before the match
// TODO-LDML: right now, this is empty, because match_len is the entire size.
std::u32string prefix = str;
prefix.resize(str.size() - match_len); // just the part before the matched part.

// Now, we need to actually do the sorting, but we must only sort
// 'runs' beginning with 0-weight keys.

Expand Down Expand Up @@ -420,29 +412,29 @@ reorder_group::apply(std::u32string &str) const {
}
// recombine into a string by pulling out the 'ch' value
// that's in each sortkey element.
std::u32string newSuffix;
out.clear(); // will re-add all text
signed char q = sort_keys.begin()->quaternary; // start with the first quaternary
for (auto e = sort_keys.begin(); e < sort_keys.end(); e++, q++) {
if (q != e->quaternary) {
// something rearranged in this subrange, because the quaternary values are out of order.
applied = true;
}
// collect the characters
newSuffix.append(1, e->ch);
out.append(1, e->ch);
}
if (applied) {
str.resize(prefix.size());
str.append(newSuffix);
} else {
if (!applied) {
DebugTran("Skip: sorting caused no reordering");
// exit early to avoid string copying and possibly marker re-adding.
return false; // no change
}
#if KMXPLUS_DEBUG_TRANSFORM
DebugTran("Sorted sortkey");
for (const auto &r : sort_keys) {
r.dump();
}
#endif
return applied;
add_back_markers(str, out, markers, plain_sentinel);
return true; // updated
}

transform_entry::transform_entry(const transform_entry &other)
Expand Down
42 changes: 42 additions & 0 deletions core/tests/unit/ldml/keyboards/k_201_reorder_esk-test.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,46 @@
<check result="a\u0332\u0305xz" />
</test>
</tests>
<tests name="onekey" >
<!-- Same as the tests above, but the whole input comes from a single key (test0 / test1) instead of one key per character. -->
<test name="1short">
<startContext to="" />
<!-- test0 outputs "ax${overbar}"; the expected text has U+0305 ahead of the x (presumably ${overbar} is U+0305; confirm in the keyboard's variables) -->
<keystroke key="test0" />
<check result="a\u0305x" />
<!-- type one more character, then delete it again -->
<keystroke key="o"/>
<backspace />
<!-- the text must be exactly what it was before the 'o' was typed -->
<check result="a\u0305x" />
</test>
<test name="2longer">
<startContext to="" />
<!-- test1 outputs "az${overbar}x${underbar}"; the expected text below is in a different order -->
<keystroke key="test1" />
<check result="a\u0332\u0305xz" />
<!-- type one more character, then delete it again -->
<keystroke key="o"/>
<backspace />
<!-- the text must be exactly what it was before the 'o' was typed -->
<check result="a\u0332\u0305xz" />
</test>
</tests>
<tests name="marker" >
<!-- Same as the single-key tests, but the key output also contains \m{...} markers (markertest0 / markertest1). The expected text is identical to the marker-free tests. -->
<test name="1short">
<startContext to="" />
<!-- markertest0 outputs the same characters as test0, with markers placed between them -->
<keystroke key="markertest0" />
<check result="a\u0305x" />
<!-- type one more character, then delete it again -->
<keystroke key="o"/>
<backspace />
<!-- the text must be exactly what it was before the 'o' was typed -->
<check result="a\u0305x" />
</test>
<test name="2longer">
<startContext to="" />
<!-- markertest1 outputs the same characters as test1, with markers placed between and after them -->
<keystroke key="markertest1" />
<check result="a\u0332\u0305xz" />
<!-- type one more character, then delete it again -->
<keystroke key="o"/>
<backspace />
<!-- the text must be exactly what it was before the 'o' was typed -->
<check result="a\u0332\u0305xz" />
</test>
</tests>
</keyboardTest3>
15 changes: 10 additions & 5 deletions core/tests/unit/ldml/keyboards/k_201_reorder_esk.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>

<!--
Test Keyboard from Spec
see https://keyman.com/keyboards/sil_boonkit
-->

<!DOCTYPE keyboard3 SYSTEM "../../../../../resources/standards-data/ldml-keyboards/techpreview/dtd/ldmlKeyboard3.dtd">
<keyboard3 locale="en-t-k0-esk" conformsTo="techpreview">
<info author="srl295" indicator="🙀" layout="qwerty" name="esk reorder test"/>
Expand All @@ -19,6 +14,10 @@
<key id="overbar" output="${overbar}" />
<key id="underbar" output="${underbar}" />
<key id="circumflex" output="${circumflex}" />
<key id="test0" output="ax${overbar}" />
<key id="markertest0" output="a\m{m4}x\m{marker0}${overbar}" />
<key id="test1" output="az${overbar}x${underbar}" />
<key id="markertest1" output="a\m{m0}z\m{m1}${overbar}\m{m2}x\m{m3}${underbar}\m{m3}" />
</keys>

<layers formId="us">
Expand All @@ -29,6 +28,12 @@
<row keys="z x" />
<row keys="space" />
</layer>
<layer modifiers="shift">
<row keys="gap test0 test1" />
</layer>
<layer modifiers="altR shift">
<row keys="gap markertest0 markertest1" />
</layer>
</layers>

<variables>
Expand Down

0 comments on commit 2016d5d

Please sign in to comment.