From b7b29b2f0f583f015fdbfe4735704d179fc01be2 Mon Sep 17 00:00:00 2001 From: Kan-Ru Chen Date: Sat, 27 Jul 2024 05:58:29 +0900 Subject: [PATCH 1/5] refactor: fix compile warnings --- capi/data/mini.dat | Bin 91033 -> 91050 bytes fuzzer/src/bin/trieloader.rs | 13 ++++++++----- src/dictionary/uhash.rs | 10 ++++------ 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/capi/data/mini.dat b/capi/data/mini.dat index d89a041c1322fa7e8d2716940cc6de0003cf31f7..a98633fc3b52d54358387c7040b4268698cda3f7 100644 GIT binary patch delta 86 zcmbPvoORW4Rsn-%#^j|uEY2RT;Y^GS2K5sKyiKGH^epuZbc>SpjC4(P(@j!ROp=mJ ibW<{mN-BB8k~31vGxO4QlXEf^5DFXXw$?F9Cj$U59vLnG delta 69 zcmZ2=oOR}LRsn-%#^fnHEY2RT;Y^GS26+<&yp_2O^epuZbc>Sp40*(oGg8Yl^U`&b Qb21eW;*I@V`x&K^0ixCulK=n! diff --git a/fuzzer/src/bin/trieloader.rs b/fuzzer/src/bin/trieloader.rs index 16b8f1a5a..0be720630 100644 --- a/fuzzer/src/bin/trieloader.rs +++ b/fuzzer/src/bin/trieloader.rs @@ -1,7 +1,7 @@ use std::{env, io::Result}; use chewing::{ - dictionary::{Dictionary, Trie}, + dictionary::{Dictionary, LookupStrategy, Trie}, syl, zhuyin::Bopomofo, }; @@ -22,10 +22,13 @@ pub fn main() -> Result<()> { debug!("[+] {:?}", info); info!("[*] try to lookup a phrase..."); - let entries = dict.lookup_all_phrases(&[ - syl![Bopomofo::C, Bopomofo::E, Bopomofo::TONE4], - syl![Bopomofo::SH, Bopomofo::TONE4], - ]); + let entries = dict.lookup_all_phrases( + &[ + syl![Bopomofo::C, Bopomofo::E, Bopomofo::TONE4], + syl![Bopomofo::SH, Bopomofo::TONE4], + ], + LookupStrategy::Standard, + ); for phrase in entries { debug!("[+] found {:?}", phrase); } diff --git a/src/dictionary/uhash.rs b/src/dictionary/uhash.rs index b8c160d87..662498c3b 100644 --- a/src/dictionary/uhash.rs +++ b/src/dictionary/uhash.rs @@ -13,7 +13,7 @@ use std::{ ffi::{c_int, c_ushort}, io::{self, BufRead, BufReader, Read}, - mem, + mem::size_of, str::{self, FromStr}, }; @@ -74,7 +74,7 @@ pub(crate) fn try_load_bin(mut input: R) -> io::Result()])?; + input.read_exact(&mut buf[0..size_of::()])?; let mut result = Vec::new(); loop { @@ -125,7 +125,7 @@ pub(crate) fn try_load_bin(mut input: R) -> io::Result()), - ); + input.extend(iter::repeat(0).take(BIN_FIELD_SIZE - input.len() + 4 + size_of::())); let phrases = try_load_bin(&input[..]).unwrap(); assert_eq!( vec![( From 1d634acac47bbf7848e22d578d96b00bb1adcbd7 Mon Sep 17 00:00:00 2001 From: Kan-Ru Chen Date: Sat, 27 Jul 2024 07:54:06 +0900 Subject: [PATCH 2/5] test: generate test cases from fuzzer input --- fuzzer/src/bin/fuzzer.rs | 272 +++++++++++++++++++++++++++------------ 1 file changed, 189 insertions(+), 83 deletions(-) diff --git a/fuzzer/src/bin/fuzzer.rs b/fuzzer/src/bin/fuzzer.rs index 8cfeda3ed..2b8745996 100644 --- a/fuzzer/src/bin/fuzzer.rs +++ b/fuzzer/src/bin/fuzzer.rs @@ -103,6 +103,8 @@ pub fn main() -> Result<()> { .expect("The required argument system dictionary path is not provided."); let syspath = CString::new(syspath).unwrap(); + let gen = env::var("GEN").is_ok(); + unsafe { let ctx = chewing_new2( syspath.as_ptr(), @@ -114,97 +116,123 @@ pub fn main() -> Result<()> { let mut ops = stdin().bytes(); // Take first few bytes as mode settings - chewing_set_KBType(ctx, ops.next().transpose()?.unwrap_or_default().into()); - chewing_set_candPerPage( - ctx, - ops.next() - .transpose()? - .unwrap_or_default() - .clamp(5, 10) - .into(), - ); - chewing_set_maxChiSymbolLen( - ctx, - ops.next() - .transpose()? - .unwrap_or_default() - .clamp(0, 39) - .into(), - ); - chewing_set_addPhraseDirection( - ctx, - ops.next() - .transpose()? - .unwrap_or_default() - .clamp(0, 1) - .into(), - ); - chewing_set_spaceAsSelection( - ctx, - ops.next() - .transpose()? - .unwrap_or_default() - .clamp(0, 1) - .into(), - ); - chewing_set_escCleanAllBuf( - ctx, - ops.next() - .transpose()? - .unwrap_or_default() - .clamp(0, 1) - .into(), - ); - chewing_set_autoShiftCur( - ctx, - ops.next() - .transpose()? - .unwrap_or_default() - .clamp(0, 1) - .into(), - ); - chewing_set_easySymbolInput( - ctx, - ops.next() - .transpose()? - .unwrap_or_default() - .clamp(0, 1) - .into(), - ); - chewing_set_phraseChoiceRearward( - ctx, - ops.next() - .transpose()? - .unwrap_or_default() - .clamp(0, 1) - .into(), - ); - chewing_set_autoLearn( - ctx, - ops.next() - .transpose()? - .unwrap_or_default() - .clamp(0, 1) - .into(), - ); + let kb_type = ops.next().transpose()?.unwrap_or_default().into(); + let cand_per_page = ops + .next() + .transpose()? + .unwrap_or_default() + .clamp(5, 10) + .into(); + let max_chi_symbol_len = ops + .next() + .transpose()? + .unwrap_or_default() + .clamp(0, 39) + .into(); + let add_phrase_direction = ops + .next() + .transpose()? + .unwrap_or_default() + .clamp(0, 1) + .into(); + let space_as_selection = ops + .next() + .transpose()? + .unwrap_or_default() + .clamp(0, 1) + .into(); + let esc_clean_all = ops + .next() + .transpose()? + .unwrap_or_default() + .clamp(0, 1) + .into(); + let auto_shift_cur = ops + .next() + .transpose()? + .unwrap_or_default() + .clamp(0, 1) + .into(); + let easy_symbol_input = ops + .next() + .transpose()? + .unwrap_or_default() + .clamp(0, 1) + .into(); + let phrase_choice_rearward = ops + .next() + .transpose()? + .unwrap_or_default() + .clamp(0, 1) + .into(); + let auto_learn = ops + .next() + .transpose()? + .unwrap_or_default() + .clamp(0, 1) + .into(); + let fullwidth_toggle = ops + .next() + .transpose()? + .unwrap_or_default() + .clamp(0, 1) + .into(); + let conversion_engine = ops + .next() + .transpose()? + .unwrap_or_default() + .clamp(0, 2) + .into(); + chewing_set_KBType(ctx, kb_type); + chewing_set_candPerPage(ctx, cand_per_page); + chewing_set_maxChiSymbolLen(ctx, max_chi_symbol_len); + chewing_set_addPhraseDirection(ctx, add_phrase_direction); + chewing_set_spaceAsSelection(ctx, space_as_selection); + chewing_set_escCleanAllBuf(ctx, esc_clean_all); + chewing_set_autoShiftCur(ctx, auto_shift_cur); + chewing_set_easySymbolInput(ctx, easy_symbol_input); + chewing_set_phraseChoiceRearward(ctx, phrase_choice_rearward); + chewing_set_autoLearn(ctx, auto_learn); chewing_config_set_int( ctx, c"chewing.enable_fullwidth_toggle_key".as_ptr(), - ops.next() - .transpose()? - .unwrap_or_default() - .clamp(0, 1) - .into(), + fullwidth_toggle, ); chewing_config_set_int( ctx, c"chewing.conversion_engine".as_ptr(), - ops.next() - .transpose()? - .unwrap_or_default() - .clamp(0, 2) - .into(), + conversion_engine, ); + if gen { + println!("# chewing_set_KBType(ctx, {});", kb_type); + println!("# chewing_set_candPerPage(ctx, {});", cand_per_page); + println!( + "# chewing_set_maxChiSymbolLen(ctx, {});", + max_chi_symbol_len + ); + println!( + "# chewing_set_addPhraseDirection(ctx, {});", + add_phrase_direction + ); + println!( + "# chewing_set_spaceAsSelection(ctx, {});", + space_as_selection + ); + println!("# chewing_set_escCleanAllBuf(ctx, {});", esc_clean_all); + println!("# chewing_set_autoShiftCur(ctx, {});", auto_shift_cur); + println!("# chewing_set_easySymbolInput(ctx, {});", easy_symbol_input); + println!( + "# chewing_set_phraseChoiceRearward(ctx, {});", + phrase_choice_rearward + ); + println!("# chewing_set_autoLearn(ctx, {});", auto_learn); + println!( + "# chewing.enable_fullwidth_toggle_key = {}", + fullwidth_toggle + ); + println!("# chewing.conversion_engine = {}", conversion_engine); + println!("") + } while let Some(Ok(op)) = ops.next() { use ChewingHandle::*; @@ -213,91 +241,166 @@ pub fn main() -> Result<()> { Default => { if let Some(Ok(key)) = ops.next() { if key.is_ascii() && !key.is_ascii_control() { + if gen { + print!("{}", char::from(key)); + } chewing_handle_Default(ctx, key as i32); } } } Backspace => { + if gen { + print!(""); + } chewing_handle_Backspace(ctx); } Capslock => { + if gen { + print!(""); + } chewing_handle_Capslock(ctx); } CtrlNum => { if let Some(Ok(key)) = ops.next() { if key.is_ascii_digit() { + if gen { + print!("", key); + } chewing_handle_CtrlNum(ctx, key as i32); } } } Del => { + if gen { + print!(""); + } chewing_handle_Del(ctx); } Enter => { + if gen { + print!(""); + } chewing_handle_Enter(ctx); } Esc => { + if gen { + print!(""); + } chewing_handle_Esc(ctx); } Space => { + if gen { + print!(" "); + } chewing_handle_Space(ctx); } Tab => { + if gen { + print!(""); + } chewing_handle_Tab(ctx); } Home => { + if gen { + print!(""); + } chewing_handle_Home(ctx); } End => { + if gen { + print!(""); + } chewing_handle_End(ctx); } Left => { + if gen { + print!(""); + } chewing_handle_Left(ctx); } Right => { + if gen { + print!(""); + } chewing_handle_Right(ctx); } Up => { + if gen { + print!(""); + } chewing_handle_Up(ctx); } Down => { + if gen { + print!(""); + } chewing_handle_Down(ctx); } ShiftLeft => { + if gen { + print!(""); + } chewing_handle_ShiftLeft(ctx); } ShiftRight => { + if gen { + print!(""); + } chewing_handle_ShiftRight(ctx); } ShiftSpace => { + if gen { + print!(""); + } chewing_handle_ShiftSpace(ctx); } PageUp => { + if gen { + print!(""); + } chewing_handle_PageUp(ctx); } PageDown => { + if gen { + print!(""); + } chewing_handle_PageDown(ctx); } DblTab => { + if gen { + print!(""); + } chewing_handle_DblTab(ctx); } Numlock => { if let Some(Ok(key)) = ops.next() { if key.is_ascii_digit() { + if gen { + print!("", key); + } chewing_handle_Numlock(ctx, key as i32); } } } Reset => { + if gen { + print!("\n# chewing_Reset(ctx);\n"); + } chewing_Reset(ctx); } ChiEngMode => { if let Some(Ok(key)) = ops.next() { + if gen { + print!("\n# chewing_set_ChiEngMode(ctx, {});\n", key % 2); + } chewing_set_ChiEngMode(ctx, (key % 2) as i32); } } ShapeMode => { if let Some(Ok(key)) = ops.next() { + if gen { + print!("\n# chewing_set_ShapeMode(ctx, {});\n", key % 2); + } chewing_set_ShapeMode(ctx, (key % 2) as i32); } } @@ -341,6 +444,9 @@ pub fn main() -> Result<()> { } chewing_delete(ctx); + if gen { + println!(""); + } } Ok(()) From 982bd8b70fe897199fc9128331d432a913e829ce Mon Sep 17 00:00:00 2001 From: Kan-Ru Chen Date: Sat, 27 Jul 2024 07:55:51 +0900 Subject: [PATCH 3/5] fix(conv): don't form connected phrase from symbols --- src/conversion/chewing.rs | 4 ++++ tests/test-regression.c | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/conversion/chewing.rs b/src/conversion/chewing.rs index 74649d7b6..e687006d2 100644 --- a/src/conversion/chewing.rs +++ b/src/conversion/chewing.rs @@ -71,6 +71,10 @@ fn glue_fn(com: &Composition, mut acc: Vec, interval: Interval) -> Vec return acc; } let last = acc.last().expect("acc should have at least one item"); + if !last.is_phrase || !interval.is_phrase { + acc.push(interval); + return acc; + } if let Some(Gap::Glue) = com.gap(last.end) { let last = acc.pop().expect("acc should have at least one item"); let mut phrase = last.str.into_string(); diff --git a/tests/test-regression.c b/tests/test-regression.c index f01ad8642..df07aed67 100644 --- a/tests/test-regression.c +++ b/tests/test-regression.c @@ -261,6 +261,23 @@ void test_crash_found_by_fuzzing_20240505_0() chewing_delete(ctx); } +void test_glue_two_symbols() +{ + ChewingContext *ctx; + + clean_userphrase(); + + ctx = chewing_new(); + start_testcase(ctx, fd); + + chewing_config_set_int(ctx, "chewing.conversion_engine", 2); + + type_keystroke_by_string(ctx, "!!"); + ok_preedit_buffer(ctx, "!!"); + + chewing_delete(ctx); +} + int main(int argc, char *argv[]) { char *logname; @@ -288,6 +305,7 @@ int main(int argc, char *argv[]) test_empty_prefix_in_conversion_search(); test_empty_preedit_ignore_certain_keys(); test_crash_found_by_fuzzing_20240505_0(); + test_glue_two_symbols(); fclose(fd); From adbf55b9dbd82586414eb0f0c0d81c0c7e5d7f81 Mon Sep 17 00:00:00 2001 From: Kan-Ru Chen Date: Sat, 27 Jul 2024 21:20:30 +0900 Subject: [PATCH 4/5] fix(editor): crashes when select phrase backwards at end of buffer --- capi/data/mini.dat | Bin 91050 -> 91050 bytes src/editor/selection/phrase.rs | 1 + tests/genkeystroke.c | 1 + tests/test-regression.c | 19 +++++++++++++++++++ 4 files changed, 21 insertions(+) diff --git a/capi/data/mini.dat b/capi/data/mini.dat index a98633fc3b52d54358387c7040b4268698cda3f7..6e55a9e826fac9cf002f68e0f813516499cb32cf 100644 GIT binary patch delta 26 hcmZ2=oORW4)(Poc=DO*IDQ3ndiOG$HTMHR2lL3c)31k2O delta 26 hcmZ2=oORW4)(Pocrn>1SDJdpNNhXbjTMHR2lL3kq36uZ; diff --git a/src/editor/selection/phrase.rs b/src/editor/selection/phrase.rs index 9632ed9a9..08f89fb12 100644 --- a/src/editor/selection/phrase.rs +++ b/src/editor/selection/phrase.rs @@ -170,6 +170,7 @@ impl PhraseSelector { } else { self.begin += 1; if self.begin == self.end { + self.begin -= 1; self.begin = self.after_previous_break_point(self.begin); } } diff --git a/tests/genkeystroke.c b/tests/genkeystroke.c index b9d06e7c5..4f9a36358 100644 --- a/tests/genkeystroke.c +++ b/tests/genkeystroke.c @@ -342,6 +342,7 @@ int main(int argc, char *argv[]) chewing_set_addPhraseDirection(ctx, 1); chewing_set_selKey(ctx, selKey_define, 10); chewing_set_spaceAsSelection(ctx, 1); + chewing_set_phraseChoiceRearward(ctx, 1); clear(); diff --git a/tests/test-regression.c b/tests/test-regression.c index df07aed67..ab07f88d8 100644 --- a/tests/test-regression.c +++ b/tests/test-regression.c @@ -278,6 +278,24 @@ void test_glue_two_symbols() chewing_delete(ctx); } +void test_end_of_buffer_select_phrase_backwards() +{ + ChewingContext *ctx; + + clean_userphrase(); + + ctx = chewing_new(); + start_testcase(ctx, fd); + + chewing_set_spaceAsSelection(ctx, 1); + chewing_set_phraseChoiceRearward(ctx, 1); + + type_keystroke_by_string(ctx, "0 0 0"); + ok_preedit_buffer(ctx, "鵪"); + + chewing_delete(ctx); +} + int main(int argc, char *argv[]) { char *logname; @@ -306,6 +324,7 @@ int main(int argc, char *argv[]) test_empty_preedit_ignore_certain_keys(); test_crash_found_by_fuzzing_20240505_0(); test_glue_two_symbols(); + test_end_of_buffer_select_phrase_backwards(); fclose(fd); From 35d698dc4a7d0a304373418b1497a414c33fb2ac Mon Sep 17 00:00:00 2001 From: Kan-Ru Chen Date: Sat, 27 Jul 2024 21:49:30 +0900 Subject: [PATCH 5/5] fix(editor): only try to autocommit when in entering state --- src/editor/mod.rs | 2 +- tests/test-regression.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/editor/mod.rs b/src/editor/mod.rs index 5396ea633..a25a509cb 100644 --- a/src/editor/mod.rs +++ b/src/editor/mod.rs @@ -763,7 +763,7 @@ impl BasicEditor for Editor { Transition::Spin(behavior) => self.shared.last_key_behavior = behavior, } - if self.shared.last_key_behavior == EditorKeyBehavior::Absorb { + if self.is_entering() && self.shared.last_key_behavior == EditorKeyBehavior::Absorb { self.shared.try_auto_commit(); } trace!("last_key_behavior = {:?}", self.shared.last_key_behavior); diff --git a/tests/test-regression.c b/tests/test-regression.c index ab07f88d8..dc5a0ebb9 100644 --- a/tests/test-regression.c +++ b/tests/test-regression.c @@ -296,6 +296,25 @@ void test_end_of_buffer_select_phrase_backwards() chewing_delete(ctx); } +void test_zero_capacity_buffer_simple_conversion_engine() +{ + ChewingContext *ctx; + + clean_userphrase(); + + ctx = chewing_new(); + start_testcase(ctx, fd); + + chewing_set_KBType(ctx, 1); + chewing_set_maxChiSymbolLen(ctx, 0); + chewing_config_set_int(ctx, "chewing.conversion_engine", 0); + + type_keystroke_by_string(ctx, "x 0"); + ok_commit_buffer(ctx, "鄔"); + + chewing_delete(ctx); +} + int main(int argc, char *argv[]) { char *logname; @@ -325,6 +344,7 @@ int main(int argc, char *argv[]) test_crash_found_by_fuzzing_20240505_0(); test_glue_two_symbols(); test_end_of_buffer_select_phrase_backwards(); + test_zero_capacity_buffer_simple_conversion_engine(); fclose(fd);