From e831cab7a4c31da02c72044190e9afc1a9ed584c Mon Sep 17 00:00:00 2001 From: Liam Dyer Date: Fri, 3 Jan 2025 14:34:30 -0500 Subject: [PATCH] fix: get full unicode char at cursor position Closes #864 --- lua/blink/cmp/completion/trigger/init.lua | 4 ++- lua/blink/cmp/completion/trigger/utils.lua | 29 ++++++++++++++++++++++ lua/blink/cmp/fuzzy/keyword.rs | 22 +++++++++++++--- 3 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 lua/blink/cmp/completion/trigger/utils.lua diff --git a/lua/blink/cmp/completion/trigger/init.lua b/lua/blink/cmp/completion/trigger/init.lua index e463daf2..03d43720 100644 --- a/lua/blink/cmp/completion/trigger/init.lua +++ b/lua/blink/cmp/completion/trigger/init.lua @@ -23,6 +23,7 @@ local config = require('blink.cmp.config').completion.trigger local context = require('blink.cmp.completion.trigger.context') +local utils = require('blink.cmp.completion.trigger.utils') --- @type blink.cmp.CompletionTrigger --- @diagnostic disable-next-line: missing-fields @@ -71,7 +72,8 @@ function trigger.activate() local cursor = context.get_cursor() local cursor_col = cursor[2] - local char_under_cursor = context.get_line():sub(cursor_col, cursor_col) + + local char_under_cursor = utils.get_char_at_cursor() local is_on_trigger_for_show = trigger.is_trigger_character(char_under_cursor) local insert_enter_on_trigger_character = config.show_on_trigger_character diff --git a/lua/blink/cmp/completion/trigger/utils.lua b/lua/blink/cmp/completion/trigger/utils.lua new file mode 100644 index 00000000..3ceb6435 --- /dev/null +++ b/lua/blink/cmp/completion/trigger/utils.lua @@ -0,0 +1,29 @@ +local context = require('blink.cmp.completion.trigger.context') +local utils = {} + +--- Gets the full Unicode character at cursor position +--- @return string +function utils.get_char_at_cursor() + local line = context.get_line() + local cursor_col = context.get_cursor()[2] + + -- Find the start of the UTF-8 character + local start_col = cursor_col + while 
start_col > 1 do + local char = string.byte(line:sub(start_col, start_col)) + if char < 0x80 or char > 0xBF then break end + start_col = start_col - 1 + end + + -- Find the end of the UTF-8 character + local end_col = cursor_col + while end_col < #line do + local char = string.byte(line:sub(end_col + 1, end_col + 1)) + if char < 0x80 or char > 0xBF then break end + end_col = end_col + 1 + end + + return line:sub(start_col, end_col) +end + +return utils diff --git a/lua/blink/cmp/fuzzy/keyword.rs b/lua/blink/cmp/fuzzy/keyword.rs index c8045a7e..94aeb78a 100644 --- a/lua/blink/cmp/fuzzy/keyword.rs +++ b/lua/blink/cmp/fuzzy/keyword.rs @@ -8,14 +8,16 @@ lazy_static! { /// Given a line and cursor position, returns the start and end indices of the keyword pub fn get_keyword_range(line: &str, col: usize, match_suffix: bool) -> (usize, usize) { - let line_before = line.chars().take(col).collect::<String>(); - let before_match_start = BACKWARD_REGEX.find(&line_before).map(|m| m.start()); + let before_match_start = BACKWARD_REGEX + .find(&line[0..col.min(line.len())]) + .map(|m| m.start()); if !match_suffix { return (before_match_start.unwrap_or(col), col); } - let line_after = line.chars().skip(col).collect::<String>(); - let after_match_end = FORWARD_REGEX.find(&line_after).map(|m| m.end() + col); + let after_match_end = FORWARD_REGEX + .find(&line[col.min(line.len())..]) + .map(|m| m.end() + col); ( before_match_start.unwrap_or(col), after_match_end.unwrap_or(col), @@ -63,3 +65,15 @@ pub fn guess_keyword_from_item( let (start, end) = guess_keyword_range_from_item(item_text, line, cursor_col, match_suffix); line[start..end].to_string() } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_keyword_range_unicode() { + let line = "'вest'"; + let col = line.len() - 1; + assert_eq!(get_keyword_range(line, col, false), (1, line.len() - 1)); + } +}