diff --git a/pkuseg/__init__.py b/pkuseg/__init__.py index becad8c..011c5dd 100644 --- a/pkuseg/__init__.py +++ b/pkuseg/__init__.py @@ -57,16 +57,23 @@ def solve(self, txt): now = self.trie j = i found = False + last_word_idx = -1 # 表示从当前位置i往后匹配,最长匹配词词尾的idx while True: c = txt[j] - if not c in now.children: + if not c in now.children and last_word_idx != -1: + found = True + break + if not c in now.children and last_word_idx == -1: break now = now.children[c] - j += 1 if now.isword: - found = True + last_word_idx = j + j += 1 + if j == l and last_word_idx == -1: break - if j == l: + if j == l and last_word_idx != -1 : + j = last_word_idx + 1 + found = True break if found: if last != i: