Skip to content

Commit

Permalink
min character-ops
Browse files Browse the repository at this point in the history
  • Loading branch information
sepehrsoh committed Feb 5, 2025
1 parent f38dacb commit 55f1afa
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

*.json
internal/tests/vocab.txt
internal/tests/vocab_bigram.txt
internal/tests/vocab_bigram*
14 changes: 13 additions & 1 deletion internal/lookup_compound.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func (s *SymSpell) LookupCompound(phrase string, maxEditDistance int) *items.Sug
}
for i := range terms1 {
cp.terms1 = terms1[i]
cp.suggestions, _ = s.Lookup(cp.terms1, verbositypkg.Top, maxEditDistance)
s.getSuggestion(&cp, maxEditDistance)
// Combine adjacent terms
if i > 0 && !cp.isLastCombi {
cp.terms2 = terms1[i-1]
Expand Down Expand Up @@ -109,6 +109,18 @@ func (s *SymSpell) LookupCompound(phrase string, maxEditDistance int) *items.Sug
return s.finalizeAnswer(phrase, cp.suggestionParts)
}

// getSuggestion fills cp.suggestions for the term currently held in cp.terms1.
//
// Terms whose length exceeds s.MinimumCharToChange are looked up with
// verbosity Top and the given maxEditDistance. Any other term is kept as-is:
// it receives a single suggestion that echoes the term with Distance 0 and
// Count math.MaxInt.
//
// The length is measured in runes, not bytes, so the threshold means the same
// number of characters for ASCII and for multi-byte input such as Persian.
func (s *SymSpell) getSuggestion(cp *compoundProcessor, maxEditDistance int) {
	// len(string) would count UTF-8 bytes; converting to []rune counts characters.
	if len([]rune(cp.terms1)) <= s.MinimumCharToChange {
		// Too short to correct: keep the term unchanged.
		cp.suggestions = []items.SuggestItem{{
			Term:     cp.terms1,
			Distance: 0,
			Count:    math.MaxInt,
		}}
		return
	}
	// The error from Lookup is discarded; only the returned suggestions are used.
	cp.suggestions, _ = s.Lookup(cp.terms1, verbositypkg.Top, maxEditDistance)
}

func (s *SymSpell) getBestSuggestion2(cp compoundProcessor, maxEditDistance int) items.SuggestItem {
var best2 items.SuggestItem
if len(cp.suggestions) > 0 {
Expand Down
2 changes: 2 additions & 0 deletions internal/symspell.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ type SymSpell struct {
SplitThreshold int
PreserveCase bool
SplitWordBySpace bool
MinimumCharToChange int
Words map[string]int
BelowThresholdWords map[string]int
Deletes map[string][]string
Expand Down Expand Up @@ -59,6 +60,7 @@ func NewSymSpell(opt ...options.Options) (*SymSpell, error) {
SplitThreshold: opts.SplitItemThreshold,
PreserveCase: opts.PreserveCase,
SplitWordBySpace: opts.SplitWordBySpace,
MinimumCharToChange: opts.MinimumCharacterToChange,
Words: make(map[string]int),
BelowThresholdWords: make(map[string]int),
Deletes: make(map[string][]string),
Expand Down
8 changes: 8 additions & 0 deletions pkg/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ var DefaultOptions = SymspellOptions{
SplitItemThreshold: 1,
PreserveCase: false,
SplitWordBySpace: false,
MinimumCharacterToChange: 1,
}

type SymspellOptions struct {
Expand All @@ -16,6 +17,7 @@ type SymspellOptions struct {
SplitItemThreshold int
PreserveCase bool
SplitWordBySpace bool
MinimumCharacterToChange int
}

type Options interface {
Expand Down Expand Up @@ -69,3 +71,9 @@ func WithSplitWordBySpace() Options {
options.SplitWordBySpace = true
})
}

// WithMinimumCharacterToChange returns an option that sets
// SymspellOptions.MinimumCharacterToChange to charLength.
func WithMinimumCharacterToChange(charLength int) Options {
	apply := func(o *SymspellOptions) {
		o.MinimumCharacterToChange = charLength
	}
	return NewFuncWireOption(apply)
}
15 changes: 13 additions & 2 deletions symspell_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ func TestLookupCompound(t *testing.T) {
options.WithCountThreshold(1),
options.WithMaxDictionaryEditDistance(3),
options.WithPrefixLength(7),
options.WithSplitWordBySpace(),
)
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down Expand Up @@ -184,15 +185,25 @@ func TestSymspellLookupCompoundUnigram(t *testing.T) {
},
want: "ازمایشگاه",
},
{
name: "Test Min Character",
args: args{
a: "بیمارستان ا",
maxEditDistance: 3,
},
want: "بیمارستان ا",
},
}
symSpell := NewSymSpellWithLoadBigramDictionary("internal/tests/vocab_fa.txt",
"",
"internal/tests/vocab_bigram_fa.txt",
0,
1,
options.WithCountThreshold(1),
options.WithCountThreshold(0),
options.WithPrefixLength(5),
options.WithMaxDictionaryEditDistance(3),
options.WithSplitItemThreshold(100),
options.WithSplitWordBySpace(),
options.WithMinimumCharacterToChange(2),
)
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down

0 comments on commit 55f1afa

Please sign in to comment.