Skip to content

Commit

Permalink
Merge pull request #84 from go-ego/range-pr
Browse files Browse the repository at this point in the history
Add SplitWords() function
  • Loading branch information
vcaesar authored Jan 3, 2021
2 parents 9538046 + d3b3c9b commit dcdb98c
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
6 changes: 6 additions & 0 deletions segmenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,12 @@ func updateJumper(jumper *jumper, baseDistance float32, token *Token) {
}
}

// SplitWords breaks text into its basic word units.
//
// It is a package-level convenience wrapper: the work is delegated to
// SplitTextToWords called on a zero-value Segmenter, so the caller does
// not have to construct a Segmenter first.
func SplitWords(text Text) []Text {
	return new(Segmenter).SplitTextToWords(text)
}

// SplitTextToWords 将文本划分成字元
func (seg *Segmenter) SplitTextToWords(text Text) []Text {
output := make([]Text, 0, len(text)/3)
Expand Down
2 changes: 1 addition & 1 deletion segmenter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func TestSplit(t *testing.T) {
bytesToString(seg1.SplitTextToWords([]byte("世界有七十亿人口"))))

tt.Expect(t, "github/ /is/ /a/ /web/-/based/ /hosting/ /service/,/ /for/ /software/ /development/ /projects/./",
bytesToString(seg1.SplitTextToWords([]byte(
bytesToString(SplitWords([]byte(
"GitHub is a web-based hosting service, for software development projects."))))

tt.Expect(t, "雅/虎/yahoo/!/ /致/力/于/,/领/先/的/门/户/网/站/。/",
Expand Down

0 comments on commit dcdb98c

Please sign in to comment.