From d3b3c9b94db61a08443c41ca9f2396a4a251dab8 Mon Sep 17 00:00:00 2001 From: vcaesar Date: Sun, 3 Jan 2021 13:53:11 -0400 Subject: [PATCH] add SplitWords() function --- segmenter.go | 6 ++++++ segmenter_test.go | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/segmenter.go b/segmenter.go index 13df832..97cf8a1 100755 --- a/segmenter.go +++ b/segmenter.go @@ -176,6 +176,12 @@ func updateJumper(jumper *jumper, baseDistance float32, token *Token) { } } +// SplitWords 将文本划分成字元 +func SplitWords(text Text) []Text { + var seg Segmenter + return seg.SplitTextToWords(text) +} + // SplitTextToWords 将文本划分成字元 func (seg *Segmenter) SplitTextToWords(text Text) []Text { output := make([]Text, 0, len(text)/3) diff --git a/segmenter_test.go b/segmenter_test.go index 5e8112a..45cd2c7 100755 --- a/segmenter_test.go +++ b/segmenter_test.go @@ -30,7 +30,7 @@ func TestSplit(t *testing.T) { bytesToString(seg1.SplitTextToWords([]byte("世界有七十亿人口")))) tt.Expect(t, "github/ /is/ /a/ /web/-/based/ /hosting/ /service/,/ /for/ /software/ /development/ /projects/./", - bytesToString(seg1.SplitTextToWords([]byte( + bytesToString(SplitWords([]byte( "GitHub is a web-based hosting service, for software development projects.")))) tt.Expect(t, "雅/虎/yahoo/!/ /致/力/于/,/领/先/的/门/户/网/站/。/",