Skip to content

Commit

Permalink
Merge pull request #78 from go-ego/thtml-pr
Browse files Browse the repository at this point in the history
add CutTrimHtml and TrimSymbol support
  • Loading branch information
vcaesar authored Oct 12, 2020
2 parents 556b2ec + dde1bbb commit 40de692
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 0 deletions.
3 changes: 3 additions & 0 deletions gse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,7 @@ func TestStop(t *testing.T) {
t2 := `<p>test: </p> <div class="bot"> bot 机器人 <<银河系漫游指南>> </div>`
s = FilterHtml(t2)
tt.Equal(t, "test: bot 机器人 <<银河系漫游指南>> ", s)

s = prodSeg.CutTrimHtmls(t2, true)
tt.Equal(t, "test bot 机器人 银河系 漫游 指南", s)
}
25 changes: 25 additions & 0 deletions trim.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,18 @@ func (seg *Segmenter) Trim(s []string) (r []string) {
return
}

// TrimSymbol passes every element of s through FilterSymbol and collects the
// non-empty results, preserving their original order. Elements that
// FilterSymbol reduces to the empty string are dropped. The result is nil
// when no element survives.
func (seg *Segmenter) TrimSymbol(s []string) (r []string) {
	for _, word := range s {
		// Keep only the words that still have content after filtering.
		if cleaned := FilterSymbol(word); cleaned != "" {
			r = append(r, cleaned)
		}
	}

	return r
}

// TrimPos trims a []SegPos, keeping only the entries that are not symbols, spaces or punctuation
func (seg *Segmenter) TrimPos(s []SegPos) (r []SegPos) {
for i := 0; i < len(s); i++ {
Expand Down Expand Up @@ -144,6 +156,19 @@ func (seg *Segmenter) PosTrimStr(str string, search bool, pos ...string) string
return seg.CutStr(pa)
}

// CutTrimHtml segments str and returns the resulting words as a []string.
// The input is first run through FilterHtml, then cut with seg.Cut (the
// optional hmm arguments are forwarded to Cut unchanged), and finally the
// words are cleaned with TrimSymbol.
func (seg *Segmenter) CutTrimHtml(str string, hmm ...bool) []string {
	words := seg.Cut(FilterHtml(str), hmm...)
	return seg.TrimSymbol(words)
}

// CutTrimHtmls is the string-returning variant of CutTrimHtml: it obtains the
// words from CutTrimHtml (forwarding the optional hmm arguments) and hands
// them to seg.CutStr together with a single space " " as the second argument.
// NOTE(review): CutStr presumably joins the words with that separator — confirm
// against its definition.
func (seg *Segmenter) CutTrimHtmls(str string, hmm ...bool) string {
	return seg.CutStr(seg.CutTrimHtml(str, hmm...), " ")
}

// FilterEmoji filter the emoji
func FilterEmoji(text string) (new string) {
for _, value := range text {
Expand Down

0 comments on commit 40de692

Please sign in to comment.