-
Notifications
You must be signed in to change notification settings - Fork 154
/
filter.go
113 lines (95 loc) · 2.2 KB
/
filter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
package sensitive
import (
"bufio"
"io"
"net/http"
"os"
"regexp"
"time"
)
// Filter is a sensitive-word filter. It pairs a trie of sensitive words with
// a "noise" pattern that some lookups strip from the text before matching.
type Filter struct {
// trie stores the sensitive words added through Load and AddWord.
trie *Trie
// noise matches the character runs that RemoveNoise deletes from text.
noise *regexp.Regexp
}
// New returns a ready-to-use sensitive-word filter.
//
// The filter starts with a fresh trie from NewTrie and the default noise
// pattern, which matches runs of '|', whitespace, '&', '%', '$', '@' and '*'.
func New() *Filter {
	f := new(Filter)
	f.trie = NewTrie()
	f.noise = regexp.MustCompile(`[\|\s&%$@*]+`)
	return f
}
// UpdateNoisePattern replaces the noise pattern used by RemoveNoise with the
// regular expression given in pattern.
//
// The pattern is compiled with regexp.MustCompile, so an invalid expression
// panics; in that case the previous pattern is left in place.
func (filter *Filter) UpdateNoisePattern(pattern string) {
	compiled := regexp.MustCompile(pattern)
	filter.noise = compiled
}
// LoadWordDict loads a sensitive-word dictionary from the file at path.
//
// The file is opened read-only, handed to Load, and closed before returning.
// It returns the error from os.Open or from Load, whichever occurs first.
func (filter *Filter) LoadWordDict(path string) error {
	file, openErr := os.Open(path)
	if openErr != nil {
		return openErr
	}
	defer file.Close()

	return filter.Load(file)
}
// netDictStatusError reports a word-dictionary download that was answered
// with a non-2xx HTTP status.
type netDictStatusError struct {
	url    string // address that was requested
	status string // status of the response, e.g. "404 Not Found"
}

// Error implements the error interface.
func (e *netDictStatusError) Error() string {
	return "sensitive: GET " + e.url + ": unexpected HTTP status " + e.status
}

// LoadNetWordDict downloads a sensitive-word dictionary from url and loads it.
//
// The request is made with a client whose timeout is 5 seconds; per the
// net/http documentation that limit also covers reading the response body.
// It returns the transport error from the GET, an error if the server answers
// with a non-2xx status, or the error from Load.
func (filter *Filter) LoadNetWordDict(url string) error {
	c := http.Client{
		Timeout: 5 * time.Second,
	}
	rsp, err := c.Get(url)
	if err != nil {
		return err
	}
	defer rsp.Body.Close()

	// Get does not treat 4xx/5xx as errors. Without this check the body of an
	// error page would be fed into the trie line by line as sensitive words.
	if rsp.StatusCode < http.StatusOK || rsp.StatusCode >= http.StatusMultipleChoices {
		return &netDictStatusError{url: url, status: rsp.Status}
	}
	return filter.Load(rsp.Body)
}
// Load reads words from rd, one per line, and adds each of them to the trie.
//
// Lines are read with bufio.Reader.ReadLine, so a trailing "\n" or "\r\n" is
// not part of the word. Blank lines are passed to the trie unchanged. A line
// longer than the reader's internal buffer arrives in several fragments; the
// fragments are joined so the line is still added as a single word.
//
// It returns the first read error other than io.EOF. Words added before the
// error stay in the trie.
func (filter *Filter) Load(rd io.Reader) error {
	buf := bufio.NewReader(rd)
	var (
		word    []byte // fragments of a line that did not fit in one ReadLine call
		partial bool   // true while word holds an unfinished line
	)
	for {
		chunk, isPrefix, err := buf.ReadLine()
		if err != nil {
			if err != io.EOF {
				return err
			}
			break
		}
		if !isPrefix && !partial {
			// Common case: the whole line was returned by a single call.
			filter.trie.Add(string(chunk))
			continue
		}
		// chunk is only valid until the next read, so copy it out.
		word = append(word, chunk...)
		partial = isPrefix
		if !partial {
			filter.trie.Add(string(word))
			word = word[:0]
		}
	}
	if partial {
		// The input ended right after a buffer-sized fragment; keep that word.
		filter.trie.Add(string(word))
	}
	return nil
}
// AddWord adds one or more sensitive words to the filter.
//
// All words are forwarded to the trie in a single Add call.
func (filter *Filter) AddWord(words ...string) {
	dict := filter.trie
	dict.Add(words...)
}
// DelWord removes one or more sensitive words from the filter.
//
// All words are forwarded to the trie in a single Del call.
func (filter *Filter) DelWord(words ...string) {
	dict := filter.trie
	dict.Del(words...)
}
// Filter filters the sensitive words out of text and returns the result.
//
// The work is delegated to the trie's Filter method. Unlike FindIn and
// Validate, the text is passed on as-is, without noise removal.
func (filter *Filter) Filter(text string) string {
	filtered := filter.trie.Filter(text)
	return filtered
}
// Replace censors the sensitive words in text using the rune repl and
// returns the result.
//
// The work is delegated to the trie's Replace method; the text is passed on
// as-is, without noise removal.
func (filter *Filter) Replace(text string, repl rune) string {
	censored := filter.trie.Replace(text, repl)
	return censored
}
// FindIn checks text for sensitive words.
//
// Noise is stripped from text with RemoveNoise first, and the cleaned text is
// then searched by the trie's FindIn method, whose results are returned
// unchanged (presumably "found" and the matched word; see Trie.FindIn).
func (filter *Filter) FindIn(text string) (bool, string) {
	cleaned := filter.RemoveNoise(text)
	return filter.trie.FindIn(cleaned)
}
// FindAll returns all sensitive words matched in text.
//
// The work is delegated to the trie's FindAll method; the text is passed on
// as-is, without noise removal.
func (filter *Filter) FindAll(text string) []string {
	matches := filter.trie.FindAll(text)
	return matches
}
// Validate checks whether text is acceptable.
//
// Noise is stripped from text with RemoveNoise first, and the cleaned text is
// then checked by the trie's Validate method, whose results are returned
// unchanged (presumably "valid" and the offending word; see Trie.Validate).
func (filter *Filter) Validate(text string) (bool, string) {
	cleaned := filter.RemoveNoise(text)
	return filter.trie.Validate(cleaned)
}
// RemoveNoise returns text with every match of the filter's noise pattern
// (whitespace and similar filler characters by default) deleted.
func (filter *Filter) RemoveNoise(text string) string {
	// The replacement is empty, so the literal variant gives the same result.
	cleaned := filter.noise.ReplaceAllLiteralString(text, "")
	return cleaned
}