-
Notifications
You must be signed in to change notification settings - Fork 208
/
Copy pathtrain_bow.py
37 lines (29 loc) · 1.16 KB
/
train_bow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# -*- coding:utf-8 -*-
'''
-------------------------------------------------
Description : train a Bow model on a small corpus, then query it with a sample sentence
Author : machinelp
Date : 2020-06-03
-------------------------------------------------
'''
import sys
from textmatch.models.text_embedding.bow_sklearn import Bow
from textmatch.config.constant import Constant as const
if __name__ == '__main__':
    # Training set passed to fit().
    train_docs = [
        "我去玉龙雪山并且喜欢玉龙雪山玉龙雪山",
        "我在玉龙雪山并且喜欢玉龙雪山",
        "我在九寨沟",
    ]
    # Document list passed to init() before querying: the same three
    # sentences as the training set plus one extra entry.
    query_docs = train_docs + ["哈哈哈哈"]

    # Both Bow instances below are constructed from the same two paths.
    bow_paths = {
        'dic_path': const.BOW_DIC_PATH,
        'bow_index_path': const.BOW_INDEX_PARH,
    }

    # Train.
    model = Bow(**bow_paths)
    model.fit(train_docs)

    # Query: build a fresh instance and initialise it with the document list.
    model = Bow(**bow_paths)
    model.init(query_docs, update=False)

    query = "我在九寨沟,很喜欢"
    # Print the result of the public predict() call, then the first element
    # of what _predict() returns for the same query.
    print('pre>>>>>', model.predict(query))
    print('pre>>>>>', model._predict(query)[0])