-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstemmer.py
28 lines (24 loc) · 857 Bytes
/
stemmer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from nltk.stem import snowball
class Stemmer:
    """Wrapper around NLTK's English Snowball stemmer that keeps token casing."""

    def __init__(self):
        # One stemmer instance is created up front and reused for every call.
        self.stemmer = snowball.SnowballStemmer("english")

    def stem_term(self, token):
        """
        Stem a single token.
        :param token: string of a token
        :return: stemmed token, exactly as returned by the wrapped stemmer
        """
        return self.stemmer.stem(token)

    def stem_terms(self, tokens):
        """
        Stem every token of an iterable while keeping its capitalisation style.
        Rules, applied in this order to each token:
          * tokens starting with '#' are returned unchanged (not stemmed);
          * tokens equal to their own ``str.title()`` form get a title-cased stem;
          * all-uppercase tokens get an upper-cased stem;
          * every other token gets the stem as produced by ``stem_term``.
        Empty-string tokens are accepted and do not raise.
        :param tokens: iterable of string tokens
        :return: new list with one result per input token, in the same order
        """
        return [self._stem_keep_case(token) for token in tokens]

    def _stem_keep_case(self, token):
        """
        Stem one token and re-apply the capitalisation style of the input.
        :param token: string of a token
        :return: the token itself if it starts with '#', else its re-cased stem
        """
        # startswith() rather than token[0]: indexing raises IndexError on "".
        if token.startswith('#'):
            return token
        stemmed = self.stem_term(token)
        # The title-case check must come first: a single capital letter such
        # as "A" is both title-cased and upper-cased.
        if token == token.title():
            return stemmed.title()
        if token.isupper():
            return stemmed.upper()
        return stemmed