-
Notifications
You must be signed in to change notification settings - Fork 2
/
speed_benchmark.py
49 lines (41 loc) · 1.11 KB
/
speed_benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import logging
import time
from os.path import join
from pathlib import Path
from underthesea import sent_tokenize
from pyvi import ViTokenizer
from underthesea.file_utils import DATASETS_FOLDER, CACHE_ROOT
from predict import load_model, word_tokenize
# --- Benchmark parameters -------------------------------------------------
# Corpus read line by line by both benchmarks below.
file = join(DATASETS_FOLDER, "LTA", "VNESEScorpus.txt")
# Line index at which each benchmark loop stops.
NUM_SENTS = 200
# A progress record is logged whenever the line index is a multiple of this.
LOG_EVERY_NUM_SENTS = 50

# --- Logging --------------------------------------------------------------
# Timestamp followed by the message.
FORMAT = '%(asctime)-11s %(message)s'
# NOTSET on the root logger lets records of every level through.
logging.getLogger().setLevel(logging.NOTSET)
logging.basicConfig(format=FORMAT)
logger = logging.getLogger('playground')
# Predict
# Time the underthesea CRF word tokenizer on the first NUM_SENTS lines of the
# corpus. Loading the model happens before the timer starts, so only the
# per-line tokenization (and the lazy file reads) are measured.
base_path = Path(CACHE_ROOT) / "models/wtk_crf_4"
tagger = load_model(base_path)
# The context manager closes the corpus even if tokenization raises.
# NOTE(review): assumes the corpus file is UTF-8 encoded; an explicit encoding
# keeps the run independent of the platform's locale default — confirm.
with open(file, "r", encoding="utf-8") as f:
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    start = time.perf_counter()
    # Counting from 1 makes `i` the number of lines tokenized so far, so the
    # loop stops after exactly NUM_SENTS lines rather than NUM_SENTS + 1.
    for i, line in enumerate(f, start=1):
        word_tokenize(tagger, line)
        if i % LOG_EVERY_NUM_SENTS == 0:
            logger.info(i)
        if i == NUM_SENTS:
            break
    end = time.perf_counter()
logger.info(f"Underthesea {end-start}")
# Pyvi
# Time pyvi's ViTokenizer on the first NUM_SENTS lines of the same corpus.
# The context manager closes the corpus even if tokenization raises.
# NOTE(review): assumes the corpus file is UTF-8 encoded; an explicit encoding
# keeps the run independent of the platform's locale default — confirm.
with open(file, "r", encoding="utf-8") as f:
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    start = time.perf_counter()
    # Counting from 1 makes `i` the number of lines tokenized so far, so the
    # loop stops after exactly NUM_SENTS lines rather than NUM_SENTS + 1.
    for i, line in enumerate(f, start=1):
        ViTokenizer.tokenize(line)
        if i % LOG_EVERY_NUM_SENTS == 0:
            logger.info(i)
        if i == NUM_SENTS:
            break
    end = time.perf_counter()
logger.info(f"Pyvi {end-start}")