diff --git a/egs/vlsp2013_crf/conlleval.py b/egs/vlsp2013_crf/conlleval.py
index 7687e85..eadc5dc 100644
--- a/egs/vlsp2013_crf/conlleval.py
+++ b/egs/vlsp2013_crf/conlleval.py
@@ -274,6 +274,26 @@ def evaluate(input, args):
     # compute metrics and print
     evaluate_tags(correctChunk, foundGuessed, foundCorrect, correctTags, tokenCounter, latex=args.latex)
 
+
+def evaluate_(input):
+    """Evaluate the tagged file at path ``input`` with fixed default options.
+
+    Builds an argument object with ``latex``, ``raw``, ``delimiter`` and
+    ``oTag`` set to fixed values and passes it, with the opened file, to
+    ``evaluate``.
+    """
+    class Args(object):
+        pass
+
+    args = Args()
+    args.latex = False
+    args.raw = False
+    args.delimiter = None
+    args.oTag = "O"
+
+    # Close the file once evaluation is done instead of leaking the handle.
+    with open(input) as f:
+        evaluate(f, args)
+
 
 if __name__ == "__main__":
     args = parse_args()
diff --git a/egs/vlsp2013_crf/trainer.py b/egs/vlsp2013_crf/trainer.py
index 3f7c97c..09da3f6 100644
--- a/egs/vlsp2013_crf/trainer.py
+++ b/egs/vlsp2013_crf/trainer.py
@@ -1,7 +1,11 @@
+import os
+
 import pycrfsuite
 from languageflow.transformer.tagged import TaggedTransformer
 import logging
 
+from conlleval import evaluate_
+
 logger = logging.getLogger(__name__)
 logger.setLevel(10)
 FORMAT = "%(asctime)-15s %(message)s"
@@ -25,7 +29,7 @@ def train(self, c1, c2, feature):
         params = {
             'c1': 1.0,  # coefficient for L1 penalty
             'c2': 1e-3,  # coefficient for L2 penalty
-            'max_iterations': 1000,  #
+            'max_iterations': 200,  # maximum number of training iterations
             # include transitions that are possible, but not observed
             'feature.possible_transitions': True
         }
@@ -44,9 +48,15 @@ def train(self, c1, c2, feature):
         y_pred = [tagger.tag(xseq) for x_seq in X_test]
         sentences = [[item[0] for item in sentence] for sentence in self.corpus.test]
         sentences = zip(sentences, y_test, y_pred)
-        output = []
+        texts = []
         for s in sentences:
             tokens, y_true, y_pred = s
-            print(0)
-
+            rows = ["\t".join(item) for item in zip(tokens, y_true, y_pred)]
+            texts.append("\n".join(rows))
+        # Tab-separated token/gold/predicted rows, blank line between sentences.
+        if not os.path.isdir("tmp"):
+            os.makedirs("tmp")
+        with open("tmp/output.txt", "w") as f:
+            f.write("\n\n".join(texts))
+        evaluate_("tmp/output.txt")
         logger.info("Finish tagger")