-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path4_train_and_predict_fasttext.sh
executable file
·43 lines (32 loc) · 1.15 KB
/
4_train_and_predict_fasttext.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/bin/bash
#
# For each set of pretrained embeddings: train a fastText supervised model,
# print precision/recall, write class probabilities to a file, and call an R
# script on those predictions.
#
# All paths are relative to the current working directory.
# A failing step is reported on stderr and the remaining steps for that
# embedding are skipped; the loop then continues with the next embedding, so
# the script always reaches the final "Done!".

VECTORBASE="results/embeddings/"
OUTBASE="results/models/"

## ensure output dir exists
## (the variable must be expanded: a bare OUTBASE would create a directory
## literally named "OUTBASE" and leave results/models/ missing)
mkdir -p -- "$OUTBASE"

TRAINPATH="data/labeled/ctsa_fasttext_input_"
## TESTPATH is only used by the commented-out cross-validation example below.
TESTPATH="data/labeled/ctsa_fasttext_input_"

## iterate
#for i in {1..5} ## for cross validation
for VECTORS in fasttext_skip_hier word2vec_skip_hier word2vec_cbow_hier; do
  ## train on all, test on all
  TRAIN="${TRAINPATH}wholeset.txt"
  TEST="$TRAIN"
  ## example about how to perform cross validation
  #TRAIN="${TRAINPATH}${i}.txt"
  #TEST="${TESTPATH}${i}.txt"
  MODEL="${OUTBASE}${VECTORS}_model"
  PREDICTIONS="${MODEL}_predictions.txt"

  printf '%s\n' "$TRAIN" "$MODEL" "$VECTORS" "$TEST"

  ## learn the model with pretrainedVectors
  ## (-dim is passed as 200 alongside the pretrained .vec file)
  if ! fastText/fasttext supervised -input "$TRAIN" -output "$MODEL" \
    -pretrainedVectors "${VECTORBASE}${VECTORS}.vec" -dim 200; then
    # Without a freshly trained model the later steps would fail or would
    # silently evaluate a stale ${MODEL}.bin from an earlier run.
    echo "Training failed for ${VECTORS}; skipping evaluation." >&2
    continue
  fi

  ## precision recall (k = 1)
  ## Purely informational, so a failure here does not block prediction.
  fastText/fasttext test "${MODEL}.bin" "$TEST" 1 ||
    echo "Precision/recall evaluation failed for ${VECTORS}." >&2

  ## test and save to file (k = 3 labels with probabilities per input line)
  if ! fastText/fasttext predict-prob "${MODEL}.bin" "$TEST" 3 >"$PREDICTIONS"; then
    echo "Prediction failed for ${VECTORS}; skipping R post-processing." >&2
    continue
  fi

  ## now call R to extract probabilities, save a tidy format and AUC plot
  Rscript 4_general_extract_predictions.R "$PREDICTIONS" "$TEST" ||
    echo "R post-processing failed for ${VECTORS}." >&2
done
echo "Done!"