-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMakefile
70 lines (51 loc) · 2.74 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# ---------------------------------------------------------------------------
# Configuration
#
# LANGUAGE and TASK select what is processed/trained; override them on the
# command line:  make LANGUAGE=<language> TASK=<task>
#
# NOTE: keep these as `:=` rather than `?=`. LANGUAGE is also a gettext
# locale environment variable on many systems, and `?=` would silently pick
# up that value instead of the default below.
# ---------------------------------------------------------------------------
LANGUAGE := tamil
TASK := pos_tag

# Root directories for data and for model checkpoints.
DATA_DIR := ./data
CHECKPOINT_DIR := ./checkpoints

# Universal Dependencies treebanks: download location, URL, the directory the
# extraction rule is expected to produce, and the downloaded archive.
UD_DIR_BASE := $(DATA_DIR)/ud
UDURL := https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-3105/ud-treebanks-v2.5.tgz
UD_DIR := $(UD_DIR_BASE)/ud-treebanks-v2.5
# `wget -P` stores the download under the URL's basename, so derive the
# archive path from the URL instead of repeating the file name.
UD_FILE := $(UD_DIR_BASE)/$(notdir $(UDURL))

# Preprocessed data. PROCESSED_DIR_BASE keeps its trailing slash because it is
# passed verbatim to the Python scripts; the slash is stripped when composing
# sub-paths so they do not end up containing "//".
PROCESSED_DIR_BASE := $(DATA_DIR)/processed/
PROCESSED_DIR := $(patsubst %/,%,$(PROCESSED_DIR_BASE))/$(LANGUAGE)
PROCESSED_FILE := $(PROCESSED_DIR)/test--bert.pickle.bz2

# Training targets: one finished.txt per representation under
# $(CHECKPOINT_DIR)/$(TASK)/$(LANGUAGE)/<rep>/ (expected to be written by the
# training script when it completes; confirm against src/h02_learn).
TRAIN_DIR := $(CHECKPOINT_DIR)/$(TASK)/$(LANGUAGE)
TRAIN_BERT := $(TRAIN_DIR)/bert/finished.txt
TRAIN_FAST := $(TRAIN_DIR)/fast/finished.txt
TRAIN_ONEHOT := $(TRAIN_DIR)/onehot/finished.txt
TRAIN_RANDOM := $(TRAIN_DIR)/random/finished.txt
# ---------------------------------------------------------------------------
# Convenience entry points
#
# None of these names is a file, so they are declared phony; otherwise a
# stray file or directory called e.g. `train` or `process` would make the
# target look up to date and silently skip the work.
# ---------------------------------------------------------------------------
.PHONY: all train train_bert train_fast train_onehot train_random process get_ud

# Full pipeline: fetch UD, preprocess $(LANGUAGE), train every representation.
# Prerequisites are listed in pipeline order; make only honours that order in
# a serial (non -j) run unless the file rules declare the dependencies too.
all: get_ud process train
	echo "Finished everything"

# Train all four representations.
train: $(TRAIN_BERT) $(TRAIN_FAST) $(TRAIN_ONEHOT) $(TRAIN_RANDOM)

# Train a single representation.
train_bert: $(TRAIN_BERT)

train_fast: $(TRAIN_FAST)

train_onehot: $(TRAIN_ONEHOT)

train_random: $(TRAIN_RANDOM)

# Preprocess the UD data for $(LANGUAGE).
process: $(PROCESSED_FILE)

# Download and unpack the UD treebanks.
get_ud: $(UD_DIR)
# Train with the 'random' representation by running random_search.py.
#
# $(PROCESSED_FILE) is an order-only prerequisite: the preprocessed data must
# exist before training starts (also under `make -j`), but re-processing does
# not by itself force a retrain.
#
# Alternative entry point kept from the original recipe (train.py instead of
# random_search.py, same arguments):
#   python src/h02_learn/train.py --language $(LANGUAGE) --data-path $(PROCESSED_DIR_BASE) --rep 'random' --checkpoint-path $(CHECKPOINT_DIR) --task $(TASK)
$(TRAIN_RANDOM): | $(PROCESSED_FILE)
	echo "Train random model"
	python -u src/h02_learn/random_search.py --language $(LANGUAGE) --data-path $(PROCESSED_DIR_BASE) --rep 'random' --checkpoint-path $(CHECKPOINT_DIR) --task $(TASK)
# Train with the 'onehot' representation by running random_search.py.
#
# $(PROCESSED_FILE) is an order-only prerequisite: the preprocessed data must
# exist before training starts (also under `make -j`), but re-processing does
# not by itself force a retrain.
#
# Alternative entry point kept from the original recipe (train.py instead of
# random_search.py, same arguments):
#   python src/h02_learn/train.py --language $(LANGUAGE) --data-path $(PROCESSED_DIR_BASE) --rep 'onehot' --checkpoint-path $(CHECKPOINT_DIR) --task $(TASK)
$(TRAIN_ONEHOT): | $(PROCESSED_FILE)
	echo "Train onehot model"
	python -u src/h02_learn/random_search.py --language $(LANGUAGE) --data-path $(PROCESSED_DIR_BASE) --rep 'onehot' --checkpoint-path $(CHECKPOINT_DIR) --task $(TASK)
# Train with the 'fast' (fasttext) representation by running random_search.py.
#
# $(PROCESSED_FILE) is an order-only prerequisite: the preprocessed data must
# exist before training starts (also under `make -j`), but re-processing does
# not by itself force a retrain.
#
# Alternative entry point kept from the original recipe (train.py instead of
# random_search.py, same arguments):
#   python src/h02_learn/train.py --language $(LANGUAGE) --data-path $(PROCESSED_DIR_BASE) --rep 'fast' --checkpoint-path $(CHECKPOINT_DIR) --task $(TASK)
$(TRAIN_FAST): | $(PROCESSED_FILE)
	echo "Train fasttext model"
	python -u src/h02_learn/random_search.py --language $(LANGUAGE) --data-path $(PROCESSED_DIR_BASE) --rep 'fast' --checkpoint-path $(CHECKPOINT_DIR) --task $(TASK)
# Train with the 'bert' representation by running random_search.py.
#
# $(PROCESSED_FILE) is an order-only prerequisite: the preprocessed data must
# exist before training starts (also under `make -j`), but re-processing does
# not by itself force a retrain.
#
# Alternative entry point kept from the original recipe (train.py instead of
# random_search.py, same arguments):
#   python src/h02_learn/train.py --language $(LANGUAGE) --data-path $(PROCESSED_DIR_BASE) --rep 'bert' --checkpoint-path $(CHECKPOINT_DIR) --task $(TASK)
$(TRAIN_BERT): | $(PROCESSED_FILE)
	echo "Train bert model"
	python -u src/h02_learn/random_search.py --language $(LANGUAGE) --data-path $(PROCESSED_DIR_BASE) --rep 'bert' --checkpoint-path $(CHECKPOINT_DIR) --task $(TASK)
# Preprocess data
#
# Runs process.py for $(LANGUAGE), reading the treebanks from $(UD_DIR) and
# writing under $(PROCESSED_DIR_BASE). The rule's target is the
# test--bert.pickle.bz2 file for that language.
#
# $(UD_DIR) is an order-only prerequisite: it must exist before processing,
# but a directory's mtime changes whenever its contents change, so it must not
# be allowed to trigger re-processing.
$(PROCESSED_FILE): | $(UD_DIR)
	echo "Process language in ud " $(LANGUAGE)
	python src/h01_data/process.py --language $(LANGUAGE) --ud-path $(UD_DIR) --output-path $(PROCESSED_DIR_BASE)
# Get Universal Dependencies data
#
# Downloads the treebank archive into $(UD_DIR_BASE) and unpacks it there.
# The rule only runs while $(UD_DIR) does not exist.
$(UD_DIR):
	echo "Get ud data"
	mkdir -p $(UD_DIR_BASE)
	# -c resumes a partial download and is a no-op for a complete one; without
	# it a re-run would save a second copy (.tgz.1) and unpack the stale file.
	wget -c -P $(UD_DIR_BASE) $(UDURL)
	# On a failed extraction remove the partial directory, otherwise make would
	# treat the half-unpacked treebanks as up to date on the next run.
	tar -xvzf $(UD_FILE) -C $(UD_DIR_BASE) || { rm -rf "$@"; exit 1; }