# Default configuration options for all jobs.
# Any specified configuration options in job-specific config
# files will overwrite the options in this file.
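
# As a purely hypothetical illustration of this override mechanism, a job-specific
# config might contain only the options it changes (the values below are made up):
#
#   job:
#     device: cpu
#   fine_tune:
#     train:
#       n_epochs: 5
#       batch_size: 32
#
# Every option not listed in the job config falls back to the defaults in this file.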

# Options specific to this job.
job:
  # The device to run the job on. Should be one of {cuda,cpu}
  device: cuda

# Options specific to the dataset for fine-tuning
dataset:
  path: data/conceptnet/full
  # Pretokenize the KB triples and save to dataset.triples_tokenized_path
  pretokenize_triples: False
  # Pretokenize the KB phrases and save to dataset.phrases_tokenized_path
  pretokenize_phrases: False
  # Subdirectory of tokenized triples/IDs/masks, under main data directory
  triples_tokenized_path: triples_tokenized/
  # Subdirectory of tokenized phrases/IDs/masks, under main data directory
  phrases_tokenized_path: phrases_tokenized/
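
  # A hypothetical dataset override that points at a different copy of the data and
  # pretokenizes it, saving the token IDs/masks under the subdirectories above
  # (the path is made up):
  #
  #   dataset:
  #     path: data/conceptnet/my-copy
  #     pretokenize_triples: True
  #     pretokenize_phrases: True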

# Options for fine-tuning and testing a transformers language model.
fine_tune:
  model:
    # We experimented with:
    # - bert-base-uncased
    # - roberta-base
    # - sentence-transformers/bert-base-nli-cls-token
    # - sentence-transformers/bert-base-nli-mean-tokens
    # - sentence-transformers/bert-base-nli-max-tokens
    # - sentence-transformers/roberta-base-nli-stsb-mean-tokens
    # See https://huggingface.co/transformers/v3.0.2/pretrained_models.html and
    # https://huggingface.co/sentence-transformers for names
    pretrained_name: sentence-transformers/bert-base-nli-mean-tokens
    # The pooling strategy for representing triples.
    # Choices: {cls,mean,max}
    pool_strategy: mean
    # The maximum length of sequences (triples) to be fed to the model.
    max_seq_len: 32
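
    # For reference, the pooling choices above follow standard transformer usage
    # (an assumption, not specific to this repo): cls takes the final-layer
    # embedding of the [CLS] token, mean averages the final-layer token embeddings,
    # and max takes an element-wise maximum over them.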

  # Options for the training part of fine-tuning
  train:
    # The number of fine-tuning epochs.
    n_epochs: 3
    # The size of batches for fine-tuning.
    batch_size: 16
    # The learning rate. NOTE: Adam is the only optimizer currently supported.
    lr: !!float 2e-5
    # The value of epsilon for Adam.
    eps: !!float 1e-8
    # The number of warmup steps for the scheduler.
    warmup_steps: 10000
    # Set to True to save a checkpoint from each epoch of training
    # with the name checkpoint_{000x}.pt.
    save_all_epochs: False
    # The checkpoint name for the best-performing model at validation time,
    # e.g., checkpoint_{fine_tune.train.best_checkpoint}.pt
    best_checkpoint: best
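
    # Assuming the naming scheme described above, a run with save_all_epochs: True
    # would write one checkpoint per epoch in the checkpoint_{000x}.pt pattern
    # (e.g., checkpoint_0001.pt), alongside the best-validation checkpoint
    # checkpoint_best.pt.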

  # Options specific to how negative samples are selected during fine-tuning.
  negative_sampling:
    # Each entry tells the sampler how many times per positive we should corrupt
    # the head/relation/tail slot.
    nsamp:
      head: 1
      relation: 0
      tail: 0
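
    # Illustration with a hypothetical positive triple: given the counts above,
    # (go to a concert, MotivatedByGoal, listen to music) yields one negative whose
    # head phrase "go to a concert" is replaced by the sampler, while the relation
    # and tail slots are left uncorrupted.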
    # The negative sampler type can be one of:
    # {uniform,slots,simple-neg,antonyms,sans,precomputed}
    # - uniform corrupts the head/relation/tail slots by sampling uniformly at random.
    # - slots corrupts the head/tail slots by sampling from phrases that appear for the
    #   given relation in the KB.
    # - simple-neg prepends a "not" token to the head/tail phrases.
    # - antonyms replaces the first verb (for verb phrases), noun (for noun phrases),
    #   or adjective (for adjective phrases) in the sequence with a randomly selected
    #   antonym, as provided by a precomputed antonym dictionary.
    # - sans uses the precomputed k-hop matrices from the SANS method, detailed in
    #   https://www.aclweb.org/anthology/2020.emnlp-main.492.pdf.
    # - precomputed loads negatives from a file and uses them in conjunction with a
    #   base sampler if there aren't enough precomputed negatives per epoch.
    sampler: uniform
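
    # A hypothetical job-specific override (nested under fine_tune.negative_sampling
    # as in this file) that switches to the antonym-based sampler, which relies on
    # the files configured below:
    #
    #   negative_sampling:
    #     sampler: antonyms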
    # Files needed for antonym-based negative sampling
    antonyms:
      # Maps tokens to their antonyms.
      # We provide a dictionary constructed from WordNet and the lexical contrast dataset from
      # https://www.ims.uni-stuttgart.de/en/research/resources/experiment-data/lexical-contrast-dataset/
      mapping_file: configs/conceptnet/true-neg/antonyms/antonyms_gold.json
      # Maps each phrase in the KB to its tokenization.
      # Our dictionary uses the tokenizations provided by spaCy's part-of-speech tagger.
      token_file: configs/conceptnet/true-neg/antonyms/tokenized_phrases.json
      # Maps each phrase in the KB to an integer index indicating the first token
      # (in the tokenized version of the phrase) to be replaced by its antonym.
      # If no such token exists, the tag index is -1.
      # Our provided tag dictionary uses the spaCy tokenizations from above,
      # and tags the first verb in the phrase (for verb phrases),
      # the first noun in the phrase (for noun phrases), and the first
      # adjective in the phrase (for adjective phrases).
      tag_file: configs/conceptnet/true-neg/antonyms/tags.json
    # Files needed for SANS sampling
    sans:
      # The kmat file for sans can be generated with the code provided by the authors:
      # https://github.com/kahrabian/SANS.
      # Here we provide a k-hop matrix for k=2 on our ConceptNet dataset.
      kmat_file: configs/conceptnet/true-neg/sans/matrix_ConceptNet_k2_nrw0.npz
    # Files needed for sampling from a set of precomputed negatives (e.g., NegatER, self-adversarial)
    precomputed:
      # The TSV file of negative samples
      negative_file: configs/conceptnet/true-neg/negater-thresholds/negatives.tsv

  # Options for the evaluation stage of fine-tuning
  eval:
    # Batch size for scoring
    batch_size: 100
    # If True, set a separate decision threshold per relation during validation that
    # maximizes validation accuracy.
    # If False, set a global decision threshold during validation that maximizes
    # validation accuracy.
    threshold_per_relation: True
    # The name of the checkpoint that we want to test on, i.e.,
    # checkpoint_{eval.test_checkpoint}.pt
    test_checkpoint: best
    # If True, save the true and predicted labels of each test instance to a TSV file.
    save_predictions: true
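
    # In other words, with threshold_per_relation: True a test triple (h, r, t) is
    # labeled positive when its score clears the threshold tuned for r on the
    # validation split; with False, one tuned threshold is shared across all relations.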

# Options specific to the NegatER negative generation framework.
negater:
  # Batch size for training and computing embeddings
  batch_size: 16
  # Options for the k-nearest-neighbors index built in the first step of NegatER
  index:
    # Should we build a new nearest-neighbor index?
    build: True
    # The "k" in k-NN
    k: 10
    # Should we save the index?
    save: True
    # The type of model used to generate sequence embeddings
    model:
      # Sentence-BERT model names are listed at https://huggingface.co/sentence-transformers
      pretrained_name: sentence-transformers/bert-base-nli-mean-tokens
      # The pooling strategy for representing sequences
      # Choices: {cls,mean,max}
      # Note that if a Sentence-BERT model is used, the pooling strategy should match
      # whatever the model was trained to optimize.
      pool_strategy: mean
      # The maximum length of sequences (phrases from the KB) to be fed to the model.
      max_seq_len: 16
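
    # Presumably, this step embeds each KB phrase with the model above and retrieves
    # its k nearest neighbors; with save: True the finished index is written to disk
    # so later jobs can reload it instead of rebuilding.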
  # The fine-tuned model that will be mined for negative knowledge in the second step of NegatER
  fine_tuned_model:
    # The checkpoint to load the model from
    filename: configs/conceptnet/full/classify/checkpoint_bert_best.pt
    # Specifications of the model
    pretrained_name: bert-base-uncased
    # The pooling strategy for representing negative candidates
    # Choices: {cls,mean,max}
    pool_strategy: cls
    # The maximum length of sequences (negative candidates) to be fed to the model.
    max_seq_len: 32
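
  # Note (assumption, not stated in this file): pretrained_name, pool_strategy, and
  # max_seq_len here are expected to match the settings the checkpoint in filename
  # was fine-tuned with, since they determine how the saved weights are interpreted.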
# Options for the "proxy" approach for the NegatER-$\nabla$ ranking method
# learns to predict the magnitude of the gradients.
proxy:
# The hidden dimension of the network (two layers)
d2: 100
# The dropout probability
dropout: !!float 0.1
init:
# The number of features (true gradient magnitudes) to train on
steps: 20000
train:
# The number of proxy training epochs
epochs: 100
# The learning rate. Note that only the Adam optimizer is currently supported
lr: !!float 1e-5
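
  # Read together (an interpretation of the options above, not an authoritative
  # description): the proxy is a small two-layer network with hidden size d2 and the
  # dropout given above, trained on init.steps observed gradient magnitudes so that
  # candidate negatives can be ranked by predicted gradient magnitude rather than by
  # computing each gradient directly.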