-
Notifications
You must be signed in to change notification settings - Fork 19
/
config.yaml
28 lines (25 loc) · 1.72 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Pretraining configuration: data location, optimisation schedule,
# trainer behaviour, and model/tokenisation hyper-parameters.
# Booleans use the canonical lowercase `true` / `false` spelling.

# ----- Data -----
file_path: 'data/pretrain.csv'  # path to the pretraining data

# ----- Optimisation -----
epochs: 30  # total number of training epochs
batch_size: 100  # batch size
lr_rate: 0.00005  # learning rate
scheduler_type: 'linear'  # learning-rate scheduler type
weight_decay: 0.0  # weight decay for AdamW
warmup_ratio: 0.05  # warm-up ratio for the scheduler

# ----- Trainer: saving, logging, evaluation -----
save_strategy: 'epoch'  # save strategy of the trainer
overwrite_output_dir: true  # whether to overwrite the output directory (true/false)
save_total_limit: 3  # save total limit of the trainer
fp16: true  # 16-bit float precision (true/false)
logging_strategy: 'epoch'  # logging frequency
evaluation_strategy: 'epoch'  # validation frequency
report_to: 'tensorboard'  # integration that results and logs are reported to
dataloader_num_workers: 18  # number of subprocesses used for data loading
sharded_ddp: false  # Sharded DDP training option
save_path: 'ckpt/pretrain.pt'  # logging and save path of the pretrained model
# NOTE(review): undocumented in the original file; presumably toggles resuming
# from an existing checkpoint -- confirm against the training script.
load_checkpoint: false

# ----- Model and tokenisation -----
max_position_embeddings: 514  # max position embeddings of the Transformer
blocksize: 175  # max sequence length after tokenization
num_attention_heads: 12  # number of attention heads in each hidden layer
num_hidden_layers: 6  # number of hidden layers
hidden_dropout_prob: 0.1  # hidden-layer dropout probability
attention_probs_dropout_prob: 0.1  # attention dropout probability

# ----- Masked language modelling -----
mlm_probability: 0.15  # masking probability for MLM