CUDA_VISIBLE_DEVICES='1' nohup python run_finetune.py --do_train --do_eval --fp16 --evaluate_during_training --overwrite_cache --task_name sst2 --model_type bert --model_name_or_path bert-base-uncased --max_seq_length 128 --per_gpu_train_batch_size 128 --per_gpu_eval_batch_size 512 --learning_rate 5e-5 --num_train_epochs 3 --max_steps -1 --logging_steps 500 --data_dir=../data/gluedata/SST2 --output_dir PM_91 > log.bert-base-uncased.SST2.128.128.5e-5 &
CUDA_VISIBLE_DEVICES='0' nohup python run_finetune.py --do_train --do_eval --fp16 --evaluate_during_training --overwrite_cache --task_name eem --model_type bert --model_name_or_path bert-base-uncased --max_seq_length 32 --per_gpu_train_batch_size 128 --per_gpu_eval_batch_size 512 --learning_rate 1e-5 --num_train_epochs 3 --max_steps -1 --logging_steps 500 --data_dir=../data/EEM/raw --output_dir PM_931 > log.bert-base-uncased.EEM.32.128.1e-5 &
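The two runs above differ only in task, sequence length, learning rate, and data paths. A hedged sketch of a learning-rate sweep over the SST-2 setup (the sweep values and the PM_91.$lr output naming are illustrative, not from the original runs):
# one job at a time on GPU 1; no trailing & so the runs stay sequential
for lr in 1e-5 2e-5 5e-5; do
  CUDA_VISIBLE_DEVICES='1' nohup python run_finetune.py --do_train --do_eval --fp16 \
    --evaluate_during_training --overwrite_cache --task_name sst2 --model_type bert \
    --model_name_or_path bert-base-uncased --max_seq_length 128 \
    --per_gpu_train_batch_size 128 --per_gpu_eval_batch_size 512 --learning_rate $lr \
    --num_train_epochs 3 --max_steps -1 --logging_steps 500 \
    --data_dir=../data/gluedata/SST2 --output_dir PM_91.$lr \
    > log.bert-base-uncased.SST2.128.128.$lr
done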
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 256 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 32 --valid_batch_size 128 --max_steps -1 --epochs 10 --learning_rate 1e-5 --eval_steps -1 --warmup_steps -1 --task_name openentity --fp16 True --add_knowledge False --update_K_module False --data_dir ../data/knowledge/OpenEntity --output_dir PM_13 > logs/log.KFormers.roberta-large.distilbert.OpenEntity.32.1e-5.256.noKnowledge &
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 256 --knowledge_seq_length 32 --max_num_entity 1 --train_batch_size 32 --valid_batch_size 256 --max_steps -1 --epochs 3 --learning_rate 1e-5 --eval_steps 1000 --warmup_steps -1 --gradient_accumulation_steps 1 --task_name figer --fp16 True --add_knowledge False --data_dir ../data/knowledge/FIGER --output_dir AM_20 > logs/log.KFormers.roberta-large.distilbert.FIGER.32.256.1e-5.noKnowledge &
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 256 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 32 --valid_batch_size 128 --max_steps -1 --epochs 10 --learning_rate 1e-5 --eval_steps -1 --warmup_steps -1 --task_name fewrel --fp16 True --add_knowledge False --update_K_module False --data_dir ../data/knowledge/fewrel --output_dir FR_14 > logs/log.KFormers.roberta-large.distilbert.FewRel.32.1e-5.256.noKnowledge.10 &
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 256 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 32 --valid_batch_size 128 --max_steps -1 --epochs 3 --learning_rate 1e-5 --eval_steps -1 --warmup_steps -1 --logging_steps 500 --task_name tacred --fp16 True --add_knowledge False --update_K_module False --data_dir ../data/knowledge/tacred --output_dir TR_13 > logs/log.KFormers.roberta-large.distilbert.TACRED.32.1e-5.256.noKnowledge.3 &
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 128 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 64 --valid_batch_size 512 --max_steps -1 --epochs 3 --learning_rate 1e-5 --eval_steps -1 --task_name sst2 --fp16 True --add_knowledge False --update_K_module False --data_dir ../data/gluedata/SST2/SST2_tagme --output_dir AM_146 > log.KFormers.roberta-large.distilbert.SST2.64.1e-5.128.noKnowledge &
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 32 --knowledge_seq_length 32 --max_num_entity 1 --train_batch_size 128 --valid_batch_size 512 --max_steps -1 --epochs 3 --learning_rate 5e-5 --eval_steps 1000 --task_name eem --fp16 True --add_knowledge False --update_K_module False --data_dir ../data/EEM/EntityPane_hh_offset --output_dir AM_17 > log.KFormers.roberta-large.distilbert.EEM.128.5e-5.32.noKnowledge &
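The six no-knowledge baselines above share almost every flag. A hedged sketch that loops the entity-typing/relation tasks through one template (task:data_dir pairs copied from the commands above; the noK_$task output dirs are illustrative):
for pair in openentity:../data/knowledge/OpenEntity fewrel:../data/knowledge/fewrel tacred:../data/knowledge/tacred; do
  task=${pair%%:*}; dir=${pair#*:}   # split "task:path"
  nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta \
    --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large \
    --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 256 \
    --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 32 \
    --valid_batch_size 128 --max_steps -1 --epochs 3 --learning_rate 1e-5 \
    --eval_steps -1 --warmup_steps -1 --task_name $task --fp16 True \
    --add_knowledge False --update_K_module False --data_dir $dir \
    --output_dir noK_$task > logs/log.KFormers.roberta-large.distilbert.$task.noKnowledge &
done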
CUDA_VISIBLE_DEVICES='1' nohup python run_finetune_KT-attn.py --do_train --do_eval --fp16 --evaluate_during_training --task_name sst2 --model_type bert --model_name_or_path bert-base-uncased --config_name bert-base-uncased --origin_seq_length 128 --max_seq_length 128 --per_gpu_train_batch_size 128 --per_gpu_eval_batch_size 512 --learning_rate 2e-5 --num_train_epochs 3 --max_steps -1 --logging_steps 500 --data_dir ../data/gluedata/SST2/SST2_offset --output_dir AM_15 > log.kt-attn-bert.sst2.2e-5 &
CUDA_VISIBLE_DEVICES='0' nohup python run_finetune_KT-attn.py --do_train --do_eval --fp16 --evaluate_during_training --task_name eem --model_type bert --model_name_or_path bert-base-uncased --origin_seq_length 32 --max_seq_length 32 --per_gpu_train_batch_size 128 --per_gpu_eval_batch_size 512 --learning_rate 5e-5 --num_train_epochs 3 --max_steps -1 --logging_steps 500 --data_dir ../data/EEM/EntityPane_hh_offset --output_dir PM_640 > log.kt-attn-bert.eem.5e-5.32.32 &
CUDA_VISIBLE_DEVICES='1' nohup python run_finetune_KT-attn.py --do_train --do_eval --fp16 --evaluate_during_training --task_name sst2 --model_type roberta --model_name_or_path roberta-base --origin_seq_length 128 --max_seq_length 128 --per_gpu_train_batch_size 128 --per_gpu_eval_batch_size 512 --learning_rate 1e-5 --num_train_epochs 3 --max_steps -1 --logging_steps 500 --data_dir ../data/gluedata/SST2/SST2_offset --output_dir AM_15 > log.kt-attn-roberta-base.sst2.1e-5 &
CUDA_VISIBLE_DEVICES='0' nohup python run_finetune_KT-attn.py --do_train --do_eval --fp16 --evaluate_during_training --task_name eem --model_type roberta --model_name_or_path roberta-base --origin_seq_length 32 --max_seq_length 32 --per_gpu_train_batch_size 128 --per_gpu_eval_batch_size 512 --learning_rate 5e-5 --num_train_epochs 3 --max_steps -1 --logging_steps 500 --data_dir ../data/EEM/EntityPane_hh_offset --output_dir PM_640 > log.kt-attn-roberta-base.eem.5e-5.32.32 &
CUDA_VISIBLE_DEVICES='1' nohup python run_finetune_KT-attn.py --do_train --do_eval --fp16 --evaluate_during_training --task_name sst2 --model_type roberta --model_name_or_path roberta-large --origin_seq_length 128 --max_seq_length 128 --per_gpu_train_batch_size 128 --per_gpu_eval_batch_size 512 --learning_rate 1e-5 --num_train_epochs 3 --max_steps -1 --logging_steps 500 --data_dir ../data/gluedata/SST2/SST2_offset --output_dir AM_15 > log.kt-attn-roberta-large.sst2.1e-5 &
CUDA_VISIBLE_DEVICES='0' nohup python run_finetune_KT-attn.py --do_train --do_eval --fp16 --evaluate_during_training --task_name eem --model_type roberta --model_name_or_path roberta-large --origin_seq_length 32 --max_seq_length 32 --per_gpu_train_batch_size 128 --per_gpu_eval_batch_size 512 --learning_rate 5e-5 --num_train_epochs 3 --max_steps -1 --logging_steps 500 --data_dir ../data/EEM/EntityPane_hh_offset --output_dir PM_640 > log.kt-attn-roberta-large.eem.5e-5.32.32 &
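The six KT-attn runs above vary only the model and task. A hedged sketch for the EEM side (model_type:checkpoint pairs from the commands above; the kt_$mname output dirs are illustrative, and all three jobs share GPU 0, so run them sequentially if memory is tight):
for m in bert:bert-base-uncased roberta:roberta-base roberta:roberta-large; do
  mtype=${m%%:*}; mname=${m#*:}   # split "model_type:checkpoint"
  CUDA_VISIBLE_DEVICES='0' nohup python run_finetune_KT-attn.py --do_train --do_eval --fp16 \
    --evaluate_during_training --task_name eem --model_type $mtype \
    --model_name_or_path $mname --origin_seq_length 32 --max_seq_length 32 \
    --per_gpu_train_batch_size 128 --per_gpu_eval_batch_size 512 --learning_rate 5e-5 \
    --num_train_epochs 3 --max_steps -1 --logging_steps 500 \
    --data_dir ../data/EEM/EntityPane_hh_offset --output_dir kt_$mname \
    > log.kt-attn-$mname.eem.5e-5.32.32 &
done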
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 256 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 4 --valid_batch_size 128 --max_steps -1 --epochs 3 --learning_rate 1e-5 --eval_steps -1 --warmup_steps 0.06 --task_name openentity --fp16 True --add_knowledge True --update_K_module False --data_dir ../data/knowledge/OpenEntity --output_dir PM_13 > logs/log.KFormers.roberta-large.distilbert.OpenEntity.4.1e-5.256.64 &
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 256 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 32 --valid_batch_size 256 --max_steps -1 --epochs 3 --learning_rate 1e-5 --eval_steps 500 --warmup_steps -1 --gradient_accumulation_steps 1 --task_name figer --fp16 True --add_knowledge True --update_K_module False --data_dir ../data/knowledge/FIGER --output_dir AM_20 > logs/log.KFormers.roberta-large.distilbert.FIGER.32.256.64.1e-5.KFormers &
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 256 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 32 --valid_batch_size 128 --max_steps -1 --epochs 3 --learning_rate 1e-5 --eval_steps -1 --warmup_steps -1 --logging_steps 500 --task_name tacred --fp16 True --add_knowledge True --update_K_module False --data_dir ../data/knowledge/tacred --output_dir TR_14 > logs/log.KFormers.roberta-large.distilbert.TACRED.32.1e-5.256.KFormers.3 &
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 256 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 32 --valid_batch_size 128 --max_steps -1 --epochs 10 --learning_rate 3e-5 --eval_steps -1 --warmup_steps -1 --task_name fewrel --fp16 True --add_knowledge True --update_K_module False --data_dir ../data/knowledge/fewrel --output_dir FR_14 > logs/log.KFormers.roberta-large.distilbert.FewRel.32.3e-5.256.KFormers.10 &
# 2 GPUs:
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 128 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 64 --valid_batch_size 512 --max_steps -1 --epochs 3 --learning_rate 1e-5 --eval_steps -1 --task_name sst2 --fp16 True --add_knowledge True --update_K_module False --data_dir ../data/gluedata/SST2 --output_dir AM_146 > log.KFormers.roberta-large.distilbert.SST2.64.1e-5.128.KFormers &
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 128 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 128 --valid_batch_size 512 --max_steps -1 --epochs 3 --learning_rate 1e-5 --eval_steps -1 --task_name eem --fp16 True --add_knowledge True --update_K_module False --data_dir ../data/EEM/EEM_EntityPane_json --output_dir AM_146 > log.KFormers.roberta-large.distilbert.EEM.64.1e-5.128.KFormers &
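The six runs above mirror the no-knowledge group with --add_knowledge True. A hedged convenience for monitoring them (log paths copied from the commands above): tail several nohup logs at once while they train:
tail -f logs/log.KFormers.roberta-large.distilbert.OpenEntity.4.1e-5.256.64 \
  logs/log.KFormers.roberta-large.distilbert.FIGER.32.256.64.1e-5.KFormers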
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --post_trained_checkpoint ../phase2_pretrain_KFormers/output/checkpoint-30000/ --backbone_seq_length 256 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 32 --valid_batch_size 128 --max_steps -1 --epochs 10 --learning_rate 1e-5 --eval_steps -1 --warmup_steps -1 --task_name openentity --fp16 True --add_knowledge True --update_K_module False --data_dir ../data/knowledge/OpenEntity --output_dir PM_13 > logs/log.KFormers.post-3W.distilbert.OpenEntity.32.1e-5.256.64.KFormers &
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --post_trained_checkpoint ../phase2_pretrain_KFormers/output/checkpoint-30000/ --backbone_seq_length 256 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 32 --valid_batch_size 256 --max_steps -1 --epochs 3 --learning_rate 1e-5 --eval_steps 500 --warmup_steps -1 --gradient_accumulation_steps 1 --task_name figer --fp16 True --add_knowledge True --update_K_module False --data_dir ../data/knowledge/FIGER --output_dir AM_20 > logs/log.KFormers.post-3W.distilbert.FIGER.32.256.64.1e-5.KFormers &
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --post_trained_checkpoint ../phase2_pretrain_KFormers/output/checkpoint-30000/ --backbone_seq_length 256 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 32 --valid_batch_size 128 --max_steps -1 --epochs 3 --learning_rate 1e-5 --eval_steps -1 --warmup_steps -1 --logging_steps 500 --task_name tacred --fp16 True --add_knowledge True --update_K_module False --data_dir ../data/knowledge/tacred --output_dir TR_14 > logs/log.KFormers.post-3W.distilbert.TACRED.32.1e-5.256.KFormers.3 &
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --post_trained_checkpoint ../phase2_pretrain_KFormers/output/checkpoint-30000/ --backbone_seq_length 256 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 32 --valid_batch_size 128 --max_steps -1 --epochs 10 --learning_rate 3e-5 --eval_steps -1 --warmup_steps -1 --task_name fewrel --fp16 True --add_knowledge True --update_K_module False --data_dir ../data/knowledge/fewrel --output_dir FR_14 > logs/log.KFormers.post-3W.distilbert.FewRel.32.3e-5.256.KFormers.10 &
nohup python run_KFormers.py --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --post_trained_checkpoint ../phase2_pretrain_KFormers/output/checkpoint-30000/ --backbone_seq_length 128 --knowledge_seq_length 64 --max_num_entity 1 --train_batch_size 128 --valid_batch_size 512 --max_steps -1 --epochs 3 --learning_rate 1e-5 --eval_steps -1 --task_name eem --fp16 True --add_knowledge True --update_K_module False --data_dir ../data/EEM/EEM_EntityPane_json --output_dir AM_146 > log.KFormers.post-3W.distilbert.EEM.64.1e-5.128.KFormers &
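All five post-trained runs above load the same checkpoint directory. A hedged guard (path copied from the commands above) that fails fast in a launch script instead of leaving nohup jobs that die on startup:
ckpt=../phase2_pretrain_KFormers/output/checkpoint-30000/
[ -d "$ckpt" ] || { echo "missing post-trained checkpoint: $ckpt" >&2; exit 1; }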
nohup python run_finetune_qualityControl_adapter_roberta.py --model_type roberta --model_name_or_path roberta-large --do_train --do_eval --fp16 --task_name eem --output_dir ./output --max_seq_length 32 --per_gpu_eval_batch_size 512 --per_gpu_train_batch_size 128 --learning_rate 2e-5 --gradient_accumulation_steps 1 --max_steps -1 --num_train_epochs 3 --overwrite_output_dir --overwrite_cache --warmup_steps 317 --save_steps -1 --logging_steps 500 --freeze_adapter "True" --adapter_size 768 --adapter_list "0,11,22" --fusion_mode "add" --adapter_skip_layers 0 --meta_fac_adaptermodel ../checkpoints/fac-adapter/pytorch_model.bin --data_dir ../data/EEM/raw > logs/kadapter.roberta-large.EEM.2e-5 &
CUDA_VISIBLE_DEVICES='0,1' nohup python -m torch.distributed.launch --nproc_per_node=2 run_finetune_qualityControl_adapter_roberta.py --model_type roberta --model_name_or_path roberta-large --do_train --do_eval --fp16 --task_name sst2 --output_dir ./PM359 --max_seq_length 128 --per_gpu_eval_batch_size 256 --per_gpu_train_batch_size 64 --learning_rate 2e-5 --gradient_accumulation_steps 1 --max_steps -1 --num_train_epochs 3 --overwrite_output_dir --overwrite_cache --warmup_steps 47 --save_steps -1 --logging_steps 500 --freeze_adapter "True" --adapter_size 768 --adapter_list "0,11,22" --fusion_mode "add" --adapter_skip_layers 0 --meta_fac_adaptermodel ../checkpoints/fac-adapter/pytorch_model.bin --data_dir ../data/gluedata/SST2 > logs/kadapter.roberta-large.SST2.2e-5 &
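# Note on the distributed run above: with --nproc_per_node=2 and --per_gpu_train_batch_size 64
# (gradient accumulation 1), each optimizer step sees 2 x 64 = 128 examples, matching the
# batch of 128 used by the single-GPU EEM run.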
python run_luke.py --data_dir ../data/knowledge/OpenEntity --output_dir pm_20 --do_train --baseline_model_name roberta-large --checkpoint_file ../checkpoints/luke_large_500k --train_batch_size 4 --eval_batch_size 64 --gradient_accumulation_steps 1 --learning_rate 1e-5 --num_train_epochs 3 --fp16 True --seed 12
python run_finetune.py --do_train --fp16 --task_name openentity --model_type roberta --model_name_or_path roberta-large --max_seq_length 256 --per_gpu_train_batch_size 4 --per_gpu_eval_batch_size 128 --learning_rate 1e-5 --seed 42 --warmup_steps -1 --num_train_epochs 3 --max_steps -1 --logging_steps 50 --evaluate_steps 50 --data_dir ../data/knowledge/OpenEntity --output_dir PM_33 > logs/openentity.lr=1e-5.seed=42
nohup python run_finetune.py --do_train --fp16 --task_name figer --model_type roberta --model_name_or_path roberta-large --max_seq_length 128 --per_gpu_train_batch_size 64 --per_gpu_eval_batch_size 256 --learning_rate 2e-5 --seed 42 --warmup_steps -1 --num_train_epochs 2 --gradient_accumulation_steps 8 --max_steps -1 --logging_steps 50 --evaluate_steps 50 --data_dir ../data/knowledge/FIGER --output_dir PM_46 > figer.lr=2e-5.seed=42.bsz=64x8 &
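# Note on the FIGER run above: 64 per-GPU batch x 8 gradient-accumulation steps gives an
# effective batch of 512; that product is what the 64x8 tag in the log name records.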
nohup python run_finetune.py --do_train --fp16 --task_name fewrel --model_type roberta --model_name_or_path roberta-large --max_seq_length 256 --per_gpu_train_batch_size 32 --per_gpu_eval_batch_size 128 --learning_rate 1e-5 --seed 42 --warmup_steps -1 --num_train_epochs 10 --max_steps -1 --logging_steps 50 --evaluate_steps 50 --data_dir ../data/knowledge/fewrel --output_dir AM_28 &
nohup python run_finetune.py --do_train --fp16 --task_name tacred --model_type roberta --model_name_or_path roberta-large --max_seq_length 256 --per_gpu_train_batch_size 32 --per_gpu_eval_batch_size 128 --learning_rate 1e-5 --seed 42 --warmup_steps -1 --num_train_epochs 5 --max_steps -1 --logging_steps 50 --evaluate_steps 500 --data_dir ../data/knowledge/tacred --output_dir TR_13 &
nohup python run_finetune.py --do_train --fp16 --task_name sst2 --model_type roberta --model_name_or_path roberta-large --max_seq_length 128 --per_gpu_train_batch_size 128 --per_gpu_eval_batch_size 512 --learning_rate 5e-5 --seed 42 --warmup_steps -1 --num_train_epochs 3 --max_steps -1 --logging_steps 50 --evaluate_steps 50 --data_dir=../data/gluedata/SST2 --output_dir AM_37 &
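The five roberta-large baselines above all fix --seed 42. A hedged sketch of a seed sweep on OpenEntity (the seed values and the PM_33.$seed output naming are illustrative):
mkdir -p logs
for seed in 42 52 62; do
  python run_finetune.py --do_train --fp16 --task_name openentity --model_type roberta \
    --model_name_or_path roberta-large --max_seq_length 256 --per_gpu_train_batch_size 4 \
    --per_gpu_eval_batch_size 128 --learning_rate 1e-5 --seed $seed --warmup_steps -1 \
    --num_train_epochs 3 --max_steps -1 --logging_steps 50 --evaluate_steps 50 \
    --data_dir ../data/knowledge/OpenEntity --output_dir PM_33.$seed \
    > logs/openentity.lr=1e-5.seed=$seed
done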
python run_finetune_openentity_adapter.py --model_type roberta --model_name_or_path roberta-large --model_name=roberta-large --config_name roberta-large --do_train --do_eval --task_name openentity --data_dir=../data/OpenEntity --output_dir=./kadapter_openentity --comment 'combine-adapter-trf' --max_seq_length 256 --per_gpu_eval_batch_size 128 --per_gpu_train_batch_size 4 --learning_rate=1e-5 --gradient_accumulation_steps 1 --num_train_epochs 3 --max_steps -1 --logging_steps 100 --warmup_steps 0.1 --save_steps -1 --freeze_bert="" --freeze_adapter="True" --adapter_size 768 --adapter_list "0,11,22" --adapter_skip_layers 0 --meta_fac_adaptermodel="../checkpoints/fac-adapter/pytorch_model.bin" --meta_lin_adaptermodel="../checkpoints/lin-adapter/pytorch_model.bin" --seed 42
nohup python run_finetune_figer_adapter.py --do_train --do_eval --model_type roberta --model_name_or_path roberta-large --config_name roberta-large --task_name figer --data_dir ../data/FIGER --output_dir ./kadapter_figer --comment 'figer-adapter' --max_seq_length 128 --per_gpu_eval_batch_size 128 --per_gpu_train_batch_size 128 --learning_rate 2e-5 --gradient_accumulation_steps 8 --max_steps -1 --logging_steps 50 --num_train_epochs 2 --model_name roberta-large --warmup_steps 0.1 --save_steps -1 --freeze_bert="" --freeze_adapter="True" --adapter_size 768 --adapter_list "0,11,22" --adapter_skip_layers 0 --task_adapter '' --meta_fac_adaptermodel="../checkpoints/fac-adapter/pytorch_model.bin" --meta_lin_adaptermodel="../checkpoints/lin-adapter/pytorch_model.bin" --seed 42 > kadapter.figer.bsz=128x8 &
python run_finetune_TACRED_adapter.py --do_train --do_eval --model_type roberta --model_name_or_path roberta-large --config_name roberta-large --model_name roberta-large --task_name fewrel --data_dir=../data/fewrel --output_dir=./kadapter_fewrel --comment 'combine-adapter-dif-trf' --max_seq_length 256 --per_gpu_train_batch_size 16 --per_gpu_eval_batch_size 128 --learning_rate 2e-5 --gradient_accumulation_steps 2 --logging_steps 50 --max_steps -1 --num_train_epochs 10 --warmup_steps 0.1 --negative_sample 45000 --save_steps -1 --freeze_bert="" --freeze_adapter="True" --adapter_size 768 --adapter_list "0,11,22" --adapter_skip_layers 0 --meta_fac_adaptermodel="../checkpoints/fac-adapter/pytorch_model.bin" --meta_lin_adaptermodel="../checkpoints/lin-adapter/pytorch_model.bin" --seed 42
python run_finetune_TACRED_adapter.py --do_train --do_eval --model_type roberta --model_name_or_path roberta-large --config_name roberta-large --model_name roberta-large --task_name tacred --data_dir=../data/tacred --output_dir=./kadapter_tacred --comment 'combine-adapter-dif-trf' --max_seq_length 256 --per_gpu_train_batch_size 16 --per_gpu_eval_batch_size 128 --learning_rate 2e-5 --gradient_accumulation_steps 2 --logging_steps 500 --max_steps -1 --num_train_epochs 5 --warmup_steps 0.1 --negative_sample 45000 --save_steps -1 --freeze_bert="" --freeze_adapter="True" --adapter_size 768 --adapter_list "0,11,22" --adapter_skip_layers 0 --meta_fac_adaptermodel="../checkpoints/fac-adapter/pytorch_model.bin" --meta_lin_adaptermodel="../checkpoints/lin-adapter/pytorch_model.bin" --seed 42
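Every K-Adapter command above loads the fac-adapter and lin-adapter weights from fixed paths. A hedged guard (paths copied from the commands above) to verify both files before launching:
for f in ../checkpoints/fac-adapter/pytorch_model.bin ../checkpoints/lin-adapter/pytorch_model.bin; do
  [ -f "$f" ] || echo "missing adapter checkpoint: $f" >&2
done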
python run_KFormers.py --task_name openentity --fp16 True --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 256 --knowledge_seq_length 32 --qid_file ../data/knowledge/pretrain/wikipedia_description/wikidata5m_des.wikipedia --max_ent_num 4 --max_des_num 1 --train_batch_size 4 --gradient_accumulation_steps 1 --valid_batch_size 256 --max_steps -1 --update_K_module False --data_dir ../data/knowledge/OpenEntity --output_dir PM_13 --epochs 3 --eval_steps 50 --learning_rate 1e-5 --warmup_steps -1 --alpha 1.0 --beta 0.01 --seed 42
nohup python -m torch.distributed.launch --nproc_per_node=4 run_KFormers.py --task_name figer --fp16 True --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 128 --knowledge_seq_length 32 --qid_file ../data/knowledge/pretrain/wikipedia_description/wikidata5m_des.wikipedia --max_ent_num 2 --max_des_num 1 --train_batch_size 64 --gradient_accumulation_steps 8 --valid_batch_size 256 --max_steps -1 --update_K_module False --data_dir ../data/knowledge/FIGER --output_dir FIGER_output --epochs 2 --eval_steps 5 --save_steps -1 --warmup_steps -1 --learning_rate 2e-5 --seed 42 > task-a.figer.64216 &
nohup python -m torch.distributed.launch --nproc_per_node=4 run_KFormers.py --task_name figer --fp16 True --model_type KFormers --backbone_model_type luke --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 128 --knowledge_seq_length 32 --qid_file ../data/knowledge/pretrain/wikipedia_description/wikidata5m_des.wikipedia --max_ent_num 2 --max_des_num 1 --train_batch_size 64 --gradient_accumulation_steps 8 --valid_batch_size 256 --max_steps -1 --update_K_module False --data_dir ../data/knowledge/FIGER --output_dir FIGER_output --epochs 2 --eval_steps 5 --save_steps -1 --warmup_steps -1 --learning_rate 2e-5 --seed 42 > task-a.figer.luke.64216 &
python run_KFormers.py --task_name fewrel --fp16 True --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 256 --knowledge_seq_length 32 --qid_file ../data/knowledge/pretrain/wikipedia_description/wikidata5m_des.wikipedia --max_ent_num 4 --max_des_num 1 --train_batch_size 32 --valid_batch_size 256 --gradient_accumulation_steps 1 --max_steps -1 --update_K_module False --data_dir ../data/knowledge/fewrel --output_dir FR_14 --epochs 10 --eval_steps 50 --warmup_steps -1 --learning_rate 1e-5 --seed 42
python run_KFormers.py --task_name tacred --fp16 True --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 256 --knowledge_seq_length 32 --qid_file ../data/knowledge/pretrain/wikipedia_description/wikidata5m_des.wikipedia --max_ent_num 4 --max_des_num 1 --train_batch_size 32 --valid_batch_size 256 --gradient_accumulation_steps 1 --max_steps -1 --update_K_module False --data_dir ../data/knowledge/tacred --output_dir FR_15 --epochs 5 --learning_rate 1e-5 --eval_steps 500 --warmup_steps -1 --seed 42
python run_KFormers.py --task_name sst2 --fp16 True --model_type KFormers --backbone_model_type roberta --knowledge_model_type distilbert --backbone_model_name_or_path roberta-large --knowledge_model_name_or_path distilbert-base-uncased --backbone_seq_length 128 --knowledge_seq_length 32 --qid_file ../data/knowledge/pretrain/wikipedia_description/wikidata5m_des.wikipedia --max_ent_num 4 --max_des_num 1 --train_batch_size 64 --valid_batch_size 256 --gradient_accumulation_steps 1 --max_steps -1 --update_K_module False --data_dir ../data/gluedata/SST2/SST2_tagme --output_dir S_15 --epochs 3 --learning_rate 1e-5 --eval_steps 50 --warmup_steps -1 --seed 42
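The description-based KFormers runs above all read entity descriptions from the same --qid_file. A hedged guard (path copied from the commands above) that catches a missing or empty file before any job starts:
qid_file=../data/knowledge/pretrain/wikipedia_description/wikidata5m_des.wikipedia
[ -s "$qid_file" ] || echo "qid file missing or empty: $qid_file" >&2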
python -m cli --model-file=../checkpoints/luke_large_500k.tar.gz --output-dir=./output entity-typing run --data-dir ../data/OpenEntity --train-batch-size 4 --gradient-accumulation-steps 1 --learning-rate $lr --num-train-epochs 3 --fp16 --seed $seed > logs_openentity/len=256.lr=$lr.seed=$seed
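The LUKE command above expects $lr and $seed to be set in the shell. A hedged wrapper (the grid values are illustrative; 12 and 42 are the seeds seen elsewhere in this file) that sets them and keeps one log per grid cell:
mkdir -p logs_openentity
for lr in 1e-5 2e-5; do
  for seed in 12 42; do
    python -m cli --model-file=../checkpoints/luke_large_500k.tar.gz --output-dir=./output \
      entity-typing run --data-dir ../data/OpenEntity --train-batch-size 4 \
      --gradient-accumulation-steps 1 --learning-rate $lr --num-train-epochs 3 --fp16 \
      --seed $seed > logs_openentity/len=256.lr=$lr.seed=$seed
  done
done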