"""
来自CCF小样本文本分类比赛某选手公布的baseline的trick
链接:https://discussion.datafountain.cn/articles/detail/2513
相应的操作请看:A100机器:/home/zhk/complte/CCF_fewshot_TextClassify/train.py
"""
import logging
import random
import warnings

import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import f1_score
from transformers import AdamW, get_linear_schedule_with_warmup

warnings.filterwarnings("ignore")


# Device setup
def setup_device(args):
    args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    args.n_gpu = torch.cuda.device_count()


# Random seed setup
def setup_seed(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)


# Logging setup
def setup_logging():
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                        level=logging.INFO)
    logger = logging.getLogger(__name__)
    return logger
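
# Usage sketch (illustrative, not part of the original training script): these
# helpers expect an argparse-style `args` namespace with at least a `seed`
# field; `setup_device` adds `device` and `n_gpu` attributes to it.
#
#   import argparse
#   parser = argparse.ArgumentParser()
#   parser.add_argument('--seed', type=int, default=42)
#   args = parser.parse_args()
#   setup_device(args)
#   setup_seed(args)
#   logger = setup_logging()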


# Layer-wise (discriminative) learning rates: non-BERT parameters use
# args.learning_rate, BERT parameters use args.bert_learning_rate, and
# bias/LayerNorm weights are excluded from weight decay.
def build_optimizer(args, model):
    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters()
                    if 'bert' not in n and not any(nd in n for nd in no_decay)],
         'lr': args.learning_rate, 'weight_decay': args.weight_decay},
        {'params': [p for n, p in model.named_parameters()
                    if 'bert' not in n and any(nd in n for nd in no_decay)],
         'lr': args.learning_rate, 'weight_decay': 0.0},
        {'params': [p for n, p in model.named_parameters()
                    if 'bert' in n and not any(nd in n for nd in no_decay)],
         'lr': args.bert_learning_rate, 'weight_decay': args.weight_decay},
        {'params': [p for n, p in model.named_parameters()
                    if 'bert' in n and any(nd in n for nd in no_decay)],
         'lr': args.bert_learning_rate, 'weight_decay': 0.0},
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps,
                                                num_training_steps=args.max_steps)
    return optimizer, scheduler
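
# Usage sketch (illustrative): the hyperparameter names below mirror the
# attributes referenced in build_optimizer; the concrete values are assumptions,
# not the competition settings.
#
#   from argparse import Namespace
#   opt_args = Namespace(learning_rate=5e-4, bert_learning_rate=2e-5,
#                        weight_decay=0.01, adam_epsilon=1e-8,
#                        warmup_steps=100, max_steps=1000)
#   optimizer, scheduler = build_optimizer(opt_args, model)
#   # per step: loss.backward(); optimizer.step(); scheduler.step(); optimizer.zero_grad()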


def evaluate(predictions, labels):
    # predictions and labels are both flat lists of class ids (36 classes in total)
    label_counts = dict()
    no_ignores_2 = list()   # classes with more than 2 gold examples
    no_ignores_4 = list()   # classes with more than 4 gold examples
    for key in labels:
        label_counts[key] = label_counts.get(key, 0) + 1
    for i in range(36):
        if i in label_counts:
            if label_counts[i] > 2:
                no_ignores_2.append(i)
            if label_counts[i] > 4:
                no_ignores_4.append(i)
    f1_macro = f1_score(labels, predictions, average='macro')
    f1_micro = f1_score(labels, predictions, average='micro')
    f1_weight = f1_score(labels, predictions, average='weighted')
    # macro F1 restricted to classes that are not too rare in the gold labels
    f1_macro_2 = f1_score(labels, predictions, labels=no_ignores_2, average='macro')
    f1_macro_4 = f1_score(labels, predictions, labels=no_ignores_4, average='macro')
    eval_results = {'f1_macro': f1_macro, 'f1_micro': f1_micro, 'f1_weight': f1_weight,
                    'f1_macro_2': f1_macro_2, 'f1_macro_4': f1_macro_4}
    return eval_results
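
# Usage sketch (illustrative values): both arguments are plain lists of integer
# class ids, e.g. the argmax of the model logits.
#
#   gold = [0] * 5 + [1] * 5 + [2] * 3
#   pred = [0] * 4 + [1] * 6 + [2] * 3
#   metrics = evaluate(pred, gold)
#   print(metrics['f1_macro'], metrics['f1_macro_2'])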


# FGM adversarial training (Fast Gradient Method): perturb the word embeddings
# along the gradient direction, then restore them after the adversarial backward pass.
class FGM:
    def __init__(self, model: nn.Module, eps=1.):
        self.model = (
            model.module if hasattr(model, "module") else model
        )
        self.eps = eps
        self.backup = {}

    # only attack the word embedding
    def attack(self, emb_name='word_embeddings'):
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name:
                self.backup[name] = param.data.clone()
                norm = torch.norm(param.grad)
                if norm != 0 and not torch.isnan(norm):
                    r_at = self.eps * param.grad / norm
                    param.data.add_(r_at)

    def restore(self, emb_name='word_embeddings'):
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name:
                assert name in self.backup
                param.data = self.backup[name]
        self.backup = {}
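
# Usage sketch for FGM inside a training step (`model`, `batch` and
# `compute_loss` are placeholders, not from the original code):
#
#   fgm = FGM(model, eps=1.0)
#   loss = compute_loss(model, batch)
#   loss.backward()                      # gradients on the clean input
#   fgm.attack()                         # add perturbation to word embeddings
#   loss_adv = compute_loss(model, batch)
#   loss_adv.backward()                  # accumulate adversarial gradients
#   fgm.restore()                        # restore the original embeddings
#   optimizer.step()
#   optimizer.zero_grad()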


# PGD adversarial training (Projected Gradient Descent): take several small
# perturbation steps on the embeddings, projecting back into an eps-ball
# around the original weights after every step.
class PGD:
    def __init__(self, model, eps=1., alpha=0.3):
        self.model = (
            model.module if hasattr(model, "module") else model
        )
        self.eps = eps
        self.alpha = alpha
        self.emb_backup = {}
        self.grad_backup = {}

    def attack(self, emb_name='embeddings', is_first_attack=False):
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name:
                if is_first_attack:
                    self.emb_backup[name] = param.data.clone()
                norm = torch.norm(param.grad)
                if norm != 0 and not torch.isnan(norm):
                    r_at = self.alpha * param.grad / norm
                    param.data.add_(r_at)
                    param.data = self.project(name, param.data)

    def restore(self, emb_name='embeddings'):
        for name, param in self.model.named_parameters():
            if param.requires_grad and emb_name in name:
                assert name in self.emb_backup
                param.data = self.emb_backup[name]
        self.emb_backup = {}

    def project(self, param_name, param_data):
        r = param_data - self.emb_backup[param_name]
        if torch.norm(r) > self.eps:
            r = self.eps * r / torch.norm(r)
        return self.emb_backup[param_name] + r

    def backup_grad(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None:
                self.grad_backup[name] = param.grad.clone()

    def restore_grad(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None:
                param.grad = self.grad_backup[name]
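
# Usage sketch for PGD inside a training step (K is the number of attack steps;
# `model`, `batch`, `compute_loss` and K=3 are placeholders, not original code):
#
#   pgd = PGD(model, eps=1.0, alpha=0.3)
#   K = 3
#   loss = compute_loss(model, batch)
#   loss.backward()                              # gradients on the clean input
#   pgd.backup_grad()                            # save the clean gradients
#   for t in range(K):
#       pgd.attack(is_first_attack=(t == 0))     # perturb embeddings in-place
#       if t != K - 1:
#           model.zero_grad()                    # intermediate steps use fresh gradients
#       else:
#           pgd.restore_grad()                   # last step: restore clean grads and accumulate
#       loss_adv = compute_loss(model, batch)
#       loss_adv.backward()
#   pgd.restore()                                # restore the original embeddings
#   optimizer.step()
#   optimizer.zero_grad()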


# EMA: keep an exponential moving average (shadow copy) of the trainable weights
# and swap it in at evaluation/inference time.
class EMA:
    def __init__(self, model, decay):
        self.model = model
        self.decay = decay
        self.shadow = {}
        self.backup = {}

    def register(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                self.shadow[name] = param.data.clone()

    def update(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                assert name in self.shadow
                new_average = (1.0 - self.decay) * param.data + self.decay * self.shadow[name]
                self.shadow[name] = new_average.clone()

    def apply_shadow(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                assert name in self.shadow
                self.backup[name] = param.data
                param.data = self.shadow[name]

    def restore(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                assert name in self.backup
                param.data = self.backup[name]
        self.backup = {}
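
# Usage sketch for EMA (placeholders again; decay=0.999 is an assumed value):
#
#   ema = EMA(model, decay=0.999)
#   ema.register()                 # snapshot the weights once, after model init
#
#   # training step
#   optimizer.step()
#   ema.update()                   # refresh shadow weights after every optimizer step
#
#   # evaluation
#   ema.apply_shadow()             # temporarily load the averaged weights
#   # ... run validation / save checkpoint ...
#   ema.restore()                  # switch back to the raw training weights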