-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlr_sched.py
83 lines (66 loc) · 2.81 KB
/
lr_sched.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import math
import copy
def adjust_learning_rate(args, optimizer, glo_epoch):
"""Decay the learning rate with half-cycle cosine after warmup"""
lr = args.lr
# if glo_epoch < args.warmup_epochs:
# _lr = lr * glo_epoch / args.warmup_epochs
# else:
# _lr = args.min_lr + (lr - args.min_lr) * 0.5 * \
# (1. + math.cos(math.pi * (glo_epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs)))
_lr = args.min_lr + (lr - args.min_lr) * 0.5 * (1. + math.cos(math.pi * glo_epoch / args.epochs))
for param_group in optimizer.param_groups:
if "lr_decay" in param_group:
param_group["lr"] = _lr * param_group["lr_decay"]
else:
param_group["lr"] = _lr
def get_vit_lr_decay_rate(name, lr_decay_rate=1.0, num_layers=12):
"""
Calculate lr decay rate for different ViT blocks.
Args:
name (string): parameter name.
lr_decay_rate (float): base lr decay rate.
num_layers (int): number of ViT blocks.
Returns:
lr decay rate for the given parameter.
"""
layer_id = num_layers + 1
if name.startswith("backbone"):
if ".pos_embed" in name or ".patch_embed" in name: # absence of number
layer_id = 0
elif ".blocks." in name and ".residual." not in name:
layer_id = int(name[name.find(".blocks."):].split(".")[2]) + 1
return lr_decay_rate ** (num_layers + 1 - layer_id)
def build_param_group(args, model):
base_lr = args.lr
weight_decay = args.weight_decay
params = []
memo = set()
defaults = {}
if base_lr is not None:
defaults["lr"] = base_lr
if weight_decay is not None:
defaults["weight_decay"] = weight_decay
overrides = {'pos_embed': {'weight_decay': 0.0}}
for module_name, module in model.named_modules():
for module_param_name, value in module.named_parameters(recurse=False):
if not value.requires_grad:
continue
# Avoid duplicating parameters
if value in memo:
continue
memo.add(value)
hyperparams = copy.copy(defaults)
hyperparams["lr_decay"] = get_vit_lr_decay_rate(f"{module_name}.{module_param_name}", num_layers=12,
lr_decay_rate=0.7)
hyperparams["lr"] *= hyperparams["lr_decay"]
hyperparams.update(overrides.get(module_param_name, {}))
params.append({"params": [value], **hyperparams})
return params
if __name__ == '__main__':
lr = 1e-4
min_lr = 1e-5
epochs = 100
for glo_epoch in range(101):
_lr = min_lr + (lr - min_lr) * 0.5 * (1. + math.cos(math.pi * glo_epoch / epochs))
print(_lr)