a3c_params.py

import torch

from carbon_reader import max_carbon, min_carbon

# Global parameters
IS_CENTRAL = True
model_type = 0
budget = 40                   # total time budget
max_hv = 4150.7236328125      # maximum attainable hypervolume, used for normalisation
continuous = False            # discrete vs. continuous action space

# Checkpoint file names
actor_checkpoint = 'actor_continuous.pth' if continuous else 'actor_params.pth'
critic_checkpoint = 'critic_continuous.pth' if continuous else 'critic_params.pth'

# State and action dimensions
state_len = 4
action_len = 10

def set_state(remaining_budget, carbon_trace, cur_hv, t_samples):
    """
    Build the state tensor for the A3C agent.

    Args:
        remaining_budget (float): Remaining time budget
        carbon_trace (float): Next carbon trace
        cur_hv (float): Current hypervolume
        t_samples (int): Total number of samples

    Returns:
        torch.Tensor: State tensor of shape (1, state_len)
    """
    relative_carbon_rate = (carbon_trace - min_carbon) / (max_carbon - min_carbon)
    hv_rate = cur_hv / max_hv
    remaining_budget_rate = remaining_budget / budget
    # Budget and HV are normalised to [0, 1]; the carbon term is centred on zero.
    state = torch.tensor([[remaining_budget_rate, relative_carbon_rate - 0.5, hv_rate, t_samples]])
    return state.cuda() if torch.cuda.is_available() else state
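
# Example (illustrative): with budget = 40, a call such as
#   set_state(remaining_budget=30, carbon_trace=min_carbon, cur_hv=max_hv / 2, t_samples=12)
# yields [[0.75, -0.5, 0.5, 12.0]]; because the carbon term is centred on zero,
# below-average carbon intensity maps to negative values.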

def get_action(action, action_len=action_len):
    """
    Convert a raw discrete action into a GPU-allocation rate for Vanilla NAS.

    Args:
        action (float): Raw action value
        action_len (int): Action space dimension

    Returns:
        float: Normalised action value between 0 and 1
    """
    return action / action_len
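
# Example (illustrative): with action_len = 10, get_action(7) returns 0.7,
# i.e. a 0.7 GPU-allocation rate for Vanilla NAS.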

# Reward hyperparameters
alpha = 20             # hypervolume-improvement weight
beta = 2 / max_carbon  # carbon-cost weight
theta = 0.05           # unused in the current implementation
gamma = 0.05           # per-sample weight
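
# Putting the weights together, the per-step reward computed in get_reward below is:
#   reward = alpha * step_ratio * (cur_hv - prev_hv) / max_hv
#            - beta * carbon_cost + gamma * n_samples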

def get_reward(prev_hv, cur_hv, carbon_cost, n_samples, remaining_budget,
               alpha=alpha, beta=beta, theta=theta):
    """
    Compute the per-step reward from three parts:
      1. hypervolume improvement, weighted more heavily late in the run,
      2. carbon cost of this step (penalty),
      3. number of samples finished in this step.

    Args:
        prev_hv (float): Previous hypervolume
        cur_hv (float): Current hypervolume
        carbon_cost (float): Carbon cost of the current step
        n_samples (int): Number of finished samples in this step
        remaining_budget (float): Remaining time budget
        alpha (float): Hypervolume improvement weight
        beta (float): Carbon cost weight
        theta (float): Unused in the current implementation

    Returns:
        tuple: Reward tensor and a tensor of the individual reward components
    """
    # Normalise hypervolumes so the improvement is on a [0, 1] scale.
    cur_hv = cur_hv / max_hv
    prev_hv = prev_hv / max_hv
    hv_improvement = cur_hv - prev_hv

    # step_ratio grows linearly from 0.8 (start) to 1.6 (budget exhausted),
    # so late-stage HV improvements count more.
    cur_step = budget - remaining_budget
    step_ratio = (cur_step / budget) * 0.8 + 0.8

    hv_reward = hv_improvement * alpha * step_ratio
    carbon_reward = -carbon_cost * beta
    samples_reward = n_samples * gamma
    print(f'hv_improvement reward is {hv_reward}')
    print(f'carbon_cost reward is {carbon_reward}')
    print(f'n_samples reward is {samples_reward}')

    reward = hv_reward + carbon_reward + samples_reward
    print(f'total reward is {reward}')

    reward_tensor = torch.tensor([reward])
    components_tensor = torch.tensor([hv_improvement, -carbon_cost, cur_hv,
                                      n_samples, cur_step, budget])
    if torch.cuda.is_available():
        return reward_tensor.cuda(), components_tensor.cuda()
    return reward_tensor, components_tensor
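

# Minimal usage sketch (illustrative values only; a real caller would take
# carbon_trace from carbon_reader and hypervolumes from the NAS search loop):
if __name__ == '__main__':
    state = set_state(remaining_budget=30, carbon_trace=min_carbon,
                      cur_hv=0.5 * max_hv, t_samples=12)
    print(f'state: {state}')
    rate = get_action(action=7)  # discrete action 7 of 10 -> allocation rate 0.7
    print(f'allocation rate: {rate}')
    reward, components = get_reward(prev_hv=0.48 * max_hv, cur_hv=0.50 * max_hv,
                                    carbon_cost=min_carbon, n_samples=3,
                                    remaining_budget=30)
    print(f'reward tensor: {reward}')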