import math
import random

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import distributions as pyd


def get_goal_image(env, goal, width=84, height=84, solve_iters=100):
    """
    Given a goal, set the simulator state to the goal.
    This is used to render snapshots of the goal.
    Goals are always xyz coordinates, either of the gripper end effector or of the object.
    """
    if env.has_object:
        object_qpos = env.sim.data.get_joint_qpos('object0:joint')
        object_qpos[:3] = goal
        env.sim.data.set_joint_qpos('object0:joint', object_qpos)
    env.sim.data.set_mocap_pos('robot0:mocap', goal)
    env.sim.forward()
    # Step the simulation so the gripper/object settles at the goal pose.
    for _ in range(solve_iters):
        env.sim.step()
    obs_img = env.render(mode='rgb_array', width=width, height=height).T.copy()
    assert obs_img.shape == env.observation_space['image_observation'].shape
    return obs_img
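
# Example usage (a minimal sketch, assuming a MuJoCo-based Fetch-style env that
# exposes `has_object`, `sim`, and an 'image_observation' space; the env id
# below is hypothetical):
#
#     env = gym.make('FetchPushImage-v0')
#     env.reset()
#     goal_img = get_goal_image(env, env.goal.copy(), width=84, height=84)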


def mlp(input_dimensions, output_dimensions, hidden_sizes=[400, 300], activation=nn.ReLU(), out_activation=None):
    """Build a fully connected network with the given layer sizes and activations."""
    layer_sizes = [input_dimensions] + hidden_sizes + [output_dimensions]
    modules = []
    for i in range(len(layer_sizes) - 1):
        modules.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1]))
        modules.append(activation)
    modules.pop()  # remove the activation after the output layer
    if out_activation is not None:
        modules.append(out_activation)
    return nn.Sequential(*modules)
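
# Example usage (a minimal sketch; the dimensions below are illustrative):
#
#     actor = mlp(input_dimensions=17, output_dimensions=6, out_activation=nn.Tanh())
#     actions = actor(torch.randn(32, 17))  # -> shape (32, 6), values in (-1, 1)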


def set_seed_everywhere(seed):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
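
# Example usage: call once at the start of a run, before networks are created
# or data is sampled, e.g. `set_seed_everywhere(42)`.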


def to_np(t):
    """Convert a torch tensor to a numpy array, handling None and empty tensors."""
    if t is None:
        return None
    elif t.nelement() == 0:
        return np.array([])
    else:
        return t.cpu().detach().numpy()


def soft_update_params(net, target_net, tau):
    for param, target_param in zip(net.parameters(), target_net.parameters()):
        target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)
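
# Example usage (a sketch of Polyak averaging in a training loop; `critic`,
# `update_critic`, and the value of `tau` are illustrative):
#
#     critic_target = copy.deepcopy(critic)
#     for step in range(num_train_steps):
#         update_critic(batch)                             # gradient step on critic
#         soft_update_params(critic, critic_target, tau=0.005)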


def weight_init(m):
    """Custom weight init for Conv2D and Linear layers."""
    if isinstance(m, nn.Linear):
        nn.init.orthogonal_(m.weight.data)
        if hasattr(m.bias, 'data'):
            m.bias.data.fill_(0.0)
    elif isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        gain = nn.init.calculate_gain('relu')
        nn.init.orthogonal_(m.weight.data, gain)
        if hasattr(m.bias, 'data'):
            m.bias.data.fill_(0.0)
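
# Example usage: `nn.Module.apply` walks every submodule, so a single call
# initializes all Linear/Conv layers in a network:
#
#     net = mlp(10, 2)
#     net.apply(weight_init)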


class TanhTransform(pyd.transforms.Transform):
    domain = pyd.constraints.real
    codomain = pyd.constraints.interval(-1.0, 1.0)
    bijective = True
    sign = +1

    def __init__(self, cache_size=1):
        super().__init__(cache_size=cache_size)

    @staticmethod
    def atanh(x):
        # atanh(x) = 0.5 * log((1 + x) / (1 - x)), written with log1p for stability.
        return 0.5 * (x.log1p() - (-x).log1p())

    def __eq__(self, other):
        return isinstance(other, TanhTransform)

    def _call(self, x):
        return x.tanh()

    def _inverse(self, y):
        # We do not clamp to the boundary here, as it may degrade the performance
        # of certain algorithms; one should use `cache_size=1` instead.
        return self.atanh(y)

    def log_abs_det_jacobian(self, x, y):
        # log|d tanh(x)/dx| = log(1 - tanh(x)^2) = 2 * (log 2 - x - softplus(-2x)),
        # which is more numerically stable than the direct form. See:
        # https://github.com/tensorflow/probability/commit/ef6bb176e0ebd1cf6e25c6b5cecdd2428c22963f#diff-e120f70e92e6741bca649f04fcd907b7
        return 2. * (math.log(2.) - x - F.softplus(-2. * x))


class SquashedNormal(pyd.transformed_distribution.TransformedDistribution):
    """A Normal distribution whose samples are squashed into (-1, 1) by tanh."""

    def __init__(self, loc, scale):
        self.loc = loc
        self.scale = scale
        self.base_dist = pyd.Normal(loc, scale)
        transforms = [TanhTransform()]
        super().__init__(self.base_dist, transforms)

    @property
    def mean(self):
        # Note: this returns tanh(loc), the transformed base mean, which is not
        # the exact mean of the squashed distribution in general.
        mu = self.loc
        for tr in self.transforms:
            mu = tr(mu)
        return mu
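
# Example usage (a minimal SAC-style sketch; the shapes are illustrative):
#
#     mu = torch.zeros(32, 6)
#     std = torch.ones(32, 6)
#     dist = SquashedNormal(mu, std)
#     action = dist.rsample()                    # differentiable sample in (-1, 1)
#     log_prob = dist.log_prob(action).sum(-1)   # summed over action dimensions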