-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
39 lines (34 loc) · 1.39 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import numpy as np
from tqdm import trange
def test(test_case, environment, compute_action,
         n_episodes=50, confidence_pass=50):
    """Roll out a policy and assert that its average reward is high enough.

    The policy is run for ``n_episodes`` episodes. The check passes when the
    lower bound of the 95% confidence interval of the mean episode reward is
    at least ``confidence_pass``.

    Args:
        test_case: Object exposing ``assertTrue(expr=..., msg=...)``
            (presumably a ``unittest.TestCase`` -- confirm against callers).
        environment: Object exposing ``reset()``, ``step(action)`` returning a
            4-tuple ``(next_state, reward, done, info)``, and ``close()``.
        compute_action: Callable mapping a state to the action to take.
        n_episodes: Number of episodes to simulate (default 50).
        confidence_pass: Threshold that the lower confidence bound of the
            mean episode reward must reach (default 50).

    Raises:
        ValueError: If ``n_episodes`` is smaller than 1.
    """
    if n_episodes < 1:
        # An empty sample has no mean; fail loudly instead of asserting on NaN.
        raise ValueError(f"n_episodes must be at least 1, got {n_episodes}")

    episode_rewards = []
    episodes = trange(n_episodes, desc='Episode: ', leave=True)
    try:
        for episode in episodes:
            episodes.set_description(f"Episode {episode}")
            done = False
            state = environment.reset()
            episode_reward = 0.
            while not done:
                action = compute_action(state)
                state, reward, done, _ = environment.step(action)
                episode_reward += reward
            episode_rewards.append(episode_reward)
    finally:
        # Release the environment even if the policy or the simulator raises.
        environment.close()

    # Assumption: the episode reward has a Gaussian distribution.
    # Goal: estimate its mean value by taking the sample mean.
    # Question: how close is the sample mean to the true mean value?
    #
    # Confidence level: 0.95
    # Confidence interval: (sample_mean - confidence, sample_mean + confidence)
    # Confidence: confidence = q_0.975 * std_reward / sqrt(n), q_0.975 ~= 1.96
    #
    # See "Philosophy of Science and Research Methodology" course
    avg_reward = np.mean(episode_rewards)
    confidence = np.std(episode_rewards) * 1.96 / np.sqrt(n_episodes)

    # Pass only if the whole confidence interval lies at or above the threshold.
    test_case.assertTrue(
        expr=avg_reward - confidence >= confidence_pass,
        msg=f"Avg reward ({avg_reward}) - Confidence ({confidence}) < Confidence pass ({confidence_pass})"
    )