Skip to content

Commit

Permalink
Merge branch 'main' of github.com:lasgroup/simulation_transfer
Browse files: browse the repository at this point in the history
  • Loading branch information
jonasrothfuss committed Jan 4, 2024
2 parents 781d697 + 65603eb commit fc71297
Show file tree
Hide file tree
Showing 11 changed files with 350 additions and 160 deletions.
19 changes: 13 additions & 6 deletions experiments/data_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,10 @@ def get_rccar_recorded_data_new(encode_angle: bool = True, skip_first_n_points:
num_train_traj = 8
recordings_dir = [os.path.join(DATA_DIR, 'recordings_rc_car_v1')]
elif car_id == 2:
num_train_traj = 10
num_train_traj = 12
recordings_dir = [os.path.join(DATA_DIR, 'recordings_rc_car_v2'),
os.path.join(DATA_DIR, 'recordings_rc_car_v3')]
os.path.join(DATA_DIR, 'recordings_rc_car_v3'),
os.path.join(DATA_DIR, 'recordings_rc_car_v4')]
else:
raise ValueError(f"Unknown car id {car_id}")
files = [sorted(glob.glob(rd + '/*.pickle')) for rd in recordings_dir]
Expand All @@ -210,14 +211,20 @@ def get_rccar_recorded_data_new(encode_angle: bool = True, skip_first_n_points:

# load and shuffle transitions
transitions = _load_transitions(file_names)
indices = jax.random.permutation(key=jax.random.PRNGKey(9345), x=jnp.arange(0, len(transitions)))
transitions = [transitions[idx] for idx in indices]
# indices = jax.random.permutation(key=jax.random.PRNGKey(9345), x=jnp.arange(0, len(transitions)))
# transitions = [transitions[idx] for idx in indices]

# transform transitions into supervised learning datasets
prep_fn = partial(_rccar_transitions_to_dataset, encode_angles=encode_angle, skip_first_n=skip_first_n_points,
action_delay=action_delay, action_stacking=action_stacking)
x_train, y_train = map(lambda x: jnp.concatenate(x, axis=0), zip(*map(prep_fn, transitions[:num_train_traj])))
x_test, y_test = map(lambda x: jnp.concatenate(x, axis=0), zip(*map(prep_fn, transitions[num_train_traj:])))
x, y = map(lambda x: jnp.concatenate(x, axis=0), zip(*map(prep_fn, transitions)))
# x_test, y_test = map(lambda x: jnp.concatenate(x, axis=0), zip(*map(prep_fn, transitions[num_train_traj:])))
indices = jnp.arange(start=0, stop=x.shape[0], step=1)
indices = jax.random.shuffle(key=jax.random.PRNGKey(9345), x=indices)
x, y = x[indices], y[indices]
num_test_points = 20_000
x_train, y_train, x_test, y_test = x[:-num_test_points], y[:-num_test_points], \
x[-num_test_points:], y[-num_test_points:]
return x_train, y_train, x_test, y_test


Expand Down
2 changes: 1 addition & 1 deletion experiments/offline_rl_from_recorded_data/exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def experiment(horizon_len: int,
if high_fidelity:
outputscales_racecar = [0.008, 0.008, 0.009, 0.009, 0.05, 0.05, 0.20]
else:
outputscales_racecar = [0.008, 0.008, 0.01, 0.01, 0.08, 0.08, 0.5]
outputscales_racecar = [0.008, 0.008, 0.01, 0.01, 0.1, 0.1, 0.5]
sim = AdditiveSim(base_sims=[sim,
GaussianProcessSim(sim.input_size, sim.output_size,
output_scale=outputscales_racecar,
Expand Down
54 changes: 39 additions & 15 deletions experiments/offline_rl_from_recorded_data/launcher.py
Original file line number Diff line number Diff line change
@@ -1,79 +1,103 @@
import exp
from experiments.util import generate_run_commands, generate_base_command, dict_permutations

PROJECT_NAME = 'OfflineRLRunsSimConsecutive'
PROJECT_NAME = 'OfflineRLRunsGreyHW'

_applicable_configs = {
'horizon_len': [200],
'seed': list(range(5)),
'project_name': [PROJECT_NAME],
'sac_num_env_steps': [2_000_000],
'num_epochs': [50],
'max_train_steps': [100_000],
'min_train_steps': [40_000], # for HW 30_000 worked the best.
'max_train_steps': [40_000],
'min_train_steps': [40_000],
'learnable_likelihood_std': ['yes'],
'include_aleatoric_noise': [1],
'best_bnn_model': [1],
'best_policy': [1],
'margin_factor': [20.0],
'ctrl_cost_weight': [0.005],
'ctrl_diff_weight': [0.0],
'num_offline_collected_transitions': [20, 50, 100, 200, 400, 800, 1600, 2000, 2500, 10_000, 20_000],
'num_offline_collected_transitions': [20, 50, 100, 200, 400, 800, 1600, 2000, 2500, 5_000, 10_000, 20_000],
'test_data_ratio': [0.0],
'eval_on_all_offline_data': [1],
'eval_only_on_init_states': [1],
'share_of_x0s_in_sac_buffer': [0.5],
'bnn_batch_size': [32], # for HW 8 worked the best
'bnn_batch_size': [32],
'likelihood_exponent': [0.5],
'train_sac_only_from_init_states': [0],
'data_from_simulation': [1],
'data_from_simulation': [0],
'num_frame_stack': [3],
'bandwidth_svgd': [0.2],
'length_scale_aditive_sim_gp': [10.0],
'length_scale_aditive_sim_gp': [5.0],
'input_from_recorded_data': [1],
'obtain_consecutive_data': [0, 1],
'obtain_consecutive_data': [1],
'lr': [3e-4],
}

_applicable_configs_no_sim_prior = {'use_sim_prior': [0],
'use_grey_box': [0],
'use_sim_model': [0],
'high_fidelity': [0],
'predict_difference': [1],
'num_measurement_points': [8]
} | _applicable_configs
_applicable_configs_high_fidelity = {'use_sim_prior': [1],
'use_grey_box': [0],
'use_sim_model': [0],
'high_fidelity': [1],
'predict_difference': [1],
'num_measurement_points': [8]} | _applicable_configs
_applicable_configs_low_fidelity = {'use_sim_prior': [1],
'use_grey_box': [0],
'use_sim_model': [0],
'high_fidelity': [0],
'predict_difference': [1],
'num_measurement_points': [8]} | _applicable_configs

_applicable_configs_grey_box_low_fidelity = {'use_sim_prior': [0],
'high_fidelity': [0],
'use_grey_box': [1],
'predict_difference': [0],
'use_sim_model': [0],
'predict_difference': [1],
'num_measurement_points': [8]} | _applicable_configs

_applicable_configs_grey_box_high_fidelity = {'use_sim_prior': [0],
'high_fidelity': [1],
'use_grey_box': [1],
'predict_difference': [0],
'use_sim_model': [0],
'predict_difference': [1],
'num_measurement_points': [8]} | _applicable_configs

_applicable_configs_sim_model_high_fidelity = {'use_sim_prior': [0],
'high_fidelity': [1],
'use_grey_box': [0],
'use_sim_model': [1],
'predict_difference': [1],
'num_measurement_points': [8]} | _applicable_configs

_applicable_configs_sim_model_low_fidelity = {'use_sim_prior': [0],
'high_fidelity': [0],
'use_grey_box': [0],
'use_sim_model': [1],
'predict_difference': [1],
'num_measurement_points': [8]} | _applicable_configs

# all_flags_combinations = dict_permutations(_applicable_configs_no_sim_prior) + dict_permutations(
# _applicable_configs_high_fidelity) + dict_permutations(_applicable_configs_low_fidelity) + dict_permutations(
# _applicable_configs_grey_box)

all_flags_combinations = dict_permutations(_applicable_configs_no_sim_prior) + dict_permutations(
_applicable_configs_high_fidelity) + dict_permutations(_applicable_configs_low_fidelity) # + dict_permutations(
sim_flags = dict_permutations(_applicable_configs_no_sim_prior) + dict_permutations(
_applicable_configs_high_fidelity) + dict_permutations(_applicable_configs_low_fidelity) + \
dict_permutations(_applicable_configs_grey_box_low_fidelity) + \
dict_permutations(_applicable_configs_sim_model_low_fidelity)

# _applicable_configs_grey_box)
hw_flags = dict_permutations(_applicable_configs_no_sim_prior) + dict_permutations(
_applicable_configs_high_fidelity) + dict_permutations(_applicable_configs_low_fidelity) + \
dict_permutations(_applicable_configs_grey_box_high_fidelity) + \
dict_permutations(_applicable_configs_sim_model_high_fidelity)

all_flags_combinations += dict_permutations(_applicable_configs_grey_box_low_fidelity) + dict_permutations(
_applicable_configs_grey_box_high_fidelity)
all_flags_combinations = sim_flags


def main():
Expand Down
20 changes: 15 additions & 5 deletions experiments/online_rl_hardware/launcher.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
import online_rl_loop
from experiments.util import generate_run_commands, generate_base_command, dict_permutations


def main(args):
_applicable_configs = {
'prior': ['none_FVSGD', 'none_SVGD', 'high_fidelity', 'low_fidelity'], # 'high_fidelity_no_aditive_GP'],
'prior': ['none_FVSGD', 'high_fidelity', 'low_fidelity',
'low_fidelity_grey_box'],
'seed': list(range(5)),
'run_remote': [0],
'machine': ['local'],
'gpu': [1],
'wandb_tag': ['gpu' if args.num_gpus > 0 else 'cpu'],
'project_name': ['OnlineRLDebug3'],
'project_name': ['OnlineRLTestFull'],
'reset_bnn': [1],
'deterministic_policy': [1],
'initial_state_fraction': [0.5],
'bnn_train_steps': [40_000],
'sac_num_env_steps': [500_000],
'num_sac_envs': [128],
'num_env_steps': [100],
'num_f_samples': [512]
}

all_flags_combinations = dict_permutations(_applicable_configs)
Expand All @@ -25,8 +34,9 @@ def main(args):

if __name__ == '__main__':
import argparse

parser = argparse.ArgumentParser(description='Meta-BO run')
parser.add_argument('--num_cpus', type=int, default=2)
parser.add_argument('--num_cpus', type=int, default=1)
parser.add_argument('--num_gpus', type=int, default=1)
args = parser.parse_args()
main(args)
Loading

0 comments on commit fc71297

Please sign in to comment.