From f978ac2065f1c54b284f7701872843c39a2cff1a Mon Sep 17 00:00:00 2001 From: ymahlau Date: Tue, 28 Nov 2023 11:31:19 +0100 Subject: [PATCH] Changes in training setup --- config/cfg_oc_proxy_0.yaml | 20 ++++++++++---------- config/cfg_oc_proxy_1.yaml | 20 ++++++++++---------- config/cfg_oc_proxy_2.yaml | 20 ++++++++++---------- config/cfg_oc_proxy_3.yaml | 20 ++++++++++---------- config/cfg_oc_proxy_4.yaml | 20 ++++++++++---------- config/debug_config.yaml | 14 +++++++------- scripts/training/generate_training_cfg_oc.py | 16 ++++++++-------- scripts/training/play_overcooked.py | 6 ++++-- scripts/training/script_start_training_oc.py | 14 +++++++------- src/cpp/source/overcooked.cpp | 8 ++++---- src/game/overcooked/config.py | 2 +- src/game/overcooked/overcooked.py | 2 +- src/trainer/az_inference_server.py | 5 +++-- 13 files changed, 85 insertions(+), 82 deletions(-) diff --git a/config/cfg_oc_proxy_0.yaml b/config/cfg_oc_proxy_0.yaml index 064d212..8325588 100644 --- a/config/cfg_oc_proxy_0.yaml +++ b/config/cfg_oc_proxy_0.yaml @@ -68,7 +68,7 @@ data: soup_delivery: 20 soup_pickup: 5 start_cooking: 3 - reward_scaling_factor: 0.5 + reward_scaling_factor: 1 single_temperature_input: true start_pos: __module__: src.misc.serialization @@ -109,16 +109,16 @@ data: id: 0 name: oc_proxy project_name: overcooked_cramped - updater_bucket_size: 100 + updater_bucket_size: 1000 wandb_mode: online - worker_episode_bucket_size: 25 - max_batch_size: 3000 + worker_episode_bucket_size: 2 + max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 max_cpu_worker: 11 - max_eval_per_worker: 6000 + max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet @@ -213,14 +213,14 @@ data: __name__: SaverConfig data: save_all_checkpoints: false - save_interval_sec: 30 + save_interval_sec: 300 single_sbr_temperature: true temperature_input: true updater_cfg: __module__: src.trainer.config __name__: UpdaterConfig data: - gradient_max_norm: 1.0 + gradient_max_norm: 100 mse_policy_loss: true optim_cfg: __module__: src.supervised.optim @@ -242,7 +242,7 @@ data: value: COSINE cyclic: false end_times_min: - - 30 + - 60 - 1400 init_temp: 0 sampling: false @@ -321,7 +321,7 @@ data: __name__: SbrMode value: NAGURNEY use_cpp: true - discount: 0.93 + discount: 0.9 eval_func_cfg: __module__: src.search.config __name__: InferenceServerEvalConfig @@ -346,7 +346,7 @@ data: __module__: src.game.values __name__: UtilityNorm value: FULL_COOP - search_iterations: 1 + search_iterations: 2 temperature: 1 use_symmetries: true hydra: diff --git a/config/cfg_oc_proxy_1.yaml b/config/cfg_oc_proxy_1.yaml index dd79829..8e9a037 100644 --- a/config/cfg_oc_proxy_1.yaml +++ b/config/cfg_oc_proxy_1.yaml @@ -68,7 +68,7 @@ data: soup_delivery: 20 soup_pickup: 5 start_cooking: 3 - reward_scaling_factor: 0.5 + reward_scaling_factor: 1 single_temperature_input: true start_pos: __module__: src.misc.serialization @@ -109,16 +109,16 @@ data: id: 1 name: oc_proxy project_name: overcooked_cramped - updater_bucket_size: 100 + updater_bucket_size: 1000 wandb_mode: online - worker_episode_bucket_size: 25 - max_batch_size: 3000 + worker_episode_bucket_size: 2 + max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 max_cpu_worker: 11 - max_eval_per_worker: 6000 + max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet @@ -213,14 +213,14 @@ data: __name__: SaverConfig data: save_all_checkpoints: false - save_interval_sec: 30 + save_interval_sec: 300 single_sbr_temperature: true temperature_input: true updater_cfg: __module__: src.trainer.config __name__: UpdaterConfig data: - gradient_max_norm: 1.0 + gradient_max_norm: 100 mse_policy_loss: true optim_cfg: __module__: src.supervised.optim @@ -242,7 +242,7 @@ data: value: COSINE cyclic: false end_times_min: - - 30 + - 60 - 1400 init_temp: 0 sampling: false @@ -321,7 +321,7 @@ data: __name__: SbrMode value: NAGURNEY use_cpp: true - discount: 0.93 + discount: 0.9 eval_func_cfg: __module__: src.search.config __name__: InferenceServerEvalConfig @@ -346,7 +346,7 @@ data: __module__: src.game.values __name__: UtilityNorm value: FULL_COOP - search_iterations: 1 + search_iterations: 2 temperature: 1 use_symmetries: true hydra: diff --git a/config/cfg_oc_proxy_2.yaml b/config/cfg_oc_proxy_2.yaml index ae146b2..7daf119 100644 --- a/config/cfg_oc_proxy_2.yaml +++ b/config/cfg_oc_proxy_2.yaml @@ -68,7 +68,7 @@ data: soup_delivery: 20 soup_pickup: 5 start_cooking: 3 - reward_scaling_factor: 0.5 + reward_scaling_factor: 1 single_temperature_input: true start_pos: __module__: src.misc.serialization @@ -109,16 +109,16 @@ data: id: 2 name: oc_proxy project_name: overcooked_cramped - updater_bucket_size: 100 + updater_bucket_size: 1000 wandb_mode: online - worker_episode_bucket_size: 25 - max_batch_size: 3000 + worker_episode_bucket_size: 2 + max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 max_cpu_worker: 11 - max_eval_per_worker: 6000 + max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet @@ -213,14 +213,14 @@ data: __name__: SaverConfig data: save_all_checkpoints: false - save_interval_sec: 30 + save_interval_sec: 300 single_sbr_temperature: true temperature_input: true updater_cfg: __module__: src.trainer.config __name__: UpdaterConfig data: - gradient_max_norm: 1.0 + gradient_max_norm: 100 mse_policy_loss: true optim_cfg: __module__: src.supervised.optim @@ -242,7 +242,7 @@ data: value: COSINE cyclic: false end_times_min: - - 30 + - 60 - 1400 init_temp: 0 sampling: false @@ -321,7 +321,7 @@ data: __name__: SbrMode value: NAGURNEY use_cpp: true - discount: 0.93 + discount: 0.9 eval_func_cfg: __module__: src.search.config __name__: InferenceServerEvalConfig @@ -346,7 +346,7 @@ data: __module__: src.game.values __name__: UtilityNorm value: FULL_COOP - search_iterations: 1 + search_iterations: 2 temperature: 1 use_symmetries: true hydra: diff --git a/config/cfg_oc_proxy_3.yaml b/config/cfg_oc_proxy_3.yaml index 0cba6d7..fd9b5ed 100644 --- a/config/cfg_oc_proxy_3.yaml +++ b/config/cfg_oc_proxy_3.yaml @@ -68,7 +68,7 @@ data: soup_delivery: 20 soup_pickup: 5 start_cooking: 3 - reward_scaling_factor: 0.5 + reward_scaling_factor: 1 single_temperature_input: true start_pos: __module__: src.misc.serialization @@ -109,16 +109,16 @@ data: id: 3 name: oc_proxy project_name: overcooked_cramped - updater_bucket_size: 100 + updater_bucket_size: 1000 wandb_mode: online - worker_episode_bucket_size: 25 - max_batch_size: 3000 + worker_episode_bucket_size: 2 + max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 max_cpu_worker: 11 - max_eval_per_worker: 6000 + max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet @@ -213,14 +213,14 @@ data: __name__: SaverConfig data: save_all_checkpoints: false - save_interval_sec: 30 + save_interval_sec: 300 single_sbr_temperature: true temperature_input: true updater_cfg: __module__: src.trainer.config __name__: UpdaterConfig data: - gradient_max_norm: 1.0 + gradient_max_norm: 100 mse_policy_loss: true optim_cfg: __module__: src.supervised.optim @@ -242,7 +242,7 @@ data: value: COSINE cyclic: false end_times_min: - - 30 + - 60 - 1400 init_temp: 0 sampling: false @@ -321,7 +321,7 @@ data: __name__: SbrMode value: NAGURNEY use_cpp: true - discount: 0.93 + discount: 0.9 eval_func_cfg: __module__: src.search.config __name__: InferenceServerEvalConfig @@ -346,7 +346,7 @@ data: __module__: src.game.values __name__: UtilityNorm value: FULL_COOP - search_iterations: 1 + search_iterations: 2 temperature: 1 use_symmetries: true hydra: diff --git a/config/cfg_oc_proxy_4.yaml b/config/cfg_oc_proxy_4.yaml index 33eba5b..50e3910 100644 --- a/config/cfg_oc_proxy_4.yaml +++ b/config/cfg_oc_proxy_4.yaml @@ -68,7 +68,7 @@ data: soup_delivery: 20 soup_pickup: 5 start_cooking: 3 - reward_scaling_factor: 0.5 + reward_scaling_factor: 1 single_temperature_input: true start_pos: __module__: src.misc.serialization @@ -109,16 +109,16 @@ data: id: 4 name: oc_proxy project_name: overcooked_cramped - updater_bucket_size: 100 + updater_bucket_size: 1000 wandb_mode: online - worker_episode_bucket_size: 25 - max_batch_size: 3000 + worker_episode_bucket_size: 2 + max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 max_cpu_worker: 11 - max_eval_per_worker: 6000 + max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet @@ -213,14 +213,14 @@ data: __name__: SaverConfig data: save_all_checkpoints: false - save_interval_sec: 30 + save_interval_sec: 300 single_sbr_temperature: true temperature_input: true updater_cfg: __module__: src.trainer.config __name__: UpdaterConfig data: - gradient_max_norm: 1.0 + gradient_max_norm: 100 mse_policy_loss: true optim_cfg: __module__: src.supervised.optim @@ -242,7 +242,7 @@ data: value: COSINE cyclic: false end_times_min: - - 30 + - 60 - 1400 init_temp: 0 sampling: false @@ -321,7 +321,7 @@ data: __name__: SbrMode value: NAGURNEY use_cpp: true - discount: 0.93 + discount: 0.9 eval_func_cfg: __module__: src.search.config __name__: InferenceServerEvalConfig @@ -346,7 +346,7 @@ data: __module__: src.game.values __name__: UtilityNorm value: FULL_COOP - search_iterations: 1 + search_iterations: 2 temperature: 1 use_symmetries: true hydra: diff --git a/config/debug_config.yaml b/config/debug_config.yaml index 7f71ee5..addbaa1 100644 --- a/config/debug_config.yaml +++ b/config/debug_config.yaml @@ -30,7 +30,7 @@ data: temperature: 1 game_cfg: __module__: src.game.overcooked.config - __name__: SimpleCrampedRoomOvercookedConfig + __name__: OneStateCrampedRoomOvercookedConfig data: board: - - 1 @@ -56,7 +56,7 @@ data: cooking_time: 20 flat_obs: false h: 4 - horizon: 2 + horizon: 1 num_actions: 6 num_players: 2 reward_cfg: @@ -79,8 +79,8 @@ data: __name__: TupleWrapper data: data: - - 2 - 1 + - 2 - 0 - 0 - __module__: src.misc.serialization @@ -88,7 +88,7 @@ data: data: data: - 2 - - 2 + - 1 - 0 - 1 temperature_input: true @@ -110,7 +110,7 @@ data: name: null project_name: test updater_bucket_size: 100 - wandb_mode: online + wandb_mode: offline worker_episode_bucket_size: 25 max_batch_size: 2000 max_cpu_evaluator: 1 @@ -260,8 +260,8 @@ data: utility_loss: __module__: src.game.values __name__: UtilityNorm - value: FULL_COOP - utility_loss_factor: 1 + value: NONE + utility_loss_factor: 0 value_reg_loss_factor: 0 updater_in_qsize: 100 updater_out_qsize: 10 diff --git a/scripts/training/generate_training_cfg_oc.py b/scripts/training/generate_training_cfg_oc.py index 2695fcb..b3a271a 100644 --- a/scripts/training/generate_training_cfg_oc.py +++ b/scripts/training/generate_training_cfg_oc.py @@ -75,7 +75,7 @@ def generate_training_structured_configs(): # net_cfg = EquivariantMobileNetConfig3x3(predict_game_len=True) # search # eval_func_cfg = NetworkEvalConfig(zero_sum_norm=ZeroSumNorm.LINEAR) - batch_size = 3000 + batch_size = 15000 # eval_func_cfg = NetworkEvalConfig( # max_batch_size=batch_size, # random_symmetry=False, @@ -148,7 +148,7 @@ def generate_training_structured_configs(): backup_func_cfg=backup_func_cfg, extract_func_cfg=extraction_func_cfg, average_eval=False, - discount=0.93, + discount=0.9, ) # search_cfg = SMOOSConfig( # eval_func_cfg=eval_func_cfg, @@ -184,7 +184,7 @@ def generate_training_structured_configs(): # cyclic=True, # sampling=True, # ) for _ in range(game_cfg.num_players)], - search_iterations=1, + search_iterations=2, temperature=1, max_random_start_steps=0, use_symmetries=True, @@ -210,7 +210,7 @@ def generate_training_structured_configs(): optim_type=OptimType.ADAM_W, anneal_cfg=TemperatureAnnealingConfig( init_temp=0, - end_times_min=[30, 1400], + end_times_min=[60, 1400], anneal_temps=[1e-3, 1e-6], anneal_types=[AnnealingType.LINEAR, AnnealingType.COSINE], ), @@ -235,16 +235,16 @@ def generate_training_structured_configs(): policy_loss_factor=5, value_reg_loss_factor=0, utility_loss_factor=1, - + gradient_max_norm=100, ) logger_cfg = LoggerConfig( project_name="overcooked_cramped", buffer_gen=False, - name='oc_proxy_luis', + name='oc_proxy', id=seed, updater_bucket_size=1000, worker_episode_bucket_size=2, - wandb_mode='offline', + wandb_mode='online', ) saver_cfg = SaverConfig( save_interval_sec=300, @@ -278,7 +278,7 @@ def generate_training_structured_configs(): only_generate_buffer=False, restrict_cpu=True, # only works on LINUX max_cpu_updater=2, - max_cpu_worker=22, + max_cpu_worker=11, max_cpu_evaluator=1, max_cpu_log_dist_save_collect=1, max_cpu_inference_server=2, diff --git a/scripts/training/play_overcooked.py b/scripts/training/play_overcooked.py index 6af3051..bfcca4c 100644 --- a/scripts/training/play_overcooked.py +++ b/scripts/training/play_overcooked.py @@ -12,7 +12,7 @@ def play_overcooked_example(): - path = Path(__file__).parent.parent.parent / 'outputs' / 'simple_proxy2.pt' + path = Path(__file__).parent.parent.parent / 'outputs' / 'latest.pt' temperature_input = True single_temperature = True @@ -56,12 +56,14 @@ def play_overcooked_example(): # sample_temperatures = [1, 1] # play - temperatures = np.linspace(0, 10, 15) + # temperatures = np.linspace(0, 10, 15) + temperatures = [5.] for t in temperatures: agent0.temperatures = [t, t] agent1.temperatures = [t, t] game.reset() game.render() + # for _ in range(20): while not game.is_terminal(): joint_action_list: list[int] = [] prob_list = [] diff --git a/scripts/training/script_start_training_oc.py b/scripts/training/script_start_training_oc.py index eb22596..37d969f 100644 --- a/scripts/training/script_start_training_oc.py +++ b/scripts/training/script_start_training_oc.py @@ -55,8 +55,8 @@ def start_training_from_structured_configs(): # game_cfg = survive_on_7x7_4_player_royale() # game_cfg = perform_choke_5x5_4_player(centered=True) # game_cfg.all_actions_legal = False - # game_cfg = OneStateCrampedRoomOvercookedConfig() - game_cfg = SimpleCrampedRoomOvercookedConfig() + game_cfg = OneStateCrampedRoomOvercookedConfig() + # game_cfg = SimpleCrampedRoomOvercookedConfig() # game_cfg = Simple2CrampedRoomOvercookedConfig() game_cfg.temperature_input = temperature_input @@ -146,7 +146,7 @@ def start_training_from_structured_configs(): eval_func_cfg=eval_func_cfg, backup_func_cfg=backup_func_cfg, extract_func_cfg=extraction_func_cfg, - average_eval=True, + average_eval=False, discount=0.99, ) # search_cfg = SMOOSConfig( @@ -228,12 +228,12 @@ def start_training_from_structured_configs(): updates_until_distribution=5, optim_cfg=optim_cfg, use_gpu=True, - utility_loss=UtilityNorm.FULL_COOP, + utility_loss=UtilityNorm.NONE, mse_policy_loss=True, policy_loss_factor=1, value_reg_loss_factor=0, - utility_loss_factor=1, - + utility_loss_factor=0, + gradient_max_norm=100, ) logger_cfg = LoggerConfig( project_name="test", @@ -242,7 +242,7 @@ def start_training_from_structured_configs(): id=0, updater_bucket_size=100, worker_episode_bucket_size=25, - wandb_mode='online', + wandb_mode='offline', ) saver_cfg = SaverConfig( save_interval_sec=30, diff --git a/src/cpp/source/overcooked.cpp b/src/cpp/source/overcooked.cpp index 3c32a9a..560d4e4 100644 --- a/src/cpp/source/overcooked.cpp +++ b/src/cpp/source/overcooked.cpp @@ -382,7 +382,7 @@ void construct_overcooked_encoding( ){ int x_dim = state->w; int y_dim = state->h; - int z_dim = 16; + int z_dim = 15; if (include_temperature){ z_dim += 1; } @@ -510,9 +510,9 @@ void construct_overcooked_encoding( if (state->turn > state->horizon - 40){ fill_layer_overcooked(arr, 1, 14, x_dim, y_dim, z_dim); } - // time step in environment: layer 15 - float time_remaining = (float) (state->horizon - state->turn) / (float) state->horizon; - fill_layer_overcooked(arr, time_remaining, 15, x_dim, y_dim, z_dim); + // time step in environment: layer 15 (removed) + // float time_remaining = (float) (state->horizon - state->turn) / (float) state->horizon; + // fill_layer_overcooked(arr, time_remaining, 15, x_dim, y_dim, z_dim); // temperature: layer 16 if (include_temperature){ fill_layer_overcooked(arr, temperature, 16, x_dim, y_dim, z_dim); diff --git a/src/game/overcooked/config.py b/src/game/overcooked/config.py index 2573033..0cb3b1a 100644 --- a/src/game/overcooked/config.py +++ b/src/game/overcooked/config.py @@ -56,7 +56,7 @@ class CrampedRoomOvercookedConfig(OvercookedGameConfig): [1, 2, 1, 5, 1], ]) start_pos: tuple[tuple[int, int, int, int], tuple[int, int, int, int]] = field(default_factory=lambda: ((1, 2, 0, 0), (3, 1, 0, 0))) - reward_scaling_factor: float = field(default=0.5) + reward_scaling_factor: float = field(default=1) @dataclass diff --git a/src/game/overcooked/overcooked.py b/src/game/overcooked/overcooked.py index bf858ee..244d908 100644 --- a/src/game/overcooked/overcooked.py +++ b/src/game/overcooked/overcooked.py @@ -127,7 +127,7 @@ def get_symmetry_count(self): def get_obs_shape(self, never_flatten=False) -> tuple[int, ...]: max_dim = max(self.cfg.h, self.cfg.w) - return max_dim, max_dim, 16 + self.cfg.temperature_input + return max_dim, max_dim, 15 + self.cfg.temperature_input def get_obs( self, diff --git a/src/trainer/az_inference_server.py b/src/trainer/az_inference_server.py index a7898a7..949182a 100644 --- a/src/trainer/az_inference_server.py +++ b/src/trainer/az_inference_server.py @@ -112,17 +112,18 @@ def run_inference_server( obs_filtered = input_np[input_rdy_cpy] n = obs_filtered.shape[0] # forward pass for all encodings, but do not exceed max batch size + batch_size = int(trainer_cfg.max_batch_size / 2) if start_phase: out_tensor = torch.zeros(size=(n, net.output_size), dtype=torch.float32) else: - if n <= trainer_cfg.max_batch_size: + if n <= batch_size: enc_tensor = torch.from_numpy(obs_filtered).to(device) out_tensor_with_grad = net(enc_tensor) out_tensor = out_tensor_with_grad.cpu().detach().float().numpy() else: start_idx = 0 out_tensor_list = [] - end_idx_list = list(range(trainer_cfg.max_batch_size, n, trainer_cfg.max_batch_size)) + end_idx_list = list(range(batch_size, n, batch_size)) if end_idx_list[-1] < n: end_idx_list.append(n) for end_idx in end_idx_list: