From 0b0f2f06bd601a93dd33aa959e0cc79fea31fed0 Mon Sep 17 00:00:00 2001 From: Yannik Mahlau Date: Sat, 2 Dec 2023 14:23:50 +0100 Subject: [PATCH 1/3] setup proxy luis --- config/cfg_luis_proxy_aa_0.yaml | 13 +- config/cfg_luis_proxy_aa_1.yaml | 105 +++- config/cfg_luis_proxy_aa_2.yaml | 528 ++++++++++++++++++ config/cfg_luis_proxy_aa_3.yaml | 528 ++++++++++++++++++ config/cfg_luis_proxy_aa_4.yaml | 528 ++++++++++++++++++ config/cfg_luis_proxy_cc_0.yaml | 15 +- config/cfg_luis_proxy_cc_1.yaml | 102 +++- ...oxy_cc_0.yaml => cfg_luis_proxy_cc_2.yaml} | 120 +++- config/cfg_luis_proxy_cc_3.yaml | 518 +++++++++++++++++ config/cfg_luis_proxy_cc_4.yaml | 518 +++++++++++++++++ ...oxy_oc_0.yaml => cfg_luis_proxy_co_0.yaml} | 33 +- ...oxy_cr_0.yaml => cfg_luis_proxy_co_1.yaml} | 94 +++- config/cfg_luis_proxy_co_2.yaml | 483 ++++++++++++++++ config/cfg_luis_proxy_co_3.yaml | 483 ++++++++++++++++ config/cfg_luis_proxy_co_4.yaml | 483 ++++++++++++++++ config/cfg_luis_proxy_cr_0.yaml | 29 +- config/cfg_luis_proxy_cr_1.yaml | 96 +++- ...oxy_oc_1.yaml => cfg_luis_proxy_cr_2.yaml} | 77 ++- ...oxy_oc_0.yaml => cfg_luis_proxy_cr_3.yaml} | 89 ++- ...oxy_aa_0.yaml => cfg_luis_proxy_cr_4.yaml} | 130 +++-- config/cfg_luis_proxy_fc_0.yaml | 4 +- config/cfg_luis_proxy_fc_1.yaml | 76 ++- ...oxy_fc_0.yaml => cfg_luis_proxy_fc_2.yaml} | 94 +++- config/cfg_luis_proxy_fc_3.yaml | 483 ++++++++++++++++ config/cfg_luis_proxy_fc_4.yaml | 483 ++++++++++++++++ config/debug_config.yaml | 53 +- scripts/training/generate_training_cfg_oc.py | 28 +- scripts/training/play_overcooked.py | 8 +- scripts/training/script_start_training_oc.py | 68 +-- src/network/resnet.py | 34 ++ start_training.py | 22 +- test/network/test_resnet.py | 36 +- 32 files changed, 6122 insertions(+), 239 deletions(-) create mode 100644 config/cfg_luis_proxy_aa_2.yaml create mode 100644 config/cfg_luis_proxy_aa_3.yaml create mode 100644 config/cfg_luis_proxy_aa_4.yaml rename config/{cfg_proxy_cc_0.yaml => cfg_luis_proxy_cc_2.yaml} (81%) create mode 100644 config/cfg_luis_proxy_cc_3.yaml create mode 100644 config/cfg_luis_proxy_cc_4.yaml rename config/{cfg_luis_proxy_oc_0.yaml => cfg_luis_proxy_co_0.yaml} (97%) rename config/{cfg_proxy_cr_0.yaml => cfg_luis_proxy_co_1.yaml} (83%) create mode 100644 config/cfg_luis_proxy_co_2.yaml create mode 100644 config/cfg_luis_proxy_co_3.yaml create mode 100644 config/cfg_luis_proxy_co_4.yaml rename config/{cfg_luis_proxy_oc_1.yaml => cfg_luis_proxy_cr_2.yaml} (85%) rename config/{cfg_proxy_oc_0.yaml => cfg_luis_proxy_cr_3.yaml} (84%) rename config/{cfg_proxy_aa_0.yaml => cfg_luis_proxy_cr_4.yaml} (83%) rename config/{cfg_proxy_fc_0.yaml => cfg_luis_proxy_fc_2.yaml} (83%) create mode 100644 config/cfg_luis_proxy_fc_3.yaml create mode 100644 config/cfg_luis_proxy_fc_4.yaml diff --git a/config/cfg_luis_proxy_aa_0.yaml b/config/cfg_luis_proxy_aa_0.yaml index 248e34a..b45083f 100644 --- a/config/cfg_luis_proxy_aa_0.yaml +++ b/config/cfg_luis_proxy_aa_0.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -151,7 +151,7 @@ data: merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet - __name__: OvercookedResNetConfig5x5 + __name__: OvercookedResNetConfig9x9 data: activation_type: __module__: src.network.utils @@ -173,8 +173,13 @@ data: - 3 - 1 - 1 - - - 128 + - - 64 + - 1 + - 3 + - 0 - 1 + - - 128 + - 2 - 3 - 1 - 1 @@ -367,7 +372,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_luis_proxy_aa_1.yaml b/config/cfg_luis_proxy_aa_1.yaml index 977b8f7..af2d8ef 100644 --- a/config/cfg_luis_proxy_aa_1.yaml +++ b/config/cfg_luis_proxy_aa_1.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -151,7 +151,7 @@ data: merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet - __name__: OvercookedResNetConfig5x5 + __name__: OvercookedResNetConfig9x9 data: activation_type: __module__: src.network.utils @@ -161,7 +161,97 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 layer_specs: - - 32 - 3 @@ -173,9 +263,14 @@ data: - 3 - 1 - 1 - - - 128 + - - 64 - 1 - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 - 1 - 1 - - 256 @@ -367,7 +462,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_luis_proxy_aa_2.yaml b/config/cfg_luis_proxy_aa_2.yaml new file mode 100644 index 0000000..2163647 --- /dev/null +++ b/config/cfg_luis_proxy_aa_2.yaml @@ -0,0 +1,528 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 2 + name: luis_proxy_aa + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig9x9 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 64 + - 1 + - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa \ No newline at end of file diff --git a/config/cfg_luis_proxy_aa_3.yaml b/config/cfg_luis_proxy_aa_3.yaml new file mode 100644 index 0000000..650e2fa --- /dev/null +++ b/config/cfg_luis_proxy_aa_3.yaml @@ -0,0 +1,528 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 3 + name: luis_proxy_aa + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig9x9 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 64 + - 1 + - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa \ No newline at end of file diff --git a/config/cfg_luis_proxy_aa_4.yaml b/config/cfg_luis_proxy_aa_4.yaml new file mode 100644 index 0000000..dc9bd33 --- /dev/null +++ b/config/cfg_luis_proxy_aa_4.yaml @@ -0,0 +1,528 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 4 + name: luis_proxy_aa + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig9x9 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 64 + - 1 + - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa \ No newline at end of file diff --git a/config/cfg_luis_proxy_cc_0.yaml b/config/cfg_luis_proxy_cc_0.yaml index 1f457a7..16270ab 100644 --- a/config/cfg_luis_proxy_cc_0.yaml +++ b/config/cfg_luis_proxy_cc_0.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -146,7 +146,7 @@ data: merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet - __name__: OvercookedResNetConfig5x5 + __name__: OvercookedResNetConfig8x8 data: activation_type: __module__: src.network.utils @@ -168,15 +168,20 @@ data: - 3 - 1 - 1 - - - 128 + - - 64 + - 1 + - 3 + - 0 - 1 + - - 128 + - 2 - 3 - 1 - 1 - - 256 - 1 - 3 - - 0 + - 1 - 1 lff_feature_expansion: 40 lff_features: false @@ -362,7 +367,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_luis_proxy_cc_1.yaml b/config/cfg_luis_proxy_cc_1.yaml index f018a37..266af84 100644 --- a/config/cfg_luis_proxy_cc_1.yaml +++ b/config/cfg_luis_proxy_cc_1.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -146,7 +146,7 @@ data: merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet - __name__: OvercookedResNetConfig5x5 + __name__: OvercookedResNetConfig8x8 data: activation_type: __module__: src.network.utils @@ -156,7 +156,92 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CounterCircuitOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 4 + - 1 + - 1 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 1 + - 1 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 3 + - 3 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 3 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 8 layer_specs: - - 32 - 3 @@ -168,15 +253,20 @@ data: - 3 - 1 - 1 - - - 128 + - - 64 - 1 - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 - 1 - 1 - - 256 - 1 - 3 - - 0 + - 1 - 1 lff_feature_expansion: 40 lff_features: false @@ -362,7 +452,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_proxy_cc_0.yaml b/config/cfg_luis_proxy_cc_2.yaml similarity index 81% rename from config/cfg_proxy_cc_0.yaml rename to config/cfg_luis_proxy_cc_2.yaml index 9e1a534..bddf45c 100644 --- a/config/cfg_proxy_cc_0.yaml +++ b/config/cfg_luis_proxy_cc_2.yaml @@ -5,12 +5,12 @@ data: __module__: src.trainer.config __name__: CollectorConfig data: - buffer_size: 100000 + buffer_size: 500000 log_every_sec: 300 quick_start_buffer_path: null - start_wait_n_samples: 100000 - compile_mode: reduce-overhead - compile_model: false + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -130,23 +130,23 @@ data: __name__: LoggerConfig data: buffer_gen: false - id: 0 - name: proxy_cc + id: 2 + name: luis_proxy_cc project_name: overcooked updater_bucket_size: 1000 - wandb_mode: online + wandb_mode: offline worker_episode_bucket_size: 2 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 - max_cpu_worker: 11 + max_cpu_worker: 22 max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet - __name__: OvercookedResNetConfig5x5 + __name__: OvercookedResNetConfig8x8 data: activation_type: __module__: src.network.utils @@ -156,7 +156,92 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CounterCircuitOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 4 + - 1 + - 1 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 1 + - 1 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 3 + - 3 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 3 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 8 layer_specs: - - 32 - 3 @@ -168,15 +253,20 @@ data: - 3 - 1 - 1 - - - 128 + - - 64 - 1 - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 - 1 - 1 - - 256 - 1 - 3 - - 0 + - 1 - 1 lff_feature_expansion: 40 lff_features: false @@ -362,7 +452,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -425,4 +515,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_proxy_cc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc \ No newline at end of file diff --git a/config/cfg_luis_proxy_cc_3.yaml b/config/cfg_luis_proxy_cc_3.yaml new file mode 100644 index 0000000..c1de5f8 --- /dev/null +++ b/config/cfg_luis_proxy_cc_3.yaml @@ -0,0 +1,518 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: CounterCircuitOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 4 + - 1 + - 1 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 1 + - 1 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 3 + - 3 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 3 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 8 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 3 + name: luis_proxy_cc + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig8x8 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: CounterCircuitOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 4 + - 1 + - 1 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 1 + - 1 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 3 + - 3 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 3 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 8 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 64 + - 1 + - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 1 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc \ No newline at end of file diff --git a/config/cfg_luis_proxy_cc_4.yaml b/config/cfg_luis_proxy_cc_4.yaml new file mode 100644 index 0000000..83231ac --- /dev/null +++ b/config/cfg_luis_proxy_cc_4.yaml @@ -0,0 +1,518 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: CounterCircuitOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 4 + - 1 + - 1 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 1 + - 1 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 3 + - 3 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 3 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 8 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 4 + name: luis_proxy_cc + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig8x8 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: CounterCircuitOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 4 + - 1 + - 1 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 1 + - 1 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 3 + - 3 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 3 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 8 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 64 + - 1 + - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 1 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc \ No newline at end of file diff --git a/config/cfg_luis_proxy_oc_0.yaml b/config/cfg_luis_proxy_co_0.yaml similarity index 97% rename from config/cfg_luis_proxy_oc_0.yaml rename to config/cfg_luis_proxy_co_0.yaml index 443ff4d..94e4d15 100644 --- a/config/cfg_luis_proxy_oc_0.yaml +++ b/config/cfg_luis_proxy_co_0.yaml @@ -26,39 +26,44 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true switch_pos: true game_cfg: __module__: src.game.overcooked.config - __name__: CrampedRoomOvercookedConfig + __name__: CoordinationRingOvercookedConfig data: board: - - 1 - 1 - - 4 - 1 + - 4 - 1 - - - 3 + - - 1 - 0 - 0 - 0 - - 3 - - - 1 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 - 0 - 0 - 0 - 1 - - 1 - - 2 - - 1 + - 3 - 5 - 1 + - 1 cooking_time: 20 flat_obs: false - h: 4 + h: 5 horizon: 400 num_actions: 6 num_players: 2 @@ -82,16 +87,16 @@ data: __name__: TupleWrapper data: data: - - 1 - 2 + - 1 - 0 - 0 - __module__: src.misc.serialization __name__: TupleWrapper data: data: - - 3 - 1 + - 2 - 0 - 0 temperature_input: true @@ -111,7 +116,7 @@ data: data: buffer_gen: false id: 0 - name: luis_proxy_oc + name: luis_proxy_co project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline @@ -342,7 +347,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -405,4 +410,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_oc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file diff --git a/config/cfg_proxy_cr_0.yaml b/config/cfg_luis_proxy_co_1.yaml similarity index 83% rename from config/cfg_proxy_cr_0.yaml rename to config/cfg_luis_proxy_co_1.yaml index 64daa56..1342795 100644 --- a/config/cfg_proxy_cr_0.yaml +++ b/config/cfg_luis_proxy_co_1.yaml @@ -5,12 +5,12 @@ data: __module__: src.trainer.config __name__: CollectorConfig data: - buffer_size: 100000 + buffer_size: 500000 log_every_sec: 300 quick_start_buffer_path: null - start_wait_n_samples: 100000 - compile_mode: reduce-overhead - compile_model: false + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -115,18 +115,18 @@ data: __name__: LoggerConfig data: buffer_gen: false - id: 0 - name: proxy_cr + id: 1 + name: luis_proxy_co project_name: overcooked updater_bucket_size: 1000 - wandb_mode: online + wandb_mode: offline worker_episode_bucket_size: 2 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 - max_cpu_worker: 11 + max_cpu_worker: 22 max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: @@ -141,7 +141,77 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -347,7 +417,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -410,4 +480,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_proxy_cr \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file diff --git a/config/cfg_luis_proxy_co_2.yaml b/config/cfg_luis_proxy_co_2.yaml new file mode 100644 index 0000000..14c0432 --- /dev/null +++ b/config/cfg_luis_proxy_co_2.yaml @@ -0,0 +1,483 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 2 + name: luis_proxy_co + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig5x5 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 128 + - 1 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file diff --git a/config/cfg_luis_proxy_co_3.yaml b/config/cfg_luis_proxy_co_3.yaml new file mode 100644 index 0000000..ae2b13b --- /dev/null +++ b/config/cfg_luis_proxy_co_3.yaml @@ -0,0 +1,483 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 3 + name: luis_proxy_co + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig5x5 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 128 + - 1 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file diff --git a/config/cfg_luis_proxy_co_4.yaml b/config/cfg_luis_proxy_co_4.yaml new file mode 100644 index 0000000..81cc701 --- /dev/null +++ b/config/cfg_luis_proxy_co_4.yaml @@ -0,0 +1,483 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 4 + name: luis_proxy_co + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig5x5 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 128 + - 1 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file diff --git a/config/cfg_luis_proxy_cr_0.yaml b/config/cfg_luis_proxy_cr_0.yaml index c93bd87..5700525 100644 --- a/config/cfg_luis_proxy_cr_0.yaml +++ b/config/cfg_luis_proxy_cr_0.yaml @@ -26,44 +26,39 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true switch_pos: true game_cfg: __module__: src.game.overcooked.config - __name__: CoordinationRingOvercookedConfig + __name__: CrampedRoomOvercookedConfig data: board: - - 1 - - 1 - 1 - 4 - 1 - - - 1 - - 0 - - 0 + - 1 + - - 3 - 0 - - 4 - - - 2 - 0 - - 1 - 0 - - 1 - - - 3 + - 3 + - - 1 - 0 - 0 - 0 - 1 - - 1 - - 3 - - 5 + - 2 - 1 + - 5 - 1 cooking_time: 20 flat_obs: false - h: 5 + h: 4 horizon: 400 num_actions: 6 num_players: 2 @@ -87,16 +82,16 @@ data: __name__: TupleWrapper data: data: - - 2 - 1 + - 2 - 0 - 0 - __module__: src.misc.serialization __name__: TupleWrapper data: data: + - 3 - 1 - - 2 - 0 - 0 temperature_input: true @@ -347,7 +342,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_luis_proxy_cr_1.yaml b/config/cfg_luis_proxy_cr_1.yaml index 7be7109..1d764f2 100644 --- a/config/cfg_luis_proxy_cr_1.yaml +++ b/config/cfg_luis_proxy_cr_1.yaml @@ -26,44 +26,39 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true switch_pos: true game_cfg: __module__: src.game.overcooked.config - __name__: CoordinationRingOvercookedConfig + __name__: CrampedRoomOvercookedConfig data: board: - - 1 - - 1 - 1 - 4 - 1 - - - 1 - - 0 + - 1 + - - 3 - 0 - 0 - - 4 - - - 2 - 0 - - 1 - - 0 - - 1 - - - 3 + - 3 + - - 1 - 0 - 0 - 0 - 1 - - 1 - - 3 - - 5 + - 2 - 1 + - 5 - 1 cooking_time: 20 flat_obs: false - h: 5 + h: 4 horizon: 400 num_actions: 6 num_players: 2 @@ -87,16 +82,16 @@ data: __name__: TupleWrapper data: data: - - 2 - 1 + - 2 - 0 - 0 - __module__: src.misc.serialization __name__: TupleWrapper data: data: + - 3 - 1 - - 2 - 0 - 0 temperature_input: true @@ -141,7 +136,72 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CrampedRoomOvercookedConfig + data: + board: + - - 1 + - 1 + - 4 + - 1 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 3 + - - 1 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 2 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 4 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -347,7 +407,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_luis_proxy_oc_1.yaml b/config/cfg_luis_proxy_cr_2.yaml similarity index 85% rename from config/cfg_luis_proxy_oc_1.yaml rename to config/cfg_luis_proxy_cr_2.yaml index 9e01ab8..0ea3edc 100644 --- a/config/cfg_luis_proxy_oc_1.yaml +++ b/config/cfg_luis_proxy_cr_2.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -110,8 +110,8 @@ data: __name__: LoggerConfig data: buffer_gen: false - id: 1 - name: luis_proxy_oc + id: 2 + name: luis_proxy_cr project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline @@ -136,7 +136,72 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CrampedRoomOvercookedConfig + data: + board: + - - 1 + - 1 + - 4 + - 1 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 3 + - - 1 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 2 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 4 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -342,7 +407,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -405,4 +470,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_oc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr \ No newline at end of file diff --git a/config/cfg_proxy_oc_0.yaml b/config/cfg_luis_proxy_cr_3.yaml similarity index 84% rename from config/cfg_proxy_oc_0.yaml rename to config/cfg_luis_proxy_cr_3.yaml index 25baea4..d5683f7 100644 --- a/config/cfg_proxy_oc_0.yaml +++ b/config/cfg_luis_proxy_cr_3.yaml @@ -5,12 +5,12 @@ data: __module__: src.trainer.config __name__: CollectorConfig data: - buffer_size: 100000 + buffer_size: 500000 log_every_sec: 300 quick_start_buffer_path: null - start_wait_n_samples: 100000 - compile_mode: reduce-overhead - compile_model: false + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -110,18 +110,18 @@ data: __name__: LoggerConfig data: buffer_gen: false - id: 0 - name: proxy_oc + id: 3 + name: luis_proxy_cr project_name: overcooked updater_bucket_size: 1000 - wandb_mode: online + wandb_mode: offline worker_episode_bucket_size: 2 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 - max_cpu_worker: 11 + max_cpu_worker: 22 max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: @@ -136,7 +136,72 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CrampedRoomOvercookedConfig + data: + board: + - - 1 + - 1 + - 4 + - 1 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 3 + - - 1 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 2 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 4 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -342,7 +407,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -405,4 +470,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_proxy_oc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr \ No newline at end of file diff --git a/config/cfg_proxy_aa_0.yaml b/config/cfg_luis_proxy_cr_4.yaml similarity index 83% rename from config/cfg_proxy_aa_0.yaml rename to config/cfg_luis_proxy_cr_4.yaml index 24ab7d9..1d36413 100644 --- a/config/cfg_proxy_aa_0.yaml +++ b/config/cfg_luis_proxy_cr_4.yaml @@ -5,12 +5,12 @@ data: __module__: src.trainer.config __name__: CollectorConfig data: - buffer_size: 100000 + buffer_size: 500000 log_every_sec: 300 quick_start_buffer_path: null - start_wait_n_samples: 100000 - compile_mode: reduce-overhead - compile_model: false + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -26,64 +26,39 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true switch_pos: true game_cfg: __module__: src.game.overcooked.config - __name__: AsymmetricAdvantageOvercookedConfig + __name__: CrampedRoomOvercookedConfig data: board: - - 1 - 1 - - 1 - - 1 - - 1 - - 1 - - 1 + - 4 - 1 - 1 - - 3 - - 0 - - 1 - - 5 - - 1 - - 3 - - 1 - - 0 - - 5 - - - 1 - - 0 - - 0 - - 0 - - 4 - 0 - 0 - 0 - - 1 + - 3 - - 1 - - 0 - - 0 - - 0 - - 4 - 0 - 0 - 0 - 1 - - 1 - - 1 - - 1 - - 2 - - 1 - 2 - 1 - - 1 + - 5 - 1 cooking_time: 20 flat_obs: false - h: 5 + h: 4 horizon: 400 num_actions: 6 num_players: 2 @@ -107,7 +82,7 @@ data: __name__: TupleWrapper data: data: - - 6 + - 1 - 2 - 0 - 0 @@ -115,13 +90,13 @@ data: __name__: TupleWrapper data: data: - - 1 - 3 + - 1 - 0 - 0 temperature_input: true unstuck_behavior: false - w: 9 + w: 5 inf_cfg: __module__: src.trainer.config __name__: InferenceServerConfig @@ -135,18 +110,18 @@ data: __name__: LoggerConfig data: buffer_gen: false - id: 0 - name: proxy_aa + id: 4 + name: luis_proxy_cr project_name: overcooked updater_bucket_size: 1000 - wandb_mode: online + wandb_mode: offline worker_episode_bucket_size: 2 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 - max_cpu_worker: 11 + max_cpu_worker: 22 max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: @@ -161,7 +136,72 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CrampedRoomOvercookedConfig + data: + board: + - - 1 + - 1 + - 4 + - 1 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 3 + - - 1 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 2 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 4 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -367,7 +407,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -430,4 +470,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_proxy_aa \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr \ No newline at end of file diff --git a/config/cfg_luis_proxy_fc_0.yaml b/config/cfg_luis_proxy_fc_0.yaml index 14a9a09..a0d9585 100644 --- a/config/cfg_luis_proxy_fc_0.yaml +++ b/config/cfg_luis_proxy_fc_0.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -347,7 +347,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_luis_proxy_fc_1.yaml b/config/cfg_luis_proxy_fc_1.yaml index 65d0e04..ae29adb 100644 --- a/config/cfg_luis_proxy_fc_1.yaml +++ b/config/cfg_luis_proxy_fc_1.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -141,7 +141,77 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: ForcedCoordinationOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 3 + - 0 + - 1 + - 0 + - 4 + - - 3 + - 0 + - 1 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -347,7 +417,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_proxy_fc_0.yaml b/config/cfg_luis_proxy_fc_2.yaml similarity index 83% rename from config/cfg_proxy_fc_0.yaml rename to config/cfg_luis_proxy_fc_2.yaml index b32e229..12fac1d 100644 --- a/config/cfg_proxy_fc_0.yaml +++ b/config/cfg_luis_proxy_fc_2.yaml @@ -5,12 +5,12 @@ data: __module__: src.trainer.config __name__: CollectorConfig data: - buffer_size: 100000 + buffer_size: 500000 log_every_sec: 300 quick_start_buffer_path: null - start_wait_n_samples: 100000 - compile_mode: reduce-overhead - compile_model: false + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -115,18 +115,18 @@ data: __name__: LoggerConfig data: buffer_gen: false - id: 0 - name: proxy_fc + id: 2 + name: luis_proxy_fc project_name: overcooked updater_bucket_size: 1000 - wandb_mode: online + wandb_mode: offline worker_episode_bucket_size: 2 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 - max_cpu_worker: 11 + max_cpu_worker: 22 max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: @@ -141,7 +141,77 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: ForcedCoordinationOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 3 + - 0 + - 1 + - 0 + - 4 + - - 3 + - 0 + - 1 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -347,7 +417,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -410,4 +480,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_proxy_fc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc \ No newline at end of file diff --git a/config/cfg_luis_proxy_fc_3.yaml b/config/cfg_luis_proxy_fc_3.yaml new file mode 100644 index 0000000..c6a5783 --- /dev/null +++ b/config/cfg_luis_proxy_fc_3.yaml @@ -0,0 +1,483 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: ForcedCoordinationOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 3 + - 0 + - 1 + - 0 + - 4 + - - 3 + - 0 + - 1 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 3 + name: luis_proxy_fc + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig5x5 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: ForcedCoordinationOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 3 + - 0 + - 1 + - 0 + - 4 + - - 3 + - 0 + - 1 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 128 + - 1 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc \ No newline at end of file diff --git a/config/cfg_luis_proxy_fc_4.yaml b/config/cfg_luis_proxy_fc_4.yaml new file mode 100644 index 0000000..f9ac520 --- /dev/null +++ b/config/cfg_luis_proxy_fc_4.yaml @@ -0,0 +1,483 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: ForcedCoordinationOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 3 + - 0 + - 1 + - 0 + - 4 + - - 3 + - 0 + - 1 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 4 + name: luis_proxy_fc + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig5x5 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: ForcedCoordinationOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 3 + - 0 + - 1 + - 0 + - 4 + - - 3 + - 0 + - 1 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 128 + - 1 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc \ No newline at end of file diff --git a/config/debug_config.yaml b/config/debug_config.yaml index 553ac87..abd213d 100644 --- a/config/debug_config.yaml +++ b/config/debug_config.yaml @@ -114,7 +114,7 @@ data: name: null project_name: test updater_bucket_size: 100 - wandb_mode: offline + wandb_mode: online worker_episode_bucket_size: 5 max_batch_size: 3000 max_cpu_evaluator: 1 @@ -204,12 +204,12 @@ data: value: GROUP_NORM num_layers: 1 num_inference_server: 1 - num_worker: 5 + num_worker: 30 only_generate_buffer: false prev_run_dir: null prev_run_idx: null - proxy_net_path: /home/mahlau/nobackup/albatross/albatross/outputs/working_proxy_simple2.pt - restrict_cpu: true + proxy_net_path: C:\Users\mahla\Programming\battlesnake\albatross\scripts\training\outputs\proxy2.pt + restrict_cpu: false save_state: false save_state_after_seconds: 30 saver_cfg: @@ -279,7 +279,7 @@ data: __name__: TemperatureAnnealingConfig data: anneal_temps: - - 10 + - 1 anneal_types: - __module__: src.supervised.annealer __name__: AnnealingType @@ -293,7 +293,7 @@ data: __name__: TemperatureAnnealingConfig data: anneal_temps: - - 10 + - 1 anneal_types: - __module__: src.supervised.annealer __name__: AnnealingType @@ -352,9 +352,46 @@ data: utility_norm: __module__: src.game.values __name__: UtilityNorm - value: FULL_COOP + value: NONE search_iterations: 1 - temp_scaling_cfgs: null + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 2 + - 5 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false temperature: 1 use_symmetries: true hydra: diff --git a/scripts/training/generate_training_cfg_oc.py b/scripts/training/generate_training_cfg_oc.py index 726ebdf..39a6ec1 100644 --- a/scripts/training/generate_training_cfg_oc.py +++ b/scripts/training/generate_training_cfg_oc.py @@ -22,7 +22,8 @@ from src.network.initialization import get_network_from_config from src.network.mobile_one import MobileOneConfig3x3 from src.network.mobilenet_v3 import MobileNetConfig3x3, MobileNetConfig5x5 -from src.network.resnet import ResNetConfig3x3, ResNetConfig7x7Best, OvercookedResNetConfig5x5 +from src.network.resnet import ResNetConfig3x3, ResNetConfig7x7Best, OvercookedResNetConfig5x5, \ + OvercookedResNetConfig9x9, OvercookedResNetConfig8x8 from src.network.utils import ActivationType from src.network.vision_net import EquivarianceType from src.search.config import AlphaZeroDecoupledSelectionConfig, InferenceServerEvalConfig, StandardBackupConfig, StandardExtractConfig, \ @@ -48,16 +49,15 @@ def generate_training_structured_configs(): Main method to start the training using dataclasses specified below """ # for seed in range(5): - game_cfg_dict = { - 'oc': CrampedRoomOvercookedConfig(), - 'aa': AsymmetricAdvantageOvercookedConfig(), - 'cr': CoordinationRingOvercookedConfig(), - 'fc': ForcedCoordinationOvercookedConfig(), - 'cc': CounterCircuitOvercookedConfig(), - + cfg_dict = { + 'aa': (AsymmetricAdvantageOvercookedConfig(), OvercookedResNetConfig9x9()), + 'cc': (CounterCircuitOvercookedConfig(), OvercookedResNetConfig8x8()), + 'co': (CoordinationRingOvercookedConfig(), OvercookedResNetConfig5x5()), + 'cr': (CrampedRoomOvercookedConfig(), OvercookedResNetConfig5x5()), + 'fc': (ForcedCoordinationOvercookedConfig(), OvercookedResNetConfig5x5()), } - for name, game_cfg in game_cfg_dict.items(): - for seed in range(2): + for name, (game_cfg, net_cfg) in cfg_dict.items(): + for seed in range(5): temperature_input = True single_temperature = True # game @@ -81,7 +81,7 @@ def generate_training_structured_configs(): # net_cfg = MobileOneConfig3x3(predict_policy=True, predict_game_len=False, eq_type=eq_type) # net_cfg = MobileNetConfig5x5(predict_policy=True, predict_game_len=False, eq_type=eq_type) # net_cfg = ResNetConfig7x7Best() - net_cfg = OvercookedResNetConfig5x5(predict_policy=True, eq_type=eq_type, lff_features=False) + # net_cfg = OvercookedResNetConfig5x5() # net_cfg = EquivariantMobileNetConfig3x3(predict_game_len=True) # search @@ -104,7 +104,7 @@ def generate_training_structured_configs(): temperature_input=temperature_input, single_temperature=single_temperature, min_clip_value=-math.inf, - max_clip_value=math.inf, + max_clip_value=30, policy_prediction=net_cfg.predict_policy, utility_norm=UtilityNorm.FULL_COOP, ) @@ -145,7 +145,7 @@ def generate_training_structured_configs(): utility_norm=UtilityNorm.FULL_COOP, min_clip_value=-math.inf, max_clip_value=30, - ) + ) # extraction_func_cfg = StandardExtractConfig() # extraction_func_cfg = MeanPolicyExtractConfig() # extraction_func_cfg = PolicyExtractConfig() @@ -230,7 +230,7 @@ def generate_training_structured_configs(): ) evaluator_cfg = EvaluatorConfig( eval_rate_sec=60, - num_episodes=[100, 20], + num_episodes=[100, 2], enemy_iterations=1, enemy_cfgs=[ RandomAgentConfig() diff --git a/scripts/training/play_overcooked.py b/scripts/training/play_overcooked.py index fb8e6d9..0474d66 100644 --- a/scripts/training/play_overcooked.py +++ b/scripts/training/play_overcooked.py @@ -12,9 +12,9 @@ def play_overcooked_example(): - path = Path(__file__).parent.parent.parent / 'outputs' / 'latest copy 2.pt' + path = Path(__file__).parent / 'outputs' / 'response2.pt' temperature_input = True - single_temperature = False + single_temperature = True net = get_network_from_file(path).eval() game_cfg = net.cfg.game_cfg @@ -49,15 +49,15 @@ def play_overcooked_example(): # agent1.net = net agent_list = [ - agent1, agent0, + agent1, ] sample_temperatures = [math.inf, math.inf] # sample_temperatures = [5, 5] # play # temperatures = np.linspace(0, 10, 15) - temperatures = [0.1] + temperatures = [0.5] for t in temperatures: agent0.temperatures = [t, t] # agent1.temperatures = [t, t] diff --git a/scripts/training/script_start_training_oc.py b/scripts/training/script_start_training_oc.py index 7c5059c..d979b00 100644 --- a/scripts/training/script_start_training_oc.py +++ b/scripts/training/script_start_training_oc.py @@ -14,7 +14,9 @@ from src.game.battlesnake.bootcamp.test_envs_3x3 import perform_choke_2_player from src.game.battlesnake.bootcamp.test_envs_5x5 import perform_choke_5x5_4_player from src.game.battlesnake.bootcamp.test_envs_7x7 import survive_on_7x7_4_player_royale -from src.game.overcooked.config import CrampedRoomOvercookedConfig, OneStateCrampedRoomOvercookedConfig, Simple2CrampedRoomOvercookedConfig, Simple3CrampedRoomOvercookedConfig, Simple4CrampedRoomOvercookedConfig, SimpleCrampedRoomOvercookedConfig, TwoStateCrampedRoomOvercookedConfig +from src.game.overcooked.config import CrampedRoomOvercookedConfig, OneStateCrampedRoomOvercookedConfig, \ + Simple2CrampedRoomOvercookedConfig, Simple3CrampedRoomOvercookedConfig, Simple4CrampedRoomOvercookedConfig, \ + SimpleCrampedRoomOvercookedConfig, TwoStateCrampedRoomOvercookedConfig from src.game.values import UtilityNorm from src.misc.const import PHI from src.misc.serialization import serialize_dataclass @@ -24,7 +26,8 @@ from src.network.resnet import ResNetConfig3x3, ResNetConfig7x7Best, OvercookedResNetConfig5x5 from src.network.utils import ActivationType from src.network.vision_net import EquivarianceType -from src.search.config import AlphaZeroDecoupledSelectionConfig, InferenceServerEvalConfig, ResponseInferenceServerEvalConfig, StandardBackupConfig, StandardExtractConfig, \ +from src.search.config import AlphaZeroDecoupledSelectionConfig, InferenceServerEvalConfig, \ + ResponseInferenceServerEvalConfig, StandardBackupConfig, StandardExtractConfig, \ DecoupledUCTSelectionConfig, LogitBackupConfig, FixedDepthConfig, SpecialExtractConfig, NashBackupConfig, \ Exp3SelectionConfig, MeanPolicyExtractConfig, Exp3BackupConfig, RegretMatchingSelectionConfig, \ RegretMatchingBackupConfig, SMOOSConfig, PolicyExtractConfig, EnemyExploitationEvalConfig, \ @@ -58,7 +61,7 @@ def start_training_from_structured_configs(): # game_cfg = OneStateCrampedRoomOvercookedConfig() # game_cfg = SimpleCrampedRoomOvercookedConfig() game_cfg = Simple2CrampedRoomOvercookedConfig() - + game_cfg.temperature_input = temperature_input game_cfg.single_temperature_input = single_temperature @@ -97,13 +100,12 @@ def start_training_from_structured_configs(): # utility_norm=UtilityNorm.FULL_COOP, # ) eval_func_cfg = ResponseInferenceServerEvalConfig( - random_symmetry= False, + random_symmetry=False, min_clip_value=-math.inf, max_clip_value=50, policy_prediction=True, ) - - + # sel_func_cfg = DecoupledUCTSelectionConfig(exp_bonus=1.414) # 1.4) # sel_func_cfg = SampleSelectionConfig(dirichlet_alpha=math.inf, dirichlet_eps=0.25, temperature=1.0) # sel_func_cfg = AlphaZeroDecoupledSelectionConfig(exp_bonus=1.414, dirichlet_alpha=0.3, dirichlet_eps=0.25) @@ -129,7 +131,7 @@ def start_training_from_structured_configs(): # backup_func_cfg = Exp3BackupConfig() # backup_func_cfg = RegretMatchingBackupConfig(avg_backup=True) extraction_func_cfg = SpecialExtractConfig( - utility_norm=UtilityNorm.FULL_COOP, + utility_norm=UtilityNorm.NONE, min_clip_value=-math.inf, max_clip_value=30, ) @@ -170,24 +172,24 @@ def start_training_from_structured_configs(): worker_cfg = WorkerConfig( search_cfg=search_cfg, policy_eval_cfg=policy_eval_cfg, - # temp_scaling_cfgs=( - # TemperatureAnnealingConfig( - # init_temp=5, - # end_times_min=[20, 40], - # anneal_temps=[5, 0], - # anneal_types=[AnnealingType.CONST, AnnealingType.LINEAR], - # cyclic=False, - # sampling=False, - # ), - # TemperatureAnnealingConfig( - # init_temp=10, - # end_times_min=[1], - # anneal_temps=[10], - # anneal_types=[AnnealingType.CONST], - # cyclic=True, - # sampling=False, - # ), - # ), + temp_scaling_cfgs=( + TemperatureAnnealingConfig( + init_temp=5, + end_times_min=[2, 5], + anneal_temps=[5, 0], + anneal_types=[AnnealingType.CONST, AnnealingType.LINEAR], + cyclic=False, + sampling=False, + ), + TemperatureAnnealingConfig( + init_temp=10, + end_times_min=[1], + anneal_temps=[10], + anneal_types=[AnnealingType.CONST], + cyclic=True, + sampling=False, + ), + ), # anneal_cfgs=None, # anneal_cfgs=[TemperatureAnnealingConfig( # init_temp=0, @@ -196,11 +198,11 @@ def start_training_from_structured_configs(): # anneal_types=[AnnealingType.COSINE], # cyclic=True, # sampling=True, - # )], - anneal_cfgs=[TemperatureAnnealingConfig( + # )], + anneal_cfgs=[TemperatureAnnealingConfig( init_temp=0, end_times_min=[1], - anneal_temps=[10], + anneal_temps=[1], anneal_types=[AnnealingType.COSINE], cyclic=True, sampling=True, @@ -272,7 +274,7 @@ def start_training_from_structured_configs(): id=0, updater_bucket_size=100, worker_episode_bucket_size=5, - wandb_mode='offline', + wandb_mode='online', ) saver_cfg = SaverConfig( save_interval_sec=10, @@ -281,7 +283,7 @@ def start_training_from_structured_configs(): use_gpu=True, ) trainer_cfg = AlphaZeroTrainerConfig( - num_worker=5, # IMPORTANT + num_worker=30, # IMPORTANT num_inference_server=1, save_state=False, save_state_after_seconds=30, @@ -295,7 +297,7 @@ def start_training_from_structured_configs(): collector_cfg=collector_cfg, inf_cfg=inf_cfg, max_batch_size=batch_size, - max_eval_per_worker=batch_size*2, + max_eval_per_worker=batch_size * 2, data_qsize=10, info_qsize=100, updater_in_qsize=100, @@ -304,7 +306,7 @@ def start_training_from_structured_configs(): prev_run_dir=None, prev_run_idx=None, only_generate_buffer=False, - restrict_cpu=True, # only works on LINUX + restrict_cpu=False, # only works on LINUX max_cpu_updater=1, max_cpu_worker=10, max_cpu_evaluator=1, @@ -316,7 +318,7 @@ def start_training_from_structured_configs(): compile_mode='max-autotune', merge_inference_update_gpu=True, # proxy_net_path=None, - proxy_net_path=str(Path(__file__).parent.parent.parent / 'outputs' / 'working_proxy_simple2.pt'), + proxy_net_path=str(Path(__file__).parent / 'outputs' / 'proxy2.pt'), ) # initialize yaml file and hydra print(os.getcwd()) diff --git a/src/network/resnet.py b/src/network/resnet.py index b259d93..139d9e0 100644 --- a/src/network/resnet.py +++ b/src/network/resnet.py @@ -229,6 +229,40 @@ class OvercookedResNetConfig5x5(ResNetConfig): value_head_cfg: HeadConfig = field(default_factory=lambda: WideHeadConfig()) +# channels, num_blocks, kernel_size, padding, norm +best_9x9 = [ + [32, 3, 3, 1, 1], + [64, 2, 3, 1, 1], + [64, 1, 3, 0, 1], + [128, 2, 3, 1, 1], + [256, 1, 3, 0, 1], +] + + +@dataclass +class OvercookedResNetConfig9x9(ResNetConfig): + layer_specs: list[list[int]] = field(default_factory=lambda: best_9x9) + policy_head_cfg: HeadConfig = field(default_factory=lambda: WideHeadConfig()) + value_head_cfg: HeadConfig = field(default_factory=lambda: WideHeadConfig()) + + +# channels, num_blocks, kernel_size, padding, norm +best_8x8 = [ + [32, 3, 3, 1, 1], + [64, 2, 3, 1, 1], + [64, 1, 3, 0, 1], + [128, 2, 3, 1, 1], + [256, 1, 3, 1, 1], +] + + +@dataclass +class OvercookedResNetConfig8x8(ResNetConfig): + layer_specs: list[list[int]] = field(default_factory=lambda: best_8x8) + policy_head_cfg: HeadConfig = field(default_factory=lambda: WideHeadConfig()) + value_head_cfg: HeadConfig = field(default_factory=lambda: WideHeadConfig()) + + default_7x7 = [ [32, 2, 3, 1, 1], [48, 1, 3, 0, 1], diff --git a/start_training.py b/start_training.py index 8962151..925ad79 100644 --- a/start_training.py +++ b/start_training.py @@ -1,3 +1,4 @@ +import itertools import os import sys from pathlib import Path @@ -31,7 +32,26 @@ def main(cfg: AlphaZeroTrainerConfig): config_path = Path(__file__).parent / 'config' config_name = 'config' - if len(sys.argv) > 1 and sys.argv[1].startswith("config="): + if len(sys.argv) > 3 and sys.argv[1].startswith("config="): + config_prefix = sys.argv[1].split("=")[-1] + sys.argv.pop(1) + arr_id = int(sys.argv[1]) + sys.argv.pop(1) + + pref_lists = [ + # list(range(1, 6)), + # [1] + list(range(5, 51, 5)), + ['aa', 'cc', 'co', 'cr', 'fc'], + list(range(5)), + ] + prod = list(itertools.product(*pref_lists)) + tpl = prod[arr_id] + # config_name = f"{config_prefix}_{tpl[0]}_{tpl[1]}_{tpl[2]}" + config_name = f"{config_prefix}_{tpl[0]}_{tpl[1]}" + # config_name = f"{config_prefix}_{prefix_arr[t]}_{seed}" + # config_name = f"{config_prefix}_{seed}_{prefix_arr[t]}" + elif len(sys.argv) > 2 and sys.argv[1].startswith("config="): config_name = sys.argv[1].split("=")[-1] sys.argv.pop(1) + print(f"{config_name=}", flush=True) hydra.main(config_path=str(config_path), config_name=config_name, version_base=None)(main)() diff --git a/test/network/test_resnet.py b/test/network/test_resnet.py index 0be2f73..569d52d 100644 --- a/test/network/test_resnet.py +++ b/test/network/test_resnet.py @@ -10,10 +10,13 @@ from src.game.battlesnake.bootcamp.test_envs_7x7 import survive_on_7x7 from src.game.initialization import get_game_from_config from src.network.initialization import get_network_from_config -from src.game.overcooked.config import CrampedRoomOvercookedConfig +from src.game.overcooked.config import CrampedRoomOvercookedConfig, CoordinationRingOvercookedConfig, \ + ForcedCoordinationOvercookedConfig, AsymmetricAdvantageOvercookedConfig, CounterCircuitOvercookedConfig from src.misc.utils import set_seed from src.network.resnet import ResNetConfig3x3, ResNetConfig5x5, ResNetConfig7x7, ResNetConfig9x9, ResNetConfig7x7New, \ - ResNetConfig11x11, ResNetConfig7x7Best, OvercookedResNetConfig5x5 + ResNetConfig11x11, ResNetConfig7x7Best, OvercookedResNetConfig5x5, OvercookedResNetConfig9x9, \ + OvercookedResNetConfig8x8 + class TestResNet(unittest.TestCase): def test_resnet_game(self): @@ -192,9 +195,10 @@ def test_resnet_centered_11_speed(self): print(f"{(end_time - start_time) / n=}") def test_resnet_oc(self): - game_cfg = CrampedRoomOvercookedConfig() + game_cfg = ForcedCoordinationOvercookedConfig() game = get_game_from_config(game_cfg) obs, _, _ = game.get_obs() + print(obs.shape) obs_tensor = torch.tensor(obs) net_cfg = OvercookedResNetConfig5x5(game_cfg=game_cfg) for seed in range(10): @@ -203,3 +207,29 @@ def test_resnet_oc(self): net_out = net(obs_tensor) print(net_out) + def test_resnet_oc_8(self): + game_cfg = CounterCircuitOvercookedConfig() + game = get_game_from_config(game_cfg) + obs, _, _ = game.get_obs() + print(obs.shape) + obs_tensor = torch.tensor(obs) + net_cfg = OvercookedResNetConfig8x8(game_cfg=game_cfg) + for seed in range(10): + set_seed(seed) + net = get_network_from_config(net_cfg) + net_out = net(obs_tensor) + print(net_out) + + def test_resnet_oc_large(self): + game_cfg = AsymmetricAdvantageOvercookedConfig() + game = get_game_from_config(game_cfg) + obs, _, _ = game.get_obs() + print(obs.shape) + obs_tensor = torch.tensor(obs) + net_cfg = OvercookedResNetConfig9x9(game_cfg=game_cfg) + for seed in range(10): + set_seed(seed) + net = get_network_from_config(net_cfg) + net_out = net(obs_tensor) + print(net_out) + From 3f0b942f49d410185d749a00c016dc94cb092eb8 Mon Sep 17 00:00:00 2001 From: Yannik Mahlau Date: Sat, 2 Dec 2023 14:29:27 +0100 Subject: [PATCH 2/3] setup proxy luis changes --- config/cfg_luis_proxy_aa_0.yaml | 2 +- config/cfg_luis_proxy_aa_1.yaml | 2 +- config/cfg_luis_proxy_aa_2.yaml | 2 +- config/cfg_luis_proxy_aa_3.yaml | 2 +- config/cfg_luis_proxy_aa_4.yaml | 2 +- config/cfg_luis_proxy_cc_0.yaml | 2 +- config/cfg_luis_proxy_cc_1.yaml | 2 +- config/cfg_luis_proxy_cc_2.yaml | 2 +- config/cfg_luis_proxy_cc_3.yaml | 2 +- config/cfg_luis_proxy_cc_4.yaml | 2 +- config/cfg_luis_proxy_co_0.yaml | 2 +- config/cfg_luis_proxy_co_1.yaml | 2 +- config/cfg_luis_proxy_co_2.yaml | 2 +- config/cfg_luis_proxy_co_3.yaml | 2 +- config/cfg_luis_proxy_co_4.yaml | 2 +- config/cfg_luis_proxy_cr_0.yaml | 2 +- config/cfg_luis_proxy_cr_1.yaml | 2 +- config/cfg_luis_proxy_cr_2.yaml | 2 +- config/cfg_luis_proxy_cr_3.yaml | 2 +- config/cfg_luis_proxy_cr_4.yaml | 2 +- config/cfg_luis_proxy_fc_0.yaml | 2 +- config/cfg_luis_proxy_fc_1.yaml | 2 +- config/cfg_luis_proxy_fc_2.yaml | 2 +- config/cfg_luis_proxy_fc_3.yaml | 2 +- config/cfg_luis_proxy_fc_4.yaml | 2 +- scripts/training/generate_training_cfg_oc.py | 2 +- 26 files changed, 26 insertions(+), 26 deletions(-) diff --git a/config/cfg_luis_proxy_aa_0.yaml b/config/cfg_luis_proxy_aa_0.yaml index b45083f..df77100 100644 --- a/config/cfg_luis_proxy_aa_0.yaml +++ b/config/cfg_luis_proxy_aa_0.yaml @@ -140,7 +140,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_aa_1.yaml b/config/cfg_luis_proxy_aa_1.yaml index af2d8ef..d6d7043 100644 --- a/config/cfg_luis_proxy_aa_1.yaml +++ b/config/cfg_luis_proxy_aa_1.yaml @@ -140,7 +140,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_aa_2.yaml b/config/cfg_luis_proxy_aa_2.yaml index 2163647..3d2fb15 100644 --- a/config/cfg_luis_proxy_aa_2.yaml +++ b/config/cfg_luis_proxy_aa_2.yaml @@ -140,7 +140,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_aa_3.yaml b/config/cfg_luis_proxy_aa_3.yaml index 650e2fa..c458555 100644 --- a/config/cfg_luis_proxy_aa_3.yaml +++ b/config/cfg_luis_proxy_aa_3.yaml @@ -140,7 +140,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_aa_4.yaml b/config/cfg_luis_proxy_aa_4.yaml index dc9bd33..5c5248b 100644 --- a/config/cfg_luis_proxy_aa_4.yaml +++ b/config/cfg_luis_proxy_aa_4.yaml @@ -140,7 +140,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_cc_0.yaml b/config/cfg_luis_proxy_cc_0.yaml index 16270ab..1e5719d 100644 --- a/config/cfg_luis_proxy_cc_0.yaml +++ b/config/cfg_luis_proxy_cc_0.yaml @@ -135,7 +135,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_cc_1.yaml b/config/cfg_luis_proxy_cc_1.yaml index 266af84..3963fa2 100644 --- a/config/cfg_luis_proxy_cc_1.yaml +++ b/config/cfg_luis_proxy_cc_1.yaml @@ -135,7 +135,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_cc_2.yaml b/config/cfg_luis_proxy_cc_2.yaml index bddf45c..5d09e01 100644 --- a/config/cfg_luis_proxy_cc_2.yaml +++ b/config/cfg_luis_proxy_cc_2.yaml @@ -135,7 +135,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_cc_3.yaml b/config/cfg_luis_proxy_cc_3.yaml index c1de5f8..d9af976 100644 --- a/config/cfg_luis_proxy_cc_3.yaml +++ b/config/cfg_luis_proxy_cc_3.yaml @@ -135,7 +135,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_cc_4.yaml b/config/cfg_luis_proxy_cc_4.yaml index 83231ac..0154325 100644 --- a/config/cfg_luis_proxy_cc_4.yaml +++ b/config/cfg_luis_proxy_cc_4.yaml @@ -135,7 +135,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_co_0.yaml b/config/cfg_luis_proxy_co_0.yaml index 94e4d15..bef0e0d 100644 --- a/config/cfg_luis_proxy_co_0.yaml +++ b/config/cfg_luis_proxy_co_0.yaml @@ -120,7 +120,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_co_1.yaml b/config/cfg_luis_proxy_co_1.yaml index 1342795..14096f8 100644 --- a/config/cfg_luis_proxy_co_1.yaml +++ b/config/cfg_luis_proxy_co_1.yaml @@ -120,7 +120,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_co_2.yaml b/config/cfg_luis_proxy_co_2.yaml index 14c0432..20972c6 100644 --- a/config/cfg_luis_proxy_co_2.yaml +++ b/config/cfg_luis_proxy_co_2.yaml @@ -120,7 +120,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_co_3.yaml b/config/cfg_luis_proxy_co_3.yaml index ae2b13b..bb6a647 100644 --- a/config/cfg_luis_proxy_co_3.yaml +++ b/config/cfg_luis_proxy_co_3.yaml @@ -120,7 +120,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_co_4.yaml b/config/cfg_luis_proxy_co_4.yaml index 81cc701..4ed7101 100644 --- a/config/cfg_luis_proxy_co_4.yaml +++ b/config/cfg_luis_proxy_co_4.yaml @@ -120,7 +120,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_cr_0.yaml b/config/cfg_luis_proxy_cr_0.yaml index 5700525..ceac1ab 100644 --- a/config/cfg_luis_proxy_cr_0.yaml +++ b/config/cfg_luis_proxy_cr_0.yaml @@ -115,7 +115,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_cr_1.yaml b/config/cfg_luis_proxy_cr_1.yaml index 1d764f2..1e8d5c7 100644 --- a/config/cfg_luis_proxy_cr_1.yaml +++ b/config/cfg_luis_proxy_cr_1.yaml @@ -115,7 +115,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_cr_2.yaml b/config/cfg_luis_proxy_cr_2.yaml index 0ea3edc..94e240b 100644 --- a/config/cfg_luis_proxy_cr_2.yaml +++ b/config/cfg_luis_proxy_cr_2.yaml @@ -115,7 +115,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_cr_3.yaml b/config/cfg_luis_proxy_cr_3.yaml index d5683f7..f862a81 100644 --- a/config/cfg_luis_proxy_cr_3.yaml +++ b/config/cfg_luis_proxy_cr_3.yaml @@ -115,7 +115,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_cr_4.yaml b/config/cfg_luis_proxy_cr_4.yaml index 1d36413..59ec587 100644 --- a/config/cfg_luis_proxy_cr_4.yaml +++ b/config/cfg_luis_proxy_cr_4.yaml @@ -115,7 +115,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_fc_0.yaml b/config/cfg_luis_proxy_fc_0.yaml index a0d9585..4100568 100644 --- a/config/cfg_luis_proxy_fc_0.yaml +++ b/config/cfg_luis_proxy_fc_0.yaml @@ -120,7 +120,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_fc_1.yaml b/config/cfg_luis_proxy_fc_1.yaml index ae29adb..b0a19cd 100644 --- a/config/cfg_luis_proxy_fc_1.yaml +++ b/config/cfg_luis_proxy_fc_1.yaml @@ -120,7 +120,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_fc_2.yaml b/config/cfg_luis_proxy_fc_2.yaml index 12fac1d..9dccf62 100644 --- a/config/cfg_luis_proxy_fc_2.yaml +++ b/config/cfg_luis_proxy_fc_2.yaml @@ -120,7 +120,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_fc_3.yaml b/config/cfg_luis_proxy_fc_3.yaml index c6a5783..0dc84e1 100644 --- a/config/cfg_luis_proxy_fc_3.yaml +++ b/config/cfg_luis_proxy_fc_3.yaml @@ -120,7 +120,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/config/cfg_luis_proxy_fc_4.yaml b/config/cfg_luis_proxy_fc_4.yaml index f9ac520..bb7353b 100644 --- a/config/cfg_luis_proxy_fc_4.yaml +++ b/config/cfg_luis_proxy_fc_4.yaml @@ -120,7 +120,7 @@ data: project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline - worker_episode_bucket_size: 2 + worker_episode_bucket_size: 5 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 diff --git a/scripts/training/generate_training_cfg_oc.py b/scripts/training/generate_training_cfg_oc.py index 39a6ec1..e1ec23a 100644 --- a/scripts/training/generate_training_cfg_oc.py +++ b/scripts/training/generate_training_cfg_oc.py @@ -279,7 +279,7 @@ def generate_training_structured_configs(): name=f'luis_proxy_{name}', id=seed, updater_bucket_size=1000, - worker_episode_bucket_size=2, + worker_episode_bucket_size=5, wandb_mode='offline', ) saver_cfg = SaverConfig( From b57414e89bb6b6955b80656ce5b708a2cc7b299f Mon Sep 17 00:00:00 2001 From: ymahlau Date: Sun, 3 Dec 2023 10:51:47 +0100 Subject: [PATCH 3/3] minor bug fixes --- config/cfg_luis_proxy_aa_0.yaml | 4 ++-- config/cfg_luis_proxy_aa_1.yaml | 4 ++-- config/cfg_luis_proxy_aa_2.yaml | 4 ++-- config/cfg_luis_proxy_aa_3.yaml | 4 ++-- config/cfg_luis_proxy_aa_4.yaml | 4 ++-- config/cfg_luis_proxy_cc_0.yaml | 4 ++-- config/cfg_luis_proxy_cc_1.yaml | 4 ++-- config/cfg_luis_proxy_cc_2.yaml | 4 ++-- config/cfg_luis_proxy_cc_3.yaml | 4 ++-- config/cfg_luis_proxy_cc_4.yaml | 4 ++-- config/cfg_luis_proxy_co_0.yaml | 4 ++-- config/cfg_luis_proxy_co_1.yaml | 4 ++-- config/cfg_luis_proxy_co_2.yaml | 4 ++-- config/cfg_luis_proxy_co_3.yaml | 4 ++-- config/cfg_luis_proxy_co_4.yaml | 4 ++-- config/cfg_luis_proxy_cr_0.yaml | 4 ++-- config/cfg_luis_proxy_cr_1.yaml | 4 ++-- config/cfg_luis_proxy_cr_2.yaml | 4 ++-- config/cfg_luis_proxy_cr_3.yaml | 4 ++-- config/cfg_luis_proxy_cr_4.yaml | 4 ++-- config/cfg_luis_proxy_fc_0.yaml | 4 ++-- config/cfg_luis_proxy_fc_1.yaml | 4 ++-- config/cfg_luis_proxy_fc_2.yaml | 4 ++-- config/cfg_luis_proxy_fc_3.yaml | 4 ++-- config/cfg_luis_proxy_fc_4.yaml | 4 ++-- scripts/training/generate_training_cfg_oc.py | 5 +++-- src/trainer/az_collector.py | 2 ++ src/trainer/az_distributor.py | 2 ++ src/trainer/az_evaluator.py | 5 ++++- src/trainer/az_inference_server.py | 2 ++ src/trainer/az_logger.py | 2 ++ src/trainer/az_saver.py | 4 ++++ src/trainer/az_trainer.py | 1 + src/trainer/az_updater.py | 4 ++-- src/trainer/az_worker.py | 2 ++ 35 files changed, 74 insertions(+), 55 deletions(-) diff --git a/config/cfg_luis_proxy_aa_0.yaml b/config/cfg_luis_proxy_aa_0.yaml index df77100..58010a6 100644 --- a/config/cfg_luis_proxy_aa_0.yaml +++ b/config/cfg_luis_proxy_aa_0.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -435,4 +435,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa_0 \ No newline at end of file diff --git a/config/cfg_luis_proxy_aa_1.yaml b/config/cfg_luis_proxy_aa_1.yaml index d6d7043..c74259c 100644 --- a/config/cfg_luis_proxy_aa_1.yaml +++ b/config/cfg_luis_proxy_aa_1.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -525,4 +525,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa_1 \ No newline at end of file diff --git a/config/cfg_luis_proxy_aa_2.yaml b/config/cfg_luis_proxy_aa_2.yaml index 3d2fb15..8b28eb7 100644 --- a/config/cfg_luis_proxy_aa_2.yaml +++ b/config/cfg_luis_proxy_aa_2.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -525,4 +525,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa_2 \ No newline at end of file diff --git a/config/cfg_luis_proxy_aa_3.yaml b/config/cfg_luis_proxy_aa_3.yaml index c458555..005f3e0 100644 --- a/config/cfg_luis_proxy_aa_3.yaml +++ b/config/cfg_luis_proxy_aa_3.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -525,4 +525,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa_3 \ No newline at end of file diff --git a/config/cfg_luis_proxy_aa_4.yaml b/config/cfg_luis_proxy_aa_4.yaml index 5c5248b..1f67ba6 100644 --- a/config/cfg_luis_proxy_aa_4.yaml +++ b/config/cfg_luis_proxy_aa_4.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -525,4 +525,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa_4 \ No newline at end of file diff --git a/config/cfg_luis_proxy_cc_0.yaml b/config/cfg_luis_proxy_cc_0.yaml index 1e5719d..57f46de 100644 --- a/config/cfg_luis_proxy_cc_0.yaml +++ b/config/cfg_luis_proxy_cc_0.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -430,4 +430,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc_0 \ No newline at end of file diff --git a/config/cfg_luis_proxy_cc_1.yaml b/config/cfg_luis_proxy_cc_1.yaml index 3963fa2..d1c1132 100644 --- a/config/cfg_luis_proxy_cc_1.yaml +++ b/config/cfg_luis_proxy_cc_1.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -515,4 +515,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc_1 \ No newline at end of file diff --git a/config/cfg_luis_proxy_cc_2.yaml b/config/cfg_luis_proxy_cc_2.yaml index 5d09e01..79db5b6 100644 --- a/config/cfg_luis_proxy_cc_2.yaml +++ b/config/cfg_luis_proxy_cc_2.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -515,4 +515,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc_2 \ No newline at end of file diff --git a/config/cfg_luis_proxy_cc_3.yaml b/config/cfg_luis_proxy_cc_3.yaml index d9af976..6eb8ab1 100644 --- a/config/cfg_luis_proxy_cc_3.yaml +++ b/config/cfg_luis_proxy_cc_3.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -515,4 +515,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc_3 \ No newline at end of file diff --git a/config/cfg_luis_proxy_cc_4.yaml b/config/cfg_luis_proxy_cc_4.yaml index 0154325..85d6a2a 100644 --- a/config/cfg_luis_proxy_cc_4.yaml +++ b/config/cfg_luis_proxy_cc_4.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -515,4 +515,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc_4 \ No newline at end of file diff --git a/config/cfg_luis_proxy_co_0.yaml b/config/cfg_luis_proxy_co_0.yaml index bef0e0d..4b181bc 100644 --- a/config/cfg_luis_proxy_co_0.yaml +++ b/config/cfg_luis_proxy_co_0.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -410,4 +410,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co_0 \ No newline at end of file diff --git a/config/cfg_luis_proxy_co_1.yaml b/config/cfg_luis_proxy_co_1.yaml index 14096f8..d2c6d7c 100644 --- a/config/cfg_luis_proxy_co_1.yaml +++ b/config/cfg_luis_proxy_co_1.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -480,4 +480,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co_1 \ No newline at end of file diff --git a/config/cfg_luis_proxy_co_2.yaml b/config/cfg_luis_proxy_co_2.yaml index 20972c6..c72a835 100644 --- a/config/cfg_luis_proxy_co_2.yaml +++ b/config/cfg_luis_proxy_co_2.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -480,4 +480,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co_2 \ No newline at end of file diff --git a/config/cfg_luis_proxy_co_3.yaml b/config/cfg_luis_proxy_co_3.yaml index bb6a647..dde2917 100644 --- a/config/cfg_luis_proxy_co_3.yaml +++ b/config/cfg_luis_proxy_co_3.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -480,4 +480,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co_3 \ No newline at end of file diff --git a/config/cfg_luis_proxy_co_4.yaml b/config/cfg_luis_proxy_co_4.yaml index 4ed7101..5f7b26f 100644 --- a/config/cfg_luis_proxy_co_4.yaml +++ b/config/cfg_luis_proxy_co_4.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -480,4 +480,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co_4 \ No newline at end of file diff --git a/config/cfg_luis_proxy_cr_0.yaml b/config/cfg_luis_proxy_cr_0.yaml index ceac1ab..838e12d 100644 --- a/config/cfg_luis_proxy_cr_0.yaml +++ b/config/cfg_luis_proxy_cr_0.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -405,4 +405,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr_0 \ No newline at end of file diff --git a/config/cfg_luis_proxy_cr_1.yaml b/config/cfg_luis_proxy_cr_1.yaml index 1e8d5c7..fd46069 100644 --- a/config/cfg_luis_proxy_cr_1.yaml +++ b/config/cfg_luis_proxy_cr_1.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -470,4 +470,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr_1 \ No newline at end of file diff --git a/config/cfg_luis_proxy_cr_2.yaml b/config/cfg_luis_proxy_cr_2.yaml index 94e240b..41775c4 100644 --- a/config/cfg_luis_proxy_cr_2.yaml +++ b/config/cfg_luis_proxy_cr_2.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -470,4 +470,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr_2 \ No newline at end of file diff --git a/config/cfg_luis_proxy_cr_3.yaml b/config/cfg_luis_proxy_cr_3.yaml index f862a81..453c177 100644 --- a/config/cfg_luis_proxy_cr_3.yaml +++ b/config/cfg_luis_proxy_cr_3.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -470,4 +470,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr_3 \ No newline at end of file diff --git a/config/cfg_luis_proxy_cr_4.yaml b/config/cfg_luis_proxy_cr_4.yaml index 59ec587..9f17c07 100644 --- a/config/cfg_luis_proxy_cr_4.yaml +++ b/config/cfg_luis_proxy_cr_4.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -470,4 +470,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr_4 \ No newline at end of file diff --git a/config/cfg_luis_proxy_fc_0.yaml b/config/cfg_luis_proxy_fc_0.yaml index 4100568..3d79e28 100644 --- a/config/cfg_luis_proxy_fc_0.yaml +++ b/config/cfg_luis_proxy_fc_0.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -410,4 +410,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc_0 \ No newline at end of file diff --git a/config/cfg_luis_proxy_fc_1.yaml b/config/cfg_luis_proxy_fc_1.yaml index b0a19cd..1bd4e8b 100644 --- a/config/cfg_luis_proxy_fc_1.yaml +++ b/config/cfg_luis_proxy_fc_1.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -480,4 +480,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc_1 \ No newline at end of file diff --git a/config/cfg_luis_proxy_fc_2.yaml b/config/cfg_luis_proxy_fc_2.yaml index 9dccf62..92db579 100644 --- a/config/cfg_luis_proxy_fc_2.yaml +++ b/config/cfg_luis_proxy_fc_2.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -480,4 +480,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc_2 \ No newline at end of file diff --git a/config/cfg_luis_proxy_fc_3.yaml b/config/cfg_luis_proxy_fc_3.yaml index 0dc84e1..cef9096 100644 --- a/config/cfg_luis_proxy_fc_3.yaml +++ b/config/cfg_luis_proxy_fc_3.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -480,4 +480,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc_3 \ No newline at end of file diff --git a/config/cfg_luis_proxy_fc_4.yaml b/config/cfg_luis_proxy_fc_4.yaml index bb7353b..dd093cc 100644 --- a/config/cfg_luis_proxy_fc_4.yaml +++ b/config/cfg_luis_proxy_fc_4.yaml @@ -10,7 +10,7 @@ data: quick_start_buffer_path: null start_wait_n_samples: 500000 compile_mode: max-autotune - compile_model: true + compile_model: false data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -480,4 +480,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc_4 \ No newline at end of file diff --git a/scripts/training/generate_training_cfg_oc.py b/scripts/training/generate_training_cfg_oc.py index e1ec23a..3547c1e 100644 --- a/scripts/training/generate_training_cfg_oc.py +++ b/scripts/training/generate_training_cfg_oc.py @@ -320,7 +320,7 @@ def generate_training_structured_configs(): max_cpu_inference_server=2, temperature_input=temperature_input, single_sbr_temperature=single_temperature, - compile_model=True, + compile_model=False, compile_mode='max-autotune', merge_inference_update_gpu=False, proxy_net_path=None, @@ -330,7 +330,8 @@ def generate_training_structured_configs(): exported_dict = serialize_dataclass(trainer_cfg) yaml_str = yaml.dump(exported_dict) # yaml_str = OmegaConf.to_yaml(trainer_cfg) - yaml_str += 'hydra:\n run:\n dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_' + f'{logger_cfg.name}' + yaml_str += 'hydra:\n run:\n dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_' \ + + f'{logger_cfg.name}_{logger_cfg.id}' config_name = f'cfg_{logger_cfg.name}_{logger_cfg.id}' config_dir = Path(__file__).parent.parent.parent / 'config' cur_config_file = config_dir / f'{config_name}.yaml' diff --git a/src/trainer/az_collector.py b/src/trainer/az_collector.py index 96bb6b7..04e3891 100644 --- a/src/trainer/az_collector.py +++ b/src/trainer/az_collector.py @@ -56,6 +56,8 @@ def run_collector( game_cfg = trainer_cfg.game_cfg collector_cfg = trainer_cfg.collector_cfg set_seed(seed) + os.environ["OMP_NUM_THREADS"] = "1" + # torch.set_num_threads(1) # init pid = os.getpid() state_save_dir = Path(os.getcwd()) / 'state' diff --git a/src/trainer/az_distributor.py b/src/trainer/az_distributor.py index 1e13903..1448d26 100644 --- a/src/trainer/az_distributor.py +++ b/src/trainer/az_distributor.py @@ -22,6 +22,8 @@ def run_distributor( ): # init pid = os.getpid() + os.environ["OMP_NUM_THREADS"] = "1" + # torch.set_num_threads(1) if cpu_list is not None: print(f"{datetime.now()} - CPU list in Distributor: {cpu_list}") os.sched_setaffinity(pid, cpu_list) diff --git a/src/trainer/az_evaluator.py b/src/trainer/az_evaluator.py index 61b2e64..e652fb2 100644 --- a/src/trainer/az_evaluator.py +++ b/src/trainer/az_evaluator.py @@ -21,6 +21,7 @@ from src.game.utils import step_with_draw_prevention from src.misc.utils import set_seed from src.network.initialization import get_network_from_config, get_network_from_file +from src.network.utils import cleanup_state_dict from src.trainer.config import EvaluatorConfig, AlphaZeroTrainerConfig from src.trainer.utils import wait_for_obj_from_queue, send_obj_to_queue import multiprocessing.sharedctypes as sc @@ -40,6 +41,7 @@ def run_evaluator( evaluator_cfg = trainer_cfg.evaluator_cfg # important for multiprocessing torch.set_num_threads(1) + os.environ["OMP_NUM_THREADS"] = "1" set_seed(seed) # paths model_folder: Path = Path(os.getcwd()) / 'eval_models' @@ -174,7 +176,8 @@ def run_evaluator( break if maybe_state_dict is None: raise Exception("Unknown exception with queue") - net.load_state_dict(maybe_state_dict) + state_dict = cleanup_state_dict(maybe_state_dict) + net.load_state_dict(state_dict) net.eval() value_agent.replace_net(net) if policy_agent is not None: diff --git a/src/trainer/az_inference_server.py b/src/trainer/az_inference_server.py index d4fce73..f829e6c 100644 --- a/src/trainer/az_inference_server.py +++ b/src/trainer/az_inference_server.py @@ -42,6 +42,8 @@ def run_inference_server( inf_cfg = trainer_cfg.inf_cfg net_cfg = trainer_cfg.net_cfg set_seed(seed) + os.environ["OMP_NUM_THREADS"] = "1" + torch.set_num_threads(1) torch.set_float32_matmul_precision('medium') # load initial network start_phase = True diff --git a/src/trainer/az_logger.py b/src/trainer/az_logger.py index f47e4cd..7519322 100644 --- a/src/trainer/az_logger.py +++ b/src/trainer/az_logger.py @@ -32,6 +32,8 @@ def run_logger( # paths wandb_dir = Path(__file__).parent.parent.parent / 'wandb' wandb_dir.mkdir(parents=True, exist_ok=True) + os.environ["OMP_NUM_THREADS"] = "1" + # torch.set_num_threads(1) # init wandb logger_cfg = trainer_cfg.logger_cfg if logger_cfg.buffer_gen: diff --git a/src/trainer/az_saver.py b/src/trainer/az_saver.py index 53b3d63..fee69b8 100644 --- a/src/trainer/az_saver.py +++ b/src/trainer/az_saver.py @@ -7,6 +7,8 @@ import multiprocessing as mp +import torch + from src.network.initialization import get_network_from_config from src.network.utils import cleanup_state_dict from src.trainer.config import AlphaZeroTrainerConfig @@ -22,6 +24,8 @@ def run_saver( ): net_cfg = trainer_cfg.net_cfg saver_cfg = trainer_cfg.saver_cfg + os.environ["OMP_NUM_THREADS"] = "1" + torch.set_num_threads(1) model_folder: Path = Path(os.getcwd()) / 'fixed_time_models' if not Path.exists(model_folder) and saver_cfg.save_all_checkpoints: model_folder.mkdir(parents=True, exist_ok=True) diff --git a/src/trainer/az_trainer.py b/src/trainer/az_trainer.py index c39419b..665d9d8 100644 --- a/src/trainer/az_trainer.py +++ b/src/trainer/az_trainer.py @@ -32,6 +32,7 @@ def __init__( ): self.cfg = cfg os.environ["WANDB__SERVICE_WAIT"] = "300" + os.environ["OMP_NUM_THREADS"] = "1" if mp.get_start_method() != 'spawn': try: mp.set_start_method('spawn') # this is important for using CUDA diff --git a/src/trainer/az_updater.py b/src/trainer/az_updater.py index 8c4bdad..7a09bfa 100644 --- a/src/trainer/az_updater.py +++ b/src/trainer/az_updater.py @@ -81,8 +81,8 @@ def run_updater( # torch.autograd.set_detect_anomaly(True) state_save_dir = Path(os.getcwd()) / 'state' # important to avoid pytorch deadlocks - # torch.set_num_threads(1) - # os.environ["OMP_NUM_THREADS"] = "1" + torch.set_num_threads(1) + os.environ["OMP_NUM_THREADS"] = "1" set_seed(seed) torch.set_float32_matmul_precision('medium') if updater_cfg.utility_loss != UtilityNorm.NONE and game_cfg.num_players != 2: diff --git a/src/trainer/az_worker.py b/src/trainer/az_worker.py index daf99a7..c0590d5 100644 --- a/src/trainer/az_worker.py +++ b/src/trainer/az_worker.py @@ -71,6 +71,8 @@ def run_worker( game_cfg = trainer_cfg.game_cfg worker_cfg = trainer_cfg.worker_cfg set_seed(seed) + # torch.set_num_threads(1) + os.environ["OMP_NUM_THREADS"] = "1" # initialization search = get_search_from_config(worker_cfg.search_cfg) if hasattr(search, "backup_func"):