diff --git a/config/cfg_luis_proxy_aa_0.yaml b/config/cfg_luis_proxy_aa_0.yaml index 248e34a..b45083f 100644 --- a/config/cfg_luis_proxy_aa_0.yaml +++ b/config/cfg_luis_proxy_aa_0.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -151,7 +151,7 @@ data: merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet - __name__: OvercookedResNetConfig5x5 + __name__: OvercookedResNetConfig9x9 data: activation_type: __module__: src.network.utils @@ -173,8 +173,13 @@ data: - 3 - 1 - 1 - - - 128 + - - 64 + - 1 + - 3 + - 0 - 1 + - - 128 + - 2 - 3 - 1 - 1 @@ -367,7 +372,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_luis_proxy_aa_1.yaml b/config/cfg_luis_proxy_aa_1.yaml index 977b8f7..af2d8ef 100644 --- a/config/cfg_luis_proxy_aa_1.yaml +++ b/config/cfg_luis_proxy_aa_1.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -151,7 +151,7 @@ data: merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet - __name__: OvercookedResNetConfig5x5 + __name__: OvercookedResNetConfig9x9 data: activation_type: __module__: src.network.utils @@ -161,7 +161,97 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 layer_specs: - - 32 - 3 @@ -173,9 +263,14 @@ data: - 3 - 1 - 1 - - - 128 + - - 64 - 1 - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 - 1 - 1 - - 256 @@ -367,7 +462,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_luis_proxy_aa_2.yaml b/config/cfg_luis_proxy_aa_2.yaml new file mode 100644 index 0000000..2163647 --- /dev/null +++ b/config/cfg_luis_proxy_aa_2.yaml @@ -0,0 +1,528 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 2 + name: luis_proxy_aa + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig9x9 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 64 + - 1 + - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa \ No newline at end of file diff --git a/config/cfg_luis_proxy_aa_3.yaml b/config/cfg_luis_proxy_aa_3.yaml new file mode 100644 index 0000000..650e2fa --- /dev/null +++ b/config/cfg_luis_proxy_aa_3.yaml @@ -0,0 +1,528 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 3 + name: luis_proxy_aa + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig9x9 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 64 + - 1 + - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa \ No newline at end of file diff --git a/config/cfg_luis_proxy_aa_4.yaml b/config/cfg_luis_proxy_aa_4.yaml new file mode 100644 index 0000000..dc9bd33 --- /dev/null +++ b/config/cfg_luis_proxy_aa_4.yaml @@ -0,0 +1,528 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 4 + name: luis_proxy_aa + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig9x9 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: AsymmetricAdvantageOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - - 3 + - 0 + - 1 + - 5 + - 1 + - 3 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 6 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 3 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 9 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 64 + - 1 + - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_aa \ No newline at end of file diff --git a/config/cfg_luis_proxy_cc_0.yaml b/config/cfg_luis_proxy_cc_0.yaml index 1f457a7..16270ab 100644 --- a/config/cfg_luis_proxy_cc_0.yaml +++ b/config/cfg_luis_proxy_cc_0.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -146,7 +146,7 @@ data: merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet - __name__: OvercookedResNetConfig5x5 + __name__: OvercookedResNetConfig8x8 data: activation_type: __module__: src.network.utils @@ -168,15 +168,20 @@ data: - 3 - 1 - 1 - - - 128 + - - 64 + - 1 + - 3 + - 0 - 1 + - - 128 + - 2 - 3 - 1 - 1 - - 256 - 1 - 3 - - 0 + - 1 - 1 lff_feature_expansion: 40 lff_features: false @@ -362,7 +367,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_luis_proxy_cc_1.yaml b/config/cfg_luis_proxy_cc_1.yaml index f018a37..266af84 100644 --- a/config/cfg_luis_proxy_cc_1.yaml +++ b/config/cfg_luis_proxy_cc_1.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -146,7 +146,7 @@ data: merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet - __name__: OvercookedResNetConfig5x5 + __name__: OvercookedResNetConfig8x8 data: activation_type: __module__: src.network.utils @@ -156,7 +156,92 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CounterCircuitOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 4 + - 1 + - 1 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 1 + - 1 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 3 + - 3 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 3 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 8 layer_specs: - - 32 - 3 @@ -168,15 +253,20 @@ data: - 3 - 1 - 1 - - - 128 + - - 64 - 1 - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 - 1 - 1 - - 256 - 1 - 3 - - 0 + - 1 - 1 lff_feature_expansion: 40 lff_features: false @@ -362,7 +452,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_proxy_cc_0.yaml b/config/cfg_luis_proxy_cc_2.yaml similarity index 81% rename from config/cfg_proxy_cc_0.yaml rename to config/cfg_luis_proxy_cc_2.yaml index 9e1a534..bddf45c 100644 --- a/config/cfg_proxy_cc_0.yaml +++ b/config/cfg_luis_proxy_cc_2.yaml @@ -5,12 +5,12 @@ data: __module__: src.trainer.config __name__: CollectorConfig data: - buffer_size: 100000 + buffer_size: 500000 log_every_sec: 300 quick_start_buffer_path: null - start_wait_n_samples: 100000 - compile_mode: reduce-overhead - compile_model: false + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -130,23 +130,23 @@ data: __name__: LoggerConfig data: buffer_gen: false - id: 0 - name: proxy_cc + id: 2 + name: luis_proxy_cc project_name: overcooked updater_bucket_size: 1000 - wandb_mode: online + wandb_mode: offline worker_episode_bucket_size: 2 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 - max_cpu_worker: 11 + max_cpu_worker: 22 max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: __module__: src.network.resnet - __name__: OvercookedResNetConfig5x5 + __name__: OvercookedResNetConfig8x8 data: activation_type: __module__: src.network.utils @@ -156,7 +156,92 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CounterCircuitOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 4 + - 1 + - 1 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 1 + - 1 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 3 + - 3 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 3 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 8 layer_specs: - - 32 - 3 @@ -168,15 +253,20 @@ data: - 3 - 1 - 1 - - - 128 + - - 64 - 1 - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 - 1 - 1 - - 256 - 1 - 3 - - 0 + - 1 - 1 lff_feature_expansion: 40 lff_features: false @@ -362,7 +452,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -425,4 +515,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_proxy_cc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc \ No newline at end of file diff --git a/config/cfg_luis_proxy_cc_3.yaml b/config/cfg_luis_proxy_cc_3.yaml new file mode 100644 index 0000000..c1de5f8 --- /dev/null +++ b/config/cfg_luis_proxy_cc_3.yaml @@ -0,0 +1,518 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: CounterCircuitOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 4 + - 1 + - 1 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 1 + - 1 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 3 + - 3 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 3 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 8 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 3 + name: luis_proxy_cc + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig8x8 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: CounterCircuitOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 4 + - 1 + - 1 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 1 + - 1 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 3 + - 3 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 3 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 8 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 64 + - 1 + - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 1 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc \ No newline at end of file diff --git a/config/cfg_luis_proxy_cc_4.yaml b/config/cfg_luis_proxy_cc_4.yaml new file mode 100644 index 0000000..83231ac --- /dev/null +++ b/config/cfg_luis_proxy_cc_4.yaml @@ -0,0 +1,518 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: CounterCircuitOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 4 + - 1 + - 1 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 1 + - 1 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 3 + - 3 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 3 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 8 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 4 + name: luis_proxy_cc + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig8x8 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: CounterCircuitOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 4 + - 1 + - 1 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 1 + - 1 + - 1 + - 0 + - 5 + - - 1 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 3 + - 3 + - 1 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 3 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 8 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 64 + - 1 + - 3 + - 0 + - 1 + - - 128 + - 2 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 1 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cc \ No newline at end of file diff --git a/config/cfg_luis_proxy_oc_0.yaml b/config/cfg_luis_proxy_co_0.yaml similarity index 97% rename from config/cfg_luis_proxy_oc_0.yaml rename to config/cfg_luis_proxy_co_0.yaml index 443ff4d..94e4d15 100644 --- a/config/cfg_luis_proxy_oc_0.yaml +++ b/config/cfg_luis_proxy_co_0.yaml @@ -26,39 +26,44 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true switch_pos: true game_cfg: __module__: src.game.overcooked.config - __name__: CrampedRoomOvercookedConfig + __name__: CoordinationRingOvercookedConfig data: board: - - 1 - 1 - - 4 - 1 + - 4 - 1 - - - 3 + - - 1 - 0 - 0 - 0 - - 3 - - - 1 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 - 0 - 0 - 0 - 1 - - 1 - - 2 - - 1 + - 3 - 5 - 1 + - 1 cooking_time: 20 flat_obs: false - h: 4 + h: 5 horizon: 400 num_actions: 6 num_players: 2 @@ -82,16 +87,16 @@ data: __name__: TupleWrapper data: data: - - 1 - 2 + - 1 - 0 - 0 - __module__: src.misc.serialization __name__: TupleWrapper data: data: - - 3 - 1 + - 2 - 0 - 0 temperature_input: true @@ -111,7 +116,7 @@ data: data: buffer_gen: false id: 0 - name: luis_proxy_oc + name: luis_proxy_co project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline @@ -342,7 +347,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -405,4 +410,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_oc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file diff --git a/config/cfg_proxy_cr_0.yaml b/config/cfg_luis_proxy_co_1.yaml similarity index 83% rename from config/cfg_proxy_cr_0.yaml rename to config/cfg_luis_proxy_co_1.yaml index 64daa56..1342795 100644 --- a/config/cfg_proxy_cr_0.yaml +++ b/config/cfg_luis_proxy_co_1.yaml @@ -5,12 +5,12 @@ data: __module__: src.trainer.config __name__: CollectorConfig data: - buffer_size: 100000 + buffer_size: 500000 log_every_sec: 300 quick_start_buffer_path: null - start_wait_n_samples: 100000 - compile_mode: reduce-overhead - compile_model: false + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -115,18 +115,18 @@ data: __name__: LoggerConfig data: buffer_gen: false - id: 0 - name: proxy_cr + id: 1 + name: luis_proxy_co project_name: overcooked updater_bucket_size: 1000 - wandb_mode: online + wandb_mode: offline worker_episode_bucket_size: 2 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 - max_cpu_worker: 11 + max_cpu_worker: 22 max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: @@ -141,7 +141,77 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -347,7 +417,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -410,4 +480,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_proxy_cr \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file diff --git a/config/cfg_luis_proxy_co_2.yaml b/config/cfg_luis_proxy_co_2.yaml new file mode 100644 index 0000000..14c0432 --- /dev/null +++ b/config/cfg_luis_proxy_co_2.yaml @@ -0,0 +1,483 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 2 + name: luis_proxy_co + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig5x5 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 128 + - 1 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file diff --git a/config/cfg_luis_proxy_co_3.yaml b/config/cfg_luis_proxy_co_3.yaml new file mode 100644 index 0000000..ae2b13b --- /dev/null +++ b/config/cfg_luis_proxy_co_3.yaml @@ -0,0 +1,483 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 3 + name: luis_proxy_co + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig5x5 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 128 + - 1 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file diff --git a/config/cfg_luis_proxy_co_4.yaml b/config/cfg_luis_proxy_co_4.yaml new file mode 100644 index 0000000..81cc701 --- /dev/null +++ b/config/cfg_luis_proxy_co_4.yaml @@ -0,0 +1,483 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 4 + name: luis_proxy_co + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig5x5 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: CoordinationRingOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 1 + - 0 + - 0 + - 0 + - 4 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 3 + - 5 + - 1 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 2 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 128 + - 1 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_co \ No newline at end of file diff --git a/config/cfg_luis_proxy_cr_0.yaml b/config/cfg_luis_proxy_cr_0.yaml index c93bd87..5700525 100644 --- a/config/cfg_luis_proxy_cr_0.yaml +++ b/config/cfg_luis_proxy_cr_0.yaml @@ -26,44 +26,39 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true switch_pos: true game_cfg: __module__: src.game.overcooked.config - __name__: CoordinationRingOvercookedConfig + __name__: CrampedRoomOvercookedConfig data: board: - - 1 - - 1 - 1 - 4 - 1 - - - 1 - - 0 - - 0 + - 1 + - - 3 - 0 - - 4 - - - 2 - 0 - - 1 - 0 - - 1 - - - 3 + - 3 + - - 1 - 0 - 0 - 0 - 1 - - 1 - - 3 - - 5 + - 2 - 1 + - 5 - 1 cooking_time: 20 flat_obs: false - h: 5 + h: 4 horizon: 400 num_actions: 6 num_players: 2 @@ -87,16 +82,16 @@ data: __name__: TupleWrapper data: data: - - 2 - 1 + - 2 - 0 - 0 - __module__: src.misc.serialization __name__: TupleWrapper data: data: + - 3 - 1 - - 2 - 0 - 0 temperature_input: true @@ -347,7 +342,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_luis_proxy_cr_1.yaml b/config/cfg_luis_proxy_cr_1.yaml index 7be7109..1d764f2 100644 --- a/config/cfg_luis_proxy_cr_1.yaml +++ b/config/cfg_luis_proxy_cr_1.yaml @@ -26,44 +26,39 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true switch_pos: true game_cfg: __module__: src.game.overcooked.config - __name__: CoordinationRingOvercookedConfig + __name__: CrampedRoomOvercookedConfig data: board: - - 1 - - 1 - 1 - 4 - 1 - - - 1 - - 0 + - 1 + - - 3 - 0 - 0 - - 4 - - - 2 - 0 - - 1 - - 0 - - 1 - - - 3 + - 3 + - - 1 - 0 - 0 - 0 - 1 - - 1 - - 3 - - 5 + - 2 - 1 + - 5 - 1 cooking_time: 20 flat_obs: false - h: 5 + h: 4 horizon: 400 num_actions: 6 num_players: 2 @@ -87,16 +82,16 @@ data: __name__: TupleWrapper data: data: - - 2 - 1 + - 2 - 0 - 0 - __module__: src.misc.serialization __name__: TupleWrapper data: data: + - 3 - 1 - - 2 - 0 - 0 temperature_input: true @@ -141,7 +136,72 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CrampedRoomOvercookedConfig + data: + board: + - - 1 + - 1 + - 4 + - 1 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 3 + - - 1 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 2 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 4 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -347,7 +407,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_luis_proxy_oc_1.yaml b/config/cfg_luis_proxy_cr_2.yaml similarity index 85% rename from config/cfg_luis_proxy_oc_1.yaml rename to config/cfg_luis_proxy_cr_2.yaml index 9e01ab8..0ea3edc 100644 --- a/config/cfg_luis_proxy_oc_1.yaml +++ b/config/cfg_luis_proxy_cr_2.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -110,8 +110,8 @@ data: __name__: LoggerConfig data: buffer_gen: false - id: 1 - name: luis_proxy_oc + id: 2 + name: luis_proxy_cr project_name: overcooked updater_bucket_size: 1000 wandb_mode: offline @@ -136,7 +136,72 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CrampedRoomOvercookedConfig + data: + board: + - - 1 + - 1 + - 4 + - 1 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 3 + - - 1 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 2 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 4 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -342,7 +407,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -405,4 +470,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_oc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr \ No newline at end of file diff --git a/config/cfg_proxy_oc_0.yaml b/config/cfg_luis_proxy_cr_3.yaml similarity index 84% rename from config/cfg_proxy_oc_0.yaml rename to config/cfg_luis_proxy_cr_3.yaml index 25baea4..d5683f7 100644 --- a/config/cfg_proxy_oc_0.yaml +++ b/config/cfg_luis_proxy_cr_3.yaml @@ -5,12 +5,12 @@ data: __module__: src.trainer.config __name__: CollectorConfig data: - buffer_size: 100000 + buffer_size: 500000 log_every_sec: 300 quick_start_buffer_path: null - start_wait_n_samples: 100000 - compile_mode: reduce-overhead - compile_model: false + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -110,18 +110,18 @@ data: __name__: LoggerConfig data: buffer_gen: false - id: 0 - name: proxy_oc + id: 3 + name: luis_proxy_cr project_name: overcooked updater_bucket_size: 1000 - wandb_mode: online + wandb_mode: offline worker_episode_bucket_size: 2 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 - max_cpu_worker: 11 + max_cpu_worker: 22 max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: @@ -136,7 +136,72 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CrampedRoomOvercookedConfig + data: + board: + - - 1 + - 1 + - 4 + - 1 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 3 + - - 1 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 2 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 4 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -342,7 +407,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -405,4 +470,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_proxy_oc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr \ No newline at end of file diff --git a/config/cfg_proxy_aa_0.yaml b/config/cfg_luis_proxy_cr_4.yaml similarity index 83% rename from config/cfg_proxy_aa_0.yaml rename to config/cfg_luis_proxy_cr_4.yaml index 24ab7d9..1d36413 100644 --- a/config/cfg_proxy_aa_0.yaml +++ b/config/cfg_luis_proxy_cr_4.yaml @@ -5,12 +5,12 @@ data: __module__: src.trainer.config __name__: CollectorConfig data: - buffer_size: 100000 + buffer_size: 500000 log_every_sec: 300 quick_start_buffer_path: null - start_wait_n_samples: 100000 - compile_mode: reduce-overhead - compile_model: false + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -26,64 +26,39 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true switch_pos: true game_cfg: __module__: src.game.overcooked.config - __name__: AsymmetricAdvantageOvercookedConfig + __name__: CrampedRoomOvercookedConfig data: board: - - 1 - 1 - - 1 - - 1 - - 1 - - 1 - - 1 + - 4 - 1 - 1 - - 3 - - 0 - - 1 - - 5 - - 1 - - 3 - - 1 - - 0 - - 5 - - - 1 - - 0 - - 0 - - 0 - - 4 - 0 - 0 - 0 - - 1 + - 3 - - 1 - - 0 - - 0 - - 0 - - 4 - 0 - 0 - 0 - 1 - - 1 - - 1 - - 1 - - 2 - - 1 - 2 - 1 - - 1 + - 5 - 1 cooking_time: 20 flat_obs: false - h: 5 + h: 4 horizon: 400 num_actions: 6 num_players: 2 @@ -107,7 +82,7 @@ data: __name__: TupleWrapper data: data: - - 6 + - 1 - 2 - 0 - 0 @@ -115,13 +90,13 @@ data: __name__: TupleWrapper data: data: - - 1 - 3 + - 1 - 0 - 0 temperature_input: true unstuck_behavior: false - w: 9 + w: 5 inf_cfg: __module__: src.trainer.config __name__: InferenceServerConfig @@ -135,18 +110,18 @@ data: __name__: LoggerConfig data: buffer_gen: false - id: 0 - name: proxy_aa + id: 4 + name: luis_proxy_cr project_name: overcooked updater_bucket_size: 1000 - wandb_mode: online + wandb_mode: offline worker_episode_bucket_size: 2 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 - max_cpu_worker: 11 + max_cpu_worker: 22 max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: @@ -161,7 +136,72 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: CrampedRoomOvercookedConfig + data: + board: + - - 1 + - 1 + - 4 + - 1 + - 1 + - - 3 + - 0 + - 0 + - 0 + - 3 + - - 1 + - 0 + - 0 + - 0 + - 1 + - - 1 + - 2 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 4 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -367,7 +407,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -430,4 +470,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_proxy_aa \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_cr \ No newline at end of file diff --git a/config/cfg_luis_proxy_fc_0.yaml b/config/cfg_luis_proxy_fc_0.yaml index 14a9a09..a0d9585 100644 --- a/config/cfg_luis_proxy_fc_0.yaml +++ b/config/cfg_luis_proxy_fc_0.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -347,7 +347,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_luis_proxy_fc_1.yaml b/config/cfg_luis_proxy_fc_1.yaml index 65d0e04..ae29adb 100644 --- a/config/cfg_luis_proxy_fc_1.yaml +++ b/config/cfg_luis_proxy_fc_1.yaml @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -141,7 +141,77 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: ForcedCoordinationOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 3 + - 0 + - 1 + - 0 + - 4 + - - 3 + - 0 + - 1 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -347,7 +417,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false diff --git a/config/cfg_proxy_fc_0.yaml b/config/cfg_luis_proxy_fc_2.yaml similarity index 83% rename from config/cfg_proxy_fc_0.yaml rename to config/cfg_luis_proxy_fc_2.yaml index b32e229..12fac1d 100644 --- a/config/cfg_proxy_fc_0.yaml +++ b/config/cfg_luis_proxy_fc_2.yaml @@ -5,12 +5,12 @@ data: __module__: src.trainer.config __name__: CollectorConfig data: - buffer_size: 100000 + buffer_size: 500000 log_every_sec: 300 quick_start_buffer_path: null - start_wait_n_samples: 100000 - compile_mode: reduce-overhead - compile_model: false + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true data_qsize: 10 distributor_out_qsize: 10 evaluator_cfg: @@ -26,7 +26,7 @@ data: eval_rate_sec: 60 num_episodes: - 100 - - 20 + - 2 prevent_draw: false save_checkpoints: false self_play: true @@ -115,18 +115,18 @@ data: __name__: LoggerConfig data: buffer_gen: false - id: 0 - name: proxy_fc + id: 2 + name: luis_proxy_fc project_name: overcooked updater_bucket_size: 1000 - wandb_mode: online + wandb_mode: offline worker_episode_bucket_size: 2 max_batch_size: 15000 max_cpu_evaluator: 1 max_cpu_inference_server: 2 max_cpu_log_dist_save_collect: 1 max_cpu_updater: 2 - max_cpu_worker: 11 + max_cpu_worker: 22 max_eval_per_worker: 30000 merge_inference_update_gpu: false net_cfg: @@ -141,7 +141,77 @@ data: __module__: src.network.vision_net __name__: EquivarianceType value: NONE - game_cfg: null + game_cfg: + __module__: src.game.overcooked.config + __name__: ForcedCoordinationOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 3 + - 0 + - 1 + - 0 + - 4 + - - 3 + - 0 + - 1 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 layer_specs: - - 32 - 3 @@ -347,7 +417,7 @@ data: data: active_wait_time: 0.05 init_temperatures: null - max_clip_value: .inf + max_clip_value: 30 min_clip_value: -.inf policy_prediction: true random_symmetry: false @@ -410,4 +480,4 @@ data: use_symmetries: true hydra: run: - dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_proxy_fc \ No newline at end of file + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc \ No newline at end of file diff --git a/config/cfg_luis_proxy_fc_3.yaml b/config/cfg_luis_proxy_fc_3.yaml new file mode 100644 index 0000000..c6a5783 --- /dev/null +++ b/config/cfg_luis_proxy_fc_3.yaml @@ -0,0 +1,483 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: ForcedCoordinationOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 3 + - 0 + - 1 + - 0 + - 4 + - - 3 + - 0 + - 1 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 3 + name: luis_proxy_fc + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig5x5 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: ForcedCoordinationOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 3 + - 0 + - 1 + - 0 + - 4 + - - 3 + - 0 + - 1 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 128 + - 1 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc \ No newline at end of file diff --git a/config/cfg_luis_proxy_fc_4.yaml b/config/cfg_luis_proxy_fc_4.yaml new file mode 100644 index 0000000..f9ac520 --- /dev/null +++ b/config/cfg_luis_proxy_fc_4.yaml @@ -0,0 +1,483 @@ +__module__: src.trainer.config +__name__: AlphaZeroTrainerConfig +data: + collector_cfg: + __module__: src.trainer.config + __name__: CollectorConfig + data: + buffer_size: 500000 + log_every_sec: 300 + quick_start_buffer_path: null + start_wait_n_samples: 500000 + compile_mode: max-autotune + compile_model: true + data_qsize: 10 + distributor_out_qsize: 10 + evaluator_cfg: + __module__: src.trainer.config + __name__: EvaluatorConfig + data: + enemy_cfgs: + - __module__: src.agent.one_shot + __name__: RandomAgentConfig + data: + name: RandomAgent + enemy_iterations: 1 + eval_rate_sec: 60 + num_episodes: + - 100 + - 2 + prevent_draw: false + save_checkpoints: false + self_play: true + switch_pos: true + game_cfg: + __module__: src.game.overcooked.config + __name__: ForcedCoordinationOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 3 + - 0 + - 1 + - 0 + - 4 + - - 3 + - 0 + - 1 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + inf_cfg: + __module__: src.trainer.config + __name__: InferenceServerConfig + data: + statistics_every_sec: 60 + use_gpu: true + info_qsize: 100 + init_new_network_params: false + logger_cfg: + __module__: src.trainer.config + __name__: LoggerConfig + data: + buffer_gen: false + id: 4 + name: luis_proxy_fc + project_name: overcooked + updater_bucket_size: 1000 + wandb_mode: offline + worker_episode_bucket_size: 2 + max_batch_size: 15000 + max_cpu_evaluator: 1 + max_cpu_inference_server: 2 + max_cpu_log_dist_save_collect: 1 + max_cpu_updater: 2 + max_cpu_worker: 22 + max_eval_per_worker: 30000 + merge_inference_update_gpu: false + net_cfg: + __module__: src.network.resnet + __name__: OvercookedResNetConfig5x5 + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + eq_type: + __module__: src.network.vision_net + __name__: EquivarianceType + value: NONE + game_cfg: + __module__: src.game.overcooked.config + __name__: ForcedCoordinationOvercookedConfig + data: + board: + - - 1 + - 1 + - 1 + - 4 + - 1 + - - 3 + - 0 + - 1 + - 0 + - 4 + - - 3 + - 0 + - 1 + - 0 + - 1 + - - 2 + - 0 + - 1 + - 0 + - 1 + - - 1 + - 1 + - 1 + - 5 + - 1 + cooking_time: 20 + flat_obs: false + h: 5 + horizon: 400 + num_actions: 6 + num_players: 2 + reward_cfg: + __module__: src.game.overcooked.config + __name__: OvercookedRewardConfig + data: + dish_pickup: 3 + placement_in_pot: 3 + soup_delivery: 20 + soup_pickup: 5 + start_cooking: 3 + reward_scaling_factor: 1 + single_temperature_input: true + start_pos: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 3 + - 1 + - 0 + - 0 + - __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - 1 + - 2 + - 0 + - 0 + temperature_input: true + unstuck_behavior: false + w: 5 + layer_specs: + - - 32 + - 3 + - 3 + - 1 + - 1 + - - 64 + - 2 + - 3 + - 1 + - 1 + - - 128 + - 1 + - 3 + - 1 + - 1 + - - 256 + - 1 + - 3 + - 0 + - 1 + lff_feature_expansion: 40 + lff_features: false + norm_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + policy_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + predict_policy: true + value_head_cfg: + __module__: src.network.fcn + __name__: WideHeadConfig + data: + activation_type: + __module__: src.network.utils + __name__: ActivationType + value: LEAKY_RELU + dropout_p: 0.2 + final_activation: + __module__: src.network.utils + __name__: ActivationType + value: NONE + hidden_size: 256 + normalization_type: + __module__: src.network.utils + __name__: NormalizationType + value: GROUP_NORM + num_layers: 1 + num_inference_server: 1 + num_worker: 75 + only_generate_buffer: false + prev_run_dir: null + prev_run_idx: null + proxy_net_path: null + restrict_cpu: true + save_state: false + save_state_after_seconds: 30 + saver_cfg: + __module__: src.trainer.config + __name__: SaverConfig + data: + save_all_checkpoints: false + save_interval_sec: 300 + single_sbr_temperature: true + temperature_input: true + updater_cfg: + __module__: src.trainer.config + __name__: UpdaterConfig + data: + gradient_max_norm: 100 + mse_policy_loss: false + optim_cfg: + __module__: src.supervised.optim + __name__: OptimizerConfig + data: + anneal_cfg: + __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 0.001 + - 1.0e-05 + - 0.001 + - 1.0e-06 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: false + end_times_min: + - 60 + - 600 + - 700 + - 1300 + init_temp: 0 + sampling: false + beta1: 0.9 + beta2: 0.99 + fused: false + optim_type: + __module__: src.supervised.optim + __name__: OptimType + value: ADAM_W + weight_decay: 0.0001 + policy_loss_factor: 1 + updates_until_distribution: 5 + use_gpu: true + utility_loss: + __module__: src.game.values + __name__: UtilityNorm + value: NONE + utility_loss_factor: 0 + value_reg_loss_factor: 0 + updater_in_qsize: 100 + updater_out_qsize: 10 + validator_data_qsize: 100 + worker_cfg: + __module__: src.trainer.config + __name__: WorkerConfig + data: + anneal_cfgs: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 1 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: COSINE + cyclic: true + end_times_min: + - 1 + init_temp: 0 + sampling: true + exploration_prob: 0.5 + max_game_length: 8 + max_random_start_steps: 0 + policy_eval_cfg: + __module__: src.trainer.policy_eval + __name__: PolicyEvalConfig + data: + eval_type: + __module__: src.trainer.policy_eval + __name__: PolicyEvalType + value: TD_0 + lambda_val: 0.5 + prevent_draw: false + quick_start: false + search_cfg: + __module__: src.search.config + __name__: FixedDepthConfig + data: + average_eval: false + backup_func_cfg: + __module__: src.search.config + __name__: LogitBackupConfig + data: + epsilon: 0 + hp_0: null + hp_1: null + init_random: true + init_temperatures: + - 15 + - 15 + num_iterations: 150 + sbr_mode: + __module__: src.equilibria.logit + __name__: SbrMode + value: NAGURNEY + use_cpp: true + discount: 0.9 + eval_func_cfg: + __module__: src.search.config + __name__: InferenceServerEvalConfig + data: + active_wait_time: 0.05 + init_temperatures: null + max_clip_value: 30 + min_clip_value: -.inf + policy_prediction: true + random_symmetry: false + single_temperature: true + temperature_input: true + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + extract_func_cfg: + __module__: src.search.config + __name__: SpecialExtractConfig + data: + max_clip_value: 30 + min_clip_value: -.inf + utility_norm: + __module__: src.game.values + __name__: UtilityNorm + value: FULL_COOP + search_iterations: 1 + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 600 + - 1300 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false + temperature: 1 + use_symmetries: true +hydra: + run: + dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_luis_proxy_fc \ No newline at end of file diff --git a/config/debug_config.yaml b/config/debug_config.yaml index 553ac87..abd213d 100644 --- a/config/debug_config.yaml +++ b/config/debug_config.yaml @@ -114,7 +114,7 @@ data: name: null project_name: test updater_bucket_size: 100 - wandb_mode: offline + wandb_mode: online worker_episode_bucket_size: 5 max_batch_size: 3000 max_cpu_evaluator: 1 @@ -204,12 +204,12 @@ data: value: GROUP_NORM num_layers: 1 num_inference_server: 1 - num_worker: 5 + num_worker: 30 only_generate_buffer: false prev_run_dir: null prev_run_idx: null - proxy_net_path: /home/mahlau/nobackup/albatross/albatross/outputs/working_proxy_simple2.pt - restrict_cpu: true + proxy_net_path: C:\Users\mahla\Programming\battlesnake\albatross\scripts\training\outputs\proxy2.pt + restrict_cpu: false save_state: false save_state_after_seconds: 30 saver_cfg: @@ -279,7 +279,7 @@ data: __name__: TemperatureAnnealingConfig data: anneal_temps: - - 10 + - 1 anneal_types: - __module__: src.supervised.annealer __name__: AnnealingType @@ -293,7 +293,7 @@ data: __name__: TemperatureAnnealingConfig data: anneal_temps: - - 10 + - 1 anneal_types: - __module__: src.supervised.annealer __name__: AnnealingType @@ -352,9 +352,46 @@ data: utility_norm: __module__: src.game.values __name__: UtilityNorm - value: FULL_COOP + value: NONE search_iterations: 1 - temp_scaling_cfgs: null + temp_scaling_cfgs: + __module__: src.misc.serialization + __name__: TupleWrapper + data: + data: + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 5 + - 0 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + - __module__: src.supervised.annealer + __name__: AnnealingType + value: LINEAR + cyclic: false + end_times_min: + - 2 + - 5 + init_temp: 5 + sampling: false + - __module__: src.supervised.annealer + __name__: TemperatureAnnealingConfig + data: + anneal_temps: + - 10 + anneal_types: + - __module__: src.supervised.annealer + __name__: AnnealingType + value: CONST + cyclic: true + end_times_min: + - 1 + init_temp: 10 + sampling: false temperature: 1 use_symmetries: true hydra: diff --git a/scripts/training/generate_training_cfg_oc.py b/scripts/training/generate_training_cfg_oc.py index 726ebdf..39a6ec1 100644 --- a/scripts/training/generate_training_cfg_oc.py +++ b/scripts/training/generate_training_cfg_oc.py @@ -22,7 +22,8 @@ from src.network.initialization import get_network_from_config from src.network.mobile_one import MobileOneConfig3x3 from src.network.mobilenet_v3 import MobileNetConfig3x3, MobileNetConfig5x5 -from src.network.resnet import ResNetConfig3x3, ResNetConfig7x7Best, OvercookedResNetConfig5x5 +from src.network.resnet import ResNetConfig3x3, ResNetConfig7x7Best, OvercookedResNetConfig5x5, \ + OvercookedResNetConfig9x9, OvercookedResNetConfig8x8 from src.network.utils import ActivationType from src.network.vision_net import EquivarianceType from src.search.config import AlphaZeroDecoupledSelectionConfig, InferenceServerEvalConfig, StandardBackupConfig, StandardExtractConfig, \ @@ -48,16 +49,15 @@ def generate_training_structured_configs(): Main method to start the training using dataclasses specified below """ # for seed in range(5): - game_cfg_dict = { - 'oc': CrampedRoomOvercookedConfig(), - 'aa': AsymmetricAdvantageOvercookedConfig(), - 'cr': CoordinationRingOvercookedConfig(), - 'fc': ForcedCoordinationOvercookedConfig(), - 'cc': CounterCircuitOvercookedConfig(), - + cfg_dict = { + 'aa': (AsymmetricAdvantageOvercookedConfig(), OvercookedResNetConfig9x9()), + 'cc': (CounterCircuitOvercookedConfig(), OvercookedResNetConfig8x8()), + 'co': (CoordinationRingOvercookedConfig(), OvercookedResNetConfig5x5()), + 'cr': (CrampedRoomOvercookedConfig(), OvercookedResNetConfig5x5()), + 'fc': (ForcedCoordinationOvercookedConfig(), OvercookedResNetConfig5x5()), } - for name, game_cfg in game_cfg_dict.items(): - for seed in range(2): + for name, (game_cfg, net_cfg) in cfg_dict.items(): + for seed in range(5): temperature_input = True single_temperature = True # game @@ -81,7 +81,7 @@ def generate_training_structured_configs(): # net_cfg = MobileOneConfig3x3(predict_policy=True, predict_game_len=False, eq_type=eq_type) # net_cfg = MobileNetConfig5x5(predict_policy=True, predict_game_len=False, eq_type=eq_type) # net_cfg = ResNetConfig7x7Best() - net_cfg = OvercookedResNetConfig5x5(predict_policy=True, eq_type=eq_type, lff_features=False) + # net_cfg = OvercookedResNetConfig5x5() # net_cfg = EquivariantMobileNetConfig3x3(predict_game_len=True) # search @@ -104,7 +104,7 @@ def generate_training_structured_configs(): temperature_input=temperature_input, single_temperature=single_temperature, min_clip_value=-math.inf, - max_clip_value=math.inf, + max_clip_value=30, policy_prediction=net_cfg.predict_policy, utility_norm=UtilityNorm.FULL_COOP, ) @@ -145,7 +145,7 @@ def generate_training_structured_configs(): utility_norm=UtilityNorm.FULL_COOP, min_clip_value=-math.inf, max_clip_value=30, - ) + ) # extraction_func_cfg = StandardExtractConfig() # extraction_func_cfg = MeanPolicyExtractConfig() # extraction_func_cfg = PolicyExtractConfig() @@ -230,7 +230,7 @@ def generate_training_structured_configs(): ) evaluator_cfg = EvaluatorConfig( eval_rate_sec=60, - num_episodes=[100, 20], + num_episodes=[100, 2], enemy_iterations=1, enemy_cfgs=[ RandomAgentConfig() diff --git a/scripts/training/play_overcooked.py b/scripts/training/play_overcooked.py index fb8e6d9..0474d66 100644 --- a/scripts/training/play_overcooked.py +++ b/scripts/training/play_overcooked.py @@ -12,9 +12,9 @@ def play_overcooked_example(): - path = Path(__file__).parent.parent.parent / 'outputs' / 'latest copy 2.pt' + path = Path(__file__).parent / 'outputs' / 'response2.pt' temperature_input = True - single_temperature = False + single_temperature = True net = get_network_from_file(path).eval() game_cfg = net.cfg.game_cfg @@ -49,15 +49,15 @@ def play_overcooked_example(): # agent1.net = net agent_list = [ - agent1, agent0, + agent1, ] sample_temperatures = [math.inf, math.inf] # sample_temperatures = [5, 5] # play # temperatures = np.linspace(0, 10, 15) - temperatures = [0.1] + temperatures = [0.5] for t in temperatures: agent0.temperatures = [t, t] # agent1.temperatures = [t, t] diff --git a/scripts/training/script_start_training_oc.py b/scripts/training/script_start_training_oc.py index 7c5059c..d979b00 100644 --- a/scripts/training/script_start_training_oc.py +++ b/scripts/training/script_start_training_oc.py @@ -14,7 +14,9 @@ from src.game.battlesnake.bootcamp.test_envs_3x3 import perform_choke_2_player from src.game.battlesnake.bootcamp.test_envs_5x5 import perform_choke_5x5_4_player from src.game.battlesnake.bootcamp.test_envs_7x7 import survive_on_7x7_4_player_royale -from src.game.overcooked.config import CrampedRoomOvercookedConfig, OneStateCrampedRoomOvercookedConfig, Simple2CrampedRoomOvercookedConfig, Simple3CrampedRoomOvercookedConfig, Simple4CrampedRoomOvercookedConfig, SimpleCrampedRoomOvercookedConfig, TwoStateCrampedRoomOvercookedConfig +from src.game.overcooked.config import CrampedRoomOvercookedConfig, OneStateCrampedRoomOvercookedConfig, \ + Simple2CrampedRoomOvercookedConfig, Simple3CrampedRoomOvercookedConfig, Simple4CrampedRoomOvercookedConfig, \ + SimpleCrampedRoomOvercookedConfig, TwoStateCrampedRoomOvercookedConfig from src.game.values import UtilityNorm from src.misc.const import PHI from src.misc.serialization import serialize_dataclass @@ -24,7 +26,8 @@ from src.network.resnet import ResNetConfig3x3, ResNetConfig7x7Best, OvercookedResNetConfig5x5 from src.network.utils import ActivationType from src.network.vision_net import EquivarianceType -from src.search.config import AlphaZeroDecoupledSelectionConfig, InferenceServerEvalConfig, ResponseInferenceServerEvalConfig, StandardBackupConfig, StandardExtractConfig, \ +from src.search.config import AlphaZeroDecoupledSelectionConfig, InferenceServerEvalConfig, \ + ResponseInferenceServerEvalConfig, StandardBackupConfig, StandardExtractConfig, \ DecoupledUCTSelectionConfig, LogitBackupConfig, FixedDepthConfig, SpecialExtractConfig, NashBackupConfig, \ Exp3SelectionConfig, MeanPolicyExtractConfig, Exp3BackupConfig, RegretMatchingSelectionConfig, \ RegretMatchingBackupConfig, SMOOSConfig, PolicyExtractConfig, EnemyExploitationEvalConfig, \ @@ -58,7 +61,7 @@ def start_training_from_structured_configs(): # game_cfg = OneStateCrampedRoomOvercookedConfig() # game_cfg = SimpleCrampedRoomOvercookedConfig() game_cfg = Simple2CrampedRoomOvercookedConfig() - + game_cfg.temperature_input = temperature_input game_cfg.single_temperature_input = single_temperature @@ -97,13 +100,12 @@ def start_training_from_structured_configs(): # utility_norm=UtilityNorm.FULL_COOP, # ) eval_func_cfg = ResponseInferenceServerEvalConfig( - random_symmetry= False, + random_symmetry=False, min_clip_value=-math.inf, max_clip_value=50, policy_prediction=True, ) - - + # sel_func_cfg = DecoupledUCTSelectionConfig(exp_bonus=1.414) # 1.4) # sel_func_cfg = SampleSelectionConfig(dirichlet_alpha=math.inf, dirichlet_eps=0.25, temperature=1.0) # sel_func_cfg = AlphaZeroDecoupledSelectionConfig(exp_bonus=1.414, dirichlet_alpha=0.3, dirichlet_eps=0.25) @@ -129,7 +131,7 @@ def start_training_from_structured_configs(): # backup_func_cfg = Exp3BackupConfig() # backup_func_cfg = RegretMatchingBackupConfig(avg_backup=True) extraction_func_cfg = SpecialExtractConfig( - utility_norm=UtilityNorm.FULL_COOP, + utility_norm=UtilityNorm.NONE, min_clip_value=-math.inf, max_clip_value=30, ) @@ -170,24 +172,24 @@ def start_training_from_structured_configs(): worker_cfg = WorkerConfig( search_cfg=search_cfg, policy_eval_cfg=policy_eval_cfg, - # temp_scaling_cfgs=( - # TemperatureAnnealingConfig( - # init_temp=5, - # end_times_min=[20, 40], - # anneal_temps=[5, 0], - # anneal_types=[AnnealingType.CONST, AnnealingType.LINEAR], - # cyclic=False, - # sampling=False, - # ), - # TemperatureAnnealingConfig( - # init_temp=10, - # end_times_min=[1], - # anneal_temps=[10], - # anneal_types=[AnnealingType.CONST], - # cyclic=True, - # sampling=False, - # ), - # ), + temp_scaling_cfgs=( + TemperatureAnnealingConfig( + init_temp=5, + end_times_min=[2, 5], + anneal_temps=[5, 0], + anneal_types=[AnnealingType.CONST, AnnealingType.LINEAR], + cyclic=False, + sampling=False, + ), + TemperatureAnnealingConfig( + init_temp=10, + end_times_min=[1], + anneal_temps=[10], + anneal_types=[AnnealingType.CONST], + cyclic=True, + sampling=False, + ), + ), # anneal_cfgs=None, # anneal_cfgs=[TemperatureAnnealingConfig( # init_temp=0, @@ -196,11 +198,11 @@ def start_training_from_structured_configs(): # anneal_types=[AnnealingType.COSINE], # cyclic=True, # sampling=True, - # )], - anneal_cfgs=[TemperatureAnnealingConfig( + # )], + anneal_cfgs=[TemperatureAnnealingConfig( init_temp=0, end_times_min=[1], - anneal_temps=[10], + anneal_temps=[1], anneal_types=[AnnealingType.COSINE], cyclic=True, sampling=True, @@ -272,7 +274,7 @@ def start_training_from_structured_configs(): id=0, updater_bucket_size=100, worker_episode_bucket_size=5, - wandb_mode='offline', + wandb_mode='online', ) saver_cfg = SaverConfig( save_interval_sec=10, @@ -281,7 +283,7 @@ def start_training_from_structured_configs(): use_gpu=True, ) trainer_cfg = AlphaZeroTrainerConfig( - num_worker=5, # IMPORTANT + num_worker=30, # IMPORTANT num_inference_server=1, save_state=False, save_state_after_seconds=30, @@ -295,7 +297,7 @@ def start_training_from_structured_configs(): collector_cfg=collector_cfg, inf_cfg=inf_cfg, max_batch_size=batch_size, - max_eval_per_worker=batch_size*2, + max_eval_per_worker=batch_size * 2, data_qsize=10, info_qsize=100, updater_in_qsize=100, @@ -304,7 +306,7 @@ def start_training_from_structured_configs(): prev_run_dir=None, prev_run_idx=None, only_generate_buffer=False, - restrict_cpu=True, # only works on LINUX + restrict_cpu=False, # only works on LINUX max_cpu_updater=1, max_cpu_worker=10, max_cpu_evaluator=1, @@ -316,7 +318,7 @@ def start_training_from_structured_configs(): compile_mode='max-autotune', merge_inference_update_gpu=True, # proxy_net_path=None, - proxy_net_path=str(Path(__file__).parent.parent.parent / 'outputs' / 'working_proxy_simple2.pt'), + proxy_net_path=str(Path(__file__).parent / 'outputs' / 'proxy2.pt'), ) # initialize yaml file and hydra print(os.getcwd()) diff --git a/src/network/resnet.py b/src/network/resnet.py index b259d93..139d9e0 100644 --- a/src/network/resnet.py +++ b/src/network/resnet.py @@ -229,6 +229,40 @@ class OvercookedResNetConfig5x5(ResNetConfig): value_head_cfg: HeadConfig = field(default_factory=lambda: WideHeadConfig()) +# channels, num_blocks, kernel_size, padding, norm +best_9x9 = [ + [32, 3, 3, 1, 1], + [64, 2, 3, 1, 1], + [64, 1, 3, 0, 1], + [128, 2, 3, 1, 1], + [256, 1, 3, 0, 1], +] + + +@dataclass +class OvercookedResNetConfig9x9(ResNetConfig): + layer_specs: list[list[int]] = field(default_factory=lambda: best_9x9) + policy_head_cfg: HeadConfig = field(default_factory=lambda: WideHeadConfig()) + value_head_cfg: HeadConfig = field(default_factory=lambda: WideHeadConfig()) + + +# channels, num_blocks, kernel_size, padding, norm +best_8x8 = [ + [32, 3, 3, 1, 1], + [64, 2, 3, 1, 1], + [64, 1, 3, 0, 1], + [128, 2, 3, 1, 1], + [256, 1, 3, 1, 1], +] + + +@dataclass +class OvercookedResNetConfig8x8(ResNetConfig): + layer_specs: list[list[int]] = field(default_factory=lambda: best_8x8) + policy_head_cfg: HeadConfig = field(default_factory=lambda: WideHeadConfig()) + value_head_cfg: HeadConfig = field(default_factory=lambda: WideHeadConfig()) + + default_7x7 = [ [32, 2, 3, 1, 1], [48, 1, 3, 0, 1], diff --git a/start_training.py b/start_training.py index 8962151..925ad79 100644 --- a/start_training.py +++ b/start_training.py @@ -1,3 +1,4 @@ +import itertools import os import sys from pathlib import Path @@ -31,7 +32,26 @@ def main(cfg: AlphaZeroTrainerConfig): config_path = Path(__file__).parent / 'config' config_name = 'config' - if len(sys.argv) > 1 and sys.argv[1].startswith("config="): + if len(sys.argv) > 3 and sys.argv[1].startswith("config="): + config_prefix = sys.argv[1].split("=")[-1] + sys.argv.pop(1) + arr_id = int(sys.argv[1]) + sys.argv.pop(1) + + pref_lists = [ + # list(range(1, 6)), + # [1] + list(range(5, 51, 5)), + ['aa', 'cc', 'co', 'cr', 'fc'], + list(range(5)), + ] + prod = list(itertools.product(*pref_lists)) + tpl = prod[arr_id] + # config_name = f"{config_prefix}_{tpl[0]}_{tpl[1]}_{tpl[2]}" + config_name = f"{config_prefix}_{tpl[0]}_{tpl[1]}" + # config_name = f"{config_prefix}_{prefix_arr[t]}_{seed}" + # config_name = f"{config_prefix}_{seed}_{prefix_arr[t]}" + elif len(sys.argv) > 2 and sys.argv[1].startswith("config="): config_name = sys.argv[1].split("=")[-1] sys.argv.pop(1) + print(f"{config_name=}", flush=True) hydra.main(config_path=str(config_path), config_name=config_name, version_base=None)(main)() diff --git a/test/network/test_resnet.py b/test/network/test_resnet.py index 0be2f73..569d52d 100644 --- a/test/network/test_resnet.py +++ b/test/network/test_resnet.py @@ -10,10 +10,13 @@ from src.game.battlesnake.bootcamp.test_envs_7x7 import survive_on_7x7 from src.game.initialization import get_game_from_config from src.network.initialization import get_network_from_config -from src.game.overcooked.config import CrampedRoomOvercookedConfig +from src.game.overcooked.config import CrampedRoomOvercookedConfig, CoordinationRingOvercookedConfig, \ + ForcedCoordinationOvercookedConfig, AsymmetricAdvantageOvercookedConfig, CounterCircuitOvercookedConfig from src.misc.utils import set_seed from src.network.resnet import ResNetConfig3x3, ResNetConfig5x5, ResNetConfig7x7, ResNetConfig9x9, ResNetConfig7x7New, \ - ResNetConfig11x11, ResNetConfig7x7Best, OvercookedResNetConfig5x5 + ResNetConfig11x11, ResNetConfig7x7Best, OvercookedResNetConfig5x5, OvercookedResNetConfig9x9, \ + OvercookedResNetConfig8x8 + class TestResNet(unittest.TestCase): def test_resnet_game(self): @@ -192,9 +195,10 @@ def test_resnet_centered_11_speed(self): print(f"{(end_time - start_time) / n=}") def test_resnet_oc(self): - game_cfg = CrampedRoomOvercookedConfig() + game_cfg = ForcedCoordinationOvercookedConfig() game = get_game_from_config(game_cfg) obs, _, _ = game.get_obs() + print(obs.shape) obs_tensor = torch.tensor(obs) net_cfg = OvercookedResNetConfig5x5(game_cfg=game_cfg) for seed in range(10): @@ -203,3 +207,29 @@ def test_resnet_oc(self): net_out = net(obs_tensor) print(net_out) + def test_resnet_oc_8(self): + game_cfg = CounterCircuitOvercookedConfig() + game = get_game_from_config(game_cfg) + obs, _, _ = game.get_obs() + print(obs.shape) + obs_tensor = torch.tensor(obs) + net_cfg = OvercookedResNetConfig8x8(game_cfg=game_cfg) + for seed in range(10): + set_seed(seed) + net = get_network_from_config(net_cfg) + net_out = net(obs_tensor) + print(net_out) + + def test_resnet_oc_large(self): + game_cfg = AsymmetricAdvantageOvercookedConfig() + game = get_game_from_config(game_cfg) + obs, _, _ = game.get_obs() + print(obs.shape) + obs_tensor = torch.tensor(obs) + net_cfg = OvercookedResNetConfig9x9(game_cfg=game_cfg) + for seed in range(10): + set_seed(seed) + net = get_network_from_config(net_cfg) + net_out = net(obs_tensor) + print(net_out) +