Skip to content

Commit

Permalink
initial oc cramped runs
Browse files Browse the repository at this point in the history
  • Loading branch information
ymahlau committed Nov 27, 2023
1 parent d79de06 commit ec7419e
Show file tree
Hide file tree
Showing 10 changed files with 2,087 additions and 307 deletions.
354 changes: 354 additions & 0 deletions config/cfg_oc_proxy_0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,354 @@
# Serialized AlphaZeroTrainerConfig for the "oc_proxy" run (see logger_cfg).
#
# Layout convention visible throughout this file:
#   * config objects are stored as a mapping with `__module__`, `__name__`
#     and a `data` mapping holding the object's fields;
#   * enum values are stored as `__module__`, `__name__` and `value`.
# Presumably `__module__`/`__name__` name the Python class to re-instantiate
# when loading -- confirm against the project's serialization code.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost. Keys inside each mapping are in sorted
# order, which determines the depth of almost every line; the few places
# where sort order alone is not decisive are marked with NOTE(review).
__module__: src.trainer.config
__name__: AlphaZeroTrainerConfig
data:
  collector_cfg:
    __module__: src.trainer.config
    __name__: CollectorConfig
    data:
      buffer_size: 100000
      log_every_sec: 300
      quick_start_buffer_path: null
      start_wait_n_samples: 100000
  compile_mode: reduce-overhead
  compile_model: false
  data_qsize: 10
  distributor_out_qsize: 10
  evaluator_cfg:
    __module__: src.trainer.config
    __name__: EvaluatorConfig
    data:
      enemy_cfgs:
      - __module__: src.agent.one_shot
        __name__: RandomAgentConfig
        data:
          name: RandomAgent
      enemy_iterations: 100
      eval_rate_sec: 20
      num_episodes: 100
      prevent_draw: true
      save_checkpoints: false
      temperature: 1
  game_cfg:
    __module__: src.game.overcooked.config
    __name__: CrampedRoomOvercookedConfig
    data:
      # 4 rows of 5 cells each, consistent with `h: 4` and `w: 5` below.
      board:
      - - 1
        - 1
        - 4
        - 1
        - 1
      - - 3
        - 0
        - 0
        - 0
        - 3
      - - 1
        - 0
        - 0
        - 0
        - 1
      - - 1
        - 2
        - 1
        - 5
        - 1
      cooking_time: 20
      flat_obs: false
      h: 4
      horizon: 400
      num_actions: 6
      num_players: 2
      reward_cfg:
        __module__: src.game.overcooked.config
        __name__: OvercookedRewardConfig
        data:
          dish_pickup: 3
          placement_in_pot: 3
          soup_delivery: 20
          soup_pickup: 5
          start_cooking: 3
      reward_scaling_factor: 0.5
      single_temperature_input: true
      # A tuple of two 4-element tuples, one entry per player
      # (`num_players: 2`); element meaning is not visible here.
      start_pos:
        __module__: src.misc.serialization
        __name__: TupleWrapper
        data:
          data:
          - __module__: src.misc.serialization
            __name__: TupleWrapper
            data:
              data:
              - 1
              - 2
              - 0
              - 0
          - __module__: src.misc.serialization
            __name__: TupleWrapper
            data:
              data:
              - 3
              - 1
              - 0
              - 0
      temperature_input: true
      w: 5
  inf_cfg:
    __module__: src.trainer.config
    __name__: InferenceServerConfig
    data:
      statistics_every_sec: 60
      use_gpu: true
  info_qsize: 100
  init_new_network_params: false
  logger_cfg:
    __module__: src.trainer.config
    __name__: LoggerConfig
    data:
      buffer_gen: false
      id: 0
      name: oc_proxy
      project_name: overcooked_cramped
      updater_bucket_size: 100
      wandb_mode: online
      worker_episode_bucket_size: 25
  max_batch_size: 3000
  max_cpu_evaluator: 1
  max_cpu_inference_server: 2
  max_cpu_log_dist_save_collect: 1
  max_cpu_updater: 2
  max_cpu_worker: 11
  max_eval_per_worker: 6000
  merge_inference_update_gpu: false
  net_cfg:
    __module__: src.network.resnet
    __name__: OvercookedResNetConfig5x5
    data:
      activation_type:
        __module__: src.network.utils
        __name__: ActivationType
        value: LEAKY_RELU
      eq_type:
        __module__: src.network.vision_net
        __name__: EquivarianceType
        value: NONE
      game_cfg: null
      # Four 5-element rows; the meaning of each column is defined by the
      # network code, not by this file.
      layer_specs:
      - - 32
        - 3
        - 3
        - 1
        - 1
      - - 64
        - 2
        - 3
        - 1
        - 1
      - - 128
        - 1
        - 3
        - 1
        - 1
      - - 256
        - 1
        - 3
        - 0
        - 1
      lff_feature_expansion: 40
      lff_features: false
      norm_type:
        __module__: src.network.utils
        __name__: NormalizationType
        value: GROUP_NORM
      policy_head_cfg:
        __module__: src.network.fcn
        __name__: WideHeadConfig
        data:
          activation_type:
            __module__: src.network.utils
            __name__: ActivationType
            value: LEAKY_RELU
          dropout_p: 0.2
          final_activation:
            __module__: src.network.utils
            __name__: ActivationType
            value: NONE
          hidden_size: 256
          normalization_type:
            __module__: src.network.utils
            __name__: NormalizationType
            value: GROUP_NORM
          num_layers: 1
      # Placed on the network config rather than inside policy_head_cfg: the
      # value head below is the same WideHeadConfig type and has no such key.
      predict_policy: true
      value_head_cfg:
        __module__: src.network.fcn
        __name__: WideHeadConfig
        data:
          activation_type:
            __module__: src.network.utils
            __name__: ActivationType
            value: LEAKY_RELU
          dropout_p: 0.2
          final_activation:
            __module__: src.network.utils
            __name__: ActivationType
            value: NONE
          hidden_size: 256
          normalization_type:
            __module__: src.network.utils
            __name__: NormalizationType
            value: GROUP_NORM
          num_layers: 1
  num_inference_server: 1
  num_worker: 30
  only_generate_buffer: false
  prev_run_dir: null
  prev_run_idx: null
  proxy_net_path: null
  restrict_cpu: true
  save_state: false
  save_state_after_seconds: 30
  saver_cfg:
    __module__: src.trainer.config
    __name__: SaverConfig
    data:
      save_all_checkpoints: false
      save_interval_sec: 30
  # NOTE(review): the next two keys are assumed to be trainer-level fields,
  # not SaverConfig fields -- sort order allows either; confirm against the
  # AlphaZeroTrainerConfig / SaverConfig definitions.
  single_sbr_temperature: true
  temperature_input: true
  updater_cfg:
    __module__: src.trainer.config
    __name__: UpdaterConfig
    data:
      gradient_max_norm: 1.0
      mse_policy_loss: true
      optim_cfg:
        __module__: src.supervised.optim
        __name__: OptimizerConfig
        data:
          # Two annealing phases: the lists below are index-aligned
          # (two temps, two types, two end times).
          anneal_cfg:
            __module__: src.supervised.annealer
            __name__: TemperatureAnnealingConfig
            data:
              anneal_temps:
              - 0.001
              - 1.0e-06
              anneal_types:
              - __module__: src.supervised.annealer
                __name__: AnnealingType
                value: LINEAR
              - __module__: src.supervised.annealer
                __name__: AnnealingType
                value: COSINE
              cyclic: false
              end_times_min:
              - 30
              - 1400
              init_temp: 0
              sampling: false
          beta1: 0.9
          beta2: 0.99
          fused: false
          optim_type:
            __module__: src.supervised.optim
            __name__: OptimType
            value: ADAM_W
          weight_decay: 0.0001
      policy_loss_factor: 5
      updates_until_distribution: 5
      use_gpu: true
      utility_loss:
        __module__: src.game.values
        __name__: UtilityNorm
        value: FULL_COOP
      utility_loss_factor: 1
      value_reg_loss_factor: 0
  updater_in_qsize: 100
  updater_out_qsize: 10
  validator_data_qsize: 100
  worker_cfg:
    __module__: src.trainer.config
    __name__: WorkerConfig
    data:
      anneal_cfgs:
      - __module__: src.supervised.annealer
        __name__: TemperatureAnnealingConfig
        data:
          anneal_temps:
          - 10
          anneal_types:
          - __module__: src.supervised.annealer
            __name__: AnnealingType
            value: COSINE
          cyclic: true
          end_times_min:
          - 1
          init_temp: 0
          sampling: true
      exploration_prob: 0
      max_game_length: 8
      max_random_start_steps: 0
      policy_eval_cfg:
        __module__: src.trainer.policy_eval
        __name__: PolicyEvalConfig
        data:
          eval_type:
            __module__: src.trainer.policy_eval
            __name__: PolicyEvalType
            value: TD_0
          lambda_val: 0.5
      # NOTE(review): `prevent_draw` and `quick_start` are assumed to be
      # WorkerConfig fields, not PolicyEvalConfig fields -- confirm.
      prevent_draw: false
      quick_start: false
      search_cfg:
        __module__: src.search.config
        __name__: FixedDepthConfig
        data:
          average_eval: false
          backup_func_cfg:
            __module__: src.search.config
            __name__: LogitBackupConfig
            data:
              epsilon: 0
              hp_0: null
              hp_1: null
              init_random: true
              init_temperatures:
              - 15
              - 15
              num_iterations: 150
              sbr_mode:
                __module__: src.equilibria.logit
                __name__: SbrMode
                value: NAGURNEY
              use_cpp: true
          discount: 0.93
          eval_func_cfg:
            __module__: src.search.config
            __name__: InferenceServerEvalConfig
            data:
              active_wait_time: 0.05
              init_temperatures: null
              max_clip_value: 20
              # YAML float negative infinity, i.e. no lower clipping bound.
              min_clip_value: -.inf
              policy_prediction: true
              random_symmetry: false
              single_temperature: true
              temperature_input: true
              utility_norm:
                __module__: src.game.values
                __name__: UtilityNorm
                value: NONE
          extract_func_cfg:
            __module__: src.search.config
            __name__: SpecialExtractConfig
            data:
              utility_norm:
                __module__: src.game.values
                __name__: UtilityNorm
                value: FULL_COOP
      # NOTE(review): the remaining three keys are assumed to be WorkerConfig
      # fields, not FixedDepthConfig fields -- sort order allows either.
      search_iterations: 1
      temperature: 1
      use_symmetries: true
# Hydra run directory: one timestamped output folder per launch.
hydra:
  run:
    dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_oc_proxy
Loading

0 comments on commit ec7419e

Please sign in to comment.