hw_submission(dzp): add hw6_20230425 #72

Open · wants to merge 2 commits into main
86 changes: 86 additions & 0 deletions chapter6_marl/hw_submission/q1/q1.py
@@ -0,0 +1,86 @@
import numpy as np
import torch


def get_agent_id_feature(agent_id, agent_num):
    agent_id_feature = torch.zeros(agent_num)
    agent_id_feature[agent_id] = 1
    return agent_id_feature


def get_movement_feature():
    # for simplicity, we use a random movement feature here
    movement_feature = torch.randint(0, 2, (8, ))
    return movement_feature


def get_own_feature():
    # for simplicity, we use a random own feature here
    return torch.randn(10)


def get_ally_visible_feature():
    # this function only returns the visible feature of one ally
    # for simplicity, we use a random tensor when the ally is visible and a zero tensor when it is not
    if np.random.random() > 0.5:
        ally_visible_feature = torch.randn(4)
    else:
        ally_visible_feature = torch.zeros(4)
    return ally_visible_feature


def get_enemy_visible_feature():
    # this function only returns the visible feature of one enemy
    # for simplicity, we use a random tensor when the enemy is visible and a zero tensor when it is not
    if np.random.random() > 0.8:
        enemy_visible_feature = torch.randn(4)
    else:
        enemy_visible_feature = torch.zeros(4)
    return enemy_visible_feature


def get_ind_global_state(agent_id, ally_agent_num, enemy_agent_num):
    # You need to implement this function
    own_feature = get_own_feature()
    ally_visible_feature = torch.cat([get_ally_visible_feature() for _ in range(ally_agent_num - 1)])
    enemy_visible_feature = torch.cat([get_enemy_visible_feature() for _ in range(enemy_agent_num)])
    movement_feature = get_movement_feature()
    agent_id_feature = get_agent_id_feature(agent_id, ally_agent_num + enemy_agent_num)
    return torch.cat([own_feature, ally_visible_feature, enemy_visible_feature, movement_feature, agent_id_feature])


def get_ep_global_state(agent_id, ally_agent_num, enemy_agent_num):
    # In many multi-agent environments such as SMAC, the global state is a simplified version of the combination
    # of all the agents' independent states, and the concrete implementation depends on the characteristics of the environment.
    # For simplicity, we use random features here.
    ally_center_feature = torch.randn(8)
    enemy_center_feature = torch.randn(8)
    return torch.cat([ally_center_feature, enemy_center_feature])


def get_as_global_state(agent_id, ally_agent_num, enemy_agent_num):
    # You need to implement this function
    ind_global_state = get_ind_global_state(agent_id, ally_agent_num, enemy_agent_num)
    ep_global_state = get_ep_global_state(agent_id, ally_agent_num, enemy_agent_num)
    return torch.cat([ind_global_state, ep_global_state])


def test_global_state():
    ally_agent_num = 3
    enemy_agent_num = 5
    # get the independent global state, which is usually used in decentralized training
    for agent_id in range(ally_agent_num):
        ind_global_state = get_ind_global_state(agent_id, ally_agent_num, enemy_agent_num)
        assert isinstance(ind_global_state, torch.Tensor)
    # get the environment-provided global state, which is the same for all agents and is used in centralized training
    for agent_id in range(ally_agent_num):
        ep_global_state = get_ep_global_state(agent_id, ally_agent_num, enemy_agent_num)
        assert isinstance(ep_global_state, torch.Tensor)
    # get the naive agent-specific global state, which is specific to each agent and is used in centralized training
    for agent_id in range(ally_agent_num):
        as_global_state = get_as_global_state(agent_id, ally_agent_num, enemy_agent_num)
        assert isinstance(as_global_state, torch.Tensor)


if __name__ == "__main__":
    test_global_state()
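A quick dimension check on the states constructed above (a reviewer-style sketch, not part of the submitted file; it assumes q1.py can be imported from the working directory): with 3 allies and 5 enemies, the independent state concatenates own (10), visible-ally (4 × 2), visible-enemy (4 × 5), movement (8) and one-hot id (8) features, 54 entries in total.

from q1 import get_ind_global_state, get_ep_global_state, get_as_global_state

ind = get_ind_global_state(0, 3, 5)
assert ind.shape == (54, )   # 10 + 8 + 20 + 8 + 8
ep = get_ep_global_state(0, 3, 5)
assert ep.shape == (16, )    # 8 ally-centered + 8 enemy-centered features
asg = get_as_global_state(0, 3, 5)
assert asg.shape == (70, )   # independent (54) + environment-provided (16)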
63 changes: 63 additions & 0 deletions chapter6_marl/hw_submission/q2/q2.py
@@ -0,0 +1,63 @@
from easydict import EasyDict

ant_ddpg_default_config = dict(
    exp_name='multi_mujoco_ant_2x4_ddpg',
    env=dict(
        scenario='Ant-v2',
        agent_conf="2x4d",
        agent_obsk=2,
        add_agent_id=False,
        episode_limit=1000,
        collector_env_num=8,
        evaluator_env_num=8,
        n_evaluator_episode=8,
        stop_value=6000,
    ),
    policy=dict(
        cuda=True,
        random_collect_size=0,
        multi_agent=True,
        model=dict(
            agent_obs_shape=54,
            global_obs_shape=111,
            action_shape=4,
            action_space='regression',
            actor_head_hidden_size=256,
            critic_head_hidden_size=256,
        ),
        learn=dict(
            update_per_collect=10,
            batch_size=256,
            learning_rate_actor=1e-3,
            learning_rate_critic=1e-3,
            target_theta=0.005,
            discount_factor=0.99,
        ),
        collect=dict(
            n_sample=400,
            noise_sigma=0.1,
        ),
        eval=dict(evaluator=dict(eval_freq=500, )),
        other=dict(replay_buffer=dict(replay_buffer_size=100000, ), ),
    ),
)

ant_ddpg_default_config = EasyDict(ant_ddpg_default_config)
main_config = ant_ddpg_default_config

ant_ddpg_default_create_config = dict(
    env=dict(
        type='mujoco_multi',
        import_names=['dizoo.multiagent_mujoco.envs.multi_mujoco_env'],
    ),
    env_manager=dict(type='subprocess'),
    policy=dict(type='ddpg'),
    replay_buffer=dict(type='naive', ),
)
ant_ddpg_default_create_config = EasyDict(ant_ddpg_default_create_config)
create_config = ant_ddpg_default_create_config

if __name__ == '__main__':
    # alternatively, run this config from the command line: `ding -m serial -c q2.py -s 0`
    from ding.entry.serial_entry import serial_pipeline
    serial_pipeline((main_config, create_config), seed=0, max_env_step=int(1e7))
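A small aside on the EasyDict wrapping used above (my note, not part of the submitted config): wrapping the nested plain dicts in EasyDict exposes every key as an attribute, which is the access pattern DI-engine code generally uses when reading a config.

from easydict import EasyDict

cfg = EasyDict(dict(policy=dict(learn=dict(batch_size=256))))
assert cfg.policy.learn.batch_size == 256             # attribute-style access
assert cfg['policy']['learn']['batch_size'] == 256    # plain dict access still works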