hw_submission(dzp): add hw6_20230425 #72

Open · wants to merge 2 commits into main
86 changes: 86 additions & 0 deletions chapter6_marl/hw_submission/q1/q1.py
@@ -0,0 +1,86 @@
import numpy as np
import torch


def get_agent_id_feature(agent_id, agent_num):
    agent_id_feature = torch.zeros(agent_num)
    agent_id_feature[agent_id] = 1
    return agent_id_feature


def get_movement_feature():
    # for simplicity, we use a random movement feature here
    movement_feature = torch.randint(0, 2, (8, ))
    return movement_feature


def get_own_feature():
    # for simplicity, we use a random own feature here
    return torch.randn(10)


def get_ally_visible_feature():
    # this function only returns the visible feature of one ally
    # for simplicity, we use a random tensor when the ally is visible and a zero tensor when it is not
    if np.random.random() > 0.5:
        ally_visible_feature = torch.randn(4)
    else:
        ally_visible_feature = torch.zeros(4)
    return ally_visible_feature


def get_enemy_visible_feature():
    # this function only returns the visible feature of one enemy
    # for simplicity, we use a random tensor when the enemy is visible and a zero tensor when it is not
    if np.random.random() > 0.8:
        enemy_visible_feature = torch.randn(4)
    else:
        enemy_visible_feature = torch.zeros(4)
    return enemy_visible_feature


def get_ind_global_state(agent_id, ally_agent_num, enemy_agent_num):
    # You need to implement this function
    own_feature = get_own_feature()
    ally_visible_feature = torch.cat([get_ally_visible_feature() for _ in range(ally_agent_num - 1)])
    enemy_visible_feature = torch.cat([get_enemy_visible_feature() for _ in range(enemy_agent_num)])
    movement_feature = get_movement_feature()
    agent_id_feature = get_agent_id_feature(agent_id, ally_agent_num + enemy_agent_num)
    return torch.cat([own_feature, ally_visible_feature, enemy_visible_feature, movement_feature, agent_id_feature])


def get_ep_global_state(agent_id, ally_agent_num, enemy_agent_num):
    # In many multi-agent environments such as SMAC, the global state is a simplified version of the combination
    # of all the agents' independent states, and the concrete implementation depends on the characteristics of the environment.
    # For simplicity, we use random features here.
    ally_center_feature = torch.randn(8)
    enemy_center_feature = torch.randn(8)
    return torch.cat([ally_center_feature, enemy_center_feature])


def get_as_global_state(agent_id, ally_agent_num, enemy_agent_num):
    # You need to implement this function
    ind_global_state = get_ind_global_state(agent_id, ally_agent_num, enemy_agent_num)
    ep_global_state = get_ep_global_state(agent_id, ally_agent_num, enemy_agent_num)
    return torch.cat([ind_global_state, ep_global_state])


def test_global_state():
    ally_agent_num = 3
    enemy_agent_num = 5
    # get the independent global state, which is usually used in decentralized training
    for agent_id in range(ally_agent_num):
        ind_global_state = get_ind_global_state(agent_id, ally_agent_num, enemy_agent_num)
        assert isinstance(ind_global_state, torch.Tensor)
    # get the environment-provided global state, which is the same for all agents and is used in centralized training
    for agent_id in range(ally_agent_num):
        ep_global_state = get_ep_global_state(agent_id, ally_agent_num, enemy_agent_num)
        assert isinstance(ep_global_state, torch.Tensor)
    # get the naive agent-specific global state, which is specific to each agent and is used in centralized training
    for agent_id in range(ally_agent_num):
        as_global_state = get_as_global_state(agent_id, ally_agent_num, enemy_agent_num)
        assert isinstance(as_global_state, torch.Tensor)


if __name__ == "__main__":
    test_global_state()
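A quick dimension check on the states constructed above (a reviewer-style sketch, not part of the submitted file; it assumes q1.py can be imported from the working directory): with 3 allies and 5 enemies, the independent state concatenates own (10), visible-ally (4 × 2), visible-enemy (4 × 5), movement (8) and one-hot id (8) features, 54 entries in total.

from q1 import get_ind_global_state, get_ep_global_state, get_as_global_state

ind = get_ind_global_state(0, 3, 5)
assert ind.shape == (54, )   # 10 + 8 + 20 + 8 + 8
ep = get_ep_global_state(0, 3, 5)
assert ep.shape == (16, )    # 8 ally-centered + 8 enemy-centered features
asg = get_as_global_state(0, 3, 5)
assert asg.shape == (70, )   # independent (54) + environment-provided (16)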
63 changes: 63 additions & 0 deletions chapter6_marl/hw_submission/q2/q2.py
@@ -0,0 +1,63 @@
from easydict import EasyDict

ant_ddpg_default_config = dict(
    exp_name='multi_mujoco_ant_2x4_ddpg',
    env=dict(
        scenario='Ant-v2',
        agent_conf="2x4d",
        agent_obsk=2,
        add_agent_id=False,
        episode_limit=1000,
        collector_env_num=8,
        evaluator_env_num=8,
        n_evaluator_episode=8,
        stop_value=6000,
    ),
    policy=dict(
        cuda=True,
        random_collect_size=0,
        multi_agent=True,
        model=dict(
            agent_obs_shape=54,
            global_obs_shape=111,
            action_shape=4,
            action_space='regression',
            actor_head_hidden_size=256,
            critic_head_hidden_size=256,
        ),
        learn=dict(
            update_per_collect=10,
            batch_size=256,
            learning_rate_actor=1e-3,
            learning_rate_critic=1e-3,
            target_theta=0.005,
            discount_factor=0.99,
        ),
        collect=dict(
            n_sample=400,
            noise_sigma=0.1,
        ),
        eval=dict(evaluator=dict(eval_freq=500, )),
        other=dict(replay_buffer=dict(replay_buffer_size=100000, ), ),
    ),
)

ant_ddpg_default_config = EasyDict(ant_ddpg_default_config)
main_config = ant_ddpg_default_config

ant_ddpg_default_create_config = dict(
    env=dict(
        type='mujoco_multi',
        import_names=['dizoo.multiagent_mujoco.envs.multi_mujoco_env'],
    ),
    env_manager=dict(type='subprocess'),
    policy=dict(type='ddpg'),
    replay_buffer=dict(type='naive', ),
)
ant_ddpg_default_create_config = EasyDict(ant_ddpg_default_create_config)
create_config = ant_ddpg_default_create_config

if __name__ == '__main__':
    # alternatively, run this config from the command line: `ding -m serial -c q2.py -s 0`
    from ding.entry.serial_entry import serial_pipeline
    serial_pipeline((main_config, create_config), seed=0, max_env_step=int(1e7))
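A small aside on the EasyDict wrapping used above (my note, not part of the submitted config): wrapping the nested plain dicts in EasyDict exposes every key as an attribute, which is the access pattern DI-engine code generally uses when reading a config.

from easydict import EasyDict

cfg = EasyDict(dict(policy=dict(learn=dict(batch_size=256))))
assert cfg.policy.learn.batch_size == 256             # attribute-style access
assert cfg['policy']['learn']['batch_size'] == 256    # plain dict access still works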