Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

QLearning+CliffWalking-v0+FrozenLake+Taxi-v3 #43

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions algos/DQN/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@ def __init__(self) -> None:
self.epsilon_decay = 500 # epsilon 衰减率
self.gamma = 0.95 # 奖励折扣因子
self.lr = 0.0001 # 学习率
self.buffer_type = "REPLAY_QUE"
self.buffer_size = 100000 # buffer 大小
self.batch_size = 64 # batch size
self.target_update = 4 # target_net 更新频率
# 神经网络层配置
self.value_layers = [
{'layer_type': 'Linear', 'layer_dim': [64], 'activation': 'ReLU'},
{'layer_type': 'Linear', 'layer_dim': [64], 'activation': 'ReLU'},
# {'layer_type': 'embed', 'n_embeddings':600, 'embedding_dim': 8,'activation': 'none'},
{'layer_type': 'Linear', 'layer_dim': [256], 'activation': 'ReLU'},
{'layer_type': 'Linear', 'layer_dim': [256], 'activation': 'ReLU'},
]
20 changes: 12 additions & 8 deletions algos/base/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
Email: [email protected]
Date: 2023-04-16 22:30:15
LastEditor: JiangJi
LastEditTime: 2023-04-24 15:12:17
LastEditTime: 2023-04-26 00:02:48
Description:
'''
import torch
Expand All @@ -20,29 +20,33 @@ def __init__(self, **kwargs):
for k, v in kwargs.items():
setattr(self, k, v)

def get_out_size_with_batch(layers,input_size,dtype=torch.float):
def get_output_size_with_batch(layers,input_size,dtype=torch.float):
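""" Get the output size of the given layers, keeping a placeholder batch dimension
layers: the layer(s) to probe; called once on a dummy batch of 10 samples
input_size: input size whose first entry is the batch dimension (e.g. [None, 4])
"""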
with torch.no_grad():
x = torch.randn(10,*input_size[-1],dtype=dtype)
x = torch.zeros(10, *input_size[1:], dtype=dtype)
out = layers(x)
return [None,list(out.size())[1:] ]
output_size = [None] + list(out.size())[1:]
return output_size

def embedding_layer(input_size, layer_cfg: LayerConfig):
n_embeddings = layer_cfg.n_embeddings
embedding_dim = layer_cfg.embedding_dim
class EmbeddingLayer(nn.Module):
def __init__(self, n_embeddings, embedding_dim):
super(EmbeddingLayer, self).__init__()
self.layer = nn.Embedding(n_embeddings=n_embeddings, embedding_dim=embedding_dim)
self.layer = nn.Embedding(num_embeddings=n_embeddings, embedding_dim=embedding_dim)

def forward(self, x: torch.Tensor):
# if x.dtype != torch.int:
# x = x.int()
if x.dtype != torch.int:
x = x.int()

return self.layer(x)
layer = EmbeddingLayer(n_embeddings, embedding_dim)
output_size = get_out_size_with_batch(layer, input_size=input_size, dtype=torch.long)
output_size = get_output_size_with_batch(layer, input_size=input_size, dtype=torch.long)

return layer, output_size
def linear_layer(input_size,layer_cfg: LayerConfig):
""" 生成一个线性层
Expand Down
9 changes: 6 additions & 3 deletions algos/base/networks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
Email: [email protected]
Date: 2023-04-16 22:30:46
LastEditor: JiangJi
LastEditTime: 2023-04-24 15:12:45
LastEditTime: 2023-04-26 00:01:36
Description:
'''
import sys, os
Expand All @@ -32,6 +32,7 @@ def __init__(self, cfg, input_size, action_dim) -> None:

def forward(self, x):
for layer in self.layers:

x = layer(x)
return x

Expand All @@ -40,13 +41,15 @@ def forward(self, x):
import torch
from config.config import MergedConfig
cfg = MergedConfig()
state_dim = [None,4]
state_dim = [None]
cfg.n_actions = 2
cfg.value_layers = [
{'layer_type': 'embed', 'n_embeddings': 48, 'embedding_dim': 32, 'activation': 'none'},
{'layer_type': 'Linear', 'layer_dim': [64], 'activation': 'ReLU'},
{'layer_type': 'Linear', 'layer_dim': [64], 'activation': 'ReLU'},
]
value_net = ValueNetwork(cfg, state_dim, cfg.n_actions)
print(value_net)
x = torch.randn(1,4)
x = torch.tensor([36]).long()

print(value_net(x))
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Run configuration for training DQN (algo_name: DQN, mode: train) on Blackjack-v1.
# NOTE(review): indentation is not visible in this view; the keys below each
# section header are presumably nested under it — confirm against the real file.

# General run settings: device, episode counts, rendering and seeding.
general_cfg:
algo_name: DQN
device: cuda
env_name: gym
eval_eps: 10
eval_per_episode: 5
# NOTE(review): load_path names a CartPole-v1 run while env_cfg.id is
# Blackjack-v1; this has no effect while load_checkpoint is false, but
# confirm before enabling checkpoint loading.
load_checkpoint: false
load_path: Train_CartPole-v1_DQN_20230419-224210
max_steps: 50
mode: train
mp_backend: mp
n_workers: 1
new_step_api: true
# NOTE(review): render/render_mode here say "human" but env_cfg.render_mode is
# null — confirm which of the two the environment actually uses.
render: true
render_mode: human
save_fig: true
seed: 0
show_fig: false
test_eps: 200
train_eps: 10000
wrapper: envs.wrappers.blackjackwrapper
# DQN hyperparameters: replay buffer, epsilon schedule, discount, learning rate.
algo_cfg:
batch_size: 64
buffer_size: 100000
buffer_type: REPLAY_QUE
epsilon_decay: 500
epsilon_end: 0.01
epsilon_start: 0.95
gamma: 0.95
lr: 0.0001
target_update: 4
# Value network body: two Linear layers of width 256, each followed by ReLU.
value_layers:
- activation: ReLU
layer_dim:
- 256
layer_type: Linear
- activation: ReLU
layer_dim:
- 256
layer_type: Linear
# Environment settings.
env_cfg:
id: Blackjack-v1
# presumably the keys listed here are withheld when env_cfg is passed to the
# environment constructor — TODO confirm against the code that reads it
ignore_params:
- wrapper
- ignore_params
new_step_api: true
render_mode: null
wrapper: envs.wrappers.blackjackwrapper
Loading