diff --git a/ding/bonus/a2c.py b/ding/bonus/a2c.py
index d10def313b..4533664f76 100644
--- a/ding/bonus/a2c.py
+++ b/ding/bonus/a2c.py
@@ -70,7 +70,7 @@ def __init__(
             - model (:obj:`torch.nn.Module`): The model of A2C algorithm, which should be an instance of class \
                 :class:`ding.model.VAC`. \
                 If not specified, a default model will be generated according to the configuration.
-            - cfg (:obj:Union[EasyDict, dict]): The configuration of A2C algorithm, which is a dict. \
+            - cfg (:obj:`Union[EasyDict, dict]`): The configuration of A2C algorithm, which is a dict. \
                 Default to None. If not specified, the default configuration will be used. \
                 The default configuration can be found in ``ding/config/example/A2C/gym_lunarlander_v2.py``.
             - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \
diff --git a/ding/bonus/c51.py b/ding/bonus/c51.py
index ab4f0be85e..0fe2cc64e7 100644
--- a/ding/bonus/c51.py
+++ b/ding/bonus/c51.py
@@ -68,9 +68,8 @@ def __init__(
             - exp_name (:obj:`str`): The name of this experiment, which will be used to create the folder to save \
                 log data. Default to None. If not specified, the folder name will be ``env_id``-``algorithm``.
             - model (:obj:`torch.nn.Module`): The model of C51 algorithm, which should be an instance of class \
-                :class:`ding.model.C51DQN`. \
-                If not specified, a default model will be generated according to the configuration.
-            - cfg (:obj:Union[EasyDict, dict]): The configuration of C51 algorithm, which is a dict. \
+                :class:`ding.model.C51DQN`. If not specified, a default model will be generated according to the config.
+            - cfg (:obj:`Union[EasyDict, dict]`): The configuration of C51 algorithm, which is a dict. \
                 Default to None. If not specified, the default configuration will be used. \
                 The default configuration can be found in ``ding/config/example/C51/gym_lunarlander_v2.py``.
             - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \
diff --git a/ding/bonus/ddpg.py b/ding/bonus/ddpg.py
index 0dade9e38b..88110c7598 100644
--- a/ding/bonus/ddpg.py
+++ b/ding/bonus/ddpg.py
@@ -70,7 +70,7 @@ def __init__(
             - model (:obj:`torch.nn.Module`): The model of DDPG algorithm, which should be an instance of class \
                 :class:`ding.model.ContinuousQAC`. \
                 If not specified, a default model will be generated according to the configuration.
-            - cfg (:obj:Union[EasyDict, dict]): The configuration of DDPG algorithm, which is a dict. \
+            - cfg (:obj:`Union[EasyDict, dict]`): The configuration of DDPG algorithm, which is a dict. \
                 Default to None. If not specified, the default configuration will be used. \
                 The default configuration can be found in ``ding/config/example/DDPG/gym_lunarlander_v2.py``.
             - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \
diff --git a/ding/bonus/dqn.py b/ding/bonus/dqn.py
index 4894e2aa6f..3207c30490 100644
--- a/ding/bonus/dqn.py
+++ b/ding/bonus/dqn.py
@@ -70,7 +70,7 @@ def __init__(
             - model (:obj:`torch.nn.Module`): The model of DQN algorithm, which should be an instance of class \
                 :class:`ding.model.DQN`. \
                 If not specified, a default model will be generated according to the configuration.
-            - cfg (:obj:Union[EasyDict, dict]): The configuration of DQN algorithm, which is a dict. \
+            - cfg (:obj:`Union[EasyDict, dict]`): The configuration of DQN algorithm, which is a dict. \
                 Default to None. If not specified, the default configuration will be used. \
                 The default configuration can be found in ``ding/config/example/DQN/gym_lunarlander_v2.py``.
             - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \
diff --git a/ding/bonus/pg.py b/ding/bonus/pg.py
index 59c031d65d..1e7cb8fb99 100644
--- a/ding/bonus/pg.py
+++ b/ding/bonus/pg.py
@@ -68,7 +68,7 @@ def __init__(
             - model (:obj:`torch.nn.Module`): The model of PG algorithm, which should be an instance of class \
                 :class:`ding.model.PG`. \
                 If not specified, a default model will be generated according to the configuration.
-            - cfg (:obj:Union[EasyDict, dict]): The configuration of PG algorithm, which is a dict. \
+            - cfg (:obj:`Union[EasyDict, dict]`): The configuration of PG algorithm, which is a dict. \
                 Default to None. If not specified, the default configuration will be used. \
                 The default configuration can be found in ``ding/config/example/PG/gym_lunarlander_v2.py``.
             - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \
diff --git a/ding/bonus/ppo_offpolicy.py b/ding/bonus/ppo_offpolicy.py
index 546aecbd6d..5aef20593f 100644
--- a/ding/bonus/ppo_offpolicy.py
+++ b/ding/bonus/ppo_offpolicy.py
@@ -70,7 +70,7 @@ def __init__(
             - model (:obj:`torch.nn.Module`): The model of PPO (offpolicy) algorithm, \
                 which should be an instance of class :class:`ding.model.VAC`. \
                 If not specified, a default model will be generated according to the configuration.
-            - cfg (:obj:Union[EasyDict, dict]): The configuration of PPO (offpolicy) algorithm, which is a dict. \
+            - cfg (:obj:`Union[EasyDict, dict]`): The configuration of PPO (offpolicy) algorithm, which is a dict. \
                 Default to None. If not specified, the default configuration will be used. \
                 The default configuration can be found in ``ding/config/example/PPO (offpolicy)/gym_lunarlander_v2.py``.
             - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \
diff --git a/ding/bonus/sac.py b/ding/bonus/sac.py
index cb6046476c..f635d5db37 100644
--- a/ding/bonus/sac.py
+++ b/ding/bonus/sac.py
@@ -71,7 +71,7 @@ def __init__(
             - model (:obj:`torch.nn.Module`): The model of SAC algorithm, which should be an instance of class \
                 :class:`ding.model.ContinuousQAC`. \
                 If not specified, a default model will be generated according to the configuration.
-            - cfg (:obj:Union[EasyDict, dict]): The configuration of SAC algorithm, which is a dict. \
+            - cfg (:obj:`Union[EasyDict, dict]`): The configuration of SAC algorithm, which is a dict. \
                 Default to None. If not specified, the default configuration will be used. \
                 The default configuration can be found in ``ding/config/example/SAC/gym_lunarlander_v2.py``.
             - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \
diff --git a/ding/envs/env_manager/base_env_manager.py b/ding/envs/env_manager/base_env_manager.py
index 291390896c..6ef4bad28c 100644
--- a/ding/envs/env_manager/base_env_manager.py
+++ b/ding/envs/env_manager/base_env_manager.py
@@ -124,7 +124,7 @@ def __init__(
 
         .. note::
             For more details about how to merge config, please refer to the system document of DI-engine \
-            (`en link <../03_system/config.html>`_).
+            (`en link1 <../03_system/config.html>`_).
         """
         self._cfg = cfg
         self._env_fn = env_fn
@@ -484,7 +484,7 @@ def seed(self, seed: Union[Dict[int, int], List[int], int], dynamic_seed: bool =
 
         .. note::
            For more details about ``dynamic_seed``, please refer to the best practice document of DI-engine \
-           (`en link <../04_best_practice/random_seed.html>`_).
+           (`en link2 <../04_best_practice/random_seed.html>`_).
        """
        if isinstance(seed, numbers.Integral):
            seed = [seed + i for i in range(self.env_num)]
@@ -580,7 +580,7 @@ class BaseEnvManagerV2(BaseEnvManager):
 
     .. note::
         For more details about new task pipeline, please refer to the system document of DI-engine \
-        (`system en link <../03_system/index.html>`_).
+        (`system en link3 <../03_system/index.html>`_).
 
     Interfaces:
         reset, step, seed, close, enable_save_replay, launch, default_config, reward_shaping, enable_save_figure
diff --git a/ding/model/template/qgpo.py b/ding/model/template/qgpo.py
index 135433c42c..dd384846a7 100644
--- a/ding/model/template/qgpo.py
+++ b/ding/model/template/qgpo.py
@@ -418,9 +418,9 @@ def q_loss_fn(self, a, s, r, s_, d, fake_a_, discount=0.99):
             - a (:obj:`torch.Tensor`): The input action.
             - s (:obj:`torch.Tensor`): The input state.
             - r (:obj:`torch.Tensor`): The input reward.
-            - s_ (:obj:`torch.Tensor`): The input next state.
+            - s\_ (:obj:`torch.Tensor`): The input next state.
             - d (:obj:`torch.Tensor`): The input done.
-            - fake_a_ (:obj:`torch.Tensor`): The input fake action.
+            - fake_a (:obj:`torch.Tensor`): The input fake action.
             - discount (:obj:`float`): The discount factor.
         """
diff --git a/ding/policy/qgpo.py b/ding/policy/qgpo.py
index cfa3cb19c1..2bd3884e11 100644
--- a/ding/policy/qgpo.py
+++ b/ding/policy/qgpo.py
@@ -13,12 +13,11 @@
 @POLICY_REGISTRY.register('qgpo')
 class QGPOPolicy(Policy):
     """
-    Overview:
-        Policy class of QGPO algorithm
-        Contrastive Energy Prediction for Exact Energy-Guided Diffusion Sampling in Offline Reinforcement Learning
-        https://arxiv.org/abs/2304.12824
-    Interfaces:
-        ``__init__``, ``forward``, ``learn``, ``eval``, ``state_dict``, ``load_state_dict``
+    Overview:
+        Policy class of QGPO algorithm (https://arxiv.org/abs/2304.12824).
+        Contrastive Energy Prediction for Exact Energy-Guided Diffusion Sampling in Offline Reinforcement Learning
+    Interfaces:
+        ``__init__``, ``forward``, ``learn``, ``eval``, ``state_dict``, ``load_state_dict``
     """
 
     config = dict(
diff --git a/ding/rl_utils/value_rescale.py b/ding/rl_utils/value_rescale.py
index 725ba0fc83..e51aed4d83 100644
--- a/ding/rl_utils/value_rescale.py
+++ b/ding/rl_utils/value_rescale.py
@@ -5,11 +5,11 @@ def value_transform(x: torch.Tensor, eps: float = 1e-2) -> torch.Tensor:
     """
     Overview:
         A function to reduce the scale of the action-value function.
-        :math: `h(x) = sign(x)(\sqrt{(abs(x)+1)} - 1) + \eps * x` .
+        :math: `h(x) = sign(x)(\sqrt{(abs(x)+1)} - 1) + \epsilon * x` .
     Arguments:
         - x: (:obj:`torch.Tensor`) The input tensor to be normalized.
         - eps: (:obj:`float`) The coefficient of the additive regularization term \
-            to ensure h^{-1} is Lipschitz continuous
+            to ensure inverse function is Lipschitz continuous
     Returns:
         - (:obj:`torch.Tensor`) Normalized tensor.
     """
@@ -23,11 +23,11 @@ def value_inv_transform(x: torch.Tensor, eps: float = 1e-2) -> torch.Tensor:
     """
     Overview:
         The inverse form of value rescale.
-        :math: `h^{-1}(x) = sign(x)({(\frac{\sqrt{1+4\eps(|x|+1+\eps)}-1}{2\eps})}^2-1)` .
+        :math: `h^{-1}(x) = sign(x)({(\frac{\sqrt{1+4\epsilon(|x|+1+\epsilon)}-1}{2\epsilon})}^2-1)` .
     Arguments:
         - x: (:obj:`torch.Tensor`) The input tensor to be unnormalized.
         - eps: (:obj:`float`) The coefficient of the additive regularization term \
-            to ensure h^{-1} is Lipschitz continuous
+            to ensure inverse function is Lipschitz continuous
     Returns:
         - (:obj:`torch.Tensor`) Unnormalized tensor.
     """
diff --git a/ding/torch_utils/network/gtrxl.py b/ding/torch_utils/network/gtrxl.py
index 16ac7702c7..3672fc977d 100644
--- a/ding/torch_utils/network/gtrxl.py
+++ b/ding/torch_utils/network/gtrxl.py
@@ -167,8 +167,7 @@ def update(self, hidden_state: List[torch.Tensor]):
         """
         Overview:
             Update the memory given a sequence of hidden states.
-            Example for single layer:
-                memory_len=3, hidden_size_len=2, bs=3
+            Example for single layer: (memory_len=3, hidden_size_len=2, bs=3)
                 m00 m01 m02      h00 h01 h02              m20 m21 m22
             m = m10 m11 m12  h = h10 h11 h12  => new_m =  h00 h01 h02
                 m20 m21 m22                               h10 h11 h12
@@ -264,9 +263,8 @@ def _rel_shift(self, x: torch.Tensor, zero_upper: bool = False) -> torch.Tensor:
             4) Mask out the upper triangle (optional)
 
         .. note::
-            See the following material for better understanding:
-                https://github.com/kimiyoung/transformer-xl/issues/8
-                https://arxiv.org/pdf/1901.02860.pdf (Appendix B)
+            See the following material for better understanding: https://github.com/kimiyoung/transformer-xl/issues/8 \
+            https://arxiv.org/pdf/1901.02860.pdf (Appendix B)
 
         Arguments:
             - x (:obj:`torch.Tensor`): The input tensor with shape (cur_seq, full_seq, bs, head_num).
             - zero_upper (:obj:`bool`): If True, the upper-right triangle of the matrix is set to zero.
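
Note on the ``value_rescale`` docstring changes above: the two formulas can be sanity-checked directly. The sketch below is not copied from the repository; it is a minimal reimplementation assuming only the definitions stated in the docstrings, ``h(x) = sign(x)(sqrt(|x|+1) - 1) + eps * x`` and its closed-form inverse, followed by a round-trip check that the inverse really undoes the transform.

```python
# Minimal sketch of the value-rescale pair described in the docstrings above.
# Assumes only the two formulas from ding/rl_utils/value_rescale.py; this is
# not the repository implementation itself.
import torch


def value_transform(x: torch.Tensor, eps: float = 1e-2) -> torch.Tensor:
    # h(x) = sign(x) * (sqrt(|x| + 1) - 1) + eps * x
    return torch.sign(x) * (torch.sqrt(torch.abs(x) + 1) - 1) + eps * x


def value_inv_transform(x: torch.Tensor, eps: float = 1e-2) -> torch.Tensor:
    # h^{-1}(x) = sign(x) * (((sqrt(1 + 4*eps*(|x| + 1 + eps)) - 1) / (2*eps))^2 - 1)
    return torch.sign(x) * (((torch.sqrt(1 + 4 * eps * (torch.abs(x) + 1 + eps)) - 1) / (2 * eps)) ** 2 - 1)


if __name__ == "__main__":
    # Round-trip check: h^{-1}(h(x)) should recover x up to floating-point error.
    # The eps term keeps the inverse well-behaved (Lipschitz continuous) for large |x|.
    x = torch.linspace(-500.0, 500.0, steps=101, dtype=torch.float64)
    assert torch.allclose(value_inv_transform(value_transform(x)), x, atol=1e-6)
```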