Merge branch 'gl-dev' into main
Robin Chen committed Dec 3, 2021
2 parents f532c9e + db3eb75 commit 73f0009
Showing 66 changed files with 1,233 additions and 324 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/doc.yml
@@ -21,8 +21,7 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Generate
run: |
python -m pip install .
python -m pip install sphinx_rtd_theme sphinx
python -m pip install .[doc]
wget http://opendilab.org/download/DI-drive/carla-0.9.9-py3.7-linux-x86_64.egg
easy_install carla-0.9.9-py3.7-linux-x86_64.egg
make -C ./docs html
2 changes: 1 addition & 1 deletion .github/workflows/style.yml
@@ -17,6 +17,6 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: code style
run: |
python -m pip install yapf==0.29.0 flake8
python -m pip install .[style]
bash format.sh ./core --test
flake8 ./core
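Both workflow changes above rely on optional dependency groups (doc, style) declared in the project's packaging metadata. Below is a minimal sketch of the extras_require entries in setup.py that pip install .[doc] and pip install .[style] assume; the dependency lists are taken from the commands removed above, and the rest of the setup() call is illustrative only.

# setup.py (sketch): extras_require entries assumed by `pip install .[doc]`
# and `pip install .[style]`; the real setup.py in DI-drive may list more packages.
from setuptools import setup, find_packages

setup(
    name='DI-drive',
    version='0.2.2',
    packages=find_packages(),
    extras_require={
        'doc': ['sphinx', 'sphinx_rtd_theme'],  # documentation build tools
        'style': ['yapf==0.29.0', 'flake8'],    # formatting and lint tools
    },
)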
1 change: 1 addition & 0 deletions .gitignore
@@ -1408,3 +1408,4 @@ project_test.py
*episode_metainfo.json
*measurements.lmdb
*index.txt
openaigym*
13 changes: 13 additions & 0 deletions CHANGELOG
@@ -1,3 +1,16 @@
## v0.2.2 (2021.12.3)
- Add fail count and retry for benchmark collector
- Add LBC Image training phase 0 & 1
- Add comments for commonly used default config
- Add LBC Image train doc; scenario doc; save replay doc
- Update ci and setup
- Update simple rl env wrappers
- Update args in all Carla policy
- Modify all eps-greedy input into learn iterations
- Fix bug in CILRS model
- Fix typos in doc and comments


## v0.2.1 (2021.11.18)
- Add NoCrash Carla099
- Add bev dataset
4 changes: 2 additions & 2 deletions README.md
@@ -2,7 +2,7 @@

<img src="./docs/figs/DI-drive.png" width="200" alt="icon"/>

Updated on 2021.11.18 DI-drive-v0.2.1 (beta)
Updated on 2021.12.3 DI-drive-v0.2.2 (beta)

DI-drive - Decision Intelligence Platform for Autonomous Driving simulation.

@@ -24,7 +24,7 @@ and most importantly, to **put these all together!**

**DI-drive** uses [DI-engine](https://github.com/opendilab/DI-engine), a Reinforcement Learning
platform to build most of the running modules and demos. **DI-drive** currently supports [Carla](http://carla.org),
an open-source Autonomous Drining simualtor to operate driving simualtion.
an open-source Autonomous Driving simulator to operate driving simulation.

## Installation

6 changes: 6 additions & 0 deletions core/__init__.py
@@ -0,0 +1,6 @@
__TITLE__ = "DI-drive"
__VERSION__ = "0.2.2"
__DESCRIPTION__ = "Decision AI Auto-Driving Platform"
__AUTHOR__ = "OpenDILab Contributors"
__AUTHOR_EMAIL__ = "[email protected]"
__version__ = __VERSION__
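With this new metadata module, the installed version can be checked at runtime. A tiny usage sketch, assuming the package is importable as core after installation:

# Sketch: reading DI-drive's version metadata at runtime
# (assumes `import core` resolves to the installed DI-drive package).
import core

print(core.__TITLE__, core.__version__)  # expected to print: DI-drive 0.2.2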
32 changes: 24 additions & 8 deletions core/data/carla_benchmark_collector.py
@@ -38,11 +38,17 @@ class CarlaBenchmarkCollector(BaseCollector):

config = dict(
benchmark_dir=None,
# suite name, can be str or list
suite='FullTown01-v0',
seed=None,
# whether make seed of each env different
dynamic_seed=True,
# manually set weathers rather than read from suite
weathers=None,
# whether apply hard failure judgement in suite
# by default in benchmark, collided will not cause failure
nocrash=False,
# whether shuffle env setting in suite
shuffle=False,
)
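For reference, a hedged sketch of how these new config keys might be overridden when building a collector configuration. The EasyDict wrapper and the commented-out constructor call follow the usual DI-engine collector pattern and are assumptions, not part of this diff.

# Sketch only: overriding the new CarlaBenchmarkCollector config keys.
from easydict import EasyDict

collector_cfg = EasyDict(
    benchmark_dir=None,
    suite=['FullTown01-v0', 'FullTown02-v0'],  # suite name: a single str or a list
    seed=0,
    dynamic_seed=True,   # give each env its own seed
    weathers=[1, 3],     # set weathers manually instead of reading them from the suite
    nocrash=True,        # treat collision as a hard failure
    shuffle=True,        # shuffle env settings within the suite
)
# collector = CarlaBenchmarkCollector(collector_cfg, env_manager, policy)  # assumed signature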

@@ -187,6 +193,7 @@ def collect(
break

if self._seed is not None:
# dynamic seed: different seed for each env
if self._dynamic_seed:
self._env_manager.seed(self._seed)
else:
@@ -195,6 +202,7 @@
self._env_manager.reset(running_env_params)

return_data = []
env_fail_times = {env_id: 0 for env_id in running_env_params}
collected_episodes = running_envs - 1
collected_samples = 0

@@ -228,6 +236,7 @@
self._traj_cache[env_id].append(transition)
if timestep.done:
if timestep.info['success'] and len(self._traj_cache[env_id]) > 50:
env_fail_times[env_id] = 0
env_param = running_env_params[env_id]
episode_data = {'env_param': env_param, 'data': list(self._traj_cache[env_id])}
return_data.append(episode_data)
@@ -245,21 +254,28 @@
running_env_params[env_id] = reset_param
self._env_manager.reset({env_id: reset_param})
else:
env_fail_times[env_id] += 1
info = timestep.info
for k in list(info.keys()):
if 'reward' in k:
info.pop(k)
if k in ['timestamp']:
info.pop(k)
print('[COLLECTOR] env_id {} not success'.format(env_id), info)
suite_index = collected_episodes % self._suite_num
next_suite = self._collect_suite_list[suite_index]
reset_param_index = self._collect_suite_index_dict[next_suite]
reset_param = self._collect_suite_reset_params[next_suite][reset_param_index]
self._collect_suite_index_dict[next_suite] += 1
self._collect_suite_index_dict[next_suite] %= len(
self._collect_suite_reset_params[next_suite]
)
if env_fail_times[env_id] < 5:
# not reach max fail times, continue reset param
reset_param = running_env_params[env_id]
else:
# reach max fail times, skip to next reset param
env_fail_times[env_id] = 0
suite_index = collected_episodes % self._suite_num
next_suite = self._collect_suite_list[suite_index]
reset_param_index = self._collect_suite_index_dict[next_suite]
reset_param = self._collect_suite_reset_params[next_suite][reset_param_index]
self._collect_suite_index_dict[next_suite] += 1
self._collect_suite_index_dict[next_suite] %= len(
self._collect_suite_reset_params[next_suite]
)
running_env_params[env_id] = reset_param
self._env_manager.reset({env_id: reset_param})
self._traj_cache[env_id].clear()
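The retry logic added above re-uses the same reset parameters after a failed episode and only advances to the next suite setting once five consecutive failures have accumulated. A condensed, stand-alone sketch of that control flow (names simplified; the 5-failure threshold is hard-coded as in the diff, and this is not the collector's actual code):

# Stand-alone sketch of the fail-count / retry behaviour described above.
MAX_FAIL_TIMES = 5

def pick_reset_param(env_id, env_fail_times, running_env_params, suite_reset_params, suite_index):
    """Choose the reset param to use after an episode fails."""
    env_fail_times[env_id] += 1
    if env_fail_times[env_id] < MAX_FAIL_TIMES:
        # Below the cap: retry the same benchmark setting.
        return running_env_params[env_id]
    # Cap reached: reset the counter and skip to the next setting in the suite.
    env_fail_times[env_id] = 0
    return suite_reset_params[suite_index % len(suite_reset_params)]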
5 changes: 3 additions & 2 deletions core/data/lbc_dataset.py
@@ -190,7 +190,7 @@ def __init__(
self._batch_read_number = batch_read_number
self._batch_aug = batch_aug

print("augment with ", augment_strategy)
print("augment with", augment_strategy)
if augment_strategy is not None and augment_strategy != 'None':
self.augmenter = getattr(augmenter, augment_strategy)
else:
@@ -297,7 +297,8 @@ def __getitem__(self, index) -> Any:
# if len()
# import pdb; pdb.set_trace()
rgb_images = torch.stack([self.rgb_transform(img) for img in rgb_images])
birdview = self.bird_view_transform(birdview)

# birdview = self.bird_view_transform(birdview)

return {
'rgb': rgb_images,
8 changes: 1 addition & 7 deletions core/envs/carla_env_wrapper.py
@@ -44,7 +44,7 @@ def reset(self, *args, **kwargs) -> Any:
are recorded.
:Returns:
Any: Observations from envirionment
Any: Observations from environment
"""
obs = self.env.reset(*args, **kwargs)
obs = to_ndarray(obs)
@@ -65,12 +65,6 @@ def step(self, action: Any = None) -> BaseEnvTimestep:
BaseEnvTimestep: DI-engine format of env step returns.
"""
action = to_ndarray(action)
if action is not None:
for key in ['throttle', 'brake']:
if key in action:
np.clip(action[key], 0, 1)
if 'steer' in action:
np.clip(action['steer'], -1, 1)

obs, rew, done, info = self.env.step(action)
self._final_eval_reward += rew
13 changes: 8 additions & 5 deletions core/envs/scenario_carla_env.py
@@ -38,8 +38,11 @@ class ScenarioCarlaEnv(BaseCarlaEnv):
observation_space = spaces.Dict({})
config = dict(
simulator=dict(),
finish_reward=100,
# reward value if success
success_reward=10,
# whether open visualize
visualize=None,
# outputs of scenario conclusion
outputs=[],
output_dir='',
)
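The renamed success_reward key can be overridden per experiment like any other default. A hypothetical override is sketched below; the constructor call is assumed rather than taken from this diff.

# Hypothetical override of the renamed reward key (was finish_reward before this commit).
from easydict import EasyDict

scenario_env_cfg = EasyDict(
    simulator=dict(),
    success_reward=50,   # reward added on success, subtracted on failure
    visualize=None,
    outputs=[],
    output_dir='',
)
# env = ScenarioCarlaEnv(scenario_env_cfg)  # assumed constructor usage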
@@ -71,7 +74,7 @@ def __init__(
self._output_dir = self._cfg.output_dir
self._outputs = self._cfg.outputs

self._finish_reward = self._cfg.finish_reward
self._success_reward = self._cfg.success_reward
self._is_success = False
self._is_failure = False
self._collided = False
@@ -296,10 +299,10 @@ def compute_reward(self):
"""
goal_reward = 0
if self._is_success:
goal_reward += self._finish_reward
goal_reward += self._success_reward

elif self._is_failure:
goal_reward -= self._finish_reward
goal_reward -= self._success_reward

criteria_dict = self._simulator.get_criteria()

@@ -341,7 +344,7 @@ def render(self):
'tick': self._tick,
'end_timeout': self._simulator.end_timeout,
'end_distance': self._simulator.end_distance,
'total_distance': self._simulator.total_diatance,
'total_distance': self._simulator.total_distance,
}
render_info.update(self._simulator_databuffer['state'])
render_info.update(self._simulator_databuffer['navigation'])