diff --git a/src/components/authors.jsx b/src/components/authors.jsx
index b95baef..e782f72 100644
--- a/src/components/authors.jsx
+++ b/src/components/authors.jsx
@@ -41,6 +41,7 @@ export default class Authors extends React.Component {
           );
         })}
         {this.props.meta}
+        {this.props.extra}
       );
diff --git a/src/components/header.jsx b/src/components/header.jsx
index 29577bf..918f1a2 100644
--- a/src/components/header.jsx
+++ b/src/components/header.jsx
@@ -56,6 +56,7 @@ export default class Header extends React.Component {
           authors={this.props.authors}
           affiliations={this.props.affiliations}
           meta={this.props.meta}
+          extra={this.props.extra}
         />
         {Object.keys(this.props.resources).map((key) => (
diff --git a/src/images/method.png b/src/images/method.png
index 98c52ef..302420c 100644
Binary files a/src/images/method.png and b/src/images/method.png differ
diff --git a/src/images/overview.png b/src/images/overview.png
new file mode 100644
index 0000000..43cb436
Binary files /dev/null and b/src/images/overview.png differ
diff --git a/src/images/result1.png b/src/images/result1.png
deleted file mode 100644
index 328ffdb..0000000
Binary files a/src/images/result1.png and /dev/null differ
diff --git a/src/images/result2.png b/src/images/result2.png
deleted file mode 100644
index 1e818ed..0000000
Binary files a/src/images/result2.png and /dev/null differ
diff --git a/src/images/teaser.jpeg b/src/images/teaser.jpeg
deleted file mode 100644
index b765f81..0000000
Binary files a/src/images/teaser.jpeg and /dev/null differ
diff --git a/src/images/teaser.png b/src/images/teaser.png
deleted file mode 100644
index d169b2c..0000000
Binary files a/src/images/teaser.png and /dev/null differ
diff --git a/src/pages/index.jsx b/src/pages/index.jsx
index a889457..7266604 100644
--- a/src/pages/index.jsx
+++ b/src/pages/index.jsx
@@ -67,6 +67,7 @@ class Template extends React.Component {
           authors={data.authors}
           affiliations={data.affiliations}
           meta={data.meta}
+          extra={data.extra}
           resources={data.resources}
         />
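The changes above thread the new `extra` field from `template.yaml` (read as `data.extra` in `index.jsx`) through `Header` into `Authors`, where it renders immediately after `{this.props.meta}`. As a rough sketch of the consuming end — the markup, the `<p>` wrapper, and the default values below are illustrative assumptions, not the component's actual structure; only the prop flow mirrors the diff:

```jsx
// Sketch only: assumes `meta` and `extra` arrive as arrays of footnote
// strings, as template.yaml defines them.
import React from 'react';

export default class Authors extends React.Component {
  render() {
    const { authors = [], meta = [], extra = [] } = this.props;
    return (
      <div>
        {authors.map((author) => (
          <span key={author.name}>{author.name}</span>
        ))}
        {/* footnotes: meta first ('* Equal Contribution'),
            then extra ('+ work done as an intern ...') */}
        {[...meta, ...extra].map((note) => (
          <p key={note}>{note}</p>
        ))}
      </div>
    );
  }
}
```

Keeping `extra` as a second list rather than appending it to `meta` lets the page attach the two footnote markers to author names independently, as `Tatsuya Kamijo*+` does in the yaml diff below.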
diff --git a/template.yaml b/template.yaml
index 42afd77..66fe5eb 100644
--- a/template.yaml
+++ b/template.yaml
@@ -1,209 +1,199 @@
 organization: OMRON SINIC X
 twitter: '@omron_sinicx'
-title: 'MULTIPOLAR: Multi-Source Policy Aggregation for Transfer Reinforcement Learning between Diverse Environmental Dynamics'
-conference: IJCAI2020
+title: 'Learning Variable Compliance Control From a Few Demonstrations for Bimanual Robot with Haptic Feedback Teleoperation System'
+conference: IROS2024
 resources:
-  paper: https://arxiv.org/abs/1909.13111
-  code: https://github.com/omron-sinicx/multipolar
-  video: https://www.youtube.com/embed/adUnIj83RtU
-  blog: https://medium.com/sinicx/multipolar-multi-source-policy-aggregation-for-transfer-reinforcement-learning-between-diverse-bc42a152b0f5
-description: explore a new challenge in transfer RL, where only a set of source policies collected under unknown diverse dynamics is available for learning a target task efficiently.
-image: https://omron-sinicx.github.io/multipolar/assets/teaser.png
-url: https://omron-sinicx.github.io/multipolar
-speakerdeck: b7a0614c24014dcbbb121fbb9ed234cd
+  paper: http://arxiv.org/abs/2406.14990
+  # code: https://github.com/omron-sinicx/multipolar
+  video: https://youtu.be/qejVTnmWFdo
+  # blog: https://medium.com/sinicx/multipolar-multi-source-policy-aggregation-for-transfer-reinforcement-learning-between-diverse-bc42a152b0f5
+description: "CompACT: Learning Compliance Control via Action Chunking with Transformers. (Code to be released soon)"
+image: https://omron-sinicx.github.io/CompACT/assets/overview.png
+url: https://omron-sinicx.github.io/CompACT
+# speakerdeck: b7a0614c24014dcbbb121fbb9ed234cd
 authors:
-  - name: Mohammadamin Barekatain*
+  - name: Tatsuya Kamijo*+
     affiliation: [1, 2]
-    url: http://barekatain.me/
     position: intern
-  - name: Ryo Yonetani
+  - name: Cristian C. Beltran-Hernandez*
     affiliation: [1]
-    position: principal investigator
-    url: https://yonetaniryo.github.io/
+    position: senior researcher
+    url: https://cristianbehe.me/
   - name: Masashi Hamaya
     affiliation: [1]
-    position: senior researcher
+    position: principal investigator
     url: https://sites.google.com/view/masashihamaya/home
-  # - name: Mai Nishimura
-  #   affiliation: [1]
-  #   url: https://denkiwakame.github.io
-  # - name: Asako Kanezaki
-  #   affiliation: [2]
-  #   url: https://kanezaki.github.io/
 contact_ids: ['github', 'omron', 2] #=> github issues, contact@sinicx.com, 2nd author
 affiliations:
   - OMRON SINIC X Corporation
-  - Technical University of Munich
+  - University of Tokyo
 meta:
-  - '* work done as an intern at OMRON SINIC X.'
+  - '* Equal Contribution'
+extra:
+  - '+ work done as an intern at OMRON SINIC X.'
 bibtex: >
   # arXiv version
-  @article{barekatain2019multipolar,
-    title={MULTIPOLAR: Multi-Source Policy Aggregation for Transfer Reinforcement Learning between Diverse Environmental Dynamics},
-    author={Barekatain, Mohammadamin and Yonetani, Ryo and Hamaya, Masashi},
-    journal={arXiv preprint arXiv:1909.13111},
-    year={2019}
-  }
-
-  # IJCAI version
-
-  @inproceedings{barekatain2020multipolar,
-    title={MULTIPOLAR: Multi-Source Policy Aggregation for Transfer Reinforcement Learning between Diverse Environmental Dynamics},
-    author={Barekatain, Mohammadamin and Yonetani, Ryo and Hamaya, Masashi},
-    booktitle={International Joint Conference on Artificial Intelligence (IJCAI)},
-    year={2020}
+  @article{kamijo2024learning,
+    title={Learning Variable Compliance Control From a Few Demonstrations for Bimanual Robot with Haptic Feedback Teleoperation System},
+    author={Kamijo, Tatsuya and Beltran-Hernandez, Cristian C and Hamaya, Masashi},
+    journal={arXiv preprint arXiv:2406.14990},
+    year={2024}
   }
-teaser: teaser.png
+teaser: overview.png
 overview: |
-  Transfer reinforcement learning (RL) aims at improving learning efficiency of an agent by exploiting knowledge from other source agents trained on relevant tasks.
-  However, it remains challenging to transfer knowledge between different environmental dynamics without having access to the source environments.
-  In this work, we explore a new challenge in transfer RL, where only a set of source policies collected under unknown diverse dynamics is available for learning a target task efficiently.
-  To address this problem, the proposed approach, **MULTI-source POLicy AggRegation (MULTIPOLAR)**, comprises two key techniques.
-  We learn to aggregate the actions provided by the source policies adaptively to maximize the target task performance.
-  Meanwhile, we learn an auxiliary network that predicts residuals around the aggregated actions, which ensures the target policy's expressiveness even when some of the source policies perform poorly.
-  We demonstrated the effectiveness of MULTIPOLAR through an extensive experimental evaluation across six simulated environments ranging from classic control problems to challenging robotics simulations, under both continuous and discrete action spaces.
+  Automating dexterous, contact-rich manipulation tasks using rigid robots is a significant challenge in robotics. Rigid robots, defined by
+  their actuation through position commands, face issues of excessive contact forces due to their inability to adapt to contact with the environment,
+  potentially causing damage. While compliance control schemes have been introduced to mitigate these issues by controlling forces via external sensors,
+  they are hampered by the need for fine-tuning task-specific controller parameters. Learning from Demonstrations (LfD) offers an intuitive alternative,
+  allowing robots to learn manipulations through observed actions. In this work, we introduce a novel system to enhance the teaching of dexterous,
+  contact-rich manipulations to rigid robots. Our system is twofold: firstly, it incorporates a teleoperation interface utilizing Virtual Reality (VR)
+  controllers, designed to provide an intuitive and cost-effective method for task demonstration with haptic feedback. Secondly, we present Comp-ACT
+  (Compliance Control via Action Chunking with Transformers), a method that learns variable compliance control from just a few demonstrations.
+  Our methods have been validated across various complex contact-rich manipulation tasks using single-arm and bimanual robot setups in
+  simulated and real-world environments, demonstrating the effectiveness of our system in teaching robots dexterous manipulations with enhanced adaptability
+  and safety.
-body:
-  - title: subsection 1
-    image: method.png
-    text: >
-      **test text with unicode characters:** α, β, φ, ψ
-  - title: subsection 2
-    image: null
-    text: >
-      **test text with TeX characters:** $\alpha$, $\beta$, $\phi$, $\psi \\$
-      see how it renders with $\KaTeX$.
-      $$ E = mc^2$$
-      $$ \int \oint \sum \prod $$
-      $$ \begin{CD} A @>a>> B \\ @VbVV @AAcA \\ C @= D \end{CD} $$
-  - title: null
-    image: method.png
-    text: >
-      This is a multi-line text example.
-      "> - Flow Style" converts newlines to spaces.
-      Using >, newline characters are converted to spaces.
-      Newline characters and indentation are handled appropriately, and the text is represented as a single line.
-      It's suitable when you want to collapse multi-line text into a single line, such as in configurations or descriptions where readability is key.
-  - text: |
-      This is a multi-line
-      text example.
-      "| - Block Style" preserves newlines and indentation.
-      Using |, you can represent multi-line text that includes newline characters.
-      Newline characters are preserved exactly as they are, along with the block's indentation.
-      It's suitable when maintaining newlines and indentation is important, such as preserving the structure of code or prose.
-  - title: Results
-    text: |
-      ### Motion Planning (MP) Dataset
-      markdown version
-      |Method|Opt|Exp|Hmean|
-      |--|--|--|--|
-      |BF| 65.8 (63.8, 68.0)| 44.1 (42.8, 45.5) | 44.8 (43.4, 46.3)|
-      |WA*| 68.4 (66.5, 70.4)| 35.8 (34.5, 37.1) | 40.4 (39.0, 41.8)|
-      |**Neural A*** | **87.7 (86.6, 88.9)**| 40.1 (38.9, 41.3) | 52.0 (50.7, 53.3)|
+# body:
+#   - title: subsection 1
+#     image: method.png
+#     text: >
+#       **test text with unicode characters:** α, β, φ, ψ
+#   - title: subsection 2
+#     image: null
+#     text: >
+#       **test text with TeX characters:** $\alpha$, $\beta$, $\phi$, $\psi \\$
+#       see how it renders with $\KaTeX$.
+#       $$ E = mc^2$$
+#       $$ \int \oint \sum \prod $$
+#       $$ \begin{CD} A @>a>> B \\ @VbVV @AAcA \\ C @= D \end{CD} $$
+#   - title: null
+#     image: method.png
+#     text: >
+#       This is a multi-line text example.
+#       "> - Flow Style" converts newlines to spaces.
+#       Using >, newline characters are converted to spaces.
+#       Newline characters and indentation are handled appropriately, and the text is represented as a single line.
+#       It's suitable when you want to collapse multi-line text into a single line, such as in configurations or descriptions where readability is key.
+#   - text: |
+#       This is a multi-line
+#       text example.
+#       "| - Block Style" preserves newlines and indentation.
+#       Using |, you can represent multi-line text that includes newline characters.
+#       Newline characters are preserved exactly as they are, along with the block's indentation.
+#       It's suitable when maintaining newlines and indentation is important, such as preserving the structure of code or prose.
+#   - title: Results
+#     text: |
+#       ### Motion Planning (MP) Dataset
+#       markdown version
+#       |Method|Opt|Exp|Hmean|
+#       |--|--|--|--|
+#       |BF| 65.8 (63.8, 68.0)| 44.1 (42.8, 45.5) | 44.8 (43.4, 46.3)|
+#       |WA*| 68.4 (66.5, 70.4)| 35.8 (34.5, 37.1) | 40.4 (39.0, 41.8)|
+#       |**Neural A*** | **87.7 (86.6, 88.9)**| 40.1 (38.9, 41.3) | 52.0 (50.7, 53.3)|
-
-      Motion Planning (MP) Dataset
-
-      HTML version
-
-      <table>
-        <tr> <th>Method</th> <th>Opt</th> <th>Exp</th> <th>Hmean</th> </tr>
-        <tr> <td>BF</td> <td>65.8 (63.8, 68.0)</td> <td>44.1 (42.8, 45.5)</td> <td>44.8 (43.4, 46.3)</td> </tr>
-        <tr> <td>WA*</td> <td>68.4 (66.5, 70.4)</td> <td>35.8 (34.5, 37.1)</td> <td>40.4 (39.0, 41.8)</td> </tr>
-        <tr> <td>SAIL</td> <td>5.7 (4.6, 6.8)</td> <td>58.0 (56.1, 60.0)</td> <td>7.7 (6.4, 9.0)</td> </tr>
-        <tr> <td>SAIL-SL</td> <td>3.1 (2.3, 3.8)</td> <td>57.6 (55.7, 59.6)</td> <td>4.4 (3.5, 5.3)</td> </tr>
-        <tr> <td>BB-A*</td> <td>31.2 (28.8, 33.5)</td> <td>52.0 (50.2, 53.9)</td> <td>31.1 (29.2, 33.0)</td> </tr>
-        <tr> <td>Neural BF</td> <td>75.5 (73.8, 77.1)</td> <td>45.9 (44.6, 47.2)</td> <td>52.0 (50.7, 53.4)</td> </tr>
-        <tr> <td>Neural A*</td> <td>87.7 (86.6, 88.9)</td> <td>40.1 (38.9, 41.3)</td> <td>52.0 (50.7, 53.3)</td> </tr>
-      </table>
-
-      Selected Path Planning Results
-
-      dummy text
-
-      Path Planning Results on SSD Dataset
-
-      dummy text
+#
+#       Motion Planning (MP) Dataset
+#
+#       HTML version
+#
+#       <table>
+#         <tr> <th>Method</th> <th>Opt</th> <th>Exp</th> <th>Hmean</th> </tr>
+#         <tr> <td>BF</td> <td>65.8 (63.8, 68.0)</td> <td>44.1 (42.8, 45.5)</td> <td>44.8 (43.4, 46.3)</td> </tr>
+#         <tr> <td>WA*</td> <td>68.4 (66.5, 70.4)</td> <td>35.8 (34.5, 37.1)</td> <td>40.4 (39.0, 41.8)</td> </tr>
+#         <tr> <td>SAIL</td> <td>5.7 (4.6, 6.8)</td> <td>58.0 (56.1, 60.0)</td> <td>7.7 (6.4, 9.0)</td> </tr>
+#         <tr> <td>SAIL-SL</td> <td>3.1 (2.3, 3.8)</td> <td>57.6 (55.7, 59.6)</td> <td>4.4 (3.5, 5.3)</td> </tr>
+#         <tr> <td>BB-A*</td> <td>31.2 (28.8, 33.5)</td> <td>52.0 (50.2, 53.9)</td> <td>31.1 (29.2, 33.0)</td> </tr>
+#         <tr> <td>Neural BF</td> <td>75.5 (73.8, 77.1)</td> <td>45.9 (44.6, 47.2)</td> <td>52.0 (50.7, 53.4)</td> </tr>
+#         <tr> <td>Neural A*</td> <td>87.7 (86.6, 88.9)</td> <td>40.1 (38.9, 41.3)</td> <td>52.0 (50.7, 53.3)</td> </tr>
+#       </table>
+#
+#       Selected Path Planning Results
+#
+#       dummy text
+#
+#       Path Planning Results on SSD Dataset
+#
+#       dummy text
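The commented-out demo above doubles as documentation of the `body:` schema (`title`, `image`, `text`, with `>` flow-style or `|` block-style scalars). If the CompACT page later needs a section of its own, re-enabling one entry might look like the sketch below; the section title and wording are placeholders drawn from this diff's own description, not part of the change:

```yaml
# Hypothetical body entry following the schema of the commented demo above.
body:
  - title: Method
    image: method.png   # an image under src/images/, as in the demo entries
    text: >
      Comp-ACT (Compliance Control via Action Chunking with Transformers)
      learns variable compliance control from a few demonstrations collected
      with a VR-based haptic-feedback teleoperation system.
```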