generated from alshedivat/al-folio
-
Notifications
You must be signed in to change notification settings - Fork 0
/
papers.bib
58 lines (53 loc) · 1.9 KB
/
papers.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
% Mjx framework paper from the 2022 IEEE Conference on Games (CoG).
% The venue is stored in booktitle, the field inproceedings entries use for
% the proceedings title. abbr and bibtex_show are non-standard fields that
% BibTeX styles ignore (presumably consumed by the site template).
@inproceedings{koyamada2022mjx,
  abbr         = {IEEE},
  bibtex_show  = {true},
  author       = {Koyamada, Sotetsu and Habara, Keigo and Goto, Nao and Okano, Shinri and Nishimori, Soichiro and Ishii, Shin},
  title        = {Mjx: A framework for Mahjong AI research},
  booktitle    = {2022 IEEE Conference on Games (CoG)},
  pages        = {504--507},
  year         = {2022},
  organization = {IEEE}
}
% Pgx paper: parallel game simulators for reinforcement learning (NeurIPS, volume 36).
% NOTE(review): the citation key says 2024 while the year field is 2023, and
% abbr reads NIPS while journal reads NeurIPS. Key and values are left as-is
% so existing citations and badges keep working; confirm which is intended.
@article{koyamada2024pgx,
  abbr        = {NIPS},
  bibtex_show = {true},
  selected    = {true},
  author      = {Koyamada, Sotetsu and Okano, Shinri and Nishimori, Soichiro and Murata, Yu and Habara, Keigo and Kita, Haruka and Ishii, Shin},
  title       = {Pgx: Hardware-accelerated parallel game simulators for reinforcement learning},
  journal     = {NeurIPS},
  volume      = {36},
  year        = {2023}
}
% Preprint: end-to-end policy gradient method for POMDPs and explainable agents.
% NOTE(review): no arXiv identifier is recorded here; consider adding
% eprint / archiveprefix fields once the number is confirmed.
@article{nishimori2023end,
  abbr        = {arXiv},
  bibtex_show = {true},
  author      = {Nishimori, Soichiro and Koyamada, Sotetsu and Ishii, Shin},
  title       = {End-to-End Policy Gradient Method for POMDPs and Explainable Agents},
  journal     = {arXiv preprint},
  year        = {2023}
}
% Policy gradient primal-dual algorithm for constrained MDPs (2024).
% NOTE(review): abbr is arXiv while journal is RLC Workshop; confirm which
% venue label is intended before changing either.
@article{kitamura2024policy,
  abbr        = {arXiv},
  bibtex_show = {true},
  author      = {Kitamura, Toshinori and Kozuno, Tadashi and Kato, Masahiro and Ichihara, Yuki and Nishimori, Soichiro and Sannai, Akiyoshi and Sonoda, Sho and Kumagai, Wataru and Matsuo, Yutaka},
  title       = {A Policy Gradient Primal-Dual Algorithm for Constrained MDPs with Uniform PAC Guarantees},
  journal     = {RLC Workshop},
  year        = {2024}
}
% Preprint: offline reinforcement learning with domain-unlabeled data across two domains (2024).
% NOTE(review): no arXiv identifier is recorded here; consider adding
% eprint / archiveprefix fields once the number is confirmed.
@article{nishimori2024leveraging,
  abbr        = {arXiv},
  bibtex_show = {true},
  author      = {Nishimori, Soichiro and Cai, Xin-Qiang and Ackermann, Johannes and Sugiyama, Masashi},
  title       = {Leveraging Domain-Unlabeled Data in Offline Reinforcement Learning across Two Domains},
  journal     = {arXiv preprint},
  year        = {2024}
}
% Batch sequential halving paper (RLC, 2024).
% abbr, bibtex_show and selected are non-standard fields that BibTeX styles
% ignore (presumably consumed by the site template).
@article{koyamada2024batch,
  abbr        = {RLC},
  bibtex_show = {true},
  selected    = {true},
  author      = {Koyamada, Sotetsu and Nishimori, Soichiro and Ishii, Shin},
  title       = {A Batch Sequential Halving Algorithm without Performance Degradation},
  journal     = {RLC},
  year        = {2024}
}