_bibliography/papers.bib

% Conference paper introducing Mjx (2022 IEEE Conference on Games).
% The venue is stored in `booktitle`, the field an inproceedings entry uses
% for the proceedings title. `abbr` and `bibtex_show` are non-standard fields,
% presumably consumed by the site generator (confirm against the site config).
@inproceedings{koyamada2022mjx,
  abbr         = {IEEE},
  bibtex_show  = {true},
  title        = {Mjx: A framework for {Mahjong} {AI} research},
  author       = {Koyamada, Sotetsu and Habara, Keigo and Goto, Nao and Okano, Shinri and Nishimori, Soichiro and Ishii, Shin},
  booktitle    = {2022 IEEE Conference on Games (CoG)},
  pages        = {504--507},
  year         = {2022},
  organization = {IEEE}
}

% Pgx paper, NeurIPS volume 36 (2023). The journal name is stored in full so
% that a bibliography style can abbreviate it if desired.
% NOTE(review): the citation key says 2024 while `year` is 2023; the key is
% kept unchanged because other files may cite it.
@article{koyamada2024pgx,
  abbr        = {NIPS},
  bibtex_show = {true},
  title       = {Pgx: Hardware-accelerated parallel game simulators for reinforcement learning},
  author      = {Koyamada, Sotetsu and Okano, Shinri and Nishimori, Soichiro and Murata, Yu and Habara, Keigo and Kita, Haruka and Ishii, Shin},
  journal     = {Advances in Neural Information Processing Systems},
  volume      = {36},
  year        = {2023},
  selected    = {true}
}

% arXiv preprint (2023). The acronym in the title is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
% TODO: no arXiv identifier is recorded; add `eprint` / `archiveprefix` once
% the identifier has been confirmed.
@article{nishimori2023end,
  abbr        = {arXiv},
  bibtex_show = {true},
  title       = {End-to-End Policy Gradient Method for {POMDPs} and Explainable Agents},
  author      = {Nishimori, Soichiro and Koyamada, Sotetsu and Ishii, Shin},
  journal     = {arXiv preprint},
  year        = {2023}
}

% Constrained-MDP policy gradient paper (2024). Acronyms in the title are
% brace-protected so that sentence-casing styles keep them uppercase.
% NOTE(review): `abbr` says arXiv while `journal` says RLC Workshop; confirm
% which venue label is intended.
@article{kitamura2024policy,
  abbr        = {arXiv},
  bibtex_show = {true},
  title       = {A Policy Gradient Primal-Dual Algorithm for Constrained {MDPs} with Uniform {PAC} Guarantees},
  author      = {Kitamura, Toshinori and Kozuno, Tadashi and Kato, Masahiro and Ichihara, Yuki and Nishimori, Soichiro and Sannai, Akiyoshi and Sonoda, Sho and Kumagai, Wataru and Matsuo, Yutaka},
  journal     = {RLC Workshop},
  year        = {2024}
}

% arXiv preprint (2024) on offline reinforcement learning across two domains.
% `abbr` and `bibtex_show` are non-standard fields, presumably read by the
% site generator (confirm against the site config).
@article{nishimori2024leveraging,
  author      = {Nishimori, Soichiro and Cai, Xin-Qiang and Ackermann, Johannes and Sugiyama, Masashi},
  title       = {Leveraging Domain-Unlabeled Data in Offline Reinforcement Learning across Two Domains},
  journal     = {arXiv preprint},
  year        = {2024},
  abbr        = {arXiv},
  bibtex_show = {true}
}

% Batch Sequential Halving paper, listed under the venue label RLC (2024).
% `selected` is a non-standard field, presumably used by the site generator
% to highlight the entry (confirm against the site config).
@article{koyamada2024batch,
  author      = {Koyamada, Sotetsu and Nishimori, Soichiro and Ishii, Shin},
  title       = {A Batch Sequential Halving Algorithm without Performance Degradation},
  journal     = {RLC},
  year        = {2024},
  abbr        = {RLC},
  bibtex_show = {true},
  selected    = {true}
}
}