From 7159633ac170099882f8714f1dc84456e106fe88 Mon Sep 17 00:00:00 2001 From: Atsushi Togo Date: Fri, 20 Sep 2024 14:17:47 +0900 Subject: [PATCH 1/2] Update pypolymlp doc --- doc/pypolymlp.md | 14 ++++++++++---- phono3py/file_IO.py | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/doc/pypolymlp.md b/doc/pypolymlp.md index 4a1bd73d..24aaef58 100644 --- a/doc/pypolymlp.md +++ b/doc/pypolymlp.md @@ -15,13 +15,15 @@ The training process involves using a dataset consisting of supercell displacements, forces, and energies. The trained MLPs are then employed to compute forces for supercells with specific displacements. -For more details on the methodology, refer to A. Togo and A. Seko, J. Chem. Phys. -**160**, 211001 (2024) [[doi](https://doi.org/10.1063/5.0211296)]. +For further details on combining phono3py calculations with pypolymlp, refer to +A. Togo and A. Seko, J. Chem. Phys. **160**, 211001 (2024) +[[doi](https://doi.org/10.1063/5.0211296)] +[[arxiv](https://arxiv.org/abs/2401.17531)]. An example of its usage can be found in the `example/NaCl-pypolymlp` directory in the distribution from GitHub or PyPI. -## Requirement +## Requirements - [pypolymlp](https://github.com/sekocha/pypolymlp) - [symfc](https://github.com/symfc/symfc) @@ -228,7 +230,7 @@ displacement distance of 0.001 Angstrom. The forces for these supercells are then evaluated using pypolymlp. Both the generated displacements and the corresponding forces are stored in the `phono3py_mlp_eval_dataset` file. -### Steps 4-6: Force constants calculation (random displacements in step 5) +### Steps 4-7: Force constants calculation (random displacements in step 5) After developing MLPs, random displacements are generated by specifying {ref}`--rd ` option. To compute force constants @@ -329,6 +331,10 @@ an additional 200 supercells. In total, 400 supercells are created. The forces for these supercells are then evaluated. Finally, the force constants are calculated using symfc. +## Convergence with respect to dataset size + +In general, increasing the amount of data improves the accuracy of representing force constants. Therefore, it is recommended to check the convergence of the target property as the dataset size grows. Lattice thermal conductivity is a convenient property to monitor when assessing convergence with respect to the number of supercells in the training dataset. + ## Parameters for developing MLPs A few parameters can be specified using the `--mlp-params` option for the diff --git a/phono3py/file_IO.py b/phono3py/file_IO.py index 0b9066b7..672bc3d6 100644 --- a/phono3py/file_IO.py +++ b/phono3py/file_IO.py @@ -413,6 +413,26 @@ def read_fc2_from_hdf5(filename="fc2.hdf5", p2s_map=None): ) +def write_datasets_to_hdf5( + dataset: dict, + phonon_dataset: dict = None, + filename: str = "datasets.hdf5", + compression: str = "gzip", +): + """Write dataset and phonon_dataset in datasets.hdf5.""" + + def _write_dataset(w, dataset: dict, group_name: str): + dataset_w = w.create_group(group_name) + for key in dataset: + dataset_w.create_dataset(key, data=dataset[key], compression=compression) + + with h5py.File(filename, "w") as w: + w.create_dataset("version", data=np.bytes_(__version__)) + _write_dataset(w, dataset, "dataset") + if phonon_dataset: + _write_dataset(w, phonon_dataset, "phonon_dataset") + + def write_grid_address_to_hdf5( grid_address, mesh, From 650adff94df4dd3eeceb6f6caf73a4b2afdb92d7 Mon Sep 17 00:00:00 2001 From: Atsushi Togo Date: Fri, 20 Sep 2024 14:25:17 +0900 Subject: [PATCH 2/2] Update pypolymlp doc --- doc/pypolymlp.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/pypolymlp.md b/doc/pypolymlp.md index 24aaef58..00403198 100644 --- a/doc/pypolymlp.md +++ b/doc/pypolymlp.md @@ -333,7 +333,11 @@ calculated using symfc. ## Convergence with respect to dataset size -In general, increasing the amount of data improves the accuracy of representing force constants. Therefore, it is recommended to check the convergence of the target property as the dataset size grows. Lattice thermal conductivity is a convenient property to monitor when assessing convergence with respect to the number of supercells in the training dataset. +In general, increasing the amount of data improves the accuracy of representing +force constants. Therefore, it is recommended to check the convergence of the +target property with respect to the number of supercells in the training +dataset. Lattice thermal conductivity may be a convenient property to monitor +when assessing convergence. ## Parameters for developing MLPs