Skip to content

Commit

Permalink
Metrics (#63)
Browse files Browse the repository at this point in the history
* CrysGFN: SMACT for composition validity (#34)

* eform and ehull metrics (#42)

* Eval pipeline from CSV (#45)

* Lattice metrics (#44)

* SMACT update (#47)

* update ehull metric  (#62)

---------

Co-authored-by: Divya Sharma <[email protected]>
  • Loading branch information
ppdebreuck and sh-divya authored Feb 19, 2024
1 parent 83a2539 commit 2918684
Show file tree
Hide file tree
Showing 5 changed files with 577 additions and 27 deletions.
6 changes: 5 additions & 1 deletion scripts/crystal_eval/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,8 @@ Before running the second script (`eval_CGFN`) be sure that `data/crystals/eval_
## Structure Generation with pyXtal
You can generate structures with `--n_random_struct` and relax them with `relax.py`. However, this is not recommended at this point, as it will result in many empty (None) structures. This will be addressed in a future PR.
You can generate structures with `--n_random_struct` and relax them with `relax.py`. However, this is not recommended at this point, as it will result in many empty (None) structures. This will be addressed in a future PR.
## SMACT
The SMACT metric requires the Python library `smact`, which can be installed with `pip install smact`.
11 changes: 6 additions & 5 deletions scripts/crystal_eval/convert_CGFN_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
# symmetry: dictionary with key "spacegroup". (other keys such as Wyckoff may be added later)
# eform: formation energy in eV

import pandas as pd
import os
from argparse import ArgumentParser
from pathlib import Path

import numpy as np
from pymatgen.core import Structure, Lattice
import pandas as pd
from pymatgen.core import Lattice, Structure
from pyxtal import pyxtal
from pyxtal.lattice import Lattice as pyxtal_lattice
from argparse import ArgumentParser
from pathlib import Path
import os

# encoded elements in CGFN pickle
IDX2ELEM = {
Expand Down
75 changes: 75 additions & 0 deletions scripts/crystal_eval/convert_from_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import os
import re
from argparse import ArgumentParser
from pathlib import Path

import numpy as np
import pandas as pd
from pymatgen.core import Composition, Lattice, Structure


def encoded_to_container_structure(sample: str):
    """Convert a CGFN readable sample into a pymatgen container structure.

    Only the information present in the sample is set: the lattice is built
    from the six lattice parameters and the species list from the composition.
    Every site is placed at the origin, since no coordinates are parsed.

    Args:
        sample: Readable sample string, expected to contain
            ``Stage 2; <space group> | ...; <composition>; (a, b, c),
            (alpha, beta, gamma)``. Non-string values (e.g. ``None`` or the
            NaN that pandas produces for an empty CSV cell) are treated as
            non-matching instead of raising ``TypeError``.

    Returns:
        A ``(structure, symmetry)`` tuple where ``symmetry`` is
        ``{"spacegroup": <digits as str>}``, or ``(None, None)`` when the
        sample does not match the expected pattern.
    """
    # Guard against missing values: re.search() raises TypeError on non-strings.
    if not isinstance(sample, str):
        return None, None

    pattern = r"Stage 2;\s*(\d+)\s*\|\s*[\w-]+.*?;\s*([\w\d]+[\w\dFOSi]+);\s*\(([\d.]+), ([\d.]+), ([\d.]+)\), \(([\d.]+), ([\d.]+), ([\d.]+)\)"
    match = re.search(pattern, sample)
    if match is None:
        return None, None  # Handle samples that don't match the pattern

    space_group = match.group(1)  # kept as the matched string of digits
    composition = match.group(2)
    # Groups 3-8 are the lattice lengths followed by the lattice angles.
    a, b, c, alpha, beta, gamma = [float(x) for x in match.groups()[2:8]]

    lattice = Lattice.from_parameters(
        a=a, b=b, c=c, alpha=alpha, beta=beta, gamma=gamma
    )
    # Repeat each element once per atom; int() truncates fractional amounts.
    composition_dict = Composition(composition).as_dict()
    species = [
        element
        for element, amount in composition_dict.items()
        for _ in range(int(amount))
    ]
    # Placeholder coordinates: all sites sit at the origin.
    coords = np.zeros((len(species), 3))
    structure = Structure(lattice=lattice, species=species, coords=coords)
    # Return both structure and symmetry info
    return structure, {"spacegroup": space_group}


def main():
    """Convert a CSV of CGFN samples into a pickled evaluation DataFrame.

    Reads the CSV given by ``--file_path``, keeps the ``readable`` and
    ``energies`` columns, parses each readable sample into ``structure`` and
    ``symmetry`` columns, renames ``energies`` to ``eform`` and pickles the
    result into ``--out_dir`` under the input file's base name with a ``.pkl``
    extension. The output directory is created if it does not exist.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "--file_path",
        default="data/crystals/gfn_samples.csv",
        type=str,
        # The input is read with pd.read_csv, so it is a CSV, not a pickle.
        help="Path to the CSV file containing CGFN samples",
    )
    parser.add_argument(
        "--out_dir",
        default="data/crystals/eval_data/",
        type=str,
        help="Output directory",
    )

    args = parser.parse_args()
    print("Starting...")
    df_data = pd.read_csv(args.file_path)
    df_data = df_data.filter(items=["readable", "energies"])
    # Each sample yields a (structure, symmetry) pair, expanded into two columns.
    df_data[["structure", "symmetry"]] = df_data.apply(
        lambda row: encoded_to_container_structure(row["readable"]),
        axis=1,
        result_type="expand",
    )
    df_data.drop(["readable"], axis=1, inplace=True)
    df_data.rename(columns={"energies": "eform"}, inplace=True)

    out_dir = Path(args.out_dir)
    # to_pickle does not create missing directories, so make sure it exists.
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file_path = out_dir / (Path(args.file_path).stem + ".pkl")
    df_data.to_pickle(out_file_path)
    print(df_data)
    print(f"File saved to {out_file_path}")
    print("Done")


if __name__ == "__main__":
main()
50 changes: 38 additions & 12 deletions scripts/crystal_eval/eval_CGFN.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,41 @@
in order to have a standard input data format, saved to data/crystals/eval_data/.
"""

import os
import json
from pathlib import Path
import os
from argparse import ArgumentParser
from pathlib import Path

import numpy as np
import pandas as pd
from metrics import NumberOfElements, Rediscovery
from metrics import SG2LP, SMACT, Comp2SG, Eform, Ehull, NumberOfElements, Rediscovery


# put all metrics to be computed here:
METRICS = [
NumberOfElements(),
# Rediscovery(
# rediscovery_path=None # Path to original dataset for comparing against generated samples
# ),
]
def init_metrics():  # in a function to enable forking
    """Instantiate every metric to compute and publish the list as ``METRICS``.

    The metrics are created in the order in which they are stored, and the
    module-level ``METRICS`` global is (re)bound once all of them exist.
    """
    global METRICS

    metrics = [
        NumberOfElements(),
        Rediscovery(
            rediscovery_path=None  # Path to original dataset for comparing against generated samples
        ),
        Eform(),
    ]

    # SMACT variants: default, then one per oxidation-states set.
    metrics.append(SMACT())
    metrics.append(SMACT(oxidation_states_set="icsd"))
    metrics.append(SMACT(oxidation_states_set="pymatgen"))
    metrics.append(SMACT(oxidation_states_set="our_oxidations"))

    # Same variants again with oxidation_only=True.
    # NOTE(review): the last entry uses "our_oxidations.txt" while the variant
    # above uses "our_oxidations" -- confirm which spelling SMACT expects.
    metrics.append(SMACT(oxidation_only=True))
    metrics.append(SMACT(oxidation_states_set="icsd", oxidation_only=True))
    metrics.append(SMACT(oxidation_states_set="pymatgen", oxidation_only=True))
    metrics.append(
        SMACT(oxidation_states_set="our_oxidations.txt", oxidation_only=True)
    )

    metrics.append(Comp2SG())
    metrics.append(SG2LP())
    metrics.append(
        Ehull(
            PD_path="data/crystals/eval_data/MP_hull_12elms.zip",  # replace by your path
            n_jobs=4,
            debug=True,  # set False to make a full run
        )
    )

    METRICS = metrics


def add_args(parser):
Expand Down Expand Up @@ -68,6 +88,7 @@ def print_args(args):


def main():
init_metrics()
# Parse arguments
parser = ArgumentParser()
_, override_args = parser.parse_known_args()
Expand Down Expand Up @@ -100,7 +121,9 @@ def main():
else:
for metric in METRICS:
print(f"Computing {metric.__name__}")
results = metric.compute(df["structure"])
results = metric.compute(
df["structure"], energies=df["eform"], sg=df["symmetry"]
)
data_results[data_name][metric.__name__] = results
with open(metrics_path, "w+") as fp:
json.dump(data_results[data_name], fp)
Expand All @@ -111,19 +134,22 @@ def main():
os.chdir(out_dir)
for metric in METRICS:
try:
print([d for d, _ in data_results.items()])
# raise Exception
metric.plot(
{
data_name: results[metric.__name__]
for dataname, results in data_results.items()
for data_name, results in data_results.items()
},
)
except KeyError as ke:
print(
"Key not found in file. Metrics have changed. Rerun using --force_compute."
f"Key {ke} not found in file. Metrics have changed. Rerun using --force_compute."
)
exit()
os.chdir(original_directory)
print("Done")
print(f"Check summary figures saved in {out_dir}")


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 2918684

Please sign in to comment.