Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change default oxidation states from SMACT14 to ICSD24 #346

Merged
merged 9 commits into from
Dec 2, 2024
1 change: 1 addition & 0 deletions docs/tutorials/crystal_space.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
" max_atomic_num=103,\n",
" num_processes=8,\n",
" save_path=\"data/binary/df_binary_label.pkl\",\n",
" oxidation_states_set=\"smact14\"\n",
")"
]
},
Expand Down
6 changes: 3 additions & 3 deletions docs/tutorials/smact_validity_of_GNoMe.ipynb

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions smact/screening.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def smact_filter(
threshold: int | None = 8,
stoichs: list[list[int]] | None = None,
species_unique: bool = True,
oxidation_states_set: str = "smact14",
oxidation_states_set: str = "icsd24",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Codebase verification

Breaking change is properly documented but requires version update

The change from "smact14" to "icsd24" as the default oxidation state set is well-documented in the code with:

  • Clear deprecation notice mentioning this is a breaking change for SMACT 3.0
  • Detailed documentation of all available oxidation state sets
  • Explicit instructions for backwards compatibility
  • Tests that cover both oxidation state sets

However, this breaking change requires:

  • Increment of the major version number to 3.0 as per the documentation
  • Migration of the change notice to a more visible location like README.md
🔗 Analysis chain

Breaking change: Default oxidation states updated

The change of default oxidation state set from "smact14" to "icsd24" is a breaking change that could affect existing code.

Let's check if this change is properly documented:

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Check if the change is documented in README or changelog
rg -i "breaking.*change.*oxidation" README.md CHANGELOG.md
rg -i "default.*oxidation.*state" README.md CHANGELOG.md

Length of output: 225


Script:

#!/bin/bash
# Let's check for any documentation files in the repository
fd -e md -e rst

# Check for any mentions of oxidation states in documentation
rg -i "oxidation.*state" -g "*.{md,rst}"

# Check the specific function to understand the impact
ast-grep --pattern 'def smact_filter($$$) { $$$ }'

Length of output: 5444


Script:

#!/bin/bash
# Let's check the specific file for documentation about the oxidation state sets
rg -A 5 "icsd24|smact14" smact/screening.py

# Check the oxidation states module for details about these sets
rg -A 5 "icsd24|smact14" smact/oxidation_states.py

# Look for any test files that might be affected
fd -e py -e test test | xargs rg "smact14"

Length of output: 5961

comp_tuple: bool = False,
) -> list[tuple[str, int, int]] | list[tuple[str, int]]:
"""Function that applies the charge neutrality and electronegativity
Expand Down Expand Up @@ -438,7 +438,7 @@ def smact_validity(
composition: pymatgen.core.Composition | str,
use_pauling_test: bool = True,
include_alloys: bool = True,
oxidation_states_set: str | bytes | os.PathLike = "smact14",
oxidation_states_set: str | bytes | os.PathLike = "icsd24",
) -> bool:
"""
Check if a composition is valid according to the SMACT rules.
Expand Down Expand Up @@ -486,13 +486,13 @@ def smact_validity(
smact_elems = [e[1] for e in space.items()]
electronegs = [e.pauling_eneg for e in smact_elems]

if oxidation_states_set == "smact14" or oxidation_states_set is None:
if oxidation_states_set == "smact14":
ox_combos = [e.oxidation_states_smact14 for e in smact_elems]
elif oxidation_states_set == "icsd16":
ox_combos = [e.oxidation_states_icsd16 for e in smact_elems]
elif oxidation_states_set == "pymatgen_sp":
ox_combos = [e.oxidation_states_sp for e in smact_elems]
elif oxidation_states_set == "icsd24":
elif oxidation_states_set == "icsd24" or oxidation_states_set is None: # Default
ox_combos = [e.oxidation_states_icsd24 for e in smact_elems]
elif os.path.exists(oxidation_states_set):
ox_combos = [oxi_custom(e.symbol, oxidation_states_set) for e in smact_elems]
Expand Down
49 changes: 34 additions & 15 deletions smact/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,31 +335,50 @@ def test_ml_rep_generator(self):
self.assertEqual(smact.screening.ml_rep_generator([Pb, O], [1, 2]), PbO2_ml)

def test_smact_filter(self):
oxidation_states_sets = ["smact14", "icsd24"]
oxidation_states_sets_results = {
"smact14": {
"thresh_2": [
(("Na", "Fe", "Cl"), (1, -1, -1), (2, 1, 1)),
(("Na", "Fe", "Cl"), (1, 1, -1), (1, 1, 2)),
]
},
"icsd24": {"thresh_2": [(("Na", "Fe", "Cl"), (1, 1, -1), (1, 1, 2))]},
}

Na, Fe, Cl = (smact.Element(label) for label in ("Na", "Fe", "Cl"))
result = smact.screening.smact_filter([Na, Fe, Cl], threshold=2)
self.assertEqual(
[(r[0], r[1], r[2]) for r in result],
[
(("Na", "Fe", "Cl"), (1, -1, -1), (2, 1, 1)),
(("Na", "Fe", "Cl"), (1, 1, -1), (1, 1, 2)),
],
)

for ox_state_set in oxidation_states_sets:
with self.subTest(ox_state_set=ox_state_set):
output = smact.screening.smact_filter([Na, Fe, Cl], threshold=2, oxidation_states_set=ox_state_set)
self.assertEqual(
[(r[0], r[1], r[2]) for r in output],
oxidation_states_sets_results[ox_state_set]["thresh_2"],
)

# Test that reading the oxidation states from a file produces the same results
self.assertEqual(
result,
smact.screening.smact_filter([Na, Fe, Cl], threshold=2, oxidation_states_set="smact14"),
smact.screening.smact_filter([Na, Fe, Cl], threshold=2, oxidation_states_set=TEST_OX_STATES),
)

self.assertEqual(
set(smact.screening.smact_filter([Na, Fe, Cl], threshold=2, species_unique=False)),
set(
smact.screening.smact_filter(
[Na, Fe, Cl], threshold=2, species_unique=False, oxidation_states_set="smact14"
)
),
{
(("Na", "Fe", "Cl"), (2, 1, 1)),
(("Na", "Fe", "Cl"), (1, 1, 2)),
},
)

self.assertEqual(len(smact.screening.smact_filter([Na, Fe, Cl], threshold=8)), 77)
self.assertEqual(
len(smact.screening.smact_filter([Na, Fe, Cl], threshold=8, oxidation_states_set="smact14")), 77
)

result = smact.screening.smact_filter([Na, Fe, Cl], stoichs=[[1], [1], [4]])
result = smact.screening.smact_filter([Na, Fe, Cl], stoichs=[[1], [1], [4]], oxidation_states_set="smact14")
self.assertEqual(
[(r[0], r[1], r[2]) for r in result],
[
Expand All @@ -368,7 +387,7 @@ def test_smact_filter(self):
)
stoichs = [list(range(1, 5)), list(range(1, 5)), list(range(1, 10))]
self.assertEqual(
len(smact.screening.smact_filter([Na, Fe, Cl], stoichs=stoichs)),
len(smact.screening.smact_filter([Na, Fe, Cl], stoichs=stoichs, oxidation_states_set="smact14")),
45,
)

Expand All @@ -380,8 +399,8 @@ def test_smact_validity(self):
# Test for single element
self.assertTrue(smact.screening.smact_validity("Al"))

# Test for MgB2 which is invalid for the default oxi states but valid for the icsd states
self.assertFalse(smact.screening.smact_validity("MgB2"))
# Test for MgB2 which is invalid for the smact14 oxi states but valid for the icsd states
self.assertFalse(smact.screening.smact_validity("MgB2", oxidation_states_set="smact14"))
self.assertTrue(smact.screening.smact_validity("MgB2", oxidation_states_set="icsd16"))
self.assertFalse(smact.screening.smact_validity("MgB2", oxidation_states_set="pymatgen_sp"))
self.assertTrue(smact.screening.smact_validity("MgB2", oxidation_states_set="wiki"))
Expand Down
38 changes: 24 additions & 14 deletions smact/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,20 +96,30 @@ def test_convert_formula(self):

def test_generate_composition_with_smact(self):
save_dir = "data/binary/df_binary_label.pkl"
smact_df = generate_composition_with_smact.generate_composition_with_smact(
num_elements=2,
max_stoich=1,
max_atomic_num=10,
save_path=save_dir,
)
self.assertIsInstance(smact_df, pd.DataFrame)
self.assertTrue(len(smact_df) > 0)

# Check if the data was saved to disk
self.assertTrue(os.path.exists(save_dir))

# Clean up
shutil.rmtree("data")
oxidation_states_sets = ["smact14", "icsd24"]
oxidation_states_sets_dict = {
"smact14": {"smact_allowed": 388},
"icsd24": {"smact_allowed": 342},
}
for ox_states in oxidation_states_sets:
with self.subTest(ox_states=ox_states):
smact_df = generate_composition_with_smact.generate_composition_with_smact(
num_elements=2,
max_stoich=3,
max_atomic_num=20,
save_path=save_dir,
oxidation_states_set=ox_states,
)
self.assertIsInstance(smact_df, pd.DataFrame)
self.assertTrue(len(smact_df) == 1330)
self.assertTrue(
smact_df["smact_allowed"].sum() == oxidation_states_sets_dict[ox_states]["smact_allowed"]
)
# Check if the data was saved to disk
self.assertTrue(os.path.exists(save_dir))

# Clean up
shutil.rmtree("data")

@pytest.mark.skipif(
sys.platform == "win32" or not (os.environ.get("MP_API_KEY") or SETTINGS.get("PMG_MAPI_KEY")),
Expand Down
7 changes: 6 additions & 1 deletion smact/utils/crystal_space/generate_composition_with_smact.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def generate_composition_with_smact(
max_atomic_num: int = 103,
num_processes: int | None = None,
save_path: str | None = None,
oxidation_states_set: str = "icsd24",
) -> pd.DataFrame:
"""
Generate all possible compositions of a given number of elements and
Expand All @@ -55,6 +56,7 @@ def generate_composition_with_smact(
max_atomic_num (int): the maximum atomic number. Defaults to 103.
num_processes (int): the number of processes to use. Defaults to None.
save_path (str): the path to save the results. Defaults to None.
oxidation_states_set (str): the oxidation states set to use. Options are "smact14", "icsd16", "icsd24", "pymatgen_sp" or a filepath to a custom oxidation states list. For reproducing the Faraday Discussions results, use "smact14".

Returns:
df (pd.DataFrame): A DataFrame of SMACT-generated compositions with boolean smact_allowed column.
Expand Down Expand Up @@ -104,7 +106,10 @@ def generate_composition_with_smact(
pool = multiprocessing.Pool(processes=multiprocessing.cpu_count() if num_processes is None else num_processes)
results = list(
tqdm(
pool.imap_unordered(partial(smact_filter, threshold=max_stoich), compounds_pauling),
pool.imap_unordered(
partial(smact_filter, threshold=max_stoich, oxidation_states_set=oxidation_states_set),
compounds_pauling,
),
total=len(compounds_pauling),
)
)
Expand Down