From f00597306afcaae3138ecb4a1ebddfe5067046b4 Mon Sep 17 00:00:00 2001 From: "nathan.daelman@physik.hu-berlin.de" Date: Tue, 27 Jun 2023 16:39:30 +0200 Subject: [PATCH 1/8] Minor touch ups --- electronicparsers/fhiaims/parser.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/electronicparsers/fhiaims/parser.py b/electronicparsers/fhiaims/parser.py index 55f3adf3..8fd69715 100644 --- a/electronicparsers/fhiaims/parser.py +++ b/electronicparsers/fhiaims/parser.py @@ -1816,9 +1816,7 @@ def parse_atom_type(species): sec_basis_func.x_fhi_aims_controlInOut_basis_func_primitive_gauss_alpha = alpha # add inout parameters read from main output - # species - species = self.out_parser.get('control_inout', {}).get('species') - if species is not None: + if (species := self.out_parser.get('control_inout', {}).get('species')) is not None: for specie in species: parse_atom_type(specie) From 60ce7b5d25bce66e702e7912e3ad508edb370030 Mon Sep 17 00:00:00 2001 From: Nathan Date: Fri, 14 Jul 2023 16:03:47 +0200 Subject: [PATCH 2/8] Make control species repeatable --- electronicparsers/fhiaims/parser.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/electronicparsers/fhiaims/parser.py b/electronicparsers/fhiaims/parser.py index 8fd69715..5f0791e8 100644 --- a/electronicparsers/fhiaims/parser.py +++ b/electronicparsers/fhiaims/parser.py @@ -78,15 +78,12 @@ def str_to_unit(val_in): def init_quantities(self): def str_to_species(val_in): val = val_in.strip().splitlines() - data = [] species = dict() for v in val: v = v.strip().split('#')[0] if not v or not v[0].isalpha(): continue if v.startswith('species'): - if species: - data.append(species) species = dict(species=v.split()[1:]) else: v = v.replace('.d', '.e').split() @@ -95,8 +92,7 @@ def str_to_species(val_in): species[v[0]].extend([vi]) else: species[v[0]] = [vi] - data.append(species) - return data + return species self._quantities = [ Quantity( @@ -154,9 +150,9 @@ def str_to_species(val_in): 'xc', rf'{re_n} *xc\s*([\w\. \-\+]+)', repeats=False), Quantity( - 'species', rf'{re_n} *(species\s*[A-Z][a-z]?[\s\S]+?)' - r'(?:species\s*[A-Z][a-z]?|Completed|\-{10})', - str_operation=str_to_species, repeats=False)] + 'species', rf'{re_n} *(species\s+[A-Z][a-z]?[\s\S]+?)' + r'(FHI-aims code project|\-{10})', + str_operation=str_to_species, repeats=True)] class FHIAimsOutParser(TextParser): From 60dabe48f632639043acb2bf95312e986b2b6160 Mon Sep 17 00:00:00 2001 From: Nathan Date: Fri, 14 Jul 2023 17:06:08 +0200 Subject: [PATCH 3/8] Clean `atom_parameters` section and `ControlInOut` parsing --- .../fhiaims/metainfo/fhi_aims.py | 103 ++++++------------ electronicparsers/fhiaims/parser.py | 69 ++---------- 2 files changed, 42 insertions(+), 130 deletions(-) diff --git a/electronicparsers/fhiaims/metainfo/fhi_aims.py b/electronicparsers/fhiaims/metainfo/fhi_aims.py index 72034998..4ec7f6dd 100644 --- a/electronicparsers/fhiaims/metainfo/fhi_aims.py +++ b/electronicparsers/fhiaims/metainfo/fhi_aims.py @@ -192,75 +192,6 @@ class x_fhi_aims_section_controlIn_basis_set(MSection): repeats=True) -class x_fhi_aims_section_controlInOut_atom_species(MSection): - ''' - - - ''' - - m_def = Section(validate=False) - - x_fhi_aims_controlInOut_pure_gaussian = Quantity( - type=str, - shape=[], - description=''' - - - ''') - - x_fhi_aims_controlInOut_species_charge = Quantity( - type=np.dtype(np.float64), - shape=[], - unit='coulomb', - description=''' - - - ''') - - x_fhi_aims_controlInOut_species_cut_pot_scale = Quantity( - type=np.dtype(np.float64), - shape=[], - description=''' - - - ''') - - x_fhi_aims_controlInOut_species_cut_pot_width = Quantity( - type=np.dtype(np.float64), - shape=[], - unit='meter', - description=''' - - - ''') - - x_fhi_aims_controlInOut_species_cut_pot = Quantity( - type=np.dtype(np.float64), - shape=[], - unit='meter', - description=''' - - - ''') - - x_fhi_aims_controlInOut_species_mass = Quantity( - type=np.dtype(np.float64), - shape=[], - unit='kilogram', - description=''' - - - ''') - - x_fhi_aims_controlInOut_species_name = Quantity( - type=str, - shape=[], - description=''' - - - ''') - - x_fhi_aims_section_controlInOut_basis_func = SubSection( - sub_section=SectionProxy('x_fhi_aims_section_controlInOut_basis_func'), - repeats=True) - - x_fhi_aims_section_vdW_TS = SubSection( - sub_section=SectionProxy('x_fhi_aims_section_vdW_TS'), - repeats=True) - - class x_fhi_aims_section_controlInOut_basis_func(MSection): ''' - @@ -1386,8 +1317,38 @@ class AtomParameters(simulation.method.AtomParameters): m_def = Section(validate=False, extends_base_section=True) - x_fhi_aims_section_controlInOut_atom_species = SubSection( - sub_section=SectionProxy('x_fhi_aims_section_controlInOut_atom_species'), + x_fhi_aims_controlInOut_pure_gaussian = Quantity( + type=str, + shape=[], + description=''' + - + ''') + + x_fhi_aims_controlInOut_species_cut_pot_scale = Quantity( + type=np.dtype(np.float64), + shape=[], + description=''' + - + ''') + + x_fhi_aims_controlInOut_species_cut_pot_width = Quantity( + type=np.dtype(np.float64), + shape=[], + unit='meter', + description=''' + - + ''') + + x_fhi_aims_controlInOut_species_cut_pot = Quantity( + type=np.dtype(np.float64), + shape=[], + unit='meter', + description=''' + - + ''') + + x_fhi_aims_section_vdW_TS = SubSection( + sub_section=SectionProxy('x_fhi_aims_section_vdW_TS'), repeats=True) diff --git a/electronicparsers/fhiaims/parser.py b/electronicparsers/fhiaims/parser.py index 5f0791e8..dbc72160 100644 --- a/electronicparsers/fhiaims/parser.py +++ b/electronicparsers/fhiaims/parser.py @@ -51,7 +51,6 @@ from .metainfo.fhi_aims import Run as xsection_run, Method as xsection_method,\ x_fhi_aims_section_parallel_task_assignement, x_fhi_aims_section_parallel_tasks,\ x_fhi_aims_section_controlIn_basis_set, x_fhi_aims_section_controlIn_basis_func,\ - x_fhi_aims_section_controlInOut_atom_species, x_fhi_aims_section_controlInOut_basis_func,\ x_fhi_aims_section_vdW_TS from ..utils import BeyondDFTWorkflowsParser @@ -790,7 +789,7 @@ def __init__(self): 'Hybrid M11 gradient-corrected functionals': [{'name': 'MGGA_C_M11'}, {'name': 'HYB_MGGA_X_M11'}]} # TODO update metainfo to reflect all energy corrections - # why section_vdW_TS under x_fhi_aims_section_controlInOut_atom_species? + # why section_vdW_TS under atom_parameter? self._energy_map = { 'Total energy uncorrected': 'energy_total', 'Total energy corrected': 'energy_total_t0', @@ -1290,7 +1289,6 @@ def parse_scf(iteration): def parse_vdW(section): # these are not actually vdW outputs but vdW control parameters but are # printed within the calculation section. - # TODO why is x_fhi_aims_section_vdW_TS under x_fhi_aims_section_controlInOut_atom_species # we would then have to split the vdW parameters by species atoms = section.get('vdW_TS', {}).get('atom_hirshfeld', []) if not atoms: @@ -1303,8 +1301,7 @@ def parse_vdW(section): for sec in sec_atom_type: for atom in atoms: if sec.label == atom['atom']: - sec_vdW_ts = sec.x_fhi_aims_section_controlInOut_atom_species[-1].m_create( - x_fhi_aims_section_vdW_TS) + sec_vdW_ts = sec.m_create(x_fhi_aims_section_vdW_TS) for key, val in atom.items(): metainfo_name = self._property_map.get(key, None) if metainfo_name is None: @@ -1742,26 +1739,19 @@ def parse_topology(self): def parse_atom_type(species): sec_atom_type = sec_method.m_create(AtomParameters) - sec_atom_species = sec_atom_type.m_create( - x_fhi_aims_section_controlInOut_atom_species) for key, val in species.items(): if key == 'nuclear charge': - charge = val[0] * ureg.elementary_charge - sec_atom_type.charge = charge - sec_atom_species.x_fhi_aims_controlInOut_species_charge = charge + sec_atom_type.charge = val[0] * ureg.elementary_charge elif key == 'atomic mass': - mass = val[0][0] * ureg.amu - sec_atom_type.mass = mass - sec_atom_species.x_fhi_aims_controlInOut_species_mass = mass + sec_atom_type.mass = val[0][0] * ureg.amu elif key == 'species': sec_atom_type.label = val - sec_atom_species.x_fhi_aims_controlInOut_species_name = val elif 'request to include pure gaussian fns' in key: - sec_atom_species.x_fhi_aims_controlInOut_pure_gaussian = val[0] + sec_atom_type.x_fhi_aims_controlInOut_pure_gaussian = val[0] elif 'cutoff potl' in key: - sec_atom_species.x_fhi_aims_controlInOut_species_cut_pot = val[0][0] * ureg.angstrom - sec_atom_species.x_fhi_aims_controlInOut_species_cut_pot_width = val[0][1] * ureg.angstrom - sec_atom_species.x_fhi_aims_controlInOut_species_cut_pot_scale = val[0][2] + sec_atom_type.x_fhi_aims_controlInOut_species_cut_pot = val[0][0] * ureg.angstrom + sec_atom_type.x_fhi_aims_controlInOut_species_cut_pot_width = val[0][1] * ureg.angstrom + sec_atom_type.x_fhi_aims_controlInOut_species_cut_pot_scale = val[0][2] elif "request for '+U'" in key: sec_hubbard = sec_atom_type.m_create(HubbardKanamoriModel) sec_hubbard.orbital = f'{val[0][0]}{val[0][1]}' @@ -1769,47 +1759,8 @@ def parse_atom_type(species): sec_hubbard.double_counting_correction = 'Dudarev' sec_hubbard.x_fhi_aims_projection_type = 'Mulliken (dual)' sec_hubbard.x_fhi_aims_petukhov_mixing_factor = self.out_parser.get('petukhov') - elif 'free-atom' in key or 'free-ion' in key: - for i in range(len(val)): - sec_basis_func = sec_atom_species.m_create( - x_fhi_aims_section_controlInOut_basis_func) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_type = ' '.join(key.split()[:-1]) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_n = val[i][0] - sec_basis_func.x_fhi_aims_controlInOut_basis_func_l = val[i][1] - sec_basis_func.x_fhi_aims_controlInOut_basis_func_occ = val[i][2] - elif 'hydrogenic' in key: - for i in range(len(val)): - sec_basis_func = sec_atom_species.m_create( - x_fhi_aims_section_controlInOut_basis_func) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_type = ' '.join(key.split()[:-1]) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_n = val[i][0] - sec_basis_func.x_fhi_aims_controlInOut_basis_func_l = val[i][1] - sec_basis_func.x_fhi_aims_controlInOut_basis_func_eff_charge = val[i][2] - elif 'ionic' in key: - for i in range(len(val)): - sec_basis_func = sec_atom_species.m_create( - x_fhi_aims_section_controlInOut_basis_func) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_type = 'ionic basis' - sec_basis_func.x_fhi_aims_controlInOut_basis_func_n = val[i][0] - sec_basis_func.x_fhi_aims_controlInOut_basis_func_l = val[i][1] - elif 'basis function' in key: - for i in range(len(val)): - sec_basis_func = sec_atom_species.m_create( - x_fhi_aims_section_controlInOut_basis_func) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_type = key.split( - 'basis')[0].strip() - if val[i][0] == 'L': - sec_basis_func.x_fhi_aims_controlInOut_basis_func_gauss_l = val[i][2] - sec_basis_func.x_fhi_aims_controlInOut_basis_func_gauss_N = val[i][3] - alpha = [val[i][j + 2] for j in range(len(val[i])) if val[i][j] == 'alpha'] - weight = [val[i][j + 2] for j in range(len(val[i])) if val[i][j] == 'weight'] - alpha = np.array(alpha) * (1 / ureg.angstrom ** 2) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_gauss_alpha = alpha - sec_basis_func.x_fhi_aims_controlInOut_basis_func_gauss_weight = weight - elif len(val[i]) == 2: - sec_basis_func.x_fhi_aims_controlInOut_basis_func_gauss_l = val[i][0] - alpha = np.array(val[i][1]) / ureg.angstrom ** 2 - sec_basis_func.x_fhi_aims_controlInOut_basis_func_primitive_gauss_alpha = alpha + # From legacy versions we know that 'free-atom' or 'free-ion' are connected to 'occ' + # and 'hydrogenic' to 'eff_charge'. Nothing for 'ionic' # add inout parameters read from main output if (species := self.out_parser.get('control_inout', {}).get('species')) is not None: From f5dace35e7959628f67b420cdde8e07b8170bf73 Mon Sep 17 00:00:00 2001 From: Nathan Date: Mon, 17 Jul 2023 18:21:45 +0200 Subject: [PATCH 4/8] Add reference to basis function from atom_parameters + TODO: add the inverse --- electronicparsers/fhiaims/metainfo/fhi_aims.py | 6 ++++++ electronicparsers/fhiaims/parser.py | 2 ++ 2 files changed, 8 insertions(+) diff --git a/electronicparsers/fhiaims/metainfo/fhi_aims.py b/electronicparsers/fhiaims/metainfo/fhi_aims.py index 4ec7f6dd..41052856 100644 --- a/electronicparsers/fhiaims/metainfo/fhi_aims.py +++ b/electronicparsers/fhiaims/metainfo/fhi_aims.py @@ -1351,6 +1351,12 @@ class AtomParameters(simulation.method.AtomParameters): sub_section=SectionProxy('x_fhi_aims_section_vdW_TS'), repeats=True) + x_fhi_aims_section_controlIn_basis_set = Quantity( + type=Reference(SectionProxy('x_fhi_aims_section_controlIn_basis_set')), + shape=[], + description='''-''', + ) + class HubbardKanamoriModel(simulation.method.HubbardKanamoriModel): diff --git a/electronicparsers/fhiaims/parser.py b/electronicparsers/fhiaims/parser.py index dbc72160..0b4cbe8a 100644 --- a/electronicparsers/fhiaims/parser.py +++ b/electronicparsers/fhiaims/parser.py @@ -1739,6 +1739,8 @@ def parse_topology(self): def parse_atom_type(species): sec_atom_type = sec_method.m_create(AtomParameters) + param_index = len(sec_method.atom_parameters) - 1 + sec_atom_type.x_fhi_aims_section_controlIn_basis_set = sec_method.x_fhi_aims_section_controlIn_basis_set[param_index] for key, val in species.items(): if key == 'nuclear charge': sec_atom_type.charge = val[0] * ureg.elementary_charge From 45b42315b777908ca6ea8cbb666edf86675963e7 Mon Sep 17 00:00:00 2001 From: "nathan.daelman@physik.hu-berlin.de" Date: Tue, 18 Jul 2023 14:02:08 +0200 Subject: [PATCH 5/8] Add general hash mapper for `MSections` + Store hash by element --- .../fhiaims/metainfo/fhi_aims.py | 7 +++ electronicparsers/fhiaims/parser.py | 5 +- electronicparsers/utils/__init__.py | 2 +- electronicparsers/utils/utils.py | 48 +++++++++++++++++++ 4 files changed, 60 insertions(+), 2 deletions(-) diff --git a/electronicparsers/fhiaims/metainfo/fhi_aims.py b/electronicparsers/fhiaims/metainfo/fhi_aims.py index 41052856..f2f6e0e5 100644 --- a/electronicparsers/fhiaims/metainfo/fhi_aims.py +++ b/electronicparsers/fhiaims/metainfo/fhi_aims.py @@ -187,6 +187,13 @@ class x_fhi_aims_section_controlIn_basis_set(MSection): - ''') + x_fhi_aims_controlIn_hash = Quantity( + type=str, + shape=[], + description=''' + - + ''') + x_fhi_aims_section_controlIn_basis_func = SubSection( sub_section=SectionProxy('x_fhi_aims_section_controlIn_basis_func'), repeats=True) diff --git a/electronicparsers/fhiaims/parser.py b/electronicparsers/fhiaims/parser.py index 0b4cbe8a..1258330e 100644 --- a/electronicparsers/fhiaims/parser.py +++ b/electronicparsers/fhiaims/parser.py @@ -53,7 +53,7 @@ x_fhi_aims_section_controlIn_basis_set, x_fhi_aims_section_controlIn_basis_func,\ x_fhi_aims_section_vdW_TS -from ..utils import BeyondDFTWorkflowsParser +from ..utils import BeyondDFTWorkflowsParser, get_basis_hash re_float = r'[-+]?\d+\.\d*(?:[Ee][-+]\d+)?' @@ -1565,6 +1565,9 @@ def parse_basis_set(species): sec_basis_set.x_fhi_aims_controlIn_number_of_basis_func = len(division) sec_basis_set.x_fhi_aims_controlIn_division = division + # store hash + sec_basis_set.x_fhi_aims_controlIn_hash = get_basis_hash([sec_basis_set], [True]) + def _get_elemental_tier( basis_settings: x_fhi_aims_section_controlIn_basis_set, reference: dict = self._native_tier_references) -> tuple[Any, Any]: diff --git a/electronicparsers/utils/__init__.py b/electronicparsers/utils/__init__.py index ebeeb1e3..e61060f6 100644 --- a/electronicparsers/utils/__init__.py +++ b/electronicparsers/utils/__init__.py @@ -17,5 +17,5 @@ # limitations under the License. from .utils import ( - extract_section, get_files, BeyondDFTWorkflowsParser + extract_section, get_files, BeyondDFTWorkflowsParser, get_basis_hash ) diff --git a/electronicparsers/utils/utils.py b/electronicparsers/utils/utils.py index 066816e4..64201d7e 100644 --- a/electronicparsers/utils/utils.py +++ b/electronicparsers/utils/utils.py @@ -17,9 +17,13 @@ # limitations under the License. # +import json +import hashlib import os from glob import glob +from nomad.metainfo import MSection +from nomad.metainfo.util import MSubSectionList from nomad.datamodel import EntryArchive from nomad.datamodel.metainfo.simulation.run import Run from nomad.datamodel.metainfo.workflow import Link, TaskReference @@ -84,6 +88,50 @@ def get_files(pattern: str, filepath: str, stripname: str = '', deep: bool = Tru return filenames +def get_basis_hash(basis_settings: list[MSection], subsections: list[bool], **kwargs): + ''' + General function for converting basis set sections to a hash for comparison. + Basis sets may contain element-specific settings, which typically are tackled separately. + The option consists of adding also general settings to the hash. + + There are two modes determining whether sections are defined by the `quantities` provided (`inclusion`) + or rather `quantities` are explicitely removed (`exclusion`). + + basis_settings: sections to be hashed together + subsections: list of bools, indicating whether to include susbections. Must be of same length as basis_settings. + mode: str, either `inclusion` or `exclusion` + quantities: list of str, quantities to be included or excluded + ''' + mode: str = kwargs.get('mode', 'exclusion') + quantities: list[str] = kwargs.get('quantities', []) + # sanity checks + try: + evaluation_settings = zip(basis_settings, subsections) + except Exception: # TODO: specify exception + raise ValueError( + f'''basis_settings:{basis_settings} and subsections:{subsections} + must be of same length.''' + ) + # filter out subsections + to_compare: list[dict[str, any]] = [] + for section, subsection_bool in evaluation_settings: + section_dict = section.m_to_dict() + to_write = {} + for key, val in section_dict.items(): + if not subsection_bool and\ + isinstance(getattr(section, key), (MSection, MSubSectionList)): + continue + if key == 'm_def' or\ + (mode == 'exclusion' and key not in quantities) or\ + (mode == 'inclusion' and key in quantities): + to_write[key] = val + to_compare.append(to_write) + # hash the filtered sections + hash = hashlib.sha1() + hash.update(json.dumps(to_compare, sort_keys=True).encode('utf-8')) + return hash.hexdigest() + + class BeyondDFTWorkflowsParser: ''' Generates automatic beyondDFT (GW, BSE, DMFT) workflows. Main classes for parsers will From 8e7931e5b7c55ebfc5bb4c109b9c6303c3c082e0 Mon Sep 17 00:00:00 2001 From: "nathan.daelman@physik.hu-berlin.de" Date: Tue, 18 Jul 2023 14:21:36 +0200 Subject: [PATCH 6/8] Change dependency from `hashlib` to `nomad.utils` --- electronicparsers/utils/utils.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/electronicparsers/utils/utils.py b/electronicparsers/utils/utils.py index 64201d7e..b50d438f 100644 --- a/electronicparsers/utils/utils.py +++ b/electronicparsers/utils/utils.py @@ -18,7 +18,6 @@ # import json -import hashlib import os from glob import glob @@ -32,6 +31,8 @@ ParticleHoleExcitationsMethod, ParticleHoleExcitationsResults, PhotonPolarization, PhotonPolarizationMethod, PhotonPolarizationResults ) +from nomad.utils import hash +from typing import Union def extract_section(source: EntryArchive, path: str): @@ -88,7 +89,11 @@ def get_files(pattern: str, filepath: str, stripname: str = '', deep: bool = Tru return filenames -def get_basis_hash(basis_settings: list[MSection], subsections: list[bool], **kwargs): +def hash_section( + sections: Union(MSection, list[MSection]), + subsections: Union(bool, list[bool]), + **kwargs, + ) -> str: ''' General function for converting basis set sections to a hash for comparison. Basis sets may contain element-specific settings, which typically are tackled separately. @@ -97,19 +102,21 @@ def get_basis_hash(basis_settings: list[MSection], subsections: list[bool], **kw There are two modes determining whether sections are defined by the `quantities` provided (`inclusion`) or rather `quantities` are explicitely removed (`exclusion`). - basis_settings: sections to be hashed together - subsections: list of bools, indicating whether to include susbections. Must be of same length as basis_settings. - mode: str, either `inclusion` or `exclusion` - quantities: list of str, quantities to be included or excluded + `sections`: sections to be hashed together + `subsections`: list of bools, indicating whether to include susbections. Must be of same length as basis_settings. + `mode`: str, either `inclusion` or `exclusion` + `quantities`: list of str, quantities to be included or excluded ''' + sections = [sections] if isinstance(sections, MSection) else sections + subsections = [subsections] if isinstance(subsections, bool) else subsections mode: str = kwargs.get('mode', 'exclusion') quantities: list[str] = kwargs.get('quantities', []) # sanity checks try: - evaluation_settings = zip(basis_settings, subsections) + evaluation_settings = zip(sections, subsections) except Exception: # TODO: specify exception raise ValueError( - f'''basis_settings:{basis_settings} and subsections:{subsections} + f'''basis_settings:{sections} and subsections:{subsections} must be of same length.''' ) # filter out subsections @@ -127,9 +134,7 @@ def get_basis_hash(basis_settings: list[MSection], subsections: list[bool], **kw to_write[key] = val to_compare.append(to_write) # hash the filtered sections - hash = hashlib.sha1() - hash.update(json.dumps(to_compare, sort_keys=True).encode('utf-8')) - return hash.hexdigest() + return hash(*to_compare) class BeyondDFTWorkflowsParser: From 4531b477054efdb3713b63ae032a01b84ec5faf2 Mon Sep 17 00:00:00 2001 From: "nathan.daelman@physik.hu-berlin.de" Date: Tue, 18 Jul 2023 19:38:02 +0200 Subject: [PATCH 7/8] Clean Gaussian basis sets --- .../fhiaims/metainfo/fhi_aims.py | 107 +++++------------- electronicparsers/fhiaims/parser.py | 66 +++++++---- electronicparsers/utils/__init__.py | 2 +- electronicparsers/utils/utils.py | 16 +-- 4 files changed, 76 insertions(+), 115 deletions(-) diff --git a/electronicparsers/fhiaims/metainfo/fhi_aims.py b/electronicparsers/fhiaims/metainfo/fhi_aims.py index f2f6e0e5..3749136a 100644 --- a/electronicparsers/fhiaims/metainfo/fhi_aims.py +++ b/electronicparsers/fhiaims/metainfo/fhi_aims.py @@ -88,6 +88,30 @@ class x_fhi_aims_section_controlIn_basis_func(MSection): - ''') + x_fhi_aims_controlIn_basis_func_gauss_l = Quantity( + type=np.dtype(np.int32), + shape=[], + description=''' + "L is an integer number, specifying the angular momentum" + - Manual FHI-aims v201716_2 + ''') + + x_fhi_aims_controlIn_basis_func_gauss_alphas = Quantity( + type=np.dtype(np.float64), + shape=['*'], + unit='1 / meter ** 2', + description=''' + "The exponent defining a (primitive) Gaussian function" + - Manual FHI-aims v201716_2 + ''') + + x_fhi_aims_controlIn_basis_func_gauss_coeffs = Quantity( + type=np.dtype(np.float64), + shape=['*'], + description=''' + Weights in linearly composed Gaussian functions. + ''') + x_fhi_aims_controlIn_basis_func_type = Quantity( type=str, shape=[], @@ -145,20 +169,6 @@ class x_fhi_aims_section_controlIn_basis_set(MSection): angular leven for the hartreee part ''') - x_fhi_aims_controlIn_mass = Quantity( - type=np.dtype(np.float64), - shape=[], - description=''' - mass of the nucleus in atomic mass units - ''') - - x_fhi_aims_controlIn_nucleus = Quantity( - type=np.dtype(np.float64), - shape=[], - description=''' - charge of the nucleus - ''') - x_fhi_aims_controlIn_outer_grid = Quantity( type=np.dtype(np.float64), shape=[], @@ -180,14 +190,14 @@ class x_fhi_aims_section_controlIn_basis_set(MSection): radial multiplier ''') - x_fhi_aims_controlIn_species_name = Quantity( + x_fhi_aims_controlIn_hash = Quantity( type=str, shape=[], description=''' - ''') - x_fhi_aims_controlIn_hash = Quantity( + x_fhi_aims_controlIn_species_name = Quantity( type=str, shape=[], description=''' @@ -213,49 +223,6 @@ class x_fhi_aims_section_controlInOut_basis_func(MSection): - ''') - x_fhi_aims_controlInOut_basis_func_gauss_alpha = Quantity( - type=np.dtype(np.float64), - shape=[], - unit='1 / meter ** 2', - description=''' - - - ''') - - x_fhi_aims_controlInOut_basis_func_gauss_l = Quantity( - type=np.dtype(np.int32), - shape=[], - description=''' - - - ''') - - x_fhi_aims_controlInOut_basis_func_gauss_N = Quantity( - type=np.dtype(np.int32), - shape=[], - description=''' - - - ''') - - x_fhi_aims_controlInOut_basis_func_gauss_weight = Quantity( - type=np.dtype(np.float64), - shape=[], - description=''' - - - ''') - - x_fhi_aims_controlInOut_basis_func_l = Quantity( - type=str, - shape=[], - description=''' - - - ''') - - x_fhi_aims_controlInOut_basis_func_n = Quantity( - type=np.dtype(np.int32), - shape=[], - description=''' - - - ''') - x_fhi_aims_controlInOut_basis_func_occ = Quantity( type=np.dtype(np.float64), shape=[], @@ -263,28 +230,6 @@ class x_fhi_aims_section_controlInOut_basis_func(MSection): - ''') - x_fhi_aims_controlInOut_basis_func_primitive_gauss_alpha = Quantity( - type=np.dtype(np.float64), - shape=[], - unit='1 / meter ** 2', - description=''' - - - ''') - - x_fhi_aims_controlInOut_basis_func_radius = Quantity( - type=np.dtype(np.float64), - shape=[], - description=''' - - - ''') - - x_fhi_aims_controlInOut_basis_func_type = Quantity( - type=str, - shape=[], - description=''' - - - ''') - class x_fhi_aims_section_eigenvalues_group_ZORA(MSection): ''' diff --git a/electronicparsers/fhiaims/parser.py b/electronicparsers/fhiaims/parser.py index 1258330e..ba4947d2 100644 --- a/electronicparsers/fhiaims/parser.py +++ b/electronicparsers/fhiaims/parser.py @@ -53,7 +53,7 @@ x_fhi_aims_section_controlIn_basis_set, x_fhi_aims_section_controlIn_basis_func,\ x_fhi_aims_section_vdW_TS -from ..utils import BeyondDFTWorkflowsParser, get_basis_hash +from ..utils import BeyondDFTWorkflowsParser, hash_section re_float = r'[-+]?\d+\.\d*(?:[Ee][-+]\d+)?' @@ -76,21 +76,26 @@ def str_to_unit(val_in): def init_quantities(self): def str_to_species(val_in): - val = val_in.strip().splitlines() - species = dict() - for v in val: - v = v.strip().split('#')[0] - if not v or not v[0].isalpha(): + lines = [] + line = '' + val_in = val_in.strip().splitlines() + val_in.reverse() + for v in val_in: + line = v.strip().split('#')[0].replace('.d', '.e') + ' '+ line + if not line: continue - if v.startswith('species'): - species = dict(species=v.split()[1:]) + if line[0].isalpha(): + lines = [line.split()] + lines + if line.startswith('species'): + break + line = '' + species = {} + for line in lines: + content = [line[1]] if len(line) == 2 else [line[1:]] + if line[0] in species: + species[line[0]].extend(content) else: - v = v.replace('.d', '.e').split() - vi = v[1] if len(v[1:]) == 1 else v[1:] - if v[0] in species: - species[v[0]].extend([vi]) - else: - species[v[0]] = [vi] + species[line[0]] = content return species self._quantities = [ @@ -146,12 +151,12 @@ def str_to_species(val_in): xsection_method.x_fhi_aims_controlIn_verbatim_writeout, rf'{re_n} *verbatim_writeout\s*([\w]+)', repeats=False), Quantity( - 'xc', - rf'{re_n} *xc\s*([\w\. \-\+]+)', repeats=False), + 'xc', rf'{re_n} *xc\s*([\w\. \-\+]+)', repeats=False), Quantity( 'species', rf'{re_n} *(species\s+[A-Z][a-z]?[\s\S]+?)' - r'(FHI-aims code project|\-{10})', - str_operation=str_to_species, repeats=True)] + r'FHI-aims code project|\-{10}', + str_operation=str_to_species, repeats=True,), + ] class FHIAimsOutParser(TextParser): @@ -1542,20 +1547,31 @@ def parse_basis_set(species): elif key == 'division': pass elif key in basis_funcs: - for i in range(len(val)): + for v in val: sec_basis_func = sec_basis_set.m_create( x_fhi_aims_section_controlIn_basis_func) sec_basis_func.x_fhi_aims_controlIn_basis_func_type = key - sec_basis_func.x_fhi_aims_controlIn_basis_func_n = int(val[i][0]) - sec_basis_func.x_fhi_aims_controlIn_basis_func_l = str(val[i][1]) - if len(val[i]) == 3 and hasattr(val[i][2], 'real'): - sec_basis_func.x_fhi_aims_controlIn_basis_func_radius = val[i][2] + if key == 'gaussian': + sec_basis_func.x_fhi_aims_controlIn_basis_func_gauss_l = int(v[0]) + gauss_alphas, gauss_coeffs = [], [] + for gaussian_index, gaussian_extra in enumerate(v[2:]): + if gaussian_index % 2: + gauss_alphas.append(float(gaussian_extra)) + else: + gauss_coeffs.append(float(gaussian_extra)) + sec_basis_func.x_fhi_aims_controlIn_basis_func_gauss_alphas = np.array(gauss_alphas) / ureg.bohr ** 2 + sec_basis_func.x_fhi_aims_controlIn_basis_func_gauss_coeffs = gauss_coeffs + else: + sec_basis_func.x_fhi_aims_controlIn_basis_func_n = int(v[0]) + sec_basis_func.x_fhi_aims_controlIn_basis_func_l = str(v[1]) + if len(v) == 3 and hasattr(v[2], 'real'): + sec_basis_func.x_fhi_aims_controlIn_basis_func_radius = v[2] elif key in ['cut_pot', 'radial_base']: setattr(sec_basis_set, 'x_fhi_aims_controlIn_%s' % key, np.array( val[0], dtype=float)) else: try: - setattr(sec_basis_set, 'x_fhi_aims_controlIn_%s' % key, val[0]) + setattr(sec_basis_set, 'x_fhi_aims_controlIn_%s' % key, v[0]) except Exception: self.logger.warning('Error setting controlIn metainfo.', details={key: key}) @@ -1566,7 +1582,7 @@ def parse_basis_set(species): sec_basis_set.x_fhi_aims_controlIn_division = division # store hash - sec_basis_set.x_fhi_aims_controlIn_hash = get_basis_hash([sec_basis_set], [True]) + sec_basis_set.x_fhi_aims_controlIn_hash = hash_section([sec_basis_set], [True]) def _get_elemental_tier( basis_settings: x_fhi_aims_section_controlIn_basis_set, diff --git a/electronicparsers/utils/__init__.py b/electronicparsers/utils/__init__.py index e61060f6..ee10a158 100644 --- a/electronicparsers/utils/__init__.py +++ b/electronicparsers/utils/__init__.py @@ -17,5 +17,5 @@ # limitations under the License. from .utils import ( - extract_section, get_files, BeyondDFTWorkflowsParser, get_basis_hash + extract_section, get_files, BeyondDFTWorkflowsParser, hash_section ) diff --git a/electronicparsers/utils/utils.py b/electronicparsers/utils/utils.py index b50d438f..88a73b0f 100644 --- a/electronicparsers/utils/utils.py +++ b/electronicparsers/utils/utils.py @@ -90,8 +90,8 @@ def get_files(pattern: str, filepath: str, stripname: str = '', deep: bool = Tru def hash_section( - sections: Union(MSection, list[MSection]), - subsections: Union(bool, list[bool]), + sections: Union[MSection, list[MSection]], + subsections: Union[bool, list[bool]], **kwargs, ) -> str: ''' @@ -100,16 +100,16 @@ def hash_section( The option consists of adding also general settings to the hash. There are two modes determining whether sections are defined by the `quantities` provided (`inclusion`) - or rather `quantities` are explicitely removed (`exclusion`). + or rather `quantities` are explicitly removed (`exclusion`). `sections`: sections to be hashed together - `subsections`: list of bools, indicating whether to include susbections. Must be of same length as basis_settings. - `mode`: str, either `inclusion` or `exclusion` + `subsections`: list of bools, indicating whether to include subsections. Must be of same length as basis_settings. + `mode`: str, either `include` or `exclude` (default) `quantities`: list of str, quantities to be included or excluded ''' sections = [sections] if isinstance(sections, MSection) else sections subsections = [subsections] if isinstance(subsections, bool) else subsections - mode: str = kwargs.get('mode', 'exclusion') + mode: str = kwargs.get('mode', 'exclude') quantities: list[str] = kwargs.get('quantities', []) # sanity checks try: @@ -129,8 +129,8 @@ def hash_section( isinstance(getattr(section, key), (MSection, MSubSectionList)): continue if key == 'm_def' or\ - (mode == 'exclusion' and key not in quantities) or\ - (mode == 'inclusion' and key in quantities): + (mode == 'exclude' and key not in quantities) or\ + (mode == 'include' and key in quantities): to_write[key] = val to_compare.append(to_write) # hash the filtered sections From 615dd2dc9733e7f130a8aedbd966e2175ecfbd20 Mon Sep 17 00:00:00 2001 From: "nathan.daelman@physik.hu-berlin.de" Date: Tue, 18 Jul 2023 19:59:33 +0200 Subject: [PATCH 8/8] Fix Gaussian parsing --- electronicparsers/fhiaims/parser.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/electronicparsers/fhiaims/parser.py b/electronicparsers/fhiaims/parser.py index ba4947d2..9642a276 100644 --- a/electronicparsers/fhiaims/parser.py +++ b/electronicparsers/fhiaims/parser.py @@ -78,7 +78,7 @@ def init_quantities(self): def str_to_species(val_in): lines = [] line = '' - val_in = val_in.strip().splitlines() + val_in = val_in.strip().splitlines()[:-1] val_in.reverse() for v in val_in: line = v.strip().split('#')[0].replace('.d', '.e') + ' '+ line @@ -154,7 +154,7 @@ def str_to_species(val_in): 'xc', rf'{re_n} *xc\s*([\w\. \-\+]+)', repeats=False), Quantity( 'species', rf'{re_n} *(species\s+[A-Z][a-z]?[\s\S]+?)' - r'FHI-aims code project|\-{10}', + r'(FHI-aims code project|\-{10})', str_operation=str_to_species, repeats=True,), ] @@ -1556,11 +1556,12 @@ def parse_basis_set(species): gauss_alphas, gauss_coeffs = [], [] for gaussian_index, gaussian_extra in enumerate(v[2:]): if gaussian_index % 2: - gauss_alphas.append(float(gaussian_extra)) - else: gauss_coeffs.append(float(gaussian_extra)) + else: + gauss_alphas.append(float(gaussian_extra)) sec_basis_func.x_fhi_aims_controlIn_basis_func_gauss_alphas = np.array(gauss_alphas) / ureg.bohr ** 2 - sec_basis_func.x_fhi_aims_controlIn_basis_func_gauss_coeffs = gauss_coeffs + if gauss_coeffs: + sec_basis_func.x_fhi_aims_controlIn_basis_func_gauss_coeffs = gauss_coeffs else: sec_basis_func.x_fhi_aims_controlIn_basis_func_n = int(v[0]) sec_basis_func.x_fhi_aims_controlIn_basis_func_l = str(v[1])