diff --git a/simulationdataschema/general.py b/simulationdataschema/general.py index c0d062e3..0c7e1a56 100644 --- a/simulationdataschema/general.py +++ b/simulationdataschema/general.py @@ -58,26 +58,28 @@ class Computation(BaseComputation, EntryData): - ''' - ''' + """ """ + # m_def = Section(extends_base_section=True) model_system = SubSection(sub_section=ModelSystem.m_def, repeats=True) # method = SubSection( - # sub_section=Method.m_def, - # description=''' - # The input methodological parameters used for the computation. - # ''', - # repeats=True, + # sub_section=Method.m_def, + # description=''' + # The input methodological parameters used for the computation. + # ''', + # repeats=True, # ) # calculation = SubSection( - # sub_section=Calculation.m_def, - # description=''' - # The output of a computation. It can reference a specific system and method section. - # ''', - # repeats=True, + # sub_section=Calculation.m_def, + # description=''' + # The output of a computation. It can reference a specific system and method section. + # ''', + # repeats=True, # ) - def _set_system_tree_index(self, system_parent: ModelSystem, tree_index: np.int32 = 0): + def _set_system_tree_index( + self, system_parent: ModelSystem, tree_index: np.int32 = 0 + ): for system_child in system_parent.model_system: system_child.tree_index = tree_index + 1 self._set_system_tree_index(system_child, tree_index + 1) @@ -89,7 +91,7 @@ def normalize(self, archive: EntryArchive, logger) -> None: # define it as the last system reported (CHECK THIS!). # TODO extend adding the proper representative system extraction using `normalizer.py` if len(self.model_system) == 0: - logger.error('No system information reported.') + logger.error("No system information reported.") return system_ref = self.model_system[-1] system_ref.is_representative = True diff --git a/simulationdataschema/system.py b/simulationdataschema/system.py index c78058e3..4ffb6d83 100644 --- a/simulationdataschema/system.py +++ b/simulationdataschema/system.py @@ -39,7 +39,15 @@ import ase from matid import SymmetryAnalyzer, Classifier # pylint: disable=import-error -from matid.classifications import Class0D, Atom, Class1D, Class2D, Material2D, Surface, Class3D # pylint: disable=import-error +from matid.classifications import ( + Class0D, + Atom, + Class1D, + Class2D, + Material2D, + Surface, + Class3D, +) # pylint: disable=import-error from nomad import config from nomad.units import ureg @@ -53,141 +61,147 @@ class AtomicCell(RealSpace): - ''' + """ A base section used to specify the atomic cell quantities (labels, positions) of a system at a given moment in time. - ''' + """ name = Quantity( - type=MEnum('original', 'primitive', 'standard'), - description=''' + type=MEnum("original", "primitive", "standard"), + description=""" Name to identify the cell structure. It might be: - 'original' as in orignally parsed, - 'primitive' as the primitive unit cell, - 'standard' as the standarized cell used for referencing. - ''', + """, ) n_atoms = Quantity( type=np.int32, - description=''' + description=""" The total number of atoms in the system. - ''', + """, ) labels = Quantity( type=str, - shape=['n_atoms'], - description=''' + shape=["n_atoms"], + description=""" List containing the labels of the atomic species in the system at the different positions of the structure. It refers to a chemical element as defined in the periodic table, e.g., 'H', 'O', 'Pt'. This quantity is equivalent to `atomic_numbers`. - ''', + """, ) atomic_numbers = Quantity( type=np.int32, - shape=['n_atoms'], - description=''' + shape=["n_atoms"], + description=""" List of atomic numbers Z. This quantity is equivalent to `labels`. - ''', + """, ) positions = Quantity( type=np.float64, - shape=['n_atoms', 3], - unit='meter', - description=''' + shape=["n_atoms", 3], + unit="meter", + description=""" Positions of all the atoms in Cartesian coordinates. - ''', + """, ) lattice_vectors = Quantity( type=np.float64, shape=[3, 3], - unit='meter', - description=''' + unit="meter", + description=""" Lattice vectors of the simulated cell in Cartesian coordinates. The first index runs over each lattice vector. The second index runs over the $x, y, z$ Cartesian coordinates. - ''', + """, ) lattice_vectors_reciprocal = Quantity( type=np.float64, shape=[3, 3], - unit='1/meter', - description=''' + unit="1/meter", + description=""" Reciprocal lattice vectors of the simulated cell, in Cartesian coordinates and including the $2 pi$ pre-factor. The first index runs over each lattice vector. The second index runs over the $x, y, z$ Cartesian coordinates. - ''', + """, ) periodic_boundary_conditions = Quantity( type=bool, shape=[3], - description=''' + description=""" If periodic boundary conditions are applied to each direction of the crystal axes. - ''', + """, ) velocities = Quantity( type=np.float64, - shape=['n_atoms', 3], - unit='meter / second', - description=''' + shape=["n_atoms", 3], + unit="meter / second", + description=""" Velocities of the atoms. It is the change in cartesian coordinates of the atom position with time. - ''', + """, ) supercell_matrix = Quantity( type=np.int32, shape=[3, 3], - description=''' + description=""" Specifies the matrix that transforms the primitive unit cell into the supercell in which the actual calculation is performed. In the easiest example, it is a diagonal matrix whose elements multiply the `lattice_vectors`, e.g., [[3, 0, 0], [0, 3, 0], [0, 0, 3]] is a $3 x 3 x 3$ superlattice. - ''', + """, ) equivalent_atoms = Quantity( type=np.int32, - shape=['n_atoms'], - description=''' + shape=["n_atoms"], + description=""" List of equivalent atoms as defined in `labels`. If no equivalent atoms are found, then the list is simply the index of each element, e.g.: - [0, 1, 2, 3] all four atoms are non-equivalent. - [0, 0, 0, 3] three equivalent atoms and one non-equivalent. - ''', + """, ) wyckoff_letters = Quantity( type=str, - shape=['n_atoms'], + shape=["n_atoms"], # TODO improve description - description=''' + description=""" Wyckoff letters associated with each atom position. - ''', + """, ) def normalize(self, archive: EntryArchive, logger) -> None: # Check if AtomicCell section exists if self is None: - logger.error('Could not find the basic System.atomic_cell information.') + logger.error("Could not find the basic System.atomic_cell information.") # Resolving atom_labels (either directly or from atomic_numbers) atom_labels = self.labels atomic_numbers = self.atomic_numbers if atom_labels is None: if atomic_numbers is None: - logger.error('System.atomic_cell has neither labels nor atomic_numbers defined.') + logger.error( + "System.atomic_cell has neither labels nor atomic_numbers defined." + ) return try: - atom_labels = [ase.data.chemical_symbols[number] for number in atomic_numbers] + atom_labels = [ + ase.data.chemical_symbols[number] for number in atomic_numbers + ] except IndexError: - logger.error('System.atomic_cell has atomic_numbers that are out of range of the periodic table.') + logger.error( + "System.atomic_cell has atomic_numbers that are out of range of the periodic table." + ) return self.labels = atom_labels self.n_atoms = len(atom_labels) @@ -196,14 +210,18 @@ def normalize(self, archive: EntryArchive, logger) -> None: ase_atoms = ase.Atoms(symbols=atom_labels) chemical_symbols = ase_atoms.get_chemical_symbols() if atom_labels != list(chemical_symbols): - logger.warning('Chemical symbols in System.atomic_cell.labels are ambigous and cannot be ' - 'recognized by ASE.') + logger.warning( + "Chemical symbols in System.atomic_cell.labels are ambigous and cannot be " + "recognized by ASE." + ) if atomic_numbers is None: atomic_numbers = ase_atoms.get_atomic_numbers() else: if atomic_numbers != list(ase_atoms.get_atomic_numbers()): - logger.info('The parsed System.atomic_cell.atomic_numbers do not coincide with ' - 'the ASE extracted numbers from the labels. We will rewrite the parsed data.') + logger.info( + "The parsed System.atomic_cell.atomic_numbers do not coincide with " + "the ASE extracted numbers from the labels. We will rewrite the parsed data." + ) atomic_numbers = ase_atoms.get_atomic_numbers() self.atomic_numbers = atomic_numbers @@ -211,55 +229,61 @@ def normalize(self, archive: EntryArchive, logger) -> None: pbc = self.periodic_boundary_conditions if pbc is None: pbc = [False, False, False] - logger.info('Could not find System.atomic_cell.periodic_boundary_conditions information. ' - 'Setting them to False.') + logger.info( + "Could not find System.atomic_cell.periodic_boundary_conditions information. " + "Setting them to False." + ) self.periodic_boundary_conditions = pbc ase_atoms.set_pbc(pbc) # Atom positions atom_positions = self.positions if atom_positions is None or len(atom_positions) == 0: - logger.error('Could not find System.atomic_cell.positions.') + logger.error("Could not find System.atomic_cell.positions.") return if len(atom_positions) != len(atom_labels): - logger.error('Length of System.atomic_cell.positions does not coincide with the length ' - 'of the System.atomic_cell.labels.') + logger.error( + "Length of System.atomic_cell.positions does not coincide with the length " + "of the System.atomic_cell.labels." + ) return - ase_atoms.set_positions(atom_positions.to('angstrom').magnitude) + ase_atoms.set_positions(atom_positions.to("angstrom").magnitude) # Lattice vectors and reciprocal lattice vectors lattice_vectors = self.lattice_vectors if lattice_vectors is None: - logger.info('Could not find System.atomic_cell.lattice_vectors.') + logger.info("Could not find System.atomic_cell.lattice_vectors.") else: - ase_atoms.set_cell(lattice_vectors.to('angstrom').magnitude) + ase_atoms.set_cell(lattice_vectors.to("angstrom").magnitude) lattice_vectors_reciprocal = self.lattice_vectors_reciprocal if lattice_vectors_reciprocal is None: - self.lattice_vectors_reciprocal = 2 * np.pi * ase_atoms.get_reciprocal_cell() / ureg.angstrom + self.lattice_vectors_reciprocal = ( + 2 * np.pi * ase_atoms.get_reciprocal_cell() / ureg.angstrom + ) # Store temporarily the ase.Atoms object to use in the ModelSystem.normalizer() - self.m_cache['ase_atoms'] = ase_atoms + self.m_cache["ase_atoms"] = ase_atoms def to_ase_atoms(self, nomad_atomic_cell) -> ase.Atoms: - ''' + """ Generates a ASE Atoms object with the most basic information from the parsed AtomicCell section (labels, positions, and lattice_vectors). - ''' + """ ase_atoms = ase.Atoms(symbols=nomad_atomic_cell.labels) - ase_atoms.set_positions(nomad_atomic_cell.positions.to('angstrom').magnitude) - ase_atoms.set_cell(nomad_atomic_cell.lattice_vectors.to('angstrom').magnitude) + ase_atoms.set_positions(nomad_atomic_cell.positions.to("angstrom").magnitude) + ase_atoms.set_cell(nomad_atomic_cell.lattice_vectors.to("angstrom").magnitude) return ase_atoms class Symmetry(ArchiveSection): - ''' + """ A base section used to specify the symmetry of the AtomicCell. This information can be extracted via normalization using the MatID package, if `AtomicCell` is specified. - ''' + """ bravais_lattice = Quantity( type=str, - description=''' + description=""" Bravais lattice in Pearson notation. The first lowercase letter identifies the @@ -268,59 +292,59 @@ class Symmetry(ArchiveSection): The second uppercase letter identifies the centring: P (primitive), S (face centered), I (body centred), R (rhombohedral centring), F (all faces centred). - ''', + """, ) hall_symbol = Quantity( type=str, - description=''' + description=""" Hall symbol for this system describing the minimum number of symmetry operations, in the form of Seitz matrices, needed to uniquely define a space group. See https://cci.lbl.gov/sginfo/hall_symbols.html. Examples: - `F -4 2 3`, - `-P 4 2`, - `-F 4 2 3`. - ''', + """, ) point_group_symbol = Quantity( type=str, - description=''' + description=""" Symbol of the crystallographic point group in the Hermann-Mauguin notation. See https://en.wikipedia.org/wiki/Crystallographic_point_group. Examples: - `-43m`, - `4/mmm`, - `m-3m`. - ''', + """, ) space_group_number = Quantity( type=np.int32, - description=''' + description=""" Specifies the International Union of Crystallography (IUC) space group number of the 3D space group of this system. See https://en.wikipedia.org/wiki/List_of_space_groups. Examples: - `216`, - `123`, - `225`. - ''', + """, ) space_group_symbol = Quantity( type=str, - description=''' + description=""" Specifies the International Union of Crystallography (IUC) space group symbol of the 3D space group of this system. See https://en.wikipedia.org/wiki/List_of_space_groups. Examples: - `F-43m`, - `P4/mmm`, - `Fm-3m`. - ''', + """, ) strukturbericht_designation = Quantity( type=str, - description=''' + description=""" Classification of the material according to the historically grown and similar crystal structures ('strukturbericht'). Useful when using altogether with `space_group_symbol`. Examples: @@ -329,38 +353,38 @@ class Symmetry(ArchiveSection): - `L21`. Extracted from the AFLOW encyclopedia of crystallographic prototypes. - ''', + """, ) prototype_formula = Quantity( type=str, - description=''' + description=""" The formula of the prototypical material for this structure as extracted from the AFLOW encyclopedia of crystallographic prototypes. It is a string with the chemical symbols: - https://aflowlib.org/prototype-encyclopedia/chemical_symbols.html - ''', + """, ) prototype_aflow_id = Quantity( type=str, - description=''' + description=""" The identifier of this structure in the AFLOW encyclopedia of crystallographic prototypes: http://www.aflowlib.org/prototype-encyclopedia/index.html - ''', + """, ) atomic_cell_ref = Quantity( type=AtomicCell, - description=''' + description=""" Reference to the AtomicCell section that the symmetry refers to. - ''', - a_eln=ELNAnnotation(component='ReferenceEditQuantity'), + """, + a_eln=ELNAnnotation(component="ReferenceEditQuantity"), ) class ModelSystem(System): - ''' + """ Model system used as an input for the computation. It inherits from `System` where a set of sub-sections for the `elemental_composition` is defined. We also define `name` to refer to all the verbose and user-dependent naming in ModelSystem. @@ -398,29 +422,38 @@ class ModelSystem(System): (for Si/(GaAs-CO2)), 2 child ModelSystems (for Si and GaAs-CO2), and 2 additional children in one of the childs (for GaAs and CO2). The number of AtomicCell and Symmetry sections can be inferred using a combination of example 2 and 3. - ''' + """ name = Quantity( type=str, - description=''' + description=""" Any verbose naming refering to the ModelSystem. Can be left empty if it is a simple crystal or it can be filled up. For example, an heterostructure of graphene (G) sandwiched in between hexagonal boron nitrides (hBN) slabs could be named 'hBN/G/hBN'. - ''', - a_eln=ELNAnnotation(component='StringEditQuantity'), + """, + a_eln=ELNAnnotation(component="StringEditQuantity"), ) # TODO work on improving and extending this quantity and the description type = Quantity( - type=MEnum('atom', 'molecule / cluster', 'bulk', 'surface', '2D', '1D', 'active_atom', 'unavailable'), - description=''' + type=MEnum( + "atom", + "molecule / cluster", + "bulk", + "surface", + "2D", + "1D", + "active_atom", + "unavailable", + ), + description=""" Type of the system (atom, bulk, surface, etc.) which is determined by the normalizer. - ''', + """, ) dimensionality = Quantity( - type=MEnum('0D', '1D', '2D', '3D', 'unavailable'), - description=''' + type=MEnum("0D", "1D", "2D", "3D", "unavailable"), + description=""" Dimensionality of the system. For atomistic systems this is automatically evaluated by using the topology-scaling algorithm: @@ -432,38 +465,38 @@ class ModelSystem(System): | `'1D'` | Periodi in one dimension | | `'2D'` | Periodic in two dimensions | | `'3D'` | Periodic in three dimensions | - ''', + """, ) time_step = Quantity( type=np.int32, - description=''' + description=""" Specific time snapshot of the ModelSystem. The time evolution is then encoded in a list of ModelSystems under Computation where for each element this quantity defines the time step. - ''', + """, ) chemical_formula_descriptive = Quantity( type=str, - description=''' + description=""" The chemical formula of the system as a string to be descriptive of the computation. It is derived from `elemental_composition` if not specified, with non-reduced integer numbers for the proportions of the elements. - ''', + """, ) chemical_formula_reduced = Quantity( type=str, - description=''' + description=""" Alphabetically sorted chemical formula with reduced integer chemical proportion numbers. The proportion number is omitted if it is 1. - ''', + """, ) chemical_formula_iupac = Quantity( type=str, - description=''' + description=""" Chemical formula where the elements are ordered using a formal list based on electronegativity as defined in the IUPAC nomenclature of inorganic chemistry (2005): @@ -471,20 +504,20 @@ class ModelSystem(System): Contains reduced integer chemical proportion numbers where the proportion number is omitted if it is 1. - ''', + """, ) chemical_formula_hill = Quantity( type=str, - description=''' + description=""" Chemical formula where Carbon is placed first, then Hydrogen, and then all the other elements in alphabetical order. If Carbon is not present, the order is alphabetical. - ''', + """, ) chemical_formula_anonymous = Quantity( type=str, - description=''' + description=""" Formula with the elements ordered by their reduced integer chemical proportion number, and the chemical species replaced by alphabetically ordered letters. The proportion number is omitted if it is 1. @@ -492,7 +525,7 @@ class ModelSystem(System): Examples: H2O becomes A2B and H2O2 becomes AB. The letters are drawn from the English alphabet that may be extended by increasing the number of letters: A, B, ..., Z, Aa, Ab and so on. This definition is in line with the similarly named OPTIMADE definition. - ''', + """, ) atomic_cell = SubSection(sub_section=AtomicCell.m_def, repeats=True) @@ -502,122 +535,143 @@ class ModelSystem(System): is_representative = Quantity( type=bool, default=False, - description=''' + description=""" If the model system section is the one representative of the computational simulation. Defaults to False and set to True by the `Computation.normalize()`. If set to True, the `ModelSystem.normalize()` function is ran (otherwise, it is not). - ''', + """, ) # TODO what about `branch_label`? tree_label = Quantity( type=str, shape=[], - description=''' + description=""" Label of the specific branch in the system tree. - ''') + """, + ) # TODO what about `branch_index`? tree_index = Quantity( type=np.int32, - description=''' + description=""" Index refering to the depth of a branch in the system tree. - ''', + """, ) atom_indices = Quantity( type=np.int32, - shape=['*'], - description=''' + shape=["*"], + description=""" Indices of the atoms in the child with respect to its parent. Example: - We have SrTiO3, where `AtomicCell.labels = ['Sr', 'Ti', 'O', 'O', 'O']`. If we create a `model_system` child for the `'Ti'` atom only, then in that child `ModelSystem.model_system.atom_indices = [1]`. If now we want to refer both to the `'Ti'` and the last `'O'` atoms, `ModelSystem.model_system.atom_indices = [1, 4]`. - ''', + """, ) bond_list = Quantity( type=np.int32, # TODO improve description and add an example using the case in atom_indices - description=''' + description=""" List of pairs of atom indices corresponding to bonds (e.g., as defined by a force field) within this atoms_group. - ''', + """, ) - model_system = SubSection(sub_section=SectionProxy('ModelSystem'), repeats=True) + model_system = SubSection(sub_section=SectionProxy("ModelSystem"), repeats=True) def _resolve_system_type_and_dimensionality(self, ase_atoms: ase.Atoms) -> str: - ''' + """ Determine the ModelSystem.type and ModelSystem.dimensionality using MatID classification analyzer: - https://singroup.github.io/matid/tutorials/classification.html Args: ase.Atoms: The ASE Atoms structure to analyse - ''' + """ classification = None system_type, dimensionality = self.type, self.dimensionality - if len(ase_atoms) <= config.normalize.system_classification_with_clusters_threshold: + if ( + len(ase_atoms) + <= config.normalize.system_classification_with_clusters_threshold + ): try: - classifier = Classifier(radii="covalent", cluster_threshold=config.normalize.cluster_threshold) + classifier = Classifier( + radii="covalent", + cluster_threshold=config.normalize.cluster_threshold, + ) cls = classifier.classify(ase_atoms) except Exception as e: - self.logger.warning('MatID system classification failed.', exc_info=e, error=str(e)) + self.logger.warning( + "MatID system classification failed.", exc_info=e, error=str(e) + ) return system_type, dimensionality classification = type(cls) if classification == Class3D: - system_type = 'bulk' - dimensionality = '3D' + system_type = "bulk" + dimensionality = "3D" elif classification == Atom: - system_type = 'atom' - dimensionality = '3D' + system_type = "atom" + dimensionality = "3D" elif classification == Class0D: - system_type = 'molecule / cluster' - dimensionality = '0D' + system_type = "molecule / cluster" + dimensionality = "0D" elif classification == Class1D: - system_type = '1D' - dimensionality = '1D' + system_type = "1D" + dimensionality = "1D" elif classification == Surface: - system_type = 'surface' - dimensionality = '2D' + system_type = "surface" + dimensionality = "2D" elif classification == Material2D or classification == Class2D: - system_type = '2D' - dimensionality = '2D' + system_type = "2D" + dimensionality = "2D" else: - self.logger.info("ModelSystem.type and dimensionality analysis not run due to large system size.") + self.logger.info( + "ModelSystem.type and dimensionality analysis not run due to large system size." + ) return system_type, dimensionality def _resolve_bulk_symmetry(self, ase_atoms: ase.Atoms) -> None: - ''' + """ Analyze the symmetry of the material being simulated using MatID and the parsed data stored under ModelSystem and AtomicCell. Only available for bulk materials. Args: ase.Atoms: The ASE Atoms structure to analyse - ''' + """ symmetry = {} try: - symmetry_analyzer = SymmetryAnalyzer(ase_atoms, symmetry_tol=config.normalize.symmetry_tolerance) - - symmetry['bravais_lattice'] = symmetry_analyzer.get_bravais_lattice() - symmetry['hall_symbol'] = symmetry_analyzer.get_hall_symbol() - symmetry['point_group_symbol'] = symmetry_analyzer.get_point_group() - symmetry['space_group_number'] = symmetry_analyzer.get_space_group_number() - symmetry['space_group_symbol'] = symmetry_analyzer.get_space_group_international_short() - symmetry['origin_shift'] = symmetry_analyzer._get_spglib_origin_shift() - symmetry['transformation_matrix'] = symmetry_analyzer._get_spglib_transformation_matrix() + symmetry_analyzer = SymmetryAnalyzer( + ase_atoms, symmetry_tol=config.normalize.symmetry_tolerance + ) + + symmetry["bravais_lattice"] = symmetry_analyzer.get_bravais_lattice() + symmetry["hall_symbol"] = symmetry_analyzer.get_hall_symbol() + symmetry["point_group_symbol"] = symmetry_analyzer.get_point_group() + symmetry["space_group_number"] = symmetry_analyzer.get_space_group_number() + symmetry[ + "space_group_symbol" + ] = symmetry_analyzer.get_space_group_international_short() + symmetry["origin_shift"] = symmetry_analyzer._get_spglib_origin_shift() + symmetry[ + "transformation_matrix" + ] = symmetry_analyzer._get_spglib_transformation_matrix() # Originally parsed cell original_wyckoff = symmetry_analyzer.get_wyckoff_letters_original() - original_equivalent_atoms = symmetry_analyzer.get_equivalent_atoms_original() + original_equivalent_atoms = ( + symmetry_analyzer.get_equivalent_atoms_original() + ) # Primitive cell primitive_wyckoff = symmetry_analyzer.get_wyckoff_letters_primitive() - primitive_equivalent_atoms = symmetry_analyzer.get_equivalent_atoms_primitive() + primitive_equivalent_atoms = ( + symmetry_analyzer.get_equivalent_atoms_primitive() + ) primitive_sys = symmetry_analyzer.get_primitive_system() primitive_pos = primitive_sys.get_scaled_positions() primitive_cell = primitive_sys.get_cell() @@ -626,17 +680,21 @@ def _resolve_bulk_symmetry(self, ase_atoms: ase.Atoms) -> None: # Standarized (or conventional) cell standard_wyckoff = symmetry_analyzer.get_wyckoff_letters_conventional() - standard_equivalent_atoms = symmetry_analyzer.get_equivalent_atoms_conventional() + standard_equivalent_atoms = ( + symmetry_analyzer.get_equivalent_atoms_conventional() + ) standard_sys = symmetry_analyzer.get_conventional_system() standard_pos = standard_sys.get_scaled_positions() standard_cell = standard_sys.get_cell() standard_num = standard_sys.get_atomic_numbers() standard_labels = standard_sys.get_chemical_symbols() except ValueError as e: - self.logger.debug('Symmetry analysis with MatID is not available.', details=str(e)) + self.logger.debug( + "Symmetry analysis with MatID is not available.", details=str(e) + ) return except Exception as e: - self.logger.warning('Symmetry analysis with MatID failed.', exc_info=e) + self.logger.warning("Symmetry analysis with MatID failed.", exc_info=e) return # Populating the originally parsed AtomicCell wyckoff_letters and equivalent_atoms information @@ -646,7 +704,7 @@ def _resolve_bulk_symmetry(self, ase_atoms: ase.Atoms) -> None: # Populating the primitive AtomicCell information sec_primitive_atoms = self.m_create(AtomicCell) - sec_primitive_atoms.name = 'primitive' + sec_primitive_atoms.name = "primitive" sec_primitive_atoms.lattice_vectors = primitive_cell * ureg.angstrom sec_primitive_atoms.n_atoms = len(primitive_labels) sec_primitive_atoms.positions = primitive_pos * ureg.angstrom @@ -657,7 +715,7 @@ def _resolve_bulk_symmetry(self, ase_atoms: ase.Atoms) -> None: # Populating the standarized Atoms information sec_standard_atoms = self.m_create(AtomicCell) - sec_standard_atoms.name = 'standard' + sec_standard_atoms.name = "standard" sec_standard_atoms.lattice_vectors = standard_cell * ureg.angstrom sec_standard_atoms.n_atoms = len(standard_labels) sec_standard_atoms.positions = standard_pos * ureg.angstrom @@ -668,20 +726,22 @@ def _resolve_bulk_symmetry(self, ase_atoms: ase.Atoms) -> None: # Getting prototype_formula, prototype_aflow_id, and strukturbericht designation from # standarized Wyckoff numbers and the space group number - if symmetry.get('space_group_number'): + if symmetry.get("space_group_number"): norm_wyckoff = get_normalized_wyckoff(standard_num, standard_wyckoff) - aflow_prototype = search_aflow_prototype(symmetry.get('space_group_number'), norm_wyckoff) - strukturbericht = aflow_prototype.get('Strukturbericht Designation') - if strukturbericht == 'None': + aflow_prototype = search_aflow_prototype( + symmetry.get("space_group_number"), norm_wyckoff + ) + strukturbericht = aflow_prototype.get("Strukturbericht Designation") + if strukturbericht == "None": strukturbericht = None else: - strukturbericht = re.sub('[$_{}]', '', strukturbericht) - prototype_aflow_id = aflow_prototype.get('aflow_prototype_id') - prototype_formula = aflow_prototype.get('Prototype') + strukturbericht = re.sub("[$_{}]", "", strukturbericht) + prototype_aflow_id = aflow_prototype.get("aflow_prototype_id") + prototype_formula = aflow_prototype.get("Prototype") # Adding these to the symmetry dictionary for later assignement - symmetry['strukturbericht_designation'] = strukturbericht - symmetry['prototype_aflow_id'] = prototype_aflow_id - symmetry['prototype_formula'] = prototype_formula + symmetry["strukturbericht_designation"] = strukturbericht + symmetry["prototype_aflow_id"] = prototype_aflow_id + symmetry["prototype_formula"] = prototype_formula # Populating Symmetry section (try to reference the standarized cell, and if not, # fallback to the originally parsed one) @@ -701,40 +761,53 @@ def normalize(self, archive: EntryArchive, logger) -> None: # Extracting ASE Atoms object from the originally parsed AtomicCell section if len(self.atomic_cell) == 0: - self.logger.warning('Could not find the originally parsed atomic system. ' - 'Symmetry and Formula extraction is thus not run.') + self.logger.warning( + "Could not find the originally parsed atomic system. " + "Symmetry and Formula extraction is thus not run." + ) return - self.atomic_cell[0].name = 'original' - ase_atoms = self.atomic_cell[0].m_cache.get('ase_atoms') + self.atomic_cell[0].name = "original" + ase_atoms = self.atomic_cell[0].m_cache.get("ase_atoms") if not ase_atoms: return # Resolving system `type`, `dimensionality`, and Symmetry section (if this last one does not exists already) original_atom_positions = self.atomic_cell[0].positions if original_atom_positions is not None: - self.type = 'unavailable' if not self.type else self.type - self.dimensionality = 'unavailable' if not self.dimensionality else self.dimensionality - self.type, self.dimensionality = self._resolve_system_type_and_dimensionality(ase_atoms) - if self.type == 'bulk' and len(self.symmetry) == 0: + self.type = "unavailable" if not self.type else self.type + self.dimensionality = ( + "unavailable" if not self.dimensionality else self.dimensionality + ) + ( + self.type, + self.dimensionality, + ) = self._resolve_system_type_and_dimensionality(ase_atoms) + if self.type == "bulk" and len(self.symmetry) == 0: self._resolve_bulk_symmetry(ase_atoms) # Extracting the cells parameters using the object Cell from ASE for atom_cell in self.atomic_cell: atoms = AtomicCell().to_ase_atoms(atom_cell) cell = atoms.get_cell() atom_cell.a, atom_cell.b, atom_cell.c = cell.lengths() * ureg.angstrom - atom_cell.alpha, atom_cell.beta, atom_cell.gamma = cell.angles() * ureg.degree - atom_cell.volume = cell.volume * ureg.angstrom ** 3 + atom_cell.alpha, atom_cell.beta, atom_cell.gamma = ( + cell.angles() * ureg.degree + ) + atom_cell.volume = cell.volume * ureg.angstrom**3 # Formulas # TODO add support for fractional formulas (possibly add `AtomicCell.concentrations` for each species) try: formula = Formula(ase_atoms.get_chemical_formula()) - self.chemical_composition = ase_atoms.get_chemical_formula(mode='all') - self.chemical_formula_descriptive = formula.format('descriptive') - self.chemical_formula_reduced = formula.format('reduced') - self.chemical_formula_iupac = formula.format('iupac') - self.chemical_formula_hill = formula.format('hill') - self.chemical_formula_anonymous = formula.format('anonymous') + self.chemical_composition = ase_atoms.get_chemical_formula(mode="all") + self.chemical_formula_descriptive = formula.format("descriptive") + self.chemical_formula_reduced = formula.format("reduced") + self.chemical_formula_iupac = formula.format("iupac") + self.chemical_formula_hill = formula.format("hill") + self.chemical_formula_anonymous = formula.format("anonymous") self.elemental_composition = formula.elemental_composition() except ValueError as e: - self.logger.warning('Could not extract the chemical formulas information.', exc_info=e, error=str(e)) + self.logger.warning( + "Could not extract the chemical formulas information.", + exc_info=e, + error=str(e), + ) diff --git a/tests/test_template.py b/tests/test_template.py index da9f0665..d61463bc 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -21,6 +21,7 @@ from nomad.utils import get_logger from nomad.datamodel import EntryArchive, EntryMetadata + def approx(value, abs=0, rel=1e-6): return pytest.approx(value, abs=abs, rel=rel)