diff --git a/pyproject.toml b/pyproject.toml index da497aba..ae813b12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,11 @@ maintainers = [ { name = "Joseph F. Rudzinski", email = "joseph.rudzinski@physik.hu-berlin.de" } ] license = { file = "LICENSE" } +# dependencies = [ +# "nomad-lab>=1.3.0", +# "matid>=2.0.0.dev2", +# "nomad-simulations@file:///home/bmohr/software/nomad-simulations", +# ] dependencies = [ "nomad-lab>=1.3.0", "matid>=2.0.0.dev2", @@ -40,12 +45,13 @@ dependencies = [ [project.optional-dependencies] dev = [ - "mypy==1.0.1", - "ruff", - "pytest", - "pytest-timeout", - "pytest-cov", - "structlog", + 'mypy==1.0.1', + 'pytest>= 5.3.0, <8', + 'pytest-timeout>=1.4.2', + 'pytest-cov>=2.7.1', + 'ruff>=0.6', + 'structlog>=1.0', + 'typing-extensions>=4.12', ] [tool.uv] diff --git a/src/nomad_simulations/schema_packages/__init__.py b/src/nomad_simulations/schema_packages/__init__.py index 8b730793..78d66557 100644 --- a/src/nomad_simulations/schema_packages/__init__.py +++ b/src/nomad_simulations/schema_packages/__init__.py @@ -31,8 +31,8 @@ class NOMADSimulationsEntryPoint(SchemaPackageEntryPoint): description='Limite of the number of atoms in the unit cell to be treated for the system type classification from MatID to work. This is done to avoid overhead of the package.', ) equal_cell_positions_tolerance: float = Field( - 1e-12, - description='Tolerance (in meters) for the cell positions to be considered equal.', + 12, + description='Decimal order or tolerance (in meters) for comparing cell positions.', ) def load(self): diff --git a/src/nomad_simulations/schema_packages/general.py b/src/nomad_simulations/schema_packages/general.py index 9a2d48f0..59777858 100644 --- a/src/nomad_simulations/schema_packages/general.py +++ b/src/nomad_simulations/schema_packages/general.py @@ -217,6 +217,7 @@ def _set_system_branch_depth( system_parent=system_child, branch_depth=branch_depth + 1 ) + #! 
Generalize from checks for atomic systems, error with CG input def resolve_composition_formula(self, system_parent: ModelSystem) -> None: """Determine and set the composition formula for `system_parent` and all of its descendants. @@ -275,6 +276,7 @@ def get_composition_recurs(system: ModelSystem, atom_labels: list[str]) -> None: for subsystem in subsystems: get_composition_recurs(system=subsystem, atom_labels=atom_labels) + # ! CG: system_parent.cell[0].particles_state instead of atoms_state! atoms_state = ( system_parent.cell[0].atoms_state if system_parent.cell is not None else [] ) diff --git a/src/nomad_simulations/schema_packages/model_system.py b/src/nomad_simulations/schema_packages/model_system.py index 0555c432..21cd6ad4 100644 --- a/src/nomad_simulations/schema_packages/model_system.py +++ b/src/nomad_simulations/schema_packages/model_system.py @@ -1,4 +1,24 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def _check_implemented(func: 'Callable') -> 'Callable':
    """
    Decorator restricting a binary comparison method to operands of the same class.

    Args:
        func (Callable): A comparison method with the signature `(self, other)`.

    Returns:
        (Callable): A wrapper that returns `NotImplemented` (without calling `func`) when
        `other` is not an instance of `self.__class__`, and `func(self, other)` otherwise.
    """
    # Function-scope import so that the helper does not depend on the module import block.
    from functools import wraps

    @wraps(func)  # keep the name/docstring of the wrapped comparison for introspection
    def wrapper(self, other):
        if not isinstance(other, self.__class__):
            return NotImplemented
        return func(self, other)

    return wrapper


class PartialOrderElement:
    """
    Element of a partial order in which two distinct elements are never ordered.

    Elements are compared (and hashed) through `representative_variable` only. `<` and `>`
    are always `False`, so `<=` and `>=` reduce to `==`. This is sufficient for comparing
    finite sets of such elements with the built-in `set` operators.
    """

    def __init__(self, representative_variable):
        self.representative_variable = representative_variable

    def __repr__(self):
        return f'{type(self).__name__}({self.representative_variable!r})'

    def __hash__(self):
        return hash(self.representative_variable)

    @_check_implemented
    def __eq__(self, other):
        return self.representative_variable == other.representative_variable

    @_check_implemented
    def __lt__(self, other):
        return False

    @_check_implemented
    def __gt__(self, other):
        return False

    def __le__(self, other):
        return self.__eq__(other)

    def __ge__(self, other):
        return self.__eq__(other)

    # `__ne__` is the default negation of `__eq__`; this presumes usage within finite sets.


class HashedPositions(PartialOrderElement):
    """
    Partial-order element wrapping a position stored as a `pint.Quantity`.

    Two positions are equal when their magnitudes in base units agree after rounding to
    `configuration.equal_cell_positions_tolerance` decimals. The hash is computed from the
    very same rounded values, so that equal elements always share a hash (which the `set`
    comparisons rely on).
    """

    def _rounded_magnitude(self) -> 'np.ndarray':
        """Return the base-unit magnitude rounded to the configured number of decimals."""
        # The configuration field is declared as `float`, while `np.round` needs an integer.
        decimals = int(configuration.equal_cell_positions_tolerance)
        magnitude = np.asarray(
            self.representative_variable.to_base_units().magnitude, dtype=float
        )
        # Adding 0.0 turns `-0.0` into `0.0`; both are equal but have different bytes.
        return np.ascontiguousarray(np.round(magnitude, decimals=decimals) + 0.0)

    def __hash__(self):
        hash_str = sha1(self._rounded_magnitude().tobytes()).hexdigest()
        return int(hash_str, 16)

    def __eq__(self, other):
        """Equality as defined between HashedPositions."""
        if not isinstance(other, HashedPositions):
            return NotImplemented
        if (
            self.representative_variable is None
            or other.representative_variable is None
        ):
            return NotImplemented
        return bool(
            np.array_equal(self._rounded_magnitude(), other._rounded_magnitude())
        )
# Comparison interface of `Cell` (these definitions are methods of the `Cell` section).
@staticmethod
def _generate_comparer(obj: 'Cell') -> 'Generator[Any, None, None]':
    """
    Build the elements used to compare `obj` with another cell.

    Args:
        obj (Cell): The cell whose `positions` are wrapped.

    Returns:
        (Generator): One `HashedPositions` element per entry of `obj.positions`.

    Raises:
        NotImplementedError: If `obj` has no `positions` attribute or it is `None`.
    """
    positions = getattr(obj, 'positions', None)
    if positions is None:
        # A generator expression evaluates its outermost iterable right away, so iterating
        # over `None` would raise a `TypeError` instead of the intended exception.
        raise NotImplementedError('Cannot compare cells without `positions`.')
    return (HashedPositions(pos) for pos in positions)


@catch_not_implemented
def is_lt_cell(self, other) -> bool:
    """Check if the elements of this cell are a proper subset of those of `other`."""
    return set(self._generate_comparer(self)) < set(self._generate_comparer(other))


@catch_not_implemented
def is_gt_cell(self, other) -> bool:
    """Check if the elements of this cell are a proper superset of those of `other`."""
    return set(self._generate_comparer(self)) > set(self._generate_comparer(other))


@catch_not_implemented
def is_le_cell(self, other) -> bool:
    """Check if the elements of this cell are a subset of those of `other`."""
    return set(self._generate_comparer(self)) <= set(self._generate_comparer(other))


@catch_not_implemented
def is_ge_cell(self, other) -> bool:
    """Check if the elements of this cell are a superset of those of `other`."""
    return set(self._generate_comparer(self)) >= set(self._generate_comparer(other))


@catch_not_implemented
def is_equal_cell(self, other) -> bool:  # TODO: improve naming
    """Check if this cell and `other` contain the same set of elements."""
    return set(self._generate_comparer(self)) == set(self._generate_comparer(other))


def is_ne_cell(self, other) -> bool:
    """Negation of `is_equal_cell`."""
    # this does not hold in general, but here we use finite sets
    return not self.is_equal_cell(other)
# Comparison hook of `AtomicCell` (this definition is a method of the `AtomicCell` section).
@staticmethod
def _generate_comparer(obj: 'AtomicCell') -> 'Generator[Any, None, None]':
    """
    Build the elements used to compare `obj` with another atomic cell.

    Presumes that `atoms_state` maps 1-to-1 onto `positions` and conserves the order.

    Args:
        obj (AtomicCell): The atomic cell to wrap.

    Returns:
        (Generator): One `(HashedPositions, PartialOrderElement)` pair per atom, built from
        the position and the `chemical_symbol` of the matching `atoms_state` entry.

    Raises:
        NotImplementedError: If `positions` or `atoms_state` is missing or `None`, or if both
        differ in length.
    """
    positions = getattr(obj, 'positions', None)
    atoms_state = getattr(obj, 'atoms_state', None)
    if positions is None or atoms_state is None:
        # `zip` is evaluated eagerly when the generator is created; passing `None` would
        # raise a `TypeError` instead of the intended exception.
        raise NotImplementedError(
            'Cannot compare atomic cells without `positions` and `atoms_state`.'
        )
    if len(positions) != len(atoms_state):
        # `zip` would silently drop the surplus entries and compare truncated cells.
        raise NotImplementedError(
            'Length of `positions` does not coincide with the length of `atoms_state`.'
        )
    return (
        (HashedPositions(pos), PartialOrderElement(st.chemical_symbol))
        for pos, st in zip(positions, atoms_state)
    )
class ParticleCell(Cell):
    """
    A base section used to specify the particle cell information of a system, i.e., a cell
    made of coarse-grained particles described by `ParticlesState` sub-sections.
    """

    particles_state = SubSection(sub_section=ParticlesState.m_def, repeats=True)

    n_particles = Quantity(
        type=np.int32,
        description="""
        Number of particles in the particle cell.
        """,
    )

    equivalent_particles = Quantity(
        type=np.int32,
        shape=['n_particles'],  # must name the `n_particles` quantity defined above
        description="""
        List of equivalent particles as defined in `particles`. If no equivalent particles
        are found, then the list is simply the index of each element, e.g.:
            - [0, 1, 2, 3] all four particles are non-equivalent.
            - [0, 0, 0, 3] three equivalent particles and one non-equivalent.
        """,
    )

    def __init__(self, m_def: 'Section' = None, m_context: 'Context' = None, **kwargs):
        super().__init__(m_def, m_context, **kwargs)
        # Set the name of the section
        self.name = self.m_def.name

    @staticmethod
    def _generate_comparer(obj: 'ParticleCell') -> 'Generator[Any, None, None]':
        """
        Build the elements used to compare `obj` with another particle cell: one
        `(HashedPositions, PartialOrderElement)` pair per particle, using the position and
        `particles_state[*].particle_type`.

        Presumes that `particles_state` maps 1-to-1 onto `positions` and conserves the order.

        Raises:
            NotImplementedError: If `positions` or `particles_state` is missing or `None`, or
            if both differ in length.
        """
        positions = getattr(obj, 'positions', None)
        particles_state = getattr(obj, 'particles_state', None)
        if positions is None or particles_state is None:
            raise NotImplementedError(
                'Cannot compare particle cells without `positions` and `particles_state`.'
            )
        if len(positions) != len(particles_state):
            raise NotImplementedError(
                'Length of `positions` does not coincide with the length of `particles_state`.'
            )
        return (
            (HashedPositions(pos), PartialOrderElement(st.particle_type))
            for pos, st in zip(positions, particles_state)
        )

    def get_particle_types(self, logger: 'BoundLogger') -> list[str]:
        """
        Get the types of the particles in the particle cell. These are defined on
        `particles_state[*].particle_type`.

        Args:
            logger (BoundLogger): The logger to log messages.

        Returns:
            list: The list of particle types. It is empty if there are no `particles_state`
            or if any of them lacks a `particle_type`.
        """
        if not self.particles_state:
            return []

        particle_labels = []
        for particle_state in self.particles_state:
            if not particle_state.particle_type:
                logger.warning('Could not find `ParticlesState[*].particle_type`.')
                return []
            particle_labels.append(particle_state.particle_type)
        return particle_labels

    def to_particles(self, logger: 'BoundLogger') -> 'Optional[Particles]':
        """
        Generates a Particles object with the most basic information from the parsed `ParticleCell`
        section (labels, periodic_boundary_conditions, positions, and lattice_vectors).

        Note that `periodic_boundary_conditions` of this section is set to
        `[False, False, False]` if it was not defined.

        Args:
            logger (BoundLogger): The logger to log messages.

        Returns:
            (Optional[Particles]): The Particles object with the basic information from the
            `ParticleCell`, or `None` if the positions are missing or inconsistent.
        """
        # Initialize Particles object with labels; the constructor argument is `types`
        particle_labels = self.get_particle_types(logger=logger)
        particles = Particles(types=particle_labels)

        # PBC
        if self.periodic_boundary_conditions is None:
            logger.info(
                'Could not find `ParticleCell.periodic_boundary_conditions`. They will be set to [False, False, False].'
            )
            self.periodic_boundary_conditions = [False, False, False]
        particles.set_pbc(pbc=self.periodic_boundary_conditions)

        # Lattice vectors
        if self.lattice_vectors is not None:
            particles.set_cell(cell=self.lattice_vectors.to('angstrom').magnitude)
        else:
            logger.info('Could not find `ParticleCell.lattice_vectors`.')

        # Positions
        if self.positions is None:
            logger.warning('Could not find `ParticleCell.positions`.')
            return None
        if len(self.positions) != len(self.particles_state):
            logger.error(
                'Length of `ParticleCell.positions` does not coincide with the length of the `ParticleCell.particles_state`.'
            )
            return None
        if len(particle_labels) != len(self.positions):
            # `get_particle_types` returns an empty list when a type is missing; positions
            # cannot be assigned to a different number of particles.
            logger.error(
                'Could not resolve one `particle_type` per position in `ParticleCell`.'
            )
            return None
        particles.set_positions(newpositions=self.positions.to('angstrom').magnitude)

        return particles

    def from_particles(self, particles: 'Particles', logger: 'BoundLogger') -> None:
        """
        Parses the information from a Particles object to the `ParticleCell` section.

        Args:
            particles (Particles): The Particles object to parse.
            logger (BoundLogger): The logger to log messages.
        """
        # `ParticlesState[*].particle_type`
        for label in particles.get_particle_types():
            particle_state = ParticlesState(particle_type=label)
            self.particles_state.append(particle_state)

        # `periodic_boundary_conditions`
        self.periodic_boundary_conditions = particles.get_pbc()

        # `lattice_vectors`
        cell = particles.get_cell()
        self.lattice_vectors = ase.geometry.complete_cell(cell) * ureg('angstrom')

        # `positions`; an empty array means that no positions were found
        positions = particles.get_positions()
        if len(positions) == 0:
            return None
        self.positions = positions * ureg('angstrom')

    def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
        super().normalize(archive, logger)

        # Set the name of the section
        self.name = self.m_def.name if self.name is None else self.name
It generates two other @@ -860,6 +1089,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: self.m_cache['elemental_composition'] = formula.elemental_composition() +# TODO: generalize! indices instead of atom_indices, state instead of atoms_state... class ModelSystem(System): """ Model system used as an input for simulating the material. @@ -1017,12 +1247,14 @@ class ModelSystem(System): # TODO improve description and add an example using the case in atom_indices bond_list = Quantity( type=np.int32, + shape=['*', 2], description=""" List of pairs of atom indices corresponding to bonds (e.g., as defined by a force field) within this atoms_group. """, ) + # TODO: make this work with non_atomic systems: global_composition_formula of entire system with respect to lower layers composition_formula = Quantity( type=str, description=""" @@ -1115,6 +1347,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: 'Could not find the originally parsed atomic system. `Symmetry` and `ChemicalFormula` extraction is thus not run.' ) return + if self.cell[0].name == 'AtomicCell': self.cell[0].type = 'original' ase_atoms = self.cell[0].to_ase_atoms(logger=logger) @@ -1137,6 +1370,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: sec_symmetry = self.m_create(Symmetry) sec_symmetry.normalize(archive, logger) + #! 
ChemicalFormula calls `ase_atoms = atomic_cell.to_ase_atoms(logger=logger)` and `ase_atoms.get_chemical_formula()` # Creating and normalizing ChemicalFormula section # TODO add support for fractional formulas (possibly add `AtomicCell.concentrations` for each species) sec_chemical_formula = self.m_create(ChemicalFormula) diff --git a/src/nomad_simulations/schema_packages/particles_state.py b/src/nomad_simulations/schema_packages/particles_state.py new file mode 100644 index 00000000..28afb1e6 --- /dev/null +++ b/src/nomad_simulations/schema_packages/particles_state.py @@ -0,0 +1,493 @@ +import numbers +from typing import TYPE_CHECKING, Any, Optional, Union + +import ase +import ase.geometry +import numpy as np +import pint +from deprecated import deprecated +from nomad.datamodel.data import ArchiveSection +from nomad.datamodel.metainfo.annotations import ELNAnnotation +from nomad.datamodel.metainfo.basesections import Entity +from nomad.metainfo import MEnum, Quantity, SubSection +from nomad.units import ureg + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + from nomad.metainfo import Context, Section + from structlog.stdlib import BoundLogger + + +class Particles: + """Particle object. + + Adaptation of the ASE Atoms object to coarse-grained particles. For use with + nomad_simulations.model_system.ParticlesCell. + Implemented methods: set_pbc, get_pbc, + set_cell, get_cell, + set_positions, get_positions, + get_particle_types + + Parameters: + + types: str (formula) or list of str. + Default: [‘A’] + typeid: int + (Optional) Id numbers of corresponding particle types. + Default: 0 + type_shapes: str + Store a per-type shape definition for visualization. + A dictionary is stored for each of the NT types, corresponding + to a shape for visualization of that type. + Default: empty + masses: list of float + The mass of each particle. + Default: 1.0 + charges: list of float + Initial atomic charges. 
+ Default: 0.0 + diameter: float + The diameter of each particle. + Default: 1.0 + body: int + The composite body associated with each particle. The value -1 + indicates no body. + Default: -1 + moment_inertia: float + The moment_inertia of each particle (I_xx, I_yy, I_zz). + This inertia tensor is diagonal in the body frame of the particle. + The default value is for point particles. + Default: 0, 0, 0 + positions: float, list of xyz-positions + Particle positions. Needs to be convertible to an + ndarray of shape (N, 3). + Default: 0, 0, 0 + scaled_positions: list of scaled-positions + Like positions, but given in units of the unit cell. + Can not be set at the same time as positions. + Default: 0, 0, 0 + orientation: float + The orientation of each particle. In scalar + vector notation, + this is (r, a_x, a_y, a_z), where the quaternion is q = r + a_xi + a_yj + a_zk. + A unit quaternion has the property: sqrt(r^2 + a_x^2 + a_y^2 + a_z^2) = 1. + Default: 0, 0, 0, 0 + angmom: float + The angular momentum of each particle as a quaternion. + Default: 0, 0, 0, 0 + image: int + The number of times each particle has wrapped around the box (i_x, i_y, i_z). + Default: 0, 0, 0 + cell: 3x3 matrix or length 3 or 6 vector + Unit cell vectors. Can also be given as just three + numbers for orthorhombic cells, or 6 numbers, where + first three are lengths of unit cell vectors, and the + other three are angles between them (in degrees), in following order: + [len(a), len(b), len(c), angle(b,c), angle(a,c), angle(a,b)]. + First vector will lie in x-direction, second in xy-plane, + and the third one in z-positive subspace. + Default value: [0, 0, 0]. + celldisp: Vector + Unit cell displacement vector. To visualize a displaced cell + around the center of mass of a Systems of atoms. Default value + = (0,0,0) + pbc: one or three bool + Periodic boundary conditions flags. Examples: True, + False, 0, 1, (1, 1, 0), (True, False, False). Default + value: False. 
+ + Examples: + + These three are equivalent: + + >>> d = 1.104 # N2 bondlength + >>> a = Atoms('N2', [(0, 0, 0), (0, 0, d)]) + >>> a = Atoms(numbers=[7, 7], positions=[(0, 0, 0), (0, 0, d)]) + >>> a = Atoms([Atom('N', (0, 0, 0)), Atom('N', (0, 0, d))]) + + FCC gold: + + >>> a = 4.05 # Gold lattice constant + >>> b = a / 2 + >>> fcc = Atoms('Au', + ... cell=[(0, b, b), (b, 0, b), (b, b, 0)], + ... pbc=True) + + Hydrogen wire: + + >>> d = 0.9 # H-H distance + >>> h = Atoms('H', positions=[(0, 0, 0)], + ... cell=(d, 0, 0), + ... pbc=(1, 0, 0)) + """ + + def __init__( + self, + types=None, + positions=None, + typeid=None, + type_shapes=None, + moment_inertia=None, + masses=None, + angmom=None, + charges=None, + diameter=None, + body=None, + scaled_positions=None, + orientation=None, + image=None, + cell=None, + pbc=None, + celldisp=None, + ): + self._cellobj = self.set_cell() + self._pbc = np.zeros(3, bool) + + particles = None + + # if hasattr(types, 'get_positions'): + # atoms = types + # types = None + # elif ( + # isinstance(types, (list, tuple)) + # and len(types) > 0 + # and isinstance(types[0], Atom) + # ): + # # Get data from a list or tuple of Atom objects: + # data = [ + # [atom.get_raw(name) for atom in types] + # for name in [ + # 'position', + # 'number', + # 'tag', + # 'momentum', + # 'mass', + # 'magmom', + # 'charge', + # ] + # ] + # atoms = self.__class__(None, *data) + # types = None + + # if atoms is not None: + # # Get data from another Atoms object: + # if scaled_positions is not None: + # raise NotImplementedError + # if types is None and numbers is None: + # numbers = atoms.get_atomic_numbers() + # if positions is None: + # positions = atoms.get_positions() + # if tags is None and atoms.has('tags'): + # tags = atoms.get_tags() + # if momenta is None and atoms.has('momenta'): + # momenta = atoms.get_momenta() + # if magmoms is None and atoms.has('initial_magmoms'): + # magmoms = atoms.get_initial_magnetic_moments() + # if masses is None and 
atoms.has('masses'): + # masses = atoms.get_masses() + # if charges is None and atoms.has('initial_charges'): + # charges = atoms.get_initial_charges() + # if cell is None: + # cell = atoms.get_cell() + # if celldisp is None: + # celldisp = atoms.get_celldisp() + # if pbc is None: + # pbc = atoms.get_pbc() + + # self.arrays = {} + + # if types is None: + # if numbers is None: + # if positions is not None: + # natoms = len(positions) + # elif scaled_positions is not None: + # natoms = len(scaled_positions) + # else: + # natoms = 0 + # numbers = np.zeros(natoms, int) + # self.new_array('numbers', numbers, int) + # else: + # if numbers is not None: + # raise TypeError('Use only one of "types" and "numbers".') + # else: + # self.new_array('numbers', types2numbers(types), int) + + # if self.numbers.ndim != 1: + # raise ValueError('"numbers" must be 1-dimensional.') + + if cell is None: + cell = np.zeros((3, 3)) + self.set_cell(cell) + + # if celldisp is None: + # celldisp = np.zeros(shape=(3, 1)) + # self.set_celldisp(celldisp) + + # if positions is None: + # if scaled_positions is None: + # positions = np.zeros((len(self.arrays['numbers']), 3)) + # else: + # assert self.cell.rank == 3 + # positions = np.dot(scaled_positions, self.cell) + # else: + # if scaled_positions is not None: + # raise TypeError('Use only one of "types" and "numbers".') + # self.new_array('positions', positions, float, (3,)) + # self.set_tags(default(tags, 0)) + # self.set_masses(default(masses, None)) + # self.set_initial_magnetic_moments(default(magmoms, 0.0)) + # self.set_initial_charges(default(charges, 0.0)) + # if pbc is None: + # pbc = False + # self.set_pbc(pbc) + # self.set_momenta(default(momenta, (0.0, 0.0, 0.0)), apply_constraint=False) + + # if velocities is not None: + # if momenta is None: + # self.set_velocities(velocities) + # else: + # raise TypeError('Use only one of "momenta" and "velocities".') + + # if info is None: + # self.info = {} + # else: + # self.info = dict(info) + + 
# self.calc = calculator + + # def set_cell(self, cell): + # if cell is None: + # cell = np.zeros((3, 3)) + + # @property + # def symbols(self): + # """Get chemical symbols as a :class:`ase.symbols.Symbols` object. + + # The object works like ``atoms.numbers`` except its values + # are strings. It supports in-place editing.""" + # return Symbols(self.numbers) + + # @symbols.setter + # def symbols(self, obj): + # new_symbols = Symbols.fromsymbols(obj) + # self.numbers[:] = new_symbols.numbers + + def get_particle_types(self): + """Get list of particle type strings. + + Labels describing type of coarse-grained particles.""" + return list(self.types) + + def set_cell(self, cell, scale_atoms=False, apply_constraint=True): + """Set unit cell vectors. + + Parameters: + + cell: 3x3 matrix + Unit cell. A 3x3 matrix (the three unit cell vectors). + First vector will lie in x-direction, second in + xy-plane, and the third one in z-positive subspace. + scale_atoms: bool + Fix atomic positions or move atoms with the unit cell? + Default behavior is to *not* move the atoms (scale_atoms=False). + apply_constraint: bool + Whether to apply constraints to the given cell. + """ + + # Override pbcs if and only if given a Cell object: + cell = ase.Cell.new(cell) + + if apply_constraint and hasattr(self, '_constraints'): + for constraint in self.constraints: + if hasattr(constraint, 'adjust_cell'): + constraint.adjust_cell(self, cell) + + if scale_atoms: + M = np.linalg.solve(self.cell.complete(), cell.complete()) + self.positions[:] = np.dot(self.positions, M) + + self.cell[:] = cell + + def get_cell(self, complete=False): + """Get the three unit cell vectors as a `class`:ase.cell.Cell` object. 
+ + The Cell object resembles a 3x3 ndarray, and cell[i, j] + is the jth Cartesian coordinate of the ith cell vector.""" + if complete: + cell = self.cell.complete() + else: + cell = self.cell.copy() + + return cell + + @property + def pbc(self): + """Reference to pbc-flags for in-place manipulations.""" + return self._pbc + + @pbc.setter + def pbc(self, pbc): + self._pbc[:] = pbc + + def set_pbc(self, pbc): + """Set periodic boundary condition flags.""" + self.pbc = pbc + + def get_pbc(self): + """Get periodic boundary condition flags.""" + return self.pbc.copy() + + def set_positions(self, newpositions, apply_constraint=True): + """Set positions, honoring any constraints. To ignore constraints, + use *apply_constraint=False*.""" + if self.constraints and apply_constraint: + newpositions = np.array(newpositions, float) + for constraint in self.constraints: + constraint.adjust_positions(self, newpositions) + + self.set_array('positions', newpositions, shape=(3,)) + + def get_positions(self, wrap=False, **wrap_kw): + """Get array of positions. + + Parameters: + + wrap: bool + wrap atoms back to the cell before returning positions + wrap_kw: (keyword=value) pairs + optional keywords `pbc`, `center`, `pretty_translation`, `eps`, + see :func:`ase.geometry.wrap_positions` + """ + if wrap: + if 'pbc' not in wrap_kw: + wrap_kw['pbc'] = self.pbc + return ase.geometry.wrap_positions(self.positions, self.cell, **wrap_kw) + else: + return self.arrays['positions'].copy() + + def get_scaled_positions(self, wrap=True): + """Get positions relative to unit cell. + + If wrap is True, atoms outside the unit cell will be wrapped into + the cell in those directions with periodic boundary conditions + so that the scaled coordinates are between zero and one. + + If any cell vectors are zero, the corresponding coordinates + are evaluated as if the cell were completed using + ``cell.complete()``. 
This means coordinates will be Cartesian + as long as the non-zero cell vectors span a Cartesian axis or + plane.""" + + fractional = self.cell.scaled_positions(self.positions) + + if wrap: + for i, periodic in enumerate(self.pbc): + if periodic: + # Yes, we need to do it twice. + # See the scaled_positions.py test. + fractional[:, i] %= 1.0 + fractional[:, i] %= 1.0 + + return fractional + + def set_scaled_positions(self, scaled): + """Set positions relative to unit cell.""" + self.positions[:] = self.cell.cartesian_positions(scaled) + + def wrap(self, **wrap_kw): + """Wrap positions to unit cell. + + Parameters: + + wrap_kw: (keyword=value) pairs + optional keywords `pbc`, `center`, `pretty_translation`, `eps`, + see :func:`ase.geometry.wrap_positions` + """ + + if 'pbc' not in wrap_kw: + wrap_kw['pbc'] = self.pbc + + self.positions[:] = self.get_positions(wrap=True, **wrap_kw) + + def _get_positions(self): + """Return reference to positions-array for in-place manipulations.""" + return self.arrays['positions'] + + def _set_positions(self, pos): + """Set positions directly, bypassing constraints.""" + self.arrays['positions'][:] = pos + + positions = property( + _get_positions, + _set_positions, + doc='Attribute for direct ' + 'manipulation of the positions.', + ) + + +# ? How generic (usable for any CG model) vs. Martini-specific do we want to be? +class ParticlesState(Entity): + """ + A base section to define individual coarse-grained (CG) particle information. + """ + + # ? What do we want to qualify as type identifier? What safety checks do we need? + particle_type = Quantity( + type=str, + description=""" + Symbol(s) describing the CG particle type. Currently, entrie particle label is + used for type definition. + """, + ) + + # ? Do we want to reflect the Martini size nomenclature and include bead volume/bead mass? 
def resolve_particle_type(self, logger: 'BoundLogger') -> Optional[str]:
    """
    Resolves the particle label into a string to be used as type identifier for the
    CG particle.

    Values that are not already strings are converted with `str()` *before* the
    ASCII check, so that non-string labels (e.g., integers) are converted instead of
    raising an `AttributeError` on the missing `isascii` method.

    Args:
        logger (BoundLogger): The logger to log messages.

    Returns:
        (Optional[str]): The resolved `particle type`, or `None` if no label is set,
        the label cannot be converted to a string, or it contains non-ASCII characters.
    """
    label = self.particle_type
    if label is None:
        return None

    if not isinstance(label, str):
        try:
            label = str(label)
        except TypeError:
            logger.error('The parsed `particle type` can not be read.')
            return None

    # Only plain ASCII labels are accepted as type identifiers.
    return label if label.isascii() else None
def catch_not_implemented(func: 'Callable') -> 'Callable':
    """
    Decorator to default comparison functions outside the same class to `False`.

    The wrapped method is only invoked when `other` is an instance of the class of
    `self`; otherwise `False` is returned without calling it. A `TypeError` or
    `NotImplementedError` raised by the wrapped method is also mapped to `False`.

    Args:
        func (Callable): A binary comparison method with the signature `(self, other)`.

    Returns:
        (Callable): The wrapping method, which keeps the name and docstring of `func`.
    """
    # Imported locally so the decorator does not rely on module-level imports.
    from functools import wraps

    @wraps(func)  # keep `__name__` / `__doc__` of the decorated comparison method
    def wrapper(self, other) -> bool:
        if not isinstance(other, self.__class__):
            return False  # ? should this throw an error instead?
        try:
            return func(self, other)
        except (TypeError, NotImplementedError):
            return False

    return wrapper
- """ - assert cell_1.is_equal_cell(other=cell_2) == result - - -class TestAtomicCell: - """ - Test the `AtomicCell`, `Cell` and `GeometricSpace` classes defined in model_system.py - """ - - @pytest.mark.parametrize( - 'cell_1, cell_2, result', - [ - (Cell(), None, False), # one cell is None - (Cell(), Cell(), False), # both cells are empty + {'lt': False, 'gt': True, 'eq': False}, + ), # one is a subset of the other ( Cell(positions=[[1, 0, 0]]), - Cell(), - False, - ), # one cell has positions, the other is empty - ( Cell(positions=[[1, 0, 0], [0, 1, 0]]), - Cell(positions=[[1, 0, 0]]), - False, - ), # length mismatch + {'lt': True, 'gt': False, 'eq': False}, + ), # one is a subset of the other ( Cell(positions=[[1, 0, 0], [0, 1, 0]]), Cell(positions=[[1, 0, 0], [0, -1, 0]]), - False, + {'lt': False, 'gt': False, 'eq': False}, ), # different positions ( Cell(positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]), Cell(positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]), - True, + {'lt': False, 'gt': False, 'eq': True}, ), # same ordered positions ( Cell(positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]), Cell(positions=[[1, 0, 0], [0, 0, 1], [0, 1, 0]]), - True, + {'lt': False, 'gt': False, 'eq': True}, ), # different ordered positions but same cell + # ( + # AtomicCell(positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]), + # Cell(positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]), + # False, + # ), # one atomic cell and another cell (missing chemical symbols) + # ( + # AtomicCell(positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]), + # AtomicCell(positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]), + # False, + # ), # missing chemical symbols + # ND: the comparison will now return an error here + # handling a case that should be resolved by the normalizer falls outside its scope ( - AtomicCell(positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]), - Cell(positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]), - False, - ), # one atomic cell and another cell (missing chemical symbols) + AtomicCell( + positions=[[1, 0, 0]], + 
atoms_state=[ + AtomsState(chemical_symbol='O'), + ], + ), + AtomicCell( + positions=[[1, 0, 0]], + atoms_state=[ + AtomsState(chemical_symbol='H'), + ], + ), + {'lt': False, 'gt': False, 'eq': False}, + ), # chemical symbols are treated as the fundamental set elements ( - AtomicCell(positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]), - AtomicCell(positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]), - False, - ), # missing chemical symbols + AtomicCell( + positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]], + atoms_state=[ + AtomsState(chemical_symbol='H'), + AtomsState(chemical_symbol='H'), + AtomsState(chemical_symbol='O'), + ], + ), + AtomicCell( + positions=[[1, 0, 0], [0, 1, 0]], + atoms_state=[ + AtomsState(chemical_symbol='H'), + AtomsState(chemical_symbol='H'), + ], + ), + {'lt': False, 'gt': True, 'eq': False}, + ), # one is a subset of the other ( + AtomicCell( + positions=[[1, 0, 0], [0, 1, 0]], + atoms_state=[ + AtomsState(chemical_symbol='H'), + AtomsState(chemical_symbol='H'), + ], + ), AtomicCell( positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]], atoms_state=[ @@ -116,6 +124,16 @@ class TestAtomicCell: AtomsState(chemical_symbol='O'), ], ), + {'lt': True, 'gt': False, 'eq': False}, + ), # one is a subset of the other + ( + AtomicCell( + positions=[[1, 0, 0], [0, 1, 0]], + atoms_state=[ + AtomsState(chemical_symbol='H'), + AtomsState(chemical_symbol='O'), + ], + ), AtomicCell( positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]], atoms_state=[ @@ -124,7 +142,26 @@ class TestAtomicCell: AtomsState(chemical_symbol='O'), ], ), - True, + {'lt': False, 'gt': False, 'eq': False}, + ), + ( + AtomicCell( + positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]], + atoms_state=[ + AtomsState(chemical_symbol='H'), + AtomsState(chemical_symbol='H'), + AtomsState(chemical_symbol='O'), + ], + ), + AtomicCell( + positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]], + atoms_state=[ + AtomsState(chemical_symbol='H'), + AtomsState(chemical_symbol='H'), + AtomsState(chemical_symbol='O'), + ], + ), + {'lt': False, 'gt': 
False, 'eq': True}, ), # same ordered positions and chemical symbols ( AtomicCell( @@ -143,7 +180,7 @@ class TestAtomicCell: AtomsState(chemical_symbol='O'), ], ), - False, + {'lt': False, 'gt': False, 'eq': False}, ), # same ordered positions but different chemical symbols ( AtomicCell( @@ -162,38 +199,41 @@ class TestAtomicCell: AtomsState(chemical_symbol='H'), ], ), - True, - ), # different ordered positions but same chemical symbols - ], - ) - def test_is_equal_cell(self, cell_1: Cell, cell_2: Cell, result: bool): - """ - Test the `is_equal_cell` methods of `AtomicCell`. - """ - assert cell_1.is_equal_cell(other=cell_2) == result - - @pytest.mark.parametrize( - 'atomic_cell, result', - [ - (AtomicCell(), []), - (AtomicCell(atoms_state=[AtomsState(chemical_symbol='H')]), ['H']), + {'lt': False, 'gt': False, 'eq': True}, + ), # same position-symbol map, different overall order ( AtomicCell( + positions=[[1, 0, 0], [0, 1, 0], [0, 0, 1]], + atoms_state=[ + AtomsState(chemical_symbol='H'), + AtomsState(chemical_symbol='H'), + AtomsState(chemical_symbol='O'), + ], + ), + AtomicCell( + positions=[[1, 0, 0], [0, 0, 1], [0, 1, 0]], atoms_state=[ AtomsState(chemical_symbol='H'), - AtomsState(chemical_symbol='Fe'), + AtomsState(chemical_symbol='H'), AtomsState(chemical_symbol='O'), - ] + ], ), - ['H', 'Fe', 'O'], - ), + {'lt': False, 'gt': False, 'eq': False}, + ), # different position-symbol map ], ) - def test_get_chemical_symbols(self, atomic_cell: AtomicCell, result: list[str]): + def test_partial_order( + self, cell_1: 'Cell', cell_2: 'Cell', result: dict[str, bool] + ): """ - Test the `get_chemical_symbols` method of `AtomicCell`. + Test the comparison operators of `Cell` and `AtomicCell`. 
""" - assert atomic_cell.get_chemical_symbols(logger=logger) == result + assert cell_1.is_lt_cell(cell_2) == result['lt'] + assert cell_1.is_gt_cell(cell_2) == result['gt'] + assert cell_1.is_le_cell(cell_2) == (result['lt'] or result['eq']) + assert cell_1.is_ge_cell(cell_2) == (result['gt'] or result['eq']) + assert cell_1.is_equal_cell(cell_2) == result['eq'] + assert cell_1.is_ne_cell(cell_2) == (not result['eq']) @pytest.mark.parametrize( 'chemical_symbols, atomic_numbers, formula, lattice_vectors, positions, periodic_boundary_conditions',