Skip to content

Commit

Permalink
Merge pull request #5 from vanderhe/atomicGradientWeights
Browse files Browse the repository at this point in the history
Add optional atom-resolved weighting of training gradients
  • Loading branch information
vanderhe authored Oct 22, 2021
2 parents f8b62d7 + 33202ab commit 38660c8
Show file tree
Hide file tree
Showing 30 changed files with 243 additions and 3 deletions.
30 changes: 28 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,43 @@ results obtained by the neural network implementation
Installation
============

|build status|
Please note that this package has been tested with Python 3.X. Its usage
additionally requires

- `numerical Python <https://numpy.org/doc/stable/reference/>`_ (`numpy`)
- `pythonic HDF5 <http://www.h5py.org/>`_ (`h5py`)
- `Atomic Simulation Environment <https://wiki.fysik.dtu.dk/ase/>`_ (`ase`)

as well as the `pytest` framework in order to run the regression tests.

Via the Python Package Index
----------------------------

The package can be downloaded and installed via pip into the active Python
interpreter (preferably using a virtual python environment) by::
interpreter (preferably using a virtual python environment) by ::

pip install fortnet-python

or into the user space by issuing::

pip install --user fortnet-python

Locally from Source
-------------------

Alternatively, you can install it locally from source, i.e. from the root folder
of the project::

python -m pip install .

Testing
=======

The regression test suite utilizes the `pytest` framework and may be executed by
::

python -m pytest --basetemp=Testing

Documentation
=============

Expand Down
76 changes: 76 additions & 0 deletions src/fortformat/fnetdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,18 @@ def __init__(self, atoms=None, features=None, targets=None, atomic=False):

self._weights = np.ones((self._nsystems,), dtype=int)

if self._withatoms:
self._atomicweights = []
for entry in self._atoms:
self._atomicweights.append(np.ones((len(entry),), dtype=float))
elif self._withfeatures:
self._atomicweights = []
for entry in self._features:
self._atomicweights.append(
np.ones((entry.shape[0],), dtype=float))
else:
self._atomicweights = None


def _process_data(self):
'''Based on the stored data, a list of dictionaries,
Expand Down Expand Up @@ -216,6 +228,7 @@ def _create_contiguous_hdf(self, fname, data, zz):
subroot = datagrp.create_group('datapoint{}'.format(isys + 1))

hdf_append_weight(subroot, self._weights[isys])
hdf_append_atomicweights(subroot, self._atomicweights[isys])

if self._withatoms:
hdf_append_geometry(subroot, data[isys], True)
Expand Down Expand Up @@ -279,6 +292,55 @@ def weights(self, weights):
self._weights = weights


@property
def atomicweights(self):
    '''Defines property, providing the gradient weight of each atom.

    Returns:

        atomicweights (list): float-valued list of atomic gradient weights,
            one array per datapoint

    '''

    return self._atomicweights


@atomicweights.setter
def atomicweights(self, atomicweights):
    '''Sets user-specified gradient weighting of each atom.

    The input is validated against the stored geometries and copied into
    freshly created float arrays, so the caller's container is never
    modified and any sequence type (list, tuple, ...) is accepted.

    Args:

        atomicweights (sequence): one array-like of non-negative weights
            per datapoint; entries of any dtype convertible to float
            (e.g. bool or int) are accepted

    Raises:

        FnetdataError: if the object holds no geometry information, if the
            number of entries differs from the number of geometries, if an
            entry's length differs from the number of atoms of the
            corresponding geometry, or if a weight is negative

    '''

    # cheap precondition first, before touching the input at all
    if not self._withatoms:
        msg = 'Trying to set atomic gradient weighting but the object ' + \
            'was initialized without geometry information.'
        raise FnetdataError(msg)

    if len(atomicweights) != len(self._atoms):
        msg = 'Mismatch in list length of atomic gradient weighting ' + \
            'and geometries.'
        raise FnetdataError(msg)

    # convert into a new list, enabling arrays of several dtypes while
    # leaving the object passed in by the caller untouched
    converted = []
    for isys, entry in enumerate(atomicweights):
        weights = np.array(entry, dtype=float)

        # check consistency with geometries and whether (weights >= 0.0)
        if len(weights) != len(self._atoms[isys]):
            msg = 'Mismatch in number of atomic gradient weights and ' + \
                'number of atoms of corresponding geometry (index: {}).' \
                .format(isys + 1)
            raise FnetdataError(msg)
        if np.any(weights < 0.0):
            msg = 'Negative atomic gradient weight(s) obtained ' + \
                '(index: {}).'.format(isys + 1)
            raise FnetdataError(msg)

        converted.append(weights)

    # only store the weights once every entry passed validation
    self._atomicweights = converted


@property
def ndatapoints(self):
'''Defines property, providing the number of datapoints.
Expand Down Expand Up @@ -461,6 +523,20 @@ def hdf_append_weight(root, weight):
root.attrs['weight'] = weight


def hdf_append_atomicweights(root, data):
    '''Stores the atomic gradient weights of one datapoint in an hdf group.

    Args:

        root (hdf group): group receiving the 'atomicweights' dataset
        data (1darray): atomic weights of current datapoint

    '''

    # allocate a float dataset of matching shape, then fill it in one go
    dataset = root.create_dataset(
        'atomicweights', shape=data.shape, dtype='float')
    dataset[...] = data


def hdf_append_geometry(root, data, frac):
'''Appends geometry information to a given in-memory hdf file.
Expand Down
25 changes: 25 additions & 0 deletions test/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,3 +406,28 @@ def get_atomicweights_byatoms(atoms):
weights.append(np.asfarray(np.random.randint(1, 100, natom, dtype=int)))

return weights


def get_batomicweights_byatoms(atoms):
    '''Generates boolean dummy gradient weights for regression testing.

    Args:

        atoms (ASE atoms list): list of ASE Atoms objects

    Returns:

        weights (list): one boolean array per geometry, holding a randomly
            drawn True/False gradient weight for each atom

    '''

    # fix random seed for reproduction purposes
    np.random.seed(42)
    choices = [True, False]

    # draws happen geometry by geometry, in list order
    return [np.random.choice(choices, size=len(geometry))
            for geometry in atoms]
Binary file modified test/references/Fnetdata/_cgeometries.hdf5
Binary file not shown.
Binary file modified test/references/Fnetdata/_cgeometries_atomictargets.hdf5
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified test/references/Fnetdata/_cgeometries_atomictargets_weights.hdf5
Binary file not shown.
Binary file modified test/references/Fnetdata/_cgeometries_extfeatures.hdf5
Binary file not shown.
Binary file modified test/references/Fnetdata/_cgeometries_globaltargets.hdf5
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified test/references/Fnetdata/_cgeometries_globaltargets_weights.hdf5
Binary file not shown.
Binary file modified test/references/Fnetdata/_cgeometries_weights.hdf5
Binary file not shown.
Binary file modified test/references/Fnetdata/_csgeometries.hdf5
Binary file not shown.
Binary file modified test/references/Fnetdata/_extfeatures.hdf5
Binary file not shown.
Binary file modified test/references/Fnetdata/_sgeometries_atomictargets.hdf5
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified test/references/Fnetdata/_sgeometries_atomictargets_weights.hdf5
Binary file not shown.
Binary file modified test/references/Fnetdata/_sgeometries_extfeatures.hdf5
Binary file not shown.
Binary file modified test/references/Fnetdata/_sgeometries_globaltargets.hdf5
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified test/references/Fnetdata/_sgeometries_globaltargets_weights.hdf5
Binary file not shown.
115 changes: 114 additions & 1 deletion test/test_fnetdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
from fortformat import Fnetdata

from common import Hdf5, get_cluster_geometries, get_bulk_geometries, \
get_mixed_geometries, get_properties_byatoms, get_atomicweights_byatoms
get_mixed_geometries, get_properties_byatoms, get_atomicweights_byatoms, \
get_batomicweights_byatoms


REFPATH = os.path.join(os.getcwd(), 'test', 'references', 'Fnetdata')
Expand Down Expand Up @@ -272,6 +273,34 @@ def test_cgeometries_atomictargets_atomicweights(tmpdir):
assert equal, 'h5diff reports mismatch in generated datasets.'


def test_cgeometries_atomictargets_batomicweights(tmpdir):
    '''Regression test: cluster geometries, atomic targets and boolean
    atom-resolved gradient weights.

    Configuration:

        structures: Yes
        periodic: No
        targets: Yes
        atomic targets: Yes
        external features: No
        manual dataset weights: No
        manual gradient weights: Yes

    '''

    fname = 'cgeometries_atomictargets_batomicweights.hdf5'
    generated = os.path.join(tmpdir, fname)
    reference = os.path.join(REFPATH, '_' + fname)

    # keep the order of the helper calls (weights before targets)
    geometries = get_cluster_geometries()
    gradweights = get_batomicweights_byatoms(geometries)
    properties = get_properties_byatoms(geometries, 3, True)

    dataset = Fnetdata(atoms=geometries, targets=properties, atomic=True)
    dataset.atomicweights = gradweights
    dataset.dump(generated)

    matches = Hdf5(generated).equals(reference)

    assert matches, 'h5diff reports mismatch in generated datasets.'


def test_cgeometries_globaltargets(tmpdir):
'''Test dataset generation for configuration:
Expand Down Expand Up @@ -384,6 +413,34 @@ def test_cgeometries_globaltargets_atomicweights(tmpdir):
assert equal, 'h5diff reports mismatch in generated datasets.'


def test_cgeometries_globaltargets_batomicweights(tmpdir):
    '''Regression test: cluster geometries, global targets and boolean
    atom-resolved gradient weights.

    Configuration:

        structures: Yes
        periodic: No
        targets: Yes
        atomic targets: No
        external features: No
        manual dataset weights: No
        manual gradient weights: Yes

    '''

    fname = 'cgeometries_globaltargets_batomicweights.hdf5'
    generated = os.path.join(tmpdir, fname)
    reference = os.path.join(REFPATH, '_' + fname)

    # keep the order of the helper calls (weights before targets)
    geometries = get_cluster_geometries()
    gradweights = get_batomicweights_byatoms(geometries)
    properties = get_properties_byatoms(geometries, 3, False)

    dataset = Fnetdata(atoms=geometries, targets=properties, atomic=False)
    dataset.atomicweights = gradweights
    dataset.dump(generated)

    matches = Hdf5(generated).equals(reference)

    assert matches, 'h5diff reports mismatch in generated datasets.'


def test_sgeometries_extfeatures(tmpdir):
'''Test dataset generation for configuration:
Expand Down Expand Up @@ -522,6 +579,34 @@ def test_sgeometries_atomictargets_atomicweights(tmpdir):
assert equal, 'h5diff reports mismatch in generated datasets.'


def test_sgeometries_atomictargets_batomicweights(tmpdir):
    '''Regression test: bulk geometries, atomic targets and boolean
    atom-resolved gradient weights.

    Configuration:

        structures: Yes
        periodic: Yes
        targets: Yes
        atomic targets: Yes
        external features: No
        manual dataset weights: No
        manual gradient weights: Yes

    '''

    fname = 'sgeometries_atomictargets_batomicweights.hdf5'
    generated = os.path.join(tmpdir, fname)
    reference = os.path.join(REFPATH, '_' + fname)

    # keep the order of the helper calls (weights before targets)
    geometries = get_bulk_geometries()
    gradweights = get_batomicweights_byatoms(geometries)
    properties = get_properties_byatoms(geometries, 3, True)

    dataset = Fnetdata(atoms=geometries, targets=properties, atomic=True)
    dataset.atomicweights = gradweights
    dataset.dump(generated)

    matches = Hdf5(generated).equals(reference)

    assert matches, 'h5diff reports mismatch in generated datasets.'


def test_sgeometries_globaltargets(tmpdir):
'''Test dataset generation for configuration:
Expand Down Expand Up @@ -634,5 +719,33 @@ def test_sgeometries_globaltargets_atomicweights(tmpdir):
assert equal, 'h5diff reports mismatch in generated datasets.'


def test_sgeometries_globaltargets_batomicweights(tmpdir):
    '''Regression test: bulk geometries, global targets and boolean
    atom-resolved gradient weights.

    Configuration:

        structures: Yes
        periodic: Yes
        targets: Yes
        atomic targets: No
        external features: No
        manual dataset weights: No
        manual gradient weights: Yes

    '''

    fname = 'sgeometries_globaltargets_batomicweights.hdf5'
    generated = os.path.join(tmpdir, fname)
    reference = os.path.join(REFPATH, '_' + fname)

    # keep the order of the helper calls (weights before targets)
    geometries = get_bulk_geometries()
    gradweights = get_batomicweights_byatoms(geometries)
    properties = get_properties_byatoms(geometries, 3, False)

    dataset = Fnetdata(atoms=geometries, targets=properties, atomic=False)
    dataset.atomicweights = gradweights
    dataset.dump(generated)

    matches = Hdf5(generated).equals(reference)

    assert matches, 'h5diff reports mismatch in generated datasets.'


if __name__ == '__main__':
pytest.main()

0 comments on commit 38660c8

Please sign in to comment.