diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 0000000..a3ed7f4 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,14 @@ +# Codecov configuration to make it a bit less noisy +coverage: + status: + patch: false + project: + default: + threshold: 50% +comment: + layout: "header" + require_changes: false + branches: null + behavior: default + flags: null + paths: null \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..568769d --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,36 @@ +name: CI +on: [push, pull_request] + +env: + HSD_PYTHON_VERSION: '0.1' + +jobs: + test: + + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + + - name: Install requirements (PIP) + run: pip3 install pytest sphinx numpy build + + - name: Setup up root directory + run: echo "PACKAGE_ROOT=${PWD}/src" >> $GITHUB_ENV + + - name: Build and install package + run: | + python -m build + pip install dist/hsd_python*.whl + python -c "import hsd; assert hsd.__version__ == '${HSD_PYTHON_VERSION}'" + + - name: Run test pytest + run: python3 -m pytest + + - name: Run doctest + run: cd docs; make doctest diff --git a/.lgtm.yml b/.lgtm.yml new file mode 100644 index 0000000..a17433d --- /dev/null +++ b/.lgtm.yml @@ -0,0 +1,12 @@ +# Configure LGTM for this package + +extraction: + python: # Configure Python + python_setup: # Configure the setup + version: 3 # Specify Version 3 +path_classifiers: + library: + - src/versioneer.py # Set Versioneer.py to an external "library" (3rd party code) + - devtools/* + generated: + - src/hsd/_version.py diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..8cb46fe --- /dev/null +++ b/.travis.yml @@ -0,0 +1,48 @@ +language: python + +# Run jobs on container-based infrastructure, can be overridden per job + +matrix: + include: + # Extra includes for OSX 
since python language is not available by default on OSX + - os: osx + language: generic + env: PYTHON_VER=3.6 + - os: osx + language: generic + env: PYTHON_VER=3.7 + + + # Pip can use Travis build-in Python + - os: linux + python: 3.6 + - os: linux + dist: xenial # Travis Trusty image does not have Python 3.7, Xenial does + python: 3.7 + + +before_install: + # Additional info about the build + - uname -a + - df -h + - ulimit -a + + # Install the Python environment + - source devtools/travis-ci/before_install.sh + - python -V + +install: + + # Install the package locally + - pip install -U pytest pytest-cov codecov + - pip install -e src/ + + +script: + - pytest -v --cov=hsd test/ + +notifications: + email: false + +after_success: + - codecov diff --git a/CHANGELOG.rst b/CHANGELOG.rst new file mode 100644 index 0000000..6284212 --- /dev/null +++ b/CHANGELOG.rst @@ -0,0 +1,14 @@ +========== +Change Log +========== + + +0.1 +=== + +Added +----- + +* Basic functionality to manipulate HSD-data in Python. + +* Pip installation diff --git a/LICENSE b/LICENSE index c1beed0..10c023d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2020 Bálint Aradi, Universität Bremen +Copyright (c) 2011-2021 DFTB+ developers group All rights reserved. diff --git a/README.rst b/README.rst index eb7089d..902ec94 100644 --- a/README.rst +++ b/README.rst @@ -1,23 +1,43 @@ -************************************ -HSD — Human-friendly Structured Data -************************************ +********************************************** +HSD — Make your structured data human friendly +********************************************** -This Python package contains utilities to write (and soon also to read) files in -the Human-friendly Structured Data (HSD) format. +Utilities to read and write files in the Human-friendly Structured Data (HSD) +format. -It is licensed under the *BSD 2-clause license*. 
+The HSD-format is very similar to both JSON and YAML, but tries to minimize the +effort for **humans** to read and write it. It omits special characters as much +as possible (in contrast to JSON) and is not indentation dependent (in contrast +to YAML). It was developed originally as the input format for the scientific +simulation tool (`DFTB+ <https://github.com/dftbplus/dftbplus>`_), but is +of general purpose. Data stored in HSD can be easily mapped to a subset of JSON +or XML and vice versa. +Detailed `documentation <https://hsd-python.readthedocs.io/>`_ can be found on +`Read the Docs <https://hsd-python.readthedocs.io/>`_. -The HSD format -============== -The HSD-format is very similar to both JSON and XML, but tries to minimize the -effort for humans to read and write it. It ommits special characters as much as -possible but (in contrast to YAML for example) is not indentation dependent. +Installation +============ + +The package can be installed via conda-forge:: + + conda install --channel "conda-forge" hsd-python + +Alternatively, the package can be downloaded and installed via pip into the +active Python interpreter (preferably using a virtual python environment) by :: + + pip install hsd-python + +or into the user space by issuing :: -It was developed originally developed as the input format for a scientific -simulation tool (DFTB+), but is absolutely general. A typical input written in -HSD would look like :: + pip install --user hsd-python + + +Quick tutorial +============== + +A typical, self-explanatory input written in HSD looks like :: driver { conjugate_gradients { @@ -35,11 +55,13 @@ HSD would look like :: } filling { fermi { - temperature [kelvin] = 1e-8 + # This is a comment which will be ignored + # Note the attribute (unit) of the field below + temperature [kelvin] = 100 } } k_points_and_weights { - supercell_folding = { + supercell_folding { 2 0 0 0 2 0 0 0 2 @@ -49,12 +71,56 @@ HSD would look like :: } } -Content in HSD format can be represented as JSON.
Content in JSON format can be -represented as HSD, provided it satisfies a restriction for arrays: Either all -elements of an array must be objects or none of them. (This allows for a clear -separation of structure and data and allows for the very simple input format.) +The above input can be parsed into a Python dictionary with:: + + import hsd + hsdinput = hsd.load("test.hsd") + +The dictionary ``hsdinput`` will then look like:: + + { + "driver": { + "conjugate_gradients": { + "moved_atoms": [1, 2, "7:19"], + "max_steps": 100 + } + }, + "hamiltonian": { + "dftb": { + "scc": True, + "scc_tolerance": 1e-10, + "mixer": { + "broyden": {} + }, + "filling": { + "fermi": { + "temperature": 100, + "temperature.attrib": "kelvin" + } + }, + "k_points_and_weights": { + "supercell_folding": [ + [2, 0, 0], + [0, 2, 0], + [0, 0, 2], + [0.5, 0.5, 0.5] + ] + } + } + } + } + +Being a simple Python dictionary, it can be easily queried and manipulated in +Python :: + + hsdinput["driver"]["conjugate_gradients"]["max_steps"] = 200 + +and then stored again in HSD format :: + + hsd.dump(hsdinput, "test2.hsd") + + +License +======== -Content in HSD format can be represented as XML (DOM-tree). Content in XML can -be converted to HSD, provided it satisfies the restriction that every child has -either data (text) or further children, but never both of them. (Again, this -ensures the simplicity of the input format.) +The hsd-python package is licensed under the `BSD 2-clause license <./LICENSE>`_. diff --git a/devtools/set_version b/devtools/set_version new file mode 100755 index 0000000..6be38fa --- /dev/null +++ b/devtools/set_version @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 + +"""Sets a version number in all relevant project files""" + +import sys +import re +import os + +# The pattern the version number must satisfy +VERSION_PATTERN = r'\d+\.\d+(?:\.\d+)?(?:-\w+)?' + +# List of (file name, search pattern, replacement pattern) tuples for all +# the occurrences to be replaced.
+FILES_PATTERNS = [('src/hsd/__init__.py', + r'^__version__\s*=\s*([\'"]){}\1'.format(VERSION_PATTERN), + "__version__ = '{version}'"), + ('docs/introduction.rst', + r'hsd-python version[ ]*{}\.'.format(VERSION_PATTERN), + 'hsd-python version {shortversion}.'), + ('setup.cfg', + r'^version\s*=\s*{}'.format(VERSION_PATTERN), + "version = {version}"), + ('docs/conf.py', + r'release\s*=\s*([\'"]){}\1'.format(VERSION_PATTERN), + "release = '{version}'"), + ('.github/workflows/ci.yml', + r'HSD_PYTHON_VERSION:\s*([\'"]){}\1'.format(VERSION_PATTERN), + "HSD_PYTHON_VERSION: '{version}'"), + ] + + +def main(): + """Reads the version from the first command line argument and writes it into the project files.""" + + if len(sys.argv) < 2: + sys.stderr.write("Missing version string\n") + sys.exit(1) + + version, shortversion = _get_version_strings(sys.argv[1]) + rootdir = os.path.join(os.path.dirname(sys.argv[0]), '..') + _replace_version_in_files(FILES_PATTERNS, rootdir, version, shortversion) + _replace_version_in_changelog(rootdir, version) + + +def _get_version_strings(version): + """Validates the whole version string and returns it with its short (major.minor) form""" + + match = re.fullmatch(VERSION_PATTERN, version) + if match is None: + sys.stderr.write("Invalid version string\n") + sys.exit(1) + + shortversion = '.'.join(version.split('-')[0].split('.')[0:2]) + return version, shortversion + + +def _replace_version_in_files(files_patterns, rootdir, version, shortversion): + """Replaces version number in given files with given search/replacement patterns""" + + for fname, regexp, repl in files_patterns: + fname = os.path.join(rootdir, fname) + print("Replacments in '{}': ".format(os.path.relpath(fname, rootdir)), end='') + fp = open(fname, 'r') + txt = fp.read() + fp.close() + replacement = repl.format(version=version, shortversion=shortversion) + newtxt, nsub = re.subn(regexp, replacement, txt, flags=re.MULTILINE) + print(nsub) + fp = open(fname, 'w') + fp.write(newtxt) + fp.close() + + +def _replace_version_in_changelog(rootdir, version): + """Replaces the unreleased section in CHANGELOG.rst""" + + fname =
os.path.join(rootdir, 'CHANGELOG.rst') + print("Replacments in '{}': ".format(os.path.relpath(fname, rootdir)), end='') + fp = open(fname, 'r') + txt = fp.read() + fp.close() + decoration = '=' * len(version) + newtxt, nsub = re.subn( + r'^Unreleased\s*\n=+', version + r'\n' + decoration, txt, + count=1, flags=re.MULTILINE) + print(nsub) + fp = open(fname, 'w') + fp.write(newtxt) + fp.close() + + +if __name__ == '__main__': + main() diff --git a/devtools/travis-ci/before_install.sh b/devtools/travis-ci/before_install.sh new file mode 100755 index 0000000..c918485 --- /dev/null +++ b/devtools/travis-ci/before_install.sh @@ -0,0 +1,21 @@ +# Temporarily change directory to $HOME to install software +pushd . +cd $HOME +# Make sure some level of pip is installed +python -m ensurepip + +if [ "$TRAVIS_OS_NAME" == "osx" ]; then + HOMEBREW_NO_AUTO_UPDATE=1 brew upgrade pyenv + # Pyenv requires minor revision, get the latest + PYENV_VERSION=$(pyenv install --list |grep $PYTHON_VER | sed -n "s/^[ \t]*\(${PYTHON_VER}\.*[0-9]*\).*/\1/p" | tail -n 1) + # Install version + pyenv install $PYENV_VERSION + # Use version for this + pyenv global $PYENV_VERSION + # Setup up path shims + eval "$(pyenv init -)" +fi +pip install --upgrade pip setuptools + +# Restore original directory +popd diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..c8caf59 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,38 @@ +***************** +API documentation +***************** + +.. testsetup:: + + import hsd + + +High level routines +=================== + +.. autofunction:: hsd.load_string + +.. autofunction:: hsd.load + +.. autofunction:: hsd.dump_string + +.. autofunction:: hsd.dump + + +Lower level building blocks +=========================== + +.. autoclass:: hsd.HsdParser + :members: + +.. autoclass:: hsd.HsdEventHandler + :members: + +.. autoclass:: hsd.HsdDictBuilder + :members: + +.. autoclass:: hsd.HsdDictWalker + :members: + +.. autoclass:: hsd.HsdFormatter + :members: diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..055c8ee --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,64 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +import doctest + +sys.path.insert(0, os.path.abspath('../src')) + +# -- Project information ----------------------------------------------------- + +project = 'hsd-python' +copyright = '2021, DFTB+ developers group' +author = 'DFTB+ developers group' + +# The full version, including alpha/beta/rc tags +release = '0.1' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. 
They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.napoleon' +] + +autodoc_member_order = 'bysource' + +doctest_default_flags = doctest.NORMALIZE_WHITESPACE + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +# html_theme = 'alabaster' +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] \ No newline at end of file diff --git a/docs/hsd.rst b/docs/hsd.rst new file mode 100644 index 0000000..b211865 --- /dev/null +++ b/docs/hsd.rst @@ -0,0 +1,270 @@ +************** +The HSD format +************** + +General description +=================== + +You can think about the Human-readable Structured Data format as a pleasant +representation of a tree structure. It can represent a subset of what you +can do for example with XML. The following constraints compared +to XML apply: + +* Every node of a tree, which is not empty, either contains further nodes + or data, but never both. + +* Every node may have a single (string) attribute only. + +These constraints allow a very natural looking formatting of the data. + +As an example, let's have a look at a data tree, which represents input +for scientific software. 
In the XML representation, it could be written as :: + + + + Yes + + + 77 + + + + + +The same information can be encoded in a much more natural and compact form in HSD +format as :: + + Hamiltonian { + Dftb { + Scc = Yes + Filling { + Fermi { + Temperature [Kelvin] = 77 + } + } + } + } + +The content of a node are passed either between an opening and a closing +curly brace or after an equals sign. In the latter case the end of the line will +be the closing delimiter. The attribute (typically the unit of the data +which the node contains) is specified between square brackets after +the node name. + +The equals sign can be used to assign data as a node content (provided +the data fits into one line), or to assign a single child node as content +for a given node. This leads to a compact and expressive notation for those +cases, where (by the semantics of the input) a given node is only allowed to +have a single child node as content. The tree above is a piece of a typical +DFTB+ input, where only one child node is allowed for the nodes ``Hamiltonian`` +and ``Filling``, respectively (They specify the type of the Hamiltonian +and the filling function). By making use of equals signs, the +simplified HSD representation can be as compact as :: + + Hamiltonian = Dftb { + Scc = Yes + Filling = Fermi { + Temperature [Kelvin] = 77 + } + } + +and still represent the same tree. + + +Mapping to dictionaries +======================= + +Being basically a subset of XML, HSD data is best represented as an XML +DOM-tree. However, very often a dictionary representation is more desirable, +especially when the language used to query and manipulate the tree offers +dictionaries as primary data type (e.g. Python). The data in an HSD input +can be easily represented with the help of nested dictionaries and lists. 
The +input from the previous section would have the following representation as +Python dictionary (or as a JSON formatted input file):: + + { + "Hamiltonian": { + "Dftb": { + "Scc": True, + "Filling": { + "Fermi": { + "Temperature": 77, + "Temperature.attrib": "Kelvin" + } + } + } + } + } + +The attribute of a node is stored under a special key containing the name of +the node and the ``.attrib`` suffix. + +One slight complication of the dictionary representation arises in the case +of a node which has multiple child nodes with the same name :: + + <ExternalField> + <PointCharges> + <GaussianBlurWidth>3</GaussianBlurWidth> + <CoordsAndCharges> + 3.3 -1.2 0.9 9.2 + 1.2 -3.4 5.6 -3.3 + </CoordsAndCharges> + </PointCharges> + <PointCharges> + <GaussianBlurWidth>10</GaussianBlurWidth> + <CoordsAndCharges> + 1.0 2.0 3.0 4.0 + -1.0 -2.0 -3.0 -4.0 + </CoordsAndCharges> + </PointCharges> + </ExternalField> + +While the HSD representation has no problem to cope with the situation :: + + ExternalField { + PointCharges { + GaussianBlurWidth = 3 + CoordsAndCharges { + 3.3 -1.2 0.9 9.2 + 1.2 -3.4 5.6 -3.3 + } + } + PointCharges { + GaussianBlurWidth = 10 + CoordsAndCharges { + 1.0 2.0 3.0 4.0 + -1.0 -2.0 -3.0 -4.0 + } + } + } + +a trick is needed for the dictionary / JSON representation, as multiple keys +with the same name are not allowed in a dictionary. Therefore, the repetitive +nodes will be mapped to one key, which will contain a list of dictionaries +(instead of a single dictionary as in the usual case):: + + { + "ExternalField": { + # Note the list of dictionaries here! + "PointCharges": [ + { + "GaussianBlurWidth": 3, + "CoordsAndCharges": [ + [3.3, -1.2, 0.9, 9.2], + [1.2, -3.4, 5.6, -3.3] + ] + }, + { + "GaussianBlurWidth": 10, + "CoordsAndCharges": [ + [1.0, 2.0, 3.0, 4.0 ], + [-1.0, -2.0, -3.0, -4.0 ] + ] + }, + ], + # The attributes also become a list. Due to technical reasons the + # dictbuilder always creates an attribute list for multiple nodes, + # even if none of the nodes carries an actual attribute. + "PointCharges.attrib": [None, None] + } + } + +The mapping works in both directions, so that this dictionary (or the JSON file +created from it) can be easily converted back to the HSD form again.
+ + +Processing related information +============================== + +Additional to the data stored in an HSD-file, further processing related +information can be recorded on demand. The current Python implementation is able +to record following additional data for each HSD node: + +* the line, where the node was defined in the input (helpful for printing out + informative error messages), + +* the name of the HSD node, as found in the input (useful if the tag names are + converted to lower case to ease case-insensitive handling of the input) and + +* whether an equals sign was used to open the block. + +If this information is being recorded, a special key with the +``.hsdattrib`` suffix will be generated for each node in the dictionary/JSON +presentation. The corresponding value will be a dictionary with those +information. + +As an example, let's store the input from the previous section :: + + Hamiltonian = Dftb { + Scc = Yes + Filling = Fermi { + Temperature [Kelvin] = 77 + } + } + +in the file `test.hsd`, parse it and convert the node names to lower case +(to make enable case-insensitive input processing). 
Using the Python command :: + + inpdict = hsd.load("test.hsd", lower_tag_names=True, include_hsd_attribs=True) + +will yield the following dictionary representation of the input:: + + { + 'hamiltonian.hsdattrib': {'equal': True, 'line': 0, 'tag': 'Hamiltonian'}, + 'hamiltonian': { + 'dftb.hsdattrib': {'line': 0, 'equal': False, 'tag': 'Dftb'}, + 'dftb': { + 'scc.hsdattrib': {'equal': True, 'line': 1, 'tag': 'Scc'}, + 'scc': True, + 'filling.hsdattrib': {'equal': True, 'line': 2, 'tag': 'Filling'}, + 'filling': { + 'fermi.hsdattrib': {'line': 2, 'equal': False, 'tag': 'Fermi'}, + 'fermi': { + 'temperature.attrib': 'Kelvin', + 'temperature.hsdattrib': {'equal': True, 'line': 3, + 'tag': 'Temperature'}, + 'temperature': 77 + } + } + } + } + } + +The recorded line numbers can be used to issue helpful error messages with +information about where the user should search for the problem. +The node names and formatting information about the equal sign ensure +that the formatting is similar to the original HSD, if the data is dumped +into the HSD format again. Dumping the dictionary with :: + + hsd.dump(inpdict, "test2-formatted.hsd", use_hsd_attribs=True) + +would indeed yield :: + + Hamiltonian = Dftb { + Scc = Yes + Filling = Fermi { + Temperature [Kelvin] = 77 + } + } + +which is basically identical to the original input. If the additional +processing information is not recorded when the data is loaded, or +it is not considered when the data is dumped as HSD again :: + + inpdict = hsd.load("test.hsd", lower_tag_names=True) + hsd.dump(inpdict, "test2-unformatted.hsd") + +the resulting formatting will differ more strongly from the original HSD :: + + hamiltonian { + dftb { + scc = Yes + filling { + fermi { + temperature [Kelvin] = 77 + } + } + } + } + +Still nice and readable, but less compact and with different casing. diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..e766684 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,15 @@ +..
hsd-python documentation master file, created by + sphinx-quickstart on Mon Sep 13 11:38:29 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +######################## +HSD-python documentation +######################## + +.. toctree:: + :maxdepth: 2 + + introduction + hsd + api diff --git a/docs/introduction.rst b/docs/introduction.rst new file mode 100644 index 0000000..38fd346 --- /dev/null +++ b/docs/introduction.rst @@ -0,0 +1,119 @@ +************ +Introduction +************ + +This package contains utilities to read and write files in the Human-friendly +Structured Data (HSD) format. + +The HSD-format is very similar to XML, JSON and YAML, but tries to minimize the +effort for **humans** to read and write it. It omits special characters as much +as possible (in contrast to XML and JSON) and is not indentation dependent (in +contrast to YAML). It was developed originally as the input format for the +scientific simulation tool (`DFTB+ <https://github.com/dftbplus/dftbplus>`_), +but is of general purpose. Data stored in HSD can be easily mapped to a subset +of JSON, YAML or XML and *vice versa*. + +This document describes hsd-python version 0.1.
+ + +Installation +============ + +The package can be installed via conda-forge:: + + conda install --channel "conda-forge" hsd-python + +Alternatively, the package can be downloaded and installed via pip into the +active Python interpreter (preferably using a virtual python environment) by :: + + pip install hsd-python + +or into the user space by issuing :: + + pip install --user hsd-python + + +Quick tutorial +============== + +A typical, self-explanatory input written in HSD looks like :: + + driver { + conjugate_gradients { + moved_atoms = 1 2 "7:19" + max_steps = 100 + } + } + + hamiltonian { + dftb { + scc = yes + scc_tolerance = 1e-10 + mixer { + broyden {} + } + filling { + fermi { + # This is a comment which will be ignored + # Note the attribute (unit) of the field below + temperature [kelvin] = 100 + } + } + k_points_and_weights { + supercell_folding { + 2 0 0 + 0 2 0 + 0 0 2 + 0.5 0.5 0.5 + } + } + } + } + +The above input can be parsed into a Python dictionary with:: + + import hsd + hsdinput = hsd.load("test.hsd") + +The dictionary ``hsdinput`` will then look like:: + + { + "driver": { + "conjugate_gradients": { + "moved_atoms": [1, 2, "7:19"], + "max_steps": 100 + } + }, + "hamiltonian": { + "dftb": { + "scc": True, + "scc_tolerance": 1e-10, + "mixer": { + "broyden": {} + }, + "filling": { + "fermi": { + "temperature": 100, + "temperature.attrib": "kelvin" + } + }, + "k_points_and_weights": { + "supercell_folding": [ + [2, 0, 0], + [0, 2, 0], + [0, 0, 2], + [0.5, 0.5, 0.5] + ] + } + } + } + } + +Being a simple Python dictionary, it can be easily queried and manipulated in +Python :: + + hsdinput["driver"]["conjugate_gradients"]["max_steps"] = 200 + +and then stored again in HSD format :: + + hsd.dump(hsdinput, "test2.hsd") diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..8084272 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set
SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9787c3b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..0cf64cf --- /dev/null +++ b/setup.cfg @@ -0,0 +1,31 @@ +[metadata] +name = hsd-python +version = 0.1 +author = DFTB+ developers group +author_email = info@dftbplus.org +url = https://github.com/dftbplus/hsd-python +description = + Tools for reading, writing and manipulating data stored in the human-friendly + structured data (HSD) format +long_description = file: README.rst +long_description_content_type = text/x-rst +license = BSD +license_file = LICENSE +platform = any +classifiers = + Intended Audience :: Developers + License :: OSI Approved :: BSD License + Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.7 + +[options] +include_package_data = True +package_dir = + = src +packages = hsd + +[options.packages.find] +where = src diff --git a/src/hsd.py b/src/hsd.py deleted file mode 100644 index d4477c5..0000000 --- a/src/hsd.py +++ /dev/null @@ -1,192 +0,0 @@ 
-#!/usr/bin/env python3 -#------------------------------------------------------------------------------# -# hsd: package for manipulating HSD-formatted data # -# Copyright (C) 2020 Bálint Aradi, Universität Bremen # -# # -# See the LICENSE file for terms of usage and distribution. # -#------------------------------------------------------------------------------# -# -""" -Provides functionality to convert Python structures to HSD -""" -import io -import numpy as np - -__all__ = ['dump', 'dumps'] - - -_INDENT_STR = " " - -# String quoting delimiters (must be at least two) -_QUOTING_CHARS = "\"'" - -# Suffix for appending attributes -_ATTRIBUTE_SUFFIX = ".attribute" - - -def dump(obj, fobj): - """Serializes an object to a file in HSD format. - - Args: - obj: Object to be serialized in HSD format - fobj: File like object to write the result to. - """ - - if isinstance(obj, dict): - _dump_dict(obj, fobj, "") - else: - msg = "Invalid object type" - raise TypeError(msg) - - -def dumps(obj): - """Serializes an object to string in HSD format. - - Args: - obj: Object to serialize. - - Returns: - HSD formatted string. 
- """ - result = io.StringIO() - dump(obj, result) - return result.getvalue() - - -def _dump_dict(obj, fobj, indentstr): - for key, value in obj.items(): - if key.endswith(_ATTRIBUTE_SUFFIX): - if key[:-len(_ATTRIBUTE_SUFFIX)] in obj: - continue - else: - msg = "Attribute '{}' without corresponding tag '{}'"\ - .format(key, key[:-len(_ATTRIBUTE_SUFFIX)]) - raise ValueError(msg) - attrib = obj.get(key + _ATTRIBUTE_SUFFIX) - if attrib is None: - attribstr = "" - elif not isinstance(attrib, str): - msg = "Invalid data type ({}) for '{}'"\ - .format(str(type(attrib)), key + ".attribute") - raise ValueError(msg) - else: - attribstr = " [" + attrib + "]" - if isinstance(value, dict): - if value: - fobj.write("{}{}{} {{\n".format(indentstr, key, attribstr)) - _dump_dict(value, fobj, indentstr + _INDENT_STR) - fobj.write("{}}}\n".format(indentstr)) - else: - fobj.write("{}{}{} {{}}\n".format(indentstr, key, attribstr)) - elif isinstance(value, list) and value and isinstance(value[0], dict): - for item in value: - fobj.write("{}{}{} {{\n".format(indentstr, key, attribstr)) - _dump_dict(item, fobj, indentstr + _INDENT_STR) - fobj.write("{}}}\n".format(indentstr)) - else: - valstr = _get_hsd_rhs(value, indentstr) - fobj.write("{}{}{} {}\n"\ - .format(indentstr, key, attribstr, valstr)) - - -def _get_hsd_rhs(obj, indentstr): - - if isinstance(obj, list): - objstr = _list_to_hsd(obj) - elif isinstance(obj, np.ndarray): - objstr = _list_to_hsd(obj.tolist()) - else: - objstr = _item_to_hsd(obj) - if "\n" in objstr: - newline_indent = "\n" + indentstr + _INDENT_STR - rhs = ("= {" + newline_indent + objstr.replace("\n", newline_indent) - + "\n" + indentstr + "}") - else: - rhs = "= " + objstr - return rhs - - -def _list_to_hsd(lst): - if lst and isinstance(lst[0], list): - lines = [] - for innerlist in lst: - lines.append(" ".join([_item_to_hsd(item) for item in innerlist])) - return "\n".join(lines) - return " ".join([_item_to_hsd(item) for item in lst]) - - -def 
_item_to_hsd(item): - - if isinstance(item, (int, float)): - return str(item) - elif isinstance(item, bool): - return "Yes" if item else "No" - elif isinstance(item, str): - return _str_to_hsd(item) - else: - msg = "Data type {} can not be converted to HSD string"\ - .format(type(item)) - raise TypeError(msg) - - -def _str_to_hsd(string): - is_present = [qc in string for qc in _QUOTING_CHARS] - if sum(is_present) > 1: - msg = "String '{}' can not be quoted correctly".format(string) - raise ValueError(msg) - delimiter = _QUOTING_CHARS[0] if not is_present[0] else _QUOTING_CHARS[1] - return delimiter + string + delimiter - - - -if __name__ == "__main__": - INPUT = { - "Driver": {}, - "Hamiltonian": { - "DFTB": { - "Scc": True, - "SccTolerance": 1e-10, - "MaxSccIterations": 1000, - "Mixer": { - "Broyden": {} - }, - "MaxAngularMomentum": { - "O": "p", - "H": "s" - }, - "Filling": { - "Fermi": { - "Temperature": 1e-8, - "Temperature.attribute": "Kelvin" - } - }, - "KPointsAndWeights": { - "SupercellFolding": [[2, 0, 0], [0, 2, 0], [0, 0, 2], - [0.5, 0.5, 0.5]] - }, - "ElectricField": { - "PointCharges": { - "CoordsAndCharges": np.array( - [[-0.94, -9.44, 1.2, 1.0], - [-0.94, -9.44, 1.2, -1.0]]) - } - }, - "SelectSomeAtoms": [1, 2, "3:-3"] - } - }, - "Analysis": { - "ProjectStates": { - "Region": [ - { - "Atoms": [1, 2, 3], - "Label": "region1", - }, - { - "Atoms": np.array([1, 2, 3]), - "Label": "region2", - } - ] - } - } - } - print(dumps(INPUT)) diff --git a/src/hsd/__init__.py b/src/hsd/__init__.py new file mode 100644 index 0000000..4faafbd --- /dev/null +++ b/src/hsd/__init__.py @@ -0,0 +1,18 @@ +#--------------------------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. 
def unquote(txt):
    """Returns the text without its enclosing quotes, if it is quoted.

    Args:
        txt: Text to process.

    Returns:
        The text without its first and last character, if it is at least two
        characters long and starts and ends with the same quotation character
        (single or double quote). Otherwise the unchanged text.
    """
    if len(txt) < 2:
        return txt
    opening, closing = txt[0], txt[-1]
    is_quoted = opening in "\"'" and closing == opening
    return txt[1:-1] if is_quoted else txt
String quoting delimiters (must be at least two) +QUOTING_CHARS = "\"'" + +# Special characters +SPECIAL_CHARS = "{}[]= " diff --git a/src/hsd/dict.py b/src/hsd/dict.py new file mode 100644 index 0000000..2909184 --- /dev/null +++ b/src/hsd/dict.py @@ -0,0 +1,267 @@ +#--------------------------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # +#--------------------------------------------------------------------------------------------------# +# +""" +Contains an event-driven builder for dictionary based (JSON-like) structure +""" +import re +from typing import List, Tuple, Union +from hsd.common import HSD_ATTRIB_NAME, np, ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX, HsdError,\ + QUOTING_CHARS, SPECIAL_CHARS +from hsd.eventhandler import HsdEventHandler, HsdEventPrinter + +_ItemType = Union[float, int, bool, str] + +_DataType = Union[_ItemType, List[_ItemType]] + +# Pattern to transform HSD string values into actual Python data types +_TOKEN_PATTERN = re.compile(r""" +# Integer +(?:\s*(?:^|(?<=\s))(?P[+-]?[0-9]+)(?:\s*$|\s+)) +| +# Floating point +(?:\s*(?:^|(?<=\s)) +(?P[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)(?:$|(?=\s+))) +| +# Logical (Yes/No) +(?:\s*(?:^|(?<=\s))(?P[Yy][Ee][Ss]|[Nn][Oo])(?:$|(?=\s+))) +| +# Quoted string +(?:\s*(?:(?P(?P['"]).*?(?P=quote)) +| +# Unquoted string +(?P.+?))(?:$|\s+)) +""", re.VERBOSE | re.MULTILINE) + + +class HsdDictBuilder(HsdEventHandler): + """Specific HSD event handler, which builds a nested Python dictionary. + + Args: + flatten_data: Whether multiline data in the HSD input should be + flattened into a single list. Othewise a list of lists is created, with one list for + every line (default). + lower_tag_names: Whether tag names should be all converted to lower case (to ease case + insensitive processing). Default: False. 
def close_tag(self, tagname):
    """Stores the content of the tag being closed in its parent block.

    The first occurrence of a tag is stored directly under its key (either
    the collected data or the nested dictionary). If the key is already
    present in the parent block, the occurrences are collected in a list of
    dictionaries, in which data items are wrapped as ``{None: data}``. The
    attributes (and, if requested, the HSD attributes) of repeated tags are
    collected in lists as well.

    Args:
        tagname: Name of the tag which had been closed.
    """
    attrib, hsdattrib = self._attribs.pop(-1)
    parentblock = self._parentblocks.pop(-1)
    key = tagname.lower() if self._lower_tag_names else tagname
    # The content is stored under ``key``, so earlier occurrences must be
    # looked up under ``key`` as well. Looking up ``tagname`` would miss them
    # whenever tag names are lowered, and the repeated tag would overwrite
    # the previous one instead of being appended to it.
    prevcont = parentblock.get(key)

    has_data = self._data is not None
    if prevcont is None:
        parentblock[key] = self._data if has_data else self._curblock
    else:
        newitem = {None: self._data} if has_data else self._curblock
        if isinstance(prevcont, list) and prevcont and isinstance(prevcont[0], dict):
            # Tag has already been converted to a list of nodes
            prevcont.append(newitem)
        elif isinstance(prevcont, dict):
            parentblock[key] = [prevcont, newitem]
        else:
            # Previous occurrence was plain data, wrap it as well
            parentblock[key] = [{None: prevcont}, newitem]

    if attrib and prevcont is None:
        parentblock[key + ATTRIB_SUFFIX] = attrib
    elif prevcont is not None:
        # Repeated tags always get an attribute list (with None entries for
        # occurrences without attribute), one entry per occurrence.
        prevattrib = parentblock.get(key + ATTRIB_SUFFIX)
        if isinstance(prevattrib, list):
            prevattrib.append(attrib)
        else:
            parentblock[key + ATTRIB_SUFFIX] = [prevattrib, attrib]

    if self._include_hsd_attribs:
        if self._lower_tag_names:
            # Record the original tag name, as the key has been lowered
            hsdattrib = {} if hsdattrib is None else hsdattrib
            hsdattrib[HSD_ATTRIB_NAME] = tagname
        if prevcont is None:
            parentblock[key + HSD_ATTRIB_SUFFIX] = hsdattrib
        else:
            prevhsdattrib = parentblock.get(key + HSD_ATTRIB_SUFFIX)
            if isinstance(prevhsdattrib, list):
                prevhsdattrib.append(hsdattrib)
            else:
                parentblock[key + HSD_ATTRIB_SUFFIX] = [prevhsdattrib, hsdattrib]
    self._curblock = parentblock
    self._data = None
def walk(self, dictobj):
    """Walks through the dictionary and generates HSD events.

    Entries holding attributes or HSD attributes are not reported as tags,
    they are passed along when the tag they belong to is opened.

    Args:
        dictobj: Dictionary to walk through.
    """
    handler = self._eventhandler
    for tag, content in dictobj.items():

        # Attribute entries are handed over together with their tag
        if tag.endswith((ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX)):
            continue

        hsdattrib = dictobj.get(tag + HSD_ATTRIB_SUFFIX)
        attrib = dictobj.get(tag + ATTRIB_SUFFIX)

        # Single nested node
        if isinstance(content, dict):
            handler.open_tag(tag, attrib, hsdattrib)
            self.walk(content)
            handler.close_tag(tag)
            continue

        # Plain data
        is_node_list = isinstance(content, list) and content and isinstance(content[0], dict)
        if not is_node_list:
            handler.open_tag(tag, attrib, hsdattrib)
            handler.add_text(_to_text(content))
            handler.close_tag(tag)
            continue

        # Tag occurring several times: one open/close pair per list item,
        # the attributes are taken from the list position of the item.
        for ind, item in enumerate(content):
            item_hsdattrib = hsdattrib[ind] if hsdattrib else None
            item_attrib = attrib[ind] if attrib else None
            handler.open_tag(tag, item_attrib, item_hsdattrib)
            if None in item:
                # Data item stored under the None key
                handler.add_text(_to_text(item[None]))
            else:
                self.walk(item)
            handler.close_tag(tag)
def _str_to_hsd(string):
    """Returns the string in the form it should be written to HSD.

    A string without quotation characters is quoted if it contains any of
    the special characters. A string containing one kind of quotation
    character is enclosed in the other kind, unless its first character is
    already a quotation character. All other strings are returned unquoted.

    Raises:
        ValueError: If the string contains more than one kind of quotation
            character.
    """
    quote_found = [quote in string for quote in QUOTING_CHARS]
    nfound = sum(quote_found)
    if nfound > 1:
        msg = "String '{}' can not be quoted correctly".format(string)
        raise ValueError(msg)
    if nfound == 1:
        needs_quoting = string[0] not in QUOTING_CHARS
        quote = QUOTING_CHARS[1] if quote_found[0] else QUOTING_CHARS[0]
    else:
        needs_quoting = any(special in string for special in SPECIAL_CHARS)
        quote = QUOTING_CHARS[0]
    delimiter = quote if needs_quoting else ""
    return delimiter + string + delimiter
class HsdEventPrinter(HsdEventHandler):
    """Minimal demonstration class for event handlers.

    This specific implementation prints every event it receives, indented
    according to the nesting depth of the currently opened tags. Subclasses
    should override the public methods to customize the behavior.
    """

    def __init__(self):
        """Initializes the default event printer."""
        self._indentlevel = 0
        self._indentstr = " "


    def open_tag(self, tagname: str, attrib: str, hsdattrib: dict):
        prefix = self._indentstr * self._indentlevel
        print(f"{prefix}OPENING TAG: {tagname}")
        print(f"{prefix}ATTRIBUTE: {attrib}")
        print(f"{prefix}HSD ATTRIBUTE: {str(hsdattrib)}")
        # Everything up to the matching close_tag() is one level deeper
        self._indentlevel = self._indentlevel + 1


    def close_tag(self, tagname: str):
        # Go back to the level of the tag before printing
        self._indentlevel = self._indentlevel - 1
        prefix = self._indentstr * self._indentlevel
        print(f"{prefix}CLOSING TAG: {tagname}")


    def add_text(self, text: str):
        prefix = self._indentstr * self._indentlevel
        print(f"{prefix}Received text: {text}")
def open_tag(self, tagname: str, attrib: str, hsdattrib: dict):
    """Writes the opening of a tag (name and optional attribute).

    If the tag is the first child of an already opened tag, the separator
    following the containing tag (" = " or an opening curly brace) is
    written first.

    Args:
        tagname: Name of the tag being opened.
        attrib: Attribute of the tag as string or None.
        hsdattrib: Dictionary with the HSD attributes of the tag or None. It
            is only consulted if the formatter was set up to use HSD
            attributes.

    Raises:
        ValueError: If attrib is neither None nor a string.
    """

    if attrib is None:
        attribstr = ""
    elif not isinstance(attrib, str):
        msg = f"Invalid attribute data type ({str(type(attrib))}) for "\
            f"'{tagname}'"
        raise ValueError(msg)
    else:
        attribstr = " [" + attrib + "]"

    # First child of an opened tag: the containing tag has only written its
    # name so far, the separator after it is still missing.
    if self._level and not self._nr_children[-1]:
        # Look up whether the previous (containing) node should be followed
        # by an equal sign
        equal = self._followed_by_equal[-1]
        if equal:
            # Child continues on the line of its parent, no indentation
            self._fobj.write(" = ")
            indentstr = ""
        else:
            self._fobj.write(" {\n")
            self._indent_level += 1
            indentstr = self._indent_level * _INDENT_STR
    else:
        indentstr = self._indent_level * _INDENT_STR

    # Prefer the tag name recorded in the HSD attributes, if there is one
    if self._use_hsd_attribs and hsdattrib is not None:
        tagname = hsdattrib.get(HSD_ATTRIB_NAME, tagname)

    self._fobj.write(f"{indentstr}{tagname}{attribstr}")

    # The previous (containing) node now has one more child
    self._nr_children[-1] += 1

    # The currently opened node has no children so far.
    self._nr_children.append(0)
    self._level += 1

    # Remember whether this tag should be followed by an equal sign
    # (None, if nothing has been recorded about it)
    equal = None
    if hsdattrib is not None and self._use_hsd_attribs:
        equal = hsdattrib.get(HSD_ATTRIB_EQUAL)
    self._followed_by_equal.append(equal)
def load(hsdfile: Union[TextIO, str], lower_tag_names: bool = False,
         include_hsd_attribs: bool = False, flatten_data: bool = False) -> dict:
    """Loads a file with HSD-formatted data into a Python dictionary

    Args:
        hsdfile: Name of file or file like object to read the HSD data from
        lower_tag_names: When set, all tag names will be converted to
            lower-case (practical, when the input should be treated case
            insensitive). If ``include_hsd_attribs`` is set, the original tag
            name will be stored among the HSD attributes.
        include_hsd_attribs: Whether the HSD-attributes (processing related
            attributes, like original tag name, line information, etc.) should
            be stored. Use it, if you wish to keep the formatting of the data
            close to the original on writing (e.g. lowered tag names
            converted back to their original form, equal signs between parent
            and only child kept, instead of converted to curly braces).
        flatten_data: Whether multiline data in the HSD input should be
            flattened into a single list. Otherwise a list of lists is
            created, with one list for every line (default).

    Returns:
        Dictionary representing the HSD data.

    Examples:
        See :func:`hsd.load_string` for examples of usage.
    """
    builder = HsdDictBuilder(
        flatten_data=flatten_data,
        lower_tag_names=lower_tag_names,
        include_hsd_attribs=include_hsd_attribs,
    )
    parser = HsdParser(eventhandler=builder)
    if not isinstance(hsdfile, str):
        # Already a file like object, parse it directly
        parser.parse(hsdfile)
        return builder.hsddict
    with open(hsdfile, "r") as fobj:
        parser.parse(fobj)
    return builder.hsddict
def dump(data: dict, hsdfile: Union[TextIO, str],
         use_hsd_attribs: bool = False):
    """Dumps data to a file in HSD format.

    Args:
        data: Dictionary like object to be written in HSD format
        hsdfile: Name of file or file like object to write the result to.
        use_hsd_attribs: Whether HSD attributes in the data structure should
            be used to format the output.

            This option can be used for example to restore original tag
            names, if the file was loaded with the ``lower_tag_names`` and
            ``include_hsd_attribs`` options set, or to keep the equal signs
            between a parent and its only child.

    Raises:
        TypeError: if object is not a dictionary instance.

    Examples:

        See :func:`hsd.load_string` for an example.
    """
    if not isinstance(data, dict):
        raise TypeError("Invalid object type")
    if not isinstance(hsdfile, str):
        # Already a file like object, write into it directly
        _dump_dict(data, hsdfile, use_hsd_attribs)
        return
    with open(hsdfile, "w") as fobj:
        _dump_dict(data, fobj, use_hsd_attribs)
+""" +from typing import Optional, TextIO, Union +from hsd import common +from hsd.eventhandler import HsdEventHandler, HsdEventPrinter + + +SYNTAX_ERROR = 1 +UNCLOSED_TAG_ERROR = 2 +UNCLOSED_ATTRIB_ERROR = 3 +UNCLOSED_QUOTATION_ERROR = 4 +ORPHAN_TEXT_ERROR = 5 + +_GENERAL_SPECIALS = "{}[]<=\"'#;" + +_ATTRIB_SPECIALS = "]\"'" + + +class HsdParser: + """Event based parser for the HSD format. + + Arguments: + eventhandler: Object which should handle the HSD-events triggered + during parsing. When not specified, HsdEventPrinter() is used. + + Examples: + >>> from io import StringIO + >>> dictbuilder = hsd.HsdDictBuilder() + >>> parser = hsd.HsdParser(eventhandler=dictbuilder) + >>> hsdfile = StringIO(\"\"\" + ... Hamiltonian { + ... Dftb { + ... Scc = Yes + ... Filling = Fermi { + ... Temperature [Kelvin] = 100 + ... } + ... } + ... } + ... \"\"\") + >>> parser.parse(hsdfile) + >>> dictbuilder.hsddict + {'Hamiltonian': {'Dftb': {'Scc': True, 'Filling': {'Fermi': + {'Temperature': 100, 'Temperature.attrib': 'Kelvin'}}}}} + """ + + def __init__(self, eventhandler: Optional[HsdEventHandler] = None): + """Initializes the parser. + + Args: + eventhandler: Instance of the HsdEventHandler class or its children. + """ + if eventhandler is None: + self._eventhandler = HsdEventPrinter() + else: + self._eventhandler = eventhandler + + self._fname = "" # name of file being processed + self._checkstr = _GENERAL_SPECIALS # special characters to look for + self._oldcheckstr = "" # buffer fo checkstr + self._opened_tags = [] # info about opened tags + self._buffer = [] # buffering plain text between lines + self._attrib = None # attribute for current tag + self._hsdattrib = {} # hsd-options for current tag + self._currline = 0 # nr. 
def parse(self, fobj: Union[TextIO, str]):
    """Parses the provided file-like object.

    The parser will process the data and trigger the corresponding events
    in the eventhandler which was passed at initialization.

    Args:
        fobj: File like object or name of a file containing the data. A file
            opened by the parser itself is closed again, a file like object
            passed by the caller is left open.

    Raises:
        HsdError: If the input is syntactically incorrect or ends with an
            unclosed quotation, attribute or tag or with orphan text.
    """
    isfilename = isinstance(fobj, str)
    if isfilename:
        fp = open(fobj, "r")
        self._fname = fobj
    else:
        fp = fobj
    try:
        for line in fp.readlines():
            self._parse(line)
            self._currline += 1
    finally:
        # Parsing errors are raised as exceptions, make sure that a file
        # opened here does not stay open in that case.
        if isfilename:
            fp.close()

    # Check for errors
    if self._opened_tags:
        # Line in which the innermost unclosed item had been opened
        line0 = self._opened_tags[-1][1]
    else:
        line0 = 0
    if self._inside_quote:
        self._error(UNCLOSED_QUOTATION_ERROR, (line0, self._currline))
    elif self._inside_attrib:
        self._error(UNCLOSED_ATTRIB_ERROR, (line0, self._currline))
    elif self._opened_tags:
        self._error(UNCLOSED_TAG_ERROR, (line0, line0))
    elif ("".join(self._buffer)).strip():
        self._error(ORPHAN_TEXT_ERROR, (line0, self._currline))
self._buffer.append(before + sign) + + # Equal sign + elif sign == "=": + # Ignore if followed by "{" (DFTB+ compatibility) + if after.lstrip().startswith("{"): # _oldbefore may already contain the tagname, if the # tagname was followed by an attribute -> append + self._oldbefore += before + else: + self._hsdattrib[common.HSD_ATTRIB_EQUAL] = True + self._starttag(before, False) + self._after_equal_sign = True + + # Opening tag by curly brace + elif sign == "{": + #self._has_child = True + self._hsdattrib[common.HSD_ATTRIB_EQUAL] = False + self._starttag(before, self._after_equal_sign) + self._buffer = [] + self._after_equal_sign = False + + # Closing tag by curly brace + elif sign == "}": + self._text("".join(self._buffer) + before) + self._buffer = [] + # If 'test { a = 12 }' occurs, curly brace closes two tags + if self._after_equal_sign: + self._after_equal_sign = False + self._closetag() + self._closetag() + + # Closing tag by semicolon + elif sign == ";" and self._after_equal_sign: + self._after_equal_sign = False + self._text(before) + self._closetag() + + # Comment line + elif sign == "#": + self._buffer.append(before) + after = "" + + # Opening attribute specification + elif sign == "[": + if "".join(self._buffer).strip(): + self._error(SYNTAX_ERROR, (self._currline, self._currline)) + self._oldbefore = before + self._buffer = [] + self._inside_attrib = True + self._opened_tags.append(("[", self._currline, None, None, None)) + self._checkstr = _ATTRIB_SPECIALS + + # Closing attribute specification + elif sign == "]": + value = "".join(self._buffer) + before + self._attrib = value.strip() + self._inside_attrib = False + self._buffer = [] + self._opened_tags.pop() + self._checkstr = _GENERAL_SPECIALS + + # Quoting strings + elif sign in ("'", '"'): + if self._inside_quote: + self._checkstr = self._oldcheckstr + self._inside_quote = False + self._buffer.append(before + sign) + self._opened_tags.pop() + else: + self._oldcheckstr = self._checkstr + 
def _closetag(self):
    """Closes the innermost opened tag and notifies the event handler.

    If the closed tag was flagged to close its predecessor as well, the
    closing continues with the tag opened before it.
    """
    while True:
        if not self._opened_tags:
            self._error(SYNTAX_ERROR, (0, self._currline))
        self._buffer = []
        # Restore the child / text state stored when the tag was opened
        (tagname, _, closeprev, self._has_child,
         self._has_text) = self._opened_tags.pop()
        self._eventhandler.close_tag(tagname)
        if not closeprev:
            break
common.unquote(fname.strip()) + parser = HsdParser(eventhandler=self._eventhandler) + parser.parse(fname) + + + @staticmethod + def _include_txt(fname): + fname = common.unquote(fname.strip()) + with open(fname, "r") as fp: + txt = fp.read() + return txt + + + def _error(self, errorcode, lines): + error_msg = ( + "Parsing error ({}) between lines {} - {} in file '{}'.".format( + errorcode, lines[0] + 1, lines[1] + 1, self._fname)) + raise common.HsdError(error_msg) + + + +def _splitbycharset(txt, charset): + """Splits a string at the first occurrence of a character in a set. + + Args: + txt: Text to split. + chars: Chars to look for. + + Returns: + Tuple (char, before, after). Char is the character which had been found + (or empty string if nothing was found). Before is the substring before + the splitting character (or the entire string). After is the substring + after the splitting character (or empty string). + """ + for firstpos, char in enumerate(txt): + if char in charset: + return txt[firstpos], txt[:firstpos], txt[firstpos + 1:] + return '', txt, '' diff --git a/test/test.hsd b/test/test.hsd new file mode 100644 index 0000000..20f246b --- /dev/null +++ b/test/test.hsd @@ -0,0 +1,58 @@ +Geometry { + GenFormat = { + 3 C + O H + 1 1 0.0 0.0 0.0 + 2 2 0.0 0.5 0.5 + 3 2 0.0 0.5 -0.5 + } +} +Driver {} +Hamiltonian = DFTB { + Scc = Yes + SccTolerance = 1e-10 + MaxSccIterations = 1000 + Mixer = Broyden {} + MaxAngularMomentum { + H = SelectedShells {"s" "s" } + O = SelectedShells {"s" "p" } + } + Dispersion = SlaterKirkwood { + PolarRadiusCharge [AA^3,AA,] = { + 1.030000 3.800000 2.820000 + } + } + # Adding arbitrary comment, this should not change parsing result + Filling = Fermi { + Temperature [Kelvin] = 1e-08 + } + KPointsAndWeights { + SupercellFolding = { + 2 0 0 + 0 2 0 + 0 0 2 + 0.5 0.5 0.5 + } + } + ElectricField { + PointCharges { + CoordsAndCharges = { + -0.94 -9.44 1.2 1.0 + -0.94 -9.44 1.2 -1.0 + } + } + } + SelectSomeAtoms = 1 2 " 3 : -3 " +} 
+Analysis { + ProjectStates { + Region { + Atoms = 1 2 3 + Label = "region1" + } + REgion { + Atoms = 1 2 3 + Label = "region2" + } + } +} diff --git a/test/test_dict.py b/test/test_dict.py new file mode 100644 index 0000000..ebdfef2 --- /dev/null +++ b/test/test_dict.py @@ -0,0 +1,202 @@ +#!/bin/env python3 +#--------------------------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # +#--------------------------------------------------------------------------------------------------# +# +"""Tests for the dictbuilder class""" + +import io +import pytest +import hsd + +# Some abbreviations +_HSD_LINE = hsd.HSD_ATTRIB_LINE +_HSD_EQUAL = hsd.HSD_ATTRIB_EQUAL +_HSD_NAME = hsd.HSD_ATTRIB_NAME + + +# General test list format for valid tests +# [("Test name", ([List of HSD events], expected dictionary outcome))] + +# Tests without hsd attribute recording +_TESTS_NO_HSDATTRIB = [ + ( + "Simple", ( + "Test {}", + {"Test": {}}, + ) + ), + ( + "Data with quoted strings", ( + "O = SelectedShells { \"s\" \"p\" }", + {"O": {"SelectedShells": ['"s"', '"p"']}}, + ) + ), + ( + "Attribute containing comma", ( + "PolarRadiusCharge [AA^3,AA,] = {\n1.030000 3.800000 2.820000\n}", + {"PolarRadiusCharge": [1.03, 3.8, 2.82], "PolarRadiusCharge.attrib": "AA^3,AA,"}, + ) + ), + ( + "Duplicate node entry", ( + "a { b = 1 }\na { b = 2 }\n", + {"a.attrib": [None, None], "a": [{"b": 1}, {"b": 2}]}, + ) + ), + ( + "Duplicate value entry", ( + "a = 1\na = 2", + {"a.attrib": [None, None], "a": [{None: 1}, {None: 2}]}, + ) + ), +] +_TESTS_NO_HSDATTRIB_NAMES, _TESTS_NO_HSDATTRIB_CASES = zip(*_TESTS_NO_HSDATTRIB) + + +# Tests with HSD attribute recording +# The input string should be formatted the same way as it comes out from the formatter since +# these tests are also used to test backwards direction 
(dictionary -> string). +_TESTS_HSDATTRIB = [ + ( + "Simple", ( + "Test {}\n", + {"Test.hsdattrib": {_HSD_LINE: 0, _HSD_EQUAL: False}, "Test": {}} + ) + ), + ( + "Data with quoted strings", ( + "O = SelectedShells {\n \"s\" \"p\"\n}\n", + { + "O.hsdattrib": {_HSD_EQUAL: True, _HSD_LINE: 0}, + "O": { + "SelectedShells.hsdattrib": {_HSD_LINE: 0, _HSD_EQUAL: False}, + "SelectedShells": ['"s"', '"p"'] + } + } + ) + ), + ( + "Duplicate node", ( + "a {\n b = 1\n}\na {\n b = 2\n}\n", + { + "a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: False}, + {_HSD_LINE: 3, _HSD_EQUAL: False}], + "a.attrib": [None, None], + "a": [ + {"b.hsdattrib": {_HSD_LINE: 1, _HSD_EQUAL: True}, "b": 1}, + {"b.hsdattrib": {_HSD_LINE: 4, _HSD_EQUAL: True}, "b": 2} + ] + }, + ) + ), + ( + "Duplicate value", ( + "a = 1\na = 2\n", + { + "a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: True}, {_HSD_LINE: 1, _HSD_EQUAL: True}], + "a.attrib": [None, None], + "a": [{None: 1}, {None: 2}] + }, + ) + ), + ( + "Triple value with attrib", ( + "a = 1\na = 2\na [someunit] {\n 3\n}\n", + { + "a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: True}, {_HSD_LINE: 1, _HSD_EQUAL: True}, + {_HSD_LINE: 2, _HSD_EQUAL: False}], + "a.attrib": [None, None, "someunit"], + "a": [{None: 1}, {None: 2}, {None: 3}] + }, + ) + ), + +] +_TESTS_HSDATTRIB_NAMES, _TESTS_HSDATTRIB_CASES = zip(*_TESTS_HSDATTRIB) + + +# Tests with HSD attribute recording and tag name lowering switched on +# The input string should be formatted the same way as it comes out from the formatter since +# these tests are also used to test backwards direction (dictionary -> string). 
+_TESTS_HSDATTRIB_LOWER = [ + ( + "Simple", ( + "Test {}\n", + {"test.hsdattrib": {_HSD_NAME: "Test", _HSD_LINE: 0, _HSD_EQUAL: False}, "test": {}} + ) + ), +] +_TESTS_HSDATTRIB_LOWER_NAMES, _TESTS_HSDATTRIB_LOWER_CASES = zip(*_TESTS_HSDATTRIB_LOWER) + + +@pytest.mark.parametrize( + "hsdstr,hsddict", + _TESTS_NO_HSDATTRIB_CASES, + ids=_TESTS_NO_HSDATTRIB_NAMES +) +def test_dict_builder_nohsdattr(hsdstr, hsddict): + """Test transformation from hsd to dictionary without HSD attributes.""" + dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=False) + parser = hsd.HsdParser(eventhandler=dictbuilder) + fobj = io.StringIO(hsdstr) + parser.parse(fobj) + assert dictbuilder.hsddict == hsddict + + +@pytest.mark.parametrize( + "hsdstr,hsddict", + _TESTS_HSDATTRIB_CASES, + ids=_TESTS_HSDATTRIB_NAMES +) +def test_dict_builder_hsdattr(hsdstr, hsddict): + """Test transformation from hsd to dictionary with HSD attributes.""" + dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=True) + parser = hsd.HsdParser(eventhandler=dictbuilder) + fobj = io.StringIO(hsdstr) + parser.parse(fobj) + assert dictbuilder.hsddict == hsddict + + +@pytest.mark.parametrize( + "hsdstr,hsddict", + _TESTS_HSDATTRIB_LOWER_CASES, + ids=_TESTS_HSDATTRIB_LOWER_NAMES +) +def test_dict_builder_hsdattr_lower(hsdstr, hsddict): + """Test transformation from hsd to dictionary with HSD attributes and case lowering.""" + dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=True, lower_tag_names=True) + parser = hsd.HsdParser(eventhandler=dictbuilder) + fobj = io.StringIO(hsdstr) + parser.parse(fobj) + assert dictbuilder.hsddict == hsddict + + +@pytest.mark.parametrize( + "hsdstr,hsddict", + _TESTS_HSDATTRIB_CASES, + ids=_TESTS_HSDATTRIB_NAMES +) +def test_dict_walker_hsdattr(hsdstr, hsddict): + """Test transformation from dictionary to string using HSD attributes.""" + output = io.StringIO() + formatter = hsd.HsdFormatter(output, use_hsd_attribs=True) + dictwalker = hsd.HsdDictWalker(formatter) + 
if __name__ == "__main__":
    # Sample input tree which is dumped to stdout in HSD format when this
    # file is executed as a script.
    INPUT = {
        "Driver": {},
        "Hamiltonian": {
            "DFTB": {
                "Scc": True,
                "SccTolerance": 1e-10,
                "MaxSccIterations": 1000,
                "Mixer": {
                    "Broyden": {}
                },
                "MaxAngularMomentum": {
                    "O": "p",
                    "H": "s"
                },
                "Filling": {
                    "Fermi": {
                        "Temperature": 1e-8,
                        # Attribute of the "Temperature" tag. The key must
                        # carry the ".attrib" suffix (as used by the dict
                        # tests); any other suffix would make it an
                        # ordinary tag of its own.
                        "Temperature.attrib": "Kelvin"
                    }
                },
                "KPointsAndWeights": {
                    "SupercellFolding": [[2, 0, 0], [0, 2, 0], [0, 0, 2],
                                         [0.5, 0.5, 0.5]]
                },
                "ElectricField": {
                    "PointCharges": {
                        "CoordsAndCharges": np.array(
                            [[-0.94, -9.44, 1.2, 1.0],
                             [-0.94, -9.44, 1.2, -1.0]])
                    }
                },
                "SelectSomeAtoms": [1, 2, "3:-3"]
            }
        },
        "Analysis": {
            "ProjectStates": {
                # A list of dictionaries represents repeated "Region" tags.
                "Region": [
                    {
                        "Atoms": [1, 2, 3],
                        "Label": "region1",
                    },
                    {
                        "Atoms": np.array([1, 2, 3]),
                        "Label": "region2",
                    }
                ]
            }
        }
    }
    print(hsd.dump_string(INPUT))
mode 100644 index 0000000..b2da98e --- /dev/null +++ b/test/test_parser.py @@ -0,0 +1,138 @@ +#!/bin/env python3 +#--------------------------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # +#--------------------------------------------------------------------------------------------------# +# +import io +import pytest +import hsd + +_OPEN_TAG_EVENT = 1 +_CLOSE_TAG_EVENT = 2 +_ADD_TEXT_EVENT = 3 + +_HSD_LINE = hsd.HSD_ATTRIB_LINE +_HSD_EQUAL = hsd.HSD_ATTRIB_EQUAL +_HSD_NAME = hsd.HSD_ATTRIB_NAME + +_VALID_TESTS = [ + ( + "Simple", ( + """Test {} """, + [ + (_OPEN_TAG_EVENT, "Test", None, {_HSD_LINE: 0, _HSD_EQUAL: False}), + (_CLOSE_TAG_EVENT, "Test"), + ] + ) + ), + ( + "Data with quoted strings", ( + """O = SelectedShells { "s" "p" }""", + [ + (_OPEN_TAG_EVENT, "O", None, {_HSD_LINE: 0, _HSD_EQUAL: True}), + (_OPEN_TAG_EVENT, 'SelectedShells', None, {_HSD_LINE: 0, _HSD_EQUAL: False}), + (_ADD_TEXT_EVENT, '"s" "p"'), + (_CLOSE_TAG_EVENT, 'SelectedShells'), + (_CLOSE_TAG_EVENT, 'O'), + ] + ) + ), + ( + "Attribute containing comma", ( + """PolarRadiusCharge [AA^3,AA,] = {\n1.030000 3.800000 2.820000\n}""", + [ + (_OPEN_TAG_EVENT, "PolarRadiusCharge", "AA^3,AA,", + {_HSD_LINE: 0, _HSD_EQUAL: False}), + (_ADD_TEXT_EVENT, '1.030000 3.800000 2.820000'), + (_CLOSE_TAG_EVENT, 'PolarRadiusCharge'), + ] + ) + ), + ( + "Variable", ( + """$Variable = 12\nValue = $Variable\n""", + [ + (_OPEN_TAG_EVENT, "$Variable", None, {_HSD_LINE: 0, _HSD_EQUAL: True}), + (_ADD_TEXT_EVENT, "12"), + (_CLOSE_TAG_EVENT, "$Variable"), + (_OPEN_TAG_EVENT, "Value", None, {_HSD_LINE: 1, _HSD_EQUAL: True}), + (_ADD_TEXT_EVENT, "$Variable"), + (_CLOSE_TAG_EVENT, "Value") + ] + ) + ), +] + +_VALID_TEST_NAMES, _VALID_TEST_CASES = zip(*_VALID_TESTS) + + +_FAILING_TESTS = [ + ( + "Node-less data", ( + """a = 
class _TestEventHandler(hsd.HsdEventHandler):
    """Event handler recording every received parser event.

    The events are collected in the ``events`` list in the order they
    arrive. Each entry is a tuple whose first item is the event type
    constant (``_OPEN_TAG_EVENT``, ``_CLOSE_TAG_EVENT`` or
    ``_ADD_TEXT_EVENT``) followed by the arguments of the callback, so the
    list can be compared directly with the expected event sequences.
    """

    def __init__(self):
        # Let the base class set up whatever state it may need.
        super().__init__()
        self.events = []

    def open_tag(self, tagname, attrib, hsdattrib):
        """Records an opening tag together with its attributes."""
        self.events.append((_OPEN_TAG_EVENT, tagname, attrib, hsdattrib))

    def close_tag(self, tagname):
        """Records a closing tag."""
        self.events.append((_CLOSE_TAG_EVENT, tagname))

    def add_text(self, text):
        """Records a text (data) event."""
        self.events.append((_ADD_TEXT_EVENT, text))