diff --git a/.codecov.yml b/.codecov.yml
new file mode 100644
index 0000000..a3ed7f4
--- /dev/null
+++ b/.codecov.yml
@@ -0,0 +1,14 @@
+# Codecov configuration to make it a bit less noisy
+coverage:
+  status:
+    patch: false
+    project:
+      default:
+        threshold: 50%
+comment:
+  layout: "header"
+  require_changes: false
+  branches: null
+  behavior: default
+  flags: null
+  paths: null
\ No newline at end of file
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..568769d
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,36 @@
+name: CI
+on: [push, pull_request]
+
+env:
+  HSD_PYTHON_VERSION: '0.1'
+
+jobs:
+  test:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v2
+
+    - uses: actions/setup-python@v1
+      with:
+        python-version: '3.x'
+
+    - name: Install requirements (PIP)
+      run: pip3 install pytest sphinx numpy build
+
+    - name: Set up root directory
+      run: echo "PACKAGE_ROOT=${PWD}/src" >> $GITHUB_ENV
+
+    - name: Build and install package
+      run: |
+        python -m build
+        pip install dist/hsd_python*.whl
+        python -c "import hsd; assert hsd.__version__ == '${HSD_PYTHON_VERSION}'"
+
+    - name: Run test pytest
+      run: python3 -m pytest
+
+    - name: Run doctest
+      run: cd docs; make doctest
diff --git a/.lgtm.yml b/.lgtm.yml
new file mode 100644
index 0000000..a17433d
--- /dev/null
+++ b/.lgtm.yml
@@ -0,0 +1,12 @@
+# Configure LGTM for this package
+
+extraction:
+  python:  # Configure Python
+    python_setup:  # Configure the setup
+      version: 3  # Specify Version 3
+path_classifiers:
+  library:
+    - src/versioneer.py  # Set Versioneer.py to an external "library" (3rd party code)
+    - devtools/*
+  generated:
+    - src/hsd/_version.py
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..8cb46fe
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,48 @@
+language: python
+
+# Run jobs on container-based infrastructure, can be overridden per job
+
+matrix:
+  include:
+    # Extra includes for OSX since python language is not available by default on OSX
+    - os: osx
+      language: generic
+      env: PYTHON_VER=3.6
+    - os: osx
+      language: generic
+      env: PYTHON_VER=3.7
+
+
+    # Pip can use Travis built-in Python
+    - os: linux
+      python: '3.6'
+    - os: linux
+      dist: xenial  # Travis Trusty image does not have Python 3.7, Xenial does
+      python: '3.7'
+
+
+before_install:
+  # Additional info about the build
+  - uname -a
+  - df -h
+  - ulimit -a
+
+  # Install the Python environment
+  - source devtools/travis-ci/before_install.sh
+  - python -V
+
+install:
+
+  # Install the package locally
+  - pip install -U pytest pytest-cov codecov
+  - pip install -e src/
+
+
+script:
+  - pytest -v --cov=hsd test/
+
+notifications:
+  email: false
+
+after_success:
+  - codecov
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
new file mode 100644
index 0000000..6284212
--- /dev/null
+++ b/CHANGELOG.rst
@@ -0,0 +1,14 @@
+==========
+Change Log
+==========
+
+
+0.1
+===
+
+Added
+-----
+
+* Basic functionality to manipulate HSD-data in Python.
+
+* Pip installation
diff --git a/LICENSE b/LICENSE
index c1beed0..10c023d 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2020 Bálint Aradi, Universität Bremen
+Copyright (c) 2011-2021  DFTB+ developers group
 
 All rights reserved.
 
diff --git a/README.rst b/README.rst
index eb7089d..902ec94 100644
--- a/README.rst
+++ b/README.rst
@@ -1,23 +1,43 @@
-************************************
-HSD — Human-friendly Structured Data
-************************************
+**********************************************
+HSD — Make your structured data human friendly
+**********************************************
 
-This Python package contains utilities to write (and soon also to read) files in
-the Human-friendly Structured Data (HSD) format.
+Utilities to read and write files in the Human-friendly Structured Data (HSD)
+format.
 
-It is licensed under the *BSD 2-clause license*.
+The HSD-format is very similar to both JSON and YAML, but tries to minimize the
+effort for **humans** to read and write it. It omits special characters as much
+as possible (in contrast to JSON) and is not indentation dependent (in contrast
+to YAML). It was developed originally as the input format for the scientific
+simulation tool (`DFTB+ <https://github.com/dftbplus/dftbplus>`_), but is
+of general purpose. Data stored in HSD can be easily mapped to a subset of JSON
+or XML and vice versa.
 
+Detailed `documentation <https://hsd-python.readthedocs.io/>`_ can be found on
+`Read the Docs <https://hsd-python.readthedocs.io/>`_.
 
-The HSD format
-==============
 
-The HSD-format is very similar to both JSON and XML, but tries to minimize the
-effort for humans to read and write it. It ommits special characters as much as
-possible but (in contrast to YAML for example) is not indentation dependent.
+Installation
+============
+
+The package can be installed via conda-forge::
+
+  conda install --channel "conda-forge" hsd-python
+
+Alternatively, the package can be downloaded and installed via pip into the
+active Python interpreter (preferably using a virtual python environment) by ::
+
+  pip install hsd
+
+or into the user space by issuing ::
 
-It was developed originally developed as the input format for a scientific
-simulation tool (DFTB+), but is absolutely general. A typical input written in
-HSD would look like ::
+  pip install --user hsd
+
+
+Quick tutorial
+==============
+
+A typical, self-explaining input written in HSD looks like ::
 
   driver {
     conjugate_gradients {
@@ -35,11 +55,13 @@ HSD would look like ::
       }
       filling {
         fermi {
-          temperature [kelvin] = 1e-8
+          # This is a comment which will be ignored
+          # Note the attribute (unit) of the field below
+          temperature [kelvin] = 100
         }
       }
       k_points_and_weights {
-        supercell_folding = {
+        supercell_folding {
           2   0   0
           0   2   0
           0   0   2
@@ -49,12 +71,56 @@ HSD would look like ::
     }
   }
 
-Content in HSD format can be represented as JSON. Content in JSON format can be
-represented as HSD, provided it satisfies a restriction for arrays: Either all
-elements of an array must be objects or none of them. (This allows for a clear
-separation of structure and data and allows for the very simple input format.)
+The above input can be parsed into a Python dictionary with::
+
+  import hsd
+  hsdinput = hsd.load("test.hsd")
+
+The dictionary ``hsdinput`` will then look as::
+
+  {
+      "driver": {
+          "conjugate_gradients": {
+              "moved_atoms": [1, 2, "7:19"],
+              "max_steps": 100
+          }
+      },
+      "hamiltonian": {
+          "dftb": {
+              "scc": True,
+              "scc_tolerance": 1e-10,
+              "mixer": {
+                  "broyden": {}
+              },
+              "filling": {
+                  "fermi": {
+                      "temperature": 100,
+                      "temperature.attrib": "kelvin"
+                  }
+              },
+              "k_points_and_weights": {
+                  "supercell_folding": [
+                      [2, 0, 0],
+                      [0, 2, 0],
+                      [0, 0, 2],
+                      [0.5, 0.5, 0.5]
+                  ]
+              }
+          }
+      }
+  }
+
+Being a simple Python dictionary, it can be easily queried and manipulated in
+Python ::
+
+  hsdinput["driver"]["conjugate_gradients"]["max_steps"] = 200
+
+and then stored again in HSD format ::
+
+    hsd.dump(hsdinput, "test2.hsd")
+
+
+License
+========
 
-Content in HSD format can be represented as XML (DOM-tree). Content in XML can
-be converted to HSD, provided it satisfies the restriction that every child has
-either data (text) or further children, but never both of them. (Again, this
-ensures the simplicity of the input format.)
+The hsd-python package is licensed under the `BSD 2-clause license <LICENSE>`_.
diff --git a/devtools/set_version b/devtools/set_version
new file mode 100755
index 0000000..6be38fa
--- /dev/null
+++ b/devtools/set_version
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+
+"""Sets a version number in all relevant project files"""
+
+import sys
+import re
+import os
+
+# The pattern the version number must satisfy
+VERSION_PATTERN = r'\d+\.\d+(?:\.\d+)?(?:-\w+)?'
+
+# List of (file name, search pattern, replacement pattern) tuples for all
+# the occurrences to be replaced.
+FILES_PATTERNS = [('src/hsd/__init__.py',
+                   r'^__version__\s*=\s*([\'"]){}\1'.format(VERSION_PATTERN),
+                   "__version__ = '{version}'"),
+                  ('docs/introduction.rst',
+                   r'hsd-python version[ ]*{}\.'.format(VERSION_PATTERN),
+                   'hsd-python version {shortversion}.'),
+                  ('setup.cfg',
+                   r'^version\s*=\s*{}'.format(VERSION_PATTERN),
+                   "version = {version}"),
+                  ('docs/conf.py',
+                   r'release\s*=\s*([\'"]){}\1'.format(VERSION_PATTERN),
+                   "release = '{version}'"),
+                  ('.github/workflows/ci.yml',
+                   r'HSD_PYTHON_VERSION:\s*([\'"]){}\1'.format(VERSION_PATTERN),
+                   "HSD_PYTHON_VERSION: '{version}'"),
+                 ]
+
+
+def main():
+    """Sets the version given on the command line in all relevant project files."""
+
+    arguments = sys.argv
+    if len(arguments) <= 1:
+        sys.stderr.write("Missing version string\n")
+        sys.exit(1)
+    version, shortversion = _get_version_strings(arguments[1])
+    rootdir = os.path.join(os.path.dirname(arguments[0]), '..')
+    _replace_version_in_files(FILES_PATTERNS, rootdir, version, shortversion)
+    _replace_version_in_changelog(rootdir, version)
+
+
+def _get_version_strings(version):
+    """Validates a version string and derives its short form.
+
+    Exits with status 1 unless the whole string satisfies VERSION_PATTERN.
+    Returns (version, first two dot-separated fields of version).
+    """
+    if re.fullmatch(VERSION_PATTERN, version) is None:
+        sys.stderr.write("Invalid version string\n")
+        sys.exit(1)
+    return version, '.'.join(version.split('.')[0:2])
+
+
+def _replace_version_in_files(files_patterns, rootdir, version, shortversion):
+    """Replaces the version number in each listed file and prints the substitution count.
+
+    Each item is a (file name below rootdir, regexp, replacement template) tuple.
+    """
+    for fname, regexp, repl in files_patterns:
+        fname = os.path.join(rootdir, fname)
+        print("Replacments in '{}': ".format(os.path.relpath(fname, rootdir)), end='')
+        with open(fname, 'r') as fp:  # closed even if reading raises
+            txt = fp.read()
+        replacement = repl.format(version=version, shortversion=shortversion)
+        newtxt, nsub = re.subn(regexp, replacement, txt, flags=re.MULTILINE)
+        print(nsub)
+        with open(fname, 'w') as fp:
+            fp.write(newtxt)
+
+
+def _replace_version_in_changelog(rootdir, version):
+    """Renames the first 'Unreleased' section heading in CHANGELOG.rst to the version.
+
+    The '=' underline is regenerated to match the length of the version string.
+    """
+    fname = os.path.join(rootdir, 'CHANGELOG.rst')
+    print("Replacments in '{}': ".format(os.path.relpath(fname, rootdir)), end='')
+    with open(fname, 'r') as fp:  # closed even if reading raises
+        txt = fp.read()
+    decoration = '=' * len(version)
+    newtxt, nsub = re.subn(
+        r'^Unreleased\s*\n=+', version + r'\n' + decoration, txt,
+        count=1, flags=re.MULTILINE)
+    print(nsub)
+    with open(fname, 'w') as fp:
+        fp.write(newtxt)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/devtools/travis-ci/before_install.sh b/devtools/travis-ci/before_install.sh
new file mode 100755
index 0000000..c918485
--- /dev/null
+++ b/devtools/travis-ci/before_install.sh
@@ -0,0 +1,21 @@
+# Temporarily change directory to $HOME to install software
+pushd .
+cd $HOME
+# Make sure some level of pip is installed
+python -m ensurepip
+
+if [ "$TRAVIS_OS_NAME" == "osx" ]; then
+    HOMEBREW_NO_AUTO_UPDATE=1 brew upgrade pyenv
+    # Pyenv requires minor revision, get the latest
+    PYENV_VERSION=$(pyenv install --list |grep $PYTHON_VER | sed -n "s/^[ \t]*\(${PYTHON_VER}\.*[0-9]*\).*/\1/p" | tail -n 1)
+    # Install version
+    pyenv install $PYENV_VERSION
+    # Use version for this
+    pyenv global $PYENV_VERSION
+    # Set up path shims
+    eval "$(pyenv init -)"
+fi
+pip install --upgrade pip setuptools
+
+# Restore original directory
+popd
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..d4bb2cb
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/api.rst b/docs/api.rst
new file mode 100644
index 0000000..c8caf59
--- /dev/null
+++ b/docs/api.rst
@@ -0,0 +1,38 @@
+*****************
+API documentation
+*****************
+
+.. testsetup::
+
+    import hsd
+
+
+High level routines
+===================
+
+.. autofunction:: hsd.load_string
+
+.. autofunction:: hsd.load
+
+.. autofunction:: hsd.dump_string
+
+.. autofunction:: hsd.dump
+
+
+Lower level building blocks
+===========================
+
+.. autoclass:: hsd.HsdParser
+    :members:
+
+.. autoclass:: hsd.HsdEventHandler
+    :members:
+
+.. autoclass:: hsd.HsdDictBuilder
+    :members:
+
+.. autoclass:: hsd.HsdDictWalker
+    :members:
+
+.. autoclass:: hsd.HsdFormatter
+    :members:
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..055c8ee
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,64 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+import doctest
+
+sys.path.insert(0, os.path.abspath('../src'))
+
+# -- Project information -----------------------------------------------------
+
+project = 'hsd-python'
+copyright = '2021, DFTB+ developers group'
+author = 'DFTB+ developers group'
+
+# The full version, including alpha/beta/rc tags
+release = '0.1'
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.autodoc',
+    'sphinx.ext.doctest',
+    'sphinx.ext.napoleon'
+]
+
+autodoc_member_order = 'bysource'
+
+doctest_default_flags = doctest.NORMALIZE_WHITESPACE
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+# html_theme = 'alabaster'
+html_theme = 'sphinx_rtd_theme'
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
\ No newline at end of file
diff --git a/docs/hsd.rst b/docs/hsd.rst
new file mode 100644
index 0000000..b211865
--- /dev/null
+++ b/docs/hsd.rst
@@ -0,0 +1,270 @@
+**************
+The HSD format
+**************
+
+General description
+===================
+
+You can think of the Human-friendly Structured Data (HSD) format as a pleasant
+representation of a tree structure. It can represent a subset of what you
+can do for example with XML. The following constraints compared
+to XML apply:
+
+* Every node of a tree, which is not empty, either contains further nodes
+  or data, but never both.
+
+* Every node may have a single (string) attribute only.
+
+These constraints allow a very natural looking formatting of the data.
+
+As an example, let's have a look at a data tree, which represents input
+for scientific software. In the XML representation, it could be written as ::
+
+  <Hamiltonian>
+    <Dftb>
+      <Scc>Yes</Scc>
+      <Filling>
+        <Fermi>
+          <Temperature attrib="Kelvin">77</Temperature>
+        </Fermi>
+      </Filling>
+    </Dftb>
+  </Hamiltonian>
+
+The same information can be encoded in a much more natural and compact form in HSD
+format as ::
+
+  Hamiltonian {
+    Dftb {
+      Scc = Yes
+      Filling {
+        Fermi {
+          Temperature [Kelvin] = 77
+        }
+      }
+    }
+  }
+
+The content of a node is passed either between an opening and a closing
+curly brace or after an equals sign. In the latter case the end of the line will
+be the closing delimiter. The attribute (typically the unit of the data
+which the node contains) is specified between square brackets after
+the node name.
+
+The equals sign can be used to assign data as a node content (provided
+the data fits into one line), or to assign a single child node as content
+for a given node. This leads to a compact and expressive notation for those
+cases, where (by the semantics of the input) a given node is only allowed to
+have a single child node as content. The tree above is a piece of a typical
+DFTB+ input, where only one child node is allowed for the nodes ``Hamiltonian``
+and ``Filling``, respectively (They specify the type of the Hamiltonian
+and the filling function). By making use of equals signs, the
+simplified HSD representation can be as compact as ::
+
+  Hamiltonian = Dftb {
+    Scc = Yes
+    Filling = Fermi {
+      Temperature [Kelvin] = 77
+    }
+  }
+
+and still represent the same tree.
+
+
+Mapping to dictionaries
+=======================
+
+Being basically a subset of XML, HSD data is best represented as an XML
+DOM-tree. However, very often a dictionary representation is more desirable,
+especially when the language used to query and manipulate the tree offers
+dictionaries as primary data type (e.g. Python). The data in an HSD input
+can be easily represented with the help of nested dictionaries and lists. The
+input from the previous section would have the following representation as
+Python dictionary (or as a JSON formatted input file)::
+
+  {
+      "Hamiltonian": {
+          "Dftb": {
+              "Scc": True,
+              "Filling": {
+                  "Fermi": {
+                      "Temperature": 77,
+                      "Temperature.attrib": "Kelvin"
+                  }
+              }
+          }
+      }
+  }
+
+The attribute of a node is stored under a special key containing the name of
+the node and the ``.attrib`` suffix.
+
+One slight complication of the dictionary representation arises in the case
+of a node which has multiple child nodes with the same name ::
+
+  <ExternalField>
+    <PointCharges>
+      <GaussianBlurWidth>3</GaussianBlurWidth>
+      <CoordsAndCharges>
+       3.3 -1.2 0.9   9.2
+       1.2 -3.4 5.6  -3.3
+      </CoordsAndCharges>
+    </PointCharges>
+    <PointCharges>
+      <GaussianBlurWidth>10</GaussianBlurWidth>
+      <CoordsAndCharges>
+       1.0   2.0  3.0  4.0
+       -1.0 -2.0 -3.0 -4.0
+      </CoordsAndCharges>
+    </PointCharges>
+  </ExternalField>
+
+While the HSD representation has no problem to cope with the situation ::
+
+  ExternalField {
+    PointCharges {
+      GaussianBlurWidth = 3
+      CoordsAndCharges {
+       3.3 -1.2 0.9   9.2
+       1.2 -3.4 5.6  -3.3
+      }
+    }
+    PointCharges {
+      GaussianBlurWidth = 10
+      CoordsAndCharges {
+       1.0   2.0  3.0  4.0
+       -1.0 -2.0 -3.0 -4.0
+      }
+    }
+  }
+
+a trick is needed for the dictionary / JSON representation, as multiple keys
+with the same name are not allowed in a dictionary. Therefore, the repetitive
+nodes will be mapped to one key, which will contain a list of dictionaries
+(instead of a single dictionary as in the usual case)::
+
+  {
+      "ExternalField": {
+          # Note the list of dictionaries here!
+          "PointCharges": [
+              {
+                  "GaussianBlurWidth": 3,
+                  "CoordsAndCharges": [
+                      [3.3, -1.2, 0.9, 9.2],
+                      [1.2, -3.4, 5.6, -3.3]
+                  ]
+              },
+              {
+                  "GaussianBlurWidth": 10,
+                  "CoordsAndCharges": [
+                      [1.0,  2.0, 3.0, 4.0 ],
+                      [-1.0, -2.0, -3.0, -4.0 ]
+                  ]
+              },
+          ],
+          # The attributes also become a list. Due to technical reasons the
+          # dictbuilder always creates an attribute list for multiple nodes,
+          # even if none of the nodes carries an actual attribute.
+          "PointCharges.attrib": [None, None]
+      }
+  }
+
+The mapping works in both directions, so that this dictionary (or the JSON file
+created from it) can be easily converted back to the HSD form again.
+
+
+Processing related information
+==============================
+
+Additional to the data stored in an HSD-file, further processing related
+information can be recorded on demand. The current Python implementation is able
+to record the following additional data for each HSD node:
+
+* the line, where the node was defined in the input (helpful for printing out
+  informative error messages),
+
+* the name of the HSD node, as found in the input (useful if the tag names are
+  converted to lower case to ease case-insensitive handling of the input) and
+
+* whether an equals sign was used to open the block.
+
+If this information is being recorded, a special key with the
+``.hsdattrib`` suffix will be generated for each node in the dictionary/JSON
+representation. The corresponding value will be a dictionary with this
+information.
+
+As an example, let's store the input from the previous section ::
+
+  Hamiltonian = Dftb {
+    Scc = Yes
+    Filling = Fermi {
+      Temperature [Kelvin] = 77
+    }
+  }
+
+in the file `test.hsd`, parse it and convert the node names to lower case
+(to enable case-insensitive input processing). Using the Python command ::
+
+  inpdict = hsd.load("test.hsd", lower_tag_names=True, include_hsd_attribs=True)
+
+will yield the following dictionary representation of the input::
+
+  {
+      'hamiltonian.hsdattrib': {'equal': True, 'line': 0, 'tag': 'Hamiltonian'},
+      'hamiltonian': {
+          'dftb.hsdattrib': {'line': 0, 'equal': False, 'tag': 'Dftb'},
+          'dftb': {
+              'scc.hsdattrib': {'equal': True, 'line': 1, 'tag': 'Scc'},
+              'scc': True,
+              'filling.hsdattrib': {'equal': True, 'line': 2, 'tag': 'Filling'},
+              'filling': {
+                  'fermi.hsdattrib': {'line': 2, 'equal': False, 'tag': 'Fermi'},
+                  'fermi': {
+                      'temperature.attrib': 'Kelvin',
+                      'temperature.hsdattrib': {'equal': True, 'line': 3,
+                                                'tag': 'Temperature'},
+                      'temperature': 77
+                  }
+              }
+          }
+      }
+  }
+
+The recorded line numbers can be used to issue helpful error messages with
+information about where the user should search for the problem.
+The node names and formatting information about the equal sign ensures
+that the formatting is similar to the original HSD, if the data is dumped
+into the HSD format again. Dumping the dictionary with ::
+
+  hsd.dump(inpdict, "test2-formatted.hsd", use_hsd_attribs=True)
+
+would indeed yield ::
+
+  Hamiltonian = Dftb {
+    Scc = Yes
+    Filling = Fermi {
+      Temperature [Kelvin] = 77
+    }
+  }
+
+which is basically identical with the original input. If the additional
+processing information is not recorded when the data is loaded, or
+it is not considered when the data is dumped as HSD again ::
+
+  inpdict = hsd.load("test.hsd", lower_tag_names=True)
+  hsd.dump(inpdict, "test2-unformatted.hsd")
+
+the resulting formatting will more strongly differ from the original HSD ::
+
+  hamiltonian {
+    dftb {
+      scc = Yes
+      filling {
+        fermi {
+          temperature [Kelvin] = 77
+        }
+      }
+    }
+  }
+
+Still nice and readable, but less compact and with different casing.
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..e766684
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,15 @@
+.. hsd-python documentation master file, created by
+   sphinx-quickstart on Mon Sep 13 11:38:29 2021.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+########################
+HSD-python documentation
+########################
+
+.. toctree::
+   :maxdepth: 2
+
+   introduction
+   hsd
+   api
diff --git a/docs/introduction.rst b/docs/introduction.rst
new file mode 100644
index 0000000..38fd346
--- /dev/null
+++ b/docs/introduction.rst
@@ -0,0 +1,119 @@
+************
+Introduction
+************
+
+This package contains utilities to read and write files in the Human-friendly
+Structured Data (HSD) format.
+
+The HSD-format is very similar to XML, JSON and YAML, but tries to minimize the
+effort for **humans** to read and write it. It omits special characters as much
+as possible (in contrast to XML and JSON) and is not indentation dependent (in
+contrast to YAML). It was developed originally as the input format for the
+scientific simulation tool (`DFTB+ <https://github.com/dftbplus/dftbplus>`_),
+but is of general purpose. Data stored in HSD can be easily mapped to a subset
+of JSON, YAML or XML and *vice versa*.
+
+This document describes hsd-python version 0.1.
+
+
+Installation
+============
+
+The package can be installed via conda-forge::
+
+  conda install --channel "conda-forge" hsd-python
+
+Alternatively, the package can be downloaded and installed via pip into the
+active Python interpreter (preferably using a virtual python environment) by ::
+
+  pip install hsd
+
+or into the user space by issuing ::
+
+  pip install --user hsd
+
+
+Quick tutorial
+==============
+
+A typical, self-explaining input written in HSD looks like ::
+
+  driver {
+    conjugate_gradients {
+      moved_atoms = 1 2 "7:19"
+      max_steps = 100
+    }
+  }
+
+  hamiltonian {
+    dftb {
+      scc = yes
+      scc_tolerance = 1e-10
+      mixer {
+        broyden {}
+      }
+      filling {
+        fermi {
+          # This is a comment which will be ignored
+          # Note the attribute (unit) of the field below
+          temperature [kelvin] = 100
+        }
+      }
+      k_points_and_weights {
+        supercell_folding {
+          2   0   0
+          0   2   0
+          0   0   2
+          0.5 0.5 0.5
+        }
+      }
+    }
+  }
+
+The above input can be parsed into a Python dictionary with::
+
+  import hsd
+  hsdinput = hsd.load("test.hsd")
+
+The dictionary ``hsdinput`` will then look as::
+
+  {
+      "driver": {
+          "conjugate_gradients": {
+              "moved_atoms": [1, 2, "7:19"],
+              "max_steps": 100
+          }
+      },
+      "hamiltonian": {
+          "dftb": {
+              "scc": True,
+              "scc_tolerance": 1e-10,
+              "mixer": {
+                  "broyden": {}
+              },
+              "filling": {
+                  "fermi": {
+                      "temperature": 100,
+                      "temperature.attrib": "kelvin"
+                  }
+              },
+              "k_points_and_weights": {
+                  "supercell_folding": [
+                      [2, 0, 0],
+                      [0, 2, 0],
+                      [0, 0, 2],
+                      [0.5, 0.5, 0.5]
+                  ]
+              }
+          }
+      }
+  }
+
+Being a simple Python dictionary, it can be easily queried and manipulated in
+Python ::
+
+  hsdinput["driver"]["conjugate_gradients"]["max_steps"] = 200
+
+and then stored again in HSD format ::
+
+    hsd.dump(hsdinput, "test2.hsd")
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..8084272
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..9787c3b
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..0cf64cf
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,30 @@
+[metadata]
+name = hsd-python
+version = 0.1
+author = DFTB+ developers group
+author_email = info@dftbplus.org
+url = https://github.com/dftbplus/hsd-python
+description =
+    Tools for reading, writing and manipulating data stored in the human-friendly
+    structured data (HSD) format
+long_description = file: README.rst
+long_description_content_type = text/x-rst
+license = BSD
+license_file = LICENSE
+platform = any
+classifiers =
+    Intended Audience :: Developers
+    License :: OSI Approved :: BSD License
+    Programming Language :: Python :: 3 :: Only
+    Programming Language :: Python :: 3.7
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+
+[options]
+include_package_data = True
+package_dir =
+    = src
+packages = hsd
+
+[options.packages.find]
+where = src
diff --git a/src/hsd.py b/src/hsd.py
deleted file mode 100644
index d4477c5..0000000
--- a/src/hsd.py
+++ /dev/null
@@ -1,192 +0,0 @@
-#!/usr/bin/env python3
-#------------------------------------------------------------------------------#
-#  hsd: package for manipulating HSD-formatted data                            #
-#  Copyright (C) 2020  Bálint Aradi, Universität Bremen                        #
-#                                                                              #
-#  See the LICENSE file for terms of usage and distribution.                   #
-#------------------------------------------------------------------------------#
-#
-"""
-Provides functionality to convert Python structures to HSD
-"""
-import io
-import numpy as np
-
-__all__ = ['dump', 'dumps']
-
-
-_INDENT_STR = "  "
-
-# String quoting delimiters (must be at least two)
-_QUOTING_CHARS = "\"'"
-
-# Suffix for appending attributes
-_ATTRIBUTE_SUFFIX = ".attribute"
-
-
-def dump(obj, fobj):
-    """Serializes an object to a file in HSD format.
-
-    Args:
-        obj: Object to be serialized in HSD format
-        fobj: File like object to write the result to.
-    """
-
-    if isinstance(obj, dict):
-        _dump_dict(obj, fobj, "")
-    else:
-        msg = "Invalid object type"
-        raise TypeError(msg)
-
-
-def dumps(obj):
-    """Serializes an object to string in HSD format.
-
-    Args:
-        obj: Object to serialize.
-
-    Returns:
-        HSD formatted string.
-    """
-    result = io.StringIO()
-    dump(obj, result)
-    return result.getvalue()
-
-
-def _dump_dict(obj, fobj, indentstr):
-    for key, value in obj.items():
-        if key.endswith(_ATTRIBUTE_SUFFIX):
-            if key[:-len(_ATTRIBUTE_SUFFIX)] in obj:
-                continue
-            else:
-                msg = "Attribute '{}' without corresponding tag '{}'"\
-                      .format(key, key[:-len(_ATTRIBUTE_SUFFIX)])
-                raise ValueError(msg)
-        attrib = obj.get(key + _ATTRIBUTE_SUFFIX)
-        if attrib is None:
-            attribstr = ""
-        elif not isinstance(attrib, str):
-            msg = "Invalid data type ({}) for '{}'"\
-                  .format(str(type(attrib)), key + ".attribute")
-            raise ValueError(msg)
-        else:
-            attribstr = " [" + attrib + "]"
-        if isinstance(value, dict):
-            if value:
-                fobj.write("{}{}{} {{\n".format(indentstr, key, attribstr))
-                _dump_dict(value, fobj, indentstr + _INDENT_STR)
-                fobj.write("{}}}\n".format(indentstr))
-            else:
-                fobj.write("{}{}{} {{}}\n".format(indentstr, key, attribstr))
-        elif isinstance(value, list) and value and isinstance(value[0], dict):
-            for item in value:
-                fobj.write("{}{}{} {{\n".format(indentstr, key, attribstr))
-                _dump_dict(item, fobj, indentstr + _INDENT_STR)
-                fobj.write("{}}}\n".format(indentstr))
-        else:
-            valstr = _get_hsd_rhs(value, indentstr)
-            fobj.write("{}{}{} {}\n"\
-                     .format(indentstr, key, attribstr, valstr))
-
-
-def _get_hsd_rhs(obj, indentstr):
-
-    if isinstance(obj, list):
-        objstr = _list_to_hsd(obj)
-    elif isinstance(obj, np.ndarray):
-        objstr = _list_to_hsd(obj.tolist())
-    else:
-        objstr = _item_to_hsd(obj)
-    if "\n" in objstr:
-        newline_indent = "\n" + indentstr + _INDENT_STR
-        rhs = ("= {" + newline_indent + objstr.replace("\n", newline_indent)
-               + "\n" + indentstr + "}")
-    else:
-        rhs = "= " + objstr
-    return rhs
-
-
-def _list_to_hsd(lst):
-    if lst and isinstance(lst[0], list):
-        lines = []
-        for innerlist in lst:
-            lines.append(" ".join([_item_to_hsd(item) for item in innerlist]))
-        return "\n".join(lines)
-    return " ".join([_item_to_hsd(item) for item in lst])
-
-
-def _item_to_hsd(item):
-
-    if isinstance(item, (int, float)):
-        return str(item)
-    elif isinstance(item, bool):
-        return "Yes" if item else "No"
-    elif isinstance(item, str):
-        return _str_to_hsd(item)
-    else:
-        msg = "Data type {} can not be converted to HSD string"\
-              .format(type(item))
-        raise TypeError(msg)
-
-
-def _str_to_hsd(string):
-    is_present = [qc in string for qc in _QUOTING_CHARS]
-    if sum(is_present) > 1:
-        msg = "String '{}' can not be quoted correctly".format(string)
-        raise ValueError(msg)
-    delimiter = _QUOTING_CHARS[0] if not is_present[0] else _QUOTING_CHARS[1]
-    return delimiter + string + delimiter
-
-
-
-if __name__ == "__main__":
-    INPUT = {
-        "Driver": {},
-        "Hamiltonian": {
-            "DFTB": {
-                "Scc": True,
-                "SccTolerance": 1e-10,
-                "MaxSccIterations": 1000,
-                "Mixer": {
-                    "Broyden": {}
-                },
-                "MaxAngularMomentum": {
-                    "O": "p",
-                    "H": "s"
-                },
-                "Filling": {
-                    "Fermi": {
-                        "Temperature": 1e-8,
-                        "Temperature.attribute": "Kelvin"
-                    }
-                },
-                "KPointsAndWeights": {
-                    "SupercellFolding": [[2, 0, 0], [0, 2, 0], [0, 0, 2],
-                                         [0.5, 0.5, 0.5]]
-                },
-                "ElectricField": {
-                    "PointCharges": {
-                        "CoordsAndCharges": np.array(
-                            [[-0.94, -9.44, 1.2, 1.0],
-                             [-0.94, -9.44, 1.2, -1.0]])
-                    }
-                },
-                "SelectSomeAtoms": [1, 2, "3:-3"]
-            }
-        },
-        "Analysis": {
-            "ProjectStates": {
-                "Region": [
-                    {
-                        "Atoms": [1, 2, 3],
-                        "Label": "region1",
-                    },
-                    {
-                        "Atoms": np.array([1, 2, 3]),
-                        "Label": "region2",
-                    }
-                ]
-            }
-        }
-    }
-    print(dumps(INPUT))
diff --git a/src/hsd/__init__.py b/src/hsd/__init__.py
new file mode 100644
index 0000000..4faafbd
--- /dev/null
+++ b/src/hsd/__init__.py
@@ -0,0 +1,18 @@
+#--------------------------------------------------------------------------------------------------#
+#  hsd-python: package for manipulating HSD-formatted data in Python                               #
+#  Copyright (C) 2011 - 2021  DFTB+ developers group                                               #
+#  Licensed under the BSD 2-clause license.                                                        #
+#--------------------------------------------------------------------------------------------------#
+#
+"""
+Toolbox for reading, writing and manipulating HSD-data.
+"""
+from hsd.common import HSD_ATTRIB_LINE, HSD_ATTRIB_EQUAL, HSD_ATTRIB_SUFFIX,\
+     HSD_ATTRIB_NAME, HsdError
+from hsd.dict import HsdDictBuilder, HsdDictWalker
+from hsd.eventhandler import HsdEventHandler, HsdEventPrinter
+from hsd.formatter import HsdFormatter
+from hsd.io import load, load_string, dump, dump_string
+from hsd.parser import HsdParser
+
+__version__ = '0.1'
diff --git a/src/hsd/common.py b/src/hsd/common.py
new file mode 100644
index 0000000..80b93ca
--- /dev/null
+++ b/src/hsd/common.py
@@ -0,0 +1,51 @@
+#--------------------------------------------------------------------------------------------------#
+#  hsd-python: package for manipulating HSD-formatted data in Python                               #
+#  Copyright (C) 2011 - 2021  DFTB+ developers group                                               #
+#  Licensed under the BSD 2-clause license.                                                        #
+#--------------------------------------------------------------------------------------------------#
+#
+"""
+Implements common functionalities for the HSD package
+"""
+try:
+    import numpy as np
+except ModuleNotFoundError:
+    np = None
+
+
+
+class HsdError(Exception):
+    """Common ancestor of the exceptions defined by the HSD package.
+
+    It adds nothing to the built-in Exception, it only provides a package specific type.
+    """
+
+
+def unquote(txt):
+    """Strips one pair of matching enclosing quotes from a string.
+
+    Args:
+        txt: String to process.
+
+    Returns:
+        The text between the quotes, if the string has at least two characters and starts and
+        ends with the same quotation character (single or double quote). Otherwise the string
+        is returned unchanged.
+    """
+    is_quoted = len(txt) >= 2 and txt[0] in "'\"" and txt[0] == txt[-1]
+    return txt[1:-1] if is_quoted else txt
+
+
+# Name for default attribute (when attribute name is not specified)
+DEFAULT_ATTRIBUTE = "unit"
+
+# Suffix appended to a tag name to form the dictionary key holding the attribute of the tag
+ATTRIB_SUFFIX = ".attrib"
+
+# Suffix appended to a tag name to form the dictionary key holding the HSD processing
+# attributes of the tag
+HSD_ATTRIB_SUFFIX = ".hsdattrib"
+
+# HSD attribute containing the original tag name
+HSD_ATTRIB_NAME = "name"
+
+# HSD attribute containing the line number
+HSD_ATTRIB_LINE = "line"
+
+# HSD attribute marking that a node is equal to its only child (instead of
+# containing it)
+HSD_ATTRIB_EQUAL = "equal"
+
+# String quoting delimiters (must be at least two)
+QUOTING_CHARS = "\"'"
+
+# Special characters (strings without quotation marks which contain any of these characters are
+# enclosed in quotes when converted to HSD text)
+SPECIAL_CHARS = "{}[]= "
diff --git a/src/hsd/dict.py b/src/hsd/dict.py
new file mode 100644
index 0000000..2909184
--- /dev/null
+++ b/src/hsd/dict.py
@@ -0,0 +1,267 @@
+#--------------------------------------------------------------------------------------------------#
+#  hsd-python: package for manipulating HSD-formatted data in Python                               #
+#  Copyright (C) 2011 - 2021  DFTB+ developers group                                               #
+#  Licensed under the BSD 2-clause license.                                                        #
+#--------------------------------------------------------------------------------------------------#
+#
+"""
+Contains an event-driven builder for dictionary based (JSON-like) structure
+"""
+import re
+from typing import List, Tuple, Union
+from hsd.common import HSD_ATTRIB_NAME, np, ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX, HsdError,\
+    QUOTING_CHARS, SPECIAL_CHARS
+from hsd.eventhandler import HsdEventHandler, HsdEventPrinter
+
+# Type of a single data item
+_ItemType = Union[float, int, bool, str]
+
+# Type of the data attached to a tag: a single item or a list of items
+_DataType = Union[_ItemType, List[_ItemType]]
+
+# Pattern to transform HSD string values into actual Python data types
+# Every match sets one of the value groups "int", "float", "logical", "qstr" or "str". The "qstr"
+# group contains the quoted string together with its enclosing quotation marks.
+_TOKEN_PATTERN = re.compile(r"""
+# Integer
+(?:\s*(?:^|(?<=\s))(?P<int>[+-]?[0-9]+)(?:\s*$|\s+))
+|
+# Floating point
+(?:\s*(?:^|(?<=\s))
+(?P<float>[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)(?:$|(?=\s+)))
+|
+# Logical (Yes/No)
+(?:\s*(?:^|(?<=\s))(?P<logical>[Yy][Ee][Ss]|[Nn][Oo])(?:$|(?=\s+)))
+|
+# Quoted string
+(?:\s*(?:(?P<qstr>(?P<quote>['"]).*?(?P=quote))
+|
+# Unquoted string
+(?P<str>.+?))(?:$|\s+))
+""", re.VERBOSE | re.MULTILINE)
+
+
+class HsdDictBuilder(HsdEventHandler):
+    """Specific HSD event handler, which builds a nested Python dictionary.
+
+    A tag occurring once within a block is stored as ``key: content``. If a tag occurs several
+    times within the same block, its occurrences are collected into a list of dictionaries
+    stored under a single key, where occurrences carrying data (instead of child tags) are
+    wrapped as ``{None: data}``. The attribute of a tag is stored under the key of the tag
+    extended with ATTRIB_SUFFIX, the HSD-attributes (if requested) under the key extended with
+    HSD_ATTRIB_SUFFIX.
+
+    Args:
+        flatten_data: Whether multiline data in the HSD input should be
+            flattened into a single list. Otherwise a list of lists is created, with one list for
+            every line (default).
+        lower_tag_names: Whether tag names should be all converted to lower case (to ease case
+            insensitive processing). Default: False. If set and include_hsd_attribs is also set,
+            the original tag names can be retrieved from the "name" hsd attributes.
+        include_hsd_attribs: Whether the HSD-attributes (processing related attributes, like
+            original tag name, line information, etc.) should be stored (default: False).
+    """
+
+    def __init__(self, flatten_data: bool = False, lower_tag_names: bool = False,
+                 include_hsd_attribs: bool = False):
+        super().__init__()
+        # Root of the dictionary being built
+        self._hsddict: dict = {}
+        # Block belonging to the innermost tag which is currently open
+        self._curblock: dict = self._hsddict
+        # Blocks of the enclosing tags, the innermost one being the last
+        self._parentblocks: List[dict] = []
+        # Data received for the currently open tag (None, if no data was received)
+        self._data: Union[None, _DataType] = None
+        # Attributes and HSD-attributes of the tags currently open
+        self._attribs: List[Tuple[str, dict]] = []
+        self._flatten_data: bool = flatten_data
+        self._lower_tag_names: bool = lower_tag_names
+        self._include_hsd_attribs: bool = include_hsd_attribs
+
+
+    @property
+    def hsddict(self):
+        """The dictionary which has been built"""
+        return self._hsddict
+
+
+    def open_tag(self, tagname, attrib, hsdattrib):
+        """Starts a new, empty block for the tag being opened.
+
+        Args:
+            tagname: Name of the tag being opened.
+            attrib: Attribute of the tag or None.
+            hsdattrib: HSD-attributes of the tag or None.
+
+        Raises:
+            HsdError: If the enclosing tag has already received data.
+        """
+        if self._data is not None:
+            msg = f"Node '{tagname}' opened in an invalid context"
+            raise HsdError(msg)
+        self._attribs.append((attrib, hsdattrib))
+        self._parentblocks.append(self._curblock)
+        self._curblock = {}
+
+
+    def close_tag(self, tagname):
+        """Stores the content collected for the tag being closed in the block of its parent.
+
+        Args:
+            tagname: Name of the tag being closed.
+        """
+        attrib, hsdattrib = self._attribs.pop(-1)
+        parentblock = self._parentblocks.pop(-1)
+        key = tagname.lower() if self._lower_tag_names else tagname
+        # Earlier occurrences must be looked up under the key used for storing. It differs from
+        # the tag name if lower_tag_names is set, and looking up the tag name instead would let
+        # repeated tags overwrite each other.
+        prevcont = parentblock.get(key)
+
+        if self._data is not None:
+            if prevcont is None:
+                parentblock[key] = self._data
+            elif isinstance(prevcont, list) and len(prevcont) > 0 and isinstance(prevcont[0], dict):
+                prevcont.append({None: self._data})
+            elif isinstance(prevcont, dict):
+                parentblock[key] = [prevcont, {None: self._data}]
+            else:
+                # Previous occurrence contained data as well, wrap both
+                parentblock[key] = [{None: prevcont}, {None: self._data}]
+        else:
+            if prevcont is None:
+                parentblock[key] = self._curblock
+            elif isinstance(prevcont, list) and len(prevcont) > 0 and isinstance(prevcont[0], dict):
+                prevcont.append(self._curblock)
+            elif isinstance(prevcont, dict):
+                parentblock[key] = [prevcont, self._curblock]
+            else:
+                # Previous occurrence contained data, it has to be wrapped
+                parentblock[key] = [{None: prevcont}, self._curblock]
+
+        # For repeated tags the attributes are collected into a list with one entry for each
+        # occurrence
+        if attrib and prevcont is None:
+            parentblock[key + ATTRIB_SUFFIX] = attrib
+        elif prevcont is not None:
+            prevattrib = parentblock.get(key + ATTRIB_SUFFIX)
+            if isinstance(prevattrib, list):
+                prevattrib.append(attrib)
+            else:
+                parentblock[key + ATTRIB_SUFFIX] = [prevattrib, attrib]
+
+        if self._include_hsd_attribs:
+            if self._lower_tag_names:
+                # Record the original tag name, as the key contains the lowered form only
+                hsdattrib = {} if hsdattrib is None else hsdattrib
+                hsdattrib[HSD_ATTRIB_NAME] = tagname
+            if prevcont is None:
+                parentblock[key + HSD_ATTRIB_SUFFIX] = hsdattrib
+            else:
+                prevhsdattrib = parentblock.get(key + HSD_ATTRIB_SUFFIX)
+                if isinstance(prevhsdattrib, list):
+                    prevhsdattrib.append(hsdattrib)
+                else:
+                    parentblock[key + HSD_ATTRIB_SUFFIX] = [prevhsdattrib, hsdattrib]
+        self._curblock = parentblock
+        self._data = None
+
+
+    def add_text(self, text):
+        """Converts the text of the current tag into data and stores it.
+
+        Args:
+            text: Text in the current tag.
+
+        Raises:
+            HsdError: If the current tag already contains child tags or data.
+        """
+        if self._curblock or self._data is not None:
+            msg = "Data appeared in an invalid context"
+            raise HsdError(msg)
+        self._data = self._text_to_data(text)
+
+
+    def _text_to_data(self, txt: str) -> _DataType:
+        """Converts text into Python data using the token pattern.
+
+        Args:
+            txt: Text to convert (may consist of multiple lines).
+
+        Returns:
+            List with the items of every line (or a single list with all items, if flatten_data
+            had been set). If the text yields a single line, the list of that line is returned,
+            if it yields a single item only, the item itself is returned.
+        """
+        data = []
+        for line in txt.split("\n"):
+            if self._flatten_data:
+                # Items of all lines are collected directly in the result
+                linedata = data
+            else:
+                linedata = []
+            for match in _TOKEN_PATTERN.finditer(line.strip()):
+                if match.group("int"):
+                    linedata.append(int(match.group("int")))
+                elif match.group("float"):
+                    linedata.append(float(match.group("float")))
+                elif match.group("logical"):
+                    lowlog = match.group("logical").lower()
+                    linedata.append(lowlog == "yes")
+                elif match.group("str"):
+                    linedata.append(match.group("str"))
+                elif match.group("qstr"):
+                    # Quoted strings are stored together with their quotation marks
+                    linedata.append(match.group("qstr"))
+            if not self._flatten_data:
+                data.append(linedata)
+        if len(data) == 1:
+            if isinstance(data[0], list) and len(data[0]) == 1:
+                return data[0][0]
+            return data[0]
+        return data
+
+
+
+class HsdDictWalker:
+    """Traverses a Python dictionary and reports its content as HSD events.
+
+    Args:
+        eventhandler: Event handler receiving the HSD events generated during the traversal.
+            When not specified, the events are printed.
+    """
+
+    def __init__(self, eventhandler: HsdEventHandler = None):
+
+        self._eventhandler: HsdEventHandler = (
+            HsdEventPrinter() if eventhandler is None else eventhandler)
+
+
+    def walk(self, dictobj):
+        """Walks through the dictionary and generates HSD events.
+
+        Args:
+            dictobj: Dictionary to walk through.
+        """
+        handler = self._eventhandler
+
+        for tag, content in dictobj.items():
+
+            # Attribute entries are passed together with the tag they belong to
+            if tag.endswith((ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX)):
+                continue
+
+            tag_hsdattrib = dictobj.get(tag + HSD_ATTRIB_SUFFIX)
+            tag_attrib = dictobj.get(tag + ATTRIB_SUFFIX)
+
+            if isinstance(content, dict):
+                # Single tag with child tags
+                handler.open_tag(tag, tag_attrib, tag_hsdattrib)
+                self.walk(content)
+                handler.close_tag(tag)
+                continue
+
+            if isinstance(content, list) and content and isinstance(content[0], dict):
+                # Repeated tag, attributes are looked up for each occurrence separately
+                for pos, block in enumerate(content):
+                    block_hsdattrib = tag_hsdattrib[pos] if tag_hsdattrib else None
+                    block_attrib = tag_attrib[pos] if tag_attrib else None
+                    handler.open_tag(tag, block_attrib, block_hsdattrib)
+                    if None not in block:
+                        self.walk(block)
+                    else:
+                        # Occurrence carrying data, which is stored under the key None
+                        handler.add_text(_to_text(block[None]))
+                    handler.close_tag(tag)
+                continue
+
+            # Tag carrying data
+            handler.open_tag(tag, tag_attrib, tag_hsdattrib)
+            handler.add_text(_to_text(content))
+            handler.close_tag(tag)
+
+
+def _to_text(obj):
+    """Returns the HSD text representation of a list, a numpy array or a single item."""
+    if isinstance(obj, list):
+        return _list_to_hsd(obj)
+    if np is not None and isinstance(obj, np.ndarray):
+        # Arrays are converted via their list representation
+        return _list_to_hsd(obj.tolist())
+    return _item_to_hsd(obj)
+
+
+def _list_to_hsd(lst):
+    """Returns the HSD text representation of a list.
+
+    The items are separated by spaces. If the first entry of the list is a list itself, every
+    entry is treated as a list of items and written into a separate line.
+    """
+    if lst and isinstance(lst[0], list):
+        return "\n".join(" ".join(map(_item_to_hsd, row)) for row in lst)
+    return " ".join(map(_item_to_hsd, lst))
+
+
+def _item_to_hsd(item):
+    """Returns the HSD text representation of a single item.
+
+    Raises:
+        TypeError: If the item is neither a bool, an int, a float nor a string.
+    """
+    # bool must be tested before int, since bool is a subclass of int
+    if isinstance(item, bool):
+        text = "Yes" if item else "No"
+    elif isinstance(item, (int, float)):
+        text = str(item)
+    elif isinstance(item, str):
+        text = _str_to_hsd(item)
+    else:
+        raise TypeError("Data type {} can not be converted to HSD string".format(type(item)))
+    return text
+
+
+def _str_to_hsd(string):
+    """Returns the string enclosed in quotation marks, if quoting is necessary.
+
+    Raises:
+        ValueError: If the string contains more than one type of quotation character.
+    """
+    has_quote = [char in string for char in QUOTING_CHARS]
+    nfound = sum(has_quote)
+
+    if nfound > 1:
+        raise ValueError("String '{}' can not be quoted correctly".format(string))
+
+    if nfound == 1:
+        # A string starting with a quotation character is passed on without adding quotes
+        if string[0] in QUOTING_CHARS:
+            return string
+        # Use the quotation character which does not appear in the string
+        quote = QUOTING_CHARS[1] if has_quote[0] else QUOTING_CHARS[0]
+        return quote + string + quote
+
+    if any(special in string for special in SPECIAL_CHARS):
+        return QUOTING_CHARS[0] + string + QUOTING_CHARS[0]
+    return string
diff --git a/src/hsd/eventhandler.py b/src/hsd/eventhandler.py
new file mode 100644
index 0000000..0ef52cb
--- /dev/null
+++ b/src/hsd/eventhandler.py
@@ -0,0 +1,78 @@
+#--------------------------------------------------------------------------------------------------#
+#  hsd-python: package for manipulating HSD-formatted data in Python                               #
+#  Copyright (C) 2011 - 2021  DFTB+ developers group                                               #
+#  Licensed under the BSD 2-clause license.                                                        #
+#--------------------------------------------------------------------------------------------------#
+#
+"""
+Contains an event handler base class.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Optional
+
+
+class HsdEventHandler(ABC):
+    """Interface to be implemented by every handler of HSD events."""
+
+    @abstractmethod
+    def open_tag(self, tagname: str, attrib: Optional[str], hsdattrib: Optional[dict]):
+        """Signals the opening of a tag.
+
+        Args:
+            tagname: Name of the opened tag.
+            attrib: Attribute of the tag as string or None.
+            hsdattrib: Dictionary with the options created during the processing in the
+                hsd-parser.
+        """
+
+    @abstractmethod
+    def close_tag(self, tagname: str):
+        """Signals the closing of a tag.
+
+        Args:
+            tagname: Name of the closed tag.
+        """
+
+    @abstractmethod
+    def add_text(self, text: str):
+        """Passes text (data) belonging to the current tag.
+
+        Args:
+           text: Text in the current tag.
+        """
+
+
+
+class HsdEventPrinter(HsdEventHandler):
+    """Minimal demonstration class for event handlers.
+
+    This specific implementation prints the received events, indented according to the nesting
+    level of the tags. Subclasses should override the public methods to customize the behavior.
+    """
+
+    def __init__(self):
+        """Initializes the default event printer."""
+        self._indentlevel = 0
+        self._indentstr = "  "
+
+
+    def open_tag(self, tagname: str, attrib: str, hsdattrib: dict):
+        prefix = self._indentstr * self._indentlevel
+        print(f"{prefix}OPENING TAG: {tagname}")
+        print(f"{prefix}ATTRIBUTE: {attrib}")
+        print(f"{prefix}HSD ATTRIBUTE: {str(hsdattrib)}")
+        # Events within the tag are printed one level deeper
+        self._indentlevel += 1
+
+
+    def close_tag(self, tagname: str):
+        self._indentlevel -= 1
+        prefix = self._indentstr * self._indentlevel
+        print(f"{prefix}CLOSING TAG: {tagname}")
+
+
+    def add_text(self, text: str):
+        prefix = self._indentstr * self._indentlevel
+        print(f"{prefix}Received text: {text}")
diff --git a/src/hsd/formatter.py b/src/hsd/formatter.py
new file mode 100644
index 0000000..70c4ca6
--- /dev/null
+++ b/src/hsd/formatter.py
@@ -0,0 +1,115 @@
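+#--------------------------------------------------------------------------------------------------#
+#  hsd-python: package for manipulating HSD-formatted data in Python                               #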
+#  Copyright (C) 2011 - 2021  DFTB+ developers group                                               #
+#  Licensed under the BSD 2-clause license.                                                        #
+#--------------------------------------------------------------------------------------------------#
+#
+"""
+Provides an event based formatter to create HSD dumps
+"""
+
+from typing import List, TextIO, Union
+from hsd.common import HSD_ATTRIB_EQUAL, HSD_ATTRIB_NAME
+from hsd.eventhandler import HsdEventHandler
+
+
+_INDENT_STR = "  "
+
+
+class HsdFormatter(HsdEventHandler):
+    """Implements an event driven HSD formatter.
+
+    The formatter writes the HSD representation of the received events into a file like
+    object.
+
+    Args:
+        fobj: File like object to write the formatted output to.
+        use_hsd_attribs: Whether HSD attributes passed to the formatter should
+            be considered, when formatting the output (default: True)
+    """
+
+    def __init__(self, fobj, use_hsd_attribs=True):
+        super().__init__()
+        self._fobj: TextIO = fobj
+        self._use_hsd_attribs: bool = use_hsd_attribs
+        # Number of the tags currently open
+        self._level: int = 0
+        # Current indentation depth of the output
+        self._indent_level: int = 0
+        # Whether last node on current level should/was followed by an
+        # equal sign. (None = unspecified)
+        self._followed_by_equal: List[Union[bool, None]] = []
+        # Number of children of the currently open nodes (first entry belongs to the top level)
+        self._nr_children: List[int] = [0]
+
+
+    def open_tag(self, tagname: str, attrib: str, hsdattrib: dict):
+        """Writes the name and the attribute of the tag being opened.
+
+        Args:
+            tagname: Name of the tag being opened.
+            attrib: Attribute of the tag or None.
+            hsdattrib: HSD-attributes of the tag or None.
+
+        Raises:
+            ValueError: If the attribute is neither None nor a string.
+        """
+
+        if attrib is None:
+            attribstr = ""
+        elif not isinstance(attrib, str):
+            msg = f"Invalid attribute data type ({str(type(attrib))}) for "\
+                f"'{tagname}'"
+            raise ValueError(msg)
+        else:
+            attribstr = " [" + attrib + "]"
+
+        # The tag is the first child of its parent, so the separator after the parent's name
+        # (equal sign or opening brace) has not been written yet
+        if self._level and not self._nr_children[-1]:
+            # Look up, whether previous (containing) node should be followed by
+            # an equal sign
+            equal = self._followed_by_equal[-1]
+            if equal:
+                self._fobj.write(" = ")
+                indentstr = ""
+            else:
+                self._fobj.write(" {\n")
+                self._indent_level += 1
+                indentstr = self._indent_level * _INDENT_STR
+        else:
+            indentstr = self._indent_level * _INDENT_STR
+
+        # Use the original tag name, if it had been recorded in the HSD-attributes
+        if self._use_hsd_attribs and hsdattrib is not None:
+            tagname = hsdattrib.get(HSD_ATTRIB_NAME, tagname)
+
+        self._fobj.write(f"{indentstr}{tagname}{attribstr}")
+
+        # Previous (containing) node has now one children more
+        self._nr_children[-1] += 1
+
+        # Currently opened node has no children so far.
+        self._nr_children.append(0)
+        self._level += 1
+
+        equal = None
+        if hsdattrib is not None and self._use_hsd_attribs:
+            equal = hsdattrib.get(HSD_ATTRIB_EQUAL)
+        self._followed_by_equal.append(equal)
+
+
+    def close_tag(self, tagname: str):
+        """Finishes the output of the tag being closed.
+
+        Args:
+            tagname: Name of the tag being closed (not used by the formatter).
+        """
+
+        nr_children = self._nr_children.pop(-1)
+        equal = self._followed_by_equal.pop(-1)
+        if not nr_children:
+            # Tag without any content
+            self._fobj.write(" {}\n")
+        elif not equal:
+            # Content was written within braces, which must be closed now
+            self._indent_level -= 1
+            indentstr = self._indent_level * _INDENT_STR
+            self._fobj.write(f"{indentstr}}}\n")
+        self._level -= 1
+
+
+    def add_text(self, text: str):
+        """Writes the text (data) of the current tag.
+
+        Args:
+            text: Text in the current tag.
+        """
+
+        equal = self._followed_by_equal[-1]
+        multiline = "\n" in text
+        # If unspecified, single line text follows an equal sign, unless the containing node
+        # is followed by an equal sign already
+        if equal is None and not multiline:
+            if len(self._followed_by_equal) > 1:
+                equal = not self._followed_by_equal[-2]
+            else:
+                equal = True
+        if equal:
+            self._fobj.write(" = ")
+            self._followed_by_equal[-1] = True
+        else:
+            # Text is written within braces with every line indented
+            self._indent_level += 1
+            indentstr = self._indent_level *  _INDENT_STR
+            self._fobj.write(f" {{\n{indentstr}")
+            text = text.replace("\n", "\n" + indentstr)
+
+        self._fobj.write(text)
+        self._fobj.write("\n")
+        self._nr_children[-1] += 1
diff --git a/src/hsd/io.py b/src/hsd/io.py
new file mode 100644
index 0000000..f0f4f22
--- /dev/null
+++ b/src/hsd/io.py
@@ -0,0 +1,208 @@
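+#--------------------------------------------------------------------------------------------------#
+#  hsd-python: package for manipulating HSD-formatted data in Python                               #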
+#  Copyright (C) 2011 - 2021  DFTB+ developers group                                               #
+#  Licensed under the BSD 2-clause license.                                                        #
+#--------------------------------------------------------------------------------------------------#
+#
+"""
+Provides functionality to dump Python structures to HSD
+"""
+import io
+from typing import Union, TextIO
+from hsd.dict import HsdDictWalker, HsdDictBuilder
+from hsd.formatter import HsdFormatter
+from hsd.parser import HsdParser
+
+
+_INDENT_STR = "  "
+
+
+
+def load(hsdfile: Union[TextIO, str], lower_tag_names: bool = False,
+         include_hsd_attribs: bool = False, flatten_data: bool = False) -> dict:
+    """Reads HSD-formatted data from a file and returns it as a Python dictionary.
+
+    Args:
+        hsdfile: Name of the file or file like object containing the HSD data.
+        lower_tag_names: Whether all tag names should be converted to lower-case
+            (practical, when the input should be treated case insensitive). If
+            ``include_hsd_attribs`` is set as well, the original tag names are
+            stored among the HSD attributes.
+        include_hsd_attribs: Whether the HSD-attributes (processing related
+            attributes, like original tag name, line information, etc.) should
+            be stored. Use it, if you wish to keep the formatting of the data
+            close to the original on writing (e.g. lowered tag names
+            converted back to their original form, equal signs between parent
+            and only child kept, instead of converted to curly braces).
+        flatten_data: Whether multiline data in the HSD input should be
+            flattened into a single list. Otherwise a list of lists is created,
+            with one list for every line (default).
+
+    Returns:
+        Dictionary representing the HSD data.
+
+    Examples:
+        See :func:`hsd.load_string` for examples of usage.
+    """
+    builder = HsdDictBuilder(flatten_data=flatten_data, lower_tag_names=lower_tag_names,
+                             include_hsd_attribs=include_hsd_attribs)
+    hsdparser = HsdParser(eventhandler=builder)
+    if not isinstance(hsdfile, str):
+        # File like object, parse it directly
+        hsdparser.parse(hsdfile)
+    else:
+        with open(hsdfile, "r") as fobj:
+            hsdparser.parse(fobj)
+    return builder.hsddict
+
+
+def load_string(hsdstr: str, lower_tag_names: bool = False,
+                include_hsd_attribs: bool = False, flatten_data: bool = False) -> dict:
+    """Converts a string with HSD-formatted data into a Python dictionary.
+
+    Args:
+        hsdstr: String with HSD-formatted data.
+        lower_tag_names: Whether all tag names should be converted to lower-case
+            (practical, when the input should be treated case insensitive). If
+            ``include_hsd_attribs`` is set as well, the original tag names are
+            stored among the HSD attributes.
+        include_hsd_attribs: Whether the HSD-attributes (processing related
+            attributes, like original tag name, line information, etc.) should
+            be stored. Use it, if you wish to keep the formatting of the data
+            close to the original one on writing (e.g. lowered tag names
+            converted back to their original form, equal signs between parent
+            and only child kept, instead of converted to curly braces).
+        flatten_data: Whether multiline data in the HSD input should be
+            flattened into a single list. Otherwise a list of lists is created,
+            with one list for every line (default).
+
+    Returns:
+        Dictionary representing the HSD data.
+
+    Examples:
+        >>> hsdstr = \"\"\"
+        ... Dftb {
+        ...   Scc = Yes
+        ...   Filling {
+        ...     Fermi {
+        ...       Temperature [Kelvin] = 100
+        ...     }
+        ...   }
+        ... }
+        ... \"\"\"
+        >>> hsd.load_string(hsdstr)
+        {'Dftb': {'Scc': True, 'Filling': {'Fermi': {'Temperature': 100,
+        'Temperature.attrib': 'Kelvin'}}}}
+
+        In order to ease the case-insensitive handling of the input, the tag
+        names can be converted to lower case during reading using the
+        ``lower_tag_names`` option.
+
+        >>> hsd.load_string(hsdstr, lower_tag_names=True)
+        {'dftb': {'scc': True, 'filling': {'fermi': {'temperature': 100,
+        'temperature.attrib': 'Kelvin'}}}}
+
+        The original tag names (together with additional information like the
+        line number of a tag) can be recorded, if the ``include_hsd_attribs``
+        option is set:
+
+        >>> data = hsd.load_string(hsdstr, lower_tag_names=True,
+        ... include_hsd_attribs=True)
+
+        Each tag in the dictionary will have a corresponding ".hsdattrib" entry
+        with the recorded data:
+
+        >>> data["dftb.hsdattrib"]
+        {'equal': False, 'line': 1, 'name': 'Dftb'}
+
+        This additional data can be then also used to format the tags in the
+        original style, when writing the data in HSD-format again. Compare:
+
+        >>> hsd.dump_string(data)
+        'dftb {\\n  scc = Yes\\n  filling {\\n    fermi {\\n
+        temperature [Kelvin] = 100\\n    }\\n  }\\n}\\n'
+
+        versus
+
+        >>> hsd.dump_string(data, use_hsd_attribs=True)
+        'Dftb {\\n  Scc = Yes\\n  Filling {\\n    Fermi {\\n
+        Temperature [Kelvin] = 100\\n    }\\n  }\\n}\\n'
+
+    """
+    # Wrap the string into a file like object, so that it can be processed by load()
+    return load(io.StringIO(hsdstr), lower_tag_names=lower_tag_names,
+                include_hsd_attribs=include_hsd_attribs, flatten_data=flatten_data)
+
+
+def dump(data: dict, hsdfile: Union[TextIO, str],
+         use_hsd_attribs: bool = False):
+    """Writes data into a file in HSD format.
+
+    Args:
+        data: Dictionary like object to be written in HSD format
+        hsdfile: Name of the file or file like object to write the result to.
+        use_hsd_attribs: Whether HSD attributes in the data structure should
+            be used to format the output.
+
+            This option can be used for example to restore the original tag
+            names, if the file was loaded with the ``lower_tag_names`` and
+            ``include_hsd_attribs`` options set, or to keep the equal signs
+            between parent and contained only child.
+
+    Raises:
+        TypeError: if object is not a dictionary instance.
+
+    Examples:
+
+        See :func:`hsd.load_string` for an example.
+    """
+    if not isinstance(data, dict):
+        raise TypeError("Invalid object type")
+    if not isinstance(hsdfile, str):
+        # File like object, write into it directly
+        _dump_dict(data, hsdfile, use_hsd_attribs)
+        return
+    with open(hsdfile, "w") as fobj:
+        _dump_dict(data, fobj, use_hsd_attribs)
+
+
+def dump_string(data: dict, use_hsd_attribs: bool = False) -> str:
+    """Returns the HSD-formatted string representation of an object.
+
+    Args:
+        data: Dictionary like object to be written in HSD format.
+        use_hsd_attribs: Whether HSD attributes of the data structure should
+            be used to format the output (e.g. to restore original mixed case
+            tag names)
+
+    Returns:
+        HSD formatted string.
+
+    Examples:
+        >>> hsdtree = {
+        ...     'Dftb': {
+        ...         'Scc': True,
+        ...         'Filling': {
+        ...             'Fermi': {
+        ...                 'Temperature': 100,
+        ...                 'Temperature.attrib': 'Kelvin'
+        ...             }
+        ...         }
+        ...     }
+        ... }
+        >>> hsd.dump_string(hsdtree)
+        'Dftb {\\n  Scc = Yes\\n  Filling {\\n    Fermi {\\n
+        Temperature [Kelvin] = 100\\n    }\\n  }\\n}\\n'
+
+        See also :func:`hsd.load_string` for an example.
+
+    """
+    # Collect the output in an in-memory text buffer
+    with io.StringIO() as buffer:
+        dump(data, buffer, use_hsd_attribs=use_hsd_attribs)
+        return buffer.getvalue()
+
+
+def _dump_dict(obj: dict, fobj: TextIO, use_hsd_attribs: bool):
+    """Walks through the dictionary and writes its HSD representation into a file object."""
+    HsdDictWalker(HsdFormatter(fobj, use_hsd_attribs=use_hsd_attribs)).walk(obj)
diff --git a/src/hsd/parser.py b/src/hsd/parser.py
new file mode 100644
index 0000000..ae66efe
--- /dev/null
+++ b/src/hsd/parser.py
@@ -0,0 +1,342 @@
+#--------------------------------------------------------------------------------------------------#
+#  hsd-python: package for manipulating HSD-formatted data in Python                               #
+#  Copyright (C) 2011 - 2021  DFTB+ developers group                                               #
+#  Licensed under the BSD 2-clause license.                                                        #
+#--------------------------------------------------------------------------------------------------#
+#
+"""
+Contains the event-generating HSD-parser.
+"""
+from typing import Optional, TextIO, Union
+from hsd import common
+from hsd.eventhandler import HsdEventHandler, HsdEventPrinter
+
+
+# Error codes embedded in the message of the HsdError raised by the parser
+SYNTAX_ERROR = 1
+UNCLOSED_TAG_ERROR = 2
+UNCLOSED_ATTRIB_ERROR = 3
+UNCLOSED_QUOTATION_ERROR = 4
+ORPHAN_TEXT_ERROR = 5
+
+# Special characters looked for outside of an attribute specification
+_GENERAL_SPECIALS = "{}[]<=\"'#;"
+
+# Special characters looked for inside an attribute specification ([...])
+_ATTRIB_SPECIALS = "]\"'"
+
+
+class HsdParser:
+    """Event based parser for the HSD format.
+
+    Arguments:
+        eventhandler: Object which should handle the HSD-events triggered
+            during parsing. When not specified, HsdEventPrinter() is used.
+
+    Examples:
+        >>> from io import StringIO
+        >>> dictbuilder = hsd.HsdDictBuilder()
+        >>> parser = hsd.HsdParser(eventhandler=dictbuilder)
+        >>> hsdfile = StringIO(\"\"\"
+        ... Hamiltonian {
+        ...     Dftb {
+        ...         Scc = Yes
+        ...         Filling = Fermi {
+        ...             Temperature [Kelvin] = 100
+        ...         }
+        ...     }
+        ... }
+        ... \"\"\")
+        >>> parser.parse(hsdfile)
+        >>> dictbuilder.hsddict
+        {'Hamiltonian': {'Dftb': {'Scc': True, 'Filling': {'Fermi':
+        {'Temperature': 100, 'Temperature.attrib': 'Kelvin'}}}}}
+    """
+
+    def __init__(self, eventhandler: Optional[HsdEventHandler] = None):
+        """Initializes the parser.
+
+        Args:
+            eventhandler: Instance of the HsdEventHandler class or its children.
+        """
+        if eventhandler is None:
+            self._eventhandler = HsdEventPrinter()
+        else:
+            self._eventhandler = eventhandler
+
+        self._fname = ""                   # name of file being processed
+        self._checkstr = _GENERAL_SPECIALS # special characters to look for
+        self._oldcheckstr = ""             # buffer for checkstr
+        self._opened_tags = []             # info about opened tags
+        self._buffer = []                  # buffering plain text between lines
+        self._attrib = None                # attribute for current tag
+        self._hsdattrib = {}               # hsd-options for current tag
+        self._currline = 0                 # nr. of current line in file
+        self._after_equal_sign = False     # last tag was opened with equal sign
+        self._inside_attrib = False        # parser inside attrib specification
+        self._inside_quote = False         # parser inside quotation
+        self._has_child = True             # Whether current node has a child already
+        self._has_text = False             # whether current node contains text already
+        self._oldbefore = ""               # buffer for tagname
+
+
+    def parse(self, fobj: Union[TextIO, str]):
+        """Parses the provided file-like object.
+
+        The parser will process the data and trigger the corresponding events
+        in the eventhandler which was passed at initialization.
+
+        Args:
+            fobj: File like object or name of a file containing the data.
+
+        Raises:
+            HsdError: If the input contains a syntax error, an unclosed tag,
+                attribute or quotation, or text outside of any tag.
+        """
+        isfilename = isinstance(fobj, str)
+        if isfilename:
+            fp = open(fobj, "r")
+            self._fname = fobj
+        else:
+            fp = fobj
+        try:
+            for line in fp.readlines():
+                self._parse(line)
+                self._currline += 1
+        finally:
+            # Close a file opened here even if parsing raised an error
+            if isfilename:
+                fp.close()
+
+        # Check for errors
+        if self._opened_tags:
+            line0 = self._opened_tags[-1][1]
+        else:
+            line0 = 0
+        if self._inside_quote:
+            self._error(UNCLOSED_QUOTATION_ERROR, (line0, self._currline))
+        elif self._inside_attrib:
+            self._error(UNCLOSED_ATTRIB_ERROR, (line0, self._currline))
+        elif self._opened_tags:
+            self._error(UNCLOSED_TAG_ERROR, (line0, line0))
+        elif ("".join(self._buffer)).strip():
+            self._error(ORPHAN_TEXT_ERROR, (line0, self._currline))
+
+
+    def _parse(self, line):
+        """Parses a given line."""
+
+        while True:
+            sign, before, after = _splitbycharset(line, self._checkstr)
+
+            # End of line
+            if not sign:
+                if self._inside_quote:
+                    self._buffer.append(before)
+                elif self._after_equal_sign:
+                    self._text("".join(self._buffer) + before.strip())
+                    self._closetag()
+                    self._after_equal_sign = False
+                elif not self._inside_attrib:
+                    self._buffer.append(before)
+                elif before.strip():
+                    self._error(SYNTAX_ERROR, (self._currline, self._currline))
+                break
+
+            # Special character is escaped
+            elif before.endswith("\\") and not before.endswith("\\\\"):
+                self._buffer.append(before + sign)
+
+            # Equal sign
+            elif sign == "=":
+                # Ignore if followed by "{" (DFTB+ compatibility)
+                if after.lstrip().startswith("{"):
+                    # _oldbefore may already contain the tagname, if the
+                    # tagname was followed by an attribute -> append
+                    self._oldbefore += before
+                else:
+                    self._hsdattrib[common.HSD_ATTRIB_EQUAL] = True
+                    self._starttag(before, False)
+                    self._after_equal_sign = True
+
+            # Opening tag by curly brace
+            elif sign == "{":
+                self._hsdattrib[common.HSD_ATTRIB_EQUAL] = False
+                self._starttag(before, self._after_equal_sign)
+                self._buffer = []
+                self._after_equal_sign = False
+
+            # Closing tag by curly brace
+            elif sign == "}":
+                self._text("".join(self._buffer) + before)
+                self._buffer = []
+                # If 'test { a = 12 }' occurs, curly brace closes two tags
+                if self._after_equal_sign:
+                    self._after_equal_sign = False
+                    self._closetag()
+                self._closetag()
+
+            # Closing tag by semicolon
+            elif sign == ";" and self._after_equal_sign:
+                self._after_equal_sign = False
+                # Include buffered text (e.g. quoted or escaped parts of the value)
+                self._text("".join(self._buffer) + before)
+                self._closetag()
+
+            # Comment line
+            elif sign == "#":
+                self._buffer.append(before)
+                after = ""
+
+            # Opening attribute specification
+            elif sign == "[":
+                if "".join(self._buffer).strip():
+                    self._error(SYNTAX_ERROR, (self._currline, self._currline))
+                self._oldbefore = before
+                self._buffer = []
+                self._inside_attrib = True
+                self._opened_tags.append(("[", self._currline, None, None, None))
+                self._checkstr = _ATTRIB_SPECIALS
+
+            # Closing attribute specification
+            elif sign == "]":
+                value = "".join(self._buffer) + before
+                self._attrib = value.strip()
+                self._inside_attrib = False
+                self._buffer = []
+                self._opened_tags.pop()
+                self._checkstr = _GENERAL_SPECIALS
+
+            # Quoting strings
+            elif sign in ("'", '"'):
+                if self._inside_quote:
+                    self._checkstr = self._oldcheckstr
+                    self._inside_quote = False
+                    self._buffer.append(before + sign)
+                    self._opened_tags.pop()
+                else:
+                    self._oldcheckstr = self._checkstr
+                    self._checkstr = sign
+                    self._inside_quote = True
+                    self._buffer.append(before + sign)
+                    self._opened_tags.append(('"', self._currline, None, None, None))
+
+            # Interrupt
+            elif sign == "<" and not self._after_equal_sign:
+                txtinc = after.startswith("<<")
+                hsdinc = after.startswith("<+")
+                if txtinc:
+                    self._text("".join(self._buffer) + before)
+                    self._buffer = []
+                    self._eventhandler.add_text(self._include_txt(after[2:]))
+                    break
+                if hsdinc:
+                    self._include_hsd(after[2:])
+                    break
+                self._buffer.append(before + sign)
+
+            else:
+                self._error(SYNTAX_ERROR, (self._currline, self._currline))
+
+            line = after
+
+
+    def _text(self, text):
+        """Passes non-blank text to the event handler.
+
+        Raises a syntax error when the current node already has a child.
+        """
+        stripped = text.strip()
+        if stripped:
+            if self._has_child:
+                self._error(SYNTAX_ERROR, (self._currline, self._currline))
+            self._eventhandler.add_text(stripped)
+            self._has_text = True
+
+
+    def _starttag(self, tagname, closeprev):
+        """Opens a new tag and triggers the corresponding event.
+
+        Args:
+            tagname: Text preceding the opening sign. It must be blank if the
+                tag name had been stored in _oldbefore already.
+            closeprev: Whether closing this tag should close its parent, too.
+        """
+        txt = "".join(self._buffer)
+        if txt:
+            self._text(txt)
+        if self._has_text:
+            self._error(SYNTAX_ERROR, (self._currline, self._currline))
+        tagname_stripped = tagname.strip()
+        if self._oldbefore:
+            if tagname_stripped:
+                self._error(SYNTAX_ERROR, (self._currline, self._currline))
+            else:
+                tagname_stripped = self._oldbefore.strip()
+        if len(tagname_stripped.split()) > 1:
+            self._error(SYNTAX_ERROR, (self._currline, self._currline))
+        self._hsdattrib[common.HSD_ATTRIB_LINE] = self._currline
+        self._eventhandler.open_tag(tagname_stripped, self._attrib,
+                                    self._hsdattrib)
+        self._opened_tags.append(
+            (tagname_stripped, self._currline, closeprev, True, False))
+        self._has_child = False
+        self._buffer = []
+        self._oldbefore = ""
+        self._attrib = None
+        self._hsdattrib = {}
+
+
+    def _closetag(self):
+        """Closes the last opened tag (and its parent, if requested at opening)."""
+        if not self._opened_tags:
+            self._error(SYNTAX_ERROR, (0, self._currline))
+        self._buffer = []
+        tag, _, closeprev, self._has_child, self._has_text = self._opened_tags.pop()
+        self._eventhandler.close_tag(tag)
+        if closeprev:
+            self._closetag()
+
+
+    def _include_hsd(self, fname):
+        """Parses the given HSD file with a new parser using the same event handler."""
+        fname = common.unquote(fname.strip())
+        parser = HsdParser(eventhandler=self._eventhandler)
+        parser.parse(fname)
+
+
+    @staticmethod
+    def _include_txt(fname):
+        """Returns the content of the given text file."""
+        fname = common.unquote(fname.strip())
+        with open(fname, "r") as fp:
+            txt = fp.read()
+        return txt
+
+
+    def _error(self, errorcode, lines):
+        """Raises an HsdError with the error code and the (zero-based) line range."""
+        error_msg = (
+            "Parsing error ({}) between lines {} - {} in file '{}'.".format(
+                errorcode, lines[0] + 1, lines[1] + 1, self._fname))
+        raise common.HsdError(error_msg)
+
+
+
+def _splitbycharset(txt, charset):
+    """Splits a string at the first occurrence of a character in a set.
+
+    Args:
+        txt: Text to split.
+        charset: Chars to look for.
+
+    Returns:
+        Tuple (char, before, after). Char is the character which had been found
+        (or empty string if nothing was found). Before is the substring before
+        the splitting character (or the entire string). After is the substring
+        after the splitting character (or empty string).
+    """
+    for firstpos, char in enumerate(txt):
+        if char in charset:
+            return txt[firstpos], txt[:firstpos], txt[firstpos + 1:]
+    return '', txt, ''
diff --git a/test/test.hsd b/test/test.hsd
new file mode 100644
index 0000000..20f246b
--- /dev/null
+++ b/test/test.hsd
@@ -0,0 +1,58 @@
+Geometry {
+  GenFormat = {
+    3 C
+    O H
+    1 1   0.0  0.0  0.0
+    2 2   0.0  0.5  0.5
+    3 2   0.0  0.5 -0.5
+  }
+}
+Driver {}
+Hamiltonian = DFTB {
+  Scc = Yes
+  SccTolerance = 1e-10
+  MaxSccIterations = 1000
+  Mixer = Broyden {}
+  MaxAngularMomentum {
+    H = SelectedShells {"s" "s" }
+    O = SelectedShells {"s" "p" }
+  }
+  Dispersion = SlaterKirkwood {
+    PolarRadiusCharge [AA^3,AA,] = {
+      1.030000    3.800000    2.820000
+    }
+  }
+  # Adding arbitrary comment, this should not change parsing result
+  Filling = Fermi {
+    Temperature [Kelvin] = 1e-08
+  }
+  KPointsAndWeights {
+    SupercellFolding = {
+      2 0 0
+      0 2 0
+      0 0 2
+      0.5 0.5 0.5
+    }
+  }
+  ElectricField {
+    PointCharges {
+      CoordsAndCharges = {
+        -0.94 -9.44 1.2 1.0
+        -0.94 -9.44 1.2 -1.0
+      }
+    }
+  }
+  SelectSomeAtoms = 1 2  " 3 : -3 "
+}
+Analysis {
+  ProjectStates {
+    Region {
+      Atoms = 1 2 3
+      Label = "region1"
+    }
+    REgion {
+      Atoms = 1 2 3
+      Label = "region2"
+    }
+  }
+}
diff --git a/test/test_dict.py b/test/test_dict.py
new file mode 100644
index 0000000..ebdfef2
--- /dev/null
+++ b/test/test_dict.py
@@ -0,0 +1,202 @@
+#!/bin/env python3
+#--------------------------------------------------------------------------------------------------#
+#  hsd-python: package for manipulating HSD-formatted data in Python                               #
+#  Copyright (C) 2011 - 2021  DFTB+ developers group                                               #
+#  Licensed under the BSD 2-clause license.                                                        #
+#--------------------------------------------------------------------------------------------------#
+#
+"""Tests for the dictbuilder class"""
+
+import io
+import pytest
+import hsd
+
+# Some abbreviations
+_HSD_LINE = hsd.HSD_ATTRIB_LINE
+_HSD_EQUAL = hsd.HSD_ATTRIB_EQUAL
+_HSD_NAME = hsd.HSD_ATTRIB_NAME
+
+
+# General test list format for valid tests
+# [("Test name", ("HSD input string", expected dictionary outcome))]
+
+# Tests without hsd attribute recording
+_TESTS_NO_HSDATTRIB = [
+    (
+        "Simple", (
+            "Test {}",
+            {"Test": {}},
+        )
+    ),
+    (
+        "Data with quoted strings", (
+            "O = SelectedShells { \"s\" \"p\" }",
+            {"O": {"SelectedShells": ['"s"', '"p"']}},
+        )
+    ),
+    (
+        "Attribute containing comma", (
+            "PolarRadiusCharge [AA^3,AA,] = {\n1.030000  3.800000  2.820000\n}",
+            {"PolarRadiusCharge": [1.03, 3.8, 2.82], "PolarRadiusCharge.attrib": "AA^3,AA,"},
+        )
+    ),
+    (
+        "Duplicate node entry", (
+            "a { b = 1 }\na { b = 2 }\n",
+            {"a.attrib": [None, None], "a": [{"b": 1}, {"b": 2}]},
+        )
+    ),
+    (
+        "Duplicate value entry", (
+            "a = 1\na = 2",
+            {"a.attrib": [None, None], "a": [{None: 1}, {None: 2}]},
+        )
+    ),
+]
+_TESTS_NO_HSDATTRIB_NAMES, _TESTS_NO_HSDATTRIB_CASES = zip(*_TESTS_NO_HSDATTRIB)
+
+
+# Tests with HSD attribute recording
+# The input string should be formatted the same way as it comes out from the formatter since
+# these tests are also used to test backwards direction (dictionary -> string).
+_TESTS_HSDATTRIB = [
+    (
+        "Simple", (
+            "Test {}\n",
+            {"Test.hsdattrib": {_HSD_LINE: 0, _HSD_EQUAL: False}, "Test": {}}
+        )
+    ),
+    (
+        "Data with quoted strings", (
+            "O = SelectedShells {\n  \"s\" \"p\"\n}\n",
+            {
+                "O.hsdattrib": {_HSD_EQUAL: True, _HSD_LINE: 0},
+                "O": {
+                    "SelectedShells.hsdattrib": {_HSD_LINE: 0, _HSD_EQUAL: False},
+                    "SelectedShells": ['"s"', '"p"']
+                    }
+            }
+        )
+    ),
+    (
+        "Duplicate node", (
+            "a {\n  b = 1\n}\na {\n  b = 2\n}\n",
+            {
+                "a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: False},
+                                {_HSD_LINE: 3, _HSD_EQUAL: False}],
+                "a.attrib": [None, None],
+                "a": [
+                    {"b.hsdattrib": {_HSD_LINE: 1, _HSD_EQUAL: True}, "b": 1},
+                    {"b.hsdattrib": {_HSD_LINE: 4, _HSD_EQUAL: True}, "b": 2}
+                ]
+            },
+        )
+    ),
+    (
+        "Duplicate value", (
+            "a = 1\na = 2\n",
+            {
+                "a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: True}, {_HSD_LINE: 1, _HSD_EQUAL: True}],
+                "a.attrib": [None, None],
+                "a": [{None: 1}, {None: 2}]
+            },
+        )
+    ),
+    (
+        "Triple value with attrib", (
+            "a = 1\na = 2\na [someunit] {\n  3\n}\n",
+            {
+                "a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: True}, {_HSD_LINE: 1, _HSD_EQUAL: True},
+                                {_HSD_LINE: 2, _HSD_EQUAL: False}],
+                "a.attrib": [None, None, "someunit"],
+                "a": [{None: 1}, {None: 2}, {None: 3}]
+            },
+        )
+    ),
+
+]
+_TESTS_HSDATTRIB_NAMES, _TESTS_HSDATTRIB_CASES = zip(*_TESTS_HSDATTRIB)
+
+
+# Tests with HSD attribute recording and tag name lowering switched on
+# The input string should be formatted the same way as it comes out from the formatter since
+# these tests are also used to test backwards direction (dictionary -> string).
+_TESTS_HSDATTRIB_LOWER = [
+    (
+        "Simple", (
+            "Test {}\n",
+            {"test.hsdattrib": {_HSD_NAME: "Test", _HSD_LINE: 0, _HSD_EQUAL: False}, "test": {}}
+        )
+    ),
+]
+_TESTS_HSDATTRIB_LOWER_NAMES, _TESTS_HSDATTRIB_LOWER_CASES = zip(*_TESTS_HSDATTRIB_LOWER)
+
+
+@pytest.mark.parametrize(
+    "hsdstr,hsddict",
+    _TESTS_NO_HSDATTRIB_CASES,
+    ids=_TESTS_NO_HSDATTRIB_NAMES
+)
+def test_dict_builder_nohsdattr(hsdstr, hsddict):
+    """Check the dictionary built from HSD input when HSD attributes are not recorded."""
+    builder = hsd.HsdDictBuilder(include_hsd_attribs=False)
+    hsd_parser = hsd.HsdParser(eventhandler=builder)
+    hsd_parser.parse(io.StringIO(hsdstr))
+    built = builder.hsddict
+    assert built == hsddict
+
+
+@pytest.mark.parametrize(
+    "hsdstr,hsddict",
+    _TESTS_HSDATTRIB_CASES,
+    ids=_TESTS_HSDATTRIB_NAMES
+)
+def test_dict_builder_hsdattr(hsdstr, hsddict):
+    """Check the dictionary built from HSD input when HSD attributes are recorded."""
+    builder = hsd.HsdDictBuilder(include_hsd_attribs=True)
+    hsd_parser = hsd.HsdParser(eventhandler=builder)
+    hsd_parser.parse(io.StringIO(hsdstr))
+    built = builder.hsddict
+    assert built == hsddict
+
+
+@pytest.mark.parametrize(
+    "hsdstr,hsddict",
+    _TESTS_HSDATTRIB_LOWER_CASES,
+    ids=_TESTS_HSDATTRIB_LOWER_NAMES
+)
+def test_dict_builder_hsdattr_lower(hsdstr, hsddict):
+    """Check the dictionary built with HSD attributes recorded and tag names lowered."""
+    builder = hsd.HsdDictBuilder(include_hsd_attribs=True, lower_tag_names=True)
+    hsd_parser = hsd.HsdParser(eventhandler=builder)
+    hsd_parser.parse(io.StringIO(hsdstr))
+    built = builder.hsddict
+    assert built == hsddict
+
+
+@pytest.mark.parametrize(
+    "hsdstr,hsddict",
+    _TESTS_HSDATTRIB_CASES,
+    ids=_TESTS_HSDATTRIB_NAMES
+)
+def test_dict_walker_hsdattr(hsdstr, hsddict):
+    """Check the string obtained by walking a dictionary with HSD attributes in use."""
+    stream = io.StringIO()
+    walker = hsd.HsdDictWalker(hsd.HsdFormatter(stream, use_hsd_attribs=True))
+    walker.walk(hsddict)
+    dumped = stream.getvalue()
+    assert dumped == hsdstr
+
+
+@pytest.mark.parametrize(
+    "hsdstr,hsddict",
+    _TESTS_HSDATTRIB_LOWER_CASES,
+    ids=_TESTS_HSDATTRIB_LOWER_NAMES
+)
+def test_dict_walker_hsdattr_lower(hsdstr, hsddict):
+    """Check that walking a dictionary with lowered keys restores the original tag names."""
+    stream = io.StringIO()
+    walker = hsd.HsdDictWalker(hsd.HsdFormatter(stream, use_hsd_attribs=True))
+    walker.walk(hsddict)
+    dumped = stream.getvalue()
+    assert dumped == hsdstr
diff --git a/test/test_dump.py b/test/test_dump.py
new file mode 100644
index 0000000..ff2fd29
--- /dev/null
+++ b/test/test_dump.py
@@ -0,0 +1,61 @@
+#!/bin/env python3
+#--------------------------------------------------------------------------------------------------#
+#  hsd-python: package for manipulating HSD-formatted data in Python                               #
+#  Copyright (C) 2011 - 2021  DFTB+ developers group                                               #
+#  Licensed under the BSD 2-clause license.                                                        #
+#--------------------------------------------------------------------------------------------------#
+#
+import numpy as np
+import hsd
+
+if __name__ == "__main__":
+    INPUT = {
+        "Driver": {},
+        "Hamiltonian": {
+            "DFTB": {
+                "Scc": True,
+                "SccTolerance": 1e-10,
+                "MaxSccIterations": 1000,
+                "Mixer": {
+                    "Broyden": {}
+                },
+                "MaxAngularMomentum": {
+                    "O": "p",
+                    "H": "s"
+                },
+                "Filling": {
+                    "Fermi": {
+                        "Temperature": 1e-8,
+                        "Temperature.attrib": "Kelvin"
+                    }
+                },
+                "KPointsAndWeights": {
+                    "SupercellFolding": [[2, 0, 0], [0, 2, 0], [0, 0, 2],
+                                         [0.5, 0.5, 0.5]]
+                },
+                "ElectricField": {
+                    "PointCharges": {
+                        "CoordsAndCharges": np.array(
+                            [[-0.94, -9.44, 1.2, 1.0],
+                             [-0.94, -9.44, 1.2, -1.0]])
+                    }
+                },
+                "SelectSomeAtoms": [1, 2, "3:-3"]
+            }
+        },
+        "Analysis": {
+            "ProjectStates": {
+                "Region": [
+                    {
+                        "Atoms": [1, 2, 3],
+                        "Label": "region1",
+                    },
+                    {
+                        "Atoms": np.array([1, 2, 3]),
+                        "Label": "region2",
+                    }
+                ]
+            }
+        }
+    }
+    print(hsd.dump_string(INPUT))
diff --git a/test/test_parser.py b/test/test_parser.py
new file mode 100644
index 0000000..b2da98e
--- /dev/null
+++ b/test/test_parser.py
@@ -0,0 +1,138 @@
+#!/bin/env python3
+#--------------------------------------------------------------------------------------------------#
+#  hsd-python: package for manipulating HSD-formatted data in Python                               #
+#  Copyright (C) 2011 - 2021  DFTB+ developers group                                               #
+#  Licensed under the BSD 2-clause license.                                                        #
+#--------------------------------------------------------------------------------------------------#
+#
+import io
+import pytest
+import hsd
+
+_OPEN_TAG_EVENT = 1
+_CLOSE_TAG_EVENT = 2
+_ADD_TEXT_EVENT = 3
+
+_HSD_LINE = hsd.HSD_ATTRIB_LINE
+_HSD_EQUAL = hsd.HSD_ATTRIB_EQUAL
+_HSD_NAME = hsd.HSD_ATTRIB_NAME
+
+_VALID_TESTS = [
+    (
+        "Simple", (
+            """Test {} """,
+            [
+                (_OPEN_TAG_EVENT, "Test", None, {_HSD_LINE: 0, _HSD_EQUAL: False}),
+                (_CLOSE_TAG_EVENT, "Test"),
+            ]
+        )
+    ),
+    (
+        "Data with quoted strings", (
+            """O = SelectedShells { "s" "p" }""",
+            [
+                (_OPEN_TAG_EVENT, "O", None, {_HSD_LINE: 0, _HSD_EQUAL: True}),
+                (_OPEN_TAG_EVENT, 'SelectedShells', None, {_HSD_LINE: 0, _HSD_EQUAL: False}),
+                (_ADD_TEXT_EVENT, '"s" "p"'),
+                (_CLOSE_TAG_EVENT, 'SelectedShells'),
+                (_CLOSE_TAG_EVENT, 'O'),
+            ]
+        )
+    ),
+    (
+        "Attribute containing comma", (
+            """PolarRadiusCharge [AA^3,AA,] = {\n1.030000  3.800000  2.820000\n}""",
+            [
+                (_OPEN_TAG_EVENT, "PolarRadiusCharge", "AA^3,AA,",
+                 {_HSD_LINE: 0, _HSD_EQUAL: False}),
+                (_ADD_TEXT_EVENT, '1.030000  3.800000  2.820000'),
+                (_CLOSE_TAG_EVENT, 'PolarRadiusCharge'),
+            ]
+        )
+    ),
+    (
+        "Variable", (
+            """$Variable = 12\nValue = $Variable\n""",
+            [
+                (_OPEN_TAG_EVENT, "$Variable", None, {_HSD_LINE: 0, _HSD_EQUAL: True}),
+                (_ADD_TEXT_EVENT, "12"),
+                (_CLOSE_TAG_EVENT, "$Variable"),
+                (_OPEN_TAG_EVENT, "Value", None, {_HSD_LINE: 1, _HSD_EQUAL: True}),
+                (_ADD_TEXT_EVENT, "$Variable"),
+                (_CLOSE_TAG_EVENT, "Value")
+            ]
+        )
+    ),
+]
+
+_VALID_TEST_NAMES, _VALID_TEST_CASES = zip(*_VALID_TESTS)
+
+
+_FAILING_TESTS = [
+    (
+        "Node-less data", (
+            """a = 2\n15\n"""
+        )
+    ),
+    (
+        "Node-less data at start", (
+            """15\na = 2\na = 4\n"""
+        )
+    ),
+    (
+        "Node-less data in child", (
+            """a {\n12\nb = 5\n}\n"""
+        )
+    ),
+    (
+        "Quoted tag name", (
+            """\"mytag\" = 12\n"""
+        )
+    ),
+
+]
+
+_FAILING_TEST_NAMES, _FAILING_TEST_CASES = zip(*_FAILING_TESTS)
+
+
+class _TestEventHandler(hsd.HsdEventHandler):
+    """Event handler recording every received event as a tuple in ``events``."""
+    def __init__(self):
+        self.events = []
+
+    def open_tag(self, tagname, attrib, hsdattrib):
+        self.events.append((_OPEN_TAG_EVENT, tagname, attrib, hsdattrib))
+
+    def close_tag(self, tagname):
+        self.events.append((_CLOSE_TAG_EVENT, tagname))
+
+    def add_text(self, text):
+        self.events.append((_ADD_TEXT_EVENT, text))
+
+
+@pytest.mark.parametrize(
+    "hsd_input,expected_events",
+    _VALID_TEST_CASES,
+    ids=_VALID_TEST_NAMES
+)
+def test_parser_events(hsd_input, expected_events):
+    """Check the events triggered by the parser for valid HSD input"""
+    recorder = _TestEventHandler()
+    hsd_parser = hsd.HsdParser(eventhandler=recorder)
+    hsd_parser.parse(io.StringIO(hsd_input))
+    recorded = recorder.events
+    assert recorded == expected_events
+
+
+@pytest.mark.parametrize(
+    "hsd_input",
+    _FAILING_TEST_CASES,
+    ids=_FAILING_TEST_NAMES
+)
+def test_parser_exceptions(hsd_input):
+    """Check that the parser raises HsdError for invalid HSD input"""
+    recorder = _TestEventHandler()
+    hsd_parser = hsd.HsdParser(eventhandler=recorder)
+    stream = io.StringIO(hsd_input)
+    with pytest.raises(hsd.HsdError):
+        hsd_parser.parse(stream)