Skip to content

Commit

Permalink
Add HSD parser and builder for dict-based representation
Browse files Browse the repository at this point in the history
  • Loading branch information
aradi committed Feb 2, 2020
1 parent cf02c39 commit c9b8553
Show file tree
Hide file tree
Showing 11 changed files with 741 additions and 64 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright (c) 2020 Bálint Aradi, Universität Bremen
Copyright (c) 2011-2020 DFTB+ developers group

All rights reserved.

Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
HSD — Human-friendly Structured Data
************************************

This Python package contains utilities to write (and soon also to read) files in
This Python package contains utilities to read and write files in
the Human-friendly Structured Data (HSD) format.

It is licensed under the *BSD 2-clause license*.
Expand Down
13 changes: 13 additions & 0 deletions src/hsd/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#------------------------------------------------------------------------------#
# hsd: package for manipulating HSD-formatted data #
# Copyright (C) 2011 - 2020 DFTB+ developers group #
# #
# See the LICENSE file for terms of usage and distribution. #
#------------------------------------------------------------------------------#
#
"""
Central module for the hsd package
"""
from .dump import dump, dumps
from .parser import HsdParser
from .dictbuilder import HsdDictBuilder
66 changes: 66 additions & 0 deletions src/hsd/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#------------------------------------------------------------------------------#
# hsd: package for manipulating HSD-formatted data #
# Copyright (C) 2011 - 2020 DFTB+ developers group #
# #
# See the LICENSE file for terms of usage and distribution. #
#------------------------------------------------------------------------------#
#
"""
Implements common functionalities for the HSD package
"""


class HsdException(Exception):
    """Common base class for all exceptions raised by the HSD package."""


class HsdQueryError(HsdException):
    """Base class for errors detected by the HsdQuery object.

    Attributes:
        tag: Name of the tag with the error (or empty string).
        file: Name of the file where the error occurred (or empty string).
        line: Line where the error occurred (or -1).
    """

    def __init__(self, msg="", node=None):
        """Initializes the exception.

        Args:
            msg: Error message.
            node: HSD element where the error occurred (optional). When
                given, the tag name, file name and line number are read
                from it via ``node.gethsd()``; the tag name falls back to
                ``node.tag``.
        """
        super().__init__(msg)
        if node is None:
            self.tag = ""
            self.file = ""
            self.line = -1
        else:
            # NOTE(review): the second argument of gethsd() is assumed to be
            # the value returned when the entry is missing -- confirm against
            # the node class. The fallbacks follow the documented contract:
            # empty string for the file name, -1 for the line number.
            self.tag = node.gethsd(HSDATTR_TAG, node.tag)
            self.file = node.gethsd(HSDATTR_FILE, "")
            self.line = node.gethsd(HSDATTR_LINE, -1)


class HsdParserError(HsdException):
    """Common base class for all errors related to parsing."""


def unquote(txt):
    """Strip one pair of matching quotes enclosing a string.

    Args:
        txt: String to process.

    Returns:
        The string without its first and last character, if it is at least
        two characters long and both of those characters are the same quote
        character (single or double). Otherwise the string is returned
        unchanged.
    """
    # Anything shorter than two characters can not be enclosed in quotes
    if len(txt) < 2:
        return txt
    first, last = txt[0], txt[-1]
    if first in "\"'" and last == first:
        return txt[1:-1]
    return txt


# Name for default attribute (when attribute name is not specified)
DEFAULT_ATTRIBUTE = "attribute"


# Names of the HSD-specific node properties. HSDATTR_TAG, HSDATTR_FILE and
# HSDATTR_LINE are the keys HsdQueryError passes to node.gethsd() to obtain
# the tag name, file name and line number of an offending node.
# NOTE(review): HSDATTR_PROC and HSDATTR_EQUAL are not used in this module;
# presumably they mark a node as processed and record an assignment written
# with "=" -- confirm against the code consuming them.
HSDATTR_PROC = "processed"
HSDATTR_EQUAL = "equal"
HSDATTR_FILE = "file"
HSDATTR_LINE = "line"
HSDATTR_TAG = "tag"
101 changes: 101 additions & 0 deletions src/hsd/dictbuilder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#------------------------------------------------------------------------------#
# hsd: package for manipulating HSD-formatted data #
# Copyright (C) 2011 - 2020 DFTB+ developers group #
# #
# See the LICENSE file for terms of usage and distribution. #
#------------------------------------------------------------------------------#
#
"""
Contains an event-driven builder for dictionary based (JSON-like) structure
"""
import re
from .parser import HsdEventHandler

__all__ = ['HsdDictBuilder']


# Tokenizer for a single line of data text. Every match sets one of the
# following named groups, the alternatives being tried in this order:
#   int     -- optionally signed integer delimited by whitespace/line ends
#   float   -- optionally signed decimal number with optional exponent
#   logical -- "yes" or "no" in any letter case
#   qstr    -- text enclosed in a pair of identical quotes (single or double);
#              the group contains the quotes as well
#   str     -- any other run of characters up to the next whitespace or the
#              end of the line
# The pattern is compiled in verbose mode, so the whitespace and line breaks
# inside the pattern text are ignored by the regex engine.
_TOKEN_PATTERN = re.compile(r"""
(?:\s*(?:^|(?<=\s))(?P<int>[+-]?[0-9]+)(?:\s*$|\s+))
|
(?:\s*(?:^|(?<=\s))
(?P<float>[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)(?:$|(?=\s+)))
|
(?:\s*(?:^|(?<=\s))(?P<logical>[Yy][Ee][Ss]|[Nn][Oo])(?:$|(?=\s+)))
|
(?:(?P<qstr>(?P<quote>['"]).*?(?P=quote)) | (?P<str>.+?)(?:$|\s+))
""", re.VERBOSE | re.MULTILINE)


class HsdDictBuilder(HsdEventHandler):
    """Deserializes HSD into nested dictionaries.

    The builder is driven by open_tag() / add_text() / close_tag() events.
    Every tag becomes a key of the dictionary representing the enclosing
    block. Its value is either the dictionary built from the child tags or,
    if text was added to the tag, the data converted from that text. A tag
    occurring several times within the same block is stored as a list with
    one item per occurrence. Attributes of a tag are stored in the enclosing
    block under the key "<tagname>.<attributename>".

    Args:
        flatten_data: If True, the tokens of all lines of a text are
            collected into a single flat list. Otherwise every line
            contributes a list of its own.
    """

    def __init__(self, flatten_data=False):
        HsdEventHandler.__init__(self)
        self._hsddict = {}
        self._curblock = self._hsddict
        self._parentblocks = []
        self._data = None
        self._flatten_data = flatten_data
        # Names of the tags which occurred more than once in the current
        # block (their value is a list of occurrences), and the same
        # bookkeeping for every enclosing block. Needed as list-valued data
        # can not be told apart from a list of occurrences by type alone.
        self._curmultitags = set()
        self._parentmultitags = []


    def open_tag(self, tagname, options, hsdoptions):
        """Starts a new block for a tag and records the tag's attributes.

        Args:
            tagname: Name of the opened tag.
            options: Dictionary with the attributes of the tag. Each entry
                is stored in the enclosing block as "tagname.attrname".
            hsdoptions: Not used by this builder.
        """
        for attrname, attrvalue in options.items():
            self._curblock[tagname + '.' + attrname] = attrvalue
        self._parentblocks.append(self._curblock)
        self._parentmultitags.append(self._curmultitags)
        self._curblock = {}
        self._curmultitags = set()


    def close_tag(self, tagname):
        """Finishes the current tag and stores its content in the parent.

        The content is the data converted from the text of the tag if any
        text has been added, otherwise the dictionary built from its child
        tags. If the parent already contains the tag, the entry is turned
        into (or extended as) a list of the individual occurrences.

        Args:
            tagname: Name of the closed tag.
        """
        parentblock = self._parentblocks.pop()
        multitags = self._parentmultitags.pop()
        if self._data is None:
            content = self._curblock
        else:
            content = self._data
            self._data = None
        if tagname not in parentblock:
            parentblock[tagname] = content
        elif tagname in multitags:
            parentblock[tagname].append(content)
        else:
            # Second occurrence: wrap the first one explicitly. Checking the
            # type of the stored value instead would mistake list data of
            # the first occurrence for an already existing collection.
            parentblock[tagname] = [parentblock[tagname], content]
            multitags.add(tagname)
        self._curblock = parentblock
        self._curmultitags = multitags


    def add_text(self, text):
        """Converts the text of the current tag into data.

        Args:
            text: Text content of the tag.
        """
        self._data = self._text_to_data(text)


    @property
    def hsddict(self):
        """Returns the dictionary which has been built"""
        return self._hsddict


    def _text_to_data(self, txt):
        """Converts text into integers, floats, logicals and strings.

        Args:
            txt: Text to convert, possibly consisting of several lines.

        Returns:
            The converted value itself, if a single line with a single
            token was found. The list of converted values, if there was
            only one line or flattening has been requested. A list with one
            list per line otherwise. Quoted strings keep their quotes.
        """
        data = []
        for line in txt.split("\n"):
            # With flattening all tokens go directly into the common list
            linedata = data if self._flatten_data else []
            for match in _TOKEN_PATTERN.finditer(line.strip()):
                if match.group("int"):
                    linedata.append(int(match.group("int")))
                elif match.group("float"):
                    linedata.append(float(match.group("float")))
                elif match.group("logical"):
                    lowlog = match.group("logical").lower()
                    linedata.append(lowlog == "yes")
                elif match.group("str"):
                    linedata.append(match.group("str"))
                elif match.group("qstr"):
                    linedata.append(match.group("qstr"))
            if not self._flatten_data:
                data.append(linedata)
        if len(data) == 1:
            # Unpack single line / single item results
            if isinstance(data[0], list) and len(data[0]) == 1:
                return data[0][0]
            return data[0]
        return data
78 changes: 16 additions & 62 deletions src/hsd.py → src/hsd/dump.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
#!/usr/bin/env python3
#------------------------------------------------------------------------------#
# hsd: package for manipulating HSD-formatted data #
# Copyright (C) 2020 Bálint Aradi, Universität Bremen #
# Copyright (C) 2011 - 2020 DFTB+ developers group #
# #
# See the LICENSE file for terms of usage and distribution. #
#------------------------------------------------------------------------------#
#
"""
Provides functionality to convert Python structures to HSD
Provides functionality to dump Python structures to HSD
"""
import io
import numpy as np
from .common import DEFAULT_ATTRIBUTE

__all__ = ['dump', 'dumps']

Expand All @@ -20,8 +20,11 @@
# String quoting delimiters (must be at least two)
_QUOTING_CHARS = "\"'"

# Suffix for appending attributes
_ATTRIBUTE_SUFFIX = ".attribute"
# Special characters
_SPECIAL_CHARS = "{}[]= "


_ATTRIBUTE_SUFFIX = "." + DEFAULT_ATTRIBUTE


def dump(obj, fobj):
Expand Down Expand Up @@ -130,63 +133,14 @@ def _item_to_hsd(item):


def _str_to_hsd(string):
is_present = [qc in string for qc in _QUOTING_CHARS]
if sum(is_present) > 1:
present = [qc in string for qc in _QUOTING_CHARS]
nquotetypes = sum(present)
delimiter = ""
if not nquotetypes and True in [sc in string for sc in _SPECIAL_CHARS]:
delimiter = _QUOTING_CHARS[0]
elif nquotetypes == 1 and string[0] not in _QUOTING_CHARS:
delimiter = _QUOTING_CHARS[1] if present[0] else _QUOTING_CHARS[0]
elif nquotetypes > 1:
msg = "String '{}' can not be quoted correctly".format(string)
raise ValueError(msg)
delimiter = _QUOTING_CHARS[0] if not is_present[0] else _QUOTING_CHARS[1]
return delimiter + string + delimiter



if __name__ == "__main__":
INPUT = {
"Driver": {},
"Hamiltonian": {
"DFTB": {
"Scc": True,
"SccTolerance": 1e-10,
"MaxSccIterations": 1000,
"Mixer": {
"Broyden": {}
},
"MaxAngularMomentum": {
"O": "p",
"H": "s"
},
"Filling": {
"Fermi": {
"Temperature": 1e-8,
"Temperature.attribute": "Kelvin"
}
},
"KPointsAndWeights": {
"SupercellFolding": [[2, 0, 0], [0, 2, 0], [0, 0, 2],
[0.5, 0.5, 0.5]]
},
"ElectricField": {
"PointCharges": {
"CoordsAndCharges": np.array(
[[-0.94, -9.44, 1.2, 1.0],
[-0.94, -9.44, 1.2, -1.0]])
}
},
"SelectSomeAtoms": [1, 2, "3:-3"]
}
},
"Analysis": {
"ProjectStates": {
"Region": [
{
"Atoms": [1, 2, 3],
"Label": "region1",
},
{
"Atoms": np.array([1, 2, 3]),
"Label": "region2",
}
]
}
}
}
print(dumps(INPUT))
Loading

0 comments on commit c9b8553

Please sign in to comment.