-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add HSD parser and builder for dict-based representation
- Loading branch information
Showing
11 changed files
with
741 additions
and
64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#------------------------------------------------------------------------------# | ||
# hsd: package for manipulating HSD-formatted data # | ||
# Copyright (C) 2011 - 2020 DFTB+ developers group # | ||
# # | ||
# See the LICENSE file for terms of usage and distribution. # | ||
#------------------------------------------------------------------------------# | ||
# | ||
""" | ||
Central module for the hsd package | ||
""" | ||
from .dump import dump, dumps | ||
from .parser import HsdParser | ||
from .dictbuilder import HsdDictBuilder |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#------------------------------------------------------------------------------# | ||
# hsd: package for manipulating HSD-formatted data # | ||
# Copyright (C) 2011 - 2020 DFTB+ developers group # | ||
# # | ||
# See the LICENSE file for terms of usage and distribution. # | ||
#------------------------------------------------------------------------------# | ||
# | ||
""" | ||
Implements common functionalities for the HSD package | ||
""" | ||
|
||
|
||
class HsdException(Exception):
    """Base class for exceptions in the HSD package.

    The other exception classes of the package derive from this one, so
    catching it catches all of them.
    """
|
||
|
||
class HsdQueryError(HsdException):
    """Base class for errors detected by the HsdQuery object.

    Attributes:
        tag: Name of the tag with the error (empty string if no node was
            passed).
        file: File where the error occurred (-1 if not available).
        line: Line where the error occurred (None if not available).
    """

    def __init__(self, msg="", node=None):
        """Initializes the exception.

        Args:
            msg: Error message.
            node: HSD element where the error occurred (optional). When
                given, it must offer a ``gethsd(name, default)`` method and
                a ``tag`` attribute.
        """
        super().__init__(msg)
        # NOTE(review): the fallback for ``file`` is -1 and the one for
        # ``line`` is None; these look swapped for a file name / line number
        # pair, but are kept as they are because callers may rely on them.
        if node is not None:
            self.tag = node.gethsd(HSDATTR_TAG, node.tag)
            self.file = node.gethsd(HSDATTR_FILE, -1)
            self.line = node.gethsd(HSDATTR_LINE, None)
        else:
            self.tag = ""
            self.file = -1
            self.line = None
|
||
|
||
class HsdParserError(HsdException):
    """Base class for parser related errors."""
|
||
|
||
def unquote(txt):
    """Returns a string without its enclosing quotes.

    Args:
        txt: String to process.

    Returns:
        The string with its first and last character removed, if it has at
        least two characters and starts and ends with the same quote
        character (single or double). Otherwise the string is returned
        unchanged.
    """
    # The length check keeps a lone quote character from being treated as
    # both the opening and the closing quote.
    if len(txt) >= 2 and (txt[0] in "\"'") and txt[-1] == txt[0]:
        return txt[1:-1]
    return txt
|
||
|
||
# Name for default attribute (when attribute name is not specified)
DEFAULT_ATTRIBUTE = "attribute"


# Names of the HSD-specific options. HSDATTR_TAG, HSDATTR_FILE and
# HSDATTR_LINE are the keys looked up via gethsd() when an HsdQueryError is
# built from a node.
HSDATTR_PROC = "processed"
HSDATTR_EQUAL = "equal"
HSDATTR_FILE = "file"
HSDATTR_LINE = "line"
HSDATTR_TAG = "tag"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#------------------------------------------------------------------------------# | ||
# hsd: package for manipulating HSD-formatted data # | ||
# Copyright (C) 2011 - 2020 DFTB+ developers group # | ||
# # | ||
# See the LICENSE file for terms of usage and distribution. # | ||
#------------------------------------------------------------------------------# | ||
# | ||
""" | ||
Contains an event-driven builder for dictionary based (JSON-like) structure | ||
""" | ||
import re | ||
from .parser import HsdEventHandler | ||
|
||
__all__ = ['HsdDictBuilder'] | ||
|
||
|
||
_TOKEN_PATTERN = re.compile(r""" | ||
(?:\s*(?:^|(?<=\s))(?P<int>[+-]?[0-9]+)(?:\s*$|\s+)) | ||
| | ||
(?:\s*(?:^|(?<=\s)) | ||
(?P<float>[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)(?:$|(?=\s+))) | ||
| | ||
(?:\s*(?:^|(?<=\s))(?P<logical>[Yy][Ee][Ss]|[Nn][Oo])(?:$|(?=\s+))) | ||
| | ||
(?:(?P<qstr>(?P<quote>['"]).*?(?P=quote)) | (?P<str>.+?)(?:$|\s+)) | ||
""", re.VERBOSE | re.MULTILINE) | ||
|
||
|
||
class HsdDictBuilder(HsdEventHandler):
    """Deserializes HSD into nested dictionaries.

    The builder consumes open_tag / close_tag / add_text events and builds a
    JSON-like structure: a tag with child tags becomes a dictionary, a tag
    with text content becomes the converted data. A tag name occurring more
    than once within the same block is collected into a list. Attributes of
    a tag are stored in the enclosing block under the key
    ``"<tagname>.<attribute name>"``.

    Args:
        flatten_data: If True, the tokens of a multi-line text are collected
            into one flat list. Otherwise each line yields its own list.
    """

    def __init__(self, flatten_data=False):
        super().__init__()
        self._hsddict = {}
        # Block currently being filled and the stack of its ancestors.
        self._curblock = self._hsddict
        self._parentblocks = []
        # Converted text of the currently open tag (None if it had no text).
        self._data = None
        self._flatten_data = flatten_data

    def open_tag(self, tagname, options, hsdoptions):
        """Opens a new block for the tag and records its attributes.

        Args:
            tagname: Name of the tag being opened.
            options: Mapping of attribute names to attribute values.
            hsdoptions: HSD-specific options of the tag (not used here).
        """
        # Attributes are stored next to the tag, in the enclosing block.
        for attrname, attrvalue in options.items():
            self._curblock[tagname + '.' + attrname] = attrvalue
        self._parentblocks.append(self._curblock)
        self._curblock = {}

    def close_tag(self, tagname):
        """Stores the content of the tag being closed in its parent block.

        Args:
            tagname: Name of the tag being closed.
        """
        parentblock = self._parentblocks.pop()
        prevcontent = parentblock.get(tagname)
        # A second occurrence of the tag turns the stored entry into a list.
        # NOTE(review): an entry which is already a list (e.g. data with
        # several tokens) is taken for such a list of occurrences, so a
        # repeated tag gets appended to the data of the first occurrence.
        if prevcontent is not None and not isinstance(prevcontent, list):
            prevcontent = [prevcontent]
            parentblock[tagname] = prevcontent
        # Text content takes precedence over the (then unused) child block.
        if self._data is None:
            content = self._curblock
        else:
            content = self._data
            self._data = None
        if prevcontent is None:
            parentblock[tagname] = content
        else:
            prevcontent.append(content)
        self._curblock = parentblock

    def add_text(self, text):
        """Converts the text of the current tag and keeps it until closing.

        Args:
            text: Text content of the currently open tag.
        """
        self._data = self._text_to_data(text)

    @property
    def hsddict(self):
        """Returns the dictionary which has been built"""
        return self._hsddict

    def _text_to_data(self, txt):
        """Converts text into integers, floats, booleans and strings.

        Args:
            txt: Text to convert, possibly consisting of several lines.

        Returns:
            The single value if the text yields exactly one token on one
            line, the list of values if only one line (or, when flattening,
            only one value) was found, otherwise the list of all lines (or
            the flat list of all values when flattening).
        """
        data = []
        for line in txt.split("\n"):
            # When flattening, all lines append to the very same list.
            linedata = data if self._flatten_data else []
            for match in _TOKEN_PATTERN.finditer(line.strip()):
                if match.group("int"):
                    linedata.append(int(match.group("int")))
                elif match.group("float"):
                    linedata.append(float(match.group("float")))
                elif match.group("logical"):
                    lowlog = match.group("logical").lower()
                    linedata.append(lowlog == "yes")
                elif match.group("str"):
                    linedata.append(match.group("str"))
                elif match.group("qstr"):
                    # Quoted strings are stored with their quotes.
                    linedata.append(match.group("qstr"))
            if not self._flatten_data:
                data.append(linedata)
        if len(data) == 1:
            if isinstance(data[0], list) and len(data[0]) == 1:
                return data[0][0]
            return data[0]
        return data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.