From c9b85535fb2f7deb3c5eb2a4d213e4358f559e9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Sun, 2 Feb 2020 19:39:26 +0100 Subject: [PATCH 01/31] Add HSD parser and builder for dict-based representation --- LICENSE | 2 +- README.rst | 2 +- src/hsd/__init__.py | 13 ++ src/hsd/common.py | 66 +++++++ src/hsd/dictbuilder.py | 101 ++++++++++ src/{hsd.py => hsd/dump.py} | 78 ++------ src/hsd/parser.py | 367 ++++++++++++++++++++++++++++++++++++ test/test.hsd | 58 ++++++ test/test_dictbuilder.py | 37 ++++ test/test_dump.py | 62 ++++++ test/test_parser.py | 19 ++ 11 files changed, 741 insertions(+), 64 deletions(-) create mode 100644 src/hsd/__init__.py create mode 100644 src/hsd/common.py create mode 100644 src/hsd/dictbuilder.py rename src/{hsd.py => hsd/dump.py} (66%) create mode 100644 src/hsd/parser.py create mode 100644 test/test.hsd create mode 100644 test/test_dictbuilder.py create mode 100644 test/test_dump.py create mode 100644 test/test_parser.py diff --git a/LICENSE b/LICENSE index c1beed0..36d474a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2020 Bálint Aradi, Universität Bremen +Copyright (c) 2011-2020 DFTB+ developers group All rights reserved. diff --git a/README.rst b/README.rst index eb7089d..cafbdf4 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ HSD — Human-friendly Structured Data ************************************ -This Python package contains utilities to write (and soon also to read) files in +This Python package contains utilities to read and write files in the Human-friendly Structured Data (HSD) format. It is licensed under the *BSD 2-clause license*. 
diff --git a/src/hsd/__init__.py b/src/hsd/__init__.py new file mode 100644 index 0000000..87c7544 --- /dev/null +++ b/src/hsd/__init__.py @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------------# +# hsd: package for manipulating HSD-formatted data # +# Copyright (C) 2011 - 2020 DFTB+ developers group # +# # +# See the LICENSE file for terms of usage and distribution. # +#------------------------------------------------------------------------------# +# +""" +Central module for the hsd package +""" +from .dump import dump, dumps +from .parser import HsdParser +from .dictbuilder import HsdDictBuilder diff --git a/src/hsd/common.py b/src/hsd/common.py new file mode 100644 index 0000000..6669d13 --- /dev/null +++ b/src/hsd/common.py @@ -0,0 +1,66 @@ +#------------------------------------------------------------------------------# +# hsd: package for manipulating HSD-formatted data # +# Copyright (C) 2011 - 2020 DFTB+ developers group # +# # +# See the LICENSE file for terms of usage and distribution. # +#------------------------------------------------------------------------------# +# +""" +Implements common functionalities for the HSD package +""" + + +class HsdException(Exception): + """Base class for exceptions in the HSD package.""" + pass + + +class HsdQueryError(HsdException): + """Base class for errors detected by the HsdQuery object. + + + Attributes: + filename: Name of the file where error occurred (or empty string). + line: Line where the error occurred (or -1). + tag: Name of the tag with the error (or empty string). + """ + + def __init__(self, msg="", node=None): + """Initializes the exception. + + Args: + msg: Error message + node: HSD element where error occurred (optional). 
+ """ + super().__init__(msg) + if node is not None: + self.tag = node.gethsd(HSDATTR_TAG, node.tag) + self.file = node.gethsd(HSDATTR_FILE, -1) + self.line = node.gethsd(HSDATTR_LINE, None) + else: + self.tag = "" + self.file = -1 + self.line = None + + +class HsdParserError(HsdException): + """Base class for parser related errors.""" + pass + + +def unquote(txt): + """Giving string without quotes if enclosed in those.""" + if len(txt) >= 2 and (txt[0] in "\"'") and txt[-1] == txt[0]: + return txt[1:-1] + return txt + + +# Name for default attribute (when attribute name is not specified) +DEFAULT_ATTRIBUTE = "attribute" + + +HSDATTR_PROC = "processed" +HSDATTR_EQUAL = "equal" +HSDATTR_FILE = "file" +HSDATTR_LINE = "line" +HSDATTR_TAG = "tag" diff --git a/src/hsd/dictbuilder.py b/src/hsd/dictbuilder.py new file mode 100644 index 0000000..e4804ef --- /dev/null +++ b/src/hsd/dictbuilder.py @@ -0,0 +1,101 @@ +#------------------------------------------------------------------------------# +# hsd: package for manipulating HSD-formatted data # +# Copyright (C) 2011 - 2020 DFTB+ developers group # +# # +# See the LICENSE file for terms of usage and distribution. 
# +#------------------------------------------------------------------------------# +# +""" +Contains an event-driven builder for dictionary based (JSON-like) structure +""" +import re +from .parser import HsdEventHandler + +__all__ = ['HsdDictBuilder'] + + +_TOKEN_PATTERN = re.compile(r""" +(?:\s*(?:^|(?<=\s))(?P<int>[+-]?[0-9]+)(?:\s*$|\s+)) +| +(?:\s*(?:^|(?<=\s)) +(?P<float>[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)(?:$|(?=\s+))) +| +(?:\s*(?:^|(?<=\s))(?P<logical>[Yy][Ee][Ss]|[Nn][Oo])(?:$|(?=\s+))) +| +(?:(?P<qstr>(?P<quote>['"]).*?(?P=quote)) | (?P<str>.+?)(?:$|\s+)) +""", re.VERBOSE | re.MULTILINE) + + +class HsdDictBuilder(HsdEventHandler): + """Deserializes HSD into nested dictionaries""" + + def __init__(self, flatten_data=False): + HsdEventHandler.__init__(self) + self._hsddict = {} + self._curblock = self._hsddict + self._parentblocks = [] + self._data = None + self._flatten_data = flatten_data + + + def open_tag(self, tagname, options, hsdoptions): + for attrname, attrvalue in options.items(): + self._curblock[tagname + '.' 
+ attrname] = attrvalue + self._parentblocks.append(self._curblock) + self._curblock = {} + + + def close_tag(self, tagname): + parentblock = self._parentblocks.pop(-1) + prevcontent = parentblock.get(tagname) + if prevcontent is not None and not isinstance(prevcontent, list): + prevcontent = [prevcontent] + parentblock[tagname] = prevcontent + if self._data is None: + content = self._curblock + else: + content = self._data + self._data = None + if prevcontent is None: + parentblock[tagname] = content + else: + prevcontent.append(content) + self._curblock = parentblock + + + def add_text(self, text): + self._data = self._text_to_data(text) + + + @property + def hsddict(self): + """Returns the dictionary which has been built""" + return self._hsddict + + + def _text_to_data(self, txt): + data = [] + for line in txt.split("\n"): + if self._flatten_data: + linedata = data + else: + linedata = [] + for match in _TOKEN_PATTERN.finditer(line.strip()): + if match.group("int"): + linedata.append(int(match.group("int"))) + elif match.group("float"): + linedata.append(float(match.group("float"))) + elif match.group("logical"): + lowlog = match.group("logical").lower() + linedata.append(lowlog == "yes") + elif match.group("str"): + linedata.append(match.group("str")) + elif match.group("qstr"): + linedata.append(match.group("qstr")) + if not self._flatten_data: + data.append(linedata) + if len(data) == 1: + if isinstance(data[0], list) and len(data[0]) == 1: + return data[0][0] + return data[0] + return data diff --git a/src/hsd.py b/src/hsd/dump.py similarity index 66% rename from src/hsd.py rename to src/hsd/dump.py index d4477c5..916a6d9 100644 --- a/src/hsd.py +++ b/src/hsd/dump.py @@ -1,16 +1,16 @@ -#!/usr/bin/env python3 #------------------------------------------------------------------------------# # hsd: package for manipulating HSD-formatted data # -# Copyright (C) 2020 Bálint Aradi, Universität Bremen # +# Copyright (C) 2011 - 2020 DFTB+ developers group # # # # 
See the LICENSE file for terms of usage and distribution. # #------------------------------------------------------------------------------# # """ -Provides functionality to convert Python structures to HSD +Provides functionality to dump Python structures to HSD """ import io import numpy as np +from .common import DEFAULT_ATTRIBUTE __all__ = ['dump', 'dumps'] @@ -20,8 +20,11 @@ # String quoting delimiters (must be at least two) _QUOTING_CHARS = "\"'" -# Suffix for appending attributes -_ATTRIBUTE_SUFFIX = ".attribute" +# Special characters +_SPECIAL_CHARS = "{}[]= " + + +_ATTRIBUTE_SUFFIX = "." + DEFAULT_ATTRIBUTE def dump(obj, fobj): @@ -130,63 +133,14 @@ def _item_to_hsd(item): def _str_to_hsd(string): - is_present = [qc in string for qc in _QUOTING_CHARS] - if sum(is_present) > 1: + present = [qc in string for qc in _QUOTING_CHARS] + nquotetypes = sum(present) + delimiter = "" + if not nquotetypes and True in [sc in string for sc in _SPECIAL_CHARS]: + delimiter = _QUOTING_CHARS[0] + elif nquotetypes == 1 and string[0] not in _QUOTING_CHARS: + delimiter = _QUOTING_CHARS[1] if present[0] else _QUOTING_CHARS[0] + elif nquotetypes > 1: msg = "String '{}' can not be quoted correctly".format(string) raise ValueError(msg) - delimiter = _QUOTING_CHARS[0] if not is_present[0] else _QUOTING_CHARS[1] return delimiter + string + delimiter - - - -if __name__ == "__main__": - INPUT = { - "Driver": {}, - "Hamiltonian": { - "DFTB": { - "Scc": True, - "SccTolerance": 1e-10, - "MaxSccIterations": 1000, - "Mixer": { - "Broyden": {} - }, - "MaxAngularMomentum": { - "O": "p", - "H": "s" - }, - "Filling": { - "Fermi": { - "Temperature": 1e-8, - "Temperature.attribute": "Kelvin" - } - }, - "KPointsAndWeights": { - "SupercellFolding": [[2, 0, 0], [0, 2, 0], [0, 0, 2], - [0.5, 0.5, 0.5]] - }, - "ElectricField": { - "PointCharges": { - "CoordsAndCharges": np.array( - [[-0.94, -9.44, 1.2, 1.0], - [-0.94, -9.44, 1.2, -1.0]]) - } - }, - "SelectSomeAtoms": [1, 2, "3:-3"] - } - }, - 
"Analysis": { - "ProjectStates": { - "Region": [ - { - "Atoms": [1, 2, 3], - "Label": "region1", - }, - { - "Atoms": np.array([1, 2, 3]), - "Label": "region2", - } - ] - } - } - } - print(dumps(INPUT)) diff --git a/src/hsd/parser.py b/src/hsd/parser.py new file mode 100644 index 0000000..751b090 --- /dev/null +++ b/src/hsd/parser.py @@ -0,0 +1,367 @@ +#------------------------------------------------------------------------------# +# hsd: package for manipulating HSD-formatted data # +# Copyright (C) 2011 - 2020 DFTB+ developers group # +# # +# See the LICENSE file for terms of usage and distribution. # +#------------------------------------------------------------------------------# +# +""" +Contains the event-generating HSD-parser. +""" +from collections import OrderedDict +import hsd.common as common + + +__all__ = ["HsdParser", + "SYNTAX_ERROR", "UNCLOSED_TAG_ERROR", "UNCLOSED_OPTION_ERROR", + "UNCLOSED_QUOTATION_ERROR", "ORPHAN_TEXT_ERROR"] + +SYNTAX_ERROR = 1 +UNCLOSED_TAG_ERROR = 2 +UNCLOSED_OPTION_ERROR = 3 +UNCLOSED_QUOTATION_ERROR = 4 +ORPHAN_TEXT_ERROR = 5 + +_GENERAL_SPECIALS = "{}[]<=\"'#;" +_OPTION_SPECIALS = ",]=\"'#{};" + + +class HsdEventHandler: + """Base class for event handler implementing simple printing""" + + def __init__(self): + """Initializes the default event handler""" + self._indentlevel = 0 + self._indentstr = " " + + + def open_tag(self, tagname, options, hsdoptions): + """Handler which is called when a tag is opened. + + It should be overridden in the application to handle the event in a + customized way. + + Args: + tagname: Name of the tag which had been opened. + options: Dictionary of the options (attributes) of the tag. + hsdoptions: Dictionary of the options created during the processing + in the hsd-parser. 
+ """ + indentstr = self._indentlevel * self._indentstr + print("{}OPENING TAG: {}".format(indentstr, tagname)) + print("{}OPTIONS: {}".format(indentstr, str(options))) + print("{}HSD OPTIONS: {}".format(indentstr, str(hsdoptions))) + self._indentlevel += 1 + + + def close_tag(self, tagname): + """Handler which is called when a tag is closed. + + It should be overridden in the application to handle the event in a + customized way. + + Args: + tagname: Name of the tag which had been closed. + """ + indentstr = self._indentlevel * self._indentstr + print("{}CLOSING TAG: {}".format(indentstr, tagname)) + self._indentlevel -= 1 + + + def add_text(self, text): + """Handler which is called with the text found inside a tag. + + It should be overridden in the application to handle the event in a + customized way. + + Args: + text: Text in the current tag. + """ + indentstr = self._indentlevel * self._indentstr + print("{}Received text: {}".format(indentstr, text)) + + +class HsdParser: + """Event based parser for the HSD format. + + The methods `open_tag()`, `close_tag()`, `add_text()` + and `_handle_error()` should be overridden by the actual application. + """ + + def __init__(self, defattrib=common.DEFAULT_ATTRIBUTE, eventhandler=None): + """Initializes the parser. + + Args: + defattrib: Name of the default attribute (default: 'attribute') + eventhandler: Instance of the HsdEventHandler class or its children. + """ + if eventhandler is None: + self._eventhandler = HsdEventHandler() + else: + self._eventhandler = eventhandler + + self._fname = "" # Name of file being processed + self._defattrib = defattrib.lower() # def. 
attribute name + self._checkstr = _GENERAL_SPECIALS # special characters to look for + self._oldcheckstr = "" # buffer for checkstr + self._opened_tags = [] # info about opened tags + self._buffer = [] # buffering plain text between lines + self._options = OrderedDict() # options for current tag + self._hsdoptions = OrderedDict() # hsd-options for current tag + self._key = "" # current option name + self._currline = 0 # nr. of current line in file + self._after_equal_sign = False # last tag was opened with equal sign + self._inside_option = False # parser inside option specification + self._inside_quote = False # parser inside quotation + self._has_child = False + self._oldbefore = "" + + + def feed(self, fobj): + """Feeds the parser with data. + + Args: + fobj: File like object or name of a file containing the data. + """ + isfilename = isinstance(fobj, str) + if isfilename: + fp = open(fobj, "r") + self._fname = fobj + else: + fp = fobj + for line in fp.readlines(): + self._parse(line) + self._currline += 1 + if isfilename: + fp.close() + + # Check for errors + if self._opened_tags: + line0 = self._opened_tags[-1][1] + else: + line0 = 0 + if self._inside_quote: + self._error(UNCLOSED_QUOTATION_ERROR, (line0, self._currline)) + elif self._inside_option: + self._error(UNCLOSED_OPTION_ERROR, (line0, self._currline)) + elif self._opened_tags: + self._error(UNCLOSED_TAG_ERROR, (line0, line0)) + elif ("".join(self._buffer)).strip(): + self._error(ORPHAN_TEXT_ERROR, (line0, self._currline)) + + + def _parse(self, line): + """Parses a given line.""" + + while True: + sign, before, after = _splitbycharset(line, self._checkstr) + + # End of line + if not sign: + if self._inside_quote: + self._buffer.append(before) + elif self._after_equal_sign: + self._text("".join(self._buffer) + before.strip()) + self._closetag() + self._after_equal_sign = False + elif not self._inside_option: + self._buffer.append(before) + elif before.strip(): + self._error(SYNTAX_ERROR, 
(self._currline, self._currline)) + break + + # Special character is escaped + elif before.endswith("\\") and not before.endswith("\\\\"): + self._buffer.append(before + sign) + + # Equal sign outside option specification + elif sign == "=" and not self._inside_option: + # Ignore if followed by "{" (DFTB+ compatibility) + if after.lstrip().startswith("{"): + self._oldbefore = before + else: + self._has_child = True + self._hsdoptions[common.HSDATTR_EQUAL] = True + self._starttag(before, False) + self._after_equal_sign = True + + # Equal sign inside option specification + elif sign == "=": + self._key = before.strip() + self._buffer = [] + + # Opening tag by curly brace + elif sign == "{" and not self._inside_option: + self._has_child = True + self._starttag(before, self._after_equal_sign) + self._buffer = [] + self._after_equal_sign = False + + # Closing tag by curly brace + elif sign == "}" and not self._inside_option: + self._text("".join(self._buffer) + before) + self._buffer = [] + # If 'test { a = 12 }' occurs, curly brace closes two tags + if self._after_equal_sign: + self._after_equal_sign = False + self._closetag() + self._closetag() + + # Closing tag by semicolon + elif (sign == ";" and self._after_equal_sign + and not self._inside_option): + self._after_equal_sign = False + self._text(before) + self._closetag() + + # Comment line + elif sign == "#": + self._buffer.append(before) + after = "" + + # Opening option specification + elif sign == "[" and not self._inside_option: + if "".join(self._buffer).strip(): + self._error(SYNTAX_ERROR, (self._currline, self._currline)) + self._oldbefore = before + self._buffer = [] + self._inside_option = True + self._key = "" + self._opened_tags.append(("[", self._currline, None)) + self._checkstr = _OPTION_SPECIALS + + # Closing option specification + elif sign == "]" and self._inside_option: + value = "".join(self._buffer) + before + key = self._key.lower() if self._key else self._defattrib + self._options[key] = 
value.strip() + self._inside_option = False + self._buffer = [] + self._opened_tags.pop() + self._checkstr = _GENERAL_SPECIALS + + # Quoting strings + elif sign == "'" or sign == '"': + if self._inside_quote: + self._checkstr = self._oldcheckstr + self._inside_quote = False + self._buffer.append(before + sign) + self._opened_tags.pop() + else: + self._oldcheckstr = self._checkstr + self._checkstr = sign + self._inside_quote = True + self._buffer.append(before + sign) + self._opened_tags.append(('"', self._currline, None)) + + # Closing attribute specification + elif sign == "," and self._inside_option: + value = "".join(self._buffer) + before + key = self._key.lower() if self._key else self._defattrib + self._options[key] = value.strip() + + # Interrupt + elif (sign == "<" and not self._inside_option + and not self._after_equal_sign): + txtinc = after.startswith("<<") + hsdinc = after.startswith("<+") + if txtinc: + self._text("".join(self._buffer) + before) + self._buffer = [] + self._eventhandler.add_text(self._include_txt(after[2:])) + break + elif hsdinc: + self._include_hsd(after[2:]) + break + else: + self._buffer.append(before + sign) + + else: + self._error(SYNTAX_ERROR, (self._currline, self._currline)) + + line = after + + + def _text(self, text): + stripped = text.strip() + if stripped: + self._eventhandler.add_text(stripped) + + + def _starttag(self, tagname, closeprev): + txt = "".join(self._buffer) + if txt: + self._text(txt) + tagname_stripped = tagname.strip() + if self._oldbefore: + if tagname_stripped: + self._error(SYNTAX_ERROR, (self._currline, self._currline)) + else: + tagname_stripped = self._oldbefore.strip() + if len(tagname_stripped.split()) > 1: + self._error(SYNTAX_ERROR, (self._currline, self._currline)) + self._hsdoptions[common.HSDATTR_LINE] = self._currline + self._hsdoptions[common.HSDATTR_TAG] = tagname_stripped + tagname_stripped = tagname_stripped.lower() + self._eventhandler.open_tag(tagname_stripped, self._options, + 
self._hsdoptions) + self._opened_tags.append( + (tagname_stripped, self._currline, closeprev, self._has_child)) + self._buffer = [] + self._oldbefore = "" + self._has_child = False + self._options = OrderedDict() + self._hsdoptions = OrderedDict() + + + def _closetag(self): + if not self._opened_tags: + self._error(SYNTAX_ERROR, (0, self._currline)) + self._buffer = [] + tag, _, closeprev, self._has_child = self._opened_tags.pop() + self._eventhandler.close_tag(tag) + if closeprev: + self._closetag() + + + def _include_hsd(self, fname): + fname = common.unquote(fname.strip()) + parser = HsdParser(defattrib=self._defattrib, + eventhandler=self._eventhandler) + parser.feed(fname) + + + @staticmethod + def _include_txt(fname): + fname = common.unquote(fname.strip()) + fp = open(fname, "r") + txt = fp.read() + fp.close() + return txt + + + def _error(self, errorcode, lines): + error_msg = ( + "Parsing error ({}) between lines {} - {} in file '{}'.".format( + errorcode, lines[0] + 1, lines[1] + 1, self._fname)) + raise common.HsdParserError(error_msg) + + + +def _splitbycharset(txt, charset): + """Splits a string at the first occurrence of a character in a set. + + Args: + txt: Text to split. + charset: Chars to look for. + + Returns: + Tuple (char, before, after). Char is the character which had been found + (or empty string if nothing was found). Before is the substring before + the splitting character (or the entire string). After is the substring + after the splitting character (or empty string). 
+ """ + for firstpos, char in enumerate(txt): + if char in charset: + return txt[firstpos], txt[:firstpos], txt[firstpos + 1:] + return '', txt, '' diff --git a/test/test.hsd b/test/test.hsd new file mode 100644 index 0000000..4141c15 --- /dev/null +++ b/test/test.hsd @@ -0,0 +1,58 @@ +Geometry { + GenFormat = { + 3 C + O H + 1 1 0.0 0.0 0.0 + 2 2 0.0 0.5 0.5 + 3 2 0.0 0.5 -0.5 + } +} +Driver {} +Hamiltonian { + DFTB { + Scc = Yes + SccTolerance = 1e-10 + MaxSccIterations = 1000 + Mixer { + Broyden {} + } + MaxAngularMomentum { + O = "p" + H = "s" + } + Filling { + Fermi { + Temperature [Kelvin] = 1e-08 + } + } + KPointsAndWeights { + SupercellFolding = { + 2 0 0 + 0 2 0 + 0 0 2 + 0.5 0.5 0.5 + } + } + ElectricField { + PointCharges { + CoordsAndCharges = { + -0.94 -9.44 1.2 1.0 + -0.94 -9.44 1.2 -1.0 + } + } + } + SelectSomeAtoms = 1 2 " 3 : -3 " + } +} +Analysis { + ProjectStates { + Region { + Atoms = 1 2 3 + Label = "region1" + } + Region { + Atoms = 1 2 3 + Label = "region2" + } + } +} diff --git a/test/test_dictbuilder.py b/test/test_dictbuilder.py new file mode 100644 index 0000000..88a4a22 --- /dev/null +++ b/test/test_dictbuilder.py @@ -0,0 +1,37 @@ +#!/bin/env python3 +#------------------------------------------------------------------------------# +# hsd: package for manipulating HSD-formatted data # +# Copyright (C) 2011 - 2020 DFTB+ developers group # +# # +# See the LICENSE file for terms of usage and distribution. 
# +#------------------------------------------------------------------------------# +# +import hsd + +def test_dictbuilder(): + dictbuilder = hsd.HsdDictBuilder() + parser = hsd.HsdParser(eventhandler=dictbuilder) + with open("test.hsd", "r") as fobj: + parser.feed(fobj) + pyrep = dictbuilder.hsddict + print("** Python structure without data flattening:\n") + print(pyrep) + print("\n** Turning back to HSD:\n") + print(hsd.dumps(pyrep)) + + +def test_dictbuilder_flat(): + dictbuilder = hsd.HsdDictBuilder(flatten_data=True) + parser = hsd.HsdParser(eventhandler=dictbuilder) + with open("test.hsd", "r") as fobj: + parser.feed(fobj) + pyrep = dictbuilder.hsddict + print("** Python structure with data flattening:\n") + print(pyrep) + print("\n** Turning back to HSD:\n") + print(hsd.dumps(pyrep)) + + +if __name__ == '__main__': + test_dictbuilder() + test_dictbuilder_flat() diff --git a/test/test_dump.py b/test/test_dump.py new file mode 100644 index 0000000..aca21c0 --- /dev/null +++ b/test/test_dump.py @@ -0,0 +1,62 @@ +#!/bin/env python3 +#------------------------------------------------------------------------------# +# hsd: package for manipulating HSD-formatted data # +# Copyright (C) 2011 - 2020 DFTB+ developers group # +# # +# See the LICENSE file for terms of usage and distribution. 
# +#------------------------------------------------------------------------------# +# +import numpy as np +import hsd + +if __name__ == "__main__": + INPUT = { + "Driver": {}, + "Hamiltonian": { + "DFTB": { + "Scc": True, + "SccTolerance": 1e-10, + "MaxSccIterations": 1000, + "Mixer": { + "Broyden": {} + }, + "MaxAngularMomentum": { + "O": "p", + "H": "s" + }, + "Filling": { + "Fermi": { + "Temperature": 1e-8, + "Temperature.attribute": "Kelvin" + } + }, + "KPointsAndWeights": { + "SupercellFolding": [[2, 0, 0], [0, 2, 0], [0, 0, 2], + [0.5, 0.5, 0.5]] + }, + "ElectricField": { + "PointCharges": { + "CoordsAndCharges": np.array( + [[-0.94, -9.44, 1.2, 1.0], + [-0.94, -9.44, 1.2, -1.0]]) + } + }, + "SelectSomeAtoms": [1, 2, "3:-3"] + } + }, + "Analysis": { + "ProjectStates": { + "Region": [ + { + "Atoms": [1, 2, 3], + "Label": "region1", + }, + { + "Atoms": np.array([1, 2, 3]), + "Label": "region2", + } + ] + } + } + } + print(hsd.dumps(INPUT)) diff --git a/test/test_parser.py b/test/test_parser.py new file mode 100644 index 0000000..2280839 --- /dev/null +++ b/test/test_parser.py @@ -0,0 +1,19 @@ +#!/bin/env python3 +#------------------------------------------------------------------------------# +# hsd: package for manipulating HSD-formatted data # +# Copyright (C) 2011 - 2020 DFTB+ developers group # +# # +# See the LICENSE file for terms of usage and distribution. 
# +#------------------------------------------------------------------------------# +# +import hsd + + +def test_parser(): + parser = hsd.HsdParser() + with open("test.hsd", "r") as fobj: + parser.feed(fobj) + + +if __name__ == '__main__': + test_parser() From f2cabe31eac2b833275d8d6dd9df59b91b0a0024 Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Sat, 8 Feb 2020 15:54:32 +0100 Subject: [PATCH 02/31] Add installation with setup.py --- .codecov.yml | 14 + .lgtm.yml | 12 + .travis.yml | 48 + devtools/travis-ci/before_install.sh | 21 + src/LICENSE | 1 + src/MANIFEST.in | 6 + src/setup.cfg | 30 + src/setup.py | 59 + src/versioneer.py | 1822 ++++++++++++++++++++++++++ test/test_dictbuilder.py | 5 +- test/test_parser.py | 3 +- 11 files changed, 2018 insertions(+), 3 deletions(-) create mode 100644 .codecov.yml create mode 100644 .lgtm.yml create mode 100644 .travis.yml create mode 100755 devtools/travis-ci/before_install.sh create mode 120000 src/LICENSE create mode 100644 src/MANIFEST.in create mode 100644 src/setup.cfg create mode 100644 src/setup.py create mode 100644 src/versioneer.py diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 0000000..a3ed7f4 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,14 @@ +# Codecov configuration to make it a bit less noisy +coverage: + status: + patch: false + project: + default: + threshold: 50% +comment: + layout: "header" + require_changes: false + branches: null + behavior: default + flags: null + paths: null \ No newline at end of file diff --git a/.lgtm.yml b/.lgtm.yml new file mode 100644 index 0000000..a17433d --- /dev/null +++ b/.lgtm.yml @@ -0,0 +1,12 @@ +# Configure LGTM for this package + +extraction: + python: # Configure Python + python_setup: # Configure the setup + version: 3 # Specify Version 3 +path_classifiers: + library: + - src/versioneer.py # Set Versioneer.py to an external "library" (3rd party code) + - devtools/* + generated: + - 
src/hsd/_version.py diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..8cb46fe --- /dev/null +++ b/.travis.yml @@ -0,0 +1,48 @@ +language: python + +# Run jobs on container-based infrastructure, can be overridden per job + +matrix: + include: + # Extra includes for OSX since python language is not available by default on OSX + - os: osx + language: generic + env: PYTHON_VER=3.6 + - os: osx + language: generic + env: PYTHON_VER=3.7 + + + # Pip can use Travis build-in Python + - os: linux + python: 3.6 + - os: linux + dist: xenial # Travis Trusty image does not have Python 3.7, Xenial does + python: 3.7 + + +before_install: + # Additional info about the build + - uname -a + - df -h + - ulimit -a + + # Install the Python environment + - source devtools/travis-ci/before_install.sh + - python -V + +install: + + # Install the package locally + - pip install -U pytest pytest-cov codecov + - pip install -e src/ + + +script: + - pytest -v --cov=hsd test/ + +notifications: + email: false + +after_success: + - codecov diff --git a/devtools/travis-ci/before_install.sh b/devtools/travis-ci/before_install.sh new file mode 100755 index 0000000..c918485 --- /dev/null +++ b/devtools/travis-ci/before_install.sh @@ -0,0 +1,21 @@ +# Temporarily change directory to $HOME to install software +pushd . 
+cd $HOME +# Make sure some level of pip is installed +python -m ensurepip + +if [ "$TRAVIS_OS_NAME" == "osx" ]; then + HOMEBREW_NO_AUTO_UPDATE=1 brew upgrade pyenv + # Pyenv requires minor revision, get the latest + PYENV_VERSION=$(pyenv install --list |grep $PYTHON_VER | sed -n "s/^[ \t]*\(${PYTHON_VER}\.*[0-9]*\).*/\1/p" | tail -n 1) + # Install version + pyenv install $PYENV_VERSION + # Use version for this + pyenv global $PYENV_VERSION + # Setup up path shims + eval "$(pyenv init -)" +fi +pip install --upgrade pip setuptools + +# Restore original directory +popd diff --git a/src/LICENSE b/src/LICENSE new file mode 120000 index 0000000..ea5b606 --- /dev/null +++ b/src/LICENSE @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/src/MANIFEST.in b/src/MANIFEST.in new file mode 100644 index 0000000..2606db3 --- /dev/null +++ b/src/MANIFEST.in @@ -0,0 +1,6 @@ +include LICENSE +include MANIFEST.in +include versioneer.py + +graft hsd +global-exclude *.py[cod] __pycache__ *.so diff --git a/src/setup.cfg b/src/setup.cfg new file mode 100644 index 0000000..61ad7fb --- /dev/null +++ b/src/setup.cfg @@ -0,0 +1,30 @@ +# Helper file to handle all configs + +[coverage:run] +# .coveragerc to control coverage.py and pytest-cov +omit = + # Omit the tests + */tests/* + # Omit generated versioneer + hsd/_version.py + +[yapf] +# YAPF, in .style.yapf files this shows up as "[style]" header +COLUMN_LIMIT = 119 +INDENT_WIDTH = 4 +USE_TABS = False + +[flake8] +# Flake8, PyFlakes, etc +max-line-length = 119 + +[versioneer] +# Automatic version numbering scheme +VCS = git +style = pep440 +versionfile_source = hsd/_version.py +versionfile_build = hsd/_version.py +tag_prefix = '' + +[aliases] +test = pytest diff --git a/src/setup.py b/src/setup.py new file mode 100644 index 0000000..5160b13 --- /dev/null +++ b/src/setup.py @@ -0,0 +1,59 @@ +""" +hsd +Python routines to manipulate HSD data +""" +import sys +from setuptools import setup, find_packages +import versioneer + 
+short_description = __doc__.split("\n") + +# from https://github.com/pytest-dev/pytest-runner#conditional-requirement +needs_pytest = {'pytest', 'test', 'ptr'}.intersection(sys.argv) +pytest_runner = ['pytest-runner'] if needs_pytest else [] + +try: + with open("README.rst", "r") as handle: + long_description = handle.read() +except: + long_description = "\n".join(short_description[2:]) + + +setup( + # Self-descriptive entries which should always be present + name='hsd', + author='DFTB+ developers group', + author_email='info@dftbplus.org', + description=short_description[0], + long_description=long_description, + long_description_content_type="text/x-rst", + version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), + license='BSD 2-clause license', + + # Which Python importable modules should be included when your package is installed + # Handled automatically by setuptools. Use 'exclude' to prevent some specific + # subpackage(s) from being added, if needed + packages=find_packages(), + + # Optional include package data to ship with your package + # Customize MANIFEST.in if the general case does not suit your needs + # Comment out this line to prevent the files from being packaged with your software + include_package_data=True, + + # Allows `setup.py test` to work correctly with pytest + setup_requires=[] + pytest_runner, + + # Additional entries you may want simply uncomment the lines you want and fill in the data + # url='http://www.my_package.com', # Website + install_requires=['numpy'], # Required packages, pulls from pip if needed; do not use for Conda deployment + # platforms=['Linux', + # 'Mac OS-X', + # 'Unix', + # 'Windows'], # Valid platforms your code works on, adjust to your flavor + # python_requires=">=3.5", # Python version restrictions + + # Manual control if final package is compressible or not, set False to prevent the .egg from being made + # zip_safe=False, + +) diff --git a/src/versioneer.py b/src/versioneer.py new file mode 
100644 index 0000000..64fea1c --- /dev/null +++ b/src/versioneer.py @@ -0,0 +1,1822 @@ + +# Version: 0.18 + +"""The Versioneer - like a rocketeer, but for versions. + +The Versioneer +============== + +* like a rocketeer, but for versions! +* https://github.com/warner/python-versioneer +* Brian Warner +* License: Public Domain +* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy +* [![Latest Version] +(https://pypip.in/version/versioneer/badge.svg?style=flat) +](https://pypi.python.org/pypi/versioneer/) +* [![Build Status] +(https://travis-ci.org/warner/python-versioneer.png?branch=master) +](https://travis-ci.org/warner/python-versioneer) + +This is a tool for managing a recorded version number in distutils-based +python projects. The goal is to remove the tedious and error-prone "update +the embedded version string" step from your release process. Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. + + +## Quick Install + +* `pip install versioneer` to somewhere to your $PATH +* add a `[versioneer]` section to your setup.cfg (see below) +* run `versioneer install` in your source tree, commit the results + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, e.g. 
"git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes. + +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. + +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. + +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. 
This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. + +## Installation + +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. + +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. + +Both functions return a dictionary with different flavors of version +information: + +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. + +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". + +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. + +* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None + +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. + creating tarballs with a version string of "unknown". + +Some variants are more useful than others. 
Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. + +The installer adds the following text to your `__init__.py` to place a basic +version in `YOURPROJECT.__version__`: + + from ._version import get_versions + __version__ = get_versions()['version'] + del get_versions + +## Styles + +The setup.cfg `style=` configuration controls how the VCS information is +rendered into a version string. + +The default style, "pep440", produces a PEP440-compliant string, equal to the +un-prefixed tag name for actual releases, and containing an additional "local +version" section with more detail for in-between builds. For Git, this is +TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags +--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the +tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and +that this commit is two revisions ("+2") beyond the "0.11" tag. For released +software (exactly equal to a known tag), the identifier will only contain the +stripped tag, e.g. "0.11". + +Other styles are available. See [details.md](details.md) in the Versioneer +source tree for descriptions. + +## Debugging + +Versioneer tries to avoid fatal errors: if something goes wrong, it will tend +to return a version of "0+unknown". To investigate the problem, run `setup.py +version`, which will run the version-lookup code in a verbose mode, and will +display the full contents of `get_versions()` (including the `error` string, +which may help identify what went wrong). + +## Known Limitations + +Some situations are known to cause problems for Versioneer. This details the +most significant ones. 
More can be found on Github +[issues page](https://github.com/warner/python-versioneer/issues). + +### Subprojects + +Versioneer has limited support for source trees in which `setup.py` is not in +the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are +two common reasons why `setup.py` might not be in the root: + +* Source trees which contain multiple subprojects, such as + [Buildbot](https://github.com/buildbot/buildbot), which contains both + "master" and "slave" subprojects, each with their own `setup.py`, + `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI + distributions (and upload multiple independently-installable tarballs). +* Source trees whose main purpose is to contain a C library, but which also + provide bindings to Python (and perhaps other languages) in subdirectories. + +Versioneer will look for `.git` in parent directories, and most operations +should get the right version string. However `pip` and `setuptools` have bugs +and implementation details which frequently cause `pip install .` from a +subproject directory to fail to find a correct version string (so it usually +defaults to `0+unknown`). + +`pip install --editable .` should work correctly. `setup.py install` might +work too. + +Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in +some later version. + +[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking +this issue. The discussion in +[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the +issue from the Versioneer side in more detail. +[pip PR#3176](https://github.com/pypa/pip/pull/3176) and +[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve +pip to let Versioneer work correctly. + +Versioneer-0.16 and earlier only looked for a `.git` directory next to the +`setup.cfg`, so subprojects were completely unsupported with those releases.
+ +### Editable installs with setuptools <= 18.5 + +`setup.py develop` and `pip install --editable .` allow you to install a +project into a virtualenv once, then continue editing the source code (and +test) without re-installing after every change. + +"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a +convenient way to specify executable scripts that should be installed along +with the python package. + +These both work as expected when using modern setuptools. When using +setuptools-18.5 or earlier, however, certain operations will cause +`pkg_resources.DistributionNotFound` errors when running the entrypoint +script, which must be resolved by re-installing the package. This happens +when the install happens with one version, then the egg_info data is +regenerated while a different version is checked out. Many setup.py commands +cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into +a different virtualenv), so this can be surprising. + +[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes +this one, but upgrading to a newer version of setuptools should probably +resolve it. + +### Unicode version strings + +While Versioneer works (and is continually tested) with both Python 2 and +Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. +Newer releases probably generate unicode version strings on py2. It's not +clear that this is wrong, but it may be surprising for applications when they then +write these strings to a network connection or include them in bytes-oriented +APIs like cryptographic checksums. + +[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates +this question.
+ + +## Updating Versioneer + +To upgrade your project to a new release of Versioneer, do the following: + +* install the new Versioneer (`pip install -U versioneer` or equivalent) +* edit `setup.cfg`, if necessary, to include any new configuration settings + indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. +* re-run `versioneer install` in your source tree, to replace + `SRC/_version.py` +* commit any changed files + +## Future Directions + +This tool is designed to make it easily extended to other version-control +systems: all VCS-specific components are in separate directories like +src/git/ . The top-level `versioneer.py` script is assembled from these +components by running make-versioneer.py . In the future, make-versioneer.py +will take a VCS name as an argument, and will construct a version of +`versioneer.py` that is specific to the given VCS. It might also take the +configuration arguments that are currently provided manually during +installation by editing setup.py . Alternatively, it might go the other +direction and include code from all supported VCS systems, reducing the +number of intermediate scripts. + + +## License + +To make Versioneer easier to embed, all its code is dedicated to the public +domain. The `_version.py` that it creates is also in the public domain. +Specifically, both are released under the Creative Commons "Public Domain +Dedication" license (CC0-1.0), as described in +https://creativecommons.org/publicdomain/zero/1.0/ . + +""" + +from __future__ import print_function +try: + import configparser +except ImportError: + import ConfigParser as configparser +import errno +import json +import os +import re +import subprocess +import sys + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_root(): + """Get the project root directory. + + We require that all commands are run from the project root, i.e. 
the + directory that contains setup.py, setup.cfg, and versioneer.py . + """ + root = os.path.realpath(os.path.abspath(os.getcwd())) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + # allow 'python path/to/setup.py COMMAND' + root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + err = ("Versioneer was unable to run the project root directory. " + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND').") + raise VersioneerBadRootError(err) + try: + # Certain runtime workflows (setup.py install/develop in a setuptools + # tree) execute all dependencies in a single python process, so + # "versioneer" may be imported multiple times, and python's shared + # module-import table will cache the first one. So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. + me = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(me)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir: + print("Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(me), versioneer_py)) + except NameError: + pass + return root + + +def get_config_from_root(root): + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise EnvironmentError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). 
See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . + setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.SafeConfigParser() + with open(setup_cfg, "r") as f: + parser.readfp(f) + VCS = parser.get("versioneer", "VCS") # mandatory + + def get(parser, name): + if parser.has_option("versioneer", name): + return parser.get("versioneer", name) + return None + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = get(parser, "style") or "" + cfg.versionfile_source = get(parser, "versionfile_source") + cfg.versionfile_build = get(parser, "versionfile_build") + cfg.tag_prefix = get(parser, "tag_prefix") + if cfg.tag_prefix in ("''", '""'): + cfg.tag_prefix = "" + cfg.parentdir_prefix = get(parser, "parentdir_prefix") + cfg.verbose = get(parser, "verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Decorator to mark a method as the handler for a particular VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, 
tried %s" % (commands,)) + return None, None + stdout = p.communicate()[0].strip() + if sys.version_info[0] >= 3: + stdout = stdout.decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, p.returncode + return stdout, p.returncode + + +LONG_VERSION_PY['git'] = ''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.18 (https://github.com/warner/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Decorator to mark a method as the handler for a particular VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = p.communicate()[0].strip() + if sys.version_info[0] >= 3: + stdout = 
stdout.decode() + if p.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, p.returncode + return stdout, p.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. 
The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r'\d', r)]) + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", "%%s*" %% tag_prefix], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], + cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], + cwd=root)[0].strip() + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post.dev%%d" %% pieces["distance"] + else: + # exception #1 + rendered = "0.post.dev%%d" %% pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Eexceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
+ for i in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. 
However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r'\d', r)]) + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", "%s*" % tag_prefix], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse 
describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], + cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], + cwd=root)[0].strip() + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + me = __file__ + if me.endswith(".pyc") or me.endswith(".pyo"): + me = os.path.splitext(me)[0] + ".py" + versioneer_file = os.path.relpath(me) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + f = open(".gitattributes", "r") + for line in f.readlines(): + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + f.close() + except EnvironmentError: + pass + if not present: + f = open(".gitattributes", "a+") + f.write("%s export-subst\n" % versionfile_source) + f.close() + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.18) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. 
+ +import json + +version_json = ''' +%s +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename): + """Try to determine the version from _version.py if present.""" + try: + with open(filename) as f: + contents = f.read() + except EnvironmentError: + raise NotThisMethod("unable to read _version.py") + mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename, versions): + """Write the given version number to the given _version.py file.""" + os.unlink(filename) + contents = json.dumps(versions, sort_keys=True, + indent=1, separators=(",", ": ")) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set %s to '%s'" % (filename, versions["version"])) + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Eexceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose=False): + """Get the project version from whatever source is available. + + Returns dict with two keys: 'version' and 'full'. 
+ """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert cfg.versionfile_source is not None, \ + "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. + + get_keywords_f = handlers.get("get_keywords") + from_keywords_f = handlers.get("keywords") + if get_keywords_f and from_keywords_f: + try: + keywords = get_keywords_f(versionfile_abs) + ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) + if verbose: + print("got version from expanded keyword %s" % ver) + return ver + except NotThisMethod: + pass + + try: + ver = versions_from_file(versionfile_abs) + if verbose: + print("got version from file %s %s" % (versionfile_abs, ver)) + return ver + except NotThisMethod: + pass + + from_vcs_f = handlers.get("pieces_from_vcs") + if from_vcs_f: + try: + pieces = from_vcs_f(cfg.tag_prefix, root, verbose) + ver = render(pieces, cfg.style) + if verbose: + print("got version from VCS %s" % ver) + return ver + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + if verbose: + print("got version from parentdir %s" % ver) + return ver + except NotThisMethod: + pass + + if verbose: + 
print("unable to compute version") + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, "error": "unable to compute version", + "date": None} + + +def get_version(): + """Get the short version string for this project.""" + return get_versions()["version"] + + +def get_cmdclass(): + """Get the custom setuptools/distutils subclasses used by Versioneer.""" + if "versioneer" in sys.modules: + del sys.modules["versioneer"] + # this fixes the "python setup.py develop" case (also 'install' and + # 'easy_install .'), in which subdependencies of the main project are + # built (using setup.py bdist_egg) in the same python process. Assume + # a main project A and a dependency B, which use different versions + # of Versioneer. A's setup.py imports A's Versioneer, leaving it in + # sys.modules by the time B's setup.py is executed, causing B to run + # with the wrong versioneer. Setuptools wraps the sub-dep builds in a + # sandbox that restores sys.modules to it's pre-build state, so the + # parent is protected against the child's "import versioneer". By + # removing ourselves from sys.modules here, before the child build + # happens, we protect the child from the parent's versioneer too. 
+ # Also see https://github.com/warner/python-versioneer/issues/52 + + cmds = {} + + # we add "version" to both distutils and setuptools + from distutils.core import Command + + class cmd_version(Command): + description = "report generated version string" + user_options = [] + boolean_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + vers = get_versions(verbose=True) + print("Version: %s" % vers["version"]) + print(" full-revisionid: %s" % vers.get("full-revisionid")) + print(" dirty: %s" % vers.get("dirty")) + print(" date: %s" % vers.get("date")) + if vers["error"]: + print(" error: %s" % vers["error"]) + cmds["version"] = cmd_version + + # we override "build_py" in both distutils and setuptools + # + # most invocation pathways end up running build_py: + # distutils/build -> build_py + # distutils/install -> distutils/build ->.. + # setuptools/bdist_wheel -> distutils/install ->.. + # setuptools/bdist_egg -> distutils/install_lib -> build_py + # setuptools/install -> bdist_egg ->.. + # setuptools/develop -> ? + # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? 
+ + # we override different "build_py" commands for both environments + if "setuptools" in sys.modules: + from setuptools.command.build_py import build_py as _build_py + else: + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. + # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if 'py2exe' in sys.modules: # py2exe enabled? 
+ try: + from py2exe.distutils_buildexe import py2exe as _py2exe # py3 + except ImportError: + from py2exe.build_exe import py2exe as _py2exe # py2 + + class cmd_py2exe(_py2exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["py2exe"] = cmd_py2exe + + # we override different "sdist" commands for both environments + if "setuptools" in sys.modules: + from setuptools.command.sdist import sdist as _sdist + else: + from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, + self._versioneer_generated_versions) + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. 
You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +INIT_PY_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + + +def do_setup(): + """Main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except (EnvironmentError, configparser.NoSectionError, + configparser.NoOptionError) as e: + if isinstance(e, (EnvironmentError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", + file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), + "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + 
except EnvironmentError: + old = "" + if INIT_PY_SNIPPET not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(INIT_PY_SNIPPET) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. + manifest_in = os.path.join(root, "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in, "r") as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except EnvironmentError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if cfg.versionfile_source not in simple_includes: + print(" appending versionfile_source ('%s') to MANIFEST.in" % + cfg.versionfile_source) + with open(manifest_in, "a") as f: + f.write("include %s\n" % cfg.versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. 
+ do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . 
This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + errors = do_setup() + errors += scan_setup_py() + if errors: + sys.exit(1) diff --git a/test/test_dictbuilder.py b/test/test_dictbuilder.py index 88a4a22..61a40b4 100644 --- a/test/test_dictbuilder.py +++ b/test/test_dictbuilder.py @@ -7,11 +7,12 @@ #------------------------------------------------------------------------------# # import hsd +import os.path as op def test_dictbuilder(): dictbuilder = hsd.HsdDictBuilder() parser = hsd.HsdParser(eventhandler=dictbuilder) - with open("test.hsd", "r") as fobj: + with open(op.join(op.dirname(__file__), "test.hsd"), "r") as fobj: parser.feed(fobj) pyrep = dictbuilder.hsddict print("** Python structure without data flattening:\n") @@ -23,7 +24,7 @@ def test_dictbuilder(): def test_dictbuilder_flat(): dictbuilder = hsd.HsdDictBuilder(flatten_data=True) parser = hsd.HsdParser(eventhandler=dictbuilder) - with open("test.hsd", "r") as fobj: + with open(op.join(op.dirname(__file__), "test.hsd"), "r") as fobj: parser.feed(fobj) pyrep = dictbuilder.hsddict print("** Python structure with data flattening:\n") diff --git a/test/test_parser.py b/test/test_parser.py index 2280839..5c2b7db 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -7,11 +7,12 @@ #------------------------------------------------------------------------------# # import hsd +import os.path as op def test_parser(): parser = hsd.HsdParser() - with open("test.hsd", "r") as fobj: + with open(op.join(op.dirname(__file__), "test.hsd"), "r") as fobj: parser.feed(fobj) From b9514f23ac054f6048927ed20866433c82321026 Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Sun, 9 Feb 2020 11:27:11 +0100 Subject: [PATCH 03/31] Appveyor support --- .appveyor.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 
insertions(+) create mode 100644 .appveyor.yml diff --git a/.appveyor.yml b/.appveyor.yml new file mode 100644 index 0000000..dc6de00 --- /dev/null +++ b/.appveyor.yml @@ -0,0 +1,29 @@ +environment: + + matrix: + - PYTHON: "C:\\Python36-x64" + PYTHON_VERSION: "3.6" + PYTHON_ARCH: "64" + + - PYTHON: "C:\\Python37-x64" + PYTHON_VERSION: "3.7" + PYTHON_ARCH: "64" + + +install: + # Make sure pip is around + - python -m ensurepip + - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" + + # Install the package locally + #- pip install --upgrade pip setuptools + - pip install pytest pytest-cov codecov + - pip install -e src + +build: false + +test_script: + - pytest -v --cov=hsd test + +on_success: + - codecov From c26324b8db0ad6588275d3b574eab64d4a72a0b4 Mon Sep 17 00:00:00 2001 From: Ben Hourahine Date: Sun, 9 Feb 2020 11:55:27 +0000 Subject: [PATCH 04/31] A few typos and some grammar in README.rst --- README.rst | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/README.rst b/README.rst index cafbdf4..840dc7a 100644 --- a/README.rst +++ b/README.rst @@ -15,9 +15,9 @@ The HSD-format is very similar to both JSON and XML, but tries to minimize the effort for humans to read and write it. It ommits special characters as much as possible but (in contrast to YAML for example) is not indentation dependent. -It was developed originally developed as the input format for a scientific -simulation tool (DFTB+), but is absolutely general. A typical input written in -HSD would look like :: +It was developed originally as the input format for a scientific simulation tool +(`DFTB+ `_), but is absolutely general. A +typical input written in HSD looks like :: driver { conjugate_gradients { @@ -49,12 +49,13 @@ HSD would look like :: } } -Content in HSD format can be represented as JSON. Content in JSON format can be -represented as HSD, provided it satisfies a restriction for arrays: Either all -elements of an array must be objects or none of them. 
(This allows for a clear -separation of structure and data and allows for the very simple input format.) +Content in HSD format can be represented as JSON. Content in JSON format can +similarly be represented as HSD, provided it satisfies one restriction for +arrays: Either all elements of an array must be objects or none of them. (This +allows for a clear separation of structure and data and allows for the very +simple input format.) -Content in HSD format can be represented as XML (DOM-tree). Content in XML can -be converted to HSD, provided it satisfies the restriction that every child has -either data (text) or further children, but never both of them. (Again, this -ensures the simplicity of the input format.) +Content in HSD format can be represented as XML (DOM-tree). Likewise content in +XML can be converted to HSD, provided it satisfies the restriction that every +child has either data (text) or further children, but never both of +them. (Again, this ensures the simplicity of the input format.) From c09852afb6f636562a4182c2ae44aaec957c6994 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Tue, 3 Mar 2020 13:56:35 +0100 Subject: [PATCH 05/31] Fix parsing error of '= {' following an attribute --- src/hsd/dictbuilder.py | 5 ++++- src/hsd/parser.py | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/hsd/dictbuilder.py b/src/hsd/dictbuilder.py index e4804ef..a202c8b 100644 --- a/src/hsd/dictbuilder.py +++ b/src/hsd/dictbuilder.py @@ -27,7 +27,10 @@ class HsdDictBuilder(HsdEventHandler): - """Deserializes HSD into nested dictionaries""" + """Deserializes HSD into nested dictionaries + + Note: hsdoptions passed by the generating events are ignored. 
+ """ def __init__(self, flatten_data=False): HsdEventHandler.__init__(self) diff --git a/src/hsd/parser.py b/src/hsd/parser.py index 751b090..c3b418f 100644 --- a/src/hsd/parser.py +++ b/src/hsd/parser.py @@ -114,7 +114,7 @@ def __init__(self, defattrib=common.DEFAULT_ATTRIBUTE, eventhandler=None): self._inside_option = False # parser inside option specification self._inside_quote = False # parser inside quotation self._has_child = False - self._oldbefore = "" + self._oldbefore = "" # buffer for tagname def feed(self, fobj): @@ -178,7 +178,9 @@ def _parse(self, line): elif sign == "=" and not self._inside_option: # Ignore if followed by "{" (DFTB+ compatibility) if after.lstrip().startswith("{"): - self._oldbefore = before + # _oldbefore may already contain the tagname, if the + # tagname was followed by an attribute -> append + self._oldbefore += before else: self._has_child = True self._hsdoptions[common.HSDATTR_EQUAL] = True From a66f099120679197a7198ada78aad3c2b10b2e8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Fri, 6 Mar 2020 16:19:39 +0100 Subject: [PATCH 06/31] Fix HSD-representation of bools --- src/hsd/dump.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hsd/dump.py b/src/hsd/dump.py index 916a6d9..040d026 100644 --- a/src/hsd/dump.py +++ b/src/hsd/dump.py @@ -120,10 +120,10 @@ def _list_to_hsd(lst): def _item_to_hsd(item): - if isinstance(item, (int, float)): - return str(item) - elif isinstance(item, bool): + if isinstance(item, bool): return "Yes" if item else "No" + elif isinstance(item, (int, float)): + return str(item) elif isinstance(item, str): return _str_to_hsd(item) else: From ba1359e691d6d69e67c17653b541a820bca1b0f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Mon, 23 Mar 2020 16:31:50 +0100 Subject: [PATCH 07/31] Fix string quotation handling error --- src/hsd/dictbuilder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hsd/dictbuilder.py 
b/src/hsd/dictbuilder.py index a202c8b..1f27c22 100644 --- a/src/hsd/dictbuilder.py +++ b/src/hsd/dictbuilder.py @@ -22,7 +22,7 @@ | (?:\s*(?:^|(?<=\s))(?P[Yy][Ee][Ss]|[Nn][Oo])(?:$|(?=\s+))) | -(?:(?P(?P['"]).*?(?P=quote)) | (?P.+?)(?:$|\s+)) +(?:\s*(?:(?P(?P['"]).*?(?P=quote)) | (?P.+?))(?:$|\s+)) """, re.VERBOSE | re.MULTILINE) From 4035f43a2f0c529eb1b32735af800f19c7d7a9e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Tue, 24 Mar 2020 10:23:43 +0100 Subject: [PATCH 08/31] Fix attribute parsing bug Attribute parsing had been reduced to bare minimum: one string attribute per item. --- src/hsd/dictbuilder.py | 6 ++-- src/hsd/parser.py | 71 ++++++++++++++++-------------------------- 2 files changed, 30 insertions(+), 47 deletions(-) diff --git a/src/hsd/dictbuilder.py b/src/hsd/dictbuilder.py index 1f27c22..3109eb4 100644 --- a/src/hsd/dictbuilder.py +++ b/src/hsd/dictbuilder.py @@ -41,9 +41,9 @@ def __init__(self, flatten_data=False): self._flatten_data = flatten_data - def open_tag(self, tagname, options, hsdoptions): - for attrname, attrvalue in options.items(): - self._curblock[tagname + '.' + attrname] = attrvalue + def open_tag(self, tagname, attrib, hsdoptions): + if attrib is not None: + self._curblock[tagname + '.attribute'] = attrib self._parentblocks.append(self._curblock) self._curblock = {} diff --git a/src/hsd/parser.py b/src/hsd/parser.py index c3b418f..0f7cd59 100644 --- a/src/hsd/parser.py +++ b/src/hsd/parser.py @@ -18,12 +18,12 @@ SYNTAX_ERROR = 1 UNCLOSED_TAG_ERROR = 2 -UNCLOSED_OPTION_ERROR = 3 +UNCLOSED_ATTRIB_ERROR = 3 UNCLOSED_QUOTATION_ERROR = 4 ORPHAN_TEXT_ERROR = 5 _GENERAL_SPECIALS = "{}[]<=\"'#;" -_OPTION_SPECIALS = ",]=\"'#{};" +_ATTRIB_SPECIALS = "]\"'" class HsdEventHandler: @@ -35,7 +35,7 @@ def __init__(self): self._indentstr = " " - def open_tag(self, tagname, options, hsdoptions): + def open_tag(self, tagname, attrib, hsdoptions): """Handler which is called when a tag is opened. 
It should be overriden in the application to handle the event in a @@ -43,13 +43,13 @@ def open_tag(self, tagname, options, hsdoptions): Args: tagname: Name of the tag which had been opened. - options: Dictionary of the options (attributes) of the tag. + attrib: String containing the attribute of the tag or None. hsdoptions: Dictionary of the options created during the processing in the hsd-parser. """ indentstr = self._indentlevel * self._indentstr print("{}OPENING TAG: {}".format(indentstr, tagname)) - print("{}OPTIONS: {}".format(indentstr, str(options))) + print("{}ATTRIBUTE: {}".format(indentstr, attrib)) print("{}HSD OPTIONS: {}".format(indentstr, str(hsdoptions))) self._indentlevel += 1 @@ -88,11 +88,10 @@ class HsdParser: and `_handle_error()` should be overridden by the actual application. """ - def __init__(self, defattrib=common.DEFAULT_ATTRIBUTE, eventhandler=None): + def __init__(self, eventhandler=None): """Initializes the parser. Args: - defattrib: Name of the default attribute (default: 'attribute') eventhandler: Instance of the HsdEventHandler class or its children. """ if eventhandler is None: @@ -101,17 +100,15 @@ def __init__(self, defattrib=common.DEFAULT_ATTRIBUTE, eventhandler=None): self._eventhandler = eventhandler self._fname = "" # Name of file being processed - self._defattrib = defattrib.lower() # def. attribute name self._checkstr = _GENERAL_SPECIALS # special characters to look for self._oldcheckstr = "" # buffer fo checkstr self._opened_tags = [] # info about opened tags self._buffer = [] # buffering plain text between lines - self._options = OrderedDict() # options for current tag + self._attrib = None # attribute for current tag self._hsdoptions = OrderedDict() # hsd-options for current tag - self._key = "" # current option name self._currline = 0 # nr. 
of current line in file self._after_equal_sign = False # last tag was opened with equal sign - self._inside_option = False # parser inside option specification + self._inside_attrib = False # parser inside attrib specification self._inside_quote = False # parser inside quotation self._has_child = False self._oldbefore = "" # buffer for tagname @@ -142,8 +139,8 @@ def feed(self, fobj): line0 = 0 if self._inside_quote: self._error(UNCLOSED_QUOTATION_ERROR, (line0, self._currline)) - elif self._inside_option: - self._error(UNCLOSED_OPTION_ERROR, (line0, self._currline)) + elif self._inside_attrib: + self._error(UNCLOSED_ATTRIB_ERROR, (line0, self._currline)) elif self._opened_tags: self._error(UNCLOSED_TAG_ERROR, (line0, line0)) elif ("".join(self._buffer)).strip(): @@ -164,7 +161,7 @@ def _parse(self, line): self._text("".join(self._buffer) + before.strip()) self._closetag() self._after_equal_sign = False - elif not self._inside_option: + elif not self._inside_attrib: self._buffer.append(before) elif before.strip(): self._error(SYNTAX_ERROR, (self._currline, self._currline)) @@ -174,8 +171,8 @@ def _parse(self, line): elif before.endswith("\\") and not before.endswith("\\\\"): self._buffer.append(before + sign) - # Equal sign outside option specification - elif sign == "=" and not self._inside_option: + # Equal sign + elif sign == "=": # Ignore if followed by "{" (DFTB+ compatibility) if after.lstrip().startswith("{"): # _oldbefore may already contain the tagname, if the @@ -187,20 +184,15 @@ def _parse(self, line): self._starttag(before, False) self._after_equal_sign = True - # Equal sign inside option specification - elif sign == "=": - self._key = before.strip() - self._buffer = [] - # Opening tag by curly brace - elif sign == "{" and not self._inside_option: + elif sign == "{": self._has_child = True self._starttag(before, self._after_equal_sign) self._buffer = [] self._after_equal_sign = False # Closing tag by curly brace - elif sign == "}" and not 
self._inside_option: + elif sign == "}": self._text("".join(self._buffer) + before) self._buffer = [] # If 'test { a = 12 }' occurs, curly brace closes two tags @@ -210,8 +202,7 @@ def _parse(self, line): self._closetag() # Closing tag by semicolon - elif (sign == ";" and self._after_equal_sign - and not self._inside_option): + elif sign == ";" and self._after_equal_sign: self._after_equal_sign = False self._text(before) self._closetag() @@ -221,23 +212,22 @@ def _parse(self, line): self._buffer.append(before) after = "" - # Opening option specification - elif sign == "[" and not self._inside_option: + # Opening attribute specification + elif sign == "[": if "".join(self._buffer).strip(): self._error(SYNTAX_ERROR, (self._currline, self._currline)) self._oldbefore = before self._buffer = [] - self._inside_option = True + self._inside_attrib = True self._key = "" self._opened_tags.append(("[", self._currline, None)) - self._checkstr = _OPTION_SPECIALS + self._checkstr = _ATTRIB_SPECIALS - # Closing option specification - elif sign == "]" and self._inside_option: + # Closing attribute specification + elif sign == "]": value = "".join(self._buffer) + before - key = self._key.lower() if self._key else self._defattrib - self._options[key] = value.strip() - self._inside_option = False + self._attrib = value.strip() + self._inside_attrib = False self._buffer = [] self._opened_tags.pop() self._checkstr = _GENERAL_SPECIALS @@ -256,15 +246,8 @@ def _parse(self, line): self._buffer.append(before + sign) self._opened_tags.append(('"', self._currline, None)) - # Closing attribute specification - elif sign == "," and self._inside_option: - value = "".join(self._buffer) + before - key = self._key.lower() if self._key else self._defattrib - self._options[key] = value.strip() - # Interrupt - elif (sign == "<" and not self._inside_option - and not self._after_equal_sign): + elif sign == "<" and not self._after_equal_sign: txtinc = after.startswith("<<") hsdinc = 
after.startswith("<+") if txtinc: @@ -305,14 +288,14 @@ def _starttag(self, tagname, closeprev): self._hsdoptions[common.HSDATTR_LINE] = self._currline self._hsdoptions[common.HSDATTR_TAG] = tagname_stripped tagname_stripped = tagname_stripped.lower() - self._eventhandler.open_tag(tagname_stripped, self._options, + self._eventhandler.open_tag(tagname_stripped, self._attrib, self._hsdoptions) self._opened_tags.append( (tagname_stripped, self._currline, closeprev, self._has_child)) self._buffer = [] self._oldbefore = "" self._has_child = False - self._options = OrderedDict() + self._attrib = None self._hsdoptions = OrderedDict() From 81b2f15e448a2cd9c1c0c0ee00ff30c59676ac09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Fri, 3 Apr 2020 08:11:44 +0200 Subject: [PATCH 09/31] Add HSD-constructs triggering the fixed bug --- test/test.hsd | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/test.hsd b/test/test.hsd index 4141c15..783d157 100644 --- a/test/test.hsd +++ b/test/test.hsd @@ -17,8 +17,13 @@ Hamiltonian { Broyden {} } MaxAngularMomentum { - O = "p" - H = "s" + H = SelectedShells {"s" "s" } + O = SelectedShells {"s" "p" } + } + Dispersion = SlaterKirkwood { + PolarRadiusCharge [AA^3,AA,] = { + 1.030000 3.800000 2.820000 + } } Filling { Fermi { From f1f61a2d84b8caa5a5062889ebac271b0ef2f0b4 Mon Sep 17 00:00:00 2001 From: Tammo van der Heide Date: Tue, 7 Apr 2020 11:08:28 +0200 Subject: [PATCH 10/31] Remove artifacts of previous commit Furthermore, consider merging comparisons with 'in', as this is probably faster and less verbose. 
--- src/hsd/parser.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/hsd/parser.py b/src/hsd/parser.py index 0f7cd59..4c99087 100644 --- a/src/hsd/parser.py +++ b/src/hsd/parser.py @@ -13,7 +13,7 @@ __all__ = ["HsdParser", - "SYNTAX_ERROR", "UNCLOSED_TAG_ERROR", "UNCLOSED_OPTION_ERROR", + "SYNTAX_ERROR", "UNCLOSED_TAG_ERROR", "UNCLOSED_ATTRIB_ERROR", "UNCLOSED_QUOTATION_ERROR", "ORPHAN_TEXT_ERROR"] SYNTAX_ERROR = 1 @@ -99,15 +99,15 @@ def __init__(self, eventhandler=None): else: self._eventhandler = eventhandler - self._fname = "" # Name of file being processed - self._checkstr = _GENERAL_SPECIALS # special characters to look for + self._fname = "" # name of file being processed + self._checkstr = _GENERAL_SPECIALS # special characters to look for self._oldcheckstr = "" # buffer fo checkstr self._opened_tags = [] # info about opened tags self._buffer = [] # buffering plain text between lines self._attrib = None # attribute for current tag self._hsdoptions = OrderedDict() # hsd-options for current tag self._currline = 0 # nr. 
of current line in file - self._after_equal_sign = False # last tag was opened with equal sign + self._after_equal_sign = False # last tag was opened with equal sign self._inside_attrib = False # parser inside attrib specification self._inside_quote = False # parser inside quotation self._has_child = False @@ -233,7 +233,7 @@ def _parse(self, line): self._checkstr = _GENERAL_SPECIALS # Quoting strings - elif sign == "'" or sign == '"': + elif sign in ("'", '"'): if self._inside_quote: self._checkstr = self._oldcheckstr self._inside_quote = False @@ -311,8 +311,7 @@ def _closetag(self): def _include_hsd(self, fname): fname = common.unquote(fname.strip()) - parser = HsdParser(defattrib=self._defattrib, - eventhandler=self._eventhandler) + parser = HsdParser(eventhandler=self._eventhandler) parser.feed(fname) From eedb0cc94ecfa54e72f7d14ce5033da9bc2864f9 Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Tue, 4 Aug 2020 12:16:46 +0200 Subject: [PATCH 11/31] Minimal installation info - add at least a minimal info how to install hsd-python --- README.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.rst b/README.rst index 840dc7a..a44fc16 100644 --- a/README.rst +++ b/README.rst @@ -8,6 +8,16 @@ the Human-friendly Structured Data (HSD) format. It is licensed under the *BSD 2-clause license*. +Installation +============ + +To install the python package in development mode use + +.. 
code:: + + pip install -e src + + The HSD format ============== From 54486aaa01d5a5ad8daa2b6f98182cd76cd3f9af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Sun, 12 Sep 2021 15:00:41 +0200 Subject: [PATCH 12/31] Refactor, improve API, documentation and packaging --- CHANGELOG.rst | 14 + LICENSE | 2 +- README.rst | 108 ++- pyproject.toml | 3 + setup.cfg | 31 + src/LICENSE | 1 - src/MANIFEST.in | 6 - src/hsd/__init__.py | 5 +- src/hsd/common.py | 54 +- src/hsd/dictbuilder.py | 18 +- src/hsd/eventhandler.py | 56 ++ src/hsd/{dump.py => io.py} | 91 +- src/hsd/parser.py | 86 +- src/setup.cfg | 30 - src/setup.py | 59 -- src/versioneer.py | 1822 ------------------------------------ test/test_dictbuilder.py | 2 +- 17 files changed, 310 insertions(+), 2078 deletions(-) create mode 100644 CHANGELOG.rst create mode 100644 pyproject.toml create mode 100644 setup.cfg delete mode 120000 src/LICENSE delete mode 100644 src/MANIFEST.in create mode 100644 src/hsd/eventhandler.py rename src/hsd/{dump.py => io.py} (64%) delete mode 100644 src/setup.cfg delete mode 100644 src/setup.py delete mode 100644 src/versioneer.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst new file mode 100644 index 0000000..16cdadb --- /dev/null +++ b/CHANGELOG.rst @@ -0,0 +1,14 @@ +========== +Change Log +========== + + +Unreleased +========== + +Added +----- + +* Basic functionality to manipulate HSD-data in Python. + +* Pip installation diff --git a/LICENSE b/LICENSE index 36d474a..10c023d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2011-2020 DFTB+ developers group +Copyright (c) 2011-2021 DFTB+ developers group All rights reserved. 
diff --git a/README.rst b/README.rst index a44fc16..f491fb2 100644 --- a/README.rst +++ b/README.rst @@ -1,33 +1,40 @@ -************************************ -HSD — Human-friendly Structured Data -************************************ +********************************************** +HSD — Make your structured data human friendly +********************************************** -This Python package contains utilities to read and write files in -the Human-friendly Structured Data (HSD) format. +This package contains utilities to read and write files in the Human-friendly +Structured Data (HSD) format. -It is licensed under the *BSD 2-clause license*. +The HSD-format is very similar to both JSON and YAML, but tries to minimize the +effort for **humans** to read and write it. It omits special characters as much +as possible (in contrast to JSON) and is not indentation dependent (in contrast +to YAML). It was developed originally as the input format for the scientific +simulation tool (`DFTB+ `_), but is +of general purpose. Data stored in HSD can be easily mapped to a subset of JSON +or XML and vice versa. Installation ============ -To install the python package in development mode use +The package can be installed via conda-forge:: -.. code:: + conda install hsd-python - pip install -e src +Alternatively, the package can be downloaded and installed via pip into the +active Python interpreter (preferably using a virtual python environment) by :: + pip install hsd -The HSD format -============== +or into the user space issuing :: + + pip install --user hsd -The HSD-format is very similar to both JSON and XML, but tries to minimize the -effort for humans to read and write it. It ommits special characters as much as -possible but (in contrast to YAML for example) is not indentation dependent. -It was developed originally as the input format for a scientific simulation tool -(`DFTB+ `_), but is absolutely general.
A -typical input written in HSD looks like :: +Quick tutorial +============== + +A typical, self-explaining input written in HSD looks like :: driver { conjugate_gradients { @@ -45,11 +52,13 @@ typical input written in HSD looks like :: } filling { fermi { - temperature [kelvin] = 1e-8 + # This is a comment which will be ignored + # Note the attribute (unit) of the field below + temperature [kelvin] = 100 } } k_points_and_weights { - supercell_folding = { + supercell_folding { 2 0 0 0 2 0 0 0 2 @@ -59,13 +68,56 @@ typical input written in HSD looks like :: } } -Content in HSD format can be represented as JSON. Content in JSON format can -similarly be represented as HSD, provided it satisfies one restriction for -arrays: Either all elements of an array must be objects or none of them. (This -allows for a clear separation of structure and data and allows for the very -simple input format.) +The above input can be parsed into a Python dictionary with:: + + import hsd + hsdinput = hsd.load_file("test.hsd") + +The dictionary ``hsdinput`` will then look like:: + + { + "driver": { + "conjugate_gradients": { + "moved_atoms": [1, 2, "7:19"], + "max_steps": 100 + } + }, + "hamiltonian": { + "dftb": { + "scc": True, + "scc_tolerance": 1e-10, + "mixer": { + "broyden": {} + }, + "filling": { + "fermi": { + "temperature": 100, + "temperature.attrib": "kelvin" + } + }, + "k_points_and_weights": { + "supercell_folding": [ + [2, 0, 0], + [0, 2, 0], + [0, 0, 2], + [0.5, 0.5, 0.5] + ] + } + } + } + } + +Being a simple Python dictionary, it can be easily queried and manipulated in +Python :: + + hsdinput["driver"]["conjugate_gradients"]["max_steps"] = 200 + +and then stored again in HSD format :: + + hsd.dump_file(hsdinput, "test2.hsd") + + +License +======== -Content in HSD format can be represented as XML (DOM-tree). Likewise content in -XML can be converted to HSD, provided it satisfies the restriction that every -child has either data (text) or further children, but never both of -them.
(Again, this ensures the simplicity of the input format.) +The hsd-python package is licensed under the `BSD 2-clause license `_. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9787c3b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..0cf64cf --- /dev/null +++ b/setup.cfg @@ -0,0 +1,31 @@ +[metadata] +name = hsd-python +version = 0.1 +author = DFTB+ developers group +author_email = info@dftbplus.org +url = https://github.com/dftbplus/hsd-python +description = + Tools for reading, writing and manipulating data stored in the human-friendly + structured data (HSD) format +long_description = file: README.rst +long_description_content_type = text/x-rst +license = BSD +license_file = LICENSE +platform = any +classifiers = + Intended Audience :: Developers + License :: OSI Approved :: BSD License + Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.7 + +[options] +include_package_data = True +package_dir = + = src +packages = hsd + +[options.packages.find] +where = src diff --git a/src/LICENSE b/src/LICENSE deleted file mode 120000 index ea5b606..0000000 --- a/src/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../LICENSE \ No newline at end of file diff --git a/src/MANIFEST.in b/src/MANIFEST.in deleted file mode 100644 index 2606db3..0000000 --- a/src/MANIFEST.in +++ /dev/null @@ -1,6 +0,0 @@ -include LICENSE -include MANIFEST.in -include versioneer.py - -graft hsd -global-exclude *.py[cod] __pycache__ *.so diff --git a/src/hsd/__init__.py b/src/hsd/__init__.py index 87c7544..4eb10ee 100644 --- a/src/hsd/__init__.py +++ b/src/hsd/__init__.py @@ -8,6 +8,7 @@ """ Central module for the hsd package """ -from .dump import dump, dumps -from 
.parser import HsdParser from .dictbuilder import HsdDictBuilder +from .eventhandler import HsdEventHandler +from .io import load, load_string, load_file, dump, dump_string, dump_file +from .parser import HsdParser diff --git a/src/hsd/common.py b/src/hsd/common.py index 6669d13..f425699 100644 --- a/src/hsd/common.py +++ b/src/hsd/common.py @@ -12,40 +12,10 @@ class HsdException(Exception): """Base class for exceptions in the HSD package.""" - pass - - -class HsdQueryError(HsdException): - """Base class for errors detected by the HsdQuery object. - - - Attributes: - filename: Name of the file where error occured (or empty string). - line: Line where the error occurred (or -1). - tag: Name of the tag with the error (or empty string). - """ - - def __init__(self, msg="", node=None): - """Initializes the exception. - - Args: - msg: Error message - node: HSD element where error occured (optional). - """ - super().__init__(msg) - if node is not None: - self.tag = node.gethsd(HSDATTR_TAG, node.tag) - self.file = node.gethsd(HSDATTR_FILE, -1) - self.line = node.gethsd(HSDATTR_LINE, None) - else: - self.tag = "" - self.file = -1 - self.line = None class HsdParserError(HsdException): """Base class for parser related errors.""" - pass def unquote(txt): @@ -56,11 +26,23 @@ def unquote(txt): # Name for default attribute (when attribute name is not specified) -DEFAULT_ATTRIBUTE = "attribute" +DEFAULT_ATTRIBUTE = "unit" + +# Suffix to mark attribute +ATTRIB_SUFFIX = ".attrib" + +# Length of the attribute suffix +LEN_ATTRIB_SUFFIX = len(ATTRIB_SUFFIX) + +# Suffix to mark hsd processing attributes +HSD_ATTRIB_SUFFIX = ".hsdattrib" + +# Lengths of hsd processing attribute suffix +LEN_HSD_ATTRIB_SUFFIX = len(HSD_ATTRIB_SUFFIX) + + +HSD_ATTRIB_LINE = "line" +HSD_ATTRIB_EQUAL = "equal" -HSDATTR_PROC = "processed" -HSDATTR_EQUAL = "equal" -HSDATTR_FILE = "file" -HSDATTR_LINE = "line" -HSDATTR_TAG = "tag" +HSD_ATTRIB_TAG = "tag" \ No newline at end of file diff --git 
a/src/hsd/dictbuilder.py b/src/hsd/dictbuilder.py index 3109eb4..423bfd4 100644 --- a/src/hsd/dictbuilder.py +++ b/src/hsd/dictbuilder.py @@ -9,9 +9,8 @@ Contains an event-driven builder for dictionary based (JSON-like) structure """ import re -from .parser import HsdEventHandler - -__all__ = ['HsdDictBuilder'] +from .common import ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX +from .eventhandler import HsdEventHandler _TOKEN_PATTERN = re.compile(r""" @@ -29,21 +28,24 @@ class HsdDictBuilder(HsdEventHandler): """Deserializes HSD into nested dictionaries - Note: hsdoptions passed by the generating events are ignored. + Note: hsdattrib passed by the generating events are ignored. """ - def __init__(self, flatten_data=False): - HsdEventHandler.__init__(self) + def __init__(self, flatten_data=False, include_hsd_attribs=False): + super().__init__() self._hsddict = {} self._curblock = self._hsddict self._parentblocks = [] self._data = None self._flatten_data = flatten_data + self._include_hsd_attribs = include_hsd_attribs - def open_tag(self, tagname, attrib, hsdoptions): + def open_tag(self, tagname, attrib, hsdattrib): if attrib is not None: - self._curblock[tagname + '.attribute'] = attrib + self._curblock[tagname + ATTRIB_SUFFIX] = attrib + if self._include_hsd_attribs and hsdattrib is not None: + self._curblock[tagname + HSD_ATTRIB_SUFFIX] = hsdattrib self._parentblocks.append(self._curblock) self._curblock = {} diff --git a/src/hsd/eventhandler.py b/src/hsd/eventhandler.py new file mode 100644 index 0000000..8fd0dc1 --- /dev/null +++ b/src/hsd/eventhandler.py @@ -0,0 +1,56 @@ +"""Contains an event handler base class.""" + + +class HsdEventHandler: + """Base class for event handler implementing simple printing""" + + def __init__(self): + """Initializes the default event handler""" + self._indentlevel = 0 + self._indentstr = " " + + + def open_tag(self, tagname, attrib, hsdattrib): + """Handler which is called when a tag is opened. 
+ + It should be overriden in the application to handle the event in a + customized way. + + Args: + tagname: Name of the tag which had been opened. + attrib: String containing the attribute of the tag or None. + hsdattrib: Dictionary of the options created during the processing + in the hsd-parser. + """ + indentstr = self._indentlevel * self._indentstr + print("{}OPENING TAG: {}".format(indentstr, tagname)) + print("{}ATTRIBUTE: {}".format(indentstr, attrib)) + print("{}HSD OPTIONS: {}".format(indentstr, str(hsdattrib))) + self._indentlevel += 1 + + + def close_tag(self, tagname): + """Handler which is called when a tag is closed. + + It should be overriden in the application to handle the event in a + customized way. + + Args: + tagname: Name of the tag which had been closed. + """ + indentstr = self._indentlevel * self._indentstr + print("{}CLOSING TAG: {}".format(indentstr, tagname)) + self._indentlevel -= 1 + + + def add_text(self, text): + """Handler which is called with the text found inside a tag. + + It should be overriden in the application to handle the event in a + customized way. + + Args: + text: Text in the current tag. + """ + indentstr = self._indentlevel * self._indentstr + print("{}Received text: {}".format(indentstr, text)) diff --git a/src/hsd/dump.py b/src/hsd/io.py similarity index 64% rename from src/hsd/dump.py rename to src/hsd/io.py index 040d026..421eb6e 100644 --- a/src/hsd/dump.py +++ b/src/hsd/io.py @@ -9,10 +9,14 @@ Provides functionality to dump Python structures to HSD """ import io -import numpy as np -from .common import DEFAULT_ATTRIBUTE - -__all__ = ['dump', 'dumps'] +try: + import numpy as np +except ModuleNotFoundError: + np = None +from .common import \ + ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX, LEN_ATTRIB_SUFFIX, LEN_HSD_ATTRIB_SUFFIX +from .dictbuilder import HsdDictBuilder +from .parser import HsdParser _INDENT_STR = " " @@ -24,17 +28,74 @@ _SPECIAL_CHARS = "{}[]= " -_ATTRIBUTE_SUFFIX = "." 
+ DEFAULT_ATTRIBUTE +def load(fobj): + """Loads a file like object with HSD-formatted data into a Python dictionary + + Args: + fobj: File like object to read the data from + + Returns: + Dictionary representing the HSD data. + """ + dictbuilder = HsdDictBuilder() + parser = HsdParser(eventhandler=dictbuilder) + parser.feed(fobj) + return dictbuilder.hsddict + + +def load_file(fname): + """Loads a file with HSD-formatted data into a Python dictionary + + Args: + fname: Name of the text file to read the data from + + Returns: + Dictionary representing the HSD data. + """ + with open(fname, "r") as fobj: + return load(fobj) + + +def load_string(hsdstr): + """Loads a string with HSD-formatted data into a Python dictionary. + + Args: + hsdstr: String with HSD-formatted data. + + Returns: + Dictionary representing the HSD data. + """ + fobj = io.StringIO(hsdstr) + return load(fobj) def dump(obj, fobj): """Serializes an object to a file in HSD format. Args: - obj: Object to be serialized in HSD format + obj: Dictionary like object to be serialized in HSD format fobj: File like object to write the result to. + + Raises: + TypeError: if object is not a dictionary instance. """ + if isinstance(obj, dict): + _dump_dict(obj, fobj, "") + else: + msg = "Invalid object type" + raise TypeError(msg) + +def dump_file(obj, fobj): + """Serializes an object to a file in HSD format. + + Args: + obj: Dictionary like object to be serialized in HSD format + fobj: File like object to write the result to. + + Raises: + TypeError: if object is not a dictionary instance. + """ if isinstance(obj, dict): _dump_dict(obj, fobj, "") else: @@ -42,7 +103,7 @@ def dump(obj, fobj): raise TypeError(msg) -def dumps(obj): +def dump_string(obj): """Serializes an object to string in HSD format. 
Args: @@ -58,14 +119,20 @@ def dumps(obj): def _dump_dict(obj, fobj, indentstr): for key, value in obj.items(): - if key.endswith(_ATTRIBUTE_SUFFIX): - if key[:-len(_ATTRIBUTE_SUFFIX)] in obj: + if key.endswith(ATTRIB_SUFFIX): + if key[:-LEN_ATTRIB_SUFFIX] in obj: continue else: msg = "Attribute '{}' without corresponding tag '{}'"\ - .format(key, key[:-len(_ATTRIBUTE_SUFFIX)]) + .format(key, key[:-len(ATTRIB_SUFFIX)]) + raise ValueError(msg) + if key.endswith(HSD_ATTRIB_SUFFIX): + if key[:-LEN_HSD_ATTRIB_SUFFIX] in obj: continue + else: + msg = "HSD attribute '{}' without corresponding tag '{}'"\ + .format(key, key[:-len(HSD_ATTRIB_SUFFIX)]) raise ValueError(msg) - attrib = obj.get(key + _ATTRIBUTE_SUFFIX) + attrib = obj.get(key + ATTRIB_SUFFIX) if attrib is None: attribstr = "" elif not isinstance(attrib, str): @@ -96,7 +163,7 @@ def _get_hsd_rhs(obj, indentstr): if isinstance(obj, list): objstr = _list_to_hsd(obj) - elif isinstance(obj, np.ndarray): + elif np is not None and isinstance(obj, np.ndarray): objstr = _list_to_hsd(obj.tolist()) else: objstr = _item_to_hsd(obj) diff --git a/src/hsd/parser.py b/src/hsd/parser.py index 4c99087..3d9d726 100644 --- a/src/hsd/parser.py +++ b/src/hsd/parser.py @@ -8,14 +8,10 @@ """ Contains the event-generating HSD-parser. 
""" -from collections import OrderedDict import hsd.common as common +from .eventhandler import HsdEventHandler -__all__ = ["HsdParser", - "SYNTAX_ERROR", "UNCLOSED_TAG_ERROR", "UNCLOSED_ATTRIB_ERROR", - "UNCLOSED_QUOTATION_ERROR", "ORPHAN_TEXT_ERROR"] - SYNTAX_ERROR = 1 UNCLOSED_TAG_ERROR = 2 UNCLOSED_ATTRIB_ERROR = 3 @@ -23,62 +19,8 @@ ORPHAN_TEXT_ERROR = 5 _GENERAL_SPECIALS = "{}[]<=\"'#;" -_ATTRIB_SPECIALS = "]\"'" - - -class HsdEventHandler: - """Base class for event handler implementing simple printing""" - - def __init__(self): - """Initializes the default event handler""" - self._indentlevel = 0 - self._indentstr = " " - - - def open_tag(self, tagname, attrib, hsdoptions): - """Handler which is called when a tag is opened. - - It should be overriden in the application to handle the event in a - customized way. - - Args: - tagname: Name of the tag which had been opened. - attrib: String containing the attribute of the tag or None. - hsdoptions: Dictionary of the options created during the processing - in the hsd-parser. - """ - indentstr = self._indentlevel * self._indentstr - print("{}OPENING TAG: {}".format(indentstr, tagname)) - print("{}ATTRIBUTE: {}".format(indentstr, attrib)) - print("{}HSD OPTIONS: {}".format(indentstr, str(hsdoptions))) - self._indentlevel += 1 - - - def close_tag(self, tagname): - """Handler which is called when a tag is closed. - It should be overriden in the application to handle the event in a - customized way. - - Args: - tagname: Name of the tag which had been closed. - """ - indentstr = self._indentlevel * self._indentstr - print("{}CLOSING TAG: {}".format(indentstr, tagname)) - self._indentlevel -= 1 - - - def add_text(self, text): - """Handler which is called with the text found inside a tag. - - It should be overriden in the application to handle the event in a - customized way. - - Args: - text: Text in the current tag. 
- """ - indentstr = self._indentlevel * self._indentstr - print("{}Received text: {}".format(indentstr, text)) +_ATTRIB_SPECIALS = "]\"'" class HsdParser: @@ -88,7 +30,7 @@ class HsdParser: and `_handle_error()` should be overridden by the actual application. """ - def __init__(self, eventhandler=None): + def __init__(self, eventhandler=None, lower_tag_names=False): """Initializes the parser. Args: @@ -105,13 +47,14 @@ def __init__(self, eventhandler=None): self._opened_tags = [] # info about opened tags self._buffer = [] # buffering plain text between lines self._attrib = None # attribute for current tag - self._hsdoptions = OrderedDict() # hsd-options for current tag + self._hsdattrib = {} # hsd-options for current tag self._currline = 0 # nr. of current line in file self._after_equal_sign = False # last tag was opened with equal sign self._inside_attrib = False # parser inside attrib specification self._inside_quote = False # parser inside quotation self._has_child = False self._oldbefore = "" # buffer for tagname + self._lower_tag_names = lower_tag_names def feed(self, fobj): @@ -180,7 +123,7 @@ def _parse(self, line): self._oldbefore += before else: self._has_child = True - self._hsdoptions[common.HSDATTR_EQUAL] = True + self._hsdattrib[common.HSD_ATTRIB_EQUAL] = True self._starttag(before, False) self._after_equal_sign = True @@ -219,7 +162,6 @@ def _parse(self, line): self._oldbefore = before self._buffer = [] self._inside_attrib = True - self._key = "" self._opened_tags.append(("[", self._currline, None)) self._checkstr = _ATTRIB_SPECIALS @@ -255,11 +197,10 @@ def _parse(self, line): self._buffer = [] self._eventhandler.add_text(self._include_txt(after[2:])) break - elif hsdinc: + if hsdinc: self._include_hsd(after[2:]) break - else: - self._buffer.append(before + sign) + self._buffer.append(before + sign) else: self._error(SYNTAX_ERROR, (self._currline, self._currline)) @@ -285,18 +226,19 @@ def _starttag(self, tagname, closeprev): tagname_stripped = 
self._oldbefore.strip() if len(tagname_stripped.split()) > 1: self._error(SYNTAX_ERROR, (self._currline, self._currline)) - self._hsdoptions[common.HSDATTR_LINE] = self._currline - self._hsdoptions[common.HSDATTR_TAG] = tagname_stripped - tagname_stripped = tagname_stripped.lower() + self._hsdattrib[common.HSD_ATTRIB_LINE] = self._currline + if self._lower_tag_names: + self._hsdattrib[common.HSD_ATTRIB_TAG] = tagname_stripped + tagname_stripped = tagname_stripped.lower() self._eventhandler.open_tag(tagname_stripped, self._attrib, - self._hsdoptions) + self._hsdattrib) self._opened_tags.append( (tagname_stripped, self._currline, closeprev, self._has_child)) self._buffer = [] self._oldbefore = "" self._has_child = False self._attrib = None - self._hsdoptions = OrderedDict() + self._hsdattrib = {} def _closetag(self): diff --git a/src/setup.cfg b/src/setup.cfg deleted file mode 100644 index 61ad7fb..0000000 --- a/src/setup.cfg +++ /dev/null @@ -1,30 +0,0 @@ -# Helper file to handle all configs - -[coverage:run] -# .coveragerc to control coverage.py and pytest-cov -omit = - # Omit the tests - */tests/* - # Omit generated versioneer - hsd/_version.py - -[yapf] -# YAPF, in .style.yapf files this shows up as "[style]" header -COLUMN_LIMIT = 119 -INDENT_WIDTH = 4 -USE_TABS = False - -[flake8] -# Flake8, PyFlakes, etc -max-line-length = 119 - -[versioneer] -# Automatic version numbering scheme -VCS = git -style = pep440 -versionfile_source = hsd/_version.py -versionfile_build = hsd/_version.py -tag_prefix = '' - -[aliases] -test = pytest diff --git a/src/setup.py b/src/setup.py deleted file mode 100644 index 5160b13..0000000 --- a/src/setup.py +++ /dev/null @@ -1,59 +0,0 @@ -""" -hsd -Python routines to manipulate HSD data -""" -import sys -from setuptools import setup, find_packages -import versioneer - -short_description = __doc__.split("\n") - -# from https://github.com/pytest-dev/pytest-runner#conditional-requirement -needs_pytest = {'pytest', 'test', 
'ptr'}.intersection(sys.argv) -pytest_runner = ['pytest-runner'] if needs_pytest else [] - -try: - with open("README.rst", "r") as handle: - long_description = handle.read() -except: - long_description = "\n".join(short_description[2:]) - - -setup( - # Self-descriptive entries which should always be present - name='hsd', - author='DFTB+ developers group', - author_email='info@dftbplus.org', - description=short_description[0], - long_description=long_description, - long_description_content_type="text/x-rst", - version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), - license='BSD 2-clause license', - - # Which Python importable modules should be included when your package is installed - # Handled automatically by setuptools. Use 'exclude' to prevent some specific - # subpackage(s) from being added, if needed - packages=find_packages(), - - # Optional include package data to ship with your package - # Customize MANIFEST.in if the general case does not suit your needs - # Comment out this line to prevent the files from being packaged with your software - include_package_data=True, - - # Allows `setup.py test` to work correctly with pytest - setup_requires=[] + pytest_runner, - - # Additional entries you may want simply uncomment the lines you want and fill in the data - # url='http://www.my_package.com', # Website - install_requires=['numpy'], # Required packages, pulls from pip if needed; do not use for Conda deployment - # platforms=['Linux', - # 'Mac OS-X', - # 'Unix', - # 'Windows'], # Valid platforms your code works on, adjust to your flavor - # python_requires=">=3.5", # Python version restrictions - - # Manual control if final package is compressible or not, set False to prevent the .egg from being made - # zip_safe=False, - -) diff --git a/src/versioneer.py b/src/versioneer.py deleted file mode 100644 index 64fea1c..0000000 --- a/src/versioneer.py +++ /dev/null @@ -1,1822 +0,0 @@ - -# Version: 0.18 - -"""The Versioneer - like a rocketeer, but 
for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! -* https://github.com/warner/python-versioneer -* Brian Warner -* License: Public Domain -* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy -* [![Latest Version] -(https://pypip.in/version/versioneer/badge.svg?style=flat) -](https://pypi.python.org/pypi/versioneer/) -* [![Build Status] -(https://travis-ci.org/warner/python-versioneer.png?branch=master) -](https://travis-ci.org/warner/python-versioneer) - -This is a tool for managing a recorded version number in distutils-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -* `pip install versioneer` to somewhere to your $PATH -* add a `[versioneer]` section to your setup.cfg (see below) -* run `versioneer install` in your source tree, commit the results - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. 
Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes. - -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. 
- -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. - -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. 
- -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from ._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). - -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/warner/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). 
The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other langauges) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. -[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. - -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. 
- -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. - -[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - -### Unicode version strings - -While Versioneer works (and is continually tested) with both Python 2 and -Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. -Newer releases probably generate unicode version strings on py2. It's not -clear that this is wrong, but it may be surprising for applications when then -write these strings to a network connection or include them in bytes-oriented -APIs like cryptographic checksums. - -[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates -this question. - - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg`, if necessary, to include any new configuration settings - indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. 
-* re-run `versioneer install` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the Creative Commons "Public Domain -Dedication" license (CC0-1.0), as described in -https://creativecommons.org/publicdomain/zero/1.0/ . - -""" - -from __future__ import print_function -try: - import configparser -except ImportError: - import ConfigParser as configparser -import errno -import json -import os -import re -import subprocess -import sys - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_root(): - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . 
- """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ("Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. - me = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(me)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir: - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(me), versioneer_py)) - except NameError: - pass - return root - - -def get_config_from_root(root): - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise EnvironmentError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . 
- setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: - parser.readfp(f) - VCS = parser.get("versioneer", "VCS") # mandatory - - def get(parser, name): - if parser.has_option("versioneer", name): - return parser.get("versioneer", name) - return None - cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = get(parser, "style") or "" - cfg.versionfile_source = get(parser, "versionfile_source") - cfg.versionfile_build = get(parser, "versionfile_build") - cfg.tag_prefix = get(parser, "tag_prefix") - if cfg.tag_prefix in ("''", '""'): - cfg.tag_prefix = "" - cfg.parentdir_prefix = get(parser, "parentdir_prefix") - cfg.verbose = get(parser, "verbose") - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if 
sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -LONG_VERSION_PY['git'] = ''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = 
stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. 
The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%%s*" %% tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], - cwd=root)[0].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%%d" %% pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Eexceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. 
- for i in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. 
However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. 
"2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse 
describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(manifest_in, versionfile_source, ipy): - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] - if ipy: - files.append(ipy) - try: - me = __file__ - if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" - versioneer_file = os.path.relpath(me) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - f = open(".gitattributes", "r") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - f.close() - except EnvironmentError: - pass - if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.18) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. 
- -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename): - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except EnvironmentError: - raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename, versions): - """Write the given version number to the given _version.py file.""" - os.unlink(filename) - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Eexceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose=False): - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. 
- """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. - - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - 
print("unable to compute version") - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} - - -def get_version(): - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(): - """Get the custom setuptools/distutils subclasses used by Versioneer.""" - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. 
- # Also see https://github.com/warner/python-versioneer/issues/52 - - cmds = {} - - # we add "version" to both distutils and setuptools - from distutils.core import Command - - class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - cmds["version"] = cmd_version - - # we override "build_py" in both distutils and setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. - # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? - # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? 
- - # we override different "build_py" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py - else: - from distutils.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_py"] = cmd_build_py - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if 'py2exe' in sys.modules: # py2exe enabled? 
- try: - from py2exe.distutils_buildexe import py2exe as _py2exe # py3 - except ImportError: - from py2exe.build_exe import py2exe as _py2exe # py2 - - class cmd_py2exe(_py2exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["py2exe"] = cmd_py2exe - - # we override different "sdist" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist - else: - from distutils.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self): - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. 
You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. - -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -INIT_PY_SNIPPET = """ -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - - -def do_setup(): - """Main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except (EnvironmentError, configparser.NoSectionError, - configparser.NoOptionError) as e: - if isinstance(e, (EnvironmentError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - 
except EnvironmentError: - old = "" - if INIT_PY_SNIPPET not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(INIT_PY_SNIPPET) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. - manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except EnvironmentError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print(" appending versionfile_source ('%s') to MANIFEST.in" % - cfg.versionfile_source) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. 
- do_vcs_install(manifest_in, cfg.versionfile_source, ipy) - return 0 - - -def scan_setup_py(): - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1) diff --git a/test/test_dictbuilder.py b/test/test_dictbuilder.py index 61a40b4..c82bde4 100644 --- a/test/test_dictbuilder.py +++ b/test/test_dictbuilder.py @@ -6,8 +6,8 @@ # See the LICENSE file for terms of usage and distribution. 
# #------------------------------------------------------------------------------# # -import hsd import os.path as op +import hsd def test_dictbuilder(): dictbuilder = hsd.HsdDictBuilder() From 6d96f235ff83924d3bf5f089f797a9f3a010a0ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Mon, 13 Sep 2021 14:46:09 +0200 Subject: [PATCH 13/31] Add sphinx docs, improve API usability --- README.rst | 5 +- docs/Makefile | 20 +++++++ docs/api.rst | 33 +++++++++++ docs/conf.py | 60 +++++++++++++++++++ docs/index.rst | 15 +++++ docs/introduction.rst | 123 +++++++++++++++++++++++++++++++++++++++ docs/make.bat | 35 +++++++++++ src/hsd/__init__.py | 11 ++-- src/hsd/common.py | 7 +-- src/hsd/dictbuilder.py | 34 ++++++----- src/hsd/eventhandler.py | 39 +++++++------ src/hsd/io.py | 104 ++++++++++++++++++--------------- src/hsd/parser.py | 42 ++++++++++--- test/test_dictbuilder.py | 7 +-- test/test_dump.py | 9 ++- test/test_parser.py | 7 +-- 16 files changed, 439 insertions(+), 112 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/api.rst create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/introduction.rst create mode 100644 docs/make.bat diff --git a/README.rst b/README.rst index f491fb2..a52d3de 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,10 @@ as possible (in contrast to JSON) and is not indentation dependent (in contrast to YAML). It was developed originally as the input format for the scientific simulation tool (`DFTB+ `_), but is of general purpose. Data stored in HSD can be easily mapped to a subset of JSON -or XML andvica versa. +or XML and vica versa. + +Detailed `documentation `_ can be found on +`Read the Docs `_. 
Installation diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..1b35f1c --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,33 @@ +***************** +API documentation +***************** + +.. testsetup:: + + import hsd + + +High level routines +=================== + +.. autofunction:: hsd.load + +.. autofunction:: hsd.load_string + +.. autofunction:: hsd.dump + +.. autofunction:: hsd.dump_string + + + +Lower level building blocks +=========================== + +.. autoclass:: hsd.HsdParser + :members: + +.. autoclass:: hsd.HsdEventHandler + :members: + +.. autoclass:: hsd.HsdDictBuilder + :members: diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..47fa9f8 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,60 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. 
If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('../src')) + +# -- Project information ----------------------------------------------------- + +project = 'hsd-python' +copyright = '2021, DFTB+ developers group' +author = 'DFTB+ developers group' + +# The full version, including alpha/beta/rc tags +release = '0.1' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.napoleon' +] + +autodoc_member_order = 'bysource' + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +# html_theme = 'alabaster' +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..833f9a4 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,15 @@ +.. hsd-python documentation master file, created by + sphinx-quickstart on Mon Sep 13 11:38:29 2021. 
+ You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +###################################### +Welcome to hsd-python's documentation! +###################################### + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + introduction + api diff --git a/docs/introduction.rst b/docs/introduction.rst new file mode 100644 index 0000000..11de562 --- /dev/null +++ b/docs/introduction.rst @@ -0,0 +1,123 @@ +************ +Introduction +************ + +This package contains utilities to read and write files in the Human-friendly +Structured Data (HSD) format. + +The HSD-format is very similar to both JSON and YAML, but tries to minimize the +effort for **humans** to read and write it. It ommits special characters as much +as possible (in contrast to JSON) and is not indentation dependent (in contrast +to YAML). It was developed originally as the input format for the scientific +simulation tool (`DFTB+ `_), but is +of general purpose. Data stored in HSD can be easily mapped to a subset of JSON +or XML and vica versa. 
+ + +Installation +============ + +The package can be installed via conda-forge:: + + conda install hsd-python + +Alternatively, the package can be downloaded and installed via pip into the +active Python interpreter (preferably using a virtual python environment) by :: + + pip install hsd + +or into the user space issueing :: + + pip install --user hsd + + +Quick tutorial +============== + +A typical, self-explaining input written in HSD looks like :: + + driver { + conjugate_gradients { + moved_atoms = 1 2 "7:19" + max_steps = 100 + } + } + + hamiltonian { + dftb { + scc = yes + scc_tolerance = 1e-10 + mixer { + broyden {} + } + filling { + fermi { + # This is comment which will be ignored + # Note the attribute (unit) of the field below + temperature [kelvin] = 100 + } + } + k_points_and_weights { + supercell_folding { + 2 0 0 + 0 2 0 + 0 0 2 + 0.5 0.5 0.5 + } + } + } + } + +The above input can be parsed into a Python dictionary with:: + + import hsd + hsdinput = hsd.load_file("test.hsd") + +The dictionary ``hsdinput`` will then look as:: + + { + "driver": { + "conjugate_gradients" { + "moved_atoms": [1, 2, "7:19"], + "max_steps": 100 + } + }, + "hamiltonian": { + "dftb": { + "scc": True, + "scc_tolerance": 1e-10, + "mixer": { + "broyden": {} + }, + "filling": { + "fermi": { + "temperature": 100, + "temperature.attrib": "kelvin" + } + } + "k_points_and_weights": { + "supercell_folding": [ + [2, 0, 0], + [0, 2, 0], + [0, 0, 2], + [0.5, 0.5, 0.5] + ] + } + } + } + } + +Being a simple Python dictionary, it can be easily queried and manipulated in +Python :: + + hsdinput["driver"]["conjugate_gradients"]["max_steps"] = 200 + +and then stored again in HSD format :: + + hsd.dump_file(hsdinput, "test2.hsd") + + +License +======== + +The hsd-python package is licensed under the `BSD 2-clause license `_. 
diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..8084272 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/src/hsd/__init__.py b/src/hsd/__init__.py index 4eb10ee..3185fa4 100644 --- a/src/hsd/__init__.py +++ b/src/hsd/__init__.py @@ -1,14 +1,13 @@ #------------------------------------------------------------------------------# -# hsd: package for manipulating HSD-formatted data # -# Copyright (C) 2011 - 2020 DFTB+ developers group # -# # -# See the LICENSE file for terms of usage and distribution. # +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # #------------------------------------------------------------------------------# # """ -Central module for the hsd package +Toolbox for reading, writing and manipulating HSD-data. 
""" from .dictbuilder import HsdDictBuilder from .eventhandler import HsdEventHandler -from .io import load, load_string, load_file, dump, dump_string, dump_file +from .io import load, load_string, dump, dump_string from .parser import HsdParser diff --git a/src/hsd/common.py b/src/hsd/common.py index f425699..3b6972d 100644 --- a/src/hsd/common.py +++ b/src/hsd/common.py @@ -1,8 +1,7 @@ #------------------------------------------------------------------------------# -# hsd: package for manipulating HSD-formatted data # -# Copyright (C) 2011 - 2020 DFTB+ developers group # -# # -# See the LICENSE file for terms of usage and distribution. # +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # #------------------------------------------------------------------------------# # """ diff --git a/src/hsd/dictbuilder.py b/src/hsd/dictbuilder.py index 423bfd4..64d953d 100644 --- a/src/hsd/dictbuilder.py +++ b/src/hsd/dictbuilder.py @@ -1,8 +1,7 @@ #------------------------------------------------------------------------------# -# hsd: package for manipulating HSD-formatted data # -# Copyright (C) 2011 - 2020 DFTB+ developers group # -# # -# See the LICENSE file for terms of usage and distribution. # +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # #------------------------------------------------------------------------------# # """ @@ -26,12 +25,19 @@ class HsdDictBuilder(HsdEventHandler): - """Deserializes HSD into nested dictionaries - - Note: hsdattrib passed by the generating events are ignored. + """Specific HSD event handler, which builds a nested Python dictionary. + + Args: + flatten_data: Whether multiline data in the HSD input should be + flattened into a single list. 
Othewise a list of lists is created, + with one list for every line (default). + include_hsd_attribs: Whether the HSD-attributes (processing related + attributes, like original tag name, line information, etc.) should + be stored. """ - def __init__(self, flatten_data=False, include_hsd_attribs=False): + def __init__(self, flatten_data: bool = False, + include_hsd_attribs: bool = False): super().__init__() self._hsddict = {} self._curblock = self._hsddict @@ -41,6 +47,12 @@ def __init__(self, flatten_data=False, include_hsd_attribs=False): self._include_hsd_attribs = include_hsd_attribs + @property + def hsddict(self): + """The dictionary which has been built""" + return self._hsddict + + def open_tag(self, tagname, attrib, hsdattrib): if attrib is not None: self._curblock[tagname + ATTRIB_SUFFIX] = attrib @@ -72,12 +84,6 @@ def add_text(self, text): self._data = self._text_to_data(text) - @property - def hsddict(self): - """Returns the dictionary which has been built""" - return self._hsddict - - def _text_to_data(self, txt): data = [] for line in txt.split("\n"): diff --git a/src/hsd/eventhandler.py b/src/hsd/eventhandler.py index 8fd0dc1..851b3b5 100644 --- a/src/hsd/eventhandler.py +++ b/src/hsd/eventhandler.py @@ -1,8 +1,20 @@ -"""Contains an event handler base class.""" +#------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # +#------------------------------------------------------------------------------# +# +""" +Contains an event handler base class. +""" class HsdEventHandler: - """Base class for event handler implementing simple printing""" + """Base class for event handlers. + + This specifc implemenation prints the events. Subclassing instances + should override the public methods to customize its behavior. 
+ """ def __init__(self): """Initializes the default event handler""" @@ -10,11 +22,8 @@ def __init__(self): self._indentstr = " " - def open_tag(self, tagname, attrib, hsdattrib): - """Handler which is called when a tag is opened. - - It should be overriden in the application to handle the event in a - customized way. + def open_tag(self, tagname: str, attrib: str, hsdattrib: dict): + """Opens a tag. Args: tagname: Name of the tag which had been opened. @@ -25,15 +34,12 @@ def open_tag(self, tagname, attrib, hsdattrib): indentstr = self._indentlevel * self._indentstr print("{}OPENING TAG: {}".format(indentstr, tagname)) print("{}ATTRIBUTE: {}".format(indentstr, attrib)) - print("{}HSD OPTIONS: {}".format(indentstr, str(hsdattrib))) + print("{}HSD ATTRIBUTE: {}".format(indentstr, str(hsdattrib))) self._indentlevel += 1 - def close_tag(self, tagname): - """Handler which is called when a tag is closed. - - It should be overriden in the application to handle the event in a - customized way. + def close_tag(self, tagname: str): + """Closes a tag. Args: tagname: Name of the tag which had been closed. @@ -43,11 +49,8 @@ def close_tag(self, tagname): self._indentlevel -= 1 - def add_text(self, text): - """Handler which is called with the text found inside a tag. - - It should be overriden in the application to handle the event in a - customized way. + def add_text(self, text: str): + """Adds text (data) to the current tag. Args: text: Text in the current tag. diff --git a/src/hsd/io.py b/src/hsd/io.py index 421eb6e..038e158 100644 --- a/src/hsd/io.py +++ b/src/hsd/io.py @@ -1,8 +1,7 @@ #------------------------------------------------------------------------------# -# hsd: package for manipulating HSD-formatted data # -# Copyright (C) 2011 - 2020 DFTB+ developers group # -# # -# See the LICENSE file for terms of usage and distribution. 
# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # #------------------------------------------------------------------------------# # """ @@ -13,6 +12,8 @@ import numpy as np except ModuleNotFoundError: np = None +from typing import Union, TextIO + from .common import \ ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX, LEN_ATTRIB_SUFFIX, LEN_HSD_ATTRIB_SUFFIX from .dictbuilder import HsdDictBuilder @@ -28,35 +29,26 @@ _SPECIAL_CHARS = "{}[]= " -def load(fobj): - """Loads a file like object with HSD-formatted data into a Python dictionary +def load(hsdfile: Union[TextIO, str]) -> dict: + """Loads a file with HSD-formatted data into a Python dictionary Args: - fobj: File like object to read the data from + hsdfile: Name of file or file like object to read the HSD data from Returns: Dictionary representing the HSD data. """ dictbuilder = HsdDictBuilder() parser = HsdParser(eventhandler=dictbuilder) - parser.feed(fobj) + if isinstance(hsdfile, str): + with open(hsdfile, "r") as hsdfile: + parser.feed(hsdfile) + else: + parser.feed(hsdfile) return dictbuilder.hsddict -def load_file(fname): - """Loads a file with HSD-formatted data into a Python dictionary - - Args: - fname: Name of the text file to read the data from - - Returns: - Dictionary representing the HSD data. - """ - with open(fname, "r") as fobj: - return load(fobj) - - -def load_string(hsdstr): +def load_string(hsdstr: str) -> dict: """Loads a string with HSD-formatted data into a Python dictionary. Args: @@ -64,56 +56,72 @@ def load_string(hsdstr): Returns: Dictionary representing the HSD data. + + Examples: + >>> hsdstr = \"\"\" + ... Dftb { + ... Scc = Yes + ... Filling { + ... Fermi { + ... Temperature [Kelvin] = 100 + ... } + ... } + ... } + ... 
\"\"\" + >>> hsd.load_string(hsdstr) + {'Dftb': {'Scc': True, 'Filling': {'Fermi': {'Temperature.attrib': 'Kelvin', 'Temperature': 100}}}} """ fobj = io.StringIO(hsdstr) return load(fobj) -def dump(obj, fobj): - """Serializes an object to a file in HSD format. +def dump(data: dict, hsdfile: Union[TextIO, str]): + """Dumps data to a file in HSD format. Args: - obj: Dictionary like object to be serialized in HSD format - fobj: File like object to write the result to. + data: Dictionary like object to be written in HSD format + hsdfile: Name of file or file like object to write the result to. Raises: TypeError: if object is not a dictionary instance. """ - if isinstance(obj, dict): - _dump_dict(obj, fobj, "") - else: + if not isinstance(data, dict): msg = "Invalid object type" raise TypeError(msg) - - -def dump_file(obj, fobj): - """Serializes an object to a file in HSD format. - - Args: - obj: Dictionary like object to be serialized in HSD format - fobj: File like object to write the result to. - - Raises: - TypeError: if object is not a dictionary instance. - """ - if isinstance(obj, dict): - _dump_dict(obj, fobj, "") + if isinstance(hsdfile, str): + with open(hsdfile, "w") as hsdfile: + _dump_dict(data, hsdfile, "") else: - msg = "Invalid object type" - raise TypeError(msg) + _dump_dict(data, hsdfile, "") -def dump_string(obj): +def dump_string(data) -> str: """Serializes an object to string in HSD format. Args: - obj: Object to serialize. + data: Dictionary like object to be written in HSD format. Returns: HSD formatted string. + + Examples: + >>> hsdtree = { + ... 'Dftb': { + ... 'Scc': True, + ... 'Filling': { + ... 'Fermi': { + ... 'Temperature': 100, + ... 'Temperature.attrib': 'Kelvin' + ... } + ... } + ... } + ... 
} + >>> hsd.dump_string(hsdtree) + 'Dftb {\\n Scc = Yes\\n Filling {\\n Fermi {\\n Temperature [Kelvin] = 100\\n }\\n }\\n}\\n' + """ result = io.StringIO() - dump(obj, result) + dump(data, result) return result.getvalue() diff --git a/src/hsd/parser.py b/src/hsd/parser.py index 3d9d726..591294b 100644 --- a/src/hsd/parser.py +++ b/src/hsd/parser.py @@ -1,13 +1,13 @@ #------------------------------------------------------------------------------# -# hsd: package for manipulating HSD-formatted data # -# Copyright (C) 2011 - 2020 DFTB+ developers group # -# # -# See the LICENSE file for terms of usage and distribution. # +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # #------------------------------------------------------------------------------# # """ Contains the event-generating HSD-parser. """ +from typing import Optional, TextIO, Union import hsd.common as common from .eventhandler import HsdEventHandler @@ -26,11 +26,34 @@ class HsdParser: """Event based parser for the HSD format. - The methods `open_tag()`, `close_tag()`, `add_text()` - and `_handle_error()` should be overridden by the actual application. + Arguments: + eventhandler: Object which should handle the HSD-events triggered + during parsing. When not specified, HsdEventHandler() is used. + lower_tag_names: Whether tag names should be lowered during parsing. + If the option is set, the original tag name will be stored among + the hsd attributes. + + Examples: + >>> from io import StringIO + >>> dictbuilder = hsd.HsdDictBuilder() + >>> parser = hsd.HsdParser(eventhandler=dictbuilder) + >>> hsdfile = StringIO(\"\"\" + ... Hamiltonian { + ... Dftb { + ... Scc = Yes + ... Filling = Fermi { + ... Temperature [Kelvin] = 100 + ... } + ... } + ... } + ... 
\"\"\") + >>> parser.feed(hsdfile) + >>> dictbuilder.hsddict + {'Hamiltonian': {'Dftb': {'Scc': True, 'Filling': {'Fermi': {'Temperature.attrib': 'Kelvin', 'Temperature': 100}}}}} """ - def __init__(self, eventhandler=None, lower_tag_names=False): + def __init__(self, eventhandler: Optional[HsdEventHandler] = None, + lower_tag_names: bool = False): """Initializes the parser. Args: @@ -57,9 +80,12 @@ def __init__(self, eventhandler=None, lower_tag_names=False): self._lower_tag_names = lower_tag_names - def feed(self, fobj): + def feed(self, fobj: Union[TextIO, str]): """Feeds the parser with data. + The parser will process the data and trigger the corresponding events + in the eventhandler which was passed at initialization. + Args: fobj: File like object or name of a file containing the data. """ diff --git a/test/test_dictbuilder.py b/test/test_dictbuilder.py index c82bde4..0c5c80e 100644 --- a/test/test_dictbuilder.py +++ b/test/test_dictbuilder.py @@ -1,9 +1,8 @@ #!/bin/env python3 #------------------------------------------------------------------------------# -# hsd: package for manipulating HSD-formatted data # -# Copyright (C) 2011 - 2020 DFTB+ developers group # -# # -# See the LICENSE file for terms of usage and distribution. # +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # #------------------------------------------------------------------------------# # import os.path as op diff --git a/test/test_dump.py b/test/test_dump.py index aca21c0..3e6a896 100644 --- a/test/test_dump.py +++ b/test/test_dump.py @@ -1,9 +1,8 @@ #!/bin/env python3 #------------------------------------------------------------------------------# -# hsd: package for manipulating HSD-formatted data # -# Copyright (C) 2011 - 2020 DFTB+ developers group # -# # -# See the LICENSE file for terms of usage and distribution. 
# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # #------------------------------------------------------------------------------# # import numpy as np @@ -59,4 +58,4 @@ } } } - print(hsd.dumps(INPUT)) + print(hsd.dump_string(INPUT)) diff --git a/test/test_parser.py b/test/test_parser.py index 5c2b7db..58dfc4c 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -1,9 +1,8 @@ #!/bin/env python3 #------------------------------------------------------------------------------# -# hsd: package for manipulating HSD-formatted data # -# Copyright (C) 2011 - 2020 DFTB+ developers group # -# # -# See the LICENSE file for terms of usage and distribution. # +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # #------------------------------------------------------------------------------# # import hsd From ed902ce95423e27df56b1f51e5220e46a5929751 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Mon, 13 Sep 2021 16:56:13 +0200 Subject: [PATCH 14/31] Add args to high level interface, extend docs --- README.rst | 4 +- docs/api.rst | 6 +-- docs/index.rst | 7 ++- docs/introduction.rst | 10 +--- src/hsd/io.py | 107 ++++++++++++++++++++++++++++++++++++------ 5 files changed, 103 insertions(+), 31 deletions(-) diff --git a/README.rst b/README.rst index a52d3de..9ad39d4 100644 --- a/README.rst +++ b/README.rst @@ -74,7 +74,7 @@ A typical, self-explaining input written in HSD looks like :: The above input can be parsed into a Python dictionary with:: import hsd - hsdinput = hsd.load_file("test.hsd") + hsdinput = hsd.load("test.hsd") The dictionary ``hsdinput`` will then look as:: @@ -117,7 +117,7 @@ Python :: and then stored again in HSD format :: - hsd.dump_file(hsdinput, "test2.hsd") + hsd.dump(hsdinput, "test2.hsd") 
License diff --git a/docs/api.rst b/docs/api.rst index 1b35f1c..ca89c08 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -10,14 +10,14 @@ API documentation High level routines =================== -.. autofunction:: hsd.load - .. autofunction:: hsd.load_string -.. autofunction:: hsd.dump +.. autofunction:: hsd.load .. autofunction:: hsd.dump_string +.. autofunction:: hsd.dump + Lower level building blocks diff --git a/docs/index.rst b/docs/index.rst index 833f9a4..1354db9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,13 +3,12 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -###################################### -Welcome to hsd-python's documentation! -###################################### +######################## +HSD-python documentation +######################## .. toctree:: :maxdepth: 2 - :caption: Contents: introduction api diff --git a/docs/introduction.rst b/docs/introduction.rst index 11de562..f4782c6 100644 --- a/docs/introduction.rst +++ b/docs/introduction.rst @@ -71,7 +71,7 @@ A typical, self-explaining input written in HSD looks like :: The above input can be parsed into a Python dictionary with:: import hsd - hsdinput = hsd.load_file("test.hsd") + hsdinput = hsd.load("test.hsd") The dictionary ``hsdinput`` will then look as:: @@ -114,10 +114,4 @@ Python :: and then stored again in HSD format :: - hsd.dump_file(hsdinput, "test2.hsd") - - -License -======== - -The hsd-python package is licensed under the `BSD 2-clause license `_. + hsd.dump(hsdinput, "test2.hsd") diff --git a/src/hsd/io.py b/src/hsd/io.py index 038e158..21932a3 100644 --- a/src/hsd/io.py +++ b/src/hsd/io.py @@ -1,4 +1,3 @@ -#------------------------------------------------------------------------------# # hsd-python: package for manipulating HSD-formatted data in Python # # Copyright (C) 2011 - 2021 DFTB+ developers group # # Licensed under the BSD 2-clause license. 
# @@ -29,17 +28,33 @@ _SPECIAL_CHARS = "{}[]= " -def load(hsdfile: Union[TextIO, str]) -> dict: +def load(hsdfile: Union[TextIO, str], lower_tag_names: bool = False, + include_hsd_attribs: bool = False, flatten_data: bool = False) -> dict: """Loads a file with HSD-formatted data into a Python dictionary Args: hsdfile: Name of file or file like object to read the HSD data from + lower_tag_names: When set, all tag names will be converted to lower-case + (practical, when input should be treated case insensitive.) If + ``include_hsd_attribs`` is set, the original tag name will be + stored among the HSD attributes. + include_hsd_attribs: Whether the HSD-attributes (processing related + attributes, like original tag name, line information, etc.) should + be stored. + flatten_data: Whether multiline data in the HSD input should be + flattened into a single list. Othewise a list of lists is created, + with one list for every line (default). Returns: Dictionary representing the HSD data. + + Examples: + See :func:`hsd.load_string` for examples of usage. """ - dictbuilder = HsdDictBuilder() - parser = HsdParser(eventhandler=dictbuilder) + dictbuilder = HsdDictBuilder(flatten_data=flatten_data, + include_hsd_attribs=include_hsd_attribs) + parser = HsdParser(eventhandler=dictbuilder, + lower_tag_names=lower_tag_names) if isinstance(hsdfile, str): with open(hsdfile, "r") as hsdfile: parser.feed(hsdfile) @@ -48,11 +63,22 @@ def load(hsdfile: Union[TextIO, str]) -> dict: return dictbuilder.hsddict -def load_string(hsdstr: str) -> dict: +def load_string(hsdstr: str, lower_tag_names: bool = False, + include_hsd_attribs: bool = False, flatten_data: bool = False) -> dict: """Loads a string with HSD-formatted data into a Python dictionary. Args: hsdstr: String with HSD-formatted data. + lower_tag_names: When set, all tag names will be converted to lower-case + (practical, when input should be treated case insensitive.) 
If + ``include_hsd_attribs`` is set, the original tag name will be + stored among the HSD attributes. + include_hsd_attribs: Whether the HSD-attributes (processing related + attributes, like original tag name, line information, etc.) should + be stored. + flatten_data: Whether multiline data in the HSD input should be + flattened into a single list. Othewise a list of lists is created, + with one list for every line (default). Returns: Dictionary representing the HSD data. @@ -70,36 +96,81 @@ def load_string(hsdstr: str) -> dict: ... \"\"\" >>> hsd.load_string(hsdstr) {'Dftb': {'Scc': True, 'Filling': {'Fermi': {'Temperature.attrib': 'Kelvin', 'Temperature': 100}}}} + + In order to ease the case-insensitive handling of the input, the tag + names can be converted to lower case during reading using the + ``lower_tag_names`` option. + + >>> hsd.load_string(hsdstr, lower_tag_names=True) + {'dftb': {'scc': True, 'filling': {'fermi': {'temperature.attrib': 'Kelvin', 'temperature': 100}}}} + + The original tag names (together with additional information like the + line number of a tag) can be recorded, if the ``include_hsd_attribs`` + option is set: + + >>> data = hsd.load_string(hsdstr, lower_tag_names=True, include_hsd_attribs=True) + + Each tag in the dictionary will have a corresponding ".hsdattrib" entry + with the recorded data: + + >>> data["dftb.hsdattrib"] + {'line': 1, 'tag': 'Dftb'} + + This additional data can be then also used to format the tags in the + original style, when writing the data in HSD-format again. 
Compare: + + >>> hsd.dump_string(data) + 'dftb {\\n scc = Yes\\n filling {\\n fermi {\\n temperature [Kelvin] = 100\\n }\\n }\\n}\\n' + + versus + + >>> hsd.dump_string(data, use_hsd_attribs=True) + 'Dftb {\\n Scc = Yes\\n Filling {\\n Fermi {\\n Temperature [Kelvin] = 100\\n }\\n }\\n}\\n' + """ fobj = io.StringIO(hsdstr) - return load(fobj) + return load(fobj, lower_tag_names, include_hsd_attribs, flatten_data) -def dump(data: dict, hsdfile: Union[TextIO, str]): +def dump(data: dict, hsdfile: Union[TextIO, str], + use_hsd_attribs: bool = False): """Dumps data to a file in HSD format. Args: data: Dictionary like object to be written in HSD format hsdfile: Name of file or file like object to write the result to. + use_hsd_attribs: Whether HSD attributes in the data structure should + be used to format the output. + + This option can be used to for example to restore original tag + names, if the file was loaded with the ``lower_tag_names`` and + ``include_hsd_attribs`` options set. Raises: TypeError: if object is not a dictionary instance. + + Examples: + + See :func:`hsd.load_string` for an example. """ if not isinstance(data, dict): msg = "Invalid object type" raise TypeError(msg) if isinstance(hsdfile, str): with open(hsdfile, "w") as hsdfile: - _dump_dict(data, hsdfile, "") + _dump_dict(data, hsdfile, "", use_hsd_attribs=use_hsd_attribs) else: - _dump_dict(data, hsdfile, "") + _dump_dict(data, hsdfile, "", use_hsd_attribs=use_hsd_attribs) -def dump_string(data) -> str: +def dump_string(data: dict, use_hsd_attribs: bool = False) -> str: """Serializes an object to string in HSD format. Args: data: Dictionary like object to be written in HSD format. + use_hsd_attribs: Whether HSD attributes of the data structure should + be used to format the output (e.g. to restore original mixed case + tag names) Returns: HSD formatted string. 
@@ -119,13 +190,15 @@ def dump_string(data) -> str: >>> hsd.dump_string(hsdtree) 'Dftb {\\n Scc = Yes\\n Filling {\\n Fermi {\\n Temperature [Kelvin] = 100\\n }\\n }\\n}\\n' + See also :func:`hsd.load_string` for an example. + """ result = io.StringIO() - dump(data, result) + dump(data, result, use_hsd_attribs=use_hsd_attribs) return result.getvalue() -def _dump_dict(obj, fobj, indentstr): +def _dump_dict(obj, fobj, indentstr, use_hsd_attribs): for key, value in obj.items(): if key.endswith(ATTRIB_SUFFIX): if key[:-LEN_ATTRIB_SUFFIX] in obj: @@ -149,17 +222,23 @@ def _dump_dict(obj, fobj, indentstr): raise ValueError(msg) else: attribstr = " [" + attrib + "]" + if use_hsd_attribs: + hsdattribs = obj.get(key + HSD_ATTRIB_SUFFIX) + if hsdattribs is not None: + key = hsdattribs.get("tag", key) if isinstance(value, dict): if value: fobj.write("{}{}{} {{\n".format(indentstr, key, attribstr)) - _dump_dict(value, fobj, indentstr + _INDENT_STR) + _dump_dict( + value, fobj, indentstr + _INDENT_STR, use_hsd_attribs) fobj.write("{}}}\n".format(indentstr)) else: fobj.write("{}{}{} {{}}\n".format(indentstr, key, attribstr)) elif isinstance(value, list) and value and isinstance(value[0], dict): for item in value: fobj.write("{}{}{} {{\n".format(indentstr, key, attribstr)) - _dump_dict(item, fobj, indentstr + _INDENT_STR) + _dump_dict( + item, fobj, indentstr + _INDENT_STR, use_hsd_attribs) fobj.write("{}}}\n".format(indentstr)) else: valstr = _get_hsd_rhs(value, indentstr) From 9e07ea4b51d2084eee934b0df0659d850bd63821 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Mon, 13 Sep 2021 17:11:36 +0200 Subject: [PATCH 15/31] Add minimal github CI tests, remove appveyor test --- .appveyor.yml | 29 ----------------------------- .github/workflows/ci.yml | 22 ++++++++++++++++++++++ test/test_dictbuilder.py | 4 ++-- 3 files changed, 24 insertions(+), 31 deletions(-) delete mode 100644 .appveyor.yml create mode 100644 .github/workflows/ci.yml diff --git a/.appveyor.yml 
b/.appveyor.yml deleted file mode 100644 index dc6de00..0000000 --- a/.appveyor.yml +++ /dev/null @@ -1,29 +0,0 @@ -environment: - - matrix: - - PYTHON: "C:\\Python36-x64" - PYTHON_VERSION: "3.6" - PYTHON_ARCH: "64" - - - PYTHON: "C:\\Python37-x64" - PYTHON_VERSION: "3.7" - PYTHON_ARCH: "64" - - -install: - # Make sure pip is around - - python -m ensurepip - - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" - - # Install the package locally - #- pip install --upgrade pip setuptools - - pip install pytest pytest-cov codecov - - pip install -e src - -build: false - -test_script: - - pytest -v --cov=hsd test - -on_success: - - codecov diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..4eb7f4e --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,22 @@ +name: CI +on: [push, pull_request] + +jobs: + test-new: + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + + - name: Install requirements (PIP) + run: pip3 install pytest sphinx + + - name: Run test pytest + run: python3 -m pytest + + - name: Run doctest + run: cd docs; make doctest diff --git a/test/test_dictbuilder.py b/test/test_dictbuilder.py index 0c5c80e..88d0843 100644 --- a/test/test_dictbuilder.py +++ b/test/test_dictbuilder.py @@ -17,7 +17,7 @@ def test_dictbuilder(): print("** Python structure without data flattening:\n") print(pyrep) print("\n** Turning back to HSD:\n") - print(hsd.dumps(pyrep)) + print(hsd.dump_string(pyrep)) def test_dictbuilder_flat(): @@ -29,7 +29,7 @@ def test_dictbuilder_flat(): print("** Python structure with data flattening:\n") print(pyrep) print("\n** Turning back to HSD:\n") - print(hsd.dumps(pyrep)) + print(hsd.dump_string(pyrep)) if __name__ == '__main__': From ffdb9e3b78339d251384a799948d37fbe3e785cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Wed, 15 Sep 2021 09:49:17 +0200 Subject: [PATCH 16/31] Fix GitHub action config 
--- .github/workflows/ci.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4eb7f4e..b7516e0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,6 +4,8 @@ on: [push, pull_request] jobs: test-new: + runs-on: ubuntu-latest + steps: - name: Checkout code uses: actions/checkout@v2 @@ -13,7 +15,10 @@ jobs: python-version: '3.x' - name: Install requirements (PIP) - run: pip3 install pytest sphinx + run: pip3 install pytest sphinx numpy + + - name: Setup up PYTHONPATH + run: echo "PYTHONPATH=${PWD}/src" >> $GITHUB_ENV - name: Run test pytest run: python3 -m pytest From 67ac737f74f8c8d96aa8dbda9a7ca8724eb6481a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Thu, 16 Sep 2021 18:51:30 +0200 Subject: [PATCH 17/31] Fix duplicate handling --- src/hsd/common.py | 6 +--- src/hsd/dictbuilder.py | 61 +++++++++++++++++++++++++--------------- src/hsd/io.py | 11 +++++--- src/hsd/parser.py | 2 +- test/test.hsd | 2 +- test/test_dictbuilder.py | 6 ++-- 6 files changed, 52 insertions(+), 36 deletions(-) diff --git a/src/hsd/common.py b/src/hsd/common.py index 3b6972d..3be53a8 100644 --- a/src/hsd/common.py +++ b/src/hsd/common.py @@ -9,14 +9,10 @@ """ -class HsdException(Exception): +class HsdError(Exception): """Base class for exceptions in the HSD package.""" -class HsdParserError(HsdException): - """Base class for parser related errors.""" - - def unquote(txt): """Giving string without quotes if enclosed in those.""" if len(txt) >= 2 and (txt[0] in "\"'") and txt[-1] == txt[0]: diff --git a/src/hsd/dictbuilder.py b/src/hsd/dictbuilder.py index 64d953d..3cf0611 100644 --- a/src/hsd/dictbuilder.py +++ b/src/hsd/dictbuilder.py @@ -8,7 +8,7 @@ Contains an event-driven builder for dictionary based (JSON-like) structure """ import re -from .common import ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX +from .common import ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX, HsdError from 
.eventhandler import HsdEventHandler @@ -40,9 +40,9 @@ def __init__(self, flatten_data: bool = False, include_hsd_attribs: bool = False): super().__init__() self._hsddict = {} - self._curblock = self._hsddict + self._content = self._hsddict # Content obtained for the current node self._parentblocks = [] - self._data = None + self._attribs = [] self._flatten_data = flatten_data self._include_hsd_attribs = include_hsd_attribs @@ -54,34 +54,51 @@ def hsddict(self): def open_tag(self, tagname, attrib, hsdattrib): - if attrib is not None: - self._curblock[tagname + ATTRIB_SUFFIX] = attrib - if self._include_hsd_attribs and hsdattrib is not None: - self._curblock[tagname + HSD_ATTRIB_SUFFIX] = hsdattrib - self._parentblocks.append(self._curblock) - self._curblock = {} + self._attribs.append((attrib, hsdattrib)) + content = {} if self._content is None else self._content + self._parentblocks.append(content) + self._content = None def close_tag(self, tagname): + attrib, hsdattrib = self._attribs.pop(-1) parentblock = self._parentblocks.pop(-1) - prevcontent = parentblock.get(tagname) - if prevcontent is not None and not isinstance(prevcontent, list): - prevcontent = [prevcontent] - parentblock[tagname] = prevcontent - if self._data is None: - content = self._curblock - else: - content = self._data - self._data = None - if prevcontent is None: + prevcont = parentblock.get(tagname) + if prevcont is not None: + if isinstance(prevcont, dict) and isinstance(self._content, dict): + prevcont = [prevcont] + parentblock[tagname] = prevcont + elif not (isinstance(prevcont, list) + and isinstance(prevcont[0], dict)): + msg = f"Invalid duplicate occurance of node '{tagname}'" + raise HsdError(msg) + content = {} if self._content is None else self._content + if prevcont is None: parentblock[tagname] = content + if attrib: + parentblock[tagname + ATTRIB_SUFFIX] = attrib + if self._include_hsd_attribs: + parentblock[tagname + HSD_ATTRIB_SUFFIX] = hsdattrib else: - 
prevcontent.append(content) - self._curblock = parentblock + prevcont.append(content) + prevattrib = parentblock.get(tagname + ATTRIB_SUFFIX) + if not (prevattrib is None and attrib is None): + msg = f"Duplicate node '{tagname}' should not carry attributes" + if self._include_hsd_attribs: + prevhsdattrib = parentblock.get(tagname + HSD_ATTRIB_SUFFIX) + if isinstance(prevhsdattrib, list): + prevhsdattrib.append(hsdattrib) + else: + parentblock[tagname + HSD_ATTRIB_SUFFIX] = [prevhsdattrib, + hsdattrib] + self._content = parentblock def add_text(self, text): - self._data = self._text_to_data(text) + if self._content is not None: + msg = f"Data appeared in an invalid context" + raise HsdError(msg) + self._content = self._text_to_data(text) def _text_to_data(self, txt): diff --git a/src/hsd/io.py b/src/hsd/io.py index 21932a3..c0f7dc1 100644 --- a/src/hsd/io.py +++ b/src/hsd/io.py @@ -223,10 +223,11 @@ def _dump_dict(obj, fobj, indentstr, use_hsd_attribs): else: attribstr = " [" + attrib + "]" if use_hsd_attribs: - hsdattribs = obj.get(key + HSD_ATTRIB_SUFFIX) - if hsdattribs is not None: - key = hsdattribs.get("tag", key) + hsdattrib = obj.get(key + HSD_ATTRIB_SUFFIX) + else: + hsdattrib = None if isinstance(value, dict): + key = hsdattrib.get("tag", key) if hsdattrib else key if value: fobj.write("{}{}{} {{\n".format(indentstr, key, attribstr)) _dump_dict( @@ -235,12 +236,14 @@ def _dump_dict(obj, fobj, indentstr, use_hsd_attribs): else: fobj.write("{}{}{} {{}}\n".format(indentstr, key, attribstr)) elif isinstance(value, list) and value and isinstance(value[0], dict): - for item in value: + for ind, item in enumerate(value): + key = hsdattrib[ind].get("tag", key) if hsdattrib else key fobj.write("{}{}{} {{\n".format(indentstr, key, attribstr)) _dump_dict( item, fobj, indentstr + _INDENT_STR, use_hsd_attribs) fobj.write("{}}}\n".format(indentstr)) else: + key = hsdattrib.get("tag", key) if hsdattrib else key valstr = _get_hsd_rhs(value, indentstr) fobj.write("{}{}{} 
{}\n"\ .format(indentstr, key, attribstr, valstr)) diff --git a/src/hsd/parser.py b/src/hsd/parser.py index 591294b..b2c4a43 100644 --- a/src/hsd/parser.py +++ b/src/hsd/parser.py @@ -296,7 +296,7 @@ def _error(self, errorcode, lines): error_msg = ( "Parsing error ({}) between lines {} - {} in file '{}'.".format( errorcode, lines[0] + 1, lines[1] + 1, self._fname)) - raise common.HsdParserError(error_msg) + raise common.HsdError(error_msg) diff --git a/test/test.hsd b/test/test.hsd index 783d157..dcd6983 100644 --- a/test/test.hsd +++ b/test/test.hsd @@ -55,7 +55,7 @@ Analysis { Atoms = 1 2 3 Label = "region1" } - Region { + REgion { Atoms = 1 2 3 Label = "region2" } diff --git a/test/test_dictbuilder.py b/test/test_dictbuilder.py index 88d0843..8f2c263 100644 --- a/test/test_dictbuilder.py +++ b/test/test_dictbuilder.py @@ -21,15 +21,15 @@ def test_dictbuilder(): def test_dictbuilder_flat(): - dictbuilder = hsd.HsdDictBuilder(flatten_data=True) - parser = hsd.HsdParser(eventhandler=dictbuilder) + dictbuilder = hsd.HsdDictBuilder(flatten_data=True, include_hsd_attribs=True) + parser = hsd.HsdParser(eventhandler=dictbuilder, lower_tag_names=True) with open(op.join(op.dirname(__file__), "test.hsd"), "r") as fobj: parser.feed(fobj) pyrep = dictbuilder.hsddict print("** Python structure with data flattening:\n") print(pyrep) print("\n** Turning back to HSD:\n") - print(hsd.dump_string(pyrep)) + print(hsd.dump_string(pyrep, use_hsd_attribs=True)) if __name__ == '__main__': From d490d8141287e044e7bcfd5f671bf6da112c7d49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Wed, 15 Sep 2021 15:35:54 +0200 Subject: [PATCH 18/31] Improve formatting and documentation --- docs/api.rst | 7 +- docs/hsd.rst | 266 +++++++++++++++++++++++++++++++++++++++ docs/index.rst | 1 + docs/introduction.rst | 12 +- src/hsd/__init__.py | 5 +- src/hsd/common.py | 22 ++-- src/hsd/dict.py | 239 +++++++++++++++++++++++++++++++++++ src/hsd/dictbuilder.py | 129 ------------------- 
src/hsd/eventhandler.py | 64 ++++++---- src/hsd/formatter.py | 115 +++++++++++++++++ src/hsd/io.py | 148 ++++------------------ src/hsd/parser.py | 14 +-- test/test.hsd | 64 +++++----- test/test_dictbuilder.py | 4 +- test/test_parser.py | 2 +- 15 files changed, 756 insertions(+), 336 deletions(-) create mode 100644 docs/hsd.rst create mode 100644 src/hsd/dict.py delete mode 100644 src/hsd/dictbuilder.py create mode 100644 src/hsd/formatter.py diff --git a/docs/api.rst b/docs/api.rst index ca89c08..c8caf59 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -19,7 +19,6 @@ High level routines .. autofunction:: hsd.dump - Lower level building blocks =========================== @@ -31,3 +30,9 @@ Lower level building blocks .. autoclass:: hsd.HsdDictBuilder :members: + +.. autoclass:: hsd.HsdDictWalker + :members: + +.. autoclass:: hsd.HsdFormatter + :members: diff --git a/docs/hsd.rst b/docs/hsd.rst new file mode 100644 index 0000000..c68227a --- /dev/null +++ b/docs/hsd.rst @@ -0,0 +1,266 @@ +************** +The HSD format +************** + +General description +=================== + +You can think about the Human-readable Structured Data format as a pleasent +representation of a tree structure. It can represent a subset of what you +can represent for example with XML. The following constraints with respect +to XML apply: + +* Every node of a tree, which is not empty, can either contain further nodes + or data, but never both. + +* Every node may have a single (string) attribute only. + +These constraints allow a very natural looking formatting of the data. + +As an example, let's have a look at a data tree, which represents the input +for a scientific software. 
In the XML representation, it could look as :: + + + + Yes + + + 77 + + + + + +The same information can be encoded much more natural and compact in the HSD +format as :: + + Hamiltonian { + Dftb { + Scc = Yes + Filling { + Fermi { + Temperature [Kelvin] = 77 + } + } + } + } + +The content of a node can be passed either between an opening and a closing +curly brace or after an equal sign. In the latter case the end of the line will +be the closing delimiter. The attribute (typically the unit of the data +which a node contains) is specified between square brackets after +the node name. + +The equal sign can not only be used to assign data as node content (provided +the data fits into one line), but also to assign a single child node as content +for a given node. This leads to a compact and expressive notation for those +cases, where (by the semantics of the input) a given node is only allowed to +have a single child node as content. The tree above is a piece of a typical +DFTB+ input, where only one child node is allowed for the nodes ``Hamiltonian`` +and ``Filling``, respectively. (They specify the type of the Hamiltonian +and the filling function.) By making use of equal signs, the +simplified HSD representation would look as compact as :: + + Hamiltonian = Dftb { + Scc = Yes + Filling = Fermi { + Temperature [Kelvin] = 77 + } + } + +and still represent the same tree. + + +Mapping to dictionaries +======================= + +Being basically a subset of XML, HSD data is best represented as an XML +DOM-tree. However, very often a dictionary representation is more desirable, +especially, when the language used to query and manipulate the tree offers +dictionaries as primary data type (e.g. Python). The data in an HSD input +can be easily represented with the help of nested dictionaries and lists. 
The +input from the previous section would have the following representation as a +Python dictionary (or as a JSON formatted input file):: + + { + "Hamiltonian": { + "Dftb": { + "Scc": True, + "Filling": { + "Fermi": { + "Temperature": 77, + "Temperature.attrib": "Kelvin" + } + } + } + } + } + +The attribute of a node is stored under a special key containing the name of +the node and the ``.attrib`` suffix. + +One slight complication of the dictionary representation arises in the case +when a given node has multiple child nodes with the same name, such as :: + + <ExternalField> + <PointCharges> + <GaussianBlurWidth>3</GaussianBlurWidth> + <CoordsAndCharges> + 3.3 -1.2 0.9 9.2 + 1.2 -3.4 5.6 -3.3 + </CoordsAndCharges> + </PointCharges> + <PointCharges> + <GaussianBlurWidth>10</GaussianBlurWidth> + <CoordsAndCharges> + 1.0 2.0 3.0 4.0 + -1.0 -2.0 -3.0 -4.0 + </CoordsAndCharges> + </PointCharges> + </ExternalField> + +While the HSD representation has no problem to cope with the situation :: + + ExternalField { + PointCharges { + GaussianBlurWidth = 3 + CoordsAndCharges { + 3.3 -1.2 0.9 9.2 + 1.2 -3.4 5.6 -3.3 + } + } + PointCharges { + GaussianBlurWidth = 10 + CoordsAndCharges { + 1.0 2.0 3.0 4.0 + -1.0 -2.0 -3.0 -4.0 + } + } + } + +a trick is needed for the dictionary / JSON representation, as multiple keys +with the same name are not allowed in a dictionary. Therefore, the repetitive +nodes will be mapped to one key, which will contain a list of dictionaries +(instead of a single dictionary as in the usual case):: + + { + "ExternalField": { + // Note the list of dictionaries here! + "PointCharges": [ + { + "GaussianBlurWidth": 3, + "CoordsAndCharges": [ + [3.3, -1.2, 0.9, 9.2], + [1.2, -3.4, 5.6, -3.3] + ] + }, + { + "GaussianBlurWidth": 10, + "CoordsAndCharges": [ + [1.0, 2.0, 3.0, 4.0 ], + [-1.0, -2.0, -3.0, -4.0 ] + ] + }, + ] + } + } + +The mapping works in both directions, so that this dictionary (or the JSON file +created from it) can be easily converted back to the HSD form again. + + +Processing related information +============================== + +In addition to the data stored in an HSD-file, further processing related +information can be recorded on demand.
The current Python implementation is able +to record the following additional data for each HSD node: + +* the line, where the node was defined in the input (helpful for printing out + informative error messages), + +* the name of the HSD node as found in the input (useful if the tag names are + converted to lower case to ease case-insensitive handling of the input) and + +* whether an equal sign was used to open the block. + +If this information is requested to be recorded, a special key with the +``.hsdattrib`` suffix will be generated for each node in the dictionary/JSON +representation. The corresponding value will be a dictionary with this +information. + +As an example, let's store the input from the previous section :: + + Hamiltonian = Dftb { + Scc = Yes + Filling = Fermi { + Temperature [Kelvin] = 77 + } + } + +in the file `test.hsd`, parse it and convert the node names to lower case +(to make the input processing case-insensitive). Using the Python command :: + + inpdict = hsd.load("test.hsd", lower_tag_names=True, include_hsd_attribs=True) + +will yield the following dictionary representation of the input:: + + { + 'hamiltonian.hsdattrib': {'equal': True, 'line': 0, 'tag': 'Hamiltonian'}, + 'hamiltonian': { + 'dftb.hsdattrib': {'line': 0, 'tag': 'Dftb'}, + 'dftb': { + 'scc.hsdattrib': {'equal': True, 'line': 1, 'tag': 'Scc'}, + 'scc': True, + 'filling.hsdattrib': {'equal': True, 'line': 2, 'tag': 'Filling'}, + 'filling': { + 'fermi.hsdattrib': {'line': 2, 'tag': 'Fermi'}, + 'fermi': { + 'temperature.attrib': 'Kelvin', + 'temperature.hsdattrib': {'equal': True, 'line': 3, + 'tag': 'Temperature'}, + 'temperature': 77 + } + } + } + } + } + +The recorded line numbers can be used to issue helpful error messages with +information about the line, where the user should search for the problem.
+The node names and the formatting information about the equal sign can ensure, +that the formatting is similar to the original one, if the data is dumped +into the HSD format again. Dumping the dictionary with :: + + hsd.dump(inpdict, "test2-formatted.hsd", use_hsd_attribs=True) + +would indeed yield :: + + Hamiltonian = Dftb { + Scc = Yes + Filling = Fermi { + Temperature [Kelvin] = 77 + } + } + +which is basically identical to the original input. If the additional +processing information is not recorded when the data is loaded or +it is not considered when the data is dumped as HSD again :: + + inpdict = hsd.load("test.hsd", lower_tag_names=True) + hsd.dump(inpdict, "test2-unformatted.hsd") + +the resulting formatting will differ more from the original form:: + + hamiltonian { + dftb { + scc = Yes + filling { + fermi { + temperature [Kelvin] = 77 + } + } + } + } + +Still nice and readable, but less compact and with different casing. diff --git a/docs/index.rst b/docs/index.rst index 1354db9..e766684 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -11,4 +11,5 @@ HSD-python documentation :maxdepth: 2 introduction + hsd api diff --git a/docs/introduction.rst b/docs/introduction.rst index f4782c6..393c784 100644 --- a/docs/introduction.rst +++ b/docs/introduction.rst @@ -5,13 +5,13 @@ Introduction This package contains utilities to read and write files in the Human-friendly Structured Data (HSD) format. -The HSD-format is very similar to both JSON and YAML, but tries to minimize the +The HSD-format is very similar to XML, JSON and YAML, but tries to minimize the effort for **humans** to read and write it. It ommits special characters as much -as possible (in contrast to JSON) and is not indentation dependent (in contrast -to YAML). It was developed originally as the input format for the scientific -simulation tool (`DFTB+ `_), but is -of general purpose. Data stored in HSD can be easily mapped to a subset of JSON -or XML and vica versa.
+as possible (in contrast to XML and JSON) and is not indentation dependent (in +contrast to YAML). It was developed originally as the input format for the +scientific simulation tool (`DFTB+ `_), +but is of general purpose. Data stored in HSD can be easily mapped to a subset +of JSON, YAML or XML and vica versa. Installation diff --git a/src/hsd/__init__.py b/src/hsd/__init__.py index 3185fa4..c10c99a 100644 --- a/src/hsd/__init__.py +++ b/src/hsd/__init__.py @@ -7,7 +7,8 @@ """ Toolbox for reading, writing and manipulating HSD-data. """ -from .dictbuilder import HsdDictBuilder -from .eventhandler import HsdEventHandler +from .dict import HsdDictBuilder, HsdDictWalker +from .eventhandler import HsdEventHandler, HsdEventPrinter +from .formatter import HsdFormatter from .io import load, load_string, dump, dump_string from .parser import HsdParser diff --git a/src/hsd/common.py b/src/hsd/common.py index 3be53a8..f685e8c 100644 --- a/src/hsd/common.py +++ b/src/hsd/common.py @@ -7,6 +7,11 @@ """ Implements common functionalities for the HSD package """ +try: + import numpy as np +except ModuleNotFoundError: + np = None + class HsdError(Exception): @@ -26,18 +31,21 @@ def unquote(txt): # Suffix to mark attribute ATTRIB_SUFFIX = ".attrib" -# Length of the attribute suffix -LEN_ATTRIB_SUFFIX = len(ATTRIB_SUFFIX) - # Suffix to mark hsd processing attributes HSD_ATTRIB_SUFFIX = ".hsdattrib" -# Lengths of hsd processing attribute suffix -LEN_HSD_ATTRIB_SUFFIX = len(HSD_ATTRIB_SUFFIX) - +# HSD attribute containing the original tag name +HSD_ATTRIB_TAG = "tag" +# HSD attribute containing the line number HSD_ATTRIB_LINE = "line" +# HSD attribute marking that a node equals to its only child (instead of +# containing it) HSD_ATTRIB_EQUAL = "equal" -HSD_ATTRIB_TAG = "tag" \ No newline at end of file +# String quoting delimiters (must be at least two) +QUOTING_CHARS = "\"'" + +# Special characters +SPECIAL_CHARS = "{}[]= " diff --git a/src/hsd/dict.py b/src/hsd/dict.py new file 
mode 100644 index 0000000..3648930 --- /dev/null +++ b/src/hsd/dict.py @@ -0,0 +1,239 @@ +#------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # +#------------------------------------------------------------------------------# +# +""" +Contains an event-driven builder for dictionary based (JSON-like) structure +""" +import re +from typing import List, Tuple, Union +from .common import np, ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX, HsdError,\ + QUOTING_CHARS, SPECIAL_CHARS +from .eventhandler import HsdEventHandler, HsdEventPrinter + +_ItemType = Union[float, int, bool, str] + +_DataType = Union[_ItemType, List[_ItemType]] + +_TOKEN_PATTERN = re.compile(r""" +(?:\s*(?:^|(?<=\s))(?P[+-]?[0-9]+)(?:\s*$|\s+)) +| +(?:\s*(?:^|(?<=\s)) +(?P[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)(?:$|(?=\s+))) +| +(?:\s*(?:^|(?<=\s))(?P[Yy][Ee][Ss]|[Nn][Oo])(?:$|(?=\s+))) +| +(?:\s*(?:(?P(?P['"]).*?(?P=quote)) | (?P.+?))(?:$|\s+)) +""", re.VERBOSE | re.MULTILINE) + + +class HsdDictBuilder(HsdEventHandler): + """Specific HSD event handler, which builds a nested Python dictionary. + + Args: + flatten_data: Whether multiline data in the HSD input should be + flattened into a single list. Othewise a list of lists is created, + with one list for every line (default). + include_hsd_attribs: Whether the HSD-attributes (processing related + attributes, like original tag name, line information, etc.) should + be stored. 
+ """ + + def __init__(self, flatten_data: bool = False, + include_hsd_attribs: bool = False): + super().__init__() + self._hsddict: dict = {} + self._curblock: dict = self._hsddict + self._parentblocks: List[dict] = [] + self._data: Union[None, _DataType] = None + self._attribs: List[Tuple[str, dict]] = [] + self._flatten_data: bool = flatten_data + self._include_hsd_attribs: bool = include_hsd_attribs + + + @property + def hsddict(self): + """The dictionary which has been built""" + return self._hsddict + + + def open_tag(self, tagname, attrib, hsdattrib): + if self._data is not None: + msg = f"Node '{tagname}' opened in an invalid context" + raise HsdError(msg) + self._attribs.append((attrib, hsdattrib)) + self._parentblocks.append(self._curblock) + self._curblock = {} + + + def close_tag(self, tagname): + attrib, hsdattrib = self._attribs.pop(-1) + parentblock = self._parentblocks.pop(-1) + prevcont = parentblock.get(tagname) + if prevcont is not None: + if isinstance(prevcont, dict) and self._data is None: + prevcont = [prevcont] + parentblock[tagname] = prevcont + elif not (isinstance(prevcont, list) + and isinstance(prevcont[0], dict)): + msg = f"Invalid duplicate occurance of node '{tagname}'" + raise HsdError(msg) + + if prevcont is None: + content = self._data if self._data is not None else self._curblock + parentblock[tagname] = content + if attrib: + parentblock[tagname + ATTRIB_SUFFIX] = attrib + if self._include_hsd_attribs: + parentblock[tagname + HSD_ATTRIB_SUFFIX] = hsdattrib + else: + prevcont.append(self._curblock) + prevattrib = parentblock.get(tagname + ATTRIB_SUFFIX) + if not (prevattrib is None and attrib is None): + msg = f"Duplicate node '{tagname}' should not carry attributes" + if self._include_hsd_attribs: + prevhsdattrib = parentblock.get(tagname + HSD_ATTRIB_SUFFIX) + if isinstance(prevhsdattrib, list): + prevhsdattrib.append(hsdattrib) + else: + parentblock[tagname + HSD_ATTRIB_SUFFIX] = [prevhsdattrib, + hsdattrib] + self._curblock = 
parentblock + self._data = None + + + def add_text(self, text): + if self._curblock or self._data is not None: + msg = f"Data appeared in an invalid context" + raise HsdError(msg) + self._data = self._text_to_data(text) + + + def _text_to_data(self, txt: str) -> _DataType: + data = [] + for line in txt.split("\n"): + if self._flatten_data: + linedata = data + else: + linedata = [] + for match in _TOKEN_PATTERN.finditer(line.strip()): + if match.group("int"): + linedata.append(int(match.group("int"))) + elif match.group("float"): + linedata.append(float(match.group("float"))) + elif match.group("logical"): + lowlog = match.group("logical").lower() + linedata.append(lowlog == "yes") + elif match.group("str"): + linedata.append(match.group("str")) + elif match.group("qstr"): + linedata.append(match.group("qstr")) + if not self._flatten_data: + data.append(linedata) + if len(data) == 1: + if isinstance(data[0], list) and len(data[0]) == 1: + return data[0][0] + return data[0] + return data + + + +class HsdDictWalker: + """Walks through a Python dictionary and triggers HSD events. + + Args: + eventhandler: Event handler dealing with the HSD events generated while + walking through the dictionary. When not specified, the events + are printed. + """ + + def __init__(self, eventhandler: HsdEventHandler = None): + + if eventhandler is None: + self._eventhandler: HsdEventHandler = HsdEventPrinter() + else: + self._eventhandler: HsdEventHandler = eventhandler + + + def walk(self, dictobj): + """Walks through the directory and generates HSD events. + + Args: + dictobj: Directory to walk through. 
+ """ + + for key, value in dictobj.items(): + + if key.endswith(ATTRIB_SUFFIX) or key.endswith(HSD_ATTRIB_SUFFIX): + continue + + hsdattrib = dictobj.get(key + HSD_ATTRIB_SUFFIX) + attrib = dictobj.get(key + ATTRIB_SUFFIX) + + if isinstance(value, dict): + + self._eventhandler.open_tag(key, attrib, hsdattrib) + self.walk(value) + self._eventhandler.close_tag(key) + + elif isinstance(value, list) and value and isinstance(value[0], dict): + for ind, item in enumerate(value): + hsdattr = hsdattrib[ind] if hsdattrib else None + self._eventhandler.open_tag(key, None, hsdattr) + self.walk(item) + self._eventhandler.close_tag(key) + + else: + self._eventhandler.open_tag(key, attrib, hsdattrib) + self._eventhandler.add_text(_to_text(value)) + self._eventhandler.close_tag(key) + + +def _to_text(obj): + + if isinstance(obj, list): + objstr = _list_to_hsd(obj) + elif np is not None and isinstance(obj, np.ndarray): + objstr = _list_to_hsd(obj.tolist()) + else: + objstr = _item_to_hsd(obj) + return objstr + + +def _list_to_hsd(lst): + if lst and isinstance(lst[0], list): + lines = [] + for innerlist in lst: + lines.append(" ".join([_item_to_hsd(item) for item in innerlist])) + return "\n".join(lines) + return " ".join([_item_to_hsd(item) for item in lst]) + + +def _item_to_hsd(item): + + if isinstance(item, bool): + return "Yes" if item else "No" + elif isinstance(item, (int, float)): + return str(item) + elif isinstance(item, str): + return _str_to_hsd(item) + else: + msg = "Data type {} can not be converted to HSD string"\ + .format(type(item)) + raise TypeError(msg) + + +def _str_to_hsd(string): + present = [qc in string for qc in QUOTING_CHARS] + nquotetypes = sum(present) + delimiter = "" + if not nquotetypes and True in [sc in string for sc in SPECIAL_CHARS]: + delimiter = QUOTING_CHARS[0] + elif nquotetypes == 1 and string[0] not in QUOTING_CHARS: + delimiter = QUOTING_CHARS[1] if present[0] else QUOTING_CHARS[0] + elif nquotetypes > 1: + msg = "String '{}' can not be 
quoted correctly".format(string) + raise ValueError(msg) + return delimiter + string + delimiter diff --git a/src/hsd/dictbuilder.py b/src/hsd/dictbuilder.py deleted file mode 100644 index 3cf0611..0000000 --- a/src/hsd/dictbuilder.py +++ /dev/null @@ -1,129 +0,0 @@ -#------------------------------------------------------------------------------# -# hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # -# Licensed under the BSD 2-clause license. # -#------------------------------------------------------------------------------# -# -""" -Contains an event-driven builder for dictionary based (JSON-like) structure -""" -import re -from .common import ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX, HsdError -from .eventhandler import HsdEventHandler - - -_TOKEN_PATTERN = re.compile(r""" -(?:\s*(?:^|(?<=\s))(?P[+-]?[0-9]+)(?:\s*$|\s+)) -| -(?:\s*(?:^|(?<=\s)) -(?P[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)(?:$|(?=\s+))) -| -(?:\s*(?:^|(?<=\s))(?P[Yy][Ee][Ss]|[Nn][Oo])(?:$|(?=\s+))) -| -(?:\s*(?:(?P(?P['"]).*?(?P=quote)) | (?P.+?))(?:$|\s+)) -""", re.VERBOSE | re.MULTILINE) - - -class HsdDictBuilder(HsdEventHandler): - """Specific HSD event handler, which builds a nested Python dictionary. - - Args: - flatten_data: Whether multiline data in the HSD input should be - flattened into a single list. Othewise a list of lists is created, - with one list for every line (default). - include_hsd_attribs: Whether the HSD-attributes (processing related - attributes, like original tag name, line information, etc.) should - be stored. 
- """ - - def __init__(self, flatten_data: bool = False, - include_hsd_attribs: bool = False): - super().__init__() - self._hsddict = {} - self._content = self._hsddict # Content obtained for the current node - self._parentblocks = [] - self._attribs = [] - self._flatten_data = flatten_data - self._include_hsd_attribs = include_hsd_attribs - - - @property - def hsddict(self): - """The dictionary which has been built""" - return self._hsddict - - - def open_tag(self, tagname, attrib, hsdattrib): - self._attribs.append((attrib, hsdattrib)) - content = {} if self._content is None else self._content - self._parentblocks.append(content) - self._content = None - - - def close_tag(self, tagname): - attrib, hsdattrib = self._attribs.pop(-1) - parentblock = self._parentblocks.pop(-1) - prevcont = parentblock.get(tagname) - if prevcont is not None: - if isinstance(prevcont, dict) and isinstance(self._content, dict): - prevcont = [prevcont] - parentblock[tagname] = prevcont - elif not (isinstance(prevcont, list) - and isinstance(prevcont[0], dict)): - msg = f"Invalid duplicate occurance of node '{tagname}'" - raise HsdError(msg) - content = {} if self._content is None else self._content - if prevcont is None: - parentblock[tagname] = content - if attrib: - parentblock[tagname + ATTRIB_SUFFIX] = attrib - if self._include_hsd_attribs: - parentblock[tagname + HSD_ATTRIB_SUFFIX] = hsdattrib - else: - prevcont.append(content) - prevattrib = parentblock.get(tagname + ATTRIB_SUFFIX) - if not (prevattrib is None and attrib is None): - msg = f"Duplicate node '{tagname}' should not carry attributes" - if self._include_hsd_attribs: - prevhsdattrib = parentblock.get(tagname + HSD_ATTRIB_SUFFIX) - if isinstance(prevhsdattrib, list): - prevhsdattrib.append(hsdattrib) - else: - parentblock[tagname + HSD_ATTRIB_SUFFIX] = [prevhsdattrib, - hsdattrib] - self._content = parentblock - - - def add_text(self, text): - if self._content is not None: - msg = f"Data appeared in an invalid context" - 
raise HsdError(msg) - self._content = self._text_to_data(text) - - - def _text_to_data(self, txt): - data = [] - for line in txt.split("\n"): - if self._flatten_data: - linedata = data - else: - linedata = [] - for match in _TOKEN_PATTERN.finditer(line.strip()): - if match.group("int"): - linedata.append(int(match.group("int"))) - elif match.group("float"): - linedata.append(float(match.group("float"))) - elif match.group("logical"): - lowlog = match.group("logical").lower() - linedata.append(lowlog == "yes") - elif match.group("str"): - linedata.append(match.group("str")) - elif match.group("qstr"): - linedata.append(match.group("qstr")) - if not self._flatten_data: - data.append(linedata) - if len(data) == 1: - if isinstance(data[0], list) and len(data[0]) == 1: - return data[0][0] - return data[0] - return data diff --git a/src/hsd/eventhandler.py b/src/hsd/eventhandler.py index 851b3b5..789f088 100644 --- a/src/hsd/eventhandler.py +++ b/src/hsd/eventhandler.py @@ -8,21 +8,16 @@ Contains an event handler base class. """ +from abc import ABC, abstractmethod +from typing import Optional -class HsdEventHandler: - """Base class for event handlers. - - This specifc implemenation prints the events. Subclassing instances - should override the public methods to customize its behavior. - """ - - def __init__(self): - """Initializes the default event handler""" - self._indentlevel = 0 - self._indentstr = " " +class HsdEventHandler(ABC): + """Abstract base class for handling HSD events.""" - def open_tag(self, tagname: str, attrib: str, hsdattrib: dict): + @abstractmethod + def open_tag(self, tagname: str, attrib: Optional[str], + hsdattrib: Optional[dict]): """Opens a tag. Args: @@ -31,29 +26,52 @@ def open_tag(self, tagname: str, attrib: str, hsdattrib: dict): hsdattrib: Dictionary of the options created during the processing in the hsd-parser. 
""" - indentstr = self._indentlevel * self._indentstr - print("{}OPENING TAG: {}".format(indentstr, tagname)) - print("{}ATTRIBUTE: {}".format(indentstr, attrib)) - print("{}HSD ATTRIBUTE: {}".format(indentstr, str(hsdattrib))) - self._indentlevel += 1 - + @abstractmethod def close_tag(self, tagname: str): """Closes a tag. Args: tagname: Name of the tag which had been closed. """ - indentstr = self._indentlevel * self._indentstr - print("{}CLOSING TAG: {}".format(indentstr, tagname)) - self._indentlevel -= 1 - + @abstractmethod def add_text(self, text: str): """Adds text (data) to the current tag. Args: text: Text in the current tag. """ + + + +class HsdEventPrinter(HsdEventHandler): + """Mininal demonstration class for event handlers. + + This specifc implemenation prints the events. Subclassing instances + should override the public methods to customize its behavior. + """ + + def __init__(self): + """Initializes the default event printer.""" + self._indentlevel = 0 + self._indentstr = " " + + + def open_tag(self, tagname: str, attrib: str, hsdattrib: dict): + indentstr = self._indentlevel * self._indentstr + print(f"{indentstr}OPENING TAG: {tagname}") + print(f"{indentstr}ATTRIBUTE: {attrib}") + print(f"{indentstr}HSD ATTRIBUTE: {str(hsdattrib)}") + self._indentlevel += 1 + + + def close_tag(self, tagname: str): + self._indentlevel -= 1 + indentstr = self._indentlevel * self._indentstr + print(f"{indentstr}CLOSING TAG: {tagname}") + + + def add_text(self, text: str): indentstr = self._indentlevel * self._indentstr - print("{}Received text: {}".format(indentstr, text)) + print(f"{indentstr}Received text: {text}") diff --git a/src/hsd/formatter.py b/src/hsd/formatter.py new file mode 100644 index 0000000..7e4b614 --- /dev/null +++ b/src/hsd/formatter.py @@ -0,0 +1,115 @@ +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. 
# +#------------------------------------------------------------------------------# +# +""" +Provides an event based formatter to create HSD dumps +""" + +from typing import List, TextIO, Union +from hsd.common import HSD_ATTRIB_EQUAL, HSD_ATTRIB_TAG +from hsd.eventhandler import HsdEventHandler + + +_INDENT_STR = " " + + +class HsdFormatter(HsdEventHandler): + """Implements an even driven HSD formatter. + + Args: + fobj: File like object to write the formatted output to. + use_hsd_attribs: Whether HSD attributes passed to the formatter should + be considered, when formatting the the output (default: True) + """ + + def __init__(self, fobj, use_hsd_attribs=True): + super().__init__() + self._fobj: TextIO = fobj + self._use_hsd_attribs: bool = use_hsd_attribs + self._level: int = 0 + self._indent_level: int = 0 + # Whether last node on current level should/was followed by an + # equal sign. (None = unspeciefied) + self._followed_by_equal: List[Union[bool, None]] = [] + self._nr_children: List[int] = [0] + + + def open_tag(self, tagname: str, attrib: str, hsdattrib: dict): + + if attrib is None: + attribstr = "" + elif not isinstance(attrib, str): + msg = f"Invalid attribute data type ({str(type(attrib))}) for "\ + f"'{tagname}'" + raise ValueError(msg) + else: + attribstr = " [" + attrib + "]" + + if self._level and not self._nr_children[-1]: + # Look up, whether previous (containing) node should be followed by + # an equal sign + equal = self._followed_by_equal[-1] + if equal: + self._fobj.write(" = ") + indentstr = "" + else: + self._fobj.write(" {\n") + self._indent_level += 1 + indentstr = self._indent_level * _INDENT_STR + else: + indentstr = self._indent_level * _INDENT_STR + + if self._use_hsd_attribs and hsdattrib is not None: + tagname = hsdattrib.get(HSD_ATTRIB_TAG, tagname) + + self._fobj.write(f"{indentstr}{tagname}{attribstr}") + + # Previous (containing) node has now one children more + self._nr_children[-1] += 1 + + # Currently opened node has no 
children so far. + self._nr_children.append(0) + self._level += 1 + + equal = None + if hsdattrib is not None and self._use_hsd_attribs: + equal = hsdattrib.get(HSD_ATTRIB_EQUAL) + self._followed_by_equal.append(equal) + + + def close_tag(self, tagname: str): + + nr_children = self._nr_children.pop(-1) + equal = self._followed_by_equal.pop(-1) + if not nr_children: + self._fobj.write(" {}\n") + elif not equal: + self._indent_level -= 1 + indentstr = self._indent_level * _INDENT_STR + self._fobj.write(f"{indentstr}}}\n") + self._level -= 1 + + + def add_text(self, text: str): + + equal = self._followed_by_equal[-1] + multiline = "\n" in text + if equal is None and not multiline: + if len(self._followed_by_equal) > 2: + equal = not self._followed_by_equal[-2] + else: + equal = True + if equal: + self._fobj.write(" = ") + self._followed_by_equal[-1] = True + else: + self._indent_level += 1 + indentstr = self._indent_level * _INDENT_STR + self._fobj.write(f" {{\n{indentstr}") + text = text.replace("\n", "\n" + indentstr) + + self._fobj.write(text) + self._fobj.write("\n") + self._nr_children[-1] += 1 diff --git a/src/hsd/io.py b/src/hsd/io.py index c0f7dc1..6139f78 100644 --- a/src/hsd/io.py +++ b/src/hsd/io.py @@ -7,25 +7,16 @@ Provides functionality to dump Python structures to HSD """ import io -try: - import numpy as np -except ModuleNotFoundError: - np = None from typing import Union, TextIO -from .common import \ - ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX, LEN_ATTRIB_SUFFIX, LEN_HSD_ATTRIB_SUFFIX -from .dictbuilder import HsdDictBuilder +from .dict import HsdDictWalker, HsdDictBuilder +from .formatter import HsdFormatter + from .parser import HsdParser _INDENT_STR = " " -# String quoting delimiters (must be at least two) -_QUOTING_CHARS = "\"'" - -# Special characters -_SPECIAL_CHARS = "{}[]= " def load(hsdfile: Union[TextIO, str], lower_tag_names: bool = False, @@ -40,7 +31,10 @@ def load(hsdfile: Union[TextIO, str], lower_tag_names: bool = False, stored among the HSD 
attributes. include_hsd_attribs: Whether the HSD-attributes (processing related attributes, like original tag name, line information, etc.) should - be stored. + be stored. Use it, if you wish to keep the formatting of the data + on writing close to the original one (e.g. lowered tag names + converted back to their original form, equal signs between parent + and only child kept, instead of converted to curly braces). flatten_data: Whether multiline data in the HSD input should be flattened into a single list. Othewise a list of lists is created, with one list for every line (default). @@ -57,14 +51,16 @@ def load(hsdfile: Union[TextIO, str], lower_tag_names: bool = False, lower_tag_names=lower_tag_names) if isinstance(hsdfile, str): with open(hsdfile, "r") as hsdfile: - parser.feed(hsdfile) + parser.parse(hsdfile) else: - parser.feed(hsdfile) + parser.parse(hsdfile) return dictbuilder.hsddict -def load_string(hsdstr: str, lower_tag_names: bool = False, - include_hsd_attribs: bool = False, flatten_data: bool = False) -> dict: +def load_string( + hsdstr: str, lower_tag_names: bool = False, + include_hsd_attribs: bool = False, flatten_data: bool = False + ) -> dict: """Loads a string with HSD-formatted data into a Python dictionary. Args: @@ -75,7 +71,10 @@ def load_string(hsdstr: str, lower_tag_names: bool = False, stored among the HSD attributes. include_hsd_attribs: Whether the HSD-attributes (processing related attributes, like original tag name, line information, etc.) should - be stored. + be stored. Use it, if you wish to keep the formatting of the data + on writing close to the original one (e.g. lowered tag names + converted back to their original form, equal signs between parent + and only child kept, instead of converted to curly braces). flatten_data: Whether multiline data in the HSD input should be flattened into a single list. Othewise a list of lists is created, with one list for every line (default). 
@@ -144,7 +143,8 @@ def dump(data: dict, hsdfile: Union[TextIO, str], This option can be used to for example to restore original tag names, if the file was loaded with the ``lower_tag_names`` and - ``include_hsd_attribs`` options set. + ``include_hsd_attribs`` options set or keep the equal signs + between parent and contained only child. Raises: TypeError: if object is not a dictionary instance. @@ -158,9 +158,9 @@ def dump(data: dict, hsdfile: Union[TextIO, str], raise TypeError(msg) if isinstance(hsdfile, str): with open(hsdfile, "w") as hsdfile: - _dump_dict(data, hsdfile, "", use_hsd_attribs=use_hsd_attribs) + _dump_dict(data, hsdfile, use_hsd_attribs) else: - _dump_dict(data, hsdfile, "", use_hsd_attribs=use_hsd_attribs) + _dump_dict(data, hsdfile, use_hsd_attribs) def dump_string(data: dict, use_hsd_attribs: bool = False) -> str: @@ -198,106 +198,8 @@ def dump_string(data: dict, use_hsd_attribs: bool = False) -> str: return result.getvalue() -def _dump_dict(obj, fobj, indentstr, use_hsd_attribs): - for key, value in obj.items(): - if key.endswith(ATTRIB_SUFFIX): - if key[:-LEN_ATTRIB_SUFFIX] in obj: - continue - else: - msg = "Attribute '{}' without corresponding tag '{}'"\ - .format(key, key[:-len(ATTRIB_SUFFIX)]) - raise ValueError(msg) - if key.endswith(HSD_ATTRIB_SUFFIX): - if key[:-LEN_HSD_ATTRIB_SUFFIX] in obj: continue - else: - msg = "HSD attribute '{}' without corresponding tag '{}'"\ - .format(key, key[:-len(HSD_ATTRIB_SUFFIX)]) - raise ValueError(msg) - attrib = obj.get(key + ATTRIB_SUFFIX) - if attrib is None: - attribstr = "" - elif not isinstance(attrib, str): - msg = "Invalid data type ({}) for '{}'"\ - .format(str(type(attrib)), key + ".attribute") - raise ValueError(msg) - else: - attribstr = " [" + attrib + "]" - if use_hsd_attribs: - hsdattrib = obj.get(key + HSD_ATTRIB_SUFFIX) - else: - hsdattrib = None - if isinstance(value, dict): - key = hsdattrib.get("tag", key) if hsdattrib else key - if value: - fobj.write("{}{}{} 
{{\n".format(indentstr, key, attribstr)) - _dump_dict( - value, fobj, indentstr + _INDENT_STR, use_hsd_attribs) - fobj.write("{}}}\n".format(indentstr)) - else: - fobj.write("{}{}{} {{}}\n".format(indentstr, key, attribstr)) - elif isinstance(value, list) and value and isinstance(value[0], dict): - for ind, item in enumerate(value): - key = hsdattrib[ind].get("tag", key) if hsdattrib else key - fobj.write("{}{}{} {{\n".format(indentstr, key, attribstr)) - _dump_dict( - item, fobj, indentstr + _INDENT_STR, use_hsd_attribs) - fobj.write("{}}}\n".format(indentstr)) - else: - key = hsdattrib.get("tag", key) if hsdattrib else key - valstr = _get_hsd_rhs(value, indentstr) - fobj.write("{}{}{} {}\n"\ - .format(indentstr, key, attribstr, valstr)) - - -def _get_hsd_rhs(obj, indentstr): - - if isinstance(obj, list): - objstr = _list_to_hsd(obj) - elif np is not None and isinstance(obj, np.ndarray): - objstr = _list_to_hsd(obj.tolist()) - else: - objstr = _item_to_hsd(obj) - if "\n" in objstr: - newline_indent = "\n" + indentstr + _INDENT_STR - rhs = ("= {" + newline_indent + objstr.replace("\n", newline_indent) - + "\n" + indentstr + "}") - else: - rhs = "= " + objstr - return rhs - - -def _list_to_hsd(lst): - if lst and isinstance(lst[0], list): - lines = [] - for innerlist in lst: - lines.append(" ".join([_item_to_hsd(item) for item in innerlist])) - return "\n".join(lines) - return " ".join([_item_to_hsd(item) for item in lst]) - - -def _item_to_hsd(item): - - if isinstance(item, bool): - return "Yes" if item else "No" - elif isinstance(item, (int, float)): - return str(item) - elif isinstance(item, str): - return _str_to_hsd(item) - else: - msg = "Data type {} can not be converted to HSD string"\ - .format(type(item)) - raise TypeError(msg) - +def _dump_dict(obj: dict, fobj: TextIO, use_hsd_attribs: bool): -def _str_to_hsd(string): - present = [qc in string for qc in _QUOTING_CHARS] - nquotetypes = sum(present) - delimiter = "" - if not nquotetypes and True in [sc in 
string for sc in _SPECIAL_CHARS]: - delimiter = _QUOTING_CHARS[0] - elif nquotetypes == 1 and string[0] not in _QUOTING_CHARS: - delimiter = _QUOTING_CHARS[1] if present[0] else _QUOTING_CHARS[0] - elif nquotetypes > 1: - msg = "String '{}' can not be quoted correctly".format(string) - raise ValueError(msg) - return delimiter + string + delimiter + formatter = HsdFormatter(fobj, use_hsd_attribs=use_hsd_attribs) + walker = HsdDictWalker(formatter) + walker.walk(obj) diff --git a/src/hsd/parser.py b/src/hsd/parser.py index b2c4a43..ad231bb 100644 --- a/src/hsd/parser.py +++ b/src/hsd/parser.py @@ -9,7 +9,7 @@ """ from typing import Optional, TextIO, Union import hsd.common as common -from .eventhandler import HsdEventHandler +from .eventhandler import HsdEventHandler, HsdEventPrinter SYNTAX_ERROR = 1 @@ -28,7 +28,7 @@ class HsdParser: Arguments: eventhandler: Object which should handle the HSD-events triggered - during parsing. When not specified, HsdEventHandler() is used. + during parsing. When not specified, HsdEventPrinter() is used. lower_tag_names: Whether tag names should be lowered during parsing. If the option is set, the original tag name will be stored among the hsd attributes. @@ -47,7 +47,7 @@ class HsdParser: ... } ... } ... \"\"\") - >>> parser.feed(hsdfile) + >>> parser.parse(hsdfile) >>> dictbuilder.hsddict {'Hamiltonian': {'Dftb': {'Scc': True, 'Filling': {'Fermi': {'Temperature.attrib': 'Kelvin', 'Temperature': 100}}}}} """ @@ -60,7 +60,7 @@ def __init__(self, eventhandler: Optional[HsdEventHandler] = None, eventhandler: Instance of the HsdEventHandler class or its children. """ if eventhandler is None: - self._eventhandler = HsdEventHandler() + self._eventhandler = HsdEventPrinter() else: self._eventhandler = eventhandler @@ -80,8 +80,8 @@ def __init__(self, eventhandler: Optional[HsdEventHandler] = None, self._lower_tag_names = lower_tag_names - def feed(self, fobj: Union[TextIO, str]): - """Feeds the parser with data. 
+ def parse(self, fobj: Union[TextIO, str]): + """Parses the provided file like object. The parser will process the data and trigger the corresponding events in the eventhandler which was passed at initialization. @@ -280,7 +280,7 @@ def _closetag(self): def _include_hsd(self, fname): fname = common.unquote(fname.strip()) parser = HsdParser(eventhandler=self._eventhandler) - parser.feed(fname) + parser.parse(fname) @staticmethod diff --git a/test/test.hsd b/test/test.hsd index dcd6983..8f10b02 100644 --- a/test/test.hsd +++ b/test/test.hsd @@ -8,46 +8,40 @@ Geometry { } } Driver {} -Hamiltonian { - DFTB { - Scc = Yes - SccTolerance = 1e-10 - MaxSccIterations = 1000 - Mixer { - Broyden {} - } - MaxAngularMomentum { - H = SelectedShells {"s" "s" } - O = SelectedShells {"s" "p" } - } - Dispersion = SlaterKirkwood { - PolarRadiusCharge [AA^3,AA,] = { - 1.030000 3.800000 2.820000 - } - } - Filling { - Fermi { - Temperature [Kelvin] = 1e-08 - } +Hamiltonian = DFTB { + Scc = Yes + SccTolerance = 1e-10 + MaxSccIterations = 1000 + Mixer = Broyden {} + MaxAngularMomentum { + H = SelectedShells {"s" "s" } + O = SelectedShells {"s" "p" } + } + Dispersion = SlaterKirkwood { + PolarRadiusCharge [AA^3,AA,] = { + 1.030000 3.800000 2.820000 } - KPointsAndWeights { - SupercellFolding = { - 2 0 0 - 0 2 0 - 0 0 2 - 0.5 0.5 0.5 - } + } + Filling = Fermi { + Temperature [Kelvin] = 1e-08 + } + KPointsAndWeights { + SupercellFolding = { + 2 0 0 + 0 2 0 + 0 0 2 + 0.5 0.5 0.5 } - ElectricField { - PointCharges { - CoordsAndCharges = { - -0.94 -9.44 1.2 1.0 - -0.94 -9.44 1.2 -1.0 - } + } + ElectricField { + PointCharges { + CoordsAndCharges = { + -0.94 -9.44 1.2 1.0 + -0.94 -9.44 1.2 -1.0 } } - SelectSomeAtoms = 1 2 " 3 : -3 " } + SelectSomeAtoms = 1 2 " 3 : -3 " } Analysis { ProjectStates { diff --git a/test/test_dictbuilder.py b/test/test_dictbuilder.py index 8f2c263..7d86b8f 100644 --- a/test/test_dictbuilder.py +++ b/test/test_dictbuilder.py @@ -12,7 +12,7 @@ def test_dictbuilder(): 
dictbuilder = hsd.HsdDictBuilder() parser = hsd.HsdParser(eventhandler=dictbuilder) with open(op.join(op.dirname(__file__), "test.hsd"), "r") as fobj: - parser.feed(fobj) + parser.parse(fobj) pyrep = dictbuilder.hsddict print("** Python structure without data flattening:\n") print(pyrep) @@ -24,7 +24,7 @@ def test_dictbuilder_flat(): dictbuilder = hsd.HsdDictBuilder(flatten_data=True, include_hsd_attribs=True) parser = hsd.HsdParser(eventhandler=dictbuilder, lower_tag_names=True) with open(op.join(op.dirname(__file__), "test.hsd"), "r") as fobj: - parser.feed(fobj) + parser.parse(fobj) pyrep = dictbuilder.hsddict print("** Python structure with data flattening:\n") print(pyrep) diff --git a/test/test_parser.py b/test/test_parser.py index 58dfc4c..3020afa 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -12,7 +12,7 @@ def test_parser(): parser = hsd.HsdParser() with open(op.join(op.dirname(__file__), "test.hsd"), "r") as fobj: - parser.feed(fobj) + parser.parse(fobj) if __name__ == '__main__': From 77c5c12ce5a824ea43d10361fdb67c06c265d1bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Fri, 1 Oct 2021 11:38:58 +0200 Subject: [PATCH 19/31] Add simple testing framework --- src/hsd/__init__.py | 2 + src/hsd/common.py | 2 +- src/hsd/formatter.py | 4 +- src/hsd/io.py | 8 ++- src/hsd/parser.py | 27 ++++++---- test/test_parser.py | 117 ++++++++++++++++++++++++++++++++++++++++--- 6 files changed, 134 insertions(+), 26 deletions(-) diff --git a/src/hsd/__init__.py b/src/hsd/__init__.py index c10c99a..2d4e2dd 100644 --- a/src/hsd/__init__.py +++ b/src/hsd/__init__.py @@ -7,6 +7,8 @@ """ Toolbox for reading, writing and manipulating HSD-data. 
""" +from .common import HSD_ATTRIB_LINE, HSD_ATTRIB_EQUAL, HSD_ATTRIB_SUFFIX,\ + HSD_ATTRIB_NAME, HsdError from .dict import HsdDictBuilder, HsdDictWalker from .eventhandler import HsdEventHandler, HsdEventPrinter from .formatter import HsdFormatter diff --git a/src/hsd/common.py b/src/hsd/common.py index f685e8c..9c1266a 100644 --- a/src/hsd/common.py +++ b/src/hsd/common.py @@ -35,7 +35,7 @@ def unquote(txt): HSD_ATTRIB_SUFFIX = ".hsdattrib" # HSD attribute containing the original tag name -HSD_ATTRIB_TAG = "tag" +HSD_ATTRIB_NAME = "name" # HSD attribute containing the line number HSD_ATTRIB_LINE = "line" diff --git a/src/hsd/formatter.py b/src/hsd/formatter.py index 7e4b614..903c4a2 100644 --- a/src/hsd/formatter.py +++ b/src/hsd/formatter.py @@ -8,7 +8,7 @@ """ from typing import List, TextIO, Union -from hsd.common import HSD_ATTRIB_EQUAL, HSD_ATTRIB_TAG +from hsd.common import HSD_ATTRIB_EQUAL, HSD_ATTRIB_NAME from hsd.eventhandler import HsdEventHandler @@ -62,7 +62,7 @@ def open_tag(self, tagname: str, attrib: str, hsdattrib: dict): indentstr = self._indent_level * _INDENT_STR if self._use_hsd_attribs and hsdattrib is not None: - tagname = hsdattrib.get(HSD_ATTRIB_TAG, tagname) + tagname = hsdattrib.get(HSD_ATTRIB_NAME, tagname) self._fobj.write(f"{indentstr}{tagname}{attribstr}") diff --git a/src/hsd/io.py b/src/hsd/io.py index 6139f78..828600b 100644 --- a/src/hsd/io.py +++ b/src/hsd/io.py @@ -8,10 +8,8 @@ """ import io from typing import Union, TextIO - from .dict import HsdDictWalker, HsdDictBuilder from .formatter import HsdFormatter - from .parser import HsdParser @@ -94,14 +92,14 @@ def load_string( ... } ... 
\"\"\" >>> hsd.load_string(hsdstr) - {'Dftb': {'Scc': True, 'Filling': {'Fermi': {'Temperature.attrib': 'Kelvin', 'Temperature': 100}}}} + {'Dftb': {'Scc': True, 'Filling': {'Fermi': {'Temperature': 100, 'Temperature.attrib': 'Kelvin'}}}} In order to ease the case-insensitive handling of the input, the tag names can be converted to lower case during reading using the ``lower_tag_names`` option. >>> hsd.load_string(hsdstr, lower_tag_names=True) - {'dftb': {'scc': True, 'filling': {'fermi': {'temperature.attrib': 'Kelvin', 'temperature': 100}}}} + {'dftb': {'scc': True, 'filling': {'fermi': {'temperature': 100, 'temperature.attrib': 'Kelvin'}}}} The original tag names (together with additional information like the line number of a tag) can be recorded, if the ``include_hsd_attribs`` @@ -113,7 +111,7 @@ def load_string( with the recorded data: >>> data["dftb.hsdattrib"] - {'line': 1, 'tag': 'Dftb'} + {'line': 1, 'name': 'Dftb'} This additional data can be then also used to format the tags in the original style, when writing the data in HSD-format again. Compare: diff --git a/src/hsd/parser.py b/src/hsd/parser.py index ad231bb..50d1fcc 100644 --- a/src/hsd/parser.py +++ b/src/hsd/parser.py @@ -49,7 +49,7 @@ class HsdParser: ... 
\"\"\") >>> parser.parse(hsdfile) >>> dictbuilder.hsddict - {'Hamiltonian': {'Dftb': {'Scc': True, 'Filling': {'Fermi': {'Temperature.attrib': 'Kelvin', 'Temperature': 100}}}}} + {'Hamiltonian': {'Dftb': {'Scc': True, 'Filling': {'Fermi': {'Temperature': 100, 'Temperature.attrib': 'Kelvin'}}}}} """ def __init__(self, eventhandler: Optional[HsdEventHandler] = None, @@ -75,9 +75,10 @@ def __init__(self, eventhandler: Optional[HsdEventHandler] = None, self._after_equal_sign = False # last tag was opened with equal sign self._inside_attrib = False # parser inside attrib specification self._inside_quote = False # parser inside quotation - self._has_child = False + self._has_child = True # Whether current node has a child already + self._has_text = False # whether current node contains text already self._oldbefore = "" # buffer for tagname - self._lower_tag_names = lower_tag_names + self._lower_tag_names = lower_tag_names # whether tag names should be lowered def parse(self, fobj: Union[TextIO, str]): @@ -148,14 +149,13 @@ def _parse(self, line): # tagname was followed by an attribute -> append self._oldbefore += before else: - self._has_child = True self._hsdattrib[common.HSD_ATTRIB_EQUAL] = True self._starttag(before, False) self._after_equal_sign = True # Opening tag by curly brace elif sign == "{": - self._has_child = True + #self._has_child = True self._starttag(before, self._after_equal_sign) self._buffer = [] self._after_equal_sign = False @@ -188,7 +188,7 @@ def _parse(self, line): self._oldbefore = before self._buffer = [] self._inside_attrib = True - self._opened_tags.append(("[", self._currline, None)) + self._opened_tags.append(("[", self._currline, None, None, None)) self._checkstr = _ATTRIB_SPECIALS # Closing attribute specification @@ -212,7 +212,7 @@ def _parse(self, line): self._checkstr = sign self._inside_quote = True self._buffer.append(before + sign) - self._opened_tags.append(('"', self._currline, None)) + self._opened_tags.append(('"', 
self._currline, None, None, None)) # Interrupt elif sign == "<" and not self._after_equal_sign: @@ -237,13 +237,18 @@ def _parse(self, line): def _text(self, text): stripped = text.strip() if stripped: + if self._has_child: + self._error(SYNTAX_ERROR, (self._currline, self._currline)) self._eventhandler.add_text(stripped) + self._has_text = True def _starttag(self, tagname, closeprev): txt = "".join(self._buffer) if txt: self._text(txt) + if self._has_text: + self._error(SYNTAX_ERROR, (self._currline, self._currline)) tagname_stripped = tagname.strip() if self._oldbefore: if tagname_stripped: @@ -254,15 +259,15 @@ def _starttag(self, tagname, closeprev): self._error(SYNTAX_ERROR, (self._currline, self._currline)) self._hsdattrib[common.HSD_ATTRIB_LINE] = self._currline if self._lower_tag_names: - self._hsdattrib[common.HSD_ATTRIB_TAG] = tagname_stripped + self._hsdattrib[common.HSD_ATTRIB_NAME] = tagname_stripped tagname_stripped = tagname_stripped.lower() self._eventhandler.open_tag(tagname_stripped, self._attrib, self._hsdattrib) self._opened_tags.append( - (tagname_stripped, self._currline, closeprev, self._has_child)) + (tagname_stripped, self._currline, closeprev, True, False)) + self._has_child = False self._buffer = [] self._oldbefore = "" - self._has_child = False self._attrib = None self._hsdattrib = {} @@ -271,7 +276,7 @@ def _closetag(self): if not self._opened_tags: self._error(SYNTAX_ERROR, (0, self._currline)) self._buffer = [] - tag, _, closeprev, self._has_child = self._opened_tags.pop() + tag, _, closeprev, self._has_child, self._has_text = self._opened_tags.pop() self._eventhandler.close_tag(tag) if closeprev: self._closetag() diff --git a/test/test_parser.py b/test/test_parser.py index 3020afa..88ff7e2 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -5,15 +5,118 @@ # Licensed under the BSD 2-clause license. 
# #------------------------------------------------------------------------------# # +import io +import pytest import hsd -import os.path as op +_OPEN_TAG_EVENT = 1 +_CLOSE_TAG_EVENT = 2 +_ADD_TEXT_EVENT = 3 -def test_parser(): - parser = hsd.HsdParser() - with open(op.join(op.dirname(__file__), "test.hsd"), "r") as fobj: - parser.parse(fobj) +_HSD_LINE = hsd.HSD_ATTRIB_LINE +_HSD_EQUAL = hsd.HSD_ATTRIB_EQUAL +_HSD_NAME = hsd.HSD_ATTRIB_NAME +_VALID_TESTS = [ + ( + "Simple", ( + """Test {} """, + [ + (_OPEN_TAG_EVENT, "Test", None, {_HSD_LINE: 0}), + (_CLOSE_TAG_EVENT, "Test"), + ] + ) + ), + ( + "Data with quoted strings", ( + """O = SelectedShells { "s" "p" }""", + [ + (_OPEN_TAG_EVENT, "O", None, {_HSD_LINE: 0, _HSD_EQUAL: True}), + (_OPEN_TAG_EVENT, 'SelectedShells', None, {_HSD_LINE: 0}), + (_ADD_TEXT_EVENT, '"s" "p"'), + (_CLOSE_TAG_EVENT, 'SelectedShells'), + (_CLOSE_TAG_EVENT, 'O'), + ] + ) + ), + ( + "Attribute containing comma", ( + """PolarRadiusCharge [AA^3,AA,] = {\n1.030000 3.800000 2.820000\n}""", + [ + (_OPEN_TAG_EVENT, "PolarRadiusCharge", "AA^3,AA,", {_HSD_LINE: 0, }), + (_ADD_TEXT_EVENT, '1.030000 3.800000 2.820000'), + (_CLOSE_TAG_EVENT, 'PolarRadiusCharge'), + ] + ) + ), +] -if __name__ == '__main__': - test_parser() +_VALID_TEST_NAMES, _VALID_TEST_CASES = zip(*_VALID_TESTS) + + +_FAILING_TESTS = [ + ( + "Node-less data", ( + """a = 2\n15\n""" + ) + ), + ( + "Node-less data at start", ( + """15\na = 2\na = 4\n""" + ) + ), + ( + "Node-less data in child", ( + """a {\n12\nb = 5\n}\n""" + ) + ), + ( + "Quoted tag name", ( + """\"mytag\" = 12\n""" + ) + ), + +] + +_FAILING_TEST_NAMES, _FAILING_TEST_CASES = zip(*_FAILING_TESTS) + + +class _TestEventHandler(hsd.HsdEventHandler): + + def __init__(self): + self.events = [] + + def open_tag(self, tagname, attrib, hsdoptions): + self.events.append((_OPEN_TAG_EVENT, tagname, attrib, hsdoptions)) + + def close_tag(self, tagname): + self.events.append((_CLOSE_TAG_EVENT, tagname)) + + def add_text(self, 
text): + self.events.append((_ADD_TEXT_EVENT, text)) + + +@pytest.mark.parametrize( + "hsd_input,expected_events", + _VALID_TEST_CASES, + ids=_VALID_TEST_NAMES +) +def test_valid_parser_events(hsd_input, expected_events): + testhandler = _TestEventHandler() + parser = hsd.HsdParser(eventhandler=testhandler) + hsdfile = io.StringIO(hsd_input) + parser.parse(hsdfile) + assert testhandler.events == expected_events + + +@pytest.mark.parametrize( + "hsd_input", + _FAILING_TEST_CASES, + ids=_FAILING_TEST_NAMES +) +def test_invalid_parser_events(hsd_input): + testhandler = _TestEventHandler() + parser = hsd.HsdParser(eventhandler=testhandler) + hsdfile = io.StringIO(hsd_input) + with pytest.raises(hsd.HsdError): + parser.parse(hsdfile) From ae55ce4035ff8e2d659c9a128f91d5621abb0939 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Fri, 1 Oct 2021 12:22:13 +0200 Subject: [PATCH 20/31] Make cosmetic changes for higher pylint-score --- docs/conf.py | 4 ++++ src/hsd/__init__.py | 12 ++++++------ src/hsd/dict.py | 17 ++++++++--------- src/hsd/io.py | 32 +++++++++++++++++++------------- src/hsd/parser.py | 12 ++++++------ 5 files changed, 43 insertions(+), 34 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 47fa9f8..055c8ee 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,6 +12,8 @@ # import os import sys +import doctest + sys.path.insert(0, os.path.abspath('../src')) # -- Project information ----------------------------------------------------- @@ -37,6 +39,8 @@ autodoc_member_order = 'bysource' +doctest_default_flags = doctest.NORMALIZE_WHITESPACE + # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/src/hsd/__init__.py b/src/hsd/__init__.py index 2d4e2dd..5ffed70 100644 --- a/src/hsd/__init__.py +++ b/src/hsd/__init__.py @@ -7,10 +7,10 @@ """ Toolbox for reading, writing and manipulating HSD-data. 
""" -from .common import HSD_ATTRIB_LINE, HSD_ATTRIB_EQUAL, HSD_ATTRIB_SUFFIX,\ +from hsd.common import HSD_ATTRIB_LINE, HSD_ATTRIB_EQUAL, HSD_ATTRIB_SUFFIX,\ HSD_ATTRIB_NAME, HsdError -from .dict import HsdDictBuilder, HsdDictWalker -from .eventhandler import HsdEventHandler, HsdEventPrinter -from .formatter import HsdFormatter -from .io import load, load_string, dump, dump_string -from .parser import HsdParser +from hsd.dict import HsdDictBuilder, HsdDictWalker +from hsd.eventhandler import HsdEventHandler, HsdEventPrinter +from hsd.formatter import HsdFormatter +from hsd.io import load, load_string, dump, dump_string +from hsd.parser import HsdParser diff --git a/src/hsd/dict.py b/src/hsd/dict.py index 3648930..9a5d0bb 100644 --- a/src/hsd/dict.py +++ b/src/hsd/dict.py @@ -9,9 +9,9 @@ """ import re from typing import List, Tuple, Union -from .common import np, ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX, HsdError,\ +from hsd.common import np, ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX, HsdError,\ QUOTING_CHARS, SPECIAL_CHARS -from .eventhandler import HsdEventHandler, HsdEventPrinter +from hsd.eventhandler import HsdEventHandler, HsdEventPrinter _ItemType = Union[float, int, bool, str] @@ -106,7 +106,7 @@ def close_tag(self, tagname): def add_text(self, text): if self._curblock or self._data is not None: - msg = f"Data appeared in an invalid context" + msg = "Data appeared in an invalid context" raise HsdError(msg) self._data = self._text_to_data(text) @@ -215,14 +215,13 @@ def _item_to_hsd(item): if isinstance(item, bool): return "Yes" if item else "No" - elif isinstance(item, (int, float)): + if isinstance(item, (int, float)): return str(item) - elif isinstance(item, str): + if isinstance(item, str): return _str_to_hsd(item) - else: - msg = "Data type {} can not be converted to HSD string"\ - .format(type(item)) - raise TypeError(msg) + msg = "Data type {} can not be converted to HSD string"\ + .format(type(item)) + raise TypeError(msg) def _str_to_hsd(string): diff --git 
a/src/hsd/io.py b/src/hsd/io.py index 828600b..7c07a27 100644 --- a/src/hsd/io.py +++ b/src/hsd/io.py @@ -8,9 +8,9 @@ """ import io from typing import Union, TextIO -from .dict import HsdDictWalker, HsdDictBuilder -from .formatter import HsdFormatter -from .parser import HsdParser +from hsd.dict import HsdDictWalker, HsdDictBuilder +from hsd.formatter import HsdFormatter +from hsd.parser import HsdParser _INDENT_STR = " " @@ -48,8 +48,8 @@ def load(hsdfile: Union[TextIO, str], lower_tag_names: bool = False, parser = HsdParser(eventhandler=dictbuilder, lower_tag_names=lower_tag_names) if isinstance(hsdfile, str): - with open(hsdfile, "r") as hsdfile: - parser.parse(hsdfile) + with open(hsdfile, "r") as hsddescr: + parser.parse(hsddescr) else: parser.parse(hsdfile) return dictbuilder.hsddict @@ -92,20 +92,23 @@ def load_string( ... } ... \"\"\" >>> hsd.load_string(hsdstr) - {'Dftb': {'Scc': True, 'Filling': {'Fermi': {'Temperature': 100, 'Temperature.attrib': 'Kelvin'}}}} + {'Dftb': {'Scc': True, 'Filling': {'Fermi': {'Temperature': 100, + 'Temperature.attrib': 'Kelvin'}}}} In order to ease the case-insensitive handling of the input, the tag names can be converted to lower case during reading using the ``lower_tag_names`` option. >>> hsd.load_string(hsdstr, lower_tag_names=True) - {'dftb': {'scc': True, 'filling': {'fermi': {'temperature': 100, 'temperature.attrib': 'Kelvin'}}}} + {'dftb': {'scc': True, 'filling': {'fermi': {'temperature': 100, + 'temperature.attrib': 'Kelvin'}}}} The original tag names (together with additional information like the line number of a tag) can be recorded, if the ``include_hsd_attribs`` option is set: - >>> data = hsd.load_string(hsdstr, lower_tag_names=True, include_hsd_attribs=True) + >>> data = hsd.load_string(hsdstr, lower_tag_names=True, + ... 
include_hsd_attribs=True) Each tag in the dictionary will have a corresponding ".hsdattrib" entry with the recorded data: @@ -117,12 +120,14 @@ def load_string( original style, when writing the data in HSD-format again. Compare: >>> hsd.dump_string(data) - 'dftb {\\n scc = Yes\\n filling {\\n fermi {\\n temperature [Kelvin] = 100\\n }\\n }\\n}\\n' + 'dftb {\\n scc = Yes\\n filling {\\n fermi {\\n + temperature [Kelvin] = 100\\n }\\n }\\n}\\n' versus >>> hsd.dump_string(data, use_hsd_attribs=True) - 'Dftb {\\n Scc = Yes\\n Filling {\\n Fermi {\\n Temperature [Kelvin] = 100\\n }\\n }\\n}\\n' + 'Dftb {\\n Scc = Yes\\n Filling {\\n Fermi {\\n + Temperature [Kelvin] = 100\\n }\\n }\\n}\\n' """ fobj = io.StringIO(hsdstr) @@ -155,8 +160,8 @@ def dump(data: dict, hsdfile: Union[TextIO, str], msg = "Invalid object type" raise TypeError(msg) if isinstance(hsdfile, str): - with open(hsdfile, "w") as hsdfile: - _dump_dict(data, hsdfile, use_hsd_attribs) + with open(hsdfile, "w") as hsddescr: + _dump_dict(data, hsddescr, use_hsd_attribs) else: _dump_dict(data, hsdfile, use_hsd_attribs) @@ -186,7 +191,8 @@ def dump_string(data: dict, use_hsd_attribs: bool = False) -> str: ... } ... } >>> hsd.dump_string(hsdtree) - 'Dftb {\\n Scc = Yes\\n Filling {\\n Fermi {\\n Temperature [Kelvin] = 100\\n }\\n }\\n}\\n' + 'Dftb {\\n Scc = Yes\\n Filling {\\n Fermi {\\n + Temperature [Kelvin] = 100\\n }\\n }\\n}\\n' See also :func:`hsd.load_string` for an example. diff --git a/src/hsd/parser.py b/src/hsd/parser.py index 50d1fcc..17dcbb5 100644 --- a/src/hsd/parser.py +++ b/src/hsd/parser.py @@ -8,8 +8,8 @@ Contains the event-generating HSD-parser. """ from typing import Optional, TextIO, Union -import hsd.common as common -from .eventhandler import HsdEventHandler, HsdEventPrinter +from hsd import common +from hsd.eventhandler import HsdEventHandler, HsdEventPrinter SYNTAX_ERROR = 1 @@ -49,7 +49,8 @@ class HsdParser: ... 
\"\"\") >>> parser.parse(hsdfile) >>> dictbuilder.hsddict - {'Hamiltonian': {'Dftb': {'Scc': True, 'Filling': {'Fermi': {'Temperature': 100, 'Temperature.attrib': 'Kelvin'}}}}} + {'Hamiltonian': {'Dftb': {'Scc': True, 'Filling': {'Fermi': + {'Temperature': 100, 'Temperature.attrib': 'Kelvin'}}}}} """ def __init__(self, eventhandler: Optional[HsdEventHandler] = None, @@ -291,9 +292,8 @@ def _include_hsd(self, fname): @staticmethod def _include_txt(fname): fname = common.unquote(fname.strip()) - fp = open(fname, "r") - txt = fp.read() - fp.close() + with open(fname, "r") as fp: + txt = fp.read() return txt From 8950f2c3181a33f476f05df0d7c5ab7d1b8169d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Fri, 1 Oct 2021 17:36:19 +0200 Subject: [PATCH 21/31] Apply wording/spelling suggestions from code review Co-authored-by: Ben Hourahine --- docs/hsd.rst | 50 ++++++++++++++++++++--------------------- docs/introduction.rst | 2 +- src/hsd/common.py | 2 +- src/hsd/eventhandler.py | 2 +- src/hsd/io.py | 8 +++---- src/hsd/parser.py | 4 ++-- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/docs/hsd.rst b/docs/hsd.rst index c68227a..bd51072 100644 --- a/docs/hsd.rst +++ b/docs/hsd.rst @@ -5,20 +5,20 @@ The HSD format General description =================== -You can think about the Human-readable Structured Data format as a pleasent +You can think about the Human-readable Structured Data format as a pleasant representation of a tree structure. It can represent a subset of what you -can represent for example with XML. The following constraints with respect +can do for example with XML. The following constraints compared to XML apply: -* Every node of a tree, which is not empty, can either contain further nodes +* Every node of a tree, which is not empty, either contains further nodes or data, but never both. * Every node may have a single (string) attribute only. These constraints allow a very natural looking formatting of the data. 
-As an example, let's have a look at a data tree, which represents the input -for a scientific software. In the XML representation, it could look as :: +As an example, let's have a look at a data tree, which represents input +for scientific software. In the XML representation, it could be written as :: @@ -31,7 +31,7 @@ for a scientific software. In the XML representation, it could look as :: -The same information can be encoded much more natural and compact in the HSD +The same information can be encoded in a much more natural and compact form in HSD format as :: Hamiltonian { @@ -45,21 +45,21 @@ format as :: } } -The content of a node can be passed either between an opening and a closing -curly brace or after an equal sign. In the latter case the end of the line will +The content of a node is passed either between an opening and a closing +curly brace or after an equals sign. In the latter case the end of the line will be the closing delimiter. The attribute (typically the unit of the data -which a node contains) is specified between square brackets after +which the node contains) is specified between square brackets after the node name. -The equal sign can not only be used to assign data as node content (provided -the data fits into one line), but also to assign a single child node as content +The equals sign can be used to assign data as node content (provided +the data fits into one line), or to assign a single child node as content for a given node. This leads to a compact and expressive notation for those cases, where (by the semantics of the input) a given node is only allowed to have a single child node as content. The tree above is a piece of a typical DFTB+ input, where only one child node is allowed for the nodes ``Hamiltonian`` -and ``Filling``, respectively. (They specify the type of the Hamiltonian -and the filling function.)
By making use of equal signs, the -simplified HSD representation would look as compact as :: +and ``Filling``, respectively (They specify the type of the Hamiltonian +and the filling function). By making use of equals signs, the +simplified HSD representation can be as compact as :: Hamiltonian = Dftb { Scc = Yes @@ -76,7 +76,7 @@ Mapping to dictionaries Being basically a subset of XML, HSD data is best represented as an XML DOM-tree. However, very often a dictionary representation is more desirable, -especially, when the language used to query and manipulate the tree offers +especially when the language used to query and manipulate the tree offers dictionaries as primary data type (e.g. Python). The data in an HSD input can be easily represented with the help of nested dictionaries and lists. The input from the previous section would have the following representation as @@ -99,8 +99,8 @@ Python dictionary (or as a JSON formatted input file):: The attribute of a node is stored under a special key containting the name of the node and the ``.attrib`` suffix. -One slight complication of the dictionary representation arrises in the case, -when a given node has multiple child nodes with the same name, such as :: +One slight complication of the dictionary representation arises in the case +of node which has multiple child nodes with the same name :: @@ -179,12 +179,12 @@ to record following additional data for each HSD node: * the line, where the node was defined in the input (helpful for printing out informative error messages), -* the name of the HSD node as found in the input (useful if the tag names are +* the name of the HSD node, as found in the input (useful if the tag names are converted to lower case to ease case-insensitive handling of the input) and -* whether an equal sign was used to open the block. +* whether an equals sign was used to open the block. 
-If those information are asked to be recored, a special key with the +If this information is being recorded, a special key with the ``.hsdattrib`` suffix will be generated for each node in the dictionary/JSON presentation. The correpsonding value will be a dictionary with those information. @@ -227,9 +227,9 @@ will yield the following dictionary representation of the input:: } The recorded line numbers can be used to issue helpful error messages with -information about the line, where the user should search for the problem. -The node names and the formatting information about the equal sign can ensure, -that the formatting is similar to the original one, if the data is damped +information about where the user should search for the problem. +The node names and formatting information about the equals sign ensure +that the formatting is similar to the original HSD, if the data is dumped into the HSD format again. Dumping the dictionary with :: hsd.dump(inpdict, "test2-formatted.hsd", use_hsd_attribs=True) @@ -244,13 +244,13 @@ would indeed yield :: } which is basically identical with the original input. If the additional -processing information is not recorded when the data is loaded or +processing information is not recorded when the data is loaded, or it is not considered when the data is dumped as HSD again :: inpdict = hsd.load("test.hsd", lower_tag_names=True) hsd.dump(inpdict, "test2-unformatted.hsd") -the resulting formatting will differ from the original form more:: +the resulting formatting will differ more strongly from the original HSD :: hamiltonian { dftb { diff --git a/docs/introduction.rst b/docs/introduction.rst index 393c784..df6c433 100644 --- a/docs/introduction.rst +++ b/docs/introduction.rst @@ -11,7 +11,7 @@ as possible (in contrast to XML and JSON) and is not indentation dependent (in contrast to YAML). It was developed originally as the input format for the scientific simulation tool (`DFTB+ `_), but is of general purpose.
Data stored in HSD can be easily mapped to a subset -of JSON, YAML or XML and vica versa. +of JSON, YAML or XML and *vice versa*. Installation diff --git a/src/hsd/common.py b/src/hsd/common.py index 9c1266a..4803fc0 100644 --- a/src/hsd/common.py +++ b/src/hsd/common.py @@ -40,7 +40,7 @@ def unquote(txt): # HSD attribute containing the line number HSD_ATTRIB_LINE = "line" -# HSD attribute marking that a node equals to its only child (instead of +# HSD attribute marking that a node is equal to its only child (instead of # containing it) HSD_ATTRIB_EQUAL = "equal" diff --git a/src/hsd/eventhandler.py b/src/hsd/eventhandler.py index 789f088..9361515 100644 --- a/src/hsd/eventhandler.py +++ b/src/hsd/eventhandler.py @@ -46,7 +46,7 @@ def add_text(self, text: str): class HsdEventPrinter(HsdEventHandler): - """Mininal demonstration class for event handlers. + """Minimal demonstration class for event handlers. This specifc implemenation prints the events. Subclassing instances should override the public methods to customize its behavior. diff --git a/src/hsd/io.py b/src/hsd/io.py index 7c07a27..7babb75 100644 --- a/src/hsd/io.py +++ b/src/hsd/io.py @@ -30,8 +30,8 @@ def load(hsdfile: Union[TextIO, str], lower_tag_names: bool = False, include_hsd_attribs: Whether the HSD-attributes (processing related attributes, like original tag name, line information, etc.) should be stored. Use it, if you wish to keep the formatting of the data - on writing close to the original one (e.g. lowered tag names - converted back to their original form, equal signs between parent + close to the original on writing (e.g. lowered tag names + converted back to their original form, equals signs between parent and only child kept, instead of converted to curly braces). flatten_data: Whether multiline data in the HSD input should be flattened into a single list. 
Othewise a list of lists is created, @@ -70,8 +70,8 @@ def load_string( include_hsd_attribs: Whether the HSD-attributes (processing related attributes, like original tag name, line information, etc.) should be stored. Use it, if you wish to keep the formatting of the data - on writing close to the original one (e.g. lowered tag names - converted back to their original form, equal signs between parent + close to the original one on writing (e.g. lowered tag names + converted back to their original form, equals signs between parent and only child kept, instead of converted to curly braces). flatten_data: Whether multiline data in the HSD input should be flattened into a single list. Othewise a list of lists is created, diff --git a/src/hsd/parser.py b/src/hsd/parser.py index 17dcbb5..ab8e1da 100644 --- a/src/hsd/parser.py +++ b/src/hsd/parser.py @@ -79,11 +79,11 @@ def __init__(self, eventhandler: Optional[HsdEventHandler] = None, self._has_child = True # Whether current node has a child already self._has_text = False # whether current node contains text already self._oldbefore = "" # buffer for tagname - self._lower_tag_names = lower_tag_names # whether tag names should be lowered + self._lower_tag_names = lower_tag_names # whether tag names should be lower cased def parse(self, fobj: Union[TextIO, str]): - """Parses the provided file like object. + """Parses the provided file-like object. The parser will process the data and trigger the corresponding events in the eventhandler which was passed at initialization. 
From 3bb47ad4f50d5c9d2a8986593008e479765467c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Fri, 1 Oct 2021 17:57:42 +0200 Subject: [PATCH 22/31] Add comment in test.hsd, document regexp in dict.py --- src/hsd/dict.py | 10 +++++++++- test/test.hsd | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/hsd/dict.py b/src/hsd/dict.py index 9a5d0bb..f6c14f8 100644 --- a/src/hsd/dict.py +++ b/src/hsd/dict.py @@ -17,15 +17,23 @@ _DataType = Union[_ItemType, List[_ItemType]] +# Pattern to transform HSD string values into actual Python data types _TOKEN_PATTERN = re.compile(r""" +# Integer (?:\s*(?:^|(?<=\s))(?P[+-]?[0-9]+)(?:\s*$|\s+)) | +# Floating point (?:\s*(?:^|(?<=\s)) (?P[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)(?:$|(?=\s+))) | +# Logical (Yes/No) (?:\s*(?:^|(?<=\s))(?P[Yy][Ee][Ss]|[Nn][Oo])(?:$|(?=\s+))) | -(?:\s*(?:(?P(?P['"]).*?(?P=quote)) | (?P.+?))(?:$|\s+)) +# Quoted string +(?:\s*(?:(?P(?P['"]).*?(?P=quote)) +| +# Unquoted string +(?P.+?))(?:$|\s+)) """, re.VERBOSE | re.MULTILINE) diff --git a/test/test.hsd b/test/test.hsd index 8f10b02..20f246b 100644 --- a/test/test.hsd +++ b/test/test.hsd @@ -22,6 +22,7 @@ Hamiltonian = DFTB { 1.030000 3.800000 2.820000 } } + # Adding arbitrary comment, this should not change parsing result Filling = Fermi { Temperature [Kelvin] = 1e-08 } From 93cd1eb3dd26577af23f90f5bda9b06d8d9ae115 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Fri, 1 Oct 2021 16:16:05 +0200 Subject: [PATCH 23/31] Add script for setting version globally --- devtools/set_version | 90 +++++++++++++++++++++++++++++++++++++++++++ docs/introduction.rst | 2 + src/hsd/__init__.py | 2 + 3 files changed, 94 insertions(+) create mode 100755 devtools/set_version diff --git a/devtools/set_version b/devtools/set_version new file mode 100755 index 0000000..ee17b92 --- /dev/null +++ b/devtools/set_version @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 + +"""Sets a version number in all relevant project files""" + 
+import sys +import re +import os + +# The pattern the version number must satisfy +VERSION_PATTERN = r'\d+\.\d+(?:\.\d+)?(?:-\w+)?' + +# List of (file name, search pattern, replacement pattern) tuples for all +# the occurancies to be replaced. +FILES_PATTERNS = [('src/hsd/__init__.py', + r'^__version__\s*=\s*([\'"]){}\1'.format(VERSION_PATTERN), + "__version__ = '{version}'"), + ('docs/introduction.rst', + r'hsd-python version[ ]*{}.'.format(VERSION_PATTERN), + 'hsd-python version {shortversion}.'), + ('setup.cfg', + r'version\s*=\s*{}'.format(VERSION_PATTERN), + "version = {version}"), + ('docs/conf.py', + r'release\s*=\s*([\'"]){}\1'.format(VERSION_PATTERN), + "release = '{version}'"), + ] + + +def main(): + """Main script.""" + + if len(sys.argv) < 2: + sys.stderr.write("Missing version string\n") + sys.exit(1) + + version, shortversion = _get_version_strings(sys.argv[1]) + rootdir = os.path.join(os.path.dirname(sys.argv[0]), '..') + _replace_version_in_files(FILES_PATTERNS, rootdir, version, shortversion) + _replace_version_in_changelog(rootdir, version) + + +def _get_version_strings(version): + """Returns version and the short version as string""" + + match = re.match(VERSION_PATTERN, version) + if match is None: + print("Invalid version string") + sys.exit(1) + + shortversion = '.'.join(version.split('.')[0:2]) + return version, shortversion + + +def _replace_version_in_files(files_patterns, rootdir, version, shortversion): + """Replaces version number in given files with given search/replacement patterns""" + + for fname, regexp, repl in files_patterns: + fname = os.path.join(rootdir, fname) + print("Replacments in '{}': ".format(os.path.relpath(fname, rootdir)), end='') + fp = open(fname, 'r') + txt = fp.read() + fp.close() + replacement = repl.format(version=version, shortversion=shortversion) + newtxt, nsub = re.subn(regexp, replacement, txt, flags=re.MULTILINE) + print(nsub) + fp = open(fname, 'w') + fp.write(newtxt) + fp.close() + + +def 
_replace_version_in_changelog(rootdir, version): + """Replaces the unreleased section in CHANGELOG.rst""" + + fname = os.path.join(rootdir, 'CHANGELOG.rst') + print("Replacments in '{}': ".format(os.path.relpath(fname, rootdir)), end='') + fp = open(fname, 'r') + txt = fp.read() + fp.close() + decoration = '=' * len(version) + newtxt, nsub = re.subn( + r'^Unreleased\s*\n=+', version + r'\n' + decoration, txt, + count=1, flags=re.MULTILINE) + print(nsub) + fp = open(fname, 'w') + fp.write(newtxt) + fp.close() + + +if __name__ == '__main__': + main() diff --git a/docs/introduction.rst b/docs/introduction.rst index df6c433..38fd346 100644 --- a/docs/introduction.rst +++ b/docs/introduction.rst @@ -13,6 +13,8 @@ scientific simulation tool (`DFTB+ `_), but is of general purpose. Data stored in HSD can be easily mapped to a subset of JSON, YAML or XML and *vice versa*. +This document describes hsd-python version 0.1. + Installation ============ diff --git a/src/hsd/__init__.py b/src/hsd/__init__.py index 5ffed70..c29dac8 100644 --- a/src/hsd/__init__.py +++ b/src/hsd/__init__.py @@ -14,3 +14,5 @@ from hsd.formatter import HsdFormatter from hsd.io import load, load_string, dump, dump_string from hsd.parser import HsdParser + +__version__ = '0.1' From 77e7317c2f9c0d2a6f571271a284ba3a6f9efd15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Fri, 1 Oct 2021 16:37:10 +0200 Subject: [PATCH 24/31] Add packaging to github workflow --- .github/workflows/ci.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b7516e0..05335e4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,7 +2,7 @@ name: CI on: [push, pull_request] jobs: - test-new: + test: runs-on: ubuntu-latest @@ -15,10 +15,16 @@ jobs: python-version: '3.x' - name: Install requirements (PIP) - run: pip3 install pytest sphinx numpy + run: pip3 install pytest sphinx numpy build - - name: Setup 
up PYTHONPATH - run: echo "PYTHONPATH=${PWD}/src" >> $GITHUB_ENV + - name: Setup up root directory + run: echo "PACKAGE_ROOT=${PWD}/src" >> $GITHUB_ENV + + - name: Build and install package + run: | + python -m build + pip install dist/hsd_python*.whl + python3 -c 'import hsd; print(hsd.__version__)' - name: Run test pytest run: python3 -m pytest From 865ddc18ab705abf96665f459a08862c09df8884 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Fri, 1 Oct 2021 17:07:42 +0200 Subject: [PATCH 25/31] Update Readme --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 9ad39d4..902ec94 100644 --- a/README.rst +++ b/README.rst @@ -2,8 +2,8 @@ HSD — Make your structured data human friendly ********************************************** -This package contains utilities to read and write files in the Human-friendly -Structured Data (HSD) format. +Utilities to read and write files in the Human-friendly Structured Data (HSD) +format. The HSD-format is very similar to both JSON and YAML, but tries to minimize the effort for **humans** to read and write it. 
It ommits special characters as much @@ -22,7 +22,7 @@ Installation The package can be installed via conda-forge:: - conda install hsd-python + conda install --channel "conda-forge" hsd-python Alternatively, the package can be downloaded and installed via pip into the active Python interpreter (preferably using a virtual python environment) by :: From 4b2a89dcdf8c731484a29bb2b6957a04615a8a28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Fri, 1 Oct 2021 20:29:55 +0200 Subject: [PATCH 26/31] Add version check in github workflow --- .github/workflows/ci.yml | 5 ++++- devtools/set_version | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 05335e4..568769d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,6 +1,9 @@ name: CI on: [push, pull_request] +env: + HSD_PYTHON_VERSION: '0.1' + jobs: test: @@ -24,7 +27,7 @@ jobs: run: | python -m build pip install dist/hsd_python*.whl - python3 -c 'import hsd; print(hsd.__version__)' + python -c "import hsd; assert hsd.__version__ == '${HSD_PYTHON_VERSION}'" - name: Run test pytest run: python3 -m pytest diff --git a/devtools/set_version b/devtools/set_version index ee17b92..6be38fa 100755 --- a/devtools/set_version +++ b/devtools/set_version @@ -23,6 +23,9 @@ FILES_PATTERNS = [('src/hsd/__init__.py', ('docs/conf.py', r'release\s*=\s*([\'"]){}\1'.format(VERSION_PATTERN), "release = '{version}'"), + ('.github/workflows/ci.yml', + r'HSD_PYTHON_VERSION:\s*([\'"]){}\1'.format(VERSION_PATTERN), + "HSD_PYTHON_VERSION: '{version}'"), ] From 2110e6345ba37acf9886748fd127ccc7f967f355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Sun, 3 Oct 2021 12:52:55 +0200 Subject: [PATCH 27/31] Make handling of repeated nodes consistent with DFTB+ --- docs/hsd.rst | 12 ++- src/hsd/dict.py | 45 ++++++--- src/hsd/formatter.py | 2 +- src/hsd/io.py | 2 +- src/hsd/parser.py | 1 + test/test_dict.py | 200 
+++++++++++++++++++++++++++++++++++++++ test/test_dictbuilder.py | 37 -------- test/test_parser.py | 7 +- 8 files changed, 245 insertions(+), 61 deletions(-) create mode 100644 test/test_dict.py delete mode 100644 test/test_dictbuilder.py diff --git a/docs/hsd.rst b/docs/hsd.rst index bd51072..b211865 100644 --- a/docs/hsd.rst +++ b/docs/hsd.rst @@ -162,6 +162,10 @@ nodes will be mapped to one key, which will contain a list of dictionaries ] }, ] + # The attributes also become a list. For technical reasons the + # dictbuilder always creates an attribute list for multiple nodes, + # even if none of the nodes carries an actual attribute. + "PointCharges.attrib": [None, None] } } @@ -186,7 +190,7 @@ to record following additional data for each HSD node: If this information is being recorded, a special key with the ``.hsdattrib`` suffix will be generated for each node in the dictionary/JSON -presentation. The correpsonding value will be a dictionary with those +presentation. The corresponding value will be a dictionary with those information. As an example, let's store the input from the previous section :: @@ -199,7 +203,7 @@ As an example, let's store the input from the previous section :: } in the file `test.hsd`, parse it and convert the node names to lower case -(to make the input processing case-insensitive). Using the Python command :: +(to enable case-insensitive input processing).
Using the Python command :: inpdict = hsd.load("test.hsd", lower_tag_names=True, include_hsd_attribs=True) @@ -208,13 +212,13 @@ will yield the following dictionary representation of the input:: { 'hamiltonian.hsdattrib': {'equal': True, 'line': 0, 'tag': 'Hamiltonian'}, 'hamiltonian': { - 'dftb.hsdattrib': {'line': 0, 'tag': 'Dftb'}, + 'dftb.hsdattrib': {'line': 0, equal: False, 'tag': 'Dftb'}, 'dftb': { 'scc.hsdattrib': {'equal': True, 'line': 1, 'tag': 'Scc'}, 'scc': True, 'filling.hsdattrib': {'equal': True, 'line': 2, 'tag': 'Filling'}, 'filling': { - 'fermi.hsdattrib': {'line': 2, 'tag': 'Fermi'}, + 'fermi.hsdattrib': {'line': 2, 'equal': False, 'tag': 'Fermi'}, 'fermi': { 'temperature.attrib': 'Kelvin', 'temperature.hsdattrib': {'equal': True, 'line': 3, diff --git a/src/hsd/dict.py b/src/hsd/dict.py index f6c14f8..f0f8126 100644 --- a/src/hsd/dict.py +++ b/src/hsd/dict.py @@ -80,27 +80,38 @@ def close_tag(self, tagname): attrib, hsdattrib = self._attribs.pop(-1) parentblock = self._parentblocks.pop(-1) prevcont = parentblock.get(tagname) - if prevcont is not None: - if isinstance(prevcont, dict) and self._data is None: - prevcont = [prevcont] - parentblock[tagname] = prevcont - elif not (isinstance(prevcont, list) - and isinstance(prevcont[0], dict)): - msg = f"Invalid duplicate occurance of node '{tagname}'" - raise HsdError(msg) + if self._data is not None: + if prevcont is None: + parentblock[tagname] = self._data + elif isinstance(prevcont, list) and len(prevcont) > 0 and isinstance(prevcont[0], dict): + prevcont.append({None: self._data}) + elif isinstance(prevcont, dict): + parentblock[tagname] = [prevcont, {None: self._data}] + else: + parentblock[tagname] = [{None: prevcont}, {None: self._data}] + else: + if prevcont is None: + parentblock[tagname] = self._curblock + elif isinstance(prevcont, list) and len(prevcont) > 0 and isinstance(prevcont[0], dict): + prevcont.append(self._curblock) + elif isinstance(prevcont, dict): + parentblock[tagname] = 
[prevcont, self._curblock] + else: + parentblock[tagname] = [{None: prevcont}, self._curblock] if prevcont is None: - content = self._data if self._data is not None else self._curblock - parentblock[tagname] = content if attrib: parentblock[tagname + ATTRIB_SUFFIX] = attrib if self._include_hsd_attribs: parentblock[tagname + HSD_ATTRIB_SUFFIX] = hsdattrib else: - prevcont.append(self._curblock) prevattrib = parentblock.get(tagname + ATTRIB_SUFFIX) - if not (prevattrib is None and attrib is None): - msg = f"Duplicate node '{tagname}' should not carry attributes" + if isinstance(prevattrib, list): + prevattrib.append(attrib) + else: + parentblock[tagname + ATTRIB_SUFFIX] = [prevattrib, attrib] + print(f"parentblock[{tagname} + {ATTRIB_SUFFIX}] = [{prevattrib}, {attrib}]") + if self._include_hsd_attribs: prevhsdattrib = parentblock.get(tagname + HSD_ATTRIB_SUFFIX) if isinstance(prevhsdattrib, list): @@ -189,8 +200,12 @@ def walk(self, dictobj): elif isinstance(value, list) and value and isinstance(value[0], dict): for ind, item in enumerate(value): hsdattr = hsdattrib[ind] if hsdattrib else None - self._eventhandler.open_tag(key, None, hsdattr) - self.walk(item) + attr = attrib[ind] if attrib else None + self._eventhandler.open_tag(key, attr, hsdattr) + if None in item: + self._eventhandler.add_text(_to_text(item[None])) + else: + self.walk(item) self._eventhandler.close_tag(key) else: diff --git a/src/hsd/formatter.py b/src/hsd/formatter.py index 903c4a2..27027cd 100644 --- a/src/hsd/formatter.py +++ b/src/hsd/formatter.py @@ -97,7 +97,7 @@ def add_text(self, text: str): equal = self._followed_by_equal[-1] multiline = "\n" in text if equal is None and not multiline: - if len(self._followed_by_equal) > 2: + if len(self._followed_by_equal) > 1: equal = not self._followed_by_equal[-2] else: equal = True diff --git a/src/hsd/io.py b/src/hsd/io.py index 7babb75..5301881 100644 --- a/src/hsd/io.py +++ b/src/hsd/io.py @@ -114,7 +114,7 @@ def load_string( with the recorded 
data: >>> data["dftb.hsdattrib"] - {'line': 1, 'name': 'Dftb'} + {'equal': False, 'line': 1, 'name': 'Dftb'} This additional data can be then also used to format the tags in the original style, when writing the data in HSD-format again. Compare: diff --git a/src/hsd/parser.py b/src/hsd/parser.py index ab8e1da..5af5839 100644 --- a/src/hsd/parser.py +++ b/src/hsd/parser.py @@ -157,6 +157,7 @@ def _parse(self, line): # Opening tag by curly brace elif sign == "{": #self._has_child = True + self._hsdattrib[common.HSD_ATTRIB_EQUAL] = False self._starttag(before, self._after_equal_sign) self._buffer = [] self._after_equal_sign = False diff --git a/test/test_dict.py b/test/test_dict.py new file mode 100644 index 0000000..eaba972 --- /dev/null +++ b/test/test_dict.py @@ -0,0 +1,200 @@ +#!/bin/env python3 +#------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. 
# +#------------------------------------------------------------------------------# +# +"""Tests for the dictbuilder class""" + +import io +import pytest +import hsd + +_HSD_LINE = hsd.HSD_ATTRIB_LINE +_HSD_EQUAL = hsd.HSD_ATTRIB_EQUAL +_HSD_NAME = hsd.HSD_ATTRIB_NAME + +# General test list format for valid tests +# [("Test name", ([List of HSD events], expected dictionary outcome))] + +# Tests without hsd attribute recording +_TESTS_NO_HSDATTRIB = [ + ( + "Simple", ( + "Test {}", + {"Test": {}}, + ) + ), + ( + "Data with quoted strings", ( + "O = SelectedShells { \"s\" \"p\" }", + {"O": {"SelectedShells": ['"s"', '"p"']}}, + ) + ), + ( + "Attribute containing comma", ( + "PolarRadiusCharge [AA^3,AA,] = {\n1.030000 3.800000 2.820000\n}", + {"PolarRadiusCharge": [1.03, 3.8, 2.82], "PolarRadiusCharge.attrib": "AA^3,AA,"}, + ) + ), + ( + "Duplicate node entry", ( + "a { b = 1 }\na { b = 2 }\n", + {"a.attrib": [None, None], "a": [{"b": 1}, {"b": 2}]}, + ) + ), + ( + "Duplicate value entry", ( + "a = 1\na = 2", + {"a.attrib": [None, None], "a": [{None: 1}, {None: 2}]}, + ) + ), +] +_TESTS_NO_HSDATTRIB_NAMES, _TESTS_NO_HSDATTRIB_CASES = zip(*_TESTS_NO_HSDATTRIB) + + +# Tests with HSD attribute recording +# The input string should be formatted the same way as it comes out from the formatter since +# these tests are also used to test backwards direction (dictionary -> string). 
+_TESTS_HSDATTRIB = [ + ( + "Simple", ( + "Test {}\n", + {"Test.hsdattrib": {_HSD_LINE: 0, _HSD_EQUAL: False}, "Test": {}} + ) + ), + ( + "Data with quoted strings", ( + "O = SelectedShells {\n \"s\" \"p\"\n}\n", + { + "O.hsdattrib": {_HSD_EQUAL: True, _HSD_LINE: 0}, + "O": { + "SelectedShells.hsdattrib": {_HSD_LINE: 0, _HSD_EQUAL: False}, + "SelectedShells": ['"s"', '"p"'] + } + } + ) + ), + ( + "Duplicate node", ( + "a {\n b = 1\n}\na {\n b = 2\n}\n", + { + "a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: False}, + {_HSD_LINE: 3, _HSD_EQUAL: False}], + "a.attrib": [None, None], + "a": [ + {"b.hsdattrib": {_HSD_LINE: 1, _HSD_EQUAL: True}, "b": 1}, + {"b.hsdattrib": {_HSD_LINE: 4, _HSD_EQUAL: True}, "b": 2} + ] + }, + ) + ), + ( + "Duplicate value", ( + "a = 1\na = 2\n", + { + "a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: True}, {_HSD_LINE: 1, _HSD_EQUAL: True}], + "a.attrib": [None, None], + "a": [{None: 1}, {None: 2}] + }, + ) + ), + ( + "Triple value with attrib", ( + "a = 1\na = 2\na [someunit] {\n 3\n}\n", + { + "a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: True}, {_HSD_LINE: 1, _HSD_EQUAL: True}, + {_HSD_LINE: 2, _HSD_EQUAL: False}], + "a.attrib": [None, None, "someunit"], + "a": [{None: 1}, {None: 2}, {None: 3}] + }, + ) + ), + +] +_TESTS_HSDATTRIB_NAMES, _TESTS_HSDATTRIB_CASES = zip(*_TESTS_HSDATTRIB) + + +# Tests with HSD attribute recording and tag name lowering switched on +# The input string should be formatted the same way as it comes out from the formatter since +# these tests are also used to test backwards direction (dictionary -> string). 
+_TESTS_HSDATTRIB_LOWER = [ + ( + "Simple", ( + "Test {}\n", + {"test.hsdattrib": {_HSD_NAME: "Test", _HSD_LINE: 0, _HSD_EQUAL: False}, "test": {}} + ) + ), +] +_TESTS_HSDATTRIB_LOWER_NAMES, _TESTS_HSDATTRIB_LOWER_CASES = zip(*_TESTS_HSDATTRIB_LOWER) + + +@pytest.mark.parametrize( + "hsdstr,hsddict", + _TESTS_NO_HSDATTRIB_CASES, + ids=_TESTS_NO_HSDATTRIB_NAMES +) +def test_builder_nohsdattr(hsdstr, hsddict): + """Test transformation from hsd to dictionary without HSD attributes.""" + dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=False) + parser = hsd.HsdParser(eventhandler=dictbuilder) + fobj = io.StringIO(hsdstr) + parser.parse(fobj) + assert dictbuilder.hsddict == hsddict + + +@pytest.mark.parametrize( + "hsdstr,hsddict", + _TESTS_HSDATTRIB_CASES, + ids=_TESTS_HSDATTRIB_NAMES +) +def test_builder_hsdattr(hsdstr, hsddict): + """Test transformation from hsd to dictionary with HSD attributes.""" + dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=True) + parser = hsd.HsdParser(eventhandler=dictbuilder) + fobj = io.StringIO(hsdstr) + parser.parse(fobj) + assert dictbuilder.hsddict == hsddict + + +@pytest.mark.parametrize( + "hsdstr,hsddict", + _TESTS_HSDATTRIB_LOWER_CASES, + ids=_TESTS_HSDATTRIB_LOWER_NAMES +) +def test_builder_hsdattr_lower(hsdstr, hsddict): + """Test transformation from hsd to dictionary with HSD attributes and case lowering.""" + dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=True) + parser = hsd.HsdParser(eventhandler=dictbuilder, lower_tag_names=True) + fobj = io.StringIO(hsdstr) + parser.parse(fobj) + assert dictbuilder.hsddict == hsddict + + +@pytest.mark.parametrize( + "hsdstr,hsddict", + _TESTS_HSDATTRIB_CASES, + ids=_TESTS_HSDATTRIB_NAMES +) +def test_walker_hsdattr(hsdstr, hsddict): + """Test transformation from dictionary to string using HSD attributes.""" + output = io.StringIO() + formatter = hsd.HsdFormatter(output, use_hsd_attribs=True) + dictwalker = hsd.HsdDictWalker(formatter) + dictwalker.walk(hsddict) + assert 
output.getvalue() == hsdstr + + +@pytest.mark.parametrize( + "hsdstr,hsddict", + _TESTS_HSDATTRIB_LOWER_CASES, + ids=_TESTS_HSDATTRIB_LOWER_NAMES +) +def test_walker_hsdattr_lower(hsdstr, hsddict): + """Test transformation from dictionary to string using HSD attributes.""" + output = io.StringIO() + formatter = hsd.HsdFormatter(output, use_hsd_attribs=True) + dictwalker = hsd.HsdDictWalker(formatter) + dictwalker.walk(hsddict) + assert output.getvalue() == hsdstr diff --git a/test/test_dictbuilder.py b/test/test_dictbuilder.py deleted file mode 100644 index 7d86b8f..0000000 --- a/test/test_dictbuilder.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/env python3 -#------------------------------------------------------------------------------# -# hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # -# Licensed under the BSD 2-clause license. # -#------------------------------------------------------------------------------# -# -import os.path as op -import hsd - -def test_dictbuilder(): - dictbuilder = hsd.HsdDictBuilder() - parser = hsd.HsdParser(eventhandler=dictbuilder) - with open(op.join(op.dirname(__file__), "test.hsd"), "r") as fobj: - parser.parse(fobj) - pyrep = dictbuilder.hsddict - print("** Python structure without data flattening:\n") - print(pyrep) - print("\n** Turning back to HSD:\n") - print(hsd.dump_string(pyrep)) - - -def test_dictbuilder_flat(): - dictbuilder = hsd.HsdDictBuilder(flatten_data=True, include_hsd_attribs=True) - parser = hsd.HsdParser(eventhandler=dictbuilder, lower_tag_names=True) - with open(op.join(op.dirname(__file__), "test.hsd"), "r") as fobj: - parser.parse(fobj) - pyrep = dictbuilder.hsddict - print("** Python structure with data flattening:\n") - print(pyrep) - print("\n** Turning back to HSD:\n") - print(hsd.dump_string(pyrep, use_hsd_attribs=True)) - - -if __name__ == '__main__': - test_dictbuilder() - test_dictbuilder_flat() diff --git a/test/test_parser.py 
b/test/test_parser.py index 88ff7e2..2153373 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -22,7 +22,7 @@ "Simple", ( """Test {} """, [ - (_OPEN_TAG_EVENT, "Test", None, {_HSD_LINE: 0}), + (_OPEN_TAG_EVENT, "Test", None, {_HSD_LINE: 0, _HSD_EQUAL: False}), (_CLOSE_TAG_EVENT, "Test"), ] ) @@ -32,7 +32,7 @@ """O = SelectedShells { "s" "p" }""", [ (_OPEN_TAG_EVENT, "O", None, {_HSD_LINE: 0, _HSD_EQUAL: True}), - (_OPEN_TAG_EVENT, 'SelectedShells', None, {_HSD_LINE: 0}), + (_OPEN_TAG_EVENT, 'SelectedShells', None, {_HSD_LINE: 0, _HSD_EQUAL: False}), (_ADD_TEXT_EVENT, '"s" "p"'), (_CLOSE_TAG_EVENT, 'SelectedShells'), (_CLOSE_TAG_EVENT, 'O'), @@ -43,7 +43,8 @@ "Attribute containing comma", ( """PolarRadiusCharge [AA^3,AA,] = {\n1.030000 3.800000 2.820000\n}""", [ - (_OPEN_TAG_EVENT, "PolarRadiusCharge", "AA^3,AA,", {_HSD_LINE: 0, }), + (_OPEN_TAG_EVENT, "PolarRadiusCharge", "AA^3,AA,", + {_HSD_LINE: 0, _HSD_EQUAL: False}), (_ADD_TEXT_EVENT, '1.030000 3.800000 2.820000'), (_CLOSE_TAG_EVENT, 'PolarRadiusCharge'), ] From ec7f5f4f5da448f1c60ca8548c03b8d527dda94f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Sun, 3 Oct 2021 13:04:07 +0200 Subject: [PATCH 28/31] Make source header 100 character wide --- src/hsd/__init__.py | 11 ++++++----- src/hsd/common.py | 10 +++++----- src/hsd/dict.py | 10 +++++----- src/hsd/eventhandler.py | 11 ++++++----- src/hsd/formatter.py | 2 +- src/hsd/io.py | 2 +- src/hsd/parser.py | 14 ++++++-------- test/test_dict.py | 12 +++++++----- test/test_dump.py | 11 ++++++----- test/test_parser.py | 10 +++++----- 10 files changed, 48 insertions(+), 45 deletions(-) diff --git a/src/hsd/__init__.py b/src/hsd/__init__.py index c29dac8..7303046 100644 --- a/src/hsd/__init__.py +++ b/src/hsd/__init__.py @@ -1,8 +1,9 @@ -#------------------------------------------------------------------------------# -# hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # 
-# Licensed under the BSD 2-clause license. # -#------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group + # # BSD 2-clause license. + # +#--------------------------------------------------------------------------------------------------# # """ Toolbox for reading, writing and manipulating HSD-data. diff --git a/src/hsd/common.py b/src/hsd/common.py index 4803fc0..80b93ca 100644 --- a/src/hsd/common.py +++ b/src/hsd/common.py @@ -1,8 +1,8 @@ -#------------------------------------------------------------------------------# -# hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # -# Licensed under the BSD 2-clause license. # -#------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # +#--------------------------------------------------------------------------------------------------# # """ Implements common functionalities for the HSD package diff --git a/src/hsd/dict.py b/src/hsd/dict.py index f0f8126..3b0b34a 100644 --- a/src/hsd/dict.py +++ b/src/hsd/dict.py @@ -1,8 +1,8 @@ -#------------------------------------------------------------------------------# -# hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # -# Licensed under the BSD 2-clause license. 
# -#------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # +#--------------------------------------------------------------------------------------------------# # """ Contains an event-driven builder for dictionary based (JSON-like) structure diff --git a/src/hsd/eventhandler.py b/src/hsd/eventhandler.py index 9361515..0ef52cb 100644 --- a/src/hsd/eventhandler.py +++ b/src/hsd/eventhandler.py @@ -1,8 +1,9 @@ -#------------------------------------------------------------------------------# -# hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # -# Licensed under the BSD 2-clause license. # -#------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group + # # BSD 2-clause license. + # +#--------------------------------------------------------------------------------------------------# # """ Contains an event handler base class. diff --git a/src/hsd/formatter.py b/src/hsd/formatter.py index 27027cd..bc2c97e 100644 --- a/src/hsd/formatter.py +++ b/src/hsd/formatter.py @@ -1,5 +1,5 @@ # hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Copyright (C) 2011 - 2021 DFTB+ developers group # # Licensed under the BSD 2-clause license. 
# #------------------------------------------------------------------------------# # diff --git a/src/hsd/io.py b/src/hsd/io.py index 5301881..891e31b 100644 --- a/src/hsd/io.py +++ b/src/hsd/io.py @@ -1,5 +1,5 @@ # hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Copyright (C) 2011 - 2021 DFTB+ developers group # # Licensed under the BSD 2-clause license. # #------------------------------------------------------------------------------# # diff --git a/src/hsd/parser.py b/src/hsd/parser.py index 5af5839..85629ad 100644 --- a/src/hsd/parser.py +++ b/src/hsd/parser.py @@ -1,8 +1,8 @@ -#------------------------------------------------------------------------------# -# hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # -# Licensed under the BSD 2-clause license. # -#------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # +#--------------------------------------------------------------------------------------------------# # """ Contains the event-generating HSD-parser. 
@@ -145,9 +145,7 @@ def _parse(self, line): # Equal sign elif sign == "=": # Ignore if followed by "{" (DFTB+ compatibility) - if after.lstrip().startswith("{"): - # _oldbefore may already contain the tagname, if the - # tagname was followed by an attribute -> append + if after.lstrip().startswith("{"): # _oldbefore may already contain the tagname, if the # tagname was followed by an attribute -> append self._oldbefore += before else: self._hsdattrib[common.HSD_ATTRIB_EQUAL] = True diff --git a/test/test_dict.py b/test/test_dict.py index eaba972..6044872 100644 --- a/test/test_dict.py +++ b/test/test_dict.py @@ -1,9 +1,9 @@ #!/bin/env python3 -#------------------------------------------------------------------------------# -# hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # -# Licensed under the BSD 2-clause license. # -#------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. 
# +#--------------------------------------------------------------------------------------------------# # """Tests for the dictbuilder class""" @@ -11,10 +11,12 @@ import pytest import hsd +# Some abbreviations _HSD_LINE = hsd.HSD_ATTRIB_LINE _HSD_EQUAL = hsd.HSD_ATTRIB_EQUAL _HSD_NAME = hsd.HSD_ATTRIB_NAME + # General test list format for valid tests # [("Test name", ([List of HSD events], expected dictionary outcome))] diff --git a/test/test_dump.py b/test/test_dump.py index 3e6a896..1d6c6d2 100644 --- a/test/test_dump.py +++ b/test/test_dump.py @@ -1,9 +1,10 @@ #!/bin/env python3 -#------------------------------------------------------------------------------# -# hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # -# Licensed under the BSD 2-clause license. # -#------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group + # # BSD 2-clause license. + # +#--------------------------------------------------------------------------------------------------# # import numpy as np import hsd diff --git a/test/test_parser.py b/test/test_parser.py index 2153373..8c31c40 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -1,9 +1,9 @@ #!/bin/env python3 -#------------------------------------------------------------------------------# -# hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # -# Licensed under the BSD 2-clause license. 
# -#------------------------------------------------------------------------------# +#--------------------------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # +#--------------------------------------------------------------------------------------------------# # import io import pytest From e503e6ac53fa9dfe086a5a1937e65a8a2afb5346 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Sun, 3 Oct 2021 13:45:35 +0200 Subject: [PATCH 29/31] Move tagname lowering from parser to builder --- src/hsd/dict.py | 58 +++++++++++++++++++++++++-------------------- src/hsd/io.py | 5 ++-- src/hsd/parser.py | 10 +------- test/test_dict.py | 14 +++++------ test/test_parser.py | 23 ++++++++++++++---- 5 files changed, 61 insertions(+), 49 deletions(-) diff --git a/src/hsd/dict.py b/src/hsd/dict.py index 3b0b34a..2909184 100644 --- a/src/hsd/dict.py +++ b/src/hsd/dict.py @@ -9,7 +9,7 @@ """ import re from typing import List, Tuple, Union -from hsd.common import np, ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX, HsdError,\ +from hsd.common import HSD_ATTRIB_NAME, np, ATTRIB_SUFFIX, HSD_ATTRIB_SUFFIX, HsdError,\ QUOTING_CHARS, SPECIAL_CHARS from hsd.eventhandler import HsdEventHandler, HsdEventPrinter @@ -42,14 +42,16 @@ class HsdDictBuilder(HsdEventHandler): Args: flatten_data: Whether multiline data in the HSD input should be - flattened into a single list. Othewise a list of lists is created, - with one list for every line (default). - include_hsd_attribs: Whether the HSD-attributes (processing related - attributes, like original tag name, line information, etc.) should - be stored. + flattened into a single list. Othewise a list of lists is created, with one list for + every line (default). 
+ lower_tag_names: Whether tag names should be all converted to lower case (to ease case + insensitive processing). Default: False. If set and include_hsd_attribs is also set, + the original tag names can be retrieved from the "name" hsd attributes. + include_hsd_attribs: Whether the HSD-attributes (processing related attributes, like + original tag name, line information, etc.) should be stored (default: False). """ - def __init__(self, flatten_data: bool = False, + def __init__(self, flatten_data: bool = False, lower_tag_names: bool = False, include_hsd_attribs: bool = False): super().__init__() self._hsddict: dict = {} @@ -58,6 +60,7 @@ def __init__(self, flatten_data: bool = False, self._data: Union[None, _DataType] = None self._attribs: List[Tuple[str, dict]] = [] self._flatten_data: bool = flatten_data + self._lower_tag_names: bool = lower_tag_names self._include_hsd_attribs: bool = include_hsd_attribs @@ -79,46 +82,49 @@ def open_tag(self, tagname, attrib, hsdattrib): def close_tag(self, tagname): attrib, hsdattrib = self._attribs.pop(-1) parentblock = self._parentblocks.pop(-1) + key = tagname.lower() if self._lower_tag_names else tagname prevcont = parentblock.get(tagname) + if self._data is not None: if prevcont is None: - parentblock[tagname] = self._data + parentblock[key] = self._data elif isinstance(prevcont, list) and len(prevcont) > 0 and isinstance(prevcont[0], dict): prevcont.append({None: self._data}) elif isinstance(prevcont, dict): - parentblock[tagname] = [prevcont, {None: self._data}] + parentblock[key] = [prevcont, {None: self._data}] else: - parentblock[tagname] = [{None: prevcont}, {None: self._data}] + parentblock[key] = [{None: prevcont}, {None: self._data}] else: if prevcont is None: - parentblock[tagname] = self._curblock + parentblock[key] = self._curblock elif isinstance(prevcont, list) and len(prevcont) > 0 and isinstance(prevcont[0], dict): prevcont.append(self._curblock) elif isinstance(prevcont, dict): - parentblock[tagname] = 
[prevcont, self._curblock] + parentblock[key] = [prevcont, self._curblock] else: - parentblock[tagname] = [{None: prevcont}, self._curblock] + parentblock[key] = [{None: prevcont}, self._curblock] - if prevcont is None: - if attrib: - parentblock[tagname + ATTRIB_SUFFIX] = attrib - if self._include_hsd_attribs: - parentblock[tagname + HSD_ATTRIB_SUFFIX] = hsdattrib - else: - prevattrib = parentblock.get(tagname + ATTRIB_SUFFIX) + if attrib and prevcont is None: + parentblock[key + ATTRIB_SUFFIX] = attrib + elif prevcont is not None: + prevattrib = parentblock.get(key + ATTRIB_SUFFIX) if isinstance(prevattrib, list): prevattrib.append(attrib) else: - parentblock[tagname + ATTRIB_SUFFIX] = [prevattrib, attrib] - print(f"parentblock[{tagname} + {ATTRIB_SUFFIX}] = [{prevattrib}, {attrib}]") + parentblock[key + ATTRIB_SUFFIX] = [prevattrib, attrib] - if self._include_hsd_attribs: - prevhsdattrib = parentblock.get(tagname + HSD_ATTRIB_SUFFIX) + if self._include_hsd_attribs: + if self._lower_tag_names: + hsdattrib = {} if hsdattrib is None else hsdattrib + hsdattrib[HSD_ATTRIB_NAME] = tagname + if prevcont is None: + parentblock[key + HSD_ATTRIB_SUFFIX] = hsdattrib + else: + prevhsdattrib = parentblock.get(key + HSD_ATTRIB_SUFFIX) if isinstance(prevhsdattrib, list): prevhsdattrib.append(hsdattrib) else: - parentblock[tagname + HSD_ATTRIB_SUFFIX] = [prevhsdattrib, - hsdattrib] + parentblock[key + HSD_ATTRIB_SUFFIX] = [prevhsdattrib, hsdattrib] self._curblock = parentblock self._data = None diff --git a/src/hsd/io.py b/src/hsd/io.py index 891e31b..89e4ddf 100644 --- a/src/hsd/io.py +++ b/src/hsd/io.py @@ -43,10 +43,9 @@ def load(hsdfile: Union[TextIO, str], lower_tag_names: bool = False, Examples: See :func:`hsd.load_string` for examples of usage. 
""" - dictbuilder = HsdDictBuilder(flatten_data=flatten_data, + dictbuilder = HsdDictBuilder(lower_tag_names=lower_tag_names, flatten_data=flatten_data, include_hsd_attribs=include_hsd_attribs) - parser = HsdParser(eventhandler=dictbuilder, - lower_tag_names=lower_tag_names) + parser = HsdParser(eventhandler=dictbuilder) if isinstance(hsdfile, str): with open(hsdfile, "r") as hsddescr: parser.parse(hsddescr) diff --git a/src/hsd/parser.py b/src/hsd/parser.py index 85629ad..ae66efe 100644 --- a/src/hsd/parser.py +++ b/src/hsd/parser.py @@ -29,9 +29,6 @@ class HsdParser: Arguments: eventhandler: Object which should handle the HSD-events triggered during parsing. When not specified, HsdEventPrinter() is used. - lower_tag_names: Whether tag names should be lowered during parsing. - If the option is set, the original tag name will be stored among - the hsd attributes. Examples: >>> from io import StringIO @@ -53,8 +50,7 @@ class HsdParser: {'Temperature': 100, 'Temperature.attrib': 'Kelvin'}}}}} """ - def __init__(self, eventhandler: Optional[HsdEventHandler] = None, - lower_tag_names: bool = False): + def __init__(self, eventhandler: Optional[HsdEventHandler] = None): """Initializes the parser. 
Args: @@ -79,7 +75,6 @@ def __init__(self, eventhandler: Optional[HsdEventHandler] = None, self._has_child = True # Whether current node has a child already self._has_text = False # whether current node contains text already self._oldbefore = "" # buffer for tagname - self._lower_tag_names = lower_tag_names # whether tag names should be lower cased def parse(self, fobj: Union[TextIO, str]): @@ -258,9 +253,6 @@ def _starttag(self, tagname, closeprev): if len(tagname_stripped.split()) > 1: self._error(SYNTAX_ERROR, (self._currline, self._currline)) self._hsdattrib[common.HSD_ATTRIB_LINE] = self._currline - if self._lower_tag_names: - self._hsdattrib[common.HSD_ATTRIB_NAME] = tagname_stripped - tagname_stripped = tagname_stripped.lower() self._eventhandler.open_tag(tagname_stripped, self._attrib, self._hsdattrib) self._opened_tags.append( diff --git a/test/test_dict.py b/test/test_dict.py index 6044872..ebdfef2 100644 --- a/test/test_dict.py +++ b/test/test_dict.py @@ -137,7 +137,7 @@ _TESTS_NO_HSDATTRIB_CASES, ids=_TESTS_NO_HSDATTRIB_NAMES ) -def test_builder_nohsdattr(hsdstr, hsddict): +def test_dict_builder_nohsdattr(hsdstr, hsddict): """Test transformation from hsd to dictionary without HSD attributes.""" dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=False) parser = hsd.HsdParser(eventhandler=dictbuilder) @@ -151,7 +151,7 @@ def test_builder_nohsdattr(hsdstr, hsddict): _TESTS_HSDATTRIB_CASES, ids=_TESTS_HSDATTRIB_NAMES ) -def test_builder_hsdattr(hsdstr, hsddict): +def test_dict_builder_hsdattr(hsdstr, hsddict): """Test transformation from hsd to dictionary with HSD attributes.""" dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=True) parser = hsd.HsdParser(eventhandler=dictbuilder) @@ -165,10 +165,10 @@ def test_builder_hsdattr(hsdstr, hsddict): _TESTS_HSDATTRIB_LOWER_CASES, ids=_TESTS_HSDATTRIB_LOWER_NAMES ) -def test_builder_hsdattr_lower(hsdstr, hsddict): +def test_dict_builder_hsdattr_lower(hsdstr, hsddict): """Test transformation from hsd to 
dictionary with HSD attributes and case lowering.""" - dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=True) - parser = hsd.HsdParser(eventhandler=dictbuilder, lower_tag_names=True) + dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=True, lower_tag_names=True) + parser = hsd.HsdParser(eventhandler=dictbuilder) fobj = io.StringIO(hsdstr) parser.parse(fobj) assert dictbuilder.hsddict == hsddict @@ -179,7 +179,7 @@ def test_builder_hsdattr_lower(hsdstr, hsddict): _TESTS_HSDATTRIB_CASES, ids=_TESTS_HSDATTRIB_NAMES ) -def test_walker_hsdattr(hsdstr, hsddict): +def test_dict_walker_hsdattr(hsdstr, hsddict): """Test transformation from dictionary to string using HSD attributes.""" output = io.StringIO() formatter = hsd.HsdFormatter(output, use_hsd_attribs=True) @@ -193,7 +193,7 @@ def test_walker_hsdattr(hsdstr, hsddict): _TESTS_HSDATTRIB_LOWER_CASES, ids=_TESTS_HSDATTRIB_LOWER_NAMES ) -def test_walker_hsdattr_lower(hsdstr, hsddict): +def test_dict_walker_hsdattr_lower(hsdstr, hsddict): """Test transformation from dictionary to string using HSD attributes.""" output = io.StringIO() formatter = hsd.HsdFormatter(output, use_hsd_attribs=True) diff --git a/test/test_parser.py b/test/test_parser.py index 8c31c40..b2da98e 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -50,6 +50,19 @@ ] ) ), + ( + "Variable", ( + """$Variable = 12\nValue = $Variable\n""", + [ + (_OPEN_TAG_EVENT, "$Variable", None, {_HSD_LINE: 0, _HSD_EQUAL: True}), + (_ADD_TEXT_EVENT, "12"), + (_CLOSE_TAG_EVENT, "$Variable"), + (_OPEN_TAG_EVENT, "Value", None, {_HSD_LINE: 1, _HSD_EQUAL: True}), + (_ADD_TEXT_EVENT, "$Variable"), + (_CLOSE_TAG_EVENT, "Value") + ] + ) + ), ] _VALID_TEST_NAMES, _VALID_TEST_CASES = zip(*_VALID_TESTS) @@ -87,8 +100,8 @@ class _TestEventHandler(hsd.HsdEventHandler): def __init__(self): self.events = [] - def open_tag(self, tagname, attrib, hsdoptions): - self.events.append((_OPEN_TAG_EVENT, tagname, attrib, hsdoptions)) + def open_tag(self, tagname, attrib, 
hsdattrib): + self.events.append((_OPEN_TAG_EVENT, tagname, attrib, hsdattrib)) def close_tag(self, tagname): self.events.append((_CLOSE_TAG_EVENT, tagname)) @@ -102,7 +115,8 @@ def add_text(self, text): _VALID_TEST_CASES, ids=_VALID_TEST_NAMES ) -def test_valid_parser_events(hsd_input, expected_events): +def test_parser_events(hsd_input, expected_events): + """Test valid parser events""" testhandler = _TestEventHandler() parser = hsd.HsdParser(eventhandler=testhandler) hsdfile = io.StringIO(hsd_input) @@ -115,7 +129,8 @@ def test_valid_parser_events(hsd_input, expected_events): _FAILING_TEST_CASES, ids=_FAILING_TEST_NAMES ) -def test_invalid_parser_events(hsd_input): +def test_parser_exceptions(hsd_input): + """Test exception raised by the parser""" testhandler = _TestEventHandler() parser = hsd.HsdParser(eventhandler=testhandler) hsdfile = io.StringIO(hsd_input) From a2680ede27b54dff9550532573cbd8747c31ad0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Wed, 10 Nov 2021 10:43:50 +0100 Subject: [PATCH 30/31] Fix header formatting --- src/hsd/__init__.py | 5 ++--- src/hsd/formatter.py | 8 ++++---- src/hsd/io.py | 8 ++++---- test/test_dump.py | 5 ++--- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/hsd/__init__.py b/src/hsd/__init__.py index 7303046..4faafbd 100644 --- a/src/hsd/__init__.py +++ b/src/hsd/__init__.py @@ -1,8 +1,7 @@ #--------------------------------------------------------------------------------------------------# # hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group - # # BSD 2-clause license. - # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. 
# #--------------------------------------------------------------------------------------------------# # """ diff --git a/src/hsd/formatter.py b/src/hsd/formatter.py index bc2c97e..70c4ca6 100644 --- a/src/hsd/formatter.py +++ b/src/hsd/formatter.py @@ -1,7 +1,7 @@ -# hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # -# Licensed under the BSD 2-clause license. # -#------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # +#--------------------------------------------------------------------------------------------------# # """ Provides an event based formatter to create HSD dumps diff --git a/src/hsd/io.py b/src/hsd/io.py index 89e4ddf..f0f4f22 100644 --- a/src/hsd/io.py +++ b/src/hsd/io.py @@ -1,7 +1,7 @@ -# hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group # -# Licensed under the BSD 2-clause license. # -#------------------------------------------------------------------------------# +# hsd-python: package for manipulating HSD-formatted data in Python # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # +#--------------------------------------------------------------------------------------------------# # """ Provides functionality to dump Python structures to HSD diff --git a/test/test_dump.py b/test/test_dump.py index 1d6c6d2..ff2fd29 100644 --- a/test/test_dump.py +++ b/test/test_dump.py @@ -1,9 +1,8 @@ #!/bin/env python3 #--------------------------------------------------------------------------------------------------# # hsd-python: package for manipulating HSD-formatted data in Python # -# Copyright (C) 2011 - 2021 DFTB+ developers group - # # BSD 2-clause license. 
- # +# Copyright (C) 2011 - 2021 DFTB+ developers group # +# Licensed under the BSD 2-clause license. # #--------------------------------------------------------------------------------------------------# # import numpy as np From 1b7774ae4877ae4c525c23916c7466b1115e9da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A1lint=20Aradi?= Date: Wed, 10 Nov 2021 10:46:52 +0100 Subject: [PATCH 31/31] Set version number to 0.1 --- CHANGELOG.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 16cdadb..6284212 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,8 +3,8 @@ Change Log ========== -Unreleased -========== +0.1 +=== Added -----