From 6960709e974a39a55d139f5c6430aaa1bdcc38e1 Mon Sep 17 00:00:00 2001 From: Florian Dobener Date: Tue, 16 Apr 2024 11:54:22 +0200 Subject: [PATCH] Fix nexus parsing for NOMAD (#304) * Fix nxdl_path construction for attributes * Fix generating of duplicate attribute nodes in nxdl_path * Ignore xml comments * Use `ET._Element` * Update definitions * Update definitions file * Update ref log * Update definitions * Update nexus-version.txt * Update defs * Update nexus version --- pynxtools/nexus/nexus.py | 64 ++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/pynxtools/nexus/nexus.py b/pynxtools/nexus/nexus.py index fb5988419..41b619520 100644 --- a/pynxtools/nexus/nexus.py +++ b/pynxtools/nexus/nexus.py @@ -9,30 +9,31 @@ import click import h5py import lxml.etree as ET + from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( - get_hdf_info_parent, - get_nxdl_child, - get_node_concept_path, - get_local_name_from_xml, - write_doc_string, - try_find_units, + NxdlAttributeNotFoundError, + add_base_classes, check_attr_name_nxdl, - get_required_string, get_best_child, - get_node_name, - add_base_classes, - walk_elist, - get_nx_class, - try_find_default, - other_attrs, - get_nexus_definitions_path, - NxdlAttributeNotFoundError, - get_node_at_nxdl_path, get_enums, + get_hdf_info_parent, get_inherited_nodes, + get_local_name_from_xml, + get_nexus_definitions_path, + get_node_at_nxdl_path, + get_node_concept_path, + get_node_name, + get_nx_attribute_type, + get_nx_class, get_nx_classes, get_nx_units, - get_nx_attribute_type, + get_nxdl_child, + get_required_string, + other_attrs, + try_find_default, + try_find_units, + walk_elist, + write_doc_string, ) @@ -151,6 +152,8 @@ def check_deprecation_enum_axis(variables, doc, elist, attr, hdf_node): if doc: logger.debug("enumeration (" + get_node_concept_path(base_elem) + "):") for item in sdoc: + if isinstance(item, ET._Comment): + continue if get_local_name_from_xml(item) == "item": if doc: logger.debug("-> " + item.attrib["value"]) @@ -171,6 +174,7 @@ def get_nxdl_attr_doc( # pylint: disable=too-many-arguments,too-many-locals """Get nxdl documentation for an attribute""" new_elem = [] old_elem = elem + attr_inheritance_chain = [] for elem_index, act_elem1 in enumerate(elist): act_elem = act_elem1 # NX_class is a compulsory attribute for groups in a nexus file @@ -187,13 +191,15 @@ def get_nxdl_attr_doc( # pylint: disable=too-many-arguments,too-many-locals # units category is a compulsory attribute for any fields if attr == "units" and isinstance(hdf_node, h5py.Dataset): req_str = "<>" - logger, act_elem, nxdl_path, doc, attr = try_find_units( - logger, act_elem, nxdl_path, doc, attr + logger, act_elem, attr_inheritance_chain, doc, attr = try_find_units( + logger, act_elem, attr_inheritance_chain, doc, attr ) # units for attributes can be given as ATTRIBUTENAME_units elif attr.endswith("_units"): - logger, act_elem, nxdl_path, doc, attr, req_str = check_attr_name_nxdl( - (logger, act_elem, nxdl_path, doc, attr, req_str) + logger, act_elem, attr_inheritance_chain, doc, attr, req_str = ( + check_attr_name_nxdl( + (logger, act_elem, attr_inheritance_chain, doc, attr, req_str) + ) ) # default is allowed for groups elif attr == "default" and not isinstance(hdf_node, h5py.Dataset): @@ -202,16 +208,16 @@ def get_nxdl_attr_doc( # pylint: disable=too-many-arguments,too-many-locals act_elem = get_nxdl_child( act_elem, attr, nexus_type="attribute", go_base=False ) - logger, act_elem, nxdl_path, doc, attr = try_find_default( - logger, act_elem1, act_elem, nxdl_path, doc, attr + logger, act_elem, attr_inheritance_chain, doc, attr = try_find_default( + logger, act_elem1, act_elem, attr_inheritance_chain, doc, attr ) else: # other attributes act_elem = get_nxdl_child( act_elem, attr, nexus_type="attribute", go_base=False ) if act_elem is not None: - logger, act_elem, nxdl_path, doc, attr = other_attrs( - logger, act_elem1, act_elem, nxdl_path, doc, attr + logger, act_elem, attr_inheritance_chain, doc, attr = other_attrs( + logger, act_elem1, act_elem, attr_inheritance_chain, doc, attr ) if act_elem is not None: new_elem.append(act_elem) @@ -234,6 +240,10 @@ def get_nxdl_attr_doc( # pylint: disable=too-many-arguments,too-many-locals if attr != "NX_class": logger.debug("@" + attr + " - IS NOT IN SCHEMA") logger.debug("") + + # Add the lowest child element to the nxdl_path + if attr_inheritance_chain: + nxdl_path.append(attr_inheritance_chain[0]) return (req_str, get_nxdl_entry(hdf_info), nxdl_path) @@ -247,6 +257,8 @@ def get_nxdl_doc(hdf_info, logger, doc, attr=False): hdf_path=hdf_info["hdf_path"] if "hdf_path" in hdf_info else None, hdf_root=hdf_info["hdf_root"] if "hdf_root" in hdf_info else None, ) + # Copy the nxdl_path, otherwise the cached object is altered + nxdl_path = nxdl_path.copy() elem = elist[0] if class_path and elist else None if doc: logger.debug("classpath: " + str(class_path)) @@ -310,7 +322,7 @@ def get_hdf_path(hdf_info): @lru_cache(maxsize=None) def get_inherited_hdf_nodes( nx_name: str = None, - elem: ET.Element = None, + elem: ET._Element = None, hdf_node=None, hdf_path=None, hdf_root=None,