From 8c7ece3676e46484d11c1050baffceb02b6f97fd Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 10 Sep 2024 15:51:51 +0200 Subject: [PATCH] Add a hook to inject datamodel version information into podio internals (#651) * Add the possibility to specify a version header and variable * Add reading back version to test * Make the readers and writers handle datamodel versions * Reuse existing type but give it new block name * Make sure to not pollute the available categories * Add docstrings * Make datamodel version a command line argument to the generator * Update documentation * Make sure roundtrip models also have version info * Make datamodel versions accessible from python * Move strings once we are done with them --- cmake/podioMacros.cmake | 10 ++++- doc/datamodel_syntax.md | 17 +++++++++ include/podio/DatamodelRegistry.h | 10 +++++ include/podio/RNTupleReader.h | 11 ++++++ include/podio/ROOTReader.h | 11 ++++++ include/podio/Reader.h | 16 ++++++++ include/podio/SIOBlock.h | 38 ++++++++++++++++--- include/podio/SIOReader.h | 11 ++++++ .../utilities/DatamodelRegistryIOHelpers.h | 11 +++++- python/podio/base_reader.py | 30 ++++++++++++--- python/podio/test_Reader.py | 19 ++++++++++ python/podio_class_generator.py | 23 ++++++++++- python/podio_gen/cpp_generator.py | 15 +++++++- python/podio_gen/generator_base.py | 12 +++++- python/templates/DatamodelDefinition.h.jinja2 | 10 ++++- src/DatamodelRegistry.cc | 17 ++++++++- src/DatamodelRegistryIOHelpers.cc | 10 +++++ src/RNTupleReader.cc | 11 +++++- src/RNTupleWriter.cc | 8 ++++ src/ROOTReader.cc | 16 +++++++- src/ROOTWriter.cc | 13 +++++++ src/SIOReader.cc | 5 ++- src/SIOWriter.cc | 14 +++++++ src/rootUtils.h | 6 +++ tests/CMakeLists.txt | 2 +- tests/dumpmodel/CMakeLists.txt | 3 ++ tests/read_frame.h | 26 +++++++++++++ tools/podio-dump | 13 +++++-- 28 files changed, 359 insertions(+), 29 deletions(-) diff --git a/cmake/podioMacros.cmake b/cmake/podioMacros.cmake index b9a892fc3..d816bb406 100644 --- 
a/cmake/podioMacros.cmake +++ b/cmake/podioMacros.cmake @@ -131,13 +131,14 @@ set_property(CACHE PODIO_USE_CLANG_FORMAT PROPERTY STRINGS AUTO ON OFF) # LANG OPTIONAL: The programming language choice # Default is cpp # DEPENDS OPTIONAL: List of files to be added as configure dependencies of the datamodel +# VERSION OPTIONAL: The version of the datamodel (which does not have to be the schema version!) # ) # # Note that the create_${datamodel} target will always be called, but if the YAML_FILE has not changed # this is essentially a no-op, and should not cause re-compilation. #--------------------------------------------------------------------------------------------------- function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOURCES) - CMAKE_PARSE_ARGUMENTS(ARG "" "OLD_DESCRIPTION;OUTPUT_FOLDER;UPSTREAM_EDM;SCHEMA_EVOLUTION" "IO_BACKEND_HANDLERS;LANG;DEPENDS" ${ARGN}) + CMAKE_PARSE_ARGUMENTS(ARG "" "OLD_DESCRIPTION;OUTPUT_FOLDER;UPSTREAM_EDM;SCHEMA_EVOLUTION" "IO_BACKEND_HANDLERS;LANG;DEPENDS;VERSION" ${ARGN}) IF(NOT ARG_OUTPUT_FOLDER) SET(ARG_OUTPUT_FOLDER ${CMAKE_CURRENT_SOURCE_DIR}) ENDIF() @@ -196,6 +197,11 @@ function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOUR endif() endif() + set(VERSION_ARG "") + if (ARG_VERSION) + set(VERSION_ARG "--datamodel-version=${ARG_VERSION}") + endif() + # Make sure that we re run the generation process every time either the # templates or the yaml file changes. 
include(${podio_PYTHON_DIR}/templates/CMakeLists.txt) @@ -215,7 +221,7 @@ function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOUR message(STATUS "Creating '${datamodel}' datamodel") # we need to bootstrap the data model, so this has to be executed in the cmake run execute_process( - COMMAND ${Python_EXECUTABLE} ${podio_PYTHON_DIR}/podio_class_generator.py ${CLANG_FORMAT_ARG} ${OLD_DESCRIPTION_ARG} ${SCHEMA_EVOLUTION_ARG} ${UPSTREAM_EDM_ARG} ${YAML_FILE} ${ARG_OUTPUT_FOLDER} ${datamodel} ${ARG_IO_BACKEND_HANDLERS} ${LANGUAGE_ARG} + COMMAND ${Python_EXECUTABLE} ${podio_PYTHON_DIR}/podio_class_generator.py ${CLANG_FORMAT_ARG} ${OLD_DESCRIPTION_ARG} ${SCHEMA_EVOLUTION_ARG} ${UPSTREAM_EDM_ARG} ${YAML_FILE} ${ARG_OUTPUT_FOLDER} ${datamodel} ${ARG_IO_BACKEND_HANDLERS} ${LANGUAGE_ARG} ${VERSION_ARG} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} RESULT_VARIABLE podio_generate_command_retval ) diff --git a/doc/datamodel_syntax.md b/doc/datamodel_syntax.md index d16be965d..ca44db876 100644 --- a/doc/datamodel_syntax.md +++ b/doc/datamodel_syntax.md @@ -206,6 +206,23 @@ Some customization of the generated code is possible through flags. These flags - `getSyntax`: steers the naming of get and set methods. If set to true, methods are prefixed with `get` and `set` following the capitalized member name, otherwise the member name is used for both. - `exposePODMembers`: whether get and set methods are also generated for members of a member-component. In the example corresponding methods would be generated to directly set / get `x` through `ExampleType`. +## Embedding a datamodel version +Each datamodel definition needs a schema version. However, in the case of podio +this schema version is a single integer. This makes it rather hard to use in +typical versioning, where one might differentiate between *major*, *minor* (and +*patch*) versions. Hence, the versioning of a datamodel and its schema version +are coupled but do not necessarily have to be the same. 
podio offers hooks to +store this important meta information into the produced files. In order to do so, you +can pass the version of the datamodel to the generator via the +`--datamodel-version` argument. It expects the version to conform to this +regular expression: `"v?\d+[\.|-]\d+([\.|-]\d+)?$"`, i.e. that the major and +minor version are present, separated by either a dot or dash with an optional +patch version and an optional `v` prefix. + +If this information is passed to the generator it will be injected into the +podio internals and will be stored in the output files. It can be retrieved +via the `currentFileVersion(const std::string&)` methods of the various readers. + ## Extending a datamodel / using types from an upstream datamodel It is possible to extend another datamodel with your own types, resp. use some datatypes or components from an upstream datamodel in your own datamodel. diff --git a/include/podio/DatamodelRegistry.h b/include/podio/DatamodelRegistry.h index cbfa294cd..760adff19 100644 --- a/include/podio/DatamodelRegistry.h +++ b/include/podio/DatamodelRegistry.h @@ -1,6 +1,9 @@ #ifndef PODIO_DATAMODELREGISTRY_H #define PODIO_DATAMODELREGISTRY_H +#include "podio/podioVersion.h" + +#include #include #include #include @@ -97,6 +100,8 @@ class DatamodelRegistry { /// @returns The name of the datamodel const std::string& getDatamodelName(size_t index) const; + std::optional getDatamodelVersion(const std::string& name) const; + /// Register a datamodel and return its index in the registry. 
/// /// This is the hook that is called during dynamic loading of an EDM to @@ -114,6 +119,9 @@ class DatamodelRegistry { size_t registerDatamodel(std::string name, std::string_view definition, const podio::RelationNameMapping& relationNames); + size_t registerDatamodel(std::string name, std::string_view definition, + const podio::RelationNameMapping& relationNames, podio::version::Version version); + /// Get the names of the relations and vector members of a datatype RelationNames getRelationNames(std::string_view typeName) const; @@ -123,6 +131,8 @@ class DatamodelRegistry { std::vector> m_definitions{}; std::unordered_map m_relations{}; + + std::unordered_map m_datamodelVersions{}; }; } // namespace podio diff --git a/include/podio/RNTupleReader.h b/include/podio/RNTupleReader.h index 373e7c45f..36248c5ca 100644 --- a/include/podio/RNTupleReader.h +++ b/include/podio/RNTupleReader.h @@ -96,6 +96,17 @@ class RNTupleReader { return m_fileVersion; } + /// Get the (build) version of a datamodel that has been used to write the + /// current file + /// + /// @param name The name of the datamodel + /// + /// @returns The (build) version of the datamodel if available or an empty + /// optional + std::optional currentFileVersion(const std::string& name) const { + return m_datamodelHolder.getDatamodelVersion(name); + } + /// Get the datamodel definition for the given name /// /// @param name The name of the datamodel diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index e6ecbd0e0..7167411ec 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -104,6 +104,17 @@ class ROOTReader { return m_fileVersion; } + /// Get the (build) version of a datamodel that has been used to write the + /// current file + /// + /// @param name The name of the datamodel + /// + /// @returns The (build) version of the datamodel if available or an empty + /// optional + std::optional currentFileVersion(const std::string& name) const { + return 
m_datamodelHolder.getDatamodelVersion(name); + } + /// Get the names of all the available Frame categories in the current file(s). /// /// @returns The names of the available categories from the file diff --git a/include/podio/Reader.h b/include/podio/Reader.h index ee8eaa682..09b00161d 100644 --- a/include/podio/Reader.h +++ b/include/podio/Reader.h @@ -26,6 +26,7 @@ class Reader { virtual podio::Frame readFrame(const std::string& name, size_t index) = 0; virtual size_t getEntries(const std::string& name) const = 0; virtual podio::version::Version currentFileVersion() const = 0; + virtual std::optional currentFileVersion(const std::string& name) const = 0; virtual std::vector getAvailableCategories() const = 0; virtual const std::string_view getDatamodelDefinition(const std::string& name) const = 0; virtual std::vector getAvailableDatamodels() const = 0; @@ -66,6 +67,10 @@ class Reader { return m_reader->currentFileVersion(); } + std::optional currentFileVersion(const std::string& name) const override { + return m_reader->currentFileVersion(name); + } + std::vector getAvailableCategories() const override { return m_reader->getAvailableCategories(); } @@ -166,6 +171,17 @@ class Reader { return m_self->currentFileVersion(); } + /// Get the (build) version of a datamodel that has been used to write the + /// current file + /// + /// @param name The name of the datamodel + /// + /// @returns The (build) version of the datamodel if available or an empty + /// optional + std::optional currentFileVersion(const std::string& name) const { + return m_self->currentFileVersion(name); + } + /// Get the names of all the available Frame categories in the current file(s). 
/// /// @returns The names of the available categories from the file diff --git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index 3ae41651e..051d06f9e 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -16,14 +16,15 @@ #include #include #include +#include #include namespace podio { template -void handlePODDataSIO(devT& device, PODData* data, size_t size) { +void handlePODDataSIO(devT& device, const PODData* data, size_t size) { unsigned count = size * sizeof(PODData); - char* dataPtr = reinterpret_cast(data); + auto* dataPtr = reinterpret_cast(const_cast(data)); device.data(dataPtr, count); } @@ -33,7 +34,12 @@ void writeMapLike(sio::write_device& device, const MapLikeT& map) { device.data((int)map.size()); for (const auto& [key, value] : map) { device.data(key); - device.data(value); + using MappedType = detail::GetMappedType; + if constexpr (std::is_trivial_v) { + handlePODDataSIO(device, &value, 1); + } else { + device.data(value); + } } } @@ -165,15 +171,37 @@ class SIOEventMetaDataBlock : public sio::block { podio::GenericParameters* metadata{nullptr}; }; +namespace detail { + inline std::string sioMapBlockNameImpl(std::string keyTName, std::string valueTName) { + std::replace(keyTName.begin(), keyTName.end(), ':', '_'); + std::replace(valueTName.begin(), valueTName.end(), ':', '_'); + return "SIOMapBlockV2_KK_" + keyTName + "_VV_" + valueTName; + } + + template + inline std::string sioMapBlockName(); + +#define SIOMAPBLOCK_NAME(key_type, value_type) \ + template <> \ + inline std::string sioMapBlockName() { \ + return sioMapBlockNameImpl(#key_type, #value_type); \ + } + + SIOMAPBLOCK_NAME(std::string, std::string) + SIOMAPBLOCK_NAME(std::string, podio::version::Version) +#undef SIOMAPBLOCK_NAME +} // namespace detail + /// A block to serialize anything that behaves similar in iterating as a /// map, e.g. vector>, which is what is used /// internally to represent the data to be written. 
template struct SIOMapBlock : public sio::block { - SIOMapBlock() : sio::block("SIOMapBlock", sio::version::encode_version(0, 1)) { + SIOMapBlock() : sio::block(detail::sioMapBlockName(), sio::version::encode_version(0, 2)) { } SIOMapBlock(std::vector>&& data) : - sio::block("SIOMapBlock", sio::version::encode_version(0, 1)), mapData(std::move(data)) { + sio::block(detail::sioMapBlockName(), sio::version::encode_version(0, 2)), + mapData(std::move(data)) { } SIOMapBlock(const SIOMapBlock&) = delete; diff --git a/include/podio/SIOReader.h b/include/podio/SIOReader.h index 80970abbb..f9f5ba02a 100644 --- a/include/podio/SIOReader.h +++ b/include/podio/SIOReader.h @@ -73,6 +73,17 @@ class SIOReader { return m_fileVersion; } + /// Get the (build) version of a datamodel that has been used to write the + /// current file + /// + /// @param name The name of the datamodel + /// + /// @returns The (build) version of the datamodel if available or an empty + /// optional + std::optional currentFileVersion(const std::string& name) const { + return m_datamodelHolder.getDatamodelVersion(name); + } + /// Get the names of all the available Frame categories in the current file. 
/// /// @returns The names of the available categores from the file diff --git a/include/podio/utilities/DatamodelRegistryIOHelpers.h b/include/podio/utilities/DatamodelRegistryIOHelpers.h index 77416a839..de847ce42 100644 --- a/include/podio/utilities/DatamodelRegistryIOHelpers.h +++ b/include/podio/utilities/DatamodelRegistryIOHelpers.h @@ -35,8 +35,12 @@ class DatamodelDefinitionHolder { public: /// The "map" type that is used internally using MapType = std::vector>; - /// Constructor from an existing collection of names and datamodel definitions - DatamodelDefinitionHolder(MapType&& definitions) : m_availEDMDefs(std::move(definitions)) { + /// The "map" mapping names and datamodel versions (where available) + using VersionList = std::vector>; + + /// Constructor from an existing collection of names and datamodel definitions and versions + DatamodelDefinitionHolder(MapType&& definitions, VersionList&& versions) : + m_availEDMDefs(std::move(definitions)), m_edmVersions(std::move(versions)) { } DatamodelDefinitionHolder() = default; @@ -57,8 +61,11 @@ class DatamodelDefinitionHolder { /// Get all names of the datamodels that have been read from file std::vector getAvailableDatamodels() const; + std::optional getDatamodelVersion(const std::string& name) const; + protected: MapType m_availEDMDefs{}; + VersionList m_edmVersions{}; }; } // namespace podio diff --git a/python/podio/base_reader.py b/python/podio/base_reader.py index 7e1ddf2a0..e078aa4ad 100644 --- a/python/podio/base_reader.py +++ b/python/podio/base_reader.py @@ -81,11 +81,31 @@ def get_datamodel_definition(self, edm_name): return "" return self._reader.getDatamodelDefinition(edm_name).data() - def current_file_version(self): - """Get the podio (build) version that was used to write this file + def current_file_version(self, edm_name=None): + """Get the (build) version that was used to write this file + + If called without argument or None, the podio build version is returned + otherwise the build 
version of the datamodel + + Args: + edm_name (str, optional): The package name of the datamodel Returns: - podio.version.Version: The build version of podio that was use to - write this file + podio.version.Version: The build version of podio or the build + version of the datamodel (if available) that was used to write + this file + + Raises: + KeyError: If the datamodel does not have a version stored + RuntimeError: If the reader is a legacy reader and a datamodel + version is requested """ - return self._reader.currentFileVersion() + if edm_name is None: + return self._reader.currentFileVersion() + + if self._is_legacy: + raise RuntimeError("Legacy readers do not store any version info") + maybe_version = self._reader.currentFileVersion(edm_name) + if maybe_version.has_value(): + return maybe_version.value() + raise KeyError(f"No version information available for '{edm_name}'") diff --git a/python/podio/test_Reader.py b/python/podio/test_Reader.py index 1208feace..6be55467d 100644 --- a/python/podio/test_Reader.py +++ b/python/podio/test_Reader.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 """Unit tests for podio readers""" +from podio.version import build_version + class ReaderTestCaseMixin: """Common unittests for readers. 
@@ -70,6 +72,23 @@ def test_frame_iterator_invalid_category(self): i += 1 self.assertEqual(i, 0) + def test_available_datamodels(self): + """Make sure that the datamodel information can be retrieved""" + datamodels = self.reader.datamodel_definitions + self.assertEqual(len(datamodels), 2) + for model in datamodels: + self.assertTrue(model in ("datamodel", "extension_model")) + + self.assertEqual(self.reader.current_file_version("datamodel"), build_version) + + def test_invalid_datamodel_version(self): + """Make sure that the necessary exceptions are raised""" + with self.assertRaises(KeyError): + self.reader.current_file_version("extension_model") + + with self.assertRaises(KeyError): + self.reader.current_file_version("non-existant-model") + class LegacyReaderTestCaseMixin: """Common test cases for the legacy readers python bindings. diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 9254ec155..d3d6c62b6 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -4,7 +4,7 @@ import os import subprocess - +import re from podio_gen.podio_config_reader import PodioConfigReader from podio_gen.generator_utils import DefinitionError @@ -72,6 +72,20 @@ def read_upstream_edm(name_path): ) from err +def parse_version(version_str): + """Parse the version into a tuple of (major, minor, patch) from the passed + version string. 
+ """ + if version_str is None: + return None + + if re.match(r"v?\d+[\.|-]\d+([\.|-]\d+)?$", version_str): + ver = version_str.replace("-", ".").replace("v", "").split(".") + return tuple(int(v) for v in ver) + + raise argparse.ArgumentTypeError(f"{version_str} cannot be parsed as a valid version") + + if __name__ == "__main__": import argparse @@ -147,6 +161,12 @@ def read_upstream_edm(name_path): default=None, action="store", ) + parser.add_argument( + "--datamodel-version", + help="The version string of the generated datamodel", + default=None, + type=parse_version, + ) args = parser.parse_args() @@ -176,6 +196,7 @@ def read_upstream_edm(name_path): verbose=args.verbose, dryrun=args.dryrun, upstream_edm=args.upstream_edm, + datamodel_version=args.datamodel_version, old_description=args.old_description, evolution_file=args.evolution_file, ) diff --git a/python/podio_gen/cpp_generator.py b/python/podio_gen/cpp_generator.py index bfc16e50e..f09898fc4 100644 --- a/python/podio_gen/cpp_generator.py +++ b/python/podio_gen/cpp_generator.py @@ -44,7 +44,7 @@ class IncludeFrom(IntEnum): class CPPClassGenerator(ClassGeneratorBaseMixin): """The c++ class / code generator for podio""" - def __init__( + def __init__( # pylint: disable=too-many-arguments self, yamlfile, install_dir, @@ -55,8 +55,17 @@ def __init__( upstream_edm, old_description, evolution_file, + datamodel_version=None, ): - super().__init__(yamlfile, install_dir, package_name, verbose, dryrun, upstream_edm) + super().__init__( + yamlfile, + install_dir, + package_name, + verbose, + dryrun, + upstream_edm, + datamodel_version=datamodel_version, + ) self.io_handlers = io_handlers # schema evolution specific code @@ -505,12 +514,14 @@ def _write_all_collections_header(self): def _write_edm_def_file(self): """Write the edm definition to a compile time string""" model_encoder = DataModelJSONEncoder() + print(f"{self.datamodel_version=}") data = { "package_name": self.package_name, "edm_definition": 
model_encoder.encode(self.datamodel), "incfolder": self.incfolder, "schema_version": self.datamodel.schema_version, "datatypes": self.datamodel.datatypes, + "datamodel_version": self.datamodel_version, } def quoted_sv(string): diff --git a/python/podio_gen/generator_base.py b/python/podio_gen/generator_base.py index e772d1c98..36f55cb32 100644 --- a/python/podio_gen/generator_base.py +++ b/python/podio_gen/generator_base.py @@ -107,13 +107,23 @@ class ClassGeneratorBaseMixin: """ - def __init__(self, yamlfile, install_dir, package_name, verbose, dryrun, upstream_edm): + def __init__( + self, + yamlfile, + install_dir, + package_name, + verbose, + dryrun, + upstream_edm, + datamodel_version=None, + ): self.yamlfile = yamlfile self.install_dir = install_dir self.package_name = package_name self.verbose = verbose self.dryrun = dryrun self.upstream_edm = upstream_edm + self.datamodel_version = datamodel_version try: self.datamodel = PodioConfigReader.read(yamlfile, package_name, upstream_edm) diff --git a/python/templates/DatamodelDefinition.h.jinja2 b/python/templates/DatamodelDefinition.h.jinja2 index ce4cefb41..e56410faf 100644 --- a/python/templates/DatamodelDefinition.h.jinja2 +++ b/python/templates/DatamodelDefinition.h.jinja2 @@ -41,7 +41,15 @@ class DatamodelRegistryIndex { public: static size_t value() { static const auto relationNames = {{ package_name }}__getRelationNames(); - static auto index = DatamodelRegistryIndex(podio::DatamodelRegistry::mutInstance().registerDatamodel("{{ package_name }}", {{ package_name }}__JSONDefinition, relationNames)); + static auto index = + DatamodelRegistryIndex(podio::DatamodelRegistry::mutInstance().registerDatamodel( + "{{ package_name }}", + {{ package_name }}__JSONDefinition, + relationNames +{% if datamodel_version %} + , podio::version::Version{ {{ datamodel_version | join(", ") }} } +{% endif %} + )); return index.m_value; } private: diff --git a/src/DatamodelRegistry.cc b/src/DatamodelRegistry.cc index 
16e387132..ffd89f1a2 100644 --- a/src/DatamodelRegistry.cc +++ b/src/DatamodelRegistry.cc @@ -22,7 +22,7 @@ size_t DatamodelRegistry::registerDatamodel(std::string name, std::string_view d if (it == m_definitions.cend()) { int index = m_definitions.size(); - m_definitions.emplace_back(name, definition); + m_definitions.emplace_back(std::move(name), definition); for (const auto& [typeName, relations, vectorMembers] : relationNames) { m_relations.emplace(typeName, RelationNames{relations, vectorMembers}); @@ -35,6 +35,14 @@ size_t DatamodelRegistry::registerDatamodel(std::string name, std::string_view d return std::distance(m_definitions.cbegin(), it); } +size_t DatamodelRegistry::registerDatamodel(std::string name, std::string_view definition, + const podio::RelationNameMapping& relationNames, + podio::version::Version version) { + auto index = registerDatamodel(name, definition, relationNames); + m_datamodelVersions.emplace(std::move(name), version); + return index; +} + const std::string_view DatamodelRegistry::getDatamodelDefinition(std::string_view name) const { const auto it = std::find_if(m_definitions.cbegin(), m_definitions.cend(), [&name](const auto& kvPair) { return kvPair.first == name; }); @@ -84,4 +92,11 @@ RelationNames DatamodelRegistry::getRelationNames(std::string_view typeName) con return {emptyVec, emptyVec}; } +std::optional DatamodelRegistry::getDatamodelVersion(const std::string& name) const { + if (const auto it = m_datamodelVersions.find(name); it != m_datamodelVersions.end()) { + return it->second; + } + return std::nullopt; +} + } // namespace podio diff --git a/src/DatamodelRegistryIOHelpers.cc b/src/DatamodelRegistryIOHelpers.cc index 1e7573a1f..8c0dde068 100644 --- a/src/DatamodelRegistryIOHelpers.cc +++ b/src/DatamodelRegistryIOHelpers.cc @@ -47,4 +47,14 @@ std::vector DatamodelDefinitionHolder::getAvailableDatamodels() con return defs; } +std::optional DatamodelDefinitionHolder::getDatamodelVersion(const std::string& name) const { + 
const auto it = std::find_if(m_edmVersions.begin(), m_edmVersions.end(), + [&name](const auto& entry) { return std::get<0>(entry) == name; }); + if (it != m_edmVersions.end()) { + return std::get<1>(*it); + } + + return std::nullopt; +} + } // namespace podio diff --git a/src/RNTupleReader.cc b/src/RNTupleReader.cc index d391e578a..6e0afe446 100644 --- a/src/RNTupleReader.cc +++ b/src/RNTupleReader.cc @@ -84,7 +84,16 @@ void RNTupleReader::openFiles(const std::vector& filenames) { auto edmView = m_metadata->GetView>>(root_utils::edmDefBranchName); auto edm = edmView(0); - m_datamodelHolder = DatamodelDefinitionHolder(std::move(edm)); + DatamodelDefinitionHolder::VersionList edmVersions{}; + for (const auto& [name, _] : edm) { + try { + auto edmVersionView = m_metadata->GetView>(root_utils::edmVersionBranchName(name)); + auto edmVersion = edmVersionView(0); + edmVersions.emplace_back(name, podio::version::Version{edmVersion[0], edmVersion[1], edmVersion[2]}); + } catch (const ROOT::Experimental::RException&) { + } + } + m_datamodelHolder = DatamodelDefinitionHolder(std::move(edm), std::move(edmVersions)); auto availableCategoriesField = m_metadata->GetView>(root_utils::availableCategories); m_availableCategories = availableCategoriesField(0); diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index 0af6b1bd9..73c5db22d 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -263,6 +263,14 @@ void RNTupleWriter::finish() { *versionField = {podioVersion.major, podioVersion.minor, podioVersion.patch}; auto edmDefinitions = m_datamodelCollector.getDatamodelDefinitionsToWrite(); + for (const auto& [name, _] : edmDefinitions) { + auto edmVersion = DatamodelRegistry::instance().getDatamodelVersion(name); + if (edmVersion) { + auto edmVersionField = metadata->MakeField>(root_utils::edmVersionBranchName(name).c_str()); + *edmVersionField = {edmVersion->major, edmVersion->minor, edmVersion->patch}; + } + } + auto edmField = 
metadata->MakeField>>(root_utils::edmDefBranchName); *edmField = std::move(edmDefinitions); diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 1b4971601..7b0373ba4 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -222,7 +222,7 @@ std::vector getAvailableCategories(TChain* metaChain) { for (int i = 0; i < branches->GetEntries(); ++i) { const std::string name = branches->At(i)->GetName(); - const auto fUnder = name.find("___"); + const auto fUnder = name.find(root_utils::idTableName("")); if (fUnder != std::string::npos) { brNames.emplace_back(name.substr(0, fUnder)); } @@ -262,7 +262,19 @@ void ROOTReader::openFiles(const std::vector& filenames) { auto* datamodelDefs = new DatamodelDefinitionHolder::MapType{}; edmDefBranch->SetAddress(&datamodelDefs); edmDefBranch->GetEntry(0); - m_datamodelHolder = DatamodelDefinitionHolder(std::move(*datamodelDefs)); + + DatamodelDefinitionHolder::VersionList edmVersions{}; + for (const auto& [name, _] : *datamodelDefs) { + if (auto* edmVersionBranch = root_utils::getBranch(m_metaChain.get(), root_utils::edmVersionBranchName(name))) { + auto* edmVersion = new podio::version::Version{}; + edmVersionBranch->SetAddress(&edmVersion); + edmVersionBranch->GetEntry(0); + edmVersions.emplace_back(name, *edmVersion); + delete edmVersion; + } + } + + m_datamodelHolder = DatamodelDefinitionHolder(std::move(*datamodelDefs), std::move(edmVersions)); delete datamodelDefs; } diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index 33f02741e..21e2998db 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -4,6 +4,7 @@ #include "podio/GenericParameters.h" #include "podio/podioVersion.h" +#include "podio/utilities/DatamodelRegistryIOHelpers.h" #include "rootUtils.h" #include "TTree.h" @@ -185,6 +186,18 @@ void ROOTWriter::finish() { auto edmDefinitions = m_datamodelCollector.getDatamodelDefinitionsToWrite(); metaTree->Branch(root_utils::edmDefBranchName, &edmDefinitions); + // Collect the (build) versions of the generated 
datamodels where available + DatamodelDefinitionHolder::VersionList edmVersions; + for (const auto& [name, _] : edmDefinitions) { + auto edmVersion = podio::DatamodelRegistry::instance().getDatamodelVersion(name); + if (edmVersion) { + edmVersions.emplace_back(name, edmVersion.value()); + } + } + for (auto& [name, version] : edmVersions) { + metaTree->Branch(root_utils::edmVersionBranchName(name).c_str(), &version); + } + metaTree->Fill(); m_file->Write(); diff --git a/src/SIOReader.cc b/src/SIOReader.cc index 2609d611f..a54e5670b 100644 --- a/src/SIOReader.cc +++ b/src/SIOReader.cc @@ -120,10 +120,13 @@ void SIOReader::readEDMDefinitions() { sio::block_list blocks; blocks.emplace_back(std::make_shared>()); + blocks.emplace_back(std::make_shared>()); + sio::api::read_blocks(buffer.span(), blocks); auto datamodelDefs = static_cast*>(blocks[0].get()); - m_datamodelHolder = DatamodelDefinitionHolder(std::move(datamodelDefs->mapData)); + auto edmVersions = static_cast*>(blocks[1].get()); + m_datamodelHolder = DatamodelDefinitionHolder(std::move(datamodelDefs->mapData), std::move(edmVersions->mapData)); } } // namespace podio diff --git a/src/SIOWriter.cc b/src/SIOWriter.cc index aef6e52e6..ebc7edbab 100644 --- a/src/SIOWriter.cc +++ b/src/SIOWriter.cc @@ -2,6 +2,7 @@ #include "podio/Frame.h" #include "podio/SIOBlock.h" +#include "podio/utilities/DatamodelRegistryIOHelpers.h" #include "sioUtils.h" #include @@ -59,6 +60,19 @@ void SIOWriter::finish() { sio::block_list blocks; blocks.push_back(edmDefMap); + + DatamodelDefinitionHolder::VersionList edmVersions; + for (const auto& [name, _] : edmDefMap->mapData) { + auto edmVersion = podio::DatamodelRegistry::instance().getDatamodelVersion(name); + if (edmVersion) { + edmVersions.emplace_back(name, edmVersion.value()); + } + } + + auto edmVersionMap = + std::make_shared>(std::move(edmVersions)); + blocks.push_back(edmVersionMap); + m_tocRecord.addRecord(sio_helpers::SIOEDMDefinitionName, sio_utils::writeRecord(blocks, 
"EDMDefinitions", m_stream)); blocks.clear(); diff --git a/src/rootUtils.h b/src/rootUtils.h index a473c6f0c..03d6a386e 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -140,6 +140,12 @@ constexpr static auto versionBranchName = "PodioBuildVersion"; */ constexpr static auto edmDefBranchName = "EDMDefinitions"; +/// The name of the branch used for storing the version of a generated datamodel +/// (if available) +inline std::string edmVersionBranchName(const std::string& edmname) { + return edmname + "___Version"; +} + /** * Name of the branch for storing the idTable for a given category in the meta * data tree diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index fb5739805..1c0345447 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -14,7 +14,7 @@ set(extra_code extra_code/component_declarations.cc ) PODIO_GENERATE_DATAMODEL(datamodel datalayout.yaml headers sources - IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} DEPENDS ${extra_code} + IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} DEPENDS ${extra_code} VERSION ${${PROJECT_NAME}_VERSION} ) # Use the cmake building blocks to add the different parts (conditionally) diff --git a/tests/dumpmodel/CMakeLists.txt b/tests/dumpmodel/CMakeLists.txt index 6e9dfef1e..4ae61a66a 100644 --- a/tests/dumpmodel/CMakeLists.txt +++ b/tests/dumpmodel/CMakeLists.txt @@ -7,6 +7,7 @@ add_test(NAME datamodel_def_store_roundtrip_root COMMAND ${PROJECT_SOURCE_DIR}/t ${PROJECT_BINARY_DIR}/tests/root_io/example_frame.root datamodel ${PROJECT_SOURCE_DIR}/tests + --datamodel-version=${podio_VERSION} ) PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_root) @@ -34,6 +35,7 @@ if (ENABLE_SIO) ${PROJECT_BINARY_DIR}/tests/sio_io/example_frame.sio datamodel ${PROJECT_SOURCE_DIR}/tests + --datamodel-version=${podio_VERSION} ) PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_sio) # The extension model needs to know about the upstream model for generation @@ -64,6 +66,7 @@ if (ENABLE_RNTUPLE) 
${PROJECT_BINARY_DIR}/tests/root_io/example_rntuple.root datamodel ${PROJECT_SOURCE_DIR}/tests + --datamodel-version=${podio_VERSION} ) PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_rntuple) diff --git a/tests/read_frame.h b/tests/read_frame.h index bde5e2d28..6fc835c71 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -114,6 +114,32 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { return 1; } + const auto datamodelVersion = reader.currentFileVersion("datamodel").value_or(podio::version::Version{}); + if (assertBuildVersion && datamodelVersion != podio::version::build_version) { + std::cerr << "The (build) version of the datamodel could not be read back correctly. " + << "(expected: " << podio::version::build_version << ", actual: " << datamodelVersion << ")" << std::endl; + return 1; + } + + const auto extensionModelVersion = reader.currentFileVersion("extension_model"); + if (extensionModelVersion) { + std::cerr << "The (build) version of the extension model was available althought it shouldn't be. Its value is " + << extensionModelVersion.value() << std::endl; + } + + const auto availableCategories = reader.getAvailableCategories(); + if (availableCategories.size() != 2) { + std::cerr << "More categories than expected!" << std::endl; + return 1; + } + if (std::find(availableCategories.begin(), availableCategories.end(), "events") == availableCategories.end() || + std::find(availableCategories.begin(), availableCategories.end(), "other_events") == availableCategories.end()) { + std::cerr << "Could not read back the available categories as expected! (expected: ['events', 'other_events']), " + "actual: ['" + << availableCategories[0] << "', '" << availableCategories[1] << "']" << std::endl; + return 1; + } + if (reader.getEntries(podio::Category::Event) != 10) { std::cerr << "Could not read back the number of events correctly. 
" << "(expected:" << 10 << ", actual: " << reader.getEntries(podio::Category::Event) << ")" << std::endl; diff --git a/tools/podio-dump b/tools/podio-dump index 8379cf102..f85387711 100755 --- a/tools/podio-dump +++ b/tools/podio-dump @@ -19,6 +19,7 @@ def print_general_info(reader, filename): Args: reader (root_io.Reader, sio_io.Reader): An initialized reader + filename (str): The name of the input file """ legacy_text = " (this is a legacy file!)" if reader.is_legacy else "" print( @@ -26,10 +27,14 @@ def print_general_info(reader, filename): " (written with podio version: " f"{version_as_str(reader.current_file_version())})\n" ) - print( - "datamodel model definitions stored in this file: " - f'{", ".join(reader.datamodel_definitions)}' - ) + + print("datamodel model definitions stored in this file: ") + for edm_name in reader.datamodel_definitions: + try: + edm_version = reader.current_file_version(edm_name) + print(f" - {edm_name} ({version_as_str(edm_version)})") + except KeyError: + print(f" - {edm_name}") print() print("Frame categories in this file:")