From b00fd75d38daf962b0d3628381d068d4f43ae921 Mon Sep 17 00:00:00 2001 From: hegner Date: Wed, 13 Sep 2023 21:58:43 +0200 Subject: [PATCH] Schema evolution based on ROOT and Reflex dictionaries (#472) * Add SchemaEvolution singleton to hold evolution functions * Inject type information into collection buffers * Inject current schema version into buffers from buffer factory * Require registration of each evolution function * Create schema_evolution test subdirectory and build old datamodel * creating components and datatypes for explicit schema evolution * add code generation for reflex schema evolution * Rearrange schema evolution tests to not interfere with others * Move function implementations into .cc files for Components Co-authored-by: Thomas Madlener --- .github/scripts/pylint.rc | 2 +- python/podio_class_generator.py | 126 ++++++++++++++++-- python/podio_schema_evolution.py | 13 +- python/templates/Collection.cc.jinja2 | 47 ++++++- python/templates/CollectionData.h.jinja2 | 5 + python/templates/macros/collections.jinja2 | 14 +- python/templates/selection.xml.jinja2 | 16 ++- src/SchemaEvolution.cc | 24 ++-- tests/CMakeLists.txt | 5 +- tests/CTestCustom.cmake | 3 + tests/datalayout.yaml | 5 + tests/schema_evolution.yaml | 13 -- tests/schema_evolution/CMakeLists.txt | 36 +++++ tests/schema_evolution/README.md | 20 +++ .../datalayout_new.yaml} | 21 ++- tests/schema_evolution/read_new_data.h | 81 +++++++++++ tests/schema_evolution/root_io/CMakeLists.txt | 4 + .../root_io/read_new_data_root.cpp | 7 + .../root_io/write_old_data_root.cpp | 7 + tests/schema_evolution/schema_evolution.yaml | 13 ++ tests/schema_evolution/write_old_data.h | 76 +++++++++++ 21 files changed, 479 insertions(+), 59 deletions(-) delete mode 100644 tests/schema_evolution.yaml create mode 100644 tests/schema_evolution/CMakeLists.txt create mode 100644 tests/schema_evolution/README.md rename tests/{datalayout_old.yaml => schema_evolution/datalayout_new.yaml} (95%) create mode 100644 tests/schema_evolution/read_new_data.h create mode 100644 tests/schema_evolution/root_io/CMakeLists.txt create mode 100644 tests/schema_evolution/root_io/read_new_data_root.cpp create mode 100644 tests/schema_evolution/root_io/write_old_data_root.cpp create mode 100644 tests/schema_evolution/schema_evolution.yaml create mode 100644 tests/schema_evolution/write_old_data.h diff --git a/.github/scripts/pylint.rc b/.github/scripts/pylint.rc index 2db65ccd7..4d1492c68 100644 --- a/.github/scripts/pylint.rc +++ b/.github/scripts/pylint.rc @@ -285,7 +285,7 @@ max-statements=50 max-parents=7 # Maximum number of attributes for a class (see R0902). -max-attributes=25 +max-attributes=30 # Minimum number of public methods for a class (see R0903). min-public-methods=0 diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index c19375d5d..59918ece9 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- """Podio class generator script""" +import copy import os import sys import subprocess @@ -14,6 +15,7 @@ import jinja2 from podio_schema_evolution import DataModelComparator # dealing with cyclic imports +from podio_schema_evolution import RenamedMember, root_filter, RootIoRule from podio.podio_config_reader import PodioConfigReader from podio.generator_utils import DataType, DefinitionError, DataModelJSONEncoder @@ -89,9 +91,16 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr # schema evolution specific code self.old_yamlfile = old_description self.evolution_file = evolution_file + self.old_schema_version = None + self.old_schema_version_int = None self.old_datamodel = None self.old_datamodels_components = set() self.old_datamodels_datatypes = set() + self.root_schema_dict = {} # containing the root relevant schema evolution per datatype + # information to update the selection.xml + self.root_schema_component_names = set() + self.root_schema_datatype_names = set() + self.root_schema_iorules = set() try: self.datamodel = PodioConfigReader.read(yamlfile, package_name, upstream_edm) @@ -115,19 +124,20 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr def process(self): """Run the actual generation""" + self.process_schema_evolution() + for name, component in self.datamodel.components.items(): self._process_component(name, component) - for name, datatype in self.datamodel.datatypes.items(): self._process_datatype(name, datatype) self._write_edm_def_file() if 'ROOT' in self.io_handlers: + self.prepare_iorules() self._create_selection_xml() self._write_cmake_lists_file() - self.process_schema_evolution() self.print_report() @@ -141,7 +151,8 @@ def process_schema_evolution(self): evolution_file=self.evolution_file) comparator.read() comparator.compare() - + self.old_schema_version = f"v{comparator.datamodel_old.schema_version}" + self.old_schema_version_int = comparator.datamodel_old.schema_version # some sanity checks if len(comparator.errors) > 0: print(f"The given datamodels '{self.yamlfile}' and '{self.old_yamlfile}' \ @@ -156,6 +167,12 @@ def process_schema_evolution(self): print(warning) sys.exit(-1) + # now go through all the io_handlers and see what we have to do + if 'ROOT' in self.io_handlers: + for item in root_filter(comparator.schema_changes): + # add whatever is relevant to our ROOT schema evolution + self.root_schema_dict.setdefault(item.klassname, []).append(item) + def print_report(self): """Print a summary report about the generated code""" if not self.verbose: @@ -170,8 +187,15 @@ def print_report(self): print(summaryline) print() - def _eval_template(self, template, data): + def _eval_template(self, template, data, old_schema_data=None): """Fill the specified template""" + # merge the info of data and the old schema into a single dict + if old_schema_data: + data['OneToOneRelations_old'] = old_schema_data['OneToOneRelations'] + data['OneToManyRelations_old'] = old_schema_data['OneToManyRelations'] + data['VectorMembers_old'] = old_schema_data['VectorMembers'] + data['old_schema_version'] = self.old_schema_version_int + return self.env.get_template(template).render(data) def _write_file(self, name, content): @@ -220,7 +244,7 @@ def get_fn_format(tmpl): return fn_templates - def _fill_templates(self, template_base, data): + def _fill_templates(self, template_base, data, old_schema_data=None): """Fill the template and write the results to file""" # Update the passed data with some global things that are the same for all # files @@ -229,7 +253,7 @@ def _fill_templates(self, template_base, data): data['incfolder'] = self.incfolder for filename, template in self._get_filenames_templates(template_base, data['class'].bare_type): - self._write_file(filename, self._eval_template(template, data)) + self._write_file(filename, self._eval_template(template, data, old_schema_data)) def _process_component(self, name, component): """Process one component""" @@ -247,12 +271,71 @@ def _process_component(self, name, component): component['includes'] = self._sort_includes(includes) component['class'] = DataType(name) - self._fill_templates('Component', component) + # Add potentially older schema for schema evolution + # based on ROOT capabilities for now + if name in self.root_schema_dict: + schema_evolutions = self.root_schema_dict[name] + component = copy.deepcopy(component) + for schema_evolution in schema_evolutions: + if isinstance(schema_evolution, RenamedMember): + for member in component['Members']: + if member.name == schema_evolution.member_name_new: + member.name = schema_evolution.member_name_old + component['class'] = DataType(name + self.old_schema_version) + else: + raise NotImplementedError + self._fill_templates('Component', component) + self.root_schema_component_names.add(name + self.old_schema_version) + + @staticmethod + def _replace_component_in_paths(oldname, newname, paths): + """Replace component name by another one in existing paths""" + # strip the namespace + shortoldname = oldname.split("::")[-1] + shortnewname = newname.split("::")[-1] + # and do the replace in place + for index, thePath in enumerate(paths): + if shortoldname in thePath: + newPath = thePath.replace(shortoldname, shortnewname) + paths[index] = newPath + def _process_datatype(self, name, definition): """Process one datatype""" datatype = self._preprocess_datatype(name, definition) + + # ROOT schema evolution preparation + # Compute and prepare the potential schema evolution parts + schema_evolution_datatype = copy.deepcopy(datatype) + needs_schema_evolution = False + for member in schema_evolution_datatype['Members']: + if member.is_array: + if member.array_type in self.root_schema_dict: + needs_schema_evolution = True + self._replace_component_in_paths(member.array_type, member.array_type + self.old_schema_version, + schema_evolution_datatype['includes_data']) + member.full_type = member.full_type.replace(member.array_type, member.array_type + self.old_schema_version) + member.array_type = member.array_type + self.old_schema_version + + else: + if member.full_type in self.root_schema_dict: + needs_schema_evolution = True + # prepare the ROOT I/O rule + self._replace_component_in_paths(member.full_type, member.full_type + self.old_schema_version, + schema_evolution_datatype['includes_data']) + member.full_type = member.full_type + self.old_schema_version + member.bare_type = member.bare_type + self.old_schema_version + + if needs_schema_evolution: + print(f" Preparing explicit schema evolution for {name}") + schema_evolution_datatype['class'].bare_type = schema_evolution_datatype['class'].bare_type + self.old_schema_version # noqa + self._fill_templates('Data', schema_evolution_datatype) + self.root_schema_datatype_names.add(name + self.old_schema_version) + self._fill_templates('Collection', datatype, schema_evolution_datatype) + else: + self._fill_templates('Collection', datatype) + self._fill_templates('Data', datatype) self._fill_templates('Object', datatype) self._fill_templates('MutableObject', datatype) @@ -263,6 +346,28 @@ def _process_datatype(self, name, definition): if 'SIO' in self.io_handlers: self._fill_templates('SIOBlock', datatype) + def prepare_iorules(self): + """Prepare the IORules to be put in the Reflex dictionary""" + for type_name, schema_changes in self.root_schema_dict.items(): + for schema_change in schema_changes: + if isinstance(schema_change, RenamedMember): + # find out the type of the renamed member + component = self.datamodel.components[type_name] + for member in component["Members"]: + if member.name == schema_change.member_name_new: + member_type = member.full_type + + iorule = RootIoRule() + iorule.sourceClass = type_name + iorule.targetClass = type_name + iorule.version = self.old_schema_version.lstrip("v") + iorule.source = f'{member_type} {schema_change.member_name_old}' + iorule.target = schema_change.member_name_new + iorule.code = f'{iorule.target} = onfile.{schema_change.member_name_old};' + self.root_schema_iorules.add(iorule) + else: + raise NotImplementedError("Schema evolution for this type not yet implemented") + def _preprocess_for_obj(self, datatype): """Do the preprocessing that is necessary for the Obj classes""" fwd_declarations = defaultdict(list) @@ -483,10 +588,13 @@ def _needs_include(self, classname) -> IncludeFrom: def _create_selection_xml(self): """Create the selection xml that is necessary for ROOT I/O""" - data = {'components': [DataType(c) for c in self.datamodel.components], + data = {'version': self.datamodel.schema_version, + 'components': [DataType(c) for c in self.datamodel.components], 'datatypes': [DataType(d) for d in self.datamodel.datatypes], 'old_schema_components': [DataType(d) for d in - self.old_datamodels_datatypes | self.old_datamodels_components]} + self.root_schema_datatype_names | self.root_schema_component_names], # noqa + 'iorules': self.root_schema_iorules} + self._write_file('selection.xml', self._eval_template('selection.xml.jinja2', data)) def _build_include(self, member): diff --git a/python/podio_schema_evolution.py b/python/podio_schema_evolution.py index 7bd5b676b..e2b7a916f 100755 --- a/python/podio_schema_evolution.py +++ b/python/podio_schema_evolution.py @@ -107,6 +107,17 @@ def __init__(self, name, member_name_old, member_name_new): super().__init__(f"'{self.name}': member '{self.member_name_old}' renamed to '{self.member_name_new}'.") +class RootIoRule: + """A placeholder IORule class""" + def __init__(self): + self.sourceClass = None + self.targetClass = None + self.version = None + self.source = None + self.target = None + self.code = None + + def sio_filter(schema_changes): """ Checks what is required/supported for the SIO backend @@ -225,7 +236,7 @@ def heuristics_members(self, added_members, dropped_members, schema_changes): """make analysis of member changes in a given data type """ for dropped_member in dropped_members: added_members_in_definition = [member for member in added_members if - dropped_member.definition_name == member.definition_name] + dropped_member.definition_name == member.definition_name] for added_member in added_members_in_definition: if added_member.member.full_type == dropped_member.member.full_type: # this is a rename candidate. So let's see whether it has been explicitly declared by the user diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 665e2188d..f8b7a17e8 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -9,6 +9,10 @@ #include "{{ incfolder }}{{ class.bare_type }}Collection.h" #include "{{ incfolder }}DatamodelDefinition.h" +{% if old_schema_version is defined %} +#include "{{ incfolder }}{{ class.bare_type }}v{{ old_schema_version }}Data.h" +{% endif %} + {% for include in includes_coll_cc %} {{ include }} {% endfor %} @@ -173,7 +177,18 @@ podio::SchemaVersionT {{ collection_type }}::getSchemaVersion() const { return {{ package_name }}::meta::schemaVersion; } - {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, 1) }} +// anonymous namespace for registration with the CollectionBufferFactory. This +// ensures that we don't have to make up arbitrary namespace names here, since +// none of this is publicly visible +namespace { + {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, -1) }} + +{# +// SCHEMA EVOLUTION: Not yet required with only ROOT backend +// {% if old_schema_version is defined %} +// {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations_old, OneToOneRelations_old, VectorMembers_old, old_schema_version) }} +// {% endif %} +#} // The usual trick with an IIFE and a static variable inside a funtion and then // making sure to call that function during shared library loading @@ -182,6 +197,36 @@ bool registerCollection() { auto& factory = podio::CollectionBufferFactory::mutInstance(); factory.registerCreationFunc("{{ class.full_type }}Collection", {{ package_name }}::meta::schemaVersion, createBuffers); + // Make the SchemaEvolution aware of the current version by + // registering a no-op function for this and all preceeding versions + // will be overriden whenever an explicit action is required + for (unsigned int schemaVersion=1; schemaVersion< {{ package_name }}::meta::schemaVersion+1; ++schemaVersion) { + podio::SchemaEvolution::mutInstance().registerEvolutionFunc( + "{{ class.full_type }}Collection", + schemaVersion, + {{ package_name }}::meta::schemaVersion, + podio::SchemaEvolution::noOpSchemaEvolution, + podio::SchemaEvolution::Priority::AutoGenerated + ); + } + +{% if old_schema_version is defined %} + // register a buffer creation function for the schema evolution buffer + // SCHEMA EVOLUTION: Not yet required with only ROOT backend + // factory.registerCreationFunc("{{ class.full_type }}Collection", {{ old_schema_version }}, createBuffersV{{old_schema_version}}); //TODO + + //Make the SchemaEvolution aware of any other non-trivial conversion + podio::SchemaEvolution::mutInstance().registerEvolutionFunc( + "{{ class.full_type }}Collection", + {{ old_schema_version }}, + {{ package_name }}::meta::schemaVersion, + podio::SchemaEvolution::noOpSchemaEvolution, + podio::SchemaEvolution::Priority::AutoGenerated + ); + + +{% endif %} + return true; }(); return reg; diff --git a/python/templates/CollectionData.h.jinja2 b/python/templates/CollectionData.h.jinja2 index 0eee7dabe..039fd1f4a 100644 --- a/python/templates/CollectionData.h.jinja2 +++ b/python/templates/CollectionData.h.jinja2 @@ -11,6 +11,11 @@ {{ include }} {% endfor %} +// schema evolution specific includes +{% if schema_evolution_data is defined %} +#include "{{ incfolder }}{{ schema_evolution_data }}Data" +{% endif %} + // podio specific includes #include "podio/CollectionBuffers.h" #include "podio/ICollectionProvider.h" diff --git a/python/templates/macros/collections.jinja2 b/python/templates/macros/collections.jinja2 index 3224b6fc0..ae3d5687d 100644 --- a/python/templates/macros/collections.jinja2 +++ b/python/templates/macros/collections.jinja2 @@ -155,16 +155,20 @@ void {{ class.bare_type }}Collection::print(std::ostream& os, bool flush) const {% macro createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, schemaVersion) %} -// anonymous namespace for registration with the CollectionBufferFactory. This -// ensures that we don't have to make up arbitrary namespace names here, since -// none of this is publicly visible -namespace { +{% if schemaVersion == -1 %} podio::CollectionReadBuffers createBuffers(bool isSubset) { +{% else %} +podio::CollectionReadBuffers createBuffersV{{ schemaVersion }}(bool isSubset) { +{% endif %} auto readBuffers = podio::CollectionReadBuffers{}; readBuffers.type = "{{ class.full_type }}Collection"; +{% if schemaVersion == -1 %} readBuffers.schemaVersion = {{ package_name }}::meta::schemaVersion; readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer; - +{% else %} + readBuffers.schemaVersion = {{ schemaVersion }}; + readBuffers.data = isSubset ? nullptr : new std::vector<{{ class.bare_type }}v{{ schemaVersion }}Data>; +{% endif %} // The number of ObjectID vectors is either 1 or the sum of OneToMany and // OneToOne relations const auto nRefs = isSubset ? 1 : {{ OneToManyRelations | length }} + {{ OneToOneRelations | length }}; diff --git a/python/templates/selection.xml.jinja2 b/python/templates/selection.xml.jinja2 index b0da9ff74..61d9a7a9c 100644 --- a/python/templates/selection.xml.jinja2 +++ b/python/templates/selection.xml.jinja2 @@ -1,11 +1,19 @@ {% macro class_selection(class, prefix='', postfix='') %} {%- if class.namespace %} - + {%- else %} - + {%- endif %} {% endmacro %} +{% macro ioread(iorule) %} + + + +{% endmacro %} + @@ -31,4 +39,8 @@ {% endfor %} + +{% for iorule in iorules %} +{{ ioread(iorule) }} +{% endfor %} diff --git a/src/SchemaEvolution.cc b/src/SchemaEvolution.cc index 60b53264e..4576cde1e 100644 --- a/src/SchemaEvolution.cc +++ b/src/SchemaEvolution.cc @@ -24,15 +24,15 @@ podio::CollectionReadBuffers SchemaEvolution::evolveBuffers(const podio::Collect } const auto& typeEvolFuncs = m_evolutionFuncs[mapIndex]; - if (fromVersion < typeEvolFuncs.size() - 1) { + if (fromVersion < typeEvolFuncs.size()) { // Do we need this check? In principle we could ensure at registration // time that this is always guaranteed return typeEvolFuncs[fromVersion - 1](oldBuffers, fromVersion); } } - std::cerr << "PODIO WARNING: evolveBuffers has no knowledge of how to evolve buffers for " << collType << std::endl; - // TODO: exception + std::cerr << "PODIO WARNING: evolveBuffers has no knowledge of how to evolve buffers for " << collType + << " from version " << fromVersion << std::endl; // TODO: exception return oldBuffers; } @@ -53,20 +53,18 @@ void SchemaEvolution::registerEvolutionFunc(const std::string& collType, SchemaV m_evolutionFuncs.emplace_back(); } - // From here on out we don't need the mutabale any longer + // From here on out we don't need the mutable any longer const auto& [_, mapIndex] = typeIt->second; auto& versionMap = m_evolutionFuncs[mapIndex]; const auto prevSize = versionMap.size(); - if (prevSize < fromVersion) { - versionMap.resize(fromVersion); - versionMap[fromVersion - 1] = evolutionFunc; - } else { - if (priority == Priority::UserDefined) { - versionMap[fromVersion - 1] = evolutionFunc; - } else { - std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; - } + if (prevSize < currentVersion) { + versionMap.resize(currentVersion); + } + + versionMap[fromVersion - 1] = evolutionFunc; + if (priority != Priority::UserDefined) { + // std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; } } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index bfd203fb4..50d2846ab 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,9 +10,7 @@ SET(podio_PYTHON_DIR ${PROJECT_SOURCE_DIR}/python CACHE PATH "Path to the podio PODIO_GENERATE_DATAMODEL(datamodel datalayout.yaml headers sources IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} - OLD_DESCRIPTION datalayout_old.yaml - SCHEMA_EVOLUTION schema_evolution.yaml - ) + ) # Use the cmake building blocks to add the different parts (conditionally) PODIO_ADD_DATAMODEL_CORE_LIB(TestDataModel "${headers}" "${sources}") @@ -60,6 +58,7 @@ if (ENABLE_SIO) endif() add_subdirectory(unittests) add_subdirectory(dumpmodel) +add_subdirectory(schema_evolution) # Tests that don't fit into one of the broad categories above CREATE_PODIO_TEST(ostream_operator.cpp "") diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index 59d9efa81..f386ec07b 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -64,6 +64,9 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ datamodel_def_store_roundtrip_root_extension datamodel_def_store_roundtrip_sio datamodel_def_store_roundtrip_sio_extension + + write_old_data_root + read_new_data_root ) foreach(version in @legacy_test_versions@) diff --git a/tests/datalayout.yaml b/tests/datalayout.yaml index 06f8e3075..de3ae703c 100755 --- a/tests/datalayout.yaml +++ b/tests/datalayout.yaml @@ -9,6 +9,11 @@ options : includeSubfolder: True components : + ToBeDroppedStruct: + Description: "A struct to be dropped during schema evolution" + Members: + - int x + SimpleStruct: Members: - int x [mm] diff --git a/tests/schema_evolution.yaml b/tests/schema_evolution.yaml deleted file mode 100644 index 561f36fbe..000000000 --- a/tests/schema_evolution.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- -from_schema_version : 1 -to_schema_version : 2 - -evolutions: - - ex2::NamespaceStruct: - member_rename: - - y_old - - y - - EventInfoOldName: - class_renamed_to: EventInfo diff --git a/tests/schema_evolution/CMakeLists.txt b/tests/schema_evolution/CMakeLists.txt new file mode 100644 index 000000000..a86963d82 --- /dev/null +++ b/tests/schema_evolution/CMakeLists.txt @@ -0,0 +1,36 @@ +PODIO_GENERATE_DATAMODEL(datamodel datalayout_new.yaml headers sources + IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} + OUTPUT_FOLDER ${CMAKE_CURRENT_BINARY_DIR}/datamodel_new + OLD_DESCRIPTION ${PROJECT_SOURCE_DIR}/tests/datalayout.yaml + SCHEMA_EVOLUTION schema_evolution.yaml +) + +PODIO_ADD_DATAMODEL_CORE_LIB(TestDataModel_v3 "${headers}" "${sources}" + OUTPUT_FOLDER ${CMAKE_CURRENT_BINARY_DIR}/datamodel_new +) + +PODIO_ADD_ROOT_IO_DICT(TestDataModel_v3Dict TestDataModel_v3 "${headers}" ${CMAKE_CURRENT_BINARY_DIR}/datamodel_new/src/selection.xml + OUTPUT_FOLDER ${CMAKE_CURRENT_BINARY_DIR}/datamodel_new +) + +PODIO_ADD_SIO_IO_BLOCKS(TestDataModel_v3 "${headers}" "${sources}") + +#--- Helper function to create a test in an environment that is suitable to +#--- read data in a new schema version +function(PODIO_CREATE_READ_NEW_DATA_TEST sourcefile additional_libs) + string( REPLACE ".cpp" "" name ${sourcefile} ) + add_executable( ${name} ${sourcefile} ) + add_test(NAME ${name} COMMAND ${name}) + + target_link_libraries(${name} PRIVATE TestDataModel_v3 ${additional_libs}) + target_include_directories(${name} PRIVATE ${PROJECT_SOURCE_DIR}/tests/schema_evolution) + + set_property(TEST ${name} + PROPERTY ENVIRONMENT + LD_LIBRARY_PATH=${PROJECT_BINARY_DIR}/src:${PROJECT_BINARY_DIR}/tests/schema_evolution:$:$<$:$>:$ENV{LD_LIBRARY_PATH} + PODIO_SIO_BLOCK=${CMAKE_CURRENT_BINARY_DIR} + ROOT_INCLUDE_PATH=${PROJECT_BINARY_DIR}/tests/schema_evolution/datamodel_new:${PROJECT_SOURCE_DIR}/include + ) +endfunction() + +add_subdirectory(root_io) diff --git a/tests/schema_evolution/README.md b/tests/schema_evolution/README.md new file mode 100644 index 000000000..1c83fc3f6 --- /dev/null +++ b/tests/schema_evolution/README.md @@ -0,0 +1,20 @@ +# Schema Evolution tests +This folder contains tests for the schema evolution functionality in podio. The +functionality is tested by first writing data with an old schema version and +then reading in with the current schema version. +[`datalayout_new.yaml`](./datalayout_new.yaml) holds the definition of the new +version, using schema version 3, while the +[`datalayout.yaml`](../datalayout.yaml) that is also used for the other I/O +tests is used as the old version (schema version 2). + +## Differences between the two schema versions +Since it is not immediately visible from the test code this list contains the +differences between the two schema versions, and also how this evolution is +tested (if it is supported) + +| component / datatype | difference from v2 to v3 | purpose of test | tested with | +|--|--|--|--| +| `SimpleStruct` | no `int t` member in v2 | Addition of new members in components | As member of `ExampleWithArrayComponent` | +| `ExampleHit` | no `double t` member in v1 | Addition of new members in datatypes | Directly via `ExampleHit` | +| `ex2::NamespaceStruct` | renaming of `y` to `y_new` | Renaming of member variables | As member of `ex42::ExampleWithNamespace` | +| `ex42::ExampleWithARelation` | type of `number` member | migration of `float` to `double` | Direcetly via `ex42::ExampleWithARelation` | diff --git a/tests/datalayout_old.yaml b/tests/schema_evolution/datalayout_new.yaml similarity index 95% rename from tests/datalayout_old.yaml rename to tests/schema_evolution/datalayout_new.yaml index eea733ffa..e9938fd1c 100755 --- a/tests/datalayout_old.yaml +++ b/tests/schema_evolution/datalayout_new.yaml @@ -1,5 +1,5 @@ --- -schema_version : 1 +schema_version : 3 options : # should getters / setters be prefixed with get / set? @@ -10,20 +10,18 @@ options : components : - ToBeDroppedStruct: - Members: - - int x - SimpleStruct: Members: - int x + - int y - int z + - int t - std::array p # can also add c'tors: ExtraCode : declaration: " - SimpleStruct() : x(0),y(0),z(0) {}\n - SimpleStruct( const int* v) : x(v[0]),y(v[1]),z(v[2]) {} + SimpleStruct() : x(0),z(0) {}\n + SimpleStruct( const int* v) : x(v[0]),z(v[2]) {} " NotSoSimpleStruct: @@ -33,7 +31,7 @@ components : ex2::NamespaceStruct: Members: - int x - - int y_old + - int y_new ex2::NamespaceInNamespaceStruct: Members: @@ -52,7 +50,7 @@ components : datatypes : - EventInfoOldName: + EventInfoNewName: Description : "Event info" Author : "B. Hegner" Members : @@ -71,7 +69,8 @@ datatypes : - double x // x-coordinate - double y // y-coordinate - double z // z-coordinate - - double energy // measured energy deposit + - double energy // energy + - double t // time ExampleMC : Description : "Example MC-particle" @@ -151,7 +150,7 @@ datatypes : Description : "Type with namespace and namespaced relation" Author : "Joschka Lingemann" Members: - - float number // just a number + - double number // just a number OneToOneRelations : - ex42::ExampleWithNamespace ref // a ref in a namespace OneToManyRelations : diff --git a/tests/schema_evolution/read_new_data.h b/tests/schema_evolution/read_new_data.h new file mode 100644 index 000000000..cdc3abf46 --- /dev/null +++ b/tests/schema_evolution/read_new_data.h @@ -0,0 +1,81 @@ +#ifndef PODIO_TESTS_SCHEMAEVOLUTION_READNEWDATA_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TESTS_SCHEMAEVOLUTION_READNEWDATA_H // NOLINT(llvm-header-guard): folder structure not suitable + +#include "datamodel/ExampleHitCollection.h" +#include "datamodel/ExampleWithARelationCollection.h" +#include "datamodel/ExampleWithArrayComponentCollection.h" +#include "datamodel/ExampleWithNamespaceCollection.h" + +#include "podio/Frame.h" + +#include +#include + +#define ASSERT_EQUAL(actual, expected, msg) \ + if ((expected) != (actual)) { \ + std::cerr << __PRETTY_FUNCTION__ << ": " << msg << " (expected: " << expected << ", actual: " << actual << ")"; \ + return 1; \ + } + +int readSimpleStruct(const podio::Frame& event) { + const auto& coll = event.get("simpleStructTest"); + auto elem = coll[0]; + const auto sstruct = elem.s(); + + ASSERT_EQUAL(sstruct.y, 0, "New component member not 0 initialized"); + ASSERT_EQUAL(sstruct.x, 42, "Existing component member changed"); + ASSERT_EQUAL(sstruct.z, 123, "Existing component member changed"); + + return 0; +} + +int readExampleHit(const podio::Frame& event) { + const auto& coll = event.get("datatypeMemberAdditionTest"); + auto elem = coll[0]; + + ASSERT_EQUAL(elem.t(), 0, "New datatype member variable not 0 initialized"); + ASSERT_EQUAL(elem.x(), 1.23, "Member variables unrelated to schema evolution have changed"); + ASSERT_EQUAL(elem.y(), 1.23, "Member variables unrelated to schema evolution have changed"); + ASSERT_EQUAL(elem.z(), 1.23, "Member variables unrelated to schema evolution have changed"); + ASSERT_EQUAL(elem.energy(), 0, "Member variables unrelated to schema evolution have changed"); + ASSERT_EQUAL(elem.cellID(), 0xcaffee, "Member variables unrelated to schema evolution have changed"); + + return 0; +} + +int readExampleWithNamespace(const podio::Frame& event) { + const auto& coll = event.get("componentMemberRenameTest"); + auto elem = coll[0]; + + ASSERT_EQUAL(elem.y_new(), 42, "Renamed component member variable does not have the expected value"); + ASSERT_EQUAL(elem.x(), 123, "Member variables unrelated to schema evolution have changed"); + + return 0; +} + +int readExampleWithARelation(const podio::Frame& event) { + const auto& coll = event.get("floatToDoubleMemberTest"); + auto elem = coll[0]; + + ASSERT_EQUAL(elem.number(), (double)3.14f, "Conversion from float to double member does not work as expected"); + + return 0; +} + +template +int read_new_data(const std::string& filename) { + ReaderT reader{}; + reader.openFile(filename); + + const auto event = podio::Frame(reader.readEntry("events", 0)); + + int result = 0; + result += readSimpleStruct(event); + result += readExampleHit(event); + result += readExampleWithNamespace(event); + result += readExampleWithARelation(event); + + return result; +} + +#endif // PODIO_TESTS_SCHEMAEVOLUTION_WRITEOLDDATA_H diff --git a/tests/schema_evolution/root_io/CMakeLists.txt b/tests/schema_evolution/root_io/CMakeLists.txt new file mode 100644 index 000000000..53e8b451f --- /dev/null +++ b/tests/schema_evolution/root_io/CMakeLists.txt @@ -0,0 +1,4 @@ +CREATE_PODIO_TEST(write_old_data_root.cpp "TestDataModelDict;podioRootIO") +PODIO_CREATE_READ_NEW_DATA_TEST(read_new_data_root.cpp "TestDataModel_v3Dict;podio::podioRootIO") + +set_property(TEST read_new_data_root PROPERTY DEPENDS write_old_data_root) diff --git a/tests/schema_evolution/root_io/read_new_data_root.cpp b/tests/schema_evolution/root_io/read_new_data_root.cpp new file mode 100644 index 000000000..77f5736a4 --- /dev/null +++ b/tests/schema_evolution/root_io/read_new_data_root.cpp @@ -0,0 +1,7 @@ +#include "read_new_data.h" + +#include "podio/ROOTFrameReader.h" + +int main() { + return read_new_data("example_data_old_schema.root"); +} diff --git a/tests/schema_evolution/root_io/write_old_data_root.cpp b/tests/schema_evolution/root_io/write_old_data_root.cpp new file mode 100644 index 000000000..a3456229f --- /dev/null +++ b/tests/schema_evolution/root_io/write_old_data_root.cpp @@ -0,0 +1,7 @@ +#include "schema_evolution/write_old_data.h" + +#include "podio/ROOTFrameWriter.h" + +int main() { + return writeData("example_data_old_schema.root"); +} diff --git a/tests/schema_evolution/schema_evolution.yaml b/tests/schema_evolution/schema_evolution.yaml new file mode 100644 index 000000000..82b6e5e93 --- /dev/null +++ b/tests/schema_evolution/schema_evolution.yaml @@ -0,0 +1,13 @@ +--- +from_schema_version : 2 +to_schema_version : 3 + +evolutions: + + ex2::NamespaceStruct: + member_rename: + - y + - y_new + + EventInfo: + class_renamed_to: EventInfoNewName diff --git a/tests/schema_evolution/write_old_data.h b/tests/schema_evolution/write_old_data.h new file mode 100644 index 000000000..7676ae34b --- /dev/null +++ b/tests/schema_evolution/write_old_data.h @@ -0,0 +1,76 @@ +#ifndef PODIO_TESTS_SCHEMAEVOLUTION_WRITEOLDDATA_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TESTS_SCHEMAEVOLUTION_WRITEOLDDATA_H // NOLINT(llvm-header-guard): folder structure not suitable + +#include "datamodel/ExampleHitCollection.h" +#include "datamodel/ExampleWithARelationCollection.h" +#include "datamodel/ExampleWithArrayComponentCollection.h" +#include "datamodel/ExampleWithNamespaceCollection.h" + +#include "podio/Frame.h" + +#include + +/// This is a datatype that holds a SimpleStruct component +auto writeSimpleStruct() { + ExampleWithArrayComponentCollection coll; + auto elem = coll.create(); + auto sstruct = SimpleStruct(); + sstruct.x = 42; + sstruct.z = 123; + elem.s(sstruct); + + return coll; +} + +auto writeExampleHit() { + ExampleHitCollection coll; + auto elem = coll.create(); + elem.x(1.23); + elem.y(1.23); + elem.z(1.23); + elem.cellID(0xcaffee); + + return coll; +} + +auto writeExampleWithNamespace() { + ex42::ExampleWithNamespaceCollection coll; + auto elem = coll.create(); + elem.y(42); + elem.x(123); + + return coll; +} + +auto writeExamplewWithARelation() { + ex42::ExampleWithARelationCollection coll; + auto elem = coll.create(); + elem.number(3.14f); + + return coll; +} + +podio::Frame createFrame() { + podio::Frame event; + + event.put(writeSimpleStruct(), "simpleStructTest"); + event.put(writeExampleHit(), "datatypeMemberAdditionTest"); + event.put(writeExampleWithNamespace(), "componentMemberRenameTest"); + event.put(writeExamplewWithARelation(), "floatToDoubleMemberTest"); + + return event; +} + +template +int writeData(const std::string& filename) { + WriterT writer(filename); + + auto event = createFrame(); + writer.writeFrame(event, "events"); + + writer.finish(); + + return 0; +} + +#endif // PODIO_TESTS_SCHEMAEVOLUTION_WRITEOLDDATA_H