Skip to content

Commit

Permalink
Schema evolution based on ROOT and Reflex dictionaries (#472)
Browse files Browse the repository at this point in the history
* Add SchemaEvolution singleton to hold evolution functions

* Inject type information into collection buffers

* Inject current schema version into buffers from buffer factory

* Require registration of each evolution function

* Create schema_evolution test subdirectory and build old datamodel

* creating components and datatypes for explicit schema evolution

* add code generation for reflex schema evolution

* Rearrange schema evolution tests to not interfere with others

* Move function implementations into .cc files for Components

Co-authored-by: Thomas Madlener <[email protected]>
  • Loading branch information
hegner and tmadlener authored Sep 13, 2023
1 parent 06eae8e commit b00fd75
Show file tree
Hide file tree
Showing 21 changed files with 479 additions and 59 deletions.
2 changes: 1 addition & 1 deletion .github/scripts/pylint.rc
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ max-statements=50
max-parents=7

# Maximum number of attributes for a class (see R0902).
max-attributes=25
max-attributes=30

# Minimum number of public methods for a class (see R0903).
min-public-methods=0
Expand Down
126 changes: 117 additions & 9 deletions python/podio_class_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
"""Podio class generator script"""

import copy
import os
import sys
import subprocess
Expand All @@ -14,6 +15,7 @@
import jinja2

from podio_schema_evolution import DataModelComparator # dealing with cyclic imports
from podio_schema_evolution import RenamedMember, root_filter, RootIoRule
from podio.podio_config_reader import PodioConfigReader
from podio.generator_utils import DataType, DefinitionError, DataModelJSONEncoder

Expand Down Expand Up @@ -89,9 +91,16 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr
# schema evolution specific code
self.old_yamlfile = old_description
self.evolution_file = evolution_file
self.old_schema_version = None
self.old_schema_version_int = None
self.old_datamodel = None
self.old_datamodels_components = set()
self.old_datamodels_datatypes = set()
self.root_schema_dict = {} # containing the root relevant schema evolution per datatype
# information to update the selection.xml
self.root_schema_component_names = set()
self.root_schema_datatype_names = set()
self.root_schema_iorules = set()

try:
self.datamodel = PodioConfigReader.read(yamlfile, package_name, upstream_edm)
Expand All @@ -115,19 +124,20 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr

def process(self):
"""Run the actual generation"""
self.process_schema_evolution()

for name, component in self.datamodel.components.items():
self._process_component(name, component)

for name, datatype in self.datamodel.datatypes.items():
self._process_datatype(name, datatype)

self._write_edm_def_file()

if 'ROOT' in self.io_handlers:
self.prepare_iorules()
self._create_selection_xml()

self._write_cmake_lists_file()
self.process_schema_evolution()

self.print_report()

Expand All @@ -141,7 +151,8 @@ def process_schema_evolution(self):
evolution_file=self.evolution_file)
comparator.read()
comparator.compare()

self.old_schema_version = f"v{comparator.datamodel_old.schema_version}"
self.old_schema_version_int = comparator.datamodel_old.schema_version
# some sanity checks
if len(comparator.errors) > 0:
print(f"The given datamodels '{self.yamlfile}' and '{self.old_yamlfile}' \
Expand All @@ -156,6 +167,12 @@ def process_schema_evolution(self):
print(warning)
sys.exit(-1)

# now go through all the io_handlers and see what we have to do
if 'ROOT' in self.io_handlers:
for item in root_filter(comparator.schema_changes):
# add whatever is relevant to our ROOT schema evolution
self.root_schema_dict.setdefault(item.klassname, []).append(item)

def print_report(self):
"""Print a summary report about the generated code"""
if not self.verbose:
Expand All @@ -170,8 +187,15 @@ def print_report(self):
print(summaryline)
print()

def _eval_template(self, template, data):
def _eval_template(self, template, data, old_schema_data=None):
"""Fill the specified template"""
# merge the info of data and the old schema into a single dict
if old_schema_data:
data['OneToOneRelations_old'] = old_schema_data['OneToOneRelations']
data['OneToManyRelations_old'] = old_schema_data['OneToManyRelations']
data['VectorMembers_old'] = old_schema_data['VectorMembers']
data['old_schema_version'] = self.old_schema_version_int

return self.env.get_template(template).render(data)

def _write_file(self, name, content):
Expand Down Expand Up @@ -220,7 +244,7 @@ def get_fn_format(tmpl):

return fn_templates

def _fill_templates(self, template_base, data):
def _fill_templates(self, template_base, data, old_schema_data=None):
"""Fill the template and write the results to file"""
# Update the passed data with some global things that are the same for all
# files
Expand All @@ -229,7 +253,7 @@ def _fill_templates(self, template_base, data):
data['incfolder'] = self.incfolder

for filename, template in self._get_filenames_templates(template_base, data['class'].bare_type):
self._write_file(filename, self._eval_template(template, data))
self._write_file(filename, self._eval_template(template, data, old_schema_data))

def _process_component(self, name, component):
"""Process one component"""
Expand All @@ -247,12 +271,71 @@ def _process_component(self, name, component):

component['includes'] = self._sort_includes(includes)
component['class'] = DataType(name)

self._fill_templates('Component', component)

# Add potentially older schema for schema evolution
# based on ROOT capabilities for now
if name in self.root_schema_dict:
schema_evolutions = self.root_schema_dict[name]
component = copy.deepcopy(component)
for schema_evolution in schema_evolutions:
if isinstance(schema_evolution, RenamedMember):
for member in component['Members']:
if member.name == schema_evolution.member_name_new:
member.name = schema_evolution.member_name_old
component['class'] = DataType(name + self.old_schema_version)
else:
raise NotImplementedError
self._fill_templates('Component', component)
self.root_schema_component_names.add(name + self.old_schema_version)

@staticmethod
def _replace_component_in_paths(oldname, newname, paths):
    """Replace a component name by another one in existing paths.

    Only the unqualified names (the part after the last ``::``) take part in
    the replacement. The old name is replaced only where it appears as a
    complete identifier, so that entries of other components whose name merely
    contains the old name (e.g. ``NamespaceInNamespaceStruct`` when replacing
    ``NamespaceStruct``) are left untouched.

    Args:
        oldname: (possibly namespaced) name to look for
        newname: (possibly namespaced) name to put in its place
        paths: list of strings that is updated in place

    Returns:
        None, the replacement happens in place in ``paths``
    """
    import re  # function-scope import, only needed for the boundary matching

    # strip the namespace
    short_old = oldname.split("::")[-1]
    short_new = newname.split("::")[-1]
    if not short_old:
        # nothing sensible to look for, leave the paths as they are
        return

    # Match the short name only if it is not embedded in a longer identifier
    pattern = re.compile(rf"(?<![A-Za-z0-9_]){re.escape(short_old)}(?![A-Za-z0-9_])")
    for index, path in enumerate(paths):
        # a callable replacement keeps backslashes in short_new literal
        paths[index] = pattern.sub(lambda _match: short_new, path)

def _process_datatype(self, name, definition):
"""Process one datatype"""
datatype = self._preprocess_datatype(name, definition)

# ROOT schema evolution preparation
# Compute and prepare the potential schema evolution parts
schema_evolution_datatype = copy.deepcopy(datatype)
needs_schema_evolution = False
for member in schema_evolution_datatype['Members']:
if member.is_array:
if member.array_type in self.root_schema_dict:
needs_schema_evolution = True
self._replace_component_in_paths(member.array_type, member.array_type + self.old_schema_version,
schema_evolution_datatype['includes_data'])
member.full_type = member.full_type.replace(member.array_type, member.array_type + self.old_schema_version)
member.array_type = member.array_type + self.old_schema_version

else:
if member.full_type in self.root_schema_dict:
needs_schema_evolution = True
# prepare the ROOT I/O rule
self._replace_component_in_paths(member.full_type, member.full_type + self.old_schema_version,
schema_evolution_datatype['includes_data'])
member.full_type = member.full_type + self.old_schema_version
member.bare_type = member.bare_type + self.old_schema_version

if needs_schema_evolution:
print(f" Preparing explicit schema evolution for {name}")
schema_evolution_datatype['class'].bare_type = schema_evolution_datatype['class'].bare_type + self.old_schema_version # noqa
self._fill_templates('Data', schema_evolution_datatype)
self.root_schema_datatype_names.add(name + self.old_schema_version)
self._fill_templates('Collection', datatype, schema_evolution_datatype)
else:
self._fill_templates('Collection', datatype)

self._fill_templates('Data', datatype)
self._fill_templates('Object', datatype)
self._fill_templates('MutableObject', datatype)
Expand All @@ -263,6 +346,28 @@ def _process_datatype(self, name, definition):
if 'SIO' in self.io_handlers:
self._fill_templates('SIOBlock', datatype)

def prepare_iorules(self):
    """Prepare the IORules to be put in the Reflex dictionary.

    Walks over all schema changes collected in ``self.root_schema_dict`` and
    adds one ``RootIoRule`` per ``RenamedMember`` to
    ``self.root_schema_iorules``.

    Raises:
        ValueError: if a renamed member cannot be found under its new name in
            the current definition of the component
        NotImplementedError: for any schema change that is not a
            ``RenamedMember``
    """
    for type_name, schema_changes in self.root_schema_dict.items():
        for schema_change in schema_changes:
            if not isinstance(schema_change, RenamedMember):
                raise NotImplementedError("Schema evolution for this type not yet implemented")

            # find out the type of the renamed member
            # NOTE(review): the lookup assumes that every key of
            # root_schema_dict names a component -- confirm for datatypes
            component = self.datamodel.components[type_name]
            member_type = next((member.full_type for member in component["Members"]
                                if member.name == schema_change.member_name_new), None)
            if member_type is None:
                # Without this check the rule would be built with an unbound
                # (or stale, from a previous rule) member type
                raise ValueError(f"Cannot find member '{schema_change.member_name_new}' in '{type_name}' "
                                 "to prepare the ROOT I/O rule for its renaming")

            iorule = RootIoRule()
            iorule.sourceClass = type_name
            iorule.targetClass = type_name
            # old_schema_version is only touched if there is something to do,
            # it stays None when no schema evolution has been requested
            iorule.version = self.old_schema_version.lstrip("v")
            iorule.source = f'{member_type} {schema_change.member_name_old}'
            iorule.target = schema_change.member_name_new
            iorule.code = f'{iorule.target} = onfile.{schema_change.member_name_old};'
            self.root_schema_iorules.add(iorule)

def _preprocess_for_obj(self, datatype):
"""Do the preprocessing that is necessary for the Obj classes"""
fwd_declarations = defaultdict(list)
Expand Down Expand Up @@ -483,10 +588,13 @@ def _needs_include(self, classname) -> IncludeFrom:

def _create_selection_xml(self):
"""Create the selection xml that is necessary for ROOT I/O"""
data = {'components': [DataType(c) for c in self.datamodel.components],
data = {'version': self.datamodel.schema_version,
'components': [DataType(c) for c in self.datamodel.components],
'datatypes': [DataType(d) for d in self.datamodel.datatypes],
'old_schema_components': [DataType(d) for d in
self.old_datamodels_datatypes | self.old_datamodels_components]}
self.root_schema_datatype_names | self.root_schema_component_names], # noqa
'iorules': self.root_schema_iorules}

self._write_file('selection.xml', self._eval_template('selection.xml.jinja2', data))

def _build_include(self, member):
Expand Down
13 changes: 12 additions & 1 deletion python/podio_schema_evolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,17 @@ def __init__(self, name, member_name_old, member_name_new):
super().__init__(f"'{self.name}': member '{self.member_name_old}' renamed to '{self.member_name_new}'.")


class RootIoRule:
    """A placeholder IORule class.

    Simple container for the pieces of one I/O rule. All attributes default to
    None, so that ``RootIoRule()`` followed by attribute assignment keeps
    working; alternatively the values can be passed as keyword arguments.

    Instances deliberately keep the default identity based equality and hash,
    since they are collected in a set.
    """
    def __init__(self, *, sourceClass=None, targetClass=None, version=None,  # pylint: disable=invalid-name,too-many-arguments
                 source=None, target=None, code=None):
        self.sourceClass = sourceClass
        self.targetClass = targetClass
        self.version = version
        self.source = source
        self.target = target
        self.code = code

    def __repr__(self):
        """Show all attributes, mainly for debugging purposes"""
        fields = ", ".join(f"{key}={value!r}" for key, value in vars(self).items())
        return f"{type(self).__name__}({fields})"


def sio_filter(schema_changes):
"""
Checks what is required/supported for the SIO backend
Expand Down Expand Up @@ -225,7 +236,7 @@ def heuristics_members(self, added_members, dropped_members, schema_changes):
"""make analysis of member changes in a given data type """
for dropped_member in dropped_members:
added_members_in_definition = [member for member in added_members if
dropped_member.definition_name == member.definition_name]
dropped_member.definition_name == member.definition_name]
for added_member in added_members_in_definition:
if added_member.member.full_type == dropped_member.member.full_type:
# this is a rename candidate. So let's see whether it has been explicitly declared by the user
Expand Down
47 changes: 46 additions & 1 deletion python/templates/Collection.cc.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
#include "{{ incfolder }}{{ class.bare_type }}Collection.h"
#include "{{ incfolder }}DatamodelDefinition.h"

{% if old_schema_version is defined %}
#include "{{ incfolder }}{{ class.bare_type }}v{{ old_schema_version }}Data.h"
{% endif %}

{% for include in includes_coll_cc %}
{{ include }}
{% endfor %}
Expand Down Expand Up @@ -173,7 +177,18 @@ podio::SchemaVersionT {{ collection_type }}::getSchemaVersion() const {
return {{ package_name }}::meta::schemaVersion;
}

{{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, 1) }}
// anonymous namespace for registration with the CollectionBufferFactory. This
// ensures that we don't have to make up arbitrary namespace names here, since
// none of this is publicly visible
namespace {
{{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, -1) }}

{#
// SCHEMA EVOLUTION: Not yet required with only ROOT backend
// {% if old_schema_version is defined %}
// {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations_old, OneToOneRelations_old, VectorMembers_old, old_schema_version) }}
// {% endif %}
#}

// The usual trick with an IIFE and a static variable inside a function and then
// making sure to call that function during shared library loading
Expand All @@ -182,6 +197,36 @@ bool registerCollection() {
auto& factory = podio::CollectionBufferFactory::mutInstance();
factory.registerCreationFunc("{{ class.full_type }}Collection", {{ package_name }}::meta::schemaVersion, createBuffers);

// Make the SchemaEvolution aware of the current version by
// registering a no-op function for this and all preceding versions.
// These will be overridden whenever an explicit action is required
for (unsigned int schemaVersion=1; schemaVersion< {{ package_name }}::meta::schemaVersion+1; ++schemaVersion) {
podio::SchemaEvolution::mutInstance().registerEvolutionFunc(
"{{ class.full_type }}Collection",
schemaVersion,
{{ package_name }}::meta::schemaVersion,
podio::SchemaEvolution::noOpSchemaEvolution,
podio::SchemaEvolution::Priority::AutoGenerated
);
}

{% if old_schema_version is defined %}
// register a buffer creation function for the schema evolution buffer
// SCHEMA EVOLUTION: Not yet required with only ROOT backend
// factory.registerCreationFunc("{{ class.full_type }}Collection", {{ old_schema_version }}, createBuffersV{{old_schema_version}}); //TODO

//Make the SchemaEvolution aware of any other non-trivial conversion
podio::SchemaEvolution::mutInstance().registerEvolutionFunc(
"{{ class.full_type }}Collection",
{{ old_schema_version }},
{{ package_name }}::meta::schemaVersion,
podio::SchemaEvolution::noOpSchemaEvolution,
podio::SchemaEvolution::Priority::AutoGenerated
);


{% endif %}

return true;
}();
return reg;
Expand Down
5 changes: 5 additions & 0 deletions python/templates/CollectionData.h.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
{{ include }}
{% endfor %}

// schema evolution specific includes
{% if schema_evolution_data is defined %}
#include "{{ incfolder }}{{ schema_evolution_data }}Data"
{% endif %}

// podio specific includes
#include "podio/CollectionBuffers.h"
#include "podio/ICollectionProvider.h"
Expand Down
14 changes: 9 additions & 5 deletions python/templates/macros/collections.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -155,16 +155,20 @@ void {{ class.bare_type }}Collection::print(std::ostream& os, bool flush) const

{% macro createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, schemaVersion) %}

// anonymous namespace for registration with the CollectionBufferFactory. This
// ensures that we don't have to make up arbitrary namespace names here, since
// none of this is publicly visible
namespace {
{% if schemaVersion == -1 %}
podio::CollectionReadBuffers createBuffers(bool isSubset) {
{% else %}
podio::CollectionReadBuffers createBuffersV{{ schemaVersion }}(bool isSubset) {
{% endif %}
auto readBuffers = podio::CollectionReadBuffers{};
readBuffers.type = "{{ class.full_type }}Collection";
{% if schemaVersion == -1 %}
readBuffers.schemaVersion = {{ package_name }}::meta::schemaVersion;
readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer;

{% else %}
readBuffers.schemaVersion = {{ schemaVersion }};
readBuffers.data = isSubset ? nullptr : new std::vector<{{ class.bare_type }}v{{ schemaVersion }}Data>;
{% endif %}
// The number of ObjectID vectors is either 1 or the sum of OneToMany and
// OneToOne relations
const auto nRefs = isSubset ? 1 : {{ OneToManyRelations | length }} + {{ OneToOneRelations | length }};
Expand Down
Loading

0 comments on commit b00fd75

Please sign in to comment.