diff --git a/include/podio/CollectionBase.h b/include/podio/CollectionBase.h index d2dc1a626..7e2a51b2c 100644 --- a/include/podio/CollectionBase.h +++ b/include/podio/CollectionBase.h @@ -47,15 +47,6 @@ class CollectionBase { /// Get the collection buffers for this collection virtual podio::CollectionWriteBuffers getBuffers() = 0; - /// Create (empty) collection buffers from which a collection can be constructed - virtual podio::CollectionReadBuffers createBuffers() /*const*/ = 0; - - /// Create (empty) collection buffers from which a collection can be constructed - /// Versioned to support schema evolution - virtual podio::CollectionReadBuffers createSchemaEvolvableBuffers(int readSchemaVersion, - podio::Backend backend) /*const*/ - = 0; - /// check for validity of the container after read virtual bool isValid() const = 0; diff --git a/include/podio/CollectionBufferFactory.h b/include/podio/CollectionBufferFactory.h new file mode 100644 index 000000000..c3c45692b --- /dev/null +++ b/include/podio/CollectionBufferFactory.h @@ -0,0 +1,84 @@ +#ifndef PODIO_COLLECTIONBUFFERFACTORY_H +#define PODIO_COLLECTIONBUFFERFACTORY_H + +#include "podio/CollectionBuffers.h" +#include "podio/SchemaEvolution.h" + +#include +#include +#include +#include + +namespace podio { + +/** + * The CollectionBufferFactory allows to create buffers of known datatypes, + * which can then be populated by e.g. readers. In order to support schema + * evolution, the buffers have a version and this factory will also require a + * schema version to create buffers. + * + * It is implemented as a singleton, which is populated at the time a shared + * datamodel library is loaded. It is assumed that that happens early on in the + * startup of an application, such that only a single thread will access the + * factory instance for registering datatypes. 
Since the necessary creation + * functions are part of the core datamodel library, this should be very easy to + * achieve by simply linking to that library. Once the factory is populated it + * can be safely accessed from multiple threads concurrently to obtain buffers. + */ +class CollectionBufferFactory { + /// Internal storage is a map to an array of creation functions, where the + /// version determines the place in that array. This should be a viable + /// approach because we know the "latest and greatest" schema version + using CreationFuncT = std::function; + using VersionMapT = std::vector; + using MapT = std::unordered_map; + +public: + /// The buffer factory is a singleton so we disable all copy and move + /// constructors explicitly + CollectionBufferFactory(CollectionBufferFactory const&) = delete; + CollectionBufferFactory& operator=(CollectionBufferFactory const&) = delete; + CollectionBufferFactory(CollectionBufferFactory&&) = delete; + CollectionBufferFactory& operator=(CollectionBufferFactory&&) = delete; + ~CollectionBufferFactory() = default; + + /// Mutable instance only used for the initial registration of functions + /// during library loading + static CollectionBufferFactory& mutInstance(); + /// Get the factory instance + static CollectionBufferFactory const& instance(); + + /** + * Create buffers for a given collection type of a given schema version. + * + * @param collType The collection type name (e.g. from collection->getTypeName()) + * @param version The schema version the created buffers should have + * @param subsetColl Should the buffers be for a subset collection or not + * + * @return CollectionReadBuffers if a creation function for this collection + * type has been registered, otherwise an empty optional + */ + std::optional createBuffers(const std::string& collType, SchemaVersionT version, + bool subsetColl) const; + /** + * Register a creation function for a given collection type and schema version. 
+ * + * @param collType The collection type name (i.e. what + * collection->getTypeName() returns) + * @param version The schema version for which this creation function is valid + * @param creationFunc The function that when invoked returns buffers for this + * collection type and schema version. The signature has to be + * podio::CollectionReadBuffers(bool) where the boolean parameter steers + * whether the buffers are for a subset collection or not. + */ + void registerCreationFunc(const std::string& collType, SchemaVersionT version, const CreationFuncT& creationFunc); + +private: + CollectionBufferFactory() = default; + + MapT m_funcMap{}; ///< Map to the creation functions +}; + +} // namespace podio + +#endif // PODIO_COLLECTIONBUFFERFACTORY_H diff --git a/include/podio/ROOTFrameReader.h b/include/podio/ROOTFrameReader.h index 1a2f48a4d..3c493e4ca 100644 --- a/include/podio/ROOTFrameReader.h +++ b/include/podio/ROOTFrameReader.h @@ -25,9 +25,10 @@ class TTree; namespace podio { namespace detail { - // Information about the data vector as wall as the collection class type - // and the index in the collection branches cache vector - using CollectionInfo = std::tuple; + // Information about the collection class type, whether it is a subset, the + // schema version on file and the index in the collection branches cache + // vector + using CollectionInfo = std::tuple; } // namespace detail diff --git a/include/podio/ROOTLegacyReader.h b/include/podio/ROOTLegacyReader.h index b6fed99f1..4b52b91c6 100644 --- a/include/podio/ROOTLegacyReader.h +++ b/include/podio/ROOTLegacyReader.h @@ -23,10 +23,10 @@ class TTree; namespace podio { namespace detail { - // Information about the data vector as wall as the collection class type - // and the index in the collection branches cache vector - using CollectionInfo = std::tuple; - + // Information about the collection class type, whether it is a subset, the + // schema version on file and the index in the collection branches 
cache + // vector + using CollectionInfo = std::tuple; } // namespace detail class EventStore; diff --git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index 3e02561b8..5834a9b5b 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -78,6 +78,10 @@ class SIOBlock : public sio::block { return sio::block::name(); } + void setSubsetCollection(bool subsetColl) { + m_subsetColl = subsetColl; + } + void setCollection(podio::CollectionBase* col) { m_subsetColl = col->isSubsetCollection(); m_buffers = col->getBuffers(); @@ -85,9 +89,6 @@ class SIOBlock : public sio::block { virtual SIOBlock* create(const std::string& name) const = 0; - // create a new collection for this block - virtual void createBuffers(const bool subsetCollection = false) = 0; - protected: bool m_subsetColl{false}; podio::CollectionReadBuffers m_buffers{}; diff --git a/include/podio/SIOBlockUserData.h b/include/podio/SIOBlockUserData.h index 7ce28cd8e..2c2c5d275 100644 --- a/include/podio/SIOBlockUserData.h +++ b/include/podio/SIOBlockUserData.h @@ -1,6 +1,7 @@ #ifndef PODIO_SIOBLOCKUSERDATA_H #define PODIO_SIOBLOCKUSERDATA_H +#include "podio/CollectionBufferFactory.h" #include "podio/CollectionBuffers.h" #include "podio/SIOBlock.h" #include "podio/UserDataCollection.h" @@ -29,15 +30,23 @@ namespace podio { template > class SIOBlockUserData : public podio::SIOBlock { public: - SIOBlockUserData() : SIOBlock(::sio_name(), sio::version::encode_version(0, 1)) { + SIOBlockUserData() : + SIOBlock(::sio_name(), sio::version::encode_version(UserDataCollection::schemaVersion, 0)) { podio::SIOBlockFactory::instance().registerBlockForCollection(podio::userDataTypeName(), this); } - SIOBlockUserData(const std::string& name) : SIOBlock(name, sio::version::encode_version(0, 1)) { + SIOBlockUserData(const std::string& name) : + SIOBlock(name, sio::version::encode_version(UserDataCollection::schemaVersion, 0)) { } - void read(sio::read_device& device, sio::version_type /*version*/) override { 
+ void read(sio::read_device& device, sio::version_type version) override { + const auto& bufferFactory = podio::CollectionBufferFactory::instance(); + m_buffers = + bufferFactory + .createBuffers(podio::userDataCollTypeName(), sio::version::major_version(version), false) + .value(); + auto* dataVec = new std::vector(); unsigned size(0); device.data(size); @@ -53,17 +62,6 @@ class SIOBlockUserData : public podio::SIOBlock { podio::handlePODDataSIO(device, &(*dataVec)[0], size); } - void createBuffers(bool) override { - - m_buffers.references = new podio::CollRefCollection(); - m_buffers.vectorMembers = new podio::VectorMembersInfo(); - - // Nothing to do here since UserDataCollections cannot be subset collections - m_buffers.createCollection = [](podio::CollectionReadBuffers buffers, bool) { - return std::make_unique>(std::move(*buffers.dataAsVector())); - }; - } - SIOBlock* create(const std::string& name) const override { return new SIOBlockUserData(name); } diff --git a/include/podio/UserDataCollection.h b/include/podio/UserDataCollection.h index b3d910ed8..78abda49a 100644 --- a/include/podio/UserDataCollection.h +++ b/include/podio/UserDataCollection.h @@ -4,6 +4,7 @@ #include "podio/CollectionBase.h" #include "podio/CollectionBuffers.h" #include "podio/DatamodelRegistry.h" +#include "podio/SchemaEvolution.h" #include "podio/utilities/TypeHelpers.h" #include @@ -16,6 +17,10 @@ template <> \ constexpr const char* userDataTypeName() { \ return #type; \ + } \ + template <> \ + constexpr const char* userDataCollTypeName() { \ + return "podio::UserDataCollection<" #type ">"; \ } namespace podio { @@ -37,6 +42,12 @@ using EnableIfSupportedUserType = std::enable_if_t> constexpr const char* userDataTypeName(); +/** Helper template to provide the fully qualified name of a UserDataCollection. + * Implementations are populated by the PODIO_ADD_USER_TYPE macro. 
+ */ +template > +constexpr const char* userDataCollTypeName(); + PODIO_ADD_USER_TYPE(float) PODIO_ADD_USER_TYPE(double) @@ -79,6 +90,9 @@ class UserDataCollection : public CollectionBase { UserDataCollection& operator=(UserDataCollection&&) = default; ~UserDataCollection() = default; + /// The schema version of UserDataCollections + static constexpr SchemaVersionT schemaVersion = 1; + /// prepare buffers for serialization void prepareForWrite() const override { } @@ -108,22 +122,6 @@ class UserDataCollection : public CollectionBase { return {&_vecPtr, &m_refCollections, &m_vecmem_info}; } - podio::CollectionReadBuffers createBuffers() /*const*/ final { - return {nullptr, nullptr, nullptr, - [](podio::CollectionReadBuffers buffers, bool) { - return std::make_unique>(std::move(*buffers.dataAsVector())); - }, - [](podio::CollectionReadBuffers& buffers) { - buffers.data = podio::CollectionWriteBuffers::asVector(buffers.data); - }}; - } - - podio::CollectionReadBuffers createSchemaEvolvableBuffers(__attribute__((unused)) int readSchemaVersion, - __attribute__((unused)) - podio::Backend backend) /*const*/ final { - return createBuffers(); - } - /// check for validity of the container after read bool isValid() const override { return true; @@ -136,7 +134,7 @@ class UserDataCollection : public CollectionBase { /// fully qualified type name std::string getTypeName() const override { - return std::string("podio::UserDataCollection<") + userDataTypeName() + ">"; + return userDataCollTypeName(); } /// fully qualified type name of elements - with namespace @@ -165,7 +163,7 @@ class UserDataCollection : public CollectionBase { /// The schema version is fixed manually SchemaVersionT getSchemaVersion() const final { - return 1; + return schemaVersion; } /// Print this collection to the passed stream diff --git a/python/podio/generator_utils.py b/python/podio/generator_utils.py index 600d11ca8..17d3a37f3 100644 --- a/python/podio/generator_utils.py +++ 
b/python/podio/generator_utils.py @@ -71,10 +71,9 @@ def _is_fixed_width_type(type_name): class DataType: """Simple class to hold information about a datatype or component that is defined in the datamodel.""" - def __init__(self, klass, schema_version): + def __init__(self, klass): self.full_type = klass self.namespace, self.bare_type = _get_namespace_class(self.full_type) - self.schema_version = schema_version def __str__(self): if self.namespace: diff --git a/python/podio/podio_config_reader.py b/python/podio/podio_config_reader.py index 79f43d1b2..b6836f0d3 100644 --- a/python/podio/podio_config_reader.py +++ b/python/podio/podio_config_reader.py @@ -412,10 +412,12 @@ def parse_model(cls, model_dict, package_name, upstream_edm=None): if "schema_version" in model_dict: schema_version = model_dict["schema_version"] + if int(schema_version) <= 0: + raise DefinitionError(f"schema_version has to be larger than 0 (is {schema_version})") else: - warnings.warn("Please provide a schema_version entry. It will become mandatory. Setting it to 0 as default", + warnings.warn("Please provide a schema_version entry. It will become mandatory. 
Setting it to 1 as default", FutureWarning, stacklevel=3) - schema_version = 0 + schema_version = 1 components = {} if "components" in model_dict: diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index ccfaab8ab..773055f10 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -118,7 +118,6 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr self.incfolder = self.datamodel.options['includeSubfolder'] self.expose_pod_members = self.datamodel.options["exposePODMembers"] self.upstream_edm = upstream_edm - self.schema_version = self.datamodel.schema_version self.clang_format = [] self.generated_files = [] @@ -264,7 +263,7 @@ def _process_component(self, name, component): includes.update(component.get("ExtraCode", {}).get("includes", "").split('\n')) component['includes'] = self._sort_includes(includes) - component['class'] = DataType(name, self.schema_version) + component['class'] = DataType(name) self._fill_templates('Component', component) @@ -411,7 +410,7 @@ def _preprocess_datatype(self, name, definition): # Make a copy here and add the preprocessing steps to that such that the # original definition can be left untouched data = deepcopy(definition) - data['class'] = DataType(name, self.schema_version) + data['class'] = DataType(name) data['includes_data'] = self._get_member_includes(definition["Members"]) self._preprocess_for_class(data) self._preprocess_for_obj(data) @@ -426,6 +425,7 @@ def _write_edm_def_file(self): 'package_name': self.package_name, 'edm_definition': model_encoder.encode(self.datamodel), 'incfolder': self.incfolder, + 'schema_version': self.datamodel.schema_version, } self._write_file('DatamodelDefinition.h', @@ -494,9 +494,9 @@ def _needs_include(self, classname) -> IncludeFrom: def _create_selection_xml(self): """Create the selection xml that is necessary for ROOT I/O""" - data = {'components': [DataType(c, self.schema_version) for c in 
self.datamodel.components], - 'datatypes': [DataType(d, self.schema_version) for d in self.datamodel.datatypes], - 'old_schema_components': [DataType(d, self.schema_version) for d in + data = {'components': [DataType(c) for c in self.datamodel.components], + 'datatypes': [DataType(d) for d in self.datamodel.datatypes], + 'old_schema_components': [DataType(d) for d in self.old_datamodels_datatypes | self.old_datamodels_components]} self._write_file('selection.xml', self._eval_template('selection.xml.jinja2', data)) diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index f7a5bcfd8..1789a83f6 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -3,6 +3,8 @@ {% from "macros/iterator.jinja2" import iterator_definitions %} // AUTOMATICALLY GENERATED FILE - DO NOT EDIT +#include "podio/CollectionBufferFactory.h" + #include "{{ incfolder }}{{ class.bare_type }}Collection.h" #include "{{ incfolder }}DatamodelDefinition.h" @@ -150,18 +152,48 @@ podio::CollectionWriteBuffers {{ collection_type }}::getBuffers() { return m_storage.getCollectionBuffers(m_isSubsetColl); } -podio::CollectionReadBuffers {{ collection_type }}::createBuffers() /*const*/ { - // Very cumbersome way at the moment. We get the actual buffers to have the - // references and vector members sized appropriately (we will use this - // information to create new buffers outside) - auto collBuffers = m_storage.getCollectionBuffers(m_isSubsetColl); +{% for member in Members %} +{{ macros.vectorized_access(class, member) }} +{% endfor %} + +size_t {{ collection_type }}::getDatamodelRegistryIndex() const { + return {{ package_name }}::meta::DatamodelRegistryIndex::value(); +} + +podio::SchemaVersionT {{ collection_type }}::getSchemaVersion() const { + return {{ package_name }}::meta::schemaVersion; +} + +// anonymous namespace for registration with the CollectionBufferFactory. 
This +// ensures that we don't have to make up arbitrary namespace names here, since +// none of this is publicly visible +namespace { +podio::CollectionReadBuffers createBuffers(bool isSubset) { auto readBuffers = podio::CollectionReadBuffers{}; - readBuffers.references = collBuffers.references; - readBuffers.vectorMembers = collBuffers.vectorMembers; + readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer; + + // The number of ObjectID vectors is either 1 or the sum of OneToMany and + // OneToOne relations + const auto nRefs = isSubset ? 1 : {{ OneToManyRelations | length }} + {{ OneToOneRelations | length }}; + readBuffers.references = new podio::CollRefCollection(nRefs); + for (auto& ref : *readBuffers.references) { + // Make sure to place usable buffer pointers here + ref = std::make_unique>(); + } + + readBuffers.vectorMembers = new podio::VectorMembersInfo(); + if (!isSubset) { + readBuffers.vectorMembers->reserve({{ VectorMembers | length }}); +{% for member in VectorMembers %} + readBuffers.vectorMembers->emplace_back("{{ member.full_type }}", new std::vector<{{ member.full_type }}>); +{% endfor %} + } + readBuffers.createCollection = [](podio::CollectionReadBuffers buffers, bool isSubsetColl) { - {{ collection_type }}Data data(buffers, isSubsetColl); - return std::make_unique<{{ collection_type }}>(std::move(data), isSubsetColl); + {{ collection_type }}Data data(buffers, isSubsetColl); + return std::make_unique<{{ collection_type }}>(std::move(data), isSubsetColl); }; + readBuffers.recast = [](podio::CollectionReadBuffers& buffers) { if (buffers.data) { buffers.data = podio::CollectionWriteBuffers::asVector<{{ class.full_type }}Data>(buffers.data); @@ -172,26 +204,24 @@ podio::CollectionReadBuffers {{ collection_type }}::createBuffers() /*const*/ { {% endfor %} {% endif %} }; + return readBuffers; } -podio::CollectionReadBuffers {{ collection_type }}::createSchemaEvolvableBuffers(int readSchemaVersion, podio::Backend /*backend*/) 
/*const*/ { - // no version difference -> no-op - if (readSchemaVersion == {{ class.schema_version }}) { - return createBuffers(); - } - // default is no-op as well - return createBuffers(); +// The usual trick with an IIFE and a static variable inside a function and then +// making sure to call that function during shared library loading +bool registerCollection() { + const static auto reg = []() { + auto& factory = podio::CollectionBufferFactory::mutInstance(); + factory.registerCreationFunc("{{ class.full_type }}Collection", {{ package_name }}::meta::schemaVersion, createBuffers); + return true; + }(); + return reg; } +const auto registeredCollection = registerCollection(); +} // namespace -{% for member in Members %} -{{ macros.vectorized_access(class, member) }} -{% endfor %} - -size_t {{ collection_type }}::getDatamodelRegistryIndex() const { - return {{ package_name }}::meta::DatamodelRegistryIndex::value(); -} #ifdef PODIO_JSON_OUTPUT void to_json(nlohmann::json& j, const {{ collection_type }}& collection) { diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index 6c6939456..f91c02db5 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -84,7 +84,7 @@ public: /// fully qualified type name of stored POD elements - with namespace std::string getDataTypeName() const final { return std::string("{{ (class | string ).strip(':')+"Data" }}"); } /// schema version - unsigned int getSchemaVersion() const final { return {{ class.schema_version }}; }; + podio::SchemaVersionT getSchemaVersion() const final; bool isSubsetCollection() const final { return m_isSubsetColl; @@ -112,14 +112,6 @@ public: /// Get the collection buffers for this collection podio::CollectionWriteBuffers getBuffers() final; - /// Create (empty) collection buffers from which a collection can be constructed - podio::CollectionReadBuffers createBuffers() /*const*/ final; - - /// Create (empty) collection buffers from 
which a collection can be constructed - /// Versioned to support schema evolution - podio::CollectionReadBuffers createSchemaEvolvableBuffers(int readSchemaVersion, podio::Backend backend) /*const*/ final; - - void setID(unsigned ID) final { m_collectionID = ID; if (!m_isSubsetColl) { diff --git a/python/templates/DatamodelDefinition.h.jinja2 b/python/templates/DatamodelDefinition.h.jinja2 index 17a300cb9..c424ded7f 100644 --- a/python/templates/DatamodelDefinition.h.jinja2 +++ b/python/templates/DatamodelDefinition.h.jinja2 @@ -1,6 +1,7 @@ // AUTOMATICALLY GENERATED FILE - DO NOT EDIT #include "podio/DatamodelRegistry.h" +#include "podio/SchemaEvolution.h" namespace {{ package_name }}::meta { /** @@ -27,4 +28,9 @@ private: size_t m_value{podio::DatamodelRegistry::NoDefinitionAvailable}; }; +/** + * The schema version at generation time + */ +static constexpr podio::SchemaVersionT schemaVersion = {{ schema_version }}; + } // namespace {{ package_name }}::meta diff --git a/python/templates/SIOBlock.cc.jinja2 b/python/templates/SIOBlock.cc.jinja2 index a33febb8c..f8090ea75 100644 --- a/python/templates/SIOBlock.cc.jinja2 +++ b/python/templates/SIOBlock.cc.jinja2 @@ -6,6 +6,7 @@ #include "{{ incfolder }}{{ class.bare_type }}Collection.h" #include "podio/CollectionBuffers.h" +#include "podio/CollectionBufferFactory.h" #include #include @@ -14,22 +15,19 @@ {{ utils.namespace_open(class.namespace) }} {% with block_class = class.bare_type + 'SIOBlock' %} -void {{ block_class }}::read(sio::read_device& device, sio::version_type) { - if (m_subsetColl) { - m_buffers.references->emplace_back(std::make_unique>()); - } else { -{% for relation in OneToManyRelations + OneToOneRelations %} - m_buffers.references->emplace_back(std::make_unique>()); -{% endfor %} - } +void {{ block_class }}::read(sio::read_device& device, sio::version_type version) { + const auto& bufferFactory = podio::CollectionBufferFactory::instance(); + // TODO: + // - Error handling of empty optional + auto 
maybeBuffers = bufferFactory.createBuffers("{{ class.full_type }}Collection", sio::version::major_version(version), m_subsetColl); + m_buffers = maybeBuffers.value_or(podio::CollectionReadBuffers{}); if (not m_subsetColl) { unsigned size(0); device.data( size ); - m_buffers.data = new std::vector<{{ class.full_type }}Data>(size); auto* dataVec = m_buffers.dataAsVector<{{ class.full_type }}Data>(); + dataVec->resize(size); podio::handlePODDataSIO(device, dataVec->data(), size); - // m_buffers.data = dataVec; } //---- read ref collections ----- @@ -84,18 +82,12 @@ void {{ block_class }}::write(sio::write_device& device) { {% endif %} } -void {{ block_class }}::createBuffers(bool subsetColl) { - m_subsetColl = subsetColl; - - - - m_buffers.references = new podio::CollRefCollection(); - m_buffers.vectorMembers = new podio::VectorMembersInfo(); - - m_buffers.createCollection = [](podio::CollectionReadBuffers buffers, bool isSubsetColl) { - {{ class.bare_type }}CollectionData data(buffers, isSubsetColl); - return std::make_unique<{{ class.bare_type }}Collection>(std::move(data), isSubsetColl); - }; +namespace { + // Create one instance of the type in order to ensure that the SioBlock + // library actually needs linking to the core library. Otherwise it is + // possible that the registry is not populated when the SioBlock library is + // loaded, e.g. when using the python bindings. 
+ const auto elem = {{ class.full_type }}{}; } {% endwith %} diff --git a/python/templates/SIOBlock.h.jinja2 b/python/templates/SIOBlock.h.jinja2 index 62d210852..c25eaebc9 100644 --- a/python/templates/SIOBlock.h.jinja2 +++ b/python/templates/SIOBlock.h.jinja2 @@ -4,6 +4,8 @@ #ifndef {{ package_name.upper() }}_{{ class.bare_type }}SIOBlock_H #define {{ package_name.upper() }}_{{ class.bare_type }}SIOBlock_H +#include "{{ incfolder }}DatamodelDefinition.h" + #include "podio/SIOBlock.h" #include @@ -23,13 +25,12 @@ namespace podio { class {{ block_class }}: public podio::SIOBlock { public: {{ block_class }}() : - SIOBlock("{{ class.bare_type }}", sio::version::encode_version(0, 1)) { + SIOBlock("{{ class.bare_type }}", sio::version::encode_version({{ package_name }}::meta::schemaVersion, 0)) { podio::SIOBlockFactory::instance().registerBlockForCollection("{{class.full_type}}", this); } {{ block_class }}(const std::string& name) : - // SIOBlock(name + "__{{ class.bare_type }}", sio::version::encode_version(0, 1)) {} - SIOBlock(name, sio::version::encode_version(0, 1)) {} + SIOBlock(name, sio::version::encode_version({{ package_name }}::meta::schemaVersion, 0)) {} // Read the collection data from the device void read(sio::read_device& device, sio::version_type version) override; @@ -37,12 +38,7 @@ public: // Write the collection data to the device void write(sio::write_device& device) override; - void createBuffers(bool isSubsetColl) override; - SIOBlock* create(const std::string& name) const override { return new {{ block_class }}(name); } - -private: - podio::CollectionReadBuffers createBuffers() const; }; static {{ block_class }} _dummy{{ block_class }}; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ec7d4cfce..ae5f0f984 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -51,6 +51,8 @@ SET(core_sources EventStore.cc DatamodelRegistry.cc DatamodelRegistryIOHelpers.cc + UserDataCollection.cc + CollectionBufferFactory.cc ) SET(core_headers diff 
--git a/src/CollectionBufferFactory.cc b/src/CollectionBufferFactory.cc new file mode 100644 index 000000000..8c48ea083 --- /dev/null +++ b/src/CollectionBufferFactory.cc @@ -0,0 +1,59 @@ +#include "podio/CollectionBufferFactory.h" +#include "podio/CollectionBuffers.h" + +namespace podio { +CollectionBufferFactory& CollectionBufferFactory::mutInstance() { + static CollectionBufferFactory factory; + return factory; +} + +CollectionBufferFactory const& CollectionBufferFactory::instance() { + return mutInstance(); +} + +std::optional +CollectionBufferFactory::createBuffers(const std::string& collType, SchemaVersionT version, bool subsetColl) const { + if (const auto typeIt = m_funcMap.find(collType); typeIt != m_funcMap.end()) { + const auto& [_, versionMap] = *typeIt; + if (version > 0 && versionMap.size() >= version) { // schema versions are 1-based, so 0 has no slot + return versionMap[version - 1](subsetColl); + } + } + + return std::nullopt; +} + +void CollectionBufferFactory::registerCreationFunc(const std::string& collType, SchemaVersionT version, + const CreationFuncT& creationFunc) { + // Check if we have an entry already to which we can add information + auto typeIt = m_funcMap.find(collType); + if (typeIt != m_funcMap.end()) { + auto& versionMap = typeIt->second; + // If we already have something for this type, make sure to handle all + // versions correctly, assuming that all present creation functions are + // unchanged and that all non-present creation functions behave the same as + // this (assumed latest) version + const auto prevSize = versionMap.size(); + if (prevSize < version) { + versionMap.resize(version); + for (auto i = prevSize; i < version; ++i) { + versionMap[i] = creationFunc; + } + } else if (version > 0) { + // In this case we are explicitly updating one specific version + versionMap[version - 1] = creationFunc; + } + } else { + // If we have a completely new map, then we simply populate all versions + // with this creation function + VersionMapT versionMap; + versionMap.reserve(version); + for (size_t i = 0; i < 
version; ++i) { + versionMap.emplace_back(creationFunc); + } + + m_funcMap.emplace(collType, std::move(versionMap)); + } +} + +} // namespace podio diff --git a/src/ROOTFrameReader.cc b/src/ROOTFrameReader.cc index f4ba00bc4..d150789df 100644 --- a/src/ROOTFrameReader.cc +++ b/src/ROOTFrameReader.cc @@ -1,5 +1,6 @@ #include "podio/ROOTFrameReader.h" #include "podio/CollectionBase.h" +#include "podio/CollectionBufferFactory.h" #include "podio/CollectionBuffers.h" #include "podio/CollectionIDTable.h" #include "podio/GenericParameters.h" @@ -66,40 +67,14 @@ std::unique_ptr ROOTFrameReader::readEntry(ROOTFrameReader::Categ podio::CollectionReadBuffers ROOTFrameReader::getCollectionBuffers(ROOTFrameReader::CategoryInfo& catInfo, size_t iColl) { const auto& name = catInfo.storedClasses[iColl].first; - const auto& [theClass, collectionClass, index] = catInfo.storedClasses[iColl].second; + const auto& [collType, isSubsetColl, schemaVersion, index] = catInfo.storedClasses[iColl].second; auto& branches = catInfo.branches[index]; - // Create empty collection buffers, and connect them to the right branches - auto collBuffers = podio::CollectionReadBuffers(); - // If we have a valid data buffer class we know that have to read data, - // otherwise we are handling a subset collection - const bool isSubsetColl = theClass == nullptr; - if (!isSubsetColl) { - collBuffers.data = theClass->New(); - } - - { - auto collection = - std::unique_ptr(static_cast(collectionClass->New())); - collection->setSubsetCollection(isSubsetColl); - - auto tmpBuffers = collection->createBuffers(); - collBuffers.createCollection = std::move(tmpBuffers.createCollection); - collBuffers.recast = std::move(tmpBuffers.recast); + const auto& bufferFactory = podio::CollectionBufferFactory::instance(); + auto maybeBuffers = bufferFactory.createBuffers(collType, schemaVersion, isSubsetColl); - if (auto* refs = tmpBuffers.references) { - collBuffers.references = new podio::CollRefCollection(refs->size()); - } - if 
(auto* vminfo = tmpBuffers.vectorMembers) { - collBuffers.vectorMembers = new podio::VectorMembersInfo(); - collBuffers.vectorMembers->reserve(vminfo->size()); - - for (const auto& [type, _] : (*vminfo)) { - const auto* vecClass = TClass::GetClass(("vector<" + type + ">").c_str()); - collBuffers.vectorMembers->emplace_back(type, vecClass->New()); - } - } - } + // TODO: Error handling of empty optional + auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{}); const auto localEntry = catInfo.chain->LoadTree(catInfo.entry); // After switching trees in the chain, branch pointers get invalidated so @@ -295,10 +270,7 @@ createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, branches.vecs.push_back(root_utils::getBranch(chain, brName.c_str())); } - const std::string bufferClassName = "std::vector<" + collection->getDataTypeName() + ">"; - const auto bufferClass = isSubsetColl ? nullptr : TClass::GetClass(bufferClassName.c_str()); - - storedClasses.emplace_back(name, std::make_tuple(bufferClass, collectionClass, collectionIndex++)); + storedClasses.emplace_back(name, std::make_tuple(collType, isSubsetColl, collSchemaVersion, collectionIndex++)); collBranches.push_back(branches); } diff --git a/src/ROOTLegacyReader.cc b/src/ROOTLegacyReader.cc index 0a9380bd1..0ce48bf77 100644 --- a/src/ROOTLegacyReader.cc +++ b/src/ROOTLegacyReader.cc @@ -1,3 +1,4 @@ +#include "podio/CollectionBufferFactory.h" #include "podio/CollectionBuffers.h" #include "podio/ROOTFrameData.h" #include "rootUtils.h" @@ -49,40 +50,14 @@ std::unique_ptr ROOTLegacyReader::readEntry() { podio::CollectionReadBuffers ROOTLegacyReader::getCollectionBuffers(const std::pair& collInfo) { const auto& name = collInfo.first; - const auto& [theClass, collectionClass, index] = collInfo.second; + const auto& [collType, isSubsetColl, schemaVersion, index] = collInfo.second; auto& branches = m_collectionBranches[index]; - // Create empty collection buffers, and connect them to 
the right branches - auto collBuffers = podio::CollectionReadBuffers(); - // If we have a valid data buffer class we know that have to read data, - // otherwise we are handling a subset collection - const bool isSubsetColl = theClass == nullptr; - if (!isSubsetColl) { - collBuffers.data = theClass->New(); - } - - { - auto collection = - std::unique_ptr(static_cast(collectionClass->New())); - collection->setSubsetCollection(isSubsetColl); - - auto tmpBuffers = collection->createBuffers(); - collBuffers.createCollection = std::move(tmpBuffers.createCollection); - collBuffers.recast = std::move(tmpBuffers.recast); + const auto& bufferFactory = podio::CollectionBufferFactory::instance(); + auto maybeBuffers = bufferFactory.createBuffers(collType, schemaVersion, isSubsetColl); - if (auto* refs = tmpBuffers.references) { - collBuffers.references = new podio::CollRefCollection(refs->size()); - } - if (auto* vminfo = tmpBuffers.vectorMembers) { - collBuffers.vectorMembers = new podio::VectorMembersInfo(); - collBuffers.vectorMembers->reserve(vminfo->size()); - - for (const auto& [type, _] : (*vminfo)) { - const auto* vecClass = TClass::GetClass(("vector<" + type + ">").c_str()); - collBuffers.vectorMembers->emplace_back(type, vecClass->New()); - } - } - } + // TODO: Error handling of empty optional + auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{}); const auto localEntry = m_chain->LoadTree(m_eventNumber); // After switching trees in the chain, branch pointers get invalidated so @@ -216,10 +191,8 @@ void ROOTLegacyReader::createCollectionBranches(const std::vectorgetDataTypeName() + ">"; - const auto bufferClass = isSubsetColl ? 
nullptr : TClass::GetClass(bufferClassName.c_str()); + m_storedClasses.emplace_back(name, std::make_tuple(collType, isSubsetColl, collSchemaVersion, collectionIndex++)); - m_storedClasses.emplace_back(name, std::make_tuple(bufferClass, collectionClass, collectionIndex++)); m_collectionBranches.push_back(branches); } } diff --git a/src/SIOBlock.cc b/src/SIOBlock.cc index c0a514e6a..3c1663536 100644 --- a/src/SIOBlock.cc +++ b/src/SIOBlock.cc @@ -100,7 +100,7 @@ std::shared_ptr SIOBlockFactory::createBlock(const std::string& typeSt if (it != _map.end()) { auto blk = std::shared_ptr(it->second->create(name)); - blk->createBuffers(isSubsetColl); + blk->setSubsetCollection(isSubsetColl); return blk; } else { return nullptr; diff --git a/src/UserDataCollection.cc b/src/UserDataCollection.cc new file mode 100644 index 000000000..71ea34d48 --- /dev/null +++ b/src/UserDataCollection.cc @@ -0,0 +1,54 @@ +#include "podio/UserDataCollection.h" +#include "podio/CollectionBufferFactory.h" +#include "podio/CollectionBuffers.h" + +#include +#include + +namespace podio { + +namespace { + /** + * Helper function to register a UserDataCollection to the + * CollectionBufferFactory. Takes the BasicType as template argument. 
+   *
+   * Returns an integer so that it can be used with std::apply
+   */
+  template <typename T>
+  int registerUserDataCollection(T) {
+    // Register under UserDataCollection<T>::schemaVersion to allow for potential changes
+    CollectionBufferFactory::mutInstance().registerCreationFunc(
+        userDataCollTypeName<T>(), UserDataCollection<T>::schemaVersion, [](bool) {
+          return podio::CollectionReadBuffers{new std::vector<T>(), nullptr, nullptr,
+                                              [](podio::CollectionReadBuffers buffers, bool) {
+                                                return std::make_unique<UserDataCollection<T>>(
+                                                    std::move(*buffers.dataAsVector<T>()));
+                                              },
+                                              [](podio::CollectionReadBuffers& buffers) {
+                                                buffers.data = podio::CollectionWriteBuffers::asVector<T>(buffers.data);
+                                              }};
+        });
+
+    return 1;
+  }
+
+  /**
+   * Helper function to loop over all types in the SupportedUserDataTypes to
+   * register the UserDataCollection types.
+   */
+  bool registerUserDataCollections() {
+    // Immediately invoked lambda initializing a function-local static: the registration runs exactly once
+    const static auto reg = []() {
+      std::apply([](auto... x) { std::make_tuple(registerUserDataCollection(x)...); }, SupportedUserDataTypes{});
+      return true;
+    }();
+    return reg;
+  }
+
+  /**
+   * Invoke the registration function for user data collections when this constant is initialized
+   */
+  const auto registeredUserData = registerUserDataCollections();
+} // namespace
+
+} // namespace podio
diff --git a/tests/datalayout.yaml b/tests/datalayout.yaml
index 2bcf7cb11..369d39b58 100755
--- a/tests/datalayout.yaml
+++ b/tests/datalayout.yaml
@@ -1,5 +1,5 @@
 ---
-schema_version : 1
+schema_version : 2
 
 options :
   # should getters / setters be prefixed with get / set?
diff --git a/tests/datalayout_old.yaml b/tests/datalayout_old.yaml
index 81a9d5707..eea733ffa 100755
--- a/tests/datalayout_old.yaml
+++ b/tests/datalayout_old.yaml
@@ -1,5 +1,5 @@
 ---
-schema_version : 0
+schema_version : 1
 
 options :
   # should getters / setters be prefixed with get / set?
@@ -200,4 +200,4 @@ datatypes : Description: "Datatype with user defined initialization values" Author: "Thomas Madlener" Members: - - int x // some member \ No newline at end of file + - int x // some member diff --git a/tests/schema_evolution.yaml b/tests/schema_evolution.yaml index 8a9e925f8..561f36fbe 100644 --- a/tests/schema_evolution.yaml +++ b/tests/schema_evolution.yaml @@ -1,6 +1,6 @@ --- -from_schema_version : 0 -to_schema_version : 1 +from_schema_version : 1 +to_schema_version : 2 evolutions: