Skip to content

Commit

Permalink
Remove the Frame from the default readers and writers (#549)
Browse files Browse the repository at this point in the history
* Use freed up name for standard root readers / writers

* Rename the SIO default reader / writer

* Remove internal usage of deprecated reader / writer

* Remove mentions of old names from existing docs

* Make edm4hep workflows pass

* Make clang-tidy pass

clang-tidy seems to struggle with the "header-only" implementation
otherwise and not be able to parse out necessary include directories
from the compile_commands.json
  • Loading branch information
tmadlener authored Feb 6, 2024
1 parent 3d381ab commit 6adf43a
Show file tree
Hide file tree
Showing 34 changed files with 538 additions and 498 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/edm4hep.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ jobs:
cd $STARTDIR/edm4hep
mkdir build && cd build
cmake -DCMAKE_CXX_STANDARD=17 \
-DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \
-DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always " \
-DUSE_EXTERNAL_CATCH2=ON \
-G Ninja ..
ninja -k0
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ repos:
name: clang-tidy
entry: clang-tidy -warnings-as-errors='*,-clang-diagnostic-deprecated-declarations' -p compile_commands.json
types: [c++]
exclude: (tests/(datamodel|src)/.*(h|cc)|podioVersion.in.h)
exclude: (tests/(datamodel|src)/.*(h|cc)|podioVersion.in.h|SIOFrame.*h)
language: system
- id: clang-format
name: clang-format
Expand Down
2 changes: 1 addition & 1 deletion doc/advanced_topics.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,6 @@ read and write all the necessary EDM definitions.
- The `DatamodelDefinitionHolder` is intended to be used by readers. It
provides the `getDatamodelDefinition` and `getAvailableDatamodels` methods.
**It is again the reader's responsibility to correctly populate it with the data it
has read from file.** Currently the `SIOFrameReader` and the `ROOTFrameReader`
has read from file.** Currently the `SIOReader`, the `ROOTReader` and the `ROOTRNTupleReader`
use it and also offer the same functionality as public methods with the help
of it.
169 changes: 3 additions & 166 deletions include/podio/ROOTFrameReader.h
Original file line number Diff line number Diff line change
@@ -1,173 +1,10 @@
#ifndef PODIO_ROOTFRAMEREADER_H
#define PODIO_ROOTFRAMEREADER_H

#include "podio/CollectionBranches.h"
#include "podio/ROOTFrameData.h"
#include "podio/podioVersion.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"

#include "TChain.h"

#include <iostream>
#include <memory>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <vector>

// forward declarations
class TClass;
class TFile;
class TTree;
#include "podio/ROOTReader.h"

namespace podio {

namespace detail {
// Information about a stored collection. The tuple elements are, in order:
// - the collection class type
// - whether it is a subset collection
// - the schema version on file
// - the index in the collection branches cache vector
using CollectionInfo = std::tuple<std::string, bool, SchemaVersionT, size_t>;

} // namespace detail

class CollectionBase;
class CollectionIDTable;
class GenericParameters;
struct CollectionReadBuffers;

/**
* This class reads the available data from disk and prepares the collections
* and buffers. The data of a single entry is handed out as ROOTFrameData, from
* which a Frame can be constructed.
**/
class ROOTFrameReader {

public:
ROOTFrameReader() = default;
~ROOTFrameReader() = default;

// non-copyable
ROOTFrameReader(const ROOTFrameReader&) = delete;
ROOTFrameReader& operator=(const ROOTFrameReader&) = delete;

/**
* Open a single file for reading.
*
* @param filename The name of the input file
*/
void openFile(const std::string& filename);

/**
* Open multiple files for reading and then treat them as if they were one
* file
*
* NOTE: All of the files are assumed to have the same structure. Specifically
* this means:
* - The same categories are available from all files
* - The collections that are contained in the individual categories are the
* same across all files
*
* This usually boils down to "the files have been written with the same
* settings", e.g. they are outputs of a batched process.
*
* @param filenames The filenames of all input files that should be read
*/
void openFiles(const std::vector<std::string>& filenames);

/**
* Read the next data entry from which a Frame can be constructed for the
* given name. In case there are no more entries left for this name or in
* case there is no data for this name, this returns a nullptr.
*
* @param name The name (category) for which the next entry should be read
* @return The data of the next entry, or a nullptr if there is none
*/
std::unique_ptr<podio::ROOTFrameData> readNextEntry(const std::string& name);

/**
* Read the specified data entry from which a Frame can be constructed for
* the given name. In case the entry does not exist for this name or in case
* there is no data for this name, this returns a nullptr.
*
* @param name The name (category) from which the entry should be read
* @param entry The number of the entry to read
* @return The data of the requested entry, or a nullptr if it does not exist
*/
std::unique_ptr<podio::ROOTFrameData> readEntry(const std::string& name, const unsigned entry);

/// Returns the number of entries for the given name
unsigned getEntries(const std::string& name) const;

/// Get the build version of podio that has been used to write the current file
podio::version::Version currentFileVersion() const {
return m_fileVersion;
}

/// Get the names of all the available Frame categories in the current file(s)
std::vector<std::string_view> getAvailableCategories() const;

/// Get the datamodel definition for the given name (forwarded to the
/// DatamodelDefinitionHolder member)
const std::string_view getDatamodelDefinition(const std::string& name) const {
return m_datamodelHolder.getDatamodelDefinition(name);
}

/// Get all names of the datamodels that are available from this reader
/// (forwarded to the DatamodelDefinitionHolder member)
std::vector<std::string> getAvailableDatamodels() const {
return m_datamodelHolder.getAvailableDatamodels();
}

private:
/**
* Helper struct to group together all the necessary state to read / process a
* given category. A "category" in this case describes all frames with the
* same name which are constrained by the ROOT file structure that we use to
* have the same contents. It encapsulates all state that is necessary for
* reading from a TTree / TChain (i.e. collection infos, branches, ...)
*/
struct CategoryInfo {
/// constructor from chain for more convenient map insertion
CategoryInfo(std::unique_ptr<TChain>&& c) : chain(std::move(c)) {
}
std::unique_ptr<TChain> chain{nullptr}; ///< The TChain with the data
unsigned entry{0}; ///< The next entry to read
std::vector<std::pair<std::string, detail::CollectionInfo>> storedClasses{}; ///< The stored collections in this
///< category
std::vector<root_utils::CollectionBranches> branches{}; ///< The branches for this category
std::shared_ptr<CollectionIDTable> table{nullptr}; ///< The collection ID table for this category
};

/**
* Initialize the passed CategoryInfo by setting up the necessary branches,
* collection infos and all necessary meta data to be able to read entries
* with this name
*/
void initCategory(CategoryInfo& catInfo, const std::string& name);

/**
* Get the category information for the given name. In case there is no TTree
* with contents for the given name this will return a CategoryInfo with an
* uninitialized chain (nullptr) member
*/
CategoryInfo& getCategoryInfo(const std::string& name);

/**
* Read the parameters for the entry specified in the passed CategoryInfo
*/
GenericParameters readEntryParameters(CategoryInfo& catInfo, bool reloadBranches, unsigned int localEntry);

/**
* Read the data entry specified in the passed CategoryInfo, and increase the
* counter afterwards. In case the requested entry is larger than the
* available number of entries, return a nullptr.
*/
std::unique_ptr<podio::ROOTFrameData> readEntry(ROOTFrameReader::CategoryInfo& catInfo);

/**
* Get / read the buffers at index iColl in the passed category information
*/
podio::CollectionReadBuffers getCollectionBuffers(CategoryInfo& catInfo, size_t iColl, bool reloadBranches,
unsigned int localEntry);

std::unique_ptr<TChain> m_metaChain{nullptr}; ///< The metadata tree
std::unordered_map<std::string, CategoryInfo> m_categories{}; ///< All categories
std::vector<std::string> m_availCategories{}; ///< All available categories from this file

podio::version::Version m_fileVersion{0, 0, 0}; ///< The podio build version reported by currentFileVersion
DatamodelDefinitionHolder m_datamodelHolder{}; ///< Backs getDatamodelDefinition and getAvailableDatamodels
};

} // namespace podio
// Backwards-compatibility alias for code that still uses the old name; new code should use podio::ROOTReader directly
using ROOTFrameReader [[deprecated("Will be removed in v1.0, switch to podio::ROOTReader")]] = podio::ROOTReader;
}

#endif // PODIO_ROOTFRAMEREADER_H
104 changes: 3 additions & 101 deletions include/podio/ROOTFrameWriter.h
Original file line number Diff line number Diff line change
@@ -1,108 +1,10 @@
#ifndef PODIO_ROOTFRAMEWRITER_H
#define PODIO_ROOTFRAMEWRITER_H

#include "podio/CollectionBranches.h"
#include "podio/CollectionIDTable.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"

#include "TFile.h"

#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>

// forward declarations
class TTree;
#include "podio/ROOTWriter.h"

namespace podio {
class Frame;
class CollectionBase;
class GenericParameters;

/**
* Writer that stores Frames, grouped by category, into a file (held as a
* TFile). Call finish() to write the file including the metadata that is
* necessary to read it again.
*/
class ROOTFrameWriter {
public:
/// Construct a writer for the file with the given name
/// NOTE(review): presumably the file is created / opened here - the
/// implementation is not visible from this header
ROOTFrameWriter(const std::string& filename);
~ROOTFrameWriter();

// non-copyable
ROOTFrameWriter(const ROOTFrameWriter&) = delete;
ROOTFrameWriter& operator=(const ROOTFrameWriter&) = delete;

/** Store the given frame with the given category. Store all available
* collections from the Frame.
*
* NOTE: The contents of the first Frame that is written in this way
* determines the contents that will be written for all subsequent Frames.
*/
void writeFrame(const podio::Frame& frame, const std::string& category);

/** Store the given Frame with the given category. Store only the
* collections that are passed.
*
* NOTE: The contents of the first Frame that is written in this way
* determines the contents that will be written for all subsequent Frames.
*/
void writeFrame(const podio::Frame& frame, const std::string& category, const std::vector<std::string>& collsToWrite);

/** Write the current file, including all the necessary metadata to read it again.
*/
void finish();

/** Check whether the collsToWrite are consistent with the state of the passed
* category.
*
* Return two vectors of collection names. The first one contains all the
* names that were missing from the collsToWrite but were present in the
* category. The second one contains the names that are present in the
* collsToWrite only. If both vectors are empty the category and the passed
* collsToWrite are consistent.
*
* NOTE: This will only be a meaningful check if the first Frame of the passed
* category has already been written. Also, this check is rather expensive as
* it has to effectively do two set differences.
*/
std::tuple<std::vector<std::string>, std::vector<std::string>>
checkConsistency(const std::vector<std::string>& collsToWrite, const std::string& category) const;

private:
/// A collection name together with a (non-owning) pointer to the collection
using StoreCollection = std::pair<const std::string&, podio::CollectionBase*>;

// collectionID, collectionType, subsetCollection and a fourth unsigned int
// element (presumably the schema version - TODO confirm against rootUtils.h)
// NOTE: same as in rootUtils.h private header!
using CollectionInfoT = std::tuple<uint32_t, std::string, bool, unsigned int>;

/**
* Helper struct to group together all necessary state to write / process a
* given category. Created during the first writing of a category
*/
struct CategoryInfo {
TTree* tree{nullptr}; ///< The TTree to which this category is written
std::vector<root_utils::CollectionBranches> branches{}; ///< The branches for this category
std::vector<CollectionInfoT> collInfo{}; ///< Collection info for this category
podio::CollectionIDTable idTable{}; ///< The collection id table for this category
std::vector<std::string> collsToWrite{}; ///< The collections to write for this category
};

/// Initialize the branches for this category
void initBranches(CategoryInfo& catInfo, const std::vector<StoreCollection>& collections,
/*const*/ podio::GenericParameters& parameters);

/// Get the (potentially uninitialized) category information for this category
CategoryInfo& getCategoryInfo(const std::string& category);

/// Reset the passed branches for the passed collections and parameters
/// NOTE(review): presumably this re-points the already created branches to
/// the data of the Frame that is about to be written - confirm in the
/// implementation
static void resetBranches(std::vector<root_utils::CollectionBranches>& branches,
const std::vector<ROOTFrameWriter::StoreCollection>& collections,
/*const*/ podio::GenericParameters* parameters);

std::unique_ptr<TFile> m_file{nullptr}; ///< The storage file
std::unordered_map<std::string, CategoryInfo> m_categories{}; ///< All categories

DatamodelDefinitionCollector m_datamodelCollector{};

bool m_finished{false}; ///< Whether writing has actually been done
};

} // namespace podio
// Backwards-compatibility alias for code that still uses the old name; new code should use podio::ROOTWriter directly
using ROOTFrameWriter [[deprecated("Will be removed in v1.0, switch to podio::ROOTWriter")]] = podio::ROOTWriter;
}

#endif // PODIO_ROOTFRAMEWRITER_H
Loading

0 comments on commit 6adf43a

Please sign in to comment.