From ae6a82b28b40b60c56e8563c1bfff036ed4fca80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 10 Jul 2024 11:36:01 +0200 Subject: [PATCH] Better error messages and some documentation inside example --- examples/13_write_dynamic_configuration.cpp | 33 ++++- src/auxiliary/JSONMatcher.cpp | 142 +++++++++++++------- 2 files changed, 125 insertions(+), 50 deletions(-) diff --git a/examples/13_write_dynamic_configuration.cpp b/examples/13_write_dynamic_configuration.cpp index 479cc65354..d507b4c8bd 100644 --- a/examples/13_write_dynamic_configuration.cpp +++ b/examples/13_write_dynamic_configuration.cpp @@ -10,7 +10,7 @@ using namespace openPMD; int main() { - if (!getVariants()["adios2"]) + if (!getVariants()["hdf5"]) { // Example configuration below selects the ADIOS2 backend return 0; @@ -57,13 +57,40 @@ parameters.clevel = 5 # type = "some other parameter" # # ... +# Sometimes, dataset configurations should not affect all datasets, but only +# specific ones, e.g. only particle data. +# Dataset configurations can be given as a list, here at the example of HDF5. +# In such lists, each entry is an object with two keys: +# +# 1. 'cfg': Mandatory key, this is the actual dataset configuration. +# 2. 'select': A Regex or a list of Regexes to match against the dataset name. +# +# This makes it possible to give dataset-specific configurations. +# The dataset name is the same as returned +# by `Attributable::myPath().openPMDPath()`. +# The regex must match against either the full path (e.g. "/data/1/meshes/E/x") +# or against the path within the iteration (e.g. "meshes/E/x"). + +# Example: +# Let HDF5 datasets be automatically chunked by default [[hdf5.dataset]] cfg.chunks = "auto" +# For particles, we can specify the chunking explicitly +[[hdf5.dataset]] +# Multiple selection regexes can be given as a list. +# They will be fused into a single regex '($^)|(regex1)|(regex2)|(regex3)|...'. +select = ["/data/1/particles/e/.*", "/data/2/particles/e/.*"] +cfg.chunks = [5] + +# Selecting a match works top-down, the order of list entries is important. [[hdf5.dataset]] +# Specifying only a single regex. +# The regex can match against the full dataset path +# or against the path within the Iteration. +# Capitalization is irrelevant. select = "particles/e/.*" -cfg.chunks = [10] -cfg.chornks = [] +CFG.CHUNKS = [10] )END"; // open file for writing diff --git a/src/auxiliary/JSONMatcher.cpp b/src/auxiliary/JSONMatcher.cpp index 3da52c3844..cabc269e61 100644 --- a/src/auxiliary/JSONMatcher.cpp +++ b/src/auxiliary/JSONMatcher.cpp @@ -2,8 +2,10 @@ #include "openPMD/Error.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" +#include #include #include +#include namespace openPMD::json { @@ -16,6 +18,8 @@ namespace * The "select" key is optional, indicating the default configuration if it * is missing. * + * @param backend_name For error messages. + * @param index_in_list For error messages. * @param patterns Output parameter: Emplace a parsed pattern into this * list. * @param defaultConfig Output parameter: If the pattern was the default @@ -24,6 +28,8 @@ namespace * @return Whether the pattern was the default configuration or not. */ auto readPattern( + std::string const &backend_name, + size_t index_in_list, std::vector &patterns, std::optional &defaultConfig, nlohmann::json object) -> void; @@ -53,9 +59,14 @@ void MatcherPerBackend::init(TracingJSON tracing_config) { std::optional defaultConfig; // enhanced PIConGPU-defined layout - for (auto &value : config) + for (size_t i = 0; i < config.size(); ++i) { - readPattern(m_patterns, defaultConfig, std::move(value)); + readPattern( + backendName, + i, + m_patterns, + defaultConfig, + std::move(config.at(i))); } // now replace the pattern list with the default config tracing_config.json() = @@ -63,9 +74,8 @@ void MatcherPerBackend::init(TracingJSON tracing_config) } else { - throw std::runtime_error( - "[openPMD plugin] Expecting an object or an array as JSON " - "configuration."); + throw error::BackendConfigSchema( + {backendName, "dataset"}, "Expecting an object or an array."); } } @@ -171,66 +181,104 @@ auto JsonMatcher::getDefault() -> TracingJSON namespace { auto readPattern( + std::string const &backend_name, + size_t index_in_list, std::vector &patterns, std::optional &defaultConfig, nlohmann::json object) -> void { - constexpr char const *errorMsg = &R"END( -Each single pattern in an extended JSON configuration must be a JSON object -with keys 'select' and 'cfg'. -The key 'select' is optional, indicating a default configuration if it is -not set. -The key 'select' must point to either a single string or an array of strings.)END" - [1]; + constexpr char const *errorMsg = R"END( +Each single pattern in an dataset-specific JSON/TOML configuration must be +an object with mandatory key 'cfg' and optional key 'select'. +When the key 'select' is not specified, the given configuration is used +for setting up the default dataset configuration upon backend initialization. +The key 'select' must point to either a single string or an array of strings +and is interpreted as a regular expression against which the dataset name +(full path or path within an iteration) must match.)END"; + auto throw_up = [&](std::string const &additional_info, + auto &&...additional_path) { + throw error::BackendConfigSchema( + {backend_name, + "dataset", + std::to_string(index_in_list), + additional_path...}, + additional_info + errorMsg); + }; if (!object.is_object()) { - throw std::runtime_error(errorMsg); + throw_up("Not an object!"); + } + if (!object.contains("cfg")) + { + throw_up("Mandatory key missing: 'cfg'!"); } - try { - nlohmann::json &cfg = object.at("cfg"); - if (!object.contains("select")) + std::vector unrecognized_keys; + for (auto it = object.begin(); it != object.end(); ++it) { - if (defaultConfig.has_value()) + if (it.key() == "select" || it.key() == "cfg") { - throw std::runtime_error( - "Specified more than one default configuration."); + continue; } - defaultConfig.emplace(std::move(cfg)); - return; + unrecognized_keys.emplace_back(it.key()); } - else + if (!unrecognized_keys.empty()) { - nlohmann::json const &pattern = object.at("select"); - std::string pattern_str = [&]() -> std::string { - if (pattern.is_string()) - { - return pattern.get(); - } - else if (pattern.is_array()) + std::cerr << "[Warning] JSON/TOML config at '" << backend_name + << ".dataset." << index_in_list + << "' has unrecognized keys:"; + for (auto const &item : unrecognized_keys) + { + std::cerr << " '" << item << '\''; + } + std::cerr << '.' << std::endl; + } + } + + nlohmann::json &cfg = object.at("cfg"); + if (!object.contains("select")) + { + if (defaultConfig.has_value()) + { + throw_up("Specified more than one default configuration!"); + } + defaultConfig.emplace(std::move(cfg)); + return; + } + else + { + nlohmann::json const &pattern = object.at("select"); + std::string pattern_str = [&]() -> std::string { + if (pattern.is_string()) + { + return pattern.get(); + } + else if (pattern.is_array()) + { + std::stringstream res; + res << "($^)"; + for (auto const &sub_pattern : pattern) { - std::stringstream res; - res << "($^)"; - for (auto const &sub_pattern : pattern) + if (!sub_pattern.is_string()) { - res << "|(" << sub_pattern.get() - << ")"; + throw_up( + "Must be a string or an array of string!", + "select"); } - return res.str(); - } - else - { - throw std::runtime_error(errorMsg); + res << "|(" << sub_pattern.get() << ")"; } - }(); - patterns.emplace_back(pattern_str, std::move(cfg)); - return; - } - } - catch (nlohmann::json::out_of_range const &) - { - throw std::runtime_error(errorMsg); + return res.str(); + } + else + { + throw_up( + "Must be a string or an array of string!", "select"); + throw std::runtime_error("Unreachable!"); + } + }(); + patterns.emplace_back(pattern_str, std::move(cfg)); + return; } } } // namespace