From 8dee128094417cdfd96161d1424f5493717f5e15 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Mon, 20 May 2024 16:13:36 -0400 Subject: [PATCH] Implement any/all evaluation in group.include. --- doc/src/guide/howto/same.md | 14 +++-- doc/src/guide/tutorial/group.md | 10 +-- doc/src/workflow/action/group.md | 30 ++++++--- doc/src/workflow/action/index.md | 2 +- src/project.rs | 76 ++++++++++++++++------ src/workflow.rs | 104 ++++++++++++++++++++----------- 6 files changed, 160 insertions(+), 76 deletions(-) diff --git a/doc/src/guide/howto/same.md b/doc/src/guide/howto/same.md index 16f4fb2..98a29e8 100644 --- a/doc/src/guide/howto/same.md +++ b/doc/src/guide/howto/same.md @@ -2,9 +2,8 @@ You can submit the same action to different groups and resources. To do so, create multiple elements in the action array *with the same name*. Each must use -[`group.include`](../../workflow/action/group.md#include) to select *non-overlapping -subsets*. You can use [`action.from`](../../workflow/action/index.md#from) to copy all -fields from one action and selectively override others. +[`action.group.include`] to select *non-overlapping subsets*. You can use +[`action.from`] to copy all fields from one action and selectively override others. For example, this `workflow.toml` uses 4 processors on directories with small *N* and 8 those with a large *N*. 
@@ -20,11 +19,16 @@ products = ["results.out"] walltime.per_submission = "12:00:00" processes.per_directory = 4 [action.group] -include = [["/N", "<=", "4096"]] maximum_size = 32 +[[action.group.include]] +condition = ["/N", "<=", "4096"] [[action]] from = "compute" resources.processes.per_directory = 8 -group.include = [["/N", ">", "4096"]] +[[action.group.include]] +condition = ["/N", ">", "4096"] ``` + +[`action.group.include`]: ../../workflow/action/group.md#include +[`action.from`]: ../../workflow/action/index.md#from diff --git a/doc/src/guide/tutorial/group.md b/doc/src/guide/tutorial/group.md index b0f9822..28f6dc9 100644 --- a/doc/src/guide/tutorial/group.md +++ b/doc/src/guide/tutorial/group.md @@ -53,15 +53,15 @@ This workflow will apply the `process_point` action to the directories where `value/type == "point"` and the `process_letter` action to the directories where `value/type == "letter"`. -`include` is an array. Each element is a length 3 array with the contents: `[JSON -pointer, operator, operand]`. Think of each element as an expression. The [*JSON -pointer*](../concepts/json-pointers.md) is a string that reads a particular value +`condition` is a length 3 array with the contents: `[JSON pointer, operator, operand]`. +Think of the condition as an expression. The +[*JSON pointer*](../concepts/json-pointers.md) is a string that reads a particular value from the directory's **value**. The *operator* is a comparison operator: `"<"`, `"<="`, `"=="`, `">="`, or `">"`. The *operand* is the value to compare to. Together, these 3 elements make a *condition*. -**Row** applies these *conditions* to all directories in the workspace. When all -*conditions* are true, the directory is included in the action's **groups**. +**Row** applies the *condition* to all directories in the workspace. When the +*condition* is true, the directory is included in the action's **groups**. 
> Note: This implies that every JSON pointer used in an `include` condition **MUST** > be present in every value file. diff --git a/doc/src/workflow/action/group.md b/doc/src/workflow/action/group.md index e9a7a13..04258c6 100644 --- a/doc/src/workflow/action/group.md +++ b/doc/src/workflow/action/group.md @@ -6,12 +6,13 @@ that it submits. Example: ```toml [action.group] -include = [["/subproject", "==", "project_one"]] sort_by = ["/value"] split_by_sort_key = true maximum_size = 16 submit_whole = true reverse_sort = true +[[action.group.include]] +condition = ["/subproject", "==", "project_one"] ``` > Note: You may omit `[action.group]` entirely. @@ -21,27 +22,36 @@ groups of directories included in a given action. ## include -`action.group.include`: **array** of **arrays** - Define a set of conditions that must -all be true for a directory to be included in this group. Each condition is an **array** -of three elements: The *JSON pointer*, *the operator*, and the *operand*. The [JSON -pointer](../../guide/concepts/json-pointers.md) points to a specific element -from the directory's value. The operator may be `"<"`, `"<="`, `"=="`, `">="`, or `">"`. +`action.group.include`: **array** of **tables** - Define a set of selectors, *any* of +which may be true for a directory to be included in this group. + +Each selector is a **table** with only one of the following keys: +* `condition`: An array of three elements: The *JSON pointer*, *the operator*, and the + *operand*. The [JSON pointer](../../guide/concepts/json-pointers.md) points to a + specific element from the directory's value. The operator may be `"<"`, `"<="`, + `"=="`, `">="`, or `">"`. +* `all`: Array of conditions (see above). All conditions must be true for this selector + to be true. 
For example, select all directories where a value is in the given range: ```toml -include = [["/value", ">", 0.2], ["/value", "<", 0.9]] +[[action.group.include]] +all = [["/value", ">", 0.2], ["/value", "<", 0.9]] ``` Choose directories where an array element is equal to a specific value: ```toml -include = [["/array/1", "==", 12]] +[[action.group.include]] +condition = ["/array/1", "==", 12] ``` Match against strings: ```toml -include = [["/map/name", "==", "string"]] +[[action.group.include]] +condition = ["/map/name", "==", "string"] ``` Compare by array: ```toml -include = [["/array", "==", [1, "string", 14.0]]] +[[action.group.include]] +condition = ["/array", "==", [1, "string", 14.0]] ``` Both operands **must** have the same data type. The JSON pointer must be present in the diff --git a/doc/src/workflow/action/index.md b/doc/src/workflow/action/index.md index 03eb218..96b1fa4 100644 --- a/doc/src/workflow/action/index.md +++ b/doc/src/workflow/action/index.md @@ -32,7 +32,7 @@ action. The name may be set by [from](#from). > the same name. All elements with the same name **must** have identical > [`products`](#products) and [`previous_actions`](#previous_actions). All elements > with the same name **must also** select non-intersecting subsets of directories with -> [`group.include`](group.md#include). +> [`action.group.include`](group.md#include). ## command diff --git a/src/project.rs b/src/project.rs index 96d1d24..df71393 100644 --- a/src/project.rs +++ b/src/project.rs @@ -17,7 +17,7 @@ use crate::scheduler::bash::Bash; use crate::scheduler::slurm::Slurm; use crate::scheduler::Scheduler; use crate::state::State; -use crate::workflow::{Action, Workflow}; +use crate::workflow::{Action, Selector, Workflow}; use crate::{Error, MultiProgressContainer}; /// Encapsulate the workflow, state, and scheduler into a project. 
@@ -184,23 +184,55 @@ impl Project { 'outer: for name in directories { if let Some(value) = self.state.values().get(&name) { - for (include, comparison, expected) in action.group.include() { - let actual = value - .pointer(include) - .ok_or_else(|| Error::JSONPointerNotFound(name.clone(), include.clone()))?; - if !expr::evaluate_json_comparison(comparison, actual, expected).ok_or_else( - || { - Error::CannotCompareInclude( - actual.clone(), - expected.clone(), - name.clone(), - ) - }, - )? { - continue 'outer; + if action.group.include().is_empty() { + matching_directories.push(name); + } else { + for selector in action.group.include() { + let result = match selector { + Selector::Condition((include, comparison, expected)) => { + let actual = value.pointer(include).ok_or_else(|| { + Error::JSONPointerNotFound(name.clone(), include.clone()) + })?; + + expr::evaluate_json_comparison(comparison, actual, expected) + .ok_or_else(|| { + Error::CannotCompareInclude( + actual.clone(), + expected.clone(), + name.clone(), + ) + }) + } + + Selector::All(conditions) => { + let mut matches = 0; + for (include, comparison, expected) in conditions { + let actual = value.pointer(include).ok_or_else(|| { + Error::JSONPointerNotFound(name.clone(), include.clone()) + })?; + + if expr::evaluate_json_comparison(comparison, actual, expected) + .ok_or_else(|| { + Error::CannotCompareInclude( + actual.clone(), + expected.clone(), + name.clone(), + ) + })? + { + matches += 1; + } + } + Ok(matches == conditions.len()) + } + }; + + if result? 
{ + matching_directories.push(name); + continue 'outer; + } } } - matching_directories.push(name); } else { warn!("Directory '{}' not found in workspace.", name.display()); } @@ -424,7 +456,8 @@ products = ["one"] name = "two" command = "c" products = ["two"] -group.include = [["/i", "<", {}]] +[[action.group.include]] +condition = ["/i", "<", {}] [[action]] name = "three" @@ -464,15 +497,22 @@ previous_actions = ["two"] all_directories[0..6] ); + // Check all conditions. let mut action = project.workflow.action[1].clone(); let include = action.group.include.as_mut().unwrap(); - include.push(("/i".into(), Comparison::GreaterThan, Value::from(4))); + include.clear(); + include.push(Selector::All(vec![ + ("/i".into(), Comparison::GreaterThan, Value::from(4)), + ("/i".into(), Comparison::LessThan, Value::from(6)), + ])); assert_eq!( project .find_matching_directories(&action, all_directories.clone()) .unwrap(), vec![PathBuf::from("dir5")] ); + + // TODO, test any } #[test] diff --git a/src/workflow.rs b/src/workflow.rs index 00bd952..f263499 100644 --- a/src/workflow.rs +++ b/src/workflow.rs @@ -180,13 +180,24 @@ pub enum Comparison { GreaterThan, } +/// Condition definition +type ConditionElement = (String, Comparison, serde_json::Value); + +/// Directory selector +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum Selector { + Condition(ConditionElement), + All(Vec<ConditionElement>), +} + /// Group definition. #[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq)] #[serde(deny_unknown_fields)] pub struct Group { /// Include members of the group where all JSON elements match the given values. #[serde(default)] - pub include: Option<Vec<(String, Comparison, serde_json::Value)>>, + pub include: Option<Vec<Selector>>, /// Sort by the given set of JSON elements. #[serde(default)] @@ -500,7 +511,7 @@ impl Action { impl Group { /// Get the group's `include`. 
- pub fn include(&self) -> &[(String, Comparison, serde_json::Value)] { + pub fn include(&self) -> &[Selector] { if let Some(include) = self.include.as_ref() { include } else { @@ -1268,12 +1279,15 @@ products = ["d", "e"] name = "b" command = "c" [action.group] -include = [["/d", "==", 5], ["/float", ">", 6.5], ["/string", "<", "str"], ["/array", "==", [1,2,3]], ["/bool", "==", false]] sort_by = ["/sort"] split_by_sort_key = true maximum_size = 10 submit_whole = true reverse_sort = true +[[action.group.include]] +condition = ["/d", "==", 5] +[[action.group.include]] +all = [["/float", ">", 6.5], ["/string", "<", "str"], ["/array", "==", [1,2,3]], ["/bool", "==", false]] "#; let workflow = Workflow::open_str(temp.path(), workflow).unwrap(); @@ -1284,31 +1298,33 @@ reverse_sort = true assert_eq!( action.group.include(), vec![ - ( + Selector::Condition(( "/d".to_string(), Comparison::EqualTo, serde_json::Value::from(5) - ), - ( - "/float".to_string(), - Comparison::GreaterThan, - serde_json::Value::from(6.5) - ), - ( - "/string".to_string(), - Comparison::LessThan, - serde_json::Value::from("str") - ), - ( - "/array".to_string(), - Comparison::EqualTo, - serde_json::Value::from(vec![1, 2, 3]) - ), - ( - "/bool".to_string(), - Comparison::EqualTo, - serde_json::Value::from(false) - ) + )), + Selector::All(vec![ + ( + "/float".to_string(), + Comparison::GreaterThan, + serde_json::Value::from(6.5) + ), + ( + "/string".to_string(), + Comparison::LessThan, + serde_json::Value::from("str") + ), + ( + "/array".to_string(), + Comparison::EqualTo, + serde_json::Value::from(vec![1, 2, 3]) + ), + ( + "/bool".to_string(), + Comparison::EqualTo, + serde_json::Value::from(false) + ) + ]) ] ); assert_eq!(action.group.sort_by(), vec![String::from("/sort")]); @@ -1565,12 +1581,13 @@ walltime.per_submission = "00:00:01" # submit_options is tested above [default.action.group] -include = [["/f", "==", 5]] sort_by = ["/g"] split_by_sort_key = true reverse_sort = true maximum_size = 6 
submit_whole = true +[[default.action.group.include]] +condition = ["/f", "==", 5] [[action]] @@ -1598,7 +1615,11 @@ name = "d" assert!(action.submit_options.is_empty()); assert_eq!( action.group.include(), - vec![("/f".into(), Comparison::EqualTo, serde_json::Value::from(5))] + vec![Selector::Condition(( + "/f".into(), + Comparison::EqualTo, + serde_json::Value::from(5) + ))] ); assert_eq!(action.group.sort_by(), vec!["/g"]); assert!(action.group.split_by_sort_key()); @@ -1628,12 +1649,13 @@ walltime.per_submission = "00:00:01" # submit_options is tested above [default.action.group] -include = [["/f", "==", 5]] sort_by = ["/g"] split_by_sort_key = true reverse_sort = true maximum_size = 6 submit_whole = true +[[default.action.group.include]] +condition = ["/f", "==", 5] [[action]] name = "aa" @@ -1651,12 +1673,13 @@ walltime.per_submission = "00:00:02" # submit_options is tested above [action.group] -include = [["/ff", "==", 10]] sort_by = ["/gg"] split_by_sort_key = false reverse_sort = false maximum_size = 12 submit_whole = false +[[action.group.include]] +condition = ["/ff", "==", 10] [[action]] name = "dd" @@ -1682,11 +1705,11 @@ name = "dd" assert!(action.submit_options.is_empty()); assert_eq!( action.group.include(), - vec![( + vec![Selector::Condition(( "/ff".into(), Comparison::EqualTo, serde_json::Value::from(10) - )] + ))] ); assert_eq!(action.group.sort_by(), vec!["/gg"]); assert!(!action.group.split_by_sort_key()); @@ -1717,12 +1740,13 @@ walltime.per_submission = "00:00:01" # submit_options is tested above [default.action.group] -include = [["/f", "==", 5]] sort_by = ["/g"] split_by_sort_key = true reverse_sort = true maximum_size = 6 submit_whole = true +[[default.action.group.include]] +condition = ["/f", "==", 5] [[action]] from = "a" @@ -1752,7 +1776,11 @@ command = "e" assert!(action.submit_options.is_empty()); assert_eq!( action.group.include(), - vec![("/f".into(), Comparison::EqualTo, serde_json::Value::from(5))] + vec![Selector::Condition(( + 
"/f".into(), + Comparison::EqualTo, + serde_json::Value::from(5) + ))] ); assert_eq!(action.group.sort_by(), vec!["/g"]); assert!(action.group.split_by_sort_key()); @@ -1783,12 +1811,13 @@ walltime.per_submission = "00:00:01" # submit_options is tested above [default.action.group] -include = [["/f", "==", 5]] sort_by = ["/g"] split_by_sort_key = true reverse_sort = true maximum_size = 6 submit_whole = true +[[default.action.group.include]] +condition = ["/f", "==", 5] [[action]] from = "a" @@ -1808,12 +1837,13 @@ walltime.per_submission = "00:00:02" # submit_options is tested above [action.group] -include = [["/ff", "==", 10]] sort_by = ["/gg"] split_by_sort_key = false reverse_sort = false maximum_size = 12 submit_whole = false +[[action.group.include]] +condition = ["/ff", "==", 10] [[action]] name = "dd" @@ -1844,11 +1874,11 @@ command = "e" assert!(action.submit_options.is_empty()); assert_eq!( action.group.include(), - vec![( + vec![Selector::Condition(( "/ff".into(), Comparison::EqualTo, serde_json::Value::from(10) - )] + ))] ); assert_eq!(action.group.sort_by(), vec!["/gg"]); assert!(!action.group.split_by_sort_key());