From 61b974836894fe08c929b3a3231dc0afa1a6a158 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 22 Oct 2024 07:06:49 -0400 Subject: [PATCH 1/9] Rename working_directory to workflow_root. --- src/scheduler.rs | 4 ++-- src/scheduler/bash.rs | 4 ++-- src/scheduler/slurm.rs | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/scheduler.rs b/src/scheduler.rs index 6f2c8d6..44ff56c 100644 --- a/src/scheduler.rs +++ b/src/scheduler.rs @@ -29,7 +29,7 @@ pub trait Scheduler { /// Submit a job to the scheduler. /// /// # Arguments - /// * `working_directory`: The working directory the action should be submitted from. + /// * `workflow_root`: The working directory the action should be submitted from. /// * `action`: The action to submit. /// * `directories`: The directories to include in the submission. /// * `should_terminate`: Set to true when the user terminates the process. @@ -49,7 +49,7 @@ pub trait Scheduler { /// fn submit( &self, - working_directory: &Path, + workflow_root: &Path, action: &Action, directories: &[PathBuf], should_terminate: Arc, diff --git a/src/scheduler/bash.rs b/src/scheduler/bash.rs index 0fb3fd9..0e498dc 100644 --- a/src/scheduler/bash.rs +++ b/src/scheduler/bash.rs @@ -242,7 +242,7 @@ impl Scheduler for Bash { fn submit( &self, - working_directory: &Path, + workflow_root: &Path, action: &Action, directories: &[PathBuf], should_terminate: Arc, @@ -252,7 +252,7 @@ impl Scheduler for Bash { let mut child = Command::new("bash") .stdin(Stdio::piped()) - .current_dir(working_directory) + .current_dir(workflow_root) .spawn() .map_err(|e| Error::SpawnProcess("bash".into(), e))?; diff --git a/src/scheduler/slurm.rs b/src/scheduler/slurm.rs index 859e9a5..c14e9dd 100644 --- a/src/scheduler/slurm.rs +++ b/src/scheduler/slurm.rs @@ -141,7 +141,7 @@ impl Scheduler for Slurm { fn submit( &self, - working_directory: &Path, + workflow_root: &Path, action: &Action, directories: &[PathBuf], should_terminate: Arc, @@ -164,7 
+164,7 @@ impl Scheduler for Slurm { .stdin(Stdio::piped()) .stdout(Stdio::piped()) .arg("--parsable") - .current_dir(working_directory) + .current_dir(workflow_root) .spawn() .map_err(|e| Error::SpawnProcess("sbatch".into(), e))?; From 5dce8031a188f72292fa6cd969e38edd743c8c24 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 22 Oct 2024 12:49:12 -0400 Subject: [PATCH 2/9] Pass workspace_path and directory_values to script builder. Also set the env var ACTION_WORKSPACE_PATH. --- doc/src/env.md | 1 + src/cli/submit.rs | 9 +- src/scheduler.rs | 84 +++++++++------ src/scheduler/bash.rs | 233 ++++++++++++++++++++++++++++++++--------- src/scheduler/slurm.rs | 48 ++++++--- 5 files changed, 276 insertions(+), 99 deletions(-) diff --git a/doc/src/env.md b/doc/src/env.md index ef10a50..6746fb2 100644 --- a/doc/src/env.md +++ b/doc/src/env.md @@ -6,6 +6,7 @@ | Environment variable | Value | |----------------------|-------| +| `ACTION_WORKSPACE_PATH` | Path to the workspace, relative to the path containing `workflow.toml`. | | `ACTION_CLUSTER` | Name of the cluster the action is executing on. | | `ACTION_NAME` | The name of the action that is executing. | | `ACTION_PROCESSES` | The total number of processes that this action uses. 
| diff --git a/src/cli/submit.rs b/src/cli/submit.rs index 7335134..2460bc4 100644 --- a/src/cli/submit.rs +++ b/src/cli/submit.rs @@ -170,7 +170,12 @@ pub fn submit( info!("Execute without --dry-run to submit the following scripts..."); for (index, (action, directories)) in action_directories.iter().enumerate() { info!("Script {}/{}:", index + 1, action_directories.len()); - let script = scheduler.make_script(action, directories)?; + let script = scheduler.make_script( + action, + directories, + &project.workflow().workspace.path, + project.state().values(), + )?; write!(output, "{script}")?; output.flush()?; @@ -264,6 +269,8 @@ pub fn submit( &project.workflow().root, action, directories, + &project.workflow().workspace.path, + project.state().values(), Arc::clone(&should_terminate), ); diff --git a/src/scheduler.rs b/src/scheduler.rs index 44ff56c..6695c2b 100644 --- a/src/scheduler.rs +++ b/src/scheduler.rs @@ -4,7 +4,8 @@ pub mod bash; pub mod slurm; -use std::collections::HashSet; +use serde_json::Value; +use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicBool; use std::sync::Arc; @@ -14,44 +15,61 @@ use crate::Error; /// A `Scheduler` creates and submits job scripts. pub trait Scheduler { - /// Make a job script given an `Action` and a list of directories. - /// - /// Useful for showing the script that would be submitted to the user. - /// - /// # Returns - /// A `String` containing the job script. - /// - /// # Errors - /// Returns `Err` when the script cannot be created. - /// - fn make_script(&self, action: &Action, directories: &[PathBuf]) -> Result; + /** Make a job script given an `Action` and a list of directories. - /// Submit a job to the scheduler. - /// - /// # Arguments - /// * `workflow_root`: The working directory the action should be submitted from. - /// * `action`: The action to submit. - /// * `directories`: The directories to include in the submission. 
- /// * `should_terminate`: Set to true when the user terminates the process. - /// - /// # Returns - /// `Ok(job_id_option)` on success. - /// Schedulers that queue jobs should set `job_id_option = Some(job_id)`. - /// Schedulers that execute jobs immediately should set `job_id_option = None`. - /// - /// # Early termination. - /// Implementations should periodically check `should_terminate` and - /// exit early (if possible) with `Err(Error::Interrupted)` when set. - /// - /// # Errors - /// Returns `Err(row::Error)` on error, which may be due to a non-zero exit - /// status from the submission. - /// + # Arguments + * `action`: The action to submit. + * `directories`: The directories to include in the submission. + * `workspace_path`: The relative path to the workspace directory from the workflow root. + * `directory_values`: Maps directory names to JSON values. + + `make_script` must use expand `{workspace_path`} and `{\JSON pointer}` + templates in the action's command. + + # Returns + A `String` containing the job script. + + # Errors + Returns `Err` when the script cannot be created. + */ + fn make_script( + &self, + action: &Action, + directories: &[PathBuf], + workspace_path: &Path, + directory_values: &HashMap, + ) -> Result; + + /** Submit a job to the scheduler. + + # Arguments + * `workflow_root`: The working directory the action should be submitted from. + * `action`: The action to submit. + * `directories`: The directories to include in the submission. + * `workspace_path`: The relative path to the workspace directory from the workflow root. + * `directory_values`: Maps directory names to JSON values. + * `should_terminate`: Set to true when the user terminates the process. + + # Returns + `Ok(job_id_option)` on success. + Schedulers that queue jobs should set `job_id_option = Some(job_id)`. + Schedulers that execute jobs immediately should set `job_id_option = None`. + + # Early termination. 
+ Implementations should periodically check `should_terminate` and + exit early (if possible) with `Err(Error::Interrupted)` when set. + + # Errors + Returns `Err(row::Error)` on error, which may be due to a non-zero exit + status from the submission. + */ fn submit( &self, workflow_root: &Path, action: &Action, directories: &[PathBuf], + workspace_path: &Path, + directory_values: &HashMap, should_terminate: Arc, ) -> Result, Error>; diff --git a/src/scheduler/bash.rs b/src/scheduler/bash.rs index 0e498dc..43240dc 100644 --- a/src/scheduler/bash.rs +++ b/src/scheduler/bash.rs @@ -4,6 +4,7 @@ use log::{debug, error, trace}; use nix::sys::signal::{self, Signal}; use nix::unistd::Pid; +use serde_json::Value; use std::collections::{HashMap, HashSet}; use std::env; use std::fmt::Write as _; @@ -29,6 +30,8 @@ pub(crate) struct BashScriptBuilder<'a> { cluster_name: &'a str, action: &'a Action, directories: &'a [PathBuf], + workspace_path: &'a Path, + directory_values: &'a HashMap, preamble: &'a str, launchers: &'a HashMap, } @@ -39,6 +42,8 @@ impl<'a> BashScriptBuilder<'a> { cluster_name: &'a str, action: &'a Action, directories: &'a [PathBuf], + workspace_path: &'a Path, + directory_values: &'a HashMap, launchers: &'a HashMap, ) -> Self { let walltime_in_minutes = action @@ -53,6 +58,8 @@ impl<'a> BashScriptBuilder<'a> { cluster_name, action, directories, + workspace_path, + directory_values, preamble: "", launchers, } @@ -91,11 +98,15 @@ impl<'a> BashScriptBuilder<'a> { let _ = write!( result, r#" +export ACTION_WORKSPACE_PATH="{}" export ACTION_CLUSTER="{}" export ACTION_NAME="{}" export ACTION_PROCESSES="{}" export ACTION_WALLTIME_IN_MINUTES="{}" "#, + self.workspace_path + .to_str() + .ok_or_else(|| Error::NonUTF8DirectoryName(self.workspace_path.into()))?, self.cluster_name, self.action.name(), self.total_processes, @@ -158,8 +169,10 @@ trap 'printf %s\\n "${{directories[@]}}" | {row_executable} scan --no-progress - } fn execution(&self) -> Result { - let 
contains_directory = self.action.command().contains("{directory}"); - let contains_directories = self.action.command().contains("{directories}"); + let command = self.action.command(); + + let contains_directory = command.contains("{directory}"); + let contains_directories = command.contains("{directories}"); if contains_directory && contains_directories { return Err(Error::ActionContainsMultipleTemplates( self.action.name().into(), @@ -191,7 +204,7 @@ trap 'printf %s\\n "${{directories[@]}}" | {row_executable} scan --no-progress - } if contains_directory { - let command = self.action.command().replace("{directory}", "$directory"); + let command = command.replace("{directory}", "$directory"); Ok(format!( r#" for directory in "${{directories[@]}}" @@ -201,10 +214,7 @@ done "# )) } else if contains_directories { - let command = self - .action - .command() - .replace("{directories}", r#""${directories[@]}""#); + let command = command.replace("{directories}", r#""${directories[@]}""#); Ok(format!( r#" {launcher_prefix}{command} || {{ >&2 echo "[row] Error executing command."; exit 1; }} @@ -236,8 +246,22 @@ impl Bash { pub struct ActiveBashJobs {} impl Scheduler for Bash { - fn make_script(&self, action: &Action, directories: &[PathBuf]) -> Result { - BashScriptBuilder::new(&self.cluster.name, action, directories, &self.launchers).build() + fn make_script( + &self, + action: &Action, + directories: &[PathBuf], + workspace_path: &Path, + directory_values: &HashMap, + ) -> Result { + BashScriptBuilder::new( + &self.cluster.name, + action, + directories, + workspace_path, + directory_values, + &self.launchers, + ) + .build() } fn submit( @@ -245,10 +269,12 @@ impl Scheduler for Bash { workflow_root: &Path, action: &Action, directories: &[PathBuf], + workspace_path: &Path, + directory_values: &HashMap, should_terminate: Arc, ) -> Result, Error> { debug!("Executing '{}' in bash.", action.name()); - let script = self.make_script(action, directories)?; + let script = 
self.make_script(action, directories, workspace_path, directory_values)?; let mut child = Command::new("bash") .stdin(Stdio::piped()) @@ -347,9 +373,16 @@ mod tests { #[parallel] fn header() { let (action, directories, launchers) = setup(); - let script = BashScriptBuilder::new("cluster", &action, &directories, &launchers) - .build() - .expect("Valid script."); + let script = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build() + .expect("Valid script."); println!("{script}"); assert!(script.starts_with("#!/bin/bash")); @@ -359,10 +392,17 @@ mod tests { #[parallel] fn preamble() { let (action, directories, launchers) = setup(); - let script = BashScriptBuilder::new("cluster", &action, &directories, &launchers) - .with_preamble("#preamble") - .build() - .expect("Valid script."); + let script = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .with_preamble("#preamble") + .build() + .expect("Valid script."); println!("{script}"); assert!(script.contains("#preamble\n")); @@ -372,9 +412,16 @@ mod tests { #[parallel] fn no_setup() { let (action, directories, launchers) = setup(); - let script = BashScriptBuilder::new("cluster", &action, &directories, &launchers) - .build() - .expect("Valid script."); + let script = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build() + .expect("Valid script."); println!("{script}"); assert!(!script.contains("test $? 
-eq 0 ||")); @@ -388,16 +435,30 @@ mod tests { .submit_options .insert("cluster".to_string(), SubmitOptions::default()); - let script = BashScriptBuilder::new("cluster", &action, &directories, &launchers) - .build() - .expect("Valid script."); + let script = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build() + .expect("Valid script."); println!("{script}"); assert!(!script.contains("test $? -eq 0 ||")); action.submit_options.get_mut("cluster").unwrap().setup = Some("my setup".to_string()); - let script = BashScriptBuilder::new("cluster", &action, &directories, &launchers) - .build() - .expect("Valid script."); + let script = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build() + .expect("Valid script."); println!("{script}"); assert!(script.contains("my setup")); assert!(script.contains("test $? -eq 0 ||")); @@ -407,9 +468,16 @@ mod tests { #[parallel] fn execution_directory() { let (action, directories, launchers) = setup(); - let script = BashScriptBuilder::new("cluster", &action, &directories, &launchers) - .build() - .expect("Valid script."); + let script = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build() + .expect("Valid script."); println!("{script}"); assert!(script.contains("command $directory")); @@ -421,9 +489,16 @@ mod tests { let (mut action, directories, launchers) = setup(); action.command = Some("command {directories}".to_string()); - let script = BashScriptBuilder::new("cluster", &action, &directories, &launchers) - .build() - .expect("Valid script."); + let script = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build() + .expect("Valid script."); println!("{script}"); assert!(script.contains("command 
\"${directories[@]}\"")); @@ -437,9 +512,16 @@ mod tests { action.launchers = Some(vec!["openmp".into()]); action.command = Some("command {directories}".to_string()); - let script = BashScriptBuilder::new("cluster", &action, &directories, &launchers) - .build() - .expect("Valid script."); + let script = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build() + .expect("Valid script."); println!("{script}"); assert!(script.contains("OMP_NUM_THREADS=4 command \"${directories[@]}\"")); @@ -452,9 +534,16 @@ mod tests { action.launchers = Some(vec!["mpi".into()]); action.command = Some("command {directories}".to_string()); - let script = BashScriptBuilder::new("cluster", &action, &directories, &launchers) - .build() - .expect("Valid script."); + let script = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build() + .expect("Valid script."); println!("{script}"); assert!(script.contains( @@ -468,7 +557,15 @@ mod tests { let (mut action, directories, launchers) = setup(); action.command = Some("command {directory} {directories}".to_string()); - let result = BashScriptBuilder::new("cluster", &action, &directories, &launchers).build(); + let result = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build(); assert!(matches!( result, @@ -477,7 +574,15 @@ mod tests { action.command = Some("command".to_string()); - let result = BashScriptBuilder::new("cluster", &action, &directories, &launchers).build(); + let result = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build(); assert!(matches!( result, @@ -489,9 +594,16 @@ mod tests { #[parallel] fn variables() { let (action, directories, launchers) = setup(); - let script = BashScriptBuilder::new("cluster", &action, 
&directories, &launchers) - .build() - .expect("Valid script."); + let script = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build() + .expect("Valid script."); println!("{script}"); @@ -515,9 +627,16 @@ mod tests { action.resources.threads_per_process = None; action.resources.gpus_per_process = None; - let script = BashScriptBuilder::new("cluster", &action, &directories, &launchers) - .build() - .expect("Valid script."); + let script = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build() + .expect("Valid script."); println!("{script}"); @@ -542,7 +661,7 @@ mod tests { submit_options: Vec::new(), }; let script = Bash::new(cluster, launchers) - .make_script(&action, &directories) + .make_script(&action, &directories, &PathBuf::default(), &HashMap::new()) .expect("Valid script"); println!("{script}"); @@ -556,7 +675,15 @@ mod tests { action.launchers = Some(vec![]); action.command = Some("command {directories}".to_string()); - let result = BashScriptBuilder::new("cluster", &action, &directories, &launchers).build(); + let result = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build(); assert!(matches!(result, Err(Error::NoProcessLauncher(_, _)))); } @@ -569,7 +696,15 @@ mod tests { action.launchers = Some(vec!["mpi".into(), "mpi".into()]); action.command = Some("command {directories}".to_string()); - let result = BashScriptBuilder::new("cluster", &action, &directories, &launchers).build(); + let result = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &PathBuf::default(), + &HashMap::new(), + &launchers, + ) + .build(); assert!(matches!(result, Err(Error::TooManyProcessLaunchers(_)))); } diff --git a/src/scheduler/slurm.rs b/src/scheduler/slurm.rs index c14e9dd..c80127c 100644 --- a/src/scheduler/slurm.rs +++ 
b/src/scheduler/slurm.rs @@ -2,6 +2,7 @@ // Part of row, released under the BSD 3-Clause License. use log::{debug, error, trace}; +use serde_json::Value; use std::collections::{HashMap, HashSet}; use std::fmt::Write as _; use std::io::Write; @@ -41,7 +42,13 @@ pub struct ActiveSlurmJobs { } impl Scheduler for Slurm { - fn make_script(&self, action: &Action, directories: &[PathBuf]) -> Result { + fn make_script( + &self, + action: &Action, + directories: &[PathBuf], + workspace_path: &Path, + directory_values: &HashMap, + ) -> Result { let mut preamble = String::with_capacity(512); let mut user_partition = &None; @@ -134,9 +141,16 @@ impl Scheduler for Slurm { } } - BashScriptBuilder::new(&self.cluster.name, action, directories, &self.launchers) - .with_preamble(&preamble) - .build() + BashScriptBuilder::new( + &self.cluster.name, + action, + directories, + workspace_path, + directory_values, + &self.launchers, + ) + .with_preamble(&preamble) + .build() } fn submit( @@ -144,6 +158,8 @@ impl Scheduler for Slurm { workflow_root: &Path, action: &Action, directories: &[PathBuf], + workspace_path: &Path, + directory_values: &HashMap, should_terminate: Arc, ) -> Result, Error> { debug!("Submtitting '{}' with sbatch.", action.name()); @@ -158,7 +174,7 @@ impl Scheduler for Slurm { return Err(Error::Interrupted); } - let script = self.make_script(action, directories)?; + let script = self.make_script(action, directories, workspace_path, directory_values)?; let mut child = Command::new("sbatch") .stdin(Stdio::piped()) @@ -316,7 +332,7 @@ mod tests { fn default() { let (action, directories, slurm) = setup(); let script = slurm - .make_script(&action, &directories) + .make_script(&action, &directories, &PathBuf::default(), &HashMap::new()) .expect("valid script"); println!("{script}"); @@ -336,7 +352,7 @@ mod tests { slurm.cluster.submit_options = vec!["--option=value".to_string()]; let script = slurm - .make_script(&action, &directories) + .make_script(&action, &directories, 
&PathBuf::default(), &HashMap::new()) .expect("valid script"); println!("{script}"); @@ -358,7 +374,7 @@ mod tests { action.resources.processes = Some(Processes::PerDirectory(3)); let script = slurm - .make_script(&action, &directories) + .make_script(&action, &directories, &PathBuf::default(), &HashMap::new()) .expect("valid script"); println!("{script}"); @@ -379,7 +395,7 @@ mod tests { ); let script = slurm - .make_script(&action, &directories) + .make_script(&action, &directories, &PathBuf::default(), &HashMap::new()) .expect("valid script"); println!("{script}"); @@ -400,7 +416,7 @@ mod tests { ); let script = slurm - .make_script(&action, &directories) + .make_script(&action, &directories, &PathBuf::default(), &HashMap::new()) .expect("valid script"); println!("{script}"); @@ -416,7 +432,7 @@ mod tests { action.resources.threads_per_process = Some(5); let script = slurm - .make_script(&action, &directories) + .make_script(&action, &directories, &PathBuf::default(), &HashMap::new()) .expect("valid script"); println!("{script}"); @@ -431,7 +447,7 @@ mod tests { action.resources.gpus_per_process = Some(5); let script = slurm - .make_script(&action, &directories) + .make_script(&action, &directories, &PathBuf::default(), &HashMap::new()) .expect("valid script"); println!("{script}"); @@ -458,7 +474,7 @@ mod tests { let slurm = Slurm::new(cluster, launchers.by_cluster("cluster")); let script = slurm - .make_script(&action, &directories) + .make_script(&action, &directories, &PathBuf::default(), &HashMap::new()) .expect("valid script"); println!("{script}"); @@ -487,7 +503,7 @@ mod tests { action.resources.gpus_per_process = Some(1); let script = slurm - .make_script(&action, &directories) + .make_script(&action, &directories, &PathBuf::default(), &HashMap::new()) .expect("valid script"); println!("{script}"); @@ -516,7 +532,7 @@ mod tests { action.resources.processes = Some(Processes::PerSubmission(81)); let script = slurm - .make_script(&action, &directories) + 
.make_script(&action, &directories, &PathBuf::default(), &HashMap::new()) .expect("valid script"); println!("{script}"); @@ -546,7 +562,7 @@ mod tests { action.resources.gpus_per_process = Some(1); let script = slurm - .make_script(&action, &directories) + .make_script(&action, &directories, &PathBuf::default(), &HashMap::new()) .expect("valid script"); println!("{script}"); From f78576823d0cccbaeea4e84eaa2a230d07d53f9e Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 23 Oct 2024 11:36:39 -0400 Subject: [PATCH 3/9] Implement JSON pointer unpacking in submit. Also implement proper shell escaping of all user-provided quantities. --- Cargo.lock | 8 + Cargo.toml | 2 + DESIGN.md | 2 - src/lib.rs | 8 + src/scheduler/bash.rs | 372 ++++++++++++++++++++++++++++++++++++++---- 5 files changed, 356 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 173b07d..e5f9fb2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -784,9 +784,11 @@ dependencies = [ "path-absolutize", "postcard", "predicates", + "regex", "serde", "serde_json", "serial_test", + "shell-quote", "signal-hook", "speedate", "thiserror", @@ -916,6 +918,12 @@ dependencies = [ "syn", ] +[[package]] +name = "shell-quote" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae4c63bdcc11eea49b562941b914d5ac30d42cad982e3f6e846a513ee6a3ce7e" + [[package]] name = "signal-hook" version = "0.3.17" diff --git a/Cargo.toml b/Cargo.toml index 41dc326..3612d42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,8 +26,10 @@ memchr = "2.7.4" nix = { version = "0.29.0", features = ["signal"] } path-absolutize = "3.1.1" postcard = { version = "1.0.10", default-features = false, features = ["use-std"] } +regex = "1.11.0" serde = { version = "1.0.210", features = ["derive"] } serde_json = "1.0.128" +shell-quote = { version = "0.7.1", default-features = false, features = ["bash"] } signal-hook = { version = "0.3.17", default-features = false } speedate = "0.14.4" 
thiserror = "1.0.64" diff --git a/DESIGN.md b/DESIGN.md index b34c385..139072a 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -293,5 +293,3 @@ status may take a long time, so it should display a progress bar. - **whole group**: A **submission group** that is identical to the **group** found without applying the additional submission filters. - **workspace**: The location on the file system that contains **directories**. - -# TODO: logo diff --git a/src/lib.rs b/src/lib.rs index ce5ff8e..eabb658 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -148,6 +148,14 @@ pub enum Error { #[error("Duplicate actions '{0}' must have the same `previous_actions`.")] DuplicateActionsDifferentPreviousActions(String), + #[error( + r"Action '{0}' must use {{directory}} instead of {{directories}} with {{\JSON pointer}}." + )] + DirectoriesUsedWithJSONPointer(String), + + #[error("Unable to parse template '{1}' for action '{0}'.")] + InvalidTemplate(String, String), + // submission errors #[error("Error encountered while executing action '{0}': {1}.")] ExecuteAction(String, String), diff --git a/src/scheduler/bash.rs b/src/scheduler/bash.rs index 43240dc..f9a5cb0 100644 --- a/src/scheduler/bash.rs +++ b/src/scheduler/bash.rs @@ -4,7 +4,9 @@ use log::{debug, error, trace}; use nix::sys::signal::{self, Signal}; use nix::unistd::Pid; +use regex::{Captures, Regex}; use serde_json::Value; +use shell_quote::{Quote, QuoteExt}; use std::collections::{HashMap, HashSet}; use std::env; use std::fmt::Write as _; @@ -85,30 +87,32 @@ impl<'a> BashScriptBuilder<'a> { fn variables(&self) -> Result { let mut result = "directories=(\n".to_string(); for directory in self.directories { - result.push('\''); - result.push_str( + result.push_quoted( + shell_quote::Bash, directory .to_str() .ok_or_else(|| Error::NonUTF8DirectoryName(directory.clone()))?, ); - result.push_str("'\n"); + result.push('\n'); } result.push_str(")\n"); let _ = write!( result, r#" -export ACTION_WORKSPACE_PATH="{}" -export ACTION_CLUSTER="{}" 
-export ACTION_NAME="{}" -export ACTION_PROCESSES="{}" -export ACTION_WALLTIME_IN_MINUTES="{}" +export ACTION_WORKSPACE_PATH={} +export ACTION_CLUSTER={} +export ACTION_NAME={} +export ACTION_PROCESSES={} +export ACTION_WALLTIME_IN_MINUTES={} "#, - self.workspace_path - .to_str() - .ok_or_else(|| Error::NonUTF8DirectoryName(self.workspace_path.into()))?, - self.cluster_name, - self.action.name(), + >::quote( + self.workspace_path + .to_str() + .ok_or_else(|| Error::NonUTF8DirectoryName(self.workspace_path.into()))? + ), + >::quote(self.cluster_name), + >::quote(self.action.name()), self.total_processes, self.walltime_in_minutes, ); @@ -117,22 +121,19 @@ export ACTION_WALLTIME_IN_MINUTES="{}" { let _ = writeln!( result, - "export ACTION_PROCESSES_PER_DIRECTORY=\"{processes_per_directory}\"", + "export ACTION_PROCESSES_PER_DIRECTORY={processes_per_directory}", ); } if let Some(threads_per_process) = self.action.resources.threads_per_process { let _ = writeln!( result, - "export ACTION_THREADS_PER_PROCESS=\"{threads_per_process}\"", + "export ACTION_THREADS_PER_PROCESS={threads_per_process}", ); } if let Some(gpus_per_process) = self.action.resources.gpus_per_process { - let _ = writeln!( - result, - "export ACTION_GPUS_PER_PROCESS=\"{gpus_per_process}\"", - ); + let _ = writeln!(result, "export ACTION_GPUS_PER_PROCESS={gpus_per_process}",); } Ok(result) @@ -178,6 +179,11 @@ trap 'printf %s\\n "${{directories[@]}}" | {row_executable} scan --no-progress - self.action.name().into(), )); } + if contains_directories && self.contains_json_pointer() { + return Err(Error::DirectoriesUsedWithJSONPointer( + self.action.name().into(), + )); + } // Build up launcher prefix let mut launcher_prefix = String::new(); @@ -204,17 +210,44 @@ trap 'printf %s\\n "${{directories[@]}}" | {row_executable} scan --no-progress - } if contains_directory { - let command = command.replace("{directory}", "$directory"); - Ok(format!( - r#" + if self.contains_json_pointer() { + // When JSON pointers 
are present, produce one line per directory. + let mut result = String::with_capacity(128 * self.directories.len()); + for directory in self.directories { + let current_command = self.substitute(command, directory)?; + let _ = writeln!( + result, + r#" +{launcher_prefix}{current_command} || {{ >&2 echo "[ERROR row::action] Error executing command."; exit 2; }} +"# + ); + } + + Ok(result) + } else { + // When there are no JSON pointers, use a compact for loop. + let command = command.replace("{directory}", "$directory"); + let command = self.substitute(&command, Path::new(""))?; + Ok(format!( + r#" for directory in "${{directories[@]}}" do {launcher_prefix}{command} || {{ >&2 echo "[ERROR row::action] Error executing command."; exit 2; }} done "# - )) + )) + } } else if contains_directories { + // {directories} is compatible with {workspace_path}, but not {/JSON pointer} let command = command.replace("{directories}", r#""${directories[@]}""#); + let command = command.replace( + "{workspace_path}", + &>::quote( + self.workspace_path + .to_str() + .ok_or_else(|| Error::NonUTF8DirectoryName(self.workspace_path.into()))?, + ), + ); Ok(format!( r#" {launcher_prefix}{command} || {{ >&2 echo "[row] Error executing command."; exit 1; }} @@ -228,6 +261,65 @@ done pub(crate) fn build(&self) -> Result { Ok(self.header() + &self.variables()? + &self.setup()? + &self.execution()?) } + + /// Check if the command uses JSON pointers. + fn contains_json_pointer(&self) -> bool { + self.action.command().contains("{}") || self.action.command().contains("{/") + } + + /** Substitute all template strings in a given command. + + Substitutes `{workspace_path}` with the value of `workspace_path`. + Substitutes `{\JSON pointer}` with the value of the JSON pointer for the given directory. + + # Errors + + * `Err(row::JSONPointerNotFound)` when a JSON pointer named in `command` is not present + in the values for the given directory. 
+ * `Err(row::InvalidTemplate)` when an unexpected name appears between `{` and `}`. + */ + fn substitute(&self, command: &str, directory: &Path) -> Result { + let replacement = |caps: &Captures| -> Result { + println!("Matching {}", &caps[0]); + match &caps[0] { + "{workspace_path}" => Ok(shell_quote::Bash::quote( + self.workspace_path + .to_str() + .ok_or_else(|| Error::NonUTF8DirectoryName(self.workspace_path.into()))?, + )), + "{directory}" => { + Ok(shell_quote::Bash::quote(directory.to_str().ok_or_else( + || Error::NonUTF8DirectoryName(self.workspace_path.into()), + )?)) + } + template if template.starts_with("{/") || template == "{}" => { + let pointer = caps[1].into(); + let value = self + .directory_values + .get(directory) + .ok_or_else(|| Error::DirectoryNotFound(directory.into()))? + .pointer(pointer) + .ok_or_else(|| { + Error::JSONPointerNotFound(directory.into(), pointer.to_string()) + })?; + + match value { + // Value::to_string puts extra double quotes around JSON strings, + // extract the string itself. + Value::String(s) => Ok(>::quote(s)), + _ => Ok(shell_quote::Bash::quote(&value.to_string())), + } + } + _ => Err(Error::InvalidTemplate( + self.action.name().into(), + caps[0].into(), + )), + } + }; + + let regex = Regex::new(r"\{([^\}]*)\}").expect("valid regular expression"); + replace_all(®ex, command, replacement) + } } /// The `Bash` scheduler constructs bash scripts and executes them with `bash`. @@ -334,9 +426,33 @@ impl ActiveJobs for ActiveBashJobs { } } +/** Fallible `replace_all`. + +From [the regex documentation]. 
+ +[the regex documentation]: https://docs.rs/regex/latest/regex/struct.Regex.html#fallibility +*/ +fn replace_all( + re: &Regex, + haystack: &str, + replacement: impl Fn(&Captures) -> Result, +) -> Result { + let mut new = String::with_capacity(haystack.len()); + let mut last_match = 0; + for caps in re.captures_iter(haystack) { + let m = caps.get(0).unwrap(); + new.push_str(&haystack[last_match..m.start()]); + new.push_str(&replacement(&caps)?); + last_match = m.end(); + } + new.push_str(&haystack[last_match..]); + Ok(new) +} + #[cfg(test)] mod tests { use super::*; + use serde_json::json; use serial_test::parallel; use speedate::Duration; @@ -607,13 +723,13 @@ mod tests { println!("{script}"); - assert!(script.contains("export ACTION_CLUSTER=\"cluster\"\n")); - assert!(script.contains("export ACTION_NAME=\"action\"\n")); - assert!(script.contains("export ACTION_PROCESSES=\"6\"\n")); - assert!(script.contains("export ACTION_WALLTIME_IN_MINUTES=\"4\"\n")); - assert!(script.contains("export ACTION_PROCESSES_PER_DIRECTORY=\"2\"\n")); - assert!(script.contains("export ACTION_THREADS_PER_PROCESS=\"4\"\n")); - assert!(script.contains("export ACTION_GPUS_PER_PROCESS=\"1\"\n")); + assert!(script.contains("export ACTION_CLUSTER=cluster\n")); + assert!(script.contains("export ACTION_NAME=action\n")); + assert!(script.contains("export ACTION_PROCESSES=6\n")); + assert!(script.contains("export ACTION_WALLTIME_IN_MINUTES=4\n")); + assert!(script.contains("export ACTION_PROCESSES_PER_DIRECTORY=2\n")); + assert!(script.contains("export ACTION_THREADS_PER_PROCESS=4\n")); + assert!(script.contains("export ACTION_GPUS_PER_PROCESS=1\n")); } #[test] @@ -640,10 +756,10 @@ mod tests { println!("{script}"); - assert!(script.contains("export ACTION_CLUSTER=\"cluster\"\n")); - assert!(script.contains("export ACTION_NAME=\"action\"\n")); - assert!(script.contains("export ACTION_PROCESSES=\"10\"\n")); - assert!(script.contains("export ACTION_WALLTIME_IN_MINUTES=\"3\"\n")); + 
assert!(script.contains("export ACTION_CLUSTER=cluster\n")); + assert!(script.contains("export ACTION_NAME=action\n")); + assert!(script.contains("export ACTION_PROCESSES=10\n")); + assert!(script.contains("export ACTION_WALLTIME_IN_MINUTES=3\n")); assert!(!script.contains("export ACTION_PROCESSES_PER_DIRECTORY")); assert!(!script.contains("export ACTION_THREADS_PER_PROCESS")); assert!(!script.contains("export ACTION_GPUS_PER_PROCESS")); @@ -708,4 +824,192 @@ mod tests { assert!(matches!(result, Err(Error::TooManyProcessLaunchers(_)))); } + + #[test] + #[parallel] + fn invalid_template_without_pointer() { + let (mut action, directories, launchers) = setup(); + action.command = Some(r"command {directory} {invalid}".to_string()); + let workspace_path = PathBuf::from("workspace_path/test"); + let directory_values = HashMap::new(); + + let builder = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &workspace_path, + &directory_values, + &launchers, + ); + + assert!(!builder.contains_json_pointer()); + + let result = builder.build(); + + assert!(matches!(result, Err(Error::InvalidTemplate(_, _)))); + } + + #[test] + #[parallel] + fn invalid_template_with_pointer() { + let (mut action, directories, launchers) = setup(); + action.command = Some(r"command {directory} {invalid} {/pointer}".to_string()); + let workspace_path = PathBuf::from("workspace_path/test"); + let directory_values = HashMap::new(); + + let builder = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &workspace_path, + &directory_values, + &launchers, + ); + + assert!(builder.contains_json_pointer()); + + let result = builder.build(); + + assert!(matches!(result, Err(Error::InvalidTemplate(_, _)))); + } + + #[test] + #[parallel] + fn workspace_path_without_pointer() { + let (mut action, directories, launchers) = setup(); + action.command = Some(r"command {directory} {workspace_path}".to_string()); + let workspace_path = PathBuf::from("test/path"); + let directory_values = 
HashMap::new(); + + let builder = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &workspace_path, + &directory_values, + &launchers, + ); + + assert!(!builder.contains_json_pointer()); + + let script = builder.build().expect("valid script"); + + println!("{script}"); + + assert!(script.contains("export ACTION_WORKSPACE_PATH=test/path\n")); + assert!(script.contains("command $directory test/path")); + + // Test again with a path that requires escaping + let workspace_path = PathBuf::from("test $path"); + + let builder = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &workspace_path, + &directory_values, + &launchers, + ); + + assert!(!builder.contains_json_pointer()); + + let script = builder.build().expect("valid script"); + + println!("{script}"); + + assert!(script.contains("export ACTION_WORKSPACE_PATH=$'test $path'\n")); + assert!(script.contains("command $directory $'test $path'")); + } + + #[test] + #[parallel] + fn workspace_path_with_directories() { + let (mut action, directories, launchers) = setup(); + action.command = Some(r"command {directories} {workspace_path}".to_string()); + let workspace_path = PathBuf::from("test_path"); + let directory_values = HashMap::new(); + + let builder = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &workspace_path, + &directory_values, + &launchers, + ); + + assert!(!builder.contains_json_pointer()); + + let script = builder.build().expect("valid script"); + + println!("{script}"); + + assert!(script.contains("export ACTION_WORKSPACE_PATH=test_path\n")); + assert!(script.contains(r#"command "${directories[@]}" test_path"#)); + + // Test again with a path that requires escaping + let workspace_path = PathBuf::from("test $path"); + + let builder = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &workspace_path, + &directory_values, + &launchers, + ); + + assert!(!builder.contains_json_pointer()); + + let script = builder.build().expect("valid 
script"); + + println!("{script}"); + + assert!(script.contains("export ACTION_WORKSPACE_PATH=$'test $path'\n")); + assert!(script.contains(r#"command "${directories[@]}" $'test $path'"#)); + } + + #[test] + #[parallel] + fn workspace_path_with_json_pointers() { + let (mut action, directories, launchers) = setup(); + action.command = Some( + r"command {directory} {workspace_path} {/value} {/name} {/valid} {/array} {}" + .to_string(), + ); + let workspace_path = PathBuf::from("test $path"); + let mut directory_values = HashMap::new(); + directory_values.insert( + PathBuf::from("a"), + json!({"value": 1, "name": "directory_a", "valid": true, "array": [1,2,3]}), + ); + directory_values.insert( + PathBuf::from("b"), + json!({"value": 5, "name": "directory_b", "valid": false, "array": [4,5,6]}), + ); + directory_values.insert( + PathBuf::from("c"), + json!({"value": 7, "name": "directory_c", "valid": null, "array": [7,8,9]}), + ); + + let builder = BashScriptBuilder::new( + "cluster", + &action, + &directories, + &workspace_path, + &directory_values, + &launchers, + ); + + assert!(builder.contains_json_pointer()); + + let script = builder.build().expect("valid script"); + + println!("{script}"); + + assert!(script.contains("export ACTION_WORKSPACE_PATH=$'test $path'\n")); + assert!(script.contains("command a $'test $path' 1 directory_a true $'[1,2,3]'")); + assert!(script.contains("command b $'test $path' 5 directory_b false $'[4,5,6]'")); + assert!(script.contains("command c $'test $path' 7 directory_c null $'[7,8,9]'")); + } } From 8072f9e1e9d6ca5b7495e5941b7579ef10e766e8 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 23 Oct 2024 12:12:43 -0400 Subject: [PATCH 4/9] Document action.command template parameters. 
--- doc/src/workflow/action/index.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/doc/src/workflow/action/index.md b/doc/src/workflow/action/index.md index 96b1fa4..e6a9a45 100644 --- a/doc/src/workflow/action/index.md +++ b/doc/src/workflow/action/index.md @@ -65,6 +65,27 @@ or chain the steps with `&&`. For example: command = "echo Message && python action.py {directory}" ``` +### Template parameters + +`action.command` will expand any template parameter contained within curly braces: +`{template_parameter}`. + +* `{directory}` and `{directories}` are described above. +* `{workspace_path}` will be replaced with the _relative_ path from the project root + (the directory containing `workflow.toml`) to the currently selected workspace. +* `{/JSON pointer}` will be replaced by a portion of the directory's value referenced + by the given [JSON pointer]. Must be used with `{directory}`. +* `{}` will be replaced by the entire directory value formatted in JSON as a single + command line argument. Must be used with `{directory}` +* All other template parameters are invalid. + +For example: +```toml +command = "application -p {/pressure} -s {/seed} -o {workspace_path}/{directory}/out" +``` + +[JSON pointer]: ../../guide/concepts/json-pointers.md + ## launchers `action.launchers`: **array** of **strings** - The launchers to apply when executing a From d15ccc1b359fabd89d9fbe6a3bbf355b39813131 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 23 Oct 2024 14:37:31 -0400 Subject: [PATCH 5/9] Document template parameters in the tutorial Reorganize the tutorial to introduce values before groups. Demonstrate using values with command template parameters. 
--- doc/src/SUMMARY.md | 1 + doc/src/guide/tutorial/.gitignore | 1 + doc/src/guide/tutorial/group-workflow1.toml | 2 - doc/src/guide/tutorial/group.md | 51 ++++--------- doc/src/guide/tutorial/value-workflow.toml | 10 +++ doc/src/guide/tutorial/value.md | 85 +++++++++++++++++++++ doc/src/guide/tutorial/value.sh | 19 +++++ 7 files changed, 132 insertions(+), 37 deletions(-) delete mode 100644 doc/src/guide/tutorial/group-workflow1.toml create mode 100644 doc/src/guide/tutorial/value-workflow.toml create mode 100644 doc/src/guide/tutorial/value.md create mode 100644 doc/src/guide/tutorial/value.sh diff --git a/doc/src/SUMMARY.md b/doc/src/SUMMARY.md index 1d632de..9ae989f 100644 --- a/doc/src/SUMMARY.md +++ b/doc/src/SUMMARY.md @@ -8,6 +8,7 @@ - [Tutorial](guide/tutorial/index.md) - [Hello, workflow!](guide/tutorial/hello.md) - [Managing multiple actions](guide/tutorial/multiple.md) + - [Assigning values to directories](guide/tutorial/value.md) - [Grouping directories](guide/tutorial/group.md) - [Submitting jobs manually](guide/tutorial/scheduler.md) - [Requesting resources with row](guide/tutorial/resources.md) diff --git a/doc/src/guide/tutorial/.gitignore b/doc/src/guide/tutorial/.gitignore index e44ccab..45c211d 100644 --- a/doc/src/guide/tutorial/.gitignore +++ b/doc/src/guide/tutorial/.gitignore @@ -1,2 +1,3 @@ /hello-workflow /group-workflow +/value-workflow diff --git a/doc/src/guide/tutorial/group-workflow1.toml b/doc/src/guide/tutorial/group-workflow1.toml deleted file mode 100644 index 5b4a65d..0000000 --- a/doc/src/guide/tutorial/group-workflow1.toml +++ /dev/null @@ -1,2 +0,0 @@ -[workspace] -value_file = "value.json" diff --git a/doc/src/guide/tutorial/group.md b/doc/src/guide/tutorial/group.md index 791e695..8faf81e 100644 --- a/doc/src/guide/tutorial/group.md +++ b/doc/src/guide/tutorial/group.md @@ -2,49 +2,30 @@ ## Overview -This section shows how you can assign a **value** to each directory and use that -**value** to form **groups** of directories. 
Each **job** executes an action's command -on a **group** of directories. - -## Directory values - -So far, this tutorial has demonstrated small toy examples. In practice, any workflow -that you need to execute on a cluster likely has hundreds or thousands of directories - -each with different parameters. You could try to encode these parameters into the -directory names, but *please don't* - it quickly becomes unmanageable. Instead, you -should include a [JSON](https://www.json.org) file in each directory that identifies -its **value**. - -> Note: For pedagogical reasons, this next code block manually creates directory names -> and value files. In practice, you will likely find [signac](../python/signac.md) more -> convenient to work with - it will create the JSON files and directories for you with -> a cleaner syntax. This tutorial will cover **row** ↔ **signac** interoperation in a -> later section. - -Create a new workflow project and place JSON files in each directory: +This section shows how you can use **values** to form **groups** of directories. Each +**job** executes an action's command on a **group** of directories. + +## Initialize a workspace with values + +To demonstrate the capabilities of **groups**, create a workspace with multiple types of +values. The following script follows the same process used in the previous section: + ```bash {{#include group.sh:init}} ``` -The JSON files must all have the same name. Instruct **row** to read these files -with the `workspace.value_file` key in `workflow.toml`: -```toml -{{#include group-workflow1.toml}} -``` +As mentioned previously, `echo` is used here to create a _minimal_ script that you can +execute to follow along. For any serious production work you will likely find [signac] +more convenient to work with. -Once you create a directory with a **value** file, that value **MUST NOT CHANGE**. 
 Think
-of it this way: The results of your computations (the final contents of the directory)
-are a mathematical *function* of the **value**. When you want to know the results for
-another value, *create a new directory with that value!*. **row** assumes this data
-model and [caches](../concepts/cache.md) all value files so that it does not need to
-read thousands of files every time you execute a **row** command.
+[signac]: ../python/signac.md
 
 ## Grouping by value
 
-Now that your workspace directories have **values**, you can use those to form
-**groups**. Every action in your workflow operates on **groups**. Add entries to the
-`action.group.include` array in an action to select which directories to include by
-**value**. To see how this works, replace the contents of `workflow.toml` with:
+You can use **values** to form **groups** of **directories**. Every action in
+your workflow operates on **groups**. Add entries to the `action.group.include` array
+in an action to select which directories to include by **value**.
To see how this works, +replace the contents of `workflow.toml` with: ```toml {{#include group-workflow2.toml}} diff --git a/doc/src/guide/tutorial/value-workflow.toml b/doc/src/guide/tutorial/value-workflow.toml new file mode 100644 index 0000000..9306cce --- /dev/null +++ b/doc/src/guide/tutorial/value-workflow.toml @@ -0,0 +1,10 @@ +# ANCHOR: workspace +[workspace] +value_file = "value.json" +# ANCHOR_END: workspace + +# ANCHOR: action +[[action]] +name = "show" +command = 'echo {directory}, seed: {/seed}, pressure: {/pressure}' +# ANCHOR_END: action diff --git a/doc/src/guide/tutorial/value.md b/doc/src/guide/tutorial/value.md new file mode 100644 index 0000000..01c8611 --- /dev/null +++ b/doc/src/guide/tutorial/value.md @@ -0,0 +1,85 @@ +# Assigning values to directories + +## Overview + +This section shows how you can assign a **value** to each directory and use a command +**template** to access portions of that value when submitting **actions**. + +## Directory values + +So far, this tutorial has demonstrated small toy examples. In practice, any workflow +that you need to execute on a cluster likely has hundreds or thousands of directories - +each with different parameters. You could try to encode these parameters into the +directory names, but *please don't* - it quickly becomes unmanageable. Instead, you +should include a [JSON] file in each directory that identifies its **value**. + +[JSON]: https://www.json.org + +> Note: For pedagogical reasons, this next code block manually creates directory names +> and value files. In practice, you will likely find [signac] more +> convenient to work with - it will create the JSON files and directories for you with +> a cleaner syntax. This tutorial will cover **row** ↔ **signac** interoperation in a +> later section. + +[signac]: ../python/signac.md + +Create a new workflow project and place JSON files in each directory: +```bash +{{#include value.sh:init}} +``` +The JSON files must all have the same name. 
Instruct **row** to read these files +with the `workspace.value_file` key in `workflow.toml`: + +```toml +{{#include value-workflow.toml:workspace}} +``` + +Once you create a directory with a **value** file, that value **MUST NOT CHANGE**. Think +of it this way: The results of your computations (the final contents of the directory) +are a mathematical *function* of the **value**. When you want to know the results for +another value, *create a new directory with that value!*. **row** assumes this data +model and [caches] all value files so that it does not need to read thousands of files +every time you execute a **row** command. + +[caches]: ../concepts/cache.md + +## Passing values to commands + +Now that your workspace directories have **values**, you can pass them to your +commands using **template parameters**. You have already used one template parameter: +`{directory}`. Each **template parameter** name is surrounded by curly braces. + +[JSON] files store a (possibly nested) key/value mapping. Use a [*JSON pointer*] to +reference a portion of the directory's value by placing the [*JSON pointer*] between +curly braces. Add the following section to `workflow.toml` that uses **template +parameters** in the action's `command`: + +```toml +{{#include value-workflow.toml:action}} +``` + +[*JSON pointer*]: ../concepts/json-pointers.md + +Execute the following (and answer yes at the prompt): +```bash +{{#include value.sh:submit}} +``` + +You should see: +```plaintext +directory1, seed: 0, pressure: 1.5 +directory2, seed: 1, pressure: 1.5 +directory3, seed: 0, pressure: 2.1 +directory4, seed: 1, pressure: 2.1 +``` + +Consider how you would use this for your own workflows. For example: +```toml +command = './application -s {/seed} -p {/pressure} -o workspace/{directory}/out' +``` + +# Next steps + +You have now assigned **values** to each **directory** in the workspace and learned +how you can use these **values** with **template parameters** in the **command**. 
The +next section will show you how to use **values** to form **groups**. diff --git a/doc/src/guide/tutorial/value.sh b/doc/src/guide/tutorial/value.sh new file mode 100644 index 0000000..34fc832 --- /dev/null +++ b/doc/src/guide/tutorial/value.sh @@ -0,0 +1,19 @@ + +# ANCHOR: init +row init value-workflow +cd value-workflow/workspace + +mkdir directory1 && echo '{"seed": 0, "pressure": 1.5}' > directory1/value.json +mkdir directory2 && echo '{"seed": 1, "pressure": 1.5}' > directory2/value.json +mkdir directory3 && echo '{"seed": 0, "pressure": 2.1}' > directory3/value.json +mkdir directory4 && echo '{"seed": 1, "pressure": 2.1}' > directory4/value.json + +# ANCHOR_END: init + +cd .. + +cp ../value-workflow.toml workflow.toml + +# ANCHOR: submit +row submit +# ANCHOR_END: submit From e8a09401e298340c943e0c6cece094c2ea9ed3e5 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 23 Oct 2024 14:37:37 -0400 Subject: [PATCH 6/9] Remove debug print. --- src/scheduler/bash.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/scheduler/bash.rs b/src/scheduler/bash.rs index f9a5cb0..7bae31b 100644 --- a/src/scheduler/bash.rs +++ b/src/scheduler/bash.rs @@ -280,7 +280,6 @@ done */ fn substitute(&self, command: &str, directory: &Path) -> Result { let replacement = |caps: &Captures| -> Result { - println!("Matching {}", &caps[0]); match &caps[0] { "{workspace_path}" => Ok(shell_quote::Bash::quote( self.workspace_path From 27465b29a29c7f31474ff8ef8404ac45a9035794 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 23 Oct 2024 14:48:21 -0400 Subject: [PATCH 7/9] Update third party licenses. 
--- THIRDPARTY.yaml | 209 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 209 insertions(+) diff --git a/THIRDPARTY.yaml b/THIRDPARTY.yaml index 1d80ed8..0b75873 100644 --- a/THIRDPARTY.yaml +++ b/THIRDPARTY.yaml @@ -7064,6 +7064,215 @@ third_party_libraries: See the License for the specific language governing permissions and limitations under the License. +- package_name: shell-quote + package_version: 0.7.1 + repository: https://github.com/allenap/shell-quote + license: Apache-2.0 + licenses: + - license: Apache-2.0 + text: |2 + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. - package_name: signal-hook package_version: 0.3.17 repository: https://github.com/vorner/signal-hook From a03bcc0114fd00091360e4b8ffda4a0c5532ed2d Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 23 Oct 2024 14:48:41 -0400 Subject: [PATCH 8/9] pre-commit --- doc/src/guide/tutorial/value.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/guide/tutorial/value.md b/doc/src/guide/tutorial/value.md index 01c8611..16020dd 100644 --- a/doc/src/guide/tutorial/value.md +++ b/doc/src/guide/tutorial/value.md @@ -49,7 +49,7 @@ Now that your workspace directories have **values**, you can pass them to your commands using **template parameters**. You have already used one template parameter: `{directory}`. Each **template parameter** name is surrounded by curly braces. -[JSON] files store a (possibly nested) key/value mapping. Use a [*JSON pointer*] to +[JSON] files store a (possibly nested) key/value mapping. Use a [*JSON pointer*] to reference a portion of the directory's value by placing the [*JSON pointer*] between curly braces. Add the following section to `workflow.toml` that uses **template parameters** in the action's `command`: From d63f5a0ceb697643bfc191f760cb3ea60cf67b0c Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 23 Oct 2024 14:56:06 -0400 Subject: [PATCH 9/9] Update release notes. 
--- doc/src/release-notes.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/doc/src/release-notes.md b/doc/src/release-notes.md index b036726..a63f60d 100644 --- a/doc/src/release-notes.md +++ b/doc/src/release-notes.md @@ -1,5 +1,23 @@ # Release notes +## 0.4.0 (not yet released) + +*Highlights:* + +*Added:* + +* In job scripts, set the environment variable `ACTION_WORKSPACE_PATH` to the _relative_ + path to the current workspace. +* `{workspace_path}` template parameter in `action.command` - replaced with the + _relative_ path to the current workspace. +* `{/JSON pointer}` template parameter in `action.command` - replaced with the portion + of the directory's value referenced by the given JSON pointer. + +*Fixed:* + +* All user-provided content (directories, action names, cluster names, and values) are + properly escaped in the bash script output. + ## 0.3.1 (2024-10-04) *Changed:*