From 70accc0e170761098cd6fa4e0ec325c3d7ed597f Mon Sep 17 00:00:00 2001
From: WXTIM <26465611+wxtim@users.noreply.github.com>
Date: Fri, 28 Jun 2024 12:02:53 +0100
Subject: [PATCH] added simplest example

---
Review notes (below the three-dash line, so not part of the commit message):
 * flow.cylc: the example script in the description now starts with
   "#!/bin/bash" (was "#!bin/bash"), matching simple_script.sh.
 * flow.cylc: [[[directives]]] entries are written as "key = value"
   ("--memory = LOTS", "--CPU = MANY"); Cylc config items need the "=".
 * Hunk line counts are unchanged; the blob id on the flow.cylc "index"
   line predates these two fixes.

 simplest-useful/bin/get_data.sh     | 20 +++++++++++
 simplest-useful/bin/process_data.sh |  8 +++++
 simplest-useful/flow.cylc           | 56 +++++++++++++++++++++++++++++
 simplest-useful/simple_script.sh    |  8 +++++
 4 files changed, 92 insertions(+)
 create mode 100755 simplest-useful/bin/get_data.sh
 create mode 100755 simplest-useful/bin/process_data.sh
 create mode 100644 simplest-useful/flow.cylc
 create mode 100755 simplest-useful/simple_script.sh

diff --git a/simplest-useful/bin/get_data.sh b/simplest-useful/bin/get_data.sh
new file mode 100755
index 0000000..ecf6822
--- /dev/null
+++ b/simplest-useful/bin/get_data.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+HEAD="[MY SUPERCOMPUTER] "
+
+echo "${HEAD}Waiting for resources"
+sleep 7
+echo "${HEAD}"
+
+if [[ -f output ]]; then
+    rm output
+fi
+
+if [[ $((RANDOM % 4)) == 0 ]]; then
+    echo "Data Retrieval Succeeded"
+    touch output
+    exit 0
+else
+    echo "Data Retrieval Failed"
+    exit 1
+fi
diff --git a/simplest-useful/bin/process_data.sh b/simplest-useful/bin/process_data.sh
new file mode 100755
index 0000000..705c89b
--- /dev/null
+++ b/simplest-useful/bin/process_data.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+cylc message -- "what is the shortest possible timespan for a pretend hpc job?"
+if [[ ! -f output ]]; then
+    echo 'ERROR - NO INPUT DATA!'
+    exit 1
+fi
+sleep 14
diff --git a/simplest-useful/flow.cylc b/simplest-useful/flow.cylc
new file mode 100644
index 0000000..f6884fb
--- /dev/null
+++ b/simplest-useful/flow.cylc
@@ -0,0 +1,56 @@
+[meta]
+    title = Pretty much the simplest use case.
+    description = """
+    ## What is the smallest workflow worth converting to Cylc?
+
+    Consider the case where you want to get some data and
+    do some number crunching:
+
+    ```bash
+    #!/bin/bash
+    #@supercomputer --time 300
+    #@supercomputer --memory LOTS
+    #@supercomputer --CPU MANY
+
+    get_data.sh
+
+    process_data.sh
+    ```
+
+    Where `get_data.sh` is a data retrieval programme:
+
+    * High IO & Slow
+    * Low Memory and CPU requirements
+    * Might be flaky?
+
+    And `process_data.sh`
+
+    * High memory and CPU
+
+    ## Why use Cylc
+
+    #. Don't ask for resources until pre-reqs are done
+    #. Retry flaky steps
+    #. Using `execution time limit` means that Cylc knows that
+       a task has taken too long even if communications are
+       disrupted.
+    """
+
+[scheduling]
+    [[graph]]
+        R1 = get_data => process_data
+
+[runtime]
+    [[get_data]]
+        script = get_data.sh
+        platform = any_old_server
+        execution retry delays = 4*PT15M, PT1D
+
+    [[process_data]]
+        script = process_data.sh
+        platform = supercomputer
+        # Cylc converts to #@supercomputer --time 300
+        execution time limit = PT5M
+        [[[directives]]]
+            --memory = LOTS
+            --CPU = MANY
diff --git a/simplest-useful/simple_script.sh b/simplest-useful/simple_script.sh
new file mode 100755
index 0000000..6b5e7b1
--- /dev/null
+++ b/simplest-useful/simple_script.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+#@supercomputer --time 300
+#@supercomputer --memory LOTS
+#@supercomputer --CPU MANY
+
+./bin/get_data.sh
+
+./bin/process_data.sh