Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CI improvements #100

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
17 changes: 2 additions & 15 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,21 +68,8 @@ jobs:
token: ${{ secrets.CLONE_LEGEND_METADATA }}
path: ${{ env.LEGEND_METADATA }}

- name: Get dependencies and install legend-dataflow
run: |
python -m pip install --upgrade uv
python -m uv pip install --upgrade .[runprod]

- name: Set the PRODENV variable
run: |
echo "PRODENV=$(realpath $GITHUB_WORKSPACE/..)" >> $GITHUB_ENV

- name: run workflows in dry-run mode
run: |
snakemake --workflow-profile workflow/profiles/lngs-build-raw -n all-*-daq.gen
snakemake --workflow-profile workflow/profiles/lngs-build-raw -n all-*-raw.gen
snakemake --workflow-profile workflow/profiles/lngs -n all-*-evt.gen
snakemake --workflow-profile workflow/profiles/lngs -n all-*-skm.gen
- name: Run data production tests
run: ./tests/runprod/run-all.sh

test-coverage:
name: Calculate and upload test coverage
Expand Down
17 changes: 17 additions & 0 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Codecov configuration — reference: https://docs.codecov.com/docs/codecovyml-reference
codecov:
  # wait for CI to finish successfully before posting a coverage report
  require_ci_to_pass: true

coverage:
  status:
    # disable all coverage commit-status checks: coverage is collected and
    # uploaded for information only, it must never block a PR
    project:
      default:
        enabled: no
    patch:
      default:
        enabled: no
    changes:
      default:
        enabled: no

github_checks:
  # do not annotate uncovered lines in the GitHub diff view
  annotations: false
31 changes: 31 additions & 0 deletions tests/runprod/conftest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash

# IMPORTANT: this script must be *sourced* from the legend-dataflow directory

_prod_cycle="$(realpath .)"

# Print the value of a key (dotted path in $1) from dataflow-config.yaml,
# expanding the "$_" placeholder to the production cycle directory.
function get_dataflow_config_value() {
    python -c "import dbetto; print(dbetto.AttrsDict(dbetto.utils.load_dict('${_prod_cycle}/dataflow-config.yaml')).${1})" \
        | sed "s|\$_|${_prod_cycle}|g"
}

# Run a command, capturing stdout+stderr; on failure, dump the captured
# output between red error banners. Returns the command's exit status.
function run_test_command() {
    # locals, not globals: this file is sourced, so plain assignments would
    # clobber any "output"/"status" variables of the calling script
    local output status

    printf "\033[32m%s\033[0m\n" "INFO: running command: $*"

    output=$("$@" 2>&1)
    status=$?

    if [ $status -ne 0 ]; then
        printf "\033[31m%s\033[0m\n" "vvvvvv ERROR: command failed with status $status vvvvvv"
        echo "$output"
        printf "\033[31m%s\033[0m\n" "^^^^^^ ERROR: command failed with status $status ^^^^^^"
    fi

    return $status
}

# make the helpers available to child shells (e.g. snakemake shell rules)
export -f get_dataflow_config_value run_test_command

PRODENV="$(realpath ..)"
export PRODENV
13 changes: 13 additions & 0 deletions tests/runprod/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash

# IMPORTANT: this script must be executed from the legend-dataflow directory

# fail fast: a broken environment install must not let the test suite proceed
set -euo pipefail

printf "\033[32m%s\033[0m\n" "DEBUG: setting up test environment"

# production environment root is the parent of the legend-dataflow checkout
PRODENV="$(realpath ..)"
export PRODENV

python -m pip --quiet install --upgrade pip wheel setuptools
python -m pip --quiet install --upgrade '.[runprod]'

# (re-)install the production cycle software stack on a bare system
dataprod -v install --remove --system bare -- dataflow-config.yaml
10 changes: 10 additions & 0 deletions tests/runprod/run-all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# IMPORTANT: this script must be executed from the legend-dataflow directory

# abort immediately if the environment could not be set up
./tests/runprod/install.sh || exit 1

# run every test script; remember failures so a failing test makes this
# runner (and therefore CI) exit non-zero instead of always returning 0
status=0
for test in tests/runprod/test-*.sh; do
    printf "\033[32m%s\033[0m\n" "INFO: running test $test"
    ./"$test" || status=1
done

exit "$status"
42 changes: 42 additions & 0 deletions tests/runprod/test-raw.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env bash

# IMPORTANT: this script must be executed from the legend-dataflow directory

# shellcheck disable=SC1091
source "$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)/conftest.sh"

sandbox=$(get_dataflow_config_value paths.sandbox_path)
mkdir -p "${sandbox}" || exit 1

# create fake (empty) DAQ files for the raw-tier workflow to pick up;
# the "|| exit 1" propagates a failure out of the subshell, which a bare
# "exit 1" inside "( ... )" would not do
(
    cd "${sandbox}" || exit 1
    touch \
        l200-p03-r000-cal-20230311T235840Z.orca \
        l200-p03-r001-cal-20230317T211819Z.orca \
        l200-p03-r002-cal-20230324T161401Z.orca \
        l200-p04-r000-cal-20230414T215158Z.orca \
        l200-p04-r001-cal-20230421T131817Z.orca \
        l200-p03-r000-phy-20230312T043356Z.orca \
        l200-p03-r001-phy-20230318T015140Z.orca \
        l200-p03-r002-phy-20230324T205907Z.orca \
        l200-p04-r000-phy-20230415T033517Z.orca \
        l200-p04-r001-phy-20230421T174901Z.orca \
        l200-p13-r006-acs-20241221T150307Z.fcio \
        l200-p13-r006-anc-20241221T150249Z.fcio \
        l200-p13-r002-anp-20241217T094846Z.fcio
) || exit 1

# FIXME: --touch does not do what I thought. need to add this functionality to
# the future plugin
_smk_opts=(
    --forcerun
    --touch
    --config system=bare
    --workflow-profile workflow/profiles/lngs-build-raw
)

# dry-exercise the daq and raw tier targets (pattern is expanded by snakemake)
for tier in daq raw; do
    run_test_command snakemake "${_smk_opts[@]}" "all-*-${tier}.gen" || exit 1
done

rm -rf "${sandbox}"
45 changes: 45 additions & 0 deletions tests/runprod/test-skm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env bash

# IMPORTANT: this script must be executed from the legend-dataflow directory

# shellcheck disable=SC1091
source "$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)/conftest.sh"

rawdir=$(get_dataflow_config_value paths.tier_raw)
mkdir -p "${rawdir}" || exit 1

# create the parent directories of $1, then an empty file at $1
function mkdir_n_touch() {
    mkdir -p "$(dirname "${1}")" || return 1
    touch "${1}" || return 1
}

# fake raw-tier files, laid out as <type>/<period>/<run>/<filename>
rawfiles=(
    phy/p04/r001/l200-p04-r001-phy-20230421T174901Z-tier_raw.lh5
    phy/p04/r000/l200-p04-r000-phy-20230415T033517Z-tier_raw.lh5
    phy/p03/r001/l200-p03-r001-phy-20230318T015140Z-tier_raw.lh5
    phy/p03/r000/l200-p03-r000-phy-20230312T043356Z-tier_raw.lh5
    phy/p03/r002/l200-p03-r002-phy-20230324T205907Z-tier_raw.lh5
    cal/p04/r001/l200-p04-r001-cal-20230421T131817Z-tier_raw.lh5
    cal/p04/r000/l200-p04-r000-cal-20230414T215158Z-tier_raw.lh5
    cal/p03/r001/l200-p03-r001-cal-20230317T211819Z-tier_raw.lh5
    cal/p03/r000/l200-p03-r000-cal-20230311T235840Z-tier_raw.lh5
    cal/p03/r002/l200-p03-r002-cal-20230324T161401Z-tier_raw.lh5
    anp/p13/r002/l200-p13-r002-anp-20241217T094846Z-tier_raw.lh5
    anc/p13/r006/l200-p13-r006-anc-20241221T150249Z-tier_raw.lh5
    acs/p13/r006/l200-p13-r006-acs-20241221T150307Z-tier_raw.lh5
)

# "|| exit 1" both inside and after the subshell so that a failed cd or
# touch actually aborts the test instead of only leaving the subshell
(
    cd "${rawdir}" || exit 1
    for file in "${rawfiles[@]}"; do
        mkdir_n_touch "$file" || exit 1
    done
) || exit 1

_smk_opts=(
    --touch
    --config system=bare
    --workflow-profile workflow/profiles/lngs
)

run_test_command snakemake "${_smk_opts[@]}" "all-*-evt.gen" || exit 1
7 changes: 4 additions & 3 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,11 @@ localrules:
onstart:
print("INFO: starting workflow")
# Make sure some packages are initialized before we begin to avoid race conditions
for pkg in ["dspeed", "lgdo", "matplotlib"]:
shell(execenv.execenv_pyexe(config, "python") + "-c 'import " + pkg + "'")
if not workflow.touch:
for pkg in ["dspeed", "lgdo", "matplotlib"]:
shell(execenv.execenv_pyexe(config, "python") + "-c 'import " + pkg + "'")

# Log parameter catalogs in validity files
# Log parameter catalogs in validity files
hit_par_cat_file = Path(utils.pars_path(config)) / "hit" / "validity.yaml"
if hit_par_cat_file.is_file():
hit_par_cat_file.unlink()
Expand Down
23 changes: 17 additions & 6 deletions workflow/Snakefile-build-raw
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ onstart:
print("INFO: initializing workflow")

# Make sure some packages are initialized before we send jobs to avoid race conditions
shell(execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5, matplotlib'")
if not workflow.touch:
shell(
execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5, matplotlib'"
)

raw_par_cat_file = Path(utils.pars_path(config)) / "raw" / "validity.yaml"
if raw_par_cat_file.is_file():
Expand Down Expand Up @@ -87,16 +90,24 @@ rule gen_filelist:


rule sort_data:
    """Move DAQ data from sandbox to organized folder.

    This rule moves the DAQ data from the unsorted sandbox directory to the
    correct location in the `tier_raw` folder.
    """
    input:
        patt.get_pattern_tier_daq_unsorted(config),
    output:
        patt.get_pattern_tier_daq(config),
    shell:
        "mv {input} {output}"


# same as sort_data, but matching DAQ files in FCIO format instead of the
# default extension
use rule sort_data as sort_data_fcio with:
    input:
        patt.get_pattern_tier_daq_unsorted(config, extension="fcio"),
    output:
        patt.get_pattern_tier_daq(config, extension="fcio"),


# vim: filetype=snakemake
19 changes: 13 additions & 6 deletions workflow/src/legenddataflow/execenv.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def execenv_prefix(
cmdline = []
cmdenv = {}
if "execenv" in config and "env" in config.execenv:
cmdenv = config.execenv.env
cmdenv |= config.execenv.env

if "execenv" in config and "cmd" in config.execenv and "arg" in config.execenv:
cmdline = shlex.split(config.execenv.cmd)
Expand Down Expand Up @@ -137,7 +137,12 @@ def dataprod() -> None:
"config_file", help="production cycle configuration file"
)
parser_install.add_argument(
"--system", help="system running on", default="local", type=str, required=False
"-s",
"--system",
help="system running on",
default="bare",
type=str,
required=False,
)
parser_install.add_argument(
"-r",
Expand Down Expand Up @@ -222,7 +227,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs):
msg = "running: " + _execenv2str(cmd_expr, cmd_env)
log.debug(msg)

subprocess.run(cmd_expr, env=cmd_env, check=True, **kwargs)
subprocess.run(cmd_expr, env=os.environ | cmd_env, check=True, **kwargs)

cmd_prefix, cmd_env = execenv_prefix(config_dict, as_string=False)
# HACK: get the full path to this python interpreter in case there is no execenv prefix
Expand All @@ -242,12 +247,12 @@ def _runcmd(cmd_expr, cmd_env, **kwargs):
uv_expr = [*cmd_prefix, "uv", "--version"]
except (subprocess.CalledProcessError, FileNotFoundError):
# we'll use uv from the virtualenv (installed below)
uv_expr = [*python_venv, "-m", "uv"]
uv_expr = [*python_venv, "-m", "uv", "--quiet"]

# configure venv
if has_uv:
# if uv is available, just use it to create the venv
cmd_expr = [*cmd_prefix, "uv", "venv", path_install]
cmd_expr = [*cmd_prefix, "uv", "--quiet", "venv", path_install]
else:
# otherwise use python-venv
cmd_expr = [*cmd_prefix, python, "-m", "venv", path_install]
Expand All @@ -260,6 +265,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs):
*python_venv,
"-m",
"pip",
"--quiet",
"--no-cache-dir",
"install",
"--upgrade",
Expand All @@ -274,6 +280,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs):
*python_venv,
"-m",
"pip",
"--quiet",
"--no-cache-dir",
"install",
"--no-warn-script-location",
Expand Down Expand Up @@ -319,4 +326,4 @@ def cmdexec(args) -> None:
msg = "running: " + _execenv2str(cmd_expr, cmd_env)
log.debug(msg)

subprocess.run(cmd_expr, env=cmd_env, check=True)
subprocess.run(cmd_expr, env=os.environ | cmd_env, check=True)
9 changes: 6 additions & 3 deletions workflow/src/legenddataflow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,12 @@ def subst_vars(


def subst_vars_in_snakemake_config(workflow, config):
config_filename = workflow.overwrite_configfiles[
0
] # ToDo: Better way of handling this?
if len(workflow.overwrite_configfiles) == 0:
msg = "configfile not set!"
raise RuntimeError(msg)

config_filename = workflow.overwrite_configfiles[0]

subst_vars(
config,
var_values={"_": Path(config_filename).parent},
Expand Down
Loading