diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a581187..558e613 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -68,21 +68,8 @@ jobs:
           token: ${{ secrets.CLONE_LEGEND_METADATA }}
           path: ${{ env.LEGEND_METADATA }}

-      - name: Get dependencies and install legend-dataflow
-        run: |
-          python -m pip install --upgrade uv
-          python -m uv pip install --upgrade .[runprod]
-
-      - name: Set the PRODENV variable
-        run: |
-          echo "PRODENV=$(realpath $GITHUB_WORKSPACE/..)" >> $GITHUB_ENV
-
-      - name: run workflows in dry-run mode
-        run: |
-          snakemake --workflow-profile workflow/profiles/lngs-build-raw -n all-*-daq.gen
-          snakemake --workflow-profile workflow/profiles/lngs-build-raw -n all-*-raw.gen
-          snakemake --workflow-profile workflow/profiles/lngs -n all-*-evt.gen
-          snakemake --workflow-profile workflow/profiles/lngs -n all-*-skm.gen
+      - name: Run data production tests
+        run: ./tests/runprod/run-all.sh

   test-coverage:
     name: Calculate and upload test coverage
diff --git a/codecov.yml b/codecov.yml
new file mode 100644
index 0000000..6b72d0a
--- /dev/null
+++ b/codecov.yml
@@ -0,0 +1,17 @@
+codecov:
+  require_ci_to_pass: true
+
+coverage:
+  status:
+    project:
+      default:
+        enabled: no
+    patch:
+      default:
+        enabled: no
+    changes:
+      default:
+        enabled: no
+
+github_checks:
+  annotations: false
diff --git a/tests/runprod/conftest.sh b/tests/runprod/conftest.sh
new file mode 100644
index 0000000..d7747ba
--- /dev/null
+++ b/tests/runprod/conftest.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# IMPORTANT: this script must be *sourced* from the legend-dataflow directory
+
+_prod_cycle="$(realpath .)"
+
+function get_dataflow_config_value() {
+    python -c "import dbetto; print(dbetto.AttrsDict(dbetto.utils.load_dict('${_prod_cycle}/dataflow-config.yaml')).${1})" \
+        | sed "s|\$_|${_prod_cycle}|g"
+}
+
+run_test_command() {
+    output=$("$@" 2>&1)
+    status=$?
+
+    if [ $status -ne 0 ]; then
+        echo "::error::command failed with status $status"
+        echo "$output"
+    fi
+
+    return $status
+}
+
+
+export -f get_dataflow_config_value run_test_command
+
+PRODENV="$(realpath ..)"
+export PRODENV
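For reference, the one-liner wrapped by `get_dataflow_config_value` boils down to the following Python sketch. This is hedged: the `paths.sandbox_path` key and the cycle path are example values taken from the test scripts below, and `dbetto.AttrsDict` is assumed to expose nested dictionary keys as attributes, as the inline `python -c` call implies.

```python
# Sketch of what get_dataflow_config_value does, minus the shell plumbing.
# The config key and production-cycle path are example values.
import dbetto

config = dbetto.AttrsDict(dbetto.utils.load_dict("dataflow-config.yaml"))
value = str(config.paths.sandbox_path)

# conftest.sh additionally replaces the $_ placeholder (the production
# cycle directory) via sed; the Python equivalent would be:
value = value.replace("$_", "/path/to/prod-cycle")
print(value)
```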
diff --git a/tests/runprod/install.sh b/tests/runprod/install.sh
new file mode 100755
index 0000000..fbe8259
--- /dev/null
+++ b/tests/runprod/install.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+# IMPORTANT: this script must be executed from the legend-dataflow directory
+
+echo "::group::setting up test environment"
+
+PRODENV="$(realpath ..)"
+export PRODENV
+
+python -m pip --quiet install --upgrade pip wheel setuptools
+python -m pip --quiet install --upgrade '.[runprod]'
+
+dataprod -v install --remove --system bare -- dataflow-config.yaml
+
+echo "::endgroup::"
diff --git a/tests/runprod/run-all.sh b/tests/runprod/run-all.sh
new file mode 100755
index 0000000..3382525
--- /dev/null
+++ b/tests/runprod/run-all.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+# IMPORTANT: this script must be executed from the legend-dataflow directory
+
+./tests/runprod/install.sh
+
+for test in tests/runprod/test-*.sh; do
+    echo "::group::test $test"
+    ./"$test" || exit 1
+    echo "::endgroup::"
+done
diff --git a/tests/runprod/test-raw.sh b/tests/runprod/test-raw.sh
new file mode 100755
index 0000000..059bd40
--- /dev/null
+++ b/tests/runprod/test-raw.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+# IMPORTANT: this script must be executed from the legend-dataflow directory
+
+# shellcheck disable=SC1091
+source "$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)/conftest.sh"
+
+sandbox=$(get_dataflow_config_value paths.sandbox_path)
+mkdir -p "${sandbox}"
+
+(
+    cd "${sandbox}" || exit 1
+    touch \
+        l200-p03-r000-cal-20230311T235840Z.orca \
+        l200-p03-r001-cal-20230317T211819Z.orca \
+        l200-p03-r002-cal-20230324T161401Z.orca \
+        l200-p04-r000-cal-20230414T215158Z.orca \
+        l200-p04-r001-cal-20230421T131817Z.orca \
+        l200-p03-r000-phy-20230312T043356Z.orca \
+        l200-p03-r001-phy-20230318T015140Z.orca \
+        l200-p03-r002-phy-20230324T205907Z.orca \
+        l200-p04-r000-phy-20230415T033517Z.orca \
+        l200-p04-r001-phy-20230421T174901Z.orca \
+        l200-p13-r006-acs-20241221T150307Z.fcio \
+        l200-p13-r006-anc-20241221T150249Z.fcio \
+        l200-p13-r002-anp-20241217T094846Z.fcio
+)
+
+# FIXME: --touch does not do what I thought. Need to add this functionality
+# to the future plugin.
+_smk_opts=(
+    --forcerun
+    --touch
+    --config system=bare
+    --workflow-profile workflow/profiles/lngs-build-raw
+)
+
+for tier in daq raw; do
+    run_test_command snakemake "${_smk_opts[@]}" "all-*-${tier}.gen" || exit 1
+done
+
+rm -rf "${sandbox}"
diff --git a/tests/runprod/test-skm.sh b/tests/runprod/test-skm.sh
new file mode 100755
index 0000000..7f39de7
--- /dev/null
+++ b/tests/runprod/test-skm.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+# IMPORTANT: this script must be executed from the legend-dataflow directory
+
+# shellcheck disable=SC1091
+source "$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)/conftest.sh"
+
+rawdir=$(get_dataflow_config_value paths.tier_raw)
+mkdir -p "${rawdir}" || exit 1
+
+function mkdir_n_touch() {
+    mkdir -p "$(dirname "${1}")" || return 1
+    touch "${1}" || return 1
+}
+
+rawfiles=(
+    phy/p04/r001/l200-p04-r001-phy-20230421T174901Z-tier_raw.lh5
+    phy/p04/r000/l200-p04-r000-phy-20230415T033517Z-tier_raw.lh5
+    phy/p03/r001/l200-p03-r001-phy-20230318T015140Z-tier_raw.lh5
+    phy/p03/r000/l200-p03-r000-phy-20230312T043356Z-tier_raw.lh5
+    phy/p03/r002/l200-p03-r002-phy-20230324T205907Z-tier_raw.lh5
+    cal/p04/r001/l200-p04-r001-cal-20230421T131817Z-tier_raw.lh5
+    cal/p04/r000/l200-p04-r000-cal-20230414T215158Z-tier_raw.lh5
+    cal/p03/r001/l200-p03-r001-cal-20230317T211819Z-tier_raw.lh5
+    cal/p03/r000/l200-p03-r000-cal-20230311T235840Z-tier_raw.lh5
+    cal/p03/r002/l200-p03-r002-cal-20230324T161401Z-tier_raw.lh5
+    anp/p13/r002/l200-p13-r002-anp-20241217T094846Z-tier_raw.lh5
+    anc/p13/r006/l200-p13-r006-anc-20241221T150249Z-tier_raw.lh5
+    acs/p13/r006/l200-p13-r006-acs-20241221T150307Z-tier_raw.lh5
+)
+
+(
+    cd "${rawdir}" || exit 1
+    for file in "${rawfiles[@]}"; do
+        mkdir_n_touch "$file"
+    done
+)
+
+_smk_opts=(
+    --touch
+    --config system=bare
+    --workflow-profile workflow/profiles/lngs
+)
+
+run_test_command snakemake "${_smk_opts[@]}" "all-*-evt.gen" || exit 1
diff --git a/workflow/Snakefile b/workflow/Snakefile
index f64c8f7..c202f44 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -80,10 +80,11 @@ localrules:
 onstart:
     print("INFO: starting workflow")

     # Make sure some packages are initialized before we begin to avoid race conditions
-    for pkg in ["dspeed", "lgdo", "matplotlib"]:
-        shell(execenv.execenv_pyexe(config, "python") + "-c 'import " + pkg + "'")
+    if not workflow.touch:
+        for pkg in ["dspeed", "lgdo", "matplotlib"]:
+            shell(execenv.execenv_pyexe(config, "python") + "-c 'import " + pkg + "'")

-    # Log parameter catalogs in validity files
+    # Log parameter catalogs in validity files
     hit_par_cat_file = Path(utils.pars_path(config)) / "hit" / "validity.yaml"
     if hit_par_cat_file.is_file():
         hit_par_cat_file.unlink()
diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw
index 31f1e21..eef2752 100644
--- a/workflow/Snakefile-build-raw
+++ b/workflow/Snakefile-build-raw
@@ -54,7 +54,10 @@ onstart:
     print("INFO: initializing workflow")

     # Make sure some packages are initialized before we send jobs to avoid race conditions
-    shell(execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5, matplotlib'")
+    if not workflow.touch:
+        shell(
+            execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5, matplotlib'"
+        )

     raw_par_cat_file = Path(utils.pars_path(config)) / "raw" / "validity.yaml"
     if raw_par_cat_file.is_file():
@@ -87,16 +90,24 @@ rule gen_filelist:


 rule sort_data:
-    """
-    This rules moves the daq data from the unsorted sandbox dir
-    to the sorted dirs under generated
+    """Move DAQ data from sandbox to organized folder.
+
+    This rule moves the DAQ data from the unsorted sandbox directory to the
+    correct location in the `tier_raw` folder.
     """
     input:
-        patt.get_pattern_tier_daq_unsorted(config, extension="fcio"),
+        patt.get_pattern_tier_daq_unsorted(config),
     output:
-        patt.get_pattern_tier_daq(config, extension="fcio"),
+        patt.get_pattern_tier_daq(config),
     shell:
         "mv {input} {output}"


+use rule sort_data as sort_data_fcio with:
+    input:
+        patt.get_pattern_tier_daq_unsorted(config, extension="fcio"),
+    output:
+        patt.get_pattern_tier_daq(config, extension="fcio"),
+
+
 # vim: filetype=snakemake
diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk
index 1e48623..a729699 100644
--- a/workflow/rules/ann.smk
+++ b/workflow/rules/ann.smk
@@ -15,7 +15,9 @@ from legenddataflow.execenv import execenv_pyexe
 rule build_ann:
     input:
         dsp_file=get_pattern_tier(config, "dsp", check_in_cycle=False),
-        pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"),
+        pars_file=lambda wildcards: get_input_par_file(
+            setup=config, wildcards=wildcards, tier="ann", name="cuts"
+        ),
     params:
         timestamp="{timestamp}",
         datatype="{datatype}",
@@ -45,7 +47,9 @@
 rule build_pan:
     input:
         dsp_file=get_pattern_tier(config, "psp", check_in_cycle=False),
-        pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"),
+        pars_file=lambda wildcards: get_input_par_file(
+            setup=config, wildcards=wildcards, tier="ann", name="cuts"
+        ),
     params:
         timestamp="{timestamp}",
         datatype="{datatype}",
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 663635a..dced365 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -66,16 +66,21 @@ def set_last_rule_name(workflow, new_name):
     workflow.check_localrules()


-def get_input_par_file(wildcards, tier, name):
+def get_input_par_file(setup, wildcards, tier, name):
+    allow_none = setup.get("allow_none", False)
     par_overwrite_file = Path(patt.par_overwrite_path(config)) / tier / "validity.yaml"
     pars_files_overwrite = Catalog.get_files(
         par_overwrite_file,
         wildcards.timestamp,
+        category=wildcards.datatype if hasattr(wildcards, "datatype") else "all",
     )
     for pars_file in pars_files_overwrite:
         if name in str(pars_file):
             return Path(patt.par_overwrite_path(config)) / tier / pars_file
-    raise ValueError(f"Could not find model in {pars_files_overwrite}")
+    if allow_none or (wildcards.datatype != "phy"):
+        return []
+    else:
+        raise ValueError(f"Could not find model in {pars_files_overwrite}")


 def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None):
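With the new signature, call sites pass the Snakemake `config` mapping as `setup`, and a missing overwrite is now tolerated (empty input list) instead of always raising. A hedged sketch of the calling pattern, mirroring the rule inputs patched elsewhere in this diff (the `tier`/`name` values are examples):

```python
# Hypothetical rule input using the updated helper; the keyword style
# mirrors the call sites patched in this diff.
pars_file = lambda wildcards: get_input_par_file(
    setup=config,         # consulted for config.get("allow_none", False)
    wildcards=wildcards,  # needs .timestamp; .datatype selects the category
    tier="evt",
    name="xtc",
)
# When no overwrite matches: returns [] if allow_none is true or if
# wildcards.datatype != "phy" (note the fallback reads wildcards.datatype
# directly, so wildcards must carry a datatype attribute); otherwise it
# still raises ValueError.
```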
diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk
index e410a80..d391fff 100644
--- a/workflow/rules/dsp_pars_geds.smk
+++ b/workflow/rules/dsp_pars_geds.smk
@@ -214,10 +214,12 @@ rule build_pars_dsp_eopt_geds:
 rule build_svm_dsp_geds:
     input:
         hyperpars=lambda wildcards: get_input_par_file(
-            wildcards, "dsp", "svm_hyperpars"
+            setup=config, wildcards=wildcards, tier="dsp", name="svm_hyperpars"
         ),
         train_data=lambda wildcards: str(
-            get_input_par_file(wildcards, "dsp", "svm_hyperpars")
+            get_input_par_file(
+                setup=config, wildcards=wildcards, tier="dsp", name="svm_hyperpars"
+            )
         ).replace("hyperpars.yaml", "train.lh5"),
     params:
         timestamp="{timestamp}",
diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk
index c343ac9..efb662e 100644
--- a/workflow/rules/evt.smk
+++ b/workflow/rules/evt.smk
@@ -26,7 +26,7 @@ rule build_evt:
             config, wildcards.timestamp, "hit"
         ),
         xtalk_matrix=lambda wildcards: get_input_par_file(
-            tier="evt", wildcards=wildcards, name="xtc"
+            setup=config, tier="evt", wildcards=wildcards, name="xtc"
         ),
     output:
         get_pattern_tier(config, "evt", check_in_cycle=check_in_cycle),
@@ -77,7 +77,7 @@ rule build_pet:
             config, wildcards.timestamp, "pht"
         ),
         xtalk_matrix=lambda wildcards: get_input_par_file(
-            tier="pet", wildcards=wildcards, name="xtc"
+            setup=config, tier="pet", wildcards=wildcards, name="xtc"
         ),
     output:
         get_pattern_tier(config, "pet", check_in_cycle=check_in_cycle),
diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk
index d651c1f..e2983bd 100644
--- a/workflow/rules/psp_pars_geds.smk
+++ b/workflow/rules/psp_pars_geds.smk
@@ -167,10 +167,12 @@ workflow._ruleorder.add(*rule_order_list)  # [::-1]
 rule build_svm_psp:
     input:
         hyperpars=lambda wildcards: get_input_par_file(
-            wildcards, "psp", "svm_hyperpars"
+            setup=config, wildcards=wildcards, tier="psp", name="svm_hyperpars"
         ),
         train_data=lambda wildcards: str(
-            get_input_par_file(wildcards, "psp", "svm_hyperpars")
+            get_input_par_file(
+                setup=config, wildcards=wildcards, tier="psp", name="svm_hyperpars"
+            )
         ).replace("hyperpars.yaml", "train.lh5"),
     output:
         dsp_pars=get_pattern_pars(config, "psp", "svm", "pkl"),
diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py
index ae61247..8e02f3a 100644
--- a/workflow/src/legenddataflow/execenv.py
+++ b/workflow/src/legenddataflow/execenv.py
@@ -53,7 +53,7 @@ def execenv_prefix(
     cmdline = []
     cmdenv = {}
     if "execenv" in config and "env" in config.execenv:
-        cmdenv = config.execenv.env
+        cmdenv |= config.execenv.env

     if "execenv" in config and "cmd" in config.execenv and "arg" in config.execenv:
         cmdline = shlex.split(config.execenv.cmd)
@@ -137,7 +137,12 @@ def dataprod() -> None:
         "config_file", help="production cycle configuration file"
     )
     parser_install.add_argument(
-        "--system", help="system running on", default="local", type=str, required=False
+        "-s",
+        "--system",
+        help="system running on",
+        default="bare",
+        type=str,
+        required=False,
    )
     parser_install.add_argument(
         "-r",
@@ -222,7 +227,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs):
         msg = "running: " + _execenv2str(cmd_expr, cmd_env)
         log.debug(msg)

-        subprocess.run(cmd_expr, env=cmd_env, check=True, **kwargs)
+        subprocess.run(cmd_expr, env=os.environ | cmd_env, check=True, **kwargs)

     cmd_prefix, cmd_env = execenv_prefix(config_dict, as_string=False)
     # HACK: get the full path to this python interpreter in case there is no execenv prefix
@@ -242,12 +247,12 @@ def _runcmd(cmd_expr, cmd_env, **kwargs):
         uv_expr = [*cmd_prefix, "uv", "--version"]
     except (subprocess.CalledProcessError, FileNotFoundError):
         # we'll use uv from the virtualenv (installed below)
-        uv_expr = [*python_venv, "-m", "uv"]
+        uv_expr = [*python_venv, "-m", "uv", "--quiet"]

     # configure venv
     if has_uv:
         # if uv is available, just use it to create the venv
-        cmd_expr = [*cmd_prefix, "uv", "venv", path_install]
+        cmd_expr = [*cmd_prefix, "uv", "--quiet", "venv", path_install]
     else:
         # otherwise use python-venv
         cmd_expr = [*cmd_prefix, python, "-m", "venv", path_install]
@@ -260,6 +265,7 @@
         *python_venv,
         "-m",
         "pip",
+        "--quiet",
         "--no-cache-dir",
         "install",
         "--upgrade",
@@ -274,6 +280,7 @@
         *python_venv,
         "-m",
         "pip",
+        "--quiet",
         "--no-cache-dir",
         "install",
         "--no-warn-script-location",
@@ -319,4 +326,4 @@ def cmdexec(args) -> None:
     msg = "running: " + _execenv2str(cmd_expr, cmd_env)
     log.debug(msg)

-    subprocess.run(cmd_expr, env=cmd_env, check=True)
+    subprocess.run(cmd_expr, env=os.environ | cmd_env, check=True)
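Both `subprocess.run` call sites now layer `cmd_env` on top of the parent environment rather than replacing it outright; with a bare `env=cmd_env`, the child process would lose `PATH`, `HOME`, and the rest of the inherited environment. A minimal demonstration of the merge semantics (the `PRODENV` value is an example):

```python
import os
import subprocess

cmd_env = {"PRODENV": "/path/to/prodenv"}  # example execenv variable

# os.environ | cmd_env evaluates to a plain dict holding the full parent
# environment, with the right-hand keys added (or overriding on conflict).
# The child process therefore keeps PATH, HOME, etc.
subprocess.run(["env"], env=os.environ | cmd_env, check=True)
```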
diff --git a/workflow/src/legenddataflow/utils.py b/workflow/src/legenddataflow/utils.py
index 8655e42..dc90ed0 100644
--- a/workflow/src/legenddataflow/utils.py
+++ b/workflow/src/legenddataflow/utils.py
@@ -161,9 +161,12 @@ def subst_vars(


 def subst_vars_in_snakemake_config(workflow, config):
-    config_filename = workflow.overwrite_configfiles[
-        0
-    ]  # ToDo: Better way of handling this?
+    if len(workflow.overwrite_configfiles) == 0:
+        msg = "configfile not set!"
+        raise RuntimeError(msg)
+
+    config_filename = workflow.overwrite_configfiles[0]
+
     subst_vars(
         config,
         var_values={"_": Path(config_filename).parent},
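The stricter check above guards the `$_` substitution that the new test helpers emulate with sed: `_` is bound to the directory containing the config file, so paths in `dataflow-config.yaml` can be written as `$_/...`. A minimal stand-in for `subst_vars` illustrating the idea (the config values are hypothetical; the real implementation lives in `workflow/src/legenddataflow/utils.py`):

```python
from pathlib import Path

# Hypothetical config content; in practice this is read from dataflow-config.yaml.
config = {"paths": {"sandbox_path": "$_/sandbox", "tier_raw": "$_/generated/tier/raw"}}
config_filename = "/data/prod-cycle/dataflow-config.yaml"
var_values = {"_": str(Path(config_filename).parent)}


def subst(obj):
    """Replace $VAR placeholders in all string values of a nested dict."""
    if isinstance(obj, dict):
        return {key: subst(val) for key, val in obj.items()}
    if isinstance(obj, str):
        for var, val in var_values.items():
            obj = obj.replace(f"${var}", val)
    return obj


print(subst(config))
# {'paths': {'sandbox_path': '/data/prod-cycle/sandbox',
#            'tier_raw': '/data/prod-cycle/generated/tier/raw'}}
```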