Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CI improvements #100

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
17 changes: 2 additions & 15 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,21 +68,8 @@ jobs:
token: ${{ secrets.CLONE_LEGEND_METADATA }}
path: ${{ env.LEGEND_METADATA }}

- name: Get dependencies and install legend-dataflow
run: |
python -m pip install --upgrade uv
python -m uv pip install --upgrade .[runprod]

- name: Set the PRODENV variable
run: |
echo "PRODENV=$(realpath $GITHUB_WORKSPACE/..)" >> $GITHUB_ENV

- name: run workflows in dry-run mode
run: |
snakemake --workflow-profile workflow/profiles/lngs-build-raw -n all-*-daq.gen
snakemake --workflow-profile workflow/profiles/lngs-build-raw -n all-*-raw.gen
snakemake --workflow-profile workflow/profiles/lngs -n all-*-evt.gen
snakemake --workflow-profile workflow/profiles/lngs -n all-*-skm.gen
- name: Run data production tests
run: ./tests/runprod/run-all.sh

test-coverage:
name: Calculate and upload test coverage
Expand Down
17 changes: 17 additions & 0 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Codecov configuration — reference: https://docs.codecov.com/docs/codecovyml-reference
codecov:
  # wait for CI to finish successfully before posting a coverage report
  require_ci_to_pass: true

coverage:
  status:
    # disable all coverage commit-status checks: coverage is collected and
    # uploaded for information only, it must never block a PR
    project:
      default:
        enabled: no
    patch:
      default:
        enabled: no
    changes:
      default:
        enabled: no

github_checks:
  # do not annotate uncovered lines in the GitHub diff view
  annotations: false
31 changes: 31 additions & 0 deletions tests/runprod/conftest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash

# IMPORTANT: this script must be *sourced* from the legend-dataflow directory

_prod_cycle="$(realpath .)"

# Print the value of a key (dotted path in $1) from dataflow-config.yaml,
# expanding the "$_" placeholder to the production cycle directory.
function get_dataflow_config_value() {
    python -c "import dbetto; print(dbetto.AttrsDict(dbetto.utils.load_dict('${_prod_cycle}/dataflow-config.yaml')).${1})" \
        | sed "s|\$_|${_prod_cycle}|g"
}

# Run a command, capturing stdout+stderr; on failure, dump the captured
# output between red error banners. Returns the command's exit status.
function run_test_command() {
    # locals, not globals: this file is sourced, so plain assignments would
    # clobber any "output"/"status" variables of the calling script
    local output status

    printf "\033[32m%s\033[0m\n" "INFO: running command: $*"

    output=$("$@" 2>&1)
    status=$?

    if [ $status -ne 0 ]; then
        printf "\033[31m%s\033[0m\n" "vvvvvv ERROR: command failed with status $status vvvvvv"
        echo "$output"
        printf "\033[31m%s\033[0m\n" "^^^^^^ ERROR: command failed with status $status ^^^^^^"
    fi

    return $status
}

# make the helpers available to child shells (e.g. snakemake shell rules)
export -f get_dataflow_config_value run_test_command

PRODENV="$(realpath ..)"
export PRODENV
13 changes: 13 additions & 0 deletions tests/runprod/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash

# IMPORTANT: this script must be executed from the legend-dataflow directory

# fail fast: a broken environment install must not let the test suite proceed
set -euo pipefail

printf "\033[32m%s\033[0m\n" "DEBUG: setting up test environment"

# production environment root is the parent of the legend-dataflow checkout
PRODENV="$(realpath ..)"
export PRODENV

python -m pip --quiet install --upgrade pip wheel setuptools
python -m pip --quiet install --upgrade '.[runprod]'

# (re-)install the production cycle software stack on a bare system
dataprod -v install --remove --system bare -- dataflow-config.yaml
10 changes: 10 additions & 0 deletions tests/runprod/run-all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# IMPORTANT: this script must be executed from the legend-dataflow directory

# abort immediately if the environment could not be set up
./tests/runprod/install.sh || exit 1

# run every test script; remember failures so a failing test makes this
# runner (and therefore CI) exit non-zero instead of always returning 0
status=0
for test in tests/runprod/test-*.sh; do
    printf "\033[32m%s\033[0m\n" "INFO: running test $test"
    ./"$test" || status=1
done

exit "$status"
42 changes: 42 additions & 0 deletions tests/runprod/test-raw.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env bash

# IMPORTANT: this script must be executed from the legend-dataflow directory

# shellcheck disable=SC1091
source "$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)/conftest.sh"

sandbox=$(get_dataflow_config_value paths.sandbox_path)
mkdir -p "${sandbox}" || exit 1

# create fake (empty) DAQ files for the raw-tier workflow to pick up;
# the "|| exit 1" propagates a failure out of the subshell, which a bare
# "exit 1" inside "( ... )" would not do
(
    cd "${sandbox}" || exit 1
    touch \
        l200-p03-r000-cal-20230311T235840Z.orca \
        l200-p03-r001-cal-20230317T211819Z.orca \
        l200-p03-r002-cal-20230324T161401Z.orca \
        l200-p04-r000-cal-20230414T215158Z.orca \
        l200-p04-r001-cal-20230421T131817Z.orca \
        l200-p03-r000-phy-20230312T043356Z.orca \
        l200-p03-r001-phy-20230318T015140Z.orca \
        l200-p03-r002-phy-20230324T205907Z.orca \
        l200-p04-r000-phy-20230415T033517Z.orca \
        l200-p04-r001-phy-20230421T174901Z.orca \
        l200-p13-r006-acs-20241221T150307Z.fcio \
        l200-p13-r006-anc-20241221T150249Z.fcio \
        l200-p13-r002-anp-20241217T094846Z.fcio
) || exit 1

# FIXME: --touch does not do what I thought. need to add this functionality to
# the future plugin
_smk_opts=(
    --forcerun
    --touch
    --config system=bare
    --workflow-profile workflow/profiles/lngs-build-raw
)

# dry-exercise the daq and raw tier targets (pattern is expanded by snakemake)
for tier in daq raw; do
    run_test_command snakemake "${_smk_opts[@]}" "all-*-${tier}.gen" || exit 1
done

rm -rf "${sandbox}"
45 changes: 45 additions & 0 deletions tests/runprod/test-skm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env bash

# IMPORTANT: this script must be executed from the legend-dataflow directory

# shellcheck disable=SC1091
source "$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)/conftest.sh"

rawdir=$(get_dataflow_config_value paths.tier_raw)
mkdir -p "${rawdir}" || exit 1

# create the parent directories of $1, then an empty file at $1
function mkdir_n_touch() {
    mkdir -p "$(dirname "${1}")" || return 1
    touch "${1}" || return 1
}

# fake raw-tier files, laid out as <type>/<period>/<run>/<filename>
rawfiles=(
    phy/p04/r001/l200-p04-r001-phy-20230421T174901Z-tier_raw.lh5
    phy/p04/r000/l200-p04-r000-phy-20230415T033517Z-tier_raw.lh5
    phy/p03/r001/l200-p03-r001-phy-20230318T015140Z-tier_raw.lh5
    phy/p03/r000/l200-p03-r000-phy-20230312T043356Z-tier_raw.lh5
    phy/p03/r002/l200-p03-r002-phy-20230324T205907Z-tier_raw.lh5
    cal/p04/r001/l200-p04-r001-cal-20230421T131817Z-tier_raw.lh5
    cal/p04/r000/l200-p04-r000-cal-20230414T215158Z-tier_raw.lh5
    cal/p03/r001/l200-p03-r001-cal-20230317T211819Z-tier_raw.lh5
    cal/p03/r000/l200-p03-r000-cal-20230311T235840Z-tier_raw.lh5
    cal/p03/r002/l200-p03-r002-cal-20230324T161401Z-tier_raw.lh5
    anp/p13/r002/l200-p13-r002-anp-20241217T094846Z-tier_raw.lh5
    anc/p13/r006/l200-p13-r006-anc-20241221T150249Z-tier_raw.lh5
    acs/p13/r006/l200-p13-r006-acs-20241221T150307Z-tier_raw.lh5
)

# "|| exit 1" both inside and after the subshell so that a failed cd or
# touch actually aborts the test instead of only leaving the subshell
(
    cd "${rawdir}" || exit 1
    for file in "${rawfiles[@]}"; do
        mkdir_n_touch "$file" || exit 1
    done
) || exit 1

_smk_opts=(
    --touch
    --config system=bare
    --workflow-profile workflow/profiles/lngs
)

run_test_command snakemake "${_smk_opts[@]}" "all-*-evt.gen" || exit 1
7 changes: 4 additions & 3 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,11 @@ localrules:
onstart:
print("INFO: starting workflow")
# Make sure some packages are initialized before we begin to avoid race conditions
for pkg in ["dspeed", "lgdo", "matplotlib"]:
shell(execenv.execenv_pyexe(config, "python") + "-c 'import " + pkg + "'")
if not workflow.touch:
for pkg in ["dspeed", "lgdo", "matplotlib"]:
shell(execenv.execenv_pyexe(config, "python") + "-c 'import " + pkg + "'")

# Log parameter catalogs in validity files
# Log parameter catalogs in validity files
hit_par_cat_file = Path(utils.pars_path(config)) / "hit" / "validity.yaml"
if hit_par_cat_file.is_file():
hit_par_cat_file.unlink()
Expand Down
23 changes: 17 additions & 6 deletions workflow/Snakefile-build-raw
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ onstart:
print("INFO: initializing workflow")

# Make sure some packages are initialized before we send jobs to avoid race conditions
shell(execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5, matplotlib'")
if not workflow.touch:
shell(
execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5, matplotlib'"
)

raw_par_cat_file = Path(utils.pars_path(config)) / "raw" / "validity.yaml"
if raw_par_cat_file.is_file():
Expand Down Expand Up @@ -87,16 +90,24 @@ rule gen_filelist:


rule sort_data:
    """Move DAQ data from sandbox to organized folder.

    This rule moves the DAQ data from the unsorted sandbox directory to the
    correct location in the `tier_raw` folder.
    """
    input:
        patt.get_pattern_tier_daq_unsorted(config),
    output:
        patt.get_pattern_tier_daq(config),
    shell:
        "mv {input} {output}"


# same as sort_data, but matching DAQ files in FCIO format instead of the
# default extension
use rule sort_data as sort_data_fcio with:
    input:
        patt.get_pattern_tier_daq_unsorted(config, extension="fcio"),
    output:
        patt.get_pattern_tier_daq(config, extension="fcio"),


# vim: filetype=snakemake
19 changes: 13 additions & 6 deletions workflow/src/legenddataflow/execenv.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def execenv_prefix(
cmdline = []
cmdenv = {}
if "execenv" in config and "env" in config.execenv:
cmdenv = config.execenv.env
cmdenv |= config.execenv.env

if "execenv" in config and "cmd" in config.execenv and "arg" in config.execenv:
cmdline = shlex.split(config.execenv.cmd)
Expand Down Expand Up @@ -137,7 +137,12 @@ def dataprod() -> None:
"config_file", help="production cycle configuration file"
)
parser_install.add_argument(
"--system", help="system running on", default="local", type=str, required=False
"-s",
"--system",
help="system running on",
default="bare",
type=str,
required=False,
)
parser_install.add_argument(
"-r",
Expand Down Expand Up @@ -222,7 +227,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs):
msg = "running: " + _execenv2str(cmd_expr, cmd_env)
log.debug(msg)

subprocess.run(cmd_expr, env=cmd_env, check=True, **kwargs)
subprocess.run(cmd_expr, env=os.environ | cmd_env, check=True, **kwargs)

cmd_prefix, cmd_env = execenv_prefix(config_dict, as_string=False)
# HACK: get the full path to this python interpreter in case there is no execenv prefix
Expand All @@ -242,12 +247,12 @@ def _runcmd(cmd_expr, cmd_env, **kwargs):
uv_expr = [*cmd_prefix, "uv", "--version"]
except (subprocess.CalledProcessError, FileNotFoundError):
# we'll use uv from the virtualenv (installed below)
uv_expr = [*python_venv, "-m", "uv"]
uv_expr = [*python_venv, "-m", "uv", "--quiet"]

# configure venv
if has_uv:
# if uv is available, just use it to create the venv
cmd_expr = [*cmd_prefix, "uv", "venv", path_install]
cmd_expr = [*cmd_prefix, "uv", "--quiet", "venv", path_install]
else:
# otherwise use python-venv
cmd_expr = [*cmd_prefix, python, "-m", "venv", path_install]
Expand All @@ -260,6 +265,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs):
*python_venv,
"-m",
"pip",
"--quiet",
"--no-cache-dir",
"install",
"--upgrade",
Expand All @@ -274,6 +280,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs):
*python_venv,
"-m",
"pip",
"--quiet",
"--no-cache-dir",
"install",
"--no-warn-script-location",
Expand Down Expand Up @@ -319,4 +326,4 @@ def cmdexec(args) -> None:
msg = "running: " + _execenv2str(cmd_expr, cmd_env)
log.debug(msg)

subprocess.run(cmd_expr, env=cmd_env, check=True)
subprocess.run(cmd_expr, env=os.environ | cmd_env, check=True)
9 changes: 6 additions & 3 deletions workflow/src/legenddataflow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,12 @@ def subst_vars(


def subst_vars_in_snakemake_config(workflow, config):
config_filename = workflow.overwrite_configfiles[
0
] # ToDo: Better way of handling this?
if len(workflow.overwrite_configfiles) == 0:
msg = "configfile not set!"
raise RuntimeError(msg)

config_filename = workflow.overwrite_configfiles[0]

subst_vars(
config,
var_values={"_": Path(config_filename).parent},
Expand Down
Loading