// Patch: Add ghaf-parallel-pipelines
// - ghaf-parallel-pipeline.groovy: run HW tests in parallel with builds
// - ghaf-parallel-hw-test.groovy: run target tests on a dedicated
//   testagent for each target device
// - utils.groovy: add own parallel hw test function
// Signed-off-by: Ville-Pekka Juntunen

#!/usr/bin/env groovy

// SPDX-FileCopyrightText: 2022-2024 TII (SSRC) and the Ghaf contributors
// SPDX-License-Identifier: Apache-2.0

////////////////////////////////////////////////////////////////////////////////

def REPO_URL = 'https://github.com/tiiuae/ci-test-automation/'
def DEF_LABEL = 'testagent'
def TMP_IMG_DIR = 'image'
def CONF_FILE_PATH = '/etc/jenkins/test_config.json'

////////////////////////////////////////////////////////////////////////////////

// Run the given shell command, returning its trimmed stdout.
def run_cmd(String cmd) {
    return sh(script: cmd, returnStdout:true).trim()
}

// Read the requested device property from the JSON test configuration file
// (see CONF_FILE_PATH). Looks up file['addresses'][device][property].
def get_test_conf_property(String file_path, String device, String property) {
    def device_data = readJSON file: file_path
    property_data = "${device_data['addresses'][device][property]}"
    println "Got device '${device}' property '${property}' value: '${property_data}'"
    return property_data
}

// Run the given Robot Framework test suite ('boot', 'bat', 'performance' or
// 'turnoff') against the target device. Callers must set env.DEVICE_NAME and
// env.DEVICE_TAG beforehand. On 'boot' success sets env.BOOT_PASSED = 'true'.
// On test failure, marks the build FAILURE but returns normally so the
// calling pipeline can continue (e.g. to still run the 'turnoff' suite).
def ghaf_robot_test(String testname='boot') {
    // Fail fast with a clear message; 'sh "echo ...; exit 1"' buries the
    // reason inside the shell step log.
    if (!env.DEVICE_TAG) {
        error("DEVICE_TAG not set")
    }
    if (!env.DEVICE_NAME) {
        error("DEVICE_NAME not set")
    }
    if (testname == 'turnoff') {
        env.INCLUDE_TEST_TAGS = "${testname}"
    } else {
        // Robot Framework tag expression: select tests tagged with both the
        // suite name and the device tag.
        env.INCLUDE_TEST_TAGS = "${testname}AND${env.DEVICE_TAG}"
    }
    // TODO: do we really need credentials to access the target devices?
    // Target devices are connected to the testagent, which itself is
    // only available over a private network. What is the risk
    // we are protecting against by having additional authentication
    // for the test devices?
    // The current configuration requires additional manual configuration
    // on the jenkins UI to add the following secrets:
    withCredentials([
        string(credentialsId: 'testagent-dut-pass', variable: 'DUT_PASS'),
        string(credentialsId: 'testagent-plug-pass', variable: 'PLUG_PASS'),
        string(credentialsId: 'testagent-switch-token', variable: 'SW_TOKEN'),
        string(credentialsId: 'testagent-switch-secret', variable: 'SW_SECRET'),
    ]) {
        dir("Robot-Framework/test-suites") {
            sh 'rm -f *.png output.xml report.html log.html'
            // On failure, continue the pipeline execution
            try {
                // Pass the secrets to the shell as environment variables, as we
                // don't want Groovy to interpolate them. Similarly, we pass
                // other variables as environment variables to shell.
                // Ref: https://www.jenkins.io/doc/book/pipeline/jenkinsfile/#string-interpolation
                // TODO(review): PLUG_USERNAME is a hard-coded personal email
                // address; move it to a credential or pipeline parameter.
                sh '''
                    nix run .#ghaf-robot -- \
                      -v DEVICE:$DEVICE_NAME \
                      -v DEVICE_TYPE:$DEVICE_TAG \
                      -v LOGIN:ghaf \
                      -v PASSWORD:$DUT_PASS \
                      -v PLUG_USERNAME:ville-pekka.juntunen@unikie.com \
                      -v PLUG_PASSWORD:$PLUG_PASS \
                      -v SWITCH_TOKEN:$SW_TOKEN \
                      -v SWITCH_SECRET:$SW_SECRET \
                      -v BUILD_ID:${BUILD_NUMBER} \
                      -i $INCLUDE_TEST_TAGS .
                '''
                if (testname == 'boot') {
                    // Set an environment variable to indicate boot test passed
                    env.BOOT_PASSED = 'true'
                }
            } catch (Exception e) {
                currentBuild.result = "FAILURE"
                unstable("FAILED '${testname}': ${e.toString()}")
            } finally {
                // Move the test output (if any) to a subdirectory
                sh """
                    rm -fr $testname; mkdir -p $testname
                    mv -f *.png output.xml report.html log.html $testname/ || true
                """
            }
        }
    }
}

////////////////////////////////////////////////////////////////////////////////
pipeline {
    // LABEL parameter selects the dedicated testagent node for the device
    agent { label "${params.getOrDefault('LABEL', DEF_LABEL)}" }
    options { timestamps () }
    stages {
        stage('Checkout') {
            steps {
                // Robot Framework suites live in the ci-test-automation repo
                checkout scmGit(
                    branches: [[name: 'switchbot']],
                    extensions: [cleanBeforeCheckout()],
                    userRemoteConfigs: [[url: REPO_URL]]
                )
            }
        }
        stage('Setup') {
            steps {
                script {
                    env.TEST_CONFIG_DIR = 'Robot-Framework/config'
                    if(!params.getOrDefault('TARGET', null)) {
                        error("Missing TARGET parameter")
                    }
                    println "Using TARGET: ${params.TARGET}"
                    // Link the shared device config into the suite config dir and
                    // record the triggering job name for the test report.
                    sh """
                        mkdir -p ${TEST_CONFIG_DIR}
                        rm -f ${TEST_CONFIG_DIR}/*.json
                        ln -sv ${CONF_FILE_PATH} ${TEST_CONFIG_DIR}
                        echo { \\\"Job\\\": \\\"${params.TARGET}\\\" } > ${TEST_CONFIG_DIR}/${BUILD_NUMBER}.json
                        ls -la ${TEST_CONFIG_DIR}
                    """
                    if(!params.containsKey('DESC')) {
                        println "Missing DESC parameter, skip setting description"
                    } else {
                        currentBuild.description = "${params.DESC}"
                    }
                    env.TESTSET = params.getOrDefault('TESTSET', '_boot_')
                    println "Using TESTSET: ${env.TESTSET}"
                }
            }
        }
        stage('Image download') {
            steps {
                script {
                    if(!params.containsKey('IMG_URL')) {
                        error("Missing IMG_URL parameter")
                    }
                    sh "rm -fr ${TMP_IMG_DIR}"
                    // Wget occasionally fails due to a failure in name lookup. Below is a
                    // hack to force re-try a few times before aborting. Wget options, such
                    // as --tries, --waitretry, --retry-connrefused, etc. do not help in case
                    // the failure is due to an issue in name resolution which is considered
                    // a fatal error. Therefore, we need to add the below retry loop.
                    // TODO: remove the below re-try loop when test network DNS works
                    // reliably.
                    sh """
                        retry=1
                        max_retry=3
                        while ! wget -nv --show-progress --progress=dot:giga -P ${TMP_IMG_DIR} ${params.IMG_URL};
                        do
                            if (( \$retry >= \$max_retry )); then
                                echo "wget failed after \$retry retries"
                                exit 1
                            fi
                            retry=\$(( \$retry + 1 ))
                            sleep 5
                        done
                    """
                    img_relpath = run_cmd("find ${TMP_IMG_DIR} -type f -print -quit | grep .")
                    println "Downloaded image to workspace: ${img_relpath}"
                    // Uncompress, keeping only the decompressed image file
                    if(img_relpath.endsWith("zst")) {
                        sh "zstd -dfv ${img_relpath} && rm ${img_relpath}"
                    }
                    sh "ls -la ${TMP_IMG_DIR}"
                }
            }
        }
        stage('Flash') {
            steps {
                script {
                    if(!params.getOrDefault('DEVICE_CONFIG_NAME', null)) {
                        error("Missing DEVICE_CONFIG_NAME parameter")
                    }
                    // Map each supported device config to the device name used
                    // in the test configuration (CONF_FILE_PATH).
                    def device_names = [
                        'orin-agx' : 'OrinAGX1',
                        'orin-nx'  : 'OrinNX1',
                        'lenovo-x1': 'LenovoX1-1',
                        'nuc'      : 'NUC1',
                        'riscv'    : 'Polarfire1',
                    ]
                    def device_name = device_names[params.DEVICE_CONFIG_NAME]
                    if(!device_name) {
                        error("Unsupported device config '${params.DEVICE_CONFIG_NAME}'")
                    }
                    env.DEVICE_NAME = device_name
                    // Determine the mount/unmount commands: riscv uses a
                    // usb-sd-mux, the other devices an Acroname USB hub.
                    if(params.DEVICE_CONFIG_NAME == "riscv") {
                        muxport = get_test_conf_property(CONF_FILE_PATH, env.DEVICE_NAME, 'usb_sd_mux_port')
                        mount_cmd = "/run/wrappers/bin/sudo usbsdmux ${muxport} host; sleep 10"
                        unmount_cmd = "/run/wrappers/bin/sudo usbsdmux ${muxport} dut"
                    } else {
                        serial = get_test_conf_property(CONF_FILE_PATH, env.DEVICE_NAME, 'usbhub_serial')
                        mount_cmd = "/run/wrappers/bin/sudo AcronameHubCLI -u 0 -s ${serial}; sleep 10"
                        unmount_cmd = "/run/wrappers/bin/sudo AcronameHubCLI -u 1 -s ${serial}"
                    }
                    env.DEVICE_TAG = params.DEVICE_CONFIG_NAME
                    // Mount the target disk
                    sh "${mount_cmd}"
                    // Read the device name
                    dev = get_test_conf_property(CONF_FILE_PATH, env.DEVICE_NAME, 'ext_drive_by-id')
                    println "Using device '$dev'"
                    // Wipe possible ZFS leftovers, more details here:
                    // https://github.com/tiiuae/ghaf/blob/454b18bc/packages/installer/ghaf-installer.sh#L75
                    // TODO: use ghaf flashing scripts or installers?
                    if(params.DEVICE_CONFIG_NAME == "lenovo-x1") {
                        echo "Wiping filesystem..."
                        SECTOR = 512
                        MIB_TO_SECTORS = 20480
                        // Disk size in 512-byte sectors
                        SECTORS = sh(script: "/run/wrappers/bin/sudo blockdev --getsz /dev/disk/by-id/${dev}", returnStdout: true).trim()
                        // Unmount possible mounted filesystems
                        sh "sync; /run/wrappers/bin/sudo umount -q /dev/disk/by-id/${dev}* || true"
                        // Wipe first 10MiB of disk
                        sh "/run/wrappers/bin/sudo dd if=/dev/zero of=/dev/disk/by-id/${dev} bs=${SECTOR} count=${MIB_TO_SECTORS} conv=fsync status=none"
                        // Wipe last 10MiB of disk
                        sh "/run/wrappers/bin/sudo dd if=/dev/zero of=/dev/disk/by-id/${dev} bs=${SECTOR} count=${MIB_TO_SECTORS} seek=\$(( ${SECTORS} - ${MIB_TO_SECTORS} )) conv=fsync status=none"
                    }
                    // Write the image
                    img_relpath = run_cmd("find ${TMP_IMG_DIR} -type f -print -quit | grep .")
                    println "Using image '$img_relpath'"
                    sh "/run/wrappers/bin/sudo dd if=${img_relpath} of=/dev/disk/by-id/${dev} bs=1M status=progress conv=fsync"
                    // Unmount
                    sh "${unmount_cmd}"
                }
            }
        }
        stage('Boot test') {
            when { expression { env.TESTSET.contains('_boot_') } }
            steps {
                script {
                    env.BOOT_PASSED = 'false'
                    ghaf_robot_test('boot')
                    println "Boot test passed: ${env.BOOT_PASSED}"
                }
            }
        }
        stage('Bat test') {
            when { expression { env.BOOT_PASSED == 'true' && env.TESTSET.contains('_bat_') } }
            steps {
                script {
                    ghaf_robot_test('bat')
                }
            }
        }
        stage('Perf test') {
            when { expression { env.BOOT_PASSED == 'true' && env.TESTSET.contains('_perf_') } }
            steps {
                script {
                    ghaf_robot_test('performance')
                }
            }
        }
        stage('Turn off') {
            steps {
                script {
                    ghaf_robot_test('turnoff')
                }
            }
        }
    }
    post {
        always {
            // Archive Robot-Framework results as artifacts
            archiveArtifacts allowEmptyArchive: true, artifacts: 'Robot-Framework/test-suites/**/*.html, Robot-Framework/test-suites/**/*.xml, Robot-Framework/test-suites/**/*.png'
            // Publish all results under Robot-Framework/test-suites subfolders
            step(
                [$class: 'RobotPublisher',
                    archiveDirName: 'robot-plugin',
                    outputPath: 'Robot-Framework/test-suites',
                    outputFileName: '**/output.xml',
                    otherFiles: '**/*.png',
                    disableArchiveOutput: false,
                    reportFileName: '**/report.html',
                    logFileName: '**/log.html',
                    passThreshold: 0,
                    unstableThreshold: 0,
                    onlyCritical: true,
                ]
            )
        }
    }
}

////////////////////////////////////////////////////////////////////////////////

// ===========================================================================
// ghaf-parallel-pipeline.groovy
// ===========================================================================

// SPDX-FileCopyrightText: 2024 Technology Innovation Institute (TII)
//
// SPDX-License-Identifier: Apache-2.0

def REPO_URL = 'https://github.com/tiiuae/ghaf/'
def WORKDIR = 'ghaf'

properties([
    githubProjectProperty(displayName: '', projectUrlStr: REPO_URL),
])
// Ghaf targets to build
// Must match target names defined in #hydraJobs
def targets = [
    [ target: "generic-x86_64-debug.x86_64-linux",
      hwtest_device: "nuc"
    ],
    [ target: "lenovo-x1-carbon-gen11-debug.x86_64-linux",
      hwtest_device: "lenovo-x1"
    ],
    [ target: "microchip-icicle-kit-debug-from-x86_64",
      hwtest_device: "riscv"
    ],
    [ target: "nvidia-jetson-orin-agx-debug.aarch64-linux",
      hwtest_device: "orin-agx"
    ],
    [ target: "nvidia-jetson-orin-agx-debug-from-x86_64.x86_64-linux",
      hwtest_device: "orin-agx"
    ],
    [ target: "nvidia-jetson-orin-nx-debug.aarch64-linux",
      hwtest_device: "orin-nx"
    ],
    [ target: "nvidia-jetson-orin-nx-debug-from-x86_64.x86_64-linux",
      hwtest_device: "orin-nx"
    ],
]

// Utils module will be loaded in the first pipeline stage
def utils = null

// Container for the parallel build stages
def target_jobs = [:]

pipeline {
    agent { label 'built-in' }
    triggers {
        pollSCM '0 23 * * *'
    }
    options {
        timestamps ()
        buildDiscarder(logRotator(numToKeepStr: '100'))
    }
    stages {
        stage('Checkout') {
            steps {
                script { utils = load "utils.groovy" }
                dir(WORKDIR) {
                    checkout scmGit(
                        branches: [[name: 'main']],
                        extensions: [cleanBeforeCheckout()],
                        userRemoteConfigs: [[url: REPO_URL]]
                    )
                    script {
                        env.TARGET_REPO = sh(script: 'git remote get-url origin', returnStdout: true).trim()
                        env.TARGET_COMMIT = sh(script: 'git rev-parse HEAD', returnStdout: true).trim()
                        env.ARTIFACTS_REMOTE_PATH = "${env.JOB_NAME}/build_${env.BUILD_ID}-commit_${env.TARGET_COMMIT}"
                    }
                }
            }
        }
        stage('Evaluate') {
            steps {
                dir(WORKDIR) {
                    script {
                        // Which attribute of the flake to evaluate for building
                        // Target names must be direct children of this attribute
                        def flakeAttr = ".#hydraJobs"
                        // nix-eval-jobs evaluates the given flake attribute, writing
                        // one JSON object per target into jobs.json
                        sh "nix run github:nix-community/nix-eval-jobs -- --gc-roots-dir gcroots --flake ${flakeAttr} --force-recurse > jobs.json"
                        // jobs.json is parsed using jq: target name and derivation path
                        // are appended as a space separated row into jobs.txt
                        sh "nix run nixpkgs#jq -- -r '.attr + \" \" + .drvPath' < jobs.json > jobs.txt"
                        targets.each {
                            // Capture per-target values into locals: the stage closures
                            // below run later in 'parallel' and must not share state.
                            def target = it['target']
                            def hwtest_device = it['hwtest_device']
                            // Grep the pre-evaluated derivation path for this target.
                            // Anchor the match to the start of the row: an unanchored
                            // grep could also hit a target name embedded in another
                            // row's derivation-path column. Take the first match only.
                            def drvPath = sh (script: "grep '^${target}' jobs.txt | head -n1 | cut -d ' ' -f 2", returnStdout: true).trim()
                            target_jobs[target] = {
                                stage("Build ${target}") {
                                    // Keep an out-link only for targets that get HW
                                    // tested; the link locates the image for archiving.
                                    def opts = (hwtest_device != null) ? "--out-link archive/${target}" : "--no-link"
                                    try {
                                        if (drvPath) {
                                            sh "nix build -L ${drvPath}\\^* ${opts}"
                                        } else {
                                            error("Target \"${target}\" was not found in ${flakeAttr}")
                                        }
                                    } catch (InterruptedException e) {
                                        // Build was aborted: propagate, don't swallow
                                        throw e
                                    } catch (Exception e) {
                                        // Build failure: mark unstable and continue
                                        // building the other targets
                                        unstable("FAILED: ${target}")
                                        currentBuild.result = "FAILURE"
                                        println "Error: ${e.toString()}"
                                    }
                                }
                                if (hwtest_device != null) {
                                    stage("Archive ${target}") {
                                        script {
                                            utils.archive_artifacts("archive", target)
                                        }
                                    }
                                    stage("Test ${target}") {
                                        utils.ghaf_parallel_hw_test(target, hwtest_device)
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        stage('Parallel build targets') {
            steps {
                script {
                    parallel target_jobs
                }
            }
        }
    }
}
// Trigger a 'ghaf-parallel-hw-test' build for the given target on the
// dedicated test agent labeled 'testagent_<device_config>', wait for it to
// finish, then copy and archive its test results. On test failure, marks the
// calling build FAILURE but returns normally so the pipeline can continue.
// Skips (with a warning) when no agent is online or required env is unset.
def ghaf_parallel_hw_test(String flakeref, String device_config, String testset='_boot_') {
    testagent_nodes = nodesByLabel(label: "testagent_$device_config", offline: false)
    if (!testagent_nodes) {
        println "Warning: Skipping HW test '$flakeref', no test agents online"
        unstable("No test agents online")
        return
    }
    if (!env.ARTIFACTS_REMOTE_PATH) {
        println "Warning: skipping HW test '$flakeref', ARTIFACTS_REMOTE_PATH not set"
        return
    }
    if (!env.JENKINS_URL) {
        println "Warning: skipping HW test '$flakeref', JENKINS_URL not set"
        return
    }
    // Compose the image URL; testagent will need this URL to download the image
    imgdir = find_img_relpath(flakeref, 'archive')
    remote_path = "artifacts/${env.ARTIFACTS_REMOTE_PATH}"
    img_url = "${env.JENKINS_URL}/${remote_path}/${imgdir}"
    build_href = "${env.JOB_NAME}#${env.BUILD_ID}"
    flakeref_trimmed = "${flakeref_trim(flakeref)}"
    // NOTE(review): the description likely carried HTML markup (a link to
    // this build) that appears stripped in transit — confirm upstream.
    description = "Triggered by ${build_href} (${flakeref_trimmed})"
    // Trigger a build in 'ghaf-parallel-hw-test' pipeline.
    // 'build' step is documented in https://plugins.jenkins.io/pipeline-build-step/
    job = build(
        job: "ghaf-parallel-hw-test",
        propagate: false,
        parameters: [
            string(name: "LABEL", value: "testagent_$device_config"),
            string(name: "DEVICE_CONFIG_NAME", value: "$device_config"),
            string(name: "IMG_URL", value: "$img_url"),
            string(name: "DESC", value: "$description"),
            string(name: "TESTSET", value: "$testset"),
            string(name: "TARGET", value: "$flakeref_trimmed"),
        ],
        wait: true,
    )
    println "ghaf-parallel-hw-test result (${device_config}:${testset}): ${job.result}"
    // If the test job failed, mark the current step unstable and set
    // the final build result failed, but continue the pipeline execution.
    if (job.result != "SUCCESS") {
        unstable("FAILED: ${device_config} ${testset}")
        currentBuild.result = "FAILURE"
        // Add a link to failed test job(s) on the calling pipeline.
        // NOTE(review): anchor markup appears stripped here too — confirm.
        test_href = "⛔ ${flakeref_trimmed}"
        // Guard against a null description: appending to an unset description
        // would otherwise render a literal 'null' prefix.
        prev_desc = currentBuild.description ?: ""
        currentBuild.description = "${prev_desc} ${test_href}"
    }
    // Copy test results from agent to controller to 'test-results' directory
    copyArtifacts(
        projectName: "ghaf-parallel-hw-test",
        selector: specific("${job.number}"),
        target: "ghaf-parallel-hw-test/${flakeref_trimmed}/test-results",
    )
    // Archive the test results
    archive_artifacts("ghaf-parallel-hw-test", flakeref_trimmed)
}

return this

////////////////////////////////////////////////////////////////////////////////