// Patch: Add ghaf-parallel-pipelines
// - ghaf-parallel-pipeline.groovy: run HW tests in parallel with builds
// - ghaf-parallel-hw-test.groovy: run target tests on a dedicated
//   testagent for each target device
// - utils.groovy: add own parallel hw test function
// Signed-off-by: Ville-Pekka Juntunen

#!/usr/bin/env groovy

// SPDX-FileCopyrightText: 2022-2024 TII (SSRC) and the Ghaf contributors
// SPDX-License-Identifier: Apache-2.0

////////////////////////////////////////////////////////////////////////////////

def REPO_URL = 'https://github.com/tiiuae/ci-test-automation/'
def DEF_LABEL = 'testagent'
def TMP_IMG_DIR = 'image'
def CONF_FILE_PATH = '/etc/jenkins/test_config.json'

////////////////////////////////////////////////////////////////////////////////

// Run the given shell command, returning its trimmed stdout.
def run_cmd(String cmd) {
    return sh(script: cmd, returnStdout:true).trim()
}

// Read the requested device property from the JSON test configuration file
// (see CONF_FILE_PATH). Looks up file['addresses'][device][property].
def get_test_conf_property(String file_path, String device, String property) {
    def device_data = readJSON file: file_path
    property_data = "${device_data['addresses'][device][property]}"
    println "Got device '${device}' property '${property}' value: '${property_data}'"
    return property_data
}

// Run the given Robot Framework test suite ('boot', 'bat', 'performance' or
// 'turnoff') against the target device. Callers must set env.DEVICE_NAME and
// env.DEVICE_TAG beforehand. On 'boot' success sets env.BOOT_PASSED = 'true'.
// On test failure, marks the build FAILURE but returns normally so the
// calling pipeline can continue (e.g. to still run the 'turnoff' suite).
def ghaf_robot_test(String testname='boot') {
    // Fail fast with a clear message; 'sh "echo ...; exit 1"' buries the
    // reason inside the shell step log.
    if (!env.DEVICE_TAG) {
        error("DEVICE_TAG not set")
    }
    if (!env.DEVICE_NAME) {
        error("DEVICE_NAME not set")
    }
    if (testname == 'turnoff') {
        env.INCLUDE_TEST_TAGS = "${testname}"
    } else {
        // Robot Framework tag expression: select tests tagged with both the
        // suite name and the device tag.
        env.INCLUDE_TEST_TAGS = "${testname}AND${env.DEVICE_TAG}"
    }
    // TODO: do we really need credentials to access the target devices?
    // Target devices are connected to the testagent, which itself is
    // only available over a private network. What is the risk
    // we are protecting against by having additional authentication
    // for the test devices?
    // The current configuration requires additional manual configuration
    // on the jenkins UI to add the following secrets:
    withCredentials([
        string(credentialsId: 'testagent-dut-pass', variable: 'DUT_PASS'),
        string(credentialsId: 'testagent-plug-pass', variable: 'PLUG_PASS'),
        string(credentialsId: 'testagent-switch-token', variable: 'SW_TOKEN'),
        string(credentialsId: 'testagent-switch-secret', variable: 'SW_SECRET'),
    ]) {
        dir("Robot-Framework/test-suites") {
            sh 'rm -f *.png output.xml report.html log.html'
            // On failure, continue the pipeline execution
            try {
                // Pass the secrets to the shell as environment variables, as we
                // don't want Groovy to interpolate them. Similarly, we pass
                // other variables as environment variables to shell.
                // Ref: https://www.jenkins.io/doc/book/pipeline/jenkinsfile/#string-interpolation
                // TODO(review): PLUG_USERNAME is a hard-coded personal email
                // address; move it to a credential or pipeline parameter.
                sh '''
                    nix run .#ghaf-robot -- \
                      -v DEVICE:$DEVICE_NAME \
                      -v DEVICE_TYPE:$DEVICE_TAG \
                      -v LOGIN:ghaf \
                      -v PASSWORD:$DUT_PASS \
                      -v PLUG_USERNAME:ville-pekka.juntunen@unikie.com \
                      -v PLUG_PASSWORD:$PLUG_PASS \
                      -v SWITCH_TOKEN:$SW_TOKEN \
                      -v SWITCH_SECRET:$SW_SECRET \
                      -v BUILD_ID:${BUILD_NUMBER} \
                      -i $INCLUDE_TEST_TAGS .
                '''
                if (testname == 'boot') {
                    // Set an environment variable to indicate boot test passed
                    env.BOOT_PASSED = 'true'
                }
            } catch (Exception e) {
                currentBuild.result = "FAILURE"
                unstable("FAILED '${testname}': ${e.toString()}")
            } finally {
                // Move the test output (if any) to a subdirectory
                sh """
                    rm -fr $testname; mkdir -p $testname
                    mv -f *.png output.xml report.html log.html $testname/ || true
                """
            }
        }
    }
}

////////////////////////////////////////////////////////////////////////////////
pipeline {
    // LABEL parameter selects the dedicated testagent node for the device
    agent { label "${params.getOrDefault('LABEL', DEF_LABEL)}" }
    options { timestamps () }
    stages {
        stage('Checkout') {
            steps {
                // Robot Framework suites live in the ci-test-automation repo
                checkout scmGit(
                    branches: [[name: 'switchbot']],
                    extensions: [cleanBeforeCheckout()],
                    userRemoteConfigs: [[url: REPO_URL]]
                )
            }
        }
        stage('Setup') {
            steps {
                script {
                    env.TEST_CONFIG_DIR = 'Robot-Framework/config'
                    if(!params.getOrDefault('TARGET', null)) {
                        error("Missing TARGET parameter")
                    }
                    println "Using TARGET: ${params.TARGET}"
                    // Link the shared device config into the suite config dir and
                    // record the triggering job name for the test report.
                    sh """
                        mkdir -p ${TEST_CONFIG_DIR}
                        rm -f ${TEST_CONFIG_DIR}/*.json
                        ln -sv ${CONF_FILE_PATH} ${TEST_CONFIG_DIR}
                        echo { \\\"Job\\\": \\\"${params.TARGET}\\\" } > ${TEST_CONFIG_DIR}/${BUILD_NUMBER}.json
                        ls -la ${TEST_CONFIG_DIR}
                    """
                    if(!params.containsKey('DESC')) {
                        println "Missing DESC parameter, skip setting description"
                    } else {
                        currentBuild.description = "${params.DESC}"
                    }
                    env.TESTSET = params.getOrDefault('TESTSET', '_boot_')
                    println "Using TESTSET: ${env.TESTSET}"
                }
            }
        }
        stage('Image download') {
            steps {
                script {
                    if(!params.containsKey('IMG_URL')) {
                        error("Missing IMG_URL parameter")
                    }
                    sh "rm -fr ${TMP_IMG_DIR}"
                    // Wget occasionally fails due to a failure in name lookup. Below is a
                    // hack to force re-try a few times before aborting. Wget options, such
                    // as --tries, --waitretry, --retry-connrefused, etc. do not help in case
                    // the failure is due to an issue in name resolution which is considered
                    // a fatal error. Therefore, we need to add the below retry loop.
                    // TODO: remove the below re-try loop when test network DNS works
                    // reliably.
                    sh """
                        retry=1
                        max_retry=3
                        while ! wget -nv --show-progress --progress=dot:giga -P ${TMP_IMG_DIR} ${params.IMG_URL};
                        do
                            if (( \$retry >= \$max_retry )); then
                                echo "wget failed after \$retry retries"
                                exit 1
                            fi
                            retry=\$(( \$retry + 1 ))
                            sleep 5
                        done
                    """
                    img_relpath = run_cmd("find ${TMP_IMG_DIR} -type f -print -quit | grep .")
                    println "Downloaded image to workspace: ${img_relpath}"
                    // Uncompress, keeping only the decompressed image file
                    if(img_relpath.endsWith("zst")) {
                        sh "zstd -dfv ${img_relpath} && rm ${img_relpath}"
                    }
                    sh "ls -la ${TMP_IMG_DIR}"
                }
            }
        }
        stage('Flash') {
            steps {
                script {
                    if(!params.getOrDefault('DEVICE_CONFIG_NAME', null)) {
                        error("Missing DEVICE_CONFIG_NAME parameter")
                    }
                    // Map each supported device config to the device name used
                    // in the test configuration (CONF_FILE_PATH).
                    def device_names = [
                        'orin-agx' : 'OrinAGX1',
                        'orin-nx'  : 'OrinNX1',
                        'lenovo-x1': 'LenovoX1-1',
                        'nuc'      : 'NUC1',
                        'riscv'    : 'Polarfire1',
                    ]
                    def device_name = device_names[params.DEVICE_CONFIG_NAME]
                    if(!device_name) {
                        error("Unsupported device config '${params.DEVICE_CONFIG_NAME}'")
                    }
                    env.DEVICE_NAME = device_name
                    // Determine the mount/unmount commands: riscv uses a
                    // usb-sd-mux, the other devices an Acroname USB hub.
                    if(params.DEVICE_CONFIG_NAME == "riscv") {
                        muxport = get_test_conf_property(CONF_FILE_PATH, env.DEVICE_NAME, 'usb_sd_mux_port')
                        mount_cmd = "/run/wrappers/bin/sudo usbsdmux ${muxport} host; sleep 10"
                        unmount_cmd = "/run/wrappers/bin/sudo usbsdmux ${muxport} dut"
                    } else {
                        serial = get_test_conf_property(CONF_FILE_PATH, env.DEVICE_NAME, 'usbhub_serial')
                        mount_cmd = "/run/wrappers/bin/sudo AcronameHubCLI -u 0 -s ${serial}; sleep 10"
                        unmount_cmd = "/run/wrappers/bin/sudo AcronameHubCLI -u 1 -s ${serial}"
                    }
                    env.DEVICE_TAG = params.DEVICE_CONFIG_NAME
                    // Mount the target disk
                    sh "${mount_cmd}"
                    // Read the device name
                    dev = get_test_conf_property(CONF_FILE_PATH, env.DEVICE_NAME, 'ext_drive_by-id')
                    println "Using device '$dev'"
                    // Wipe possible ZFS leftovers, more details here:
                    // https://github.com/tiiuae/ghaf/blob/454b18bc/packages/installer/ghaf-installer.sh#L75
                    // TODO: use ghaf flashing scripts or installers?
                    if(params.DEVICE_CONFIG_NAME == "lenovo-x1") {
                        echo "Wiping filesystem..."
                        SECTOR = 512
                        MIB_TO_SECTORS = 20480
                        // Disk size in 512-byte sectors
                        SECTORS = sh(script: "/run/wrappers/bin/sudo blockdev --getsz /dev/disk/by-id/${dev}", returnStdout: true).trim()
                        // Unmount possible mounted filesystems
                        sh "sync; /run/wrappers/bin/sudo umount -q /dev/disk/by-id/${dev}* || true"
                        // Wipe first 10MiB of disk
                        sh "/run/wrappers/bin/sudo dd if=/dev/zero of=/dev/disk/by-id/${dev} bs=${SECTOR} count=${MIB_TO_SECTORS} conv=fsync status=none"
                        // Wipe last 10MiB of disk
                        sh "/run/wrappers/bin/sudo dd if=/dev/zero of=/dev/disk/by-id/${dev} bs=${SECTOR} count=${MIB_TO_SECTORS} seek=\$(( ${SECTORS} - ${MIB_TO_SECTORS} )) conv=fsync status=none"
                    }
                    // Write the image
                    img_relpath = run_cmd("find ${TMP_IMG_DIR} -type f -print -quit | grep .")
                    println "Using image '$img_relpath'"
                    sh "/run/wrappers/bin/sudo dd if=${img_relpath} of=/dev/disk/by-id/${dev} bs=1M status=progress conv=fsync"
                    // Unmount
                    sh "${unmount_cmd}"
                }
            }
        }
        stage('Boot test') {
            when { expression { env.TESTSET.contains('_boot_') } }
            steps {
                script {
                    env.BOOT_PASSED = 'false'
                    ghaf_robot_test('boot')
                    println "Boot test passed: ${env.BOOT_PASSED}"
                }
            }
        }
        stage('Bat test') {
            when { expression { env.BOOT_PASSED == 'true' && env.TESTSET.contains('_bat_') } }
            steps {
                script {
                    ghaf_robot_test('bat')
                }
            }
        }
        stage('Perf test') {
            when { expression { env.BOOT_PASSED == 'true' && env.TESTSET.contains('_perf_') } }
            steps {
                script {
                    ghaf_robot_test('performance')
                }
            }
        }
        stage('Turn off') {
            steps {
                script {
                    ghaf_robot_test('turnoff')
                }
            }
        }
    }
    post {
        always {
            // Archive Robot-Framework results as artifacts
            archiveArtifacts allowEmptyArchive: true, artifacts: 'Robot-Framework/test-suites/**/*.html, Robot-Framework/test-suites/**/*.xml, Robot-Framework/test-suites/**/*.png'
            // Publish all results under Robot-Framework/test-suites subfolders
            step(
                [$class: 'RobotPublisher',
                    archiveDirName: 'robot-plugin',
                    outputPath: 'Robot-Framework/test-suites',
                    outputFileName: '**/output.xml',
                    otherFiles: '**/*.png',
                    disableArchiveOutput: false,
                    reportFileName: '**/report.html',
                    logFileName: '**/log.html',
                    passThreshold: 0,
                    unstableThreshold: 0,
                    onlyCritical: true,
                ]
            )
        }
    }
}

////////////////////////////////////////////////////////////////////////////////

// ===========================================================================
// ghaf-parallel-pipeline.groovy
// ===========================================================================

// SPDX-FileCopyrightText: 2024 Technology Innovation Institute (TII)
//
// SPDX-License-Identifier: Apache-2.0

def REPO_URL = 'https://github.com/tiiuae/ghaf/'
def WORKDIR = 'ghaf'

properties([
    githubProjectProperty(displayName: '', projectUrlStr: REPO_URL),
])
// Ghaf targets to build
// Must match target names defined in #hydraJobs
def targets = [
    [ target: "generic-x86_64-debug.x86_64-linux",
      hwtest_device: "nuc"
    ],
    [ target: "lenovo-x1-carbon-gen11-debug.x86_64-linux",
      hwtest_device: "lenovo-x1"
    ],
    [ target: "microchip-icicle-kit-debug-from-x86_64",
      hwtest_device: "riscv"
    ],
    [ target: "nvidia-jetson-orin-agx-debug.aarch64-linux",
      hwtest_device: "orin-agx"
    ],
    [ target: "nvidia-jetson-orin-agx-debug-from-x86_64.x86_64-linux",
      hwtest_device: "orin-agx"
    ],
    [ target: "nvidia-jetson-orin-nx-debug.aarch64-linux",
      hwtest_device: "orin-nx"
    ],
    [ target: "nvidia-jetson-orin-nx-debug-from-x86_64.x86_64-linux",
      hwtest_device: "orin-nx"
    ],
]

// Utils module will be loaded in the first pipeline stage
def utils = null

// Container for the parallel build stages
def target_jobs = [:]

pipeline {
    agent { label 'built-in' }
    triggers {
        pollSCM '0 23 * * *'
    }
    options {
        timestamps ()
        buildDiscarder(logRotator(numToKeepStr: '100'))
    }
    stages {
        stage('Checkout') {
            steps {
                script { utils = load "utils.groovy" }
                dir(WORKDIR) {
                    checkout scmGit(
                        branches: [[name: 'main']],
                        extensions: [cleanBeforeCheckout()],
                        userRemoteConfigs: [[url: REPO_URL]]
                    )
                    script {
                        env.TARGET_REPO = sh(script: 'git remote get-url origin', returnStdout: true).trim()
                        env.TARGET_COMMIT = sh(script: 'git rev-parse HEAD', returnStdout: true).trim()
                        env.ARTIFACTS_REMOTE_PATH = "${env.JOB_NAME}/build_${env.BUILD_ID}-commit_${env.TARGET_COMMIT}"
                    }
                }
            }
        }
        stage('Evaluate') {
            steps {
                dir(WORKDIR) {
                    script {
                        // Which attribute of the flake to evaluate for building
                        // Target names must be direct children of this attribute
                        def flakeAttr = ".#hydraJobs"
                        // nix-eval-jobs evaluates the given flake attribute, writing
                        // one JSON object per target into jobs.json
                        sh "nix run github:nix-community/nix-eval-jobs -- --gc-roots-dir gcroots --flake ${flakeAttr} --force-recurse > jobs.json"
                        // jobs.json is parsed using jq: target name and derivation path
                        // are appended as a space separated row into jobs.txt
                        sh "nix run nixpkgs#jq -- -r '.attr + \" \" + .drvPath' < jobs.json > jobs.txt"
                        targets.each {
                            // Capture per-target values into locals: the stage closures
                            // below run later in 'parallel' and must not share state.
                            def target = it['target']
                            def hwtest_device = it['hwtest_device']
                            // Grep the pre-evaluated derivation path for this target.
                            // Anchor the match to the start of the row: an unanchored
                            // grep could also hit a target name embedded in another
                            // row's derivation-path column. Take the first match only.
                            def drvPath = sh (script: "grep '^${target}' jobs.txt | head -n1 | cut -d ' ' -f 2", returnStdout: true).trim()
                            target_jobs[target] = {
                                stage("Build ${target}") {
                                    // Keep an out-link only for targets that get HW
                                    // tested; the link locates the image for archiving.
                                    def opts = (hwtest_device != null) ? "--out-link archive/${target}" : "--no-link"
                                    try {
                                        if (drvPath) {
                                            sh "nix build -L ${drvPath}\\^* ${opts}"
                                        } else {
                                            error("Target \"${target}\" was not found in ${flakeAttr}")
                                        }
                                    } catch (InterruptedException e) {
                                        // Build was aborted: propagate, don't swallow
                                        throw e
                                    } catch (Exception e) {
                                        // Build failure: mark unstable and continue
                                        // building the other targets
                                        unstable("FAILED: ${target}")
                                        currentBuild.result = "FAILURE"
                                        println "Error: ${e.toString()}"
                                    }
                                }
                                if (hwtest_device != null) {
                                    stage("Archive ${target}") {
                                        script {
                                            utils.archive_artifacts("archive", target)
                                        }
                                    }
                                    stage("Test ${target}") {
                                        utils.ghaf_parallel_hw_test(target, hwtest_device)
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        stage('Parallel build targets') {
            steps {
                script {
                    parallel target_jobs
                }
            }
        }
    }
}
// Trigger a 'ghaf-parallel-hw-test' build for the given target on the
// dedicated test agent labeled 'testagent_<device_config>', wait for it to
// finish, then copy and archive its test results. On test failure, marks the
// calling build FAILURE but returns normally so the pipeline can continue.
// Skips (with a warning) when no agent is online or required env is unset.
def ghaf_parallel_hw_test(String flakeref, String device_config, String testset='_boot_') {
    testagent_nodes = nodesByLabel(label: "testagent_$device_config", offline: false)
    if (!testagent_nodes) {
        println "Warning: Skipping HW test '$flakeref', no test agents online"
        unstable("No test agents online")
        return
    }
    if (!env.ARTIFACTS_REMOTE_PATH) {
        println "Warning: skipping HW test '$flakeref', ARTIFACTS_REMOTE_PATH not set"
        return
    }
    if (!env.JENKINS_URL) {
        println "Warning: skipping HW test '$flakeref', JENKINS_URL not set"
        return
    }
    // Compose the image URL; testagent will need this URL to download the image
    imgdir = find_img_relpath(flakeref, 'archive')
    remote_path = "artifacts/${env.ARTIFACTS_REMOTE_PATH}"
    img_url = "${env.JENKINS_URL}/${remote_path}/${imgdir}"
    build_href = "${env.JOB_NAME}#${env.BUILD_ID}"
    flakeref_trimmed = "${flakeref_trim(flakeref)}"
    // NOTE(review): the description likely carried HTML markup (a link to
    // this build) that appears stripped in transit — confirm upstream.
    description = "Triggered by ${build_href} (${flakeref_trimmed})"
    // Trigger a build in 'ghaf-parallel-hw-test' pipeline.
    // 'build' step is documented in https://plugins.jenkins.io/pipeline-build-step/
    job = build(
        job: "ghaf-parallel-hw-test",
        propagate: false,
        parameters: [
            string(name: "LABEL", value: "testagent_$device_config"),
            string(name: "DEVICE_CONFIG_NAME", value: "$device_config"),
            string(name: "IMG_URL", value: "$img_url"),
            string(name: "DESC", value: "$description"),
            string(name: "TESTSET", value: "$testset"),
            string(name: "TARGET", value: "$flakeref_trimmed"),
        ],
        wait: true,
    )
    println "ghaf-parallel-hw-test result (${device_config}:${testset}): ${job.result}"
    // If the test job failed, mark the current step unstable and set
    // the final build result failed, but continue the pipeline execution.
    if (job.result != "SUCCESS") {
        unstable("FAILED: ${device_config} ${testset}")
        currentBuild.result = "FAILURE"
        // Add a link to failed test job(s) on the calling pipeline.
        // NOTE(review): anchor markup appears stripped here too — confirm.
        test_href = "⛔ ${flakeref_trimmed}"
        // Guard against a null description: appending to an unset description
        // would otherwise render a literal 'null' prefix.
        prev_desc = currentBuild.description ?: ""
        currentBuild.description = "${prev_desc} ${test_href}"
    }
    // Copy test results from agent to controller to 'test-results' directory
    copyArtifacts(
        projectName: "ghaf-parallel-hw-test",
        selector: specific("${job.number}"),
        target: "ghaf-parallel-hw-test/${flakeref_trimmed}/test-results",
    )
    // Archive the test results
    archive_artifacts("ghaf-parallel-hw-test", flakeref_trimmed)
}

return this

////////////////////////////////////////////////////////////////////////////////