grafana · petewall · Dec 2, 2024 · Dec 2, 2024 · Dec 2, 2024 · Dec 3, 2024
@@ -47,36 +47,16 @@ jobs:
       - name: Set up Helm
         uses: azure/setup-helm@v4
 
-      - name: Check for cluster config
-        id: check-cluster-config
-        run: |
-          testManifest="charts/k8s-monitoring/tests/integration/${{ matrix.test }}/test-manifest.yaml"
-          clusterName="$(yq -r ".cluster.name // \"test-cluster\"" "${testManifest}")"
-          clusterConfig="$(yq -r ".cluster.config // \"\"" "${testManifest}")"
-          if [ -f "charts/k8s-monitoring/tests/integration/${{ matrix.test }}/${clusterConfig}" ]; then
-            echo "has-cluster-config=true" >> "${GITHUB_OUTPUT}"
-            echo "cluster-config=charts/k8s-monitoring/tests/integration/${{ matrix.test }}/${clusterConfig}" >> "${GITHUB_OUTPUT}"
-          else
-            echo "has-cluster-config=false" >> "${GITHUB_OUTPUT}"
-          fi
-          echo "cluster-name=${clusterName}" >> "${GITHUB_OUTPUT}"
-
-      - name: Create kind cluster
-        if: ${{ steps.check-cluster-config.outputs.has-cluster-config == 'false' }}
+      - name: Setup Kind CLI
         uses: helm/kind-action@v1
         with:
-          cluster_name: ${{ steps.check-cluster-config.outputs.cluster-name }}
-          ignore_failed_clean: true
+          install_only: true
 
-      - name: Create kind cluster with special config
-        if: ${{ steps.check-cluster-config.outputs.has-cluster-config == 'true' }}
-        uses: helm/kind-action@v1
-        with:
-          cluster_name: ${{ steps.check-cluster-config.outputs.cluster-name }}
-          config: ${{ steps.check-cluster-config.outputs.cluster-config }}
-          ignore_failed_clean: true
+      - name: Setup Flux CLI
+        uses: fluxcd/flux2/action@main
 
       - name: Run test
+        run: ./scripts/run-cluster-test.sh "charts/k8s-monitoring/tests/integration/${{ matrix.test }}"
         env:
-          HEADLESS: "true"
-        run: ./scripts/run-integration-test.sh "charts/k8s-monitoring/tests/integration/${{ matrix.test }}"
+          CREATE_CLUSTER: "true"
+          DELETE_CLUSTER: "true"
@@ -60,21 +60,54 @@ jobs:
       - name: Check for cluster config
         id: check-cluster-config
         run: |
-          if [ -f "charts/k8s-monitoring/tests/integration/${{ matrix.test }}/cluster.yaml" ]; then
-            echo "has-cluster-config=true" >> "${GITHUB_OUTPUT}"
+          if [ -f "charts/k8s-monitoring/tests/platform/${{ matrix.test }}/eks-cluster-config.yaml" ]; then
+            echo "cluster-type=eks" >> "${GITHUB_OUTPUT}"
+          elif [ -f "charts/k8s-monitoring/tests/platform/${{ matrix.test }}/gke-cluster-config.yaml" ]; then
+            echo "cluster-type=gke" >> "${GITHUB_OUTPUT}"
+          elif [ -f "charts/k8s-monitoring/tests/platform/${{ matrix.test }}/gke-autopilot-cluster-config.yaml" ]; then
+            echo "cluster-type=gke" >> "${GITHUB_OUTPUT}"
           else
-            echo "has-cluster-config=false" >> "${GITHUB_OUTPUT}"
+            echo "cluster-type=kind" >> "${GITHUB_OUTPUT}"
           fi
 
-      - name: Create kind cluster
-        if: ${{ steps.check-cluster-config.outputs.has-cluster-config == 'false' }}
+      - name: Setup Kind CLI
+        if: ${{ steps.check-cluster-config.outputs.cluster-type == 'kind' }}
         uses: helm/kind-action@v1
+        with:
+          install_only: true
 
-      - name: Create kind cluster with special config
-        if: ${{ steps.check-cluster-config.outputs.has-cluster-config == 'true' }}
-        uses: helm/kind-action@v1
+      - name: Setup eksctl CLI
+        if: ${{ steps.check-cluster-config.outputs.cluster-type == 'eks' }}
+        run: |
+          PLATFORM="$(uname -s)_$ARCH"
+          curl -sLO "https://github.com/eksctl-io/eksctl/releases/latest/download/eksctl_$PLATFORM.tar.gz"
+          tar -xzf "eksctl_$PLATFORM.tar.gz" -C /tmp && rm "eksctl_$PLATFORM.tar.gz"
+          sudo mv /tmp/eksctl /usr/local/bin
+        env:
+          ARCH: amd64
+
+      - name: Configure AWS Credentials
+        if: ${{ steps.check-cluster-config.outputs.cluster-type == 'eks' }}
+        uses: aws-actions/configure-aws-credentials@v4
         with:
-          config: charts/k8s-monitoring/tests/integration/${{ matrix.test }}/cluster.yaml
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ap-northeast-2
+
+      - name: Configure GCP Credentials
+        if: ${{ steps.check-cluster-config.outputs.cluster-type == 'gke' }}
+        uses: google-github-actions/auth@v2
+        with:
+          credentials_json: '${{ secrets.GCP_SERVICE_ACCOUNT_TOKEN }}'
+
+      - name: Set up Cloud SDK
+        if: ${{ steps.check-cluster-config.outputs.cluster-type == 'gke' }}
+        uses: google-github-actions/setup-gcloud@v2
+        with:
+          install_components: gke-gcloud-auth-plugin
+
+      - name: Setup Flux CLI
+        uses: fluxcd/flux2/action@main
 
       - name: Random number
         id: random-number
@@ -84,13 +117,14 @@ jobs:
           max: 999999
 
       - name: Run test
+        run: ./scripts/run-cluster-test.sh "charts/k8s-monitoring/tests/platform/${{ matrix.test }}"
         env:
-          HEADLESS: "true"
+          CREATE_CLUSTER: "true"
+          DELETE_CLUSTER: "true"
           GRAFANA_CLOUD_FLEET_MGMT_USER: ${{ secrets.GRAFANA_CLOUD_FLEET_MGMT_USER }}
           GRAFANA_CLOUD_FLEET_MGMT_TOKEN: ${{ secrets.GRAFANA_CLOUD_FLEET_MGMT_TOKEN }}
           GRAFANA_CLOUD_METRICS_USERNAME: ${{ secrets.GRAFANA_CLOUD_METRICS_USERNAME }}
           GRAFANA_CLOUD_LOGS_USERNAME: ${{ secrets.GRAFANA_CLOUD_LOGS_USERNAME }}
+          GRAFANA_CLOUD_TRACES_USERNAME: ${{ secrets.GRAFANA_CLOUD_TRACES_USERNAME }}
           GRAFANA_CLOUD_RW_POLICY_TOKEN: ${{ secrets.GRAFANA_CLOUD_RW_POLICY_TOKEN }}
           RANDOM_NUMBER: ${{ steps.random-number.outputs.number }}
-
-        run: ./scripts/run-integration-test.sh "charts/k8s-monitoring/tests/platform/${{ matrix.test }}"
@@ -30,6 +30,7 @@ installing them for a better experience:
 
 -   [helm-docs](https://github.com/norwoodj/helm-docs) - Used for generating Helm chart README.md files.
 -   [helm unittest](https://github.com/helm-unittest/helm-unittest) - Used for executing Helm chart unit tests.
+-   [Flux CLI](https://fluxcd.io/flux/cmd/) - Used for executing Helm chart integration and platform tests.
 -   [shellspec](https://github.com/shellspec/shellspec) - Used for executing some unit tests.
 -   [vendir](https://carvel.dev/vendir/) - Used for downloading Alloy Module
 

@@ -20,6 +20,12 @@ values.schema.json: values.yaml $$(wildcard schema-mods/*)
 .updatecli-%.yaml: Chart.yaml
 	../../scripts/charts-to-updatecli.sh Chart.yaml
 
+# Build the query-test image, tagged with the chart version read from Chart.yaml.
+# Declared phony: the target names an action, not a file that gets produced.
+.PHONY: build-query-test-image
+build-query-test-image: query-test/Dockerfile query-test/query-test.sh
+	docker build --platform linux/amd64 --tag ghcr.io/grafana/query-test:$(shell yq -r '.version' Chart.yaml) query-test
+
+# Push the previously built query-test image for the current chart version.
+# Declared phony so a stray file named like the target can never suppress the push.
+.PHONY: push-query-test-image
+push-query-test-image:
+	docker push ghcr.io/grafana/query-test:$(shell yq -r '.version' Chart.yaml)
+
 .PHONY: clean
 clean:
 	rm -f README.md values.schema.json $(UPDATECLI_FILES)

@@ -94,7 +94,7 @@ In order to specify different destinations of the same type, you can use multipl
 |-----|------|---------|-------------|
 | image.pullSecrets | list | `[]` | Optional set of image pull secrets. |
 | image.registry | string | `"ghcr.io"` | Test pod image registry. |
-| image.repository | string | `"grafana/k8s-monitoring-test"` | Test pod image repository. |
+| image.repository | string | `"grafana/query-test"` | Test pod image repository. |
 | image.tag | string | `""` | Test pod image tag. Default is the chart version. |
 
 ### Job settings

@@ -0,0 +1,7 @@
+# Stage used only as the source of the profilecli binary.
+FROM grafana/pyroscope:1.10.0 AS pyroscope
+
+# Pin the base image so rebuilding the same chart version yields the same image.
+# NOTE(review): 24.04 is assumed to match what the floating "ubuntu" tag resolved to; confirm.
+FROM ubuntu:24.04
+
+# Tools invoked by query-test.sh: bc, curl, envsubst (from gettext) and jq.
+# ca-certificates is listed explicitly because --no-install-recommends would
+# otherwise leave curl without a CA bundle for HTTPS endpoints.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends bc ca-certificates curl gettext jq \
+    && rm -rf /var/lib/apt/lists/*
+COPY --from=pyroscope /usr/bin/profilecli /usr/bin/profilecli
+
+COPY ["query-test.sh", "/usr/bin/"]
@@ -0,0 +1,207 @@
+#!/bin/bash
+
+# Print command-line help: the environment variables each query type needs and
+# the layout of the queries file. Takes no arguments and writes to stdout.
+usage() {
+  cat <<'EOF'
+USAGE: query-test.sh queries.json
+Run a set of queries against Prometheus, Loki, Tempo, or Pyroscope
+
+Required environment variables:
+  If using any PromQL queries:
+  PROMETHEUS_URL - The query URL for your Prometheus service (e.g. localhost:9090/api/v1/query)
+  PROMETHEUS_USER - The username for running PromQL queries
+  PROMETHEUS_PASS - The password for running PromQL queries
+
+  If using any LogQL queries:
+  LOKI_URL - The query URL for your Loki service (e.g. localhost:3100/loki/api/v1/query)
+  LOKI_TENANTID - The tenant ID for running LogQL queries
+  LOKI_USER - The username for running LogQL queries
+  LOKI_PASS - The password for running LogQL queries
+
+  If using any TraceQL queries:
+  TEMPO_URL - The search URL for your Tempo service (e.g. localhost:3200/api/search)
+  TEMPO_USER - The username for running TraceQL queries
+  TEMPO_PASS - The password for running TraceQL queries
+
+  If using any profile queries:
+  PROFILECLI_URL - The URL for your Pyroscope service (e.g. localhost:4040)
+  PROFILECLI_USERNAME - The username for running Pyroscope queries
+  PROFILECLI_PASSWORD - The password for running Pyroscope queries
+
+queries.json is the queries file, and should be in the format:
+{"queries": [<query>]}
+
+Each query has this format:
+{
+  "query": "<query string>",
+  "type": "[promql (default)|logql|traceql|pyroql]"
+}
+
+You can add an "expect" section to a PromQL query to validate the returned value
+  "expect": {
+    "operator": "[<, <=, =, == (default), !=, >=, >]",
+    "value": <expected value>
+  }
+or to validate the number of results returned
+  "expect": {
+    "count": <expected number of results>
+  }
+EOF
+}
+
+# An explicit help request succeeds.
+if [ "${1}" == "-h" ] || [ "${1}" == "--help" ]; then
+  usage
+  exit 0
+fi
+
+# A missing queries file argument is a usage error: exit non-zero so a
+# mis-invoked test run is never reported as a success.
+if [ -z "${1}" ]; then
+  echo "Missing required argument: queries.json"
+  usage
+  exit 1
+fi
+
+QUERIES_FILE="${1}"
+if [ ! -f "${QUERIES_FILE}" ]; then
+  echo "Queries file not found: ${QUERIES_FILE}"
+  usage
+  exit 1
+fi
+
+function check_value {
+  local actualValue=$1
+  local expectedValue=$2
+  local operator=$3
+
+  echo "  Expected (${expectedValue}), Operator (${operator}), Actual (${actualValue})"
+
+  case "${operator}" in
+  "<")  operator="<" ;;
+  "<=") operator="<=" ;;
+  "=")  operator="==" ;;
+  "==")  operator="==" ;;
+  "!=") operator="!=" ;;
+  ">=") operator=">=" ;;
+  ">")  operator=">" ;;
+  *)
+    echo "  Unsupported operator: \"${operator}\""
+    return 1
+  esac
+  local result
+
+  if ! result=$(echo "${expectedValue} ${operator} ${actualValue}" | bc); then
+    echo "  An error occurred while checking the result: ${result}"
+    return 1
+  fi
+  if [ "${result}" -ne "1" ]; then
+    echo "  Unexpected query result!"
+    return 1
+  fi
+  return 0
+}
+
+function metrics_query {
+  local query="${1}"
+  local expectedCount="${2}"
+  local expectedValue="${3}"
+  local expectedOperator="${4}"
+
+  if [ -z "${PROMETHEUS_URL}" ]; then
+    echo "PROMETHEUS_URL is not defined. Unable to run PromQL queries!"
+    return 1
+  fi
+
+  echo "Running PromQL query: ${PROMETHEUS_URL}?query=${query}..."
+  result=$(curl -skX POST -u "${PROMETHEUS_USER}:${PROMETHEUS_PASS}" "${PROMETHEUS_URL}" --data-urlencode "query=${query}")
+  status=$(echo "${result}" | jq -r .status)
+  if [ "${status}" != "success" ]; then
+    echo "Query failed!"
+    echo "Response: ${result}"
+    return 1
+  fi
+
+  resultCount=$(echo "${result}" | jq '.data.result | length')
+  if [ -n "${expectedCount}" ]; then
+    echo "  Expected ${expectedCount} results. Found ${resultCount} results."
+    if [ "${resultCount}" -ne "${expectedCount}" ]; then
+      echo "  Unexpected number of results returned!"
+      echo "Result: ${result}"
+      return 1
+    fi
+  else
+    if [ "${resultCount}" -eq 0 ]; then
+      echo "Query returned no results"
+      echo "Result: ${result}"
+      return 1
+    fi
+
+    if [ -n "${expectedValue}" ]; then
+      check_value "$(echo "${result}" | jq -r '.data.result[0].value[1] | tostring')" "${expectedValue}" "${expectedOperator}"
+    fi
+  fi
+
+}
+
+# Run a LogQL query against Loki and require a successful, non-empty response.
+#   $1 - LogQL query string
+# Reads LOKI_URL, LOKI_TENANTID, LOKI_USER and LOKI_PASS from the environment.
+# Returns 0 when at least one result comes back, 1 otherwise.
+function logs_query {
+  local query="${1}"
+  local result status resultCount
+
+  if [ -z "${LOKI_URL}" ]; then
+    echo "LOKI_URL is not defined. Unable to run LogQL queries!"
+    return 1
+  fi
+
+  echo "Running LogQL query: ${LOKI_URL}?query=${query}..."
+  result=$(curl -s --get -H "X-Scope-OrgID:${LOKI_TENANTID}" -u "${LOKI_USER}:${LOKI_PASS}" "${LOKI_URL}" --data-urlencode "query=${query}")
+  status=$(echo "${result}" | jq -r .status)
+  if [ "${status}" != "success" ]; then
+    echo "Query failed!"
+    echo "Response: ${result}"
+    return 1
+  fi
+
+  resultCount=$(echo "${result}" | jq '.data.result | length')
+  # An empty or non-integer count would make the integer test below error out
+  # and be skipped, so treat it as a failure.
+  if ! [[ "${resultCount}" =~ ^[0-9]+$ ]]; then
+    echo "Unable to determine the number of results"
+    echo "Result: ${result}"
+    return 1
+  fi
+  if [ "${resultCount}" -eq 0 ]; then
+    echo "Query returned no results"
+    echo "Result: ${result}"
+    return 1
+  fi
+  return 0
+}
+
+# Run a TraceQL search against Tempo and require at least one matching trace.
+#   $1 - TraceQL query string
+# Reads TEMPO_URL, TEMPO_USER and TEMPO_PASS from the environment.
+# Returns 0 when at least one trace is returned, 1 otherwise.
+function traces_query {
+  local query="${1}"
+  local result resultCount
+
+  if [ -z "${TEMPO_URL}" ]; then
+    echo "TEMPO_URL is not defined. Unable to run TraceQL queries!"
+    return 1
+  fi
+
+  echo "Running TraceQL query: ${TEMPO_URL}?q=${query}..."
+  result=$(curl -sk --get -u "${TEMPO_USER}:${TEMPO_PASS}" "${TEMPO_URL}" --data-urlencode "q=${query}")
+  resultCount=$(echo "${result}" | jq '.traces | length')
+  # The response carries no status field to check, so a body jq cannot parse
+  # leaves the count empty. Without this guard the integer test below would
+  # error out, be skipped, and the query would be reported as passing.
+  if ! [[ "${resultCount}" =~ ^[0-9]+$ ]]; then
+    echo "Query failed!"
+    echo "Response: ${result}"
+    return 1
+  fi
+  if [ "${resultCount}" -eq 0 ]; then
+    echo "Query returned no results"
+    echo "Result: ${result}"
+    return 1
+  fi
+  return 0
+}
+
+# Run a profile series query through profilecli and require at least one series.
+#   $1 - query string passed to "profilecli query series --query"
+# Connection settings are not passed here; per the usage text profilecli takes
+# them from PROFILECLI_URL, PROFILECLI_USERNAME and PROFILECLI_PASSWORD.
+# Returns 0 when at least one result is returned, 1 otherwise.
+function profiles_query {
+  local query="${1}"
+  local result resultCount
+
+  echo "Running profiles query: ${query}..."
+  if ! result=$(profilecli query series --query="${query}"); then
+    echo "Query failed!"
+    echo "Response: ${result}"
+    return 1
+  fi
+
+  resultCount=$(echo "${result}" | jq --slurp 'length')
+  # Output jq cannot parse leaves the count empty; without this guard the
+  # integer test below would error out, be skipped, and the query would pass.
+  if ! [[ "${resultCount}" =~ ^[0-9]+$ ]]; then
+    echo "Unable to determine the number of results"
+    echo "Result: ${result}"
+    return 1
+  fi
+  if [ "${resultCount}" -eq 0 ]; then
+    echo "Query returned no results"
+    echo "Result: ${result}"
+    return 1
+  fi
+  return 0
+}
+
+count=$(jq -r ".queries | length-1" "${QUERIES_FILE}")
+for i in $(seq 0 "${count}"); do
+  query=$(jq -r --argjson i "${i}" '.queries[$i].query' "${QUERIES_FILE}" | envsubst)
+  type=$(jq -r --argjson i "${i}" '.queries[$i] | .type // "promql"' "${QUERIES_FILE}")
+  expectedCount=$(jq -r --argjson i "${i}" '.queries[$i].expect.count // empty | tostring' "${QUERIES_FILE}")
+  expectedValue=$(jq -r --argjson i "${i}" '.queries[$i].expect.value // empty | tostring' "${QUERIES_FILE}")
+  expectedOperator=$(jq -r --argjson i "${i}" '.queries[$i].expect | .operator // "=="' "${QUERIES_FILE}")
+
+  case "${type}" in
+    promql)
+      if ! metrics_query "${query}" "${expectedCount}" "${expectedValue}" "${expectedOperator}"; then
+        exit 1
+      fi
+      ;;
+    logql)
+      if ! logs_query "${query}"; then
+        exit 1
+      fi
+      ;;
+    traceql)
+      if ! traces_query "${query}"; then
+        exit 1
+      fi
+      ;;
+    pyroql)
+      if ! profiles_query "${query}"; then
+        exit 1
+      fi
+      ;;
+    *)
+      echo "Query type ${type} is not yet supported in this test"
+      exit 1
+      ;;
+  esac
+done
+
+echo "All queries passed!"
@@ -56,7 +56,7 @@ spec:
         - |
           for i in $(seq 1 {{ $.Values.attempts | int }}); do
             echo "Running test... ($i/{{ $.Values.attempts | int }})"
-            if /etc/bin/query-test.sh /etc/test/queries.json; then
+            if /usr/bin/query-test.sh /etc/test/queries.json; then
               exit 0
             fi
             sleep {{ $.Values.delay | int }}

@@ -52,7 +52,7 @@ image:
   registry: ghcr.io
   # -- Test pod image repository.
   # @section -- Image settings
-  repository: grafana/k8s-monitoring-test
+  repository: grafana/query-test
   # -- Test pod image tag. Default is the chart version.
   # @section -- Image settings
   tag: ""