Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hackathon: Platform Cluster provisioning #961

Draft
wants to merge 21 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 7 additions & 27 deletions .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,36 +47,16 @@ jobs:
- name: Set up Helm
uses: azure/setup-helm@v4

- name: Check for cluster config
id: check-cluster-config
run: |
testManifest="charts/k8s-monitoring/tests/integration/${{ matrix.test }}/test-manifest.yaml"
clusterName="$(yq -r ".cluster.name // \"test-cluster\"" "${testManifest}")"
clusterConfig="$(yq -r ".cluster.config // \"\"" "${testManifest}")"
if [ -f "charts/k8s-monitoring/tests/integration/${{ matrix.test }}/${clusterConfig}" ]; then
echo "has-cluster-config=true" >> "${GITHUB_OUTPUT}"
echo "cluster-config=charts/k8s-monitoring/tests/integration/${{ matrix.test }}/${clusterConfig}" >> "${GITHUB_OUTPUT}"
else
echo "has-cluster-config=false" >> "${GITHUB_OUTPUT}"
fi
echo "cluster-name=${clusterName}" >> "${GITHUB_OUTPUT}"

- name: Create kind cluster
if: ${{ steps.check-cluster-config.outputs.has-cluster-config == 'false' }}
- name: Setup Kind CLI
uses: helm/kind-action@v1
with:
cluster_name: ${{ steps.check-cluster-config.outputs.cluster-name }}
ignore_failed_clean: true
install_only: true

- name: Create kind cluster with special config
if: ${{ steps.check-cluster-config.outputs.has-cluster-config == 'true' }}
uses: helm/kind-action@v1
with:
cluster_name: ${{ steps.check-cluster-config.outputs.cluster-name }}
config: ${{ steps.check-cluster-config.outputs.cluster-config }}
ignore_failed_clean: true
- name: Setup Flux CLI
uses: fluxcd/flux2/action@main

- name: Run test
run: ./scripts/run-cluster-test.sh "charts/k8s-monitoring/tests/integration/${{ matrix.test }}"
env:
HEADLESS: "true"
run: ./scripts/run-integration-test.sh "charts/k8s-monitoring/tests/integration/${{ matrix.test }}"
CREATE_CLUSTER: "true"
DELETE_CLUSTER: "true"
58 changes: 46 additions & 12 deletions .github/workflows/platform-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,54 @@ jobs:
- name: Check for cluster config
id: check-cluster-config
run: |
if [ -f "charts/k8s-monitoring/tests/integration/${{ matrix.test }}/cluster.yaml" ]; then
echo "has-cluster-config=true" >> "${GITHUB_OUTPUT}"
if [ -f "charts/k8s-monitoring/tests/platform/${{ matrix.test }}/eks-cluster-config.yaml" ]; then
echo "cluster-type=eks" >> "${GITHUB_OUTPUT}"
elif [ -f "charts/k8s-monitoring/tests/platform/${{ matrix.test }}/gke-cluster-config.yaml" ]; then
echo "cluster-type=gke" >> "${GITHUB_OUTPUT}"
elif [ -f "charts/k8s-monitoring/tests/platform/${{ matrix.test }}/gke-autopilot-cluster-config.yaml" ]; then
echo "cluster-type=gke" >> "${GITHUB_OUTPUT}"
else
echo "has-cluster-config=false" >> "${GITHUB_OUTPUT}"
echo "cluster-type=kind" >> "${GITHUB_OUTPUT}"
fi

- name: Create kind cluster
if: ${{ steps.check-cluster-config.outputs.has-cluster-config == 'false' }}
- name: Setup Kind CLI
if: ${{ steps.check-cluster-config.outputs.cluster-type == 'kind' }}
uses: helm/kind-action@v1
with:
install_only: true

- name: Create kind cluster with special config
if: ${{ steps.check-cluster-config.outputs.has-cluster-config == 'true' }}
uses: helm/kind-action@v1
- name: Setup eksctl CLI
  if: ${{ steps.check-cluster-config.outputs.cluster-type == 'eks' }}
  # Installs the latest eksctl release for the runner platform (only needed
  # for EKS-backed tests). The archive is checked against the published
  # checksums before it is unpacked and installed with sudo.
  run: |
    set -euo pipefail
    PLATFORM="$(uname -s)_${ARCH}"
    RELEASE_URL="https://github.com/eksctl-io/eksctl/releases/latest/download"
    # -f makes curl fail on HTTP errors instead of saving an error page.
    curl -fsSLO "${RELEASE_URL}/eksctl_${PLATFORM}.tar.gz"
    curl -fsSL "${RELEASE_URL}/eksctl_checksums.txt" | grep "eksctl_${PLATFORM}.tar.gz" | sha256sum --check
    tar -xzf "eksctl_${PLATFORM}.tar.gz" -C /tmp && rm "eksctl_${PLATFORM}.tar.gz"
    sudo mv /tmp/eksctl /usr/local/bin
  env:
    ARCH: amd64

- name: Configure AWS Credentials
if: ${{ steps.check-cluster-config.outputs.cluster-type == 'eks' }}
uses: aws-actions/configure-aws-credentials@v4
with:
config: charts/k8s-monitoring/tests/integration/${{ matrix.test }}/cluster.yaml
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ap-northeast-2

# Authenticate to Google Cloud with the service-account credentials stored in
# the GCP_SERVICE_ACCOUNT_TOKEN secret. Only runs for GKE-backed tests.
- name: Configure GCP Credentials
if: ${{ steps.check-cluster-config.outputs.cluster-type == 'gke' }}
uses: google-github-actions/auth@v2
with:
credentials_json: '${{ secrets.GCP_SERVICE_ACCOUNT_TOKEN }}'

# Install the gcloud SDK together with the gke-gcloud-auth-plugin component.
# Only runs for GKE-backed tests.
- name: Set up Cloud SDK
if: ${{ steps.check-cluster-config.outputs.cluster-type == 'gke' }}
uses: google-github-actions/setup-gcloud@v2
with:
install_components: gke-gcloud-auth-plugin

# Install the Flux CLI; this step has no `if:` so it runs for every cluster type.
# NOTE(review): `@main` is a mutable ref, unlike the `@v1`/`@v2`/`@v4` refs
# used by the other actions in this workflow - consider pinning to a release.
- name: Setup Flux CLI
uses: fluxcd/flux2/action@main

- name: Random number
id: random-number
Expand All @@ -84,13 +117,14 @@ jobs:
max: 999999

- name: Run test
run: ./scripts/run-cluster-test.sh "charts/k8s-monitoring/tests/platform/${{ matrix.test }}"
env:
HEADLESS: "true"
CREATE_CLUSTER: "true"
DELETE_CLUSTER: "true"
GRAFANA_CLOUD_FLEET_MGMT_USER: ${{ secrets.GRAFANA_CLOUD_FLEET_MGMT_USER }}
GRAFANA_CLOUD_FLEET_MGMT_TOKEN: ${{ secrets.GRAFANA_CLOUD_FLEET_MGMT_TOKEN }}
GRAFANA_CLOUD_METRICS_USERNAME: ${{ secrets.GRAFANA_CLOUD_METRICS_USERNAME }}
GRAFANA_CLOUD_LOGS_USERNAME: ${{ secrets.GRAFANA_CLOUD_LOGS_USERNAME }}
GRAFANA_CLOUD_TRACES_USERNAME: ${{ secrets.GRAFANA_CLOUD_TRACES_USERNAME }}
GRAFANA_CLOUD_RW_POLICY_TOKEN: ${{ secrets.GRAFANA_CLOUD_RW_POLICY_TOKEN }}
RANDOM_NUMBER: ${{ steps.random-number.outputs.number }}

run: ./scripts/run-integration-test.sh "charts/k8s-monitoring/tests/platform/${{ matrix.test }}"
1 change: 1 addition & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ installing them for a better experience:

- [helm-docs](https://github.com/norwoodj/helm-docs) - Used for generating Helm chart README.md files.
- [helm unittest](https://github.com/helm-unittest/helm-unittest) - Used for executing Helm chart unit tests.
- [Flux CLI](https://fluxcd.io/flux/cmd/) - Used for executing Helm chart integration and platform tests.
- [shellspec](https://github.com/shellspec/shellspec) - Used for executing some unit tests.
- [vendir](https://carvel.dev/vendir/) - Used for downloading Alloy Modules.

Expand Down
6 changes: 6 additions & 0 deletions charts/k8s-monitoring-test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ values.schema.json: values.yaml $$(wildcard schema-mods/*)
.updatecli-%.yaml: Chart.yaml
../../scripts/charts-to-updatecli.sh Chart.yaml

# Build the query-test image from the query-test directory, tagged with the
# chart version read from Chart.yaml. Declared phony because the target names
# a command, not a file that the recipe produces.
.PHONY: build-query-test-image
build-query-test-image: query-test/Dockerfile query-test/query-test.sh
	docker build --platform linux/amd64 --tag ghcr.io/grafana/query-test:$(shell yq -r '.version' Chart.yaml) query-test

# Push the query-test image for the current chart version to ghcr.io.
.PHONY: push-query-test-image
push-query-test-image:
	docker push ghcr.io/grafana/query-test:$(shell yq -r '.version' Chart.yaml)

.PHONY: clean
clean:
rm -f README.md values.schema.json $(UPDATECLI_FILES)
Expand Down
2 changes: 1 addition & 1 deletion charts/k8s-monitoring-test/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ In order to specify different destinations of the same type, you can use multipl
|-----|------|---------|-------------|
| image.pullSecrets | list | `[]` | Optional set of image pull secrets. |
| image.registry | string | `"ghcr.io"` | Test pod image registry. |
| image.repository | string | `"grafana/k8s-monitoring-test"` | Test pod image repository. |
| image.repository | string | `"grafana/query-test"` | Test pod image repository. |
| image.tag | string | `""` | Test pod image tag. Default is the chart version. |

### Job settings
Expand Down
7 changes: 7 additions & 0 deletions charts/k8s-monitoring-test/query-test/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Stage used only as the source of the profilecli binary.
FROM grafana/pyroscope:1.10.0 AS pyroscope

# Pin the base image so rebuilds are reproducible instead of tracking "latest".
FROM ubuntu:24.04

# Tools used by query-test.sh: bc, curl, envsubst (from gettext) and jq.
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise drop it, and curl needs it for HTTPS endpoints.
RUN apt-get update \
    && apt-get install -y --no-install-recommends bc ca-certificates curl gettext jq \
    && rm -rf /var/lib/apt/lists/*
COPY --from=pyroscope /usr/bin/profilecli /usr/bin/profilecli

COPY ["query-test.sh", "/usr/bin/"]
207 changes: 207 additions & 0 deletions charts/k8s-monitoring-test/query-test/query-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
#!/bin/bash

# Prints the command-line help to stdout: the required environment variables
# for each query type and the layout of the queries file.
usage() {
  cat <<'EOF'
USAGE: query-test.sh queries.json
Run a set of queries against Prometheus, Loki, Tempo, or Pyroscope

Required environment variables:
 If using any PromQL queries:
 PROMETHEUS_URL - The query URL for your Prometheus service (e.g. localhost:9090/api/v1/query)
 PROMETHEUS_USER - The username for running PromQL queries
 PROMETHEUS_PASS - The password for running PromQL queries

 If using any LogQL queries:
 LOKI_URL - The query URL for your Loki service (e.g. localhost:3100/loki/api/v1/query)
 LOKI_TENANTID - The tenant ID for running LogQL queries
 LOKI_USER - The username for running LogQL queries
 LOKI_PASS - The password for running LogQL queries

 If using any TraceQL queries:
 TEMPO_URL - The search URL for your Tempo service (e.g. localhost:3200/api/search)
 TEMPO_USER - The username for running TraceQL queries
 TEMPO_PASS - The password for running TraceQL queries

 If using any profile queries:
 PROFILECLI_URL - The URL for your Pyroscope service (e.g. localhost:4040)
 PROFILECLI_USERNAME - The username for running Pyroscope queries
 PROFILECLI_PASSWORD - The password for running Pyroscope queries

queries.json is the queries file, and should be in the format:
{"queries": [<query>]}

Each query has this format:
{
 "query": "<query string>",
 "type": "[promql (default)|logql|traceql|pyroql]"
}

You can add an "expect" section to the query to validate the returned value
 "expect": {
 "operator": "[<, <=, = or == (default), !=, >=, >]",
 "value": <expected value>
 }

For PromQL queries, "expect" can instead check the number of results returned
 "expect": {
 "count": <expected number of results>
 }
EOF
}

# With no argument, or with -h, print the help text and exit successfully.
case "${1}" in
  "" | "-h")
    usage
    exit 0
    ;;
esac

# The only positional argument is the path to the queries file, which must exist.
QUERIES_FILE="${1}"
if ! [ -f "${QUERIES_FILE}" ]; then
  echo "Queries file not found: ${QUERIES_FILE}"
  usage
  exit 1
fi

# Validates a query result against an expected value using bc.
#
# Arguments:
#   $1 - actual value returned by the query
#   $2 - expected value
#   $3 - comparison operator: <, <=, = (alias of ==), ==, !=, >=, >
# Returns:
#   0 when bc evaluates the comparison to 1 (true); 1 when the operator is
#   unsupported, bc fails, or the comparison is false or not evaluable.
#
# NOTE(review): the expression handed to bc is "<expected> <operator> <actual>",
# the same order as the log line below. Confirm that this operand order is
# what authors of queries.json expect for the non-symmetric operators.
function check_value {
  local actualValue=$1
  local expectedValue=$2
  local operator=$3
  local result

  echo " Expected (${expectedValue}), Operator (${operator}), Actual (${actualValue})"

  case "${operator}" in
    "<" | "<=" | "==" | "!=" | ">=" | ">") ;;
    "=") operator="==" ;;
    *)
      echo " Unsupported operator: \"${operator}\""
      return 1
      ;;
  esac

  if ! result=$(echo "${expectedValue} ${operator} ${actualValue}" | bc); then
    echo " An error occurred while checking the result: ${result}"
    return 1
  fi

  # Compare as strings. bc can print nothing (for example on a syntax error
  # caused by an empty or non-numeric operand) and still exit 0. A numeric
  # test such as [ "" -ne 1 ] then errors out inside the `if`, which would
  # let the function fall through and report success.
  if [ "${result}" != "1" ]; then
    echo " Unexpected query result!"
    return 1
  fi
  return 0
}

# Runs a PromQL instant query and validates the response.
#
# Arguments:
#   $1 - PromQL query string
#   $2 - expected number of results (optional; when set, only the count is checked)
#   $3 - expected value of the first result (optional; used when $2 is empty)
#   $4 - comparison operator passed to check_value together with $3
# Environment:
#   PROMETHEUS_URL (required), PROMETHEUS_USER, PROMETHEUS_PASS
# Returns:
#   0 when the query succeeds and meets the expectation, non-zero otherwise.
function metrics_query {
  local query="${1}"
  local expectedCount="${2}"
  local expectedValue="${3}"
  local expectedOperator="${4}"
  # Keep the working variables local so they do not leak into the caller.
  local result status resultCount

  if [ -z "${PROMETHEUS_URL}" ]; then
    echo "PROMETHEUS_URL is not defined. Unable to run PromQL queries!"
    return 1
  fi

  echo "Running PromQL query: ${PROMETHEUS_URL}?query=${query}..."
  result=$(curl -skX POST -u "${PROMETHEUS_USER}:${PROMETHEUS_PASS}" "${PROMETHEUS_URL}" --data-urlencode "query=${query}")
  status=$(echo "${result}" | jq -r .status)
  if [ "${status}" != "success" ]; then
    echo "Query failed!"
    echo "Response: ${result}"
    return 1
  fi

  resultCount=$(echo "${result}" | jq '.data.result | length')
  # A non-numeric count would make the -ne / -eq tests below error out inside
  # `if`, which reads as "condition false" and would let a bad run pass.
  if ! [[ "${resultCount}" =~ ^[0-9]+$ ]]; then
    echo "Unable to determine the number of results"
    echo "Result: ${result}"
    return 1
  fi

  if [ -n "${expectedCount}" ]; then
    if ! [[ "${expectedCount}" =~ ^[0-9]+$ ]]; then
      echo " Invalid expected count: \"${expectedCount}\""
      return 1
    fi
    echo " Expected ${expectedCount} results. Found ${resultCount} results."
    if [ "${resultCount}" -ne "${expectedCount}" ]; then
      echo " Unexpected number of results returned!"
      echo "Result: ${result}"
      return 1
    fi
    return 0
  fi

  if [ "${resultCount}" -eq 0 ]; then
    echo "Query returned no results"
    echo "Result: ${result}"
    return 1
  fi

  if [ -n "${expectedValue}" ]; then
    # Only the first result's sample value is compared.
    check_value "$(echo "${result}" | jq -r '.data.result[0].value[1] | tostring')" "${expectedValue}" "${expectedOperator}"
    return $?
  fi
  return 0
}

# Runs a LogQL query and requires a successful, non-empty response.
#
# Arguments:
#   $1 - LogQL query string
# Environment:
#   LOKI_URL (required), LOKI_TENANTID, LOKI_USER, LOKI_PASS
# Returns:
#   0 when the query succeeds and returns at least one result, 1 otherwise.
function logs_query {
  local query="${1}"
  # Keep the working variables local so they do not leak into the caller.
  local result status resultCount

  # Same guard as metrics_query: fail with a clear message instead of
  # calling curl with an empty URL.
  if [ -z "${LOKI_URL}" ]; then
    echo "LOKI_URL is not defined. Unable to run LogQL queries!"
    return 1
  fi

  echo "Running LogQL query: ${LOKI_URL}?query=${query}..."
  result=$(curl -s --get -H "X-Scope-OrgID:${LOKI_TENANTID}" -u "${LOKI_USER}:${LOKI_PASS}" "${LOKI_URL}" --data-urlencode "query=${query}")
  status=$(echo "${result}" | jq -r .status)
  if [ "${status}" != "success" ]; then
    echo "Query failed!"
    echo "Response: ${result}"
    return 1
  fi

  resultCount=$(echo "${result}" | jq '.data.result | length')
  # Anything other than a positive integer (including an empty count when jq
  # fails) is treated as "no results".
  if ! [[ "${resultCount}" =~ ^[0-9]+$ ]] || [ "${resultCount}" -eq 0 ]; then
    echo "Query returned no results"
    echo "Result: ${result}"
    return 1
  fi
  return 0
}

# Runs a TraceQL search and requires at least one trace in the response.
#
# Arguments:
#   $1 - TraceQL query string
# Environment:
#   TEMPO_URL (required), TEMPO_USER, TEMPO_PASS
# Returns:
#   0 when the response contains at least one trace, 1 otherwise.
function traces_query {
  local query="${1}"
  # Keep the working variables local so they do not leak into the caller.
  local result resultCount

  # Same guard as metrics_query: fail with a clear message instead of
  # calling curl with an empty URL.
  if [ -z "${TEMPO_URL}" ]; then
    echo "TEMPO_URL is not defined. Unable to run TraceQL queries!"
    return 1
  fi

  echo "Running TraceQL query: ${TEMPO_URL}?q=${query}..."
  result=$(curl -sk --get -u "${TEMPO_USER}:${TEMPO_PASS}" "${TEMPO_URL}" --data-urlencode "q=${query}")
  resultCount=$(echo "${result}" | jq '.traces | length')

  # The response carries no status field to check, so an unparsable response
  # (curl failure, auth error page, ...) leaves resultCount empty. A numeric
  # test on an empty string errors out inside `if` and would let the function
  # return success, so validate the count first.
  if ! [[ "${resultCount}" =~ ^[0-9]+$ ]]; then
    echo "Query failed!"
    echo "Response: ${result}"
    return 1
  fi

  if [ "${resultCount}" -eq 0 ]; then
    echo "Query returned no results"
    echo "Result: ${result}"
    return 1
  fi
  return 0
}

# Runs a profile series query through profilecli and requires at least one
# series in the output.
#
# Arguments:
#   $1 - query string passed to `profilecli query series --query=...`
# Returns:
#   0 when at least one series is returned, 1 otherwise.
function profiles_query {
  local query="${1}"
  # Keep the working variables local so they do not leak into the caller.
  local result resultCount

  echo "Running profiles query: ${query}..."
  result=$(profilecli query series --query="${query}")
  # Silence jq's parse errors (the redirect used to sit on echo, where it had
  # no effect); the raw output is printed below when the count is unusable.
  resultCount=$(echo "${result}" | jq --slurp 'length' 2>/dev/null)

  # Output that jq cannot parse leaves resultCount empty. A numeric test on
  # an empty string errors out inside `if` and would let the function return
  # success, so validate the count first.
  if ! [[ "${resultCount}" =~ ^[0-9]+$ ]]; then
    echo "Query failed!"
    echo "Response: ${result}"
    return 1
  fi

  if [ "${resultCount}" -eq 0 ]; then
    echo "Query returned no results"
    echo "Result: ${result}"
    return 1
  fi
  return 0
}

# Run every query in the queries file in order and stop at the first failure.
#
# Fail loudly when the file cannot be parsed. Otherwise the count is empty,
# the loop body never runs, and the script would report "All queries passed!"
# without having executed a single query.
if ! queryCount=$(jq -r '.queries | length' "${QUERIES_FILE}") || ! [[ "${queryCount}" =~ ^[0-9]+$ ]]; then
  echo "Unable to read the list of queries from ${QUERIES_FILE}"
  exit 1
fi

for ((i = 0; i < queryCount; i++)); do
  # Environment variables referenced in the query text are expanded by envsubst.
  query=$(jq -r --argjson i "${i}" '.queries[$i].query' "${QUERIES_FILE}" | envsubst)
  type=$(jq -r --argjson i "${i}" '.queries[$i] | .type // "promql"' "${QUERIES_FILE}")
  # Missing expectations become empty strings; the operator defaults to "==".
  expectedCount=$(jq -r --argjson i "${i}" '.queries[$i].expect.count // empty | tostring' "${QUERIES_FILE}")
  expectedValue=$(jq -r --argjson i "${i}" '.queries[$i].expect.value // empty | tostring' "${QUERIES_FILE}")
  expectedOperator=$(jq -r --argjson i "${i}" '.queries[$i].expect | .operator // "=="' "${QUERIES_FILE}")

  case "${type}" in
    promql)
      metrics_query "${query}" "${expectedCount}" "${expectedValue}" "${expectedOperator}" || exit 1
      ;;
    logql)
      logs_query "${query}" || exit 1
      ;;
    traceql)
      traces_query "${query}" || exit 1
      ;;
    pyroql)
      profiles_query "${query}" || exit 1
      ;;
    *)
      echo "Query type ${type} is not yet supported in this test"
      exit 1
      ;;
  esac
done

echo "All queries passed!"
2 changes: 1 addition & 1 deletion charts/k8s-monitoring-test/templates/tests/test-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ spec:
- |
for i in $(seq 1 {{ $.Values.attempts | int }}); do
echo "Running test... ($i/{{ $.Values.attempts | int }})"
if /etc/bin/query-test.sh /etc/test/queries.json; then
if /usr/bin/query-test.sh /etc/test/queries.json; then
exit 0
fi
sleep {{ $.Values.delay | int }}
Expand Down
2 changes: 1 addition & 1 deletion charts/k8s-monitoring-test/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ image:
registry: ghcr.io
# -- Test pod image repository.
# @section -- Image settings
repository: grafana/k8s-monitoring-test
repository: grafana/query-test
# -- Test pod image tag. Default is the chart version.
# @section -- Image settings
tag: ""
Expand Down
Loading
Loading