From 90bfbfdc4043b4cb89c789447a8f35a035f8027c Mon Sep 17 00:00:00 2001
From: Muhammad Atif Ali
Date: Mon, 18 Nov 2024 16:11:12 +0500
Subject: [PATCH 1/2] chore: add health check badge (#341)

---
 .github/workflows/check.yaml | 4 ++--
 README.md                    | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml
index c43feea2..aabaa17c 100644
--- a/.github/workflows/check.yaml
+++ b/.github/workflows/check.yaml
@@ -1,5 +1,5 @@
-name: Check modules on registry.coder.com
-
+name: Health
+# Check modules health on registry.coder.com
 on:
   schedule:
     - cron: "*/13 * * * *" # Runs every 13th minute
diff --git a/README.md b/README.md
index 48a96a3a..8228deb3 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@

 [![discord](https://img.shields.io/discord/747933592273027093?label=discord)](https://discord.gg/coder)
 [![license](https://img.shields.io/github/license/coder/modules)](./LICENSE)
+[![Health](https://github.com/coder/modules/actions/workflows/check.yaml/badge.svg)](https://github.com/coder/modules/actions/workflows/check.yaml)




From 5101c27c83e6ede0f1ae561a152a0b32ac40b1ee Mon Sep 17 00:00:00 2001
From: Muhammad Atif Ali
Date: Tue, 19 Nov 2024 14:22:03 +0500
Subject: [PATCH 2/2] chore: integrate Instatus in check script (#342)

---
 .github/scripts/check.sh     | 91 +++++++++++++++++++++++++++++++++---
 .github/workflows/check.yaml |  6 ++-
 2 files changed, 89 insertions(+), 8 deletions(-)

diff --git a/.github/scripts/check.sh b/.github/scripts/check.sh
index ad04ca1f..abb47907 100755
--- a/.github/scripts/check.sh
+++ b/.github/scripts/check.sh
@@ -1,24 +1,85 @@
 #!/usr/bin/env bash
 set -o pipefail
-REGISTRY_BASE_URL="${REGISTRY_BASE_URL:-https://registry.coder.com}"
 set -u

-if [[ -n "${VERBOSE:-}" ]]; then
-  set -x
-fi
+# List of required environment variables
+required_vars=(
+  "INSTATUS_API_KEY"
+  "INSTATUS_PAGE_ID"
+  "INSTATUS_COMPONENT_ID"
+)
+
+# Abort early if any required variable is unset or empty (indirect expansion via ${!var})
+for var in "${required_vars[@]}"; do
+  if [[ -z "${!var:-}" ]]; then
+    echo "Error: Environment variable '$var' is not set." >&2
+    exit 1
+  fi
+done
+
+REGISTRY_BASE_URL="${REGISTRY_BASE_URL:-https://registry.coder.com}"

 status=0
 declare -a modules=()
 declare -a failures=()
+
+# Collect all module directories containing a main.tf file
 for path in $(find . -not -path '*/.*' -type f -name main.tf -maxdepth 2 | cut -d '/' -f 2 | sort -u); do
   modules+=("${path}")
 done
+
 echo "Checking modules: ${modules[*]}"
+
+# Set the Instatus component status; $1 is the status string sent to the API (e.g. OPERATIONAL)
+update_component_status() {
+  local component_status=$1
+  # see https://instatus.com/help/api/components
+  (curl -X PUT "https://api.instatus.com/v1/$INSTATUS_PAGE_ID/components/$INSTATUS_COMPONENT_ID" \
+    -H "Authorization: Bearer $INSTATUS_API_KEY" \
+    -H "Content-Type: application/json" \
+    -d "{\"status\": \"$component_status\"}")
+}
+
+# Create an Instatus incident listing the failed modules; prints the new incident id on stdout
+create_incident() {
+  local incident_name="Testing Instatus"
+  local message="The following modules are experiencing issues:\n"
+  for i in "${!failures[@]}"; do
+    message+="$((i + 1)). ${failures[$i]}\n"
+  done
+
+  local component_status="PARTIALOUTAGE"
+  if (( ${#failures[@]} == ${#modules[@]} )); then
+    component_status="MAJOROUTAGE"
+  fi
+  # see https://instatus.com/help/api/incidents
+  response=$(curl -s -X POST "https://api.instatus.com/v1/$INSTATUS_PAGE_ID/incidents" \
+    -H "Authorization: Bearer $INSTATUS_API_KEY" \
+    -H "Content-Type: application/json" \
+    -d "{
+      \"name\": \"$incident_name\",
+      \"message\": \"$message\",
+      \"components\": [\"$INSTATUS_COMPONENT_ID\"],
+      \"status\": \"INVESTIGATING\",
+      \"notify\": true,
+      \"statuses\": [
+        {
+          \"id\": \"$INSTATUS_COMPONENT_ID\",
+          \"status\": \"$component_status\"
+        }
+      ]
+    }")
+
+  incident_id=$(echo "$response" | jq -r '.id')
+  echo "$incident_id"
+}
+
+# Check each module's accessibility
 for module in "${modules[@]}"; do
   # Trim leading/trailing whitespace from module name
   module=$(echo "${module}" | xargs)
   url="${REGISTRY_BASE_URL}/modules/${module}"
-  printf "=== Check module %s at %s\n" "${module}" "${url}"
+  printf "=== Checking module %s at %s\n" "${module}" "${url}"
   status_code=$(curl --output /dev/null --head --silent --fail --location "${url}" --retry 3 --write-out "%{http_code}")
   # shellcheck disable=SC2181
   if (( status_code != 200 )); then
@@ -30,7 +91,23 @@ for module in "${modules[@]}"; do
   fi
 done

-if (( status != 0 )); then
-  echo "The following modules appear to have issues: ${failures[*]}"
+# Determine overall status and update Instatus component
+if (( status == 0 )); then
+  echo "All modules are operational."
+  # No failures: mark the component as operational
+  update_component_status "OPERATIONAL"
+else
+  echo "The following modules have issues: ${failures[*]}"
+  # check if all modules are down
+  if (( ${#failures[@]} == ${#modules[@]} )); then
+    update_component_status "MAJOROUTAGE"
+  else
+    update_component_status "PARTIALOUTAGE"
+  fi
+
+  # Create a new incident
+  incident_id=$(create_incident)
+  echo "Created incident with ID: $incident_id"
 fi
+
 exit "${status}"
diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml
index aabaa17c..4095073e 100644
--- a/.github/workflows/check.yaml
+++ b/.github/workflows/check.yaml
@@ -11,8 +11,12 @@ jobs:

     steps:
       - name: Checkout repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4

       - name: Run check.sh
         run: |
           ./.github/scripts/check.sh
+        env:
+          INSTATUS_API_KEY: ${{ secrets.INSTATUS_API_KEY }}
+          INSTATUS_PAGE_ID: ${{ secrets.INSTATUS_PAGE_ID }}
+          INSTATUS_COMPONENT_ID: ${{ secrets.INSTATUS_COMPONENT_ID }}