diff --git a/scripts/jenkins/backup_check.sh b/scripts/jenkins/backup_check.sh
new file mode 100644
index 000000000..fd33dba67
--- /dev/null
+++ b/scripts/jenkins/backup_check.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+
+# Check if your Pantheon live sites are properly backed up each day.
+#
+# For every non-frozen site whose live environment is initialized, the newest
+# "files", "code" and "database" backups must each be at most two days old.
+# Exits 1 if any checked site misses a recent backup, 0 otherwise.
+#
+# Requires: terminus (authenticated), jq and GNU date (for "date -d").
+
+# Add known problematic sites to EXCLUDED_SITES environment variable.
+# @see https://plugins.jenkins.io/envinject/
+# Example: EXCLUDED_SITES=("foo" "bar")
+#
+# Environment variables are plain strings, so an injected value reaches this
+# script as the literal text above, not as a Bash array. Unless a real array
+# is already defined, strip parentheses, quotes and commas and split the rest
+# on whitespace so that each site name becomes its own array element.
+if [[ "$(declare -p EXCLUDED_SITES 2>/dev/null)" != "declare -a"* ]]; then
+  excluded_raw=${EXCLUDED_SITES:-}
+  excluded_raw=${excluded_raw//[()\"\',]/ }
+  unset EXCLUDED_SITES
+  IFS=$' \t\n' read -r -d '' -a EXCLUDED_SITES <<< "$excluded_raw" || true
+fi
+
+# Fetch all sites. A failed listing (for example an expired Terminus session)
+# must fail the job instead of being mistaken for "nothing to check".
+if ! SITES_JSON=$(terminus site:list --format=json 2>/dev/null); then
+  echo "ERROR: Unable to fetch the site list from Terminus." >&2
+  exit 1
+fi
+
+# Filter non-frozen sites. mapfile stores one site name per array element; a
+# plain $(...) assignment would yield a single newline-separated string and
+# the loop below would run only once with all names glued together.
+mapfile -t NON_FROZEN_SITES < <(
+  jq -r '.[] | select(.frozen == false) | .name' <<< "$SITES_JSON"
+)
+
+# Initialize flag for missing backups
+MISSING_BACKUP_FLAG=0
+
+# Components to check
+COMPONENTS=("files" "code" "database")
+
+# Maximum accepted backup age: 2 days (172800 seconds)
+MAX_BACKUP_AGE=172800
+
+# Function to check if a site must be skipped.
+# Arguments: $1 - site name
+# Returns:   0 if the site is listed in EXCLUDED_SITES or its live environment
+#            is reported as not initialized, 1 otherwise.
+function is_excluded() {
+  local site=$1
+  local excluded_site
+  for excluded_site in "${EXCLUDED_SITES[@]}"; do
+    if [[ "$site" == "$excluded_site" ]]; then
+      return 0
+    fi
+  done
+
+  local live_initialized
+  live_initialized=$(terminus env:list --format=json "${site}" | jq -r '.live.initialized')
+  if [[ "$live_initialized" == "false" ]]; then
+    return 0
+  fi
+
+  return 1
+}
+
+# Print the newest backup time, in seconds since the epoch, for one component.
+# Arguments: $1 - JSON produced by "terminus backup:list --format=json"
+#            $2 - component name, matched as a substring of each entry key
+# Outputs:   the newest timestamp, or nothing when no backup date is usable
+function latest_backup_epoch() {
+  local backups_json=$1
+  local component=$2
+  local backup_date
+
+  jq -r --arg COMPONENT "${component}" \
+    'to_entries[] | select(.key | contains($COMPONENT)) | .value.date' \
+    <<< "$backups_json" \
+    | while read -r backup_date; do
+      # NOTE(review): Terminus may print epoch seconds instead of a formatted
+      # date in JSON output - confirm. Both forms are accepted here.
+      if [[ "$backup_date" =~ ^[0-9]{9,}$ ]]; then
+        echo "$backup_date"
+      else
+        date -d "$backup_date" +%s
+      fi
+    done \
+    | sort -nr \
+    | head -n1
+}
+
+# Get the current date
+CURRENT_DATE=$(date +%s)
+
+# Iterate through each non-frozen site.
+for site_name in "${NON_FROZEN_SITES[@]}"; do
+
+  # Check if the site should be excluded.
+  if is_excluded "$site_name"; then
+    continue
+  fi
+
+  echo "Checking backups for site: $site_name"
+
+  # Fetch backups for site
+  BACKUPS_JSON=$(terminus backup:list "${site_name}.live" --format=json 2>/dev/null)
+
+  for component in "${COMPONENTS[@]}"; do
+    # Get the latest backup date for the component
+    LATEST_BACKUP_DATE=$(latest_backup_epoch "$BACKUPS_JSON" "$component")
+
+    # A component without any usable backup date counts as missing; otherwise
+    # check whether the newest backup is older than the accepted maximum age.
+    if [[ -z "$LATEST_BACKUP_DATE" ]] \
+      || (( CURRENT_DATE - LATEST_BACKUP_DATE > MAX_BACKUP_AGE )); then
+      echo "WARNING: No $component backup in the past 2 days for site: $site_name"
+      MISSING_BACKUP_FLAG=1
+    fi
+  done
+done
+
+# Exit status based on missing backups.
+exit $MISSING_BACKUP_FLAG
diff --git a/scripts/jenkins/cache_ratio_check.sh b/scripts/jenkins/cache_ratio_check.sh
new file mode 100644
index 000000000..59b1e8fc2
--- /dev/null
+++ b/scripts/jenkins/cache_ratio_check.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+# Check if your Pantheon live sites are cached properly.
+# It checks the CDN cache hit ratio to see whether anonymous visitors
+# get fully cached requests, at least for some parts of the website.
+# For example, a max-age header set to zero is expected to raise an alert.
+#
+# Exits 1 if any checked site reported a 0% cache hit ratio in each of its
+# last three metric rows, 0 otherwise.
+#
+# Requires: terminus (authenticated) and jq.
+
+# Add known problematic sites to EXCLUDED_SITES environment variable.
+# @see https://plugins.jenkins.io/envinject/
+# Example: EXCLUDED_SITES=("foo" "bar")
+#
+# Environment variables are plain strings, so an injected value reaches this
+# script as the literal text above, not as a Bash array. Unless a real array
+# is already defined, strip parentheses, quotes and commas and split the rest
+# on whitespace so that each site name becomes its own array element.
+if [[ "$(declare -p EXCLUDED_SITES 2>/dev/null)" != "declare -a"* ]]; then
+  excluded_raw=${EXCLUDED_SITES:-}
+  excluded_raw=${excluded_raw//[()\"\',]/ }
+  unset EXCLUDED_SITES
+  IFS=$' \t\n' read -r -d '' -a EXCLUDED_SITES <<< "$excluded_raw" || true
+fi
+
+# Fetch all sites. A failed listing (for example an expired Terminus session)
+# must fail the job instead of being mistaken for "nothing to check".
+if ! SITES_JSON=$(terminus site:list --format=json 2>/dev/null); then
+  echo "ERROR: Unable to fetch the site list from Terminus." >&2
+  exit 1
+fi
+
+# Filter non-frozen sites. mapfile stores one site name per array element; a
+# plain $(...) assignment would yield a single newline-separated string and
+# the loop below would run only once with all names glued together.
+mapfile -t SITES_TO_CHECK < <(
+  jq -r '.[] | select(.frozen == false) | .name' <<< "$SITES_JSON"
+)
+
+# Initialize flag for cache issues
+CACHE_ISSUE_FLAG=0
+
+# Function to check cache hit ratio.
+# Arguments: $1 - site name
+# Globals:   sets CACHE_ISSUE_FLAG to 1 when an alert is raised
+# Outputs:   warnings and alerts on stdout
+check_cache_hit_ratio() {
+  local site_name=$1
+
+  # Get metrics in CSV format
+  local metrics_csv
+  metrics_csv=$(terminus env:metrics "${site_name}.live" --format=csv 2>/dev/null)
+
+  # Convert CSV to an array of the last three cache hit ratios (6th column,
+  # percent sign removed). mapfile keeps one row per element, whereas a single
+  # "read -a" stops after the first line and would only ever see one ratio.
+  local -a cache_hit_ratios=()
+  mapfile -t cache_hit_ratios < <(
+    printf '%s' "$metrics_csv" | tail -n 3 | cut -d ',' -f6 | tr -d '%'
+  )
+
+  # Initialize counter for zero hit ratios
+  local zero_hit_ratio_count=0
+
+  # Loop through cache hit ratios
+  local ratio
+  for ratio in "${cache_hit_ratios[@]}"; do
+    # Skip iteration if the ratio is not a valid number
+    if ! [[ "$ratio" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+      echo "WARNING: Invalid cache hit ratio '$ratio' for ${site_name}, skipping..."
+      continue
+    fi
+
+    # The value is a validated non-negative decimal, so it is zero exactly
+    # when every digit is 0; no external calculator is needed for the test.
+    if [[ "$ratio" =~ ^0+(\.0+)?$ ]]; then
+      zero_hit_ratio_count=$((zero_hit_ratio_count + 1))
+    fi
+  done
+
+  # Check if all the last three values are 0% cache hit ratios
+  if [ "$zero_hit_ratio_count" -eq 3 ]; then
+    echo "ALERT: ${site_name} has had a 0% cache hit ratio for the last 3 days."
+    CACHE_ISSUE_FLAG=1
+  fi
+}
+
+# Check if a site should be excluded.
+# Arguments: $1 - site name
+# Returns:   0 if the site is listed in EXCLUDED_SITES, 1 otherwise.
+is_excluded() {
+  local site=$1
+  local excluded_site
+  for excluded_site in "${EXCLUDED_SITES[@]}"; do
+    if [[ "$site" == "$excluded_site" ]]; then
+      return 0
+    fi
+  done
+  return 1
+}
+
+# Iterate over sites and check their cache hit ratios
+for site in "${SITES_TO_CHECK[@]}"; do
+  if is_excluded "$site"; then
+    continue
+  fi
+  check_cache_hit_ratio "$site"
+done
+
+# Exit with 1 if any cache issues were found
+exit $CACHE_ISSUE_FLAG