From 11f09a79aa62d992a23bac0cabccbddf9c835588 Mon Sep 17 00:00:00 2001 From: Paul Hirsch <59626817+pauldoomgov@users.noreply.github.com> Date: Wed, 26 Jun 2024 13:03:34 -0500 Subject: [PATCH] Add services support (#5) * Launches services as separate applications with the same initial name as the "worker" instances * Creates a route in the apps.internal domain to allow the main worker to connect to the service(s) * Adds a network policy to allow access from the worker to the service(s) * Allows each worker to have separate services, avoiding collision * Cleans up after itself like a responsible runner --- README.md | 2 +- runner/cf-driver/base.sh | 10 ++-- runner/cf-driver/cleanup.sh | 52 +++++++++++++++++++- runner/cf-driver/prepare.sh | 97 +++++++++++++++++++++++++++++++++---- 4 files changed, 145 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 8ffabec..50bcf48 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ Test Only - For a basic test environment with no privileged access you can use the following to apply a loose egress security group policy on cloud.gov: ~~~ -bind cf bind-security-group public_networks_egress ORG_NAME --space SPACE_NAME +cf bind-security-group public_networks_egress ORG_NAME --space SPACE_NAME ~~~ ## TODO diff --git a/runner/cf-driver/base.sh b/runner/cf-driver/base.sh index 38b894a..d31384a 100644 --- a/runner/cf-driver/base.sh +++ b/runner/cf-driver/base.sh @@ -3,12 +3,14 @@ # This is sourced by prepare, run, and cleanup # This name will be long. Hopefully not too long! -CONTAINER_ID="runner-$CUSTOM_ENV_CI_RUNNER_ID-project-$CUSTOM_ENV_CI_PROJECT_ID-concurrent-$CUSTOM_ENV_CI_CONCURRENT_PROJECT_ID-$CUSTOM_ENV_CI_JOB_ID" +# Any changes to this pattern need to be mirrored in .gitlab-ci.yml when +# used to prefix service names. 
+CONTAINER_ID="glrw-r$CUSTOM_ENV_CI_RUNNER_ID-p$CUSTOM_ENV_CI_PROJECT_ID-c$CUSTOM_ENV_CI_CONCURRENT_PROJECT_ID-j$CUSTOM_ENV_CI_JOB_ID" # Set a fallback if not set but complain -if [ -v $DEFAULT_JOB_IMAGE ]; then - DEFAULT_JOB_IMAGE="ubuntu:latest" - echo "WARNING: DEFAULT_JOB_IMAGE not set! Falling back to ${DEFAULT_JOB_IMAGE}" +if [ -z "$DEFAULT_JOB_IMAGE" ]; then + DEFAULT_JOB_IMAGE="ubuntu:latest" + echo "WARNING: DEFAULT_JOB_IMAGE not set! Falling back to ${DEFAULT_JOB_IMAGE}" fi # Use a custom image if provided, else fallback to configured default diff --git a/runner/cf-driver/cleanup.sh b/runner/cf-driver/cleanup.sh index 7dd45e7..1f9926e 100755 --- a/runner/cf-driver/cleanup.sh +++ b/runner/cf-driver/cleanup.sh @@ -10,8 +10,56 @@ set -eo pipefail # trap any error, and mark it as a system failure. trap 'exit $SYSTEM_FAILURE_EXIT_CODE' ERR -echo "Deleting executor instance $CONTAINER_ID" +cleanup_service () { + alias_name="$1" + container_id="$2" + # Delete the service app and the associated route(s) + cf delete -r -f "$container_id" +} + +remove_access_to_service () { + source_app="$1" + destination_service_app="$2" + current_org=$(echo "$VCAP_APPLICATION" | jq --raw-output ".organization_name") + current_space=$(echo "$VCAP_APPLICATION" | jq --raw-output ".space_name") + + # TODO NOTE: This is foolish and allows all TCP ports for now. + # This is limiting and sloppy. 
+ protocol="tcp" + ports="20-10000" + + cf remove-network-policy "$source_app" \ + --destination-app "$destination_service_app" \ + -o "$current_org" -s "$current_space" \ + --protocol "$protocol" --port "$ports" +} + +cleanup_services () { + container_id_base="$1" + ci_job_services="$2" + + if [ -z "$ci_job_services" ]; then + echo "[cf-driver] No services defined in ci_job_services - Skipping service cleanup" + return + fi + + for l in $(echo "$ci_job_services" | jq -rc '.[]'); do + # Using jq -er to fail if the alias is not found + alias_name=$(echo "$l" | jq -er '.alias | select(.)') + container_id="${container_id_base}-svc-${alias_name}" + + echo "[cf-driver] Removing network policy from $container_id_base to $container_id" + remove_access_to_service "$container_id_base" "$container_id" + + echo "[cf-driver] Deleting service $alias_name" + cleanup_service "$alias_name" "$container_id" + done +} + +cleanup_services "$CONTAINER_ID" "$CUSTOM_ENV_CI_JOB_SERVICES" + +echo "[cf-driver] Deleting executor instance $CONTAINER_ID" cf delete -f "$CONTAINER_ID" -echo "Cleanup completed for $CONTAINER_ID" +echo "[cf-driver] Cleanup completed for $CONTAINER_ID" diff --git a/runner/cf-driver/prepare.sh b/runner/cf-driver/prepare.sh index 44a3f4c..c363b18 100755 --- a/runner/cf-driver/prepare.sh +++ b/runner/cf-driver/prepare.sh @@ -24,26 +24,100 @@ create_temporary_varfile () { echo "$v: \"$v\"" >> "$TMPVARFILE" done - echo "Added $(wc -l "$TMPVARFILE") lines to $TMPVARFILE" + echo "[cf-driver] [DEBUG] Added $(wc -l "$TMPVARFILE") lines to $TMPVARFILE" } start_container () { - if cf app --guid "$CONTAINER_ID" >/dev/null 2>/dev/null ; then - echo 'Found old instance of runner executor, deleting' - cf delete "$CONTAINER_ID" + container_id="$1" + if cf app --guid "$container_id" >/dev/null 2>/dev/null ; then + echo '[cf-driver] Found old instance of runner executor, deleting' + cf delete -f "$container_id" fi - cf push "$CONTAINER_ID" -f "${currentDir}/worker-manifest.yml" 
\ + cf push "$container_id" -f "${currentDir}/worker-manifest.yml" \ --docker-image "$CUSTOM_ENV_CI_JOB_IMAGE" -m "$WORKER_MEMORY" \ --vars-file "$TMPVARFILE" } +start_service () { + alias_name="$1" + container_id="$2" + image_name="$3" + container_entrypoint="$4" + container_command="$5" + + if [ -z "$container_id" ] || [ -z "$image_name" ]; then + echo 'Usage: start_service CONTAINER_ID IMAGE_NAME CONTAINER_ENTRYPOINT CONTAINER_COMMAND' + exit 1 + fi + if [ -n "$container_entrypoint" ] || [ -n "$container_command" ]; then + # TODO - cf push allows use of -c or --start-command but not a separate + # entrypoint. May need to add logic to gracefully convert entrypoint to + # a command. + echo '[cf-driver] container_entrypoint and container_command are not yet supported in services - Sorry!' + exit 1 + fi + + if cf app --guid "$container_id" >/dev/null 2>/dev/null ; then + echo '[cf-driver] Found old instance of runner service, deleting' + cf delete -f "$container_id" + fi + + # TODO - Figure out how to handle command and non-global memory definition + cf push "$container_id" --docker-image "$image_name" -m "$WORKER_MEMORY" \ + --no-route --health-check-type process + + cf map-route "$container_id" apps.internal --hostname "$container_id" +} + +allow_access_to_service () { + source_app="$1" + destination_service_app="$2" + current_org=$(echo "$VCAP_APPLICATION" | jq --raw-output ".organization_name") + current_space=$(echo "$VCAP_APPLICATION" | jq --raw-output ".space_name") + + # TODO NOTE: This is foolish and allows all TCP ports for now. + # This is limiting and sloppy. 
+ protocol="tcp" + ports="20-10000" + + cf add-network-policy "$source_app" \ + --destination-app "$destination_service_app" \ + -o "$current_org" -s "$current_space" \ + --protocol "$protocol" --port "$ports" +} + +start_services () { + container_id_base="$1" + ci_job_services="$2" + + if [ -z "$ci_job_services" ]; then + echo "[cf-driver] No services defined in ci_job_services - Skipping service startup" + return + fi + + for l in $(echo "$ci_job_services" | jq -rc '.[]'); do + # Using jq -er to fail if alias or name is not found + alias_name=$(echo "$l" | jq -er '.alias | select(.)') + container_id="${container_id_base}-svc-${alias_name}" + image_name=$(echo "$l" | jq -er '.name | select(.)') + # Using jq -r to allow entrypoint and command to be empty + container_entrypoint=$(echo "$l" | jq -r '.entrypoint | select(.)') + container_command=$(echo "$l" | jq -r '.command | select(.)') + + start_service "$alias_name" "$container_id" "$image_name" "$container_entrypoint" "$container_command" + allow_access_to_service "$container_id_base" "$container_id" + done +} + install_dependencies () { + container_id="$1" + # Build a command to try and install git and git-lfs on common distros. # Of course, RedHat/UBI will need more help to add RPM repos with the correct # version. TODO - RedHat support echo "[cf-driver] Ensuring git, git-lfs, and curl are installed" - cf ssh "$CONTAINER_ID" -c '(which git && which git-lfs && which curl) || \ + cf ssh "$container_id" -c '(which git && which git-lfs && which curl) || \ (which apk && apk add git git-lfs curl) || \ (which apt-get && apt-get update && apt-get install -y git git-lfs curl) || \ (echo "Required packages missing and I do not know what to do about it" && exit 1)' @@ -55,7 +129,7 @@ install_dependencies () { # Symlinks gitlab-runner to avoid having to alter more of the executor. 
# TODO: Pin the version and support more arches than X86_64 echo "[cf-driver] Installing gitlab-runner-helper" - cf ssh "$CONTAINER_ID" -c 'curl -L --output /usr/bin/gitlab-runner-helper \ + cf ssh "$container_id" -c 'curl -L --output /usr/bin/gitlab-runner-helper \ "https://s3.dualstack.us-east-1.amazonaws.com/gitlab-runner-downloads/latest/binaries/gitlab-runner-helper/gitlab-runner-helper.x86_64"; \ chmod +x /usr/bin/gitlab-runner-helper; \ ln -s /usr/bin/gitlab-runner-helper /usr/bin/gitlab-runner' @@ -65,9 +139,14 @@ echo "[cf-driver] Preparing environment variables for $CONTAINER_ID" create_temporary_varfile echo "[cf-driver] Starting $CONTAINER_ID with image $CUSTOM_ENV_CI_JOB_IMAGE" -start_container +start_container "$CONTAINER_ID" echo "[cf-driver] Installing dependencies into $CONTAINER_ID" -install_dependencies +install_dependencies "$CONTAINER_ID" + +if [ -n "$CUSTOM_ENV_CI_JOB_SERVICES" ]; then + echo "[cf-driver] Starting services" + start_services "$CONTAINER_ID" "$CUSTOM_ENV_CI_JOB_SERVICES" +fi echo "[cf-driver] $CONTAINER_ID preparation complete"