From 720433400f14bf62012ece749d050c7982426d0f Mon Sep 17 00:00:00 2001 From: Dan Scales Date: Sat, 28 Sep 2024 13:08:56 -0700 Subject: [PATCH] GTC-2958 Tag Docker image from docker hash, so it always exists Currently, we tag the new Docker image with the current Git SHA. But the container_registry module only creates a new Docker image if the docker contents change. So, if the docker contents haven't changed with this Git change, we can have a bug where we reference the app docker via a tag (of the new Git SHA) which doesn't exist. The fix is to instead use the hash of the docker contents as the tag. We use the same hash script that the container_registry module uses. Therefore, we will always be using a tag that exists, either because the container_registry module just pushed a new docker with the new tag, or the docker already exists under the docker hash tag, because the docker contents and hash haven't changed. I noticed a bunch of bugs in the container_registry module's hash.sh script, which I will fix later. One of the main things is that it doesn't ignore comments, so it can match on words in the comments. For that reason, I removed the '# Docker Files' comment, which was causing the Dockerfile itself to be ignored during hashing. 
--- .dockerignore | 1 - terraform/data.tf | 10 +++++++- terraform/main.tf | 8 ++++++- terraform/scripts/hash.sh | 50 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 3 deletions(-) create mode 100755 terraform/scripts/hash.sh diff --git a/.dockerignore b/.dockerignore index f2fec2f03..cd78e8236 100644 --- a/.dockerignore +++ b/.dockerignore @@ -6,7 +6,6 @@ # MyPy .mypy_cache/* -# Docker Files docker-compose.dev.yml docker-compose.prod.yml docker-compose.test.yml diff --git a/terraform/data.tf b/terraform/data.tf index d3748b037..34fd7fd02 100644 --- a/terraform/data.tf +++ b/terraform/data.tf @@ -190,4 +190,12 @@ data "template_file" "step_function_policy" { vars = { raster_analysis_state_machine_arn = data.terraform_remote_state.raster_analysis_lambda.outputs.raster_analysis_state_machine_arn } -} \ No newline at end of file +} + +# Hash of the contents of the FastAPI app docker. The docker commands run in the main +# directory (parent directory of terraform directory), and the Docker file is in the +# same directory. +data "external" "hash" { + program = ["${path.root}/scripts/hash.sh", "${path.root}/../", "."] +} + diff --git a/terraform/main.tf b/terraform/main.tf index 263d70da0..89a321402 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -26,7 +26,13 @@ locals { aurora_instance_class = data.terraform_remote_state.core.outputs.aurora_cluster_instance_class aurora_max_vcpus = local.aurora_instance_class == "db.t3.medium" ? 2 : local.aurora_instance_class == "db.r6g.large" ? 2 : local.aurora_instance_class == "db.r6g.xlarge" ? 4 : local.aurora_instance_class == "db.r6g.2xlarge" ? 8 : local.aurora_instance_class == "db.r6g.4xlarge" ? 16 : local.aurora_instance_class == "db.r6g.8xlarge" ? 32 : local.aurora_instance_class == "db.r6g.16xlarge" ? 64 : local.aurora_instance_class == "db.r5.large" ? 2 : local.aurora_instance_class == "db.r5.xlarge" ? 4 : local.aurora_instance_class == "db.r5.2xlarge" ? 
8 : local.aurora_instance_class == "db.r5.4xlarge" ? 16 : local.aurora_instance_class == "db.r5.8xlarge" ? 32 : local.aurora_instance_class == "db.r5.12xlarge" ? 48 : local.aurora_instance_class == "db.r5.16xlarge" ? 64 : local.aurora_instance_class == "db.r5.24xlarge" ? 96 : "" service_url = var.environment == "dev" ? "http://${module.fargate_autoscaling.lb_dns_name}" : var.service_url - container_tag = substr(var.git_sha, 0, 7) + # The container_registry module only pushes a new Docker image if the docker hash + # computed by its hash.sh script has changed. So, we make the container tag exactly + # be that hash. Therefore, we will know that either the previous docker with the + # same contents and tag will already exist, if nothing has changed in the docker + # image, or the container registry module will push a new docker with the tag we + # want. + container_tag = lookup(data.external.hash.result, "hash") lb_dns_name = coalesce(module.fargate_autoscaling.lb_dns_name, var.lb_dns_name) } diff --git a/terraform/scripts/hash.sh b/terraform/scripts/hash.sh new file mode 100755 index 000000000..a67cc4713 --- /dev/null +++ b/terraform/scripts/hash.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# +# Calculates hash of Docker image source contents +# +# Must be identical to the script that is used by the +# gfw-terraform-modules:terraform/modules/container_registry Terraform module. +# +# Usage: +# +# $ ./hash.sh . +# + +set -e + +pushd () { + command pushd "$@" > /dev/null +} + +popd () { + command popd "$@" > /dev/null +} + +ROOT_DIR=${1:-.} +DOCKER_PATH=${2:-.} +IGNORE="${DOCKER_PATH}/.dockerignore" + +pushd "$ROOT_DIR" + +# Hash all source files of the Docker image +if [ -f "$IGNORE" ]; then + # We don't want to compute hashes for files listed in .dockerignore + # to match regex pattern we need to escape leading . + a=$(printf "! -regex ^./%s.* " `< .dockerignore`) + b=${a//\/.//\\\.} + + file_hashes="$( + find . 
-type f $b -exec md5sum {} \; + )" +else + # Exclude Python cache files, dot files + file_hashes="$( + find . -type f -not -name '*.pyc' -not -path './.**' -exec md5sum {} \; + )" +fi + +popd + +hash="$(echo "$file_hashes" | md5sum | cut -d' ' -f1)" + +echo '{ "hash": "'"$hash"'" }'