
Commit

Merge pull request #183 from wri/develop
Develop to master
jterry64 authored Feb 25, 2025
2 parents 8792604 + cc7fce9 commit 7df48b8
Showing 5 changed files with 96 additions and 20 deletions.
12 changes: 4 additions & 8 deletions src/Dockerfile
@@ -1,3 +1,4 @@
+# This Dockerfile is used to build the datapump layer for the lambdas.
 FROM public.ecr.aws/lambda/python:3.10
 
 ENV WORKDIR /opt
@@ -8,18 +9,13 @@ RUN mkdir -p /opt/python
 # Make the dir and to install all packages into packages/
 COPY . $WORKDIR
 
-# installing dependencies to build package
+# Installing dependencies to build package. This implicitly uses setup.py for the
+# dependency list.
 RUN pip install . -t python
 
-# This next line needs to be changed (just increment the number) in order
-# to change the hash of the file and get TF to realize it needs to be
-# redeployed. Ticket for a better solution:
-# https://gfw.atlassian.net/browse/GTC-1250
-# change 35
-
 RUN yum install -y zip geos-devel
 
-# Precompile all python packages and remove .py files
+# Remove any precompiled files and __pycache__ dirs
 RUN find python/ -type f -name '*.pyc' -print0 | xargs -0 rm -rf
 RUN find python/ -type d -a -name '__pycache__' -print0 | xargs -0 rm -rf

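A Python Lambda layer must ship its packages under a top-level python/ directory, which is why the install target above is python. The zip step itself sits in the part of the Dockerfile collapsed from this diff; a minimal local sketch of the equivalent, assuming (as scripts/build.sh below implies) that the tail zips python/ into /opt/layer.zip:

    # Sketch only, not the Dockerfile's exact tail: reproduce the layer artifact locally.
    pip install . -t python              # dependencies resolved from setup.py
    find python/ -type f -name '*.pyc' -delete
    zip -r layer.zip python/             # python/ must sit at the zip root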
53 changes: 45 additions & 8 deletions terraform/modules/datapump/data.tf
@@ -20,11 +20,48 @@ data "template_file" "sfn_datapump" {
   }
 }
 
-module "py310_datapump_021" {
-  source         = "git::https://github.com/wri/gfw-lambda-layers.git//terraform/modules/lambda_layer"
-  bucket         = var.pipelines_bucket
-  name           = "datapump-${terraform.workspace}"
-  module_version = "0.2.1"
-  runtime        = "python3.10"
-  layer_path     = "${var.lambda_layers_path}/"
-}
+# Terraform to create and upload layer.zip of the datapump source code
+# and dependencies.
+
+locals {
+  layer_name = substr("python3.10-datapump-${terraform.workspace}_0.2.1", 0, 64)
+
+}
+
+# Build the Docker image and copy ZIP file to local folder
+# Always build the zip file so we can do a hash on the entire source.
+resource "null_resource" "build" {
+  triggers = {
+    curtime = timestamp()
+  }
+
+  provisioner "local-exec" {
+    command     = "${path.module}/scripts/build.sh ${var.lambda_layers_path} ${local.layer_name}"
+    interpreter = ["bash", "-c"]
+  }
+}
+
+data "external" "layer_sha256" {
+  program    = ["${path.module}/scripts/hash.sh", "${var.lambda_layers_path}/layer.zip"]
+  depends_on = [null_resource.build]
+}
+
+resource "aws_s3_bucket_object" "py310_datapump_021" {
+  bucket = var.pipelines_bucket
+  key    = "lambda_layers/${local.layer_name}.zip"
+  source = "${var.lambda_layers_path}/layer.zip"
+  # This is what decides if the s3 upload of the layer will happen,
+  # though terraform seems to do its own hash of the zip file as well.
+  etag   = lookup(data.external.layer_sha256.result, "hash")
+}
+
+resource "aws_lambda_layer_version" "py310_datapump_021" {
+  layer_name          = replace(local.layer_name, ".", "")
+  s3_bucket           = aws_s3_bucket_object.py310_datapump_021.bucket
+  s3_key              = aws_s3_bucket_object.py310_datapump_021.key
+  compatible_runtimes = ["python3.10"]
+  # This decides if the actual layer will be replaced in the lambda,
+  # though terraform seems to use its own etag of the zip file on S3 as well,
+  # which means we always update the zip file.
+  source_code_hash    = lookup(data.external.layer_sha256.result, "hash")
+}
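Taken together these resources form the update chain: null_resource.build re-creates layer.zip on every apply (the timestamp() trigger changes each run), the external data source runs hash.sh, which must print a single JSON object of string values that Terraform exposes as result, and that digest feeds both the S3 object's etag and the layer's source_code_hash. Note that aws_s3_bucket_object expects etag to be an MD5 digest, so a sha256 there will never match what S3 reports back, which is likely why the comments observe that the upload always happens. A hedged sketch of tracing the chain by hand, with an illustrative bucket name and the default workspace assumed:

    # Hypothetical values: my-pipelines-bucket; local.layer_name resolves to
    # python3.10-datapump-default_0.2.1 in the default workspace.
    ./scripts/build.sh ./layers python3.10-datapump-default_0.2.1
    sha256sum ./layers/layer.zip    # the value hash.sh reports as {"hash": "..."}
    aws s3api head-object \
      --bucket my-pipelines-bucket \
      --key lambda_layers/python3.10-datapump-default_0.2.1.zip \
      --query ETag                  # an MD5 for single-part uploads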
8 changes: 4 additions & 4 deletions terraform/modules/datapump/lambdas.tf
@@ -10,7 +10,7 @@ resource "aws_lambda_function" "dispatcher" {
   publish = true
   tags    = local.tags
   layers = [
-    module.py310_datapump_021.layer_arn,
+    aws_lambda_layer_version.py310_datapump_021.arn,
     var.numpy_lambda_layer_arn,
     var.rasterio_lambda_layer_arn,
     var.shapely_lambda_layer_arn
@@ -39,7 +39,7 @@ resource "aws_lambda_function" "executor" {
   timeout = var.lambda_params.timeout
   publish = true
   tags    = local.tags
-  layers  = [module.py310_datapump_021.layer_arn]
+  layers  = [aws_lambda_layer_version.py310_datapump_021.arn]
   environment {
     variables = {
       ENV = var.environment
@@ -68,7 +68,7 @@ resource "aws_lambda_function" "postprocessor" {
   publish = true
   tags    = local.tags
   layers = [
-    module.py310_datapump_021.layer_arn,
+    aws_lambda_layer_version.py310_datapump_021.arn,
     var.numpy_lambda_layer_arn,
     var.rasterio_lambda_layer_arn,
     var.shapely_lambda_layer_arn
@@ -82,4 +82,4 @@ resource "aws_lambda_function" "postprocessor" {
       DATAPUMP_TABLE_NAME = aws_dynamodb_table.datapump.name
     }
   }
-}
+}
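The three functions now consume aws_lambda_layer_version.py310_datapump_021.arn directly instead of the deleted module's output. On this resource, arn is the versioned ARN (layer_arn would be the unversioned one), so whenever source_code_hash changes and a new layer version is published, the functions are updated to point at it. The accumulating versions could be inspected with something like the following, the layer name derived from replace(local.layer_name, ".", "") under the default workspace:

    aws lambda list-layer-versions \
      --layer-name python310-datapump-default_021 \
      --query 'LayerVersions[].LayerVersionArn'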
33 changes: 33 additions & 0 deletions terraform/modules/datapump/scripts/build.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+
+# This is the same build script as in gfw-lambda-layers/terraform/modules/lambda_layer/scripts/build.sh
+# It builds and runs a Docker image as specified by ${1}/Dockerfile to create a layer.zip.
+
+set -e
+
+LAYER_PATH="${1}"
+IMAGE="globalforestwatch/${2}"
+
+echo -n "${LAYER_PATH}" > "${LAYER_PATH}/foo.txt"
+date >> "${LAYER_PATH}/foo.txt"
+CONTAINER_NAME="container_$(sha1sum "${LAYER_PATH}/foo.txt" | cut -c 1-8)"
+
+pushd "${LAYER_PATH}"
+
+echo "BUILD image ${IMAGE}"
+docker build --no-cache -t "${IMAGE}" .
+
+echo "CREATE container ${CONTAINER_NAME}"
+docker run -itd --name "${CONTAINER_NAME}" "${IMAGE}" /bin/bash
+
+echo "COPY ZIP package to host"
+docker cp "${CONTAINER_NAME}":"/opt/layer.zip" layer.zip
+
+echo "STOP container"
+docker stop "${CONTAINER_NAME}"
+docker wait "${CONTAINER_NAME}"
+
+echo "REMOVE container"
+docker rm -f "${CONTAINER_NAME}"
+
+popd
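Design note: nothing has to execute inside the container, so the run/stop/wait/rm sequence (and the foo.txt hash, which only manufactures a per-run container name) could be collapsed. A sketch of the slimmer equivalent, not what the script above does:

    docker build --no-cache -t "$IMAGE" .
    CID=$(docker create "$IMAGE")    # a stopped container is enough to cp from
    docker cp "$CID:/opt/layer.zip" layer.zip
    docker rm "$CID"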
10 changes: 10 additions & 0 deletions terraform/modules/datapump/scripts/hash.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# This does a hash of the zip file, but that includes all the modified times of the
+# files, which keep changing even when the file names and contents are the same. I
+# tried generating a hash using only filenames and contents, but terraform seems to
+# create its own hash of the layer.zip file as well, so basically we're always going
+# to update the layer.zip no matter what, which seems fine.
+hash=$(sha256sum "$1" | cut -d' ' -f1)
+
+echo '{ "hash": "'"$hash"'" }'
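As the comment notes, hashing the zip bytes drags in entry mtimes. A hedged sketch of the filenames-plus-contents variant the author mentions trying; it is still sensitive to archive entry order, since unzip -p streams entries as stored, and it must keep emitting the one-key JSON object that data.external expects:

    #!/bin/bash
    # Hash only entry names and contents, ignoring zip metadata such as mtimes.
    hash=$({ zipinfo -1 "$1" | LC_ALL=C sort; unzip -p "$1"; } | sha256sum | cut -d' ' -f1)
    echo '{ "hash": "'"$hash"'" }'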
