Merge pull request #591 from wri/develop
staging -> production: batch queue with on-demand instances
solomon-negusse authored Oct 10, 2024
2 parents f93e363 + b73a890 commit 284d3fa
Showing 14 changed files with 84 additions and 21 deletions.
5 changes: 3 additions & 2 deletions app/models/pydantic/jobs.py
@@ -4,6 +4,7 @@

from ...settings.globals import (
AURORA_JOB_QUEUE,
ON_DEMAND_COMPUTE_JOB_QUEUE,
DATA_LAKE_JOB_QUEUE,
DEFAULT_JOB_DURATION,
GDAL_PYTHON_JOB_DEFINITION,
@@ -138,9 +139,9 @@ class PixETLJob(Job):


class GDALCOGJob(Job):
"""Use for creating COG files using GDAL Python docker in PixETL queue."""
"""Use for creating COG files using GDAL Python docker in on-demand compute queue."""

job_queue = PIXETL_JOB_QUEUE
job_queue = ON_DEMAND_COMPUTE_JOB_QUEUE
job_definition = GDAL_PYTHON_JOB_DEFINITION
vcpus = 8
memory = 64000
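For context on how these model fields are consumed, here is a minimal sketch of submitting a job whose queue and definition come from a GDALCOGJob-style model. The boto3 `submit_job` call and `resourceRequirements` shape are real AWS Batch API surface; the `submit` helper and its argument are assumptions for the example, not this repo's actual submission code.

```python
# Illustrative sketch only: maps a GDALCOGJob-style model onto AWS Batch.
# boto3's submit_job / resourceRequirements are real API surface; the
# `submit` helper and its `job` argument are assumptions for the example.
import boto3

batch = boto3.client("batch")

def submit(job):
    # job.job_queue is now ON_DEMAND_COMPUTE_JOB_QUEUE for COG creation
    return batch.submit_job(
        jobName="create-cog-example",
        jobQueue=job.job_queue,
        jobDefinition=job.job_definition,
        containerOverrides={
            "resourceRequirements": [
                {"type": "VCPU", "value": str(job.vcpus)},     # 8
                {"type": "MEMORY", "value": str(job.memory)},  # 64000 MiB
            ]
        },
    )
```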
1 change: 1 addition & 0 deletions app/settings/globals.py
@@ -116,6 +116,7 @@
MAX_MEM = config("MAX_MEM", cast=int, default=760000)
PIXETL_JOB_DEFINITION = config("PIXETL_JOB_DEFINITION", cast=str)
PIXETL_JOB_QUEUE = config("PIXETL_JOB_QUEUE", cast=str)
ON_DEMAND_COMPUTE_JOB_QUEUE = config("ON_DEMAND_COMPUTE_JOB_QUEUE", cast=str)
PIXETL_CORES = config("PIXETL_CORES", cast=int, default=48)
PIXETL_MAX_MEM = config("PIXETL_MAX_MEM", cast=int, default=380000)
PIXETL_DEFAULT_RESAMPLING = config(
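The `config(...)` calls above match the `starlette.config` pattern (name, optional `cast`, optional `default`); a minimal sketch of how the new setting resolves from the environment, assuming that reader:

```python
# Minimal sketch assuming a starlette.config-style reader, which matches the
# config("NAME", cast=..., default=...) signature above; the repo's actual
# Config instance may be constructed differently (e.g., with a .env file).
from starlette.config import Config

config = Config()  # falls back to os.environ

# Raises if the variable is unset, since no default is given -- mirroring
# the new ON_DEMAND_COMPUTE_JOB_QUEUE line in globals.py.
ON_DEMAND_COMPUTE_JOB_QUEUE = config("ON_DEMAND_COMPUTE_JOB_QUEUE", cast=str)
```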
1 change: 1 addition & 0 deletions docker-compose.dev.yml
@@ -39,6 +39,7 @@ services:
- TILE_CACHE_CLUSTER=tile_cache_cluster
- TILE_CACHE_SERVICE=tile_cache_service
- PIXETL_JOB_QUEUE=pixetl_jq
- ON_DEMAND_COMPUTE_JOB_QUEUE=cogify_jq
- API_URL=http://app_dev:80
- RASTER_ANALYSIS_LAMBDA_NAME=raster-analysis-tiled_raster_analysis-default
- RW_API_URL=https://staging-api.resourcewatch.org
1 change: 1 addition & 0 deletions docker-compose.prod.yml
@@ -34,6 +34,7 @@ services:
- DATA_LAKE_JOB_QUEUE=data_lake_jq
- TILE_CACHE_JOB_QUEUE=tile_cache_jq
- PIXETL_JOB_QUEUE=pixetl_jq
- ON_DEMAND_COMPUTE_JOB_QUEUE=cogify_jq
- RASTER_ANALYSIS_LAMBDA_NAME=raster_analysis
- API_URL="http://app_dev:80"
- RW_API_URL=https://api.resourcewatch.org
1 change: 1 addition & 0 deletions docker-compose.test.yml
@@ -49,6 +49,7 @@ services:
- TILE_CACHE_CLUSTER=tile_cache_cluster
- TILE_CACHE_SERVICE=tile_cache_service
- PIXETL_JOB_QUEUE=pixetl_jq
- ON_DEMAND_COMPUTE_JOB_QUEUE=cogify_jq
- PIXETL_CORES=1
- MAX_CORES=1
- NUM_PROCESSES=1
22 changes: 12 additions & 10 deletions terraform/data.tf
@@ -68,6 +68,7 @@ data "template_file" "container_definition" {
tile_cache_job_queue = module.batch_job_queues.tile_cache_job_queue_arn
pixetl_job_definition = module.batch_job_queues.pixetl_job_definition_arn
pixetl_job_queue = module.batch_job_queues.pixetl_job_queue_arn
on_demand_compute_job_queue = module.batch_job_queues.on_demand_compute_job_queue_arn
raster_analysis_lambda_name = "raster-analysis-tiled_raster_analysis-default"
raster_analysis_sfn_arn = data.terraform_remote_state.raster_analysis_lambda.outputs.raster_analysis_state_machine_arn
service_url = local.service_url
@@ -95,15 +96,16 @@ data "template_file" "container_definition" {
data "template_file" "task_batch_policy" {
template = file("${path.root}/templates/run_batch_policy.json.tmpl")
vars = {
aurora_job_definition_arn = module.batch_job_queues.aurora_job_definition_arn
aurora_job_queue_arn = module.batch_job_queues.aurora_job_queue_arn
aurora_job_queue_fast_arn = module.batch_job_queues.aurora_job_queue_fast_arn
data_lake_job_definition_arn = module.batch_job_queues.data_lake_job_definition_arn
data_lake_job_queue_arn = module.batch_job_queues.data_lake_job_queue_arn
tile_cache_job_definition_arn = module.batch_job_queues.tile_cache_job_definition_arn
tile_cache_job_queue_arn = module.batch_job_queues.tile_cache_job_queue_arn
pixetl_job_definition_arn = module.batch_job_queues.pixetl_job_definition_arn
pixetl_job_queue_arn = module.batch_job_queues.pixetl_job_queue_arn
aurora_job_definition_arn = module.batch_job_queues.aurora_job_definition_arn
aurora_job_queue_arn = module.batch_job_queues.aurora_job_queue_arn
aurora_job_queue_fast_arn = module.batch_job_queues.aurora_job_queue_fast_arn
data_lake_job_definition_arn = module.batch_job_queues.data_lake_job_definition_arn
data_lake_job_queue_arn = module.batch_job_queues.data_lake_job_queue_arn
tile_cache_job_definition_arn = module.batch_job_queues.tile_cache_job_definition_arn
tile_cache_job_queue_arn = module.batch_job_queues.tile_cache_job_queue_arn
pixetl_job_definition_arn = module.batch_job_queues.pixetl_job_definition_arn
pixetl_job_queue_arn = module.batch_job_queues.pixetl_job_queue_arn
on_demand_compute_job_queue_arn = module.batch_job_queues.on_demand_compute_job_queue_arn
}
depends_on = [
module.batch_job_queues.aurora_job_definition,
@@ -190,4 +192,4 @@ data "template_file" "step_function_policy" {
vars = {
raster_analysis_state_machine_arn = data.terraform_remote_state.raster_analysis_lambda.outputs.raster_analysis_state_machine_arn
}
}
}
35 changes: 29 additions & 6 deletions terraform/main.tf
@@ -174,21 +174,44 @@ module "batch_data_lake_writer" {
tags = local.batch_tags
use_ephemeral_storage = true
# SPOT is actually the default; this is just a placeholder until GTC-1791 is done
launch_type = "SPOT"
instance_types = [
"r6id.large", "r6id.xlarge", "r6id.2xlarge", "r6id.4xlarge", "r6id.8xlarge", "r6id.12xlarge", "r6id.16xlarge", "r6id.24xlarge",
"r5ad.large", "r5ad.xlarge", "r5ad.2xlarge", "r5ad.4xlarge", "r5ad.8xlarge", "r5ad.12xlarge", "r5ad.16xlarge", "r5ad.24xlarge",
"r5d.large", "r5d.xlarge", "r5d.2xlarge", "r5d.4xlarge", "r5d.8xlarge", "r5d.12xlarge", "r5d.16xlarge", "r5d.24xlarge"
]
launch_type = "SPOT"
instance_types = var.data_lake_writer_instance_types
compute_environment_name = "data_lake_writer"
}

module "batch_cogify" {
source = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/compute_environment?ref=v0.4.2.3"
ecs_role_policy_arns = [
aws_iam_policy.query_batch_jobs.arn,
aws_iam_policy.s3_read_only.arn,
data.terraform_remote_state.core.outputs.iam_policy_s3_write_data-lake_arn,
data.terraform_remote_state.core.outputs.secrets_postgresql-reader_policy_arn,
data.terraform_remote_state.core.outputs.secrets_postgresql-writer_policy_arn,
data.terraform_remote_state.core.outputs.secrets_read-gfw-gee-export_policy_arn
]
key_pair = var.key_pair
max_vcpus = var.data_lake_max_vcpus
project = local.project
security_group_ids = [
data.terraform_remote_state.core.outputs.default_security_group_id,
data.terraform_remote_state.core.outputs.postgresql_security_group_id
]
subnets = data.terraform_remote_state.core.outputs.private_subnet_ids
suffix = local.name_suffix
tags = local.batch_tags
use_ephemeral_storage = true
launch_type = "EC2"
instance_types = var.data_lake_writer_instance_types
compute_environment_name = "batch_cogify"
}

module "batch_job_queues" {
source = "./modules/batch"
aurora_compute_environment_arn = module.batch_aurora_writer.arn
data_lake_compute_environment_arn = module.batch_data_lake_writer.arn
pixetl_compute_environment_arn = module.batch_data_lake_writer.arn
tile_cache_compute_environment_arn = module.batch_data_lake_writer.arn
cogify_compute_environment_arn = module.batch_cogify.arn
environment = var.environment
name_suffix = local.name_suffix
project = local.project
9 changes: 8 additions & 1 deletion terraform/modules/batch/main.tf
@@ -52,6 +52,13 @@ resource "aws_batch_job_queue" "pixetl" {
depends_on = [var.pixetl_compute_environment_arn]
}

resource "aws_batch_job_queue" "on_demand" {
name = substr("${var.project}-on-demand-job-queue${var.name_suffix}", 0, 64)
state = "ENABLED"
priority = 1
compute_environments = [var.cogify_compute_environment_arn]
depends_on = [var.cogify_compute_environment_arn]
}

resource "aws_batch_job_definition" "tile_cache" {
name = substr("${var.project}-tile_cache${var.name_suffix}", 0, 64)
@@ -190,4 +197,4 @@ data "template_file" "ecs-task_assume" {
vars = {
service = "ecs-tasks"
}
}
}
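To sanity-check the mapping this resource creates (the new queue fed by the `cogify` compute environment), here is a hedged verification sketch. `describe_job_queues` is a real boto3 call; the name filter mirrors the `substr(...)` naming expression above and is otherwise illustrative.

```python
# Hedged sketch: list Batch queues whose names match the on-demand queue
# pattern above and show which compute environments back them. Purely
# illustrative; not part of this repo.
import boto3

batch = boto3.client("batch")
for queue in batch.describe_job_queues()["jobQueues"]:
    if "on-demand-job-queue" in queue["jobQueueName"]:
        envs = [ce["computeEnvironment"] for ce in queue["computeEnvironmentOrder"]]
        print(queue["jobQueueName"], "->", envs)
```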
6 changes: 5 additions & 1 deletion terraform/modules/batch/outputs.tf
@@ -38,6 +38,10 @@ output "pixetl_job_queue_arn" {
value = aws_batch_job_queue.pixetl.arn
}

output "on_demand_compute_job_queue_arn" {
value = aws_batch_job_queue.on_demand.arn
}

output "tile_cache_job_definition_arn" {
value = aws_batch_job_definition.tile_cache.arn
}
@@ -48,4 +52,4 @@ output "tile_cache_job_definition" {

output "tile_cache_job_queue_arn" {
value = aws_batch_job_queue.tile_cache.arn
}
}
1 change: 1 addition & 0 deletions terraform/modules/batch/variables.tf
@@ -2,6 +2,7 @@ variable "project" { type = string }
variable "name_suffix" { type = string }
variable "aurora_compute_environment_arn" { type = string }
variable "data_lake_compute_environment_arn" { type = string }
variable "cogify_compute_environment_arn" { type = string }
variable "tile_cache_compute_environment_arn" { type = string }
variable "pixetl_compute_environment_arn" { type = string }
variable "gdal_repository_url" { type = string }
4 changes: 4 additions & 0 deletions terraform/templates/container_definition.json.tmpl
@@ -73,6 +73,10 @@
"name": "PIXETL_JOB_QUEUE",
"value": "${pixetl_job_queue}"
},
{
"name": "ON_DEMAND_COMPUTE_JOB_QUEUE",
"value": "${on_demand_compute_job_queue}"
},
{
"name": "API_URL",
"value": "${service_url}"
4 changes: 3 additions & 1 deletion terraform/templates/run_batch_policy.json.tmpl
@@ -21,7 +21,9 @@
"${tile_cache_job_definition_arn}",

"${pixetl_job_queue_arn}",
"${pixetl_job_definition_arn}"
"${pixetl_job_definition_arn}",

"${on_demand_compute_job_queue_arn}"
]
},
{
10 changes: 10 additions & 0 deletions terraform/variables.tf
@@ -156,3 +156,13 @@ variable "api_gateway_url" {
description = "The invoke url of the API Gateway stage"
default = ""
}

variable "data_lake_writer_instance_types" {
type = list(string)
description = "memory-optimized EC2 instances with local NVMe SSDs for data lake writer batch queues"
default = [
"r6id.large", "r6id.xlarge", "r6id.2xlarge", "r6id.4xlarge", "r6id.8xlarge", "r6id.12xlarge", "r6id.16xlarge", "r6id.24xlarge",
"r5ad.large", "r5ad.xlarge", "r5ad.2xlarge", "r5ad.4xlarge", "r5ad.8xlarge", "r5ad.12xlarge", "r5ad.16xlarge", "r5ad.24xlarge",
"r5d.large", "r5d.xlarge", "r5d.2xlarge", "r5d.4xlarge", "r5d.8xlarge", "r5d.12xlarge", "r5d.16xlarge", "r5d.24xlarge"
]
}
5 changes: 5 additions & 0 deletions tests/conftest.py
@@ -33,6 +33,7 @@
GDAL_PYTHON_JOB_DEFINITION,
PIXETL_JOB_DEFINITION,
PIXETL_JOB_QUEUE,
ON_DEMAND_COMPUTE_JOB_QUEUE,
POSTGRESQL_CLIENT_JOB_DEFINITION,
TILE_CACHE_BUCKET,
TILE_CACHE_JOB_DEFINITION,
@@ -167,6 +168,7 @@ def patch_run(self, *k, **kwargs):
"s3_writer", subnet_id, sg_id, iam_arn
)
pixetl_env = aws_mock.add_compute_environment("pixetl", subnet_id, sg_id, iam_arn)
cogify_env = aws_mock.add_compute_environment("cogify", subnet_id, sg_id, iam_arn)

aws_mock.add_job_queue(AURORA_JOB_QUEUE, aurora_writer_env["computeEnvironmentArn"])
aws_mock.add_job_queue(
@@ -175,6 +177,9 @@ def patch_run(self, *k, **kwargs):
aws_mock.add_job_queue(DATA_LAKE_JOB_QUEUE, s3_writer_env["computeEnvironmentArn"])
aws_mock.add_job_queue(TILE_CACHE_JOB_QUEUE, s3_writer_env["computeEnvironmentArn"])
aws_mock.add_job_queue(PIXETL_JOB_QUEUE, pixetl_env["computeEnvironmentArn"])
aws_mock.add_job_queue(
ON_DEMAND_COMPUTE_JOB_QUEUE, cogify_env["computeEnvironmentArn"]
)

aws_mock.add_job_definition(GDAL_PYTHON_JOB_DEFINITION, "batch_gdal-python_test")
aws_mock.add_job_definition(
