Merge pull request #276 from unity-sds/256-airflow
#256 - Deploy Airflow to MCP venues with Terraform (no adaptation yet)
drewm-swe authored Feb 20, 2024
2 parents cc2fba3 + 1bd55db commit a89ef92
Showing 83 changed files with 3,090 additions and 2,123 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/build_docker_images.yml
@@ -22,6 +22,7 @@ env:
SPS_API_IMAGE_NAME: ${{ github.repository }}/sps-api
SPS_HYSDS_PGE_BASE_IMAGE_NAME: ${{ github.repository }}/sps-hysds-pge-base
SPS_JOB_PUBLISHER_IMAGE_NAME: ${{ github.repository }}/sps-job-publisher
SPS_AIRFLOW: ${{ github.repository }}/sps-airflow

jobs:
build-hysds-core:
@@ -308,3 +309,27 @@ jobs:
push: true
tags: ${{ env.REGISTRY }}/${{ env.SPS_JOB_PUBLISHER_IMAGE_NAME }}:${{ env.TAG }}
labels: ${{ steps.metasps.outputs.labels }}

build-sps-airflow:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Log in to the Container registry
uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for SPS Airflow Docker image
id: metascheduler
uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
with:
images: ${{ env.REGISTRY }}/${{ env.SPS_AIRFLOW }}
- name: Build and push SPS Airflow Docker image
uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
with:
context: .
file: airflow/docker/custom_airflow/Dockerfile
push: true
tags: ${{ env.REGISTRY }}/${{ env.SPS_AIRFLOW }}:${{ env.TAG }}
labels: ${{ steps.metascheduler.outputs.labels }}
32 changes: 7 additions & 25 deletions .github/workflows/regression_test_mcp_dev.yml
@@ -7,30 +7,16 @@ on:
- cron: "0 7 * * *"
workflow_dispatch:
inputs:
PROCESS_SERVICE_ENDPOINT:
description: "Base URL for the WPS-T API endpoint (i.e. http://abc.def.ghi:port-number)"
type: string
SPS_API_SERVICE_ENDPOINT:
description: "Base URL for the SPS API endpoint (i.e. http://abc.def.ghi:port-number)"
type: string
JOBS_DATABASE_ENDPOINT:
description: "Base URL for the Jobs DB endpoint (i.e. http://abc.def.ghi:port-number)"
type: string
SOUNDER_SIPS_PROCESS_SELECTION:
description: "The Sounder SIPS process to test (L1A, L1B, chirp)"
default: ".*"
required: false
AIRFLOW_ENDPOINT:
description: "Base URL for the Airflow endpoint (i.e. http://abc.def.ghi:port-number)"
type: string

jobs:
print_inputs:
runs-on: ubuntu-latest
steps:
- run: |
echo "Base URL for the WPS-T API endpoint (i.e. http://abc.def.ghi:port-number): ${{ github.event.inputs.PROCESS_SERVICE_ENDPOINT || vars.MCP_DEV_PROCESS_SERVICE_ENDPOINT }}"
echo "Base URL for the SPS API endpoint (i.e. http://abc.def.ghi:port-number): ${{ github.event.inputs.SPS_API_SERVICE_ENDPOINT || vars.MCP_DEV_SPS_API_SERVICE_ENDPOINT }}"
echo "Base URL for the Jobs DB endpoint (i.e. http://abc.def.ghi:port-number): ${{ github.event.inputs.JOBS_DATABASE_ENDPOINT || vars.MCP_DEV_JOBS_DATABASE_ENDPOINT }}"
echo "The Sounder SIPS PGEs to test (L1A, L1B, chirp): ${{ github.event.inputs.SOUNDER_SIPS_PROCESS_SELECTION || vars.SOUNDER_SIPS_PROCESS_SELECTION }}"
echo "Base URL for the Airflow endpoint (i.e. http://abc.def.ghi:port-number): ${{ github.event.inputs.AIRFLOW_ENDPOINT || vars.MCP_DEV_AIRFLOW_ENDPOINT }}"
regression_test:
runs-on: ubuntu-latest
steps:
@@ -45,12 +31,8 @@ jobs:
python -m pip install --upgrade pip
pip install -r requirements.txt --use-pep517
- name: Run the regression test
working-directory: ${{ github.workspace }}/unity-test/step_defs
working-directory: ${{ github.workspace }}/unity-test
run: >
pytest -s
--process-service-endpoint=${{ github.event.inputs.PROCESS_SERVICE_ENDPOINT || vars.MCP_DEV_PROCESS_SERVICE_ENDPOINT }}
--sps-api-service-endpoint=${{ github.event.inputs.SPS_API_SERVICE_ENDPOINT || vars.MCP_DEV_SPS_API_SERVICE_ENDPOINT }}
--jobs-database-endpoint=${{ github.event.inputs.JOBS_DATABASE_ENDPOINT || vars.MCP_DEV_JOBS_DATABASE_ENDPOINT }}
--sounder-sips-process-selection=${{ github.event.inputs.SOUNDER_SIPS_PROCESS_SELECTION || vars.SOUNDER_SIPS_PROCESS_SELECTION }}
--environment "dev"
-rs
pytest -s -vv --gherkin-terminal-reporter
step_defs/test_airflow_api_health.py
--airflow-endpoint=${{ github.event.inputs.AIRFLOW_ENDPOINT || vars.MCP_DEV_AIRFLOW_ENDPOINT }}
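
For context on what that pytest invocation runs: the --gherkin-terminal-reporter flag comes from pytest-bdd, so step_defs/test_airflow_api_health.py is a BDD-style step-definition module. Below is a minimal sketch of what such a health check could look like; the feature file path, scenario title, and airflow_endpoint fixture (presumably wired to the --airflow-endpoint option in a conftest) are illustrative assumptions, not the repository's actual test code.

# Hypothetical sketch of an Airflow API health check written with pytest-bdd.
# The feature file, scenario title, and airflow_endpoint fixture are assumptions.
import requests
from pytest_bdd import given, scenario, then


@scenario("../features/airflow_api_health.feature", "Check the Airflow API health endpoint")
def test_airflow_api_health():
    pass


@given("the Airflow API is up and running", target_fixture="health_response")
def health_response(airflow_endpoint):
    # airflow_endpoint would be supplied by a conftest.py option such as --airflow-endpoint
    return requests.get(f"{airflow_endpoint}/api/v1/health", timeout=30)


@then("the metadatabase and scheduler report a healthy status")
def check_health(health_response):
    assert health_response.status_code == 200
    body = health_response.json()
    assert body["metadatabase"]["status"] == "healthy"
    assert body["scheduler"]["status"] == "healthy"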
16 changes: 16 additions & 0 deletions .markdownlintrc
@@ -0,0 +1,16 @@
{
"default": true,
"MD029": {
"style": "ordered"
},
"MD033": false,
"MD013": false,
"MD002": false,
"MD026": false,
"MD041": false,
"MD005": false,
"MD007": false,
"MD034": false,
"MD024": false,
"MD045": false
}
86 changes: 69 additions & 17 deletions .pre-commit-config.yaml
@@ -1,28 +1,80 @@
fail_fast: true
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
# - id: check-yaml
- id: check-xml
- id: check-added-large-files
- id: check-json #checks json files for parseable syntax.
- id: pretty-format-json #sets a standard for formatting json files.
- id: detect-aws-credentials #detects *your* aws credentials from the aws cli credentials file.
- id: detect-private-key #detects the presence of private keys.
- id: requirements-txt-fixer #sorts entries in requirements.txt.
args:
- --maxkb=50000
- id: check-json # Checks json files for parsable syntax.
- id: pretty-format-json # Sets a standard for formatting json files.
args:
- --autofix
- id: requirements-txt-fixer # Sorts entries in requirements.txt.
- id: check-ast # Simply checks whether the files parse as valid python.
- id: detect-private-key # Detects the presence of private keys.
- id: detect-aws-credentials # Detects *your* aws credentials from the aws cli credentials file.
- id: check-toml # Checks toml files for parsable syntax.

- repo: https://github.com/antonbabenko/pre-commit-terraform
rev: v1.74.1
# - repo: https://github.com/igorshubovych/markdownlint-cli
# rev: "v0.39.0"
# hooks:
# - id: markdownlint
# args: ["--config", ".markdownlintrc", "--ignore", "CHANGELOG.md"]

- repo: https://github.com/PyCQA/isort
rev: 5.13.2
hooks:
- id: isort

- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.1.1
hooks:
- id: black

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.2.1
hooks:
- id: ruff

# - repo: https://github.com/PyCQA/bandit
# rev: "1.7.7" # you must change this to newest version
# hooks:
# - id: bandit
# args:
# [
# "--configfile=pyproject.toml",
# "--severity-level=high",
# "--confidence-level=high",
# ]
# additional_dependencies: [".[toml]"]

- repo: https://github.com/hadolint/hadolint
rev: v2.12.1-beta
hooks:
- id: terraform_validate #Validates all Terraform configuration files.
- id: terraform_fmt #Rewrites all Terraform configuration files to a canonical format.
- id: terraform_tflint #Validates all Terraform configuration files with TFLint.
- id: terraform_tfsec #Static analysis of Terraform templates to spot potential security issues.
- id: hadolint # requires hadolint is installed (brew install hadolint)
args:
- --args=--tfvars-file="/Users/drewm/Documents/projects/398G/Unity/unity-sps-prototype/terraform-unity/terraform.tfvars"
- id: terrascan #Detect compliance and security violations of Terraform templates.
- --no-color
- --failure-threshold=error
- --verbose

- repo: https://github.com/antonbabenko/pre-commit-terraform
rev: v1.86.0
hooks:
# - id: terraform_validate # Validates all Terraform configuration files.
- id: terraform_fmt # Rewrites all Terraform configuration files to a canonical format.
# - id: terraform_tflint # Validates all Terraform configuration files with TFLint.
# - id: terraform_trivy # Static analysis of Terraform templates to spot potential security issues.
# args:
# - >
# --args=--severity=CRITICAL
# --skip-dirs="**/.terraform"
# --tf-exclude-downloaded-modules
- id: terraform_docs
args:
- --hook-config=--add-to-existing-file=true # Boolean. true or false
- --hook-config=--create-file-if-not-exist=true # Boolean. true or false
- --hook-config=--add-to-existing-file=true
- --hook-config=--create-file-if-not-exist=true
26 changes: 26 additions & 0 deletions airflow/dags/cwltool_hello_world.py
@@ -0,0 +1,26 @@
from datetime import datetime

from airflow.operators.bash_operator import BashOperator

from airflow import DAG

default_args = {
"owner": "airflow",
"start_date": datetime(2021, 1, 1),
}

dag = DAG(
"cwltool_help_dag",
default_args=default_args,
description="A simple DAG to run cwltool --help",
schedule=None,
is_paused_upon_creation=False,
)

run_cwl_help = BashOperator(
task_id="run_cwltool_help",
bash_command="cwltool --help",
dag=dag,
)

run_cwl_help
38 changes: 38 additions & 0 deletions airflow/dags/hello_world.py
@@ -0,0 +1,38 @@
"""
# DAG Name: Hello World
# Purpose
# Usage
""" # noqa: E501

import time
from datetime import datetime

from airflow.operators.python import PythonOperator

from airflow import DAG

default_args = {
"owner": "example",
"start_date": datetime.utcfromtimestamp(0),
}


def hello_world():
print("Hello World")
time.sleep(30)


with DAG(
dag_id="hello_world",
doc_md=__doc__,
default_args=default_args,
schedule=None,
is_paused_upon_creation=False,
tags=["example"],
) as dag:
hello_world_task = PythonOperator(
task_id="hello_world", python_callable=hello_world
)
hello_world_task
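
Once deployed with the custom image, a DAG like hello_world can be triggered through Airflow's stable REST API, which the Helm values later in this change enable via the basic-auth backend (AIRFLOW__API__AUTH_BACKENDS). A minimal sketch follows, assuming a placeholder webserver URL and admin credentials:

# Minimal sketch: trigger the hello_world DAG through Airflow's stable REST API.
# The endpoint URL and credentials below are placeholders, not values from this change.
import requests

AIRFLOW_ENDPOINT = "http://localhost:8080"  # assumed webserver URL
AUTH = ("admin", "admin")  # assumed basic-auth credentials

# POST /api/v1/dags/{dag_id}/dagRuns creates a new run; an empty conf is accepted.
response = requests.post(
    f"{AIRFLOW_ENDPOINT}/api/v1/dags/hello_world/dagRuns",
    auth=AUTH,
    json={"conf": {}},
    timeout=30,
)
response.raise_for_status()
print(response.json()["dag_run_id"])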
5 changes: 5 additions & 0 deletions airflow/docker/custom_airflow/Dockerfile
@@ -0,0 +1,5 @@
FROM apache/airflow:2.8.1-python3.11

COPY ./airflow/dags/ ${AIRFLOW_HOME}/dags/

RUN pip install cwltool==3.1.20240112164112
122 changes: 122 additions & 0 deletions airflow/helm_values/values.tmpl.yaml
@@ -0,0 +1,122 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
---
# Source of default values: https://github.com/apache/airflow/blob/main/chart/values.yaml

# Airflow create user job settings
createUserJob:
# In case you need to disable the helm hooks that create the jobs after install.
# Disable this if you are using ArgoCD for example
useHelmHooks: false
applyCustomEnv: false

# Airflow database migration job settings
migrateDatabaseJob:
# In case you need to disable the helm hooks that create the jobs after install.
# Disable this if you are using ArgoCD for example
useHelmHooks: false
applyCustomEnv: false
# To run database migrations with Argo CD automatically, you will need to add the
# following. This will run database migrations every time there is a Sync event
# in Argo CD. While it is not ideal to run the migrations on every sync, it is a
# trade-off that allows them to be run automatically.
jobAnnotations:
"argocd.argoproj.io/hook": Sync

images:
airflow:
repository: ${airflow_image_repo}
tag: ${airflow_image_tag}

scheduler:
replicas: 1

triggerer:
replicas: 1

dagProcessor:
enabled: true
replicas: 1

postgresql:
enabled: false

pgbouncer:
enabled: true
replicas: 1

webserverSecretKeySecretName: ${webserver_secret_name}

webserver:
replicas: 1

workers:
keda:
enabled: true
pollingInterval: 1
maxReplicaCount: 128
# Specify HPA related options
# https://github.com/kubernetes/enhancements/blob/master/keps/sig-autoscaling/853-configurable-hpa-scale-velocity/README.md
advanced:
horizontalPodAutoscalerConfig:
behavior:
scaleUp:
policies:
- type: Percent
value: 900
periodSeconds: 30
scaleDown:
stabilizationWindowSeconds: 300
policies:
- type: Percent
value: 100
periodSeconds: 5

data:
metadataSecretName: ${metadata_secret_name}
resultBackendSecretName: ~

config:
logging:
remote_logging: true
logging_level: "INFO"
remote_base_log_folder: ${airflow_logs_s3_location}
remote_log_conn_id: "aws_default"
encrypt_s3_logs: false
celery:
worker_concurrency: 1

env:
- name: "AIRFLOW_VAR_KUBERNETES_PIPELINE_NAMESPACE"
value: "${kubernetes_namespace}"

# https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/security/api.html
extraEnv: |
- name: AIRFLOW__API__AUTH_BACKENDS
value: "airflow.api.auth.backend.basic_auth"
- name: AIRFLOW__CORE__PARALLELISM
value: "128"
- name: AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG
value: "64"
- name: AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG
value: "64"
- name: AIRFLOW__SCHEDULER__MAX_DAGRUNS_TO_CREATE_PER_LOOP
value: "64"
- name: AIRFLOW__SCHEDULER__SCHEDULER_HEARTBEAT_SEC
value: "1"
- name: AIRFLOW__KUBERNETES__WORKER_PODS_CREATION_BATCH_SIZE
value: "8"