From f2b60651e377b5f02e4a14e0202461e571e37d8b Mon Sep 17 00:00:00 2001 From: bahill Date: Tue, 16 Jan 2024 12:55:26 -0500 Subject: [PATCH 01/24] Updated manifest.py with testing info, and updated dev & main git actions with better conditional logic (see FE-128) --- .github/workflows/build-and-publish-main.yaml | 76 ++++++++++--------- .github/workflows/build_and_publish_dev.yaml | 4 +- orchestration/dcp100_manifest.csv | 1 + orchestration/dcp81_manifest.csv | 1 + orchestration/dcp98_manifest.csv | 3 + orchestration/dcp99_manfest.csv | 4 + orchestration/hca_manage/manifest.py | 6 +- 7 files changed, 56 insertions(+), 39 deletions(-) create mode 100644 orchestration/dcp100_manifest.csv create mode 100644 orchestration/dcp81_manifest.csv create mode 100644 orchestration/dcp98_manifest.csv create mode 100644 orchestration/dcp99_manfest.csv diff --git a/.github/workflows/build-and-publish-main.yaml b/.github/workflows/build-and-publish-main.yaml index 54ae3408..b308031e 100644 --- a/.github/workflows/build-and-publish-main.yaml +++ b/.github/workflows/build-and-publish-main.yaml @@ -1,40 +1,42 @@ name: Main Validation and Release on: - push: - branches: - - main + pull_request_target: + types: + - closed jobs: - main-ci: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Fetch tag history - run: git fetch --tags - - uses: olafurpg/setup-scala@v10 - with: - java-version: graalvm@20.0.0 - - uses: google-github-actions/setup-gcloud@v0.2.1 - name: Setup gcloud for pushing Docker images - with: - service_account_email: jenkins-gcr-pusher@broad-dsp-monster-dev.iam.gserviceaccount.com - service_account_key: ${{ secrets.Base64ServiceAccountKey }} - export_default_credentials: true - - name: Setup GCR auth - run: gcloud auth configure-docker --quiet us.gcr.io,us-east4-docker.pkg.dev - - name: Push Scala Dataflow Docker image - run: sbt publish - - name: Get artifact slug - id: get-artifact-slug - run: 'echo ::set-output name=slug::$(git rev-parse --short 
"$GITHUB_SHA")' - - name: Push Dagster User Code Docker image - uses: docker/build-push-action@v2 - with: - context: ./orchestration - push: true - tags: us.gcr.io/broad-dsp-gcr-public/monster-hca-dagster:${{steps.get-artifact-slug.outputs.slug}}, us.gcr.io/broad-dsp-gcr-public/monster-hca-dagster:latest - - name: Push Compose Dev Env Docker image - uses: docker/build-push-action@v2 - with: - context: . - push: true - tags: us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:${{steps.get-artifact-slug.outputs.slug}}, us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:latest + if_merged: + if: github.event.pull_request.merged == true + main-ci: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Fetch tag history + run: git fetch --tags + - uses: olafurpg/setup-scala@v10 + with: + java-version: graalvm@20.0.0 + - uses: google-github-actions/setup-gcloud@v0.2.1 + name: Setup gcloud for pushing Docker images + with: + service_account_email: jenkins-gcr-pusher@broad-dsp-monster-dev.iam.gserviceaccount.com + service_account_key: ${{ secrets.Base64ServiceAccountKey }} + export_default_credentials: true + - name: Setup GCR auth + run: gcloud auth configure-docker --quiet us.gcr.io,us-east4-docker.pkg.dev + - name: Push Scala Dataflow Docker image + run: sbt publish + - name: Get artifact slug + id: get-artifact-slug + run: 'echo ::set-output name=slug::$(git rev-parse --short "$GITHUB_SHA")' + - name: Push Dagster User Code Docker image + uses: docker/build-push-action@v2 + with: + context: ./orchestration + push: true + tags: us.gcr.io/broad-dsp-gcr-public/monster-hca-dagster:${{steps.get-artifact-slug.outputs.slug}}, us.gcr.io/broad-dsp-gcr-public/monster-hca-dagster:latest + - name: Push Compose Dev Env Docker image + uses: docker/build-push-action@v2 + with: + context: . 
+ push: true + tags: us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:${{steps.get-artifact-slug.outputs.slug}}, us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:latest diff --git a/.github/workflows/build_and_publish_dev.yaml b/.github/workflows/build_and_publish_dev.yaml index 3ebb195c..5e98f7a0 100644 --- a/.github/workflows/build_and_publish_dev.yaml +++ b/.github/workflows/build_and_publish_dev.yaml @@ -1,6 +1,8 @@ name: build-and-publish-dev on: push: + branches: + - '!main' jobs: main-ci: runs-on: ubuntu-latest @@ -45,7 +47,7 @@ jobs: run: sbt Compile/compile Test/compile IntegrationTest/compile - name: Scala Test run: sbt "set ThisBuild/coverageEnabled := true" test IntegrationTest/test coverageAggregate - - name: Run E2E test suite + - name: Run pytest E2E test suite run: poetry run pytest -v -m e2e working-directory: ${{ github.workspace }}/orchestration - name: Publish Scala coverage diff --git a/orchestration/dcp100_manifest.csv b/orchestration/dcp100_manifest.csv new file mode 100644 index 00000000..59e0eaf3 --- /dev/null +++ b/orchestration/dcp100_manifest.csv @@ -0,0 +1 @@ +TEST,07e5ebc0-1386-4a33-8ce4-3007705adad8 \ No newline at end of file diff --git a/orchestration/dcp81_manifest.csv b/orchestration/dcp81_manifest.csv new file mode 100644 index 00000000..61f7ded4 --- /dev/null +++ b/orchestration/dcp81_manifest.csv @@ -0,0 +1 @@ +EBI,7be05025-9972-493a-856f-3342a8d1b183 \ No newline at end of file diff --git a/orchestration/dcp98_manifest.csv b/orchestration/dcp98_manifest.csv new file mode 100644 index 00000000..6fc5780e --- /dev/null +++ b/orchestration/dcp98_manifest.csv @@ -0,0 +1,3 @@ +EBI,07e5ebc0-1386-4a33-8ce4-3007705adad8 +EBI,0ab14089-66be-4ea9-b1f2-36195a1ec03c +EBI,0bec963a-ee00-4dcc-b1a5-816afabf0ae6 \ No newline at end of file diff --git a/orchestration/dcp99_manfest.csv b/orchestration/dcp99_manfest.csv new file mode 100644 index 
00000000..b83d3271 --- /dev/null +++ b/orchestration/dcp99_manfest.csv @@ -0,0 +1,4 @@ +UCSC,003d5674-9bf6-4e51-ab1b-8fed80c308b9 +UCSC,0198da65-ca81-40bb-ac81-2782c67406b9 +EBI,07346726-1cc2-42cc-aa02-d5d207e51d5b +EBI,18ee211b-2571-4733-a532-d0692747f97c \ No newline at end of file diff --git a/orchestration/hca_manage/manifest.py b/orchestration/hca_manage/manifest.py index dc41fd9b..7ca03089 100644 --- a/orchestration/hca_manage/manifest.py +++ b/orchestration/hca_manage/manifest.py @@ -40,12 +40,16 @@ "dev": "broad-dsp-monster-hca-dev-etl-partitions", "prod": "broad-dsp-monster-hca-prod-etl-partitions" } +# Test contains a single staging area for testing purposes +# The staging area is not used for any production pipelines +# Be sure to delete any snapshots and datasets created using this test staging area STAGING_AREA_BUCKETS = { "prod": { "EBI": "gs://broad-dsp-monster-hca-prod-ebi-storage/prod", "UCSC": "gs://broad-dsp-monster-hca-prod-ebi-storage/prod", "LANTERN": "gs://broad-dsp-monster-hca-prod-lantern", - "LATTICE": "gs://broad-dsp-monster-hca-prod-lattice/staging" + "LATTICE": "gs://broad-dsp-monster-hca-prod-lattice/staging", + "TEST": "gs://broad-dsp-monster-hca-prod-ebi-storage/broad_test_dataset" }, "dev": { "EBI": "gs://broad-dsp-monster-hca-dev-ebi-staging/dev", From e30dd5023dc299e88955e66c365abf311e30766c Mon Sep 17 00:00:00 2001 From: bahill Date: Thu, 18 Jan 2024 11:13:29 -0500 Subject: [PATCH 02/24] removing test data & updating build & publish yamls with better conditional logic --- .github/workflows/build-and-publish-main.yaml | 76 ++++++++++--------- .github/workflows/build_and_publish_dev.yaml | 4 +- orchestration/hca_manage/manifest.py | 6 +- 3 files changed, 47 insertions(+), 39 deletions(-) diff --git a/.github/workflows/build-and-publish-main.yaml b/.github/workflows/build-and-publish-main.yaml index 54ae3408..b308031e 100644 --- a/.github/workflows/build-and-publish-main.yaml +++ b/.github/workflows/build-and-publish-main.yaml @@ 
-1,40 +1,42 @@ name: Main Validation and Release on: - push: - branches: - - main + pull_request_target: + types: + - closed jobs: - main-ci: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Fetch tag history - run: git fetch --tags - - uses: olafurpg/setup-scala@v10 - with: - java-version: graalvm@20.0.0 - - uses: google-github-actions/setup-gcloud@v0.2.1 - name: Setup gcloud for pushing Docker images - with: - service_account_email: jenkins-gcr-pusher@broad-dsp-monster-dev.iam.gserviceaccount.com - service_account_key: ${{ secrets.Base64ServiceAccountKey }} - export_default_credentials: true - - name: Setup GCR auth - run: gcloud auth configure-docker --quiet us.gcr.io,us-east4-docker.pkg.dev - - name: Push Scala Dataflow Docker image - run: sbt publish - - name: Get artifact slug - id: get-artifact-slug - run: 'echo ::set-output name=slug::$(git rev-parse --short "$GITHUB_SHA")' - - name: Push Dagster User Code Docker image - uses: docker/build-push-action@v2 - with: - context: ./orchestration - push: true - tags: us.gcr.io/broad-dsp-gcr-public/monster-hca-dagster:${{steps.get-artifact-slug.outputs.slug}}, us.gcr.io/broad-dsp-gcr-public/monster-hca-dagster:latest - - name: Push Compose Dev Env Docker image - uses: docker/build-push-action@v2 - with: - context: . 
- push: true - tags: us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:${{steps.get-artifact-slug.outputs.slug}}, us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:latest + if_merged: + if: github.event.pull_request.merged == true + main-ci: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Fetch tag history + run: git fetch --tags + - uses: olafurpg/setup-scala@v10 + with: + java-version: graalvm@20.0.0 + - uses: google-github-actions/setup-gcloud@v0.2.1 + name: Setup gcloud for pushing Docker images + with: + service_account_email: jenkins-gcr-pusher@broad-dsp-monster-dev.iam.gserviceaccount.com + service_account_key: ${{ secrets.Base64ServiceAccountKey }} + export_default_credentials: true + - name: Setup GCR auth + run: gcloud auth configure-docker --quiet us.gcr.io,us-east4-docker.pkg.dev + - name: Push Scala Dataflow Docker image + run: sbt publish + - name: Get artifact slug + id: get-artifact-slug + run: 'echo ::set-output name=slug::$(git rev-parse --short "$GITHUB_SHA")' + - name: Push Dagster User Code Docker image + uses: docker/build-push-action@v2 + with: + context: ./orchestration + push: true + tags: us.gcr.io/broad-dsp-gcr-public/monster-hca-dagster:${{steps.get-artifact-slug.outputs.slug}}, us.gcr.io/broad-dsp-gcr-public/monster-hca-dagster:latest + - name: Push Compose Dev Env Docker image + uses: docker/build-push-action@v2 + with: + context: . 
+ push: true + tags: us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:${{steps.get-artifact-slug.outputs.slug}}, us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:latest diff --git a/.github/workflows/build_and_publish_dev.yaml b/.github/workflows/build_and_publish_dev.yaml index 3ebb195c..5e98f7a0 100644 --- a/.github/workflows/build_and_publish_dev.yaml +++ b/.github/workflows/build_and_publish_dev.yaml @@ -1,6 +1,8 @@ name: build-and-publish-dev on: push: + branches: + - '!main' jobs: main-ci: runs-on: ubuntu-latest @@ -45,7 +47,7 @@ jobs: run: sbt Compile/compile Test/compile IntegrationTest/compile - name: Scala Test run: sbt "set ThisBuild/coverageEnabled := true" test IntegrationTest/test coverageAggregate - - name: Run E2E test suite + - name: Run pytest E2E test suite run: poetry run pytest -v -m e2e working-directory: ${{ github.workspace }}/orchestration - name: Publish Scala coverage diff --git a/orchestration/hca_manage/manifest.py b/orchestration/hca_manage/manifest.py index dc41fd9b..7ca03089 100644 --- a/orchestration/hca_manage/manifest.py +++ b/orchestration/hca_manage/manifest.py @@ -40,12 +40,16 @@ "dev": "broad-dsp-monster-hca-dev-etl-partitions", "prod": "broad-dsp-monster-hca-prod-etl-partitions" } +# Test contains a single staging area for testing purposes +# The staging area is not used for any production pipelines +# Be sure to delete any snapshots and datasets created using this test staging area STAGING_AREA_BUCKETS = { "prod": { "EBI": "gs://broad-dsp-monster-hca-prod-ebi-storage/prod", "UCSC": "gs://broad-dsp-monster-hca-prod-ebi-storage/prod", "LANTERN": "gs://broad-dsp-monster-hca-prod-lantern", - "LATTICE": "gs://broad-dsp-monster-hca-prod-lattice/staging" + "LATTICE": "gs://broad-dsp-monster-hca-prod-lattice/staging", + "TEST": "gs://broad-dsp-monster-hca-prod-ebi-storage/broad_test_dataset" }, "dev": { "EBI": 
"gs://broad-dsp-monster-hca-dev-ebi-staging/dev", From e2c5218b334193936b9aa7cbcc0f52cabf0c168f Mon Sep 17 00:00:00 2001 From: bahill Date: Thu, 18 Jan 2024 11:14:00 -0500 Subject: [PATCH 03/24] ignore test data --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index ebcac6f8..c3ffe0cf 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,5 @@ dagster_home/ venv/ # Build files build/ +# Test files +*_manifest.csv From acbc1378a33b7a1e55a22923ad60708266667a71 Mon Sep 17 00:00:00 2001 From: bahill Date: Thu, 18 Jan 2024 12:58:48 -0500 Subject: [PATCH 04/24] removing test data --- orchestration/dcp100_manifest.csv | 1 - orchestration/dcp81_manifest.csv | 1 - orchestration/dcp98_manifest.csv | 3 --- orchestration/dcp99_manfest.csv | 4 ---- 4 files changed, 9 deletions(-) delete mode 100644 orchestration/dcp100_manifest.csv delete mode 100644 orchestration/dcp81_manifest.csv delete mode 100644 orchestration/dcp98_manifest.csv delete mode 100644 orchestration/dcp99_manfest.csv diff --git a/orchestration/dcp100_manifest.csv b/orchestration/dcp100_manifest.csv deleted file mode 100644 index 59e0eaf3..00000000 --- a/orchestration/dcp100_manifest.csv +++ /dev/null @@ -1 +0,0 @@ -TEST,07e5ebc0-1386-4a33-8ce4-3007705adad8 \ No newline at end of file diff --git a/orchestration/dcp81_manifest.csv b/orchestration/dcp81_manifest.csv deleted file mode 100644 index 61f7ded4..00000000 --- a/orchestration/dcp81_manifest.csv +++ /dev/null @@ -1 +0,0 @@ -EBI,7be05025-9972-493a-856f-3342a8d1b183 \ No newline at end of file diff --git a/orchestration/dcp98_manifest.csv b/orchestration/dcp98_manifest.csv deleted file mode 100644 index 6fc5780e..00000000 --- a/orchestration/dcp98_manifest.csv +++ /dev/null @@ -1,3 +0,0 @@ -EBI,07e5ebc0-1386-4a33-8ce4-3007705adad8 -EBI,0ab14089-66be-4ea9-b1f2-36195a1ec03c -EBI,0bec963a-ee00-4dcc-b1a5-816afabf0ae6 \ No newline at end of file diff --git a/orchestration/dcp99_manfest.csv b/orchestration/dcp99_manfest.csv 
deleted file mode 100644 index b83d3271..00000000 --- a/orchestration/dcp99_manfest.csv +++ /dev/null @@ -1,4 +0,0 @@ -UCSC,003d5674-9bf6-4e51-ab1b-8fed80c308b9 -UCSC,0198da65-ca81-40bb-ac81-2782c67406b9 -EBI,07346726-1cc2-42cc-aa02-d5d207e51d5b -EBI,18ee211b-2571-4733-a532-d0692747f97c \ No newline at end of file From 2d0fe27df4a2eddfb287d989d0644986e26530aa Mon Sep 17 00:00:00 2001 From: bahill Date: Thu, 18 Jan 2024 13:07:00 -0500 Subject: [PATCH 05/24] fixing git action syntax --- .github/workflows/build-and-publish-main.yaml | 69 +++++++++---------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/.github/workflows/build-and-publish-main.yaml b/.github/workflows/build-and-publish-main.yaml index b308031e..109ef801 100644 --- a/.github/workflows/build-and-publish-main.yaml +++ b/.github/workflows/build-and-publish-main.yaml @@ -4,39 +4,38 @@ on: types: - closed jobs: - if_merged: + main-ci: if: github.event.pull_request.merged == true - main-ci: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Fetch tag history - run: git fetch --tags - - uses: olafurpg/setup-scala@v10 - with: - java-version: graalvm@20.0.0 - - uses: google-github-actions/setup-gcloud@v0.2.1 - name: Setup gcloud for pushing Docker images - with: - service_account_email: jenkins-gcr-pusher@broad-dsp-monster-dev.iam.gserviceaccount.com - service_account_key: ${{ secrets.Base64ServiceAccountKey }} - export_default_credentials: true - - name: Setup GCR auth - run: gcloud auth configure-docker --quiet us.gcr.io,us-east4-docker.pkg.dev - - name: Push Scala Dataflow Docker image - run: sbt publish - - name: Get artifact slug - id: get-artifact-slug - run: 'echo ::set-output name=slug::$(git rev-parse --short "$GITHUB_SHA")' - - name: Push Dagster User Code Docker image - uses: docker/build-push-action@v2 - with: - context: ./orchestration - push: true - tags: us.gcr.io/broad-dsp-gcr-public/monster-hca-dagster:${{steps.get-artifact-slug.outputs.slug}}, 
us.gcr.io/broad-dsp-gcr-public/monster-hca-dagster:latest - - name: Push Compose Dev Env Docker image - uses: docker/build-push-action@v2 - with: - context: . - push: true - tags: us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:${{steps.get-artifact-slug.outputs.slug}}, us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:latest + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Fetch tag history + run: git fetch --tags + - uses: olafurpg/setup-scala@v10 + with: + java-version: graalvm@20.0.0 + - uses: google-github-actions/setup-gcloud@v0.2.1 + name: Setup gcloud for pushing Docker images + with: + service_account_email: jenkins-gcr-pusher@broad-dsp-monster-dev.iam.gserviceaccount.com + service_account_key: ${{ secrets.Base64ServiceAccountKey }} + export_default_credentials: true + - name: Setup GCR auth + run: gcloud auth configure-docker --quiet us.gcr.io,us-east4-docker.pkg.dev + - name: Push Scala Dataflow Docker image + run: sbt publish + - name: Get artifact slug + id: get-artifact-slug + run: 'echo ::set-output name=slug::$(git rev-parse --short "$GITHUB_SHA")' + - name: Push Dagster User Code Docker image + uses: docker/build-push-action@v2 + with: + context: ./orchestration + push: true + tags: us.gcr.io/broad-dsp-gcr-public/monster-hca-dagster:${{steps.get-artifact-slug.outputs.slug}}, us.gcr.io/broad-dsp-gcr-public/monster-hca-dagster:latest + - name: Push Compose Dev Env Docker image + uses: docker/build-push-action@v2 + with: + context: . 
+ push: true + tags: us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:${{steps.get-artifact-slug.outputs.slug}}, us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:latest From d241965d06cd62b21c079c9f847a601b537cfda5 Mon Sep 17 00:00:00 2001 From: bahill Date: Thu, 18 Jan 2024 13:13:16 -0500 Subject: [PATCH 06/24] renaming with underscores --- .../{build-and-publish-main.yaml => build_and_publish_main.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{build-and-publish-main.yaml => build_and_publish_main.yaml} (100%) diff --git a/.github/workflows/build-and-publish-main.yaml b/.github/workflows/build_and_publish_main.yaml similarity index 100% rename from .github/workflows/build-and-publish-main.yaml rename to .github/workflows/build_and_publish_main.yaml From 76e0324f8863e0b952e36fdbd7186ee0a4c48d9e Mon Sep 17 00:00:00 2001 From: bahill Date: Thu, 18 Jan 2024 13:19:54 -0500 Subject: [PATCH 07/24] updated syntax for ignoring branches and improved name --- .github/workflows/build_and_publish_dev.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_publish_dev.yaml b/.github/workflows/build_and_publish_dev.yaml index 5e98f7a0..c7968311 100644 --- a/.github/workflows/build_and_publish_dev.yaml +++ b/.github/workflows/build_and_publish_dev.yaml @@ -1,8 +1,7 @@ -name: build-and-publish-dev +name: Build and Publish Dev Images on: push: - branches: - - '!main' + branches-ignore: [master, main] jobs: main-ci: runs-on: ubuntu-latest From f5d1f9648a2737a83ce55e75970ccb20cb5d6cdc Mon Sep 17 00:00:00 2001 From: bahill Date: Thu, 18 Jan 2024 13:38:35 -0500 Subject: [PATCH 08/24] Updating dev action to not run tests, and main name to fix display issue. Testing is not needed in dev as we run this on all PR pushes & syncs. 
Testing can/should be run locally in dev (takes 90 minutes) --- .github/workflows/build_and_publish_dev.yaml | 46 ++----------------- .github/workflows/build_and_publish_main.yaml | 2 +- 2 files changed, 4 insertions(+), 44 deletions(-) diff --git a/.github/workflows/build_and_publish_dev.yaml b/.github/workflows/build_and_publish_dev.yaml index c7968311..2cb13aa2 100644 --- a/.github/workflows/build_and_publish_dev.yaml +++ b/.github/workflows/build_and_publish_dev.yaml @@ -9,48 +9,6 @@ jobs: - uses: actions/checkout@v2 - name: Fetch tag history run: git fetch --tags - - uses: google-github-actions/setup-gcloud@v0.2.1 - name: Setup gcloud for Dataflow tests - with: - project_id: ${{ secrets.DEV_PROJECT_ID }} - service_account_key: ${{ secrets.GCP_TEST_KEY }} - export_default_credentials: true - - name: Set up Python 3.9 for dataflow tests - uses: actions/setup-python@v2 - with: - python-version: 3.9.16 - - name: Install Poetry - uses: snok/install-poetry@v1.2 - with: - version: 1.1.9 - - name: Restore cache dependencies - uses: actions/cache@v2 - env: - cache-name: cache-poetry-v2 - with: - path: ~/.cache/pypoetry - key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('./orchestration/pyproject.toml') }} - restore-keys: | - ${{ runner.os }}-build-${{ env.cache-name }}- - ${{ runner.os }}-build- - ${{ runner.os }}- - - name: Install dependencies - run: poetry install - working-directory: ${{ github.workspace }}/orchestration - - uses: olafurpg/setup-scala@v10 - with: - java-version: graalvm@20.0.0 - - name: Check Scala formatting - run: sbt scalafmtCheckAll - - name: Scala Compile - run: sbt Compile/compile Test/compile IntegrationTest/compile - - name: Scala Test - run: sbt "set ThisBuild/coverageEnabled := true" test IntegrationTest/test coverageAggregate - - name: Run pytest E2E test suite - run: poetry run pytest -v -m e2e - working-directory: ${{ github.workspace }}/orchestration - - name: Publish Scala coverage - uses: codecov/codecov-action@v1 - 
uses: google-github-actions/setup-gcloud@v0.2.1 name: Setup gcloud for pushing Docker images with: @@ -59,6 +17,9 @@ jobs: export_default_credentials: true - name: Setup GCR auth run: gcloud auth configure-docker --quiet us.gcr.io,us-east4-docker.pkg.dev + - uses: olafurpg/setup-scala@v10 + with: + java-version: graalvm@20.0.0 - name: Push Scala Dataflow Docker image run: sbt publish - name: Get artifact slug @@ -76,4 +37,3 @@ jobs: context: . push: true tags: us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:${{steps.get-artifact-slug.outputs.slug}}, us-east4-docker.pkg.dev/broad-dsp-monster-hca-dev/monster-dev-env/hca_ingest_compose_dev_env:dev - diff --git a/.github/workflows/build_and_publish_main.yaml b/.github/workflows/build_and_publish_main.yaml index 109ef801..74f11c9f 100644 --- a/.github/workflows/build_and_publish_main.yaml +++ b/.github/workflows/build_and_publish_main.yaml @@ -1,4 +1,4 @@ -name: Main Validation and Release +name: Main Validation and Release Workflow on: pull_request_target: types: From 59f94afe35e8117bf91e959edb82637e8c528cb4 Mon Sep 17 00:00:00 2001 From: bahill Date: Thu, 18 Jan 2024 13:46:24 -0500 Subject: [PATCH 09/24] fixing syntax --- .github/workflows/build_and_publish_dev.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_publish_dev.yaml b/.github/workflows/build_and_publish_dev.yaml index 2cb13aa2..31af2dae 100644 --- a/.github/workflows/build_and_publish_dev.yaml +++ b/.github/workflows/build_and_publish_dev.yaml @@ -17,7 +17,7 @@ jobs: export_default_credentials: true - name: Setup GCR auth run: gcloud auth configure-docker --quiet us.gcr.io,us-east4-docker.pkg.dev - - uses: olafurpg/setup-scala@v10 + - uses: olafurpg/setup-scala@v10 with: java-version: graalvm@20.0.0 - name: Push Scala Dataflow Docker image From 4bcd7223c19ec4f088d0424b3fa557df7974ce56 Mon Sep 17 00:00:00 2001 From: dsp-fieldeng-bot Date: Thu, 18 Jan 2024 19:00:07 
+0000 Subject: [PATCH 10/24] Update requirements.txt From c3190dd066f29f08952193444ea48de96e3dfc19 Mon Sep 17 00:00:00 2001 From: bahill Date: Thu, 18 Jan 2024 14:44:05 -0500 Subject: [PATCH 11/24] give name to validation job so it can be required by main as a check --- .github/workflows/validate-pull-request.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/validate-pull-request.yaml b/.github/workflows/validate-pull-request.yaml index 415656bc..05788441 100644 --- a/.github/workflows/validate-pull-request.yaml +++ b/.github/workflows/validate-pull-request.yaml @@ -5,6 +5,7 @@ on: - main jobs: pr-validation: + name: PR Validation runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 From a9f06ba711f2c6bf01c8a94c54409e307d08cdeb Mon Sep 17 00:00:00 2001 From: bahill Date: Fri, 19 Jan 2024 12:11:55 -0500 Subject: [PATCH 12/24] added Sentry to dependencies and Docker env --- orchestration/Dockerfile | 3 ++- orchestration/poetry.lock | 47 ++++++++++++++++++++++++++++++++++-- orchestration/pyproject.toml | 1 + 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/orchestration/Dockerfile b/orchestration/Dockerfile index ebf02972..eb8dbe85 100644 --- a/orchestration/Dockerfile +++ b/orchestration/Dockerfile @@ -9,7 +9,8 @@ ENV PYTHONFAULTHANDLER=1 \ PIP_NO_CACHE_DIR=off \ PIP_DISABLE_PIP_VERSION_CHECK=on \ PIP_DEFAULT_TIMEOUT=100 \ - POETRY_VERSION=1.1.8 + POETRY_VERSION=1.1.8 \ + SENTRY_DSN=https://922d76ad2dceb6cd328436ae551ffb11@o54426.ingest.sentry.io/4506559533088768 RUN pip install "poetry==$POETRY_VERSION" diff --git a/orchestration/poetry.lock b/orchestration/poetry.lock index 7c36ce0c..329f7c70 100644 --- a/orchestration/poetry.lock +++ b/orchestration/poetry.lock @@ -598,7 +598,7 @@ beautifulsoup4 = "*" [[package]] name = "google-api-core" -version = "2.12.0" +version = "2.15.0" description = "Google API client core library" category = "main" optional = false @@ -1690,6 +1690,48 @@ category = "main" optional = false 
python-versions = "*" +[[package]] +name = "sentry-sdk" +version = "1.39.2" +description = "Python client for Sentry (https://sentry.io)" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +certifi = "*" +urllib3 = {version = ">=1.26.11", markers = "python_version >= \"3.6\""} + +[package.extras] +aiohttp = ["aiohttp (>=3.5)"] +arq = ["arq (>=0.23)"] +asyncpg = ["asyncpg (>=0.23)"] +beam = ["apache-beam (>=2.12)"] +bottle = ["bottle (>=0.12.13)"] +celery = ["celery (>=3)"] +chalice = ["chalice (>=1.16.0)"] +clickhouse-driver = ["clickhouse-driver (>=0.2.0)"] +django = ["django (>=1.8)"] +falcon = ["falcon (>=1.4)"] +fastapi = ["fastapi (>=0.79.0)"] +flask = ["flask (>=0.11)", "blinker (>=1.1)", "markupsafe"] +grpcio = ["grpcio (>=1.21.1)"] +httpx = ["httpx (>=0.16.0)"] +huey = ["huey (>=2)"] +loguru = ["loguru (>=0.5)"] +opentelemetry = ["opentelemetry-distro (>=0.35b0)"] +opentelemetry-experimental = ["opentelemetry-distro (>=0.40b0,<1.0.0)", "opentelemetry-instrumentation-aiohttp-client (>=0.40b0,<1.0.0)", "opentelemetry-instrumentation-django (>=0.40b0,<1.0.0)", "opentelemetry-instrumentation-fastapi (>=0.40b0,<1.0.0)", "opentelemetry-instrumentation-flask (>=0.40b0,<1.0.0)", "opentelemetry-instrumentation-requests (>=0.40b0,<1.0.0)", "opentelemetry-instrumentation-sqlite3 (>=0.40b0,<1.0.0)", "opentelemetry-instrumentation-urllib (>=0.40b0,<1.0.0)"] +pure-eval = ["pure-eval", "executing", "asttokens"] +pymongo = ["pymongo (>=3.1)"] +pyspark = ["pyspark (>=2.4.4)"] +quart = ["quart (>=0.16.1)", "blinker (>=1.1)"] +rq = ["rq (>=0.6)"] +sanic = ["sanic (>=0.8)"] +sqlalchemy = ["sqlalchemy (>=1.2)"] +starlette = ["starlette (>=0.19.1)"] +starlite = ["starlite (>=1.48)"] +tornado = ["tornado (>=5)"] + [[package]] name = "six" version = "1.16.0" @@ -2015,7 +2057,7 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "1.1" python-versions = "3.9.16" -content-hash = 
"e7ea34813d7c2a0f82d60fd8c11cc9ca4051dc65b48c14ef5a29ff400e5d21a4" +content-hash = "a0d8b79f839c59a1bdec6916e67491f2a7f1a709ac88f28220c1a927c328a376" [metadata.files] aiohttp = [] @@ -2151,6 +2193,7 @@ rfc3339-validator = [] rpds-py = [] rsa = [] rx = [] +sentry-sdk = [] six = [] slack-sdk = [] slackclient = [] diff --git a/orchestration/pyproject.toml b/orchestration/pyproject.toml index 190c7bbe..0345be87 100644 --- a/orchestration/pyproject.toml +++ b/orchestration/pyproject.toml @@ -30,6 +30,7 @@ protobuf = "3.20.2" python-dateutil = "^2.8.1" pyyaml = "^5.3" rfc3339-validator = "^0.1.4" +sentry-sdk = "^1.39.2" typing-extensions = "^3.7.4" # werkzeug = "2.2.3" # will have to update dagit which means updating broad-dagster-utils - FE-36 From a191fe0da38a0498e02319baa4d91b275cdaff9b Mon Sep 17 00:00:00 2001 From: bahill Date: Fri, 19 Jan 2024 15:26:23 -0500 Subject: [PATCH 13/24] adding fetch depth 0 for sonar cloud --- .github/workflows/build_and_publish_dev.yaml | 3 +++ .github/workflows/build_and_publish_main.yaml | 9 +++++++-- .github/workflows/generate-requirements-file.yaml | 3 +++ .github/workflows/trivy.yaml | 3 +++ .github/workflows/validate-pull-request.yaml | 1 + .github/workflows/validate-python.yaml | 3 +++ 6 files changed, 20 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_publish_dev.yaml b/.github/workflows/build_and_publish_dev.yaml index 31af2dae..c0f4a94d 100644 --- a/.github/workflows/build_and_publish_dev.yaml +++ b/.github/workflows/build_and_publish_dev.yaml @@ -7,6 +7,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 + # Needed by sonar to get the git history for the branch the PR will be merged into. 
+ with: + fetch-depth: 0 - name: Fetch tag history run: git fetch --tags - uses: google-github-actions/setup-gcloud@v0.2.1 diff --git a/.github/workflows/build_and_publish_main.yaml b/.github/workflows/build_and_publish_main.yaml index 74f11c9f..4d3fc3f6 100644 --- a/.github/workflows/build_and_publish_main.yaml +++ b/.github/workflows/build_and_publish_main.yaml @@ -1,14 +1,19 @@ name: Main Validation and Release Workflow on: pull_request_target: - types: - - closed + types: + - closed + branches: + - main jobs: main-ci: if: github.event.pull_request.merged == true runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 + # Needed by sonar to get the git history for the branch the PR will be merged into. + with: + fetch-depth: 0 - name: Fetch tag history run: git fetch --tags - uses: olafurpg/setup-scala@v10 diff --git a/.github/workflows/generate-requirements-file.yaml b/.github/workflows/generate-requirements-file.yaml index 4e63b958..505429d6 100644 --- a/.github/workflows/generate-requirements-file.yaml +++ b/.github/workflows/generate-requirements-file.yaml @@ -13,6 +13,9 @@ jobs: steps: - name: Check out repository uses: actions/checkout@v3 + # Needed by sonar to get the git history for the branch the PR will be merged into. + with: + fetch-depth: 0 - name: Set up python id: setup-python uses: actions/setup-python@v4 diff --git a/.github/workflows/trivy.yaml b/.github/workflows/trivy.yaml index 8e55de9c..f7dd7906 100644 --- a/.github/workflows/trivy.yaml +++ b/.github/workflows/trivy.yaml @@ -8,6 +8,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + # Needed by sonar to get the git history for the branch the PR will be merged into. 
+ with: + fetch-depth: 0 - uses: broadinstitute/dsp-appsec-trivy-action@v1 with: context: ./orchestration diff --git a/.github/workflows/validate-pull-request.yaml b/.github/workflows/validate-pull-request.yaml index 05788441..5a397071 100644 --- a/.github/workflows/validate-pull-request.yaml +++ b/.github/workflows/validate-pull-request.yaml @@ -14,6 +14,7 @@ jobs: project_id: ${{ secrets.DEV_PROJECT_ID }} service_account_key: ${{ secrets.GCP_TEST_KEY }} export_default_credentials: true + fetch-depth: 0 - name: Set up Python 3.9 for dataflow tests uses: actions/setup-python@v2 with: diff --git a/.github/workflows/validate-python.yaml b/.github/workflows/validate-python.yaml index c39b28cf..e1e2049f 100644 --- a/.github/workflows/validate-python.yaml +++ b/.github/workflows/validate-python.yaml @@ -15,6 +15,9 @@ jobs: ENV: test steps: - uses: actions/checkout@v2 + # Needed by sonar to get the git history for the branch the PR will be merged into. + with: + fetch-depth: 0 - uses: google-github-actions/setup-gcloud@v0.2.1 with: project_id: ${{ secrets.DEV_PROJECT_ID }} From c4224834a16300aaee85981018d8e2bf9e16e8a7 Mon Sep 17 00:00:00 2001 From: bahill Date: Fri, 19 Jan 2024 16:00:00 -0500 Subject: [PATCH 14/24] First attempt to integrate sentry - test 1 --- .../hca_orchestration/pipelines/validate_ingress.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/orchestration/hca_orchestration/pipelines/validate_ingress.py b/orchestration/hca_orchestration/pipelines/validate_ingress.py index a32414da..40c68f7c 100644 --- a/orchestration/hca_orchestration/pipelines/validate_ingress.py +++ b/orchestration/hca_orchestration/pipelines/validate_ingress.py @@ -1,3 +1,5 @@ +import sentry_sdk + from dagster import ( HookContext, InitResourceContext, @@ -13,6 +15,12 @@ pre_flight_validate, ) +SENTRY_DSN = os.getenv( + "SENTRY_DSN", + "", +) +if SENTRY_DSN: + sentry_sdk.init(dsn=SENTRY_DSN, traces_sample_rate=1.0) def run_config_for_validation_ingress_partition( partition: 
Partition, From c4910d9f6d6d073f62f432a7f8c1942cd4b60112 Mon Sep 17 00:00:00 2001 From: bahill Date: Fri, 19 Jan 2024 16:09:21 -0500 Subject: [PATCH 15/24] fixed left over from merge conflict resolution --- .github/workflows/build_and_publish_main.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build_and_publish_main.yaml b/.github/workflows/build_and_publish_main.yaml index d36e4d98..4d3fc3f6 100644 --- a/.github/workflows/build_and_publish_main.yaml +++ b/.github/workflows/build_and_publish_main.yaml @@ -1,7 +1,6 @@ name: Main Validation and Release Workflow on: pull_request_target: -<<<<<<< HEAD types: - closed branches: From b2a918af2d0310b929949b8f85467bcec2cb9c54 Mon Sep 17 00:00:00 2001 From: bahill Date: Mon, 22 Jan 2024 11:46:24 -0500 Subject: [PATCH 16/24] Adding fetch depth --- .github/workflows/generate-requirements-file.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/generate-requirements-file.yaml b/.github/workflows/generate-requirements-file.yaml index 505429d6..02dd48a6 100644 --- a/.github/workflows/generate-requirements-file.yaml +++ b/.github/workflows/generate-requirements-file.yaml @@ -13,9 +13,9 @@ jobs: steps: - name: Check out repository uses: actions/checkout@v3 - # Needed by sonar to get the git history for the branch the PR will be merged into. - with: - fetch-depth: 0 + # Needed by sonar to get the git history for the branch the PR will be merged into. + with: + fetch-depth: 0 - name: Set up python id: setup-python uses: actions/setup-python@v4 From b9d66a04311f86f35533d588c616c89a94692cef Mon Sep 17 00:00:00 2001 From: bahill Date: Mon, 22 Jan 2024 11:46:42 -0500 Subject: [PATCH 17/24] importing os - maybe this is why the repo didn't load? 
--- orchestration/hca_orchestration/pipelines/validate_ingress.py | 1 + 1 file changed, 1 insertion(+) diff --git a/orchestration/hca_orchestration/pipelines/validate_ingress.py b/orchestration/hca_orchestration/pipelines/validate_ingress.py index 40c68f7c..bdb65916 100644 --- a/orchestration/hca_orchestration/pipelines/validate_ingress.py +++ b/orchestration/hca_orchestration/pipelines/validate_ingress.py @@ -1,3 +1,4 @@ +import os import sentry_sdk from dagster import ( From b32857516d9604f89bfc661deafddc510af2bca5 Mon Sep 17 00:00:00 2001 From: bahill Date: Tue, 23 Jan 2024 14:59:17 -0500 Subject: [PATCH 18/24] linting --- orchestration/hca_orchestration/pipelines/validate_ingress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/orchestration/hca_orchestration/pipelines/validate_ingress.py b/orchestration/hca_orchestration/pipelines/validate_ingress.py index bdb65916..63774fed 100644 --- a/orchestration/hca_orchestration/pipelines/validate_ingress.py +++ b/orchestration/hca_orchestration/pipelines/validate_ingress.py @@ -1,6 +1,6 @@ import os -import sentry_sdk +import sentry_sdk from dagster import ( HookContext, InitResourceContext, From 182f2ecd9197396f5387789503c892cae8a2bcc3 Mon Sep 17 00:00:00 2001 From: bahill Date: Wed, 24 Jan 2024 18:57:43 -0500 Subject: [PATCH 19/24] Flake8 lint - needed two spaces before my func def --- orchestration/hca_orchestration/pipelines/validate_ingress.py | 1 + 1 file changed, 1 insertion(+) diff --git a/orchestration/hca_orchestration/pipelines/validate_ingress.py b/orchestration/hca_orchestration/pipelines/validate_ingress.py index 63774fed..e72e3388 100644 --- a/orchestration/hca_orchestration/pipelines/validate_ingress.py +++ b/orchestration/hca_orchestration/pipelines/validate_ingress.py @@ -23,6 +23,7 @@ if SENTRY_DSN: sentry_sdk.init(dsn=SENTRY_DSN, traces_sample_rate=1.0) + def run_config_for_validation_ingress_partition( partition: Partition, ) -> DagsterObjectConfigSchema: From 
80ec9765b6d0cfcb1bfdf1757ab06f305b92b94f Mon Sep 17 00:00:00 2001 From: bahill Date: Mon, 29 Jan 2024 12:35:59 -0500 Subject: [PATCH 20/24] Adding Sentry to our active pipelines --- orchestration/hca_orchestration/pipelines/cut_snapshot.py | 8 ++++++++ orchestration/hca_orchestration/pipelines/load_hca.py | 8 ++++++++ .../hca_orchestration/pipelines/set_snapshot_public.py | 7 +++++++ 3 files changed, 23 insertions(+) diff --git a/orchestration/hca_orchestration/pipelines/cut_snapshot.py b/orchestration/hca_orchestration/pipelines/cut_snapshot.py index 158144af..fa47d320 100644 --- a/orchestration/hca_orchestration/pipelines/cut_snapshot.py +++ b/orchestration/hca_orchestration/pipelines/cut_snapshot.py @@ -36,6 +36,14 @@ warnings.filterwarnings("ignore", category=ExperimentalWarning) +SENTRY_DSN = os.getenv( + "SENTRY_DSN", + "", +) +if SENTRY_DSN: + sentry_sdk.init(dsn=SENTRY_DSN, traces_sample_rate=1.0) + + def cut_project_snapshot_job(hca_env: str, jade_env: str, steward: str) -> PipelineDefinition: return cut_snapshot.to_job( description="Given a source project ID, this pipeline will determine the corresponding " diff --git a/orchestration/hca_orchestration/pipelines/load_hca.py b/orchestration/hca_orchestration/pipelines/load_hca.py index 472d8d42..246a2e3b 100644 --- a/orchestration/hca_orchestration/pipelines/load_hca.py +++ b/orchestration/hca_orchestration/pipelines/load_hca.py @@ -16,6 +16,14 @@ warnings.filterwarnings("ignore", category=ExperimentalWarning) +SENTRY_DSN = os.getenv( + "SENTRY_DSN", + "", +) +if SENTRY_DSN: + sentry_sdk.init(dsn=SENTRY_DSN, traces_sample_rate=1.0) + + @graph def load_hca() -> None: staging_dataset = create_scratch_dataset( diff --git a/orchestration/hca_orchestration/pipelines/set_snapshot_public.py b/orchestration/hca_orchestration/pipelines/set_snapshot_public.py index 1cd5bc7d..a1392bcc 100644 --- a/orchestration/hca_orchestration/pipelines/set_snapshot_public.py +++ 
b/orchestration/hca_orchestration/pipelines/set_snapshot_public.py @@ -30,6 +30,13 @@ warnings.filterwarnings("ignore", category=ExperimentalWarning) +SENTRY_DSN = os.getenv( + "SENTRY_DSN", + "", +) +if SENTRY_DSN: + sentry_sdk.init(dsn=SENTRY_DSN, traces_sample_rate=1.0) + def make_snapshot_public_job(hca_env: str, jade_env: str) -> PipelineDefinition: return set_snapshot_public.to_job( From c93a2fa7500c63e45fd6da053a4cbbd04530e337 Mon Sep 17 00:00:00 2001 From: bahill Date: Mon, 29 Jan 2024 12:54:36 -0500 Subject: [PATCH 21/24] linting --- .../pipelines/cut_snapshot.py | 2 ++ .../hca_orchestration/pipelines/load_hca.py | 28 ++++++++++++++----- .../pipelines/set_snapshot_public.py | 2 ++ 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/orchestration/hca_orchestration/pipelines/cut_snapshot.py b/orchestration/hca_orchestration/pipelines/cut_snapshot.py index fa47d320..5ab46841 100644 --- a/orchestration/hca_orchestration/pipelines/cut_snapshot.py +++ b/orchestration/hca_orchestration/pipelines/cut_snapshot.py @@ -1,5 +1,7 @@ +import os import warnings +import sentry_sdk from dagster import ( ExperimentalWarning, HookContext, diff --git a/orchestration/hca_orchestration/pipelines/load_hca.py b/orchestration/hca_orchestration/pipelines/load_hca.py index 246a2e3b..0e0d0198 100644 --- a/orchestration/hca_orchestration/pipelines/load_hca.py +++ b/orchestration/hca_orchestration/pipelines/load_hca.py @@ -3,15 +3,30 @@ area, transforming via Google Cloud Dataflow jobs into a form suitable for ingestion to TDR and the final load to TDR itself. 
""" +import os import warnings -from dagster import graph, ExperimentalWarning +import sentry_sdk +from dagster import ExperimentalWarning, graph -from hca_orchestration.solids.load_hca.data_files.load_data_files import import_data_files -from hca_orchestration.solids.load_hca.data_files.load_data_metadata_files import file_metadata_fanout -from hca_orchestration.solids.load_hca.non_file_metadata.load_non_file_metadata import non_file_metadata_fanout -from hca_orchestration.solids.load_hca.stage_data import clear_scratch_dir, pre_process_metadata, create_scratch_dataset -from hca_orchestration.solids.load_hca.utilities import send_start_notification, validate_and_send_finish_notification +from hca_orchestration.solids.load_hca.data_files.load_data_files import ( + import_data_files, +) +from hca_orchestration.solids.load_hca.data_files.load_data_metadata_files import ( + file_metadata_fanout, +) +from hca_orchestration.solids.load_hca.non_file_metadata.load_non_file_metadata import ( + non_file_metadata_fanout, +) +from hca_orchestration.solids.load_hca.stage_data import ( + clear_scratch_dir, + create_scratch_dataset, + pre_process_metadata, +) +from hca_orchestration.solids.load_hca.utilities import ( + send_start_notification, + validate_and_send_finish_notification, +) warnings.filterwarnings("ignore", category=ExperimentalWarning) @@ -22,7 +37,6 @@ ) if SENTRY_DSN: sentry_sdk.init(dsn=SENTRY_DSN, traces_sample_rate=1.0) - @graph def load_hca() -> None: diff --git a/orchestration/hca_orchestration/pipelines/set_snapshot_public.py b/orchestration/hca_orchestration/pipelines/set_snapshot_public.py index a1392bcc..735b9fa4 100644 --- a/orchestration/hca_orchestration/pipelines/set_snapshot_public.py +++ b/orchestration/hca_orchestration/pipelines/set_snapshot_public.py @@ -1,5 +1,7 @@ +import os import warnings +import sentry_sdk from dagster import ( ExperimentalWarning, HookContext, From 3de4055d0ca6e9accee79392d24785441890aaa2 Mon Sep 17 00:00:00 2001 From: 
bahill Date: Mon, 29 Jan 2024 14:40:43 -0500 Subject: [PATCH 22/24] linting --- .github/linters/.pylintrc | 624 ++++++++++++++++++ .../hca_orchestration/pipelines/load_hca.py | 6 +- 2 files changed, 627 insertions(+), 3 deletions(-) create mode 100644 .github/linters/.pylintrc diff --git a/.github/linters/.pylintrc b/.github/linters/.pylintrc new file mode 100644 index 00000000..f629dcb0 --- /dev/null +++ b/.github/linters/.pylintrc @@ -0,0 +1,624 @@ +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint +# in a server-like mode. +clear-cache-post-run=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. 
Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. 
+persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.9 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. 
If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. 
+module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. 
+valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=builtins.BaseException,builtins.Exception + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. 
+single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow explicit reexports by alias from a package __init__. +allow-reexport-from-package=no + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. 
+confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. 
When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the 'python-enchant' package. 
+spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. 
In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). 
+dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io diff --git a/orchestration/hca_orchestration/pipelines/load_hca.py b/orchestration/hca_orchestration/pipelines/load_hca.py index 0e0d0198..5ecde397 100644 --- a/orchestration/hca_orchestration/pipelines/load_hca.py +++ b/orchestration/hca_orchestration/pipelines/load_hca.py @@ -8,7 +8,6 @@ import sentry_sdk from dagster import ExperimentalWarning, graph - from hca_orchestration.solids.load_hca.data_files.load_data_files import ( import_data_files, ) @@ -20,11 +19,11 @@ ) from hca_orchestration.solids.load_hca.stage_data import ( clear_scratch_dir, - create_scratch_dataset, + create_scratch_dataset, pre_process_metadata, ) from hca_orchestration.solids.load_hca.utilities import ( - send_start_notification, + send_start_notification, validate_and_send_finish_notification, ) @@ -38,6 +37,7 @@ if SENTRY_DSN: sentry_sdk.init(dsn=SENTRY_DSN, traces_sample_rate=1.0) + @graph def load_hca() -> None: staging_dataset = create_scratch_dataset( From bc271bed0d413ec6746869cf68b686b4c0c533fc Mon Sep 17 00:00:00 2001 From: bahill Date: Mon, 29 Jan 2024 14:41:38 -0500 Subject: [PATCH 23/24] Adding config for pylint - ignoring check for unpacking parameters --- .github/linters/.pylintrc | 3 ++- .github/workflows/super_linter.yaml | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/linters/.pylintrc b/.github/linters/.pylintrc index f629dcb0..9b25dc98 100644 --- a/.github/linters/.pylintrc +++ b/.github/linters/.pylintrc @@ -421,7 +421,8 @@ disable=raw-checker-failed, suppressed-message, useless-suppression, 
deprecated-pragma, - use-symbolic-message-instead + use-symbolic-message-instead, + E1120 # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option diff --git a/.github/workflows/super_linter.yaml b/.github/workflows/super_linter.yaml index e31f6eb5..6e2cb618 100644 --- a/.github/workflows/super_linter.yaml +++ b/.github/workflows/super_linter.yaml @@ -59,4 +59,6 @@ jobs: VALIDATE_PYTHON_BLACK: false VALIDATE_PYTHON_MYPY: false DEFAULT_BRANCH: main - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + LINTER_RULES_PATH: .github/linters + \ No newline at end of file From ce167b5bc5de011b8d5fb805940e35073c9e120e Mon Sep 17 00:00:00 2001 From: dsp-fieldeng-bot Date: Mon, 29 Jan 2024 19:57:00 +0000 Subject: [PATCH 24/24] Update requirements.txt --- orchestration/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/orchestration/requirements.txt b/orchestration/requirements.txt index c74b79dc..7cea6fa3 100644 --- a/orchestration/requirements.txt +++ b/orchestration/requirements.txt @@ -80,6 +80,7 @@ rfc3339-validator==0.1.4; (python_version >= "2.7" and python_full_version < "3. rpds-py==0.10.6; python_version >= "3.8" rsa==4.9; python_version >= "3.6" and python_version < "4" and (python_version >= "3.9" and python_full_version < "3.0.0" and python_version < "3.10" or python_full_version >= "3.6.0" and python_version >= "3.9" and python_version < "3.10") rx==1.6.3; python_version >= "3.9" and python_version < "3.10" +sentry-sdk==1.39.2 six==1.16.0; python_version >= "3.9" and python_full_version < "3.0.0" and python_version < "3.10" or python_version >= "3.9" and python_version < "3.10" and python_full_version >= "3.6.0" slack-sdk==3.23.0; python_full_version >= "3.6.0" slackclient==2.9.4; python_version >= "3.9" and python_version < "3.10" and python_full_version >= "3.6.0"