From cf052dc64f00e851427a41a34ffe576fd39be51b Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Fri, 1 Dec 2023 01:09:48 +0100 Subject: [PATCH] Add feature to build "chicken-egg" packages from sources (#35890) When we build on ci a pre-release version of provider, and we want to include packages that have >= CURRENTLY_RELEASED_VERSION we have to make sure that those packages are built from sources during building of PROD image. Otherwise they will not be installable on CURRENT_VERSION.dev0, CURRENT_VERSION.rc* etc. Until we "Actually" release a provider we should have a way to build such provider from sources. This is the CI version of it, once we have it working we can also apply it to the workflow that releases images to dockerhub. --- .github/actions/build-prod-images/action.yml | 12 ++++ .github/workflows/build-images.yml | 2 + .github/workflows/ci.yml | 15 +++++ airflow/providers/amazon/provider.yaml | 2 +- airflow/providers/google/provider.yaml | 2 +- .../providers/microsoft/azure/provider.yaml | 2 +- .../commands/release_management_commands.py | 25 +++++--- .../release_management_commands_config.py | 1 + .../src/airflow_breeze/params/shell_params.py | 2 + .../src/airflow_breeze/utils/packages.py | 4 -- .../airflow_breeze/utils/selective_checks.py | 5 ++ .../apache-airflow-providers-google/index.rst | 2 +- .../index.rst | 2 +- generated/provider_dependencies.json | 4 +- ...elease-management_generate-constraints.svg | 58 ++++++++++++------- ...elease-management_generate-constraints.txt | 2 +- scripts/in_container/_in_container_utils.sh | 23 ++++++-- setup.cfg | 2 +- setup.py | 2 +- 19 files changed, 120 insertions(+), 47 deletions(-) diff --git a/.github/actions/build-prod-images/action.yml b/.github/actions/build-prod-images/action.yml index 5fdbb795c4124..f038234087c4e 100644 --- a/.github/actions/build-prod-images/action.yml +++ b/.github/actions/build-prod-images/action.yml @@ -22,6 +22,9 @@ inputs: build-provider-packages: description: 'Whether to build provider packages from sources' required: true + chicken-egg-providers: + description: 'List of chicken-egg provider packages to build from sources' + required: true runs: using: "composite" steps: @@ -41,6 +44,15 @@ runs: --package-list-file ./airflow/providers/installed_providers.txt --package-format wheel --version-suffix-for-pypi dev0 if: ${{ inputs.build-provider-packages == 'true' }} + - name: "Prepare chicken-eggs provider packages" + # In case of provider packages which use latest dev0 version of providers, we should prepare them + # from the source code, not from the PyPI because they have apache-airflow>=X.Y.Z dependency + # And when we prepare them from sources they will have apache-airflow>=X.Y.Z.dev0 + shell: bash + run: > + breeze release-management prepare-provider-packages + --package-format wheel --version-suffix-for-pypi dev0 ${{ inputs.chicken-egg-providers }} + if: ${{ inputs.build-provider-packages != 'true' && inputs.chicken-egg-providers != '' }} - name: "Prepare airflow package" shell: bash run: > diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index b29d49de17813..82cc6e598785d 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -74,6 +74,7 @@ jobs: is-arm-runner: ${{ steps.selective-checks.outputs.is-arm-runner }} is-vm-runner: ${{ steps.selective-checks.outputs.is-vm-runner }} is-k8s-runner: ${{ steps.selective-checks.outputs.is-k8s-runner }} + chicken-egg-providers: ${{ steps.selective-checks.outputs.chicken-egg-providers }} target-commit-sha: "${{steps.discover-pr-merge-commit.outputs.target-commit-sha || github.event.pull_request.head.sha || github.sha @@ -293,6 +294,7 @@ jobs: uses: ./.github/actions/build-prod-images with: build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }} + chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} env: UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 74093010c0287..3d7e9146f5f55 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -121,6 +121,7 @@ jobs: is-arm-runner: ${{ steps.selective-checks.outputs.is-arm-runner }} is-vm-runner: ${{ steps.selective-checks.outputs.is-vm-runner }} is-k8s-runner: ${{ steps.selective-checks.outputs.is-k8s-runner }} + chicken-egg-providers: ${{ steps.selective-checks.outputs.chicken-egg-providers }} has-migrations: ${{ steps.selective-checks.outputs.has-migrations }} source-head-repo: ${{ steps.source-run-info.outputs.source-head-repo }} pull-request-labels: ${{ steps.source-run-info.outputs.pr-labels }} @@ -474,6 +475,7 @@ jobs: RUNS_ON: "${{ needs.build-info.outputs.runs-on }}" PYTHON_VERSIONS: ${{needs.build-info.outputs.all-python-versions-list-as-string}} DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}} + VERSION_SUFFIX_FOR_PYPI: "dev0" if: needs.build-info.outputs.ci-image-build == 'true' steps: - name: Cleanup repo @@ -502,12 +504,23 @@ jobs: run: > breeze release-management generate-constraints --run-in-parallel --airflow-constraints-mode constraints-no-providers + - name: "Prepare chicken-eggs provider packages" + # In case of provider packages which use latest dev0 version of providers, we should prepare them + # from the source code, not from the PyPI because they have apache-airflow>=X.Y.Z dependency + # And when we prepare them from sources they will have apache-airflow>=X.Y.Z.dev0 + shell: bash + run: > + breeze release-management prepare-provider-packages + --package-format wheel --version-suffix-for-pypi dev0 + ${{ needs.build-info.outputs.chicken-egg-providers }} + if: needs.build-info.outputs.chicken-egg-providers != '' - name: "PyPI constraints" shell: bash timeout-minutes: 25 run: > breeze release-management generate-constraints --run-in-parallel --airflow-constraints-mode constraints + --chicken-egg-providers "${{ needs.build-info.outputs.chicken-egg-providers }}" - name: "Dependency upgrade summary" shell: bash run: | @@ -1608,6 +1621,7 @@ jobs: if: needs.build-info.outputs.in-workflow-build == 'true' with: build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }} + chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} env: UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} @@ -1647,6 +1661,7 @@ jobs: uses: ./.github/actions/build-prod-images with: build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }} + chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} env: UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml index d6869683d076d..798043abe8ad6 100644 --- a/airflow/providers/amazon/provider.yaml +++ b/airflow/providers/amazon/provider.yaml @@ -753,7 +753,7 @@ additional-extras: - apache-airflow-providers-cncf-kubernetes>=7.2.0 - name: s3fs dependencies: - - s3fs>=2023.9.2 + - s3fs>=2023.10.0 - name: python3-saml dependencies: - python3-saml>=1.16.0 diff --git a/airflow/providers/google/provider.yaml b/airflow/providers/google/provider.yaml index 86a03ff9003c5..7e6f69349c9a6 100644 --- a/airflow/providers/google/provider.yaml +++ b/airflow/providers/google/provider.yaml @@ -87,7 +87,7 @@ dependencies: - gcloud-aio-auth>=4.0.0,<5.0.0 - gcloud-aio-bigquery>=6.1.2 - gcloud-aio-storage - - gcsfs>=2023.9.2 + - gcsfs>=2023.10.0 - google-ads>=22.1.0 - google-api-core>=2.11.0 - google-api-python-client>=1.6.0 diff --git a/airflow/providers/microsoft/azure/provider.yaml b/airflow/providers/microsoft/azure/provider.yaml index 4f2308803dda4..186719a9af00e 100644 --- a/airflow/providers/microsoft/azure/provider.yaml +++ b/airflow/providers/microsoft/azure/provider.yaml @@ -70,7 +70,7 @@ versions: dependencies: - apache-airflow>=2.5.0 - - adlfs>=2023.9.2 + - adlfs>=2023.10.0 - azure-batch>=8.0.0 - azure-cosmos>=4.0.0 - azure-mgmt-cosmosdb diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index bbcf525e77fab..5aa1f65bf114d 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -684,21 +684,30 @@ def run_generate_constraints_in_parallel( @option_image_tag_for_running @option_debug_release_management @option_airflow_constraints_mode_ci +@click.option( + "--chicken-egg-providers", + default="", + help="List of chicken-egg provider packages - " + "those that have airflow_version >= current_version and should " + "be installed in CI from locally built packages with >= current_version.dev0 ", + envvar="CHICKEN_EGG_PROVIDERS", +) @option_github_repository @option_verbose @option_dry_run @option_answer def generate_constraints( + airflow_constraints_mode: str, + debug: bool, + debug_resources: bool, + github_repository: str, + image_tag: str | None, + parallelism: int, python: str, + python_versions: str, run_in_parallel: bool, - parallelism: int, skip_cleanup: bool, - debug_resources: bool, - python_versions: str, - image_tag: str | None, - debug: bool, - airflow_constraints_mode: str, - github_repository: str, + chicken_egg_providers: str, ): perform_environment_checks() check_remote_ghcr_io_commands() @@ -742,6 +751,7 @@ def generate_constraints( python=python, github_repository=github_repository, airflow_constraints_mode=airflow_constraints_mode, + chicken_egg_providers=chicken_egg_providers, ) for python in python_version_list ] @@ -762,6 +772,7 @@ def generate_constraints( skip_image_upgrade_check=True, quiet=True, airflow_constraints_mode=airflow_constraints_mode, + chicken_egg_providers=chicken_egg_providers, ) return_code, info = run_generate_constraints( shell_params=shell_params, diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py index e27ccc3dd6199..ff0c9cdcd0426 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py @@ -134,6 +134,7 @@ "--image-tag", "--python", "--airflow-constraints-mode", + "--chicken-egg-providers", "--debug", "--github-repository", ], diff --git a/dev/breeze/src/airflow_breeze/params/shell_params.py b/dev/breeze/src/airflow_breeze/params/shell_params.py index 32f6a111ce24d..abfe2c4ddd350 100644 --- a/dev/breeze/src/airflow_breeze/params/shell_params.py +++ b/dev/breeze/src/airflow_breeze/params/shell_params.py @@ -118,6 +118,7 @@ class ShellParams: builder: str = "autodetect" celery_broker: str = DEFAULT_CELERY_BROKER celery_flower: bool = False + chicken_egg_providers: str = "" collect_only: bool = False database_isolation: bool = False db_reset: bool = False @@ -435,6 +436,7 @@ def env_variables_for_docker_commands(self) -> _Environ: _set_var(_env, "BREEZE", "true") _set_var(_env, "BREEZE_INIT_COMMAND", None, "") _set_var(_env, "CELERY_FLOWER", self.celery_flower) + _set_var(_env, "CHICKEN_EGG_PROVIDERS", self.chicken_egg_providers) _set_var(_env, "CI", None, "false") _set_var(_env, "CI_BUILD_ID", None, "0") _set_var(_env, "CI_EVENT_TYPE", None, "pull_request") diff --git a/dev/breeze/src/airflow_breeze/utils/packages.py b/dev/breeze/src/airflow_breeze/utils/packages.py index 3d59643dbca3e..5518fdebd70ad 100644 --- a/dev/breeze/src/airflow_breeze/utils/packages.py +++ b/dev/breeze/src/airflow_breeze/utils/packages.py @@ -56,10 +56,6 @@ # TODO: use single source of truth for those # for now we need to keep them in sync with the ones in setup.py PREINSTALLED_PROVIDERS = [ - # Until we cut off the 2.8.0 branch and bump current airflow version to 2.9.0, we should - # Keep common.io commented out in order ot be able to generate PyPI constraints because - # The version from PyPI has requirement of apache-airflow>=2.8.0 - # "common.io", "common.sql", "ftp", "http", diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py b/dev/breeze/src/airflow_breeze/utils/selective_checks.py index 0a751565ef9ae..eba3023a6c143 100644 --- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py +++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py @@ -1019,3 +1019,8 @@ def mssql_parallelism(self) -> int: @cached_property def has_migrations(self) -> bool: return any([file.startswith("airflow/migrations/") for file in self._files]) + + @cached_property + def chicken_egg_providers(self) -> str: + """Space separated list of providers with chicken-egg problem and should be built from sources.""" + return "common.io" diff --git a/docs/apache-airflow-providers-google/index.rst b/docs/apache-airflow-providers-google/index.rst index d642d19ee25ce..b9defcd150d5f 100644 --- a/docs/apache-airflow-providers-google/index.rst +++ b/docs/apache-airflow-providers-google/index.rst @@ -116,7 +116,7 @@ PIP package Version required ``gcloud-aio-auth`` ``>=4.0.0,<5.0.0`` ``gcloud-aio-bigquery`` ``>=6.1.2`` ``gcloud-aio-storage`` -``gcsfs`` ``>=2023.9.2`` +``gcsfs`` ``>=2023.10.0`` ``google-ads`` ``>=22.1.0`` ``google-api-core`` ``>=2.11.0`` ``google-api-python-client`` ``>=1.6.0`` diff --git a/docs/apache-airflow-providers-microsoft-azure/index.rst b/docs/apache-airflow-providers-microsoft-azure/index.rst index 95a23f39573a6..cc1467e64fc46 100644 --- a/docs/apache-airflow-providers-microsoft-azure/index.rst +++ b/docs/apache-airflow-providers-microsoft-azure/index.rst @@ -106,7 +106,7 @@ The minimum Apache Airflow version supported by this provider package is ``2.5.0 PIP package Version required ================================ ================== ``apache-airflow`` ``>=2.5.0`` -``adlfs`` ``>=2023.9.2`` +``adlfs`` ``>=2023.10.0`` ``azure-batch`` ``>=8.0.0`` ``azure-cosmos`` ``>=4.0.0`` ``azure-mgmt-cosmosdb`` diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index 3b9d543169af6..e6405cfe77bdc 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -415,7 +415,7 @@ "gcloud-aio-auth>=4.0.0,<5.0.0", "gcloud-aio-bigquery>=6.1.2", "gcloud-aio-storage", - "gcsfs>=2023.9.2", + "gcsfs>=2023.10.0", "google-ads>=22.1.0", "google-api-core>=2.11.0", "google-api-python-client>=1.6.0", @@ -557,7 +557,7 @@ "microsoft.azure": { "deps": [ "adal>=1.2.7", - "adlfs>=2023.9.2", + "adlfs>=2023.10.0", "apache-airflow>=2.5.0", "azure-batch>=8.0.0", "azure-cosmos>=4.0.0", diff --git a/images/breeze/output_release-management_generate-constraints.svg b/images/breeze/output_release-management_generate-constraints.svg index 9801441959831..0fd9c471c2201 100644 --- a/images/breeze/output_release-management_generate-constraints.svg +++ b/images/breeze/output_release-management_generate-constraints.svg @@ -1,4 +1,4 @@ - +