From 2a1f17d0521fd82736c76dfe05d0695505ffffec Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Wed, 22 Jan 2025 23:43:57 +0100 Subject: [PATCH] Add script to move providers to the new directory structure (#45945) --- .github/boring-cyborg.yml | 4 +- .../provider_documentation.py | 22 +- .../prepare_providers/provider_packages.py | 2 + .../templates/pyproject_TEMPLATE.toml.jinja2 | 17 +- .../src/airflow_breeze/utils/packages.py | 6 + dev/moving_providers/README.md | 87 +++ dev/moving_providers/move_providers.py | 516 ++++++++++++++++++ dev/requirements.txt | 2 +- hatch_build.py | 2 +- providers/airbyte/pyproject.toml | 9 +- providers/apache/iceberg/pyproject.toml | 10 +- providers/celery/pyproject.toml | 10 +- providers/edge/pyproject.toml | 10 +- scripts/in_container/run_fix_ownership.py | 4 +- 14 files changed, 652 insertions(+), 49 deletions(-) create mode 100644 dev/moving_providers/README.md create mode 100755 dev/moving_providers/move_providers.py diff --git a/.github/boring-cyborg.yml b/.github/boring-cyborg.yml index 22cee73155b2b..2c97219a4e5bd 100644 --- a/.github/boring-cyborg.yml +++ b/.github/boring-cyborg.yml @@ -27,8 +27,8 @@ labelPRBasedOnFilePath: - providers/tests/alibaba/**/* - providers/tests/system/alibaba/**/* - provider:amazon-aws: - - providers/src/airflow/providers/amazon/aws/**/* + provider:amazon: + - providers/src/airflow/providers/amazon/**/* - providers/tests/amazon/aws/**/* - docs/apache-airflow-providers-amazon/**/* - providers/tests/system/amazon/aws/**/* diff --git a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py index 1fc80a46e8e98..6b705148c94cb 100644 --- a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py +++ b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py @@ -1183,18 +1183,22 @@ def _regenerate_pyproject_toml(context: dict[str, Any], provider_details: Provid if 
in_required_dependencies and line == "]": in_required_dependencies = False continue - if line == "[project.optional-dependencies]": - in_optional_dependencies = True - continue - if in_optional_dependencies and line == "": - in_optional_dependencies = False - continue if line == "[dependency-groups]": in_dependency_groups = True continue if in_dependency_groups and line == "": in_dependency_groups = False continue + if in_dependency_groups and line.startswith("["): + in_dependency_groups = False + if line == "[project.optional-dependencies]": + in_optional_dependencies = True + continue + if in_optional_dependencies and line == "": + in_optional_dependencies = False + continue + if in_optional_dependencies and line.startswith("["): + in_optional_dependencies = False if in_required_dependencies: required_dependencies.append(line) if in_optional_dependencies: @@ -1209,7 +1213,9 @@ def _regenerate_pyproject_toml(context: dict[str, Any], provider_details: Provid # Add cross-provider dependencies to the optional dependencies if they are missing for module in PROVIDER_DEPENDENCIES.get(provider_details.provider_id)["cross-providers-deps"]: - if f'"{module}" = [' not in optional_dependencies: + if f'"{module}" = [' not in optional_dependencies and get_pip_package_name(module) not in "\n".join( + required_dependencies + ): optional_dependencies.append(f'"{module}" = [') optional_dependencies.append(f' "{get_pip_package_name(module)}"') optional_dependencies.append("]") @@ -1221,6 +1227,8 @@ def _regenerate_pyproject_toml(context: dict[str, Any], provider_details: Provid context=context, extension=".toml", autoescape=False, + lstrip_blocks=True, + trim_blocks=True, keep_trailing_newline=True, ) get_pyproject_toml_path.write_text(get_pyproject_toml_content) diff --git a/dev/breeze/src/airflow_breeze/prepare_providers/provider_packages.py b/dev/breeze/src/airflow_breeze/prepare_providers/provider_packages.py index 17f00033d21b6..57ca5942f67e5 100644 --- 
a/dev/breeze/src/airflow_breeze/prepare_providers/provider_packages.py +++ b/dev/breeze/src/airflow_breeze/prepare_providers/provider_packages.py @@ -137,6 +137,8 @@ def _prepare_pyproject_toml_file(context: dict[str, Any], target_path: Path): context=context, extension=".toml", autoescape=False, + lstrip_blocks=True, + trim_blocks=True, keep_trailing_newline=True, ) (target_path / "pyproject.toml").write_text(manifest_content) diff --git a/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 b/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 index 525fa17a2bb35..330b0c9f19e9e 100644 --- a/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 @@ -69,25 +69,26 @@ classifiers = [ "Topic :: System :: Monitoring", ] requires-python = "~=3.9" + # The dependencies should be modified in place in the generated file # Any change in the dependencies is preserved when the file is regenerated dependencies = [ {{ INSTALL_REQUIREMENTS }} ] +{% if EXTRAS_REQUIREMENTS %} -{%- if EXTRAS_REQUIREMENTS %} # The optional dependencies should be modified in place in the generated file # Any change in the dependencies is preserved when the file is regenerated [project.optional-dependencies] {{ EXTRAS_REQUIREMENTS }} -{%- endif %} +{% endif %} +{% if DEPENDENCY_GROUPS %} -{%- if DEPENDENCY_GROUPS %} # The dependency groups should be modified in place in the generated file # Any change in the dependencies is preserved when the file is regenerated [dependency-groups] {{ DEPENDENCY_GROUPS }} -{%- endif %} +{% endif %} [project.urls] "Documentation" = "https://airflow.apache.org/docs/{{ PACKAGE_PIP_NAME }}/{{RELEASE}}" @@ -100,13 +101,13 @@ dependencies = [ [project.entry-points."apache_airflow_provider"] provider_info = "airflow.providers.{{ PROVIDER_ID }}.get_provider_info:get_provider_info" +{% if PLUGINS %} -{%- if PLUGINS %} 
[project.entry-points."airflow.plugins"] -{%- for plugin in PLUGINS %} +{% for plugin in PLUGINS %} {{ plugin.name }} = "{{ plugin.package_name }}:{{ plugin.class_name }}" -{%- endfor %} -{%- endif %} +{% endfor %} +{% endif %} [tool.flit.module] name = "airflow.providers.{{ PROVIDER_ID }}" diff --git a/dev/breeze/src/airflow_breeze/utils/packages.py b/dev/breeze/src/airflow_breeze/utils/packages.py index 46f367618b570..fec5ed898d697 100644 --- a/dev/breeze/src/airflow_breeze/utils/packages.py +++ b/dev/breeze/src/airflow_breeze/utils/packages.py @@ -834,6 +834,8 @@ def render_template( context: dict[str, Any], extension: str, autoescape: bool = True, + lstrip_blocks: bool = False, + trim_blocks: bool = False, keep_trailing_newline: bool = False, ) -> str: """ @@ -842,6 +844,8 @@ def render_template( :param context: Jinja2 context :param extension: Target file extension :param autoescape: Whether to autoescape HTML + :param lstrip_blocks: Whether to strip leading blocks + :param trim_blocks: Whether to trim blocks :param keep_trailing_newline: Whether to keep the newline in rendered output :return: rendered template """ @@ -852,6 +856,8 @@ def render_template( loader=template_loader, undefined=jinja2.StrictUndefined, autoescape=autoescape, + lstrip_blocks=lstrip_blocks, + trim_blocks=trim_blocks, keep_trailing_newline=keep_trailing_newline, ) template = template_env.get_template(f"{template_name}_TEMPLATE{extension}.jinja2") diff --git a/dev/moving_providers/README.md b/dev/moving_providers/README.md new file mode 100644 index 0000000000000..abd885b69f38a --- /dev/null +++ b/dev/moving_providers/README.md @@ -0,0 +1,87 @@ + + + + +**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* + +- [Moving providers to new structure](#moving-providers-to-new-structure) + - [How to use the script](#how-to-use-the-script) + - [Options](#options) + + + +# Moving providers to new structure + +We are moving providers to a new structure, where each 
provider has a separate sub-project in +"providers" sub-folder. + +This means that we need to migrate some 90+ providers to the new structure. This is a big task and while we +could do it in one huge PR, it would be disruptive and likely take some time to review and fix some individual +edge-cases - even if we have automated most of the work. + +This directory contains a script that contributors can use to move a provider (or a few providers) to the +new structure as a starting point for their PR. Most of the work is automated, but there will likely be +some manual adjustments needed in more complex cases. + +## How to use the script + +The script follows https://peps.python.org/pep-0723/ and uses inlined dependencies - so it can be run as-is +by modern tools without creating a dedicated virtualenv - the virtualenv with dependencies is +created on-the-fly by PEP 723 compatible tools. For example one can use uv to run it: + +```shell +uv run dev/moving_providers/move_providers.py alibaba +``` + +## Options + + +> [!NOTE] +> You can see all the options by running the script with the `--help` option: +> +> ```shell +> uv run dev/moving_providers/move_providers.py --help +> ``` + +By default the script runs in `--dry-run` mode, which means it will not make any changes to the file system, +but will print what it would do. 
To actually move the files, you need to pass `--no-dry-run` option and you +will be asked to commit the code and create a PR: + +```shell +uv run dev/moving_providers/move_providers.py alibaba --no-dry-run +``` + +You can specify multiple providers to move in one go: + +```shell +uv run dev/moving_providers/move_providers.py alibaba amazon microsoft.azure +``` + +You can specify `--verbose` option to see more details about what the script is doing: + +```shell +uv run dev/moving_providers/move_providers.py alibaba --verbose +``` + +You can also specify `--quiet` option to see less output: + +```shell +uv run dev/moving_providers/move_providers.py alibaba --quiet +``` diff --git a/dev/moving_providers/move_providers.py b/dev/moving_providers/move_providers.py new file mode 100755 index 0000000000000..2a4316e2a549f --- /dev/null +++ b/dev/moving_providers/move_providers.py @@ -0,0 +1,516 @@ +#!/usr/bin/env python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# /// script +# requires-python = ">=3.12" +# dependencies = [ +# "click>=8.1.8", +# "rich>=13.6.0", +# "rich-click>=1.7.1", +# "pyyaml>=6.0.1", +# ] +# /// +from __future__ import annotations + +import difflib +import shutil +import subprocess +import sys +from functools import cache +from pathlib import Path + +import rich_click as click +from rich.console import Console +from rich.syntax import Syntax + +ROOT_PROJECT_DIR_PATH = Path(__file__).parent.parent.parent +PROVIDERS_DIR_PATH = ROOT_PROJECT_DIR_PATH / "providers" +OLD_PROVIDERS_SRC_DIR_PATH = PROVIDERS_DIR_PATH / "src" +OLD_PROVIDERS_AIRFLOW_PROVIDERS_SRC_PACKAGE_PATH = OLD_PROVIDERS_SRC_DIR_PATH / "airflow" / "providers" +OLD_PROVIDERS_TEST_DIR_PATH = ROOT_PROJECT_DIR_PATH / "providers" / "tests" +OLD_PROVIDERS_SYSTEM_TEST_DIR_PATH = OLD_PROVIDERS_TEST_DIR_PATH / "system" +DOCS_DIR_PATH = ROOT_PROJECT_DIR_PATH / "docs" + + +@cache +def _get_all_old_providers() -> list[str]: + return sorted( + [ + ".".join( + provider_yaml_path.parent.relative_to(OLD_PROVIDERS_AIRFLOW_PROVIDERS_SRC_PACKAGE_PATH).parts + ) + for provider_yaml_path in OLD_PROVIDERS_AIRFLOW_PROVIDERS_SRC_PACKAGE_PATH.rglob("provider.yaml") + ] + ) + + +def _get_provider_distribution_name(provider_id: str) -> str: + return f"apache-airflow-providers-{provider_id.replace('.', '-')}" + + +def _get_provider_only_path(provider_id: str) -> str: + return provider_id.replace(".", "/") + + +CONTENT_OVERRIDE = ["This content will be overridden by pre-commit hook"] + +console = Console(color_system="standard") + +is_verbose = False +is_quiet = False +is_dry_run = False + + +def _do_stuff( + *, + syntax: str, + from_path: Path | None = None, + to_path: Path | None = None, + from_content: list[str] | None = None, + updated_content: list[str] | None = None, + delete_from: bool = False, +): + if not to_path: + # in place update + to_path = from_path + updated_str = "" + if updated_content: + updated_str = "\n".join(updated_content) + "\n" + if 
is_verbose: + console.print(Syntax(updated_str, syntax, theme="ansi_dark")) + console.rule() + if not is_quiet: + if updated_content and from_content and from_path and to_path: + diff = difflib.unified_diff( + from_content, updated_content, fromfile=from_path.as_posix(), tofile=to_path.as_posix() + ) + console.print(Syntax("\n".join(diff), "diff", theme="ansi_dark")) + console.print() + elif updated_content and not from_content and to_path: + console.print(Syntax(updated_str, syntax, theme="ansi_dark")) + elif updated_content and to_path: + console.print(f"\n[yellow]Creating {to_path}\n") + elif not from_content and not updated_content and from_path and to_path and delete_from: + console.print(f"\n[yellow]Moving[/] {from_path} -> {to_path}\n") + elif not from_content and not updated_content and from_path and to_path and not delete_from: + console.print(f"\n[yellow]Copying[/] {from_path} -> {to_path}\n") + elif delete_from and from_path: + console.print(f"\n[yellow]Deleting {from_path}\n") + if not is_dry_run: + if updated_content and to_path: + to_path.parent.mkdir(parents=True, exist_ok=True) + to_path.write_text(updated_str) + console.print(f"\n[yellow]Written {to_path}\n") + elif not from_content and not updated_content and from_path and to_path: + if delete_from: + to_path.parent.mkdir(parents=True, exist_ok=True) + if from_path.is_dir() and to_path.exists(): + shutil.rmtree(to_path) + shutil.move(from_path, to_path) + console.print(f"\n[yellow]Moved {from_path} -> {to_path}\n") + return + else: + to_path.parent.mkdir(parents=True, exist_ok=True) + if from_path.is_dir(): + shutil.rmtree(to_path) + shutil.copytree(from_path, to_path) + else: + to_path.write_text(from_path.read_text()) + console.print(f"\n[yellow]Copied {from_path} -> {to_path}\n") + return + if delete_from and from_path: + from_path.unlink() + console.print(f"\n[yellow]Deleted {from_path}\n") + + +@click.command() +@click.argument("provider_ids", type=click.Choice(_get_all_old_providers()), 
required=True, nargs=-1) +@click.option( + "--dry-run/--no-dry-run", + default=True, + help="Whether to run the command without making changes.", + show_default=True, + is_flag=True, +) +@click.option( + "--verbose", + help="Whether to show complete content of generated files. (mutually exclusive with --quiet).", + is_flag=True, +) +@click.option( + "--skip-build-file-generation", + help="When set, the step to generate build files is skipped.", + is_flag=True, +) +@click.option( + "--quiet", + help="Whether to be quite - only show providers updated (mutually exclusive with --verbose).", + is_flag=True, +) +def move_providers( + provider_ids: tuple[str, ...], dry_run: bool, skip_build_file_generation: bool, verbose: bool, quiet: bool +): + if quiet and verbose: + console.print("\n[red]Cannot use --quiet and --verbose at the same time\n") + sys.exit(1) + if dry_run: + console.print("\n[yellow]Running in dry-run mode, no changes will be made\n") + global is_quiet, is_verbose, is_dry_run + is_quiet = quiet + is_verbose = verbose + is_dry_run = dry_run + + console.print("\n[blue]Moving providers:[/]\n") + console.print("* " + "\n *".join(provider_ids)) + console.print() + + for provider_id in provider_ids: + console.rule(f"\n[magenta]Moving provider: {provider_id}[/]\n", align="left") + move_provider(provider_id) + console.rule() + console.print() + + count_providers = len(_get_all_old_providers()) + if not dry_run: + subprocess.run("git add .", shell=True, check=True) + if not skip_build_file_generation: + subprocess.run("pre-commit run update-providers-build-files", shell=True, check=False) + subprocess.run("breeze ci-image build --python 3.9 --answer yes", shell=True, check=True) + subprocess.run("git add . 
", shell=True, check=False) + subprocess.run("git diff HEAD", shell=True, check=False) + console.print("\n[bright_green]First part of migration is complete[/].\n") + console.print("[yellow]Next steps:") + console.print("* run `pre-commit run`") + console.print("* fix all remaining errors, ") + console.print("* create branch, commit the changes and create a PR!\n") + console.print( + f"\nAfter the PR is merged there will be {count_providers - len(provider_ids)} providers " + f"left in the old location.\n" + ) + else: + console.print("\n[yellow]Dry-run mode, no changes were made.\n") + console.print(f"\nThere are currently {count_providers} providers left in the old structure.\n") + + +def fix_boring_cyborg(provider_id: str): + boring_cyborg_file_path = ROOT_PROJECT_DIR_PATH / ".github" / "boring-cyborg.yml" + console.print(f"\n[bright_blue]Updating {boring_cyborg_file_path}\n") + original_content = boring_cyborg_file_path.read_text().splitlines() + updated_content = [] + in_provider = False + for line in original_content: + if not in_provider: + updated_content.append(line) + if line.strip() == f"provider:{provider_id.replace('.', '-')}:": + in_provider = True + updated_content.append(f" - providers/{provider_id.replace('.', '/')}/**") + updated_content.append("") + if in_provider and line.strip() == "": + in_provider = False + _do_stuff( + syntax="yaml", + from_path=boring_cyborg_file_path, + from_content=original_content, + updated_content=updated_content, + ) + + +def add_docs_to_gitignore(provider_id: str): + gitignore_path = DOCS_DIR_PATH / ".gitignore" + console.print(f"\n[bright_blue]Updating {gitignore_path}\n") + original_content = gitignore_path.read_text().splitlines() + provider_line = f"apache-airflow-providers-{provider_id.replace('.', '-')}" + if provider_line in original_content: + console.print(f"\n[yellow]Provider {provider_id} already in .gitignore\n") + return + updated_content = [] + updated = False + for line in original_content: + if not 
line.startswith("#") and line > provider_line and not updated: + updated_content.append(provider_line) + updated = True + updated_content.append(line) + if not updated: + updated_content.append(provider_line) + _do_stuff( + syntax="gitignore", + from_path=gitignore_path, + from_content=original_content, + updated_content=updated_content, + ) + + +def _replace_string(path: Path, old: str, new: str): + content = path.read_text() + new_content = content.replace(old, new) + if content != new_content: + console.print(f"\n[bright_blue]Replacing {old} with {new} in {path}\n") + if not is_dry_run: + path.write_text(new_content) + + +def remove_changelog(provider_id: str): + changelog_path = DOCS_DIR_PATH / _get_provider_distribution_name(provider_id) / "changelog.rst" + console.print(f"\n[bright_blue]Deleting {changelog_path}\n") + _do_stuff(syntax="gitignore", from_path=changelog_path, delete_from=True) + + +def create_readme(provider_id: str): + readme_path = PROVIDERS_DIR_PATH / _get_provider_only_path(provider_id) / "README.rst" + console.print(f"\n[bright_blue]Creating {readme_path}\n") + _do_stuff(syntax="rst", to_path=readme_path, updated_content=CONTENT_OVERRIDE) + + +def move_docs(provider_id: str): + source_doc_dir = DOCS_DIR_PATH / _get_provider_distribution_name(provider_id) + dest_doc_dir = PROVIDERS_DIR_PATH / _get_provider_only_path(provider_id) / "docs" + console.print(f"\n[bright_blue]Moving docs to {dest_doc_dir}\n") + _do_stuff(syntax="rst", from_path=source_doc_dir, to_path=dest_doc_dir, delete_from=True) + provider_package_source_dir = OLD_PROVIDERS_AIRFLOW_PROVIDERS_SRC_PACKAGE_PATH / _get_provider_only_path( + provider_id + ) + _do_stuff( + syntax="rst", + from_path=provider_package_source_dir / "CHANGELOG.rst", + to_path=dest_doc_dir / "changelog.rst", + delete_from=True, + ) + _do_stuff( + syntax="txt", + from_path=provider_package_source_dir / ".latest-doc-only-change.txt", + to_path=dest_doc_dir / ".latest-doc-only-change.txt", + 
delete_from=True, + ) + + +def move_provider_yaml(provider_id: str) -> tuple[list[str], list[str], list[str]]: + source_provider_yaml_path = ( + OLD_PROVIDERS_AIRFLOW_PROVIDERS_SRC_PACKAGE_PATH + / _get_provider_only_path(provider_id) + / "provider.yaml" + ) + target_provider_yaml_path = PROVIDERS_DIR_PATH / _get_provider_only_path(provider_id) / "provider.yaml" + console.print(f"\n[bright_blue]Moving {source_provider_yaml_path} to {target_provider_yaml_path}\n") + original_content = source_provider_yaml_path.read_text().splitlines() + in_dependencies = False + in_optional_dependencies = False + in_devel_dependencies = False + updated_content = [] + + dependencies = [] + optional_dependencies = [] + devel_dependencies = [] + for line in original_content: + if line == "dependencies:" and not in_dependencies: + in_dependencies = True + continue + if in_dependencies: + if not line: + continue + if line.startswith(" -"): + dependencies.append(f' "{line[len(" - ") :]}",') + elif line.strip().startswith("#"): + dependencies.append(f" {line.strip()}") + else: + in_dependencies = False + if line == "devel-dependencies:" and not in_devel_dependencies: + in_devel_dependencies = True + continue + if in_devel_dependencies: + if not line: + continue + if line.startswith(" - "): + devel_dependencies.append(f' "{line[len(" - ") :]}",') + elif line.strip().startswith("#"): + devel_dependencies.append(f" {line.strip()}") + else: + in_devel_dependencies = False + if line == "additional-extras:" and not in_optional_dependencies: + in_optional_dependencies = True + continue + if in_optional_dependencies: + if not line: + continue + if line.startswith(" "): + optional_dependencies.append(line) + else: + in_optional_dependencies = False + if not in_dependencies and not in_optional_dependencies and not in_devel_dependencies: + updated_content.append(line) + + _do_stuff( + syntax="yml", + from_path=source_provider_yaml_path, + to_path=target_provider_yaml_path, + 
from_content=original_content, + updated_content=updated_content, + delete_from=True, + ) + if optional_dependencies: + in_dependency = False + optional_dependencies_processed = [] + for line in optional_dependencies: + if line.startswith(" - name: "): + name = line[len(" - name: ") :] + if in_dependency: + optional_dependencies_processed.append("]") + optional_dependencies_processed.append(f'"{name}" = [') + in_dependency = True + elif line.startswith(" -"): + dependency = line[len(" - ") :] + optional_dependencies_processed.append(f' "{dependency}",') + elif line.startswith(" #"): + optional_dependencies_processed.append(f" {line.strip()}") + elif line.startswith(" #"): + if in_dependency: + optional_dependencies_processed.append("]") + in_dependency = False + optional_dependencies_processed.append(f"{line.strip()}") + optional_dependencies_processed.append("]") + else: + optional_dependencies_processed = [] + return ( + dependencies, + devel_dependencies, + optional_dependencies_processed, + ) + + +def create_pyproject_toml( + provider_id: str, + dependencies: list[str], + devel_dependencies: list[str], + optional_dependencies: list[str], +): + dependencies_str = "\n".join(dependencies) + devel_dependencies_str = "\n".join(devel_dependencies) + optional_dependencies_str = "\n".join(optional_dependencies) + start_pyproject_toml = f""" +# Content of this file will be replaced by pre-commit hook +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-SOME_PROVIDER" +version = "VERSION" +description = "Provider package PROVIDER for Apache Airflow" +readme = "README.rst" + +dependencies = [ +{dependencies_str} +] +""" + optional_dependencies_toml = f""" +[project.optional-dependencies] +{optional_dependencies_str} +""" + devel_dependencies_toml = f""" +[dependency-groups] +dev = [ +{devel_dependencies_str} +] + +[project.urls] +""" + pyproject_toml_path = PROVIDERS_DIR_PATH / 
_get_provider_only_path(provider_id) / "pyproject.toml" + console.print( + f"\n[bright_blue]Creating basic pyproject.toml for {provider_id} in {pyproject_toml_path}\n" + ) + + pyproject_toml_content = start_pyproject_toml + if optional_dependencies: + pyproject_toml_content += optional_dependencies_toml + if devel_dependencies: + pyproject_toml_content += devel_dependencies_toml + + _do_stuff(syntax="toml", to_path=pyproject_toml_path, updated_content=pyproject_toml_content.splitlines()) + + +def move_sources(provider_id: str): + source_provider_dir = OLD_PROVIDERS_AIRFLOW_PROVIDERS_SRC_PACKAGE_PATH / _get_provider_only_path( + provider_id + ) + dest_provider_dir = ( + PROVIDERS_DIR_PATH + / _get_provider_only_path(provider_id) + / "src" + / "airflow" + / "providers" + / _get_provider_only_path(provider_id) + ) + console.print(f"\n[bright_blue]Moving sources from {source_provider_dir} to {dest_provider_dir}\n") + _do_stuff(syntax="bash", from_path=source_provider_dir, to_path=dest_provider_dir, delete_from=True) + + +def move_tests(provider_id: str): + source_test_dir = OLD_PROVIDERS_TEST_DIR_PATH / _get_provider_only_path(provider_id) + dest_test_dir = ( + PROVIDERS_DIR_PATH + / _get_provider_only_path(provider_id) + / "tests" + / "providers" + / _get_provider_only_path(provider_id) + ) + console.print(f"\n[bright_blue]Moving tests from {source_test_dir} to {dest_test_dir}\n") + _do_stuff(syntax="bash", from_path=source_test_dir, to_path=dest_test_dir, delete_from=True) + + +def move_system_tests(provider_id: str): + source_system_test_dir = OLD_PROVIDERS_SYSTEM_TEST_DIR_PATH / _get_provider_only_path(provider_id) + dest_system_test_dir = ( + PROVIDERS_DIR_PATH + / _get_provider_only_path(provider_id) + / "tests" + / "system" + / _get_provider_only_path(provider_id) + ) + console.print( + f"\n[bright_blue]Moving system tests from {source_system_test_dir} to {dest_system_test_dir}\n" + ) + _do_stuff(syntax="bash", from_path=source_system_test_dir, 
to_path=dest_system_test_dir, delete_from=True) + + +def replace_system_test_example_includes(provider_id: str): + target_doc_providers_dir = PROVIDERS_DIR_PATH / _get_provider_only_path(provider_id) / "docs" + console.print(f"\n[bright_blue]Replacing system test example includes in {target_doc_providers_dir}\n") + for rst_file in target_doc_providers_dir.rglob("*.rst"): + provider_only_path = _get_provider_only_path(provider_id) + _replace_string( + rst_file, + f"../providers/tests/system/{provider_only_path}/", + f"../providers/{provider_only_path}/tests/system/{provider_only_path}/", + ) + + +def move_provider(provider_id: str): + fix_boring_cyborg(provider_id) + add_docs_to_gitignore(provider_id) + remove_changelog(provider_id) + create_readme(provider_id) + move_docs(provider_id) + dependencies, devel_dependencies, optional_dependencies = move_provider_yaml(provider_id) + create_pyproject_toml(provider_id, dependencies, devel_dependencies, optional_dependencies) + move_sources(provider_id) + move_tests(provider_id) + move_system_tests(provider_id) + replace_system_test_example_includes(provider_id) + + +if __name__ == "__main__": + move_providers() diff --git a/dev/requirements.txt b/dev/requirements.txt index a631eb34cc675..ac47dd8b7451c 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -1,4 +1,4 @@ -click>=8.0 +click>=8.1.8 jinja2>=2.11.3 keyring==25.6.0 PyGithub diff --git a/hatch_build.py b/hatch_build.py index baa6958c336ab..cb0309c942981 100644 --- a/hatch_build.py +++ b/hatch_build.py @@ -189,7 +189,7 @@ "pdbr>=0.8.9", ], "devel-devscripts": [ - "click>=8.0", + "click>=8.1.8", "gitpython>=3.1.40", "incremental>=24.7.2", "pipdeptree>=2.13.1", diff --git a/providers/airbyte/pyproject.toml b/providers/airbyte/pyproject.toml index 8a49050f021ec..37022d69b5ced 100644 --- a/providers/airbyte/pyproject.toml +++ b/providers/airbyte/pyproject.toml @@ -1,4 +1,3 @@ - # Licensed to the Apache Software Foundation (ASF) under one # or more contributor 
license agreements. See the NOTICE file # distributed with this work for additional information @@ -44,14 +43,10 @@ classifiers = [ "Intended Audience :: System Administrators", "Framework :: Apache Airflow", "Framework :: Apache Airflow :: Provider", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Topic :: System :: Monitoring", + "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Topic :: System :: Monitoring", ] requires-python = "~=3.9" + # The dependencies should be modified in place in the generated file # Any change in the dependencies is preserved when the file is regenerated dependencies = [ diff --git a/providers/apache/iceberg/pyproject.toml b/providers/apache/iceberg/pyproject.toml index cfc1cf1023d7c..8059b1b554ded 100644 --- a/providers/apache/iceberg/pyproject.toml +++ b/providers/apache/iceberg/pyproject.toml @@ -1,4 +1,3 @@ - # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. 
See the NOTICE file # distributed with this work for additional information @@ -44,19 +43,16 @@ classifiers = [ "Intended Audience :: System Administrators", "Framework :: Apache Airflow", "Framework :: Apache Airflow :: Provider", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Topic :: System :: Monitoring", + "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Topic :: System :: Monitoring", ] requires-python = "~=3.9" + # The dependencies should be modified in place in the generated file # Any change in the dependencies is preserved when the file is regenerated dependencies = [ "apache-airflow>=2.9.0", ] + # The dependency groups should be modified in place in the generated file # Any change in the dependencies is preserved when the file is regenerated [dependency-groups] diff --git a/providers/celery/pyproject.toml b/providers/celery/pyproject.toml index 6d510f852e1c0..3ec3c28f27719 100644 --- a/providers/celery/pyproject.toml +++ b/providers/celery/pyproject.toml @@ -1,4 +1,3 @@ - # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. 
See the NOTICE file # distributed with this work for additional information @@ -44,14 +43,10 @@ classifiers = [ "Intended Audience :: System Administrators", "Framework :: Apache Airflow", "Framework :: Apache Airflow :: Provider", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Topic :: System :: Monitoring", + "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Topic :: System :: Monitoring", ] requires-python = "~=3.9" + # The dependencies should be modified in place in the generated file # Any change in the dependencies is preserved when the file is regenerated dependencies = [ @@ -65,6 +60,7 @@ dependencies = [ "flower>=1.0.0", "google-re2>=1.0", ] + # The optional dependencies should be modified in place in the generated file # Any change in the dependencies is preserved when the file is regenerated [project.optional-dependencies] diff --git a/providers/edge/pyproject.toml b/providers/edge/pyproject.toml index dfe14b7927267..055e38ec525a3 100644 --- a/providers/edge/pyproject.toml +++ b/providers/edge/pyproject.toml @@ -1,4 +1,3 @@ - # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. 
See the NOTICE file # distributed with this work for additional information @@ -44,14 +43,10 @@ classifiers = [ "Intended Audience :: System Administrators", "Framework :: Apache Airflow", "Framework :: Apache Airflow :: Provider", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Topic :: System :: Monitoring", + "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Topic :: System :: Monitoring", ] requires-python = "~=3.9" + # The dependencies should be modified in place in the generated file # Any change in the dependencies is preserved when the file is regenerated dependencies = [ @@ -71,6 +66,7 @@ dependencies = [ [project.entry-points."apache_airflow_provider"] provider_info = "airflow.providers.edge.get_provider_info:get_provider_info" + [project.entry-points."airflow.plugins"] edge_executor = "airflow.providers.edge.plugins.edge_executor_plugin:EdgeExecutorPlugin" diff --git a/scripts/in_container/run_fix_ownership.py b/scripts/in_container/run_fix_ownership.py index 8062a72980d79..bb25fc17bdc44 100755 --- a/scripts/in_container/run_fix_ownership.py +++ b/scripts/in_container/run_fix_ownership.py @@ -77,8 +77,8 @@ def change_ownership_of_files(path: Path) -> None: # another place if os.environ.get("VERBOSE_COMMANDS", "false") == "true": print(f"Could not change ownership of {file}") - if count_files: - print(f"Changed ownership of {count_files} files back to {host_user_id}:{host_group_id}.") + if count_files and os.environ.get("VERBOSE_COMMANDS", "false") == "true": + print(f"Changed ownership of {count_files} files back to {host_user_id}:{host_group_id}.") if __name__ == "__main__":