From 9c168b76e8b0c518b75a6d4226489f68d7a6987f Mon Sep 17 00:00:00 2001 From: Amogh Desai Date: Sun, 3 Dec 2023 05:56:32 +0530 Subject: [PATCH] New breeze command to clean up previous provider artifacts (#35970) --------- Co-authored-by: Jarek Potiuk --- BREEZE.rst | 16 +++ dev/README_RELEASE_PROVIDER_PACKAGES.md | 6 +- .../commands/release_management_commands.py | 83 ++++++++++++- .../release_management_commands_config.py | 7 ++ .../airflow_breeze/utils/common_options.py | 8 ++ dev/provider_packages/remove_old_releases.py | 107 ---------------- images/breeze/output_release-management.svg | 28 +++-- images/breeze/output_release-management.txt | 2 +- ...anagement_clean-old-provider-artifacts.svg | 114 ++++++++++++++++++ ...anagement_clean-old-provider-artifacts.txt | 1 + ...utput_setup_check-all-params-in-groups.svg | 46 +++---- ...utput_setup_check-all-params-in-groups.txt | 2 +- ...output_setup_regenerate-command-images.svg | 26 ++-- ...output_setup_regenerate-command-images.txt | 2 +- 14 files changed, 287 insertions(+), 161 deletions(-) delete mode 100644 dev/provider_packages/remove_old_releases.py create mode 100644 images/breeze/output_release-management_clean-old-provider-artifacts.svg create mode 100644 images/breeze/output_release-management_clean-old-provider-artifacts.txt diff --git a/BREEZE.rst b/BREEZE.rst index a91f1a9119260..4350c243e43c0 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -2369,6 +2369,22 @@ You can read more details about what happens when you update constraints in the `Manually generating image cache and constraints `_ +Cleaning up of old providers +"""""""""""""""""""""""""""" + +During the provider releases, we need to clean up the older provider versions in the SVN release folder. +Earlier this was done using a script, but now it is being migrated to a breeze command to ease the life of +release managers for providers. This can be achieved using ``breeze release-management clean-old-provider-artifacts`` +command. 
+ + +These are all available flags of ``clean-old-provider-artifacts`` command: + +.. image:: ./images/breeze/output_release-management_clean-old-provider-artifacts.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/images/breeze/output_release-management_clean-old-provider-artifacts.svg + :width: 100% + :alt: Breeze Clean Old Provider Artifacts + SBOM generation tasks ---------------------- diff --git a/dev/README_RELEASE_PROVIDER_PACKAGES.md b/dev/README_RELEASE_PROVIDER_PACKAGES.md index 711bb1713c2e6..1f116212cd346 100644 --- a/dev/README_RELEASE_PROVIDER_PACKAGES.md +++ b/dev/README_RELEASE_PROVIDER_PACKAGES.md @@ -1039,11 +1039,11 @@ do svn mv "${file}" "${base_file//rc[0-9]/}" done -# Check which old packages will be removed (you need Python 3.8+ and dev/requirements.txt installed) -python ${AIRFLOW_REPO_ROOT}/dev/provider_packages/remove_old_releases.py --directory . +# Check which old packages will be removed using dry run +breeze release-management clean-old-provider-artifacts --directory . --dry-run # Remove those packages -python ${AIRFLOW_REPO_ROOT}/dev/provider_packages/remove_old_releases.py --directory . --execute +breeze release-management clean-old-provider-artifacts --directory . # You need to do go to the asf-dist directory in order to commit both dev and release together cd ${ASF_DIST_PARENT}/asf-dist diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index 3822d9da4b6fd..ad77e97b79ef4 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -16,6 +16,8 @@ # under the License. 
from __future__ import annotations +import glob +import operator import os import re import shlex @@ -23,11 +25,12 @@ import sys import textwrap import time +from collections import defaultdict from copy import deepcopy from datetime import datetime from pathlib import Path from subprocess import DEVNULL -from typing import IO, Any, Generator, NamedTuple +from typing import IO, TYPE_CHECKING, Any, Generator, NamedTuple import click from rich.progress import Progress @@ -74,6 +77,7 @@ option_chicken_egg_providers, option_commit_sha, option_debug_resources, + option_directory, option_dry_run, option_github_repository, option_historical_python_version, @@ -149,6 +153,17 @@ envvar="DEBUG", ) +if TYPE_CHECKING: + from packaging.version import Version + + +class VersionedFile(NamedTuple): + base: str + version: str + suffix: str + type: str + comparable_version: Version + def run_docker_command_with_debug( shell_params: ShellParams, @@ -209,7 +224,6 @@ def run_docker_command_with_debug( GITPYTHON_VERSION = "3.1.40" RICH_VERSION = "13.7.0" - AIRFLOW_BUILD_DOCKERFILE = f""" FROM python:{DEFAULT_PYTHON_MAJOR_MINOR_VERSION}-slim-{ALLOWED_DEBIAN_VERSIONS[0]} RUN apt-get update && apt-get install -y --no-install-recommends git @@ -1214,6 +1228,56 @@ def _add_chicken_egg_providers_to_build_args( python_build_args["DOCKER_CONTEXT_FILES"] = "./docker-context-files" +@release_management.command( + name="clean-old-provider-artifacts", + help="Cleans the old provider artifacts", +) +@option_directory +@option_verbose +@option_dry_run +def clean_old_provider_artifacts( + directory: str, +): + """Cleans up the old airflow providers artifacts in order to maintain + only one provider version in the release SVN folder""" + cleanup_suffixes = [ + ".tar.gz", + ".tar.gz.sha512", + ".tar.gz.asc", + "-py3-none-any.whl", + "-py3-none-any.whl.sha512", + "-py3-none-any.whl.asc", + ] + + for suffix in cleanup_suffixes: + get_console().print(f"[info]Running provider cleanup for suffix: 
{suffix}[/]") + package_types_dicts: dict[str, list[VersionedFile]] = defaultdict(list) + os.chdir(directory) + + for file in glob.glob(f"*{suffix}"): + versioned_file = split_version_and_suffix(file, suffix) + package_types_dicts[versioned_file.type].append(versioned_file) + + for package_types in package_types_dicts.values(): + package_types.sort(key=operator.attrgetter("comparable_version")) + + for package_types in package_types_dicts.values(): + if len(package_types) == 1: + versioned_file = package_types[0] + get_console().print( + f"[success]Leaving the only version: " + f"{versioned_file.base + versioned_file.version + versioned_file.suffix}[/]" + ) + # Leave only last version from each type + for versioned_file in package_types[:-1]: + get_console().print( + f"""[warning]Removing {versioned_file.base + versioned_file.version + + versioned_file.suffix} as they are older than remaining file""" + ) + command = ["svn", "rm", versioned_file.base + versioned_file.version + versioned_file.suffix] + run_command(command, check=False) + + @release_management.command( name="release-prod-images", help="Release production images to DockerHub (needs DockerHub permissions)." 
) @@ -1841,3 +1905,18 @@ def update_constraints( if confirm_modifications(constraints_repo): commit_constraints_and_tag(constraints_repo, airflow_version, commit_message) push_constraints_and_tag(constraints_repo, remote_name, airflow_version) + + +def split_version_and_suffix(file_name: str, suffix: str) -> VersionedFile: + from packaging.version import Version + + no_suffix_file = file_name[: -len(suffix)] + no_version_file, version = no_suffix_file.rsplit("-", 1) + no_version_file = no_version_file.replace("_", "-") + return VersionedFile( + base=no_version_file + "-", + version=version, + suffix=suffix, + type=no_version_file + "-" + suffix, + comparable_version=Version(version), + ) diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py index fb60e12463e9b..207b02f63e49e 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py @@ -36,6 +36,7 @@ "verify-provider-packages", "generate-providers-metadata", "generate-issue-content-providers", + "clean-old-provider-artifacts", ], } @@ -203,6 +204,12 @@ ], } ], + "breeze release-management clean-old-provider-artifacts": [ + { + "name": "Cleans the old provider artifacts", + "options": ["--directory"], + } + ], "breeze release-management generate-providers-metadata": [ {"name": "Generate providers metadata flags", "options": ["--refresh-constraints", "--python"]} ], diff --git a/dev/breeze/src/airflow_breeze/utils/common_options.py b/dev/breeze/src/airflow_breeze/utils/common_options.py index 4f6705934ad2c..fa8ce7f4456a0 100644 --- a/dev/breeze/src/airflow_breeze/utils/common_options.py +++ b/dev/breeze/src/airflow_breeze/utils/common_options.py @@ -563,6 +563,14 @@ def _set_default_from_parent(ctx: click.core.Context, option: click.core.Option, is_flag=True, envvar="SKIP_CLEANUP", ) + 
+option_directory = click.option( + "--directory", + type=click.Path(exists=True, file_okay=False, dir_okay=True, resolve_path=True), + required=True, + help="Directory to clean the provider artifacts from.", +) + option_include_mypy_volume = click.option( "--include-mypy-volume", help="Whether to include mounting of the mypy volume (useful for debugging mypy).", diff --git a/dev/provider_packages/remove_old_releases.py b/dev/provider_packages/remove_old_releases.py deleted file mode 100644 index 33ee56a751c05..0000000000000 --- a/dev/provider_packages/remove_old_releases.py +++ /dev/null @@ -1,107 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Removes older releases of provider packages from the folder using svn rm. - -It iterates over the folder specified as first parameter and removes all but latest releases of -packages found in that directory. 
-""" -from __future__ import annotations - -import argparse -import glob -import operator -import os -import subprocess -from collections import defaultdict -from typing import NamedTuple - -from packaging.version import Version - - -class VersionedFile(NamedTuple): - base: str - version: str - suffix: str - type: str - comparable_version: Version - - -def split_version_and_suffix(file_name: str, suffix: str) -> VersionedFile: - no_suffix_file = file_name[: -len(suffix)] - no_version_file, version = no_suffix_file.rsplit("-", 1) - no_version_file = no_version_file.replace("_", "-") - return VersionedFile( - base=no_version_file + "-", - version=version, - suffix=suffix, - type=no_version_file + "-" + suffix, - comparable_version=Version(version), - ) - - -def process_all_files(directory: str, suffix: str, execute: bool): - package_types_dicts: dict[str, list[VersionedFile]] = defaultdict(list) - os.chdir(directory) - - for file in glob.glob("*" + suffix): - versioned_file = split_version_and_suffix(file, suffix) - package_types_dicts[versioned_file.type].append(versioned_file) - - for package_types in package_types_dicts.values(): - package_types.sort(key=operator.attrgetter("comparable_version")) - - for package_types in package_types_dicts.values(): - if len(package_types) == 1: - versioned_file = package_types[0] - print( - "Leaving the only version: " - f"{versioned_file.base + versioned_file.version + versioned_file.suffix}" - ) - # Leave only last version from each type - for versioned_file in package_types[:-1]: - command = ["svn", "rm", versioned_file.base + versioned_file.version + versioned_file.suffix] - if not execute: - print(command) - else: - subprocess.run(command, check=True) - - -def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Removes old releases.") - parser.add_argument( - "--directory", - dest="directory", - action="store", - required=True, - help="Directory to remove old releases in", - ) - 
parser.add_argument( - "--execute", dest="execute", action="store_true", help="Execute the removal rather than dry run" - ) - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - process_all_files(args.directory, ".tar.gz", args.execute) - process_all_files(args.directory, ".tar.gz.sha512", args.execute) - process_all_files(args.directory, ".tar.gz.asc", args.execute) - process_all_files(args.directory, "-py3-none-any.whl", args.execute) - process_all_files(args.directory, "-py3-none-any.whl.sha512", args.execute) - process_all_files(args.directory, "-py3-none-any.whl.asc", args.execute) diff --git a/images/breeze/output_release-management.svg b/images/breeze/output_release-management.svg index ccfef4e9ee36d..13ff72831ff6d 100644 --- a/images/breeze/output_release-management.svg +++ b/images/breeze/output_release-management.svg @@ -1,4 +1,4 @@ - +