diff --git a/ocw/lib/EC2.py b/ocw/lib/EC2.py index 7b5dcea7..a5fcffcb 100644 --- a/ocw/lib/EC2.py +++ b/ocw/lib/EC2.py @@ -1,4 +1,4 @@ -from .provider import Provider, Image +from .provider import Provider from webui.settings import PCWConfig, ConfigFile from dateutil.parser import parse import boto3 @@ -89,32 +89,21 @@ def all_clusters(self): return clusters @staticmethod - def needs_to_delete_snapshot(snapshot, cleanup_ec2_max_snapshot_age_days) -> bool: - delete_older_than = date.today() - timedelta(days=cleanup_ec2_max_snapshot_age_days) - if datetime.date(snapshot['StartTime']) < delete_older_than: - regexes = [ - re.compile(r'''^OpenQA upload image$'''), - re.compile(r'''^Created by CreateImage\([\w-]+\) for ami-\w+ from vol-\w+$''') - ] - for regex in regexes: - m = re.match(regex, snapshot['Description'].strip()) - if m: - return True - return False + def needs_to_delete_by_age(creation_time, cleanup_ec2_max_age_days) -> bool: + return datetime.date(creation_time) < (date.today() - timedelta(days=cleanup_ec2_max_age_days)) def cleanup_snapshots(self, cleanup_ec2_max_snapshot_age_days): for region in self.all_regions: response = self.ec2_client(region).describe_snapshots(OwnerIds=['self']) - response['Snapshots'].sort(key=lambda snapshot: snapshot['StartTime'].timestamp()) for snapshot in response['Snapshots']: - if EC2.needs_to_delete_snapshot(snapshot, cleanup_ec2_max_snapshot_age_days): - self.log_info("Deleting snapshot {} in region {} with StartTime={}", snapshot['SnapshotId'], - region, snapshot['StartTime']) + if EC2.needs_to_delete_by_age(snapshot['StartTime'], cleanup_ec2_max_snapshot_age_days): try: if self.dry_run: self.log_info("Snapshot deletion of {} skipped due to dry run mode", snapshot['SnapshotId']) else: + self.log_info("Deleting snapshot {} in region {} with StartTime={}", + snapshot['SnapshotId'], region, snapshot['StartTime']) self.ec2_client(region).delete_snapshot(SnapshotId=snapshot['SnapshotId']) except ClientError as ex: if 
ex.response['Error']['Code'] == 'InvalidSnapshot.InUse': @@ -122,12 +111,11 @@ def cleanup_snapshots(self, cleanup_ec2_max_snapshot_age_days): else: raise ex - def cleanup_volumes(self, cleanup_ec2_max_volumes_age_days): - delete_older_than = date.today() - timedelta(days=cleanup_ec2_max_volumes_age_days) + def cleanup_volumes(self, cleanup_ec2_max_age_days): for region in self.all_regions: response = self.ec2_client(region).describe_volumes() for volume in response['Volumes']: - if datetime.date(volume['CreateTime']) < delete_older_than: + if EC2.needs_to_delete_by_age(volume['CreateTime'], cleanup_ec2_max_age_days): if self.volume_protected(volume): self.log_info('Volume {} has tag DO_NOT_DELETE so protected from deletion', volume['VolumeId']) @@ -209,66 +197,14 @@ def delete_all_clusters(self): self.log_info("Finally deleting {} cluster", cluster) self.eks_client(region).delete_cluster(name=cluster) - def parse_image_name(self, img_name): - regexes = [ - # openqa-SLES12-SP5-EC2.x86_64-0.9.1-BYOS-Build1.55.raw.xz - re.compile(r'''^openqa-SLES - (?P\d+(-SP\d+)?) - -(?PEC2) - \. - (?P[^-]+) - - - (?P\d+\.\d+\.\d+) - - - (?P(BYOS|On-Demand)) - -Build - (?P\d+\.\d+) - \.raw\.xz - ''', re.RegexFlag.X), - # openqa-SLES15-SP2.x86_64-0.9.3-EC2-HVM-Build1.10.raw.xz' - # openqa-SLES15-SP2-BYOS.x86_64-0.9.3-EC2-HVM-Build1.10.raw.xz' - # openqa-SLES15-SP2.aarch64-0.9.3-EC2-HVM-Build1.49.raw.xz' - # openqa-SLES15-SP4-SAP-BYOS.x86_64-0.9.3-EC2-Build150400.1.31.raw.xz - re.compile(r'''^openqa-SLES - (?P\d+(-SP\d+)?) - (-(?P[^\.]+))? - \. - (?P[^-]+) - - - (?P\d+\.\d+\.\d+) - - - (?PEC2[-\w]*) - -Build(\d+\.)? - (?P\d+\.\d+) - \.raw\.xz - ''', re.RegexFlag.X), - # openqa-SLES12-SP4-EC2-HVM-BYOS.x86_64-0.9.2-Build2.56.raw.xz' - re.compile(r'''^openqa-SLES - (?P\d+(-SP\d+)?) - - - (?PEC2[^\.]+) - \. 
- (?P[^-]+) - - - (?P\d+\.\d+\.\d+) - - - Build - (?P\d+\.\d+) - \.raw\.xz - ''', re.RegexFlag.X) - ] - return self.parse_image_name_helper(img_name, regexes) def cleanup_all(self): - cleanup_ec2_max_snapshot_age_days = PCWConfig.get_feature_property('cleanup', 'ec2-max-snapshot-age-days', - self._namespace) - cleanup_ec2_max_volumes_age_days = PCWConfig.get_feature_property('cleanup', 'ec2-max-volumes-age-days', - self._namespace) - self.cleanup_images() - if cleanup_ec2_max_snapshot_age_days >= 0: - self.cleanup_snapshots(cleanup_ec2_max_snapshot_age_days) - if cleanup_ec2_max_volumes_age_days >= 0: - self.cleanup_volumes(cleanup_ec2_max_volumes_age_days) + cleanup_ec2_max_age_days = PCWConfig.get_feature_property('cleanup', 'ec2-max-age-days', self._namespace) + + if cleanup_ec2_max_age_days >= 0: + self.cleanup_images(cleanup_ec2_max_age_days) + self.cleanup_volumes(cleanup_ec2_max_age_days) + self.cleanup_snapshots(cleanup_ec2_max_age_days) if PCWConfig.getBoolean('cleanup/vpc_cleanup', self._namespace): self.cleanup_uploader_vpcs() @@ -389,25 +325,13 @@ def cleanup_uploader_vpcs(self): region) send_mail('VPC deletion locked by running VMs', body) - def cleanup_images(self): + def cleanup_images(self, cleanup_ec2_max_age_days): for region in self.all_regions: response = self.ec2_client(region).describe_images(Owners=['self']) - images = list() for img in response['Images']: - # img is in the format described here: - # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Client.describe_images - m = self.parse_image_name(img['Name']) - if m: - self.log_dbg("Image {} is candidate for deletion with build {}", img['Name'], m['build']) - images.append( - Image(img['Name'], flavor=m['key'], build=m['build'], date=parse(img['CreationDate']), - img_id=img['ImageId'])) - else: - self.log_err(" Unable to parse image name '{}'", img['Name']) - keep_images = self.get_keeping_image_names(images) - for img in [i for i in images if i.name 
not in keep_images]: - self.log_dbg("Delete image '{}' (ami:{})".format(img.name, img.id)) - if self.dry_run: - self.log_info("Image deletion {} skipped due to dry run mode", img.id) - else: - self.ec2_client(region).deregister_image(ImageId=img.id, DryRun=False) + if EC2.needs_to_delete_by_age(parse(img['CreationDate']), cleanup_ec2_max_age_days): + if self.dry_run: + self.log_info("Image deletion {} skipped due to dry run mode", img['ImageId']) + else: + self.log_dbg("Delete image '{}' (ami:{})".format(img['Name'], img['ImageId'])) + self.ec2_client(region).deregister_image(ImageId=img['ImageId'], DryRun=False) diff --git a/ocw/lib/azure.py b/ocw/lib/azure.py index 65cabecf..9bb1aa8b 100644 --- a/ocw/lib/azure.py +++ b/ocw/lib/azure.py @@ -1,4 +1,4 @@ -from .provider import Provider, Image +from .provider import Provider from webui.settings import PCWConfig from azure.identity import ClientSecretCredential from azure.mgmt.resource import ResourceManagementClient @@ -100,126 +100,44 @@ def list_disks_by_resource_group(self, resource_group): def list_by_resource_group(self, resource_group, filters=None): return [item for item in self.resource_mgmt_client().resources.list_by_resource_group( - resource_group, filter=filters)] - - def get_keeping_image_names(self): - images = list() - for item in self.container_client('sle-images').list_blobs(): - m = self.parse_image_name(item.name) - if m: - images.append(Image(item.name, flavor=m['key'], build=m['build'], date=item.last_modified)) - else: - self.log_err("Unable to parse image name '{}'", item.name) - - return super().get_keeping_image_names(images) + resource_group, filter=filters, expand = "changedTime")] def cleanup_all(self): - ''' Cleanup all autodateed data which might created during automated tests.''' - self.cleanup_bootdiagnostics() - - keep_images = self.get_keeping_image_names() - self.cleanup_sle_images_container(keep_images) - self.cleanup_disks_from_rg(keep_images) - 
self.cleanup_images_from_rg(keep_images) - for i in keep_images: - self.log_info("Keep image {} ", i) - - def cleanup_bootdiagnostics(self): - containers = self.bs_client().list_containers() + self.cleanup_blob_containers(); + self.cleanup_images_from_rg(); + self.cleanup_disks_from_rg(); + + def cleanup_blob_containers(self): + containers = self.bs_client().list_containers(include_metadata=True) for c in containers: - self.log_dbg('Found container {}', c.name) - if (re.match('^bootdiagnostics-', c.name)): - self.cleanup_bootdiagnostics_container(c) - - def cleanup_bootdiagnostics_container(self, container): - latest_modification = container.last_modified - container_blobs = self.container_client(container.name).list_blobs() - for blob in container_blobs: - if (latest_modification > blob.last_modified): - latest_modification = blob.last_modified - if (self.older_than_min_age(latest_modification)): - self.log_info("Mark container for deletion {}", container.name) - if self.dry_run: - self.log_info("Deletion of boot diagnostic container {} skipped due to dry run mode", container.name) - else: - self.bs_client().delete_container(container.name) - - def parse_image_name(self, img_name): - regexes = [ - # SLES12-SP5-Azure.x86_64-0.9.1-SAP-BYOS-Build3.3.vhd - re.compile(r""" - SLES - (?P\d+(-SP\d+)?) - -Azure\. - (?P[^-]+) - - - (?P\d+\.\d+\.\d+) - - - (?P[-\w]+) - - - Build(?P\d+\.\d+) - \.vhd - """, - re.X), - - # SLES15-SP2-BYOS.x86_64-0.9.3-Azure-Build1.10.vhd - # SLES15-SP2.x86_64-0.9.3-Azure-Basic-Build1.11.vhd - # SLES15-SP2-SAP-BYOS.x86_64-0.9.2-Azure-Build1.9.vhd - # SLES15-SP4-BYOS.x86_64-0.9.1-Azure-Build150400.2.103.vhd - re.compile(r""" - SLES - (?P\d+(-SP\d+)?) - (-(?P[^\.]+))?\. - (?P[^-]+) - - - (?P\d+\.\d+\.\d+) - (-(?PAzure[-\w]*))? 
- - - Build(\d+\.)?(?P\d+\.\d+) - \.vhd - """, - re.X) - ] - return self.parse_image_name_helper(img_name, regexes) - - def cleanup_sle_images_container(self, keep_images): - container_client = self.container_client('sle-images') - for img in container_client.list_blobs(): - m = self.parse_image_name(img.name) - if m: - self.log_dbg('Blob {} is candidate for deletion with build {} ', img.name, m['build']) - - if img.name not in keep_images: - self.log_info("Delete blob '{}'", img.name) - if self.dry_run: - self.log_info("Deletion of blob image {} skipped due to dry run mode", img.name) - else: - container_client.delete_blob(img.name, delete_snapshots="include") + if 'pcw_ignore' not in c['metadata']: + self.log_dbg('Found container {}', c.name) + container_blobs = self.container_client(c.name).list_blobs() + for blob in container_blobs: + if (self.older_than(blob.last_modified)): + if self.dry_run: + self.log_info("Deletion of blob {} skipped due to dry run mode", blob.name) + else: + self.log_info("Deleting blob {}", blob.name) + self.container_client(c.name).delete_blob(blob.name, delete_snapshots="include") - def cleanup_images_from_rg(self, keep_images): + def cleanup_images_from_rg(self): for item in self.list_images_by_resource_group(self.__resource_group): - m = self.parse_image_name(item.name) - if m: - self.log_dbg('Image {} is candidate for deletion with build {} ', item.name, m['build']) - if item.name not in keep_images: + if (self.older_than(item.changed_time)): + if self.dry_run: + self.log_info("Deletion of image {} skipped due to dry run mode", item.name) + else: self.log_info("Delete image '{}'", item.name) - if self.dry_run: - self.log_info("Deletion of image {} skipped due to dry run mode", item.name) - else: - self.compute_mgmt_client().images.begin_delete(self.__resource_group, item.name) + self.compute_mgmt_client().images.begin_delete(self.__resource_group, item.name) - def cleanup_disks_from_rg(self, keep_images): + def 
cleanup_disks_from_rg(self): for item in self.list_disks_by_resource_group(self.__resource_group): - m = self.parse_image_name(item.name) - if m: - self.log_dbg('Disk {} is candidate for deletion with build {} ', item.name, m['build']) - - if item.name not in keep_images: - if self.compute_mgmt_client().disks.get(self.__resource_group, item.name).managed_by: - self.log_warn("Disk is in use - unable delete {}", item.name) + if (self.older_than(item.changed_time)): + if self.compute_mgmt_client().disks.get(self.__resource_group, item.name).managed_by: + self.log_warn("Disk is in use - unable delete {}", item.name) + else: + if self.dry_run: + self.log_info("Deletion of disk {} skipped due to dry run mode", item.name) else: self.log_info("Delete disk '{}'", item.name) - if self.dry_run: - self.log_info("Deletion of image {} skipped due to dry run mode", item.name) - else: - self.compute_mgmt_client().disks.begin_delete(self.__resource_group, item.name) + self.compute_mgmt_client().disks.begin_delete(self.__resource_group, item.name) diff --git a/ocw/lib/gce.py b/ocw/lib/gce.py index 70c9e00d..dabb9c73 100644 --- a/ocw/lib/gce.py +++ b/ocw/lib/gce.py @@ -1,4 +1,4 @@ -from .provider import Provider, Image +from .provider import Provider import googleapiclient.discovery from google.oauth2 import service_account from dateutil.parser import parse @@ -81,6 +81,7 @@ def delete_instance(self, instance_id, zone): "Deletion of instance {} skipped due to dry run mode", instance_id ) else: + self.log_info("Delete instance {}".format(instance_id)) self.compute_client().instances().delete( project=self.__project, zone=zone, instance=instance_id ).execute() @@ -89,63 +90,6 @@ def delete_instance(self, instance_id, zone): def url_to_name(url): return url[url.rindex("/")+1:] - def parse_image_name(self, img_name): - regexes = [ - # sles12-sp5-gce-x8664-0-9-1-byos-build1-56 - re.compile( - r"""^sles - (?P\d+(-sp\d+)?) 
- - - (?Pgce) - - - (?P[^-]+) - - - (?P\d+-\d+-\d+) - - - (?P(byos|on-demand)) - -build - (?P\d+-\d+) - """, - re.RegexFlag.X, - ), - # sles15-sp2-byos-x8664-0-9-3-gce-build1-10 - # sles15-sp2-x8664-0-9-3-gce-build1-10 - re.compile( - r"""^sles - (?P\d+(-sp\d+)?) - (-(?P[-\w]+))? - - - (?P[^-]+) - - - (?P\d+-\d+-\d+) - - - (?Pgce) - - - build - (?P\d+-\d+) - """, - re.RegexFlag.X, - ), - # sles15-sp1-gce-byos-x8664-1-0-5-build1-101 - re.compile( - r"""^sles - (?P\d+(-sp\d+)?) - (-(?Pgce))? - - - (?P[-\w]+) - - - (?P[^-]+) - - - (?P\d+-\d+-\d+) - - - build - (?P\d+-\d+) - """, - re.RegexFlag.X, - ), - ] - return self.parse_image_name_helper(img_name, regexes) - def cleanup_all(self): images = list() request = self.compute_client().images().list(project=self.__project) @@ -154,50 +98,26 @@ def cleanup_all(self): if "items" not in response: break for image in response["items"]: - # creation:2019-11-04T14:23:06.372-08:00 - # name:sles12-sp5-gce-x8664-0-9-1-byos-build1-56 - m = self.parse_image_name(image["name"]) - if m: - images.append( - Image( - image["name"], - flavor=m["key"], - build=m["build"], - date=parse(image["creationTimestamp"]), + if self.older_than(parse(image["creationTimestamp"])): + if self.dry_run: + self.log_info( "Deletion of image {} skipped due to dry run mode", image["name"]) + else: + self.log_info("Delete image '{}'", image["name"]) + request = ( + self.compute_client() + .images() + .delete(project=self.__project, image=image["name"]) ) - ) - self.log_dbg( - "Image {} is candidate for deletion with build {}", - image["name"], - m["build"], - ) - else: - self.log_err("Unable to parse image name '{}'", image["name"]) + response = request.execute() + if "error" in response: + for e in response["error"]["errors"]: + self.log_err(e["message"]) + if "warnings" in response: + for w in response["warnings"]: + self.log_warn(w["message"]) request = ( self.compute_client() .images() .list_next(previous_request=request, previous_response=response) ) - - 
keep_images = self.get_keeping_image_names(images) - - for img in [i for i in images if i.name not in keep_images]: - self.log_info("Delete image '{}'", img.name) - if self.dry_run: - self.log_info( - "Deletion of image {} skipped due to dry run mode", img.name - ) - else: - request = ( - self.compute_client() - .images() - .delete(project=self.__project, image=img.name) - ) - response = request.execute() - if "error" in response: - for e in response["error"]["errors"]: - self.log_err(e["message"]) - if "warnings" in response: - for w in response["warnings"]: - self.log_warn(w["message"]) diff --git a/ocw/lib/provider.py b/ocw/lib/provider.py index 911490d8..922926bf 100644 --- a/ocw/lib/provider.py +++ b/ocw/lib/provider.py @@ -31,48 +31,16 @@ def getData(self, name=None): return self.auth_json return self.auth_json[name] - def older_than_min_age(self, age): + def older_than(self, age): + """ + older_than - checks whether a resource has exceeded the maximum allowed TTL + maximum allowed TTL is controlled by cleanup/max-age-hours pcw.ini config param + :param age: usually creation time of resource or any other timestamp which may be used to identify + age of the resource + :return: True if resource is already too old, False otherwise + """ return datetime.now(timezone.utc) > age + timedelta( - hours=PCWConfig.get_feature_property('cleanup', 'min-image-age-hours', self._namespace)) - - def needs_to_delete_image(self, order_number, image_date): - if self.older_than_min_age(image_date): - max_images_per_flavor = PCWConfig.get_feature_property('cleanup', 'max-images-per-flavor', - self._namespace) - max_image_age = image_date + timedelta( - hours=PCWConfig.get_feature_property('cleanup', 'max-image-age-hours', self._namespace)) - return order_number >= max_images_per_flavor or max_image_age < datetime.now(timezone.utc) - else: - return False - - def parse_image_name_helper(self, img_name, regex_s, group_key=['version', 'flavor', 'type', 'arch'], - group_build=['kiwi', 
'build']): - for regex in regex_s: - m = re.match(regex, img_name) - if m: - gdict = m.groupdict() - return { - 'key': '-'.join([gdict[k] for k in group_key if k in gdict and gdict[k] is not None]), - 'build': "-".join([gdict[k] for k in group_build if k in gdict and gdict[k] is not None]), - } - return None - - def get_keeping_image_names(self, images): - images_by_flavor = dict() - for img in images: - if (img.flavor not in images_by_flavor): - images_by_flavor[img.flavor] = list() - images_by_flavor[img.flavor].append(img) - - keep_images = list() - for img_list in [images_by_flavor[x] for x in sorted(images_by_flavor)]: - img_list.sort(key=lambda x: LooseVersion(x.build), reverse=True) - for i in range(0, len(img_list)): - img = img_list[i] - if (not self.needs_to_delete_image(i, img.date)): - keep_images.append(img.name) - - return keep_images + hours=PCWConfig.get_feature_property('cleanup', 'max-age-hours', self._namespace)) def log_info(self, message: str, *args: object): if args: @@ -93,16 +61,3 @@ def log_dbg(self, message: str, *args: object): if args: message = message.format(*args) self.logger.debug("[{}] {}".format(self._namespace, message)) - - -class Image: - - def __init__(self, name, flavor, build, date, img_id=None): - self.name = name - self.flavor = flavor - self.build = build - self.date = date - self.id = img_id if img_id else name - - def __str__(self): - return "[{} {} {} {}]".format(self.name, self.flavor, self.build, self.date) diff --git a/templates/pcw.ini b/templates/pcw.ini index 4b569983..f448c8be 100644 --- a/templates/pcw.ini +++ b/templates/pcw.ini @@ -22,18 +22,16 @@ from = pcw@publiccloud.qa.suse.de to = [cleanup] -# Specify how many images per flavor get kept -max-images-per-flavor = 2 -# Max age of an image file -max-images-age-hours = 24 +# Max age in days of EC2 images, volumes and snapshots ( used in EC2 only ) +ec2-max-age-days = 1 # Specify with which namespace, we will do the cleanup. 
# if not specifed default/namespaces list will be taken instead namespaces = qac # Values specified under "cleanup.namespace.{namespace}" have precedence over same values in [cleanup] for this certain namespace [cleanup.namespace.qac] -# EC2 snapshots younger than this amount of days will be ignored -ec2-max-snapshot-age-days = 2 +# Max age in days of EC2 images, volumes and snapshots ( used in EC2 only ) +ec2-max-age-days = 2 # EC2 volumes younger than this amount of days will be ignored ec2-max-volumes-age-days = 2 azure-storage-resourcegroup = openqa-upload diff --git a/webui/settings.py b/webui/settings.py index e8b21c5a..85824841 100644 --- a/webui/settings.py +++ b/webui/settings.py @@ -210,13 +210,10 @@ class PCWConfig(): @staticmethod def get_feature_property(feature: str, property: str, namespace: str = None): default_values = { - 'cleanup/max-images-per-flavor': {'default': 1, 'return_type': int}, - 'cleanup/max-image-age-hours': {'default': 24 * 31, 'return_type': int}, - 'cleanup/min-image-age-hours': {'default': 24, 'return_type': int}, + 'cleanup/max-age-hours': {'default': 24 * 31, 'return_type': int}, 'cleanup/azure-storage-resourcegroup': {'default': 'openqa-upload', 'return_type': str}, 'cleanup/azure-storage-account-name': {'default': 'openqa', 'return_type': str}, - 'cleanup/ec2-max-snapshot-age-days': {'default': -1, 'return_type': int}, - 'cleanup/ec2-max-volumes-age-days': {'default': -1, 'return_type': int}, + 'cleanup/ec2-max-age-days': {'default': -1, 'return_type': int}, 'updaterun/default_ttl': {'default': 44400, 'return_type': int}, 'notify/to': {'default': None, 'return_type': str}, 'notify/age-hours': {'default': 12, 'return_type': int},