From e112a32807a06f6630273bba721c9d252399d21d Mon Sep 17 00:00:00 2001 From: Alex Steel <130377221+asteel-gsa@users.noreply.github.com> Date: Wed, 5 Feb 2025 14:29:24 -0500 Subject: [PATCH 1/3] Clean stale backups (#4686) * Initial commit to clean backups files * Adds the backups bucket to sandbox * Change timedelta to 1 * CRLF -> LF & add days flag * Update to script * Management Command delete_stale_backups * Finalize management command * add default_acl * Reset the backups elif in settings.py * Remove from settings once again * linting * cleanup * Version bump backup utility * Add Workflow * Set default acl * linting * Conditional arguement on days * get rid of scheduler * Clean logic * Linting * add doc for usage --- .github/workflows/deploy-application.yml | 4 +- .github/workflows/fac-backup-scheduler.yml | 2 +- .../workflows/fac-backup-util-scheduled.yml | 4 +- .github/workflows/fac-backup-util.yml | 4 +- .../workflows/fac-check-tables-scheduler.yml | 2 +- .github/workflows/fac-check-tables.yml | 2 +- backend/config/settings.py | 17 ++--- .../commands/delete_stale_backups.py | 72 +++++++++++++++++++ docs/backups_and_restores.md | 10 +-- docs/deleting-backups.md | 53 ++++++++++++++ terraform/sandbox/sandbox.tf | 11 +++ terraform/shared/modules/app/app.tf | 4 ++ terraform/shared/modules/app/variables.tf | 5 ++ terraform/shared/modules/sandbox/app.tf | 1 + terraform/shared/modules/sandbox/variables.tf | 5 ++ 15 files changed, 174 insertions(+), 22 deletions(-) create mode 100644 backend/support/management/commands/delete_stale_backups.py create mode 100644 docs/deleting-backups.md diff --git a/.github/workflows/deploy-application.yml b/.github/workflows/deploy-application.yml index 9d5cfabb59..d4c7fb2d6f 100644 --- a/.github/workflows/deploy-application.yml +++ b/.github/workflows/deploy-application.yml @@ -77,7 +77,7 @@ jobs: cf_password: ${{ secrets.CF_PASSWORD }} cf_org: gsa-tts-oros-fac cf_space: ${{ env.space }} - command: cf run-task gsa-fac -k 7G -m 3G --name deploy_backup --command "./fac-backup-util.sh v0.1.10 deploy_backup" --wait + command: cf run-task gsa-fac -k 7G -m 3G --name deploy_backup --command "./fac-backup-util.sh v0.1.11 deploy_backup" --wait - name: Deploy Preview to cloud.gov if: ${{ inputs.environment == 'preview' }} @@ -124,5 +124,5 @@ jobs: secrets: inherit with: environment: ${{ inputs.environment }} - util_version: "v0.1.10" + util_version: "v0.1.11" backup_operation: "check_tables" diff --git a/.github/workflows/fac-backup-scheduler.yml b/.github/workflows/fac-backup-scheduler.yml index 90924a8fc0..937f5b05a9 100644 --- a/.github/workflows/fac-backup-scheduler.yml +++ b/.github/workflows/fac-backup-scheduler.yml @@ -19,6 +19,6 @@ jobs: secrets: inherit with: environment: ${{ matrix.environment.name }} - util_version: "v0.1.10" + util_version: "v0.1.11" backup_operation: "scheduled_backup" diff --git a/.github/workflows/fac-backup-util-scheduled.yml b/.github/workflows/fac-backup-util-scheduled.yml index c966f2a02c..e5a66d223c 100644 --- a/.github/workflows/fac-backup-util-scheduled.yml +++ b/.github/workflows/fac-backup-util-scheduled.yml @@ -1,8 +1,8 @@ --- name: Backup the database with fac-backup-utility ### Common Commands: -# ./fac-backup-util.sh v0.1.10 scheduled_backup -# ./fac-backup-util.sh v0.1.10 daily_backup +# ./fac-backup-util.sh v0.1.11 scheduled_backup +# ./fac-backup-util.sh v0.1.11 daily_backup on: workflow_call: inputs: diff --git a/.github/workflows/fac-backup-util.yml b/.github/workflows/fac-backup-util.yml index bebcd3f4d5..7f3c529549 
100644
--- a/.github/workflows/fac-backup-util.yml
+++ b/.github/workflows/fac-backup-util.yml
@@ -1,8 +1,8 @@
 ---
 name: Backup the database with fac-backup-utility
 ### Common Commands:
-# ./fac-backup-util.sh v0.1.10 initial_backup
-# ./fac-backup-util.sh v0.1.10 deploy_backup
+# ./fac-backup-util.sh v0.1.11 initial_backup
+# ./fac-backup-util.sh v0.1.11 deploy_backup
 on:
   workflow_dispatch:
     inputs:
diff --git a/.github/workflows/fac-check-tables-scheduler.yml b/.github/workflows/fac-check-tables-scheduler.yml
index cd67fbe2a8..b7ef481116 100644
--- a/.github/workflows/fac-check-tables-scheduler.yml
+++ b/.github/workflows/fac-check-tables-scheduler.yml
@@ -20,5 +20,5 @@ jobs:
     secrets: inherit
     with:
       environment: ${{ matrix.environment.name }}
-      util_version: "v0.1.10"
+      util_version: "v0.1.11"
       backup_operation: "check_tables"
diff --git a/.github/workflows/fac-check-tables.yml b/.github/workflows/fac-check-tables.yml
index 5feab3a588..c0d267d71d 100644
--- a/.github/workflows/fac-check-tables.yml
+++ b/.github/workflows/fac-check-tables.yml
@@ -1,7 +1,7 @@
 ---
 name: Check existing tables in an environment
 ### Common Commands:
-# ./fac-backup-util.sh v0.1.10 check_tables
+# ./fac-backup-util.sh v0.1.11 check_tables
 on:
   workflow_dispatch:
     inputs:
diff --git a/backend/config/settings.py b/backend/config/settings.py
index 7c8884c1ae..4acf5da29c 100644
--- a/backend/config/settings.py
+++ b/backend/config/settings.py
@@ -359,15 +359,16 @@
         )
     elif service["instance_name"] == "backups":
+        # Backups AWS S3 bucket for the app's backup files
         s3_creds = service["credentials"]
-        # Used for backing up the database https://django-dbbackup.readthedocs.io/en/master/storage.html#id2
-        DBBACKUP_STORAGE = "storages.backends.s3boto3.S3Boto3Storage"
-        DBBACKUP_STORAGE_OPTIONS = {
-            "access_key": s3_creds["access_key_id"],
-            "secret_key": s3_creds["secret_access_key"],
-            "bucket_name": s3_creds["bucket"],
-            "default_acl": "private",  # type: ignore
-        }
+
+        AWS_BACKUPS_ACCESS_KEY_ID = s3_creds["access_key_id"]
+        AWS_BACKUPS_SECRET_ACCESS_KEY = s3_creds["secret_access_key"]
+        AWS_BACKUPS_STORAGE_BUCKET_NAME = s3_creds["bucket"]
+        AWS_S3_BACKUPS_REGION_NAME = s3_creds["region"]
+        AWS_S3_BACKUPS_ENDPOINT = s3_creds["endpoint"]
+        AWS_S3_BACKUPS_ENDPOINT_URL = f"https://{AWS_S3_BACKUPS_ENDPOINT}"
+        AWS_PRIVATE_DEFAULT_ACL = "private"
 
 # secure headers
 MIDDLEWARE.append("csp.middleware.CSPMiddleware")
diff --git a/backend/support/management/commands/delete_stale_backups.py b/backend/support/management/commands/delete_stale_backups.py
new file mode 100644
index 0000000000..b142376239
--- /dev/null
+++ b/backend/support/management/commands/delete_stale_backups.py
@@ -0,0 +1,72 @@
+# Usage:
+# Do a delete: python manage.py delete_stale_backups --days X --delete true
+# List objects: python manage.py delete_stale_backups --days X
+
+import boto3
+from datetime import datetime, timezone, timedelta
+from django.conf import settings
+from django.core.management.base import BaseCommand
+import sys
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--days",
+            type=int,
+            required=True,
+            help="Maximum age in days a key (file) may reach before it is deleted. Value must be 14 or greater.",
+        )
+        parser.add_argument(
+            "--delete",
+            required=False,
+            default=False,
+            help="True/False. Actually do a delete. If not specified, just list the keys found that match.",
+        )
+
+    def handle(self, *args, **options):
+        days = options["days"]
+        delete = options["delete"]
+
+        if days < 14:
+            print(
+                "Days cannot be less than 14, to prevent up-to-date backups from being deleted. Exiting..."
+            )
+            sys.exit(1)
+
+        s3_client = boto3.client(
+            "s3",
+            aws_access_key_id=settings.AWS_BACKUPS_ACCESS_KEY_ID,
+            aws_secret_access_key=settings.AWS_BACKUPS_SECRET_ACCESS_KEY,
+            endpoint_url=settings.AWS_S3_BACKUPS_ENDPOINT_URL,
+        )
+
+        objects = s3_client.list_objects(
+            Bucket=settings.AWS_BACKUPS_STORAGE_BUCKET_NAME, Prefix="backups/"
+        )
+        delete_older_than = datetime.now(timezone.utc) - timedelta(days=days)
+        # Check if the bucket contains any objects
+        if "Contents" in objects:
+            for obj in objects["Contents"]:
+                # Get the last modified date of the object
+                last_modified = obj["LastModified"]
+
+                # If the object is older than the --days cutoff, delete it
+                # s3_client.delete_object(Bucket=settings.AWS_STORAGE_BUCKET_NAME, Key=f"backups/{item.file.name}")
+                if delete:
+                    if last_modified < delete_older_than:
+                        print(f"Deleting {obj['Key']} last modified on {last_modified}")
+                        s3_client.delete_object(
+                            Bucket=settings.AWS_BACKUPS_STORAGE_BUCKET_NAME,
+                            Key=obj["Key"],
+                        )
+                    else:
+                        print(
+                            f"Object {obj['Key']} younger than {delete_older_than}. Not deleting."
+                        )
+                else:
+                    print(
+                        f"Delete not sent. {obj['Key']} was last modified on {last_modified}"
+                    )
+        else:
+            print("No objects found in the bucket.")
diff --git a/docs/backups_and_restores.md b/docs/backups_and_restores.md
index 443b8dd094..bc534d2c5e 100644
--- a/docs/backups_and_restores.md
+++ b/docs/backups_and_restores.md
@@ -34,7 +34,7 @@ Information regarding the fac-backup-utility can be found [at the repository](ht
 Database backups occur in the following ways:
 1. An initial backup, where a backup has not been run in the target environment. This input of `initial_backup` is important: when it runs the `db_to_db` command, it will not truncate the target table, since the table does not exist in the destination database.
 ```bash
-./fac-backup-util.sh v0.1.10 initial_backup
+./fac-backup-util.sh v0.1.11 initial_backup
 # Curl the utility
 # Install AWS
 # DB to S3 table dump (backups)
@@ -44,7 +44,7 @@ Database backups occur in the following ways:
 2. A deploy backup, where the `db_to_db` function is not called. This is a standard backup strategy before the application deploys, to ensure the contents of the primary s3 bucket are synced to the backups bucket, and a table dump is stored in the backups bucket.
 ```bash
-./fac-backup-util.sh v0.1.10 deploy_backup
+./fac-backup-util.sh v0.1.11 deploy_backup
 # Curl the utility
 # Install AWS
 # DB to S3 table dump (backups)
@@ -53,7 +53,7 @@ Database backups occur in the following ways:
 3. A scheduled backup is run every two hours, across each environment, ensuring that we have a clean backup in s3 and rds, and that the bucket contents are in sync.
 ```bash
-./fac-backup-util.sh v0.1.10 scheduled_backup
+./fac-backup-util.sh v0.1.11 scheduled_backup
 # Curl the utility
 # Install AWS
 # DB to S3 table dump (fac-db -> backups)
@@ -66,7 +66,7 @@ Restoring from backups can be run via workflow, from designated individuals. The
 1. S3 Restore takes an `operation-mm-DD-HH` input (ex `scheduled-06-04-10`), and is required for the backups to be restored. The utility looks in `s3://${bucket}/backups/operation-mm-DD-HH/` for its table dumps, and without supplying the target backups, it will not restore. Once it does a `--data-only` restoration, it will then sync the files from the backups bucket to the application bucket. We do this to ensure the contents of the application bucket are up to date, relative to the data in the database. We know that if we use the latest folder in `/backups/` then the contents of the s3 are the latest available, from the prior backup.
 ```bash
-./fac-restore-util.sh v0.1.10 s3_restore scheduled-06-04-10
+./fac-restore-util.sh v0.1.11 s3_restore scheduled-06-04-10
 # Curl the utility
 # Install AWS
 # DB to S3 table dump (backups -> fac-db) [Truncate target table before --data-only pg_restore]
 # S3 to S3 sync (backups -> application)
@@ -81,7 +81,7 @@ daily-mm-dd
 2. Database to database restoration can also occur, using `psql` to dump the tables from the cold store database to the live database.
 ```bash
-./fac-restore-util.sh v0.1.10 db_restore
+./fac-restore-util.sh v0.1.11 db_restore
 # Curl the utility
 # Install AWS
 # DB to DB table dump (fac-snapshot-db -> fac-db) [Truncate target table before dump]
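Editor's note: the restore steps above assume you already know which folder under `backups/` is the most recent. A minimal sketch of how one might locate it programmatically, reusing the settings names this PR introduces; the helper name and its use are illustrative only, not part of the PR:

```python
import boto3
from django.conf import settings


def latest_backup_prefix():
    """Return the backups/ folder holding the most recently written dump."""
    client = boto3.client(
        "s3",
        aws_access_key_id=settings.AWS_BACKUPS_ACCESS_KEY_ID,
        aws_secret_access_key=settings.AWS_BACKUPS_SECRET_ACCESS_KEY,
        endpoint_url=settings.AWS_S3_BACKUPS_ENDPOINT_URL,
    )
    newest = None
    paginator = client.get_paginator("list_objects_v2")
    for page in paginator.paginate(
        Bucket=settings.AWS_BACKUPS_STORAGE_BUCKET_NAME, Prefix="backups/"
    ):
        for obj in page.get("Contents", []):
            if newest is None or obj["LastModified"] > newest["LastModified"]:
                newest = obj
    # Keys look like backups/scheduled-06-04-10/public-foo.dump;
    # the folder name is the middle path segment.
    return newest["Key"].split("/")[1] if newest else None
```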
diff --git a/docs/deleting-backups.md b/docs/deleting-backups.md
new file mode 100644
index 0000000000..50f5a5cd5f
--- /dev/null
+++ b/docs/deleting-backups.md
@@ -0,0 +1,53 @@
+### Informational document regarding Management Command "delete_stale_backups"
+
+The purpose of this document is to highlight examples for when a developer wishes to delete stale backups from the s3 bucket `backups`.
+
+**Warning:** This command is classified as a destructive command, and should only be run after receiving confirmation from members of the team, and after putting a formal announcement in the developer slack channel. It is advised that a formal backup of the environment be taken after this command is run, as an extra precaution.
+
+#### Information:
+The management command is located here: [delete_stale_backups.py](../backend/support/management/commands/delete_stale_backups.py). This command accepts two inputs: `--days` and `--delete`.
+- The value of `--days` must be greater than or equal to `14` (`--days 14`)
+- The value of `--delete` is required to actually perform the delete, and is a boolean (`--delete true`)
+- The full command to perform a delete will look like this:
+`python manage.py delete_stale_backups --days 14 --delete true`
+
+#### How to perform a delete
+1. Login to cloud.gov `cf login -a api.fr.cloud.gov --sso`
+2. Select the target environment if you have not done so after successful authentication `cf t -s `
+3. Open a new terminal and tail the logs `cf logs gsa-fac | grep "delete_stale_backups"`
+4. Run the command via tasks:
+`cf run-task gsa-fac -k 2G -m 3G --name delete_stale_backups --command "python manage.py delete_stale_backups --days 14 --delete true" --wait`
+5. Wait for the command to finish.
+6. Navigate to [The backup environment action](https://github.com/GSA-TTS/FAC/actions/workflows/fac-backup-util.yml) and perform a backup with the following inputs, or alternatively navigate to [the scheduled backup action](https://github.com/GSA-TTS/FAC/actions/workflows/fac-backup-scheduler.yml) and run it.
+```sh
+branch: main
+environment: 
+version: v0.1.11
+operation: on_demand_backup
+```
+
+#### Operation output examples (Fail):
+```
+~$ python manage.py delete_stale_backups --days 13
+Days cannot be less than 14, to prevent up-to-date backups from being deleted. Exiting...
+~$
+
+~$ python manage.py delete_stale_backups --days 0 --delete true
+Days cannot be less than 14, to prevent up-to-date backups from being deleted. Exiting...
+~$
+
+~$ python manage.py delete_stale_backups --days 14 --delete true
+Object backups/on-demand/02-04-13/public-audit_access.dump younger than 2025-01-22 18:44:02.406263+00:00. Not deleting.
+Object backups/on-demand/02-04-13/public-audit_deletedaccess.dump younger than 2025-01-22 18:44:02.406263+00:00. Not deleting.
+[...]
+```
+
+#### Operation output example (Pass):
+```
+~$ python manage.py delete_stale_backups --days 14 --delete true
+
+Deleting backups/on-demand/02-03-19/public-audit_access.dump last modified on 2025-01-22 18:44:02.406263+00:00
+Deleting backups/on-demand/02-03-19/public-audit_deletedaccess.dump last modified on 2025-01-22 18:44:02.406263+00:00
+[...]
+```
+
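Editor's note: the age test behind these outputs reduces to a comparison of timezone-aware UTC datetimes, so it behaves the same regardless of the host timezone. A small illustration (not part of the PR; the sample timestamp mirrors the output above):

```python
from datetime import datetime, timezone, timedelta

days = 14
cutoff = datetime.now(timezone.utc) - timedelta(days=days)

# Example LastModified value, as S3 reports it (timezone-aware UTC).
last_modified = datetime(2025, 1, 22, 18, 44, 2, tzinfo=timezone.utc)

if last_modified < cutoff:
    print("stale: deleted when --delete true is passed")
else:
    print("younger than the cutoff: kept")
```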
diff --git a/terraform/sandbox/sandbox.tf b/terraform/sandbox/sandbox.tf
index 498c2756eb..044a6658c7 100644
--- a/terraform/sandbox/sandbox.tf
+++ b/terraform/sandbox/sandbox.tf
@@ -8,6 +8,7 @@ module "sandbox" {
   login_client_id       = var.login_client_id
   login_secret_key      = var.login_secret_key
   branch_name           = var.branch_name
+  backups_s3_id         = module.sandbox-backups-bucket.bucket_id
   database_plan         = "medium-gp-psql"
   https_proxy_instances = 1
@@ -17,3 +18,13 @@
     }
   )
 }
+
+module "sandbox-backups-bucket" {
+  source = "github.com/gsa-tts/terraform-cloudgov//s3?ref=v1.1.0"
+
+  cf_org_name   = var.cf_org_name
+  cf_space_name = "sandbox"
+  name          = "backups"
+  s3_plan_name  = "basic"
+  tags          = ["s3"]
+}
diff --git a/terraform/shared/modules/app/app.tf b/terraform/shared/modules/app/app.tf
index 49762d95da..eab227266a 100644
--- a/terraform/shared/modules/app/app.tf
+++ b/terraform/shared/modules/app/app.tf
@@ -74,6 +74,10 @@ resource "cloudfoundry_app" "fac_app" {
     service_instance = var.public_s3_id
   }
 
+  service_binding {
+    service_instance = var.backups_s3_id
+  }
+
   service_binding {
     service_instance = var.db_id
   }
diff --git a/terraform/shared/modules/app/variables.tf b/terraform/shared/modules/app/variables.tf
index 8acfbb3da7..0acba07313 100644
--- a/terraform/shared/modules/app/variables.tf
+++ b/terraform/shared/modules/app/variables.tf
@@ -48,6 +48,11 @@ variable "public_s3_id" {
   description = "the full string of the public s3 resource id"
 }
 
+variable "backups_s3_id" {
+  type        = string
+  description = "the full string of the backups s3 resource id"
+}
+
 variable "db_id" {
   type        = string
   description = "the full string of the core db resource id"
diff --git a/terraform/shared/modules/sandbox/app.tf b/terraform/shared/modules/sandbox/app.tf
index a3ceacb3b5..3b5d3fd42e 100644
--- a/terraform/shared/modules/sandbox/app.tf
+++ b/terraform/shared/modules/sandbox/app.tf
@@ -12,6 +12,7 @@ module "fac-app" {
   new_relic_creds_id  = cloudfoundry_user_provided_service.credentials.id
   private_s3_id       = module.s3-private.bucket_id
   public_s3_id        = module.s3-public.bucket_id
+  backups_s3_id       = var.backups_s3_id
   db_id               = module.database.instance_id
   backup_db_id        = module.snapshot-database.instance_id
   app_instances       = 1
diff --git a/terraform/shared/modules/sandbox/variables.tf b/terraform/shared/modules/sandbox/variables.tf
index de4592334c..3875d0880d 100644
--- a/terraform/shared/modules/sandbox/variables.tf
+++ b/terraform/shared/modules/sandbox/variables.tf
@@ -133,3 +133,8 @@ variable "denylist" {
     # appname = [ "bad.example.com:443" ]
   }
 }
+
+variable "backups_s3_id" {
+  type        = string
+  description = "the full string of the backups s3 resource id"
+}

From 9139877ff06eb540b3c5fe9300ef0ceb57410969 Mon Sep 17 00:00:00 2001
From: Sudha Kumar <135276194+gsa-suk@users.noreply.github.com>
Date: Wed, 5 Feb 2025 11:53:28 -0800
Subject: [PATCH 2/3] Fix migrationinspectionrecord (#4685)

* Adding management command to update note in migrationinspectionrecord

* WIP

* WIP

* Update

* Moved management command to curation folder

* Added Production
---
 .../commands/fix_migrationinspectionrecord.py | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 backend/curation/management/commands/fix_migrationinspectionrecord.py

diff --git a/backend/curation/management/commands/fix_migrationinspectionrecord.py b/backend/curation/management/commands/fix_migrationinspectionrecord.py
new file mode 100644
index 0000000000..a10643974c
--- /dev/null
+++ b/backend/curation/management/commands/fix_migrationinspectionrecord.py
@@ -0,0 +1,72 @@
+from django.core.management.base import BaseCommand
+from dissemination.models import MigrationInspectionRecord
+from config.settings import ENVIRONMENT, GSA_MIGRATION
+from django.db.models import Q
+
+import logging
+
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.WARNING)
+
+
+class Command(BaseCommand):
+    help = """
+    Replace 'GSA_MIGRATION' with '' in policies_content and rate_content
+    of census_data in a note in dissemination_migrationinspectionrecord
+
+    Usage:
+    manage.py fix_migrationinspectionrecord
+    --year 
+    """
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--year", help="Year (2016 through 2022)", type=str, default="2022"
+        )
+
+    def is_year_invalid(self, year):
+        valid_years = ["2016", "2017", "2018", "2019", "2020", "2021", "2022"]
+        return year not in valid_years
+
+    def handle(self, *args, **options):
+        if ENVIRONMENT not in [
+            "LOCAL",
+            "DEVELOPMENT",
+            "PREVIEW",
+            "STAGING",
+            "PRODUCTION",
+        ]:
+            print(f"Environment is not as expected, ENVIRONMENT={ENVIRONMENT}")
+            return
+
+        year = options.get("year")
+        if self.is_year_invalid(year):
+            print(
+                f"Invalid year {year}. Expecting 2016 / 2017 / 2018 / 2019 / 2020 / 2021 / 2022"
+            )
+            return
+
+        migrationinspectionrecords = MigrationInspectionRecord.objects.filter(
+            Q(audit_year=year)
+        )
+        print(f"Count of {year} submissions: {len(migrationinspectionrecords)}")
+
+        count = 0
+        for migrationinspectionrecord in migrationinspectionrecords:
+            notes = []
+            is_updated = False
+            for note in migrationinspectionrecord.note:
+                if (
+                    note[0]["transformation_functions"][0]
+                    == "xform_missing_notes_records"
+                ) and (note[0]["census_data"][0]["value"] == GSA_MIGRATION):
+                    note[0]["census_data"][0]["value"] = ""
+                    is_updated = True
+                notes += [note]
+            if is_updated:
+                migrationinspectionrecord.note = notes
+                migrationinspectionrecord.save()
+                count += 1
+
+        print("Number of records updated = ", count)
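Editor's note: from the subscripting in the command above, each entry of `migrationinspectionrecord.note` appears to have roughly the following shape. This is inferred from the code, not an official schema:

```python
# Shape inferred from the command's field accesses; illustrative only.
note_entry = [
    {
        "transformation_functions": ["xform_missing_notes_records"],
        "census_data": [
            {"value": "GSA_MIGRATION"},  # reset to "" by the command
        ],
    }
]
```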
From d122cc5fb8dc3c76056f8ad6121158b2781a15c4 Mon Sep 17 00:00:00 2001
From: Alex Steel <130377221+asteel-gsa@users.noreply.github.com>
Date: Wed, 5 Feb 2025 16:29:43 -0500
Subject: [PATCH 3/3] CRLF Fix + Pagination Fix (#4687)

* crlf -> lf

* Fix pagination issue
---
 .../commands/delete_stale_backups.py          | 152 +++++++++---------
 1 file changed, 80 insertions(+), 72 deletions(-)

diff --git a/backend/support/management/commands/delete_stale_backups.py b/backend/support/management/commands/delete_stale_backups.py
index b142376239..999a8bbaad 100644
--- a/backend/support/management/commands/delete_stale_backups.py
+++ b/backend/support/management/commands/delete_stale_backups.py
@@ -1,72 +1,80 @@
-# Usage:
-# Do a delete: python manage.py delete_stale_backups --days X --delete true
-# List objects: python manage.py delete_stale_backups --days X
-
-import boto3
-from datetime import datetime, timezone, timedelta
-from django.conf import settings
-from django.core.management.base import BaseCommand
-import sys
-
-
-class Command(BaseCommand):
-    def add_arguments(self, parser):
-        parser.add_argument(
-            "--days",
-            type=int,
-            required=True,
-            help="Maximum age in days a key (file) may reach before it is deleted. Value must be 14 or greater.",
-        )
-        parser.add_argument(
-            "--delete",
-            required=False,
-            default=False,
-            help="True/False. Actually do a delete. If not specified, just list the keys found that match.",
-        )
-
-    def handle(self, *args, **options):
-        days = options["days"]
-        delete = options["delete"]
-
-        if days < 14:
-            print(
-                "Days cannot be less than 14, to prevent up-to-date backups from being deleted. Exiting..."
-            )
-            sys.exit(1)
-
-        s3_client = boto3.client(
-            "s3",
-            aws_access_key_id=settings.AWS_BACKUPS_ACCESS_KEY_ID,
-            aws_secret_access_key=settings.AWS_BACKUPS_SECRET_ACCESS_KEY,
-            endpoint_url=settings.AWS_S3_BACKUPS_ENDPOINT_URL,
-        )
-
-        objects = s3_client.list_objects(
-            Bucket=settings.AWS_BACKUPS_STORAGE_BUCKET_NAME, Prefix="backups/"
-        )
-        delete_older_than = datetime.now(timezone.utc) - timedelta(days=days)
-        # Check if the bucket contains any objects
-        if "Contents" in objects:
-            for obj in objects["Contents"]:
-                # Get the last modified date of the object
-                last_modified = obj["LastModified"]
-
-                # If the object is older than the --days cutoff, delete it
-                # s3_client.delete_object(Bucket=settings.AWS_STORAGE_BUCKET_NAME, Key=f"backups/{item.file.name}")
-                if delete:
-                    if last_modified < delete_older_than:
-                        print(f"Deleting {obj['Key']} last modified on {last_modified}")
-                        s3_client.delete_object(
-                            Bucket=settings.AWS_BACKUPS_STORAGE_BUCKET_NAME,
-                            Key=obj["Key"],
-                        )
-                    else:
-                        print(
-                            f"Object {obj['Key']} younger than {delete_older_than}. Not deleting."
-                        )
-                else:
-                    print(
-                        f"Delete not sent. {obj['Key']} was last modified on {last_modified}"
-                    )
-        else:
-            print("No objects found in the bucket.")
+# Usage:
+# Do a delete: python manage.py delete_stale_backups --days X --delete true
+# List objects: python manage.py delete_stale_backups --days X
+
+import boto3
+from datetime import datetime, timezone, timedelta
+from django.conf import settings
+from django.core.management.base import BaseCommand
+import sys
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--days",
+            type=int,
+            required=True,
+            help="Maximum age in days a key (file) may reach before it is deleted. Value must be 14 or greater.",
+        )
+        parser.add_argument(
+            "--delete",
+            required=False,
+            default=False,
+            help="True/False. Actually do a delete. If not specified, just list the keys found that match.",
+        )
+
+    def handle(self, *args, **options):
+        days = options["days"]
+        delete = options["delete"]
+
+        if days < 14:
+            print(
+                "Days cannot be less than 14, to prevent up-to-date backups from being deleted. Exiting..."
+            )
+            sys.exit(1)
+
+        s3_client = boto3.client(
+            "s3",
+            aws_access_key_id=settings.AWS_BACKUPS_ACCESS_KEY_ID,
+            aws_secret_access_key=settings.AWS_BACKUPS_SECRET_ACCESS_KEY,
+            endpoint_url=settings.AWS_S3_BACKUPS_ENDPOINT_URL,
+        )
+
+        paginator = s3_client.get_paginator("list_objects_v2")
+        pages = paginator.paginate(
+            Bucket=settings.AWS_BACKUPS_STORAGE_BUCKET_NAME, Prefix="backups/"
+        )
+
+        delete_older_than = datetime.now(timezone.utc) - timedelta(days=days)
+        total_count = 0
+        for page in pages:
+            if "Contents" in page:
+                for obj in page["Contents"]:
+
+                    # Get the last modified date of the object
+                    last_modified = obj["LastModified"]
+
+                    # If the object is older than the --days cutoff, delete it
+                    # s3_client.delete_object(Bucket=settings.AWS_STORAGE_BUCKET_NAME, Key=f"backups/{item.file.name}")
+                    if delete:
+                        if last_modified < delete_older_than:
+                            print(
+                                f"Deleting {obj['Key']} last modified on {last_modified}"
+                            )
+                            s3_client.delete_object(
+                                Bucket=settings.AWS_BACKUPS_STORAGE_BUCKET_NAME,
+                                Key=obj["Key"],
+                            )
+                            total_count += 1
+                        else:
+                            print(
+                                f"Object {obj['Key']} younger than {delete_older_than}. Not deleting."
+                            )
+                    else:
+                        print(
+                            f"Delete not sent. {obj['Key']} was last modified on {last_modified}"
+                        )
+            else:
+                print("No objects found in the bucket.")
+        print(f"Total number of objects deleted: {total_count}")
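Editor's note: the switch from `list_objects` to a `list_objects_v2` paginator in this patch matters because S3 list calls return at most 1,000 keys per request; without pagination, backups beyond the first page would never be examined. A hypothetical smoke test illustrating the difference (not part of the PR; assumes the `moto` library, version 5 or later, is installed):

```python
import boto3
from moto import mock_aws


@mock_aws
def test_paginator_sees_every_key():
    # In-memory S3 stand-in; bucket and key names are made up for the test.
    s3 = boto3.client("s3", region_name="us-east-1")
    s3.create_bucket(Bucket="backups-test")
    for i in range(1200):  # more keys than one list call returns
        s3.put_object(Bucket="backups-test", Key=f"backups/obj-{i}.dump", Body=b"x")

    # A single list_objects_v2 call is truncated after 1000 keys...
    first_page = s3.list_objects_v2(Bucket="backups-test", Prefix="backups/")
    assert first_page["IsTruncated"]

    # ...while the paginator walks every page and sees all 1200.
    paginator = s3.get_paginator("list_objects_v2")
    total = sum(
        len(page.get("Contents", []))
        for page in paginator.paginate(Bucket="backups-test", Prefix="backups/")
    )
    assert total == 1200
```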