Skip to content

Commit

Permalink
Merge pull request #4690 from GSA-TTS/main
Browse files Browse the repository at this point in the history
  • Loading branch information
asteel-gsa authored Feb 6, 2025
2 parents 07d0b7d + d122cc5 commit d6c4fcf
Show file tree
Hide file tree
Showing 16 changed files with 254 additions and 22 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/deploy-application.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ jobs:
cf_password: ${{ secrets.CF_PASSWORD }}
cf_org: gsa-tts-oros-fac
cf_space: ${{ env.space }}
command: cf run-task gsa-fac -k 7G -m 3G --name deploy_backup --command "./fac-backup-util.sh v0.1.10 deploy_backup" --wait
command: cf run-task gsa-fac -k 7G -m 3G --name deploy_backup --command "./fac-backup-util.sh v0.1.11 deploy_backup" --wait

- name: Deploy Preview to cloud.gov
if: ${{ inputs.environment == 'preview' }}
Expand Down Expand Up @@ -124,5 +124,5 @@ jobs:
secrets: inherit
with:
environment: ${{ inputs.environment }}
util_version: "v0.1.10"
util_version: "v0.1.11"
backup_operation: "check_tables"
2 changes: 1 addition & 1 deletion .github/workflows/fac-backup-scheduler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ jobs:
secrets: inherit
with:
environment: ${{ matrix.environment.name }}
util_version: "v0.1.10"
util_version: "v0.1.11"
backup_operation: "scheduled_backup"

4 changes: 2 additions & 2 deletions .github/workflows/fac-backup-util-scheduled.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
---
name: Backup the database with fac-backup-utility
### Common Commands:
# ./fac-backup-util.sh v0.1.10 scheduled_backup
# ./fac-backup-util.sh v0.1.10 daily_backup
# ./fac-backup-util.sh v0.1.11 scheduled_backup
# ./fac-backup-util.sh v0.1.11 daily_backup
on:
workflow_call:
inputs:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/fac-backup-util.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
---
name: Backup the database with fac-backup-utility
### Common Commands:
# ./fac-backup-util.sh v0.1.10 initial_backup
# ./fac-backup-util.sh v0.1.10 deploy_backup
# ./fac-backup-util.sh v0.1.11 initial_backup
# ./fac-backup-util.sh v0.1.11 deploy_backup
on:
workflow_dispatch:
inputs:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/fac-check-tables-scheduler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@ jobs:
secrets: inherit
with:
environment: ${{ matrix.environment.name }}
util_version: "v0.1.10"
util_version: "v0.1.11"
backup_operation: "check_tables"
2 changes: 1 addition & 1 deletion .github/workflows/fac-check-tables.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
name: Check existing tables in an environment
### Common Commands:
# ./fac-backup-util.sh v0.1.10 check_tables
# ./fac-backup-util.sh v0.1.11 check_tables
on:
workflow_dispatch:
inputs:
Expand Down
17 changes: 9 additions & 8 deletions backend/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,15 +359,16 @@
)

elif service["instance_name"] == "backups":
# Backups AWS S3 bucket for the app's backup files
s3_creds = service["credentials"]
# Used for backing up the database https://django-dbbackup.readthedocs.io/en/master/storage.html#id2
DBBACKUP_STORAGE = "storages.backends.s3boto3.S3Boto3Storage"
DBBACKUP_STORAGE_OPTIONS = {
"access_key": s3_creds["access_key_id"],
"secret_key": s3_creds["secret_access_key"],
"bucket_name": s3_creds["bucket"],
"default_acl": "private", # type: ignore
}

AWS_BACKUPS_ACCESS_KEY_ID = s3_creds["access_key_id"]
AWS_BACKUPS_SECRET_ACCESS_KEY = s3_creds["secret_access_key"]
AWS_BACKUPS_STORAGE_BUCKET_NAME = s3_creds["bucket"]
AWS_S3_BACKUPS_REGION_NAME = s3_creds["region"]
AWS_S3_BACKUPS_ENDPOINT = s3_creds["endpoint"]
AWS_S3_BACKUPS_ENDPOINT_URL = f"https://{AWS_S3_BACKUPS_ENDPOINT}"
AWS_PRIVATE_DEFAULT_ACL = "private"

# secure headers
MIDDLEWARE.append("csp.middleware.CSPMiddleware")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from django.core.management.base import BaseCommand
from dissemination.models import MigrationInspectionRecord
from config.settings import ENVIRONMENT, GSA_MIGRATION
from django.db.models import Q

import logging


logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)


class Command(BaseCommand):
    help = """
    Replace 'GSA_MIGRATION' with '' in policies_content and rate_content
    of census_data in a note in dissemination_migrationinspectionrecord
    Usage:
    manage.py update_migrationinspectionrecord
    --year <audit year>
    """

    def add_arguments(self, parser):
        # Year stays a string: audit_year is compared as text on the model.
        parser.add_argument(
            "--year", help="Year(2016 through 2022)", type=str, default="2022"
        )

    def is_year_invalid(self, year):
        """Return True when *year* is outside the migrated range 2016-2022."""
        valid_years = ["2016", "2017", "2018", "2019", "2020", "2021", "2022"]
        return year not in valid_years

    def handle(self, *args, **options):
        """Blank out GSA_MIGRATION placeholder note values for one audit year.

        Walks every MigrationInspectionRecord for the requested year and, for
        notes produced by the ``xform_missing_notes_records`` transformation,
        replaces a ``GSA_MIGRATION`` census_data value with the empty string.
        Records are saved only when at least one note actually changed.
        """
        if ENVIRONMENT not in [
            "LOCAL",
            "DEVELOPMENT",
            "PREVIEW",
            "STAGING",
            "PRODUCTION",
        ]:
            print(f"Environment is not as expected, ENVIRONMENT={ENVIRONMENT}")
            return

        year = options.get("year")
        if self.is_year_invalid(year):
            print(
                f"Invalid year {year}. Expecting 2016 / 2017 / 2018 / 2019 / 2020 / 2021 / 2022"
            )
            return

        migrationinspectionrecords = MigrationInspectionRecord.objects.filter(
            Q(audit_year=year)
        )
        print(f"Count of {year} submissions: {len(migrationinspectionrecords)}")

        count = 0
        for migrationinspectionrecord in migrationinspectionrecords:
            notes = []
            is_updated = False
            for note in migrationinspectionrecord.note:
                # Use short-circuiting `and` instead of bitwise `&`: `&`
                # evaluated BOTH operands unconditionally, so a note from any
                # other transformation function was still indexed as
                # note[0]["census_data"][0]["value"] and could raise
                # KeyError/IndexError when that shape differs.
                if (
                    note[0]["transformation_functions"][0]
                    == "xform_missing_notes_records"
                    and note[0]["census_data"][0]["value"] == GSA_MIGRATION
                ):
                    note[0]["census_data"][0]["value"] = ""
                    is_updated = True
                notes += [note]
            # Persist only records that actually changed to avoid no-op writes.
            if is_updated:
                migrationinspectionrecord.note = notes
                migrationinspectionrecord.save()
                count += 1

        print("Number of records updated = ", count)
80 changes: 80 additions & 0 deletions backend/support/management/commands/delete_stale_backups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Usage:
# Do a delete: python manage.py delete_stale_backups --days X --delete true
# List objects: python manage.py delete_stale_backups --days X

import boto3
from datetime import datetime, timezone, timedelta
from django.conf import settings
from django.core.management.base import BaseCommand
import sys


class Command(BaseCommand):
    help = (
        "List, and optionally delete, objects under backups/ in the backups "
        "S3 bucket that are older than --days days."
    )

    def add_arguments(self, parser):
        parser.add_argument(
            "--days",
            type=int,
            required=True,
            help="Max age a key(file) in days can have before we want to delete it. Value must be (14) or greater.",
        )
        parser.add_argument(
            "--delete",
            required=False,
            default=False,
            # Parse into a real boolean. The original had no type=, so any
            # non-empty string — including "--delete false" — was truthy and
            # still triggered deletion.
            type=lambda v: str(v).strip().lower() in ("true", "1", "yes"),
            help="True/False. Actually do a delete. If not specified, just list the keys found that match.",
        )

    def handle(self, *args, **options):
        """Page through backups/ keys, deleting (or just reporting) stale ones.

        Safety guardrail: refuses to run with --days below 14 so recent
        backups can never be removed.
        """
        days = options["days"]
        delete = options["delete"]

        if days < 14:
            print(
                "Days cannot be less than 14 to prevent up-to-date backups from being deleted. Exiting..."
            )
            sys.exit(1)

        s3_client = boto3.client(
            "s3",
            aws_access_key_id=settings.AWS_BACKUPS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_BACKUPS_SECRET_ACCESS_KEY,
            endpoint_url=settings.AWS_S3_BACKUPS_ENDPOINT_URL,
        )

        # list_objects_v2 caps each response at 1000 keys; the paginator
        # transparently walks every page under the backups/ prefix.
        paginator = s3_client.get_paginator("list_objects_v2")
        pages = paginator.paginate(
            Bucket=settings.AWS_BACKUPS_STORAGE_BUCKET_NAME, Prefix="backups/"
        )

        delete_older_than = datetime.now(timezone.utc) - timedelta(days=days)
        total_count = 0
        for page in pages:
            if "Contents" not in page:
                print("No objects found in the bucket.")
                continue
            for obj in page["Contents"]:
                # LastModified arrives timezone-aware (UTC) from boto3, so it
                # compares directly against our aware cutoff.
                last_modified = obj["LastModified"]
                if not delete:
                    print(
                        f"Delete not sent. {obj['Key']} was last modified on {last_modified}"
                    )
                elif last_modified < delete_older_than:
                    print(
                        f"Deleting {obj['Key']} last modified on {last_modified}"
                    )
                    s3_client.delete_object(
                        Bucket=settings.AWS_BACKUPS_STORAGE_BUCKET_NAME,
                        Key=obj["Key"],
                    )
                    total_count += 1
                else:
                    print(
                        f"Object {obj['Key']} younger than {delete_older_than}. Not deleting."
                    )
        print(f"Total number of objects deleted: {total_count}")
10 changes: 5 additions & 5 deletions docs/backups_and_restores.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Information regarding the fac-backup-utility can be found [at the repository](ht
Database backups occur in the following ways:
1. An initial backup, where a backup has not been run in the target environment. This input of `initial_backup` is important: when it runs the `db_to_db` command, it will not truncate the target table, as the table does not exist in the destination database.
```bash
./fac-backup-util.sh v0.1.10 initial_backup
./fac-backup-util.sh v0.1.11 initial_backup
# Curl the utility
# Install AWS
# DB to S3 table dump (backups)
Expand All @@ -44,7 +44,7 @@ Database backups occur in the following ways:

2. A deploy backup, where the `db_to_db` function is not called. This is a standard backup strategy before the application deploys, to ensure the s3 contents of the primary s3 are sync'd to the backups bucket, and a table dump is stored in the backups bucket.
```bash
./fac-backup-util.sh v0.1.10 deploy_backup
./fac-backup-util.sh v0.1.11 deploy_backup
# Curl the utility
# Install AWS
# DB to S3 table dump (backups)
Expand All @@ -53,7 +53,7 @@ Database backups occur in the following ways:

3. A scheduled backup is run every two hours, across each environment, ensuring that we have a clean backup in s3, rds, and the bucket contents are in sync.
```bash
./fac-backup-util.sh v0.1.10 scheduled_backup
./fac-backup-util.sh v0.1.11 scheduled_backup
# Curl the utility
# Install AWS
# DB to S3 table dump (fac-db -> backups)
Expand All @@ -66,7 +66,7 @@ Restoring from backups can be run via workflow, from designated individuals. The

1. S3 Restore takes a `operation-mm-DD-HH` input (ex `scheduled-06-04-10`), and is required for the backups to be restored. The utility looks in `s3://${bucket}/backups/operation-mm-DD-HH/` for its table dumps, and without supplying the target backups, it will not restore. Once it does a `--data-only` restoration, it will then sync the files from the backups bucket to the application bucket. We do this to ensure the contents of the application bucket are up to date, relative to the data in the database. We know that if we use the latest folder in `/backups/` then the contents of the s3 are the latest available, from the prior backup.
```bash
./fac-restore-util.sh v0.1.10 s3_restore scheduled-06-04-10
./fac-restore-util.sh v0.1.11 s3_restore scheduled-06-04-10
# Curl the utility
# Install AWS
# DB to S3 table dump (backups -> fac-db) [Truncate target table before --data-only pg_restore]
Expand All @@ -81,7 +81,7 @@ daily-mm-dd

2. Database to database restoration also can occur as well, using `psql` to dump the tables from the cold store database to the live database.
```bash
./fac-restore-util.sh v0.1.10 db_restore
./fac-restore-util.sh v0.1.11 db_restore
# Curl the utility
# Install AWS
# DB to DB table dump (fac-snapshot-db -> fac-db) [Truncate target table before dump]
Expand Down
53 changes: 53 additions & 0 deletions docs/deleting-backups.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
### Informational document regarding Management Command "delete_stale_backups"

The purpose of this document is to highlight examples for when a developer wishes to delete stale backups from the s3 bucket `backups`.

**Warning:** This command is classified as a destructive command, and should only be run after receiving confirmation from members of the team, and after putting a formal announcement in the developer slack channel. It is advised that after this command is run, to take a formal backup of the environment just for extra precautions.

#### Information:
The management command is located here: [delete_stale_backups.py](../backend/support/management/commands/delete_stale_backups.py). This command accepts two inputs. `--days` & `--delete`.
- The value of `--days` must be greater than or equal to `14` (`--days 14`)
- The value of `--delete` is required to actually perform the delete, and is a boolean (`--delete true`)
- The full command to perform a delete will look like this:
`python manage.py delete_stale_backups --days 14 --delete true`

#### How to perform a delete
1. Login to cloud.gov `cf login -a api.fr.cloud.gov --sso`
2. Select the target environment if you have not done so after successful authentication `cf t -s <env>`
3. Open a new terminal and tail the logs `cf logs gsa-fac | grep "delete_stale_backups"`
4. Run the command via tasks:
`cf run-task gsa-fac -k 2G -m 3G --name delete_stale_backups --command "python manage.py delete_stale_backups --days 14 --delete true" --wait`
5. Wait for the command to finish.
6. Navigate to [The backup environment action](https://github.com/GSA-TTS/FAC/actions/workflows/fac-backup-util.yml) and perform a backup with the following inputs or alternatively, navigate to [the scheduled backup action](https://github.com/GSA-TTS/FAC/actions/workflows/fac-backup-scheduler.yml) and run.
```sh
branch: main
environment: <env where backups were just deleted (dev/staging/prod)>
version: v0.1.11
operation: on_demand_backup
```

#### Operation outputs examples (Fail):
```
~$ python manage.py delete_stale_backups --days 13
Days cannot less than 14 to prevent up-to-date backups from being deleted. Exiting...
~$
~$ python manage.py delete_stale_backups --days 0 --delete true
Days cannot less than 14 to prevent up-to-date backups from being deleted. Exiting...
~$
~$ python manage.py delete_stale_backups --days 14 --delete true
Object backups/on-demand/02-04-13/public-audit_access.dump younger than 2025-01-22 18:44:02.406263+00:00. Not deleting.
Object backups/on-demand/02-04-13/public-audit_deletedaccess.dump younger than 2025-01-22 18:44:02.406263+00:00. Not deleting.
[...]
```

#### Operation outputs example (Pass):
```
~$ python manage.py delete_stale_backups --days 14 --delete true
Deleting backups/on-demand/02-03-19/public-audit_access.dump last modified on 2025-01-22 18:44:02.406263+00:00
Deleting backups/on-demand/02-03-19/public-audit_deletedaccess.dump last modified on 2025-01-22 18:44:02.406263+00:00
[...]
```

11 changes: 11 additions & 0 deletions terraform/sandbox/sandbox.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ module "sandbox" {
login_client_id = var.login_client_id
login_secret_key = var.login_secret_key
branch_name = var.branch_name
backups_s3_id = module.sandbox-backups-bucket.bucket_id

database_plan = "medium-gp-psql"
https_proxy_instances = 1
Expand All @@ -17,3 +18,13 @@ module "sandbox" {
}
)
}

# Dedicated "backups" S3 bucket for the sandbox space, provisioned via the
# shared terraform-cloudgov s3 module. Its bucket_id is passed to the sandbox
# app module as backups_s3_id so the app can bind to it.
module "sandbox-backups-bucket" {
  source = "github.com/gsa-tts/terraform-cloudgov//s3?ref=v1.1.0"

  cf_org_name   = var.cf_org_name
  cf_space_name = "sandbox"
  name          = "backups"
  s3_plan_name  = "basic"  # basic (non-sandboxed) plan, matching other env buckets
  tags          = ["s3"]
}
4 changes: 4 additions & 0 deletions terraform/shared/modules/app/app.tf
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ resource "cloudfoundry_app" "fac_app" {
service_instance = var.public_s3_id
}

service_binding {
service_instance = var.backups_s3_id
}

service_binding {
service_instance = var.db_id
}
Expand Down
5 changes: 5 additions & 0 deletions terraform/shared/modules/app/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ variable "public_s3_id" {
description = "the full string of the public s3 resource id"
}

variable "backups_s3_id" {
type = string
description = "the full string of the backups s3 resource id"
}

variable "db_id" {
type = string
description = "the full string of the core db resource id"
Expand Down
1 change: 1 addition & 0 deletions terraform/shared/modules/sandbox/app.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module "fac-app" {
new_relic_creds_id = cloudfoundry_user_provided_service.credentials.id
private_s3_id = module.s3-private.bucket_id
public_s3_id = module.s3-public.bucket_id
backups_s3_id = var.backups_s3_id
db_id = module.database.instance_id
backup_db_id = module.snapshot-database.instance_id
app_instances = 1
Expand Down
Loading

0 comments on commit d6c4fcf

Please sign in to comment.