Skip to content

Commit

Permalink
don't delete ExcelFile entries from the db (#4200)
Browse files Browse the repository at this point in the history
  • Loading branch information
danswick authored Aug 17, 2024
1 parent 89907be commit 16ce264
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 130 deletions.
141 changes: 67 additions & 74 deletions backend/dissemination/remove_workbook_artifacts.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,67 @@
import logging

from django.conf import settings
from audit.models.models import ExcelFile
from boto3 import client as boto3_client
from botocore.client import ClientError, Config

logger = logging.getLogger(__name__)


def remove_workbook_artifacts(sac):
"""
Remove all workbook artifacts associated with the given sac.
"""
try:
excel_files = ExcelFile.objects.filter(sac=sac)
files = [f"excel/{excel_file.filename}" for excel_file in excel_files]

if files:
# Delete the records from the database
count = excel_files.count()
excel_files.delete()
logger.info(
f"Deleted {count} excelfile records from fac-db for report: {sac.report_id}"
)

# Delete the files from S3 in bulk
delete_files_in_bulk(files, sac)

except ExcelFile.DoesNotExist:
logger.info(f"No files found to delete for report: {sac.report_id}")
except Exception as e:
logger.error(
f"Failed to delete files from fac-db and S3 for report: {sac.report_id}. Error: {e}"
)


def delete_files_in_bulk(filenames, sac):
"""Delete files from S3 in bulk."""
# This client uses the internal endpoint URL because we're making a request to S3 from within the app
s3_client = boto3_client(
service_name="s3",
region_name=settings.AWS_S3_PRIVATE_REGION_NAME,
aws_access_key_id=settings.AWS_PRIVATE_ACCESS_KEY_ID,
aws_secret_access_key=settings.AWS_PRIVATE_SECRET_ACCESS_KEY,
endpoint_url=settings.AWS_S3_PRIVATE_INTERNAL_ENDPOINT,
config=Config(signature_version="s3v4"),
)

try:
delete_objects = [{"Key": filename} for filename in filenames]

response = s3_client.delete_objects(
Bucket=settings.AWS_PRIVATE_STORAGE_BUCKET_NAME,
Delete={"Objects": delete_objects},
)

deleted_files = response.get("Deleted", [])
for deleted in deleted_files:
logger.info(
f"Successfully deleted {deleted['Key']} from S3 for report: {sac.report_id}"
)

errors = response.get("Errors", [])
if errors:
for error in errors:
logger.error(
f"Failed to delete {error['Key']} from S3 for report: {sac.report_id}. Error: {error['Message']}" # nosec B608
)

except ClientError as e:
logger.error(
f"Failed to delete files from S3 for report: {sac.report_id}. Error: {e}"
)
import logging

from django.conf import settings
from audit.models.models import ExcelFile
from boto3 import client as boto3_client
from botocore.client import ClientError, Config

logger = logging.getLogger(__name__)


def remove_workbook_artifacts(sac):
"""
Remove all workbook artifacts associated with the given sac.
"""
try:
excel_files = ExcelFile.objects.filter(sac=sac)
files = [f"excel/{excel_file.filename}" for excel_file in excel_files]

if files:
# Delete the files from S3 in bulk
delete_files_in_bulk(files, sac)

except ExcelFile.DoesNotExist:
logger.info(f"No files found to delete for report: {sac.report_id}")
except Exception as e:
logger.error(
f"Failed to delete files from S3 for report: {sac.report_id}. Error: {e}"
)


def delete_files_in_bulk(filenames, sac):
"""Delete files from S3 in bulk."""
# This client uses the internal endpoint URL because we're making a request to S3 from within the app
s3_client = boto3_client(
service_name="s3",
region_name=settings.AWS_S3_PRIVATE_REGION_NAME,
aws_access_key_id=settings.AWS_PRIVATE_ACCESS_KEY_ID,
aws_secret_access_key=settings.AWS_PRIVATE_SECRET_ACCESS_KEY,
endpoint_url=settings.AWS_S3_PRIVATE_INTERNAL_ENDPOINT,
config=Config(signature_version="s3v4"),
)

try:
delete_objects = [{"Key": filename} for filename in filenames]

response = s3_client.delete_objects(
Bucket=settings.AWS_PRIVATE_STORAGE_BUCKET_NAME,
Delete={"Objects": delete_objects},
)

deleted_files = response.get("Deleted", [])
for deleted in deleted_files:
logger.info(
f"Successfully deleted {deleted['Key']} from S3 for report: {sac.report_id}"
)

errors = response.get("Errors", [])
if errors:
for error in errors:
logger.error(
f"Failed to delete {error['Key']} from S3 for report: {sac.report_id}. Error: {error['Message']}" # nosec B608
)

except ClientError as e:
logger.error(
f"Failed to delete files from S3 for report: {sac.report_id}. Error: {e}"
)
103 changes: 47 additions & 56 deletions backend/dissemination/test_remove_workbook_artifacts.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,47 @@
from django.test import TestCase
from unittest.mock import patch
from audit.models.models import ExcelFile, SingleAuditChecklist
from model_bakery import baker

from dissemination.remove_workbook_artifacts import remove_workbook_artifacts


class RemovedWorkbookArtifactsTestCase(TestCase):

@patch("dissemination.remove_workbook_artifacts.delete_files_in_bulk")
def test_removed_workbook_artifacts_success(self, mock_delete_files_in_bulk):
sac = baker.make(
SingleAuditChecklist,
submission_status=SingleAuditChecklist.STATUS.IN_PROGRESS,
report_id="test_report_id",
)

# Create ExcelFile instances
excel_file_1 = baker.make(ExcelFile, sac=sac, form_section="fake_section")
excel_file_2 = baker.make(
ExcelFile, sac=sac, form_section="another_fake_section"
)

remove_workbook_artifacts(sac)

# Assert that the ExcelFile instances are deleted
self.assertFalse(ExcelFile.objects.filter(sac=sac).exists())

# Assert S3 bulk delete was called with the correct filenames
mock_delete_files_in_bulk.assert_called_once_with(
[
f"excel/{sac.report_id}--{excel_file_1.form_section}.xlsx",
f"excel/{sac.report_id}--{excel_file_2.form_section}.xlsx",
],
sac,
)

@patch("dissemination.remove_workbook_artifacts.delete_files_in_bulk")
def test_removed_workbook_artifacts_no_files(self, mock_delete_files_in_bulk):
sac = baker.make(
SingleAuditChecklist,
submission_status=SingleAuditChecklist.STATUS.IN_PROGRESS,
report_id="test_report_id",
)

# Ensure no ExcelFile instances exist for this SAC
ExcelFile.objects.filter(sac=sac).delete()

remove_workbook_artifacts(sac)

# Assert that no ExcelFile instances exist
self.assertFalse(ExcelFile.objects.filter(sac=sac).exists())

# Assert S3 bulk delete was not called
mock_delete_files_in_bulk.assert_not_called()
from django.test import TestCase
from unittest.mock import patch
from audit.models.models import ExcelFile, SingleAuditChecklist
from model_bakery import baker

from dissemination.remove_workbook_artifacts import remove_workbook_artifacts


class RemovedWorkbookArtifactsTestCase(TestCase):

@patch("dissemination.remove_workbook_artifacts.delete_files_in_bulk")
def test_removed_workbook_artifacts_success(self, mock_delete_files_in_bulk):
sac = baker.make(
SingleAuditChecklist,
submission_status=SingleAuditChecklist.STATUS.IN_PROGRESS,
report_id="test_report_id",
)

# Create ExcelFile instances
excel_file_1 = baker.make(ExcelFile, sac=sac, form_section="fake_section")
excel_file_2 = baker.make(
ExcelFile, sac=sac, form_section="another_fake_section"
)

remove_workbook_artifacts(sac)

# Assert S3 bulk delete was called with the correct filenames
mock_delete_files_in_bulk.assert_called_once_with(
[
f"excel/{sac.report_id}--{excel_file_1.form_section}.xlsx",
f"excel/{sac.report_id}--{excel_file_2.form_section}.xlsx",
],
sac,
)

@patch("dissemination.remove_workbook_artifacts.delete_files_in_bulk")
def test_removed_workbook_artifacts_no_files(self, mock_delete_files_in_bulk):
sac = baker.make(
SingleAuditChecklist,
submission_status=SingleAuditChecklist.STATUS.IN_PROGRESS,
report_id="test_report_id",
)

remove_workbook_artifacts(sac)

# Assert S3 bulk delete was not called
mock_delete_files_in_bulk.assert_not_called()

0 comments on commit 16ce264

Please sign in to comment.