From 7a9129a8e49e80eaf4753cd682a1aeb41afc743e Mon Sep 17 00:00:00 2001 From: James Person Date: Fri, 6 Sep 2024 10:37:02 -0400 Subject: [PATCH 1/3] Update "Audit search" -> "Tribal audits" link (#4254) --- backend/templates/includes/nav_primary.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/templates/includes/nav_primary.html b/backend/templates/includes/nav_primary.html index 199020f957..3115593dee 100644 --- a/backend/templates/includes/nav_primary.html +++ b/backend/templates/includes/nav_primary.html @@ -48,7 +48,7 @@ rel="noopener noreferrer">Developer resources
  • - Tribal audits
  • From 672b7e28b3db2a4c959e9a7442fee4760c9cc3ca Mon Sep 17 00:00:00 2001 From: Phil Dominguez <142051477+phildominguez-gsa@users.noreply.github.com> Date: Fri, 6 Sep 2024 11:59:16 -0400 Subject: [PATCH 2/3] Fixing Cypress gen info email fields (#4258) --- backend/cypress/support/general-info.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/cypress/support/general-info.js b/backend/cypress/support/general-info.js index f14b2bd6e8..cc00dd0a44 100644 --- a/backend/cypress/support/general-info.js +++ b/backend/cypress/support/general-info.js @@ -27,7 +27,7 @@ export function testValidGeneralInfo() { cy.get('#auditee_contact_name').type('John Doe'); cy.get('#auditee_contact_title').type('Keymaster'); cy.get('#auditee_phone').type('5558675309'); - cy.get('#auditee_email').type('va@test'); + cy.get('#auditee_email').type('va@test.com'); // Auditor information cy.get('#auditor_ein').type('987654321'); @@ -44,7 +44,7 @@ export function testValidGeneralInfo() { cy.get('#auditor_contact_name').type('Jane Doe'); cy.get('#auditor_contact_title').type('Auditor'); cy.get('#auditor_phone').type('5555555555'); - cy.get('#auditor_email').type('qualified.human.accountant@auditor'); + cy.get('#auditor_email').type('qualified.human.accountant@auditor.com'); cy.get('label[for=secondary_auditors-yes]').click(); From 5dc66d2f61790ae87c1a83a3f980921c3fc609fc Mon Sep 17 00:00:00 2001 From: "Hassan D. M. Sambo" Date: Fri, 6 Sep 2024 12:05:09 -0400 Subject: [PATCH 3/3] =?UTF-8?q?#3748=20Added=20logic=20to=20delete=20unnec?= =?UTF-8?q?essary=20workbook=20artifacts=20via=20django=E2=80=A6=20(#4193)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * #3748 Added logic to delete unnecessary workbook artifacts via django command * #3748 Updated django command to remove files from S3 only * Bug fix * Fixed linting * Fixed unit tests --- .../commands/remove-unneeded-workbooks.py | 54 ++++ .../remove_workbook_artifacts.py | 283 +++++++++++++----- .../test_remove_workbook_artifacts.py | 202 ++++++++++--- 3 files changed, 425 insertions(+), 114 deletions(-) create mode 100644 backend/dissemination/management/commands/remove-unneeded-workbooks.py diff --git a/backend/dissemination/management/commands/remove-unneeded-workbooks.py b/backend/dissemination/management/commands/remove-unneeded-workbooks.py new file mode 100644 index 0000000000..c42600cf28 --- /dev/null +++ b/backend/dissemination/management/commands/remove-unneeded-workbooks.py @@ -0,0 +1,54 @@ +from django.core.management.base import BaseCommand +from dissemination.remove_workbook_artifacts import delete_workbooks + +import logging + + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + help = "Delete workbook artifacts for a specific partition of disseminated reports." + + def add_arguments(self, parser): + parser.add_argument( + "--partition_number", + type=int, + required=True, + help="The partition number to process (e.g., 1, 2, 3).", + ) + parser.add_argument( + "--total_partitions", + type=int, + required=True, + help="The total number of partitions (e.g., 4 if splitting the load into four parts).", + ) + parser.add_argument( + "--page_size", + type=int, + required=False, + default=10, + help="Number of items to process per page", + ) + parser.add_argument( + "--pages", + type=int, + required=False, + default=None, + help="Maximum number of pages to process", + ) + + def handle(self, *args, **options): + partition_number = options["partition_number"] + total_partitions = options["total_partitions"] + page_size = options["page_size"] + pages = options["pages"] + + self.stdout.write( + self.style.SUCCESS( + f"Processing partition {partition_number} of {total_partitions}" + ) + ) + delete_workbooks( + partition_number, total_partitions, page_size=page_size, pages=pages + ) diff --git a/backend/dissemination/remove_workbook_artifacts.py b/backend/dissemination/remove_workbook_artifacts.py index c5cf4085d0..008649c8ef 100644 --- a/backend/dissemination/remove_workbook_artifacts.py +++ b/backend/dissemination/remove_workbook_artifacts.py @@ -1,67 +1,216 @@ -import logging - -from django.conf import settings -from audit.models.models import ExcelFile -from boto3 import client as boto3_client -from botocore.client import ClientError, Config - -logger = logging.getLogger(__name__) - - -def remove_workbook_artifacts(sac): - """ - Remove all workbook artifacts associated with the given sac. - """ - try: - excel_files = ExcelFile.objects.filter(sac=sac) - files = [f"excel/{excel_file.filename}" for excel_file in excel_files] - - if files: - # Delete the files from S3 in bulk - delete_files_in_bulk(files, sac) - - except ExcelFile.DoesNotExist: - logger.info(f"No files found to delete for report: {sac.report_id}") - except Exception as e: - logger.error( - f"Failed to delete files from S3 for report: {sac.report_id}. Error: {e}" - ) - - -def delete_files_in_bulk(filenames, sac): - """Delete files from S3 in bulk.""" - # This client uses the internal endpoint URL because we're making a request to S3 from within the app - s3_client = boto3_client( - service_name="s3", - region_name=settings.AWS_S3_PRIVATE_REGION_NAME, - aws_access_key_id=settings.AWS_PRIVATE_ACCESS_KEY_ID, - aws_secret_access_key=settings.AWS_PRIVATE_SECRET_ACCESS_KEY, - endpoint_url=settings.AWS_S3_PRIVATE_INTERNAL_ENDPOINT, - config=Config(signature_version="s3v4"), - ) - - try: - delete_objects = [{"Key": filename} for filename in filenames] - - response = s3_client.delete_objects( - Bucket=settings.AWS_PRIVATE_STORAGE_BUCKET_NAME, - Delete={"Objects": delete_objects}, - ) - - deleted_files = response.get("Deleted", []) - for deleted in deleted_files: - logger.info( - f"Successfully deleted {deleted['Key']} from S3 for report: {sac.report_id}" - ) - - errors = response.get("Errors", []) - if errors: - for error in errors: - logger.error( - f"Failed to delete {error['Key']} from S3 for report: {sac.report_id}. Error: {error['Message']}" # nosec B608 - ) - - except ClientError as e: - logger.error( - f"Failed to delete files from S3 for report: {sac.report_id}. Error: {e}" - ) +import logging +import math + +from django.conf import settings +from audit.models.models import ExcelFile, SingleAuditChecklist +from boto3 import client as boto3_client +from botocore.client import ClientError, Config +from django.core.paginator import Paginator +from django.core.paginator import PageNotAnInteger, EmptyPage + + +logger = logging.getLogger(__name__) + + +def remove_workbook_artifacts(sac): + """ + Remove all workbook artifacts associated with the given sac. + """ + try: + excel_files = ExcelFile.objects.filter(sac=sac) + files = [f"excel/{excel_file.filename}" for excel_file in excel_files] + + if files: + # Delete the files from S3 in bulk + delete_files_in_bulk(files, sac) + + except ExcelFile.DoesNotExist: + logger.info(f"No files found to delete for report: {sac.report_id}") + except Exception as e: + logger.error( + f"Failed to delete files from S3 for report: {sac.report_id}. Error: {e}" + ) + + +def delete_files_in_bulk(filenames, sac): + """Delete files from S3 in bulk.""" + # This client uses the internal endpoint URL because we're making a request to S3 from within the app + s3_client = boto3_client( + service_name="s3", + region_name=settings.AWS_S3_PRIVATE_REGION_NAME, + aws_access_key_id=settings.AWS_PRIVATE_ACCESS_KEY_ID, + aws_secret_access_key=settings.AWS_PRIVATE_SECRET_ACCESS_KEY, + endpoint_url=settings.AWS_S3_PRIVATE_INTERNAL_ENDPOINT, + config=Config(signature_version="s3v4"), + ) + + try: + delete_objects = [{"Key": filename} for filename in filenames] + + response = s3_client.delete_objects( + Bucket=settings.AWS_PRIVATE_STORAGE_BUCKET_NAME, + Delete={"Objects": delete_objects}, + ) + + deleted_files = response.get("Deleted", []) + for deleted in deleted_files: + logger.info( + f"Successfully deleted {deleted['Key']} from S3 for report: {sac.report_id}" + ) + + errors = response.get("Errors", []) + if errors: + for error in errors: + logger.error( + f"Failed to delete {error['Key']} from S3 for report: {sac.report_id}. Error: {error['Message']}" # nosec B608 + ) + + except ClientError as e: + logger.error( + f"Failed to delete files from S3 for report: {sac.report_id}. Error: {e}" + ) + + +def clean_artifacts(sac_list): + """ + Perform necessary cleanup associated with the given list of sac values. + """ + try: + excel_files = ExcelFile.objects.filter(sac__in=sac_list) + files = [f"excel/{excel_file.filename}" for excel_file in excel_files] + + if files: + logger.info( + f"Found {len(files)} ExcelFile records for reports: {[sac.report_id for sac in sac_list]}" + ) + + # Track results but do not delete the ExcelFile records from the database + successful_deletes, failed_deletes = batch_removal( + files, + sac_list, + { + f"excel/{excel_file.filename}": excel_file.sac.report_id + for excel_file in excel_files + }, + ) + + if failed_deletes: + logger.error( + f"Failed to delete the following files from S3: {failed_deletes}" + ) + if successful_deletes: + logger.info( + f"Successfully deleted the following files from S3: {successful_deletes}" + ) + + except Exception as e: + logger.error(f"Failed to process files for the provided sac values. Error: {e}") + + +def batch_removal(filenames, sac_list, sac_to_report_id_map): + """Delete files from S3 in bulk and return the results.""" + s3_client = boto3_client( + service_name="s3", + region_name=settings.AWS_S3_PRIVATE_REGION_NAME, + aws_access_key_id=settings.AWS_PRIVATE_ACCESS_KEY_ID, + aws_secret_access_key=settings.AWS_PRIVATE_SECRET_ACCESS_KEY, + endpoint_url=settings.AWS_S3_PRIVATE_INTERNAL_ENDPOINT, + config=Config(signature_version="s3v4"), + ) + + try: + delete_objects = [{"Key": filename} for filename in filenames] + response = s3_client.delete_objects( + Bucket=settings.AWS_PRIVATE_STORAGE_BUCKET_NAME, + Delete={"Objects": delete_objects}, + ) + + successful_deletes = [] + failed_deletes = [] + deleted_files = response.get("Deleted", []) + for deleted in deleted_files: + filename = deleted["Key"] + successful_deletes.append( + { + "filename": filename, + "sac_report_id": sac_to_report_id_map[filename], + } + ) + + errors = response.get("Errors", []) + if errors: + for error in errors: + filename = error["Key"] + failed_deletes.append( + { + "filename": filename, + "sac_report_id": sac_to_report_id_map[filename], + "error_message": error["Message"], + } + ) + + return successful_deletes, failed_deletes + + except ClientError as e: + logger.error( + f"Failed to delete files from S3 for sac values: {[sac.report_id for sac in sac_list]}. Error: {e}" + ) + return [], [{"error_message": str(e)}] + except Exception as e: + logger.error(f"Failed to delete files from S3. Error: {e}") + return [], [{"error_message": str(e)}] + + +def delete_workbooks(partition_number, total_partitions, page_size=10, pages=None): + """Iterates over disseminated reports for the specified partition.""" + + if partition_number < 1 or partition_number > total_partitions: + raise ValueError( + "Invalid partition number. It must be between 1 and the total number of partitions." + ) + + all_ids = ( + SingleAuditChecklist.objects.filter( + submission_status=SingleAuditChecklist.STATUS.DISSEMINATED + ) + .values_list("id", flat=True) + .order_by("id") + ) + + total_ids = len(all_ids) + ids_per_partition = math.ceil(total_ids / total_partitions) + + start_index = (partition_number - 1) * ids_per_partition + end_index = min(partition_number * ids_per_partition, total_ids) + + ids_to_process = all_ids[start_index:end_index] + + sacs = SingleAuditChecklist.objects.filter(id__in=ids_to_process).order_by("id") + + paginator = Paginator(sacs, page_size) + total_pages = ( + paginator.num_pages if pages is None else min(pages, paginator.num_pages) + ) + + logger.info( + f"Retrieving {sacs.count()} reports for partition {partition_number} of {total_partitions}" + ) + + for page_number in range(1, total_pages + 1): + try: + page = paginator.page(page_number) + logger.info( + f"Processing page {page_number} with {page.object_list.count()} reports." + ) + + # Extract sac values from the current page + sac_list = list(page.object_list) + clean_artifacts(sac_list) + + except PageNotAnInteger: + logger.error(f"Page number {page_number} is not an integer.") + except EmptyPage: + logger.info(f"No more pages to process after page {page_number}.") + break + except Exception as e: + logger.error(f"An error occurred while processing page {page_number}: {e}") diff --git a/backend/dissemination/test_remove_workbook_artifacts.py b/backend/dissemination/test_remove_workbook_artifacts.py index 6749897983..461b7e7ce7 100644 --- a/backend/dissemination/test_remove_workbook_artifacts.py +++ b/backend/dissemination/test_remove_workbook_artifacts.py @@ -1,47 +1,155 @@ -from django.test import TestCase -from unittest.mock import patch -from audit.models.models import ExcelFile, SingleAuditChecklist -from model_bakery import baker - -from dissemination.remove_workbook_artifacts import remove_workbook_artifacts - - -class RemovedWorkbookArtifactsTestCase(TestCase): - - @patch("dissemination.remove_workbook_artifacts.delete_files_in_bulk") - def test_removed_workbook_artifacts_success(self, mock_delete_files_in_bulk): - sac = baker.make( - SingleAuditChecklist, - submission_status=SingleAuditChecklist.STATUS.IN_PROGRESS, - report_id="test_report_id", - ) - - # Create ExcelFile instances - excel_file_1 = baker.make(ExcelFile, sac=sac, form_section="fake_section") - excel_file_2 = baker.make( - ExcelFile, sac=sac, form_section="another_fake_section" - ) - - remove_workbook_artifacts(sac) - - # Assert S3 bulk delete was called with the correct filenames - mock_delete_files_in_bulk.assert_called_once_with( - [ - f"excel/{sac.report_id}--{excel_file_1.form_section}.xlsx", - f"excel/{sac.report_id}--{excel_file_2.form_section}.xlsx", - ], - sac, - ) - - @patch("dissemination.remove_workbook_artifacts.delete_files_in_bulk") - def test_removed_workbook_artifacts_no_files(self, mock_delete_files_in_bulk): - sac = baker.make( - SingleAuditChecklist, - submission_status=SingleAuditChecklist.STATUS.IN_PROGRESS, - report_id="test_report_id", - ) - - remove_workbook_artifacts(sac) - - # Assert S3 bulk delete was not called - mock_delete_files_in_bulk.assert_not_called() +from django.test import TestCase +from unittest.mock import patch +from audit.models.models import ExcelFile, SingleAuditChecklist +from model_bakery import baker + +from dissemination.remove_workbook_artifacts import ( + clean_artifacts, + delete_workbooks, + remove_workbook_artifacts, +) + + +class RemovedWorkbookArtifactsTestCase(TestCase): + + @patch("dissemination.remove_workbook_artifacts.delete_files_in_bulk") + def test_removed_workbook_artifacts_success(self, mock_delete_files_in_bulk): + sac = baker.make( + SingleAuditChecklist, + submission_status=SingleAuditChecklist.STATUS.IN_PROGRESS, + report_id="test_report_id", + ) + + # Create ExcelFile instances + excel_file_1 = baker.make(ExcelFile, sac=sac, form_section="fake_section") + excel_file_2 = baker.make( + ExcelFile, sac=sac, form_section="another_fake_section" + ) + + remove_workbook_artifacts(sac) + + # Assert that the ExcelFile instances are not deleted + self.assertTrue(ExcelFile.objects.filter(sac=sac).exists()) + + # Assert S3 bulk delete was called with the correct filenames + mock_delete_files_in_bulk.assert_called_once_with( + [ + f"excel/{sac.report_id}--{excel_file_1.form_section}.xlsx", + f"excel/{sac.report_id}--{excel_file_2.form_section}.xlsx", + ], + sac, + ) + + @patch("dissemination.remove_workbook_artifacts.delete_files_in_bulk") + def test_removed_workbook_artifacts_no_files(self, mock_delete_files_in_bulk): + sac = baker.make( + SingleAuditChecklist, + submission_status=SingleAuditChecklist.STATUS.IN_PROGRESS, + report_id="test_report_id", + ) + + # Ensure no ExcelFile instances exist for this SAC + ExcelFile.objects.filter(sac=sac).delete() + + remove_workbook_artifacts(sac) + + # Assert that no ExcelFile instances exist + self.assertFalse(ExcelFile.objects.filter(sac=sac).exists()) + + # Assert S3 bulk delete was not called + mock_delete_files_in_bulk.assert_not_called() + + +class CleanArtifactsTestCase(TestCase): + + @patch("dissemination.remove_workbook_artifacts.batch_removal") + def test_clean_artifacts_success(self, mock_batch_removal): + # Create SAC instances + sac_1 = baker.make(SingleAuditChecklist, report_id="report_id_1") + sac_2 = baker.make(SingleAuditChecklist, report_id="report_id_2") + + # Create ExcelFile instances + excel_file_1 = baker.make(ExcelFile, sac=sac_1, form_section="section_1") + excel_file_2 = baker.make(ExcelFile, sac=sac_2, form_section="section_2") + + sac_list = [sac_1, sac_2] + clean_artifacts(sac_list) + + # Assert that the ExcelFile instances still exist (no deletion) + self.assertTrue(ExcelFile.objects.filter(sac__in=sac_list).exists()) + + # Assert S3 bulk delete was called with the correct filenames + mock_batch_removal.assert_called_once_with( + [ + f"excel/{sac_1.report_id}--{excel_file_1.form_section}.xlsx", + f"excel/{sac_2.report_id}--{excel_file_2.form_section}.xlsx", + ], + sac_list, + { + f"excel/{sac_1.report_id}--{excel_file_1.form_section}.xlsx": sac_1.report_id, + f"excel/{sac_2.report_id}--{excel_file_2.form_section}.xlsx": sac_2.report_id, + }, + ) + + @patch("dissemination.remove_workbook_artifacts.batch_removal") + def test_clean_artifacts_no_files(self, mock_batch_removal): + sac = baker.make(SingleAuditChecklist, report_id="test_report_id") + + # Ensure no ExcelFile instances exist for this SAC + ExcelFile.objects.filter(sac=sac).delete() + + clean_artifacts([sac]) + + # Assert that no ExcelFile instances exist + self.assertFalse(ExcelFile.objects.filter(sac=sac).exists()) + + # Assert S3 bulk delete was not called + mock_batch_removal.assert_not_called() + + +class DeleteWorkbooksTestCase(TestCase): + + def setUp(self): + # Common setup for SAC instances + self.sac_1 = baker.make(SingleAuditChecklist, id=1, report_id="report_1") + self.sac_2 = baker.make(SingleAuditChecklist, id=2, report_id="report_2") + # Create associated ExcelFile instances + self.excel_file_1 = baker.make( + ExcelFile, sac=self.sac_1, form_section="section_1" + ) + self.excel_file_2 = baker.make( + ExcelFile, sac=self.sac_2, form_section="section_2" + ) + # Update submission status to DISSEMINATED + self.sac_1.submission_status = SingleAuditChecklist.STATUS.DISSEMINATED + self.sac_2.submission_status = SingleAuditChecklist.STATUS.DISSEMINATED + self.sac_1.save() + self.sac_2.save() + + @patch("dissemination.remove_workbook_artifacts.clean_artifacts") + def test_delete_workbooks_single_page(self, mock_clean_artifacts): + """Test delete_workbooks with a single page of workbooks""" + delete_workbooks(partition_number=1, total_partitions=1, page_size=10, pages=1) + + mock_clean_artifacts.assert_called_once_with([self.sac_1, self.sac_2]) + + @patch("dissemination.remove_workbook_artifacts.clean_artifacts") + def test_delete_workbooks_multiple_pages(self, mock_clean_artifacts): + """Test delete_workbooks with multiple pages of workbooks""" + delete_workbooks(partition_number=1, total_partitions=1, page_size=1, pages=2) + + self.assertEqual(mock_clean_artifacts.call_count, 2) + + mock_clean_artifacts.assert_any_call([self.sac_1]) + mock_clean_artifacts.assert_any_call([self.sac_2]) + + @patch("dissemination.remove_workbook_artifacts.clean_artifacts") + def test_delete_workbooks_all_pages(self, mock_clean_artifacts): + """Test delete_workbooks with all pages of workbooks""" + + delete_workbooks(partition_number=1, total_partitions=1, page_size=1) + + self.assertEqual(mock_clean_artifacts.call_count, 2) + + mock_clean_artifacts.assert_any_call([self.sac_1]) + mock_clean_artifacts.assert_any_call([self.sac_2])