From 3e2a7dc64720734a1fc4aac5e33e82097325ce03 Mon Sep 17 00:00:00 2001 From: Quan Pham Date: Wed, 18 Sep 2024 15:00:15 -0400 Subject: [PATCH] Allow invoice subclasses to explicitly declare their exported columns and column names Two class attributes, `export_columns_list` and `exported_columns_map`, have been added to `Invoice`, along with a method `_filter_columns()`. Subclasses of `Invoice` must now define `export_columns_list`, containing the ordered list of columns that must be exported in their respective invoices. Subclasses can optionally define `exported_columns_map`, containing mappings between "internal" column names and what their name should be when exported. The field name `RATE_FIELD` has been added to `invoice.py`. It was previously forgotten. --- process_report/invoices/NERC_total_invoice.py | 18 ++++++++++ process_report/invoices/billable_invoice.py | 19 +++++++++++ .../invoices/bu_internal_invoice.py | 21 ++++++------ process_report/invoices/invoice.py | 13 +++++++- process_report/invoices/lenovo_invoice.py | 33 ++++++++++--------- .../invoices/nonbillable_invoice.py | 15 +++++++++ .../invoices/pi_specific_invoice.py | 21 +++++++++++- process_report/tests/unit_tests.py | 23 +++++++++++-- 8 files changed, 132 insertions(+), 31 deletions(-) diff --git a/process_report/invoices/NERC_total_invoice.py b/process_report/invoices/NERC_total_invoice.py index 9133333..335c52a 100644 --- a/process_report/invoices/NERC_total_invoice.py +++ b/process_report/invoices/NERC_total_invoice.py @@ -12,6 +12,24 @@ class NERCTotalInvoice(invoice.Invoice): "University of Rhode Island", ] + export_columns_list = [ + invoice.INVOICE_DATE_FIELD, + invoice.PROJECT_FIELD, + invoice.PROJECT_ID_FIELD, + invoice.PI_FIELD, + invoice.INVOICE_EMAIL_FIELD, + invoice.INVOICE_ADDRESS_FIELD, + invoice.INSTITUTION_FIELD, + invoice.INSTITUTION_ID_FIELD, + invoice.SU_HOURS_FIELD, + invoice.SU_TYPE_FIELD, + invoice.RATE_FIELD, + invoice.COST_FIELD, + invoice.CREDIT_FIELD, + 
invoice.CREDIT_CODE_FIELD, + invoice.BALANCE_FIELD, + ] + @property def output_path(self) -> str: return f"NERC-{self.invoice_month}-Total-Invoice.csv" diff --git a/process_report/invoices/billable_invoice.py b/process_report/invoices/billable_invoice.py index efaddd1..f007670 100644 --- a/process_report/invoices/billable_invoice.py +++ b/process_report/invoices/billable_invoice.py @@ -22,6 +22,25 @@ class BillableInvoice(discount_invoice.DiscountInvoice): nonbillable_pis: list[str] nonbillable_projects: list[str] + + export_columns_list = [ + invoice.INVOICE_DATE_FIELD, + invoice.PROJECT_FIELD, + invoice.PROJECT_ID_FIELD, + invoice.PI_FIELD, + invoice.INVOICE_EMAIL_FIELD, + invoice.INVOICE_ADDRESS_FIELD, + invoice.INSTITUTION_FIELD, + invoice.INSTITUTION_ID_FIELD, + invoice.SU_HOURS_FIELD, + invoice.SU_TYPE_FIELD, + invoice.RATE_FIELD, + invoice.COST_FIELD, + invoice.CREDIT_FIELD, + invoice.CREDIT_CODE_FIELD, + invoice.BALANCE_FIELD, + ] + old_pi_filepath: str limit_new_pi_credit_to_partners: bool = False diff --git a/process_report/invoices/bu_internal_invoice.py b/process_report/invoices/bu_internal_invoice.py index 8226b97..bc6f9c2 100644 --- a/process_report/invoices/bu_internal_invoice.py +++ b/process_report/invoices/bu_internal_invoice.py @@ -7,6 +7,16 @@ @dataclass class BUInternalInvoice(discount_invoice.DiscountInvoice): + export_columns_list = [ + invoice.INVOICE_DATE_FIELD, + invoice.PI_FIELD, + "Project", + invoice.COST_FIELD, + invoice.CREDIT_FIELD, + invoice.SUBSIDY_FIELD, + invoice.BALANCE_FIELD, + ] + subsidy_amount: int def _prepare(self): @@ -22,17 +32,6 @@ def get_project(row): ].copy() self.data["Project"] = self.data.apply(get_project, axis=1) self.data[invoice.SUBSIDY_FIELD] = Decimal(0) - self.data = self.data[ - [ - invoice.INVOICE_DATE_FIELD, - invoice.PI_FIELD, - "Project", - invoice.COST_FIELD, - invoice.CREDIT_FIELD, - invoice.SUBSIDY_FIELD, - invoice.BALANCE_FIELD, - ] - ] def _process(self): data_summed_projects = 
self._sum_project_allocations(self.data) diff --git a/process_report/invoices/invoice.py b/process_report/invoices/invoice.py index 8599ec8..7a199d4 100644 --- a/process_report/invoices/invoice.py +++ b/process_report/invoices/invoice.py @@ -23,6 +23,7 @@ INSTITUTION_ID_FIELD = "Institution - Specific Code" SU_HOURS_FIELD = "SU Hours (GBhr or SUhr)" SU_TYPE_FIELD = "SU Type" +RATE_FIELD = "Rate" COST_FIELD = "Cost" CREDIT_FIELD = "Credit" CREDIT_CODE_FIELD = "Credit Code" @@ -33,6 +34,9 @@ @dataclass class Invoice: + export_columns_list = list() + exported_columns_map = dict() + name: str invoice_month: str data: pandas.DataFrame @@ -78,8 +82,15 @@ def _prepare_export(self): that should or should not be exported after processing.""" pass + def _filter_columns(self): + """Filters and renames columns before exporting""" + return self.data.copy()[self.export_columns_list].rename( + columns=self.exported_columns_map + ) + def export(self): - self.data.to_csv(self.output_path, index=False) + export_data = self._filter_columns() + export_data.to_csv(self.output_path, index=False) def export_s3(self, s3_bucket): s3_bucket.upload_file(self.output_path, self.output_s3_key) diff --git a/process_report/invoices/lenovo_invoice.py b/process_report/invoices/lenovo_invoice.py index fa3355f..80e6786 100644 --- a/process_report/invoices/lenovo_invoice.py +++ b/process_report/invoices/lenovo_invoice.py @@ -8,21 +8,24 @@ class LenovoInvoice(invoice.Invoice): LENOVO_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] SU_CHARGE_MULTIPLIER = 1 - def _prepare(self): - self.data = self.data[ - self.data[invoice.SU_TYPE_FIELD].isin(self.LENOVO_SU_TYPES) - ][ - [ - invoice.INVOICE_DATE_FIELD, - invoice.PROJECT_FIELD, - invoice.INSTITUTION_FIELD, - invoice.SU_HOURS_FIELD, - invoice.SU_TYPE_FIELD, - ] - ].copy() + export_columns_list = [ + invoice.INVOICE_DATE_FIELD, + invoice.PROJECT_FIELD, + invoice.INSTITUTION_FIELD, + invoice.SU_HOURS_FIELD, + invoice.SU_TYPE_FIELD, + "SU 
Charge", + "Charge", + ] + exported_columns_map = {invoice.SU_HOURS_FIELD: "SU Hours"} - self.data.rename(columns={invoice.SU_HOURS_FIELD: "SU Hours"}, inplace=True) - self.data.insert(len(self.data.columns), "SU Charge", self.SU_CHARGE_MULTIPLIER) + def _prepare(self): + self.data["SU Charge"] = self.SU_CHARGE_MULTIPLIER def _process(self): - self.data["Charge"] = self.data["SU Hours"] * self.data["SU Charge"] + self.data["Charge"] = self.data[invoice.SU_HOURS_FIELD] * self.data["SU Charge"] + + def _prepare_export(self): + self.data = self.data[ + self.data[invoice.SU_TYPE_FIELD].isin(self.LENOVO_SU_TYPES) + ] diff --git a/process_report/invoices/nonbillable_invoice.py b/process_report/invoices/nonbillable_invoice.py index 701d308..508745a 100644 --- a/process_report/invoices/nonbillable_invoice.py +++ b/process_report/invoices/nonbillable_invoice.py @@ -8,6 +8,21 @@ class NonbillableInvoice(invoice.Invoice): nonbillable_pis: list[str] nonbillable_projects: list[str] + export_columns_list = [ + invoice.INVOICE_DATE_FIELD, + invoice.PROJECT_FIELD, + invoice.PROJECT_ID_FIELD, + invoice.PI_FIELD, + invoice.INVOICE_EMAIL_FIELD, + invoice.INVOICE_ADDRESS_FIELD, + invoice.INSTITUTION_FIELD, + invoice.INSTITUTION_ID_FIELD, + invoice.SU_HOURS_FIELD, + invoice.SU_TYPE_FIELD, + invoice.RATE_FIELD, + invoice.COST_FIELD, + ] + def _prepare_export(self): self.data = self.data[ self.data[invoice.PI_FIELD].isin(self.nonbillable_pis) diff --git a/process_report/invoices/pi_specific_invoice.py b/process_report/invoices/pi_specific_invoice.py index fc2bd63..6d52933 100644 --- a/process_report/invoices/pi_specific_invoice.py +++ b/process_report/invoices/pi_specific_invoice.py @@ -9,6 +9,24 @@ @dataclass class PIInvoice(invoice.Invoice): + export_columns_list = [ + invoice.INVOICE_DATE_FIELD, + invoice.PROJECT_FIELD, + invoice.PROJECT_ID_FIELD, + invoice.PI_FIELD, + invoice.INVOICE_EMAIL_FIELD, + invoice.INVOICE_ADDRESS_FIELD, + invoice.INSTITUTION_FIELD, + 
invoice.INSTITUTION_ID_FIELD, + invoice.SU_HOURS_FIELD, + invoice.SU_TYPE_FIELD, + invoice.RATE_FIELD, + invoice.COST_FIELD, + invoice.CREDIT_FIELD, + invoice.CREDIT_CODE_FIELD, + invoice.BALANCE_FIELD, + ] + def _prepare(self): self.pi_list = self.data[invoice.PI_FIELD].unique() @@ -16,12 +34,13 @@ def export(self): def _export_pi_invoice(pi): if pandas.isna(pi): return - pi_projects = self.data[self.data[invoice.PI_FIELD] == pi] + pi_projects = export_data[export_data[invoice.PI_FIELD] == pi] pi_instituition = pi_projects[invoice.INSTITUTION_FIELD].iat[0] pi_projects.to_csv( f"{self.name}/{pi_instituition}_{pi} {self.invoice_month}.csv" ) + export_data = self._filter_columns() if not os.path.exists( self.name ): # self.name is name of folder storing invoices diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py index 9bce39f..1f6b47e 100644 --- a/process_report/tests/unit_tests.py +++ b/process_report/tests/unit_tests.py @@ -180,7 +180,10 @@ def setUp(self): self.dataframe = pandas.DataFrame(data) self.invoice_month = data["Invoice Month"][0] - def test_export_pi(self): + @mock.patch("process_report.invoices.invoice.Invoice._filter_columns") + def test_export_pi(self, mock_filter_cols): + mock_filter_cols.return_value = self.dataframe + output_dir = tempfile.TemporaryDirectory() pi_inv = test_utils.new_pi_specific_invoice( output_dir.name, invoice_month=self.invoice_month, data=self.dataframe @@ -773,7 +776,7 @@ def test_process_lenovo(self): process_report.PROJECT_FIELD, process_report.INSTITUTION_FIELD, process_report.SU_TYPE_FIELD, - "SU Hours", + process_report.SU_HOURS_FIELD, "SU Charge", "Charge", ] @@ -785,7 +788,9 @@ def test_process_lenovo(self): row[process_report.SU_TYPE_FIELD], ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"], ) - self.assertEqual(row["Charge"], row["SU Charge"] * row["SU Hours"]) + self.assertEqual( + row["Charge"], row["SU Charge"] * row["SU Hours (GBhr or SUhr)"] + ) class TestUploadToS3(TestCase): 
@@ -868,3 +873,15 @@ def test_flag_limit_new_pi_credit(self, mock_load_institute_list): output_df = sample_inv._filter_partners(sample_df) answer_df = pandas.DataFrame({"Institution": ["BU", "HU", "NEU"]}) self.assertTrue(output_df.equals(answer_df)) + + +class TestBaseInvoice(TestCase): + def test_filter_exported_columns(self): + test_invoice = pandas.DataFrame(columns=["C1", "C2", "C3", "C4", "C5"]) + answer_invoice = pandas.DataFrame(columns=["C1", "C3R", "C5R"]) + inv = test_utils.new_base_invoice(data=test_invoice) + inv.export_columns_list = ["C1", "C3", "C5"] + inv.exported_columns_map = {"C3": "C3R", "C5": "C5R"} + result_invoice = inv._filter_columns() + + self.assertTrue(result_invoice.equals(answer_invoice))