From 018a36cd932749547a82c31a6b93406e12c8a817 Mon Sep 17 00:00:00 2001 From: Quan Pham Date: Wed, 18 Sep 2024 15:53:11 -0400 Subject: [PATCH] Implemented processor for Lenovo processing Note that, for now, only the Lenovo invoice will take the processed data from the `LenovoProcessor`. All other invoices will take the data from `AddInstituteProcessor`. This is due to the processors adding new columns. This odd code design will be removed once invoices gain the feature to filter out their exported columns. --- process_report/invoices/lenovo_invoice.py | 7 -- process_report/process_report.py | 8 ++- process_report/processors/lenovo_processor.py | 20 +----- process_report/tests/unit_tests.py | 71 ++++--------------- process_report/tests/util.py | 5 ++ 5 files changed, 28 insertions(+), 83 deletions(-) diff --git a/process_report/invoices/lenovo_invoice.py b/process_report/invoices/lenovo_invoice.py index 80e6786..9e29c9d 100644 --- a/process_report/invoices/lenovo_invoice.py +++ b/process_report/invoices/lenovo_invoice.py @@ -6,7 +6,6 @@ @dataclass class LenovoInvoice(invoice.Invoice): LENOVO_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] - SU_CHARGE_MULTIPLIER = 1 export_columns_list = [ invoice.INVOICE_DATE_FIELD, @@ -19,12 +18,6 @@ class LenovoInvoice(invoice.Invoice): ] exported_columns_map = {invoice.SU_HOURS_FIELD: "SU Hours"} - def _prepare(self): - self.data["SU Charge"] = self.SU_CHARGE_MULTIPLIER - - def _process(self): - self.data["Charge"] = self.data[invoice.SU_HOURS_FIELD] * self.data["SU Charge"] - def _prepare_export(self): self.data = self.data[ self.data[invoice.SU_TYPE_FIELD].isin(self.LENOVO_SU_TYPES) diff --git a/process_report/process_report.py b/process_report/process_report.py index 33a80c3..fc766ce 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -17,6 +17,7 @@ from process_report.processors import ( validate_pi_alias_processor, add_institution_processor, + lenovo_processor, ) ### PI file field names @@ -214,10 +215,15 @@ def main(): ) add_institute_proc.process() + lenovo_proc = lenovo_processor.LenovoProcessor( + "", invoice_month, add_institute_proc.data + ) + lenovo_proc.process() + ### Finish preliminary processing lenovo_inv = lenovo_invoice.LenovoInvoice( - name=args.Lenovo_file, invoice_month=invoice_month, data=add_institute_proc.data + name=args.Lenovo_file, invoice_month=invoice_month, data=lenovo_proc.data ) nonbillable_inv = nonbillable_invoice.NonbillableInvoice( name=args.nonbillable_file, diff --git a/process_report/processors/lenovo_processor.py b/process_report/processors/lenovo_processor.py index 2a4b162..808e820 100644 --- a/process_report/processors/lenovo_processor.py +++ b/process_report/processors/lenovo_processor.py @@ -7,24 +7,8 @@ @dataclass class LenovoProcessor(processor.Processor): - LENOVO_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] SU_CHARGE_MULTIPLIER = 1 - def _prepare(self): - self.data = self.data[ - self.data[invoice.SU_TYPE_FIELD].isin(self.LENOVO_SU_TYPES) - ][ - [ - invoice.INVOICE_DATE_FIELD, - invoice.PROJECT_FIELD, - invoice.INSTITUTION_FIELD, - invoice.SU_HOURS_FIELD, - invoice.SU_TYPE_FIELD, - ] - ].copy() - - self.data.rename(columns={invoice.SU_HOURS_FIELD: "SU Hours"}, inplace=True) - self.data.insert(len(self.data.columns), "SU Charge", self.SU_CHARGE_MULTIPLIER) - def _process(self): - self.data["Charge"] = self.data["SU Hours"] * self.data["SU Charge"] + self.data["SU Charge"] = self.SU_CHARGE_MULTIPLIER + self.data["Charge"] = self.data[invoice.SU_HOURS_FIELD] * self.data["SU Charge"] diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py index 68ad2ca..b719df2 100644 --- a/process_report/tests/unit_tests.py +++ b/process_report/tests/unit_tests.py @@ -8,7 +8,7 @@ from textwrap import dedent from process_report import process_report, util -from process_report.invoices import lenovo_invoice, nonbillable_invoice +from process_report.invoices import nonbillable_invoice from process_report.tests import util as test_utils @@ -732,65 +732,22 @@ def test_validate_billables(self): ) -class TestExportLenovo(TestCase): - def setUp(self): - data = { - "Invoice Month": [ - "2023-01", - "2023-01", - "2023-01", - "2023-01", - "2023-01", - "2023-01", - ], - "Project - Allocation": [ - "ProjectA", - "ProjectB", - "ProjectC", - "ProjectD", - "ProjectE", - "ProjectF", - ], - "Institution": ["A", "B", "C", "D", "E", "F"], - "SU Hours (GBhr or SUhr)": [1, 10, 100, 4, 432, 10], - "SU Type": [ - "OpenShift GPUA100SXM4", - "OpenShift GPUA100", - "OpenShift GPUA100SXM4", - "OpenStack GPUA100SXM4", - "OpenStack CPU", - "OpenStack GPUK80", - ], - } - self.lenovo_invoice = lenovo_invoice.LenovoInvoice( - "Lenovo", "2023-01", pandas.DataFrame(data) - ) - self.lenovo_invoice.process() - +class TestLenovoProcessor(TestCase): def test_process_lenovo(self): - output_df = self.lenovo_invoice.data - self.assertTrue( - set( - [ - process_report.INVOICE_DATE_FIELD, - process_report.PROJECT_FIELD, - process_report.INSTITUTION_FIELD, - process_report.SU_TYPE_FIELD, - process_report.SU_HOURS_FIELD, - "SU Charge", - "Charge", - ] - ).issubset(output_df) + test_invoice = pandas.DataFrame( + { + "SU Hours (GBhr or SUhr)": [1, 10, 100, 4, 432, 10], + } + ) + answer_invoice = test_invoice.copy() + answer_invoice["SU Charge"] = 1 + answer_invoice["Charge"] = ( + answer_invoice["SU Hours (GBhr or SUhr)"] * answer_invoice["SU Charge"] ) - for i, row in output_df.iterrows(): - self.assertIn( - row[process_report.SU_TYPE_FIELD], - ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"], - ) - self.assertEqual( - row["Charge"], row["SU Charge"] * row["SU Hours (GBhr or SUhr)"] - ) + lenovo_proc = test_utils.new_lenovo_processor(data=test_invoice) + lenovo_proc.process() + self.assertTrue(lenovo_proc.data.equals(answer_invoice)) class TestUploadToS3(TestCase): diff --git a/process_report/tests/util.py b/process_report/tests/util.py index d7ddef4..3a54f7b 100644 --- a/process_report/tests/util.py +++ b/process_report/tests/util.py @@ -10,6 +10,7 @@ from process_report.processors import ( add_institution_processor, validate_pi_alias_processor, + lenovo_processor, ) @@ -73,3 +74,7 @@ def new_validate_pi_alias_processor( return validate_pi_alias_processor.ValidatePIAliasProcessor( name, invoice_month, data, alias_map ) + + +def new_lenovo_processor(name="", invoice_month="0000-00", data=pandas.DataFrame()): + return lenovo_processor.LenovoProcessor(name, invoice_month, data)