diff --git a/process_report/invoices/billable_invoice.py b/process_report/invoices/billable_invoice.py index 58e65ac..e0d97d3 100644 --- a/process_report/invoices/billable_invoice.py +++ b/process_report/invoices/billable_invoice.py @@ -16,23 +16,35 @@ @dataclass class BillableInvoice(invoice.Invoice): + NEW_PI_CREDIT_CODE = "0002" + INITIAL_CREDIT_AMOUNT = 1000 + EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] + PI_S3_FILEPATH = "PIs/PI.csv" + nonbillable_pis: list[str] nonbillable_projects: list[str] old_pi_filepath: str - def _prepare(self): - self.data = self._remove_nonbillables( - self.data, self.nonbillable_pis, self.nonbillable_projects - ) - self.data = self._validate_pi_names(self.data) + @staticmethod + def _load_old_pis(old_pi_filepath) -> pandas.DataFrame: + try: + old_pi_df = pandas.read_csv( + old_pi_filepath, + dtype={ + invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype( + pyarrow.decimal128(21, 2) + ), + invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), + invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), + }, + ) + except FileNotFoundError: + sys.exit("Applying credit 0002 failed. Old PI file does not exist") - def _process(self): - old_pi_df = self._load_old_pis(self.old_pi_filepath) - self.data, updated_old_pi_df = self._apply_credits_new_pi(self.data, old_pi_df) - self._dump_old_pis(self.old_pi_filepath, updated_old_pi_df) + return old_pi_df + @staticmethod def _remove_nonbillables( - self, data: pandas.DataFrame, nonbillable_pis: list[str], nonbillable_projects: list[str], @@ -42,7 +54,8 @@ def _remove_nonbillables( & ~data[invoice.PROJECT_FIELD].isin(nonbillable_projects) ] - def _validate_pi_names(self, data: pandas.DataFrame): + @staticmethod + def _validate_pi_names(data: pandas.DataFrame): invalid_pi_projects = data[pandas.isna(data[invoice.PI_FIELD])] for i, row in invalid_pi_projects.iterrows(): logger.warn( @@ -50,47 +63,88 @@ def _validate_pi_names(self, data: pandas.DataFrame): ) return data[~pandas.isna(data[invoice.PI_FIELD])] - def _load_old_pis(self, old_pi_filepath) -> pandas.DataFrame: - try: - old_pi_df = pandas.read_csv( - old_pi_filepath, - dtype={ - invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype( - pyarrow.decimal128(21, 2) - ), - invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - }, + @staticmethod + def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month): + """Returns time difference between current invoice month and PI's first invoice month + I.e 0 for new PIs + Will raise an error if the PI'a age is negative, which suggests a faulty invoice, or a program bug""" + first_invoice_month = old_pi_df.loc[ + old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH + ] + if first_invoice_month.empty: + return 0 + + month_diff = util.get_month_diff(invoice_month, first_invoice_month.iat[0]) + if month_diff < 0: + sys.exit( + f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!" ) - except FileNotFoundError: - sys.exit("Applying credit 0002 failed. Old PI file does not exist") + else: + return month_diff - return old_pi_df + def _prepare(self): + self.data = self._remove_nonbillables( + self.data, self.nonbillable_pis, self.nonbillable_projects + ) + self.data = self._validate_pi_names(self.data) + self.data[invoice.CREDIT_FIELD] = None + self.data[invoice.CREDIT_CODE_FIELD] = None + self.data[invoice.BALANCE_FIELD] = Decimal(0) + self.old_pi_df = self._load_old_pis(self.old_pi_filepath) + + def _process(self): + self.data, self.updated_old_pi_df = self._apply_credits_new_pi( + self.data, self.old_pi_df + ) + + def _prepare_export(self): + self.updated_old_pi_df = self.updated_old_pi_df.astype( + { + invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype( + pyarrow.decimal128(21, 2) + ), + invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), + invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), + }, + ) + + def export(self): + super().export() + self.old_pi_df.to_csv(self.old_pi_filepath, index=False) + + def export_s3(self, s3_bucket): + super().export_s3(s3_bucket) + s3_bucket.upload_file(self.old_pi_filepath, self.PI_S3_FILEPATH) def _apply_credits_new_pi( self, data: pandas.DataFrame, old_pi_df: pandas.DataFrame ): - new_pi_credit_code = "0002" - INITIAL_CREDIT_AMOUNT = 1000 - EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] - - data[invoice.CREDIT_FIELD] = None - data[invoice.CREDIT_CODE_FIELD] = None - data[invoice.BALANCE_FIELD] = Decimal(0) - - current_pi_set = set(data[invoice.PI_FIELD]) - invoice_month = data[invoice.INVOICE_DATE_FIELD].iat[0] - invoice_pis = old_pi_df[old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month] - if invoice_pis[invoice.PI_INITIAL_CREDITS].empty or pandas.isna( - new_pi_credit_amount := invoice_pis[invoice.PI_INITIAL_CREDITS].iat[0] + def get_initial_credit_amount( + old_pi_df, invoice_month, default_initial_credit_amount ): - new_pi_credit_amount = INITIAL_CREDIT_AMOUNT - - print(f"New PI Credit set at {new_pi_credit_amount} for {invoice_month}") + first_month_processed_pis = old_pi_df[ + old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month + ] + if first_month_processed_pis[ + invoice.PI_INITIAL_CREDITS + ].empty or pandas.isna( + new_pi_credit_amount := first_month_processed_pis[ + invoice.PI_INITIAL_CREDITS + ].iat[0] + ): + new_pi_credit_amount = default_initial_credit_amount + + return new_pi_credit_amount + + new_pi_credit_amount = get_initial_credit_amount( + old_pi_df, self.invoice_month, self.INITIAL_CREDIT_AMOUNT + ) + print(f"New PI Credit set at {new_pi_credit_amount} for {self.invoice_month}") + current_pi_set = set(data[invoice.PI_FIELD]) for pi in current_pi_set: pi_projects = data[data[invoice.PI_FIELD] == pi] - pi_age = self._get_pi_age(old_pi_df, pi, invoice_month) + pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month) pi_old_pi_entry = old_pi_df.loc[ old_pi_df[invoice.PI_PI_FIELD] == pi ].squeeze() @@ -101,7 +155,7 @@ def _apply_credits_new_pi( else: if pi_age == 0: if len(pi_old_pi_entry) == 0: - pi_entry = [pi, invoice_month, new_pi_credit_amount, 0, 0] + pi_entry = [pi, self.invoice_month, new_pi_credit_amount, 0, 0] old_pi_df = pandas.concat( [ pandas.DataFrame([pi_entry], columns=old_pi_df.columns), @@ -126,7 +180,7 @@ def _apply_credits_new_pi( for i, row in pi_projects.iterrows(): if ( remaining_credit == 0 - or row[invoice.SU_TYPE_FIELD] in EXCLUDE_SU_TYPES + or row[invoice.SU_TYPE_FIELD] in self.EXCLUDE_SU_TYPES ): data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD] else: @@ -134,7 +188,7 @@ def _apply_credits_new_pi( applied_credit = min(project_cost, remaining_credit) data.at[i, invoice.CREDIT_FIELD] = applied_credit - data.at[i, invoice.CREDIT_CODE_FIELD] = new_pi_credit_code + data.at[i, invoice.CREDIT_CODE_FIELD] = self.NEW_PI_CREDIT_CODE data.at[i, invoice.BALANCE_FIELD] = ( row[invoice.COST_FIELD] - applied_credit ) @@ -151,36 +205,4 @@ def _apply_credits_new_pi( old_pi_df[invoice.PI_PI_FIELD] == pi, credit_used_field ] = credits_used - old_pi_df = old_pi_df.astype( - { - invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype( - pyarrow.decimal128(21, 2) - ), - invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - }, - ) - return (data, old_pi_df) - - def _dump_old_pis(self, old_pi_filepath, old_pi_df: pandas.DataFrame): - old_pi_df.to_csv(old_pi_filepath, index=False) - - def _get_pi_age(self, old_pi_df: pandas.DataFrame, pi, invoice_month): - """Returns time difference between current invoice month and PI's first invoice month - I.e 0 for new PIs - - Will raise an error if the PI'a age is negative, which suggests a faulty invoice, or a program bug""" - first_invoice_month = old_pi_df.loc[ - old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH - ] - if first_invoice_month.empty: - return 0 - - month_diff = util.get_month_diff(invoice_month, first_invoice_month.iat[0]) - if month_diff < 0: - sys.exit( - f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!" - ) - else: - return month_diff diff --git a/process_report/process_report.py b/process_report/process_report.py index bec2695..c8516e0 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -154,7 +154,7 @@ def main(): parser.add_argument( "--output-file", required=False, - default="filtered_output", + default="billable", help="Name of output file", ) parser.add_argument( @@ -282,7 +282,6 @@ def main(): upload_to_s3(invoice_list, invoice_month) upload_to_s3_HU_BU(args.HU_BU_invoice_file, invoice_month) - upload_to_s3_old_pi_file(old_pi_file) def fetch_s3_invoices(invoice_month): @@ -365,11 +364,6 @@ def fetch_s3_old_pi_file(): return local_name -def upload_to_s3_old_pi_file(old_pi_file): - invoice_bucket = get_invoice_bucket() - invoice_bucket.upload_file(old_pi_file, PI_S3_FILEPATH) - - def backup_to_s3_old_pi_file(old_pi_file): invoice_bucket = get_invoice_bucket() invoice_bucket.upload_file(old_pi_file, f"PIs/Archive/PI {get_iso8601_time()}.csv") diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py index f26b823..117412d 100644 --- a/process_report/tests/unit_tests.py +++ b/process_report/tests/unit_tests.py @@ -5,6 +5,7 @@ import os import uuid import math +from decimal import Decimal from textwrap import dedent from process_report import process_report, util @@ -416,6 +417,9 @@ def setUp(self): "Balance": [10, 100, 10000, 400, 100, 0, 0, 0, 0, 200, 700], } self.dataframe = pandas.DataFrame(data) + self.dataframe["Credit"] = None + self.dataframe["Credit Code"] = None + self.dataframe["Balance"] = Decimal(0) self.answer_dataframe = pandas.DataFrame(answer_df_dict) old_pi = [ "PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used", @@ -510,6 +514,9 @@ def setUp(self): "Cost": [500, 100, 100, 500, 500], } ) + self.dataframe_no_gpu["Credit"] = None + self.dataframe_no_gpu["Credit Code"] = None + self.dataframe_no_gpu["Balance"] = Decimal(0) old_pi_no_gpu = [ "PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used", "OldPI,2024-03,500,200,0", @@ -549,18 +556,24 @@ def tearDown(self): os.remove(self.old_pi_no_gpu_file) def test_apply_credit_0002(self): - test_invoice = test_utils.new_billable_invoice() + test_invoice = test_utils.new_billable_invoice(invoice_month="2024-03") old_pi_df = test_invoice._load_old_pis(self.old_pi_file) dataframe, updated_old_pi_df = test_invoice._apply_credits_new_pi( self.dataframe, old_pi_df ) dataframe = dataframe.astype({"Credit": "float64", "Balance": "int64"}) - updated_old_pi_df = updated_old_pi_df.sort_values(by="PI", ignore_index=True) + updated_old_pi_df = updated_old_pi_df.astype( + dtype={ + "Initial Credits": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), + "1st Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), + "2nd Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), + }, + ).sort_values(by=["PI"], ignore_index=True) self.assertTrue(self.answer_dataframe.equals(dataframe)) self.assertTrue(self.old_pi_df_answer.equals(updated_old_pi_df)) def test_no_gpu(self): - test_invoice = test_utils.new_billable_invoice() + test_invoice = test_utils.new_billable_invoice(invoice_month="2024-03") old_pi_df = test_invoice._load_old_pis(self.old_pi_no_gpu_file) dataframe, _ = test_invoice._apply_credits_new_pi( self.dataframe_no_gpu, old_pi_df