From 0a08feac1afa713930b8b2ed4adf8f139d98e447 Mon Sep 17 00:00:00 2001
From: Quan Pham <qmpham2019@gmail.com>
Date: Tue, 23 Jul 2024 10:18:34 -0400
Subject: [PATCH] Further internal refactoring of billable invoice

---
 process_report/invoices/billable_invoice.py | 176 +++++++++++---------
 process_report/process_report.py            |   8 +-
 process_report/tests/unit_tests.py          |  19 ++-
 3 files changed, 116 insertions(+), 87 deletions(-)

diff --git a/process_report/invoices/billable_invoice.py b/process_report/invoices/billable_invoice.py
index 58e65ac..e0d97d3 100644
--- a/process_report/invoices/billable_invoice.py
+++ b/process_report/invoices/billable_invoice.py
@@ -16,23 +16,35 @@
 
 @dataclass
 class BillableInvoice(invoice.Invoice):
+    NEW_PI_CREDIT_CODE = "0002"
+    INITIAL_CREDIT_AMOUNT = 1000
+    EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
+    PI_S3_FILEPATH = "PIs/PI.csv"
+
     nonbillable_pis: list[str]
     nonbillable_projects: list[str]
     old_pi_filepath: str
 
-    def _prepare(self):
-        self.data = self._remove_nonbillables(
-            self.data, self.nonbillable_pis, self.nonbillable_projects
-        )
-        self.data = self._validate_pi_names(self.data)
+    @staticmethod
+    def _load_old_pis(old_pi_filepath) -> pandas.DataFrame:  # read the old-PI CSV into a DataFrame
+        try:
+            old_pi_df = pandas.read_csv(
+                old_pi_filepath,
+                dtype={  # the three credit columns are parsed as Arrow decimal128(21, 2)
+                    invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
+                        pyarrow.decimal128(21, 2)
+                    ),
+                    invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
+                    invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
+                },
+            )
+        except FileNotFoundError:  # a missing file ends the program via sys.exit
+            sys.exit("Applying credit 0002 failed. Old PI file does not exist")
 
-    def _process(self):
-        old_pi_df = self._load_old_pis(self.old_pi_filepath)
-        self.data, updated_old_pi_df = self._apply_credits_new_pi(self.data, old_pi_df)
-        self._dump_old_pis(self.old_pi_filepath, updated_old_pi_df)
+        return old_pi_df
 
+    @staticmethod
     def _remove_nonbillables(
-        self,
         data: pandas.DataFrame,
         nonbillable_pis: list[str],
         nonbillable_projects: list[str],
@@ -42,7 +54,8 @@ def _remove_nonbillables(
             & ~data[invoice.PROJECT_FIELD].isin(nonbillable_projects)
         ]
 
-    def _validate_pi_names(self, data: pandas.DataFrame):
+    @staticmethod
+    def _validate_pi_names(data: pandas.DataFrame):
         invalid_pi_projects = data[pandas.isna(data[invoice.PI_FIELD])]
         for i, row in invalid_pi_projects.iterrows():
             logger.warn(
@@ -50,47 +63,88 @@ def _validate_pi_names(self, data: pandas.DataFrame):
             )
         return data[~pandas.isna(data[invoice.PI_FIELD])]
 
-    def _load_old_pis(self, old_pi_filepath) -> pandas.DataFrame:
-        try:
-            old_pi_df = pandas.read_csv(
-                old_pi_filepath,
-                dtype={
-                    invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
-                        pyarrow.decimal128(21, 2)
-                    ),
-                    invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-                    invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-                },
+    @staticmethod
+    def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
+        """Return util.get_month_diff between `invoice_month` and the PI's first invoice month in `old_pi_df`.
+        Returns 0 when the PI has no row in `old_pi_df`, i.e. for new PIs.
+        Exits the program if the PI's age is negative, which suggests a faulty invoice, or a program bug"""
+        first_invoice_month = old_pi_df.loc[
+            old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH
+        ]
+        if first_invoice_month.empty:
+            return 0
+
+        month_diff = util.get_month_diff(invoice_month, first_invoice_month.iat[0])
+        if month_diff < 0:
+            sys.exit(  # .iat[0] below: report the month value itself, not the Series repr
+                f"PI {pi} from {first_invoice_month.iat[0]} found in {invoice_month} invoice!"
             )
-        except FileNotFoundError:
-            sys.exit("Applying credit 0002 failed. Old PI file does not exist")
+        else:
+            return month_diff
 
-        return old_pi_df
+    def _prepare(self):  # filter rows, add empty credit columns, load the old-PI table
+        self.data = self._remove_nonbillables(
+            self.data, self.nonbillable_pis, self.nonbillable_projects
+        )
+        self.data = self._validate_pi_names(self.data)  # drops rows whose PI field is NaN
+        self.data[invoice.CREDIT_FIELD] = None  # credit columns start empty; _apply_credits_new_pi fills them
+        self.data[invoice.CREDIT_CODE_FIELD] = None
+        self.data[invoice.BALANCE_FIELD] = Decimal(0)
+        self.old_pi_df = self._load_old_pis(self.old_pi_filepath)  # kept on self for _process
+
+    def _process(self):  # apply new-PI credits; keep the returned PI table as self.updated_old_pi_df
+        self.data, self.updated_old_pi_df = self._apply_credits_new_pi(
+            self.data, self.old_pi_df
+        )
+
+    def _prepare_export(self):  # cast the three credit columns of the updated PI table to decimal128(21, 2)
+        self.updated_old_pi_df = self.updated_old_pi_df.astype(
+            {
+                invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
+                    pyarrow.decimal128(21, 2)
+                ),
+                invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
+                invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
+            },
+        )
+
+    def export(self):  # run the parent export, then write the PI table back to old_pi_filepath
+        super().export()
+        self.updated_old_pi_df.to_csv(self.old_pi_filepath, index=False)  # the table updated by _process, not the stale self.old_pi_df
+
+    def export_s3(self, s3_bucket):  # run the parent S3 export, then upload the local old-PI file
+        super().export_s3(s3_bucket)
+        s3_bucket.upload_file(self.old_pi_filepath, self.PI_S3_FILEPATH)  # destination key is "PIs/PI.csv"
 
     def _apply_credits_new_pi(
         self, data: pandas.DataFrame, old_pi_df: pandas.DataFrame
     ):
-        new_pi_credit_code = "0002"
-        INITIAL_CREDIT_AMOUNT = 1000
-        EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
-
-        data[invoice.CREDIT_FIELD] = None
-        data[invoice.CREDIT_CODE_FIELD] = None
-        data[invoice.BALANCE_FIELD] = Decimal(0)
-
-        current_pi_set = set(data[invoice.PI_FIELD])
-        invoice_month = data[invoice.INVOICE_DATE_FIELD].iat[0]
-        invoice_pis = old_pi_df[old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month]
-        if invoice_pis[invoice.PI_INITIAL_CREDITS].empty or pandas.isna(
-            new_pi_credit_amount := invoice_pis[invoice.PI_INITIAL_CREDITS].iat[0]
+        def get_initial_credit_amount(
+            old_pi_df, invoice_month, default_initial_credit_amount
         ):
-            new_pi_credit_amount = INITIAL_CREDIT_AMOUNT
-
-        print(f"New PI Credit set at {new_pi_credit_amount} for {invoice_month}")
+            first_month_processed_pis = old_pi_df[
+                old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month
+            ]
+            if first_month_processed_pis[
+                invoice.PI_INITIAL_CREDITS
+            ].empty or pandas.isna(
+                new_pi_credit_amount := first_month_processed_pis[
+                    invoice.PI_INITIAL_CREDITS
+                ].iat[0]
+            ):
+                new_pi_credit_amount = default_initial_credit_amount
+
+            return new_pi_credit_amount
+
+        new_pi_credit_amount = get_initial_credit_amount(
+            old_pi_df, self.invoice_month, self.INITIAL_CREDIT_AMOUNT
+        )
+        print(f"New PI Credit set at {new_pi_credit_amount} for {self.invoice_month}")
 
+        current_pi_set = set(data[invoice.PI_FIELD])
         for pi in current_pi_set:
             pi_projects = data[data[invoice.PI_FIELD] == pi]
-            pi_age = self._get_pi_age(old_pi_df, pi, invoice_month)
+            pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month)
             pi_old_pi_entry = old_pi_df.loc[
                 old_pi_df[invoice.PI_PI_FIELD] == pi
             ].squeeze()
@@ -101,7 +155,7 @@ def _apply_credits_new_pi(
             else:
                 if pi_age == 0:
                     if len(pi_old_pi_entry) == 0:
-                        pi_entry = [pi, invoice_month, new_pi_credit_amount, 0, 0]
+                        pi_entry = [pi, self.invoice_month, new_pi_credit_amount, 0, 0]
                         old_pi_df = pandas.concat(
                             [
                                 pandas.DataFrame([pi_entry], columns=old_pi_df.columns),
@@ -126,7 +180,7 @@ def _apply_credits_new_pi(
                 for i, row in pi_projects.iterrows():
                     if (
                         remaining_credit == 0
-                        or row[invoice.SU_TYPE_FIELD] in EXCLUDE_SU_TYPES
+                        or row[invoice.SU_TYPE_FIELD] in self.EXCLUDE_SU_TYPES
                     ):
                         data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD]
                     else:
@@ -134,7 +188,7 @@ def _apply_credits_new_pi(
                         applied_credit = min(project_cost, remaining_credit)
 
                         data.at[i, invoice.CREDIT_FIELD] = applied_credit
-                        data.at[i, invoice.CREDIT_CODE_FIELD] = new_pi_credit_code
+                        data.at[i, invoice.CREDIT_CODE_FIELD] = self.NEW_PI_CREDIT_CODE
                         data.at[i, invoice.BALANCE_FIELD] = (
                             row[invoice.COST_FIELD] - applied_credit
                         )
@@ -151,36 +205,4 @@ def _apply_credits_new_pi(
                     old_pi_df[invoice.PI_PI_FIELD] == pi, credit_used_field
                 ] = credits_used
 
-        old_pi_df = old_pi_df.astype(
-            {
-                invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
-                    pyarrow.decimal128(21, 2)
-                ),
-                invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-                invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-            },
-        )
-
         return (data, old_pi_df)
-
-    def _dump_old_pis(self, old_pi_filepath, old_pi_df: pandas.DataFrame):
-        old_pi_df.to_csv(old_pi_filepath, index=False)
-
-    def _get_pi_age(self, old_pi_df: pandas.DataFrame, pi, invoice_month):
-        """Returns time difference between current invoice month and PI's first invoice month
-        I.e 0 for new PIs
-
-        Will raise an error if the PI'a age is negative, which suggests a faulty invoice, or a program bug"""
-        first_invoice_month = old_pi_df.loc[
-            old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH
-        ]
-        if first_invoice_month.empty:
-            return 0
-
-        month_diff = util.get_month_diff(invoice_month, first_invoice_month.iat[0])
-        if month_diff < 0:
-            sys.exit(
-                f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!"
-            )
-        else:
-            return month_diff
diff --git a/process_report/process_report.py b/process_report/process_report.py
index bec2695..c8516e0 100644
--- a/process_report/process_report.py
+++ b/process_report/process_report.py
@@ -154,7 +154,7 @@ def main():
     parser.add_argument(
         "--output-file",
         required=False,
-        default="filtered_output",
+        default="billable",
         help="Name of output file",
     )
     parser.add_argument(
@@ -282,7 +282,6 @@ def main():
 
         upload_to_s3(invoice_list, invoice_month)
         upload_to_s3_HU_BU(args.HU_BU_invoice_file, invoice_month)
-        upload_to_s3_old_pi_file(old_pi_file)
 
 
 def fetch_s3_invoices(invoice_month):
@@ -365,11 +364,6 @@ def fetch_s3_old_pi_file():
     return local_name
 
 
-def upload_to_s3_old_pi_file(old_pi_file):
-    invoice_bucket = get_invoice_bucket()
-    invoice_bucket.upload_file(old_pi_file, PI_S3_FILEPATH)
-
-
 def backup_to_s3_old_pi_file(old_pi_file):
     invoice_bucket = get_invoice_bucket()
     invoice_bucket.upload_file(old_pi_file, f"PIs/Archive/PI {get_iso8601_time()}.csv")
diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py
index f26b823..117412d 100644
--- a/process_report/tests/unit_tests.py
+++ b/process_report/tests/unit_tests.py
@@ -5,6 +5,7 @@
 import os
 import uuid
 import math
+from decimal import Decimal
 from textwrap import dedent
 
 from process_report import process_report, util
@@ -416,6 +417,9 @@ def setUp(self):
             "Balance": [10, 100, 10000, 400, 100, 0, 0, 0, 0, 200, 700],
         }
         self.dataframe = pandas.DataFrame(data)
+        self.dataframe["Credit"] = None
+        self.dataframe["Credit Code"] = None
+        self.dataframe["Balance"] = Decimal(0)
         self.answer_dataframe = pandas.DataFrame(answer_df_dict)
         old_pi = [
             "PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used",
@@ -510,6 +514,9 @@ def setUp(self):
                 "Cost": [500, 100, 100, 500, 500],
             }
         )
+        self.dataframe_no_gpu["Credit"] = None
+        self.dataframe_no_gpu["Credit Code"] = None
+        self.dataframe_no_gpu["Balance"] = Decimal(0)
         old_pi_no_gpu = [
             "PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used",
             "OldPI,2024-03,500,200,0",
@@ -549,18 +556,24 @@ def tearDown(self):
         os.remove(self.old_pi_no_gpu_file)
 
     def test_apply_credit_0002(self):
-        test_invoice = test_utils.new_billable_invoice()
+        test_invoice = test_utils.new_billable_invoice(invoice_month="2024-03")
         old_pi_df = test_invoice._load_old_pis(self.old_pi_file)
         dataframe, updated_old_pi_df = test_invoice._apply_credits_new_pi(
             self.dataframe, old_pi_df
         )
         dataframe = dataframe.astype({"Credit": "float64", "Balance": "int64"})
-        updated_old_pi_df = updated_old_pi_df.sort_values(by="PI", ignore_index=True)
+        updated_old_pi_df = updated_old_pi_df.astype(
+            dtype={
+                "Initial Credits": pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
+                "1st Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
+                "2nd Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
+            },
+        ).sort_values(by=["PI"], ignore_index=True)
         self.assertTrue(self.answer_dataframe.equals(dataframe))
         self.assertTrue(self.old_pi_df_answer.equals(updated_old_pi_df))
 
     def test_no_gpu(self):
-        test_invoice = test_utils.new_billable_invoice()
+        test_invoice = test_utils.new_billable_invoice(invoice_month="2024-03")
         old_pi_df = test_invoice._load_old_pis(self.old_pi_no_gpu_file)
         dataframe, _ = test_invoice._apply_credits_new_pi(
             self.dataframe_no_gpu, old_pi_df