From e4e11a1d421c8dfac54f9fcf06c53b535d3aeabb Mon Sep 17 00:00:00 2001
From: Quan Pham <qmpham2019@gmail.com>
Date: Thu, 6 Jun 2024 22:55:44 +0300
Subject: [PATCH] Refactored billable invoice

The billable invoice now subclasses the Invoice base class.
The computations involved in preparation, namely filtering out
nonbillable projects and PIs, and validating PI names, have been
moved into methods of the new `BillableInvoice` class, and the
`get_month_diff` helper has been moved to `util.py`.

The function for applying the New-PI credit (`apply_credits_new_pi`)
is now a `BillableInvoice` method as well, and the I/O needed to read
and write out the PI file has been moved out of this function for ease
of testing.
---
 process_report/invoices/billable_invoice.py | 186 +++++++++++++++++++
 process_report/invoices/invoice.py          |   8 +
 process_report/process_report.py            | 189 +++-----------------
 process_report/tests/unit_tests.py          |  83 ++++-----
 process_report/tests/util.py                |  21 +++
 process_report/util.py                      |   7 +
 6 files changed, 285 insertions(+), 209 deletions(-)
 create mode 100644 process_report/invoices/billable_invoice.py
 create mode 100644 process_report/tests/util.py

diff --git a/process_report/invoices/billable_invoice.py b/process_report/invoices/billable_invoice.py
new file mode 100644
index 0000000..58e65ac
--- /dev/null
+++ b/process_report/invoices/billable_invoice.py
@@ -0,0 +1,186 @@
+from dataclasses import dataclass
+from decimal import Decimal
+import logging
+import sys
+
+import pandas
+import pyarrow
+
+import process_report.invoices.invoice as invoice
+import process_report.util as util
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+@dataclass
+class BillableInvoice(invoice.Invoice):
+    nonbillable_pis: list[str]
+    nonbillable_projects: list[str]
+    old_pi_filepath: str
+
+    def _prepare(self):
+        self.data = self._remove_nonbillables(
+            self.data, self.nonbillable_pis, self.nonbillable_projects
+        )
+        self.data = self._validate_pi_names(self.data)
+
+    def _process(self):
+        old_pi_df = self._load_old_pis(self.old_pi_filepath)
+        self.data, updated_old_pi_df = self._apply_credits_new_pi(self.data, old_pi_df)
+        self._dump_old_pis(self.old_pi_filepath, updated_old_pi_df)
+
+    def _remove_nonbillables(
+        self,
+        data: pandas.DataFrame,
+        nonbillable_pis: list[str],
+        nonbillable_projects: list[str],
+    ):
+        return data[
+            ~data[invoice.PI_FIELD].isin(nonbillable_pis)
+            & ~data[invoice.PROJECT_FIELD].isin(nonbillable_projects)
+        ]
+
+    def _validate_pi_names(self, data: pandas.DataFrame):
+        """Warn about rows with an empty PI field and return the remaining rows."""
+        missing_pi = pandas.isna(data[invoice.PI_FIELD])
+        for project in data.loc[missing_pi, invoice.PROJECT_FIELD]:
+            # `Logger.warn` is a deprecated alias of `Logger.warning`.
+            logger.warning(f"Billable project {project} has empty PI field")
+        return data[~missing_pi]
+
+    def _load_old_pis(self, old_pi_filepath) -> pandas.DataFrame:
+        try:
+            old_pi_df = pandas.read_csv(
+                old_pi_filepath,
+                dtype={
+                    invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
+                        pyarrow.decimal128(21, 2)
+                    ),
+                    invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
+                    invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
+                },
+            )
+        except FileNotFoundError:
+            sys.exit("Applying credit 0002 failed. Old PI file does not exist")
+
+        return old_pi_df
+
+    def _apply_credits_new_pi(
+        self, data: pandas.DataFrame, old_pi_df: pandas.DataFrame
+    ):
+        """Apply the New-PI credit to `data`; return `(data, updated old_pi_df)`.
+        `data` is modified in place; no PI file is read or written here."""
+        new_pi_credit_code = "0002"
+        INITIAL_CREDIT_AMOUNT = 1000
+        EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
+
+        data[invoice.CREDIT_FIELD] = None
+        data[invoice.CREDIT_CODE_FIELD] = None
+        data[invoice.BALANCE_FIELD] = Decimal(0)
+
+        current_pi_set = set(data[invoice.PI_FIELD])
+        invoice_month = data[invoice.INVOICE_DATE_FIELD].iat[0]
+        invoice_pis = old_pi_df[old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month]
+        if invoice_pis[invoice.PI_INITIAL_CREDITS].empty or pandas.isna(
+            new_pi_credit_amount := invoice_pis[invoice.PI_INITIAL_CREDITS].iat[0]
+        ):
+            new_pi_credit_amount = INITIAL_CREDIT_AMOUNT
+
+        print(f"New PI Credit set at {new_pi_credit_amount} for {invoice_month}")
+
+        for pi in current_pi_set:
+            pi_projects = data[data[invoice.PI_FIELD] == pi]
+            pi_age = self._get_pi_age(old_pi_df, pi, invoice_month)
+            pi_old_pi_entry = old_pi_df.loc[
+                old_pi_df[invoice.PI_PI_FIELD] == pi
+            ].squeeze()
+
+            if pi_age > 1:
+                for i, row in pi_projects.iterrows():
+                    data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD]
+            else:
+                if pi_age == 0:
+                    if len(pi_old_pi_entry) == 0:
+                        pi_entry = [pi, invoice_month, new_pi_credit_amount, 0, 0]
+                        old_pi_df = pandas.concat(
+                            [
+                                pandas.DataFrame([pi_entry], columns=old_pi_df.columns),
+                                old_pi_df,
+                            ],
+                            ignore_index=True,
+                        )
+                        pi_old_pi_entry = old_pi_df.loc[
+                            old_pi_df[invoice.PI_PI_FIELD] == pi
+                        ].squeeze()
+
+                    remaining_credit = new_pi_credit_amount
+                    credit_used_field = invoice.PI_1ST_USED
+                elif pi_age == 1:
+                    remaining_credit = (
+                        pi_old_pi_entry[invoice.PI_INITIAL_CREDITS]
+                        - pi_old_pi_entry[invoice.PI_1ST_USED]
+                    )
+                    credit_used_field = invoice.PI_2ND_USED
+
+                initial_credit = remaining_credit
+                for i, row in pi_projects.iterrows():
+                    if (
+                        remaining_credit == 0
+                        or row[invoice.SU_TYPE_FIELD] in EXCLUDE_SU_TYPES
+                    ):
+                        data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD]
+                    else:
+                        project_cost = row[invoice.COST_FIELD]
+                        applied_credit = min(project_cost, remaining_credit)
+
+                        data.at[i, invoice.CREDIT_FIELD] = applied_credit
+                        data.at[i, invoice.CREDIT_CODE_FIELD] = new_pi_credit_code
+                        data.at[i, invoice.BALANCE_FIELD] = (
+                            row[invoice.COST_FIELD] - applied_credit
+                        )
+                        remaining_credit -= applied_credit
+
+                credits_used = initial_credit - remaining_credit
+                prev_used = pi_old_pi_entry[credit_used_field]
+                # A blank "used" cell is not an overwrite (and NA has no truth value).
+                if not pandas.isna(prev_used) and prev_used not in (0, credits_used):
+                    print(
+                        f"Warning: PI file overwritten. PI {pi} previously used ${pi_old_pi_entry[credit_used_field]} of New PI credits, now uses ${credits_used}"
+                    )
+                old_pi_df.loc[
+                    old_pi_df[invoice.PI_PI_FIELD] == pi, credit_used_field
+                ] = credits_used
+
+        decimal_type = pandas.ArrowDtype(pyarrow.decimal128(21, 2))
+        credit_fields = [
+            invoice.PI_INITIAL_CREDITS,
+            invoice.PI_1ST_USED,
+            invoice.PI_2ND_USED,
+        ]
+        old_pi_df = old_pi_df.astype({field: decimal_type for field in credit_fields})
+
+        return (data, old_pi_df)
+
+    def _dump_old_pis(self, old_pi_filepath, old_pi_df: pandas.DataFrame):
+        old_pi_df.to_csv(old_pi_filepath, index=False)
+
+    def _get_pi_age(self, old_pi_df: pandas.DataFrame, pi, invoice_month):
+        """Return the PI's age in months as of `invoice_month` (0 for a new PI).
+
+        A PI with no row in `old_pi_df` is treated as new. Exits via `sys.exit` if the
+        age is negative, which suggests a faulty invoice or a program bug.
+        """
+        first_months = old_pi_df.loc[
+            old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH
+        ]
+        if first_months.empty:
+            return 0
+
+        # Use the scalar month in the message; formatting the Series dumps its repr.
+        first_month = first_months.iat[0]
+        month_diff = util.get_month_diff(invoice_month, first_month)
+        if month_diff < 0:
+            sys.exit(f"PI {pi} from {first_month} found in {invoice_month} invoice!")
+        return month_diff
diff --git a/process_report/invoices/invoice.py b/process_report/invoices/invoice.py
index 446a6f6..23c2371 100644
--- a/process_report/invoices/invoice.py
+++ b/process_report/invoices/invoice.py
@@ -4,6 +4,14 @@
 import process_report.util as util
 
 
+### PI file field names
+PI_PI_FIELD = "PI"
+PI_FIRST_MONTH = "First Invoice Month"
+PI_INITIAL_CREDITS = "Initial Credits"
+PI_1ST_USED = "1st Month Used"
+PI_2ND_USED = "2nd Month Used"
+###
+
 ### Invoice field names
 INVOICE_DATE_FIELD = "Invoice Month"
 PROJECT_FIELD = "Project - Allocation"
diff --git a/process_report/process_report.py b/process_report/process_report.py
index 959ba7c..53d8467 100644
--- a/process_report/process_report.py
+++ b/process_report/process_report.py
@@ -9,7 +9,11 @@
 import boto3
 import pyarrow
 
-from process_report.invoices import lenovo_invoice, nonbillable_invoice
+from process_report.invoices import (
+    lenovo_invoice,
+    nonbillable_invoice,
+    billable_invoice,
+)
 
 
 ### PI file field names
@@ -66,33 +70,6 @@ def load_institute_map() -> dict:
     return institute_map
 
 
-def load_old_pis(old_pi_file) -> pandas.DataFrame:
-    try:
-        old_pi_df = pandas.read_csv(
-            old_pi_file,
-            dtype={
-                PI_INITIAL_CREDITS: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-                PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-                PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-            },
-        )
-    except FileNotFoundError:
-        sys.exit("Applying credit 0002 failed. Old PI file does not exist")
-
-    return old_pi_df
-
-
-def dump_old_pis(old_pi_file, old_pi_df: pandas.DataFrame):
-    old_pi_df = old_pi_df.astype(
-        {
-            PI_INITIAL_CREDITS: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-            PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-            PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-        },
-    )
-    old_pi_df.to_csv(old_pi_file, index=False)
-
-
 def load_alias(alias_file):
     alias_dict = dict()
 
@@ -108,31 +85,6 @@ def load_alias(alias_file):
     return alias_dict
 
 
-def get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
-    """Returns time difference between current invoice month and PI's first invoice month
-    I.e 0 for new PIs
-
-    Will raise an error if the PI'a age is negative, which suggests a faulty invoice, or a program bug"""
-    first_invoice_month = old_pi_df.loc[old_pi_df[PI_PI_FIELD] == pi, PI_FIRST_MONTH]
-    if first_invoice_month.empty:
-        return 0
-
-    month_diff = get_month_diff(invoice_month, first_invoice_month.iat[0])
-    if month_diff < 0:
-        sys.exit(
-            f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!"
-        )
-    else:
-        return month_diff
-
-
-def get_month_diff(month_1, month_2):
-    """Returns a positive integer if month_1 is ahead in time of month_2"""
-    dt1 = datetime.datetime.strptime(month_1, "%Y-%m")
-    dt2 = datetime.datetime.strptime(month_2, "%Y-%m")
-    return (dt1.year - dt2.year) * 12 + (dt1.month - dt2.month)
-
-
 def get_invoice_bucket():
     try:
         s3_resource = boto3.resource(
@@ -202,7 +154,7 @@ def main():
     parser.add_argument(
         "--output-file",
         required=False,
-        default="filtered_output.csv",
+        default="filtered_output",
         help="Name of output file",
     )
     parser.add_argument(
@@ -301,22 +253,29 @@ def main():
             bucket = get_invoice_bucket()
             invoice.export_s3(bucket)
 
-    billable_projects = remove_non_billables(merged_dataframe, pi, projects)
-    billable_projects = validate_pi_names(billable_projects)
-
     if args.upload_to_s3:
         backup_to_s3_old_pi_file(old_pi_file)
-    credited_projects = apply_credits_new_pi(billable_projects, old_pi_file)
 
-    export_billables(credited_projects, args.output_file)
-    export_pi_billables(credited_projects, args.output_folder, invoice_month)
-    export_BU_only(billable_projects, args.BU_invoice_file, args.BU_subsidy_amount)
-    export_HU_BU(credited_projects, args.HU_BU_invoice_file)
+    billable_inv = billable_invoice.BillableInvoice(
+        name=args.output_file,
+        invoice_month=invoice_month,
+        data=merged_dataframe.copy(),
+        nonbillable_pis=pi,
+        nonbillable_projects=projects,
+        old_pi_filepath=old_pi_file,
+    )
+    billable_inv.process()
+    billable_inv.export()
+    if args.upload_to_s3:
+        bucket = get_invoice_bucket()
+        billable_inv.export_s3(bucket)
+
+    export_pi_billables(billable_inv.data, args.output_folder, invoice_month)
+    export_BU_only(billable_inv.data, args.BU_invoice_file, args.BU_subsidy_amount)
+    export_HU_BU(billable_inv.data, args.HU_BU_invoice_file)
 
     if args.upload_to_s3:
-        invoice_list = [
-            args.output_file,
-        ]
+        invoice_list = list()
 
         for pi_invoice in os.listdir(args.output_folder):
             invoice_list.append(os.path.join(args.output_folder, pi_invoice))
@@ -385,23 +344,6 @@ def timed_projects(timed_projects_file, invoice_date):
     return dataframe[mask]["Project"].to_list()
 
 
-def remove_non_billables(dataframe, pi, projects):
-    """Removes projects and PIs that should not be billed from the dataframe"""
-    filtered_dataframe = dataframe[
-        ~dataframe[PI_FIELD].isin(pi) & ~dataframe[PROJECT_FIELD].isin(projects)
-    ]
-    return filtered_dataframe
-
-
-def validate_pi_names(dataframe):
-    invalid_pi_projects = dataframe[pandas.isna(dataframe[PI_FIELD])]
-    for i, row in invalid_pi_projects.iterrows():
-        print(f"Warning: Billable project {row[PROJECT_FIELD]} has empty PI field")
-    dataframe = dataframe[~pandas.isna(dataframe[PI_FIELD])]
-
-    return dataframe
-
-
 def validate_pi_aliases(dataframe: pandas.DataFrame, alias_dict: dict):
     for pi, pi_aliases in alias_dict.items():
         dataframe.loc[dataframe[PI_FIELD].isin(pi_aliases), PI_FIELD] = pi
@@ -416,89 +358,6 @@ def fetch_s3_alias_file():
     return local_name
 
 
-def apply_credits_new_pi(dataframe, old_pi_file):
-    new_pi_credit_code = "0002"
-    INITIAL_CREDIT_AMOUNT = 1000
-    EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
-
-    dataframe[CREDIT_FIELD] = None
-    dataframe[CREDIT_CODE_FIELD] = None
-    dataframe[BALANCE_FIELD] = Decimal(0)
-
-    old_pi_df = load_old_pis(old_pi_file)
-
-    current_pi_set = set(dataframe[PI_FIELD])
-    invoice_month = dataframe[INVOICE_DATE_FIELD].iat[0]
-    invoice_pis = old_pi_df[old_pi_df[PI_FIRST_MONTH] == invoice_month]
-    if invoice_pis[PI_INITIAL_CREDITS].empty or pandas.isna(
-        new_pi_credit_amount := invoice_pis[PI_INITIAL_CREDITS].iat[0]
-    ):
-        new_pi_credit_amount = INITIAL_CREDIT_AMOUNT
-
-    print(f"New PI Credit set at {new_pi_credit_amount} for {invoice_month}")
-
-    for pi in current_pi_set:
-        pi_projects = dataframe[dataframe[PI_FIELD] == pi]
-        pi_age = get_pi_age(old_pi_df, pi, invoice_month)
-        pi_old_pi_entry = old_pi_df.loc[old_pi_df[PI_PI_FIELD] == pi].squeeze()
-
-        if pi_age > 1:
-            for i, row in pi_projects.iterrows():
-                dataframe.at[i, BALANCE_FIELD] = row[COST_FIELD]
-        else:
-            if pi_age == 0:
-                if len(pi_old_pi_entry) == 0:
-                    pi_entry = [pi, invoice_month, new_pi_credit_amount, 0, 0]
-                    old_pi_df = pandas.concat(
-                        [
-                            pandas.DataFrame([pi_entry], columns=old_pi_df.columns),
-                            old_pi_df,
-                        ],
-                        ignore_index=True,
-                    )
-                    pi_old_pi_entry = old_pi_df.loc[
-                        old_pi_df[PI_PI_FIELD] == pi
-                    ].squeeze()
-
-                remaining_credit = new_pi_credit_amount
-                credit_used_field = PI_1ST_USED
-            elif pi_age == 1:
-                remaining_credit = (
-                    pi_old_pi_entry[PI_INITIAL_CREDITS] - pi_old_pi_entry[PI_1ST_USED]
-                )
-                credit_used_field = PI_2ND_USED
-
-            initial_credit = remaining_credit
-            for i, row in pi_projects.iterrows():
-                if remaining_credit == 0 or row[SU_TYPE_FIELD] in EXCLUDE_SU_TYPES:
-                    dataframe.at[i, BALANCE_FIELD] = row[COST_FIELD]
-                else:
-                    project_cost = row[COST_FIELD]
-                    applied_credit = min(project_cost, remaining_credit)
-
-                    dataframe.at[i, CREDIT_FIELD] = applied_credit
-                    dataframe.at[i, CREDIT_CODE_FIELD] = new_pi_credit_code
-                    dataframe.at[i, BALANCE_FIELD] = row[COST_FIELD] - applied_credit
-                    remaining_credit -= applied_credit
-
-            credits_used = initial_credit - remaining_credit
-            if (
-                not pandas.isna(pi_old_pi_entry[credit_used_field])
-                and pi_old_pi_entry[credit_used_field] != 0
-                and pi_old_pi_entry[credit_used_field] != credits_used
-            ):
-                print(
-                    f"Warning: PI file overwritten. PI {pi} previously used ${pi_old_pi_entry[credit_used_field]} of New PI credits, now uses ${credits_used}"
-                )
-            old_pi_df.loc[
-                old_pi_df[PI_PI_FIELD] == pi, credit_used_field
-            ] = credits_used
-
-    dump_old_pis(old_pi_file, old_pi_df)
-
-    return dataframe
-
-
 def fetch_s3_old_pi_file():
     local_name = "PI.csv"
     invoice_bucket = get_invoice_bucket()
diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py
index 42295f3..f26b823 100644
--- a/process_report/tests/unit_tests.py
+++ b/process_report/tests/unit_tests.py
@@ -3,11 +3,13 @@
 import pandas
 import pyarrow
 import os
+import uuid
 import math
 from textwrap import dedent
 
-from process_report import process_report
+from process_report import process_report, util
 from process_report.invoices import lenovo_invoice, nonbillable_invoice
+from process_report.tests import util as test_utils
 
 
 class TestGetInvoiceDate(TestCase):
@@ -85,30 +87,6 @@ def tearDown(self):
         os.remove(self.output_file.name)
         os.remove(self.output_file2.name)
 
-    def test_remove_non_billables(self):
-        billables_df = process_report.remove_non_billables(
-            self.dataframe, self.pi_to_exclude, self.projects_to_exclude
-        )
-        process_report.export_billables(billables_df, self.output_file.name)
-
-        result_df = pandas.read_csv(self.output_file.name)
-
-        self.assertNotIn("PI2", result_df["Manager (PI)"].tolist())
-        self.assertNotIn("PI3", result_df["Manager (PI)"].tolist())
-        self.assertNotIn(
-            "PI4", result_df["Manager (PI)"].tolist()
-        )  # indirect because ProjectD was removed
-        self.assertNotIn("ProjectB", result_df["Project - Allocation"].tolist())
-        self.assertNotIn(
-            "ProjectC", result_df["Project - Allocation"].tolist()
-        )  # indirect because PI3 was removed
-        self.assertNotIn("ProjectD", result_df["Project - Allocation"].tolist())
-
-        self.assertIn("PI1", result_df["Manager (PI)"].tolist())
-        self.assertIn("PI5", result_df["Manager (PI)"].tolist())
-        self.assertIn("ProjectA", result_df["Project - Allocation"].tolist())
-        self.assertIn("ProjectE", result_df["Project - Allocation"].tolist())
-
     def test_remove_billables(self):
         self.nonbillable_invoice.process()
         result_df = self.nonbillable_invoice.data
@@ -126,6 +104,26 @@ def test_remove_billables(self):
         self.assertNotIn("ProjectE", result_df["Project - Allocation"].tolist())
 
 
+class TestBillableInvoice(TestCase):
+    def test_remove_nonbillables(self):
+        pis = [uuid.uuid4().hex for x in range(10)]
+        projects = [uuid.uuid4().hex for x in range(10)]
+        nonbillable_pis = pis[:3]
+        nonbillable_projects = projects[7:]
+        billable_pis = pis[3:7]
+        data = pandas.DataFrame({"Manager (PI)": pis, "Project - Allocation": projects})
+
+        test_invoice = test_utils.new_billable_invoice()
+        data = test_invoice._remove_nonbillables(
+            data, nonbillable_pis, nonbillable_projects
+        )
+        self.assertTrue(data[data["Manager (PI)"].isin(nonbillable_pis)].empty)
+        self.assertTrue(
+            data[data["Project - Allocation"].isin(nonbillable_projects)].empty
+        )
+        self.assertTrue(data.equals(data[data["Manager (PI)"].isin(billable_pis)]))
+
+
 class TestMergeCSV(TestCase):
     def setUp(self):
         self.header = ["ID", "Name", "Age"]
@@ -285,9 +283,9 @@ def test_get_month_diff(self):
             (("2024-12", "2025-03"), -3),
         ]
         for arglist, answer in testcases:
-            self.assertEqual(process_report.get_month_diff(*arglist), answer)
+            self.assertEqual(util.get_month_diff(*arglist), answer)
         with self.assertRaises(ValueError):
-            process_report.get_month_diff("2024-16", "2025-03")
+            util.get_month_diff("2024-16", "2025-03")
 
 
 class TestCredit0002(TestCase):
@@ -551,26 +549,21 @@ def tearDown(self):
         os.remove(self.old_pi_no_gpu_file)
 
     def test_apply_credit_0002(self):
-        dataframe = process_report.apply_credits_new_pi(
-            self.dataframe, self.old_pi_file
+        test_invoice = test_utils.new_billable_invoice()
+        old_pi_df = test_invoice._load_old_pis(self.old_pi_file)
+        dataframe, updated_old_pi_df = test_invoice._apply_credits_new_pi(
+            self.dataframe, old_pi_df
         )
         dataframe = dataframe.astype({"Credit": "float64", "Balance": "int64"})
+        updated_old_pi_df = updated_old_pi_df.sort_values(by="PI", ignore_index=True)
         self.assertTrue(self.answer_dataframe.equals(dataframe))
-
-        old_pi_df_output = pandas.read_csv(
-            self.old_pi_file,
-            dtype={
-                "Initial Credits": pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-                "1st Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-                "2nd Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-            },
-        ).sort_values(by=["PI"], ignore_index=True)
-
-        self.assertTrue(old_pi_df_output.equals(self.old_pi_df_answer))
+        self.assertTrue(self.old_pi_df_answer.equals(updated_old_pi_df))
 
     def test_no_gpu(self):
-        dataframe = process_report.apply_credits_new_pi(
-            self.dataframe_no_gpu, self.old_pi_no_gpu_file
+        test_invoice = test_utils.new_billable_invoice()
+        old_pi_df = test_invoice._load_old_pis(self.old_pi_no_gpu_file)
+        dataframe, _ = test_invoice._apply_credits_new_pi(
+            self.dataframe_no_gpu, old_pi_df
         )
         dataframe = dataframe.astype({"Credit": "float64", "Balance": "float64"})
         self.assertTrue(self.no_gpu_df_answer.equals(dataframe))
@@ -580,8 +573,9 @@ def test_apply_credit_error(self):
             {"PI": ["PI1"], "First Invoice Month": ["2024-04"]}
         )
         invoice_month = "2024-03"
+        test_invoice = test_utils.new_billable_invoice()
         with self.assertRaises(SystemExit):
-            process_report.get_pi_age(old_pi_df, "PI1", invoice_month)
+            test_invoice._get_pi_age(old_pi_df, "PI1", invoice_month)
 
 
 class TestBUSubsidy(TestCase):
@@ -714,7 +708,8 @@ def test_validate_billables(self):
         self.assertEqual(
             1, len(self.dataframe[pandas.isna(self.dataframe["Manager (PI)"])])
         )
-        validated_df = process_report.validate_pi_names(self.dataframe)
+        test_invoice = test_utils.new_billable_invoice()
+        validated_df = test_invoice._validate_pi_names(self.dataframe)
         self.assertEqual(
             0, len(validated_df[pandas.isna(validated_df["Manager (PI)"])])
         )
diff --git a/process_report/tests/util.py b/process_report/tests/util.py
new file mode 100644
index 0000000..2a04251
--- /dev/null
+++ b/process_report/tests/util.py
@@ -0,0 +1,21 @@
+import pandas
+
+from process_report.invoices import billable_invoice
+
+
+def new_billable_invoice(
+    name="",
+    invoice_month="0000-00",
+    data=None,
+    nonbillable_pis=None,
+    nonbillable_projects=None,
+    old_pi_filepath="",
+):
+    """Build a BillableInvoice for tests; omitted containers are created per call."""
+    # None sentinels avoid sharing one default DataFrame/list across every invoice.
+    data = pandas.DataFrame() if data is None else data
+    pis = [] if nonbillable_pis is None else nonbillable_pis
+    projects = [] if nonbillable_projects is None else nonbillable_projects
+    return billable_invoice.BillableInvoice(
+        name, invoice_month, data, pis, projects, old_pi_filepath
+    )
diff --git a/process_report/util.py b/process_report/util.py
index 0853ed0..e6d2f21 100644
--- a/process_report/util.py
+++ b/process_report/util.py
@@ -33,3 +33,10 @@ def compare_invoice_month(month_1, month_2):
     dt1 = datetime.datetime.strptime(month_1, "%Y-%m")
     dt2 = datetime.datetime.strptime(month_2, "%Y-%m")
     return dt1 > dt2
+
+
+def get_month_diff(month_1, month_2):
+    """Returns a positive integer if month_1 is ahead in time of month_2"""
+    dt1 = datetime.datetime.strptime(month_1, "%Y-%m")
+    dt2 = datetime.datetime.strptime(month_2, "%Y-%m")
+    return (dt1.year - dt2.year) * 12 + (dt1.month - dt2.month)