Skip to content

Commit

Permalink
Implemented processors for removing nonbillables and validating billa…
Browse files Browse the repository at this point in the history
…ble PIs
  • Loading branch information
QuanMPhm committed Nov 6, 2024
1 parent 4f1ac15 commit 85ad7f7
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 133 deletions.
28 changes: 1 addition & 27 deletions process_report/invoices/billable_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ class BillableInvoice(discount_invoice.DiscountInvoice):
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
PI_S3_FILEPATH = "PIs/PI.csv"

nonbillable_pis: list[str]
nonbillable_projects: list[str]

export_columns_list = [
invoice.INVOICE_DATE_FIELD,
invoice.PROJECT_FIELD,
Expand Down Expand Up @@ -62,26 +59,6 @@ def _load_old_pis(old_pi_filepath) -> pandas.DataFrame:

return old_pi_df

@staticmethod
def _remove_nonbillables(
data: pandas.DataFrame,
nonbillable_pis: list[str],
nonbillable_projects: list[str],
):
return data[
~data[invoice.PI_FIELD].isin(nonbillable_pis)
& ~data[invoice.PROJECT_FIELD].isin(nonbillable_projects)
]

@staticmethod
def _validate_pi_names(data: pandas.DataFrame):
invalid_pi_projects = data[pandas.isna(data[invoice.PI_FIELD])]
for i, row in invalid_pi_projects.iterrows():
logger.warn(
f"Billable project {row[invoice.PROJECT_FIELD]} has empty PI field"
)
return data[~pandas.isna(data[invoice.PI_FIELD])]

@staticmethod
def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
"""Returns time difference between current invoice month and PI's first invoice month
Expand All @@ -102,10 +79,7 @@ def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
return month_diff

def _prepare(self):
self.data = self._remove_nonbillables(
self.data, self.nonbillable_pis, self.nonbillable_projects
)
self.data = self._validate_pi_names(self.data)
self.data = self.data[self.data["Is Billable"] & ~self.data["Missing PI"]]
self.data[invoice.CREDIT_FIELD] = None
self.data[invoice.CREDIT_CODE_FIELD] = None
self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD]
Expand Down
5 changes: 1 addition & 4 deletions process_report/invoices/nonbillable_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,4 @@ class NonbillableInvoice(invoice.Invoice):
]

def _prepare_export(self):
self.data = self.data[
self.data[invoice.PI_FIELD].isin(self.nonbillable_pis)
| self.data[invoice.PROJECT_FIELD].isin(self.nonbillable_projects)
]
self.data = self.data[~self.data["Is Billable"]]
20 changes: 13 additions & 7 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
validate_pi_alias_processor,
add_institution_processor,
lenovo_processor,
validate_billable_pi_processor,
)

### PI file field names
Expand Down Expand Up @@ -221,19 +222,26 @@ def main():
)
lenovo_proc.process()

preliminary_processed_data = lenovo_proc.data
validate_billable_pi_proc = (
validate_billable_pi_processor.ValidateBillablePIsProcessor(
"", invoice_month, lenovo_proc.data, pi, projects
)
)
validate_billable_pi_proc.process()

processed_data = validate_billable_pi_proc.data

### Finish preliminary processing
### Initialize invoices

lenovo_inv = lenovo_invoice.LenovoInvoice(
name=args.Lenovo_file,
invoice_month=invoice_month,
data=preliminary_processed_data.copy(),
data=processed_data.copy(),
)
nonbillable_inv = nonbillable_invoice.NonbillableInvoice(
name=args.nonbillable_file,
invoice_month=invoice_month,
data=preliminary_processed_data.copy(),
data=processed_data.copy(),
nonbillable_pis=pi,
nonbillable_projects=projects,
)
Expand All @@ -245,9 +253,7 @@ def main():
billable_inv = billable_invoice.BillableInvoice(
name=args.output_file,
invoice_month=invoice_month,
data=preliminary_processed_data.copy(),
nonbillable_pis=pi,
nonbillable_projects=projects,
data=processed_data.copy(),
old_pi_filepath=old_pi_file,
limit_new_pi_credit_to_partners=rates_info.get_value_at(
"Limit New PI Credit to MGHPCC Partners", invoice_month
Expand Down
41 changes: 41 additions & 0 deletions process_report/processors/validate_billable_pi_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from dataclasses import dataclass
import logging

import pandas

from process_report.invoices import invoice
from process_report.processors import processor

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


@dataclass
class ValidateBillablePIsProcessor(processor.Processor):
nonbillable_pis: list[str]
nonbillable_projects: list[str]

@staticmethod
def _validate_pi_names(data: pandas.DataFrame):
invalid_pi_projects = data[pandas.isna(data[invoice.PI_FIELD])]
for i, row in invalid_pi_projects.iterrows():
logger.warn(
f"Billable project {row[invoice.PROJECT_FIELD]} has empty PI field"
)
return pandas.isna(data[invoice.PI_FIELD])

@staticmethod
def _get_billables(
data: pandas.DataFrame,
nonbillable_pis: list[str],
nonbillable_projects: list[str],
):
return ~data[invoice.PI_FIELD].isin(nonbillable_pis) & ~data[
invoice.PROJECT_FIELD
].isin(nonbillable_projects)

def _process(self):
self.data["Missing PI"] = self._validate_pi_names(self.data)
self.data["Is Billable"] = self._get_billables(
self.data, self.nonbillable_pis, self.nonbillable_projects
)
139 changes: 48 additions & 91 deletions process_report/tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from textwrap import dedent

from process_report import process_report, util
from process_report.invoices import nonbillable_invoice
from process_report.tests import util as test_utils


Expand Down Expand Up @@ -59,71 +58,6 @@ def test_timed_projects(self):
self.assertEqual(excluded_projects, expected_projects)


class TestRemoveNonBillables(TestCase):
def setUp(self):
data = {
"Manager (PI)": ["PI1", "PI2", "PI3", "PI4", "PI5"],
"Project - Allocation": [
"ProjectA",
"ProjectB",
"ProjectC",
"ProjectD",
"ProjectE",
],
"Untouch Data Column": ["DataA", "DataB", "DataC", "DataD", "DataE"],
}
self.dataframe = pandas.DataFrame(data)

self.pi_to_exclude = ["PI2", "PI3"]
self.projects_to_exclude = ["ProjectB", "ProjectD"]
self.nonbillable_invoice = nonbillable_invoice.NonbillableInvoice(
"Foo", "Foo", self.dataframe, self.pi_to_exclude, self.projects_to_exclude
)

self.output_file = tempfile.NamedTemporaryFile(delete=False)
self.output_file2 = tempfile.NamedTemporaryFile(delete=False)

def tearDown(self):
os.remove(self.output_file.name)
os.remove(self.output_file2.name)

def test_remove_billables(self):
self.nonbillable_invoice.process()
result_df = self.nonbillable_invoice.data

self.assertIn("PI2", result_df["Manager (PI)"].tolist())
self.assertIn("PI3", result_df["Manager (PI)"].tolist())
self.assertIn("PI4", result_df["Manager (PI)"].tolist())
self.assertIn("ProjectB", result_df["Project - Allocation"].tolist())
self.assertIn("ProjectC", result_df["Project - Allocation"].tolist())
self.assertIn("ProjectD", result_df["Project - Allocation"].tolist())

self.assertNotIn("PI1", result_df["Manager (PI)"].tolist())
self.assertNotIn("PI5", result_df["Manager (PI)"].tolist())
self.assertNotIn("ProjectA", result_df["Project - Allocation"].tolist())
self.assertNotIn("ProjectE", result_df["Project - Allocation"].tolist())


class TestBillableInvoice(TestCase):
def test_remove_nonbillables(self):
pis = [uuid.uuid4().hex for x in range(10)]
projects = [uuid.uuid4().hex for x in range(10)]
nonbillable_pis = pis[:3]
nonbillable_projects = projects[7:]
billable_pis = pis[3:7]
data = pandas.DataFrame({"Manager (PI)": pis, "Project - Allocation": projects})

test_invoice = test_utils.new_billable_invoice()
data = test_invoice._remove_nonbillables(
data, nonbillable_pis, nonbillable_projects
)
self.assertTrue(data[data["Manager (PI)"].isin(nonbillable_pis)].empty)
self.assertTrue(
data[data["Project - Allocation"].isin(nonbillable_projects)].empty
)
self.assertTrue(data.equals(data[data["Manager (PI)"].isin(billable_pis)]))


class TestMergeCSV(TestCase):
def setUp(self):
self.header = ["ID", "Name", "Age"]
Expand Down Expand Up @@ -281,6 +215,54 @@ def test_validate_alias(self):
self.assertTrue(answer_data.equals(validate_pi_alias_proc.data))


class TestRemoveNonbillablesProcessor(TestCase):
def test_remove_nonbillables(self):
pis = [uuid.uuid4().hex for x in range(10)]
projects = [uuid.uuid4().hex for x in range(10)]
nonbillable_pis = pis[:3]
nonbillable_projects = projects[7:]
billable_pis = pis[3:7]
data = pandas.DataFrame({"Manager (PI)": pis, "Project - Allocation": projects})

validate_billable_pi_proc = test_utils.new_validate_billable_pi_processor(
data=data,
nonbillable_pis=nonbillable_pis,
nonbillable_projects=nonbillable_projects,
)
validate_billable_pi_proc.process()
data = validate_billable_pi_proc.data
data = data[data["Is Billable"]]
self.assertTrue(data[data["Manager (PI)"].isin(nonbillable_pis)].empty)
self.assertTrue(
data[data["Project - Allocation"].isin(nonbillable_projects)].empty
)
self.assertTrue(data["Manager (PI)"].isin(billable_pis).all())


class TestValidateBillablePIProcessor(TestCase):
def test_validate_billables(self):
test_data = pandas.DataFrame(
{
"Manager (PI)": ["PI1", math.nan, "PI1", "PI2", "PI2"],
"Project - Allocation": [
"ProjectA",
"ProjectB",
"ProjectC",
"ProjectD",
"ProjectE",
],
}
)
self.assertEqual(1, len(test_data[pandas.isna(test_data["Manager (PI)"])]))
validate_billable_pi_proc = test_utils.new_validate_billable_pi_processor(
data=test_data
)
validate_billable_pi_proc.process()
output_data = validate_billable_pi_proc.data
output_data = output_data[~output_data["Missing PI"]]
self.assertEqual(0, len(output_data[pandas.isna(output_data["Manager (PI)"])]))


class TestMonthUtils(TestCase):
def test_get_month_diff(self):
testcases = [
Expand Down Expand Up @@ -710,31 +692,6 @@ def test_apply_BU_subsidy(self):
self.assertEqual(50, output_df.loc[3, "Balance"])


class TestValidateBillables(TestCase):
def setUp(self):
data = {
"Manager (PI)": ["PI1", math.nan, "PI1", "PI2", "PI2"],
"Project - Allocation": [
"ProjectA",
"ProjectB",
"ProjectC",
"ProjectD",
"ProjectE",
],
}
self.dataframe = pandas.DataFrame(data)

def test_validate_billables(self):
self.assertEqual(
1, len(self.dataframe[pandas.isna(self.dataframe["Manager (PI)"])])
)
test_invoice = test_utils.new_billable_invoice()
validated_df = test_invoice._validate_pi_names(self.dataframe)
self.assertEqual(
0, len(validated_df[pandas.isna(validated_df["Manager (PI)"])])
)


class TestLenovoProcessor(TestCase):
def test_process_lenovo(self):
test_invoice = pandas.DataFrame(
Expand Down
21 changes: 17 additions & 4 deletions process_report/tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
add_institution_processor,
validate_pi_alias_processor,
lenovo_processor,
validate_billable_pi_processor,
)


Expand All @@ -26,17 +27,13 @@ def new_billable_invoice(
name="",
invoice_month="0000-00",
data=pandas.DataFrame(),
nonbillable_pis=[],
nonbillable_projects=[],
old_pi_filepath="",
limit_new_pi_credit_to_partners=False,
):
return billable_invoice.BillableInvoice(
name,
invoice_month,
data,
nonbillable_pis,
nonbillable_projects,
old_pi_filepath,
limit_new_pi_credit_to_partners,
)
Expand Down Expand Up @@ -80,3 +77,19 @@ def new_validate_pi_alias_processor(

def new_lenovo_processor(name="", invoice_month="0000-00", data=pandas.DataFrame()):
return lenovo_processor.LenovoProcessor(name, invoice_month, data)


def new_validate_billable_pi_processor(
name="",
invoice_month="0000-00",
data=pandas.DataFrame(),
nonbillable_pis=[],
nonbillable_projects=[],
):
return validate_billable_pi_processor.ValidateBillablePIsProcessor(
name,
invoice_month,
data,
nonbillable_pis,
nonbillable_projects,
)

0 comments on commit 85ad7f7

Please sign in to comment.