Skip to content

Commit

Permalink
Implemented processors for removing nonbillables and validating billa…
Browse files Browse the repository at this point in the history
…ble PIs

Two new fields, which are used internally by the scripts and never exported,
have been added:
- IS_BILLABLE_FIELD: Boolean field for whether the project is billable
- MISSING_PI_FIELD: Boolean field that is True if PI name field is empty
  • Loading branch information
QuanMPhm committed Nov 7, 2024
1 parent 4f1ac15 commit c9db8d0
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 133 deletions.
30 changes: 3 additions & 27 deletions process_report/invoices/billable_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ class BillableInvoice(discount_invoice.DiscountInvoice):
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
PI_S3_FILEPATH = "PIs/PI.csv"

nonbillable_pis: list[str]
nonbillable_projects: list[str]

export_columns_list = [
invoice.INVOICE_DATE_FIELD,
invoice.PROJECT_FIELD,
Expand Down Expand Up @@ -62,26 +59,6 @@ def _load_old_pis(old_pi_filepath) -> pandas.DataFrame:

return old_pi_df

@staticmethod
def _remove_nonbillables(
data: pandas.DataFrame,
nonbillable_pis: list[str],
nonbillable_projects: list[str],
):
return data[
~data[invoice.PI_FIELD].isin(nonbillable_pis)
& ~data[invoice.PROJECT_FIELD].isin(nonbillable_projects)
]

@staticmethod
def _validate_pi_names(data: pandas.DataFrame):
invalid_pi_projects = data[pandas.isna(data[invoice.PI_FIELD])]
for i, row in invalid_pi_projects.iterrows():
logger.warn(
f"Billable project {row[invoice.PROJECT_FIELD]} has empty PI field"
)
return data[~pandas.isna(data[invoice.PI_FIELD])]

@staticmethod
def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
"""Returns time difference between current invoice month and PI's first invoice month
Expand All @@ -102,10 +79,9 @@ def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
return month_diff

def _prepare(self):
self.data = self._remove_nonbillables(
self.data, self.nonbillable_pis, self.nonbillable_projects
)
self.data = self._validate_pi_names(self.data)
self.data = self.data[
self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
]
self.data[invoice.CREDIT_FIELD] = None
self.data[invoice.CREDIT_CODE_FIELD] = None
self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD]
Expand Down
5 changes: 5 additions & 0 deletions process_report/invoices/invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@
BALANCE_FIELD = "Balance"
###

### Internally used field names
IS_BILLABLE_FIELD = "Is Billable"
MISSING_PI_FIELD = "Missing PI"
###


@dataclass
class Invoice:
Expand Down
5 changes: 1 addition & 4 deletions process_report/invoices/nonbillable_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,4 @@ class NonbillableInvoice(invoice.Invoice):
]

def _prepare_export(self):
self.data = self.data[
self.data[invoice.PI_FIELD].isin(self.nonbillable_pis)
| self.data[invoice.PROJECT_FIELD].isin(self.nonbillable_projects)
]
self.data = self.data[~self.data[invoice.IS_BILLABLE_FIELD]]
20 changes: 13 additions & 7 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
validate_pi_alias_processor,
add_institution_processor,
lenovo_processor,
validate_billable_pi_processor,
)

### PI file field names
Expand Down Expand Up @@ -221,19 +222,26 @@ def main():
)
lenovo_proc.process()

preliminary_processed_data = lenovo_proc.data
validate_billable_pi_proc = (
validate_billable_pi_processor.ValidateBillablePIsProcessor(
"", invoice_month, lenovo_proc.data, pi, projects
)
)
validate_billable_pi_proc.process()

processed_data = validate_billable_pi_proc.data

### Finish preliminary processing
### Initialize invoices

lenovo_inv = lenovo_invoice.LenovoInvoice(
name=args.Lenovo_file,
invoice_month=invoice_month,
data=preliminary_processed_data.copy(),
data=processed_data.copy(),
)
nonbillable_inv = nonbillable_invoice.NonbillableInvoice(
name=args.nonbillable_file,
invoice_month=invoice_month,
data=preliminary_processed_data.copy(),
data=processed_data.copy(),
nonbillable_pis=pi,
nonbillable_projects=projects,
)
Expand All @@ -245,9 +253,7 @@ def main():
billable_inv = billable_invoice.BillableInvoice(
name=args.output_file,
invoice_month=invoice_month,
data=preliminary_processed_data.copy(),
nonbillable_pis=pi,
nonbillable_projects=projects,
data=processed_data.copy(),
old_pi_filepath=old_pi_file,
limit_new_pi_credit_to_partners=rates_info.get_value_at(
"Limit New PI Credit to MGHPCC Partners", invoice_month
Expand Down
42 changes: 42 additions & 0 deletions process_report/processors/validate_billable_pi_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from dataclasses import dataclass
import logging

import pandas

from process_report.invoices import invoice
from process_report.processors import processor

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


@dataclass
class ValidateBillablePIsProcessor(processor.Processor):
nonbillable_pis: list[str]
nonbillable_projects: list[str]

@staticmethod
def _validate_pi_names(data: pandas.DataFrame):
invalid_pi_projects = data[pandas.isna(data[invoice.PI_FIELD])]
for i, row in invalid_pi_projects.iterrows():
if row[invoice.IS_BILLABLE_FIELD]:
logger.warning(
f"Billable project {row[invoice.PROJECT_FIELD]} has empty PI field"
)
return pandas.isna(data[invoice.PI_FIELD])

@staticmethod
def _get_billables(
data: pandas.DataFrame,
nonbillable_pis: list[str],
nonbillable_projects: list[str],
):
return ~data[invoice.PI_FIELD].isin(nonbillable_pis) & ~data[
invoice.PROJECT_FIELD
].isin(nonbillable_projects)

def _process(self):
self.data[invoice.IS_BILLABLE_FIELD] = self._get_billables(
self.data, self.nonbillable_pis, self.nonbillable_projects
)
self.data[invoice.MISSING_PI_FIELD] = self._validate_pi_names(self.data)
139 changes: 48 additions & 91 deletions process_report/tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from textwrap import dedent

from process_report import process_report, util
from process_report.invoices import nonbillable_invoice
from process_report.tests import util as test_utils


Expand Down Expand Up @@ -59,71 +58,6 @@ def test_timed_projects(self):
self.assertEqual(excluded_projects, expected_projects)


class TestRemoveNonBillables(TestCase):
def setUp(self):
data = {
"Manager (PI)": ["PI1", "PI2", "PI3", "PI4", "PI5"],
"Project - Allocation": [
"ProjectA",
"ProjectB",
"ProjectC",
"ProjectD",
"ProjectE",
],
"Untouch Data Column": ["DataA", "DataB", "DataC", "DataD", "DataE"],
}
self.dataframe = pandas.DataFrame(data)

self.pi_to_exclude = ["PI2", "PI3"]
self.projects_to_exclude = ["ProjectB", "ProjectD"]
self.nonbillable_invoice = nonbillable_invoice.NonbillableInvoice(
"Foo", "Foo", self.dataframe, self.pi_to_exclude, self.projects_to_exclude
)

self.output_file = tempfile.NamedTemporaryFile(delete=False)
self.output_file2 = tempfile.NamedTemporaryFile(delete=False)

def tearDown(self):
os.remove(self.output_file.name)
os.remove(self.output_file2.name)

def test_remove_billables(self):
self.nonbillable_invoice.process()
result_df = self.nonbillable_invoice.data

self.assertIn("PI2", result_df["Manager (PI)"].tolist())
self.assertIn("PI3", result_df["Manager (PI)"].tolist())
self.assertIn("PI4", result_df["Manager (PI)"].tolist())
self.assertIn("ProjectB", result_df["Project - Allocation"].tolist())
self.assertIn("ProjectC", result_df["Project - Allocation"].tolist())
self.assertIn("ProjectD", result_df["Project - Allocation"].tolist())

self.assertNotIn("PI1", result_df["Manager (PI)"].tolist())
self.assertNotIn("PI5", result_df["Manager (PI)"].tolist())
self.assertNotIn("ProjectA", result_df["Project - Allocation"].tolist())
self.assertNotIn("ProjectE", result_df["Project - Allocation"].tolist())


class TestBillableInvoice(TestCase):
def test_remove_nonbillables(self):
pis = [uuid.uuid4().hex for x in range(10)]
projects = [uuid.uuid4().hex for x in range(10)]
nonbillable_pis = pis[:3]
nonbillable_projects = projects[7:]
billable_pis = pis[3:7]
data = pandas.DataFrame({"Manager (PI)": pis, "Project - Allocation": projects})

test_invoice = test_utils.new_billable_invoice()
data = test_invoice._remove_nonbillables(
data, nonbillable_pis, nonbillable_projects
)
self.assertTrue(data[data["Manager (PI)"].isin(nonbillable_pis)].empty)
self.assertTrue(
data[data["Project - Allocation"].isin(nonbillable_projects)].empty
)
self.assertTrue(data.equals(data[data["Manager (PI)"].isin(billable_pis)]))


class TestMergeCSV(TestCase):
def setUp(self):
self.header = ["ID", "Name", "Age"]
Expand Down Expand Up @@ -281,6 +215,54 @@ def test_validate_alias(self):
self.assertTrue(answer_data.equals(validate_pi_alias_proc.data))


class TestRemoveNonbillablesProcessor(TestCase):
def test_remove_nonbillables(self):
pis = [uuid.uuid4().hex for x in range(10)]
projects = [uuid.uuid4().hex for x in range(10)]
nonbillable_pis = pis[:3]
nonbillable_projects = projects[7:]
billable_pis = pis[3:7]
data = pandas.DataFrame({"Manager (PI)": pis, "Project - Allocation": projects})

validate_billable_pi_proc = test_utils.new_validate_billable_pi_processor(
data=data,
nonbillable_pis=nonbillable_pis,
nonbillable_projects=nonbillable_projects,
)
validate_billable_pi_proc.process()
data = validate_billable_pi_proc.data
data = data[data["Is Billable"]]
self.assertTrue(data[data["Manager (PI)"].isin(nonbillable_pis)].empty)
self.assertTrue(
data[data["Project - Allocation"].isin(nonbillable_projects)].empty
)
self.assertTrue(data["Manager (PI)"].isin(billable_pis).all())


class TestValidateBillablePIProcessor(TestCase):
def test_validate_billables(self):
test_data = pandas.DataFrame(
{
"Manager (PI)": ["PI1", math.nan, "PI1", "PI2", "PI2"],
"Project - Allocation": [
"ProjectA",
"ProjectB",
"ProjectC",
"ProjectD",
"ProjectE",
],
}
)
self.assertEqual(1, len(test_data[pandas.isna(test_data["Manager (PI)"])]))
validate_billable_pi_proc = test_utils.new_validate_billable_pi_processor(
data=test_data
)
validate_billable_pi_proc.process()
output_data = validate_billable_pi_proc.data
output_data = output_data[~output_data["Missing PI"]]
self.assertEqual(0, len(output_data[pandas.isna(output_data["Manager (PI)"])]))


class TestMonthUtils(TestCase):
def test_get_month_diff(self):
testcases = [
Expand Down Expand Up @@ -710,31 +692,6 @@ def test_apply_BU_subsidy(self):
self.assertEqual(50, output_df.loc[3, "Balance"])


class TestValidateBillables(TestCase):
def setUp(self):
data = {
"Manager (PI)": ["PI1", math.nan, "PI1", "PI2", "PI2"],
"Project - Allocation": [
"ProjectA",
"ProjectB",
"ProjectC",
"ProjectD",
"ProjectE",
],
}
self.dataframe = pandas.DataFrame(data)

def test_validate_billables(self):
self.assertEqual(
1, len(self.dataframe[pandas.isna(self.dataframe["Manager (PI)"])])
)
test_invoice = test_utils.new_billable_invoice()
validated_df = test_invoice._validate_pi_names(self.dataframe)
self.assertEqual(
0, len(validated_df[pandas.isna(validated_df["Manager (PI)"])])
)


class TestLenovoProcessor(TestCase):
def test_process_lenovo(self):
test_invoice = pandas.DataFrame(
Expand Down
21 changes: 17 additions & 4 deletions process_report/tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
add_institution_processor,
validate_pi_alias_processor,
lenovo_processor,
validate_billable_pi_processor,
)


Expand All @@ -26,17 +27,13 @@ def new_billable_invoice(
name="",
invoice_month="0000-00",
data=pandas.DataFrame(),
nonbillable_pis=[],
nonbillable_projects=[],
old_pi_filepath="",
limit_new_pi_credit_to_partners=False,
):
return billable_invoice.BillableInvoice(
name,
invoice_month,
data,
nonbillable_pis,
nonbillable_projects,
old_pi_filepath,
limit_new_pi_credit_to_partners,
)
Expand Down Expand Up @@ -80,3 +77,19 @@ def new_validate_pi_alias_processor(

def new_lenovo_processor(name="", invoice_month="0000-00", data=pandas.DataFrame()):
return lenovo_processor.LenovoProcessor(name, invoice_month, data)


def new_validate_billable_pi_processor(
name="",
invoice_month="0000-00",
data=pandas.DataFrame(),
nonbillable_pis=[],
nonbillable_projects=[],
):
return validate_billable_pi_processor.ValidateBillablePIsProcessor(
name,
invoice_month,
data,
nonbillable_pis,
nonbillable_projects,
)

0 comments on commit c9db8d0

Please sign in to comment.