Skip to content

Commit

Permalink
Refactored BU Internal Invoice
Browse files Browse the repository at this point in the history
A new `bu_internal_invoice.py` file has been added.
Note that the BU Internal invoice depends on the billable invoice.
The test case for the BU Internal invoice has been modified appropriately

A new function `apply_flat_discount()` has been added. It contains the logic
shared by the functions that apply the new-PI credit and the BU subsidy.
More information on this function can be found in its docstring.
  • Loading branch information
QuanMPhm committed Jul 17, 2024
1 parent e4e11a1 commit 14df6c0
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 87 deletions.
37 changes: 15 additions & 22 deletions process_report/invoices/billable_invoice.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from dataclasses import dataclass
from decimal import Decimal
import logging
import sys

Expand Down Expand Up @@ -72,11 +71,11 @@ def _apply_credits_new_pi(
):
new_pi_credit_code = "0002"
INITIAL_CREDIT_AMOUNT = 1000
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA10SXM4"]

data[invoice.CREDIT_FIELD] = None
data[invoice.CREDIT_CODE_FIELD] = None
data[invoice.BALANCE_FIELD] = Decimal(0)
data[invoice.BALANCE_FIELD] = data[invoice.COST_FIELD]

current_pi_set = set(data[invoice.PI_FIELD])
invoice_month = data[invoice.INVOICE_DATE_FIELD].iat[0]
Expand All @@ -89,7 +88,10 @@ def _apply_credits_new_pi(
print(f"New PI Credit set at {new_pi_credit_amount} for {invoice_month}")

for pi in current_pi_set:
pi_projects = data[data[invoice.PI_FIELD] == pi]
pi_projects = data[
(data[invoice.PI_FIELD] == pi)
& ~(data[invoice.SU_TYPE_FIELD].isin(EXCLUDE_SU_TYPES))
]
pi_age = self._get_pi_age(old_pi_df, pi, invoice_month)
pi_old_pi_entry = old_pi_df.loc[
old_pi_df[invoice.PI_PI_FIELD] == pi
Expand Down Expand Up @@ -122,25 +124,16 @@ def _apply_credits_new_pi(
)
credit_used_field = invoice.PI_2ND_USED

initial_credit = remaining_credit
for i, row in pi_projects.iterrows():
if (
remaining_credit == 0
or row[invoice.SU_TYPE_FIELD] in EXCLUDE_SU_TYPES
):
data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD]
else:
project_cost = row[invoice.COST_FIELD]
applied_credit = min(project_cost, remaining_credit)

data.at[i, invoice.CREDIT_FIELD] = applied_credit
data.at[i, invoice.CREDIT_CODE_FIELD] = new_pi_credit_code
data.at[i, invoice.BALANCE_FIELD] = (
row[invoice.COST_FIELD] - applied_credit
)
remaining_credit -= applied_credit
credits_used = util.apply_flat_discount(
data,
pi_projects,
remaining_credit,
invoice.CREDIT_FIELD,
invoice.BALANCE_FIELD,
invoice.CREDIT_CODE_FIELD,
new_pi_credit_code,
)

credits_used = initial_credit - remaining_credit
if (pi_old_pi_entry[credit_used_field] != 0) and (
credits_used != pi_old_pi_entry[credit_used_field]
):
Expand Down
63 changes: 63 additions & 0 deletions process_report/invoices/bu_internal_invoice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from dataclasses import dataclass
from decimal import Decimal

import process_report.invoices.invoice as invoice
import process_report.util as util


@dataclass
class BUInternalInvoice(invoice.Invoice):
    """Invoice of Boston University projects with a per-PI subsidy applied.

    The incoming ``data`` must already carry the credit and balance columns
    (``invoice.CREDIT_FIELD`` / ``invoice.BALANCE_FIELD``), because
    ``_prepare`` selects them; in this code base that data comes from the
    billable invoice.

    :param subsidy_amount: Maximum subsidy granted to each PI, spread over
        that PI's projects in row order
    """

    subsidy_amount: int

    def _prepare(self):
        """Keep only Boston University rows and reduce them to the exported columns.

        Adds a ``"Project"`` column derived from ``invoice.PROJECT_FIELD`` and
        a subsidy column initialised to ``Decimal(0)``.
        """

        def get_project(row):
            # Drop everything from the last "-" onwards (presumably the
            # allocation suffix); names without a "-" are returned unchanged.
            project_alloc = row[invoice.PROJECT_FIELD]
            project_name, dash, _ = project_alloc.rpartition("-")
            return project_name if dash else project_alloc

        self.data = self.data[
            self.data[invoice.INSTITUTION_FIELD] == "Boston University"
        ].copy()
        self.data["Project"] = self.data.apply(get_project, axis=1)
        self.data[invoice.SUBSIDY_FIELD] = Decimal(0)
        self.data = self.data[
            [
                invoice.INVOICE_DATE_FIELD,
                invoice.PI_FIELD,
                "Project",
                invoice.COST_FIELD,
                invoice.CREDIT_FIELD,
                invoice.SUBSIDY_FIELD,
                invoice.BALANCE_FIELD,
            ]
        ]

    def _process(self):
        """Collapse rows that share a project name, then apply the subsidy.

        Each project keeps its first row; the cost, credit and balance of that
        row are replaced by the sums over all rows of the project.
        """
        project_list = self.data["Project"].unique()
        # drop_duplicates() can return a frame pandas still tracks as a slice
        # of self.data, which makes the .loc writes below raise
        # SettingWithCopyWarning. An explicit copy makes the frame independent.
        data_no_dup = self.data.drop_duplicates("Project").copy()
        sum_fields = [invoice.COST_FIELD, invoice.CREDIT_FIELD, invoice.BALANCE_FIELD]
        for project in project_list:
            project_mask = self.data["Project"] == project
            no_dup_project_mask = data_no_dup["Project"] == project

            sum_fields_sums = self.data[project_mask][sum_fields].sum().values
            data_no_dup.loc[no_dup_project_mask, sum_fields] = sum_fields_sums

        self.data = self._apply_subsidy(data_no_dup, self.subsidy_amount)

    def _apply_subsidy(self, dataframe, subsidy_amount):
        """Give every PI in ``dataframe`` up to ``subsidy_amount`` of subsidy.

        The subsidy is written to ``invoice.SUBSIDY_FIELD`` and subtracted
        from ``invoice.BALANCE_FIELD``. ``dataframe`` is modified in place and
        also returned.
        """
        pi_list = dataframe[invoice.PI_FIELD].unique()

        for pi in pi_list:
            # Re-select on every iteration so the rows reflect the current
            # state of the frame.
            pi_projects = dataframe[dataframe[invoice.PI_FIELD] == pi]
            util.apply_flat_discount(
                dataframe,
                pi_projects,
                subsidy_amount,
                invoice.SUBSIDY_FIELD,
                invoice.BALANCE_FIELD,
            )

        return dataframe
74 changes: 14 additions & 60 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import os
import sys
import datetime
from decimal import Decimal

import json
import pandas
Expand All @@ -13,6 +12,7 @@
lenovo_invoice,
nonbillable_invoice,
billable_invoice,
bu_internal_invoice,
)


Expand Down Expand Up @@ -166,7 +166,7 @@ def main():
parser.add_argument(
"--BU-invoice-file",
required=False,
default="BU_Internal.csv",
default="BU_Internal",
help="Name of output csv for BU invoices",
)
parser.add_argument(
Expand Down Expand Up @@ -270,8 +270,19 @@ def main():
bucket = get_invoice_bucket()
billable_inv.export_s3(bucket)

bu_internal_inv = bu_internal_invoice.BUInternalInvoice(
name=args.BU_invoice_file,
invoice_month=invoice_month,
data=billable_inv.data,
subsidy_amount=args.BU_subsidy_amount,
)
bu_internal_inv.process()
bu_internal_inv.export()
if args.upload_to_s3:
bucket = get_invoice_bucket()
bu_internal_inv.export_s3(bucket)

export_pi_billables(billable_inv.data, args.output_folder, invoice_month)
export_BU_only(billable_inv.data, args.BU_invoice_file, args.BU_subsidy_amount)
export_HU_BU(billable_inv.data, args.HU_BU_invoice_file)

if args.upload_to_s3:
Expand Down Expand Up @@ -421,63 +432,6 @@ def export_pi_billables(dataframe: pandas.DataFrame, output_folder, invoice_mont
)


def export_BU_only(dataframe: pandas.DataFrame, output_file, subsidy_amount):
    """Write the subsidised Boston University invoice to ``output_file`` as CSV.

    Rows of ``dataframe`` whose institution is "Boston University" are reduced
    to the exported columns, merged per project name (cost, credit and balance
    are summed), given the per-PI subsidy, and written with ``to_csv``.
    """

    def get_project(row):
        # Text before the last "-"; the full name when it contains no "-".
        full_name = row[PROJECT_FIELD]
        prefix, dash, _ = full_name.rpartition("-")
        return prefix if dash else full_name

    is_bu = dataframe[INSTITUTION_FIELD] == "Boston University"
    BU_projects = dataframe[is_bu].copy()
    BU_projects["Project"] = BU_projects.apply(get_project, axis=1)
    BU_projects[SUBSIDY_FIELD] = Decimal(0)

    exported_columns = [
        INVOICE_DATE_FIELD,
        PI_FIELD,
        "Project",
        COST_FIELD,
        CREDIT_FIELD,
        SUBSIDY_FIELD,
        BALANCE_FIELD,
    ]
    BU_projects = BU_projects[exported_columns]

    summed_columns = [COST_FIELD, CREDIT_FIELD, BALANCE_FIELD]
    project_names = BU_projects["Project"].unique()
    one_row_per_project = BU_projects.drop_duplicates("Project", inplace=False)
    for project_name in project_names:
        rows_of_project = BU_projects[BU_projects["Project"] == project_name]
        totals = rows_of_project[summed_columns].sum().values
        target_rows = one_row_per_project["Project"] == project_name
        one_row_per_project.loc[target_rows, summed_columns] = totals

    subsidised = _apply_subsidy(one_row_per_project, subsidy_amount)
    subsidised.to_csv(output_file)


def _apply_subsidy(dataframe, subsidy_amount):
    """Spread ``subsidy_amount`` over each PI's projects, in row order.

    For every PI, the subsidy granted to a project is the smaller of the
    project's balance and what is left of the PI's subsidy. The grant is
    written to the subsidy column and subtracted from the balance column.
    ``dataframe`` is modified in place and returned.
    """
    for pi in dataframe[PI_FIELD].unique():
        subsidy_left = subsidy_amount
        owned_projects = dataframe[dataframe[PI_FIELD] == pi]
        for idx, project in owned_projects.iterrows():
            balance = project[BALANCE_FIELD]
            grant = min(balance, subsidy_left)

            dataframe.at[idx, SUBSIDY_FIELD] = grant
            dataframe.at[idx, BALANCE_FIELD] = balance - grant
            subsidy_left -= grant

            # Nothing left for this PI's remaining projects.
            if subsidy_left == 0:
                break

    return dataframe


def export_HU_BU(dataframe, output_file):
HU_BU_projects = dataframe[
(dataframe[INSTITUTION_FIELD] == "Harvard University")
Expand Down
9 changes: 5 additions & 4 deletions process_report/tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,13 +635,14 @@ def setUp(self):
], # Test case where subsidy does/doesn't cover fully balance
}
self.dataframe = pandas.DataFrame(data)
output_file = tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".csv")
self.output_file = output_file.name
self.subsidy = 100

def test_apply_BU_subsidy(self):
process_report.export_BU_only(self.dataframe, self.output_file, self.subsidy)
output_df = pandas.read_csv(self.output_file)
test_invoice = test_utils.new_bu_internal_invoice(
data=self.dataframe, subsidy_amount=self.subsidy
)
test_invoice.process()
output_df = test_invoice.data.reset_index()

self.assertTrue(
set(
Expand Down
10 changes: 9 additions & 1 deletion process_report/tests/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas

from process_report.invoices import billable_invoice
from process_report.invoices import billable_invoice, bu_internal_invoice


def new_billable_invoice(
Expand All @@ -19,3 +19,11 @@ def new_billable_invoice(
nonbillable_projects,
old_pi_filepath,
)


def new_bu_internal_invoice(
    name="", invoice_month="0000-00", data=None, subsidy_amount=0
):
    """Build a ``BUInternalInvoice`` for tests, with placeholder defaults.

    :param name: Invoice name
    :param invoice_month: Invoice month string
    :param data: Invoice data; a new empty DataFrame is created when omitted
    :param subsidy_amount: Subsidy passed to the invoice
    """
    # A DataFrame is mutable, so it must not be used as a default argument:
    # one instance would be shared by every call that omits `data`.
    if data is None:
        data = pandas.DataFrame()
    return bu_internal_invoice.BUInternalInvoice(
        name, invoice_month, data, subsidy_amount
    )
54 changes: 54 additions & 0 deletions process_report/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import logging

import pandas

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -40,3 +41,56 @@ def get_month_diff(month_1, month_2):
dt1 = datetime.datetime.strptime(month_1, "%Y-%m")
dt2 = datetime.datetime.strptime(month_2, "%Y-%m")
return (dt1.year - dt2.year) * 12 + (dt1.month - dt2.month)


def apply_flat_discount(
    invoice: pandas.DataFrame,
    pi_projects: pandas.DataFrame,
    discount_amount: int,
    discount_field: str,
    balance_field: str,
    code_field: str = None,
    discount_code: str = None,
):
    """
    Apply a flat discount to a PI's projects, one project at a time in row order.

    Each project receives the smaller of its remaining balance and the
    discount still available; the loop stops once the discount is used up.
    `invoice` is changed directly and is therefore not returned. The balance
    is read from `pi_projects`, so its balance field must show the remaining
    cost of each project at the time of the call.

    If both optional parameters `code_field` and `discount_code` are passed in,
    `discount_code` is written to the `code_field` of every project the
    discount is applied to, comma-APPENDED when a code is already present.

    A `discount_amount` of zero or less applies nothing and returns 0.

    :param invoice: Dataframe containing all projects
    :param pi_projects: A subset of `invoice`, containing all projects for a PI you want to apply the discount
    :param discount_amount: The discount given to the PI
    :param discount_field: Name of the field to put the discount amount applied to each project
    :param balance_field: Name of the balance field
    :param code_field: Name of the discount code field
    :param discount_code: Code of the discount
    :return: The amount of discount used
    """
    remaining_discount_amount = discount_amount
    record_code = bool(code_field and discount_code)

    for i, row in pi_projects.iterrows():
        # `<= 0` rather than `== 0`: a negative amount would otherwise be
        # applied as a negative discount and raise the project's balance.
        if remaining_discount_amount <= 0:
            break

        remaining_project_cost = row[balance_field]
        applied_discount = min(remaining_project_cost, remaining_discount_amount)
        invoice.at[i, discount_field] = applied_discount
        invoice.at[i, balance_field] = remaining_project_cost - applied_discount
        remaining_discount_amount -= applied_discount

        if record_code:
            existing_code = invoice.at[i, code_field]
            if pandas.isna(existing_code):
                invoice.at[i, code_field] = discount_code
            else:
                invoice.at[i, code_field] = existing_code + "," + discount_code

    discount_used = discount_amount - remaining_discount_amount
    return discount_used

0 comments on commit 14df6c0

Please sign in to comment.