Skip to content

Commit

Permalink
Refactored BU Internal Invoice
Browse files Browse the repository at this point in the history
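A new `bu_internal_invoice.py` file has been added, containing the `BUInternalInvoice` class.
Note that the BU Internal invoice depends on the billable invoice: it is built from the billable invoice's data.
The test case for the BU Internal invoice has been updated to use the new class instead of `export_BU_only`.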
  • Loading branch information
QuanMPhm committed Jul 15, 2024
1 parent e4e11a1 commit 758bb8c
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 65 deletions.
68 changes: 68 additions & 0 deletions process_report/invoices/bu_internal_invoice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from dataclasses import dataclass
from decimal import Decimal

import process_report.invoices.invoice as invoice


@dataclass
class BUInternalInvoice(invoice.Invoice):
    """Per-project invoice for Boston University with a per-PI subsidy applied.

    The incoming ``data`` is filtered down to Boston University rows, collapsed
    to one row per project, and then a subsidy of up to ``subsidy_amount`` is
    deducted from each PI's balances.

    NOTE(review): ``_prepare`` and ``_process`` are presumably hooks driven by
    ``invoice.Invoice.process()`` -- confirm against the base class.
    """

    # Maximum subsidy granted to each PI; shared across all of that PI's projects.
    subsidy_amount: int

    def _prepare(self):
        """Restrict ``self.data`` to BU rows and to the columns the invoice reports.

        Adds a ``"Project"`` column (the allocation name with its last
        ``-``-separated suffix removed) and a subsidy column initialised to
        ``Decimal(0)``.
        """

        def get_project(project_alloc):
            # Keep everything before the last "-"; names without "-" are unchanged.
            head, sep, _ = project_alloc.rpartition("-")
            return head if sep else project_alloc

        self.data = self.data[
            self.data[invoice.INSTITUTION_FIELD] == "Boston University"
        ].copy()
        # Map over the single column rather than DataFrame.apply(axis=1): on a
        # frame with no BU rows, apply() returns a DataFrame instead of a Series
        # and the column assignment fails.
        self.data["Project"] = self.data[invoice.PROJECT_FIELD].map(get_project)
        self.data[invoice.SUBSIDY_FIELD] = Decimal(0)
        self.data = self.data[
            [
                invoice.INVOICE_DATE_FIELD,
                invoice.PI_FIELD,
                "Project",
                invoice.COST_FIELD,
                invoice.CREDIT_FIELD,
                invoice.SUBSIDY_FIELD,
                invoice.BALANCE_FIELD,
            ]
        ]

    def _process(self):
        """Collapse the data to one row per project, then apply the subsidy.

        For each project, the first row is kept and its cost, credit and
        balance are replaced with the sums over all of that project's rows.
        """
        sum_fields = [invoice.COST_FIELD, invoice.CREDIT_FIELD, invoice.BALANCE_FIELD]
        # Explicit copy: the rows are written to below (and in _apply_subsidy),
        # so they must not be treated as a slice of self.data.
        data_no_dup = self.data.drop_duplicates("Project").copy()

        for project in self.data["Project"].unique():
            project_mask = self.data["Project"] == project
            no_dup_project_mask = data_no_dup["Project"] == project

            sum_fields_sums = self.data[project_mask][sum_fields].sum().values
            data_no_dup.loc[no_dup_project_mask, sum_fields] = sum_fields_sums

        self.data = self._apply_subsidy(data_no_dup, self.subsidy_amount)

    def _apply_subsidy(self, dataframe, subsidy_amount):
        """Deduct up to ``subsidy_amount`` per PI from the project balances.

        Each PI starts with the full ``subsidy_amount``. It is spent on that
        PI's projects in row order until it runs out. ``dataframe`` is modified
        in place (subsidy and balance columns) and also returned.
        """
        pi_list = dataframe[invoice.PI_FIELD].unique()

        for pi in pi_list:
            pi_projects = dataframe[dataframe[invoice.PI_FIELD] == pi]
            remaining_subsidy = subsidy_amount
            for i, row in pi_projects.iterrows():
                project_remaining_cost = row[invoice.BALANCE_FIELD]
                # Never subsidise more than the project still owes.
                applied_subsidy = min(project_remaining_cost, remaining_subsidy)

                dataframe.at[i, invoice.SUBSIDY_FIELD] = applied_subsidy
                dataframe.at[i, invoice.BALANCE_FIELD] = (
                    row[invoice.BALANCE_FIELD] - applied_subsidy
                )
                remaining_subsidy -= applied_subsidy

                if remaining_subsidy == 0:
                    break

        return dataframe
74 changes: 14 additions & 60 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import os
import sys
import datetime
from decimal import Decimal

import json
import pandas
Expand All @@ -13,6 +12,7 @@
lenovo_invoice,
nonbillable_invoice,
billable_invoice,
bu_internal_invoice,
)


Expand Down Expand Up @@ -166,7 +166,7 @@ def main():
parser.add_argument(
"--BU-invoice-file",
required=False,
default="BU_Internal.csv",
default="BU_Internal",
help="Name of output csv for BU invoices",
)
parser.add_argument(
Expand Down Expand Up @@ -270,8 +270,19 @@ def main():
bucket = get_invoice_bucket()
billable_inv.export_s3(bucket)

bu_internal_inv = bu_internal_invoice.BUInternalInvoice(
name=args.BU_invoice_file,
invoice_month=invoice_month,
data=billable_inv.data,
subsidy_amount=args.BU_subsidy_amount,
)
bu_internal_inv.process()
bu_internal_inv.export()
if args.upload_to_s3:
bucket = get_invoice_bucket()
bu_internal_inv.export_s3(bucket)

export_pi_billables(billable_inv.data, args.output_folder, invoice_month)
export_BU_only(billable_inv.data, args.BU_invoice_file, args.BU_subsidy_amount)
export_HU_BU(billable_inv.data, args.HU_BU_invoice_file)

if args.upload_to_s3:
Expand Down Expand Up @@ -421,63 +432,6 @@ def export_pi_billables(dataframe: pandas.DataFrame, output_folder, invoice_mont
)


def export_BU_only(dataframe: pandas.DataFrame, output_file, subsidy_amount):
    """Write the Boston University invoice, one row per project, to a CSV file.

    Args:
        dataframe: Invoice rows; only those whose institution is
            "Boston University" are used. The input frame is not modified.
        output_file: Path passed to ``DataFrame.to_csv``.
        subsidy_amount: Maximum subsidy granted to each PI, spread over that
            PI's projects by ``_apply_subsidy``.
    """

    def get_project(project_alloc):
        # Keep everything before the last "-"; names without "-" are unchanged.
        head, sep, _ = project_alloc.rpartition("-")
        return head if sep else project_alloc

    BU_projects = dataframe[dataframe[INSTITUTION_FIELD] == "Boston University"].copy()
    # Map over the single column rather than DataFrame.apply(axis=1): on a frame
    # with no BU rows, apply() returns a DataFrame instead of a Series and the
    # column assignment fails.
    BU_projects["Project"] = BU_projects[PROJECT_FIELD].map(get_project)
    BU_projects[SUBSIDY_FIELD] = Decimal(0)
    BU_projects = BU_projects[
        [
            INVOICE_DATE_FIELD,
            PI_FIELD,
            "Project",
            COST_FIELD,
            CREDIT_FIELD,
            SUBSIDY_FIELD,
            BALANCE_FIELD,
        ]
    ]

    sum_fields = [COST_FIELD, CREDIT_FIELD, BALANCE_FIELD]
    # Explicit copy: the rows are written to below (and in _apply_subsidy), so
    # they must not be treated as a slice of BU_projects.
    BU_projects_no_dup = BU_projects.drop_duplicates("Project").copy()

    # Replace each project's first row with the sums over all of its rows.
    for project in BU_projects["Project"].unique():
        project_mask = BU_projects["Project"] == project
        no_dup_project_mask = BU_projects_no_dup["Project"] == project

        sum_fields_sums = BU_projects[project_mask][sum_fields].sum().values
        BU_projects_no_dup.loc[no_dup_project_mask, sum_fields] = sum_fields_sums

    BU_projects_no_dup = _apply_subsidy(BU_projects_no_dup, subsidy_amount)
    BU_projects_no_dup.to_csv(output_file)


def _apply_subsidy(dataframe, subsidy_amount):
    """Spend up to ``subsidy_amount`` per PI on that PI's project balances.

    Every PI starts with the full allowance, which is consumed by their
    projects in row order until nothing is left. The subsidy and balance
    columns of ``dataframe`` are updated in place; the same frame is returned.
    """
    for pi_name in dataframe[PI_FIELD].unique():
        budget_left = subsidy_amount
        rows_for_pi = dataframe.loc[dataframe[PI_FIELD] == pi_name]

        for row_label, project_row in rows_for_pi.iterrows():
            balance = project_row[BALANCE_FIELD]
            # A project can never receive more than it still owes.
            granted = min(balance, budget_left)

            dataframe.at[row_label, SUBSIDY_FIELD] = granted
            dataframe.at[row_label, BALANCE_FIELD] = balance - granted
            budget_left -= granted

            if budget_left == 0:
                # Allowance exhausted; remaining projects get no subsidy.
                break

    return dataframe


def export_HU_BU(dataframe, output_file):
HU_BU_projects = dataframe[
(dataframe[INSTITUTION_FIELD] == "Harvard University")
Expand Down
9 changes: 5 additions & 4 deletions process_report/tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,13 +635,14 @@ def setUp(self):
], # Test case where subsidy does/doesn't cover fully balance
}
self.dataframe = pandas.DataFrame(data)
output_file = tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".csv")
self.output_file = output_file.name
self.subsidy = 100

def test_apply_BU_subsidy(self):
process_report.export_BU_only(self.dataframe, self.output_file, self.subsidy)
output_df = pandas.read_csv(self.output_file)
test_invoice = test_utils.new_bu_internal_invoice(
data=self.dataframe, subsidy_amount=self.subsidy
)
test_invoice.process()
output_df = test_invoice.data.reset_index()

self.assertTrue(
set(
Expand Down
10 changes: 9 additions & 1 deletion process_report/tests/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas

from process_report.invoices import billable_invoice
from process_report.invoices import billable_invoice, bu_internal_invoice


def new_billable_invoice(
Expand All @@ -19,3 +19,11 @@ def new_billable_invoice(
nonbillable_projects,
old_pi_filepath,
)


def new_bu_internal_invoice(
    name="", invoice_month="0000-00", data=None, subsidy_amount=0
):
    """Build a ``BUInternalInvoice`` for tests, with placeholder defaults.

    ``data`` defaults to a fresh empty ``pandas.DataFrame`` on every call. A
    ``DataFrame()`` default in the signature would be created once and shared
    by every invoice built without explicit data.
    """
    if data is None:
        data = pandas.DataFrame()
    return bu_internal_invoice.BUInternalInvoice(
        name=name,
        invoice_month=invoice_month,
        data=data,
        subsidy_amount=subsidy_amount,
    )

0 comments on commit 758bb8c

Please sign in to comment.