Skip to content

Commit

Permalink
Implemented discount_invoice to refactor BU-Internal and billable inv…
Browse files Browse the repository at this point in the history
…oice

The BU-Internal and billable invoices now subclass `DiscountInvoice`
(in the new `discount_invoice` module), an invoice class which implements
a function to apply a flat discount on a PI's projects. This reduces some
code redundancy, since the New-PI credit and the BU subsidy share similar logic.
Additional smaller changes were made to improve code readability.
  • Loading branch information
QuanMPhm committed Jul 30, 2024
1 parent 42a6e90 commit 1fcb400
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 44 deletions.
38 changes: 16 additions & 22 deletions process_report/invoices/billable_invoice.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from dataclasses import dataclass
from decimal import Decimal
import logging
import sys

import pandas
import pyarrow

import process_report.invoices.invoice as invoice
import process_report.invoices.discount_invoice as discount_invoice
import process_report.util as util


Expand All @@ -15,7 +15,7 @@


@dataclass
class BillableInvoice(invoice.Invoice):
class BillableInvoice(discount_invoice.DiscountInvoice):
NEW_PI_CREDIT_CODE = "0002"
INITIAL_CREDIT_AMOUNT = 1000
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
Expand All @@ -31,7 +31,7 @@ def _prepare(self):
self.data = self._validate_pi_names(self.data)
self.data[invoice.CREDIT_FIELD] = None
self.data[invoice.CREDIT_CODE_FIELD] = None
self.data[invoice.BALANCE_FIELD] = Decimal(0)
self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD]
self.old_pi_df = self._load_old_pis(self.old_pi_filepath)

def _process(self):
Expand Down Expand Up @@ -100,7 +100,10 @@ def _apply_credits_new_pi(
print(f"New PI Credit set at {new_pi_credit_amount} for {self.invoice_month}")

for pi in current_pi_set:
pi_projects = data[data[invoice.PI_FIELD] == pi]
pi_projects = data[
(data[invoice.PI_FIELD] == pi)
& ~(data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES))
]
pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month)
pi_old_pi_entry = old_pi_df.loc[
old_pi_df[invoice.PI_PI_FIELD] == pi
Expand Down Expand Up @@ -133,25 +136,16 @@ def _apply_credits_new_pi(
)
credit_used_field = invoice.PI_2ND_USED

initial_credit = remaining_credit
for i, row in pi_projects.iterrows():
if (
remaining_credit == 0
or row[invoice.SU_TYPE_FIELD] in self.EXCLUDE_SU_TYPES
):
data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD]
else:
project_cost = row[invoice.COST_FIELD]
applied_credit = min(project_cost, remaining_credit)

data.at[i, invoice.CREDIT_FIELD] = applied_credit
data.at[i, invoice.CREDIT_CODE_FIELD] = self.NEW_PI_CREDIT_CODE
data.at[i, invoice.BALANCE_FIELD] = (
row[invoice.COST_FIELD] - applied_credit
)
remaining_credit -= applied_credit
credits_used = self.apply_flat_discount(
data,
pi_projects,
remaining_credit,
invoice.CREDIT_FIELD,
invoice.BALANCE_FIELD,
invoice.CREDIT_CODE_FIELD,
self.NEW_PI_CREDIT_CODE,
)

credits_used = initial_credit - remaining_credit
if (pi_old_pi_entry[credit_used_field] != 0) and (
credits_used != pi_old_pi_entry[credit_used_field]
):
Expand Down
40 changes: 21 additions & 19 deletions process_report/invoices/bu_internal_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
from decimal import Decimal

import process_report.invoices.invoice as invoice
import process_report.invoices.discount_invoice as discount_invoice


@dataclass
class BUInternalInvoice(invoice.Invoice):
class BUInternalInvoice(discount_invoice.DiscountInvoice):
subsidy_amount: int

def _prepare(self):
Expand Down Expand Up @@ -34,35 +35,36 @@ def get_project(row):
]

def _process(self):
project_list = self.data["Project"].unique()
data_no_dup = self.data.drop_duplicates("Project", inplace=False)
data_summed_projects = self._sum_project_allocations(self.data)
self.data = self._apply_subsidy(data_summed_projects, self.subsidy_amount)

def _sum_project_allocations(self, dataframe):
"""A project may have multiple allocations, and therefore multiple rows
in the raw invoices. For BU-Internal invoice, we only want 1 row for
each unique project, summing up its allocations' costs"""
project_list = dataframe["Project"].unique()
data_no_dup = dataframe.drop_duplicates("Project", inplace=False)
sum_fields = [invoice.COST_FIELD, invoice.CREDIT_FIELD, invoice.BALANCE_FIELD]
for project in project_list:
project_mask = self.data["Project"] == project
project_mask = dataframe["Project"] == project
no_dup_project_mask = data_no_dup["Project"] == project

sum_fields_sums = self.data[project_mask][sum_fields].sum().values
sum_fields_sums = dataframe[project_mask][sum_fields].sum().values
data_no_dup.loc[no_dup_project_mask, sum_fields] = sum_fields_sums

self.data = self._apply_subsidy(data_no_dup, self.subsidy_amount)
return data_no_dup

def _apply_subsidy(self, dataframe, subsidy_amount):
pi_list = dataframe[invoice.PI_FIELD].unique()

for pi in pi_list:
pi_projects = dataframe[dataframe[invoice.PI_FIELD] == pi]
remaining_subsidy = subsidy_amount
for i, row in pi_projects.iterrows():
project_remaining_cost = row[invoice.BALANCE_FIELD]
applied_subsidy = min(project_remaining_cost, remaining_subsidy)

dataframe.at[i, invoice.SUBSIDY_FIELD] = applied_subsidy
dataframe.at[i, invoice.BALANCE_FIELD] = (
row[invoice.BALANCE_FIELD] - applied_subsidy
)
remaining_subsidy -= applied_subsidy

if remaining_subsidy == 0:
break
self.apply_flat_discount(
dataframe,
pi_projects,
subsidy_amount,
invoice.SUBSIDY_FIELD,
invoice.BALANCE_FIELD,
)

return dataframe
79 changes: 79 additions & 0 deletions process_report/invoices/discount_invoice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from dataclasses import dataclass

import pandas

import process_report.invoices.invoice as invoice


@dataclass
class DiscountInvoice(invoice.Invoice):
    """
    Invoice class containing functions useful for applying discounts
    on dataframes
    """

    @staticmethod
    def apply_flat_discount(
        invoice: pandas.DataFrame,
        pi_projects: pandas.DataFrame,
        discount_amount: int,
        discount_field: str,
        balance_field: str,
        code_field: str = None,
        discount_code: str = None,
    ):
        """
        Apply a flat discount across a PI's projects, in row order.

        Walks the rows of `pi_projects` and, for each one, takes as much of the
        remaining discount as the project's balance allows. The applied amount
        is written to `discount_field` and the reduced balance to
        `balance_field`. Iteration stops as soon as the discount is exhausted;
        rows not reached are left untouched.

        The `invoice` Dataframe is modified in place (rows are addressed by the
        index labels of `pi_projects`), so it is not returned. Balances are read
        from the `pi_projects` rows, so on entry `balance_field` must already
        hold the amount still owed for each project.

        If both `code_field` and `discount_code` are given, `discount_code` is
        written to `code_field` for every project visited, comma-APPENDED to
        any code already present.

        A `discount_amount` of zero or less applies nothing and reports zero
        usage, so a negative amount can never increase a project's balance.

        :param invoice: Dataframe containing all projects
        :param pi_projects: A subset of `invoice`, containing all projects for a PI you want to apply the discount
        :param discount_amount: The discount given to the PI
        :param discount_field: Name of the field to put the discount amount applied to each project
        :param balance_field: Name of the balance field
        :param code_field: Name of the discount code field
        :param discount_code: Code of the discount
        :return: The amount of discount used
        """
        # NOTE: inside this function the `invoice` parameter shadows the
        # module-level `invoice` import; only the Dataframe is reachable here.

        # Whether to tag projects does not change between rows, so decide once.
        tag_projects = bool(code_field and discount_code)

        def apply_discount_on_project(remaining_discount_amount, project_i, project):
            # A project can absorb at most its own outstanding balance.
            remaining_project_cost = project[balance_field]
            applied_discount = min(remaining_project_cost, remaining_discount_amount)
            invoice.at[project_i, discount_field] = applied_discount
            invoice.at[project_i, balance_field] = (
                remaining_project_cost - applied_discount
            )
            return remaining_discount_amount - applied_discount

        def apply_credit_code_on_project(project_i):
            existing_code = invoice.at[project_i, code_field]
            if pandas.isna(existing_code):
                invoice.at[project_i, code_field] = discount_code
            else:
                invoice.at[project_i, code_field] = (
                    existing_code + "," + discount_code
                )

        remaining_discount_amount = discount_amount
        for i, row in pi_projects.iterrows():
            # `<=` rather than `==`: a non-positive amount must never be applied,
            # otherwise min() would pick the negative value and raise balances.
            if remaining_discount_amount <= 0:
                break

            remaining_discount_amount = apply_discount_on_project(
                remaining_discount_amount, i, row
            )
            if tag_projects:
                apply_credit_code_on_project(i)

        discount_used = discount_amount - remaining_discount_amount
        return discount_used
5 changes: 2 additions & 3 deletions process_report/tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os
import uuid
import math
from decimal import Decimal
from textwrap import dedent

from process_report import process_report, util
Expand Down Expand Up @@ -418,7 +417,7 @@ def setUp(self):
self.dataframe = pandas.DataFrame(data)
self.dataframe["Credit"] = None
self.dataframe["Credit Code"] = None
self.dataframe["Balance"] = Decimal(0)
self.dataframe["Balance"] = self.dataframe["Cost"]
self.answer_dataframe = pandas.DataFrame(answer_df_dict)
old_pi = [
"PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used",
Expand Down Expand Up @@ -515,7 +514,7 @@ def setUp(self):
)
self.dataframe_no_gpu["Credit"] = None
self.dataframe_no_gpu["Credit Code"] = None
self.dataframe_no_gpu["Balance"] = Decimal(0)
self.dataframe_no_gpu["Balance"] = self.dataframe_no_gpu["Cost"]
old_pi_no_gpu = [
"PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used",
"OldPI,2024-03,500,200,0",
Expand Down

0 comments on commit 1fcb400

Please sign in to comment.