Skip to content

Commit

Permalink
Further internal refactoring of billable invoice
Browse files Browse the repository at this point in the history
  • Loading branch information
QuanMPhm committed Aug 7, 2024
1 parent 1c4c271 commit 0a08fea
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 87 deletions.
176 changes: 99 additions & 77 deletions process_report/invoices/billable_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,35 @@

@dataclass
class BillableInvoice(invoice.Invoice):
NEW_PI_CREDIT_CODE = "0002"
INITIAL_CREDIT_AMOUNT = 1000
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
PI_S3_FILEPATH = "PIs/PI.csv"

nonbillable_pis: list[str]
nonbillable_projects: list[str]
old_pi_filepath: str

def _prepare(self):
self.data = self._remove_nonbillables(
self.data, self.nonbillable_pis, self.nonbillable_projects
)
self.data = self._validate_pi_names(self.data)
@staticmethod
def _load_old_pis(old_pi_filepath) -> pandas.DataFrame:
try:
old_pi_df = pandas.read_csv(
old_pi_filepath,
dtype={
invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
pyarrow.decimal128(21, 2)
),
invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
},
)
except FileNotFoundError:
sys.exit("Applying credit 0002 failed. Old PI file does not exist")

def _process(self):
old_pi_df = self._load_old_pis(self.old_pi_filepath)
self.data, updated_old_pi_df = self._apply_credits_new_pi(self.data, old_pi_df)
self._dump_old_pis(self.old_pi_filepath, updated_old_pi_df)
return old_pi_df

@staticmethod
def _remove_nonbillables(
self,
data: pandas.DataFrame,
nonbillable_pis: list[str],
nonbillable_projects: list[str],
Expand All @@ -42,55 +54,97 @@ def _remove_nonbillables(
& ~data[invoice.PROJECT_FIELD].isin(nonbillable_projects)
]

def _validate_pi_names(self, data: pandas.DataFrame):
@staticmethod
def _validate_pi_names(data: pandas.DataFrame):
    """Drop rows whose PI field is empty, logging a warning for each one.

    Args:
        data: Invoice rows; the ``invoice.PI_FIELD`` and
            ``invoice.PROJECT_FIELD`` columns are read.

    Returns:
        The rows of ``data`` whose PI field is not NA.
    """
    missing_pi = pandas.isna(data[invoice.PI_FIELD])
    for project in data.loc[missing_pi, invoice.PROJECT_FIELD]:
        # Logger.warn is a deprecated alias that was removed in Python 3.13;
        # Logger.warning is the supported spelling.
        logger.warning(f"Billable project {project} has empty PI field")
    return data[~missing_pi]

def _load_old_pis(self, old_pi_filepath) -> pandas.DataFrame:
try:
old_pi_df = pandas.read_csv(
old_pi_filepath,
dtype={
invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
pyarrow.decimal128(21, 2)
),
invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
},
@staticmethod
def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
"""Returns time difference between current invoice month and PI's first invoice month
i.e. 0 for new PIs.
Will raise an error if the PI's age is negative, which suggests a faulty invoice or a program bug"""
first_invoice_month = old_pi_df.loc[
old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH
]
if first_invoice_month.empty:
return 0

month_diff = util.get_month_diff(invoice_month, first_invoice_month.iat[0])
if month_diff < 0:
sys.exit(
f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!"
)
except FileNotFoundError:
sys.exit("Applying credit 0002 failed. Old PI file does not exist")
else:
return month_diff

return old_pi_df
def _prepare(self):
    """Filter the invoice rows and set up the state used by ``_process``.

    Removes nonbillable PIs/projects, drops rows with an empty PI field,
    initialises the credit, credit-code and balance columns, and loads the
    old-PI table from ``self.old_pi_filepath`` into ``self.old_pi_df``.
    """
    billable_rows = self._remove_nonbillables(
        self.data, self.nonbillable_pis, self.nonbillable_projects
    )
    self.data = self._validate_pi_names(billable_rows)

    # Credit columns start out empty; the balance column starts at zero.
    for empty_column in (invoice.CREDIT_FIELD, invoice.CREDIT_CODE_FIELD):
        self.data[empty_column] = None
    self.data[invoice.BALANCE_FIELD] = Decimal(0)

    self.old_pi_df = self._load_old_pis(self.old_pi_filepath)

def _process(self):
    """Run ``_apply_credits_new_pi`` and store both of its results.

    The first result replaces ``self.data``; the second is kept as
    ``self.updated_old_pi_df``.
    """
    credited = self._apply_credits_new_pi(self.data, self.old_pi_df)
    self.data, self.updated_old_pi_df = credited

def _prepare_export(self):
    """Cast the credit columns of ``self.updated_old_pi_df`` to decimal128(21, 2)."""
    decimal_columns = (
        invoice.PI_INITIAL_CREDITS,
        invoice.PI_1ST_USED,
        invoice.PI_2ND_USED,
    )
    column_dtypes = {
        column: pandas.ArrowDtype(pyarrow.decimal128(21, 2))
        for column in decimal_columns
    }
    self.updated_old_pi_df = self.updated_old_pi_df.astype(column_dtypes)

def export(self):
    """Export the invoice, then write the updated old-PI table to disk.

    The table is written as CSV (without the index) to
    ``self.old_pi_filepath``.
    """
    super().export()
    # Persist the table produced by _process and cast by _prepare_export.
    # self.old_pi_df is the table as loaded before credits were applied, so
    # writing it would drop any PI rows added during processing.
    self.updated_old_pi_df.to_csv(self.old_pi_filepath, index=False)

def export_s3(self, s3_bucket):
    """Upload the invoice, then the old-PI file, to ``s3_bucket``.

    The local file at ``self.old_pi_filepath`` is uploaded under the key
    ``self.PI_S3_FILEPATH``.
    """
    super().export_s3(s3_bucket)
    local_path, remote_key = self.old_pi_filepath, self.PI_S3_FILEPATH
    s3_bucket.upload_file(local_path, remote_key)

def _apply_credits_new_pi(
self, data: pandas.DataFrame, old_pi_df: pandas.DataFrame
):
new_pi_credit_code = "0002"
INITIAL_CREDIT_AMOUNT = 1000
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]

data[invoice.CREDIT_FIELD] = None
data[invoice.CREDIT_CODE_FIELD] = None
data[invoice.BALANCE_FIELD] = Decimal(0)

current_pi_set = set(data[invoice.PI_FIELD])
invoice_month = data[invoice.INVOICE_DATE_FIELD].iat[0]
invoice_pis = old_pi_df[old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month]
if invoice_pis[invoice.PI_INITIAL_CREDITS].empty or pandas.isna(
new_pi_credit_amount := invoice_pis[invoice.PI_INITIAL_CREDITS].iat[0]
def get_initial_credit_amount(
old_pi_df, invoice_month, default_initial_credit_amount
):
new_pi_credit_amount = INITIAL_CREDIT_AMOUNT

print(f"New PI Credit set at {new_pi_credit_amount} for {invoice_month}")
first_month_processed_pis = old_pi_df[
old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month
]
if first_month_processed_pis[
invoice.PI_INITIAL_CREDITS
].empty or pandas.isna(
new_pi_credit_amount := first_month_processed_pis[
invoice.PI_INITIAL_CREDITS
].iat[0]
):
new_pi_credit_amount = default_initial_credit_amount

return new_pi_credit_amount

new_pi_credit_amount = get_initial_credit_amount(
old_pi_df, self.invoice_month, self.INITIAL_CREDIT_AMOUNT
)
print(f"New PI Credit set at {new_pi_credit_amount} for {self.invoice_month}")

current_pi_set = set(data[invoice.PI_FIELD])
for pi in current_pi_set:
pi_projects = data[data[invoice.PI_FIELD] == pi]
pi_age = self._get_pi_age(old_pi_df, pi, invoice_month)
pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month)
pi_old_pi_entry = old_pi_df.loc[
old_pi_df[invoice.PI_PI_FIELD] == pi
].squeeze()
Expand All @@ -101,7 +155,7 @@ def _apply_credits_new_pi(
else:
if pi_age == 0:
if len(pi_old_pi_entry) == 0:
pi_entry = [pi, invoice_month, new_pi_credit_amount, 0, 0]
pi_entry = [pi, self.invoice_month, new_pi_credit_amount, 0, 0]
old_pi_df = pandas.concat(
[
pandas.DataFrame([pi_entry], columns=old_pi_df.columns),
Expand All @@ -126,15 +180,15 @@ def _apply_credits_new_pi(
for i, row in pi_projects.iterrows():
if (
remaining_credit == 0
or row[invoice.SU_TYPE_FIELD] in EXCLUDE_SU_TYPES
or row[invoice.SU_TYPE_FIELD] in self.EXCLUDE_SU_TYPES
):
data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD]
else:
project_cost = row[invoice.COST_FIELD]
applied_credit = min(project_cost, remaining_credit)

data.at[i, invoice.CREDIT_FIELD] = applied_credit
data.at[i, invoice.CREDIT_CODE_FIELD] = new_pi_credit_code
data.at[i, invoice.CREDIT_CODE_FIELD] = self.NEW_PI_CREDIT_CODE
data.at[i, invoice.BALANCE_FIELD] = (
row[invoice.COST_FIELD] - applied_credit
)
Expand All @@ -151,36 +205,4 @@ def _apply_credits_new_pi(
old_pi_df[invoice.PI_PI_FIELD] == pi, credit_used_field
] = credits_used

old_pi_df = old_pi_df.astype(
{
invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
pyarrow.decimal128(21, 2)
),
invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
},
)

return (data, old_pi_df)

def _dump_old_pis(self, old_pi_filepath, old_pi_df: pandas.DataFrame):
old_pi_df.to_csv(old_pi_filepath, index=False)

def _get_pi_age(self, old_pi_df: pandas.DataFrame, pi, invoice_month):
"""Returns time difference between current invoice month and PI's first invoice month
i.e. 0 for new PIs.
Will raise an error if the PI's age is negative, which suggests a faulty invoice or a program bug"""
first_invoice_month = old_pi_df.loc[
old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH
]
if first_invoice_month.empty:
return 0

month_diff = util.get_month_diff(invoice_month, first_invoice_month.iat[0])
if month_diff < 0:
sys.exit(
f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!"
)
else:
return month_diff
8 changes: 1 addition & 7 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def main():
parser.add_argument(
"--output-file",
required=False,
default="filtered_output",
default="billable",
help="Name of output file",
)
parser.add_argument(
Expand Down Expand Up @@ -282,7 +282,6 @@ def main():

upload_to_s3(invoice_list, invoice_month)
upload_to_s3_HU_BU(args.HU_BU_invoice_file, invoice_month)
upload_to_s3_old_pi_file(old_pi_file)


def fetch_s3_invoices(invoice_month):
Expand Down Expand Up @@ -365,11 +364,6 @@ def fetch_s3_old_pi_file():
return local_name


def upload_to_s3_old_pi_file(old_pi_file):
invoice_bucket = get_invoice_bucket()
invoice_bucket.upload_file(old_pi_file, PI_S3_FILEPATH)


def backup_to_s3_old_pi_file(old_pi_file):
    """Upload ``old_pi_file`` to the invoice bucket under a timestamped archive key."""
    bucket = get_invoice_bucket()
    timestamp = get_iso8601_time()
    bucket.upload_file(old_pi_file, f"PIs/Archive/PI {timestamp}.csv")
Expand Down
19 changes: 16 additions & 3 deletions process_report/tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import uuid
import math
from decimal import Decimal
from textwrap import dedent

from process_report import process_report, util
Expand Down Expand Up @@ -416,6 +417,9 @@ def setUp(self):
"Balance": [10, 100, 10000, 400, 100, 0, 0, 0, 0, 200, 700],
}
self.dataframe = pandas.DataFrame(data)
self.dataframe["Credit"] = None
self.dataframe["Credit Code"] = None
self.dataframe["Balance"] = Decimal(0)
self.answer_dataframe = pandas.DataFrame(answer_df_dict)
old_pi = [
"PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used",
Expand Down Expand Up @@ -510,6 +514,9 @@ def setUp(self):
"Cost": [500, 100, 100, 500, 500],
}
)
self.dataframe_no_gpu["Credit"] = None
self.dataframe_no_gpu["Credit Code"] = None
self.dataframe_no_gpu["Balance"] = Decimal(0)
old_pi_no_gpu = [
"PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used",
"OldPI,2024-03,500,200,0",
Expand Down Expand Up @@ -549,18 +556,24 @@ def tearDown(self):
os.remove(self.old_pi_no_gpu_file)

def test_apply_credit_0002(self):
test_invoice = test_utils.new_billable_invoice()
test_invoice = test_utils.new_billable_invoice(invoice_month="2024-03")
old_pi_df = test_invoice._load_old_pis(self.old_pi_file)
dataframe, updated_old_pi_df = test_invoice._apply_credits_new_pi(
self.dataframe, old_pi_df
)
dataframe = dataframe.astype({"Credit": "float64", "Balance": "int64"})
updated_old_pi_df = updated_old_pi_df.sort_values(by="PI", ignore_index=True)
updated_old_pi_df = updated_old_pi_df.astype(
dtype={
"Initial Credits": pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
"1st Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
"2nd Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
},
).sort_values(by=["PI"], ignore_index=True)
self.assertTrue(self.answer_dataframe.equals(dataframe))
self.assertTrue(self.old_pi_df_answer.equals(updated_old_pi_df))

def test_no_gpu(self):
test_invoice = test_utils.new_billable_invoice()
test_invoice = test_utils.new_billable_invoice(invoice_month="2024-03")
old_pi_df = test_invoice._load_old_pis(self.old_pi_no_gpu_file)
dataframe, _ = test_invoice._apply_credits_new_pi(
self.dataframe_no_gpu, old_pi_df
Expand Down

0 comments on commit 0a08fea

Please sign in to comment.