diff --git a/process_report/invoices/NERC_total_invoice.py b/process_report/invoices/NERC_total_invoice.py index 9133333..335c52a 100644 --- a/process_report/invoices/NERC_total_invoice.py +++ b/process_report/invoices/NERC_total_invoice.py @@ -12,6 +12,24 @@ class NERCTotalInvoice(invoice.Invoice): "University of Rhode Island", ] + export_columns_list = [ + invoice.INVOICE_DATE_FIELD, + invoice.PROJECT_FIELD, + invoice.PROJECT_ID_FIELD, + invoice.PI_FIELD, + invoice.INVOICE_EMAIL_FIELD, + invoice.INVOICE_ADDRESS_FIELD, + invoice.INSTITUTION_FIELD, + invoice.INSTITUTION_ID_FIELD, + invoice.SU_HOURS_FIELD, + invoice.SU_TYPE_FIELD, + invoice.RATE_FIELD, + invoice.COST_FIELD, + invoice.CREDIT_FIELD, + invoice.CREDIT_CODE_FIELD, + invoice.BALANCE_FIELD, + ] + @property def output_path(self) -> str: return f"NERC-{self.invoice_month}-Total-Invoice.csv" diff --git a/process_report/invoices/billable_invoice.py b/process_report/invoices/billable_invoice.py index 0cf7d3e..a1d0363 100644 --- a/process_report/invoices/billable_invoice.py +++ b/process_report/invoices/billable_invoice.py @@ -1,100 +1,39 @@ from dataclasses import dataclass import logging -import sys import pandas import pyarrow -from process_report.invoices import invoice, discount_invoice -from process_report import util - +from process_report.invoices import invoice logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @dataclass -class BillableInvoice(discount_invoice.DiscountInvoice): - NEW_PI_CREDIT_CODE = "0002" - INITIAL_CREDIT_AMOUNT = 1000 - EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] +class BillableInvoice(invoice.Invoice): PI_S3_FILEPATH = "PIs/PI.csv" - nonbillable_pis: list[str] - nonbillable_projects: list[str] - old_pi_filepath: str - - @staticmethod - def _load_old_pis(old_pi_filepath) -> pandas.DataFrame: - try: - old_pi_df = pandas.read_csv( - old_pi_filepath, - dtype={ - invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype( - pyarrow.decimal128(21, 2) - ), - invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - }, - ) - except FileNotFoundError: - sys.exit("Applying credit 0002 failed. Old PI file does not exist") - - return old_pi_df - - @staticmethod - def _remove_nonbillables( - data: pandas.DataFrame, - nonbillable_pis: list[str], - nonbillable_projects: list[str], - ): - return data[ - ~data[invoice.PI_FIELD].isin(nonbillable_pis) - & ~data[invoice.PROJECT_FIELD].isin(nonbillable_projects) - ] - - @staticmethod - def _validate_pi_names(data: pandas.DataFrame): - invalid_pi_projects = data[pandas.isna(data[invoice.PI_FIELD])] - for i, row in invalid_pi_projects.iterrows(): - logger.warn( - f"Billable project {row[invoice.PROJECT_FIELD]} has empty PI field" - ) - return data[~pandas.isna(data[invoice.PI_FIELD])] + export_columns_list = [ + invoice.INVOICE_DATE_FIELD, + invoice.PROJECT_FIELD, + invoice.PROJECT_ID_FIELD, + invoice.PI_FIELD, + invoice.INVOICE_EMAIL_FIELD, + invoice.INVOICE_ADDRESS_FIELD, + invoice.INSTITUTION_FIELD, + invoice.INSTITUTION_ID_FIELD, + invoice.SU_HOURS_FIELD, + invoice.SU_TYPE_FIELD, + invoice.RATE_FIELD, + invoice.COST_FIELD, + invoice.CREDIT_FIELD, + invoice.CREDIT_CODE_FIELD, + invoice.BALANCE_FIELD, + ] - @staticmethod - def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month): - """Returns time difference between current invoice month and PI's first invoice month - I.e 0 for new PIs - Will raise an error if the PI'a age is negative, which suggests a faulty invoice, or a program bug""" - first_invoice_month = old_pi_df.loc[ - old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH - ] - if first_invoice_month.empty: - return 0 - - month_diff = util.get_month_diff(invoice_month, first_invoice_month.iat[0]) - if month_diff < 0: - sys.exit( - f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!" - ) - else: - return month_diff - - def _prepare(self): - self.data = self._remove_nonbillables( - self.data, self.nonbillable_pis, self.nonbillable_projects - ) - self.data = self._validate_pi_names(self.data) - self.data[invoice.CREDIT_FIELD] = None - self.data[invoice.CREDIT_CODE_FIELD] = None - self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD] - self.old_pi_df = self._load_old_pis(self.old_pi_filepath) - - def _process(self): - self.data, self.updated_old_pi_df = self._apply_credits_new_pi( - self.data, self.old_pi_df - ) + old_pi_filepath: str + updated_old_pi_df: pandas.DataFrame def _prepare_export(self): self.updated_old_pi_df = self.updated_old_pi_df.astype( @@ -114,88 +53,3 @@ def export(self): def export_s3(self, s3_bucket): super().export_s3(s3_bucket) s3_bucket.upload_file(self.old_pi_filepath, self.PI_S3_FILEPATH) - - def _apply_credits_new_pi( - self, data: pandas.DataFrame, old_pi_df: pandas.DataFrame - ): - def get_initial_credit_amount( - old_pi_df, invoice_month, default_initial_credit_amount - ): - first_month_processed_pis = old_pi_df[ - old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month - ] - if first_month_processed_pis[ - invoice.PI_INITIAL_CREDITS - ].empty or pandas.isna( - new_pi_credit_amount := first_month_processed_pis[ - invoice.PI_INITIAL_CREDITS - ].iat[0] - ): - new_pi_credit_amount = default_initial_credit_amount - - return new_pi_credit_amount - - new_pi_credit_amount = get_initial_credit_amount( - old_pi_df, self.invoice_month, self.INITIAL_CREDIT_AMOUNT - ) - print(f"New PI Credit set at {new_pi_credit_amount} for {self.invoice_month}") - - current_pi_set = set(data[invoice.PI_FIELD]) - for pi in current_pi_set: - credit_eligible_projects = data[ - (data[invoice.PI_FIELD] == pi) - & ~(data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES)) - ] - pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month) - pi_old_pi_entry = old_pi_df.loc[ - old_pi_df[invoice.PI_PI_FIELD] == pi - ].squeeze() - - if pi_age > 1: - for i, row in credit_eligible_projects.iterrows(): - data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD] - else: - if pi_age == 0: - if len(pi_old_pi_entry) == 0: - pi_entry = [pi, self.invoice_month, new_pi_credit_amount, 0, 0] - old_pi_df = pandas.concat( - [ - pandas.DataFrame([pi_entry], columns=old_pi_df.columns), - old_pi_df, - ], - ignore_index=True, - ) - pi_old_pi_entry = old_pi_df.loc[ - old_pi_df[invoice.PI_PI_FIELD] == pi - ].squeeze() - - remaining_credit = new_pi_credit_amount - credit_used_field = invoice.PI_1ST_USED - elif pi_age == 1: - remaining_credit = ( - pi_old_pi_entry[invoice.PI_INITIAL_CREDITS] - - pi_old_pi_entry[invoice.PI_1ST_USED] - ) - credit_used_field = invoice.PI_2ND_USED - - credits_used = self.apply_flat_discount( - data, - credit_eligible_projects, - remaining_credit, - invoice.CREDIT_FIELD, - invoice.BALANCE_FIELD, - invoice.CREDIT_CODE_FIELD, - self.NEW_PI_CREDIT_CODE, - ) - - if (pi_old_pi_entry[credit_used_field] != 0) and ( - credits_used != pi_old_pi_entry[credit_used_field] - ): - print( - f"Warning: PI file overwritten. PI {pi} previously used ${pi_old_pi_entry[credit_used_field]} of New PI credits, now uses ${credits_used}" - ) - old_pi_df.loc[ - old_pi_df[invoice.PI_PI_FIELD] == pi, credit_used_field - ] = credits_used - - return (data, old_pi_df) diff --git a/process_report/invoices/bu_internal_invoice.py b/process_report/invoices/bu_internal_invoice.py index 8226b97..df11b8b 100644 --- a/process_report/invoices/bu_internal_invoice.py +++ b/process_report/invoices/bu_internal_invoice.py @@ -1,42 +1,28 @@ from dataclasses import dataclass -from decimal import Decimal -import process_report.invoices.invoice as invoice -import process_report.invoices.discount_invoice as discount_invoice +from process_report.invoices import invoice @dataclass -class BUInternalInvoice(discount_invoice.DiscountInvoice): - subsidy_amount: int - - def _prepare(self): - def get_project(row): - project_alloc = row[invoice.PROJECT_FIELD] - if project_alloc.rfind("-") == -1: - return project_alloc - else: - return project_alloc[: project_alloc.rfind("-")] +class BUInternalInvoice(invoice.Invoice): + export_columns_list = [ + invoice.INVOICE_DATE_FIELD, + invoice.PI_FIELD, + "Project", + invoice.COST_FIELD, + invoice.CREDIT_FIELD, + invoice.BU_BALANCE_FIELD, + invoice.PI_BALANCE_FIELD, + ] + exported_columns_map = { + invoice.BU_BALANCE_FIELD: "Subsidy", + invoice.PI_BALANCE_FIELD: "Balance", + } - self.data = self.data[ - self.data[invoice.INSTITUTION_FIELD] == "Boston University" - ].copy() - self.data["Project"] = self.data.apply(get_project, axis=1) - self.data[invoice.SUBSIDY_FIELD] = Decimal(0) - self.data = self.data[ - [ - invoice.INVOICE_DATE_FIELD, - invoice.PI_FIELD, - "Project", - invoice.COST_FIELD, - invoice.CREDIT_FIELD, - invoice.SUBSIDY_FIELD, - invoice.BALANCE_FIELD, - ] - ] + subsidy_amount: int - def _process(self): - data_summed_projects = self._sum_project_allocations(self.data) - self.data = self._apply_subsidy(data_summed_projects, self.subsidy_amount) + def _prepare_export(self): + self.data = self._sum_project_allocations(self.data) def _sum_project_allocations(self, dataframe): """A project may have multiple allocations, and therefore multiple rows @@ -44,7 +30,12 @@ def _sum_project_allocations(self, dataframe): each unique project, summing up its allocations' costs""" project_list = dataframe["Project"].unique() data_no_dup = dataframe.drop_duplicates("Project", inplace=False) - sum_fields = [invoice.COST_FIELD, invoice.CREDIT_FIELD, invoice.BALANCE_FIELD] + sum_fields = [ + invoice.COST_FIELD, + invoice.CREDIT_FIELD, + invoice.BU_BALANCE_FIELD, + invoice.PI_BALANCE_FIELD, + ] for project in project_list: project_mask = dataframe["Project"] == project no_dup_project_mask = data_no_dup["Project"] == project @@ -53,18 +44,3 @@ def _sum_project_allocations(self, dataframe): data_no_dup.loc[no_dup_project_mask, sum_fields] = sum_fields_sums return data_no_dup - - def _apply_subsidy(self, dataframe, subsidy_amount): - pi_list = dataframe[invoice.PI_FIELD].unique() - - for pi in pi_list: - pi_projects = dataframe[dataframe[invoice.PI_FIELD] == pi] - self.apply_flat_discount( - dataframe, - pi_projects, - subsidy_amount, - invoice.SUBSIDY_FIELD, - invoice.BALANCE_FIELD, - ) - - return dataframe diff --git a/process_report/invoices/invoice.py b/process_report/invoices/invoice.py index 8599ec8..85c0c07 100644 --- a/process_report/invoices/invoice.py +++ b/process_report/invoices/invoice.py @@ -23,6 +23,7 @@ INSTITUTION_ID_FIELD = "Institution - Specific Code" SU_HOURS_FIELD = "SU Hours (GBhr or SUhr)" SU_TYPE_FIELD = "SU Type" +RATE_FIELD = "Rate" COST_FIELD = "Cost" CREDIT_FIELD = "Credit" CREDIT_CODE_FIELD = "Credit Code" @@ -30,9 +31,17 @@ BALANCE_FIELD = "Balance" ### +### Invoice additional fields (not used in exporting) +PI_BALANCE_FIELD = "PI Balance" +BU_BALANCE_FIELD = "BU Balance" +### + @dataclass class Invoice: + export_columns_list = list() + exported_columns_map = dict() + name: str invoice_month: str data: pandas.DataFrame @@ -78,7 +87,14 @@ def _prepare_export(self): that should or should not be exported after processing.""" pass + def _filter_columns(self): + """Filters and renames columns before exporting""" + self.data = self.data[self.export_columns_list].rename( + columns=self.exported_columns_map + ) + def export(self): + self._filter_columns() self.data.to_csv(self.output_path, index=False) def export_s3(self, s3_bucket): diff --git a/process_report/invoices/lenovo_invoice.py b/process_report/invoices/lenovo_invoice.py index fa3355f..c4a6e42 100644 --- a/process_report/invoices/lenovo_invoice.py +++ b/process_report/invoices/lenovo_invoice.py @@ -6,23 +6,18 @@ @dataclass class LenovoInvoice(invoice.Invoice): LENOVO_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] - SU_CHARGE_MULTIPLIER = 1 - def _prepare(self): + export_columns_list = [ + invoice.INVOICE_DATE_FIELD, + invoice.PROJECT_FIELD, + invoice.INSTITUTION_FIELD, + invoice.SU_HOURS_FIELD, + invoice.SU_TYPE_FIELD, + "Charge", + ] + exported_columns_map = {invoice.SU_HOURS_FIELD: "SU Hours"} + + def _prepare_export(self): self.data = self.data[ self.data[invoice.SU_TYPE_FIELD].isin(self.LENOVO_SU_TYPES) - ][ - [ - invoice.INVOICE_DATE_FIELD, - invoice.PROJECT_FIELD, - invoice.INSTITUTION_FIELD, - invoice.SU_HOURS_FIELD, - invoice.SU_TYPE_FIELD, - ] - ].copy() - - self.data.rename(columns={invoice.SU_HOURS_FIELD: "SU Hours"}, inplace=True) - self.data.insert(len(self.data.columns), "SU Charge", self.SU_CHARGE_MULTIPLIER) - - def _process(self): - self.data["Charge"] = self.data["SU Hours"] * self.data["SU Charge"] + ] diff --git a/process_report/invoices/nonbillable_invoice.py b/process_report/invoices/nonbillable_invoice.py index 701d308..508745a 100644 --- a/process_report/invoices/nonbillable_invoice.py +++ b/process_report/invoices/nonbillable_invoice.py @@ -8,6 +8,21 @@ class NonbillableInvoice(invoice.Invoice): nonbillable_pis: list[str] nonbillable_projects: list[str] + export_columns_list = [ + invoice.INVOICE_DATE_FIELD, + invoice.PROJECT_FIELD, + invoice.PROJECT_ID_FIELD, + invoice.PI_FIELD, + invoice.INVOICE_EMAIL_FIELD, + invoice.INVOICE_ADDRESS_FIELD, + invoice.INSTITUTION_FIELD, + invoice.INSTITUTION_ID_FIELD, + invoice.SU_HOURS_FIELD, + invoice.SU_TYPE_FIELD, + invoice.RATE_FIELD, + invoice.COST_FIELD, + ] + def _prepare_export(self): self.data = self.data[ self.data[invoice.PI_FIELD].isin(self.nonbillable_pis) diff --git a/process_report/process_report.py b/process_report/process_report.py index 3f309a2..be3b1bd 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -3,7 +3,6 @@ import sys import datetime -import json import pandas import pyarrow @@ -15,7 +14,15 @@ NERC_total_invoice, bu_internal_invoice, ) - +from process_report.processors import ( + validate_pi_alias_processor, + add_institution_processor, + lenovo_processor, + remove_nonbillables_processor, + validate_billable_pi_processor, + new_pi_credit_processor, + bu_subsidy_processor, +) ### PI file field names PI_PI_FIELD = "PI" @@ -51,26 +58,6 @@ ALIAS_S3_FILEPATH = "PIs/alias.csv" -def get_institution_from_pi(institute_map, pi_uname): - institution_domain = pi_uname.split("@")[-1] - for i in range(institution_domain.count(".") + 1): - if institution_name := institute_map.get(institution_domain, ""): - break - institution_domain = institution_domain[institution_domain.find(".") + 1 :] - - if institution_name == "": - print(f"Warning: PI name {pi_uname} does not match any institution!") - - return institution_name - - -def load_institute_map() -> dict: - with open("process_report/institute_map.json", "r") as f: - institute_map = json.load(f) - - return institute_map - - def load_alias(alias_file): alias_dict = dict() @@ -220,49 +207,93 @@ def main(): projects = list(set(projects + timed_projects_list)) - merged_dataframe = validate_pi_aliases(merged_dataframe, alias_dict) - merged_dataframe = add_institution(merged_dataframe) + ### Do some preliminary processing + + validate_pi_alias_proc = validate_pi_alias_processor.ValidatePIAliasProcessor( + "", invoice_month, merged_dataframe, alias_dict + ) + validate_pi_alias_proc.process() + + add_institute_proc = add_institution_processor.AddInstitutionProcessor( + "", invoice_month, validate_pi_alias_proc.data + ) + add_institute_proc.process() + + lenovo_proc = lenovo_processor.LenovoProcessor( + "", invoice_month, add_institute_proc.data + ) + lenovo_proc.process() + + ### Finish preliminary processing + lenovo_inv = lenovo_invoice.LenovoInvoice( - name=args.Lenovo_file, invoice_month=invoice_month, data=merged_dataframe.copy() + name=args.Lenovo_file, invoice_month=invoice_month, data=lenovo_proc.data ) nonbillable_inv = nonbillable_invoice.NonbillableInvoice( name=args.nonbillable_file, invoice_month=invoice_month, - data=merged_dataframe.copy(), + data=lenovo_proc.data, nonbillable_pis=pi, nonbillable_projects=projects, ) + ### Perform main processing + + remove_nonbillables_proc = remove_nonbillables_processor.RemoveNonbillables( + "", invoice_month, lenovo_proc.data, pi, projects + ) + remove_nonbillables_proc.process() + + validate_billable_pi_proc = ( + validate_billable_pi_processor.ValidateBillablePIsProcessor( + "", invoice_month, remove_nonbillables_proc.data + ) + ) + validate_billable_pi_proc.process() + + new_pi_credit_proc = new_pi_credit_processor.NewPICreditProcessor( + "", + invoice_month, + data=validate_billable_pi_proc.data, + old_pi_filepath=old_pi_file, + ) + new_pi_credit_proc.process() + + bu_subsidy_proc = bu_subsidy_processor.BUSubsidyProcessor( + "", invoice_month, new_pi_credit_proc.data.copy(), args.BU_subsidy_amount + ) + bu_subsidy_proc.process() + + ### Initialize invoices + if args.upload_to_s3: backup_to_s3_old_pi_file(old_pi_file) billable_inv = billable_invoice.BillableInvoice( name=args.output_file, invoice_month=invoice_month, - data=merged_dataframe.copy(), - nonbillable_pis=pi, - nonbillable_projects=projects, + data=bu_subsidy_proc.data, old_pi_filepath=old_pi_file, - ) - - process_and_export_invoices( - [lenovo_inv, nonbillable_inv, billable_inv], args.upload_to_s3 + updated_old_pi_df=new_pi_credit_proc.updated_old_pi_df, ) nerc_total_inv = NERC_total_invoice.NERCTotalInvoice( name=args.NERC_total_invoice_file, invoice_month=invoice_month, - data=billable_inv.data.copy(), + data=bu_subsidy_proc.data.copy(), ) bu_internal_inv = bu_internal_invoice.BUInternalInvoice( name=args.BU_invoice_file, invoice_month=invoice_month, - data=billable_inv.data.copy(), + data=bu_subsidy_proc.data.copy(), subsidy_amount=args.BU_subsidy_amount, ) - process_and_export_invoices([nerc_total_inv, bu_internal_inv], args.upload_to_s3) + process_and_export_invoices( + [lenovo_inv, nonbillable_inv, billable_inv, nerc_total_inv, bu_internal_inv], + args.upload_to_s3, + ) export_pi_billables(billable_inv.data.copy(), args.output_folder, invoice_month) @@ -334,13 +365,6 @@ def timed_projects(timed_projects_file, invoice_date): return dataframe[mask]["Project"].to_list() -def validate_pi_aliases(dataframe: pandas.DataFrame, alias_dict: dict): - for pi, pi_aliases in alias_dict.items(): - dataframe.loc[dataframe[PI_FIELD].isin(pi_aliases), PI_FIELD] = pi - - return dataframe - - def fetch_s3_alias_file(): local_name = "alias.csv" invoice_bucket = get_invoice_bucket() @@ -360,32 +384,6 @@ def backup_to_s3_old_pi_file(old_pi_file): invoice_bucket.upload_file(old_pi_file, f"PIs/Archive/PI {get_iso8601_time()}.csv") -def add_institution(dataframe: pandas.DataFrame): - """Determine every PI's institution name, logging any PI whose institution cannot be determined - This is performed by `get_institution_from_pi()`, which tries to match the PI's username to - a list of known institution email domains (i.e bu.edu), or to several edge cases (i.e rudolph) if - the username is not an email address. - - Exact matches are then mapped to the corresponding institution name. - - I.e "foo@bu.edu" would match with "bu.edu", which maps to the instition name "Boston University" - - The list of mappings are defined in `institute_map.json`. - """ - institute_map = load_institute_map() - dataframe = dataframe.astype({INSTITUTION_FIELD: "str"}) - for i, row in dataframe.iterrows(): - pi_name = row[PI_FIELD] - if pandas.isna(pi_name): - print(f"Project {row[PROJECT_FIELD]} has no PI") - else: - dataframe.at[i, INSTITUTION_FIELD] = get_institution_from_pi( - institute_map, pi_name - ) - - return dataframe - - def export_billables(dataframe, output_file): dataframe.to_csv(output_file, index=False) diff --git a/process_report/processors/add_institution_processor.py b/process_report/processors/add_institution_processor.py new file mode 100644 index 0000000..0d7e94b --- /dev/null +++ b/process_report/processors/add_institution_processor.py @@ -0,0 +1,58 @@ +from dataclasses import dataclass +import json + +import pandas + +from process_report.invoices import invoice +from process_report.processors import processor + + +@dataclass +class AddInstitutionProcessor(processor.Processor): + @staticmethod + def _load_institute_map() -> dict: + with open("process_report/institute_map.json", "r") as f: + institute_map = json.load(f) + + return institute_map + + @staticmethod + def _get_institution_from_pi(institute_map, pi_uname): + institution_domain = pi_uname.split("@")[-1] + for i in range(institution_domain.count(".") + 1): + if institution_name := institute_map.get(institution_domain, ""): + break + institution_domain = institution_domain[institution_domain.find(".") + 1 :] + + if institution_name == "": + print(f"Warning: PI name {pi_uname} does not match any institution!") + + return institution_name + + def _add_institution(self, dataframe: pandas.DataFrame): + """Determine every PI's institution name, logging any PI whose institution cannot be determined + This is performed by `get_institution_from_pi()`, which tries to match the PI's username to + a list of known institution email domains (i.e bu.edu), or to several edge cases (i.e rudolph) if + the username is not an email address. + + Exact matches are then mapped to the corresponding institution name. + + I.e "foo@bu.edu" would match with "bu.edu", which maps to the instition name "Boston University" + + The list of mappings are defined in `institute_map.json`. + """ + institute_map = self._load_institute_map() + dataframe = dataframe.astype({invoice.INSTITUTION_FIELD: "str"}) + for i, row in dataframe.iterrows(): + pi_name = row[invoice.PI_FIELD] + if pandas.isna(pi_name): + print(f"Project {row[invoice.PROJECT_FIELD]} has no PI") + else: + dataframe.at[ + i, invoice.INSTITUTION_FIELD + ] = self._get_institution_from_pi(institute_map, pi_name) + + return dataframe + + def _process(self): + self.data = self._add_institution(self.data) diff --git a/process_report/processors/bu_subsidy_processor.py b/process_report/processors/bu_subsidy_processor.py new file mode 100644 index 0000000..b93299d --- /dev/null +++ b/process_report/processors/bu_subsidy_processor.py @@ -0,0 +1,44 @@ +from dataclasses import dataclass +from decimal import Decimal + +from process_report.invoices import invoice +from process_report.processors import discount_processor + + +@dataclass +class BUSubsidyProcessor(discount_processor.DiscountProcessor): + IS_DISCOUNT_BY_NERC = False + + subsidy_amount: int + + def _apply_bu_subsidy(self, dataframe, subsidy_amount): + pi_list = dataframe[ + dataframe[invoice.INSTITUTION_FIELD] == "Boston University" + ][invoice.PI_FIELD].unique() + + for pi in pi_list: + pi_projects = dataframe[dataframe[invoice.PI_FIELD] == pi] + self.apply_flat_discount( + dataframe, + pi_projects, + invoice.PI_BALANCE_FIELD, + subsidy_amount, + invoice.BU_BALANCE_FIELD, + invoice.BALANCE_FIELD, + ) + + return dataframe + + def _prepare(self): + def get_project(row): + project_alloc = row[invoice.PROJECT_FIELD] + if project_alloc.rfind("-") == -1: + return project_alloc + else: + return project_alloc[: project_alloc.rfind("-")] + + self.data["Project"] = self.data.apply(get_project, axis=1) + self.data[invoice.BU_BALANCE_FIELD] = Decimal(0) + + def _process(self): + self.data = self._apply_bu_subsidy(self.data, self.subsidy_amount) diff --git a/process_report/invoices/discount_invoice.py b/process_report/processors/discount_processor.py similarity index 72% rename from process_report/invoices/discount_invoice.py rename to process_report/processors/discount_processor.py index 333b998..353111e 100644 --- a/process_report/invoices/discount_invoice.py +++ b/process_report/processors/discount_processor.py @@ -1,22 +1,19 @@ from dataclasses import dataclass - import pandas -import process_report.invoices.invoice as invoice +from process_report.processors import processor @dataclass -class DiscountInvoice(invoice.Invoice): - """ - Invoice class containing functions useful for applying discounts - on dataframes - """ +class DiscountProcessor(processor.Processor): + IS_DISCOUNT_BY_NERC = True - @staticmethod def apply_flat_discount( + self, invoice: pandas.DataFrame, pi_projects: pandas.DataFrame, - discount_amount: int, + pi_balance_field: str, + discount_amount: float, discount_field: str, balance_field: str, code_field: str = None, @@ -28,31 +25,37 @@ def apply_flat_discount( will change the provided `invoice` Dataframe directly. Therefore, it does not return the changed invoice. - This function assumes that the balance field shows the remaining cost of the project, + This function assumes that the PI balance field shows the remaining cost of the project, or what the PI would pay before the flat discount is applied. If the optional parameters `code_field` and `discount_code` are passed in, `discount_code` will be comma-APPENDED to the `code_field` of projects where the discount is applied + If `IS_DISCOUNT_BY_NERC` is true, the discount will be reflected in the + MGHPCC Balance field. + Returns the amount of discount used. :param invoice: Dataframe containing all projects :param pi_projects: A subset of `invoice`, containing all projects for a PI you want to apply the discount + :param pi_balance_field: Name of the field of the PI balance :param discount_amount: The discount given to the PI :param discount_field: Name of the field to put the discount amount applied to each project - :param balance_field: Name of the balance field + :param balance_field: Name of the MGHPCC balance field :param code_field: Name of the discount code field :param discount_code: Code of the discount """ def apply_discount_on_project(remaining_discount_amount, project_i, project): - remaining_project_balance = project[balance_field] + remaining_project_balance = project[pi_balance_field] applied_discount = min(remaining_project_balance, remaining_discount_amount) invoice.at[project_i, discount_field] = applied_discount - invoice.at[project_i, balance_field] = ( - project[balance_field] - applied_discount - ) + pi_balance_after_discount = project[pi_balance_field] - applied_discount + balance_after_discount = project[balance_field] - applied_discount + invoice.at[project_i, pi_balance_field] = pi_balance_after_discount + if self.IS_DISCOUNT_BY_NERC: + invoice.at[project_i, balance_field] = balance_after_discount remaining_discount_amount -= applied_discount return remaining_discount_amount diff --git a/process_report/processors/lenovo_processor.py b/process_report/processors/lenovo_processor.py new file mode 100644 index 0000000..808e820 --- /dev/null +++ b/process_report/processors/lenovo_processor.py @@ -0,0 +1,14 @@ +from dataclasses import dataclass + + +from process_report.invoices import invoice +from process_report.processors import processor + + +@dataclass +class LenovoProcessor(processor.Processor): + SU_CHARGE_MULTIPLIER = 1 + + def _process(self): + self.data["SU Charge"] = self.SU_CHARGE_MULTIPLIER + self.data["Charge"] = self.data[invoice.SU_HOURS_FIELD] * self.data["SU Charge"] diff --git a/process_report/processors/new_pi_credit_processor.py b/process_report/processors/new_pi_credit_processor.py new file mode 100644 index 0000000..245524b --- /dev/null +++ b/process_report/processors/new_pi_credit_processor.py @@ -0,0 +1,154 @@ +import sys + +from dataclasses import dataclass +import pandas +import pyarrow + +from process_report import util +from process_report.invoices import invoice +from process_report.processors import discount_processor + + +@dataclass +class NewPICreditProcessor(discount_processor.DiscountProcessor): + NEW_PI_CREDIT_CODE = "0002" + INITIAL_CREDIT_AMOUNT = 1000 + EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] + IS_DISCOUNT_BY_NERC = True + + old_pi_filepath: str + + @staticmethod + def _load_old_pis(old_pi_filepath) -> pandas.DataFrame: + try: + old_pi_df = pandas.read_csv( + old_pi_filepath, + dtype={ + invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype( + pyarrow.decimal128(21, 2) + ), + invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), + invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), + }, + ) + except FileNotFoundError: + sys.exit("Applying credit 0002 failed. Old PI file does not exist") + + return old_pi_df + + @staticmethod + def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month): + """Returns time difference between current invoice month and PI's first invoice month + I.e 0 for new PIs + Will raise an error if the PI'a age is negative, which suggests a faulty invoice, or a program bug""" + first_invoice_month = old_pi_df.loc[ + old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH + ] + if first_invoice_month.empty: + return 0 + + month_diff = util.get_month_diff(invoice_month, first_invoice_month.iat[0]) + if month_diff < 0: + sys.exit( + f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!" + ) + else: + return month_diff + + def _apply_credits_new_pi( + self, data: pandas.DataFrame, old_pi_df: pandas.DataFrame + ): + def get_initial_credit_amount( + old_pi_df, invoice_month, default_initial_credit_amount + ): + first_month_processed_pis = old_pi_df[ + old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month + ] + if first_month_processed_pis[ + invoice.PI_INITIAL_CREDITS + ].empty or pandas.isna( + new_pi_credit_amount := first_month_processed_pis[ + invoice.PI_INITIAL_CREDITS + ].iat[0] + ): + new_pi_credit_amount = default_initial_credit_amount + + return new_pi_credit_amount + + new_pi_credit_amount = get_initial_credit_amount( + old_pi_df, self.invoice_month, self.INITIAL_CREDIT_AMOUNT + ) + print(f"New PI Credit set at {new_pi_credit_amount} for {self.invoice_month}") + + current_pi_set = set(data[invoice.PI_FIELD]) + for pi in current_pi_set: + credit_eligible_projects = data[ + (data[invoice.PI_FIELD] == pi) + & ~(data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES)) + ] + pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month) + pi_old_pi_entry = old_pi_df.loc[ + old_pi_df[invoice.PI_PI_FIELD] == pi + ].squeeze() + + if pi_age > 1: + for i, row in credit_eligible_projects.iterrows(): + data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD] + else: + if pi_age == 0: + if len(pi_old_pi_entry) == 0: + pi_entry = [pi, self.invoice_month, new_pi_credit_amount, 0, 0] + old_pi_df = pandas.concat( + [ + old_pi_df, + pandas.DataFrame([pi_entry], columns=old_pi_df.columns), + ], + ignore_index=True, + ) + pi_old_pi_entry = old_pi_df.loc[ + old_pi_df[invoice.PI_PI_FIELD] == pi + ].squeeze() + + remaining_credit = new_pi_credit_amount + credit_used_field = invoice.PI_1ST_USED + elif pi_age == 1: + remaining_credit = ( + pi_old_pi_entry[invoice.PI_INITIAL_CREDITS] + - pi_old_pi_entry[invoice.PI_1ST_USED] + ) + credit_used_field = invoice.PI_2ND_USED + + credits_used = self.apply_flat_discount( + data, + credit_eligible_projects, + invoice.PI_BALANCE_FIELD, + remaining_credit, + invoice.CREDIT_FIELD, + invoice.BALANCE_FIELD, + invoice.CREDIT_CODE_FIELD, + self.NEW_PI_CREDIT_CODE, + ) + + if (pi_old_pi_entry[credit_used_field] != 0) and ( + credits_used != pi_old_pi_entry[credit_used_field] + ): + print( + f"Warning: PI file overwritten. PI {pi} previously used ${pi_old_pi_entry[credit_used_field]} of New PI credits, now uses ${credits_used}" + ) + old_pi_df.loc[ + old_pi_df[invoice.PI_PI_FIELD] == pi, credit_used_field + ] = credits_used + + return (data, old_pi_df) + + def _prepare(self): + self.data[invoice.CREDIT_FIELD] = None + self.data[invoice.CREDIT_CODE_FIELD] = None + self.data[invoice.PI_BALANCE_FIELD] = self.data[invoice.COST_FIELD] + self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD] + self.old_pi_df = self._load_old_pis(self.old_pi_filepath) + + def _process(self): + self.data, self.updated_old_pi_df = self._apply_credits_new_pi( + self.data, self.old_pi_df + ) diff --git a/process_report/processors/processor.py b/process_report/processors/processor.py new file mode 100644 index 0000000..5a88fd6 --- /dev/null +++ b/process_report/processors/processor.py @@ -0,0 +1,8 @@ +from dataclasses import dataclass + +from process_report.invoices import invoice + + +@dataclass +class Processor(invoice.Invoice): + pass diff --git a/process_report/processors/remove_nonbillables_processor.py b/process_report/processors/remove_nonbillables_processor.py new file mode 100644 index 0000000..326c98b --- /dev/null +++ b/process_report/processors/remove_nonbillables_processor.py @@ -0,0 +1,28 @@ +from dataclasses import dataclass + +import pandas + +from process_report.invoices import invoice +from process_report.processors import processor + + +@dataclass +class RemoveNonbillables(processor.Processor): + nonbillable_pis: list[str] + nonbillable_projects: list[str] + + @staticmethod + def _remove_nonbillables( + data: pandas.DataFrame, + nonbillable_pis: list[str], + nonbillable_projects: list[str], + ): + return data[ + ~data[invoice.PI_FIELD].isin(nonbillable_pis) + & ~data[invoice.PROJECT_FIELD].isin(nonbillable_projects) + ] + + def _process(self): + self.data = self._remove_nonbillables( + self.data, self.nonbillable_pis, self.nonbillable_projects + ) diff --git a/process_report/processors/validate_billable_pi_processor.py b/process_report/processors/validate_billable_pi_processor.py new file mode 100644 index 0000000..72ab60a --- /dev/null +++ b/process_report/processors/validate_billable_pi_processor.py @@ -0,0 +1,25 @@ +from dataclasses import dataclass +import logging + +import pandas + +from process_report.invoices import invoice +from process_report.processors import processor + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +@dataclass +class ValidateBillablePIsProcessor(processor.Processor): + @staticmethod + def _validate_pi_names(data: pandas.DataFrame): + invalid_pi_projects = data[pandas.isna(data[invoice.PI_FIELD])] + for i, row in invalid_pi_projects.iterrows(): + logger.warn( + f"Billable project {row[invoice.PROJECT_FIELD]} has empty PI field" + ) + return data[~pandas.isna(data[invoice.PI_FIELD])] + + def _process(self): + self.data = self._validate_pi_names(self.data) diff --git a/process_report/processors/validate_pi_alias_processor.py b/process_report/processors/validate_pi_alias_processor.py new file mode 100644 index 0000000..a702858 --- /dev/null +++ b/process_report/processors/validate_pi_alias_processor.py @@ -0,0 +1,23 @@ +from dataclasses import dataclass + +import pandas + +from process_report.invoices import invoice +from process_report.processors import processor + + +@dataclass +class ValidatePIAliasProcessor(processor.Processor): + alias_map: dict + + @staticmethod + def _validate_pi_aliases(dataframe: pandas.DataFrame, alias_dict: dict): + for pi, pi_aliases in alias_dict.items(): + dataframe.loc[ + dataframe[invoice.PI_FIELD].isin(pi_aliases), invoice.PI_FIELD + ] = pi + + return dataframe + + def _process(self): + self.data = self._validate_pi_aliases(self.data, self.alias_map) diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py index 411b55a..d5b879f 100644 --- a/process_report/tests/unit_tests.py +++ b/process_report/tests/unit_tests.py @@ -1,14 +1,13 @@ from unittest import TestCase, mock import tempfile import pandas -import pyarrow import os import uuid import math from textwrap import dedent from process_report import process_report, util -from process_report.invoices import lenovo_invoice, nonbillable_invoice +from process_report.invoices import nonbillable_invoice from process_report.tests import util as test_utils @@ -104,26 +103,6 @@ def test_remove_billables(self): self.assertNotIn("ProjectE", result_df["Project - Allocation"].tolist()) -class TestBillableInvoice(TestCase): - def test_remove_nonbillables(self): - pis = [uuid.uuid4().hex for x in range(10)] - projects = [uuid.uuid4().hex for x in range(10)] - nonbillable_pis = pis[:3] - nonbillable_projects = projects[7:] - billable_pis = pis[3:7] - data = pandas.DataFrame({"Manager (PI)": pis, "Project - Allocation": projects}) - - test_invoice = test_utils.new_billable_invoice() - data = test_invoice._remove_nonbillables( - data, nonbillable_pis, nonbillable_projects - ) - self.assertTrue(data[data["Manager (PI)"].isin(nonbillable_pis)].empty) - self.assertTrue( - data[data["Project - Allocation"].isin(nonbillable_projects)].empty - ) - self.assertTrue(data.equals(data[data["Manager (PI)"].isin(billable_pis)])) - - class TestMergeCSV(TestCase): def setUp(self): self.header = ["ID", "Name", "Age"] @@ -218,7 +197,7 @@ def test_export_pi(self): self.assertNotIn("ProjectC", pi_df["Project - Allocation"].tolist()) -class TestGetInstitute(TestCase): +class TestAddInstituteProcessor(TestCase): def test_get_pi_institution(self): institute_map = { "harvard.edu": "Harvard University", @@ -247,31 +226,74 @@ def test_get_pi_institution(self): "g@bidmc.harvard.edu": "Beth Israel Deaconess Medical Center", } + add_institute_proc = test_utils.new_add_institution_processor() + for pi_email, answer in answers.items(): self.assertEqual( - process_report.get_institution_from_pi(institute_map, pi_email), answer + add_institute_proc._get_institution_from_pi(institute_map, pi_email), + answer, ) -class TestAlias(TestCase): - def setUp(self): - self.alias_dict = {"PI1": ["PI1_1", "PI1_2"], "PI2": ["PI2_1"]} - - self.data = pandas.DataFrame( +class TestValidateAliasProcessor(TestCase): + def test_validate_alias(self): + alias_map = {"PI1": ["PI1_1", "PI1_2"], "PI2": ["PI2_1"]} + test_data = pandas.DataFrame( { "Manager (PI)": ["PI1", "PI1_1", "PI1_2", "PI2_1", "PI2_1"], } ) - - self.answer = pandas.DataFrame( + answer_data = pandas.DataFrame( { "Manager (PI)": ["PI1", "PI1", "PI1", "PI2", "PI2"], } ) - def test_validate_alias(self): - output = process_report.validate_pi_aliases(self.data, self.alias_dict) - self.assertTrue(self.answer.equals(output)) + validate_pi_alias_proc = test_utils.new_validate_pi_alias_processor( + data=test_data, alias_map=alias_map + ) + validate_pi_alias_proc.process() + self.assertTrue(answer_data.equals(validate_pi_alias_proc.data)) + + +class TestRemoveNonbillablesProcessor(TestCase): + def test_remove_nonbillables(self): + pis = [uuid.uuid4().hex for x in range(10)] + projects = [uuid.uuid4().hex for x in range(10)] + nonbillable_pis = pis[:3] + nonbillable_projects = projects[7:] + billable_pis = pis[3:7] + data = pandas.DataFrame({"Manager (PI)": pis, "Project - Allocation": projects}) + + remove_nonbillables_proc = test_utils.new_remove_nonbillables_processor() + data = remove_nonbillables_proc._remove_nonbillables( + data, nonbillable_pis, nonbillable_projects + ) + self.assertTrue(data[data["Manager (PI)"].isin(nonbillable_pis)].empty) + self.assertTrue( + data[data["Project - Allocation"].isin(nonbillable_projects)].empty + ) + self.assertTrue(data.equals(data[data["Manager (PI)"].isin(billable_pis)])) + + +class TestValidateBillablePIProcessor(TestCase): + def test_validate_billables(self): + test_data = pandas.DataFrame( + { + "Manager (PI)": ["PI1", math.nan, "PI1", "PI2", "PI2"], + "Project - Allocation": [ + "ProjectA", + "ProjectB", + "ProjectC", + "ProjectD", + "ProjectE", + ], + } + ) + self.assertEqual(1, len(test_data[pandas.isna(test_data["Manager (PI)"])])) + validate_billable_pi_proc = test_utils.new_validate_billable_pi_processor() + output_data = validate_billable_pi_proc._validate_pi_names(test_data) + self.assertEqual(0, len(output_data[pandas.isna(output_data["Manager (PI)"])])) class TestMonthUtils(TestCase): @@ -288,503 +310,570 @@ def test_get_month_diff(self): util.get_month_diff("2024-16", "2025-03") -class TestCredit0002(TestCase): - def setUp(self): - data = { - "Invoice Month": [ - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - ], - "Manager (PI)": [ - "PI1", - "PI2", - "PI3", - "PI4", - "PI4", - "PI5", - "PI7", - "NewPI1", - "NewPI1", - "NewPI2", - "NewPI2", +class TestNewPICreditProcessor(TestCase): + def _assert_result_invoice_and_old_pi_file( + self, + invoice_month, + test_invoice, + test_old_pi_filepath, + answer_invoice, + answer_old_pi_df, + ): + new_pi_credit_proc = test_utils.new_new_pi_credit_processor( + invoice_month=invoice_month, + data=test_invoice, + old_pi_filepath=test_old_pi_filepath, + ) + new_pi_credit_proc.process() + + answer_invoice = answer_invoice.astype(new_pi_credit_proc.data.dtypes) + answer_old_pi_df = answer_old_pi_df.astype( + new_pi_credit_proc.updated_old_pi_df.dtypes + ) + + self.assertTrue(new_pi_credit_proc.data.equals(answer_invoice)) + self.assertTrue(new_pi_credit_proc.updated_old_pi_df.equals(answer_old_pi_df)) + + def setUp(self) -> None: + self.test_old_pi_file = tempfile.NamedTemporaryFile( + delete=False, mode="w+", suffix=".csv" + ) + + def tearDown(self) -> None: + os.remove(self.test_old_pi_file.name) + + def test_no_new_pi(self): + test_invoice = pandas.DataFrame( + { + "Manager (PI)": ["PI" for _ in range(3)], + "Cost": [100 for _ in range(3)], + "SU Type": ["CPU" for _ in range(3)], + } + ) + + # Other fields of old PI file not accessed if PI is no longer + # eligible for new-PI credit + test_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-01"], + "Initial Credits": [1000], + } + ) + test_old_pi_df.to_csv(self.test_old_pi_file.name, index=False) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [None for _ in range(3)], + "Credit Code": [None for _ in range(3)], + "PI Balance": [100 for _ in range(3)], + "Balance": [100 for _ in range(3)], + } + ), ], - "SU Type": [ - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", + axis=1, + ) + + answer_old_pi_df = test_old_pi_df.copy() + + self._assert_result_invoice_and_old_pi_file( + "2024-06", + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + def test_one_new_pi(self): + """Invoice with one completely new PI""" + + # One allocation + invoice_month = "2024-06" + test_invoice = pandas.DataFrame( + { + "Manager (PI)": ["PI"], + "Cost": [100], + "SU Type": ["CPU"], + } + ) + + test_old_pi_df = pandas.DataFrame( + columns=[ + "PI", + "First Invoice Month", + "Initial Credits", + "1st Month Used", + "2nd Month Used", + ] + ) + test_old_pi_df.to_csv(self.test_old_pi_file.name, index=False) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [100], + "Credit Code": ["0002"], + "PI Balance": [0], + "Balance": [0], + } + ), ], - "Project - Allocation": [ - "ProjectA", - "ProjectB", - "ProjectC", - "ProjectD", - "ProjectE", - "ProjectF", - "ProjectG", - "ProjectH", - "ProjectI", - "ProjectJ", - "ProjectK", + axis=1, + ) + + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [100], + "2nd Month Used": [0], + } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + # Two allocations, costs partially covered + test_invoice = pandas.DataFrame( + { + "Manager (PI)": ["PI", "PI"], + "Cost": [500, 1000], + "SU Type": ["CPU", "CPU"], + } + ) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [500, 500], + "Credit Code": ["0002", "0002"], + "PI Balance": [0, 500], + "Balance": [0, 500], + } + ), ], - "Cost": [10, 100, 10000, 500, 100, 400, 200, 250, 250, 700, 700], - } - answer_df_dict = { - "Invoice Month": [ - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", + axis=1, + ) + + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [1000], + "2nd Month Used": [0], + } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + # Two allocations, costs completely covered + test_invoice = pandas.DataFrame( + { + "Manager (PI)": ["PI", "PI"], + "Cost": [500, 400], + "SU Type": ["CPU", "CPU"], + } + ) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [500, 400], + "Credit Code": ["0002", "0002"], + "PI Balance": [0, 0], + "Balance": [0, 0], + } + ), ], - "Manager (PI)": [ - "PI1", - "PI2", - "PI3", - "PI4", - "PI4", - "PI5", - "PI7", - "NewPI1", - "NewPI1", - "NewPI2", - "NewPI2", + axis=1, + ) + + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [900], + "2nd Month Used": [0], + } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + def test_one_month_pi(self): + """PI has appeared in invoices for one month""" + + # Remaining credits completely covers costs + invoice_month = "2024-07" + test_invoice = pandas.DataFrame( + { + "Manager (PI)": ["PI"], + "Cost": [200], + "SU Type": ["CPU"], + } + ) + + test_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [500], + "2nd Month Used": [0], + } + ) + test_old_pi_df.to_csv(self.test_old_pi_file.name, index=False) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [200], + "Credit Code": ["0002"], + "PI Balance": [0], + "Balance": [0], + } + ), ], - "SU Type": [ - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", + axis=1, + ) + + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [500], + "2nd Month Used": [200], + } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + # Remaining credits partially covers costs + test_invoice = pandas.DataFrame( + { + "Manager (PI)": ["PI"], + "Cost": [600], + "SU Type": ["CPU"], + } + ) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [500], + "Credit Code": ["0002"], + "PI Balance": [100], + "Balance": [100], + } + ), ], - "Project - Allocation": [ - "ProjectA", - "ProjectB", - "ProjectC", - "ProjectD", - "ProjectE", - "ProjectF", - "ProjectG", - "ProjectH", - "ProjectI", - "ProjectJ", - "ProjectK", + axis=1, + ) + + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [500], + "2nd Month Used": [500], + } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + def test_two_new_pi(self): + """Two PIs of different age""" + + # Costs partially and completely covered + invoice_month = "2024-07" + test_invoice = pandas.DataFrame( + { + "Manager (PI)": ["PI1", "PI1", "PI2"], + "Cost": [800, 500, 500], + "SU Type": ["CPU", "CPU", "CPU"], + } + ) + + test_old_pi_df = pandas.DataFrame( + { + "PI": ["PI1"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [500], + "2nd Month Used": [0], + } + ) + test_old_pi_df.to_csv(self.test_old_pi_file.name, index=False) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [500, None, 500], + "Credit Code": ["0002", None, "0002"], + "PI Balance": [300, 500, 0], + "Balance": [300, 500, 0], + } + ), ], - "Cost": [10, 100, 10000, 500, 100, 400, 200, 250, 250, 700, 700], - "Credit": [None, None, None, 100, None, 400, 200, 250, 250, 500, None], - "Credit Code": [ - None, - None, - None, - "0002", - None, - "0002", - "0002", - "0002", - "0002", - "0002", - None, + axis=1, + ) + + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI1", "PI2"], + "First Invoice Month": ["2024-06", "2024-07"], + "Initial Credits": [1000, 1000], + "1st Month Used": [500, 500], + "2nd Month Used": [500, 0], + } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + def test_old_pi_file_overwritten(self): + """If PI already has entry in Old PI file, + their initial credits and PI entry could be overwritten""" + + invoice_month = "2024-06" + test_invoice = pandas.DataFrame( + { + "Manager (PI)": ["PI", "PI"], + "Cost": [500, 500], + "SU Type": ["CPU", "CPU"], + } + ) + + test_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [500], + "1st Month Used": [200], + "2nd Month Used": [0], + } + ) + test_old_pi_df.to_csv(self.test_old_pi_file.name, index=False) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [500, None], + "Credit Code": ["0002", None], + "PI Balance": [0, 500], + "Balance": [0, 500], + } + ), ], - "Balance": [10, 100, 10000, 400, 100, 0, 0, 0, 0, 200, 700], - } - self.dataframe = pandas.DataFrame(data) - self.dataframe["Credit"] = None - self.dataframe["Credit Code"] = None - self.dataframe["Balance"] = self.dataframe["Cost"] - self.answer_dataframe = pandas.DataFrame(answer_df_dict) - old_pi = [ - "PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used", - "PI1,2023-09,500,200,0", - "PI2,2024-01,2000,0,0", - "PI3,2024-01,2000,1000,500", - "PI4,2024-02,1000,900,0", - "PI5,2024-02,1000,300,500", - "PI6,2024-02,1000,700,0", - "PI7,2024-03,500,300,0", # This as current month we're testing, new PIs should get $500 - "PI8,2024-04,1000,500,0", - ] - self.old_pi_df_answer = ( - pandas.DataFrame( - { - "PI": [ - "PI1", - "PI2", - "PI3", - "PI4", - "PI5", - "PI6", - "PI7", - "NewPI1", - "NewPI2", - "PI8", - ], - "First Invoice Month": [ - "2023-09", - "2024-01", - "2024-01", - "2024-02", - "2024-02", - "2024-02", - "2024-03", - "2024-03", - "2024-03", - "2024-04", - ], - "Initial Credits": [ - 500, - 2000, - 2000, - 1000, - 1000, - 1000, - 500, - 500, - 500, - 1000, - ], - "1st Month Used": [200, 0, 1000, 900, 300, 700, 200, 500, 500, 500], - "2nd Month Used": [0, 0, 500, 100, 400, 0, 0, 0, 0, 0], - } - ) - .astype( - { - "Initial Credits": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - "1st Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - "2nd Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - }, - ) - .sort_values(by="PI", ignore_index=True) + axis=1, ) - # Contains cases with new, one month old, two month old, older PI, and future PI that hasn't appeared in invoices yet - # For each invoice month, test case where pi has 1 project, >1, and has spare credit - old_pi_file = tempfile.NamedTemporaryFile( - delete=False, mode="w+", suffix=".csv" + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [500], + "1st Month Used": [500], + "2nd Month Used": [0], + } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, ) - for pi in old_pi: - old_pi_file.write(pi + "\n") - self.old_pi_file = old_pi_file.name - self.dataframe_no_gpu = pandas.DataFrame( + def test_excluded_su_types(self): + """Certain SU types can be excluded from the credit""" + + invoice_month = "2024-06" + test_invoice = pandas.DataFrame( { - "Invoice Month": [ - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - ], - "Manager (PI)": ["PI1", "PI1", "PI1", "PI2", "PI2"], + "Manager (PI)": ["PI", "PI", "PI", "PI"], + "Cost": [600, 600, 600, 600], "SU Type": [ - "GPU", - "OpenShift GPUA100SXM4", - "OpenStack GPUA100SXM4", + "CPU", "OpenShift GPUA100SXM4", + "GPU", "OpenStack GPUA100SXM4", ], - "Cost": [500, 100, 100, 500, 500], } ) - self.dataframe_no_gpu["Credit"] = None - self.dataframe_no_gpu["Credit Code"] = None - self.dataframe_no_gpu["Balance"] = self.dataframe_no_gpu["Cost"] - old_pi_no_gpu = [ - "PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used", - "OldPI,2024-03,500,200,0", - ] - old_pi_no_gpu_file = tempfile.NamedTemporaryFile( - delete=False, mode="w", suffix=".csv" + + test_old_pi_df = pandas.DataFrame( + columns=[ + "PI", + "First Invoice Month", + "Initial Credits", + "1st Month Used", + "2nd Month Used", + ] ) - for pi in old_pi_no_gpu: - old_pi_no_gpu_file.write(pi + "\n") - self.old_pi_no_gpu_file = old_pi_no_gpu_file.name - self.no_gpu_df_answer = pandas.DataFrame( + test_old_pi_df.to_csv(self.test_old_pi_file.name, index=False) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [600, None, 400, None], + "Credit Code": ["0002", None, "0002", None], + "PI Balance": [0, 600, 200, 600], + "Balance": [0, 600, 200, 600], + } + ), + ], + axis=1, + ) + + answer_old_pi_df = pandas.DataFrame( { - "Invoice Month": [ - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - ], - "Manager (PI)": ["PI1", "PI1", "PI1", "PI2", "PI2"], - "SU Type": [ - "GPU", - "OpenShift GPUA100SXM4", - "OpenStack GPUA100SXM4", - "OpenShift GPUA100SXM4", - "OpenStack GPUA100SXM4", - ], - "Cost": [500, 100, 100, 500, 500], - "Credit": [500, None, None, None, None], - "Credit Code": ["0002", None, None, None, None], - "Balance": [0.0, 100.0, 100.0, 500.0, 500.0], + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [1000], + "2nd Month Used": [0], } ) - def tearDown(self): - os.remove(self.old_pi_file) - os.remove(self.old_pi_no_gpu_file) - - def test_apply_credit_0002(self): - test_invoice = test_utils.new_billable_invoice(invoice_month="2024-03") - old_pi_df = test_invoice._load_old_pis(self.old_pi_file) - dataframe, updated_old_pi_df = test_invoice._apply_credits_new_pi( - self.dataframe, old_pi_df - ) - dataframe = dataframe.astype({"Credit": "float64", "Balance": "int64"}) - updated_old_pi_df = updated_old_pi_df.astype( - dtype={ - "Initial Credits": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - "1st Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - "2nd Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - }, - ).sort_values(by=["PI"], ignore_index=True) - self.assertTrue(self.answer_dataframe.equals(dataframe)) - self.assertTrue(self.old_pi_df_answer.equals(updated_old_pi_df)) - - def test_no_gpu(self): - test_invoice = test_utils.new_billable_invoice(invoice_month="2024-03") - old_pi_df = test_invoice._load_old_pis(self.old_pi_no_gpu_file) - dataframe, _ = test_invoice._apply_credits_new_pi( - self.dataframe_no_gpu, old_pi_df - ) - dataframe = dataframe.astype({"Credit": "float64", "Balance": "float64"}) - self.assertTrue(self.no_gpu_df_answer.equals(dataframe)) + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) def test_apply_credit_error(self): + """Test faulty data""" old_pi_df = pandas.DataFrame( {"PI": ["PI1"], "First Invoice Month": ["2024-04"]} ) invoice_month = "2024-03" - test_invoice = test_utils.new_billable_invoice() + test_invoice = test_utils.new_new_pi_credit_processor() with self.assertRaises(SystemExit): test_invoice._get_pi_age(old_pi_df, "PI1", invoice_month) -class TestBUSubsidy(TestCase): - def setUp(self): - data = { - "Invoice Month": [ - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - ], - "Manager (PI)": ["PI1", "PI1", "PI2", "PI2", "PI3", "PI3", "PI4", "PI4"], - "Institution": [ - "Boston University", - "Boston University", - "Boston University", - "Boston University", - "Harvard University", # Test case for non-BU PIs - "Harvard University", - "Boston University", - "Boston University", - ], - "Project - Allocation": [ - "ProjectA-e6413", - "ProjectA-t575e6", # Test case for project with >1 allocation - "ProjectB-fddgfygg", - "ProjectB-5t143t", - "ProjectC-t14334", - "ProjectD", # Test case for correctly extracting project name - "ProjectE-test-r25135", # Test case for BU PI with >1 project - "ProjectF", - ], - "Cost": [1050, 500, 100, 925, 10000, 1000, 1050, 100], - "Credit": [ - 1000, - 0, - 100, - 900, - 0, - 0, - 1000, - 0, - ], # Test cases where PI does/dones't have credits alreadys - "Balance": [ - 50, - 500, - 0, - 25, - 10000, - 1000, - 50, - 100, - ], # Test case where subsidy does/doesn't cover fully balance - } - self.dataframe = pandas.DataFrame(data) - self.subsidy = 100 - - def test_apply_BU_subsidy(self): - test_invoice = test_utils.new_bu_internal_invoice( - data=self.dataframe, subsidy_amount=self.subsidy - ) - test_invoice.process() - output_df = test_invoice.data.reset_index() - - self.assertTrue( - set( - [ - process_report.INVOICE_DATE_FIELD, - "Project", - process_report.PI_FIELD, - process_report.COST_FIELD, - process_report.CREDIT_FIELD, - process_report.SUBSIDY_FIELD, - process_report.BALANCE_FIELD, - ] - ).issubset(output_df) +class TestBUSubsidyProcessor(TestCase): + def test_apply_subsidy(self): + subsidy_amount = 500 + invoice_month = "2024-06" + test_invoice = pandas.DataFrame( + { + "Manager (PI)": ["PI1", "PI1", "PI2", "PI2", "PI3", "PI3"], + "Institution": [ + "Boston University", + "Boston University", + "Harvard University", # Test case for non-BU PIs + "Harvard University", + "Boston University", + "Boston University", + ], + "Project - Allocation": [ + "P1-A1", + "P2-A1", + "P3-A1", + "P3-A2", + "P4", # 2 Test cases for correctly extracting project name + "P4-P4-A1", + ], + "PI Balance": [400, 600, 1000, 2000, 500, 500], + "Balance": [400, 600, 1000, 2000, 500, 500], + } ) - self.assertTrue( - set(["PI1", "PI2", "PI4"]).issubset(output_df["Manager (PI)"].unique()) - ) - self.assertFalse("PI3" in output_df["Project"].unique()) + answer_invoice = test_invoice.copy() + answer_invoice["Project"] = ["P1", "P2", "P3", "P3", "P4", "P4-P4"] + answer_invoice["BU Balance"] = [400, 100, 0, 0, 500, 0] + answer_invoice["PI Balance"] = [0, 500, 1000, 2000, 0, 500] - self.assertTrue( - set(["ProjectA", "ProjectB", "ProjectE-test", "ProjectF"]).issubset( - output_df["Project"].unique() - ) + bu_subsidy_proc = test_utils.new_bu_subsidy_processor( + invoice_month=invoice_month, + data=test_invoice, + subsidy_amount=subsidy_amount, ) - self.assertFalse( - set(["ProjectC-t14334", "ProjectC", "ProjectD"]).intersection( - output_df["Project"].unique() - ) - ) - - self.assertEqual(4, len(output_df.index)) - self.assertEqual(1550, output_df.loc[0, "Cost"]) - self.assertEqual(1025, output_df.loc[1, "Cost"]) - self.assertEqual(1050, output_df.loc[2, "Cost"]) - self.assertEqual(100, output_df.loc[3, "Cost"]) + bu_subsidy_proc.process() - self.assertEqual(100, output_df.loc[0, "Subsidy"]) - self.assertEqual(25, output_df.loc[1, "Subsidy"]) - self.assertEqual(50, output_df.loc[2, "Subsidy"]) - self.assertEqual(50, output_df.loc[3, "Subsidy"]) + answer_invoice = answer_invoice.astype(bu_subsidy_proc.data.dtypes) - self.assertEqual(450, output_df.loc[0, "Balance"]) - self.assertEqual(0, output_df.loc[1, "Balance"]) - self.assertEqual(0, output_df.loc[2, "Balance"]) - self.assertEqual(50, output_df.loc[3, "Balance"]) + self.assertTrue(bu_subsidy_proc.data.equals(answer_invoice)) -class TestValidateBillables(TestCase): - def setUp(self): - data = { - "Manager (PI)": ["PI1", math.nan, "PI1", "PI2", "PI2"], - "Project - Allocation": [ - "ProjectA", - "ProjectB", - "ProjectC", - "ProjectD", - "ProjectE", - ], - } - self.dataframe = pandas.DataFrame(data) - - def test_validate_billables(self): - self.assertEqual( - 1, len(self.dataframe[pandas.isna(self.dataframe["Manager (PI)"])]) +class TestLenovoProcessor(TestCase): + def test_process_lenovo(self): + test_invoice = pandas.DataFrame( + { + "SU Hours (GBhr or SUhr)": [1, 10, 100, 4, 432, 10], + } ) - test_invoice = test_utils.new_billable_invoice() - validated_df = test_invoice._validate_pi_names(self.dataframe) - self.assertEqual( - 0, len(validated_df[pandas.isna(validated_df["Manager (PI)"])]) + answer_invoice = test_invoice.copy() + answer_invoice["SU Charge"] = 1 + answer_invoice["Charge"] = ( + answer_invoice["SU Hours (GBhr or SUhr)"] * answer_invoice["SU Charge"] ) - -class TestExportLenovo(TestCase): - def setUp(self): - data = { - "Invoice Month": [ - "2023-01", - "2023-01", - "2023-01", - "2023-01", - "2023-01", - "2023-01", - ], - "Project - Allocation": [ - "ProjectA", - "ProjectB", - "ProjectC", - "ProjectD", - "ProjectE", - "ProjectF", - ], - "Institution": ["A", "B", "C", "D", "E", "F"], - "SU Hours (GBhr or SUhr)": [1, 10, 100, 4, 432, 10], - "SU Type": [ - "OpenShift GPUA100SXM4", - "OpenShift GPUA100", - "OpenShift GPUA100SXM4", - "OpenStack GPUA100SXM4", - "OpenStack CPU", - "OpenStack GPUK80", - ], - } - self.lenovo_invoice = lenovo_invoice.LenovoInvoice( - "Lenovo", "2023-01", pandas.DataFrame(data) - ) - self.lenovo_invoice.process() - - def test_process_lenovo(self): - output_df = self.lenovo_invoice.data - self.assertTrue( - set( - [ - process_report.INVOICE_DATE_FIELD, - process_report.PROJECT_FIELD, - process_report.INSTITUTION_FIELD, - process_report.SU_TYPE_FIELD, - "SU Hours", - "SU Charge", - "Charge", - ] - ).issubset(output_df) - ) - - for i, row in output_df.iterrows(): - self.assertIn( - row[process_report.SU_TYPE_FIELD], - ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"], - ) - self.assertEqual(row["Charge"], row["SU Charge"] * row["SU Hours"]) + lenovo_proc = test_utils.new_lenovo_processor(data=test_invoice) + lenovo_proc.process() + self.assertTrue(lenovo_proc.data.equals(answer_invoice)) class TestUploadToS3(TestCase): @@ -818,3 +907,15 @@ def test_remove_prefix(self, mock_get_time, mock_get_bucket): process_report.upload_to_s3(filenames, invoice_month) for i, call_args in enumerate(mock_bucket.upload_file.call_args_list): self.assertTrue(answers[i] in call_args) + + +class TestBaseInvoice(TestCase): + def test_filter_exported_columns(self): + test_invoice = pandas.DataFrame(columns=["C1", "C2", "C3", "C4", "C5"]) + answer_invoice = pandas.DataFrame(columns=["C1", "C3R", "C5R"]) + inv = test_utils.new_base_invoice(data=test_invoice) + inv.export_columns_list = ["C1", "C3", "C5"] + inv.exported_columns_map = {"C3": "C3R", "C5": "C5R"} + inv._filter_columns() + + self.assertTrue(inv.data.equals(answer_invoice)) diff --git a/process_report/tests/util.py b/process_report/tests/util.py index 0d3d3d6..8d0a935 100644 --- a/process_report/tests/util.py +++ b/process_report/tests/util.py @@ -1,29 +1,82 @@ import pandas -from process_report.invoices import billable_invoice, bu_internal_invoice +from process_report.processors import ( + add_institution_processor, + validate_pi_alias_processor, + lenovo_processor, + remove_nonbillables_processor, + validate_billable_pi_processor, + new_pi_credit_processor, + bu_subsidy_processor, +) +from process_report.invoices import invoice -def new_billable_invoice( +def new_add_institution_processor( + name="", + invoice_month="0000-00", + data=pandas.DataFrame(), +): + return add_institution_processor.AddInstitutionProcessor(name, invoice_month, data) + + +def new_validate_pi_alias_processor( + name="", invoice_month="0000-00", data=pandas.DataFrame(), alias_map={} +): + return validate_pi_alias_processor.ValidatePIAliasProcessor( + name, invoice_month, data, alias_map + ) + + +def new_lenovo_processor(name="", invoice_month="0000-00", data=pandas.DataFrame()): + return lenovo_processor.LenovoProcessor(name, invoice_month, data) + + +def new_remove_nonbillables_processor( name="", invoice_month="0000-00", data=pandas.DataFrame(), nonbillable_pis=[], nonbillable_projects=[], +): + return remove_nonbillables_processor.RemoveNonbillables( + name, invoice_month, data, nonbillable_pis, nonbillable_projects + ) + + +def new_validate_billable_pi_processor( + name="", invoice_month="0000-00", data=pandas.DataFrame() +): + return validate_billable_pi_processor.ValidateBillablePIsProcessor( + name, invoice_month, data + ) + + +def new_new_pi_credit_processor( + name="", + invoice_month="0000-00", + data=pandas.DataFrame(), old_pi_filepath="", ): - return billable_invoice.BillableInvoice( - name, - invoice_month, - data, - nonbillable_pis, - nonbillable_projects, - old_pi_filepath, + return new_pi_credit_processor.NewPICreditProcessor( + name, invoice_month, data, old_pi_filepath ) -def new_bu_internal_invoice( - name="", invoice_month="0000-00", data=pandas.DataFrame(), subsidy_amount=0 +def new_bu_subsidy_processor( + name="", + invoice_month="0000-00", + data=pandas.DataFrame(), + subsidy_amount="", ): - return bu_internal_invoice.BUInternalInvoice( + return bu_subsidy_processor.BUSubsidyProcessor( name, invoice_month, data, subsidy_amount ) + + +def new_base_invoice( + name="", + invoice_month="0000-00", + data=pandas.DataFrame(), +): + return invoice.Invoice(name, invoice_month, data) diff --git a/process_report/util.py b/process_report/util.py index 45b9ed4..5c4f190 100644 --- a/process_report/util.py +++ b/process_report/util.py @@ -1,6 +1,5 @@ import os import datetime -import json import logging import functools @@ -27,23 +26,6 @@ def get_invoice_bucket(): return s3_resource.Bucket(os.environ.get("S3_BUCKET_NAME", "nerc-invoicing")) -def get_institution_from_pi(institute_map, pi_uname): - institution_key = pi_uname.split("@")[-1] - institution_name = institute_map.get(institution_key, "") - - if institution_name == "": - logger.warn(f"PI name {pi_uname} does not match any institution!") - - return institution_name - - -def load_institute_map() -> dict: - with open("process_report/institute_map.json", "r") as f: - institute_map = json.load(f) - - return institute_map - - def get_iso8601_time(): return datetime.datetime.now().strftime("%Y%m%dT%H%M%SZ")