Skip to content

Commit

Permalink
Merge pull request #2 from naved001/merge-csv
Browse files Browse the repository at this point in the history
The script now accepts multiple CSV files which it then merges into one
  • Loading branch information
naved001 authored Sep 26, 2023
2 parents 4a2bbbe + 1684c92 commit 25a1fb1
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 13 deletions.
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,16 @@ [email protected],project foo,2023-09,2024-08,Internal
[email protected],project bar,2023-09,2024-08,Internal
```

The script will gather the invoice month from the csv report and if it falls under the start and end date then those projects will be excluded.
The script reads the invoice month from the CSV reports; if it falls within a project's start and end dates, that project is excluded.
In this example, `project foo` will not be billed from September 2023 through August 2024, for a total of 1 year.

The CSV report must have the headers `Manager (PI)` and `Project - Allocation`.

```
usage: process_report.py [-h] --report-file REPORT_FILE --pi-file PI_FILE --projects-file PROJECTS_FILE --timed-projects-file TIMED_PROJECTS_FILE```
usage: process_report.py [-h] --pi-file PI_FILE --projects-file PROJECTS_FILE --timed-projects-file TIMED_PROJECTS_FILE [--output-file OUTPUT_FILE]
csv_files [csv_files ...]
process_report.py: error: the following arguments are required: csv_files, --pi-file, --projects-file, --timed-projects-file
E.g. python process_report.py test1.csv test2.csv --pi-file pi.txt --projects-file projects.txt --timed-projects-file timed_projects.txt --output-file myfile.csv
```

40 changes: 29 additions & 11 deletions process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ def main():
"""Remove non-billable PIs and projects"""

parser = argparse.ArgumentParser()

parser.add_argument(
"--report-file",
required=True,
help="The CSV file with everything in it",
"csv_files",
nargs="+",
help="One or more CSV files that need to be processed",
)
parser.add_argument(
"--pi-file",
Expand All @@ -27,7 +28,14 @@ def main():
required=True,
help="File containing list of projects that are non-billable within a specified duration",
)
parser.add_argument(
"--output-file",
required=False,
default="filtered_output.csv",
help="Name of output file",
)
args = parser.parse_args()
merged_dataframe = merge_csv(args.csv_files)

pi = []
projects = []
Expand All @@ -36,7 +44,7 @@ def main():
with open(args.projects_file) as file:
projects = [line.rstrip() for line in file]

invoice_date = get_invoice_date(args.report_file)
invoice_date = get_invoice_date(merged_dataframe)
print("Invoice date: " + str(invoice_date))

timed_projects_list = timed_projects(args.timed_projects_file, invoice_date)
Expand All @@ -45,16 +53,27 @@ def main():

projects = list(set(projects + timed_projects_list))

remove_non_billables(args.report_file, pi, projects)
remove_non_billables(merged_dataframe, pi, projects, args.output_file)


def merge_csv(files):
    """Merge multiple CSV files and return a single pandas dataframe.

    Each entry in ``files`` is read with ``pandas.read_csv`` and the
    resulting frames are stacked row-wise in the order given.

    Args:
        files: Non-empty iterable of CSV paths (or anything else
            ``pandas.read_csv`` accepts). ``pandas.concat`` raises
            ``ValueError`` when given no frames.

    Returns:
        A single dataframe containing the rows of every input, indexed
        0..n-1.
    """
    dataframes = [pandas.read_csv(file) for file in files]
    # ignore_index=True already yields a fresh 0..n-1 index, so no
    # separate reset_index() call is needed afterwards.
    return pandas.concat(dataframes, ignore_index=True)


def get_invoice_date(report_file):
def get_invoice_date(dataframe):
"""Returns the invoice date as a pandas timestamp object
Note that it only checks the first entry because it should
be the same for every row.
"""
dataframe = pandas.read_csv(report_file)
invoice_date_str = dataframe['Invoice Month'][0]
invoice_date = pandas.to_datetime(invoice_date_str, format='%Y-%m')
return invoice_date
Expand All @@ -72,11 +91,10 @@ def timed_projects(timed_projects_file, invoice_date):
return dataframe[mask]['Project'].to_list()


def remove_non_billables(report_file, pi, projects):
"""Removes projects and PIs that should not be billed from the CSV report_file"""
dataframe = pandas.read_csv(report_file)
def remove_non_billables(dataframe, pi, projects, output_file):
"""Removes projects and PIs that should not be billed from the dataframe"""
filtered_dataframe = dataframe[~dataframe['Manager (PI)'].isin(pi) & ~dataframe['Project - Allocation'].isin(projects)]
filtered_dataframe.to_csv('filtered_' + report_file, index=False)
filtered_dataframe.to_csv(output_file, index=False)

if __name__ == "__main__":
main()

0 comments on commit 25a1fb1

Please sign in to comment.