From a49625bd9036cc54e24e639cc8234e7f6af5cd67 Mon Sep 17 00:00:00 2001 From: Chidi-Bernard Emmanuel <89081878+bernard-rr@users.noreply.github.com> Date: Fri, 13 Jan 2023 10:36:19 +0000 Subject: [PATCH] Pending changes exported from your codespace --- Backend/controllers/README.md | 17 +++++++++++++++++ Backend/routes/download.py | 16 ++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 Backend/routes/download.py diff --git a/Backend/controllers/README.md b/Backend/controllers/README.md index e69de29b..74e21695 100644 --- a/Backend/controllers/README.md +++ b/Backend/controllers/README.md @@ -0,0 +1,17 @@ +convert_file.py + + The convert_file.py file is a module that is responsible for converting various types of files (csv, xls, pdf) to JSON format. It utilizes the pandas library to read in the files and convert them to a dataframe, which is then converted to a JSON object. The module also includes helper functions to check the validity of headers in csv files and correct any incorrectly parsed date fields in JSON responses. Additionally, it uses the extract_text.py and openai_request.py modules to convert pdf files to text and make a call to the OpenAI API respectively. + + The convert_file() function is the main function of the module and takes in a single argument, filename, which is the directory containing the file to be converted. The function uses the file extension to determine the file type and calls the appropriate conversion method. For csv files, it reads in the file using pd.read_csv(), replaces any blank spaces with NaN and fill NaN with "". It then calls the check_header() function to check the validity of the headers and, if necessary, correct them. It then converts the dataframe to a JSON object. For xls files, it reads in the file using pd.read_excel(), replaces any blank spaces with NaN and fill NaN with "". It then converts the dataframe to a JSON object. + + The df_to_json() function takes a single argument, df, which is a dataframe and converts it to a JSON object using the to_json() method from pandas. + + The check_header() function takes in two arguments, df and header. It first sends a prompt to the OpenAI API and receives a response containing a dictionary. It then iterates over the dictionary checking for the key "False", if found, it updates the dataframe with the best headers it received in the response. + + The correct_date() function is currently not implemented but it takes a single argument response which is a JSON object and is responsible for converting the date field in the JSON to proper dates and null to a string. + + The convert_file() function supports csv and xls files only, but it has commented out code for txt files, which can be implemented if needed. + +................................................................................................................................................................................. + +extract_text.py \ No newline at end of file diff --git a/Backend/routes/download.py b/Backend/routes/download.py new file mode 100644 index 00000000..84ddb0ba --- /dev/null +++ b/Backend/routes/download.py @@ -0,0 +1,16 @@ +from fastapi import FastAPI, File, HTTPException +import pandas as pd + +app = FastAPI() + +@app.post("/download/csv") +async def download_csv(matched_json): + try: + # convert matched_json to pandas dataframe + df = pd.DataFrame(matched_json) + # create a CSV file + csv_file = df.to_csv(index=False) + # return the file as a response + return {"file": csv_file, "Content-Disposition": "attachment; filename=matched_transactions.csv"} + except: + raise HTTPException(status_code=400, detail="An error occurred while generating the CSV file.") \ No newline at end of file