Skip to content

Commit

Permalink
Merge branch 'main' into FS-93/UI-File-Upload
Browse files Browse the repository at this point in the history
  • Loading branch information
Gagan Singh committed Nov 26, 2024
2 parents 2641012 + e928502 commit 301b4ab
Show file tree
Hide file tree
Showing 48 changed files with 1,646 additions and 276 deletions.
15 changes: 6 additions & 9 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -35,32 +35,29 @@ BACKEND_URL=http://localhost:8250
# websockets url to connect to backend websocket endpoint
WS_URL=ws://localhost:8250/ws

# Azure
AZURE_STORAGE_CONNECTION_STRING="my-connection-string"
AZURE_STORAGE_CONTAINER_NAME=my-container-name
AZURE_INITIAL_DATA_FILENAME=test-data.json

# llm
ANSWER_AGENT_LLM="openai"
INTENT_AGENT_llm="openai"
VALIDATOR_AGENT_LLM="mistral"
INTENT_AGENT_LLM="openai"
VALIDATOR_AGENT_LLM="openai"
DATASTORE_AGENT_LLM="openai"
MATHS_AGENT_LLM="openai"
WEB_AGENT_LLM="openai"
CHART_GENERATOR_LLM="openai"
ROUTER_LLM="openai"
FILE_AGENT_LLM="openai"
SUGGESTIONS_LLM="openai"
DYNAMIC_KNOWLEDGE_GRAPH_LLM="openai"


# model
ANSWER_AGENT_MODEL="gpt-4o-mini"
INTENT_AGENT_MODEL="gpt-4o-mini"
VALIDATOR_AGENT_MODEL="mistral-large-latest"
VALIDATOR_AGENT_MODEL="gpt-4o-mini"
DATASTORE_AGENT_MODEL="gpt-4o-mini"
MATHS_AGENT_MODEL="gpt-4o-mini"
WEB_AGENT_MODEL="gpt-4o-mini"
CHART_GENERATOR_MODEL="gpt-4o-mini"
ROUTER_MODEL="gpt-4o-mini"
FILE_AGENT_MODEL="gpt-4o-mini"
SUGGESTIONS_MODEL="gpt-4o-mini"

DYNAMIC_KNOWLEDGE_GRAPH_MODEL="gpt-4o"
3 changes: 3 additions & 0 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ WORKDIR /backend
# Copy just the requirements into the working directory so it gets cached by itself
COPY ./requirements.txt ./requirements.txt

# Copy the datasets directory; this should match what a local run of the application needs
COPY ./datasets/ ./datasets/

# Install the dependencies from the requirements file
RUN pip install --no-cache-dir --upgrade -r /backend/requirements.txt

Expand Down
14 changes: 14 additions & 0 deletions backend/datasets/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Datasets

This is a temporary folder for preloading the esg dataset into InferESG.

This will be replaced by file upload from the UI.

## Bloomberg.csv
This was downloaded from https://data.mendeley.com/datasets/tgmppk9kkt/1

## Bloomberg_2.csv
This was generated by ChatGPT from the original bloomberg.csv

## esg_poc.csv
This was crafted by hand by the original InferESG PoC team.
721 changes: 721 additions & 0 deletions backend/datasets/bloomberg.csv

Large diffs are not rendered by default.

25 changes: 25 additions & 0 deletions backend/datasets/bloomberg_2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Identifier (RIC),Company Name,Date,ESG_score,Social_score,Gov_score,Env_score,BVPS,Market_cap,Shares,Industry,Net_income,RETURN_ON_ASSET,QUICK_RATIO,ASSET_GROWTH,FNCL_LVRG,PE_RATIO,Scope_1,Scope_2,CO2_emissions,Energy_use,Water_use,Water_recycle,Toxic_chem_red,Injury_rate,Women_Employees,Human_Rights,Strikes,Turnover_empl,Board_Size,Shareholder_Rights,Board_gen_div,Bribery,Recycling_Initiatives,Total_assets
AAPL,Apple Inc,2021,85.3,88.5,82.1,85.3,5.04,2410530000000,16442000000,Technology,94680000000,25.31,1.1,13.77,1.25,32.45,1234567,89101112,123456789,1234567890,12345678900,123456789000,12.345,12.345,1,1,1,1,1,1,1,1,1,1,1234567890000
GOOG,Alphabet Inc,2021,87.4,90.1,84.7,87.4,5.21,1797000000000,6949000000,Technology,40780000000,14.04,1.2,14.56,1.3,29.78,2345678,91011112,23456789,234567890,2345678900,23456789000,23.456,23.456,1,1,1,1,1,1,1,1,1,1,2345678900000
AMZN,Amazon.com Inc,2021,78.9,81.2,76.6,78.9,4.87,1676000000000,5031000000,Retail,21330000000,6.2,1.0,15.34,1.4,35.21,3456789,10111112,34567890,345678900,3456789000,34567890000,34.567,34.567,1,1,1,1,1,1,1,1,1,1,3456789000000
MSFT,Microsoft Corporation,2021,86.7,89.3,84.1,86.7,4.98,2032000000000,7534000000,Technology,61270000000,29.12,1.1,14.89,1.2,31.54,4567890,11111112,45678901,456789010,4567890100,45678901000,45.678,45.678,1,1,1,1,1,1,1,1,1,1,4567890100000
FB,Facebook Inc,2021,75.6,78.2,73.0,75.6,3.74,927000000000,2834000000,Technology,29150000000,31.52,1.0,16.45,1.5,27.89,5678901,11111112,56789012,567890120,5678901200,56789012000,56.789,56.789,1,1,1,1,1,1,1,1,1,1,5678901200000
TSLA,Tesla Inc,2021,72.8,75.4,69.2,72.8,4.13,780000000000,9631000000,Automotive,23210000000,11.76,1.2,17.67,1.6,34.56,6789012,11111112,67890123,678901230,6789012300,67890123000,67.890,67.890,1,1,1,1,1,1,1,1,1,1,6789012300000
NFLX,Netflix Inc,2021,74.5,77.1,71.9,74.5,3.92,240000000000,4571000000,Media,28610000000,12.67,1.1,18.78,1.7,31.23,7890123,11111112,78901234,789012340,7890123400,78901234000,78.901,78.901,1,1,1,1,1,1,1,1,1,1,7890123400000
NVDA,NVIDIA Corporation,2021,82.3,84.9,79.7,82.3,4.35,309000000000,6142000000,Technology,43320000000,16.89,1.0,19.89,1.8,37.45,8901234,11111112,89012345,890123450,8901234500,89012345000,89.012,89.012,1,1,1,1,1,1,1,1,1,1,8901234500000
AAPL,Apple Inc,2020,84.2,87.6,80.8,84.2,4.91,2258000000000,16123000000,Technology,89860000000,24.78,1.1,14.12,1.3,31.89,9012345,11111112,90123456,901234560,9012345600,90123456000,90.123,90.123,1,1,1,1,1,1,1,1,1,1,9012345600000
GOOG,Alphabet Inc,2020,86.5,89.2,83.8,86.5,5.12,1676000000000,6849000000,Technology,38270000000,13.56,1.2,15.01,1.4,28.97,1234567,11111112,12345678,123456780,1234567800,12345678000,12.345,12.345,1,1,1,1,1,1,1,1,1,1,1234567800000
AMZN,Amazon.com Inc,2020,77.8,80.1,75.5,77.8,4.76,1567000000000,4931000000,Retail,19780000000,5.98,1.0,16.02,1.5,34.12,2345678,11111112,23456789,234567890,2345678900,23456789000,23.456,23.456,1,1,1,1,1,1,1,1,1,1,2345678900000
MSFT,Microsoft Corporation,2020,85.6,88.2,83.0,85.6,4.87,1923000000000,7434000000,Technology,59120000000,28.01,1.1,15.43,1.3,30.78,3456789,11111112,34567890,345678900,3456789000,34567890000,34.567,34.567,1,1,1,1,1,1,1,1,1,1,3456789000000
FB,Facebook Inc,2020,74.5,77.1,71.9,74.5,3.63,876000000000,2734000000,Technology,27890000000,30.45,1.0,17.12,1.6,26.98,4567890,11111112,45678901,456789010,4567890100,45678901000,45.678,45.678,1,1,1,1,1,1,1,1,1,1,4567890100000
TSLA,Tesla Inc,2020,71.7,74.3,69.1,71.7,3.98,720000000000,9531000000,Automotive,21980000000,10.98,1.2,18.23,1.7,33.78,5678901,11111112,56789012,567890120,5678901200,56789012000,56.789,56.789,1,1,1,1,1,1,1,1,1,1,5678901200000
NFLX,Netflix Inc,2020,73.4,76.0,70.8,73.4,3.81,228000000000,4471000000,Media,27450000000,11.98,1.1,19.34,1.8,30.56,6789012,11111112,67890123,678901230,6789012300,67890123000,67.890,67.890,1,1,1,1,1,1,1,1,1,1,6789012300000
NVDA,NVIDIA Corporation,2020,81.2,83.8,78.6,81.2,4.24,298000000000,6042000000,Technology,42130000000,15.98,1.0,20.45,1.9,36.78,7890123,11111112,78901234,789012340,7890123400,78901234000,78.901,78.901,1,1,1,1,1,1,1,1,1,1,7890123400000
AAPL,Apple Inc,2019,83.1,86.5,79.7,83.1,4.80,2145000000000,15923000000,Technology,87650000000,23.89,1.1,14.56,1.4,30.98,8901234,11111112,89012345,890123450,8901234500,89012345000,89.012,89.012,1,1,1,1,1,1,1,1,1,1,8901234500000
GOOG,Alphabet Inc,2019,85.4,88.1,82.7,85.4,5.01,1597000000000,6749000000,Technology,37120000000,12.98,1.2,15.56,1.5,27.89,1234567,11111112,12345678,123456780,1234567800,12345678000,12.345,12.345,1,1,1,1,1,1,1,1,1,1,1234567800000
AMZN,Amazon.com Inc,2019,76.7,79.0,74.4,76.7,4.65,1489000000000,4831000000,Retail,18970000000,5.87,1.0,16.56,1.6,33.45,2345678,11111112,23456789,234567890,2345678900,23456789000,23.456,23.456,1,1,1,1,1,1,1,1,1,1,2345678900000
MSFT,Microsoft Corporation,2019,84.5,87.1,81.9,84.5,4.76,1834000000000,7334000000,Technology,57890000000,27.01,1.1,15.98,1.4,30.12,3456789,11111112,34567890,345678900,3456789000,34567890000,34.567,34.567,1,1,1,1,1,1,1,1,1,1,3456789000000
FB,Facebook Inc,2019,73.3,75.9,70.7,73.3,3.52,845000000000,2634000000,Technology,26780000000,29.56,1.0,17.67,1.7,26.12,4567890,11111112,45678901,456789010,4567890100,45678901000,45.678,45.678,1,1,1,1,1,1,1,1,1,1,4567890100000
TSLA,Tesla Inc,2019,70.6,73.2,68.0,70.6,3.87,690000000000,9431000000,Automotive,20890000000,10.56,1.2,18.89,1.8,33.12,5678901,11111112,56789012,567890120,5678901200,56789012000,56.789,56.789,1,1,1,1,1,1,1,1,1,1,5678901200000
NFLX,Netflix Inc,2019,72.3,74.9,69.7,72.3,3.70,217000000000,4371000000,Media,26340000000,11.56,1.1,19.98,1.9,29.89,6789012,11111112,67890123,678901230,6789012300,67890123000,67.890,67.890,1,1,1,1,1,1,1,1,1,1,6789012300000
NVDA,NVIDIA Corporation,2019,80.1,82.7,77.5,80.1,4.13,287000000000,5942000000,Technology,41020000000,15.45,1.0,20.98,2.0,36.12,7890123,11111112,78901234,789012340,7890123400,78901234000,78.901,78.901,1,1,1,1,1,1,1,1,1,1,7890123400000
File renamed without changes.
11 changes: 9 additions & 2 deletions backend/promptfoo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,15 @@ Promptfoo must be run in a python virtual environment as python is used to load
To set up a virtual environment, see [Running Locally](../README.md)

## Run Promptfoo
Promptfoo configuration (e.g. LLM model) can be set in `promptfooconfig.yaml`
Promptfoo has no way of fetching the OPENAI_API_KEY from our env file.
You will need to run the following command, using the OPENAI_KEY from the `.env` file:
`export OPENAI_API_KEY=<OPENAI_KEY>`

Promptfoo has no way to run all test suites; you must manually list each file you want to run.

* Use `promptfoo eval` to run all promptfoo tests.
* Use `promptfoo eval -c generate_message_suggestions_config.yaml` to run a specific test suite.
* Use `promptfoo view` to view the results in browser.

### Debugging tests

Tests can be debugged using `print("...")` messages and adding the `--verbose` argument when calling promptfoo to run tests.
26 changes: 26 additions & 0 deletions backend/promptfoo/dynamic_knowledge_graph_cypher_config.yaml

Large diffs are not rendered by default.

163 changes: 163 additions & 0 deletions backend/promptfoo/dynamic_knowledge_graph_model_config.yaml

Large diffs are not rendered by default.

24 changes: 15 additions & 9 deletions backend/promptfoo/generate_message_suggestions_config.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
description: "Generate Message Suggestions"

providers:
- id: openai:chat # openai:chat - defaults to gpt-4o-mini
- id: openai:gpt-4o-mini
config:
temperature: 0

prompts: file://prompt_foo_runner.py:generate_message_suggestions
prompts: file://promptfoo_test_runner.py:create_prompt

tests:
- description: "test the output has the correct format and content when there is no chat history "
- description: "test the output has the correct format and content when there is no chat history"
vars:
chatHistory: []
user_prompt: "Give me 5 suggestions."
system_prompt_template: "generate-message-suggestions"
system_prompt_args:
chat_history: []
assert:
- type: javascript
value: JSON.parse(output).suggestions.length === 5
Expand All @@ -19,11 +22,14 @@ tests:

- description: "test the output has content containing coca-cola when the chat history contains a previous question about coca-cola"
vars:
chatHistory:
[
"User: Can you find recent news articles discussing the ESG initiatives of Coca-Cola?",
"System: In 2023, Coca-Cola HBC has strengthened its commitment to Environmental, Social, and Governance (ESG) initiatives by embedding sustainability into its operations. The company aims for a net zero carbon footprint and net positive biodiversity by 2040, and it has been recognized as the world's most sustainable beverage company by the Dow Jones Sustainability Indices for the seventh consecutive year. Key efforts include collaborating with suppliers to improve sustainability practices, reducing carbon emissions, and promoting responsible sourcing. Additionally, Coca-Cola HBC has expanded its sustainability strategy to Egypt, reflecting its global approach to these initiatives.",
]
user_prompt: "Give me 5 suggestions."
system_prompt_template: "generate-message-suggestions"
system_prompt_args:
chat_history:
[
"User: Can you find recent news articles discussing the ESG initiatives of Coca-Cola?",
"System: In 2023, Coca-Cola HBC has strengthened its commitment to Environmental, Social, and Governance (ESG) initiatives by embedding sustainability into its operations. The company aims for a net zero carbon footprint and net positive biodiversity by 2040, and it has been recognized as the world's most sustainable beverage company by the Dow Jones Sustainability Indices for the seventh consecutive year. Key efforts include collaborating with suppliers to improve sustainability practices, reducing carbon emissions, and promoting responsible sourcing. Additionally, Coca-Cola HBC has expanded its sustainability strategy to Egypt, reflecting its global approach to these initiatives.",
]
assert:
- type: contains
value: Coca-Cola
Expand Down
18 changes: 0 additions & 18 deletions backend/promptfoo/prompt_foo_runner.py

This file was deleted.

23 changes: 23 additions & 0 deletions backend/promptfoo/promptfoo_test_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import sys
sys.path.append("../")
from src.prompts.prompting import PromptEngine # noqa: E402

engine = PromptEngine()


def create_prompt(context):
    """Build the chat messages for a single promptfoo test case.

    Reads the test's ``vars`` from ``context`` and renders the system prompt
    named by ``system_prompt_template`` through the module-level ``engine``.
    The user prompt is taken verbatim from ``user_prompt`` when that key is
    present; otherwise it is rendered from ``user_prompt_template``.

    Args:
        context: Mapping passed in by promptfoo; only ``context["vars"]`` is read.

    Returns:
        A two-element list of ``{"role": ..., "content": ...}`` dicts: the
        system message followed by the user message.

    Raises:
        KeyError: If ``vars`` has no ``system_prompt_template`` entry.
        ValueError: If neither ``user_prompt`` nor ``user_prompt_template``
            is provided.
    """
    config = context["vars"]

    # `or {}` also covers a key that is present but left empty in the YAML
    # (parsed as None), which would otherwise fail when unpacked with `**`.
    system_prompt_args = config.get("system_prompt_args") or {}
    system_prompt = engine.load_prompt(template_name=config["system_prompt_template"], **system_prompt_args)

    if "user_prompt" in config:
        # A literal user prompt takes precedence over a template.
        user_prompt = config["user_prompt"]
    elif "user_prompt_template" in config:
        user_prompt_args = config.get("user_prompt_args") or {}
        user_prompt = engine.load_prompt(template_name=config["user_prompt_template"], **user_prompt_args)
    else:
        # ValueError is a subclass of Exception, so existing handlers still catch it.
        raise ValueError("Must provide either user_prompt or user_prompt_template")

    return [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
4 changes: 0 additions & 4 deletions backend/promptfoo/promptfooconfig.yaml

This file was deleted.

2 changes: 0 additions & 2 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ neo4j==5.18.0
ruff==0.3.5
jinja2==3.1.3
websockets==12.0
azure-core==1.30.1
azure-storage-blob==12.20.0
cffi==1.16.0
cryptography==42.0.7
isodate==0.6.1
Expand Down
18 changes: 9 additions & 9 deletions backend/src/agents/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from typing import List
from src.utils import Config
from .agent import Agent, agent
from .datastore_agent import DatastoreAgent
from .web_agent import WebAgent
from .intent_agent import IntentAgent
from .tool import tool, Parameter
from .validator_agent import ValidatorAgent
from .answer_agent import AnswerAgent
from .chart_generator_agent import ChartGeneratorAgent
from .file_agent import FileAgent
from src.agents.agent import Agent, agent
from src.agents.datastore_agent import DatastoreAgent
from src.agents.web_agent import WebAgent
from src.agents.intent_agent import IntentAgent
from src.agents.tool import tool, Parameter
from src.agents.validator_agent import ValidatorAgent
from src.agents.answer_agent import AnswerAgent
from src.agents.chart_generator_agent import ChartGeneratorAgent
from src.agents.file_agent import FileAgent


config = Config()
Expand Down
39 changes: 18 additions & 21 deletions backend/src/api/app.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,17 @@
from contextlib import asynccontextmanager
import json
import logging
import logging.config
import os
from azure.storage.blob import BlobServiceClient
from typing import NoReturn
from fastapi import FastAPI, HTTPException, Response, WebSocket, UploadFile
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from src.chat_storage_service import get_chat_message
from src.session.file_uploads import clear_session_file_uploads
from src.session.redis_session_middleware import reset_session
from src.utils.graph_db_utils import populate_db
from src.utils import Config, test_connection
from src.director import question
from src.director import question, dataset_upload
from src.websockets.connection_manager import connection_manager, parse_message
from src.session import RedisSessionMiddleware
from src.utils.cyper_import_data_from_csv import import_data_from_csv_script
from src.suggestions_generator import generate_suggestions
from src.file_upload_service import handle_file_upload, get_file_upload

Expand All @@ -29,22 +25,9 @@
@asynccontextmanager
async def lifespan(app: FastAPI):
try:
if (
config.azure_storage_connection_string is None
or config.azure_storage_container_name is None
or config.azure_initial_data_filename is None
):
raise Exception("Missing Azure Environment variables. Please check the README.md for guidance.")

blob_service_client = BlobServiceClient.from_connection_string(config.azure_storage_connection_string)
container_client = blob_service_client.get_container_client(config.azure_storage_container_name)
blob_client = container_client.get_blob_client(config.azure_initial_data_filename)
download_stream = blob_client.download_blob()
annual_transactions = download_stream.readall().decode("utf-8")
populate_db(import_data_from_csv_script, json.loads(annual_transactions))
await dataset_upload()
except Exception as e:
logger.exception(f"Failed to populate database with initial data from Azure: {e}")
populate_db(import_data_from_csv_script, {})
logger.exception(f"Failed to populate database with initial data from file: {e}")
yield


Expand Down Expand Up @@ -87,6 +70,7 @@ async def health_check():
finally:
return response


@app.get("/chat")
async def chat(utterance: str):
logger.info(f"Chat method called with utterance: {utterance}")
Expand All @@ -109,6 +93,17 @@ async def clear_chat():
logger.exception(e)
return Response(status_code=500)

# GET /chat/{id}: look up a previously stored chat message by its id.
# Responds 200 with the stored message, 404 when get_chat_message returns None,
# and 500 with chat_fail_response when anything in the lookup raises.
@app.get("/chat/{id}")
def chat_message(id: str):
    logger.info(f"Get chat message called with id: {id}")
    try:
        stored_message = get_chat_message(id)
        if stored_message is not None:
            return JSONResponse(status_code=200, content=stored_message)
        return JSONResponse(status_code=404, content=f"Message with id {id} not found")
    except Exception as error:
        # Log the full traceback, then hide the details behind the generic failure payload.
        logger.exception(error)
        return JSONResponse(status_code=500, content=chat_fail_response)

@app.get("/suggestions")
async def suggestions():
Expand All @@ -120,6 +115,7 @@ async def suggestions():
logger.exception(e)
return JSONResponse(status_code=500, content=suggestions_failed_response)


@app.post("/uploadfile")
async def create_upload_file(file: UploadFile):
logger.info(f"upload file type={file.content_type} name={file.filename} size={file.size}")
Expand All @@ -132,6 +128,7 @@ async def create_upload_file(file: UploadFile):
logger.exception(e)
return JSONResponse(status_code=500, content=file_upload_failed_response)


@app.get("/uploadfile")
async def fetch_file(id: str):
logger.info(f"fetch uploaded file id={id} ")
Expand Down
33 changes: 33 additions & 0 deletions backend/src/chat_storage_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

import json
import logging
from typing import TypedDict
import redis

from src.utils.json import try_parse_to_json
from src.utils import Config

class ChatResponse(TypedDict):
id: str
question:str
answer: str
reasoning: str | None

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Application configuration; only `redis_host` is read from it in this module.
config = Config()

# Shared Redis client created at import time. The port is hard-coded to 6379.
# decode_responses=True asks redis-py to return str rather than bytes, which
# get_chat_message relies on via its isinstance(value, str) check.
redis_client = redis.Redis(host=config.redis_host, port=6379, decode_responses=True)

# Prefix prepended to a chat id to form the Redis key for that message.
CHAT_KEY_PREFIX = "chat_"

def store_chat_message(chat: ChatResponse):
    """Persist ``chat`` in Redis as JSON under ``CHAT_KEY_PREFIX`` + its id.

    Args:
        chat: The chat message to store; ``chat["id"]`` determines the key.
    """
    redis_key = CHAT_KEY_PREFIX + chat["id"]
    serialised_chat = json.dumps(chat)
    redis_client.set(redis_key, serialised_chat)


def get_chat_message(id: str) -> ChatResponse | None:
    """Fetch the chat message stored under ``CHAT_KEY_PREFIX`` + ``id``.

    Args:
        id: Identifier of the chat message to look up.

    Returns:
        The parsed message, or ``None`` when Redis holds no non-empty string
        for the key or ``try_parse_to_json`` yields a falsy result
        (presumably a parse failure - confirm against that helper).
    """
    stored_value = redis_client.get(CHAT_KEY_PREFIX + id)
    if not (stored_value and isinstance(stored_value, str)):
        return None

    parsed_message = try_parse_to_json(stored_value)
    return parsed_message or None
Loading

0 comments on commit 301b4ab

Please sign in to comment.