diff --git a/.env.example b/.env.example index 5f598c9a..add1fd79 100644 --- a/.env.example +++ b/.env.example @@ -38,6 +38,7 @@ WS_URL=ws://localhost:8250/ws # llm ANSWER_AGENT_LLM="mistral" INTENT_AGENT_LLM="openai" +REPORT_AGENT_LLM="mistral" VALIDATOR_AGENT_LLM="openai" DATASTORE_AGENT_LLM="openai" MATHS_AGENT_LLM="openai" @@ -52,6 +53,7 @@ DYNAMIC_KNOWLEDGE_GRAPH_LLM="openai" # model ANSWER_AGENT_MODEL="mistral-large-latest" INTENT_AGENT_MODEL="gpt-4o-mini" +REPORT_AGENT_MODEL="mistral-large-latest" VALIDATOR_AGENT_MODEL="gpt-4o-mini" DATASTORE_AGENT_MODEL="gpt-4o-mini" MATHS_AGENT_MODEL="gpt-4o-mini" diff --git a/backend/promptfoo/create_report_config.yaml b/backend/promptfoo/create_report_config.yaml new file mode 100644 index 00000000..33e5e96e --- /dev/null +++ b/backend/promptfoo/create_report_config.yaml @@ -0,0 +1,119 @@ +description: "Test Report Prompt" + +providers: + - id: mistral:mistral-large-latest + config: + temperature: 0 + +prompts: file://promptfoo_test_runner.py:create_prompt + +tests: + - description: "Sample test to aid in ESG report generation development" + vars: + user_prompt_template: "create-report-user-prompt" + system_prompt_template: "create-report-system-prompt" + user_prompt_args: + document_text: "Published September 2024 Carbon Reduction Plan +Supplier name: Amazon Web Services EU SARL (UK Branch) (“AWS UK”) +Publication date: September 30, 2024 +Commitment to Achieving Net Zero +AWS UK, as part of Amazon.com, Inc. (“Amazon”), is committed to achieving net -zero +emissions by 2040. In 2019, Amazon co -founded The Climate Pledge, a public commitment +to innovate, use our scale for good and go faster to address the urgency of the climate crisis +to reach net -zero carbon across the entire organization by 2040. 
Since committing to the +Pledge, we’ve changed how we conduct our business and the running of our operations, and +we’ve increased funding and implementation of new technologies and services that +decarbonize and help preserve the natural world, alon gside the ambitious goals outlined in +The Climate Pledge. We’re fully committed to our goals and our work to build a better planet. +Baseline Emissions Footprint +Base Year emissions are a record of the greenhouse gases that have been produced in the +past an d are the reference point against which emissions reduction can be measured. +Baseline Year: 2020 +Additional Details relating to the Baseline Emissions calculations: +AWS UK utilized January 1, 2020 to December 31, 2020 as the baseline year for emissions +reporting under this Carbon Reduction Plan. Our plan includes emissions data from relevant +affiliate companies helping to provide AWS UK’s services to our customers. We ’ve included both +location -based and market -based method Scope 2 emissions in the following tables. AWS UK +benefits from contractual arrangements entered into by our affiliate(s) for renewable electricity +and/or renewable attributes that are reflected in t he market -based data set. More information +about our corporate carbon footprint and methodology can be found on our website . +Our baseline year does not include Scope 1 emissions. 
In 2022 we updated our methodology +and Scope 1 emissions are now included in total emissions for AWS UK + + Published September 2024 Baseline year emissions: +EMISSIONS TOTAL (tCO 2e) +Scope 1 0 +Scope 2 61,346 – Location -based method +2,813 – Market -based method +Scope 3 (Included +Sources) 3,770 +Total Emissions 65,116 – Location -based method +6,583 – Market -based method +Current Emissions Reporting +Reporting Year: 202 3 (January 1, 202 3 to December 31, 202 3) +EMISSIONS TOTAL (tCO 2e) +Scope 1 2,23 3 +Scope 2 126,755 – Location -based method +0 – Market -based method +Scope 3 (Included +Sources) 13,188 +Total Emissions 142,17 6 – Location -based method +15,42 1 – Market -based method + + Published September 2024 Emissions Reduction Targets +In 2019, we set an ambitious goal to match 100% of the electricity we use with renewable +energy by 2030. This goal includes all data centres , logistics facilities, physical stores, and +corporate offices, as well as on -site charg ing points and our financially integrated subsidiaries. +We are proud to have achieved this goal in 2023, seven years early, with 100% of the electricity +consum ed by Amazon matched with renewable energy sources. +Amazon continue s to be transparent and share our progress to reach net -zero carbon in our +annual Sustainability Report , which also includes details on how we measure carbon . +Carbon Reduction Projects +Completed Carbon Reduction Initiatives +Amazon continues to take actions across our operations to drive carbon reduction around the +world, including in the UK. As of January 202 4, Amazon’s renewable energy portfolio includes +243 wind and solar farms and 2 70 rooftop solar projects, totalling 513 projects and 28 +gigawatts of renewable energy capacity. This includes several utility -scale renewable energy +projects located within the UK: +•In 2019, Amazon announced our first power purchase agreement in the UK, located in +Kintyre Peninsula, Scotland. 
The “Amazon Wind Farm Scotland – Beinn an Tuirc 3” +began o perating in 2021, providing 50 megawatts (MW) of new renewable capacity to +the electricity grid with expected generation of 168,000 megawatt hours (MWh) of +clean energy annually. That’s enough to power 46,000 UK homes every year. +•In December 2020, Amazon a nnounced a two -phase renewable energy project located +in South Lanarkshire, Scotland, the Kennoxhead wind farm. Kennoxhead will be the +largest single -site onshore wind project in the UK, enabled through corporate +procurement. Once fully operational, Kenno xhead will produce 129 MW of renewable +capacity and is expected to generate 439,000 MWh of clean energy annually. Phase 1 +(60 MW) began operating in 2022, and Phase 2 (69 MW) will begin operations in 2024 . +•In 2022, Amazon announced its first project in Nor thern Ireland, a 16 MW onshore +windfarm in Co Antrim. +•In 2022, Amazon also announced a new 473 MW offshore wind farm, Moray West, +located off the coast of Scotland . Amazon expects completion of Moray West in 2024. +This is Amazon’s largest project in Scotland and the largest corporate renewable +energy deal announced by any company in the UK to date. +•In 2023, Amazon announced a new 47 MW solar farm, Warl ey located in Essex. +This project is expected to be operational in 2024. + + Published September 2024 Declaration and Sign Off +This Carbon Reduction Plan has been completed in accordance with PPN 06/21 and +associated guidance and reporting standard for Carbon Reduction Plans. +Emiss ions have been reported and recorded in accordance with the published reporting +standard for Carbon Reduction Plans and the GHG Reporting Protocol corporate standard1 +and uses the appropri ate Government emission conversion factors for greenhouse gas +company reporting2. 
+Scope 1 and Scope 2 emissions have been reported in accordance with S ECR requirements, +and the required subset of Scope 3 emissions have been reported in accordance with the +published reporting standard for Carbon Reduction Plans and the Corporate Value Chain +(Scope 3) Standard3. +This Carbon Reduction Plan has been reviewed and signed off by the board of directors (or +equivalent management body)." + assert: + - type: contains-all + value: + - "# Basic" + - "# ESG" + - "# Environmental" + - "# Social" + - "# Governance" + - "# Conclusion" \ No newline at end of file diff --git a/backend/src/agents/__init__.py b/backend/src/agents/__init__.py index c34cd064..d692a8ec 100644 --- a/backend/src/agents/__init__.py +++ b/backend/src/agents/__init__.py @@ -1,4 +1,5 @@ from typing import List + from src.utils import Config from src.agents.agent import Agent, agent from src.agents.datastore_agent import DatastoreAgent @@ -9,6 +10,7 @@ from src.agents.answer_agent import AnswerAgent from src.agents.chart_generator_agent import ChartGeneratorAgent from src.agents.file_agent import FileAgent +from src.agents.report_agent import ReportAgent config = Config() @@ -26,6 +28,10 @@ def get_answer_agent() -> Agent: return AnswerAgent(config.answer_agent_llm, config.answer_agent_model) +def get_report_agent() -> Agent: + return ReportAgent(config.report_agent_llm, config.report_agent_model) + + def agent_details(agent) -> dict: return {"name": agent.name, "description": agent.description} @@ -55,6 +61,7 @@ def get_agent_details(): "get_intent_agent", "get_available_agents", "get_validator_agent", + "get_report_agent", "Parameter", "tool", ] diff --git a/backend/src/agents/report_agent.py b/backend/src/agents/report_agent.py new file mode 100644 index 00000000..dd2c0db0 --- /dev/null +++ b/backend/src/agents/report_agent.py @@ -0,0 +1,20 @@ +from src.agents import Agent, agent +from src.prompts import PromptEngine + +engine = PromptEngine() + + +@agent( + name="ReportAgent", + 
description="This agent is responsible for generating an ESG focused report on a narrative document", + tools=[], +) +class ReportAgent(Agent): + async def invoke(self, utterance: str) -> str: + user_prompt = engine.load_prompt( + "create-report-user-prompt", + document_text=utterance) + + system_prompt = engine.load_prompt("create-report-system-prompt") + + return await self.llm.chat(self.model, system_prompt=system_prompt, user_prompt=user_prompt) diff --git a/backend/src/api/app.py b/backend/src/api/app.py index a4922035..71bcac78 100644 --- a/backend/src/api/app.py +++ b/backend/src/api/app.py @@ -82,6 +82,7 @@ async def chat(utterance: str): logger.exception(e) return JSONResponse(status_code=500, content=chat_fail_response) + @app.delete("/chat") async def clear_chat(): logger.info("Delete the chat session") @@ -94,6 +95,7 @@ async def clear_chat(): logger.exception(e) return Response(status_code=500) + @app.get("/chat/{id}") def chat_message(id: str): logger.info(f"Get chat message called with id: {id}") @@ -106,6 +108,7 @@ def chat_message(id: str): logger.exception(e) return JSONResponse(status_code=500, content=chat_fail_response) + @app.get("/suggestions") async def suggestions(): logger.info("Requesting chat suggestions") diff --git a/backend/src/directors/report_director.py b/backend/src/directors/report_director.py index 1d177b32..c064a57c 100644 --- a/backend/src/directors/report_director.py +++ b/backend/src/directors/report_director.py @@ -4,19 +4,22 @@ from src.utils.scratchpad import clear_scratchpad, update_scratchpad from src.utils.file_utils import handle_file_upload +from src.agents import get_report_agent + class FileUploadReport(TypedDict): id: str filename: str | None report: str | None -async def report_on_file_upload(upload:UploadFile) -> FileUploadReport: + +async def report_on_file_upload(upload: UploadFile) -> FileUploadReport: file = handle_file_upload(upload) update_scratchpad(result=file["content"]) - report = "#Report on upload as 
markdown" # await report_agent.invoke(file["content"]) + report = await get_report_agent().invoke(file["content"]) clear_scratchpad() diff --git a/backend/src/prompts/templates/create-report-system-prompt.j2 b/backend/src/prompts/templates/create-report-system-prompt.j2 new file mode 100644 index 00000000..9c78f59a --- /dev/null +++ b/backend/src/prompts/templates/create-report-system-prompt.j2 @@ -0,0 +1,41 @@ +The user will provide a report from a company. Your goal is to analyse the document and respond answering the following questions in the format described below: + +# Report: + +## Basic: + +1. What is the name of the company that this document refers to? +2. What year or years does the information refer to? +3. Summarise in one sentence what the document is about. + +## ESG (Environment, Social, Governance): +1. Which aspects of ESG does this document primarily discuss? Respond with a percentage of each topic covered by the document. +2. What aspects of ESG are not discussed in the document? + +### Environmental: + +1. What environmental goals does this document describe? +2. What beneficial environmental claims does the company make? +3. What potential environmental greenwashing can you identify that should be fact checked? +4. What environmental regulations, standards or certifications can you identify in the document? + +### Social: + +1. What social goals does this document describe? +2. What beneficial societal claims does the company make? +3. What potential societal greenwashing can you identify that should be fact checked? +4. What societal regulations, standards or certifications can you identify in the document? + +### Governance: + +1. What governance goals does this document describe? +2. What beneficial governance claims does the company make? +3. What potential governance greenwashing can you identify that should be fact checked? +4. What governance regulations, standards or certifications can you identify in the document? 
+ +## Conclusion: + +1. What is your conclusion about the claims and potential greenwashing in this document? +2. What are your recommended next steps to verify any of the claims in this document? + +The report should be formatted as markdown. \ No newline at end of file diff --git a/backend/src/prompts/templates/create-report-user-prompt.j2 b/backend/src/prompts/templates/create-report-user-prompt.j2 new file mode 100644 index 00000000..8b5d3ee4 --- /dev/null +++ b/backend/src/prompts/templates/create-report-user-prompt.j2 @@ -0,0 +1,3 @@ +Generate an ESG report using the following document: + +{{ document_text }} \ No newline at end of file diff --git a/backend/src/session/file_uploads.py b/backend/src/session/file_uploads.py index 134cf24a..4419441b 100644 --- a/backend/src/session/file_uploads.py +++ b/backend/src/session/file_uploads.py @@ -17,21 +17,24 @@ UPLOADS_KEY_PREFIX = "file_upload_" + class FileUploadMeta(TypedDict): uploadId: str filename: str + class FileUpload(TypedDict): uploadId: str + content: str filename: str | None contentType: str | None size: int | None - content: str | None def get_session_file_uploads_meta() -> list[FileUploadMeta] | None: return get_session(UPLOADS_META_SESSION_KEY, []) + def get_session_file_upload(upload_id) -> FileUpload | None: value = redis_client.get(UPLOADS_KEY_PREFIX + upload_id) if value and isinstance(value, str): diff --git a/backend/src/utils/config.py b/backend/src/utils/config.py index 69544a82..078f860c 100644 --- a/backend/src/utils/config.py +++ b/backend/src/utils/config.py @@ -20,6 +20,7 @@ def __init__(self): self.neo4j_password = None self.answer_agent_llm = None self.intent_agent_llm = None + self.report_agent_llm = None self.validator_agent_llm = None self.datastore_agent_llm = None self.maths_agent_llm = None @@ -32,6 +33,7 @@ def __init__(self): self.validator_agent_model = None self.intent_agent_model = None self.answer_agent_model = None + self.report_agent_model = None 
self.datastore_agent_model = None self.chart_generator_model = None self.web_agent_model = None @@ -61,6 +63,7 @@ def load_env(self): self.files_directory = os.getenv("FILES_DIRECTORY", default_files_directory) self.answer_agent_llm = os.getenv("ANSWER_AGENT_LLM") self.intent_agent_llm = os.getenv("INTENT_AGENT_LLM") + self.report_agent_llm = os.getenv("REPORT_AGENT_LLM") self.validator_agent_llm = os.getenv("VALIDATOR_AGENT_LLM") self.datastore_agent_llm = os.getenv("DATASTORE_AGENT_LLM") self.chart_generator_llm = os.getenv("CHART_GENERATOR_LLM") @@ -72,6 +75,7 @@ def load_env(self): self.dynamic_knowledge_graph_llm = os.getenv("DYNAMIC_KNOWLEDGE_GRAPH_LLM") self.answer_agent_model = os.getenv("ANSWER_AGENT_MODEL") self.intent_agent_model = os.getenv("INTENT_AGENT_MODEL") + self.report_agent_model = os.getenv("REPORT_AGENT_MODEL") self.validator_agent_model = os.getenv("VALIDATOR_AGENT_MODEL") self.datastore_agent_model = os.getenv("DATASTORE_AGENT_MODEL") self.web_agent_model = os.getenv("WEB_AGENT_MODEL") diff --git a/backend/src/utils/file_utils.py b/backend/src/utils/file_utils.py index 2575a332..e4fcaf2d 100644 --- a/backend/src/utils/file_utils.py +++ b/backend/src/utils/file_utils.py @@ -11,15 +11,13 @@ MAX_FILE_SIZE = 10*1024*1024 -def handle_file_upload(file:UploadFile) -> FileUpload: + +def handle_file_upload(file: UploadFile) -> FileUpload: if (file.size or 0) > MAX_FILE_SIZE: raise HTTPException(status_code=413, detail=f"File upload must be less than {MAX_FILE_SIZE} bytes") - - all_content = "" - if ("application/pdf" == file.content_type): - + if "application/pdf" == file.content_type: start_time = time.time() pdf_file = PdfReader(file.file) all_content = "" @@ -33,24 +31,26 @@ def handle_file_upload(file:UploadFile) -> FileUpload: logger.debug(f'PDF content {all_content}') logger.info(f"PDF content extracted successfully in {(end_time - start_time)}") - - elif ("text/plain" == file.content_type): + elif "text/plain" == file.content_type: all_content 
= TextIOWrapper(file.file, encoding='utf-8').read() logger.debug(f'Text content {all_content}') else: raise HTTPException(status_code=400, detail="File upload must be supported type (text/plain or application/pdf)") - session_file = FileUpload(uploadId=str(uuid.uuid4()), - contentType=file.content_type, - filename=file.filename, - content=all_content, - size=file.size) + session_file = FileUpload( + uploadId=str(uuid.uuid4()), + contentType=file.content_type, + filename=file.filename, + content=all_content, + size=file.size + ) update_session_file_uploads(session_file) return session_file + def get_file_upload(upload_id) -> FileUpload | None: return get_session_file_upload(upload_id) diff --git a/backend/tests/agents/report_agent_test.py b/backend/tests/agents/report_agent_test.py new file mode 100644 index 00000000..a8e1ca36 --- /dev/null +++ b/backend/tests/agents/report_agent_test.py @@ -0,0 +1,19 @@ +import pytest + +from src.agents.report_agent import ReportAgent +from src.llm.factory import get_llm + +mock_model = "mockmodel" +mock_llm = get_llm("mockllm") + +@pytest.mark.asyncio +async def test_invoke_calls_llm(mocker): + report_agent = ReportAgent(llm_name="mockllm", model=mock_model) + mock_response = "A Test Report" + + mock_llm.chat = mocker.AsyncMock(return_value=mock_response) + + response = await report_agent.invoke("Test Document") + + assert response == mock_response + diff --git a/backend/tests/directors/report_director_test.py b/backend/tests/directors/report_director_test.py index 32f90770..5cd6583e 100644 --- a/backend/tests/directors/report_director_test.py +++ b/backend/tests/directors/report_director_test.py @@ -6,11 +6,14 @@ from src.session.file_uploads import FileUpload from src.directors.report_director import report_on_file_upload + @pytest.mark.asyncio async def test_report_on_file_upload(mocker): - file_upload = FileUpload(uploadId="1", filename="test.txt", content="test", contentType="text/plain", size=4) + mock_agent = 
mocker.AsyncMock() + mock_agent.invoke.return_value = "#Report on upload as markdown" + mocker.patch("src.directors.report_director.get_report_agent", return_value=mock_agent) mock_handle_file_upload = mocker.patch("src.directors.report_director.handle_file_upload", return_value=file_upload) headers = Headers({"content-type": "text/plain"}) @@ -19,4 +22,4 @@ async def test_report_on_file_upload(mocker): response = await report_on_file_upload(request_upload_file) mock_handle_file_upload.assert_called_once_with(request_upload_file) - assert response == {"filename": "test.txt", "id": "1", "report": "#Report on upload as markdown"} + assert response == {"filename": "test.txt", "id": "1", "report": "#Report on upload as markdown"}