Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add legal document review use case #467

Merged
merged 14 commits into from
Dec 24, 2024
12 changes: 12 additions & 0 deletions helpers/datasources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export const EXAMPLE_10K_SEC_FILES: TemplateDataSource[] = [
url: new URL(
"https://s2.q4cdn.com/470004039/files/doc_earnings/2023/q4/filing/_10-K-Q4-2023-As-Filed.pdf",
),
filename: "apple_10k_report.pdf",
},
},
{
Expand All @@ -26,10 +27,21 @@ export const EXAMPLE_10K_SEC_FILES: TemplateDataSource[] = [
url: new URL(
"https://ir.tesla.com/_flysystem/s3/sec/000162828024002390/tsla-20231231-gen.pdf",
),
filename: "tesla_10k_report.pdf",
},
},
];

/**
 * Example data source for the GDPR text (CELEX:32016R0679), fetched as a PDF
 * from EUR-Lex.
 *
 * `filename` is set explicitly because the URL ends in a query string, so its
 * basename would not be a usable file name.
 */
export const EXAMPLE_GDPR: TemplateDataSource = {
  type: "file",
  config: {
    url: new URL(
      "https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:32016R0679",
    ),
    filename: "gdpr.pdf",
  },
};

export function getDataSources(
files?: string,
exampleFile?: boolean,
Expand Down
5 changes: 3 additions & 2 deletions helpers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ const prepareContextData = async (
const destPath = path.join(
root,
"data",
path.basename(dataSourceConfig.url.toString()),
dataSourceConfig.filename ??
path.basename(dataSourceConfig.url.toString()),
);
await downloadFile(dataSourceConfig.url.toString(), destPath);
} else {
Expand Down Expand Up @@ -192,7 +193,7 @@ export const installTemplate = async (
if (
props.template === "streaming" ||
props.template === "multiagent" ||
props.template === "extractor"
props.template === "reflex"
marcusschiesser marked this conversation as resolved.
Show resolved Hide resolved
) {
await createBackendEnvFile(props.root, props);
}
Expand Down
39 changes: 19 additions & 20 deletions helpers/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -405,8 +405,8 @@ export const installPythonTemplate = async ({
>) => {
console.log("\nInitializing Python project with template:", template, "\n");
let templatePath;
if (template === "extractor") {
templatePath = path.join(templatesDir, "types", "extractor", framework);
if (template === "reflex") {
templatePath = path.join(templatesDir, "types", "reflex");
} else {
templatePath = path.join(templatesDir, "types", "streaming", framework);
}
Expand Down Expand Up @@ -472,24 +472,6 @@ export const installPythonTemplate = async ({
cwd: path.join(compPath, "engines", "python", engine),
});

// Copy agent code
if (template === "multiagent") {
if (agents) {
await copy("**", path.join(root), {
parents: true,
cwd: path.join(compPath, "agents", "python", agents),
rename: assetRelocator,
});
} else {
console.log(
red(
"There is no agent selected for multi-agent template. Please pick an agent to use via --agents flag.",
),
);
process.exit(1);
}
}

// Copy router code
await copyRouterCode(root, tools ?? []);
}
Expand All @@ -503,6 +485,23 @@ export const installPythonTemplate = async ({
});
}

if (template === "multiagent" || template === "reflex") {
if (agents) {
await copy("**", path.join(root), {
parents: true,
cwd: path.join(compPath, "agents", "python", agents),
rename: assetRelocator,
});
} else {
console.log(
red(
"There is no agent selected for multi-agent template. Please pick an agent to use via --agents flag.",
leehuwuj marked this conversation as resolved.
Show resolved Hide resolved
),
);
process.exit(1);
}
leehuwuj marked this conversation as resolved.
Show resolved Hide resolved
}

console.log("Adding additional dependencies");

const addOnDependencies = getAdditionalDependencies(
Expand Down
13 changes: 10 additions & 3 deletions helpers/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ export type ModelConfig = {
isConfigured(): boolean;
};
export type TemplateType =
| "extractor"
| "streaming"
| "community"
| "llamapack"
| "multiagent";
| "multiagent"
| "reflex";
export type TemplateFramework = "nextjs" | "express" | "fastapi";
export type TemplateUI = "html" | "shadcn";
export type TemplateVectorDB =
Expand All @@ -49,14 +49,21 @@ export type TemplateDataSource = {
};
export type TemplateDataSourceType = "file" | "web" | "db";
export type TemplateObservability = "none" | "traceloop" | "llamatrace";
export type TemplateAgents = "financial_report" | "blog" | "form_filling";
export type TemplateAgents =
| "financial_report"
| "blog"
| "form_filling"
| "extractor"
| "contract_review";
/**
 * Config for both file and folder data sources.
 *
 * A source is identified either by a local `path` or by a remote `url`.
 */
export type FileSourceConfig =
| {
path: string;
// NOTE(review): how `filename` is used for path-based sources is not visible here — confirm.
filename?: string;
}
| {
url: URL;
// Optional name for the downloaded file in the project's data folder;
// when omitted, the basename of the URL string is used instead.
filename?: string;
};
export type WebSourceConfig = {
baseUrl?: string;
Expand Down
2 changes: 1 addition & 1 deletion questions/datasources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ export const getDataSourceChoices = (
);
}

if (framework === "fastapi" && template !== "extractor") {
if (framework === "fastapi" && template !== "reflex") {
choices.push({
title: "Use website content (requires Chrome)",
value: "web",
Expand Down
11 changes: 4 additions & 7 deletions questions/questions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ export const askProQuestions = async (program: QuestionArgs) => {
return; // early return - no further questions needed for llamapack projects
}

if (program.template === "extractor") {
// Extractor template only supports FastAPI, empty data sources, and llamacloud
if (program.template === "reflex") {
// Reflex template only supports FastAPI, empty data sources, and llamacloud
// So we just use example file for reflex template, this allows user to choose vector database later
program.dataSources = [EXAMPLE_FILE];
program.framework = "fastapi";
Expand Down Expand Up @@ -354,11 +354,8 @@ export const askProQuestions = async (program: QuestionArgs) => {
// default to use LlamaParse if using LlamaCloud
program.useLlamaParse = true;
} else {
// Extractor template doesn't support LlamaParse and LlamaCloud right now (cannot use asyncio loop in Reflex)
if (
program.useLlamaParse === undefined &&
program.template !== "extractor"
) {
// Reflex template doesn't support LlamaParse and LlamaCloud right now (cannot use asyncio loop in Reflex)
marcusschiesser marked this conversation as resolved.
Show resolved Hide resolved
if (program.useLlamaParse === undefined && program.template !== "reflex") {
// if already set useLlamaParse, don't ask again
if (program.dataSources.some((ds) => ds.type === "file")) {
const { useLlamaParse } = await prompts(
Expand Down
20 changes: 17 additions & 3 deletions questions/simple.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import prompts from "prompts";
import { EXAMPLE_10K_SEC_FILES, EXAMPLE_FILE } from "../helpers/datasources";
import {
EXAMPLE_10K_SEC_FILES,
EXAMPLE_FILE,
EXAMPLE_GDPR,
} from "../helpers/datasources";
import { askModelConfig } from "../helpers/providers";
import { getTools } from "../helpers/tools";
import { ModelConfig, TemplateFramework } from "../helpers/types";
Expand All @@ -12,6 +16,7 @@ type AppType =
| "financial_report_agent"
| "form_filling"
| "extractor"
| "contract_review"
| "data_scientist";

type SimpleAnswers = {
Expand Down Expand Up @@ -42,6 +47,7 @@ export const askSimpleQuestions = async (
},
{ title: "Code Artifact Agent", value: "code_artifact" },
{ title: "Information Extractor", value: "extractor" },
{ title: "Contract Review", value: "contract_review" },
leehuwuj marked this conversation as resolved.
Show resolved Hide resolved
],
},
questionHandlers,
Expand All @@ -51,7 +57,7 @@ export const askSimpleQuestions = async (
let llamaCloudKey = args.llamaCloudKey;
let useLlamaCloud = false;

if (appType !== "extractor") {
if (appType !== "extractor" && appType !== "contract_review") {
marcusschiesser marked this conversation as resolved.
Show resolved Hide resolved
const { language: newLanguage } = await prompts(
{
type: "select",
Expand Down Expand Up @@ -166,11 +172,19 @@ const convertAnswers = async (
modelConfig: MODEL_GPT4o,
},
extractor: {
template: "extractor",
template: "reflex",
agents: "extractor",
tools: [],
frontend: false,
dataSources: [EXAMPLE_FILE],
},
contract_review: {
template: "reflex",
agents: "contract_review",
tools: [],
frontend: false,
dataSources: [EXAMPLE_GDPR],
},
};
const results = lookup[answers.appType];
return {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os

from fastapi import APIRouter
from fastapi.responses import FileResponse

# Router on which this module's download endpoint is registered.
router = APIRouter()


@router.get("")
def download_file(path: str):
    """Serve a file stored under the ``output`` directory.

    Args:
        path: Location of the requested file, either relative to ``output``
            (``report.csv``) or already prefixed with it
            (``output/report.csv``).

    Returns:
        A ``FileResponse`` for the resolved file.

    Raises:
        HTTPException: 404 when the path resolves outside ``output``, does
            not exist, or is not a regular file.
    """
    from fastapi import HTTPException

    output_root = os.path.realpath("output")

    # Accept both "output/<file>" and "<file>". Only a genuine "output/"
    # prefix counts, so sibling paths such as "output_private/x" cannot bypass
    # the directory and names like "outputs.csv" are looked up inside it.
    if path == "output" or path.startswith(("output/", "output" + os.sep)):
        candidate = path
    else:
        candidate = f"output/{path}"

    # Resolve "..", symlinks and redundant separators, then require the result
    # to stay inside the output directory. This replaces stripping ".." from
    # the string, which mangled legitimate names such as "report..v2.pdf".
    file_path = ""
    try:
        file_path = os.path.realpath(candidate)
        inside_output = os.path.commonpath([output_root, file_path]) == output_root
    except ValueError:
        # Raised for embedded NUL bytes or paths on a different drive.
        inside_output = False

    # isfile (not exists): a directory cannot be served by FileResponse.
    if not inside_output or not os.path.isfile(file_path):
        raise HTTPException(status_code=404, detail="File not found")

    return FileResponse(
        path=file_path,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from fastapi import APIRouter

from app.api.routers.download import router as download_router

# Top-level API router; the download router is mounted on it below.
api_router = APIRouter()

# Expose the download router's endpoints under the /api/download prefix.
api_router.include_router(download_router, prefix="/api/download")
47 changes: 47 additions & 0 deletions templates/components/agents/python/contract_review/app/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Relative directory name for data files.
# NOTE(review): consumers of these constants are not visible here — confirm usage.
DATA_DIR = "data"
# Relative directory for uploaded files, nested under "output".
UPLOADED_DIR = "output/uploaded"

# Workflow prompts
# Prompt template asking the model to extract contract information into a
# schema that is supplied as a function call.
# Placeholder: {contract_data} (the contract content).
# NOTE(review): presumably filled via str.format / a prompt template — confirm at call site.
CONTRACT_EXTRACT_PROMPT = """\
You are given contract data below. \
Please extract out relevant information from the contract into the defined schema - the schema is defined as a function call.\

{contract_data}
"""

# Prompt template asking the model to evaluate one contract clause against its
# matched guideline text and answer with JSON matching ClauseComplianceCheck.
# Placeholders: {clause_text}, {guideline_text}.
CONTRACT_MATCH_PROMPT = """\
Given the following contract clause and the corresponding relevant guideline text, evaluate the compliance \
and provide a JSON object that matches the ClauseComplianceCheck schema.

**Contract Clause:**
{clause_text}

**Matched Guideline Text(s):**
{guideline_text}
"""


# System prompt for the final compliance-report step. It contains no
# placeholders and instructs the model to report full compliance when no
# noncompliant clauses are present.
COMPLIANCE_REPORT_SYSTEM_PROMPT = """\
You are a compliance reporting assistant. Your task is to generate a final compliance report \
based on the results of clause compliance checks against \
a given set of guidelines.

Analyze the provided compliance results and produce a structured report according to the specified schema.
Ensure that if there are no noncompliant clauses, the report clearly indicates full compliance.
"""

# User prompt template for the final compliance-report step.
# Placeholders: {vendor_name}, {compliance_results} (the noncompliant clause sections).
# NOTE(review): the text refers to a `ComplianceReport` schema "below", but no
# schema is included in this template — presumably supplied separately; confirm.
COMPLIANCE_REPORT_USER_PROMPT = """\
A set of clauses within a contract were checked against GDPR compliance guidelines for the following vendor: {vendor_name}.
The set of noncompliant clauses are given below.

Each section includes:
- **Clause:** The exact text of the contract clause.
- **Guideline:** The relevant GDPR guideline text.
- **Compliance Status:** Should be `False` for noncompliant clauses.
- **Notes:** Additional information or explanations.

{compliance_results}

Based on the above compliance results, generate a final compliance report following the `ComplianceReport` schema below.
If there are no noncompliant clauses, the report should indicate that the contract is fully compliant.
"""
Loading
Loading