Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(docker): temp - use hard code path for middleware to read from ot… #54

Merged
merged 8 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ PandaETL is an open-source, no-code ETL (Extract, Transform, Load) tool designed
3. Create a `.env` file in the frontend directory with the following:

```bash
NEXT_PUBLIC_API_URL=http://localhost:3000/api/v1
NEXT_PUBLIC_STORAGE_URL=http://localhost:3000/api/assets
NEXT_PUBLIC_API_URL=http://localhost:5328
```

or copy the `.env.example` file to `.env`
Expand Down
3 changes: 2 additions & 1 deletion backend/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ PANDAETL_SERVER_URL="https://api.panda-etl.ai/" # optional
API_SERVER_URL="https://api.domer.ai" # optional
USE_OPENAI_EMBEDDINGS=false # optional
OPENAI_API_KEY=sk-xxxxxxxxxxxx # optional
CHROMA_BATCH_SIZE=5 # optional
CHROMA_BATCH_SIZE=500 # optional
MAX_FILE_SIZE=20971520 # optional
PANDAETL_API_KEY=xxx-xxx-xxx-xxx # optional; set this if you already have a PandaETL API key and want the backend to register it at startup
8 changes: 0 additions & 8 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,6 @@ FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
build-essential \
&& rm -rf /var/lib/apt/lists/*

# Install Poetry
RUN pip install poetry

Expand All @@ -19,9 +14,6 @@ RUN poetry config virtualenvs.create false
# Install dependencies
RUN poetry install

# run migrations
RUN make migrate

# Expose the port the app runs on
EXPOSE 8000

Expand Down
4 changes: 4 additions & 0 deletions backend/app/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from dotenv import load_dotenv
from pydantic_settings import BaseSettings
from typing import Optional

# Load environment variables from .env file
load_dotenv()
Expand Down Expand Up @@ -30,6 +31,9 @@ class Settings(BaseSettings):
chat_extraction_doc_threshold: float = 0.5
chat_extraction_max_docs: int = 50

# PandaETL API key (optional; None when not configured)
pandaetl_api_key: Optional[str] = None

class Config:
env_file = ".env"

Expand Down
25 changes: 23 additions & 2 deletions backend/app/main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from app import models
from app.processing.process_queue import submit_process
from app.repositories import process_repository, project_repository
from app.repositories import process_repository, project_repository, user_repository
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from .database import SessionLocal
from fastapi.middleware.cors import CORSMiddleware
from app.processing.file_preprocessing import process_file
from .config import settings
from .api import v1_router
from app.schemas.user import APIKeyRequest

# Initialize the FastAPI app
app = FastAPI()
Expand Down Expand Up @@ -57,6 +58,26 @@ def startup_pending_processes():
print(f"Error in startup_pending_processes: {e}")


def setup_user():
    """Seed a user and its API key from ``settings.pandaetl_api_key``.

    Does nothing when no API key is configured.  Otherwise it looks up
    the first user and the stored API key, creates a user if none
    exists, and stores the configured key if no key is stored yet.

    This is a best-effort startup step: any exception is reported with
    ``print`` and swallowed so that a failure here does not stop the
    application from starting.

    Returns:
        None.
    """
    try:
        with SessionLocal() as db:
            if not settings.pandaetl_api_key:
                return

            # NOTE(review): ``get_users(db, n=1)`` looks like it returns a
            # collection of at most one user -- confirm against the
            # repository.  Normalise to a single record so ``user.id`` below
            # works when a user already exists; a single-object return value
            # is still accepted unchanged.
            existing = user_repository.get_users(db, n=1)
            if isinstance(existing, (list, tuple)):
                user = existing[0] if existing else None
            else:
                user = existing

            api_key = user_repository.get_user_api_key(db)

            if not user:
                # NOTE(review): the e-mail literal looks like an obfuscated
                # placeholder; it is kept exactly as found -- confirm the
                # intended address.
                user = user_repository.create_user(db, APIKeyRequest(email="[email protected]"))

            if not api_key:
                user_repository.add_user_api_key(db, user.id, settings.pandaetl_api_key)

            print("Successfully set up user from api key")

    except Exception as e:
        # Deliberately broad: startup must continue even if seeding fails.
        print(f"Error in setup user from api key: {e}")


app.add_middleware(
CORSMiddleware,
allow_origins=["*"], # Allow all origins (for development)
Expand All @@ -69,6 +90,6 @@ def startup_pending_processes():

app.include_router(v1_router, prefix="/v1")


setup_user()
startup_pending_processes()
startup_file_preprocessing()
2 changes: 1 addition & 1 deletion backend/app/processing/file_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def process_segmentation(project_id: int, asset_id: int, asset_file_name: str):
vectorstore.add_docs(
docs=docs,
metadatas=metadatas,
batch_size=100
batch_size=settings.chroma_batch_size
)

project_repository.update_asset_content_status(
Expand Down
2 changes: 1 addition & 1 deletion backend/app/processing/process_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,4 +406,4 @@ def vectorize_extraction_process_step(project_id: int, process_step_id: int, fil
]

# Add documents to vectorstore
vectorstore.add_docs(docs=docs, metadatas=metadatas, batch_size=100)
vectorstore.add_docs(docs=docs, metadatas=metadatas, batch_size=settings.chroma_batch_size)
4 changes: 2 additions & 2 deletions backend/tests/processing/test_process_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def test_vectorize_extraction_process_step_single_reference(mock_chroma_db):
mock_vectorstore.add_docs.assert_called_once_with(
docs=expected_docs,
metadatas=expected_metadatas,
batch_size=100
batch_size=5
)

@patch('app.processing.process_queue.ChromaDB')
Expand Down Expand Up @@ -263,7 +263,7 @@ def test_vectorize_extraction_process_step_multiple_references_concatenation(moc
mock_vectorstore.add_docs.assert_called_once_with(
docs=expected_docs,
metadatas=expected_metadatas,
batch_size=100
batch_size=5
)

@patch('app.processing.process_queue.ChromaDB') # Replace with the correct module path
Expand Down
4 changes: 2 additions & 2 deletions frontend/.env.example
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
NEXT_PUBLIC_API_URL=http://localhost:3000/api/v1
NEXT_PUBLIC_STORAGE_URL=http://localhost:3000/api/assets
NEXT_PUBLIC_API_URL=http://localhost:5328
NEXT_PUBLIC_MIXPANEL_TOKEN=f2e8a71ab2bde33ebf346c5abf6ba9fa
NEXT_PUBLIC_ROLLBAR_ACCESS_TOKEN=0df0bee895044430880278e2b2a5b2d2
# NEXT_PUBLIC_BACKEND_URL=http://backend:5328 # Uncomment this if you're working with a docker setup
12 changes: 0 additions & 12 deletions frontend/next.config.mjs
Original file line number Diff line number Diff line change
@@ -1,17 +1,5 @@
const nextConfig = {
swcMinify: false, // TODO - track and remove this later: https://github.com/wojtekmaj/react-pdf/issues/1822
async rewrites() {
return [
// {
// source: "/api/:path*",
// destination: "http://localhost:5328/:path*",
// },
{
source: "/assets/:path*",
destination: "http://localhost:5328/assets/:path*",
},
];
},
};

export default nextConfig;
4 changes: 2 additions & 2 deletions frontend/src/constants.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
export const BASE_API_URL = process.env.NEXT_PUBLIC_API_URL;
export const BASE_STORAGE_URL = process.env.NEXT_PUBLIC_STORAGE_URL;
export const BASE_API_URL = `${process.env.NEXT_PUBLIC_API_URL}/v1`;
export const BASE_STORAGE_URL = `${process.env.NEXT_PUBLIC_API_URL}/assets`;
export const MAX_FILE_SIZE = 20 * 1024 * 1024; // 20MB in bytes
16 changes: 14 additions & 2 deletions frontend/src/middleware.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
import { NextResponse } from "next/server";
import type { NextRequest } from "next/server";
import { GetAPIKey } from "@/services/user";
import localStorage from "@/lib/localStorage";
import { APIKeyData } from "./interfaces/user";
import axios from "axios";
import { GetAPIKey } from "./services/user";

export async function middleware(request: NextRequest) {
let apiKey = null;
try {
apiKey = await GetAPIKey();
const dockerBackendUrl = process.env.NEXT_PUBLIC_BACKEND_URL;

if (dockerBackendUrl) {
console.log(dockerBackendUrl);
const response = await axios.get<{ data: APIKeyData }>(
`${dockerBackendUrl}/v1/user/get-api-key`
);
apiKey = { data: { api_key: response.data.data.key } };
} else {
apiKey = await GetAPIKey();
}
} catch (error) {
console.error("Error fetching API key:", error);
return NextResponse.redirect(new URL("/api-key-setup", request.url));
Expand Down