diff --git a/__pycache__/database.cpython-310.pyc b/__pycache__/database.cpython-310.pyc new file mode 100644 index 0000000..eeea698 Binary files /dev/null and b/__pycache__/database.cpython-310.pyc differ diff --git a/__pycache__/database.cpython-312.pyc b/__pycache__/database.cpython-312.pyc new file mode 100644 index 0000000..91b09da Binary files /dev/null and b/__pycache__/database.cpython-312.pyc differ diff --git a/llama.py b/llama.py new file mode 100644 index 0000000..3a82295 --- /dev/null +++ b/llama.py @@ -0,0 +1,120 @@ +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +from langchain_community.llms import Ollama +from langchain_core.prompts import PromptTemplate +from langchain.chains import LLMChain +import uvicorn +from typing import Optional, Dict + +app = FastAPI() + +class QueryRequest(BaseModel): + query: str + context: Optional[str] = "" + model_name: Optional[str] = "llama2" + temperature: Optional[float] = 0.7 + max_tokens: Optional[int] = 500 + +class LLMManager: + def __init__(self): + self.llm_cache: Dict[str, Ollama] = {} + self.base_url = "http://localhost:11434" + + def get_llm(self, model_name: str, temperature: float) -> Ollama: + """Get or create LLM instance""" + cache_key = f"{model_name}_{temperature}" + if cache_key not in self.llm_cache: + try: + self.llm_cache[cache_key] = Ollama( + model=model_name, + base_url=self.base_url, + temperature=temperature + ) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed to initialize LLM: {str(e)}" + ) + return self.llm_cache[cache_key] + +llm_manager = LLMManager() + +@app.get("/") +async def health_check(): + """Health check endpoint""" + return {"status": "healthy", "service": "llama-api"} + +@app.get("/models") +async def list_models(): + """List available models""" + return { + "models": [ + "llama2", + "mistral", + "codellama", + "llama2-uncensored" + ] + } + +@app.post("/generate") +async def generate_response(request: QueryRequest): + """Generate LLM response""" + try: + llm = llm_manager.get_llm(request.model_name, request.temperature) + + prompt_template = """ + Use the following pieces of context to answer the question at the end. + If you don't know the answer, just say that you don't know, don't try to make up an answer. 
+ + Context: {context} + + Question: {query} + + Answer:""" + + PROMPT = PromptTemplate( + template=prompt_template, + input_variables=["context", "query"] + ) + + chain = LLMChain(llm=llm, prompt=PROMPT) + + response = chain.run( + context=request.context, + query=request.query + ) + + return { + "status": "success", + "response": response, + "model": request.model_name + } + + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Error generating response: {str(e)}" + ) + +@app.post("/chat") +async def chat_completion(request: QueryRequest): + """Chat completion endpoint""" + try: + llm = llm_manager.get_llm(request.model_name, request.temperature) + + response = llm(request.query) + + return { + "status": "success", + "response": response, + "model": request.model_name + } + + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Error in chat completion: {str(e)}" + ) + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/main.py b/main.py index 8bc1a35..9062ed2 100644 --- a/main.py +++ b/main.py @@ -14,15 +14,6 @@ from database import create_user, verify_user, update_password from dotenv import load_dotenv -# Load environment variables -load_dotenv() - -# Get OpenAI API key from environment variables -openai_api_key = os.getenv('OPENAI_API_KEY') -if not openai_api_key: - st.error("OpenAI API key not found in environment variables") - st.stop() - # Define authentication functions first def login_page(): st.markdown(""" @@ -98,7 +89,12 @@ def forgot_password_page(): st.session_state.current_page = 'login' st.rerun() +# Load environment variables +load_dotenv() + # Main app configuration +# os.environ['OPENAI_API_KEY'] = "sk-..." <- Remove this line + st.set_page_config(page_title="📰 ResearchMate", page_icon="📰", layout="wide") with open("styles.css") as f: @@ -174,7 +170,7 @@ def add_url(): process_content_clicked = st.sidebar.button("🔍 Process Content") main_placeholder = st.empty() -chat_model = ChatOpenAI(temperature=0.7, max_tokens=500, api_key=openai_api_key) +chat_model = ChatOpenAI(temperature=0.7, max_tokens=500) def extract_text_from_pdf(pdf): doc = fitz.open(stream=pdf.read(), filetype="pdf") @@ -189,6 +185,19 @@ def preprocess_text(text): text = re.sub(r'[^\w\s.,!?]', '', text) return text +template = """You are an AI assistant tasked with answering questions based on the given context. +Use the information provided in the context to answer the question in detail form. Give detailed answer with context as well. +I am saying this strictly that If the answer cannot be found in the context, simply state that you don't have enough information to answer accurately. 
+ +Context: {context} +Question: {question} +Answer: """ + +PROMPT = PromptTemplate( + template=template, + input_variables=["context", "question"] +) + if process_content_clicked: documents = [] @@ -225,37 +234,25 @@ def preprocess_text(text): docs = text_splitter.split_documents(documents) main_placeholder.text("Building vector store...") - embeddings = OpenAIEmbeddings(api_key=openai_api_key) + embeddings = OpenAIEmbeddings() vectorstore = FAISS.from_documents(docs, embeddings) st.session_state.vectorstore = vectorstore + # Update QA chain with new vectorstore + st.session_state.qa = RetrievalQA.from_chain_type( + llm=chat_model, + chain_type="stuff", + retriever=st.session_state.vectorstore.as_retriever(search_kwargs={"k": 3}), + chain_type_kwargs={"prompt": PROMPT}, + return_source_documents=True + ) + main_placeholder.text(f"Processing complete. Total chunks: {len(docs)}") time.sleep(2) + st.success("Content processed successfully! You can now ask questions about the new content.") else: st.warning("No content to process. Please add URLs or upload PDF files.") -template = """You are an AI assistant tasked with answering questions based on the given context. -Use the information provided in the context to answer the question concisely and avoid repetition. -I am saying this strictly that If the answer cannot be found in the context, simply state that you don't have enough information to answer accurately. - -Context: {context} -Question: {question} -Answer: """ - -PROMPT = PromptTemplate( - template=template, - input_variables=["context", "question"] -) - -if 'qa' not in st.session_state and st.session_state.vectorstore is not None: - st.session_state.qa = RetrievalQA.from_chain_type( - llm=chat_model, - chain_type="stuff", - retriever=st.session_state.vectorstore.as_retriever(search_kwargs={"k": 3}), - chain_type_kwargs={"prompt": PROMPT}, - return_source_documents=True - ) - for message in st.session_state.chat_history: message_class = "user-message" if message["role"] == "user" else "bot-message" with st.container():
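
A minimal client sketch for exercising the new llama.py service introduced in this diff. This is illustrative only: it assumes the FastAPI app is running on localhost:8000 (as configured at the bottom of llama.py), that an Ollama backend with the "llama2" model is reachable on port 11434, and that the `requests` package is installed; the query and context values are made up.

    # Illustrative client for the /generate endpoint added in llama.py above.
    # Assumptions (not part of the diff): service on localhost:8000, Ollama on
    # localhost:11434 with the "llama2" model pulled, "requests" installed.
    import requests

    payload = {
        "query": "What is retrieval-augmented generation?",        # example question
        "context": "RAG combines a retriever with a generator.",   # optional context
        "model_name": "llama2",   # defaults mirror QueryRequest in llama.py
        "temperature": 0.7,
        "max_tokens": 500,
    }

    resp = requests.post("http://localhost:8000/generate", json=payload, timeout=120)
    resp.raise_for_status()
    print(resp.json()["response"])

The /chat endpoint accepts the same QueryRequest body but bypasses the prompt template and passes the query straight to the model.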