From f8e4e42a364ef05084abae67e1dcaf1fcb81658a Mon Sep 17 00:00:00 2001
From: ManishMadan2882
Date: Tue, 17 Dec 2024 16:14:17 +0530
Subject: [PATCH 1/5] (feat:limit conv history) add util method

---
 application/api/answer/routes.py |  5 ++---
 application/utils.py             | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index bccffb662..6a4a0929c 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -18,7 +18,7 @@
 from application.extensions import api
 from application.llm.llm_creator import LLMCreator
 from application.retriever.retriever_creator import RetrieverCreator
-from application.utils import check_required_fields
+from application.utils import check_required_fields, limit_chat_history
 
 logger = logging.getLogger(__name__)
 
@@ -324,8 +324,7 @@ def post(self):
 
         try:
             question = data["question"]
-            history = str(data.get("history", []))
-            history = str(json.loads(history))
+            history = str(limit_chat_history(json.loads(data.get("history", []))))
             conversation_id = data.get("conversation_id")
             prompt_id = data.get("prompt_id", "default")
 
diff --git a/application/utils.py b/application/utils.py
index 1fc9e3291..8b5ddf2c0 100644
--- a/application/utils.py
+++ b/application/utils.py
@@ -46,3 +46,17 @@ def check_required_fields(data, required_fields):
 
 def get_hash(data):
     return hashlib.md5(data.encode()).hexdigest()
+
+def limit_chat_history(history,max_token_limit = 500):
+
+    cumulative_token_count = 0
+    trimmed_history = []
+
+    for i in reversed(history):
+
+        if("prompt" in i and "response" in i):
+            cumulative_token_count += num_tokens_from_string(i["prompt"] + i["response"])
+            if(cumulative_token_count > max_token_limit):
+                break
+            trimmed_history.insert(0,i)
+
+    return trimmed_history
\ No newline at end of file

From 89a2f249c156adba56ef47a529a96d4332eb3805 Mon Sep 17 00:00:00 2001
From: ManishMadan2882
Date: Thu, 19 Dec 2024 05:15:33 +0530
Subject: [PATCH 2/5] (feat:conv history) token limit from settings

---
 application/utils.py | 44 ++++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/application/utils.py b/application/utils.py
index 8b5ddf2c0..a96e0c9a7 100644
--- a/application/utils.py
+++ b/application/utils.py
@@ -46,17 +46,37 @@ def check_required_fields(data, required_fields):
 
 def get_hash(data):
     return hashlib.md5(data.encode()).hexdigest()
 
-def limit_chat_history(history,max_token_limit = 500):
-
-    cumulative_token_count = 0
+def limit_chat_history(history, max_token_limit=None, gpt_model="docsgpt"):
+    """
+    Limits chat history based on token count.
+    Returns a list of messages that fit within the token limit.
+    """
+    from application.core.settings import settings
+
+    max_token_limit = (
+        max_token_limit
+        if max_token_limit
+        and max_token_limit < settings.MODEL_TOKEN_LIMITS.get(
+            gpt_model, settings.DEFAULT_MAX_HISTORY
+        )
+        else settings.MODEL_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
+    )
+
+    if not history:
+        return []
+
+    tokens_current_history = 0
     trimmed_history = []
 
-    for i in reversed(history):
-
-        if("prompt" in i and "response" in i):
-            cumulative_token_count += num_tokens_from_string(i["prompt"] + i["response"])
-            if(cumulative_token_count > max_token_limit):
-                break
-            trimmed_history.insert(0,i)
-
-    return trimmed_history
\ No newline at end of file
+    for message in reversed(history):
+        if "prompt" in message and "response" in message:
+            tokens_batch = num_tokens_from_string(message["prompt"]) + num_tokens_from_string(
+                message["response"]
+            )
+            if tokens_current_history + tokens_batch < max_token_limit:
+                tokens_current_history += tokens_batch
+                trimmed_history.insert(0, message)
+            else:
+                break
+
+    return trimmed_history

From 9096013e13797f455581b5386ff6017831de158c Mon Sep 17 00:00:00 2001
From: ManishMadan2882
Date: Thu, 19 Dec 2024 05:20:55 +0530
Subject: [PATCH 3/5] (refactor) remove preprocessing in retrieval

---
 application/api/answer/routes.py         |  2 +-
 application/retriever/brave_search.py    |  9 ---------
 application/retriever/classic_rag.py     |  9 ---------
 application/retriever/duckduck_search.py | 11 +----------
 frontend/package-lock.json               |  2 +-
 5 files changed, 3 insertions(+), 30 deletions(-)

diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index 6a4a0929c..aba2b88eb 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -455,7 +455,7 @@ def post(self):
 
         try:
             question = data["question"]
-            history = data.get("history", [])
+            history = str(limit_chat_history(json.loads(data.get("history", []))))
             conversation_id = data.get("conversation_id")
             prompt_id = data.get("prompt_id", "default")
             chunks = int(data.get("chunks", 2))
diff --git a/application/retriever/brave_search.py b/application/retriever/brave_search.py
index 1fd844b26..4601d352a 100644
--- a/application/retriever/brave_search.py
+++ b/application/retriever/brave_search.py
@@ -2,7 +2,6 @@
 from application.retriever.base import BaseRetriever
 from application.core.settings import settings
 from application.llm.llm_creator import LLMCreator
-from application.utils import num_tokens_from_string
 
 from langchain_community.tools import BraveSearch
 
@@ -73,15 +72,7 @@ def gen(self):
 
         yield {"source": doc}
 
         if len(self.chat_history) > 1:
-            tokens_current_history = 0
-            # count tokens in history
             for i in self.chat_history:
-                if "prompt" in i and "response" in i:
-                    tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string(
-                        i["response"]
-                    )
-                    if tokens_current_history + tokens_batch < self.token_limit:
-                        tokens_current_history += tokens_batch
                     messages_combine.append(
                         {"role": "user", "content": i["prompt"]}
                     )
diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py
index 42e318d20..75b2b576a 100644
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -3,7 +3,6 @@
 
 from application.vectorstore.vector_creator import VectorCreator
 from application.llm.llm_creator import LLMCreator
-from application.utils import num_tokens_from_string
 
 
 class ClassicRAG(BaseRetriever):
@@ -73,15 +72,7 @@ def gen(self):
 
         yield {"source": doc}
 
         if len(self.chat_history) > 1:
-            tokens_current_history = 0
-            # count tokens in history
             for i in self.chat_history:
-                if "prompt" in i and "response" in i:
-                    tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string(
-                        i["response"]
-                    )
-                    if tokens_current_history + tokens_batch < self.token_limit:
-                        tokens_current_history += tokens_batch
                     messages_combine.append(
                         {"role": "user", "content": i["prompt"]}
                     )
diff --git a/application/retriever/duckduck_search.py b/application/retriever/duckduck_search.py
index 6ae562269..80717e7dc 100644
--- a/application/retriever/duckduck_search.py
+++ b/application/retriever/duckduck_search.py
@@ -1,7 +1,6 @@
 from application.retriever.base import BaseRetriever
 from application.core.settings import settings
 from application.llm.llm_creator import LLMCreator
-from application.utils import num_tokens_from_string
 
 from langchain_community.tools import DuckDuckGoSearchResults
 from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
 
@@ -89,16 +88,8 @@ def gen(self):
         for doc in docs:
             yield {"source": doc}
 
-        if len(self.chat_history) > 1:
-            tokens_current_history = 0
-            # count tokens in history
+        if len(self.chat_history) > 1: 
             for i in self.chat_history:
-                if "prompt" in i and "response" in i:
-                    tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string(
-                        i["response"]
-                    )
-                    if tokens_current_history + tokens_batch < self.token_limit:
-                        tokens_current_history += tokens_batch
                     messages_combine.append(
                         {"role": "user", "content": i["prompt"]}
                     )
diff --git a/frontend/package-lock.json b/frontend/package-lock.json
index 7b6f11d61..f96a17d40 100644
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -1649,7 +1649,7 @@
       "version": "18.3.0",
       "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.3.0.tgz",
       "integrity": "sha512-EhwApuTmMBmXuFOikhQLIBUn6uFg81SwLMOAUgodJF14SOBOCMdU04gDoYi0WOJJHD144TL32z4yDqCW3dnkQg==",
-      "devOptional": true,
+      "dev": true,
       "dependencies": {
         "@types/react": "*"
       }

From b19c14787e355520e83793e19adb4a36587e45c4 Mon Sep 17 00:00:00 2001
From: ManishMadan2882
Date: Thu, 19 Dec 2024 17:58:55 +0530
Subject: [PATCH 4/5] (fix) avoid stringifying list

---
 application/api/answer/routes.py         |  4 ++--
 application/retriever/brave_search.py    |  1 +
 application/retriever/classic_rag.py     |  3 ++-
 application/retriever/duckduck_search.py |  1 +
 application/utils.py                     | 17 ++++++++++-------
 5 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index aba2b88eb..c55ffe725 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -324,7 +324,7 @@ def post(self):
 
         try:
             question = data["question"]
-            history = str(limit_chat_history(json.loads(data.get("history", []))))
+            history = limit_chat_history(json.loads(data.get("history", [])), gpt_model=gpt_model)
             conversation_id = data.get("conversation_id")
             prompt_id = data.get("prompt_id", "default")
 
@@ -455,7 +455,7 @@ def post(self):
 
         try:
             question = data["question"]
-            history = str(limit_chat_history(json.loads(data.get("history", []))))
+            history = limit_chat_history(json.loads(data.get("history", [])), gpt_model=gpt_model)
             conversation_id = data.get("conversation_id")
             prompt_id = data.get("prompt_id", "default")
             chunks = int(data.get("chunks", 2))
diff --git a/application/retriever/brave_search.py b/application/retriever/brave_search.py
index 4601d352a..3d9ae89e6 100644
--- a/application/retriever/brave_search.py
+++ b/application/retriever/brave_search.py
@@ -73,6 +73,7 @@ def gen(self):
 
         if len(self.chat_history) > 1:
             for i in self.chat_history:
+                if "prompt" in i and "response" in i:
                     messages_combine.append(
                         {"role": "user", "content": i["prompt"]}
                     )
diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py
index 75b2b576a..8de625dd8 100644
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -73,6 +73,7 @@ def gen(self):
 
         if len(self.chat_history) > 1:
             for i in self.chat_history:
+                if "prompt" in i and "response" in i:
                     messages_combine.append(
                         {"role": "user", "content": i["prompt"]}
                     )
@@ -80,7 +81,7 @@ def gen(self):
                     {"role": "system", "content": i["response"]}
                 )
         messages_combine.append({"role": "user", "content": self.question})
-        
+
         llm = LLMCreator.create_llm(
             settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=self.user_api_key
         )
diff --git a/application/retriever/duckduck_search.py b/application/retriever/duckduck_search.py
index 80717e7dc..fa19ead03 100644
--- a/application/retriever/duckduck_search.py
+++ b/application/retriever/duckduck_search.py
@@ -90,6 +90,7 @@ def gen(self):
 
         if len(self.chat_history) > 1: 
             for i in self.chat_history:
+                if "prompt" in i and "response" in i:
                     messages_combine.append(
                         {"role": "user", "content": i["prompt"]}
                     )
diff --git a/application/utils.py b/application/utils.py
index a96e0c9a7..7099a20a9 100644
--- a/application/utils.py
+++ b/application/utils.py
@@ -54,13 +54,16 @@ def limit_chat_history(history, max_token_limit=None, gpt_model="docsgpt"):
     from application.core.settings import settings
 
     max_token_limit = (
-        max_token_limit
-        if max_token_limit
-        and max_token_limit < settings.MODEL_TOKEN_LIMITS.get(
-            gpt_model, settings.DEFAULT_MAX_HISTORY
+            max_token_limit
+            if max_token_limit and
+            max_token_limit < settings.MODEL_TOKEN_LIMITS.get(
+                gpt_model, settings.DEFAULT_MAX_HISTORY
+            )
+            else settings.MODEL_TOKEN_LIMITS.get(
+                gpt_model, settings.DEFAULT_MAX_HISTORY
+            )
         )
-        else settings.MODEL_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
-    )
+
 
     if not history:
         return []
@@ -78,5 +81,5 @@ def limit_chat_history(history, max_token_limit=None, gpt_model="docsgpt"):
             trimmed_history.insert(0, message)
         else:
             break
-    
+
     return trimmed_history

From 132326136aea3cbfc3f22de91805ea3e45563cd6 Mon Sep 17 00:00:00 2001
From: Alex
Date: Thu, 19 Dec 2024 18:17:12 +0000
Subject: [PATCH 5/5] added gpt-4o-mini model

---
 application/core/settings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/core/settings.py b/application/core/settings.py
index a7811ec78..0bace432f 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -16,7 +16,7 @@ class Settings(BaseSettings):
     MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
     MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
     DEFAULT_MAX_HISTORY: int = 150
-    MODEL_TOKEN_LIMITS: dict = {"gpt-3.5-turbo": 4096, "claude-2": 1e5}
+    MODEL_TOKEN_LIMITS: dict = {"gpt-4o-mini": 128000, "gpt-3.5-turbo": 4096, "claude-2": 1e5}
 
     UPLOAD_FOLDER: str = "inputs"
     PARSE_PDF_AS_IMAGE: bool = False
     VECTOR_STORE: str = "faiss"  # "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb"
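
For reference, a minimal standalone sketch of the trimming behavior the series converges on (PATCH 2 plus the PATCH 4 fix). Treat it as illustrative only: num_tokens_from_string is stubbed here with a whitespace tokenizer and the settings lookup is inlined as plain constants, whereas the real code imports both from the application package.

    # sketch_limit_history.py -- hypothetical demo, not part of the series
    MODEL_TOKEN_LIMITS = {"gpt-4o-mini": 128000, "gpt-3.5-turbo": 4096}  # mirrors settings.py
    DEFAULT_MAX_HISTORY = 150  # mirrors settings.py

    def num_tokens_from_string(text):
        # Stand-in tokenizer; the real helper lives in application/utils.py.
        return len(text.split())

    def limit_chat_history(history, max_token_limit=None, gpt_model="docsgpt"):
        # A caller-supplied limit only applies when it is stricter than the
        # model's own budget; otherwise the model budget (or the default) wins.
        model_limit = MODEL_TOKEN_LIMITS.get(gpt_model, DEFAULT_MAX_HISTORY)
        if not (max_token_limit and max_token_limit < model_limit):
            max_token_limit = model_limit

        tokens_current_history = 0
        trimmed_history = []
        # Walk from the newest exchange backwards, keeping whole
        # prompt/response pairs until the next pair would cross the budget.
        for message in reversed(history or []):
            if "prompt" in message and "response" in message:
                tokens_batch = num_tokens_from_string(message["prompt"]) + num_tokens_from_string(message["response"])
                if tokens_current_history + tokens_batch < max_token_limit:
                    tokens_current_history += tokens_batch
                    trimmed_history.insert(0, message)
                else:
                    break
        return trimmed_history

    history = [
        {"prompt": "one " * 100, "response": "two " * 100},   # 200 tokens, oldest
        {"prompt": "three " * 40, "response": "four " * 40},  # 80 tokens
        {"prompt": "five " * 20, "response": "six " * 20},    # 40 tokens, newest
    ]
    # With the "docsgpt" fallback budget of 150 tokens, only the two newest
    # pairs fit: 40 + 80 = 120 < 150, and adding the oldest pair would not.
    print(len(limit_chat_history(history)))  # -> 2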
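
A companion sketch of the consumer side after PATCH 3 and PATCH 4: the retrievers' gen() methods no longer count tokens and only map the pre-trimmed history onto chat roles. The helper name build_messages is invented for illustration; in the actual diffs this loop lives inline in brave_search.py, classic_rag.py, and duckduck_search.py.

    def build_messages(chat_history, question):
        # chat_history arrives already trimmed by limit_chat_history, so the
        # loop only needs the per-message guard that PATCH 4 keeps.
        messages_combine = []
        if len(chat_history) > 1:
            for i in chat_history:
                if "prompt" in i and "response" in i:
                    messages_combine.append({"role": "user", "content": i["prompt"]})
                    messages_combine.append({"role": "system", "content": i["response"]})
        messages_combine.append({"role": "user", "content": question})
        return messages_combine

One quirk visible in the diffs and preserved here: a history containing exactly one exchange is never replayed, since the loop only runs when len(chat_history) > 1.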