Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

working es #354

Merged
merged 5 commits into from
Sep 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions application/api/answer/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@


from application.core.settings import settings
from application.vectorstore.vector_creator import VectorCreator
from application.llm.llm_creator import LLMCreator
from application.vectorstore.faiss import FaissStore
from application.error import bad_request


Expand Down Expand Up @@ -226,7 +226,7 @@
vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
else:
vectorstore = ""
docsearch = FaissStore(vectorstore, embeddings_key)
docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)

Check warning on line 229 in application/api/answer/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/answer/routes.py#L229

Added line #L229 was not covered by tests

return Response(
complete_stream(question, docsearch,
Expand Down Expand Up @@ -260,7 +260,7 @@
vectorstore = get_vectorstore(data)
# loading the index and the store and the prompt template
# Note: if you used embeddings other than OpenAI's, you need to change the embeddings here as well
docsearch = FaissStore(vectorstore, embeddings_key)
docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)

Check warning on line 263 in application/api/answer/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/answer/routes.py#L263

Added line #L263 was not covered by tests


llm = LLMCreator.create_llm(settings.LLM_NAME, api_key=api_key)
Expand Down
37 changes: 19 additions & 18 deletions application/api/internal/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,25 +34,26 @@
if "name" not in request.form:
return {"status": "no name"}
job_name = secure_filename(request.form["name"])
if "file_faiss" not in request.files:
print("No file part")
return {"status": "no file"}
file_faiss = request.files["file_faiss"]
if file_faiss.filename == "":
return {"status": "no file name"}
if "file_pkl" not in request.files:
print("No file part")
return {"status": "no file"}
file_pkl = request.files["file_pkl"]
if file_pkl.filename == "":
return {"status": "no file name"}

# saves index files
save_dir = os.path.join(current_dir, "indexes", user, job_name)
if not os.path.exists(save_dir):
os.makedirs(save_dir)
file_faiss.save(os.path.join(save_dir, "index.faiss"))
file_pkl.save(os.path.join(save_dir, "index.pkl"))
if settings.VECTOR_STORE == "faiss":
if "file_faiss" not in request.files:
print("No file part")
return {"status": "no file"}
file_faiss = request.files["file_faiss"]
if file_faiss.filename == "":
return {"status": "no file name"}
if "file_pkl" not in request.files:
print("No file part")
return {"status": "no file"}
file_pkl = request.files["file_pkl"]
if file_pkl.filename == "":
return {"status": "no file name"}

Check warning on line 50 in application/api/internal/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/internal/routes.py#L38-L50

Added lines #L38 - L50 were not covered by tests
# saves index files

if not os.path.exists(save_dir):
os.makedirs(save_dir)
file_faiss.save(os.path.join(save_dir, "index.faiss"))
file_pkl.save(os.path.join(save_dir, "index.pkl"))

Check warning on line 56 in application/api/internal/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/internal/routes.py#L53-L56

Added lines #L53 - L56 were not covered by tests
# create entry in vectors_collection
vectors_collection.insert_one(
{
Expand Down
27 changes: 18 additions & 9 deletions application/api/user/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from application.api.user.tasks import ingest

from application.core.settings import settings
from application.vectorstore.vector_creator import VectorCreator

mongo = MongoClient(settings.MONGO_URI)
db = mongo["docsgpt"]
conversations_collection = db["conversations"]
Expand Down Expand Up @@ -90,10 +92,17 @@
return {"status": "error"}
path_clean = "/".join(dirs)
vectors_collection.delete_one({"location": path})
try:
shutil.rmtree(path_clean)
except FileNotFoundError:
pass
if settings.VECTOR_STORE == "faiss":
try:
shutil.rmtree(os.path.join(current_dir, path_clean))
Dismissed Show dismissed Hide dismissed
except FileNotFoundError:
pass

Check warning on line 99 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L95-L99

Added lines #L95 - L99 were not covered by tests
else:
vetorstore = VectorCreator.create_vectorstore(

Check warning on line 101 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L101

Added line #L101 was not covered by tests
settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean)
)
vetorstore.delete_index()

Check warning on line 104 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L104

Added line #L104 was not covered by tests

return {"status": "ok"}

@user.route("/api/upload", methods=["POST"])
Expand Down Expand Up @@ -173,11 +182,11 @@
"location": "local",
}
)

data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
for index in data_remote:
index["location"] = "remote"
data.append(index)
if settings.VECTOR_STORE == "faiss":
data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
for index in data_remote:
index["location"] = "remote"
data.append(index)

Check warning on line 189 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L185-L189

Added lines #L185 - L189 were not covered by tests

return jsonify(data)

Expand Down
8 changes: 8 additions & 0 deletions application/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class Settings(BaseSettings):
TOKENS_MAX_HISTORY: int = 150
SELF_HOSTED_MODEL: bool = False
UPLOAD_FOLDER: str = "inputs"
VECTOR_STORE: str = "elasticsearch" # "faiss" or "elasticsearch"

API_URL: str = "http://localhost:7091" # backend url for celery worker

Expand All @@ -23,6 +24,13 @@ class Settings(BaseSettings):
AZURE_DEPLOYMENT_NAME: str = None # azure deployment name for answering
AZURE_EMBEDDINGS_DEPLOYMENT_NAME: str = None # azure deployment name for embeddings

# elasticsearch
ELASTIC_CLOUD_ID: str = None # cloud id for elasticsearch
ELASTIC_USERNAME: str = None # username for elasticsearch
ELASTIC_PASSWORD: str = None # password for elasticsearch
ELASTIC_URL: str = None # url for elasticsearch
ELASTIC_INDEX: str = "docsgpt" # index name for elasticsearch


path = Path(__file__).parent.parent.absolute()
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
28 changes: 20 additions & 8 deletions application/parser/open_ai_func.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import os

import tiktoken
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from application.vectorstore.vector_creator import VectorCreator
from application.core.settings import settings
from retry import retry


Expand Down Expand Up @@ -33,12 +33,23 @@
os.makedirs(f"{folder_name}")

from tqdm import tqdm
docs_test = [docs[0]]
docs.pop(0)
c1 = 0

store = FAISS.from_documents(docs_test, OpenAIEmbeddings(openai_api_key=os.getenv("EMBEDDINGS_KEY")))

if settings.VECTOR_STORE == "faiss":
docs_init = [docs[0]]
docs.pop(0)

Check warning on line 39 in application/parser/open_ai_func.py

View check run for this annotation

Codecov / codecov/patch

application/parser/open_ai_func.py#L37-L39

Added lines #L37 - L39 were not covered by tests

store = VectorCreator.create_vectorstore(

Check warning on line 41 in application/parser/open_ai_func.py

View check run for this annotation

Codecov / codecov/patch

application/parser/open_ai_func.py#L41

Added line #L41 was not covered by tests
settings.VECTOR_STORE,
docs_init = docs_init,
path=f"{folder_name}",
embeddings_key=os.getenv("EMBEDDINGS_KEY")
)
else:
store = VectorCreator.create_vectorstore(

Check warning on line 48 in application/parser/open_ai_func.py

View check run for this annotation

Codecov / codecov/patch

application/parser/open_ai_func.py#L48

Added line #L48 was not covered by tests
settings.VECTOR_STORE,
path=f"{folder_name}",
embeddings_key=os.getenv("EMBEDDINGS_KEY")
)
# Uncomment for MPNet embeddings
# model_name = "sentence-transformers/all-mpnet-base-v2"
# hf = HuggingFaceEmbeddings(model_name=model_name)
Expand All @@ -57,7 +68,8 @@
store.save_local(f"{folder_name}")
break
c1 += 1
store.save_local(f"{folder_name}")
if settings.VECTOR_STORE == "faiss":
store.save_local(f"{folder_name}")

Check warning on line 72 in application/parser/open_ai_func.py

View check run for this annotation

Codecov / codecov/patch

application/parser/open_ai_func.py#L71-L72

Added lines #L71 - L72 were not covered by tests


def get_user_permission(docs, folder_name):
Expand Down
1 change: 1 addition & 0 deletions application/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ decorator==5.1.1
dill==0.3.6
dnspython==2.3.0
ecdsa==0.18.0
elasticsearch==8.9.0
entrypoints==0.4
faiss-cpu==1.7.3
filelock==3.9.0
Expand Down
2 changes: 1 addition & 1 deletion application/vectorstore/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def search(self, *args, **kwargs):
def is_azure_configured(self):
return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME

def _get_docsearch(self, embeddings_name, embeddings_key=None):
def _get_embeddings(self, embeddings_name, embeddings_key=None):
embeddings_factory = {
"openai_text-embedding-ada-002": OpenAIEmbeddings,
"huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceHubEmbeddings,
Expand Down
Loading
Loading