diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index f191efe31..c6615e560 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.11"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} @@ -21,7 +21,7 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Test with pytest and generate coverage report run: | - python -m pytest --cov=application --cov=scripts --cov=extensions --cov-report=xml + python -m pytest --cov=application --cov-report=xml - name: Upload coverage reports to Codecov if: github.event_name == 'pull_request' && matrix.python-version == '3.11' uses: codecov/codecov-action@v3 diff --git a/application/requirements.txt b/application/requirements.txt index 153e9f6bc..d14be3ea0 100644 --- a/application/requirements.txt +++ b/application/requirements.txt @@ -2,35 +2,84 @@ anthropic==0.34.2 boto3==1.34.153 beautifulsoup4==4.12.3 celery==5.3.6 -dataclasses_json==0.6.7 +dataclasses-json==0.6.7 docx2txt==0.8 duckduckgo-search==6.2.6 -EbookLib==0.18 +ebooklib==0.18 +elastic-transport==8.15.0 elasticsearch==8.14.0 escodegen==1.0.11 esprima==4.0.1 +esutils==1.0.1 Flask==3.0.3 faiss-cpu==1.8.0.post1 gunicorn==23.0.0 html2text==2020.1.16 javalang==0.13.0 -langchain==0.2.16 -langchain-community==0.2.16 -langchain-core==0.2.38 -langchain-openai==0.1.23 -openapi3_parser==1.1.16 +jinja2==3.1.4 +jiter==0.5.0 +jmespath==1.0.1 +joblib==1.4.2 +jsonpatch==1.33 +jsonpointer==3.0.0 +jsonschema==4.23.0 +jsonschema-spec==0.2.4 +jsonschema-specifications==2023.7.1 +kombu==5.4.2 +langchain==0.3.0 +langchain-community==0.3.0 +langchain-core==0.3.2 +langchain-openai==0.2.0 +langchain-text-splitters==0.3.0 +langsmith==0.1.125 +lazy-object-proxy==1.10.0 +lxml==5.3.0 +markupsafe==2.1.5 +marshmallow==3.22.0 +mpmath==1.3.0 +multidict==6.1.0 +mypy-extensions==1.0.0 +networkx==3.3 +numpy==1.26.4 +openai==1.46.1 +openapi-schema-validator==0.6.2 +openapi-spec-validator==0.6.0 +openapi3-parser==1.1.16 +orjson==3.10.7 +packaging==24.1 pandas==2.2.2 -pydantic_settings==2.4.0 +pathable==0.4.3 +pillow==10.4.0 +portalocker==2.10.1 +prance==23.6.21.0 +primp==0.6.2 +prompt-toolkit==3.0.47 +protobuf==5.28.2 +py==1.11.0 +pydantic==2.9.2 +pydantic-core==2.23.4 +pydantic-settings==2.4.0 pymongo==4.8.0 -PyPDF2==3.0.1 +pypdf2==3.0.1 +python-dateutil==2.9.0.post0 python-dotenv==1.0.1 qdrant-client==1.11.0 redis==5.0.1 -Requests==2.32.0 +referencing==0.30.2 +regex==2024.9.11 +requests==2.32.3 retry==0.9.2 sentence-transformers==3.0.1 tiktoken==0.7.0 -torch +tokenizers==0.19.1 +torch==2.4.1 tqdm==4.66.5 transformers==4.44.2 -Werkzeug==3.0.4 +typing-extensions==4.12.2 +typing-inspect==0.9.0 +tzdata==2024.1 +urllib3==2.2.3 +vine==5.1.0 +wcwidth==0.2.13 +werkzeug==3.0.4 +yarl==1.11.1 diff --git a/frontend/src/upload/Upload.tsx b/frontend/src/upload/Upload.tsx index ccd06a22f..164505ce4 100644 --- a/frontend/src/upload/Upload.tsx +++ b/frontend/src/upload/Upload.tsx @@ -47,8 +47,8 @@ function Upload({ ]; const [urlType, setUrlType] = useState<{ label: string; value: string }>({ - label: 'Link', - value: 'url', + label: 'Crawler', + value: 'crawler', }); useEffect(() => { diff --git a/tests/test_vector_store.py b/tests/test_vector_store.py index 6b700dc5d..83654b7e8 100644 --- a/tests/test_vector_store.py +++ b/tests/test_vector_store.py @@ -9,11 +9,33 @@ def test_init_local_faiss_store_huggingface(): """ - Test that asserts that trying to initialize a FaissStore with + Test that asserts that initializing a FaissStore with the huggingface sentence transformer below together with the index.faiss file in the application/ folder results in a dimension mismatch error. """ - settings.EMBEDDINGS_NAME = "openai_text-embedding-ada-002" - with pytest.raises(ValueError): - FaissStore("application/", "", None) + import os + from langchain.embeddings import HuggingFaceEmbeddings + from langchain.docstore.document import Document + from langchain_community.vectorstores import FAISS + + # Ensure application directory exists + index_path = os.path.join("application") + os.makedirs(index_path, exist_ok=True) + + # Create an index.faiss with a different embeddings dimension + # Use a different embedding model with a smaller dimension + other_embedding_model = "sentence-transformers/all-MiniLM-L6-v2" # Dimension 384 + other_embeddings = HuggingFaceEmbeddings(model_name=other_embedding_model) + # Create some dummy documents + docs = [Document(page_content="Test document")] + # Create index using the other embeddings + other_docsearch = FAISS.from_documents(docs, other_embeddings) + # Save index to application/ + other_docsearch.save_local(index_path) + + # Now set the EMBEDDINGS_NAME to the one with a different dimension + settings.EMBEDDINGS_NAME = "huggingface_sentence-transformers/all-mpnet-base-v2" # Dimension 768 + with pytest.raises(ValueError) as exc_info: + FaissStore("", None) + assert "Embedding dimension mismatch" in str(exc_info.value)