diff --git a/selfie/api/documents.py b/selfie/api/documents.py index 68bb382..ada7819 100644 --- a/selfie/api/documents.py +++ b/selfie/api/documents.py @@ -1,3 +1,4 @@ +from datetime import datetime from typing import List, Optional from fastapi import APIRouter, Query @@ -27,8 +28,8 @@ class FetchedDocument(BaseModel): id: int = Field(..., description="The unique identifier of the document") name: str = Field(..., description="The name of the document") size: int = Field(..., description="The size of the document") - created_at: str = Field(..., description="The timestamp of the document creation") - updated_at: str = Field(..., description="The timestamp of the document update") + created_at: datetime = Field(..., description="The timestamp of the document creation") + updated_at: datetime = Field(..., description="The timestamp of the document update") content_type: str = Field(..., description="The content type of the document") connector_name: str = Field(..., description="The name of the connector") diff --git a/selfie/connectors/text_files/uischema.json b/selfie/connectors/text_files/uischema.json index f4ef5a6..f886166 100644 --- a/selfie/connectors/text_files/uischema.json +++ b/selfie/connectors/text_files/uischema.json @@ -1,8 +1,5 @@ { "files": { - "ui:widget": "nativeFile", - "ui:options": { - "accept": ".json" - } + "ui:widget": "nativeFile" } } diff --git a/selfie/embeddings/__init__.py b/selfie/embeddings/__init__.py index 7c6630b..9263407 100644 --- a/selfie/embeddings/__init__.py +++ b/selfie/embeddings/__init__.py @@ -74,6 +74,7 @@ def __init__(self, character_name, storage_path: str = config.embeddings_storage self.completion = completion or get_default_completion() self.character_name = character_name self.embeddings = Embeddings( + hybrid=True, sqlite={"wal": True}, # For now, sqlite w/the default driver is the only way to use WAL. content=True @@ -346,6 +347,7 @@ async def recall( include_summary=True, local_llm=True, min_score=0.4, + hybrid_search_weight=1.0, # TODO: Setting this to only use the dense index until this is tuned, e.g., with min_score ): if min_score is None: min_score = 0.4 @@ -354,7 +356,7 @@ async def recall( return {"documents": [], "summary": "No documents found.", "mean_score": 0} self.embeddings.load(self.storage_path) - results = self._query(where="similar(:topic)", parameters={"topic": topic}, limit=limit) + results = self._query(where=f"similar(:topic, {hybrid_search_weight})", parameters={"topic": topic}, limit=limit) documents_list: List[ScoredEmbeddingDocumentModel] = [] for result in results: document = EmbeddingDocumentModel(