Skip to content

Commit

Permalink
feat: support single document reindex
Browse files Browse the repository at this point in the history
  • Loading branch information
Mini256 committed Feb 10, 2025
1 parent b522d22 commit 859b1fb
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 5 deletions.
74 changes: 69 additions & 5 deletions backend/app/api/admin_routes/knowledge_base/document/routes.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import logging
from typing import Annotated

from fastapi import APIRouter, Depends, Query
from fastapi import APIRouter, Depends, Query, HTTPException
from fastapi_pagination import Params, Page

from app.api.admin_routes.knowledge_base.models import ChunkItem
from app.api.deps import SessionDep, CurrentSuperuserDep
from app.models import Document
from app.models.chunk import get_kb_chunk_model
from app.models import Document, DocIndexTaskStatus
from app.models.chunk import KgIndexStatus, get_kb_chunk_model
from app.models.entity import get_kb_entity_model
from app.models.relationship import get_kb_relationship_model
from app.repositories import knowledge_base_repo, document_repo
Expand All @@ -23,7 +23,10 @@
)
from app.repositories.graph import GraphRepo
from app.tasks.knowledge_base import stats_for_knowledge_base

from app.tasks import (
build_kg_index_for_chunk,
build_index_for_document,
)

router = APIRouter()
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -53,7 +56,7 @@ def list_kb_documents(


@router.get("/admin/knowledge_bases/{kb_id}/documents/{doc_id}")
def get_document_by_id(
def get_kb_document_by_id(
session: SessionDep,
user: CurrentSuperuserDep,
kb_id: int,
Expand Down Expand Up @@ -131,3 +134,64 @@ def remove_kb_document(
except Exception as e:
logger.exception(f"Failed to remove document #{document_id}: {e}")
raise InternalServerError()


@router.post("/admin/knowledge_bases/{kb_id}/documents/{doc_id}/reindex")
def retry_kb_document_index(
session: SessionDep,
user: CurrentSuperuserDep,
kb_id: int,
doc_id: int,
reindex_completed: bool = True,
):
try:
kb = knowledge_base_repo.must_get(session, kb_id)

# Retry failed vector index tasks.
doc = document_repo.must_get(session, doc_id)
reindex_document_ids = []
ignore_document_ids = []

if doc.index_status == DocIndexTaskStatus.COMPLETED and not reindex_completed:
ignore_document_ids.append(doc.id)
else:
reindex_document_ids.append(doc.id)

doc.index_status = DocIndexTaskStatus.PENDING
session.add(doc)
session.commit()
build_index_for_document.delay(kb.id, doc.id)
logger.info(f"Triggered document #{len(doc.id)} to rebuilt vector index.")

# Retry failed kg index tasks.
chunk_repo = ChunkRepo(get_kb_chunk_model(kb))
chunks = chunk_repo.get_document_chunks(session, doc_id)
reindex_chunk_ids = []
ignore_chunk_ids = []
for chunk in chunks:
if chunk.index_status == KgIndexStatus.COMPLETED and not reindex_completed:
ignore_chunk_ids.append(chunk.id)
continue
else:
reindex_chunk_ids.append(chunk.id)

chunk.index_status = KgIndexStatus.PENDING
session.add(chunk)
session.commit()
build_kg_index_for_chunk.delay(kb_id, chunk.id)

logger.info(
f"Triggered {len(reindex_chunk_ids)} chunks to rebuilt knowledge graph index."
)

return {
"reindex_document_ids": reindex_document_ids,
"ignore_document_ids": ignore_document_ids,
"reindex_chunk_ids": reindex_chunk_ids,
"ignore_chunk_ids": ignore_chunk_ids,
}
except HTTPException as e:
raise e
except Exception as e:
logger.exception(e, exc_info=True)
raise InternalServerError()
1 change: 1 addition & 0 deletions backend/app/models/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class Document(UpdatableBaseModel, table=True):

# TODO: rename to vector_index_status, vector_index_result.
index_status: DocIndexTaskStatus = DocIndexTaskStatus.NOT_STARTED
vector_index_task_id: str = Field(sa_column=Column(String(128), nullable=False))
index_result: str = Field(sa_column=Column(Text, nullable=True))

# TODO: add kg_index_status, kg_index_result column, unify the index status.
Expand Down

0 comments on commit 859b1fb

Please sign in to comment.