Skip to content

Commit

Permalink
improve sql performance (#614)
Browse files: browse the repository at this point in the history
  • Loading branch information
IANTHEREAL authored Jan 26, 2025
1 parent dd6ac28 commit 8a448e9
Showing 1 changed file with 35 additions and 35 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -814,9 +814,7 @@ def retrieve_graph_data(
defer(self._entity_model.meta_vec),
)
.order_by(desc("similarity"))
.having(text("similarity >= :threshold"))
.params(threshold=similarity_threshold)
.limit(top_k)
.limit(top_k * 2) # Fetch more results to account for filtering
)

# Query similar relationships
Expand All @@ -840,44 +838,44 @@ def retrieve_graph_data(
.defer(self._entity_model.description_vec),
)
.order_by(desc("similarity"))
.having(text("similarity >= :threshold"))
.params(threshold=similarity_threshold)
.limit(top_k)
.limit(top_k * 2) # Fetch more results to account for filtering
)

# Execute both queries
entities = []
relationships = []

for entity, similarity in self._session.exec(entity_query).all():
entities.append(
{
"id": entity.id,
"name": entity.name,
"description": entity.description,
"metadata": entity.meta,
"similarity_score": similarity,
}
)
if similarity >= similarity_threshold and len(entities) < top_k:
entities.append(
{
"id": entity.id,
"name": entity.name,
"description": entity.description,
"metadata": entity.meta,
"similarity_score": similarity,
}
)

for relationship, similarity in self._session.exec(relationship_query).all():
relationships.append(
{
"id": relationship.id,
"relationship": relationship.description,
"source_entity": {
"id": relationship.source_entity.id,
"name": relationship.source_entity.name,
"description": relationship.source_entity.description,
},
"target_entity": {
"id": relationship.target_entity.id,
"name": relationship.target_entity.name,
"description": relationship.target_entity.description,
},
"similarity_score": similarity,
}
)
if similarity >= similarity_threshold and len(relationships) < top_k:
relationships.append(
{
"id": relationship.id,
"relationship": relationship.description,
"source_entity": {
"id": relationship.source_entity.id,
"name": relationship.source_entity.name,
"description": relationship.source_entity.description,
},
"target_entity": {
"id": relationship.target_entity.id,
"name": relationship.target_entity.name,
"description": relationship.target_entity.description,
},
"similarity_score": similarity,
}
)

return {"entities": entities, "relationships": relationships}

Expand Down Expand Up @@ -952,14 +950,16 @@ def retrieve_neighbors(
)
)
.order_by(desc("similarity"))
.having(text("similarity >= :threshold"))
.params(threshold=similarity_threshold)
.limit(max_neighbors)
.limit(max_neighbors * 2) # Fetch more results to account for filtering
).all()

next_level_nodes = set()

for rel, similarity in relationships:
# Skip if similarity is below threshold
if similarity < similarity_threshold:
continue

# Determine direction and connected entity
if rel.source_entity_id in current_level_nodes:
connected_id = rel.target_entity_id
Expand Down

0 comments on commit 8a448e9

Please sign in to comment.