From 51f336a0f87ddcf5b145dee49b15322c52699dd3 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Thu, 10 Oct 2024 11:51:30 +0800
Subject: [PATCH 001/258] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 86d82b1d..6a075dc4 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
-
+
This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
From 060695bb861f5a91f5c0cd3a7fd3676ca7b8a071 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Thu, 10 Oct 2024 11:51:51 +0800
Subject: [PATCH 002/258] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 6a075dc4..f8227311 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
-
+
This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
From b2db4d66ce78d0f4e4958c79bafa670c25834e72 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Thu, 10 Oct 2024 11:54:03 +0800
Subject: [PATCH 003/258] Update README.md
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index f8227311..27f2c19b 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
-
+
This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
![请添加图片描述](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
@@ -191,7 +191,7 @@ Output your evaluation in the following JSON format:
title={LightRAG: Simple and Fast Retrieval-Augmented Generation},
author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang},
year={2024},
-eprint={},
+eprint={2410.05779},
archivePrefix={arXiv},
primaryClass={cs.IR}
}
From af4e4156732066807eaaaf621d8b73c77c6c416f Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Thu, 10 Oct 2024 11:54:36 +0800
Subject: [PATCH 004/258] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 27f2c19b..42de1c1c 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
-
+
This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
![请添加图片描述](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
From 5e513a71040b1c217990e59d0e98b4aaceeb71e1 Mon Sep 17 00:00:00 2001
From: Larfii <834462287@qq.com>
Date: Thu, 10 Oct 2024 12:09:24 +0800
Subject: [PATCH 005/258] update
---
lightrag/__init__.py | 2 +-
setup.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/lightrag/__init__.py b/lightrag/__init__.py
index a83afba3..dc497cd4 100644
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,5 +1,5 @@
from .lightrag import LightRAG, QueryParam
-__version__ = "0.0.1"
+__version__ = "0.0.2"
__author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/GraphEdit"
diff --git a/setup.py b/setup.py
index df1c3cf4..849fabfe 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
deps.append(line.strip())
setuptools.setup(
- name="light-rag",
+ name="lightrag-hku",
url=vars2readme["__url__"],
version=vars2readme["__version__"],
author=vars2readme["__author__"],
From 5cc02cb34f5f06e05d1f31097974f7ddcdfb84ce Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Thu, 10 Oct 2024 14:57:32 +0800
Subject: [PATCH 006/258] Revert "first commit"
---
lightrag/base.py | 116 --------------------
lightrag/prompt.py | 256 --------------------------------------------
lightrag/storage.py | 246 ------------------------------------------
lightrag/utils.py | 165 ----------------------------
4 files changed, 783 deletions(-)
delete mode 100644 lightrag/base.py
delete mode 100644 lightrag/prompt.py
delete mode 100644 lightrag/storage.py
delete mode 100644 lightrag/utils.py
diff --git a/lightrag/base.py b/lightrag/base.py
deleted file mode 100644
index 9c0422fe..00000000
--- a/lightrag/base.py
+++ /dev/null
@@ -1,116 +0,0 @@
-from dataclasses import dataclass, field
-from typing import TypedDict, Union, Literal, Generic, TypeVar
-
-import numpy as np
-
-from .utils import EmbeddingFunc
-
-TextChunkSchema = TypedDict(
- "TextChunkSchema",
- {"tokens": int, "content": str, "full_doc_id": str, "chunk_order_index": int},
-)
-
-T = TypeVar("T")
-
-@dataclass
-class QueryParam:
- mode: Literal["local", "global", "hybird", "naive"] = "global"
- only_need_context: bool = False
- response_type: str = "Multiple Paragraphs"
- top_k: int = 60
- max_token_for_text_unit: int = 4000
- max_token_for_global_context: int = 4000
- max_token_for_local_context: int = 4000
-
-
-@dataclass
-class StorageNameSpace:
- namespace: str
- global_config: dict
-
- async def index_done_callback(self):
- """commit the storage operations after indexing"""
- pass
-
- async def query_done_callback(self):
- """commit the storage operations after querying"""
- pass
-
-@dataclass
-class BaseVectorStorage(StorageNameSpace):
- embedding_func: EmbeddingFunc
- meta_fields: set = field(default_factory=set)
-
- async def query(self, query: str, top_k: int) -> list[dict]:
- raise NotImplementedError
-
- async def upsert(self, data: dict[str, dict]):
- """Use 'content' field from value for embedding, use key as id.
- If embedding_func is None, use 'embedding' field from value
- """
- raise NotImplementedError
-
-@dataclass
-class BaseKVStorage(Generic[T], StorageNameSpace):
- async def all_keys(self) -> list[str]:
- raise NotImplementedError
-
- async def get_by_id(self, id: str) -> Union[T, None]:
- raise NotImplementedError
-
- async def get_by_ids(
- self, ids: list[str], fields: Union[set[str], None] = None
- ) -> list[Union[T, None]]:
- raise NotImplementedError
-
- async def filter_keys(self, data: list[str]) -> set[str]:
- """return un-exist keys"""
- raise NotImplementedError
-
- async def upsert(self, data: dict[str, T]):
- raise NotImplementedError
-
- async def drop(self):
- raise NotImplementedError
-
-
-@dataclass
-class BaseGraphStorage(StorageNameSpace):
- async def has_node(self, node_id: str) -> bool:
- raise NotImplementedError
-
- async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
- raise NotImplementedError
-
- async def node_degree(self, node_id: str) -> int:
- raise NotImplementedError
-
- async def edge_degree(self, src_id: str, tgt_id: str) -> int:
- raise NotImplementedError
-
- async def get_node(self, node_id: str) -> Union[dict, None]:
- raise NotImplementedError
-
- async def get_edge(
- self, source_node_id: str, target_node_id: str
- ) -> Union[dict, None]:
- raise NotImplementedError
-
- async def get_node_edges(
- self, source_node_id: str
- ) -> Union[list[tuple[str, str]], None]:
- raise NotImplementedError
-
- async def upsert_node(self, node_id: str, node_data: dict[str, str]):
- raise NotImplementedError
-
- async def upsert_edge(
- self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
- ):
- raise NotImplementedError
-
- async def clustering(self, algorithm: str):
- raise NotImplementedError
-
- async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
- raise NotImplementedError("Node embedding is not used in lightrag.")
\ No newline at end of file
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
deleted file mode 100644
index 5d28e49c..00000000
--- a/lightrag/prompt.py
+++ /dev/null
@@ -1,256 +0,0 @@
-GRAPH_FIELD_SEP = ""
-
-PROMPTS = {}
-
-PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
-PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
-PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
-PROMPTS["process_tickers"] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
-
-PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event"]
-
-PROMPTS[
- "entity_extraction"
-] = """-Goal-
-Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.
-
--Steps-
-1. Identify all entities. For each identified entity, extract the following information:
-- entity_name: Name of the entity, capitalized
-- entity_type: One of the following types: [{entity_types}]
-- entity_description: Comprehensive description of the entity's attributes and activities
-Format each entity as ("entity"{tuple_delimiter}{tuple_delimiter}{tuple_delimiter}
-
-2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
-For each pair of related entities, extract the following information:
-- source_entity: name of the source entity, as identified in step 1
-- target_entity: name of the target entity, as identified in step 1
-- relationship_description: explanation as to why you think the source entity and the target entity are related to each other
-- relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
-- relationship_keywords: one or more high-level key words that summarize the overarching nature of the relationship, focusing on concepts or themes rather than specific details
-Format each relationship as ("relationship"{tuple_delimiter}{tuple_delimiter}{tuple_delimiter}{tuple_delimiter}{tuple_delimiter})
-
-3. Identify high-level key words that summarize the main concepts, themes, or topics of the entire text. These should capture the overarching ideas present in the document.
-Format the content-level key words as ("content_keywords"{tuple_delimiter})
-
-4. Return output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
-
-5. When finished, output {completion_delimiter}
-
-######################
--Examples-
-######################
-Example 1:
-
-Entity_types: [person, technology, mission, organization, location]
-Text:
-while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
-
-Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. “If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us.”
-
-The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
-
-It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths
-################
-Output:
-("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is a character who experiences frustration and is observant of the dynamics among other characters."){record_delimiter}
-("entity"{tuple_delimiter}"Taylor"{tuple_delimiter}"person"{tuple_delimiter}"Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective."){record_delimiter}
-("entity"{tuple_delimiter}"Jordan"{tuple_delimiter}"person"{tuple_delimiter}"Jordan shares a commitment to discovery and has a significant interaction with Taylor regarding a device."){record_delimiter}
-("entity"{tuple_delimiter}"Cruz"{tuple_delimiter}"person"{tuple_delimiter}"Cruz is associated with a vision of control and order, influencing the dynamics among other characters."){record_delimiter}
-("entity"{tuple_delimiter}"The Device"{tuple_delimiter}"technology"{tuple_delimiter}"The Device is central to the story, with potential game-changing implications, and is revered by Taylor."){record_delimiter}
-("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Taylor"{tuple_delimiter}"Alex is affected by Taylor's authoritarian certainty and observes changes in Taylor's attitude towards the device."{tuple_delimiter}"power dynamics, perspective shift"{tuple_delimiter}7){record_delimiter}
-("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Jordan"{tuple_delimiter}"Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision."{tuple_delimiter}"shared goals, rebellion"{tuple_delimiter}6){record_delimiter}
-("relationship"{tuple_delimiter}"Taylor"{tuple_delimiter}"Jordan"{tuple_delimiter}"Taylor and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy truce."{tuple_delimiter}"conflict resolution, mutual respect"{tuple_delimiter}8){record_delimiter}
-("relationship"{tuple_delimiter}"Jordan"{tuple_delimiter}"Cruz"{tuple_delimiter}"Jordan's commitment to discovery is in rebellion against Cruz's vision of control and order."{tuple_delimiter}"ideological conflict, rebellion"{tuple_delimiter}5){record_delimiter}
-("relationship"{tuple_delimiter}"Taylor"{tuple_delimiter}"The Device"{tuple_delimiter}"Taylor shows reverence towards the device, indicating its importance and potential impact."{tuple_delimiter}"reverence, technological significance"{tuple_delimiter}9){record_delimiter}
-("content_keywords"{tuple_delimiter}"power dynamics, ideological conflict, discovery, rebellion"){completion_delimiter}
-#############################
-Example 2:
-
-Entity_types: [person, technology, mission, organization, location]
-Text:
-They were no longer mere operatives; they had become guardians of a threshold, keepers of a message from a realm beyond stars and stripes. This elevation in their mission could not be shackled by regulations and established protocols—it demanded a new perspective, a new resolve.
-
-Tension threaded through the dialogue of beeps and static as communications with Washington buzzed in the background. The team stood, a portentous air enveloping them. It was clear that the decisions they made in the ensuing hours could redefine humanity's place in the cosmos or condemn them to ignorance and potential peril.
-
-Their connection to the stars solidified, the group moved to address the crystallizing warning, shifting from passive recipients to active participants. Mercer's latter instincts gained precedence— the team's mandate had evolved, no longer solely to observe and report but to interact and prepare. A metamorphosis had begun, and Operation: Dulce hummed with the newfound frequency of their daring, a tone set not by the earthly
-#############
-Output:
-("entity"{tuple_delimiter}"Washington"{tuple_delimiter}"location"{tuple_delimiter}"Washington is a location where communications are being received, indicating its importance in the decision-making process."){record_delimiter}
-("entity"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"mission"{tuple_delimiter}"Operation: Dulce is described as a mission that has evolved to interact and prepare, indicating a significant shift in objectives and activities."){record_delimiter}
-("entity"{tuple_delimiter}"The team"{tuple_delimiter}"organization"{tuple_delimiter}"The team is portrayed as a group of individuals who have transitioned from passive observers to active participants in a mission, showing a dynamic change in their role."){record_delimiter}
-("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Washington"{tuple_delimiter}"The team receives communications from Washington, which influences their decision-making process."{tuple_delimiter}"decision-making, external influence"{tuple_delimiter}7){record_delimiter}
-("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"The team is directly involved in Operation: Dulce, executing its evolved objectives and activities."{tuple_delimiter}"mission evolution, active participation"{tuple_delimiter}9){completion_delimiter}
-("content_keywords"{tuple_delimiter}"mission evolution, decision-making, active participation, cosmic significance"){completion_delimiter}
-#############################
-Example 3:
-
-Entity_types: [person, role, technology, organization, event, location, concept]
-Text:
-their voice slicing through the buzz of activity. "Control may be an illusion when facing an intelligence that literally writes its own rules," they stated stoically, casting a watchful eye over the flurry of data.
-
-"It's like it's learning to communicate," offered Sam Rivera from a nearby interface, their youthful energy boding a mix of awe and anxiety. "This gives talking to strangers' a whole new meaning."
-
-Alex surveyed his team—each face a study in concentration, determination, and not a small measure of trepidation. "This might well be our first contact," he acknowledged, "And we need to be ready for whatever answers back."
-
-Together, they stood on the edge of the unknown, forging humanity's response to a message from the heavens. The ensuing silence was palpable—a collective introspection about their role in this grand cosmic play, one that could rewrite human history.
-
-The encrypted dialogue continued to unfold, its intricate patterns showing an almost uncanny anticipation
-#############
-Output:
-("entity"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"person"{tuple_delimiter}"Sam Rivera is a member of a team working on communicating with an unknown intelligence, showing a mix of awe and anxiety."){record_delimiter}
-("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is the leader of a team attempting first contact with an unknown intelligence, acknowledging the significance of their task."){record_delimiter}
-("entity"{tuple_delimiter}"Control"{tuple_delimiter}"concept"{tuple_delimiter}"Control refers to the ability to manage or govern, which is challenged by an intelligence that writes its own rules."){record_delimiter}
-("entity"{tuple_delimiter}"Intelligence"{tuple_delimiter}"concept"{tuple_delimiter}"Intelligence here refers to an unknown entity capable of writing its own rules and learning to communicate."){record_delimiter}
-("entity"{tuple_delimiter}"First Contact"{tuple_delimiter}"event"{tuple_delimiter}"First Contact is the potential initial communication between humanity and an unknown intelligence."){record_delimiter}
-("entity"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"event"{tuple_delimiter}"Humanity's Response is the collective action taken by Alex's team in response to a message from an unknown intelligence."){record_delimiter}
-("relationship"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"Intelligence"{tuple_delimiter}"Sam Rivera is directly involved in the process of learning to communicate with the unknown intelligence."{tuple_delimiter}"communication, learning process"{tuple_delimiter}9){record_delimiter}
-("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"First Contact"{tuple_delimiter}"Alex leads the team that might be making the First Contact with the unknown intelligence."{tuple_delimiter}"leadership, exploration"{tuple_delimiter}10){record_delimiter}
-("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"Alex and his team are the key figures in Humanity's Response to the unknown intelligence."{tuple_delimiter}"collective action, cosmic significance"{tuple_delimiter}8){record_delimiter}
-("relationship"{tuple_delimiter}"Control"{tuple_delimiter}"Intelligence"{tuple_delimiter}"The concept of Control is challenged by the Intelligence that writes its own rules."{tuple_delimiter}"power dynamics, autonomy"{tuple_delimiter}7){record_delimiter}
-("content_keywords"{tuple_delimiter}"first contact, control, communication, cosmic significance"){completion_delimiter}
-#############################
--Real Data-
-######################
-Entity_types: {entity_types}
-Text: {input_text}
-######################
-Output:
-"""
-
-PROMPTS[
- "summarize_entity_descriptions"
-] = """You are a helpful assistant responsible for generating a comprehensive summary of the data provided below.
-Given one or two entities, and a list of descriptions, all related to the same entity or group of entities.
-Please concatenate all of these into a single, comprehensive description. Make sure to include information collected from all the descriptions.
-If the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary.
-Make sure it is written in third person, and include the entity names so we the have full context.
-
-#######
--Data-
-Entities: {entity_name}
-Description List: {description_list}
-#######
-Output:
-"""
-
-PROMPTS[
- "entiti_continue_extraction"
-] = """MANY entities were missed in the last extraction. Add them below using the same format:
-"""
-
-PROMPTS[
- "entiti_if_loop_extraction"
-] = """It appears some entities may have still been missed. Answer YES | NO if there are still entities that need to be added.
-"""
-
-PROMPTS["fail_response"] = "Sorry, I'm not able to provide an answer to that question."
-
-PROMPTS[
- "rag_response"
-] = """---Role---
-
-You are a helpful assistant responding to questions about data in the tables provided.
-
-
----Goal---
-
-Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
-If you don't know the answer, just say so. Do not make anything up.
-Do not include information where the supporting evidence for it is not provided.
-
----Target response length and format---
-
-{response_type}
-
-
----Data tables---
-
-{context_data}
-
-
----Goal---
-
-Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
-
-If you don't know the answer, just say so. Do not make anything up.
-
-Do not include information where the supporting evidence for it is not provided.
-
-
----Target response length and format---
-
-{response_type}
-
-Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown.
-"""
-
-PROMPTS["keywords_extraction"] = """---Role---
-
-You are a helpful assistant tasked with identifying both high-level and low-level keywords in the user's query.
-
----Goal---
-
-Given the query, list both high-level and low-level keywords. High-level keywords focus on overarching concepts or themes, while low-level keywords focus on specific entities, details, or concrete terms.
-
----Instructions---
-
-- Output the keywords in JSON format.
-- The JSON should have two keys:
- - "high_level_keywords" for overarching concepts or themes.
- - "low_level_keywords" for specific entities or details.
-
-######################
--Examples-
-######################
-Example 1:
-
-Query: "How does international trade influence global economic stability?"
-################
-Output:
-{{
- "high_level_keywords": ["International trade", "Global economic stability", "Economic impact"],
- "low_level_keywords": ["Trade agreements", "Tariffs", "Currency exchange", "Imports", "Exports"]
-}}
-#############################
-Example 2:
-
-Query: "What are the environmental consequences of deforestation on biodiversity?"
-################
-Output:
-{{
- "high_level_keywords": ["Environmental consequences", "Deforestation", "Biodiversity loss"],
- "low_level_keywords": ["Species extinction", "Habitat destruction", "Carbon emissions", "Rainforest", "Ecosystem"]
-}}
-#############################
-Example 3:
-
-Query: "What is the role of education in reducing poverty?"
-################
-Output:
-{{
- "high_level_keywords": ["Education", "Poverty reduction", "Socioeconomic development"],
- "low_level_keywords": ["School access", "Literacy rates", "Job training", "Income inequality"]
-}}
-#############################
--Real Data-
-######################
-Query: {query}
-######################
-Output:
-
-"""
-
-PROMPTS[
- "naive_rag_response"
-] = """You're a helpful assistant
-Below are the knowledge you know:
-{content_data}
----
-If you don't know the answer or if the provided knowledge do not contain sufficient information to provide an answer, just say so. Do not make anything up.
-Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
-If you don't know the answer, just say so. Do not make anything up.
-Do not include information where the supporting evidence for it is not provided.
----Target response length and format---
-{response_type}
-"""
diff --git a/lightrag/storage.py b/lightrag/storage.py
deleted file mode 100644
index 2f2bb7d8..00000000
--- a/lightrag/storage.py
+++ /dev/null
@@ -1,246 +0,0 @@
-import asyncio
-import html
-import json
-import os
-from collections import defaultdict
-from dataclasses import dataclass, field
-from typing import Any, Union, cast
-import pickle
-import hnswlib
-import networkx as nx
-import numpy as np
-from nano_vectordb import NanoVectorDB
-import xxhash
-
-from .utils import load_json, logger, write_json
-from .base import (
- BaseGraphStorage,
- BaseKVStorage,
- BaseVectorStorage,
-)
-
-@dataclass
-class JsonKVStorage(BaseKVStorage):
- def __post_init__(self):
- working_dir = self.global_config["working_dir"]
- self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
- self._data = load_json(self._file_name) or {}
- logger.info(f"Load KV {self.namespace} with {len(self._data)} data")
-
- async def all_keys(self) -> list[str]:
- return list(self._data.keys())
-
- async def index_done_callback(self):
- write_json(self._data, self._file_name)
-
- async def get_by_id(self, id):
- return self._data.get(id, None)
-
- async def get_by_ids(self, ids, fields=None):
- if fields is None:
- return [self._data.get(id, None) for id in ids]
- return [
- (
- {k: v for k, v in self._data[id].items() if k in fields}
- if self._data.get(id, None)
- else None
- )
- for id in ids
- ]
-
- async def filter_keys(self, data: list[str]) -> set[str]:
- return set([s for s in data if s not in self._data])
-
- async def upsert(self, data: dict[str, dict]):
- left_data = {k: v for k, v in data.items() if k not in self._data}
- self._data.update(left_data)
- return left_data
-
- async def drop(self):
- self._data = {}
-
-@dataclass
-class NanoVectorDBStorage(BaseVectorStorage):
- cosine_better_than_threshold: float = 0.2
-
- def __post_init__(self):
-
- self._client_file_name = os.path.join(
- self.global_config["working_dir"], f"vdb_{self.namespace}.json"
- )
- self._max_batch_size = self.global_config["embedding_batch_num"]
- self._client = NanoVectorDB(
- self.embedding_func.embedding_dim, storage_file=self._client_file_name
- )
- self.cosine_better_than_threshold = self.global_config.get(
- "cosine_better_than_threshold", self.cosine_better_than_threshold
- )
-
- async def upsert(self, data: dict[str, dict]):
- logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
- if not len(data):
- logger.warning("You insert an empty data to vector DB")
- return []
- list_data = [
- {
- "__id__": k,
- **{k1: v1 for k1, v1 in v.items() if k1 in self.meta_fields},
- }
- for k, v in data.items()
- ]
- contents = [v["content"] for v in data.values()]
- batches = [
- contents[i : i + self._max_batch_size]
- for i in range(0, len(contents), self._max_batch_size)
- ]
- embeddings_list = await asyncio.gather(
- *[self.embedding_func(batch) for batch in batches]
- )
- embeddings = np.concatenate(embeddings_list)
- for i, d in enumerate(list_data):
- d["__vector__"] = embeddings[i]
- results = self._client.upsert(datas=list_data)
- return results
-
- async def query(self, query: str, top_k=5):
- embedding = await self.embedding_func([query])
- embedding = embedding[0]
- results = self._client.query(
- query=embedding,
- top_k=top_k,
- better_than_threshold=self.cosine_better_than_threshold,
- )
- results = [
- {**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
- ]
- return results
-
- async def index_done_callback(self):
- self._client.save()
-
-@dataclass
-class NetworkXStorage(BaseGraphStorage):
- @staticmethod
- def load_nx_graph(file_name) -> nx.Graph:
- if os.path.exists(file_name):
- return nx.read_graphml(file_name)
- return None
-
- @staticmethod
- def write_nx_graph(graph: nx.Graph, file_name):
- logger.info(
- f"Writing graph with {graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges"
- )
- nx.write_graphml(graph, file_name)
-
- @staticmethod
- def stable_largest_connected_component(graph: nx.Graph) -> nx.Graph:
- """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
- Return the largest connected component of the graph, with nodes and edges sorted in a stable way.
- """
- from graspologic.utils import largest_connected_component
-
- graph = graph.copy()
- graph = cast(nx.Graph, largest_connected_component(graph))
- node_mapping = {node: html.unescape(node.upper().strip()) for node in graph.nodes()} # type: ignore
- graph = nx.relabel_nodes(graph, node_mapping)
- return NetworkXStorage._stabilize_graph(graph)
-
- @staticmethod
- def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
- """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
- Ensure an undirected graph with the same relationships will always be read the same way.
- """
- fixed_graph = nx.DiGraph() if graph.is_directed() else nx.Graph()
-
- sorted_nodes = graph.nodes(data=True)
- sorted_nodes = sorted(sorted_nodes, key=lambda x: x[0])
-
- fixed_graph.add_nodes_from(sorted_nodes)
- edges = list(graph.edges(data=True))
-
- if not graph.is_directed():
-
- def _sort_source_target(edge):
- source, target, edge_data = edge
- if source > target:
- temp = source
- source = target
- target = temp
- return source, target, edge_data
-
- edges = [_sort_source_target(edge) for edge in edges]
-
- def _get_edge_key(source: Any, target: Any) -> str:
- return f"{source} -> {target}"
-
- edges = sorted(edges, key=lambda x: _get_edge_key(x[0], x[1]))
-
- fixed_graph.add_edges_from(edges)
- return fixed_graph
-
- def __post_init__(self):
- self._graphml_xml_file = os.path.join(
- self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
- )
- preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
- if preloaded_graph is not None:
- logger.info(
- f"Loaded graph from {self._graphml_xml_file} with {preloaded_graph.number_of_nodes()} nodes, {preloaded_graph.number_of_edges()} edges"
- )
- self._graph = preloaded_graph or nx.Graph()
- self._node_embed_algorithms = {
- "node2vec": self._node2vec_embed,
- }
-
- async def index_done_callback(self):
- NetworkXStorage.write_nx_graph(self._graph, self._graphml_xml_file)
-
- async def has_node(self, node_id: str) -> bool:
- return self._graph.has_node(node_id)
-
- async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
- return self._graph.has_edge(source_node_id, target_node_id)
-
- async def get_node(self, node_id: str) -> Union[dict, None]:
- return self._graph.nodes.get(node_id)
-
- async def node_degree(self, node_id: str) -> int:
- return self._graph.degree(node_id)
-
- async def edge_degree(self, src_id: str, tgt_id: str) -> int:
- return self._graph.degree(src_id) + self._graph.degree(tgt_id)
-
- async def get_edge(
- self, source_node_id: str, target_node_id: str
- ) -> Union[dict, None]:
- return self._graph.edges.get((source_node_id, target_node_id))
-
- async def get_node_edges(self, source_node_id: str):
- if self._graph.has_node(source_node_id):
- return list(self._graph.edges(source_node_id))
- return None
-
- async def upsert_node(self, node_id: str, node_data: dict[str, str]):
- self._graph.add_node(node_id, **node_data)
-
- async def upsert_edge(
- self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
- ):
- self._graph.add_edge(source_node_id, target_node_id, **edge_data)
-
- async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
- if algorithm not in self._node_embed_algorithms:
- raise ValueError(f"Node embedding algorithm {algorithm} not supported")
- return await self._node_embed_algorithms[algorithm]()
-
- async def _node2vec_embed(self):
- from graspologic import embed
-
- embeddings, nodes = embed.node2vec_embed(
- self._graph,
- **self.global_config["node2vec_params"],
- )
-
- nodes_ids = [self._graph.nodes[node_id]["id"] for node_id in nodes]
- return embeddings, nodes_ids
diff --git a/lightrag/utils.py b/lightrag/utils.py
deleted file mode 100644
index c75b4270..00000000
--- a/lightrag/utils.py
+++ /dev/null
@@ -1,165 +0,0 @@
-import asyncio
-import html
-import json
-import logging
-import os
-import re
-from dataclasses import dataclass
-from functools import wraps
-from hashlib import md5
-from typing import Any, Union
-
-import numpy as np
-import tiktoken
-
-ENCODER = None
-
-logger = logging.getLogger("lightrag")
-
-def set_logger(log_file: str):
- logger.setLevel(logging.DEBUG)
-
- file_handler = logging.FileHandler(log_file)
- file_handler.setLevel(logging.DEBUG)
-
- formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
- file_handler.setFormatter(formatter)
-
- if not logger.handlers:
- logger.addHandler(file_handler)
-
-@dataclass
-class EmbeddingFunc:
- embedding_dim: int
- max_token_size: int
- func: callable
-
- async def __call__(self, *args, **kwargs) -> np.ndarray:
- return await self.func(*args, **kwargs)
-
-def locate_json_string_body_from_string(content: str) -> Union[str, None]:
- """Locate the JSON string body from a string"""
- maybe_json_str = re.search(r"{.*}", content, re.DOTALL)
- if maybe_json_str is not None:
- return maybe_json_str.group(0)
- else:
- return None
-
-def convert_response_to_json(response: str) -> dict:
- json_str = locate_json_string_body_from_string(response)
- assert json_str is not None, f"Unable to parse JSON from response: {response}"
- try:
- data = json.loads(json_str)
- return data
- except json.JSONDecodeError as e:
- logger.error(f"Failed to parse JSON: {json_str}")
- raise e from None
-
-def compute_args_hash(*args):
- return md5(str(args).encode()).hexdigest()
-
-def compute_mdhash_id(content, prefix: str = ""):
- return prefix + md5(content.encode()).hexdigest()
-
-def limit_async_func_call(max_size: int, waitting_time: float = 0.0001):
- """Add restriction of maximum async calling times for a async func"""
-
- def final_decro(func):
- """Not using async.Semaphore to aovid use nest-asyncio"""
- __current_size = 0
-
- @wraps(func)
- async def wait_func(*args, **kwargs):
- nonlocal __current_size
- while __current_size >= max_size:
- await asyncio.sleep(waitting_time)
- __current_size += 1
- result = await func(*args, **kwargs)
- __current_size -= 1
- return result
-
- return wait_func
-
- return final_decro
-
-def wrap_embedding_func_with_attrs(**kwargs):
- """Wrap a function with attributes"""
-
- def final_decro(func) -> EmbeddingFunc:
- new_func = EmbeddingFunc(**kwargs, func=func)
- return new_func
-
- return final_decro
-
-def load_json(file_name):
- if not os.path.exists(file_name):
- return None
- with open(file_name) as f:
- return json.load(f)
-
-def write_json(json_obj, file_name):
- with open(file_name, "w") as f:
- json.dump(json_obj, f, indent=2, ensure_ascii=False)
-
-def encode_string_by_tiktoken(content: str, model_name: str = "gpt-4o"):
- global ENCODER
- if ENCODER is None:
- ENCODER = tiktoken.encoding_for_model(model_name)
- tokens = ENCODER.encode(content)
- return tokens
-
-
-def decode_tokens_by_tiktoken(tokens: list[int], model_name: str = "gpt-4o"):
- global ENCODER
- if ENCODER is None:
- ENCODER = tiktoken.encoding_for_model(model_name)
- content = ENCODER.decode(tokens)
- return content
-
-def pack_user_ass_to_openai_messages(*args: str):
- roles = ["user", "assistant"]
- return [
- {"role": roles[i % 2], "content": content} for i, content in enumerate(args)
- ]
-
-def split_string_by_multi_markers(content: str, markers: list[str]) -> list[str]:
- """Split a string by multiple markers"""
- if not markers:
- return [content]
- results = re.split("|".join(re.escape(marker) for marker in markers), content)
- return [r.strip() for r in results if r.strip()]
-
-# Refer the utils functions of the official GraphRAG implementation:
-# https://github.com/microsoft/graphrag
-def clean_str(input: Any) -> str:
- """Clean an input string by removing HTML escapes, control characters, and other unwanted characters."""
- # If we get non-string input, just give it back
- if not isinstance(input, str):
- return input
-
- result = html.unescape(input.strip())
- # https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python
- return re.sub(r"[\x00-\x1f\x7f-\x9f]", "", result)
-
-def is_float_regex(value):
- return bool(re.match(r"^[-+]?[0-9]*\.?[0-9]+$", value))
-
-def truncate_list_by_token_size(list_data: list, key: callable, max_token_size: int):
- """Truncate a list of data by token size"""
- if max_token_size <= 0:
- return []
- tokens = 0
- for i, data in enumerate(list_data):
- tokens += len(encode_string_by_tiktoken(key(data)))
- if tokens > max_token_size:
- return list_data[:i]
- return list_data
-
-def list_of_list_to_csv(data: list[list]):
- return "\n".join(
- [",\t".join([str(data_dd) for data_dd in data_d]) for data_d in data]
- )
-
-def save_data_to_file(data, file_name):
- with open(file_name, 'w', encoding='utf-8') as f:
- json.dump(data, f, ensure_ascii=False, indent=4)
\ No newline at end of file
From 86276325178e37de7eddfd27513a528b33932dcb Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Thu, 10 Oct 2024 14:58:21 +0800
Subject: [PATCH 007/258] update
---
lightrag/__init__.py | 2 +-
lightrag/base.py | 116 ++++++++++++++++++++
lightrag/prompt.py | 256 +++++++++++++++++++++++++++++++++++++++++++
lightrag/storage.py | 246 +++++++++++++++++++++++++++++++++++++++++
lightrag/utils.py | 165 ++++++++++++++++++++++++++++
setup.py | 2 +-
6 files changed, 785 insertions(+), 2 deletions(-)
create mode 100644 lightrag/base.py
create mode 100644 lightrag/prompt.py
create mode 100644 lightrag/storage.py
create mode 100644 lightrag/utils.py
diff --git a/lightrag/__init__.py b/lightrag/__init__.py
index a83afba3..dc497cd4 100644
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,5 +1,5 @@
from .lightrag import LightRAG, QueryParam
-__version__ = "0.0.1"
+__version__ = "0.0.2"
__author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/GraphEdit"
diff --git a/lightrag/base.py b/lightrag/base.py
new file mode 100644
index 00000000..9c0422fe
--- /dev/null
+++ b/lightrag/base.py
@@ -0,0 +1,116 @@
+from dataclasses import dataclass, field
+from typing import TypedDict, Union, Literal, Generic, TypeVar
+
+import numpy as np
+
+from .utils import EmbeddingFunc
+
+TextChunkSchema = TypedDict(
+ "TextChunkSchema",
+ {"tokens": int, "content": str, "full_doc_id": str, "chunk_order_index": int},
+)
+
+T = TypeVar("T")
+
+@dataclass
+class QueryParam:
+ mode: Literal["local", "global", "hybird", "naive"] = "global"
+ only_need_context: bool = False
+ response_type: str = "Multiple Paragraphs"
+ top_k: int = 60
+ max_token_for_text_unit: int = 4000
+ max_token_for_global_context: int = 4000
+ max_token_for_local_context: int = 4000
+
+
+@dataclass
+class StorageNameSpace:
+ namespace: str
+ global_config: dict
+
+ async def index_done_callback(self):
+ """commit the storage operations after indexing"""
+ pass
+
+ async def query_done_callback(self):
+ """commit the storage operations after querying"""
+ pass
+
+@dataclass
+class BaseVectorStorage(StorageNameSpace):
+ embedding_func: EmbeddingFunc
+ meta_fields: set = field(default_factory=set)
+
+ async def query(self, query: str, top_k: int) -> list[dict]:
+ raise NotImplementedError
+
+ async def upsert(self, data: dict[str, dict]):
+ """Use 'content' field from value for embedding, use key as id.
+ If embedding_func is None, use 'embedding' field from value
+ """
+ raise NotImplementedError
+
+@dataclass
+class BaseKVStorage(Generic[T], StorageNameSpace):
+ async def all_keys(self) -> list[str]:
+ raise NotImplementedError
+
+ async def get_by_id(self, id: str) -> Union[T, None]:
+ raise NotImplementedError
+
+ async def get_by_ids(
+ self, ids: list[str], fields: Union[set[str], None] = None
+ ) -> list[Union[T, None]]:
+ raise NotImplementedError
+
+ async def filter_keys(self, data: list[str]) -> set[str]:
+ """return un-exist keys"""
+ raise NotImplementedError
+
+ async def upsert(self, data: dict[str, T]):
+ raise NotImplementedError
+
+ async def drop(self):
+ raise NotImplementedError
+
+
+@dataclass
+class BaseGraphStorage(StorageNameSpace):
+ async def has_node(self, node_id: str) -> bool:
+ raise NotImplementedError
+
+ async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
+ raise NotImplementedError
+
+ async def node_degree(self, node_id: str) -> int:
+ raise NotImplementedError
+
+ async def edge_degree(self, src_id: str, tgt_id: str) -> int:
+ raise NotImplementedError
+
+ async def get_node(self, node_id: str) -> Union[dict, None]:
+ raise NotImplementedError
+
+ async def get_edge(
+ self, source_node_id: str, target_node_id: str
+ ) -> Union[dict, None]:
+ raise NotImplementedError
+
+ async def get_node_edges(
+ self, source_node_id: str
+ ) -> Union[list[tuple[str, str]], None]:
+ raise NotImplementedError
+
+ async def upsert_node(self, node_id: str, node_data: dict[str, str]):
+ raise NotImplementedError
+
+ async def upsert_edge(
+ self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
+ ):
+ raise NotImplementedError
+
+ async def clustering(self, algorithm: str):
+ raise NotImplementedError
+
+ async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
+ raise NotImplementedError("Node embedding is not used in lightrag.")
\ No newline at end of file
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
new file mode 100644
index 00000000..5d28e49c
--- /dev/null
+++ b/lightrag/prompt.py
@@ -0,0 +1,256 @@
+GRAPH_FIELD_SEP = "<SEP>"
+
+PROMPTS = {}
+
+PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
+PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
+PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
+PROMPTS["process_tickers"] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
+
+PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event"]
+
+PROMPTS[
+ "entity_extraction"
+] = """-Goal-
+Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.
+
+-Steps-
+1. Identify all entities. For each identified entity, extract the following information:
+- entity_name: Name of the entity, capitalized
+- entity_type: One of the following types: [{entity_types}]
+- entity_description: Comprehensive description of the entity's attributes and activities
+Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>
+
+2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
+For each pair of related entities, extract the following information:
+- source_entity: name of the source entity, as identified in step 1
+- target_entity: name of the target entity, as identified in step 1
+- relationship_description: explanation as to why you think the source entity and the target entity are related to each other
+- relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
+- relationship_keywords: one or more high-level key words that summarize the overarching nature of the relationship, focusing on concepts or themes rather than specific details
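+Format each relationship as ("relationship"{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_description>{tuple_delimiter}<relationship_keywords>{tuple_delimiter}<relationship_strength>)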
+
+3. Identify high-level key words that summarize the main concepts, themes, or topics of the entire text. These should capture the overarching ideas present in the document.
+Format the content-level key words as ("content_keywords"{tuple_delimiter}<high_level_keywords>)
+
+4. Return output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
+
+5. When finished, output {completion_delimiter}
+
+######################
+-Examples-
+######################
+Example 1:
+
+Entity_types: [person, technology, mission, organization, location]
+Text:
+while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
+
+Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. “If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us.”
+
+The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
+
+It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths
+################
+Output:
+("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is a character who experiences frustration and is observant of the dynamics among other characters."){record_delimiter}
+("entity"{tuple_delimiter}"Taylor"{tuple_delimiter}"person"{tuple_delimiter}"Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective."){record_delimiter}
+("entity"{tuple_delimiter}"Jordan"{tuple_delimiter}"person"{tuple_delimiter}"Jordan shares a commitment to discovery and has a significant interaction with Taylor regarding a device."){record_delimiter}
+("entity"{tuple_delimiter}"Cruz"{tuple_delimiter}"person"{tuple_delimiter}"Cruz is associated with a vision of control and order, influencing the dynamics among other characters."){record_delimiter}
+("entity"{tuple_delimiter}"The Device"{tuple_delimiter}"technology"{tuple_delimiter}"The Device is central to the story, with potential game-changing implications, and is revered by Taylor."){record_delimiter}
+("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Taylor"{tuple_delimiter}"Alex is affected by Taylor's authoritarian certainty and observes changes in Taylor's attitude towards the device."{tuple_delimiter}"power dynamics, perspective shift"{tuple_delimiter}7){record_delimiter}
+("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Jordan"{tuple_delimiter}"Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision."{tuple_delimiter}"shared goals, rebellion"{tuple_delimiter}6){record_delimiter}
+("relationship"{tuple_delimiter}"Taylor"{tuple_delimiter}"Jordan"{tuple_delimiter}"Taylor and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy truce."{tuple_delimiter}"conflict resolution, mutual respect"{tuple_delimiter}8){record_delimiter}
+("relationship"{tuple_delimiter}"Jordan"{tuple_delimiter}"Cruz"{tuple_delimiter}"Jordan's commitment to discovery is in rebellion against Cruz's vision of control and order."{tuple_delimiter}"ideological conflict, rebellion"{tuple_delimiter}5){record_delimiter}
+("relationship"{tuple_delimiter}"Taylor"{tuple_delimiter}"The Device"{tuple_delimiter}"Taylor shows reverence towards the device, indicating its importance and potential impact."{tuple_delimiter}"reverence, technological significance"{tuple_delimiter}9){record_delimiter}
+("content_keywords"{tuple_delimiter}"power dynamics, ideological conflict, discovery, rebellion"){completion_delimiter}
+#############################
+Example 2:
+
+Entity_types: [person, technology, mission, organization, location]
+Text:
+They were no longer mere operatives; they had become guardians of a threshold, keepers of a message from a realm beyond stars and stripes. This elevation in their mission could not be shackled by regulations and established protocols—it demanded a new perspective, a new resolve.
+
+Tension threaded through the dialogue of beeps and static as communications with Washington buzzed in the background. The team stood, a portentous air enveloping them. It was clear that the decisions they made in the ensuing hours could redefine humanity's place in the cosmos or condemn them to ignorance and potential peril.
+
+Their connection to the stars solidified, the group moved to address the crystallizing warning, shifting from passive recipients to active participants. Mercer's latter instincts gained precedence— the team's mandate had evolved, no longer solely to observe and report but to interact and prepare. A metamorphosis had begun, and Operation: Dulce hummed with the newfound frequency of their daring, a tone set not by the earthly
+#############
+Output:
+("entity"{tuple_delimiter}"Washington"{tuple_delimiter}"location"{tuple_delimiter}"Washington is a location where communications are being received, indicating its importance in the decision-making process."){record_delimiter}
+("entity"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"mission"{tuple_delimiter}"Operation: Dulce is described as a mission that has evolved to interact and prepare, indicating a significant shift in objectives and activities."){record_delimiter}
+("entity"{tuple_delimiter}"The team"{tuple_delimiter}"organization"{tuple_delimiter}"The team is portrayed as a group of individuals who have transitioned from passive observers to active participants in a mission, showing a dynamic change in their role."){record_delimiter}
+("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Washington"{tuple_delimiter}"The team receives communications from Washington, which influences their decision-making process."{tuple_delimiter}"decision-making, external influence"{tuple_delimiter}7){record_delimiter}
+("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"The team is directly involved in Operation: Dulce, executing its evolved objectives and activities."{tuple_delimiter}"mission evolution, active participation"{tuple_delimiter}9){completion_delimiter}
+("content_keywords"{tuple_delimiter}"mission evolution, decision-making, active participation, cosmic significance"){completion_delimiter}
+#############################
+Example 3:
+
+Entity_types: [person, role, technology, organization, event, location, concept]
+Text:
+their voice slicing through the buzz of activity. "Control may be an illusion when facing an intelligence that literally writes its own rules," they stated stoically, casting a watchful eye over the flurry of data.
+
+"It's like it's learning to communicate," offered Sam Rivera from a nearby interface, their youthful energy boding a mix of awe and anxiety. "This gives talking to strangers' a whole new meaning."
+
+Alex surveyed his team—each face a study in concentration, determination, and not a small measure of trepidation. "This might well be our first contact," he acknowledged, "And we need to be ready for whatever answers back."
+
+Together, they stood on the edge of the unknown, forging humanity's response to a message from the heavens. The ensuing silence was palpable—a collective introspection about their role in this grand cosmic play, one that could rewrite human history.
+
+The encrypted dialogue continued to unfold, its intricate patterns showing an almost uncanny anticipation
+#############
+Output:
+("entity"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"person"{tuple_delimiter}"Sam Rivera is a member of a team working on communicating with an unknown intelligence, showing a mix of awe and anxiety."){record_delimiter}
+("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is the leader of a team attempting first contact with an unknown intelligence, acknowledging the significance of their task."){record_delimiter}
+("entity"{tuple_delimiter}"Control"{tuple_delimiter}"concept"{tuple_delimiter}"Control refers to the ability to manage or govern, which is challenged by an intelligence that writes its own rules."){record_delimiter}
+("entity"{tuple_delimiter}"Intelligence"{tuple_delimiter}"concept"{tuple_delimiter}"Intelligence here refers to an unknown entity capable of writing its own rules and learning to communicate."){record_delimiter}
+("entity"{tuple_delimiter}"First Contact"{tuple_delimiter}"event"{tuple_delimiter}"First Contact is the potential initial communication between humanity and an unknown intelligence."){record_delimiter}
+("entity"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"event"{tuple_delimiter}"Humanity's Response is the collective action taken by Alex's team in response to a message from an unknown intelligence."){record_delimiter}
+("relationship"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"Intelligence"{tuple_delimiter}"Sam Rivera is directly involved in the process of learning to communicate with the unknown intelligence."{tuple_delimiter}"communication, learning process"{tuple_delimiter}9){record_delimiter}
+("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"First Contact"{tuple_delimiter}"Alex leads the team that might be making the First Contact with the unknown intelligence."{tuple_delimiter}"leadership, exploration"{tuple_delimiter}10){record_delimiter}
+("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"Alex and his team are the key figures in Humanity's Response to the unknown intelligence."{tuple_delimiter}"collective action, cosmic significance"{tuple_delimiter}8){record_delimiter}
+("relationship"{tuple_delimiter}"Control"{tuple_delimiter}"Intelligence"{tuple_delimiter}"The concept of Control is challenged by the Intelligence that writes its own rules."{tuple_delimiter}"power dynamics, autonomy"{tuple_delimiter}7){record_delimiter}
+("content_keywords"{tuple_delimiter}"first contact, control, communication, cosmic significance"){completion_delimiter}
+#############################
+-Real Data-
+######################
+Entity_types: {entity_types}
+Text: {input_text}
+######################
+Output:
+"""
+
+PROMPTS[
+ "summarize_entity_descriptions"
+] = """You are a helpful assistant responsible for generating a comprehensive summary of the data provided below.
+Given one or two entities, and a list of descriptions, all related to the same entity or group of entities.
+Please concatenate all of these into a single, comprehensive description. Make sure to include information collected from all the descriptions.
+If the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary.
+Make sure it is written in third person, and include the entity names so we the have full context.
+
+#######
+-Data-
+Entities: {entity_name}
+Description List: {description_list}
+#######
+Output:
+"""
+
+PROMPTS[
+ "entiti_continue_extraction"
+] = """MANY entities were missed in the last extraction. Add them below using the same format:
+"""
+
+PROMPTS[
+ "entiti_if_loop_extraction"
+] = """It appears some entities may have still been missed. Answer YES | NO if there are still entities that need to be added.
+"""
+
+PROMPTS["fail_response"] = "Sorry, I'm not able to provide an answer to that question."
+
+PROMPTS[
+ "rag_response"
+] = """---Role---
+
+You are a helpful assistant responding to questions about data in the tables provided.
+
+
+---Goal---
+
+Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
+If you don't know the answer, just say so. Do not make anything up.
+Do not include information where the supporting evidence for it is not provided.
+
+---Target response length and format---
+
+{response_type}
+
+
+---Data tables---
+
+{context_data}
+
+
+---Goal---
+
+Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
+
+If you don't know the answer, just say so. Do not make anything up.
+
+Do not include information where the supporting evidence for it is not provided.
+
+
+---Target response length and format---
+
+{response_type}
+
+Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown.
+"""
+
+PROMPTS["keywords_extraction"] = """---Role---
+
+You are a helpful assistant tasked with identifying both high-level and low-level keywords in the user's query.
+
+---Goal---
+
+Given the query, list both high-level and low-level keywords. High-level keywords focus on overarching concepts or themes, while low-level keywords focus on specific entities, details, or concrete terms.
+
+---Instructions---
+
+- Output the keywords in JSON format.
+- The JSON should have two keys:
+ - "high_level_keywords" for overarching concepts or themes.
+ - "low_level_keywords" for specific entities or details.
+
+######################
+-Examples-
+######################
+Example 1:
+
+Query: "How does international trade influence global economic stability?"
+################
+Output:
+{{
+ "high_level_keywords": ["International trade", "Global economic stability", "Economic impact"],
+ "low_level_keywords": ["Trade agreements", "Tariffs", "Currency exchange", "Imports", "Exports"]
+}}
+#############################
+Example 2:
+
+Query: "What are the environmental consequences of deforestation on biodiversity?"
+################
+Output:
+{{
+ "high_level_keywords": ["Environmental consequences", "Deforestation", "Biodiversity loss"],
+ "low_level_keywords": ["Species extinction", "Habitat destruction", "Carbon emissions", "Rainforest", "Ecosystem"]
+}}
+#############################
+Example 3:
+
+Query: "What is the role of education in reducing poverty?"
+################
+Output:
+{{
+ "high_level_keywords": ["Education", "Poverty reduction", "Socioeconomic development"],
+ "low_level_keywords": ["School access", "Literacy rates", "Job training", "Income inequality"]
+}}
+#############################
+-Real Data-
+######################
+Query: {query}
+######################
+Output:
+
+"""
+
+PROMPTS[
+ "naive_rag_response"
+] = """You're a helpful assistant
+Below are the knowledge you know:
+{content_data}
+---
+If you don't know the answer or if the provided knowledge do not contain sufficient information to provide an answer, just say so. Do not make anything up.
+Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
+If you don't know the answer, just say so. Do not make anything up.
+Do not include information where the supporting evidence for it is not provided.
+---Target response length and format---
+{response_type}
+"""
diff --git a/lightrag/storage.py b/lightrag/storage.py
new file mode 100644
index 00000000..2f2bb7d8
--- /dev/null
+++ b/lightrag/storage.py
@@ -0,0 +1,246 @@
+import asyncio
+import html
+import json
+import os
+from collections import defaultdict
+from dataclasses import dataclass, field
+from typing import Any, Union, cast
+import pickle
+import hnswlib
+import networkx as nx
+import numpy as np
+from nano_vectordb import NanoVectorDB
+import xxhash
+
+from .utils import load_json, logger, write_json
+from .base import (
+ BaseGraphStorage,
+ BaseKVStorage,
+ BaseVectorStorage,
+)
+
+@dataclass
+class JsonKVStorage(BaseKVStorage):
+ def __post_init__(self):
+ working_dir = self.global_config["working_dir"]
+ self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
+ self._data = load_json(self._file_name) or {}
+ logger.info(f"Load KV {self.namespace} with {len(self._data)} data")
+
+ async def all_keys(self) -> list[str]:
+ return list(self._data.keys())
+
+ async def index_done_callback(self):
+ write_json(self._data, self._file_name)
+
+ async def get_by_id(self, id):
+ return self._data.get(id, None)
+
+ async def get_by_ids(self, ids, fields=None):
+ if fields is None:
+ return [self._data.get(id, None) for id in ids]
+ return [
+ (
+ {k: v for k, v in self._data[id].items() if k in fields}
+ if self._data.get(id, None)
+ else None
+ )
+ for id in ids
+ ]
+
+ async def filter_keys(self, data: list[str]) -> set[str]:
+ return set([s for s in data if s not in self._data])
+
+ async def upsert(self, data: dict[str, dict]):
+ left_data = {k: v for k, v in data.items() if k not in self._data}
+ self._data.update(left_data)
+ return left_data
+
+ async def drop(self):
+ self._data = {}
+
+@dataclass
+class NanoVectorDBStorage(BaseVectorStorage):
+ cosine_better_than_threshold: float = 0.2
+
+ def __post_init__(self):
+
+ self._client_file_name = os.path.join(
+ self.global_config["working_dir"], f"vdb_{self.namespace}.json"
+ )
+ self._max_batch_size = self.global_config["embedding_batch_num"]
+ self._client = NanoVectorDB(
+ self.embedding_func.embedding_dim, storage_file=self._client_file_name
+ )
+ self.cosine_better_than_threshold = self.global_config.get(
+ "cosine_better_than_threshold", self.cosine_better_than_threshold
+ )
+
+ async def upsert(self, data: dict[str, dict]):
+ logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
+ if not len(data):
+ logger.warning("You insert an empty data to vector DB")
+ return []
+ list_data = [
+ {
+ "__id__": k,
+ **{k1: v1 for k1, v1 in v.items() if k1 in self.meta_fields},
+ }
+ for k, v in data.items()
+ ]
+ contents = [v["content"] for v in data.values()]
+ batches = [
+ contents[i : i + self._max_batch_size]
+ for i in range(0, len(contents), self._max_batch_size)
+ ]
+ embeddings_list = await asyncio.gather(
+ *[self.embedding_func(batch) for batch in batches]
+ )
+ embeddings = np.concatenate(embeddings_list)
+ for i, d in enumerate(list_data):
+ d["__vector__"] = embeddings[i]
+ results = self._client.upsert(datas=list_data)
+ return results
+
+ async def query(self, query: str, top_k=5):
+ embedding = await self.embedding_func([query])
+ embedding = embedding[0]
+ results = self._client.query(
+ query=embedding,
+ top_k=top_k,
+ better_than_threshold=self.cosine_better_than_threshold,
+ )
+ results = [
+ {**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
+ ]
+ return results
+
+ async def index_done_callback(self):
+ self._client.save()
+
+@dataclass
+class NetworkXStorage(BaseGraphStorage):
+ @staticmethod
+ def load_nx_graph(file_name) -> nx.Graph:
+ if os.path.exists(file_name):
+ return nx.read_graphml(file_name)
+ return None
+
+ @staticmethod
+ def write_nx_graph(graph: nx.Graph, file_name):
+ logger.info(
+ f"Writing graph with {graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges"
+ )
+ nx.write_graphml(graph, file_name)
+
+ @staticmethod
+ def stable_largest_connected_component(graph: nx.Graph) -> nx.Graph:
+ """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
+ Return the largest connected component of the graph, with nodes and edges sorted in a stable way.
+ """
+ from graspologic.utils import largest_connected_component
+
+ graph = graph.copy()
+ graph = cast(nx.Graph, largest_connected_component(graph))
+ node_mapping = {node: html.unescape(node.upper().strip()) for node in graph.nodes()} # type: ignore
+ graph = nx.relabel_nodes(graph, node_mapping)
+ return NetworkXStorage._stabilize_graph(graph)
+
+ @staticmethod
+ def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
+ """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
+ Ensure an undirected graph with the same relationships will always be read the same way.
+ """
+ fixed_graph = nx.DiGraph() if graph.is_directed() else nx.Graph()
+
+ sorted_nodes = graph.nodes(data=True)
+ sorted_nodes = sorted(sorted_nodes, key=lambda x: x[0])
+
+ fixed_graph.add_nodes_from(sorted_nodes)
+ edges = list(graph.edges(data=True))
+
+ if not graph.is_directed():
+
+ def _sort_source_target(edge):
+ source, target, edge_data = edge
+ if source > target:
+ temp = source
+ source = target
+ target = temp
+ return source, target, edge_data
+
+ edges = [_sort_source_target(edge) for edge in edges]
+
+ def _get_edge_key(source: Any, target: Any) -> str:
+ return f"{source} -> {target}"
+
+ edges = sorted(edges, key=lambda x: _get_edge_key(x[0], x[1]))
+
+ fixed_graph.add_edges_from(edges)
+ return fixed_graph
+
+ def __post_init__(self):
+ self._graphml_xml_file = os.path.join(
+ self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
+ )
+ preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
+ if preloaded_graph is not None:
+ logger.info(
+ f"Loaded graph from {self._graphml_xml_file} with {preloaded_graph.number_of_nodes()} nodes, {preloaded_graph.number_of_edges()} edges"
+ )
+ self._graph = preloaded_graph or nx.Graph()
+ self._node_embed_algorithms = {
+ "node2vec": self._node2vec_embed,
+ }
+
+ async def index_done_callback(self):
+ NetworkXStorage.write_nx_graph(self._graph, self._graphml_xml_file)
+
+ async def has_node(self, node_id: str) -> bool:
+ return self._graph.has_node(node_id)
+
+ async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
+ return self._graph.has_edge(source_node_id, target_node_id)
+
+ async def get_node(self, node_id: str) -> Union[dict, None]:
+ return self._graph.nodes.get(node_id)
+
+ async def node_degree(self, node_id: str) -> int:
+ return self._graph.degree(node_id)
+
+ async def edge_degree(self, src_id: str, tgt_id: str) -> int:
+ return self._graph.degree(src_id) + self._graph.degree(tgt_id)
+
+ async def get_edge(
+ self, source_node_id: str, target_node_id: str
+ ) -> Union[dict, None]:
+ return self._graph.edges.get((source_node_id, target_node_id))
+
+ async def get_node_edges(self, source_node_id: str):
+ if self._graph.has_node(source_node_id):
+ return list(self._graph.edges(source_node_id))
+ return None
+
+ async def upsert_node(self, node_id: str, node_data: dict[str, str]):
+ self._graph.add_node(node_id, **node_data)
+
+ async def upsert_edge(
+ self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
+ ):
+ self._graph.add_edge(source_node_id, target_node_id, **edge_data)
+
+ async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
+ if algorithm not in self._node_embed_algorithms:
+ raise ValueError(f"Node embedding algorithm {algorithm} not supported")
+ return await self._node_embed_algorithms[algorithm]()
+
+ async def _node2vec_embed(self):
+ from graspologic import embed
+
+ embeddings, nodes = embed.node2vec_embed(
+ self._graph,
+ **self.global_config["node2vec_params"],
+ )
+
+ nodes_ids = [self._graph.nodes[node_id]["id"] for node_id in nodes]
+ return embeddings, nodes_ids
diff --git a/lightrag/utils.py b/lightrag/utils.py
new file mode 100644
index 00000000..c75b4270
--- /dev/null
+++ b/lightrag/utils.py
@@ -0,0 +1,165 @@
+import asyncio
+import html
+import json
+import logging
+import os
+import re
+from dataclasses import dataclass
+from functools import wraps
+from hashlib import md5
+from typing import Any, Union
+
+import numpy as np
+import tiktoken
+
+ENCODER = None
+
+logger = logging.getLogger("lightrag")
+
+def set_logger(log_file: str):
+ logger.setLevel(logging.DEBUG)
+
+ file_handler = logging.FileHandler(log_file)
+ file_handler.setLevel(logging.DEBUG)
+
+ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ file_handler.setFormatter(formatter)
+
+ if not logger.handlers:
+ logger.addHandler(file_handler)
+
+@dataclass
+class EmbeddingFunc:
+ embedding_dim: int
+ max_token_size: int
+ func: callable
+
+ async def __call__(self, *args, **kwargs) -> np.ndarray:
+ return await self.func(*args, **kwargs)
+
+def locate_json_string_body_from_string(content: str) -> Union[str, None]:
+ """Locate the JSON string body from a string"""
+ maybe_json_str = re.search(r"{.*}", content, re.DOTALL)
+ if maybe_json_str is not None:
+ return maybe_json_str.group(0)
+ else:
+ return None
+
+def convert_response_to_json(response: str) -> dict:
+ json_str = locate_json_string_body_from_string(response)
+ assert json_str is not None, f"Unable to parse JSON from response: {response}"
+ try:
+ data = json.loads(json_str)
+ return data
+ except json.JSONDecodeError as e:
+ logger.error(f"Failed to parse JSON: {json_str}")
+ raise e from None
+
+def compute_args_hash(*args):
+ return md5(str(args).encode()).hexdigest()
+
+def compute_mdhash_id(content, prefix: str = ""):
+ return prefix + md5(content.encode()).hexdigest()
+
+def limit_async_func_call(max_size: int, waitting_time: float = 0.0001):
+ """Add restriction of maximum async calling times for a async func"""
+
+ def final_decro(func):
+ """Not using async.Semaphore to aovid use nest-asyncio"""
+ __current_size = 0
+
+ @wraps(func)
+ async def wait_func(*args, **kwargs):
+ nonlocal __current_size
+ while __current_size >= max_size:
+ await asyncio.sleep(waitting_time)
+ __current_size += 1
+ result = await func(*args, **kwargs)
+ __current_size -= 1
+ return result
+
+ return wait_func
+
+ return final_decro
+
+def wrap_embedding_func_with_attrs(**kwargs):
+ """Wrap a function with attributes"""
+
+ def final_decro(func) -> EmbeddingFunc:
+ new_func = EmbeddingFunc(**kwargs, func=func)
+ return new_func
+
+ return final_decro
+
+def load_json(file_name):
+ if not os.path.exists(file_name):
+ return None
+ with open(file_name) as f:
+ return json.load(f)
+
+def write_json(json_obj, file_name):
+ with open(file_name, "w") as f:
+ json.dump(json_obj, f, indent=2, ensure_ascii=False)
+
+def encode_string_by_tiktoken(content: str, model_name: str = "gpt-4o"):
+ global ENCODER
+ if ENCODER is None:
+ ENCODER = tiktoken.encoding_for_model(model_name)
+ tokens = ENCODER.encode(content)
+ return tokens
+
+
+def decode_tokens_by_tiktoken(tokens: list[int], model_name: str = "gpt-4o"):
+ global ENCODER
+ if ENCODER is None:
+ ENCODER = tiktoken.encoding_for_model(model_name)
+ content = ENCODER.decode(tokens)
+ return content
+
+def pack_user_ass_to_openai_messages(*args: str):
+ roles = ["user", "assistant"]
+ return [
+ {"role": roles[i % 2], "content": content} for i, content in enumerate(args)
+ ]
+
+def split_string_by_multi_markers(content: str, markers: list[str]) -> list[str]:
+ """Split a string by multiple markers"""
+ if not markers:
+ return [content]
+ results = re.split("|".join(re.escape(marker) for marker in markers), content)
+ return [r.strip() for r in results if r.strip()]
+
+# Refer the utils functions of the official GraphRAG implementation:
+# https://github.com/microsoft/graphrag
+def clean_str(input: Any) -> str:
+ """Clean an input string by removing HTML escapes, control characters, and other unwanted characters."""
+ # If we get non-string input, just give it back
+ if not isinstance(input, str):
+ return input
+
+ result = html.unescape(input.strip())
+ # https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python
+ return re.sub(r"[\x00-\x1f\x7f-\x9f]", "", result)
+
+def is_float_regex(value):
+ return bool(re.match(r"^[-+]?[0-9]*\.?[0-9]+$", value))
+
+def truncate_list_by_token_size(list_data: list, key: callable, max_token_size: int):
+ """Truncate a list of data by token size"""
+ if max_token_size <= 0:
+ return []
+ tokens = 0
+ for i, data in enumerate(list_data):
+ tokens += len(encode_string_by_tiktoken(key(data)))
+ if tokens > max_token_size:
+ return list_data[:i]
+ return list_data
+
+def list_of_list_to_csv(data: list[list]):
+ return "\n".join(
+ [",\t".join([str(data_dd) for data_dd in data_d]) for data_d in data]
+ )
+
+def save_data_to_file(data, file_name):
+ with open(file_name, 'w', encoding='utf-8') as f:
+ json.dump(data, f, ensure_ascii=False, indent=4)
\ No newline at end of file
diff --git a/setup.py b/setup.py
index df1c3cf4..849fabfe 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
deps.append(line.strip())
setuptools.setup(
- name="light-rag",
+ name="lightrag-hku",
url=vars2readme["__url__"],
version=vars2readme["__version__"],
author=vars2readme["__author__"],
From 5931e4bccb90bfe9517e4f478f1ba10170be77ec Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Thu, 10 Oct 2024 15:00:31 +0800
Subject: [PATCH 008/258] Revert "first commit"
---
README.md | 198 ---------
lightrag/__init__.py | 5 -
lightrag/base.py | 116 ------
lightrag/lightrag.py | 300 --------------
lightrag/llm.py | 88 ----
lightrag/operate.py | 944 -------------------------------------------
lightrag/prompt.py | 256 ------------
lightrag/storage.py | 246 -----------
lightrag/utils.py | 165 --------
9 files changed, 2318 deletions(-)
delete mode 100644 README.md
delete mode 100644 lightrag/__init__.py
delete mode 100644 lightrag/base.py
delete mode 100644 lightrag/lightrag.py
delete mode 100644 lightrag/llm.py
delete mode 100644 lightrag/operate.py
delete mode 100644 lightrag/prompt.py
delete mode 100644 lightrag/storage.py
delete mode 100644 lightrag/utils.py
diff --git a/README.md b/README.md
deleted file mode 100644
index 42de1c1c..00000000
--- a/README.md
+++ /dev/null
@@ -1,198 +0,0 @@
-# LightRAG: Simple and Fast Retrieval-Augmented Generation
-![请添加图片描述](https://i-blog.csdnimg.cn/direct/567139f1a36e4564abc63ce5c12b6271.jpeg)
-
-
-
-
-
-
-This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
-![请添加图片描述](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
-## Install
-
-* Install from source
-
-```bash
-cd LightRAG
-pip install -e .
-```
-* Install from PyPI
-```bash
-pip install lightrag-hku
-```
-
-## Quick Start
-
-* Set OpenAI API key in environment: `export OPENAI_API_KEY="sk-...".`
-* Download the demo text "A Christmas Carol by Charles Dickens"
-```bash
-curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt
-```
-Use the below python snippet:
-
-```python
-from lightrag import LightRAG, QueryParam
-
-rag = LightRAG(working_dir="./dickens")
-
-with open("./book.txt") as f:
- rag.insert(f.read())
-
-# Perform naive search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
-
-# Perform local search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
-
-# Perform global search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
-
-# Perform hybird search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybird")))
-```
-Batch Insert
-```python
-rag.insert(["TEXT1", "TEXT2",...])
-```
-Incremental Insert
-
-```python
-rag = LightRAG(working_dir="./dickens")
-
-with open("./newText.txt") as f:
- rag.insert(f.read())
-```
-## Evaluation
-### Dataset
-The dataset used in LightRAG can be download from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain).
-
-### Generate Query
-LightRAG uses the following prompt to generate high-level queries, with the corresponding code located in `example/generate_query.py`.
-```python
-Given the following description of a dataset:
-
-{description}
-
-Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset.
-
-Output the results in the following structure:
-- User 1: [user description]
- - Task 1: [task description]
- - Question 1:
- - Question 2:
- - Question 3:
- - Question 4:
- - Question 5:
- - Task 2: [task description]
- ...
- - Task 5: [task description]
-- User 2: [user description]
- ...
-- User 5: [user description]
- ...
-```
-
- ### Batch Eval
-To evaluate the performance of two RAG systems on high-level queries, LightRAG uses the following prompt, with the specific code available in `example/batch_eval.py`.
-```python
----Role---
-You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
----Goal---
-You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
-
-- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
-- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
-- **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?
-
-For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.
-
-Here is the question:
-{query}
-
-Here are the two answers:
-
-**Answer 1:**
-{answer1}
-
-**Answer 2:**
-{answer2}
-
-Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.
-
-Output your evaluation in the following JSON format:
-
-{{
- "Comprehensiveness": {{
- "Winner": "[Answer 1 or Answer 2]",
- "Explanation": "[Provide explanation here]"
- }},
- "Empowerment": {{
- "Winner": "[Answer 1 or Answer 2]",
- "Explanation": "[Provide explanation here]"
- }},
- "Overall Winner": {{
- "Winner": "[Answer 1 or Answer 2]",
- "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"
- }}
-}}
-```
-### Overall Performance Table
-### Overall Performance Table
-| | **Agriculture** | | **CS** | | **Legal** | | **Mix** | |
-|----------------------|-------------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|
-| | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** |
-| **Comprehensiveness** | 32.69% | **67.31%** | 35.44% | **64.56%** | 19.05% | **80.95%** | 36.36% | **63.64%** |
-| **Diversity** | 24.09% | **75.91%** | 35.24% | **64.76%** | 10.98% | **89.02%** | 30.76% | **69.24%** |
-| **Empowerment** | 31.35% | **68.65%** | 35.48% | **64.52%** | 17.59% | **82.41%** | 40.95% | **59.05%** |
-| **Overall** | 33.30% | **66.70%** | 34.76% | **65.24%** | 17.46% | **82.54%** | 37.59% | **62.40%** |
-| | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** |
-| **Comprehensiveness** | 32.05% | **67.95%** | 39.30% | **60.70%** | 18.57% | **81.43%** | 38.89% | **61.11%** |
-| **Diversity** | 29.44% | **70.56%** | 38.71% | **61.29%** | 15.14% | **84.86%** | 28.50% | **71.50%** |
-| **Empowerment** | 32.51% | **67.49%** | 37.52% | **62.48%** | 17.80% | **82.20%** | 43.96% | **56.04%** |
-| **Overall** | 33.29% | **66.71%** | 39.03% | **60.97%** | 17.80% | **82.20%** | 39.61% | **60.39%** |
-| | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** |
-| **Comprehensiveness** | 24.39% | **75.61%** | 36.49% | **63.51%** | 27.68% | **72.32%** | 42.17% | **57.83%** |
-| **Diversity** | 24.96% | **75.34%** | 37.41% | **62.59%** | 18.79% | **81.21%** | 30.88% | **69.12%** |
-| **Empowerment** | 24.89% | **75.11%** | 34.99% | **65.01%** | 26.99% | **73.01%** | **45.61%** | **54.39%** |
-| **Overall** | 23.17% | **76.83%** | 35.67% | **64.33%** | 27.68% | **72.32%** | 42.72% | **57.28%** |
-| | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** |
-| **Comprehensiveness** | 45.56% | **54.44%** | 45.98% | **54.02%** | 47.13% | **52.87%** | **51.86%** | 48.14% |
-| **Diversity** | 19.65% | **80.35%** | 39.64% | **60.36%** | 25.55% | **74.45%** | 35.87% | **64.13%** |
-| **Empowerment** | 36.69% | **63.31%** | 45.09% | **54.91%** | 42.81% | **57.19%** | **52.94%** | 47.06% |
-| **Overall** | 43.62% | **56.38%** | 45.98% | **54.02%** | 45.70% | **54.30%** | **51.86%** | 48.14% |
-
-## Code Structure
-
-```python
-.
-├── examples
-│ ├── batch_eval.py
-│ ├── generate_query.py
-│ ├── insert.py
-│ └── query.py
-├── lightrag
-│ ├── __init__.py
-│ ├── base.py
-│ ├── lightrag.py
-│ ├── llm.py
-│ ├── operate.py
-│ ├── prompt.py
-│ ├── storage.py
-│ └── utils.jpeg
-├── LICENSE
-├── README.md
-├── requirements.txt
-└── setup.py
-```
-## Citation
-
-```
-@article{guo2024lightrag,
-title={LightRAG: Simple and Fast Retrieval-Augmented Generation},
-author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang},
-year={2024},
-eprint={2410.05779},
-archivePrefix={arXiv},
-primaryClass={cs.IR}
-}
-```
diff --git a/lightrag/__init__.py b/lightrag/__init__.py
deleted file mode 100644
index dc497cd4..00000000
--- a/lightrag/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .lightrag import LightRAG, QueryParam
-
-__version__ = "0.0.2"
-__author__ = "Zirui Guo"
-__url__ = "https://github.com/HKUDS/GraphEdit"
diff --git a/lightrag/base.py b/lightrag/base.py
deleted file mode 100644
index 9c0422fe..00000000
--- a/lightrag/base.py
+++ /dev/null
@@ -1,116 +0,0 @@
-from dataclasses import dataclass, field
-from typing import TypedDict, Union, Literal, Generic, TypeVar
-
-import numpy as np
-
-from .utils import EmbeddingFunc
-
-TextChunkSchema = TypedDict(
- "TextChunkSchema",
- {"tokens": int, "content": str, "full_doc_id": str, "chunk_order_index": int},
-)
-
-T = TypeVar("T")
-
-@dataclass
-class QueryParam:
- mode: Literal["local", "global", "hybird", "naive"] = "global"
- only_need_context: bool = False
- response_type: str = "Multiple Paragraphs"
- top_k: int = 60
- max_token_for_text_unit: int = 4000
- max_token_for_global_context: int = 4000
- max_token_for_local_context: int = 4000
-
-
-@dataclass
-class StorageNameSpace:
- namespace: str
- global_config: dict
-
- async def index_done_callback(self):
- """commit the storage operations after indexing"""
- pass
-
- async def query_done_callback(self):
- """commit the storage operations after querying"""
- pass
-
-@dataclass
-class BaseVectorStorage(StorageNameSpace):
- embedding_func: EmbeddingFunc
- meta_fields: set = field(default_factory=set)
-
- async def query(self, query: str, top_k: int) -> list[dict]:
- raise NotImplementedError
-
- async def upsert(self, data: dict[str, dict]):
- """Use 'content' field from value for embedding, use key as id.
- If embedding_func is None, use 'embedding' field from value
- """
- raise NotImplementedError
-
-@dataclass
-class BaseKVStorage(Generic[T], StorageNameSpace):
- async def all_keys(self) -> list[str]:
- raise NotImplementedError
-
- async def get_by_id(self, id: str) -> Union[T, None]:
- raise NotImplementedError
-
- async def get_by_ids(
- self, ids: list[str], fields: Union[set[str], None] = None
- ) -> list[Union[T, None]]:
- raise NotImplementedError
-
- async def filter_keys(self, data: list[str]) -> set[str]:
- """return un-exist keys"""
- raise NotImplementedError
-
- async def upsert(self, data: dict[str, T]):
- raise NotImplementedError
-
- async def drop(self):
- raise NotImplementedError
-
-
-@dataclass
-class BaseGraphStorage(StorageNameSpace):
- async def has_node(self, node_id: str) -> bool:
- raise NotImplementedError
-
- async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
- raise NotImplementedError
-
- async def node_degree(self, node_id: str) -> int:
- raise NotImplementedError
-
- async def edge_degree(self, src_id: str, tgt_id: str) -> int:
- raise NotImplementedError
-
- async def get_node(self, node_id: str) -> Union[dict, None]:
- raise NotImplementedError
-
- async def get_edge(
- self, source_node_id: str, target_node_id: str
- ) -> Union[dict, None]:
- raise NotImplementedError
-
- async def get_node_edges(
- self, source_node_id: str
- ) -> Union[list[tuple[str, str]], None]:
- raise NotImplementedError
-
- async def upsert_node(self, node_id: str, node_data: dict[str, str]):
- raise NotImplementedError
-
- async def upsert_edge(
- self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
- ):
- raise NotImplementedError
-
- async def clustering(self, algorithm: str):
- raise NotImplementedError
-
- async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
- raise NotImplementedError("Node embedding is not used in lightrag.")
\ No newline at end of file
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
deleted file mode 100644
index 836fda9e..00000000
--- a/lightrag/lightrag.py
+++ /dev/null
@@ -1,300 +0,0 @@
-import asyncio
-import os
-from dataclasses import asdict, dataclass, field
-from datetime import datetime
-from functools import partial
-from typing import Type, cast
-
-from .llm import gpt_4o_complete, gpt_4o_mini_complete, openai_embedding
-from .operate import (
- chunking_by_token_size,
- extract_entities,
- local_query,
- global_query,
- hybird_query,
- naive_query,
-)
-
-from .storage import (
- JsonKVStorage,
- NanoVectorDBStorage,
- NetworkXStorage,
-)
-from .utils import (
- EmbeddingFunc,
- compute_mdhash_id,
- limit_async_func_call,
- convert_response_to_json,
- logger,
- set_logger,
-)
-from .base import (
- BaseGraphStorage,
- BaseKVStorage,
- BaseVectorStorage,
- StorageNameSpace,
- QueryParam,
-)
-
-def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
- try:
- # If there is already an event loop, use it.
- loop = asyncio.get_event_loop()
- except RuntimeError:
- # If in a sub-thread, create a new event loop.
- logger.info("Creating a new event loop in a sub-thread.")
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
- return loop
-
-@dataclass
-class LightRAG:
- working_dir: str = field(
- default_factory=lambda: f"./lightrag_cache_{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}"
- )
-
- # text chunking
- chunk_token_size: int = 1200
- chunk_overlap_token_size: int = 100
- tiktoken_model_name: str = "gpt-4o-mini"
-
- # entity extraction
- entity_extract_max_gleaning: int = 1
- entity_summary_to_max_tokens: int = 500
-
- # node embedding
- node_embedding_algorithm: str = "node2vec"
- node2vec_params: dict = field(
- default_factory=lambda: {
- "dimensions": 1536,
- "num_walks": 10,
- "walk_length": 40,
- "num_walks": 10,
- "window_size": 2,
- "iterations": 3,
- "random_seed": 3,
- }
- )
-
- # text embedding
- embedding_func: EmbeddingFunc = field(default_factory=lambda: openai_embedding)
- embedding_batch_num: int = 32
- embedding_func_max_async: int = 16
-
- # LLM
- llm_model_func: callable = gpt_4o_mini_complete
- llm_model_max_token_size: int = 32768
- llm_model_max_async: int = 16
-
- # storage
- key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage
- vector_db_storage_cls: Type[BaseVectorStorage] = NanoVectorDBStorage
- vector_db_storage_cls_kwargs: dict = field(default_factory=dict)
- graph_storage_cls: Type[BaseGraphStorage] = NetworkXStorage
- enable_llm_cache: bool = True
-
- # extension
- addon_params: dict = field(default_factory=dict)
- convert_response_to_json_func: callable = convert_response_to_json
-
- def __post_init__(self):
- log_file = os.path.join(self.working_dir, "lightrag.log")
- set_logger(log_file)
- logger.info(f"Logger initialized for working directory: {self.working_dir}")
-
- _print_config = ",\n ".join([f"{k} = {v}" for k, v in asdict(self).items()])
- logger.debug(f"LightRAG init with param:\n {_print_config}\n")
-
- if not os.path.exists(self.working_dir):
- logger.info(f"Creating working directory {self.working_dir}")
- os.makedirs(self.working_dir)
-
- self.full_docs = self.key_string_value_json_storage_cls(
- namespace="full_docs", global_config=asdict(self)
- )
-
- self.text_chunks = self.key_string_value_json_storage_cls(
- namespace="text_chunks", global_config=asdict(self)
- )
-
- self.llm_response_cache = (
- self.key_string_value_json_storage_cls(
- namespace="llm_response_cache", global_config=asdict(self)
- )
- if self.enable_llm_cache
- else None
- )
- self.chunk_entity_relation_graph = self.graph_storage_cls(
- namespace="chunk_entity_relation", global_config=asdict(self)
- )
- self.embedding_func = limit_async_func_call(self.embedding_func_max_async)(
- self.embedding_func
- )
- self.entities_vdb = (
- self.vector_db_storage_cls(
- namespace="entities",
- global_config=asdict(self),
- embedding_func=self.embedding_func,
- meta_fields={"entity_name"}
- )
- )
- self.relationships_vdb = (
- self.vector_db_storage_cls(
- namespace="relationships",
- global_config=asdict(self),
- embedding_func=self.embedding_func,
- meta_fields={"src_id", "tgt_id"}
- )
- )
- self.chunks_vdb = (
- self.vector_db_storage_cls(
- namespace="chunks",
- global_config=asdict(self),
- embedding_func=self.embedding_func,
- )
- )
-
- self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
- partial(self.llm_model_func, hashing_kv=self.llm_response_cache)
- )
-
- def insert(self, string_or_strings):
- loop = always_get_an_event_loop()
- return loop.run_until_complete(self.ainsert(string_or_strings))
-
- async def ainsert(self, string_or_strings):
- try:
- if isinstance(string_or_strings, str):
- string_or_strings = [string_or_strings]
-
- new_docs = {
- compute_mdhash_id(c.strip(), prefix="doc-"): {"content": c.strip()}
- for c in string_or_strings
- }
- _add_doc_keys = await self.full_docs.filter_keys(list(new_docs.keys()))
- new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
- if not len(new_docs):
- logger.warning(f"All docs are already in the storage")
- return
- logger.info(f"[New Docs] inserting {len(new_docs)} docs")
-
- inserting_chunks = {}
- for doc_key, doc in new_docs.items():
- chunks = {
- compute_mdhash_id(dp["content"], prefix="chunk-"): {
- **dp,
- "full_doc_id": doc_key,
- }
- for dp in chunking_by_token_size(
- doc["content"],
- overlap_token_size=self.chunk_overlap_token_size,
- max_token_size=self.chunk_token_size,
- tiktoken_model=self.tiktoken_model_name,
- )
- }
- inserting_chunks.update(chunks)
- _add_chunk_keys = await self.text_chunks.filter_keys(
- list(inserting_chunks.keys())
- )
- inserting_chunks = {
- k: v for k, v in inserting_chunks.items() if k in _add_chunk_keys
- }
- if not len(inserting_chunks):
- logger.warning(f"All chunks are already in the storage")
- return
- logger.info(f"[New Chunks] inserting {len(inserting_chunks)} chunks")
-
- await self.chunks_vdb.upsert(inserting_chunks)
-
- logger.info("[Entity Extraction]...")
- maybe_new_kg = await extract_entities(
- inserting_chunks,
- knwoledge_graph_inst=self.chunk_entity_relation_graph,
- entity_vdb=self.entities_vdb,
- relationships_vdb=self.relationships_vdb,
- global_config=asdict(self),
- )
- if maybe_new_kg is None:
- logger.warning("No new entities and relationships found")
- return
- self.chunk_entity_relation_graph = maybe_new_kg
-
- await self.full_docs.upsert(new_docs)
- await self.text_chunks.upsert(inserting_chunks)
- finally:
- await self._insert_done()
-
- async def _insert_done(self):
- tasks = []
- for storage_inst in [
- self.full_docs,
- self.text_chunks,
- self.llm_response_cache,
- self.entities_vdb,
- self.relationships_vdb,
- self.chunks_vdb,
- self.chunk_entity_relation_graph,
- ]:
- if storage_inst is None:
- continue
- tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
- await asyncio.gather(*tasks)
-
- def query(self, query: str, param: QueryParam = QueryParam()):
- loop = always_get_an_event_loop()
- return loop.run_until_complete(self.aquery(query, param))
-
- async def aquery(self, query: str, param: QueryParam = QueryParam()):
- if param.mode == "local":
- response = await local_query(
- query,
- self.chunk_entity_relation_graph,
- self.entities_vdb,
- self.relationships_vdb,
- self.text_chunks,
- param,
- asdict(self),
- )
- elif param.mode == "global":
- response = await global_query(
- query,
- self.chunk_entity_relation_graph,
- self.entities_vdb,
- self.relationships_vdb,
- self.text_chunks,
- param,
- asdict(self),
- )
- elif param.mode == "hybird":
- response = await hybird_query(
- query,
- self.chunk_entity_relation_graph,
- self.entities_vdb,
- self.relationships_vdb,
- self.text_chunks,
- param,
- asdict(self),
- )
- elif param.mode == "naive":
- response = await naive_query(
- query,
- self.chunks_vdb,
- self.text_chunks,
- param,
- asdict(self),
- )
- else:
- raise ValueError(f"Unknown mode {param.mode}")
- await self._query_done()
- return response
-
-
- async def _query_done(self):
- tasks = []
- for storage_inst in [self.llm_response_cache]:
- if storage_inst is None:
- continue
- tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
- await asyncio.gather(*tasks)
-
-
diff --git a/lightrag/llm.py b/lightrag/llm.py
deleted file mode 100644
index ee700a10..00000000
--- a/lightrag/llm.py
+++ /dev/null
@@ -1,88 +0,0 @@
-import os
-import numpy as np
-from openai import AsyncOpenAI, APIConnectionError, RateLimitError, Timeout
-from tenacity import (
- retry,
- stop_after_attempt,
- wait_exponential,
- retry_if_exception_type,
-)
-
-from .base import BaseKVStorage
-from .utils import compute_args_hash, wrap_embedding_func_with_attrs
-
-@retry(
- stop=stop_after_attempt(3),
- wait=wait_exponential(multiplier=1, min=4, max=10),
- retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
-)
-async def openai_complete_if_cache(
- model, prompt, system_prompt=None, history_messages=[], **kwargs
-) -> str:
- openai_async_client = AsyncOpenAI()
- hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
- messages = []
- if system_prompt:
- messages.append({"role": "system", "content": system_prompt})
- messages.extend(history_messages)
- messages.append({"role": "user", "content": prompt})
- if hashing_kv is not None:
- args_hash = compute_args_hash(model, messages)
- if_cache_return = await hashing_kv.get_by_id(args_hash)
- if if_cache_return is not None:
- return if_cache_return["return"]
-
- response = await openai_async_client.chat.completions.create(
- model=model, messages=messages, **kwargs
- )
-
- if hashing_kv is not None:
- await hashing_kv.upsert(
- {args_hash: {"return": response.choices[0].message.content, "model": model}}
- )
- return response.choices[0].message.content
-
-async def gpt_4o_complete(
- prompt, system_prompt=None, history_messages=[], **kwargs
-) -> str:
- return await openai_complete_if_cache(
- "gpt-4o",
- prompt,
- system_prompt=system_prompt,
- history_messages=history_messages,
- **kwargs,
- )
-
-
-async def gpt_4o_mini_complete(
- prompt, system_prompt=None, history_messages=[], **kwargs
-) -> str:
- return await openai_complete_if_cache(
- "gpt-4o-mini",
- prompt,
- system_prompt=system_prompt,
- history_messages=history_messages,
- **kwargs,
- )
-
-@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
-@retry(
- stop=stop_after_attempt(3),
- wait=wait_exponential(multiplier=1, min=4, max=10),
- retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
-)
-async def openai_embedding(texts: list[str]) -> np.ndarray:
- openai_async_client = AsyncOpenAI()
- response = await openai_async_client.embeddings.create(
- model="text-embedding-3-small", input=texts, encoding_format="float"
- )
- return np.array([dp.embedding for dp in response.data])
-
-if __name__ == "__main__":
- import asyncio
-
- async def main():
- result = await gpt_4o_mini_complete('How are you?')
- print(result)
-
- asyncio.run(main())
diff --git a/lightrag/operate.py b/lightrag/operate.py
deleted file mode 100644
index 2d3271da..00000000
--- a/lightrag/operate.py
+++ /dev/null
@@ -1,944 +0,0 @@
-import asyncio
-import json
-import re
-from typing import Union
-from collections import Counter, defaultdict
-
-from .utils import (
- logger,
- clean_str,
- compute_mdhash_id,
- decode_tokens_by_tiktoken,
- encode_string_by_tiktoken,
- is_float_regex,
- list_of_list_to_csv,
- pack_user_ass_to_openai_messages,
- split_string_by_multi_markers,
- truncate_list_by_token_size,
-)
-from .base import (
- BaseGraphStorage,
- BaseKVStorage,
- BaseVectorStorage,
- TextChunkSchema,
- QueryParam,
-)
-from .prompt import GRAPH_FIELD_SEP, PROMPTS
-
-def chunking_by_token_size(
- content: str, overlap_token_size=128, max_token_size=1024, tiktoken_model="gpt-4o"
-):
- tokens = encode_string_by_tiktoken(content, model_name=tiktoken_model)
- results = []
- for index, start in enumerate(
- range(0, len(tokens), max_token_size - overlap_token_size)
- ):
- chunk_content = decode_tokens_by_tiktoken(
- tokens[start : start + max_token_size], model_name=tiktoken_model
- )
- results.append(
- {
- "tokens": min(max_token_size, len(tokens) - start),
- "content": chunk_content.strip(),
- "chunk_order_index": index,
- }
- )
- return results
-
-async def _handle_entity_relation_summary(
- entity_or_relation_name: str,
- description: str,
- global_config: dict,
-) -> str:
- use_llm_func: callable = global_config["llm_model_func"]
- llm_max_tokens = global_config["llm_model_max_token_size"]
- tiktoken_model_name = global_config["tiktoken_model_name"]
- summary_max_tokens = global_config["entity_summary_to_max_tokens"]
-
- tokens = encode_string_by_tiktoken(description, model_name=tiktoken_model_name)
- if len(tokens) < summary_max_tokens: # No need for summary
- return description
- prompt_template = PROMPTS["summarize_entity_descriptions"]
- use_description = decode_tokens_by_tiktoken(
- tokens[:llm_max_tokens], model_name=tiktoken_model_name
- )
- context_base = dict(
- entity_name=entity_or_relation_name,
- description_list=use_description.split(GRAPH_FIELD_SEP),
- )
- use_prompt = prompt_template.format(**context_base)
- logger.debug(f"Trigger summary: {entity_or_relation_name}")
- summary = await use_llm_func(use_prompt, max_tokens=summary_max_tokens)
- return summary
-
-
-async def _handle_single_entity_extraction(
- record_attributes: list[str],
- chunk_key: str,
-):
- if record_attributes[0] != '"entity"' or len(record_attributes) < 4:
- return None
- # add this record as a node in the G
- entity_name = clean_str(record_attributes[1].upper())
- if not entity_name.strip():
- return None
- entity_type = clean_str(record_attributes[2].upper())
- entity_description = clean_str(record_attributes[3])
- entity_source_id = chunk_key
- return dict(
- entity_name=entity_name,
- entity_type=entity_type,
- description=entity_description,
- source_id=entity_source_id,
- )
-
-
-async def _handle_single_relationship_extraction(
- record_attributes: list[str],
- chunk_key: str,
-):
- if record_attributes[0] != '"relationship"' or len(record_attributes) < 5:
- return None
- # add this record as edge
- source = clean_str(record_attributes[1].upper())
- target = clean_str(record_attributes[2].upper())
- edge_description = clean_str(record_attributes[3])
-
- edge_keywords = clean_str(record_attributes[4])
- edge_source_id = chunk_key
- weight = (
- float(record_attributes[-1]) if is_float_regex(record_attributes[-1]) else 1.0
- )
- return dict(
- src_id=source,
- tgt_id=target,
- weight=weight,
- description=edge_description,
- keywords=edge_keywords,
- source_id=edge_source_id,
- )
-
-
-async def _merge_nodes_then_upsert(
- entity_name: str,
- nodes_data: list[dict],
- knwoledge_graph_inst: BaseGraphStorage,
- global_config: dict,
-):
- already_entitiy_types = []
- already_source_ids = []
- already_description = []
-
- already_node = await knwoledge_graph_inst.get_node(entity_name)
- if already_node is not None:
- already_entitiy_types.append(already_node["entity_type"])
- already_source_ids.extend(
- split_string_by_multi_markers(already_node["source_id"], [GRAPH_FIELD_SEP])
- )
- already_description.append(already_node["description"])
-
- entity_type = sorted(
- Counter(
- [dp["entity_type"] for dp in nodes_data] + already_entitiy_types
- ).items(),
- key=lambda x: x[1],
- reverse=True,
- )[0][0]
- description = GRAPH_FIELD_SEP.join(
- sorted(set([dp["description"] for dp in nodes_data] + already_description))
- )
- source_id = GRAPH_FIELD_SEP.join(
- set([dp["source_id"] for dp in nodes_data] + already_source_ids)
- )
- description = await _handle_entity_relation_summary(
- entity_name, description, global_config
- )
- node_data = dict(
- entity_type=entity_type,
- description=description,
- source_id=source_id,
- )
- await knwoledge_graph_inst.upsert_node(
- entity_name,
- node_data=node_data,
- )
- node_data["entity_name"] = entity_name
- return node_data
-
-
-async def _merge_edges_then_upsert(
- src_id: str,
- tgt_id: str,
- edges_data: list[dict],
- knwoledge_graph_inst: BaseGraphStorage,
- global_config: dict,
-):
- already_weights = []
- already_source_ids = []
- already_description = []
- already_keywords = []
-
- if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
- already_edge = await knwoledge_graph_inst.get_edge(src_id, tgt_id)
- already_weights.append(already_edge["weight"])
- already_source_ids.extend(
- split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
- )
- already_description.append(already_edge["description"])
- already_keywords.extend(
- split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
- )
-
- weight = sum([dp["weight"] for dp in edges_data] + already_weights)
- description = GRAPH_FIELD_SEP.join(
- sorted(set([dp["description"] for dp in edges_data] + already_description))
- )
- keywords = GRAPH_FIELD_SEP.join(
- sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
- )
- source_id = GRAPH_FIELD_SEP.join(
- set([dp["source_id"] for dp in edges_data] + already_source_ids)
- )
- for need_insert_id in [src_id, tgt_id]:
- if not (await knwoledge_graph_inst.has_node(need_insert_id)):
- await knwoledge_graph_inst.upsert_node(
- need_insert_id,
- node_data={
- "source_id": source_id,
- "description": description,
- "entity_type": '"UNKNOWN"',
- },
- )
- description = await _handle_entity_relation_summary(
- (src_id, tgt_id), description, global_config
- )
- await knwoledge_graph_inst.upsert_edge(
- src_id,
- tgt_id,
- edge_data=dict(
- weight=weight,
- description=description,
- keywords=keywords,
- source_id=source_id,
- ),
- )
-
- edge_data = dict(
- src_id=src_id,
- tgt_id=tgt_id,
- description=description,
- keywords=keywords,
- )
-
- return edge_data
-
-async def extract_entities(
- chunks: dict[str, TextChunkSchema],
- knwoledge_graph_inst: BaseGraphStorage,
- entity_vdb: BaseVectorStorage,
- relationships_vdb: BaseVectorStorage,
- global_config: dict,
-) -> Union[BaseGraphStorage, None]:
- use_llm_func: callable = global_config["llm_model_func"]
- entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"]
-
- ordered_chunks = list(chunks.items())
-
- entity_extract_prompt = PROMPTS["entity_extraction"]
- context_base = dict(
- tuple_delimiter=PROMPTS["DEFAULT_TUPLE_DELIMITER"],
- record_delimiter=PROMPTS["DEFAULT_RECORD_DELIMITER"],
- completion_delimiter=PROMPTS["DEFAULT_COMPLETION_DELIMITER"],
- entity_types=",".join(PROMPTS["DEFAULT_ENTITY_TYPES"]),
- )
- continue_prompt = PROMPTS["entiti_continue_extraction"]
- if_loop_prompt = PROMPTS["entiti_if_loop_extraction"]
-
- already_processed = 0
- already_entities = 0
- already_relations = 0
-
- async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
- nonlocal already_processed, already_entities, already_relations
- chunk_key = chunk_key_dp[0]
- chunk_dp = chunk_key_dp[1]
- content = chunk_dp["content"]
- hint_prompt = entity_extract_prompt.format(**context_base, input_text=content)
- final_result = await use_llm_func(hint_prompt)
-
- history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
- for now_glean_index in range(entity_extract_max_gleaning):
- glean_result = await use_llm_func(continue_prompt, history_messages=history)
-
- history += pack_user_ass_to_openai_messages(continue_prompt, glean_result)
- final_result += glean_result
- if now_glean_index == entity_extract_max_gleaning - 1:
- break
-
- if_loop_result: str = await use_llm_func(
- if_loop_prompt, history_messages=history
- )
- if_loop_result = if_loop_result.strip().strip('"').strip("'").lower()
- if if_loop_result != "yes":
- break
-
- records = split_string_by_multi_markers(
- final_result,
- [context_base["record_delimiter"], context_base["completion_delimiter"]],
- )
-
- maybe_nodes = defaultdict(list)
- maybe_edges = defaultdict(list)
- for record in records:
- record = re.search(r"\((.*)\)", record)
- if record is None:
- continue
- record = record.group(1)
- record_attributes = split_string_by_multi_markers(
- record, [context_base["tuple_delimiter"]]
- )
- if_entities = await _handle_single_entity_extraction(
- record_attributes, chunk_key
- )
- if if_entities is not None:
- maybe_nodes[if_entities["entity_name"]].append(if_entities)
- continue
-
- if_relation = await _handle_single_relationship_extraction(
- record_attributes, chunk_key
- )
- if if_relation is not None:
- maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append(
- if_relation
- )
- already_processed += 1
- already_entities += len(maybe_nodes)
- already_relations += len(maybe_edges)
- now_ticks = PROMPTS["process_tickers"][
- already_processed % len(PROMPTS["process_tickers"])
- ]
- print(
- f"{now_ticks} Processed {already_processed} chunks, {already_entities} entities(duplicated), {already_relations} relations(duplicated)\r",
- end="",
- flush=True,
- )
- return dict(maybe_nodes), dict(maybe_edges)
-
- # use_llm_func is wrapped in ascynio.Semaphore, limiting max_async callings
- results = await asyncio.gather(
- *[_process_single_content(c) for c in ordered_chunks]
- )
- print() # clear the progress bar
- maybe_nodes = defaultdict(list)
- maybe_edges = defaultdict(list)
- for m_nodes, m_edges in results:
- for k, v in m_nodes.items():
- maybe_nodes[k].extend(v)
- for k, v in m_edges.items():
- maybe_edges[tuple(sorted(k))].extend(v)
- all_entities_data = await asyncio.gather(
- *[
- _merge_nodes_then_upsert(k, v, knwoledge_graph_inst, global_config)
- for k, v in maybe_nodes.items()
- ]
- )
- all_relationships_data = await asyncio.gather(
- *[
- _merge_edges_then_upsert(k[0], k[1], v, knwoledge_graph_inst, global_config)
- for k, v in maybe_edges.items()
- ]
- )
- if not len(all_entities_data):
- logger.warning("Didn't extract any entities, maybe your LLM is not working")
- return None
- if not len(all_relationships_data):
- logger.warning("Didn't extract any relationships, maybe your LLM is not working")
- return None
-
- if entity_vdb is not None:
- data_for_vdb = {
- compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
- "content": dp["entity_name"] + dp["description"],
- "entity_name": dp["entity_name"],
- }
- for dp in all_entities_data
- }
- await entity_vdb.upsert(data_for_vdb)
-
- if relationships_vdb is not None:
- data_for_vdb = {
- compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): {
- "src_id": dp["src_id"],
- "tgt_id": dp["tgt_id"],
- "content": dp["keywords"] + dp["src_id"] + dp["tgt_id"] + dp["description"],
- }
- for dp in all_relationships_data
- }
- await relationships_vdb.upsert(data_for_vdb)
-
- return knwoledge_graph_inst
-
-async def local_query(
- query,
- knowledge_graph_inst: BaseGraphStorage,
- entities_vdb: BaseVectorStorage,
- relationships_vdb: BaseVectorStorage,
- text_chunks_db: BaseKVStorage[TextChunkSchema],
- query_param: QueryParam,
- global_config: dict,
-) -> str:
- use_model_func = global_config["llm_model_func"]
-
- kw_prompt_temp = PROMPTS["keywords_extraction"]
- kw_prompt = kw_prompt_temp.format(query=query)
- result = await use_model_func(kw_prompt)
-
- try:
- keywords_data = json.loads(result)
- keywords = keywords_data.get("low_level_keywords", [])
- keywords = ', '.join(keywords)
- except json.JSONDecodeError as e:
- # Handle parsing error
- print(f"JSON parsing error: {e}")
- return PROMPTS["fail_response"]
-
- context = await _build_local_query_context(
- keywords,
- knowledge_graph_inst,
- entities_vdb,
- text_chunks_db,
- query_param,
- )
- if query_param.only_need_context:
- return context
- if context is None:
- return PROMPTS["fail_response"]
- sys_prompt_temp = PROMPTS["rag_response"]
- sys_prompt = sys_prompt_temp.format(
- context_data=context, response_type=query_param.response_type
- )
- response = await use_model_func(
- query,
- system_prompt=sys_prompt,
- )
- return response
-
-async def _build_local_query_context(
- query,
- knowledge_graph_inst: BaseGraphStorage,
- entities_vdb: BaseVectorStorage,
- text_chunks_db: BaseKVStorage[TextChunkSchema],
- query_param: QueryParam,
-):
- results = await entities_vdb.query(query, top_k=query_param.top_k)
- if not len(results):
- return None
- node_datas = await asyncio.gather(
- *[knowledge_graph_inst.get_node(r["entity_name"]) for r in results]
- )
- if not all([n is not None for n in node_datas]):
- logger.warning("Some nodes are missing, maybe the storage is damaged")
- node_degrees = await asyncio.gather(
- *[knowledge_graph_inst.node_degree(r["entity_name"]) for r in results]
- )
- node_datas = [
- {**n, "entity_name": k["entity_name"], "rank": d}
- for k, n, d in zip(results, node_datas, node_degrees)
- if n is not None
- ]
- use_text_units = await _find_most_related_text_unit_from_entities(
- node_datas, query_param, text_chunks_db, knowledge_graph_inst
- )
- use_relations = await _find_most_related_edges_from_entities(
- node_datas, query_param, knowledge_graph_inst
- )
- logger.info(
- f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} text units"
- )
- entites_section_list = [["id", "entity", "type", "description", "rank"]]
- for i, n in enumerate(node_datas):
- entites_section_list.append(
- [
- i,
- n["entity_name"],
- n.get("entity_type", "UNKNOWN"),
- n.get("description", "UNKNOWN"),
- n["rank"],
- ]
- )
- entities_context = list_of_list_to_csv(entites_section_list)
-
- relations_section_list = [
- ["id", "source", "target", "description", "keywords", "weight", "rank"]
- ]
- for i, e in enumerate(use_relations):
- relations_section_list.append(
- [
- i,
- e["src_tgt"][0],
- e["src_tgt"][1],
- e["description"],
- e["keywords"],
- e["weight"],
- e["rank"],
- ]
- )
- relations_context = list_of_list_to_csv(relations_section_list)
-
- text_units_section_list = [["id", "content"]]
- for i, t in enumerate(use_text_units):
- text_units_section_list.append([i, t["content"]])
- text_units_context = list_of_list_to_csv(text_units_section_list)
- return f"""
------Entities-----
-```csv
-{entities_context}
-```
------Relationships-----
-```csv
-{relations_context}
-```
------Sources-----
-```csv
-{text_units_context}
-```
-"""
-
-async def _find_most_related_text_unit_from_entities(
- node_datas: list[dict],
- query_param: QueryParam,
- text_chunks_db: BaseKVStorage[TextChunkSchema],
- knowledge_graph_inst: BaseGraphStorage,
-):
- text_units = [
- split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
- for dp in node_datas
- ]
- edges = await asyncio.gather(
- *[knowledge_graph_inst.get_node_edges(dp["entity_name"]) for dp in node_datas]
- )
- all_one_hop_nodes = set()
- for this_edges in edges:
- if not this_edges:
- continue
- all_one_hop_nodes.update([e[1] for e in this_edges])
- all_one_hop_nodes = list(all_one_hop_nodes)
- all_one_hop_nodes_data = await asyncio.gather(
- *[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
- )
- all_one_hop_text_units_lookup = {
- k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
- for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
- if v is not None
- }
- all_text_units_lookup = {}
- for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
- for c_id in this_text_units:
- if c_id in all_text_units_lookup:
- continue
- relation_counts = 0
- for e in this_edges:
- if (
- e[1] in all_one_hop_text_units_lookup
- and c_id in all_one_hop_text_units_lookup[e[1]]
- ):
- relation_counts += 1
- all_text_units_lookup[c_id] = {
- "data": await text_chunks_db.get_by_id(c_id),
- "order": index,
- "relation_counts": relation_counts,
- }
- if any([v is None for v in all_text_units_lookup.values()]):
- logger.warning("Text chunks are missing, maybe the storage is damaged")
- all_text_units = [
- {"id": k, **v} for k, v in all_text_units_lookup.items() if v is not None
- ]
- all_text_units = sorted(
- all_text_units, key=lambda x: (x["order"], -x["relation_counts"])
- )
- all_text_units = truncate_list_by_token_size(
- all_text_units,
- key=lambda x: x["data"]["content"],
- max_token_size=query_param.max_token_for_text_unit,
- )
- all_text_units: list[TextChunkSchema] = [t["data"] for t in all_text_units]
- return all_text_units
-
-async def _find_most_related_edges_from_entities(
- node_datas: list[dict],
- query_param: QueryParam,
- knowledge_graph_inst: BaseGraphStorage,
-):
- all_related_edges = await asyncio.gather(
- *[knowledge_graph_inst.get_node_edges(dp["entity_name"]) for dp in node_datas]
- )
- all_edges = set()
- for this_edges in all_related_edges:
- all_edges.update([tuple(sorted(e)) for e in this_edges])
- all_edges = list(all_edges)
- all_edges_pack = await asyncio.gather(
- *[knowledge_graph_inst.get_edge(e[0], e[1]) for e in all_edges]
- )
- all_edges_degree = await asyncio.gather(
- *[knowledge_graph_inst.edge_degree(e[0], e[1]) for e in all_edges]
- )
- all_edges_data = [
- {"src_tgt": k, "rank": d, **v}
- for k, v, d in zip(all_edges, all_edges_pack, all_edges_degree)
- if v is not None
- ]
- all_edges_data = sorted(
- all_edges_data, key=lambda x: (x["rank"], x["weight"]), reverse=True
- )
- all_edges_data = truncate_list_by_token_size(
- all_edges_data,
- key=lambda x: x["description"],
- max_token_size=query_param.max_token_for_global_context,
- )
- return all_edges_data
-
-async def global_query(
- query,
- knowledge_graph_inst: BaseGraphStorage,
- entities_vdb: BaseVectorStorage,
- relationships_vdb: BaseVectorStorage,
- text_chunks_db: BaseKVStorage[TextChunkSchema],
- query_param: QueryParam,
- global_config: dict,
-) -> str:
- use_model_func = global_config["llm_model_func"]
-
- kw_prompt_temp = PROMPTS["keywords_extraction"]
- kw_prompt = kw_prompt_temp.format(query=query)
- result = await use_model_func(kw_prompt)
-
- try:
- keywords_data = json.loads(result)
- keywords = keywords_data.get("high_level_keywords", [])
- keywords = ', '.join(keywords)
- except json.JSONDecodeError as e:
- # Handle parsing error
- print(f"JSON parsing error: {e}")
- return PROMPTS["fail_response"]
-
- context = await _build_global_query_context(
- keywords,
- knowledge_graph_inst,
- entities_vdb,
- relationships_vdb,
- text_chunks_db,
- query_param,
- )
-
- if query_param.only_need_context:
- return context
- if context is None:
- return PROMPTS["fail_response"]
-
- sys_prompt_temp = PROMPTS["rag_response"]
- sys_prompt = sys_prompt_temp.format(
- context_data=context, response_type=query_param.response_type
- )
- response = await use_model_func(
- query,
- system_prompt=sys_prompt,
- )
- return response
-
-async def _build_global_query_context(
- keywords,
- knowledge_graph_inst: BaseGraphStorage,
- entities_vdb: BaseVectorStorage,
- relationships_vdb: BaseVectorStorage,
- text_chunks_db: BaseKVStorage[TextChunkSchema],
- query_param: QueryParam,
-):
- results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
-
- if not len(results):
- return None
-
- edge_datas = await asyncio.gather(
- *[knowledge_graph_inst.get_edge(r["src_id"], r["tgt_id"]) for r in results]
- )
-
- if not all([n is not None for n in edge_datas]):
- logger.warning("Some edges are missing, maybe the storage is damaged")
- edge_degree = await asyncio.gather(
- *[knowledge_graph_inst.edge_degree(r["src_id"], r["tgt_id"]) for r in results]
- )
- edge_datas = [
- {"src_id": k["src_id"], "tgt_id": k["tgt_id"], "rank": d, **v}
- for k, v, d in zip(results, edge_datas, edge_degree)
- if v is not None
- ]
- edge_datas = sorted(
- edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
- )
- edge_datas = truncate_list_by_token_size(
- edge_datas,
- key=lambda x: x["description"],
- max_token_size=query_param.max_token_for_global_context,
- )
-
- use_entities = await _find_most_related_entities_from_relationships(
- edge_datas, query_param, knowledge_graph_inst
- )
- use_text_units = await _find_related_text_unit_from_relationships(
- edge_datas, query_param, text_chunks_db, knowledge_graph_inst
- )
- logger.info(
- f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} text units"
- )
- relations_section_list = [
- ["id", "source", "target", "description", "keywords", "weight", "rank"]
- ]
- for i, e in enumerate(edge_datas):
- relations_section_list.append(
- [
- i,
- e["src_id"],
- e["tgt_id"],
- e["description"],
- e["keywords"],
- e["weight"],
- e["rank"],
- ]
- )
- relations_context = list_of_list_to_csv(relations_section_list)
-
- entites_section_list = [["id", "entity", "type", "description", "rank"]]
- for i, n in enumerate(use_entities):
- entites_section_list.append(
- [
- i,
- n["entity_name"],
- n.get("entity_type", "UNKNOWN"),
- n.get("description", "UNKNOWN"),
- n["rank"],
- ]
- )
- entities_context = list_of_list_to_csv(entites_section_list)
-
- text_units_section_list = [["id", "content"]]
- for i, t in enumerate(use_text_units):
- text_units_section_list.append([i, t["content"]])
- text_units_context = list_of_list_to_csv(text_units_section_list)
-
- return f"""
------Entities-----
-```csv
-{entities_context}
-```
------Relationships-----
-```csv
-{relations_context}
-```
------Sources-----
-```csv
-{text_units_context}
-```
-"""
-
-async def _find_most_related_entities_from_relationships(
- edge_datas: list[dict],
- query_param: QueryParam,
- knowledge_graph_inst: BaseGraphStorage,
-):
- entity_names = set()
- for e in edge_datas:
- entity_names.add(e["src_id"])
- entity_names.add(e["tgt_id"])
-
- node_datas = await asyncio.gather(
- *[knowledge_graph_inst.get_node(entity_name) for entity_name in entity_names]
- )
-
- node_degrees = await asyncio.gather(
- *[knowledge_graph_inst.node_degree(entity_name) for entity_name in entity_names]
- )
- node_datas = [
- {**n, "entity_name": k, "rank": d}
- for k, n, d in zip(entity_names, node_datas, node_degrees)
- ]
-
- node_datas = truncate_list_by_token_size(
- node_datas,
- key=lambda x: x["description"],
- max_token_size=query_param.max_token_for_local_context,
- )
-
- return node_datas
-
-async def _find_related_text_unit_from_relationships(
- edge_datas: list[dict],
- query_param: QueryParam,
- text_chunks_db: BaseKVStorage[TextChunkSchema],
- knowledge_graph_inst: BaseGraphStorage,
-):
-
- text_units = [
- split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
- for dp in edge_datas
- ]
-
- all_text_units_lookup = {}
-
- for index, unit_list in enumerate(text_units):
- for c_id in unit_list:
- if c_id not in all_text_units_lookup:
- all_text_units_lookup[c_id] = {
- "data": await text_chunks_db.get_by_id(c_id),
- "order": index,
- }
-
- if any([v is None for v in all_text_units_lookup.values()]):
- logger.warning("Text chunks are missing, maybe the storage is damaged")
- all_text_units = [
- {"id": k, **v} for k, v in all_text_units_lookup.items() if v is not None
- ]
- all_text_units = sorted(
- all_text_units, key=lambda x: x["order"]
- )
- all_text_units = truncate_list_by_token_size(
- all_text_units,
- key=lambda x: x["data"]["content"],
- max_token_size=query_param.max_token_for_text_unit,
- )
- all_text_units: list[TextChunkSchema] = [t["data"] for t in all_text_units]
-
- return all_text_units
-
-async def hybird_query(
- query,
- knowledge_graph_inst: BaseGraphStorage,
- entities_vdb: BaseVectorStorage,
- relationships_vdb: BaseVectorStorage,
- text_chunks_db: BaseKVStorage[TextChunkSchema],
- query_param: QueryParam,
- global_config: dict,
-) -> str:
- use_model_func = global_config["llm_model_func"]
-
- kw_prompt_temp = PROMPTS["keywords_extraction"]
- kw_prompt = kw_prompt_temp.format(query=query)
- result = await use_model_func(kw_prompt)
-
- try:
- keywords_data = json.loads(result)
- hl_keywords = keywords_data.get("high_level_keywords", [])
- ll_keywords = keywords_data.get("low_level_keywords", [])
- hl_keywords = ', '.join(hl_keywords)
- ll_keywords = ', '.join(ll_keywords)
- except json.JSONDecodeError as e:
- # Handle parsing error
- print(f"JSON parsing error: {e}")
- return PROMPTS["fail_response"]
-
- low_level_context = await _build_local_query_context(
- ll_keywords,
- knowledge_graph_inst,
- entities_vdb,
- text_chunks_db,
- query_param,
- )
-
- high_level_context = await _build_global_query_context(
- hl_keywords,
- knowledge_graph_inst,
- entities_vdb,
- relationships_vdb,
- text_chunks_db,
- query_param,
- )
-
- context = combine_contexts(high_level_context, low_level_context)
-
- if query_param.only_need_context:
- return context
- if context is None:
- return PROMPTS["fail_response"]
-
- sys_prompt_temp = PROMPTS["rag_response"]
- sys_prompt = sys_prompt_temp.format(
- context_data=context, response_type=query_param.response_type
- )
- response = await use_model_func(
- query,
- system_prompt=sys_prompt,
- )
- return response
-
-def combine_contexts(high_level_context, low_level_context):
- # Function to extract entities, relationships, and sources from context strings
- def extract_sections(context):
- entities_match = re.search(r'-----Entities-----\s*```csv\s*(.*?)\s*```', context, re.DOTALL)
- relationships_match = re.search(r'-----Relationships-----\s*```csv\s*(.*?)\s*```', context, re.DOTALL)
- sources_match = re.search(r'-----Sources-----\s*```csv\s*(.*?)\s*```', context, re.DOTALL)
-
- entities = entities_match.group(1) if entities_match else ''
- relationships = relationships_match.group(1) if relationships_match else ''
- sources = sources_match.group(1) if sources_match else ''
-
- return entities, relationships, sources
-
- # Extract sections from both contexts
- hl_entities, hl_relationships, hl_sources = extract_sections(high_level_context)
- ll_entities, ll_relationships, ll_sources = extract_sections(low_level_context)
-
- # Combine and deduplicate the entities
- combined_entities_set = set(filter(None, hl_entities.strip().split('\n') + ll_entities.strip().split('\n')))
- combined_entities = '\n'.join(combined_entities_set)
-
- # Combine and deduplicate the relationships
- combined_relationships_set = set(filter(None, hl_relationships.strip().split('\n') + ll_relationships.strip().split('\n')))
- combined_relationships = '\n'.join(combined_relationships_set)
-
- # Combine and deduplicate the sources
- combined_sources_set = set(filter(None, hl_sources.strip().split('\n') + ll_sources.strip().split('\n')))
- combined_sources = '\n'.join(combined_sources_set)
-
- # Format the combined context
- return f"""
------Entities-----
-```csv
-{combined_entities}
------Relationships-----
-{combined_relationships}
------Sources-----
-{combined_sources}
-"""
-
-async def naive_query(
- query,
- chunks_vdb: BaseVectorStorage,
- text_chunks_db: BaseKVStorage[TextChunkSchema],
- query_param: QueryParam,
- global_config: dict,
-):
- use_model_func = global_config["llm_model_func"]
- results = await chunks_vdb.query(query, top_k=query_param.top_k)
- if not len(results):
- return PROMPTS["fail_response"]
- chunks_ids = [r["id"] for r in results]
- chunks = await text_chunks_db.get_by_ids(chunks_ids)
-
- maybe_trun_chunks = truncate_list_by_token_size(
- chunks,
- key=lambda x: x["content"],
- max_token_size=query_param.max_token_for_text_unit,
- )
- logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
- section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
- if query_param.only_need_context:
- return section
- sys_prompt_temp = PROMPTS["naive_rag_response"]
- sys_prompt = sys_prompt_temp.format(
- content_data=section, response_type=query_param.response_type
- )
- response = await use_model_func(
- query,
- system_prompt=sys_prompt,
- )
- return response
-
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
deleted file mode 100644
index 5d28e49c..00000000
--- a/lightrag/prompt.py
+++ /dev/null
@@ -1,256 +0,0 @@
-GRAPH_FIELD_SEP = ""
-
-PROMPTS = {}
-
-PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
-PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
-PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
-PROMPTS["process_tickers"] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
-
-PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event"]
-
-PROMPTS[
- "entity_extraction"
-] = """-Goal-
-Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.
-
--Steps-
-1. Identify all entities. For each identified entity, extract the following information:
-- entity_name: Name of the entity, capitalized
-- entity_type: One of the following types: [{entity_types}]
-- entity_description: Comprehensive description of the entity's attributes and activities
-Format each entity as ("entity"{tuple_delimiter}{tuple_delimiter}{tuple_delimiter}
-
-2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
-For each pair of related entities, extract the following information:
-- source_entity: name of the source entity, as identified in step 1
-- target_entity: name of the target entity, as identified in step 1
-- relationship_description: explanation as to why you think the source entity and the target entity are related to each other
-- relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
-- relationship_keywords: one or more high-level key words that summarize the overarching nature of the relationship, focusing on concepts or themes rather than specific details
-Format each relationship as ("relationship"{tuple_delimiter}{tuple_delimiter}{tuple_delimiter}{tuple_delimiter}{tuple_delimiter})
-
-3. Identify high-level key words that summarize the main concepts, themes, or topics of the entire text. These should capture the overarching ideas present in the document.
-Format the content-level key words as ("content_keywords"{tuple_delimiter})
-
-4. Return output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
-
-5. When finished, output {completion_delimiter}
-
-######################
--Examples-
-######################
-Example 1:
-
-Entity_types: [person, technology, mission, organization, location]
-Text:
-while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
-
-Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. “If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us.”
-
-The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
-
-It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths
-################
-Output:
-("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is a character who experiences frustration and is observant of the dynamics among other characters."){record_delimiter}
-("entity"{tuple_delimiter}"Taylor"{tuple_delimiter}"person"{tuple_delimiter}"Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective."){record_delimiter}
-("entity"{tuple_delimiter}"Jordan"{tuple_delimiter}"person"{tuple_delimiter}"Jordan shares a commitment to discovery and has a significant interaction with Taylor regarding a device."){record_delimiter}
-("entity"{tuple_delimiter}"Cruz"{tuple_delimiter}"person"{tuple_delimiter}"Cruz is associated with a vision of control and order, influencing the dynamics among other characters."){record_delimiter}
-("entity"{tuple_delimiter}"The Device"{tuple_delimiter}"technology"{tuple_delimiter}"The Device is central to the story, with potential game-changing implications, and is revered by Taylor."){record_delimiter}
-("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Taylor"{tuple_delimiter}"Alex is affected by Taylor's authoritarian certainty and observes changes in Taylor's attitude towards the device."{tuple_delimiter}"power dynamics, perspective shift"{tuple_delimiter}7){record_delimiter}
-("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Jordan"{tuple_delimiter}"Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision."{tuple_delimiter}"shared goals, rebellion"{tuple_delimiter}6){record_delimiter}
-("relationship"{tuple_delimiter}"Taylor"{tuple_delimiter}"Jordan"{tuple_delimiter}"Taylor and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy truce."{tuple_delimiter}"conflict resolution, mutual respect"{tuple_delimiter}8){record_delimiter}
-("relationship"{tuple_delimiter}"Jordan"{tuple_delimiter}"Cruz"{tuple_delimiter}"Jordan's commitment to discovery is in rebellion against Cruz's vision of control and order."{tuple_delimiter}"ideological conflict, rebellion"{tuple_delimiter}5){record_delimiter}
-("relationship"{tuple_delimiter}"Taylor"{tuple_delimiter}"The Device"{tuple_delimiter}"Taylor shows reverence towards the device, indicating its importance and potential impact."{tuple_delimiter}"reverence, technological significance"{tuple_delimiter}9){record_delimiter}
-("content_keywords"{tuple_delimiter}"power dynamics, ideological conflict, discovery, rebellion"){completion_delimiter}
-#############################
-Example 2:
-
-Entity_types: [person, technology, mission, organization, location]
-Text:
-They were no longer mere operatives; they had become guardians of a threshold, keepers of a message from a realm beyond stars and stripes. This elevation in their mission could not be shackled by regulations and established protocols—it demanded a new perspective, a new resolve.
-
-Tension threaded through the dialogue of beeps and static as communications with Washington buzzed in the background. The team stood, a portentous air enveloping them. It was clear that the decisions they made in the ensuing hours could redefine humanity's place in the cosmos or condemn them to ignorance and potential peril.
-
-Their connection to the stars solidified, the group moved to address the crystallizing warning, shifting from passive recipients to active participants. Mercer's latter instincts gained precedence— the team's mandate had evolved, no longer solely to observe and report but to interact and prepare. A metamorphosis had begun, and Operation: Dulce hummed with the newfound frequency of their daring, a tone set not by the earthly
-#############
-Output:
-("entity"{tuple_delimiter}"Washington"{tuple_delimiter}"location"{tuple_delimiter}"Washington is a location where communications are being received, indicating its importance in the decision-making process."){record_delimiter}
-("entity"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"mission"{tuple_delimiter}"Operation: Dulce is described as a mission that has evolved to interact and prepare, indicating a significant shift in objectives and activities."){record_delimiter}
-("entity"{tuple_delimiter}"The team"{tuple_delimiter}"organization"{tuple_delimiter}"The team is portrayed as a group of individuals who have transitioned from passive observers to active participants in a mission, showing a dynamic change in their role."){record_delimiter}
-("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Washington"{tuple_delimiter}"The team receives communications from Washington, which influences their decision-making process."{tuple_delimiter}"decision-making, external influence"{tuple_delimiter}7){record_delimiter}
-("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"The team is directly involved in Operation: Dulce, executing its evolved objectives and activities."{tuple_delimiter}"mission evolution, active participation"{tuple_delimiter}9){completion_delimiter}
-("content_keywords"{tuple_delimiter}"mission evolution, decision-making, active participation, cosmic significance"){completion_delimiter}
-#############################
-Example 3:
-
-Entity_types: [person, role, technology, organization, event, location, concept]
-Text:
-their voice slicing through the buzz of activity. "Control may be an illusion when facing an intelligence that literally writes its own rules," they stated stoically, casting a watchful eye over the flurry of data.
-
-"It's like it's learning to communicate," offered Sam Rivera from a nearby interface, their youthful energy boding a mix of awe and anxiety. "This gives talking to strangers' a whole new meaning."
-
-Alex surveyed his team—each face a study in concentration, determination, and not a small measure of trepidation. "This might well be our first contact," he acknowledged, "And we need to be ready for whatever answers back."
-
-Together, they stood on the edge of the unknown, forging humanity's response to a message from the heavens. The ensuing silence was palpable—a collective introspection about their role in this grand cosmic play, one that could rewrite human history.
-
-The encrypted dialogue continued to unfold, its intricate patterns showing an almost uncanny anticipation
-#############
-Output:
-("entity"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"person"{tuple_delimiter}"Sam Rivera is a member of a team working on communicating with an unknown intelligence, showing a mix of awe and anxiety."){record_delimiter}
-("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is the leader of a team attempting first contact with an unknown intelligence, acknowledging the significance of their task."){record_delimiter}
-("entity"{tuple_delimiter}"Control"{tuple_delimiter}"concept"{tuple_delimiter}"Control refers to the ability to manage or govern, which is challenged by an intelligence that writes its own rules."){record_delimiter}
-("entity"{tuple_delimiter}"Intelligence"{tuple_delimiter}"concept"{tuple_delimiter}"Intelligence here refers to an unknown entity capable of writing its own rules and learning to communicate."){record_delimiter}
-("entity"{tuple_delimiter}"First Contact"{tuple_delimiter}"event"{tuple_delimiter}"First Contact is the potential initial communication between humanity and an unknown intelligence."){record_delimiter}
-("entity"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"event"{tuple_delimiter}"Humanity's Response is the collective action taken by Alex's team in response to a message from an unknown intelligence."){record_delimiter}
-("relationship"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"Intelligence"{tuple_delimiter}"Sam Rivera is directly involved in the process of learning to communicate with the unknown intelligence."{tuple_delimiter}"communication, learning process"{tuple_delimiter}9){record_delimiter}
-("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"First Contact"{tuple_delimiter}"Alex leads the team that might be making the First Contact with the unknown intelligence."{tuple_delimiter}"leadership, exploration"{tuple_delimiter}10){record_delimiter}
-("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"Alex and his team are the key figures in Humanity's Response to the unknown intelligence."{tuple_delimiter}"collective action, cosmic significance"{tuple_delimiter}8){record_delimiter}
-("relationship"{tuple_delimiter}"Control"{tuple_delimiter}"Intelligence"{tuple_delimiter}"The concept of Control is challenged by the Intelligence that writes its own rules."{tuple_delimiter}"power dynamics, autonomy"{tuple_delimiter}7){record_delimiter}
-("content_keywords"{tuple_delimiter}"first contact, control, communication, cosmic significance"){completion_delimiter}
-#############################
--Real Data-
-######################
-Entity_types: {entity_types}
-Text: {input_text}
-######################
-Output:
-"""
-
-PROMPTS[
- "summarize_entity_descriptions"
-] = """You are a helpful assistant responsible for generating a comprehensive summary of the data provided below.
-Given one or two entities, and a list of descriptions, all related to the same entity or group of entities.
-Please concatenate all of these into a single, comprehensive description. Make sure to include information collected from all the descriptions.
-If the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary.
-Make sure it is written in third person, and include the entity names so we the have full context.
-
-#######
--Data-
-Entities: {entity_name}
-Description List: {description_list}
-#######
-Output:
-"""
-
-PROMPTS[
- "entiti_continue_extraction"
-] = """MANY entities were missed in the last extraction. Add them below using the same format:
-"""
-
-PROMPTS[
- "entiti_if_loop_extraction"
-] = """It appears some entities may have still been missed. Answer YES | NO if there are still entities that need to be added.
-"""
-
-PROMPTS["fail_response"] = "Sorry, I'm not able to provide an answer to that question."
-
-PROMPTS[
- "rag_response"
-] = """---Role---
-
-You are a helpful assistant responding to questions about data in the tables provided.
-
-
----Goal---
-
-Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
-If you don't know the answer, just say so. Do not make anything up.
-Do not include information where the supporting evidence for it is not provided.
-
----Target response length and format---
-
-{response_type}
-
-
----Data tables---
-
-{context_data}
-
-
----Goal---
-
-Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
-
-If you don't know the answer, just say so. Do not make anything up.
-
-Do not include information where the supporting evidence for it is not provided.
-
-
----Target response length and format---
-
-{response_type}
-
-Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown.
-"""
-
-PROMPTS["keywords_extraction"] = """---Role---
-
-You are a helpful assistant tasked with identifying both high-level and low-level keywords in the user's query.
-
----Goal---
-
-Given the query, list both high-level and low-level keywords. High-level keywords focus on overarching concepts or themes, while low-level keywords focus on specific entities, details, or concrete terms.
-
----Instructions---
-
-- Output the keywords in JSON format.
-- The JSON should have two keys:
- - "high_level_keywords" for overarching concepts or themes.
- - "low_level_keywords" for specific entities or details.
-
-######################
--Examples-
-######################
-Example 1:
-
-Query: "How does international trade influence global economic stability?"
-################
-Output:
-{{
- "high_level_keywords": ["International trade", "Global economic stability", "Economic impact"],
- "low_level_keywords": ["Trade agreements", "Tariffs", "Currency exchange", "Imports", "Exports"]
-}}
-#############################
-Example 2:
-
-Query: "What are the environmental consequences of deforestation on biodiversity?"
-################
-Output:
-{{
- "high_level_keywords": ["Environmental consequences", "Deforestation", "Biodiversity loss"],
- "low_level_keywords": ["Species extinction", "Habitat destruction", "Carbon emissions", "Rainforest", "Ecosystem"]
-}}
-#############################
-Example 3:
-
-Query: "What is the role of education in reducing poverty?"
-################
-Output:
-{{
- "high_level_keywords": ["Education", "Poverty reduction", "Socioeconomic development"],
- "low_level_keywords": ["School access", "Literacy rates", "Job training", "Income inequality"]
-}}
-#############################
--Real Data-
-######################
-Query: {query}
-######################
-Output:
-
-"""
-
-PROMPTS[
- "naive_rag_response"
-] = """You're a helpful assistant
-Below are the knowledge you know:
-{content_data}
----
-If you don't know the answer or if the provided knowledge do not contain sufficient information to provide an answer, just say so. Do not make anything up.
-Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
-If you don't know the answer, just say so. Do not make anything up.
-Do not include information where the supporting evidence for it is not provided.
----Target response length and format---
-{response_type}
-"""
diff --git a/lightrag/storage.py b/lightrag/storage.py
deleted file mode 100644
index 2f2bb7d8..00000000
--- a/lightrag/storage.py
+++ /dev/null
@@ -1,246 +0,0 @@
-import asyncio
-import html
-import json
-import os
-from collections import defaultdict
-from dataclasses import dataclass, field
-from typing import Any, Union, cast
-import pickle
-import hnswlib
-import networkx as nx
-import numpy as np
-from nano_vectordb import NanoVectorDB
-import xxhash
-
-from .utils import load_json, logger, write_json
-from .base import (
- BaseGraphStorage,
- BaseKVStorage,
- BaseVectorStorage,
-)
-
-@dataclass
-class JsonKVStorage(BaseKVStorage):
- def __post_init__(self):
- working_dir = self.global_config["working_dir"]
- self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
- self._data = load_json(self._file_name) or {}
- logger.info(f"Load KV {self.namespace} with {len(self._data)} data")
-
- async def all_keys(self) -> list[str]:
- return list(self._data.keys())
-
- async def index_done_callback(self):
- write_json(self._data, self._file_name)
-
- async def get_by_id(self, id):
- return self._data.get(id, None)
-
- async def get_by_ids(self, ids, fields=None):
- if fields is None:
- return [self._data.get(id, None) for id in ids]
- return [
- (
- {k: v for k, v in self._data[id].items() if k in fields}
- if self._data.get(id, None)
- else None
- )
- for id in ids
- ]
-
- async def filter_keys(self, data: list[str]) -> set[str]:
- return set([s for s in data if s not in self._data])
-
- async def upsert(self, data: dict[str, dict]):
- left_data = {k: v for k, v in data.items() if k not in self._data}
- self._data.update(left_data)
- return left_data
-
- async def drop(self):
- self._data = {}
-
-@dataclass
-class NanoVectorDBStorage(BaseVectorStorage):
- cosine_better_than_threshold: float = 0.2
-
- def __post_init__(self):
-
- self._client_file_name = os.path.join(
- self.global_config["working_dir"], f"vdb_{self.namespace}.json"
- )
- self._max_batch_size = self.global_config["embedding_batch_num"]
- self._client = NanoVectorDB(
- self.embedding_func.embedding_dim, storage_file=self._client_file_name
- )
- self.cosine_better_than_threshold = self.global_config.get(
- "cosine_better_than_threshold", self.cosine_better_than_threshold
- )
-
- async def upsert(self, data: dict[str, dict]):
- logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
- if not len(data):
- logger.warning("You insert an empty data to vector DB")
- return []
- list_data = [
- {
- "__id__": k,
- **{k1: v1 for k1, v1 in v.items() if k1 in self.meta_fields},
- }
- for k, v in data.items()
- ]
- contents = [v["content"] for v in data.values()]
- batches = [
- contents[i : i + self._max_batch_size]
- for i in range(0, len(contents), self._max_batch_size)
- ]
- embeddings_list = await asyncio.gather(
- *[self.embedding_func(batch) for batch in batches]
- )
- embeddings = np.concatenate(embeddings_list)
- for i, d in enumerate(list_data):
- d["__vector__"] = embeddings[i]
- results = self._client.upsert(datas=list_data)
- return results
-
- async def query(self, query: str, top_k=5):
- embedding = await self.embedding_func([query])
- embedding = embedding[0]
- results = self._client.query(
- query=embedding,
- top_k=top_k,
- better_than_threshold=self.cosine_better_than_threshold,
- )
- results = [
- {**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
- ]
- return results
-
- async def index_done_callback(self):
- self._client.save()
-
-@dataclass
-class NetworkXStorage(BaseGraphStorage):
- @staticmethod
- def load_nx_graph(file_name) -> nx.Graph:
- if os.path.exists(file_name):
- return nx.read_graphml(file_name)
- return None
-
- @staticmethod
- def write_nx_graph(graph: nx.Graph, file_name):
- logger.info(
- f"Writing graph with {graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges"
- )
- nx.write_graphml(graph, file_name)
-
- @staticmethod
- def stable_largest_connected_component(graph: nx.Graph) -> nx.Graph:
- """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
- Return the largest connected component of the graph, with nodes and edges sorted in a stable way.
- """
- from graspologic.utils import largest_connected_component
-
- graph = graph.copy()
- graph = cast(nx.Graph, largest_connected_component(graph))
- node_mapping = {node: html.unescape(node.upper().strip()) for node in graph.nodes()} # type: ignore
- graph = nx.relabel_nodes(graph, node_mapping)
- return NetworkXStorage._stabilize_graph(graph)
-
- @staticmethod
- def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
- """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
- Ensure an undirected graph with the same relationships will always be read the same way.
- """
- fixed_graph = nx.DiGraph() if graph.is_directed() else nx.Graph()
-
- sorted_nodes = graph.nodes(data=True)
- sorted_nodes = sorted(sorted_nodes, key=lambda x: x[0])
-
- fixed_graph.add_nodes_from(sorted_nodes)
- edges = list(graph.edges(data=True))
-
- if not graph.is_directed():
-
- def _sort_source_target(edge):
- source, target, edge_data = edge
- if source > target:
- temp = source
- source = target
- target = temp
- return source, target, edge_data
-
- edges = [_sort_source_target(edge) for edge in edges]
-
- def _get_edge_key(source: Any, target: Any) -> str:
- return f"{source} -> {target}"
-
- edges = sorted(edges, key=lambda x: _get_edge_key(x[0], x[1]))
-
- fixed_graph.add_edges_from(edges)
- return fixed_graph
-
- def __post_init__(self):
- self._graphml_xml_file = os.path.join(
- self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
- )
- preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
- if preloaded_graph is not None:
- logger.info(
- f"Loaded graph from {self._graphml_xml_file} with {preloaded_graph.number_of_nodes()} nodes, {preloaded_graph.number_of_edges()} edges"
- )
- self._graph = preloaded_graph or nx.Graph()
- self._node_embed_algorithms = {
- "node2vec": self._node2vec_embed,
- }
-
- async def index_done_callback(self):
- NetworkXStorage.write_nx_graph(self._graph, self._graphml_xml_file)
-
- async def has_node(self, node_id: str) -> bool:
- return self._graph.has_node(node_id)
-
- async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
- return self._graph.has_edge(source_node_id, target_node_id)
-
- async def get_node(self, node_id: str) -> Union[dict, None]:
- return self._graph.nodes.get(node_id)
-
- async def node_degree(self, node_id: str) -> int:
- return self._graph.degree(node_id)
-
- async def edge_degree(self, src_id: str, tgt_id: str) -> int:
- return self._graph.degree(src_id) + self._graph.degree(tgt_id)
-
- async def get_edge(
- self, source_node_id: str, target_node_id: str
- ) -> Union[dict, None]:
- return self._graph.edges.get((source_node_id, target_node_id))
-
- async def get_node_edges(self, source_node_id: str):
- if self._graph.has_node(source_node_id):
- return list(self._graph.edges(source_node_id))
- return None
-
- async def upsert_node(self, node_id: str, node_data: dict[str, str]):
- self._graph.add_node(node_id, **node_data)
-
- async def upsert_edge(
- self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
- ):
- self._graph.add_edge(source_node_id, target_node_id, **edge_data)
-
- async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
- if algorithm not in self._node_embed_algorithms:
- raise ValueError(f"Node embedding algorithm {algorithm} not supported")
- return await self._node_embed_algorithms[algorithm]()
-
- async def _node2vec_embed(self):
- from graspologic import embed
-
- embeddings, nodes = embed.node2vec_embed(
- self._graph,
- **self.global_config["node2vec_params"],
- )
-
- nodes_ids = [self._graph.nodes[node_id]["id"] for node_id in nodes]
- return embeddings, nodes_ids
diff --git a/lightrag/utils.py b/lightrag/utils.py
deleted file mode 100644
index c75b4270..00000000
--- a/lightrag/utils.py
+++ /dev/null
@@ -1,165 +0,0 @@
-import asyncio
-import html
-import json
-import logging
-import os
-import re
-from dataclasses import dataclass
-from functools import wraps
-from hashlib import md5
-from typing import Any, Union
-
-import numpy as np
-import tiktoken
-
-ENCODER = None
-
-logger = logging.getLogger("lightrag")
-
-def set_logger(log_file: str):
- logger.setLevel(logging.DEBUG)
-
- file_handler = logging.FileHandler(log_file)
- file_handler.setLevel(logging.DEBUG)
-
- formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
- file_handler.setFormatter(formatter)
-
- if not logger.handlers:
- logger.addHandler(file_handler)
-
-@dataclass
-class EmbeddingFunc:
- embedding_dim: int
- max_token_size: int
- func: callable
-
- async def __call__(self, *args, **kwargs) -> np.ndarray:
- return await self.func(*args, **kwargs)
-
-def locate_json_string_body_from_string(content: str) -> Union[str, None]:
- """Locate the JSON string body from a string"""
- maybe_json_str = re.search(r"{.*}", content, re.DOTALL)
- if maybe_json_str is not None:
- return maybe_json_str.group(0)
- else:
- return None
-
-def convert_response_to_json(response: str) -> dict:
- json_str = locate_json_string_body_from_string(response)
- assert json_str is not None, f"Unable to parse JSON from response: {response}"
- try:
- data = json.loads(json_str)
- return data
- except json.JSONDecodeError as e:
- logger.error(f"Failed to parse JSON: {json_str}")
- raise e from None
-
-def compute_args_hash(*args):
- return md5(str(args).encode()).hexdigest()
-
-def compute_mdhash_id(content, prefix: str = ""):
- return prefix + md5(content.encode()).hexdigest()
-
-def limit_async_func_call(max_size: int, waitting_time: float = 0.0001):
- """Add restriction of maximum async calling times for a async func"""
-
- def final_decro(func):
- """Not using async.Semaphore to aovid use nest-asyncio"""
- __current_size = 0
-
- @wraps(func)
- async def wait_func(*args, **kwargs):
- nonlocal __current_size
- while __current_size >= max_size:
- await asyncio.sleep(waitting_time)
- __current_size += 1
- result = await func(*args, **kwargs)
- __current_size -= 1
- return result
-
- return wait_func
-
- return final_decro
-
-def wrap_embedding_func_with_attrs(**kwargs):
- """Wrap a function with attributes"""
-
- def final_decro(func) -> EmbeddingFunc:
- new_func = EmbeddingFunc(**kwargs, func=func)
- return new_func
-
- return final_decro
-
-def load_json(file_name):
- if not os.path.exists(file_name):
- return None
- with open(file_name) as f:
- return json.load(f)
-
-def write_json(json_obj, file_name):
- with open(file_name, "w") as f:
- json.dump(json_obj, f, indent=2, ensure_ascii=False)
-
-def encode_string_by_tiktoken(content: str, model_name: str = "gpt-4o"):
- global ENCODER
- if ENCODER is None:
- ENCODER = tiktoken.encoding_for_model(model_name)
- tokens = ENCODER.encode(content)
- return tokens
-
-
-def decode_tokens_by_tiktoken(tokens: list[int], model_name: str = "gpt-4o"):
- global ENCODER
- if ENCODER is None:
- ENCODER = tiktoken.encoding_for_model(model_name)
- content = ENCODER.decode(tokens)
- return content
-
-def pack_user_ass_to_openai_messages(*args: str):
- roles = ["user", "assistant"]
- return [
- {"role": roles[i % 2], "content": content} for i, content in enumerate(args)
- ]
-
-def split_string_by_multi_markers(content: str, markers: list[str]) -> list[str]:
- """Split a string by multiple markers"""
- if not markers:
- return [content]
- results = re.split("|".join(re.escape(marker) for marker in markers), content)
- return [r.strip() for r in results if r.strip()]
-
-# Refer the utils functions of the official GraphRAG implementation:
-# https://github.com/microsoft/graphrag
-def clean_str(input: Any) -> str:
- """Clean an input string by removing HTML escapes, control characters, and other unwanted characters."""
- # If we get non-string input, just give it back
- if not isinstance(input, str):
- return input
-
- result = html.unescape(input.strip())
- # https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python
- return re.sub(r"[\x00-\x1f\x7f-\x9f]", "", result)
-
-def is_float_regex(value):
- return bool(re.match(r"^[-+]?[0-9]*\.?[0-9]+$", value))
-
-def truncate_list_by_token_size(list_data: list, key: callable, max_token_size: int):
- """Truncate a list of data by token size"""
- if max_token_size <= 0:
- return []
- tokens = 0
- for i, data in enumerate(list_data):
- tokens += len(encode_string_by_tiktoken(key(data)))
- if tokens > max_token_size:
- return list_data[:i]
- return list_data
-
-def list_of_list_to_csv(data: list[list]):
- return "\n".join(
- [",\t".join([str(data_dd) for data_dd in data_d]) for data_d in data]
- )
-
-def save_data_to_file(data, file_name):
- with open(file_name, 'w', encoding='utf-8') as f:
- json.dump(data, f, ensure_ascii=False, indent=4)
\ No newline at end of file
From 30d54da6230d3b9ff51561cb2f374a5c29d10e28 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Thu, 10 Oct 2024 15:01:40 +0800
Subject: [PATCH 009/258] update
---
README.md | 198 +++++++++
lightrag/__init__.py | 5 +
lightrag/base.py | 116 ++++++
lightrag/lightrag.py | 300 ++++++++++++++
lightrag/llm.py | 88 ++++
lightrag/operate.py | 944 +++++++++++++++++++++++++++++++++++++++++++
lightrag/prompt.py | 256 ++++++++++++
lightrag/storage.py | 246 +++++++++++
lightrag/utils.py | 165 ++++++++
9 files changed, 2318 insertions(+)
create mode 100644 README.md
create mode 100644 lightrag/__init__.py
create mode 100644 lightrag/base.py
create mode 100644 lightrag/lightrag.py
create mode 100644 lightrag/llm.py
create mode 100644 lightrag/operate.py
create mode 100644 lightrag/prompt.py
create mode 100644 lightrag/storage.py
create mode 100644 lightrag/utils.py
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..42de1c1c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,198 @@
+# LightRAG: Simple and Fast Retrieval-Augmented Generation
+![请添加图片描述](https://i-blog.csdnimg.cn/direct/567139f1a36e4564abc63ce5c12b6271.jpeg)
+
+
+
+
+
+
+This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
+![请添加图片描述](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
+## Install
+
+* Install from source
+
+```bash
+cd LightRAG
+pip install -e .
+```
+* Install from PyPI
+```bash
+pip install lightrag-hku
+```
+
+## Quick Start
+
+* Set OpenAI API key in environment: `export OPENAI_API_KEY="sk-..."`.
+* Download the demo text "A Christmas Carol by Charles Dickens"
+```bash
+curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt
+```
+Use the Python snippet below:
+
+```python
+from lightrag import LightRAG, QueryParam
+
+rag = LightRAG(working_dir="./dickens")
+
+with open("./book.txt") as f:
+ rag.insert(f.read())
+
+# Perform naive search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+
+# Perform local search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+
+# Perform global search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+
+# Perform hybrid search (note: the mode name is spelled "hybird" in this release)
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybird")))
+```
+Batch Insert
+```python
+rag.insert(["TEXT1", "TEXT2",...])
+```
+Incremental Insert
+
+```python
+rag = LightRAG(working_dir="./dickens")
+
+with open("./newText.txt") as f:
+ rag.insert(f.read())
+```
+## Evaluation
+### Dataset
+The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain).
+
+### Generate Query
+LightRAG uses the following prompt to generate high-level queries, with the corresponding code located in `examples/generate_query.py`.
+```python
+Given the following description of a dataset:
+
+{description}
+
+Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset.
+
+Output the results in the following structure:
+- User 1: [user description]
+ - Task 1: [task description]
+ - Question 1:
+ - Question 2:
+ - Question 3:
+ - Question 4:
+ - Question 5:
+ - Task 2: [task description]
+ ...
+ - Task 5: [task description]
+- User 2: [user description]
+ ...
+- User 5: [user description]
+ ...
+```
+
+### Batch Eval
+To evaluate the performance of two RAG systems on high-level queries, LightRAG uses the following prompt, with the specific code available in `examples/batch_eval.py`.
+```python
+---Role---
+You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+---Goal---
+You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+
+- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
+- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
+- **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?
+
+For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.
+
+Here is the question:
+{query}
+
+Here are the two answers:
+
+**Answer 1:**
+{answer1}
+
+**Answer 2:**
+{answer2}
+
+Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.
+
+Output your evaluation in the following JSON format:
+
+{{
+ "Comprehensiveness": {{
+ "Winner": "[Answer 1 or Answer 2]",
+ "Explanation": "[Provide explanation here]"
+ }},
+ "Empowerment": {{
+ "Winner": "[Answer 1 or Answer 2]",
+ "Explanation": "[Provide explanation here]"
+ }},
+ "Overall Winner": {{
+ "Winner": "[Answer 1 or Answer 2]",
+ "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"
+ }}
+}}
+```
+### Overall Performance Table
+
+| | **Agriculture** | | **CS** | | **Legal** | | **Mix** | |
+|----------------------|-------------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|
+| | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** |
+| **Comprehensiveness** | 32.69% | **67.31%** | 35.44% | **64.56%** | 19.05% | **80.95%** | 36.36% | **63.64%** |
+| **Diversity** | 24.09% | **75.91%** | 35.24% | **64.76%** | 10.98% | **89.02%** | 30.76% | **69.24%** |
+| **Empowerment** | 31.35% | **68.65%** | 35.48% | **64.52%** | 17.59% | **82.41%** | 40.95% | **59.05%** |
+| **Overall** | 33.30% | **66.70%** | 34.76% | **65.24%** | 17.46% | **82.54%** | 37.59% | **62.40%** |
+| | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** |
+| **Comprehensiveness** | 32.05% | **67.95%** | 39.30% | **60.70%** | 18.57% | **81.43%** | 38.89% | **61.11%** |
+| **Diversity** | 29.44% | **70.56%** | 38.71% | **61.29%** | 15.14% | **84.86%** | 28.50% | **71.50%** |
+| **Empowerment** | 32.51% | **67.49%** | 37.52% | **62.48%** | 17.80% | **82.20%** | 43.96% | **56.04%** |
+| **Overall** | 33.29% | **66.71%** | 39.03% | **60.97%** | 17.80% | **82.20%** | 39.61% | **60.39%** |
+| | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** |
+| **Comprehensiveness** | 24.39% | **75.61%** | 36.49% | **63.51%** | 27.68% | **72.32%** | 42.17% | **57.83%** |
+| **Diversity** | 24.96% | **75.34%** | 37.41% | **62.59%** | 18.79% | **81.21%** | 30.88% | **69.12%** |
+| **Empowerment** | 24.89% | **75.11%** | 34.99% | **65.01%** | 26.99% | **73.01%** | 45.61% | **54.39%** |
+| **Overall** | 23.17% | **76.83%** | 35.67% | **64.33%** | 27.68% | **72.32%** | 42.72% | **57.28%** |
+| | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** |
+| **Comprehensiveness** | 45.56% | **54.44%** | 45.98% | **54.02%** | 47.13% | **52.87%** | **51.86%** | 48.14% |
+| **Diversity** | 19.65% | **80.35%** | 39.64% | **60.36%** | 25.55% | **74.45%** | 35.87% | **64.13%** |
+| **Empowerment** | 36.69% | **63.31%** | 45.09% | **54.91%** | 42.81% | **57.19%** | **52.94%** | 47.06% |
+| **Overall** | 43.62% | **56.38%** | 45.98% | **54.02%** | 45.70% | **54.30%** | **51.86%** | 48.14% |
+
+## Code Structure
+
+```python
+.
+├── examples
+│ ├── batch_eval.py
+│ ├── generate_query.py
+│ ├── insert.py
+│ └── query.py
+├── lightrag
+│ ├── __init__.py
+│ ├── base.py
+│ ├── lightrag.py
+│ ├── llm.py
+│ ├── operate.py
+│ ├── prompt.py
+│ ├── storage.py
+│ └── utils.py
+├── LICENSE
+├── README.md
+├── requirements.txt
+└── setup.py
+```
+## Citation
+
+```
+@article{guo2024lightrag,
+title={LightRAG: Simple and Fast Retrieval-Augmented Generation},
+author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang},
+year={2024},
+eprint={2410.05779},
+archivePrefix={arXiv},
+primaryClass={cs.IR}
+}
+```
diff --git a/lightrag/__init__.py b/lightrag/__init__.py
new file mode 100644
index 00000000..dc497cd4
--- /dev/null
+++ b/lightrag/__init__.py
@@ -0,0 +1,5 @@
+from .lightrag import LightRAG, QueryParam
+
+__version__ = "0.0.2"
+__author__ = "Zirui Guo"
+__url__ = "https://github.com/HKUDS/GraphEdit"
diff --git a/lightrag/base.py b/lightrag/base.py
new file mode 100644
index 00000000..9c0422fe
--- /dev/null
+++ b/lightrag/base.py
@@ -0,0 +1,116 @@
+from dataclasses import dataclass, field
+from typing import TypedDict, Union, Literal, Generic, TypeVar
+
+import numpy as np
+
+from .utils import EmbeddingFunc
+
+TextChunkSchema = TypedDict(
+ "TextChunkSchema",
+ {"tokens": int, "content": str, "full_doc_id": str, "chunk_order_index": int},
+)
+
+T = TypeVar("T")
+
+@dataclass
+class QueryParam:
+ mode: Literal["local", "global", "hybird", "naive"] = "global"
+ only_need_context: bool = False
+ response_type: str = "Multiple Paragraphs"
+ top_k: int = 60
+ max_token_for_text_unit: int = 4000
+ max_token_for_global_context: int = 4000
+ max_token_for_local_context: int = 4000
+
+
+@dataclass
+class StorageNameSpace:
+ namespace: str
+ global_config: dict
+
+ async def index_done_callback(self):
+ """commit the storage operations after indexing"""
+ pass
+
+ async def query_done_callback(self):
+ """commit the storage operations after querying"""
+ pass
+
+@dataclass
+class BaseVectorStorage(StorageNameSpace):
+ embedding_func: EmbeddingFunc
+ meta_fields: set = field(default_factory=set)
+
+ async def query(self, query: str, top_k: int) -> list[dict]:
+ raise NotImplementedError
+
+ async def upsert(self, data: dict[str, dict]):
+ """Use 'content' field from value for embedding, use key as id.
+ If embedding_func is None, use 'embedding' field from value
+ """
+ raise NotImplementedError
+
+@dataclass
+class BaseKVStorage(Generic[T], StorageNameSpace):
+ async def all_keys(self) -> list[str]:
+ raise NotImplementedError
+
+ async def get_by_id(self, id: str) -> Union[T, None]:
+ raise NotImplementedError
+
+ async def get_by_ids(
+ self, ids: list[str], fields: Union[set[str], None] = None
+ ) -> list[Union[T, None]]:
+ raise NotImplementedError
+
+ async def filter_keys(self, data: list[str]) -> set[str]:
+ """return un-exist keys"""
+ raise NotImplementedError
+
+ async def upsert(self, data: dict[str, T]):
+ raise NotImplementedError
+
+ async def drop(self):
+ raise NotImplementedError
+
+
+@dataclass
+class BaseGraphStorage(StorageNameSpace):
+ async def has_node(self, node_id: str) -> bool:
+ raise NotImplementedError
+
+ async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
+ raise NotImplementedError
+
+ async def node_degree(self, node_id: str) -> int:
+ raise NotImplementedError
+
+ async def edge_degree(self, src_id: str, tgt_id: str) -> int:
+ raise NotImplementedError
+
+ async def get_node(self, node_id: str) -> Union[dict, None]:
+ raise NotImplementedError
+
+ async def get_edge(
+ self, source_node_id: str, target_node_id: str
+ ) -> Union[dict, None]:
+ raise NotImplementedError
+
+ async def get_node_edges(
+ self, source_node_id: str
+ ) -> Union[list[tuple[str, str]], None]:
+ raise NotImplementedError
+
+ async def upsert_node(self, node_id: str, node_data: dict[str, str]):
+ raise NotImplementedError
+
+ async def upsert_edge(
+ self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
+ ):
+ raise NotImplementedError
+
+ async def clustering(self, algorithm: str):
+ raise NotImplementedError
+
+ async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
+ raise NotImplementedError("Node embedding is not used in lightrag.")
\ No newline at end of file
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
new file mode 100644
index 00000000..836fda9e
--- /dev/null
+++ b/lightrag/lightrag.py
@@ -0,0 +1,300 @@
+import asyncio
+import os
+from dataclasses import asdict, dataclass, field
+from datetime import datetime
+from functools import partial
+from typing import Type, cast
+
+from .llm import gpt_4o_complete, gpt_4o_mini_complete, openai_embedding
+from .operate import (
+ chunking_by_token_size,
+ extract_entities,
+ local_query,
+ global_query,
+ hybird_query,
+ naive_query,
+)
+
+from .storage import (
+ JsonKVStorage,
+ NanoVectorDBStorage,
+ NetworkXStorage,
+)
+from .utils import (
+ EmbeddingFunc,
+ compute_mdhash_id,
+ limit_async_func_call,
+ convert_response_to_json,
+ logger,
+ set_logger,
+)
+from .base import (
+ BaseGraphStorage,
+ BaseKVStorage,
+ BaseVectorStorage,
+ StorageNameSpace,
+ QueryParam,
+)
+
+def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
+ try:
+ # If there is already an event loop, use it.
+ loop = asyncio.get_event_loop()
+ except RuntimeError:
+ # If in a sub-thread, create a new event loop.
+ logger.info("Creating a new event loop in a sub-thread.")
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+ return loop
+
+@dataclass
+class LightRAG:
+ working_dir: str = field(
+ default_factory=lambda: f"./lightrag_cache_{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}"
+ )
+
+ # text chunking
+ chunk_token_size: int = 1200
+ chunk_overlap_token_size: int = 100
+ tiktoken_model_name: str = "gpt-4o-mini"
+
+ # entity extraction
+ entity_extract_max_gleaning: int = 1
+ entity_summary_to_max_tokens: int = 500
+
+ # node embedding
+ node_embedding_algorithm: str = "node2vec"
+ node2vec_params: dict = field(
+ default_factory=lambda: {
+ "dimensions": 1536,
+ "num_walks": 10,
+ "walk_length": 40,
+ "num_walks": 10,
+ "window_size": 2,
+ "iterations": 3,
+ "random_seed": 3,
+ }
+ )
+
+ # text embedding
+ embedding_func: EmbeddingFunc = field(default_factory=lambda: openai_embedding)
+ embedding_batch_num: int = 32
+ embedding_func_max_async: int = 16
+
+ # LLM
+ llm_model_func: callable = gpt_4o_mini_complete
+ llm_model_max_token_size: int = 32768
+ llm_model_max_async: int = 16
+
+ # storage
+ key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage
+ vector_db_storage_cls: Type[BaseVectorStorage] = NanoVectorDBStorage
+ vector_db_storage_cls_kwargs: dict = field(default_factory=dict)
+ graph_storage_cls: Type[BaseGraphStorage] = NetworkXStorage
+ enable_llm_cache: bool = True
+
+ # extension
+ addon_params: dict = field(default_factory=dict)
+ convert_response_to_json_func: callable = convert_response_to_json
+
+ def __post_init__(self):
+ log_file = os.path.join(self.working_dir, "lightrag.log")
+ set_logger(log_file)
+ logger.info(f"Logger initialized for working directory: {self.working_dir}")
+
+ _print_config = ",\n ".join([f"{k} = {v}" for k, v in asdict(self).items()])
+ logger.debug(f"LightRAG init with param:\n {_print_config}\n")
+
+ if not os.path.exists(self.working_dir):
+ logger.info(f"Creating working directory {self.working_dir}")
+ os.makedirs(self.working_dir)
+
+ self.full_docs = self.key_string_value_json_storage_cls(
+ namespace="full_docs", global_config=asdict(self)
+ )
+
+ self.text_chunks = self.key_string_value_json_storage_cls(
+ namespace="text_chunks", global_config=asdict(self)
+ )
+
+ self.llm_response_cache = (
+ self.key_string_value_json_storage_cls(
+ namespace="llm_response_cache", global_config=asdict(self)
+ )
+ if self.enable_llm_cache
+ else None
+ )
+ self.chunk_entity_relation_graph = self.graph_storage_cls(
+ namespace="chunk_entity_relation", global_config=asdict(self)
+ )
+ self.embedding_func = limit_async_func_call(self.embedding_func_max_async)(
+ self.embedding_func
+ )
+ self.entities_vdb = (
+ self.vector_db_storage_cls(
+ namespace="entities",
+ global_config=asdict(self),
+ embedding_func=self.embedding_func,
+ meta_fields={"entity_name"}
+ )
+ )
+ self.relationships_vdb = (
+ self.vector_db_storage_cls(
+ namespace="relationships",
+ global_config=asdict(self),
+ embedding_func=self.embedding_func,
+ meta_fields={"src_id", "tgt_id"}
+ )
+ )
+ self.chunks_vdb = (
+ self.vector_db_storage_cls(
+ namespace="chunks",
+ global_config=asdict(self),
+ embedding_func=self.embedding_func,
+ )
+ )
+
+ self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
+ partial(self.llm_model_func, hashing_kv=self.llm_response_cache)
+ )
+
+ def insert(self, string_or_strings):
+ loop = always_get_an_event_loop()
+ return loop.run_until_complete(self.ainsert(string_or_strings))
+
+ async def ainsert(self, string_or_strings):
+ try:
+ if isinstance(string_or_strings, str):
+ string_or_strings = [string_or_strings]
+
+ new_docs = {
+ compute_mdhash_id(c.strip(), prefix="doc-"): {"content": c.strip()}
+ for c in string_or_strings
+ }
+ _add_doc_keys = await self.full_docs.filter_keys(list(new_docs.keys()))
+ new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
+ if not len(new_docs):
+ logger.warning(f"All docs are already in the storage")
+ return
+ logger.info(f"[New Docs] inserting {len(new_docs)} docs")
+
+ inserting_chunks = {}
+ for doc_key, doc in new_docs.items():
+ chunks = {
+ compute_mdhash_id(dp["content"], prefix="chunk-"): {
+ **dp,
+ "full_doc_id": doc_key,
+ }
+ for dp in chunking_by_token_size(
+ doc["content"],
+ overlap_token_size=self.chunk_overlap_token_size,
+ max_token_size=self.chunk_token_size,
+ tiktoken_model=self.tiktoken_model_name,
+ )
+ }
+ inserting_chunks.update(chunks)
+ _add_chunk_keys = await self.text_chunks.filter_keys(
+ list(inserting_chunks.keys())
+ )
+ inserting_chunks = {
+ k: v for k, v in inserting_chunks.items() if k in _add_chunk_keys
+ }
+ if not len(inserting_chunks):
+ logger.warning(f"All chunks are already in the storage")
+ return
+ logger.info(f"[New Chunks] inserting {len(inserting_chunks)} chunks")
+
+ await self.chunks_vdb.upsert(inserting_chunks)
+
+ logger.info("[Entity Extraction]...")
+ maybe_new_kg = await extract_entities(
+ inserting_chunks,
+ knwoledge_graph_inst=self.chunk_entity_relation_graph,
+ entity_vdb=self.entities_vdb,
+ relationships_vdb=self.relationships_vdb,
+ global_config=asdict(self),
+ )
+ if maybe_new_kg is None:
+ logger.warning("No new entities and relationships found")
+ return
+ self.chunk_entity_relation_graph = maybe_new_kg
+
+ await self.full_docs.upsert(new_docs)
+ await self.text_chunks.upsert(inserting_chunks)
+ finally:
+ await self._insert_done()
+
+ async def _insert_done(self):
+ tasks = []
+ for storage_inst in [
+ self.full_docs,
+ self.text_chunks,
+ self.llm_response_cache,
+ self.entities_vdb,
+ self.relationships_vdb,
+ self.chunks_vdb,
+ self.chunk_entity_relation_graph,
+ ]:
+ if storage_inst is None:
+ continue
+ tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
+ await asyncio.gather(*tasks)
+
+ def query(self, query: str, param: QueryParam = QueryParam()):
+ loop = always_get_an_event_loop()
+ return loop.run_until_complete(self.aquery(query, param))
+
+ async def aquery(self, query: str, param: QueryParam = QueryParam()):
+ if param.mode == "local":
+ response = await local_query(
+ query,
+ self.chunk_entity_relation_graph,
+ self.entities_vdb,
+ self.relationships_vdb,
+ self.text_chunks,
+ param,
+ asdict(self),
+ )
+ elif param.mode == "global":
+ response = await global_query(
+ query,
+ self.chunk_entity_relation_graph,
+ self.entities_vdb,
+ self.relationships_vdb,
+ self.text_chunks,
+ param,
+ asdict(self),
+ )
+ elif param.mode == "hybird":
+ response = await hybird_query(
+ query,
+ self.chunk_entity_relation_graph,
+ self.entities_vdb,
+ self.relationships_vdb,
+ self.text_chunks,
+ param,
+ asdict(self),
+ )
+ elif param.mode == "naive":
+ response = await naive_query(
+ query,
+ self.chunks_vdb,
+ self.text_chunks,
+ param,
+ asdict(self),
+ )
+ else:
+ raise ValueError(f"Unknown mode {param.mode}")
+ await self._query_done()
+ return response
+
+
+ async def _query_done(self):
+ tasks = []
+ for storage_inst in [self.llm_response_cache]:
+ if storage_inst is None:
+ continue
+ tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
+ await asyncio.gather(*tasks)
+
+
diff --git a/lightrag/llm.py b/lightrag/llm.py
new file mode 100644
index 00000000..ee700a10
--- /dev/null
+++ b/lightrag/llm.py
@@ -0,0 +1,88 @@
+import os
+import numpy as np
+from openai import AsyncOpenAI, APIConnectionError, RateLimitError, Timeout
+from tenacity import (
+ retry,
+ stop_after_attempt,
+ wait_exponential,
+ retry_if_exception_type,
+)
+
+from .base import BaseKVStorage
+from .utils import compute_args_hash, wrap_embedding_func_with_attrs
+
+@retry(
+ stop=stop_after_attempt(3),
+ wait=wait_exponential(multiplier=1, min=4, max=10),
+ retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+)
+async def openai_complete_if_cache(
+ model, prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ openai_async_client = AsyncOpenAI()
+ hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
+ messages = []
+ if system_prompt:
+ messages.append({"role": "system", "content": system_prompt})
+ messages.extend(history_messages)
+ messages.append({"role": "user", "content": prompt})
+ if hashing_kv is not None:
+ args_hash = compute_args_hash(model, messages)
+ if_cache_return = await hashing_kv.get_by_id(args_hash)
+ if if_cache_return is not None:
+ return if_cache_return["return"]
+
+ response = await openai_async_client.chat.completions.create(
+ model=model, messages=messages, **kwargs
+ )
+
+ if hashing_kv is not None:
+ await hashing_kv.upsert(
+ {args_hash: {"return": response.choices[0].message.content, "model": model}}
+ )
+ return response.choices[0].message.content
+
+async def gpt_4o_complete(
+ prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ return await openai_complete_if_cache(
+ "gpt-4o",
+ prompt,
+ system_prompt=system_prompt,
+ history_messages=history_messages,
+ **kwargs,
+ )
+
+
+async def gpt_4o_mini_complete(
+ prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ return await openai_complete_if_cache(
+ "gpt-4o-mini",
+ prompt,
+ system_prompt=system_prompt,
+ history_messages=history_messages,
+ **kwargs,
+ )
+
+@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+@retry(
+ stop=stop_after_attempt(3),
+ wait=wait_exponential(multiplier=1, min=4, max=10),
+ retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+)
+async def openai_embedding(texts: list[str]) -> np.ndarray:
+ openai_async_client = AsyncOpenAI()
+ response = await openai_async_client.embeddings.create(
+ model="text-embedding-3-small", input=texts, encoding_format="float"
+ )
+ return np.array([dp.embedding for dp in response.data])
+
+if __name__ == "__main__":
+ import asyncio
+
+ async def main():
+ result = await gpt_4o_mini_complete('How are you?')
+ print(result)
+
+ asyncio.run(main())
diff --git a/lightrag/operate.py b/lightrag/operate.py
new file mode 100644
index 00000000..2d3271da
--- /dev/null
+++ b/lightrag/operate.py
@@ -0,0 +1,944 @@
+import asyncio
+import json
+import re
+from typing import Union
+from collections import Counter, defaultdict
+
+from .utils import (
+ logger,
+ clean_str,
+ compute_mdhash_id,
+ decode_tokens_by_tiktoken,
+ encode_string_by_tiktoken,
+ is_float_regex,
+ list_of_list_to_csv,
+ pack_user_ass_to_openai_messages,
+ split_string_by_multi_markers,
+ truncate_list_by_token_size,
+)
+from .base import (
+ BaseGraphStorage,
+ BaseKVStorage,
+ BaseVectorStorage,
+ TextChunkSchema,
+ QueryParam,
+)
+from .prompt import GRAPH_FIELD_SEP, PROMPTS
+
+def chunking_by_token_size(
+ content: str, overlap_token_size=128, max_token_size=1024, tiktoken_model="gpt-4o"
+):
+ tokens = encode_string_by_tiktoken(content, model_name=tiktoken_model)
+ results = []
+ for index, start in enumerate(
+ range(0, len(tokens), max_token_size - overlap_token_size)
+ ):
+ chunk_content = decode_tokens_by_tiktoken(
+ tokens[start : start + max_token_size], model_name=tiktoken_model
+ )
+ results.append(
+ {
+ "tokens": min(max_token_size, len(tokens) - start),
+ "content": chunk_content.strip(),
+ "chunk_order_index": index,
+ }
+ )
+ return results
+
+async def _handle_entity_relation_summary(
+ entity_or_relation_name: str,
+ description: str,
+ global_config: dict,
+) -> str:
+ use_llm_func: callable = global_config["llm_model_func"]
+ llm_max_tokens = global_config["llm_model_max_token_size"]
+ tiktoken_model_name = global_config["tiktoken_model_name"]
+ summary_max_tokens = global_config["entity_summary_to_max_tokens"]
+
+ tokens = encode_string_by_tiktoken(description, model_name=tiktoken_model_name)
+ if len(tokens) < summary_max_tokens: # No need for summary
+ return description
+ prompt_template = PROMPTS["summarize_entity_descriptions"]
+ use_description = decode_tokens_by_tiktoken(
+ tokens[:llm_max_tokens], model_name=tiktoken_model_name
+ )
+ context_base = dict(
+ entity_name=entity_or_relation_name,
+ description_list=use_description.split(GRAPH_FIELD_SEP),
+ )
+ use_prompt = prompt_template.format(**context_base)
+ logger.debug(f"Trigger summary: {entity_or_relation_name}")
+ summary = await use_llm_func(use_prompt, max_tokens=summary_max_tokens)
+ return summary
+
+
+async def _handle_single_entity_extraction(
+ record_attributes: list[str],
+ chunk_key: str,
+):
+ if record_attributes[0] != '"entity"' or len(record_attributes) < 4:
+ return None
+ # add this record as a node in the G
+ entity_name = clean_str(record_attributes[1].upper())
+ if not entity_name.strip():
+ return None
+ entity_type = clean_str(record_attributes[2].upper())
+ entity_description = clean_str(record_attributes[3])
+ entity_source_id = chunk_key
+ return dict(
+ entity_name=entity_name,
+ entity_type=entity_type,
+ description=entity_description,
+ source_id=entity_source_id,
+ )
+
+
+async def _handle_single_relationship_extraction(
+ record_attributes: list[str],
+ chunk_key: str,
+):
+ if record_attributes[0] != '"relationship"' or len(record_attributes) < 5:
+ return None
+ # add this record as edge
+ source = clean_str(record_attributes[1].upper())
+ target = clean_str(record_attributes[2].upper())
+ edge_description = clean_str(record_attributes[3])
+
+ edge_keywords = clean_str(record_attributes[4])
+ edge_source_id = chunk_key
+ weight = (
+ float(record_attributes[-1]) if is_float_regex(record_attributes[-1]) else 1.0
+ )
+ return dict(
+ src_id=source,
+ tgt_id=target,
+ weight=weight,
+ description=edge_description,
+ keywords=edge_keywords,
+ source_id=edge_source_id,
+ )
+
+
+async def _merge_nodes_then_upsert(
+ entity_name: str,
+ nodes_data: list[dict],
+ knwoledge_graph_inst: BaseGraphStorage,
+ global_config: dict,
+):
+ already_entitiy_types = []
+ already_source_ids = []
+ already_description = []
+
+ already_node = await knwoledge_graph_inst.get_node(entity_name)
+ if already_node is not None:
+ already_entitiy_types.append(already_node["entity_type"])
+ already_source_ids.extend(
+ split_string_by_multi_markers(already_node["source_id"], [GRAPH_FIELD_SEP])
+ )
+ already_description.append(already_node["description"])
+
+ entity_type = sorted(
+ Counter(
+ [dp["entity_type"] for dp in nodes_data] + already_entitiy_types
+ ).items(),
+ key=lambda x: x[1],
+ reverse=True,
+ )[0][0]
+ description = GRAPH_FIELD_SEP.join(
+ sorted(set([dp["description"] for dp in nodes_data] + already_description))
+ )
+ source_id = GRAPH_FIELD_SEP.join(
+ set([dp["source_id"] for dp in nodes_data] + already_source_ids)
+ )
+ description = await _handle_entity_relation_summary(
+ entity_name, description, global_config
+ )
+ node_data = dict(
+ entity_type=entity_type,
+ description=description,
+ source_id=source_id,
+ )
+ await knwoledge_graph_inst.upsert_node(
+ entity_name,
+ node_data=node_data,
+ )
+ node_data["entity_name"] = entity_name
+ return node_data
+
+
+async def _merge_edges_then_upsert(
+ src_id: str,
+ tgt_id: str,
+ edges_data: list[dict],
+ knwoledge_graph_inst: BaseGraphStorage,
+ global_config: dict,
+):
+ already_weights = []
+ already_source_ids = []
+ already_description = []
+ already_keywords = []
+
+ if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
+ already_edge = await knwoledge_graph_inst.get_edge(src_id, tgt_id)
+ already_weights.append(already_edge["weight"])
+ already_source_ids.extend(
+ split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
+ )
+ already_description.append(already_edge["description"])
+ already_keywords.extend(
+ split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
+ )
+
+ weight = sum([dp["weight"] for dp in edges_data] + already_weights)
+ description = GRAPH_FIELD_SEP.join(
+ sorted(set([dp["description"] for dp in edges_data] + already_description))
+ )
+ keywords = GRAPH_FIELD_SEP.join(
+ sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
+ )
+ source_id = GRAPH_FIELD_SEP.join(
+ set([dp["source_id"] for dp in edges_data] + already_source_ids)
+ )
+ for need_insert_id in [src_id, tgt_id]:
+ if not (await knwoledge_graph_inst.has_node(need_insert_id)):
+ await knwoledge_graph_inst.upsert_node(
+ need_insert_id,
+ node_data={
+ "source_id": source_id,
+ "description": description,
+ "entity_type": '"UNKNOWN"',
+ },
+ )
+ description = await _handle_entity_relation_summary(
+ (src_id, tgt_id), description, global_config
+ )
+ await knwoledge_graph_inst.upsert_edge(
+ src_id,
+ tgt_id,
+ edge_data=dict(
+ weight=weight,
+ description=description,
+ keywords=keywords,
+ source_id=source_id,
+ ),
+ )
+
+ edge_data = dict(
+ src_id=src_id,
+ tgt_id=tgt_id,
+ description=description,
+ keywords=keywords,
+ )
+
+ return edge_data
+
+async def extract_entities(
+ chunks: dict[str, TextChunkSchema],
+ knwoledge_graph_inst: BaseGraphStorage,
+ entity_vdb: BaseVectorStorage,
+ relationships_vdb: BaseVectorStorage,
+ global_config: dict,
+) -> Union[BaseGraphStorage, None]:
+ use_llm_func: callable = global_config["llm_model_func"]
+ entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"]
+
+ ordered_chunks = list(chunks.items())
+
+ entity_extract_prompt = PROMPTS["entity_extraction"]
+ context_base = dict(
+ tuple_delimiter=PROMPTS["DEFAULT_TUPLE_DELIMITER"],
+ record_delimiter=PROMPTS["DEFAULT_RECORD_DELIMITER"],
+ completion_delimiter=PROMPTS["DEFAULT_COMPLETION_DELIMITER"],
+ entity_types=",".join(PROMPTS["DEFAULT_ENTITY_TYPES"]),
+ )
+ continue_prompt = PROMPTS["entiti_continue_extraction"]
+ if_loop_prompt = PROMPTS["entiti_if_loop_extraction"]
+
+ already_processed = 0
+ already_entities = 0
+ already_relations = 0
+
+ async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
+ nonlocal already_processed, already_entities, already_relations
+ chunk_key = chunk_key_dp[0]
+ chunk_dp = chunk_key_dp[1]
+ content = chunk_dp["content"]
+ hint_prompt = entity_extract_prompt.format(**context_base, input_text=content)
+ final_result = await use_llm_func(hint_prompt)
+
+ history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
+ for now_glean_index in range(entity_extract_max_gleaning):
+ glean_result = await use_llm_func(continue_prompt, history_messages=history)
+
+ history += pack_user_ass_to_openai_messages(continue_prompt, glean_result)
+ final_result += glean_result
+ if now_glean_index == entity_extract_max_gleaning - 1:
+ break
+
+ if_loop_result: str = await use_llm_func(
+ if_loop_prompt, history_messages=history
+ )
+ if_loop_result = if_loop_result.strip().strip('"').strip("'").lower()
+ if if_loop_result != "yes":
+ break
+
+ records = split_string_by_multi_markers(
+ final_result,
+ [context_base["record_delimiter"], context_base["completion_delimiter"]],
+ )
+
+ maybe_nodes = defaultdict(list)
+ maybe_edges = defaultdict(list)
+ for record in records:
+ record = re.search(r"\((.*)\)", record)
+ if record is None:
+ continue
+ record = record.group(1)
+ record_attributes = split_string_by_multi_markers(
+ record, [context_base["tuple_delimiter"]]
+ )
+ if_entities = await _handle_single_entity_extraction(
+ record_attributes, chunk_key
+ )
+ if if_entities is not None:
+ maybe_nodes[if_entities["entity_name"]].append(if_entities)
+ continue
+
+ if_relation = await _handle_single_relationship_extraction(
+ record_attributes, chunk_key
+ )
+ if if_relation is not None:
+ maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append(
+ if_relation
+ )
+ already_processed += 1
+ already_entities += len(maybe_nodes)
+ already_relations += len(maybe_edges)
+ now_ticks = PROMPTS["process_tickers"][
+ already_processed % len(PROMPTS["process_tickers"])
+ ]
+ print(
+ f"{now_ticks} Processed {already_processed} chunks, {already_entities} entities(duplicated), {already_relations} relations(duplicated)\r",
+ end="",
+ flush=True,
+ )
+ return dict(maybe_nodes), dict(maybe_edges)
+
+    # use_llm_func is wrapped in an asyncio.Semaphore that limits it to max_async concurrent calls
+ results = await asyncio.gather(
+ *[_process_single_content(c) for c in ordered_chunks]
+ )
+ print() # clear the progress bar
+ maybe_nodes = defaultdict(list)
+ maybe_edges = defaultdict(list)
+ for m_nodes, m_edges in results:
+ for k, v in m_nodes.items():
+ maybe_nodes[k].extend(v)
+ for k, v in m_edges.items():
+ maybe_edges[tuple(sorted(k))].extend(v)
+ all_entities_data = await asyncio.gather(
+ *[
+ _merge_nodes_then_upsert(k, v, knwoledge_graph_inst, global_config)
+ for k, v in maybe_nodes.items()
+ ]
+ )
+ all_relationships_data = await asyncio.gather(
+ *[
+ _merge_edges_then_upsert(k[0], k[1], v, knwoledge_graph_inst, global_config)
+ for k, v in maybe_edges.items()
+ ]
+ )
+ if not len(all_entities_data):
+ logger.warning("Didn't extract any entities, maybe your LLM is not working")
+ return None
+ if not len(all_relationships_data):
+ logger.warning("Didn't extract any relationships, maybe your LLM is not working")
+ return None
+
+ if entity_vdb is not None:
+ data_for_vdb = {
+ compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
+ "content": dp["entity_name"] + dp["description"],
+ "entity_name": dp["entity_name"],
+ }
+ for dp in all_entities_data
+ }
+ await entity_vdb.upsert(data_for_vdb)
+
+ if relationships_vdb is not None:
+ data_for_vdb = {
+ compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): {
+ "src_id": dp["src_id"],
+ "tgt_id": dp["tgt_id"],
+ "content": dp["keywords"] + dp["src_id"] + dp["tgt_id"] + dp["description"],
+ }
+ for dp in all_relationships_data
+ }
+ await relationships_vdb.upsert(data_for_vdb)
+
+ return knwoledge_graph_inst
+
+async def local_query(
+ query,
+ knowledge_graph_inst: BaseGraphStorage,
+ entities_vdb: BaseVectorStorage,
+ relationships_vdb: BaseVectorStorage,
+ text_chunks_db: BaseKVStorage[TextChunkSchema],
+ query_param: QueryParam,
+ global_config: dict,
+) -> str:
+ use_model_func = global_config["llm_model_func"]
+
+ kw_prompt_temp = PROMPTS["keywords_extraction"]
+ kw_prompt = kw_prompt_temp.format(query=query)
+ result = await use_model_func(kw_prompt)
+
+ try:
+ keywords_data = json.loads(result)
+ keywords = keywords_data.get("low_level_keywords", [])
+ keywords = ', '.join(keywords)
+ except json.JSONDecodeError as e:
+ # Handle parsing error
+ print(f"JSON parsing error: {e}")
+ return PROMPTS["fail_response"]
+
+ context = await _build_local_query_context(
+ keywords,
+ knowledge_graph_inst,
+ entities_vdb,
+ text_chunks_db,
+ query_param,
+ )
+ if query_param.only_need_context:
+ return context
+ if context is None:
+ return PROMPTS["fail_response"]
+ sys_prompt_temp = PROMPTS["rag_response"]
+ sys_prompt = sys_prompt_temp.format(
+ context_data=context, response_type=query_param.response_type
+ )
+ response = await use_model_func(
+ query,
+ system_prompt=sys_prompt,
+ )
+ return response
+
+async def _build_local_query_context(
+ query,
+ knowledge_graph_inst: BaseGraphStorage,
+ entities_vdb: BaseVectorStorage,
+ text_chunks_db: BaseKVStorage[TextChunkSchema],
+ query_param: QueryParam,
+):
+ results = await entities_vdb.query(query, top_k=query_param.top_k)
+ if not len(results):
+ return None
+ node_datas = await asyncio.gather(
+ *[knowledge_graph_inst.get_node(r["entity_name"]) for r in results]
+ )
+ if not all([n is not None for n in node_datas]):
+ logger.warning("Some nodes are missing, maybe the storage is damaged")
+ node_degrees = await asyncio.gather(
+ *[knowledge_graph_inst.node_degree(r["entity_name"]) for r in results]
+ )
+ node_datas = [
+ {**n, "entity_name": k["entity_name"], "rank": d}
+ for k, n, d in zip(results, node_datas, node_degrees)
+ if n is not None
+ ]
+ use_text_units = await _find_most_related_text_unit_from_entities(
+ node_datas, query_param, text_chunks_db, knowledge_graph_inst
+ )
+ use_relations = await _find_most_related_edges_from_entities(
+ node_datas, query_param, knowledge_graph_inst
+ )
+ logger.info(
+ f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} text units"
+ )
+ entites_section_list = [["id", "entity", "type", "description", "rank"]]
+ for i, n in enumerate(node_datas):
+ entites_section_list.append(
+ [
+ i,
+ n["entity_name"],
+ n.get("entity_type", "UNKNOWN"),
+ n.get("description", "UNKNOWN"),
+ n["rank"],
+ ]
+ )
+ entities_context = list_of_list_to_csv(entites_section_list)
+
+ relations_section_list = [
+ ["id", "source", "target", "description", "keywords", "weight", "rank"]
+ ]
+ for i, e in enumerate(use_relations):
+ relations_section_list.append(
+ [
+ i,
+ e["src_tgt"][0],
+ e["src_tgt"][1],
+ e["description"],
+ e["keywords"],
+ e["weight"],
+ e["rank"],
+ ]
+ )
+ relations_context = list_of_list_to_csv(relations_section_list)
+
+ text_units_section_list = [["id", "content"]]
+ for i, t in enumerate(use_text_units):
+ text_units_section_list.append([i, t["content"]])
+ text_units_context = list_of_list_to_csv(text_units_section_list)
+ return f"""
+-----Entities-----
+```csv
+{entities_context}
+```
+-----Relationships-----
+```csv
+{relations_context}
+```
+-----Sources-----
+```csv
+{text_units_context}
+```
+"""
+
+async def _find_most_related_text_unit_from_entities(
+ node_datas: list[dict],
+ query_param: QueryParam,
+ text_chunks_db: BaseKVStorage[TextChunkSchema],
+ knowledge_graph_inst: BaseGraphStorage,
+):
+ text_units = [
+ split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
+ for dp in node_datas
+ ]
+ edges = await asyncio.gather(
+ *[knowledge_graph_inst.get_node_edges(dp["entity_name"]) for dp in node_datas]
+ )
+ all_one_hop_nodes = set()
+ for this_edges in edges:
+ if not this_edges:
+ continue
+ all_one_hop_nodes.update([e[1] for e in this_edges])
+ all_one_hop_nodes = list(all_one_hop_nodes)
+ all_one_hop_nodes_data = await asyncio.gather(
+ *[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
+ )
+ all_one_hop_text_units_lookup = {
+ k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
+ for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
+ if v is not None
+ }
+ all_text_units_lookup = {}
+ for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
+ for c_id in this_text_units:
+ if c_id in all_text_units_lookup:
+ continue
+ relation_counts = 0
+ for e in this_edges:
+ if (
+ e[1] in all_one_hop_text_units_lookup
+ and c_id in all_one_hop_text_units_lookup[e[1]]
+ ):
+ relation_counts += 1
+ all_text_units_lookup[c_id] = {
+ "data": await text_chunks_db.get_by_id(c_id),
+ "order": index,
+ "relation_counts": relation_counts,
+ }
+ if any([v is None for v in all_text_units_lookup.values()]):
+ logger.warning("Text chunks are missing, maybe the storage is damaged")
+ all_text_units = [
+ {"id": k, **v} for k, v in all_text_units_lookup.items() if v is not None
+ ]
+ all_text_units = sorted(
+ all_text_units, key=lambda x: (x["order"], -x["relation_counts"])
+ )
+ all_text_units = truncate_list_by_token_size(
+ all_text_units,
+ key=lambda x: x["data"]["content"],
+ max_token_size=query_param.max_token_for_text_unit,
+ )
+ all_text_units: list[TextChunkSchema] = [t["data"] for t in all_text_units]
+ return all_text_units
+
+async def _find_most_related_edges_from_entities(
+ node_datas: list[dict],
+ query_param: QueryParam,
+ knowledge_graph_inst: BaseGraphStorage,
+):
+ all_related_edges = await asyncio.gather(
+ *[knowledge_graph_inst.get_node_edges(dp["entity_name"]) for dp in node_datas]
+ )
+ all_edges = set()
+ for this_edges in all_related_edges:
+ all_edges.update([tuple(sorted(e)) for e in this_edges])
+ all_edges = list(all_edges)
+ all_edges_pack = await asyncio.gather(
+ *[knowledge_graph_inst.get_edge(e[0], e[1]) for e in all_edges]
+ )
+ all_edges_degree = await asyncio.gather(
+ *[knowledge_graph_inst.edge_degree(e[0], e[1]) for e in all_edges]
+ )
+ all_edges_data = [
+ {"src_tgt": k, "rank": d, **v}
+ for k, v, d in zip(all_edges, all_edges_pack, all_edges_degree)
+ if v is not None
+ ]
+ all_edges_data = sorted(
+ all_edges_data, key=lambda x: (x["rank"], x["weight"]), reverse=True
+ )
+ all_edges_data = truncate_list_by_token_size(
+ all_edges_data,
+ key=lambda x: x["description"],
+ max_token_size=query_param.max_token_for_global_context,
+ )
+ return all_edges_data
+
+async def global_query(
+ query,
+ knowledge_graph_inst: BaseGraphStorage,
+ entities_vdb: BaseVectorStorage,
+ relationships_vdb: BaseVectorStorage,
+ text_chunks_db: BaseKVStorage[TextChunkSchema],
+ query_param: QueryParam,
+ global_config: dict,
+) -> str:
+ use_model_func = global_config["llm_model_func"]
+
+ kw_prompt_temp = PROMPTS["keywords_extraction"]
+ kw_prompt = kw_prompt_temp.format(query=query)
+ result = await use_model_func(kw_prompt)
+
+ try:
+ keywords_data = json.loads(result)
+ keywords = keywords_data.get("high_level_keywords", [])
+ keywords = ', '.join(keywords)
+ except json.JSONDecodeError as e:
+ # Handle parsing error
+ print(f"JSON parsing error: {e}")
+ return PROMPTS["fail_response"]
+
+ context = await _build_global_query_context(
+ keywords,
+ knowledge_graph_inst,
+ entities_vdb,
+ relationships_vdb,
+ text_chunks_db,
+ query_param,
+ )
+
+ if query_param.only_need_context:
+ return context
+ if context is None:
+ return PROMPTS["fail_response"]
+
+ sys_prompt_temp = PROMPTS["rag_response"]
+ sys_prompt = sys_prompt_temp.format(
+ context_data=context, response_type=query_param.response_type
+ )
+ response = await use_model_func(
+ query,
+ system_prompt=sys_prompt,
+ )
+ return response
+
+async def _build_global_query_context(
+ keywords,
+ knowledge_graph_inst: BaseGraphStorage,
+ entities_vdb: BaseVectorStorage,
+ relationships_vdb: BaseVectorStorage,
+ text_chunks_db: BaseKVStorage[TextChunkSchema],
+ query_param: QueryParam,
+):
+ results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
+
+ if not len(results):
+ return None
+
+ edge_datas = await asyncio.gather(
+ *[knowledge_graph_inst.get_edge(r["src_id"], r["tgt_id"]) for r in results]
+ )
+
+ if not all([n is not None for n in edge_datas]):
+ logger.warning("Some edges are missing, maybe the storage is damaged")
+ edge_degree = await asyncio.gather(
+ *[knowledge_graph_inst.edge_degree(r["src_id"], r["tgt_id"]) for r in results]
+ )
+ edge_datas = [
+ {"src_id": k["src_id"], "tgt_id": k["tgt_id"], "rank": d, **v}
+ for k, v, d in zip(results, edge_datas, edge_degree)
+ if v is not None
+ ]
+ edge_datas = sorted(
+ edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
+ )
+ edge_datas = truncate_list_by_token_size(
+ edge_datas,
+ key=lambda x: x["description"],
+ max_token_size=query_param.max_token_for_global_context,
+ )
+
+ use_entities = await _find_most_related_entities_from_relationships(
+ edge_datas, query_param, knowledge_graph_inst
+ )
+ use_text_units = await _find_related_text_unit_from_relationships(
+ edge_datas, query_param, text_chunks_db, knowledge_graph_inst
+ )
+ logger.info(
+ f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} text units"
+ )
+ relations_section_list = [
+ ["id", "source", "target", "description", "keywords", "weight", "rank"]
+ ]
+ for i, e in enumerate(edge_datas):
+ relations_section_list.append(
+ [
+ i,
+ e["src_id"],
+ e["tgt_id"],
+ e["description"],
+ e["keywords"],
+ e["weight"],
+ e["rank"],
+ ]
+ )
+ relations_context = list_of_list_to_csv(relations_section_list)
+
+ entites_section_list = [["id", "entity", "type", "description", "rank"]]
+ for i, n in enumerate(use_entities):
+ entites_section_list.append(
+ [
+ i,
+ n["entity_name"],
+ n.get("entity_type", "UNKNOWN"),
+ n.get("description", "UNKNOWN"),
+ n["rank"],
+ ]
+ )
+ entities_context = list_of_list_to_csv(entites_section_list)
+
+ text_units_section_list = [["id", "content"]]
+ for i, t in enumerate(use_text_units):
+ text_units_section_list.append([i, t["content"]])
+ text_units_context = list_of_list_to_csv(text_units_section_list)
+
+ return f"""
+-----Entities-----
+```csv
+{entities_context}
+```
+-----Relationships-----
+```csv
+{relations_context}
+```
+-----Sources-----
+```csv
+{text_units_context}
+```
+"""
+
+async def _find_most_related_entities_from_relationships(
+ edge_datas: list[dict],
+ query_param: QueryParam,
+ knowledge_graph_inst: BaseGraphStorage,
+):
+ entity_names = set()
+ for e in edge_datas:
+ entity_names.add(e["src_id"])
+ entity_names.add(e["tgt_id"])
+
+ node_datas = await asyncio.gather(
+ *[knowledge_graph_inst.get_node(entity_name) for entity_name in entity_names]
+ )
+
+ node_degrees = await asyncio.gather(
+ *[knowledge_graph_inst.node_degree(entity_name) for entity_name in entity_names]
+ )
+ node_datas = [
+ {**n, "entity_name": k, "rank": d}
+ for k, n, d in zip(entity_names, node_datas, node_degrees)
+ ]
+
+ node_datas = truncate_list_by_token_size(
+ node_datas,
+ key=lambda x: x["description"],
+ max_token_size=query_param.max_token_for_local_context,
+ )
+
+ return node_datas
+
+async def _find_related_text_unit_from_relationships(
+ edge_datas: list[dict],
+ query_param: QueryParam,
+ text_chunks_db: BaseKVStorage[TextChunkSchema],
+ knowledge_graph_inst: BaseGraphStorage,
+):
+
+ text_units = [
+ split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
+ for dp in edge_datas
+ ]
+
+ all_text_units_lookup = {}
+
+ for index, unit_list in enumerate(text_units):
+ for c_id in unit_list:
+ if c_id not in all_text_units_lookup:
+ all_text_units_lookup[c_id] = {
+ "data": await text_chunks_db.get_by_id(c_id),
+ "order": index,
+ }
+
+ if any([v is None for v in all_text_units_lookup.values()]):
+ logger.warning("Text chunks are missing, maybe the storage is damaged")
+ all_text_units = [
+ {"id": k, **v} for k, v in all_text_units_lookup.items() if v is not None
+ ]
+ all_text_units = sorted(
+ all_text_units, key=lambda x: x["order"]
+ )
+ all_text_units = truncate_list_by_token_size(
+ all_text_units,
+ key=lambda x: x["data"]["content"],
+ max_token_size=query_param.max_token_for_text_unit,
+ )
+ all_text_units: list[TextChunkSchema] = [t["data"] for t in all_text_units]
+
+ return all_text_units
+
+async def hybird_query(
+ query,
+ knowledge_graph_inst: BaseGraphStorage,
+ entities_vdb: BaseVectorStorage,
+ relationships_vdb: BaseVectorStorage,
+ text_chunks_db: BaseKVStorage[TextChunkSchema],
+ query_param: QueryParam,
+ global_config: dict,
+) -> str:
+ use_model_func = global_config["llm_model_func"]
+
+ kw_prompt_temp = PROMPTS["keywords_extraction"]
+ kw_prompt = kw_prompt_temp.format(query=query)
+ result = await use_model_func(kw_prompt)
+
+ try:
+ keywords_data = json.loads(result)
+ hl_keywords = keywords_data.get("high_level_keywords", [])
+ ll_keywords = keywords_data.get("low_level_keywords", [])
+ hl_keywords = ', '.join(hl_keywords)
+ ll_keywords = ', '.join(ll_keywords)
+ except json.JSONDecodeError as e:
+ # Handle parsing error
+ print(f"JSON parsing error: {e}")
+ return PROMPTS["fail_response"]
+
+ low_level_context = await _build_local_query_context(
+ ll_keywords,
+ knowledge_graph_inst,
+ entities_vdb,
+ text_chunks_db,
+ query_param,
+ )
+
+ high_level_context = await _build_global_query_context(
+ hl_keywords,
+ knowledge_graph_inst,
+ entities_vdb,
+ relationships_vdb,
+ text_chunks_db,
+ query_param,
+ )
+
+ context = combine_contexts(high_level_context, low_level_context)
+
+ if query_param.only_need_context:
+ return context
+ if context is None:
+ return PROMPTS["fail_response"]
+
+ sys_prompt_temp = PROMPTS["rag_response"]
+ sys_prompt = sys_prompt_temp.format(
+ context_data=context, response_type=query_param.response_type
+ )
+ response = await use_model_func(
+ query,
+ system_prompt=sys_prompt,
+ )
+ return response
+
+def combine_contexts(high_level_context, low_level_context):
+ # Function to extract entities, relationships, and sources from context strings
+ def extract_sections(context):
+ entities_match = re.search(r'-----Entities-----\s*```csv\s*(.*?)\s*```', context, re.DOTALL)
+ relationships_match = re.search(r'-----Relationships-----\s*```csv\s*(.*?)\s*```', context, re.DOTALL)
+ sources_match = re.search(r'-----Sources-----\s*```csv\s*(.*?)\s*```', context, re.DOTALL)
+
+ entities = entities_match.group(1) if entities_match else ''
+ relationships = relationships_match.group(1) if relationships_match else ''
+ sources = sources_match.group(1) if sources_match else ''
+
+ return entities, relationships, sources
+
+ # Extract sections from both contexts
+ hl_entities, hl_relationships, hl_sources = extract_sections(high_level_context)
+ ll_entities, ll_relationships, ll_sources = extract_sections(low_level_context)
+
+ # Combine and deduplicate the entities
+ combined_entities_set = set(filter(None, hl_entities.strip().split('\n') + ll_entities.strip().split('\n')))
+ combined_entities = '\n'.join(combined_entities_set)
+
+ # Combine and deduplicate the relationships
+ combined_relationships_set = set(filter(None, hl_relationships.strip().split('\n') + ll_relationships.strip().split('\n')))
+ combined_relationships = '\n'.join(combined_relationships_set)
+
+ # Combine and deduplicate the sources
+ combined_sources_set = set(filter(None, hl_sources.strip().split('\n') + ll_sources.strip().split('\n')))
+ combined_sources = '\n'.join(combined_sources_set)
+
+ # Format the combined context
+ return f"""
+-----Entities-----
+```csv
+{combined_entities}
+-----Relationships-----
+{combined_relationships}
+-----Sources-----
+{combined_sources}
+"""
+
+async def naive_query(
+ query,
+ chunks_vdb: BaseVectorStorage,
+ text_chunks_db: BaseKVStorage[TextChunkSchema],
+ query_param: QueryParam,
+ global_config: dict,
+):
+ use_model_func = global_config["llm_model_func"]
+ results = await chunks_vdb.query(query, top_k=query_param.top_k)
+ if not len(results):
+ return PROMPTS["fail_response"]
+ chunks_ids = [r["id"] for r in results]
+ chunks = await text_chunks_db.get_by_ids(chunks_ids)
+
+ maybe_trun_chunks = truncate_list_by_token_size(
+ chunks,
+ key=lambda x: x["content"],
+ max_token_size=query_param.max_token_for_text_unit,
+ )
+ logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
+ section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
+ if query_param.only_need_context:
+ return section
+ sys_prompt_temp = PROMPTS["naive_rag_response"]
+ sys_prompt = sys_prompt_temp.format(
+ content_data=section, response_type=query_param.response_type
+ )
+ response = await use_model_func(
+ query,
+ system_prompt=sys_prompt,
+ )
+ return response
+
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
new file mode 100644
index 00000000..5d28e49c
--- /dev/null
+++ b/lightrag/prompt.py
@@ -0,0 +1,256 @@
+GRAPH_FIELD_SEP = "<SEP>"
+
+PROMPTS = {}
+
+PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
+PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
+PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
+PROMPTS["process_tickers"] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
+
+PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event"]
+
+PROMPTS[
+ "entity_extraction"
+] = """-Goal-
+Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.
+
+-Steps-
+1. Identify all entities. For each identified entity, extract the following information:
+- entity_name: Name of the entity, capitalized
+- entity_type: One of the following types: [{entity_types}]
+- entity_description: Comprehensive description of the entity's attributes and activities
+Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>)
+
+2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
+For each pair of related entities, extract the following information:
+- source_entity: name of the source entity, as identified in step 1
+- target_entity: name of the target entity, as identified in step 1
+- relationship_description: explanation as to why you think the source entity and the target entity are related to each other
+- relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
+- relationship_keywords: one or more high-level key words that summarize the overarching nature of the relationship, focusing on concepts or themes rather than specific details
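+Format each relationship as ("relationship"{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_description>{tuple_delimiter}<relationship_keywords>{tuple_delimiter}<relationship_strength>)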
+
+3. Identify high-level key words that summarize the main concepts, themes, or topics of the entire text. These should capture the overarching ideas present in the document.
+Format the content-level key words as ("content_keywords"{tuple_delimiter}<high_level_keywords>)
+
+4. Return output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
+
+5. When finished, output {completion_delimiter}
+
+######################
+-Examples-
+######################
+Example 1:
+
+Entity_types: [person, technology, mission, organization, location]
+Text:
+while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
+
+Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. "If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us."
+
+The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
+
+It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths
+################
+Output:
+("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is a character who experiences frustration and is observant of the dynamics among other characters."){record_delimiter}
+("entity"{tuple_delimiter}"Taylor"{tuple_delimiter}"person"{tuple_delimiter}"Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective."){record_delimiter}
+("entity"{tuple_delimiter}"Jordan"{tuple_delimiter}"person"{tuple_delimiter}"Jordan shares a commitment to discovery and has a significant interaction with Taylor regarding a device."){record_delimiter}
+("entity"{tuple_delimiter}"Cruz"{tuple_delimiter}"person"{tuple_delimiter}"Cruz is associated with a vision of control and order, influencing the dynamics among other characters."){record_delimiter}
+("entity"{tuple_delimiter}"The Device"{tuple_delimiter}"technology"{tuple_delimiter}"The Device is central to the story, with potential game-changing implications, and is revered by Taylor."){record_delimiter}
+("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Taylor"{tuple_delimiter}"Alex is affected by Taylor's authoritarian certainty and observes changes in Taylor's attitude towards the device."{tuple_delimiter}"power dynamics, perspective shift"{tuple_delimiter}7){record_delimiter}
+("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Jordan"{tuple_delimiter}"Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision."{tuple_delimiter}"shared goals, rebellion"{tuple_delimiter}6){record_delimiter}
+("relationship"{tuple_delimiter}"Taylor"{tuple_delimiter}"Jordan"{tuple_delimiter}"Taylor and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy truce."{tuple_delimiter}"conflict resolution, mutual respect"{tuple_delimiter}8){record_delimiter}
+("relationship"{tuple_delimiter}"Jordan"{tuple_delimiter}"Cruz"{tuple_delimiter}"Jordan's commitment to discovery is in rebellion against Cruz's vision of control and order."{tuple_delimiter}"ideological conflict, rebellion"{tuple_delimiter}5){record_delimiter}
+("relationship"{tuple_delimiter}"Taylor"{tuple_delimiter}"The Device"{tuple_delimiter}"Taylor shows reverence towards the device, indicating its importance and potential impact."{tuple_delimiter}"reverence, technological significance"{tuple_delimiter}9){record_delimiter}
+("content_keywords"{tuple_delimiter}"power dynamics, ideological conflict, discovery, rebellion"){completion_delimiter}
+#############################
+Example 2:
+
+Entity_types: [person, technology, mission, organization, location]
+Text:
+They were no longer mere operatives; they had become guardians of a threshold, keepers of a message from a realm beyond stars and stripes. This elevation in their mission could not be shackled by regulations and established protocols—it demanded a new perspective, a new resolve.
+
+Tension threaded through the dialogue of beeps and static as communications with Washington buzzed in the background. The team stood, a portentous air enveloping them. It was clear that the decisions they made in the ensuing hours could redefine humanity's place in the cosmos or condemn them to ignorance and potential peril.
+
+Their connection to the stars solidified, the group moved to address the crystallizing warning, shifting from passive recipients to active participants. Mercer's latter instincts gained precedence— the team's mandate had evolved, no longer solely to observe and report but to interact and prepare. A metamorphosis had begun, and Operation: Dulce hummed with the newfound frequency of their daring, a tone set not by the earthly
+#############
+Output:
+("entity"{tuple_delimiter}"Washington"{tuple_delimiter}"location"{tuple_delimiter}"Washington is a location where communications are being received, indicating its importance in the decision-making process."){record_delimiter}
+("entity"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"mission"{tuple_delimiter}"Operation: Dulce is described as a mission that has evolved to interact and prepare, indicating a significant shift in objectives and activities."){record_delimiter}
+("entity"{tuple_delimiter}"The team"{tuple_delimiter}"organization"{tuple_delimiter}"The team is portrayed as a group of individuals who have transitioned from passive observers to active participants in a mission, showing a dynamic change in their role."){record_delimiter}
+("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Washington"{tuple_delimiter}"The team receives communications from Washington, which influences their decision-making process."{tuple_delimiter}"decision-making, external influence"{tuple_delimiter}7){record_delimiter}
+("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"The team is directly involved in Operation: Dulce, executing its evolved objectives and activities."{tuple_delimiter}"mission evolution, active participation"{tuple_delimiter}9){record_delimiter}
+("content_keywords"{tuple_delimiter}"mission evolution, decision-making, active participation, cosmic significance"){completion_delimiter}
+#############################
+Example 3:
+
+Entity_types: [person, role, technology, organization, event, location, concept]
+Text:
+their voice slicing through the buzz of activity. "Control may be an illusion when facing an intelligence that literally writes its own rules," they stated stoically, casting a watchful eye over the flurry of data.
+
+"It's like it's learning to communicate," offered Sam Rivera from a nearby interface, their youthful energy boding a mix of awe and anxiety. "This gives 'talking to strangers' a whole new meaning."
+
+Alex surveyed his team—each face a study in concentration, determination, and not a small measure of trepidation. "This might well be our first contact," he acknowledged, "And we need to be ready for whatever answers back."
+
+Together, they stood on the edge of the unknown, forging humanity's response to a message from the heavens. The ensuing silence was palpable—a collective introspection about their role in this grand cosmic play, one that could rewrite human history.
+
+The encrypted dialogue continued to unfold, its intricate patterns showing an almost uncanny anticipation
+#############
+Output:
+("entity"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"person"{tuple_delimiter}"Sam Rivera is a member of a team working on communicating with an unknown intelligence, showing a mix of awe and anxiety."){record_delimiter}
+("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is the leader of a team attempting first contact with an unknown intelligence, acknowledging the significance of their task."){record_delimiter}
+("entity"{tuple_delimiter}"Control"{tuple_delimiter}"concept"{tuple_delimiter}"Control refers to the ability to manage or govern, which is challenged by an intelligence that writes its own rules."){record_delimiter}
+("entity"{tuple_delimiter}"Intelligence"{tuple_delimiter}"concept"{tuple_delimiter}"Intelligence here refers to an unknown entity capable of writing its own rules and learning to communicate."){record_delimiter}
+("entity"{tuple_delimiter}"First Contact"{tuple_delimiter}"event"{tuple_delimiter}"First Contact is the potential initial communication between humanity and an unknown intelligence."){record_delimiter}
+("entity"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"event"{tuple_delimiter}"Humanity's Response is the collective action taken by Alex's team in response to a message from an unknown intelligence."){record_delimiter}
+("relationship"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"Intelligence"{tuple_delimiter}"Sam Rivera is directly involved in the process of learning to communicate with the unknown intelligence."{tuple_delimiter}"communication, learning process"{tuple_delimiter}9){record_delimiter}
+("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"First Contact"{tuple_delimiter}"Alex leads the team that might be making the First Contact with the unknown intelligence."{tuple_delimiter}"leadership, exploration"{tuple_delimiter}10){record_delimiter}
+("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"Alex and his team are the key figures in Humanity's Response to the unknown intelligence."{tuple_delimiter}"collective action, cosmic significance"{tuple_delimiter}8){record_delimiter}
+("relationship"{tuple_delimiter}"Control"{tuple_delimiter}"Intelligence"{tuple_delimiter}"The concept of Control is challenged by the Intelligence that writes its own rules."{tuple_delimiter}"power dynamics, autonomy"{tuple_delimiter}7){record_delimiter}
+("content_keywords"{tuple_delimiter}"first contact, control, communication, cosmic significance"){completion_delimiter}
+#############################
+-Real Data-
+######################
+Entity_types: {entity_types}
+Text: {input_text}
+######################
+Output:
+"""
+
+PROMPTS[
+ "summarize_entity_descriptions"
+] = """You are a helpful assistant responsible for generating a comprehensive summary of the data provided below.
+Given one or two entities, and a list of descriptions, all related to the same entity or group of entities.
+Please concatenate all of these into a single, comprehensive description. Make sure to include information collected from all the descriptions.
+If the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary.
+Make sure it is written in third person, and include the entity names so we the have full context.
+
+#######
+-Data-
+Entities: {entity_name}
+Description List: {description_list}
+#######
+Output:
+"""
+
+PROMPTS[
+ "entiti_continue_extraction"
+] = """MANY entities were missed in the last extraction. Add them below using the same format:
+"""
+
+PROMPTS[
+ "entiti_if_loop_extraction"
+] = """It appears some entities may have still been missed. Answer YES | NO if there are still entities that need to be added.
+"""
+
+PROMPTS["fail_response"] = "Sorry, I'm not able to provide an answer to that question."
+
+PROMPTS[
+ "rag_response"
+] = """---Role---
+
+You are a helpful assistant responding to questions about data in the tables provided.
+
+
+---Goal---
+
+Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
+If you don't know the answer, just say so. Do not make anything up.
+Do not include information where the supporting evidence for it is not provided.
+
+---Target response length and format---
+
+{response_type}
+
+
+---Data tables---
+
+{context_data}
+
+
+---Goal---
+
+Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
+
+If you don't know the answer, just say so. Do not make anything up.
+
+Do not include information where the supporting evidence for it is not provided.
+
+
+---Target response length and format---
+
+{response_type}
+
+Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown.
+"""
+
+PROMPTS["keywords_extraction"] = """---Role---
+
+You are a helpful assistant tasked with identifying both high-level and low-level keywords in the user's query.
+
+---Goal---
+
+Given the query, list both high-level and low-level keywords. High-level keywords focus on overarching concepts or themes, while low-level keywords focus on specific entities, details, or concrete terms.
+
+---Instructions---
+
+- Output the keywords in JSON format.
+- The JSON should have two keys:
+ - "high_level_keywords" for overarching concepts or themes.
+ - "low_level_keywords" for specific entities or details.
+
+######################
+-Examples-
+######################
+Example 1:
+
+Query: "How does international trade influence global economic stability?"
+################
+Output:
+{{
+ "high_level_keywords": ["International trade", "Global economic stability", "Economic impact"],
+ "low_level_keywords": ["Trade agreements", "Tariffs", "Currency exchange", "Imports", "Exports"]
+}}
+#############################
+Example 2:
+
+Query: "What are the environmental consequences of deforestation on biodiversity?"
+################
+Output:
+{{
+ "high_level_keywords": ["Environmental consequences", "Deforestation", "Biodiversity loss"],
+ "low_level_keywords": ["Species extinction", "Habitat destruction", "Carbon emissions", "Rainforest", "Ecosystem"]
+}}
+#############################
+Example 3:
+
+Query: "What is the role of education in reducing poverty?"
+################
+Output:
+{{
+ "high_level_keywords": ["Education", "Poverty reduction", "Socioeconomic development"],
+ "low_level_keywords": ["School access", "Literacy rates", "Job training", "Income inequality"]
+}}
+#############################
+-Real Data-
+######################
+Query: {query}
+######################
+Output:
+
+"""
+
+PROMPTS[
+ "naive_rag_response"
+] = """You're a helpful assistant
+Below are the knowledge you know:
+{content_data}
+---
+If you don't know the answer or if the provided knowledge do not contain sufficient information to provide an answer, just say so. Do not make anything up.
+Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
+If you don't know the answer, just say so. Do not make anything up.
+Do not include information where the supporting evidence for it is not provided.
+---Target response length and format---
+{response_type}
+"""
diff --git a/lightrag/storage.py b/lightrag/storage.py
new file mode 100644
index 00000000..2f2bb7d8
--- /dev/null
+++ b/lightrag/storage.py
@@ -0,0 +1,246 @@
+import asyncio
+import html
+import json
+import os
+from collections import defaultdict
+from dataclasses import dataclass, field
+from typing import Any, Union, cast
+import pickle
+import hnswlib
+import networkx as nx
+import numpy as np
+from nano_vectordb import NanoVectorDB
+import xxhash
+
+from .utils import load_json, logger, write_json
+from .base import (
+ BaseGraphStorage,
+ BaseKVStorage,
+ BaseVectorStorage,
+)
+
+@dataclass
+class JsonKVStorage(BaseKVStorage):
+ def __post_init__(self):
+ working_dir = self.global_config["working_dir"]
+ self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
+ self._data = load_json(self._file_name) or {}
+ logger.info(f"Load KV {self.namespace} with {len(self._data)} data")
+
+ async def all_keys(self) -> list[str]:
+ return list(self._data.keys())
+
+ async def index_done_callback(self):
+ write_json(self._data, self._file_name)
+
+ async def get_by_id(self, id):
+ return self._data.get(id, None)
+
+ async def get_by_ids(self, ids, fields=None):
+ if fields is None:
+ return [self._data.get(id, None) for id in ids]
+ return [
+ (
+ {k: v for k, v in self._data[id].items() if k in fields}
+ if self._data.get(id, None)
+ else None
+ )
+ for id in ids
+ ]
+
+ async def filter_keys(self, data: list[str]) -> set[str]:
+ return set([s for s in data if s not in self._data])
+
+ async def upsert(self, data: dict[str, dict]):
+ left_data = {k: v for k, v in data.items() if k not in self._data}
+ self._data.update(left_data)
+ return left_data
+
+ async def drop(self):
+ self._data = {}
+
+@dataclass
+class NanoVectorDBStorage(BaseVectorStorage):
+ cosine_better_than_threshold: float = 0.2
+
+ def __post_init__(self):
+
+ self._client_file_name = os.path.join(
+ self.global_config["working_dir"], f"vdb_{self.namespace}.json"
+ )
+ self._max_batch_size = self.global_config["embedding_batch_num"]
+ self._client = NanoVectorDB(
+ self.embedding_func.embedding_dim, storage_file=self._client_file_name
+ )
+ self.cosine_better_than_threshold = self.global_config.get(
+ "cosine_better_than_threshold", self.cosine_better_than_threshold
+ )
+
+ async def upsert(self, data: dict[str, dict]):
+ logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
+ if not len(data):
+ logger.warning("You insert an empty data to vector DB")
+ return []
+ list_data = [
+ {
+ "__id__": k,
+ **{k1: v1 for k1, v1 in v.items() if k1 in self.meta_fields},
+ }
+ for k, v in data.items()
+ ]
+ contents = [v["content"] for v in data.values()]
+ batches = [
+ contents[i : i + self._max_batch_size]
+ for i in range(0, len(contents), self._max_batch_size)
+ ]
+ embeddings_list = await asyncio.gather(
+ *[self.embedding_func(batch) for batch in batches]
+ )
+ embeddings = np.concatenate(embeddings_list)
+ for i, d in enumerate(list_data):
+ d["__vector__"] = embeddings[i]
+ results = self._client.upsert(datas=list_data)
+ return results
+
+ async def query(self, query: str, top_k=5):
+ embedding = await self.embedding_func([query])
+ embedding = embedding[0]
+ results = self._client.query(
+ query=embedding,
+ top_k=top_k,
+ better_than_threshold=self.cosine_better_than_threshold,
+ )
+ results = [
+ {**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
+ ]
+ return results
+
+ async def index_done_callback(self):
+ self._client.save()
+
+@dataclass
+class NetworkXStorage(BaseGraphStorage):
+ @staticmethod
+ def load_nx_graph(file_name) -> nx.Graph:
+ if os.path.exists(file_name):
+ return nx.read_graphml(file_name)
+ return None
+
+ @staticmethod
+ def write_nx_graph(graph: nx.Graph, file_name):
+ logger.info(
+ f"Writing graph with {graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges"
+ )
+ nx.write_graphml(graph, file_name)
+
+ @staticmethod
+ def stable_largest_connected_component(graph: nx.Graph) -> nx.Graph:
+ """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
+ Return the largest connected component of the graph, with nodes and edges sorted in a stable way.
+ """
+ from graspologic.utils import largest_connected_component
+
+ graph = graph.copy()
+ graph = cast(nx.Graph, largest_connected_component(graph))
+ node_mapping = {node: html.unescape(node.upper().strip()) for node in graph.nodes()} # type: ignore
+ graph = nx.relabel_nodes(graph, node_mapping)
+ return NetworkXStorage._stabilize_graph(graph)
+
+ @staticmethod
+ def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
+ """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
+ Ensure an undirected graph with the same relationships will always be read the same way.
+ """
+ fixed_graph = nx.DiGraph() if graph.is_directed() else nx.Graph()
+
+ sorted_nodes = graph.nodes(data=True)
+ sorted_nodes = sorted(sorted_nodes, key=lambda x: x[0])
+
+ fixed_graph.add_nodes_from(sorted_nodes)
+ edges = list(graph.edges(data=True))
+
+ if not graph.is_directed():
+
+ def _sort_source_target(edge):
+ source, target, edge_data = edge
+ if source > target:
+ temp = source
+ source = target
+ target = temp
+ return source, target, edge_data
+
+ edges = [_sort_source_target(edge) for edge in edges]
+
+ def _get_edge_key(source: Any, target: Any) -> str:
+ return f"{source} -> {target}"
+
+ edges = sorted(edges, key=lambda x: _get_edge_key(x[0], x[1]))
+
+ fixed_graph.add_edges_from(edges)
+ return fixed_graph
+
+ def __post_init__(self):
+ self._graphml_xml_file = os.path.join(
+ self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
+ )
+ preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
+ if preloaded_graph is not None:
+ logger.info(
+ f"Loaded graph from {self._graphml_xml_file} with {preloaded_graph.number_of_nodes()} nodes, {preloaded_graph.number_of_edges()} edges"
+ )
+ self._graph = preloaded_graph or nx.Graph()
+ self._node_embed_algorithms = {
+ "node2vec": self._node2vec_embed,
+ }
+
+ async def index_done_callback(self):
+ NetworkXStorage.write_nx_graph(self._graph, self._graphml_xml_file)
+
+ async def has_node(self, node_id: str) -> bool:
+ return self._graph.has_node(node_id)
+
+ async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
+ return self._graph.has_edge(source_node_id, target_node_id)
+
+ async def get_node(self, node_id: str) -> Union[dict, None]:
+ return self._graph.nodes.get(node_id)
+
+ async def node_degree(self, node_id: str) -> int:
+ return self._graph.degree(node_id)
+
+ async def edge_degree(self, src_id: str, tgt_id: str) -> int:
+ return self._graph.degree(src_id) + self._graph.degree(tgt_id)
+
+ async def get_edge(
+ self, source_node_id: str, target_node_id: str
+ ) -> Union[dict, None]:
+ return self._graph.edges.get((source_node_id, target_node_id))
+
+ async def get_node_edges(self, source_node_id: str):
+ if self._graph.has_node(source_node_id):
+ return list(self._graph.edges(source_node_id))
+ return None
+
+ async def upsert_node(self, node_id: str, node_data: dict[str, str]):
+ self._graph.add_node(node_id, **node_data)
+
+ async def upsert_edge(
+ self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
+ ):
+ self._graph.add_edge(source_node_id, target_node_id, **edge_data)
+
+ async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
+ if algorithm not in self._node_embed_algorithms:
+ raise ValueError(f"Node embedding algorithm {algorithm} not supported")
+ return await self._node_embed_algorithms[algorithm]()
+
+ async def _node2vec_embed(self):
+ from graspologic import embed
+
+ embeddings, nodes = embed.node2vec_embed(
+ self._graph,
+ **self.global_config["node2vec_params"],
+ )
+
+ nodes_ids = [self._graph.nodes[node_id]["id"] for node_id in nodes]
+ return embeddings, nodes_ids
diff --git a/lightrag/utils.py b/lightrag/utils.py
new file mode 100644
index 00000000..c75b4270
--- /dev/null
+++ b/lightrag/utils.py
@@ -0,0 +1,165 @@
+import asyncio
+import html
+import json
+import logging
+import os
+import re
+from dataclasses import dataclass
+from functools import wraps
+from hashlib import md5
+from typing import Any, Union
+
+import numpy as np
+import tiktoken
+
+ENCODER = None
+
+logger = logging.getLogger("lightrag")
+
+def set_logger(log_file: str):
+ logger.setLevel(logging.DEBUG)
+
+ file_handler = logging.FileHandler(log_file)
+ file_handler.setLevel(logging.DEBUG)
+
+ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ file_handler.setFormatter(formatter)
+
+ if not logger.handlers:
+ logger.addHandler(file_handler)
+
+@dataclass
+class EmbeddingFunc:
+ embedding_dim: int
+ max_token_size: int
+ func: callable
+
+ async def __call__(self, *args, **kwargs) -> np.ndarray:
+ return await self.func(*args, **kwargs)
+
+def locate_json_string_body_from_string(content: str) -> Union[str, None]:
+ """Locate the JSON string body from a string"""
+ maybe_json_str = re.search(r"{.*}", content, re.DOTALL)
+ if maybe_json_str is not None:
+ return maybe_json_str.group(0)
+ else:
+ return None
+
+def convert_response_to_json(response: str) -> dict:
+ json_str = locate_json_string_body_from_string(response)
+ assert json_str is not None, f"Unable to parse JSON from response: {response}"
+ try:
+ data = json.loads(json_str)
+ return data
+ except json.JSONDecodeError as e:
+ logger.error(f"Failed to parse JSON: {json_str}")
+ raise e from None
+
+def compute_args_hash(*args):
+ return md5(str(args).encode()).hexdigest()
+
+def compute_mdhash_id(content, prefix: str = ""):
+ return prefix + md5(content.encode()).hexdigest()
+
+def limit_async_func_call(max_size: int, waitting_time: float = 0.0001):
+ """Add restriction of maximum async calling times for a async func"""
+
+ def final_decro(func):
+ """Not using async.Semaphore to aovid use nest-asyncio"""
+ __current_size = 0
+
+ @wraps(func)
+ async def wait_func(*args, **kwargs):
+ nonlocal __current_size
+ while __current_size >= max_size:
+ await asyncio.sleep(waitting_time)
+ __current_size += 1
+ result = await func(*args, **kwargs)
+ __current_size -= 1
+ return result
+
+ return wait_func
+
+ return final_decro
+
+def wrap_embedding_func_with_attrs(**kwargs):
+ """Wrap a function with attributes"""
+
+ def final_decro(func) -> EmbeddingFunc:
+ new_func = EmbeddingFunc(**kwargs, func=func)
+ return new_func
+
+ return final_decro
+
+def load_json(file_name):
+ if not os.path.exists(file_name):
+ return None
+ with open(file_name) as f:
+ return json.load(f)
+
+def write_json(json_obj, file_name):
+ with open(file_name, "w") as f:
+ json.dump(json_obj, f, indent=2, ensure_ascii=False)
+
+def encode_string_by_tiktoken(content: str, model_name: str = "gpt-4o"):
+ global ENCODER
+ if ENCODER is None:
+ ENCODER = tiktoken.encoding_for_model(model_name)
+ tokens = ENCODER.encode(content)
+ return tokens
+
+
+def decode_tokens_by_tiktoken(tokens: list[int], model_name: str = "gpt-4o"):
+ global ENCODER
+ if ENCODER is None:
+ ENCODER = tiktoken.encoding_for_model(model_name)
+ content = ENCODER.decode(tokens)
+ return content
+
+def pack_user_ass_to_openai_messages(*args: str):
+ roles = ["user", "assistant"]
+ return [
+ {"role": roles[i % 2], "content": content} for i, content in enumerate(args)
+ ]
+
+def split_string_by_multi_markers(content: str, markers: list[str]) -> list[str]:
+ """Split a string by multiple markers"""
+ if not markers:
+ return [content]
+ results = re.split("|".join(re.escape(marker) for marker in markers), content)
+ return [r.strip() for r in results if r.strip()]
+
+# Refer to the utility functions of the official GraphRAG implementation:
+# https://github.com/microsoft/graphrag
+def clean_str(input: Any) -> str:
+ """Clean an input string by removing HTML escapes, control characters, and other unwanted characters."""
+ # If we get non-string input, just give it back
+ if not isinstance(input, str):
+ return input
+
+ result = html.unescape(input.strip())
+ # https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python
+ return re.sub(r"[\x00-\x1f\x7f-\x9f]", "", result)
+
+def is_float_regex(value):
+ return bool(re.match(r"^[-+]?[0-9]*\.?[0-9]+$", value))
+
+def truncate_list_by_token_size(list_data: list, key: callable, max_token_size: int):
+ """Truncate a list of data by token size"""
+ if max_token_size <= 0:
+ return []
+ tokens = 0
+ for i, data in enumerate(list_data):
+ tokens += len(encode_string_by_tiktoken(key(data)))
+ if tokens > max_token_size:
+ return list_data[:i]
+ return list_data
+
+def list_of_list_to_csv(data: list[list]):
+ return "\n".join(
+ [",\t".join([str(data_dd) for data_dd in data_d]) for data_d in data]
+ )
+
+def save_data_to_file(data, file_name):
+ with open(file_name, 'w', encoding='utf-8') as f:
+ json.dump(data, f, ensure_ascii=False, indent=4)
\ No newline at end of file
From 81209fa96be951216323ca65bd02e4f6dd51ccf0 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Thu, 10 Oct 2024 15:02:30 +0800
Subject: [PATCH 010/258] update
---
README.md | 198 +++++++++
lightrag/__init__.py | 5 +
lightrag/base.py | 116 ++++++
lightrag/lightrag.py | 300 ++++++++++++++
lightrag/llm.py | 88 ++++
lightrag/operate.py | 944 +++++++++++++++++++++++++++++++++++++++++++
lightrag/prompt.py | 256 ++++++++++++
lightrag/storage.py | 246 +++++++++++
lightrag/utils.py | 165 ++++++++
9 files changed, 2318 insertions(+)
create mode 100644 README.md
create mode 100644 lightrag/__init__.py
create mode 100644 lightrag/base.py
create mode 100644 lightrag/lightrag.py
create mode 100644 lightrag/llm.py
create mode 100644 lightrag/operate.py
create mode 100644 lightrag/prompt.py
create mode 100644 lightrag/storage.py
create mode 100644 lightrag/utils.py
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..42de1c1c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,198 @@
+# LightRAG: Simple and Fast Retrieval-Augmented Generation
+![请添加图片描述](https://i-blog.csdnimg.cn/direct/567139f1a36e4564abc63ce5c12b6271.jpeg)
+
+
+
+
+
+
+This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
+![请添加图片描述](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
+## Install
+
+* Install from source
+
+```bash
+cd LightRAG
+pip install -e .
+```
+* Install from PyPI
+```bash
+pip install lightrag-hku
+```
+
+## Quick Start
+
+* Set your OpenAI API key in the environment: `export OPENAI_API_KEY="sk-..."`.
+* Download the demo text "A Christmas Carol by Charles Dickens"
+```bash
+curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt
+```
+Use the Python snippet below:
+
+```python
+from lightrag import LightRAG, QueryParam
+
+rag = LightRAG(working_dir="./dickens")
+
+with open("./book.txt") as f:
+ rag.insert(f.read())
+
+# Perform naive search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+
+# Perform local search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+
+# Perform global search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+
+# Perform hybrid search (note: the mode name is spelled "hybird" in the API)
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybird")))
+```
+Batch Insert
+```python
+rag.insert(["TEXT1", "TEXT2",...])
+```
+Incremental Insert
+
+```python
+rag = LightRAG(working_dir="./dickens")
+
+with open("./newText.txt") as f:
+ rag.insert(f.read())
+```
+## Evaluation
+### Dataset
+The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain).
+
+### Generate Query
+LightRAG uses the following prompt to generate high-level queries, with the corresponding code located in `examples/generate_query.py`.
+```python
+Given the following description of a dataset:
+
+{description}
+
+Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset.
+
+Output the results in the following structure:
+- User 1: [user description]
+ - Task 1: [task description]
+ - Question 1:
+ - Question 2:
+ - Question 3:
+ - Question 4:
+ - Question 5:
+ - Task 2: [task description]
+ ...
+ - Task 5: [task description]
+- User 2: [user description]
+ ...
+- User 5: [user description]
+ ...
+```
+
+### Batch Eval
+To evaluate the performance of two RAG systems on high-level queries, LightRAG uses the following prompt, with the specific code available in `examples/batch_eval.py`.
+```python
+---Role---
+You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+---Goal---
+You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+
+- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
+- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
+- **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?
+
+For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.
+
+Here is the question:
+{query}
+
+Here are the two answers:
+
+**Answer 1:**
+{answer1}
+
+**Answer 2:**
+{answer2}
+
+Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.
+
+Output your evaluation in the following JSON format:
+
+{{
+ "Comprehensiveness": {{
+ "Winner": "[Answer 1 or Answer 2]",
+ "Explanation": "[Provide explanation here]"
+ }},
+ "Empowerment": {{
+ "Winner": "[Answer 1 or Answer 2]",
+ "Explanation": "[Provide explanation here]"
+ }},
+ "Overall Winner": {{
+ "Winner": "[Answer 1 or Answer 2]",
+ "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"
+ }}
+}}
+```
+### Overall Performance Table
+
+| | **Agriculture** | | **CS** | | **Legal** | | **Mix** | |
+|----------------------|-------------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|
+| | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** |
+| **Comprehensiveness** | 32.69% | **67.31%** | 35.44% | **64.56%** | 19.05% | **80.95%** | 36.36% | **63.64%** |
+| **Diversity** | 24.09% | **75.91%** | 35.24% | **64.76%** | 10.98% | **89.02%** | 30.76% | **69.24%** |
+| **Empowerment** | 31.35% | **68.65%** | 35.48% | **64.52%** | 17.59% | **82.41%** | 40.95% | **59.05%** |
+| **Overall** | 33.30% | **66.70%** | 34.76% | **65.24%** | 17.46% | **82.54%** | 37.59% | **62.40%** |
+| | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** |
+| **Comprehensiveness** | 32.05% | **67.95%** | 39.30% | **60.70%** | 18.57% | **81.43%** | 38.89% | **61.11%** |
+| **Diversity** | 29.44% | **70.56%** | 38.71% | **61.29%** | 15.14% | **84.86%** | 28.50% | **71.50%** |
+| **Empowerment** | 32.51% | **67.49%** | 37.52% | **62.48%** | 17.80% | **82.20%** | 43.96% | **56.04%** |
+| **Overall** | 33.29% | **66.71%** | 39.03% | **60.97%** | 17.80% | **82.20%** | 39.61% | **60.39%** |
+| | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** |
+| **Comprehensiveness** | 24.39% | **75.61%** | 36.49% | **63.51%** | 27.68% | **72.32%** | 42.17% | **57.83%** |
+| **Diversity** | 24.96% | **75.34%** | 37.41% | **62.59%** | 18.79% | **81.21%** | 30.88% | **69.12%** |
+| **Empowerment** | 24.89% | **75.11%** | 34.99% | **65.01%** | 26.99% | **73.01%** | 45.61% | **54.39%** |
+| **Overall** | 23.17% | **76.83%** | 35.67% | **64.33%** | 27.68% | **72.32%** | 42.72% | **57.28%** |
+| | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** |
+| **Comprehensiveness** | 45.56% | **54.44%** | 45.98% | **54.02%** | 47.13% | **52.87%** | **51.86%** | 48.14% |
+| **Diversity** | 19.65% | **80.35%** | 39.64% | **60.36%** | 25.55% | **74.45%** | 35.87% | **64.13%** |
+| **Empowerment** | 36.69% | **63.31%** | 45.09% | **54.91%** | 42.81% | **57.19%** | **52.94%** | 47.06% |
+| **Overall** | 43.62% | **56.38%** | 45.98% | **54.02%** | 45.70% | **54.30%** | **51.86%** | 48.14% |
+
+## Code Structure
+
+```python
+.
+├── examples
+│ ├── batch_eval.py
+│ ├── generate_query.py
+│ ├── insert.py
+│ └── query.py
+├── lightrag
+│ ├── __init__.py
+│ ├── base.py
+│ ├── lightrag.py
+│ ├── llm.py
+│ ├── operate.py
+│ ├── prompt.py
+│ ├── storage.py
+│ └── utils.py
+├── LICENSE
+├── README.md
+├── requirements.txt
+└── setup.py
+```
+## Citation
+
+```
+@article{guo2024lightrag,
+title={LightRAG: Simple and Fast Retrieval-Augmented Generation},
+author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang},
+year={2024},
+eprint={2410.05779},
+archivePrefix={arXiv},
+primaryClass={cs.IR}
+}
+```
diff --git a/lightrag/__init__.py b/lightrag/__init__.py
new file mode 100644
index 00000000..dc497cd4
--- /dev/null
+++ b/lightrag/__init__.py
@@ -0,0 +1,5 @@
+from .lightrag import LightRAG, QueryParam
+
+__version__ = "0.0.2"
+__author__ = "Zirui Guo"
+__url__ = "https://github.com/HKUDS/GraphEdit"
diff --git a/lightrag/base.py b/lightrag/base.py
new file mode 100644
index 00000000..9c0422fe
--- /dev/null
+++ b/lightrag/base.py
@@ -0,0 +1,116 @@
+from dataclasses import dataclass, field
+from typing import TypedDict, Union, Literal, Generic, TypeVar
+
+import numpy as np
+
+from .utils import EmbeddingFunc
+
+TextChunkSchema = TypedDict(
+ "TextChunkSchema",
+ {"tokens": int, "content": str, "full_doc_id": str, "chunk_order_index": int},
+)
+
+T = TypeVar("T")
+
+@dataclass
+class QueryParam:
+ mode: Literal["local", "global", "hybird", "naive"] = "global"
+ only_need_context: bool = False
+ response_type: str = "Multiple Paragraphs"
+ top_k: int = 60
+ max_token_for_text_unit: int = 4000
+ max_token_for_global_context: int = 4000
+ max_token_for_local_context: int = 4000
+
+
+@dataclass
+class StorageNameSpace:
+ namespace: str
+ global_config: dict
+
+ async def index_done_callback(self):
+ """commit the storage operations after indexing"""
+ pass
+
+ async def query_done_callback(self):
+ """commit the storage operations after querying"""
+ pass
+
+@dataclass
+class BaseVectorStorage(StorageNameSpace):
+ embedding_func: EmbeddingFunc
+ meta_fields: set = field(default_factory=set)
+
+ async def query(self, query: str, top_k: int) -> list[dict]:
+ raise NotImplementedError
+
+ async def upsert(self, data: dict[str, dict]):
+ """Use 'content' field from value for embedding, use key as id.
+ If embedding_func is None, use 'embedding' field from value
+ """
+ raise NotImplementedError
+
+@dataclass
+class BaseKVStorage(Generic[T], StorageNameSpace):
+ async def all_keys(self) -> list[str]:
+ raise NotImplementedError
+
+ async def get_by_id(self, id: str) -> Union[T, None]:
+ raise NotImplementedError
+
+ async def get_by_ids(
+ self, ids: list[str], fields: Union[set[str], None] = None
+ ) -> list[Union[T, None]]:
+ raise NotImplementedError
+
+ async def filter_keys(self, data: list[str]) -> set[str]:
+ """return un-exist keys"""
+ raise NotImplementedError
+
+ async def upsert(self, data: dict[str, T]):
+ raise NotImplementedError
+
+ async def drop(self):
+ raise NotImplementedError
+
+
+@dataclass
+class BaseGraphStorage(StorageNameSpace):
+ async def has_node(self, node_id: str) -> bool:
+ raise NotImplementedError
+
+ async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
+ raise NotImplementedError
+
+ async def node_degree(self, node_id: str) -> int:
+ raise NotImplementedError
+
+ async def edge_degree(self, src_id: str, tgt_id: str) -> int:
+ raise NotImplementedError
+
+ async def get_node(self, node_id: str) -> Union[dict, None]:
+ raise NotImplementedError
+
+ async def get_edge(
+ self, source_node_id: str, target_node_id: str
+ ) -> Union[dict, None]:
+ raise NotImplementedError
+
+ async def get_node_edges(
+ self, source_node_id: str
+ ) -> Union[list[tuple[str, str]], None]:
+ raise NotImplementedError
+
+ async def upsert_node(self, node_id: str, node_data: dict[str, str]):
+ raise NotImplementedError
+
+ async def upsert_edge(
+ self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
+ ):
+ raise NotImplementedError
+
+ async def clustering(self, algorithm: str):
+ raise NotImplementedError
+
+ async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
+ raise NotImplementedError("Node embedding is not used in lightrag.")
\ No newline at end of file
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
new file mode 100644
index 00000000..836fda9e
--- /dev/null
+++ b/lightrag/lightrag.py
@@ -0,0 +1,300 @@
+import asyncio
+import os
+from dataclasses import asdict, dataclass, field
+from datetime import datetime
+from functools import partial
+from typing import Type, cast
+
+from .llm import gpt_4o_complete, gpt_4o_mini_complete, openai_embedding
+from .operate import (
+ chunking_by_token_size,
+ extract_entities,
+ local_query,
+ global_query,
+ hybird_query,
+ naive_query,
+)
+
+from .storage import (
+ JsonKVStorage,
+ NanoVectorDBStorage,
+ NetworkXStorage,
+)
+from .utils import (
+ EmbeddingFunc,
+ compute_mdhash_id,
+ limit_async_func_call,
+ convert_response_to_json,
+ logger,
+ set_logger,
+)
+from .base import (
+ BaseGraphStorage,
+ BaseKVStorage,
+ BaseVectorStorage,
+ StorageNameSpace,
+ QueryParam,
+)
+
+def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
+ try:
+ # If there is already an event loop, use it.
+ loop = asyncio.get_event_loop()
+ except RuntimeError:
+ # If in a sub-thread, create a new event loop.
+ logger.info("Creating a new event loop in a sub-thread.")
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+ return loop
+
+@dataclass
+class LightRAG:
+ working_dir: str = field(
+ default_factory=lambda: f"./lightrag_cache_{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}"
+ )
+
+ # text chunking
+ chunk_token_size: int = 1200
+ chunk_overlap_token_size: int = 100
+ tiktoken_model_name: str = "gpt-4o-mini"
+
+ # entity extraction
+ entity_extract_max_gleaning: int = 1
+ entity_summary_to_max_tokens: int = 500
+
+ # node embedding
+ node_embedding_algorithm: str = "node2vec"
+ node2vec_params: dict = field(
+ default_factory=lambda: {
+ "dimensions": 1536,
+ "num_walks": 10,
+ "walk_length": 40,
+ "num_walks": 10,
+ "window_size": 2,
+ "iterations": 3,
+ "random_seed": 3,
+ }
+ )
+
+ # text embedding
+ embedding_func: EmbeddingFunc = field(default_factory=lambda: openai_embedding)
+ embedding_batch_num: int = 32
+ embedding_func_max_async: int = 16
+
+ # LLM
+ llm_model_func: callable = gpt_4o_mini_complete
+ llm_model_max_token_size: int = 32768
+ llm_model_max_async: int = 16
+
+ # storage
+ key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage
+ vector_db_storage_cls: Type[BaseVectorStorage] = NanoVectorDBStorage
+ vector_db_storage_cls_kwargs: dict = field(default_factory=dict)
+ graph_storage_cls: Type[BaseGraphStorage] = NetworkXStorage
+ enable_llm_cache: bool = True
+
+ # extension
+ addon_params: dict = field(default_factory=dict)
+ convert_response_to_json_func: callable = convert_response_to_json
+
+ def __post_init__(self):
+ log_file = os.path.join(self.working_dir, "lightrag.log")
+ set_logger(log_file)
+ logger.info(f"Logger initialized for working directory: {self.working_dir}")
+
+ _print_config = ",\n ".join([f"{k} = {v}" for k, v in asdict(self).items()])
+ logger.debug(f"LightRAG init with param:\n {_print_config}\n")
+
+ if not os.path.exists(self.working_dir):
+ logger.info(f"Creating working directory {self.working_dir}")
+ os.makedirs(self.working_dir)
+
+ self.full_docs = self.key_string_value_json_storage_cls(
+ namespace="full_docs", global_config=asdict(self)
+ )
+
+ self.text_chunks = self.key_string_value_json_storage_cls(
+ namespace="text_chunks", global_config=asdict(self)
+ )
+
+ self.llm_response_cache = (
+ self.key_string_value_json_storage_cls(
+ namespace="llm_response_cache", global_config=asdict(self)
+ )
+ if self.enable_llm_cache
+ else None
+ )
+ self.chunk_entity_relation_graph = self.graph_storage_cls(
+ namespace="chunk_entity_relation", global_config=asdict(self)
+ )
+ self.embedding_func = limit_async_func_call(self.embedding_func_max_async)(
+ self.embedding_func
+ )
+ self.entities_vdb = (
+ self.vector_db_storage_cls(
+ namespace="entities",
+ global_config=asdict(self),
+ embedding_func=self.embedding_func,
+ meta_fields={"entity_name"}
+ )
+ )
+ self.relationships_vdb = (
+ self.vector_db_storage_cls(
+ namespace="relationships",
+ global_config=asdict(self),
+ embedding_func=self.embedding_func,
+ meta_fields={"src_id", "tgt_id"}
+ )
+ )
+ self.chunks_vdb = (
+ self.vector_db_storage_cls(
+ namespace="chunks",
+ global_config=asdict(self),
+ embedding_func=self.embedding_func,
+ )
+ )
+
+ self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
+ partial(self.llm_model_func, hashing_kv=self.llm_response_cache)
+ )
+
+ def insert(self, string_or_strings):
+ loop = always_get_an_event_loop()
+ return loop.run_until_complete(self.ainsert(string_or_strings))
+
+ async def ainsert(self, string_or_strings):
+ try:
+ if isinstance(string_or_strings, str):
+ string_or_strings = [string_or_strings]
+
+ new_docs = {
+ compute_mdhash_id(c.strip(), prefix="doc-"): {"content": c.strip()}
+ for c in string_or_strings
+ }
+ _add_doc_keys = await self.full_docs.filter_keys(list(new_docs.keys()))
+ new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
+ if not len(new_docs):
+ logger.warning(f"All docs are already in the storage")
+ return
+ logger.info(f"[New Docs] inserting {len(new_docs)} docs")
+
+ inserting_chunks = {}
+ for doc_key, doc in new_docs.items():
+ chunks = {
+ compute_mdhash_id(dp["content"], prefix="chunk-"): {
+ **dp,
+ "full_doc_id": doc_key,
+ }
+ for dp in chunking_by_token_size(
+ doc["content"],
+ overlap_token_size=self.chunk_overlap_token_size,
+ max_token_size=self.chunk_token_size,
+ tiktoken_model=self.tiktoken_model_name,
+ )
+ }
+ inserting_chunks.update(chunks)
+ _add_chunk_keys = await self.text_chunks.filter_keys(
+ list(inserting_chunks.keys())
+ )
+ inserting_chunks = {
+ k: v for k, v in inserting_chunks.items() if k in _add_chunk_keys
+ }
+ if not len(inserting_chunks):
+ logger.warning(f"All chunks are already in the storage")
+ return
+ logger.info(f"[New Chunks] inserting {len(inserting_chunks)} chunks")
+
+ await self.chunks_vdb.upsert(inserting_chunks)
+
+ logger.info("[Entity Extraction]...")
+ maybe_new_kg = await extract_entities(
+ inserting_chunks,
+ knwoledge_graph_inst=self.chunk_entity_relation_graph,
+ entity_vdb=self.entities_vdb,
+ relationships_vdb=self.relationships_vdb,
+ global_config=asdict(self),
+ )
+ if maybe_new_kg is None:
+ logger.warning("No new entities and relationships found")
+ return
+ self.chunk_entity_relation_graph = maybe_new_kg
+
+ await self.full_docs.upsert(new_docs)
+ await self.text_chunks.upsert(inserting_chunks)
+ finally:
+ await self._insert_done()
+
+ async def _insert_done(self):
+ tasks = []
+ for storage_inst in [
+ self.full_docs,
+ self.text_chunks,
+ self.llm_response_cache,
+ self.entities_vdb,
+ self.relationships_vdb,
+ self.chunks_vdb,
+ self.chunk_entity_relation_graph,
+ ]:
+ if storage_inst is None:
+ continue
+ tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
+ await asyncio.gather(*tasks)
+
+ def query(self, query: str, param: QueryParam = QueryParam()):
+ loop = always_get_an_event_loop()
+ return loop.run_until_complete(self.aquery(query, param))
+
+ async def aquery(self, query: str, param: QueryParam = QueryParam()):
+ if param.mode == "local":
+ response = await local_query(
+ query,
+ self.chunk_entity_relation_graph,
+ self.entities_vdb,
+ self.relationships_vdb,
+ self.text_chunks,
+ param,
+ asdict(self),
+ )
+ elif param.mode == "global":
+ response = await global_query(
+ query,
+ self.chunk_entity_relation_graph,
+ self.entities_vdb,
+ self.relationships_vdb,
+ self.text_chunks,
+ param,
+ asdict(self),
+ )
+ elif param.mode == "hybird":
+ response = await hybird_query(
+ query,
+ self.chunk_entity_relation_graph,
+ self.entities_vdb,
+ self.relationships_vdb,
+ self.text_chunks,
+ param,
+ asdict(self),
+ )
+ elif param.mode == "naive":
+ response = await naive_query(
+ query,
+ self.chunks_vdb,
+ self.text_chunks,
+ param,
+ asdict(self),
+ )
+ else:
+ raise ValueError(f"Unknown mode {param.mode}")
+ await self._query_done()
+ return response
+
+
+ async def _query_done(self):
+ tasks = []
+ for storage_inst in [self.llm_response_cache]:
+ if storage_inst is None:
+ continue
+ tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
+ await asyncio.gather(*tasks)
+
+
diff --git a/lightrag/llm.py b/lightrag/llm.py
new file mode 100644
index 00000000..ee700a10
--- /dev/null
+++ b/lightrag/llm.py
@@ -0,0 +1,88 @@
+import os
+import numpy as np
+from openai import AsyncOpenAI, APIConnectionError, RateLimitError, Timeout
+from tenacity import (
+ retry,
+ stop_after_attempt,
+ wait_exponential,
+ retry_if_exception_type,
+)
+
+from .base import BaseKVStorage
+from .utils import compute_args_hash, wrap_embedding_func_with_attrs
+
+@retry(
+ stop=stop_after_attempt(3),
+ wait=wait_exponential(multiplier=1, min=4, max=10),
+ retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+)
+async def openai_complete_if_cache(
+ model, prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ openai_async_client = AsyncOpenAI()
+ hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
+ messages = []
+ if system_prompt:
+ messages.append({"role": "system", "content": system_prompt})
+ messages.extend(history_messages)
+ messages.append({"role": "user", "content": prompt})
+ if hashing_kv is not None:
+ args_hash = compute_args_hash(model, messages)
+ if_cache_return = await hashing_kv.get_by_id(args_hash)
+ if if_cache_return is not None:
+ return if_cache_return["return"]
+
+ response = await openai_async_client.chat.completions.create(
+ model=model, messages=messages, **kwargs
+ )
+
+ if hashing_kv is not None:
+ await hashing_kv.upsert(
+ {args_hash: {"return": response.choices[0].message.content, "model": model}}
+ )
+ return response.choices[0].message.content
+
+async def gpt_4o_complete(
+ prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ return await openai_complete_if_cache(
+ "gpt-4o",
+ prompt,
+ system_prompt=system_prompt,
+ history_messages=history_messages,
+ **kwargs,
+ )
+
+
+async def gpt_4o_mini_complete(
+ prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ return await openai_complete_if_cache(
+ "gpt-4o-mini",
+ prompt,
+ system_prompt=system_prompt,
+ history_messages=history_messages,
+ **kwargs,
+ )
+
+@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+@retry(
+ stop=stop_after_attempt(3),
+ wait=wait_exponential(multiplier=1, min=4, max=10),
+ retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+)
+async def openai_embedding(texts: list[str]) -> np.ndarray:
+ openai_async_client = AsyncOpenAI()
+ response = await openai_async_client.embeddings.create(
+ model="text-embedding-3-small", input=texts, encoding_format="float"
+ )
+ return np.array([dp.embedding for dp in response.data])
+
+if __name__ == "__main__":
+ import asyncio
+
+ async def main():
+ result = await gpt_4o_mini_complete('How are you?')
+ print(result)
+
+ asyncio.run(main())
diff --git a/lightrag/operate.py b/lightrag/operate.py
new file mode 100644
index 00000000..2d3271da
--- /dev/null
+++ b/lightrag/operate.py
@@ -0,0 +1,944 @@
+import asyncio
+import json
+import re
+from typing import Union
+from collections import Counter, defaultdict
+
+from .utils import (
+ logger,
+ clean_str,
+ compute_mdhash_id,
+ decode_tokens_by_tiktoken,
+ encode_string_by_tiktoken,
+ is_float_regex,
+ list_of_list_to_csv,
+ pack_user_ass_to_openai_messages,
+ split_string_by_multi_markers,
+ truncate_list_by_token_size,
+)
+from .base import (
+ BaseGraphStorage,
+ BaseKVStorage,
+ BaseVectorStorage,
+ TextChunkSchema,
+ QueryParam,
+)
+from .prompt import GRAPH_FIELD_SEP, PROMPTS
+
+def chunking_by_token_size(
+ content: str, overlap_token_size=128, max_token_size=1024, tiktoken_model="gpt-4o"
+):
+ tokens = encode_string_by_tiktoken(content, model_name=tiktoken_model)
+ results = []
+ for index, start in enumerate(
+ range(0, len(tokens), max_token_size - overlap_token_size)
+ ):
+ chunk_content = decode_tokens_by_tiktoken(
+ tokens[start : start + max_token_size], model_name=tiktoken_model
+ )
+ results.append(
+ {
+ "tokens": min(max_token_size, len(tokens) - start),
+ "content": chunk_content.strip(),
+ "chunk_order_index": index,
+ }
+ )
+ return results
+
+async def _handle_entity_relation_summary(
+ entity_or_relation_name: str,
+ description: str,
+ global_config: dict,
+) -> str:
+ use_llm_func: callable = global_config["llm_model_func"]
+ llm_max_tokens = global_config["llm_model_max_token_size"]
+ tiktoken_model_name = global_config["tiktoken_model_name"]
+ summary_max_tokens = global_config["entity_summary_to_max_tokens"]
+
+ tokens = encode_string_by_tiktoken(description, model_name=tiktoken_model_name)
+ if len(tokens) < summary_max_tokens: # No need for summary
+ return description
+ prompt_template = PROMPTS["summarize_entity_descriptions"]
+ use_description = decode_tokens_by_tiktoken(
+ tokens[:llm_max_tokens], model_name=tiktoken_model_name
+ )
+ context_base = dict(
+ entity_name=entity_or_relation_name,
+ description_list=use_description.split(GRAPH_FIELD_SEP),
+ )
+ use_prompt = prompt_template.format(**context_base)
+ logger.debug(f"Trigger summary: {entity_or_relation_name}")
+ summary = await use_llm_func(use_prompt, max_tokens=summary_max_tokens)
+ return summary
+
+
+async def _handle_single_entity_extraction(
+ record_attributes: list[str],
+ chunk_key: str,
+):
+ if record_attributes[0] != '"entity"' or len(record_attributes) < 4:
+ return None
+ # add this record as a node in the G
+ entity_name = clean_str(record_attributes[1].upper())
+ if not entity_name.strip():
+ return None
+ entity_type = clean_str(record_attributes[2].upper())
+ entity_description = clean_str(record_attributes[3])
+ entity_source_id = chunk_key
+ return dict(
+ entity_name=entity_name,
+ entity_type=entity_type,
+ description=entity_description,
+ source_id=entity_source_id,
+ )
+
+
+async def _handle_single_relationship_extraction(
+ record_attributes: list[str],
+ chunk_key: str,
+):
+ if record_attributes[0] != '"relationship"' or len(record_attributes) < 5:
+ return None
+ # add this record as edge
+ source = clean_str(record_attributes[1].upper())
+ target = clean_str(record_attributes[2].upper())
+ edge_description = clean_str(record_attributes[3])
+
+ edge_keywords = clean_str(record_attributes[4])
+ edge_source_id = chunk_key
+ weight = (
+ float(record_attributes[-1]) if is_float_regex(record_attributes[-1]) else 1.0
+ )
+ return dict(
+ src_id=source,
+ tgt_id=target,
+ weight=weight,
+ description=edge_description,
+ keywords=edge_keywords,
+ source_id=edge_source_id,
+ )
+
+
+async def _merge_nodes_then_upsert(
+ entity_name: str,
+ nodes_data: list[dict],
+ knwoledge_graph_inst: BaseGraphStorage,
+ global_config: dict,
+):
+ already_entitiy_types = []
+ already_source_ids = []
+ already_description = []
+
+ already_node = await knwoledge_graph_inst.get_node(entity_name)
+ if already_node is not None:
+ already_entitiy_types.append(already_node["entity_type"])
+ already_source_ids.extend(
+ split_string_by_multi_markers(already_node["source_id"], [GRAPH_FIELD_SEP])
+ )
+ already_description.append(already_node["description"])
+
+ entity_type = sorted(
+ Counter(
+ [dp["entity_type"] for dp in nodes_data] + already_entitiy_types
+ ).items(),
+ key=lambda x: x[1],
+ reverse=True,
+ )[0][0]
+ description = GRAPH_FIELD_SEP.join(
+ sorted(set([dp["description"] for dp in nodes_data] + already_description))
+ )
+ source_id = GRAPH_FIELD_SEP.join(
+ set([dp["source_id"] for dp in nodes_data] + already_source_ids)
+ )
+ description = await _handle_entity_relation_summary(
+ entity_name, description, global_config
+ )
+ node_data = dict(
+ entity_type=entity_type,
+ description=description,
+ source_id=source_id,
+ )
+ await knwoledge_graph_inst.upsert_node(
+ entity_name,
+ node_data=node_data,
+ )
+ node_data["entity_name"] = entity_name
+ return node_data
+
+
+async def _merge_edges_then_upsert(
+ src_id: str,
+ tgt_id: str,
+ edges_data: list[dict],
+ knwoledge_graph_inst: BaseGraphStorage,
+ global_config: dict,
+):
+ already_weights = []
+ already_source_ids = []
+ already_description = []
+ already_keywords = []
+
+ if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
+ already_edge = await knwoledge_graph_inst.get_edge(src_id, tgt_id)
+ already_weights.append(already_edge["weight"])
+ already_source_ids.extend(
+ split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
+ )
+ already_description.append(already_edge["description"])
+ already_keywords.extend(
+ split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
+ )
+
+ weight = sum([dp["weight"] for dp in edges_data] + already_weights)
+ description = GRAPH_FIELD_SEP.join(
+ sorted(set([dp["description"] for dp in edges_data] + already_description))
+ )
+ keywords = GRAPH_FIELD_SEP.join(
+ sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
+ )
+ source_id = GRAPH_FIELD_SEP.join(
+ set([dp["source_id"] for dp in edges_data] + already_source_ids)
+ )
+ for need_insert_id in [src_id, tgt_id]:
+ if not (await knwoledge_graph_inst.has_node(need_insert_id)):
+ await knwoledge_graph_inst.upsert_node(
+ need_insert_id,
+ node_data={
+ "source_id": source_id,
+ "description": description,
+ "entity_type": '"UNKNOWN"',
+ },
+ )
+ description = await _handle_entity_relation_summary(
+ (src_id, tgt_id), description, global_config
+ )
+ await knwoledge_graph_inst.upsert_edge(
+ src_id,
+ tgt_id,
+ edge_data=dict(
+ weight=weight,
+ description=description,
+ keywords=keywords,
+ source_id=source_id,
+ ),
+ )
+
+ edge_data = dict(
+ src_id=src_id,
+ tgt_id=tgt_id,
+ description=description,
+ keywords=keywords,
+ )
+
+ return edge_data
+
+async def extract_entities(
+ chunks: dict[str, TextChunkSchema],
+ knwoledge_graph_inst: BaseGraphStorage,
+ entity_vdb: BaseVectorStorage,
+ relationships_vdb: BaseVectorStorage,
+ global_config: dict,
+) -> Union[BaseGraphStorage, None]:
+ use_llm_func: callable = global_config["llm_model_func"]
+ entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"]
+
+ ordered_chunks = list(chunks.items())
+
+ entity_extract_prompt = PROMPTS["entity_extraction"]
+ context_base = dict(
+ tuple_delimiter=PROMPTS["DEFAULT_TUPLE_DELIMITER"],
+ record_delimiter=PROMPTS["DEFAULT_RECORD_DELIMITER"],
+ completion_delimiter=PROMPTS["DEFAULT_COMPLETION_DELIMITER"],
+ entity_types=",".join(PROMPTS["DEFAULT_ENTITY_TYPES"]),
+ )
+ continue_prompt = PROMPTS["entiti_continue_extraction"]
+ if_loop_prompt = PROMPTS["entiti_if_loop_extraction"]
+
+ already_processed = 0
+ already_entities = 0
+ already_relations = 0
+
+ async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
+ nonlocal already_processed, already_entities, already_relations
+ chunk_key = chunk_key_dp[0]
+ chunk_dp = chunk_key_dp[1]
+ content = chunk_dp["content"]
+ hint_prompt = entity_extract_prompt.format(**context_base, input_text=content)
+ final_result = await use_llm_func(hint_prompt)
+
+ history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
+ for now_glean_index in range(entity_extract_max_gleaning):
+ glean_result = await use_llm_func(continue_prompt, history_messages=history)
+
+ history += pack_user_ass_to_openai_messages(continue_prompt, glean_result)
+ final_result += glean_result
+ if now_glean_index == entity_extract_max_gleaning - 1:
+ break
+
+ if_loop_result: str = await use_llm_func(
+ if_loop_prompt, history_messages=history
+ )
+ if_loop_result = if_loop_result.strip().strip('"').strip("'").lower()
+ if if_loop_result != "yes":
+ break
+
+ records = split_string_by_multi_markers(
+ final_result,
+ [context_base["record_delimiter"], context_base["completion_delimiter"]],
+ )
+
+ maybe_nodes = defaultdict(list)
+ maybe_edges = defaultdict(list)
+ for record in records:
+ record = re.search(r"\((.*)\)", record)
+ if record is None:
+ continue
+ record = record.group(1)
+ record_attributes = split_string_by_multi_markers(
+ record, [context_base["tuple_delimiter"]]
+ )
+ if_entities = await _handle_single_entity_extraction(
+ record_attributes, chunk_key
+ )
+ if if_entities is not None:
+ maybe_nodes[if_entities["entity_name"]].append(if_entities)
+ continue
+
+ if_relation = await _handle_single_relationship_extraction(
+ record_attributes, chunk_key
+ )
+ if if_relation is not None:
+ maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append(
+ if_relation
+ )
+ already_processed += 1
+ already_entities += len(maybe_nodes)
+ already_relations += len(maybe_edges)
+ now_ticks = PROMPTS["process_tickers"][
+ already_processed % len(PROMPTS["process_tickers"])
+ ]
+ print(
+ f"{now_ticks} Processed {already_processed} chunks, {already_entities} entities(duplicated), {already_relations} relations(duplicated)\r",
+ end="",
+ flush=True,
+ )
+ return dict(maybe_nodes), dict(maybe_edges)
+
+ # use_llm_func is wrapped in ascynio.Semaphore, limiting max_async callings
+ results = await asyncio.gather(
+ *[_process_single_content(c) for c in ordered_chunks]
+ )
+ print() # clear the progress bar
+ maybe_nodes = defaultdict(list)
+ maybe_edges = defaultdict(list)
+ for m_nodes, m_edges in results:
+ for k, v in m_nodes.items():
+ maybe_nodes[k].extend(v)
+ for k, v in m_edges.items():
+ maybe_edges[tuple(sorted(k))].extend(v)
+ all_entities_data = await asyncio.gather(
+ *[
+ _merge_nodes_then_upsert(k, v, knwoledge_graph_inst, global_config)
+ for k, v in maybe_nodes.items()
+ ]
+ )
+ all_relationships_data = await asyncio.gather(
+ *[
+ _merge_edges_then_upsert(k[0], k[1], v, knwoledge_graph_inst, global_config)
+ for k, v in maybe_edges.items()
+ ]
+ )
+ if not len(all_entities_data):
+ logger.warning("Didn't extract any entities, maybe your LLM is not working")
+ return None
+ if not len(all_relationships_data):
+ logger.warning("Didn't extract any relationships, maybe your LLM is not working")
+ return None
+
+ if entity_vdb is not None:
+ data_for_vdb = {
+ compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
+ "content": dp["entity_name"] + dp["description"],
+ "entity_name": dp["entity_name"],
+ }
+ for dp in all_entities_data
+ }
+ await entity_vdb.upsert(data_for_vdb)
+
+ if relationships_vdb is not None:
+ data_for_vdb = {
+ compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): {
+ "src_id": dp["src_id"],
+ "tgt_id": dp["tgt_id"],
+ "content": dp["keywords"] + dp["src_id"] + dp["tgt_id"] + dp["description"],
+ }
+ for dp in all_relationships_data
+ }
+ await relationships_vdb.upsert(data_for_vdb)
+
+ return knwoledge_graph_inst
+
+async def local_query(
+ query,
+ knowledge_graph_inst: BaseGraphStorage,
+ entities_vdb: BaseVectorStorage,
+ relationships_vdb: BaseVectorStorage,
+ text_chunks_db: BaseKVStorage[TextChunkSchema],
+ query_param: QueryParam,
+ global_config: dict,
+) -> str:
+ use_model_func = global_config["llm_model_func"]
+
+ kw_prompt_temp = PROMPTS["keywords_extraction"]
+ kw_prompt = kw_prompt_temp.format(query=query)
+ result = await use_model_func(kw_prompt)
+
+ try:
+ keywords_data = json.loads(result)
+ keywords = keywords_data.get("low_level_keywords", [])
+ keywords = ', '.join(keywords)
+ except json.JSONDecodeError as e:
+ # Handle parsing error
+ print(f"JSON parsing error: {e}")
+ return PROMPTS["fail_response"]
+
+ context = await _build_local_query_context(
+ keywords,
+ knowledge_graph_inst,
+ entities_vdb,
+ text_chunks_db,
+ query_param,
+ )
+ if query_param.only_need_context:
+ return context
+ if context is None:
+ return PROMPTS["fail_response"]
+ sys_prompt_temp = PROMPTS["rag_response"]
+ sys_prompt = sys_prompt_temp.format(
+ context_data=context, response_type=query_param.response_type
+ )
+ response = await use_model_func(
+ query,
+ system_prompt=sys_prompt,
+ )
+ return response
+
+async def _build_local_query_context(
+ query,
+ knowledge_graph_inst: BaseGraphStorage,
+ entities_vdb: BaseVectorStorage,
+ text_chunks_db: BaseKVStorage[TextChunkSchema],
+ query_param: QueryParam,
+):
+ results = await entities_vdb.query(query, top_k=query_param.top_k)
+ if not len(results):
+ return None
+ node_datas = await asyncio.gather(
+ *[knowledge_graph_inst.get_node(r["entity_name"]) for r in results]
+ )
+ if not all([n is not None for n in node_datas]):
+ logger.warning("Some nodes are missing, maybe the storage is damaged")
+ node_degrees = await asyncio.gather(
+ *[knowledge_graph_inst.node_degree(r["entity_name"]) for r in results]
+ )
+ node_datas = [
+ {**n, "entity_name": k["entity_name"], "rank": d}
+ for k, n, d in zip(results, node_datas, node_degrees)
+ if n is not None
+ ]
+ use_text_units = await _find_most_related_text_unit_from_entities(
+ node_datas, query_param, text_chunks_db, knowledge_graph_inst
+ )
+ use_relations = await _find_most_related_edges_from_entities(
+ node_datas, query_param, knowledge_graph_inst
+ )
+ logger.info(
+ f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} text units"
+ )
+ entites_section_list = [["id", "entity", "type", "description", "rank"]]
+ for i, n in enumerate(node_datas):
+ entites_section_list.append(
+ [
+ i,
+ n["entity_name"],
+ n.get("entity_type", "UNKNOWN"),
+ n.get("description", "UNKNOWN"),
+ n["rank"],
+ ]
+ )
+ entities_context = list_of_list_to_csv(entites_section_list)
+
+ relations_section_list = [
+ ["id", "source", "target", "description", "keywords", "weight", "rank"]
+ ]
+ for i, e in enumerate(use_relations):
+ relations_section_list.append(
+ [
+ i,
+ e["src_tgt"][0],
+ e["src_tgt"][1],
+ e["description"],
+ e["keywords"],
+ e["weight"],
+ e["rank"],
+ ]
+ )
+ relations_context = list_of_list_to_csv(relations_section_list)
+
+ text_units_section_list = [["id", "content"]]
+ for i, t in enumerate(use_text_units):
+ text_units_section_list.append([i, t["content"]])
+ text_units_context = list_of_list_to_csv(text_units_section_list)
+ return f"""
+-----Entities-----
+```csv
+{entities_context}
+```
+-----Relationships-----
+```csv
+{relations_context}
+```
+-----Sources-----
+```csv
+{text_units_context}
+```
+"""
+
+async def _find_most_related_text_unit_from_entities(
+ node_datas: list[dict],
+ query_param: QueryParam,
+ text_chunks_db: BaseKVStorage[TextChunkSchema],
+ knowledge_graph_inst: BaseGraphStorage,
+):
+ text_units = [
+ split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
+ for dp in node_datas
+ ]
+ edges = await asyncio.gather(
+ *[knowledge_graph_inst.get_node_edges(dp["entity_name"]) for dp in node_datas]
+ )
+ all_one_hop_nodes = set()
+ for this_edges in edges:
+ if not this_edges:
+ continue
+ all_one_hop_nodes.update([e[1] for e in this_edges])
+ all_one_hop_nodes = list(all_one_hop_nodes)
+ all_one_hop_nodes_data = await asyncio.gather(
+ *[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
+ )
+ all_one_hop_text_units_lookup = {
+ k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
+ for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
+ if v is not None
+ }
+ all_text_units_lookup = {}
+ for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
+ for c_id in this_text_units:
+ if c_id in all_text_units_lookup:
+ continue
+ relation_counts = 0
+ for e in this_edges:
+ if (
+ e[1] in all_one_hop_text_units_lookup
+ and c_id in all_one_hop_text_units_lookup[e[1]]
+ ):
+ relation_counts += 1
+ all_text_units_lookup[c_id] = {
+ "data": await text_chunks_db.get_by_id(c_id),
+ "order": index,
+ "relation_counts": relation_counts,
+ }
+ if any([v is None for v in all_text_units_lookup.values()]):
+ logger.warning("Text chunks are missing, maybe the storage is damaged")
+ all_text_units = [
+ {"id": k, **v} for k, v in all_text_units_lookup.items() if v is not None
+ ]
+ all_text_units = sorted(
+ all_text_units, key=lambda x: (x["order"], -x["relation_counts"])
+ )
+ all_text_units = truncate_list_by_token_size(
+ all_text_units,
+ key=lambda x: x["data"]["content"],
+ max_token_size=query_param.max_token_for_text_unit,
+ )
+ all_text_units: list[TextChunkSchema] = [t["data"] for t in all_text_units]
+ return all_text_units
+
+async def _find_most_related_edges_from_entities(
+ node_datas: list[dict],
+ query_param: QueryParam,
+ knowledge_graph_inst: BaseGraphStorage,
+):
+ all_related_edges = await asyncio.gather(
+ *[knowledge_graph_inst.get_node_edges(dp["entity_name"]) for dp in node_datas]
+ )
+ all_edges = set()
+ for this_edges in all_related_edges:
+ all_edges.update([tuple(sorted(e)) for e in this_edges])
+ all_edges = list(all_edges)
+ all_edges_pack = await asyncio.gather(
+ *[knowledge_graph_inst.get_edge(e[0], e[1]) for e in all_edges]
+ )
+ all_edges_degree = await asyncio.gather(
+ *[knowledge_graph_inst.edge_degree(e[0], e[1]) for e in all_edges]
+ )
+ all_edges_data = [
+ {"src_tgt": k, "rank": d, **v}
+ for k, v, d in zip(all_edges, all_edges_pack, all_edges_degree)
+ if v is not None
+ ]
+ all_edges_data = sorted(
+ all_edges_data, key=lambda x: (x["rank"], x["weight"]), reverse=True
+ )
+ all_edges_data = truncate_list_by_token_size(
+ all_edges_data,
+ key=lambda x: x["description"],
+ max_token_size=query_param.max_token_for_global_context,
+ )
+ return all_edges_data
+
+async def global_query(
+ query,
+ knowledge_graph_inst: BaseGraphStorage,
+ entities_vdb: BaseVectorStorage,
+ relationships_vdb: BaseVectorStorage,
+ text_chunks_db: BaseKVStorage[TextChunkSchema],
+ query_param: QueryParam,
+ global_config: dict,
+) -> str:
+ use_model_func = global_config["llm_model_func"]
+
+ kw_prompt_temp = PROMPTS["keywords_extraction"]
+ kw_prompt = kw_prompt_temp.format(query=query)
+ result = await use_model_func(kw_prompt)
+
+ try:
+ keywords_data = json.loads(result)
+ keywords = keywords_data.get("high_level_keywords", [])
+ keywords = ', '.join(keywords)
+ except json.JSONDecodeError as e:
+ # Handle parsing error
+ print(f"JSON parsing error: {e}")
+ return PROMPTS["fail_response"]
+
+ context = await _build_global_query_context(
+ keywords,
+ knowledge_graph_inst,
+ entities_vdb,
+ relationships_vdb,
+ text_chunks_db,
+ query_param,
+ )
+
+ if query_param.only_need_context:
+ return context
+ if context is None:
+ return PROMPTS["fail_response"]
+
+ sys_prompt_temp = PROMPTS["rag_response"]
+ sys_prompt = sys_prompt_temp.format(
+ context_data=context, response_type=query_param.response_type
+ )
+ response = await use_model_func(
+ query,
+ system_prompt=sys_prompt,
+ )
+ return response
+
+async def _build_global_query_context(
+    keywords,
+    knowledge_graph_inst: BaseGraphStorage,
+    entities_vdb: BaseVectorStorage,  # not referenced in this function body
+    relationships_vdb: BaseVectorStorage,
+    text_chunks_db: BaseKVStorage[TextChunkSchema],
+    query_param: QueryParam,
+):  # Builds the relationship-centred context string for ``keywords``; None when nothing matches.
+    results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
+    # No relationship matched the keywords: report "no context" to the caller.
+    if not len(results):
+        return None
+    # Look up the stored attributes of every matched edge concurrently.
+    edge_datas = await asyncio.gather(
+        *[knowledge_graph_inst.get_edge(r["src_id"], r["tgt_id"]) for r in results]
+    )
+    # A None entry means the graph returned no edge for a hit from the vector index.
+    if not all([n is not None for n in edge_datas]):
+        logger.warning("Some edges are missing, maybe the storage is damaged")
+    edge_degree = await asyncio.gather(
+        *[knowledge_graph_inst.edge_degree(r["src_id"], r["tgt_id"]) for r in results]
+    )
+    edge_datas = [
+        {"src_id": k["src_id"], "tgt_id": k["tgt_id"], "rank": d, **v}
+        for k, v, d in zip(results, edge_datas, edge_degree)
+        if v is not None
+    ]
+    edge_datas = sorted(  # highest rank (edge degree) first, ties broken by weight
+        edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
+    )
+    edge_datas = truncate_list_by_token_size(  # presumably keeps a prefix within the token budget
+        edge_datas,
+        key=lambda x: x["description"],
+        max_token_size=query_param.max_token_for_global_context,
+    )
+    # Derive the entities and the text chunks attached to the retained edges.
+    use_entities = await _find_most_related_entities_from_relationships(
+        edge_datas, query_param, knowledge_graph_inst
+    )
+    use_text_units = await _find_related_text_unit_from_relationships(
+        edge_datas, query_param, text_chunks_db, knowledge_graph_inst
+    )
+    logger.info(
+        f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} text units"
+    )
+    relations_section_list = [
+        ["id", "source", "target", "description", "keywords", "weight", "rank"]
+    ]
+    for i, e in enumerate(edge_datas):
+        relations_section_list.append(
+            [
+                i,
+                e["src_id"],
+                e["tgt_id"],
+                e["description"],
+                e["keywords"],
+                e["weight"],
+                e["rank"],
+            ]
+        )
+    relations_context = list_of_list_to_csv(relations_section_list)
+    # Entities table: one row per entity returned for the retained edges.
+    entites_section_list = [["id", "entity", "type", "description", "rank"]]
+    for i, n in enumerate(use_entities):
+        entites_section_list.append(
+            [
+                i,
+                n["entity_name"],
+                n.get("entity_type", "UNKNOWN"),
+                n.get("description", "UNKNOWN"),
+                n["rank"],
+            ]
+        )
+    entities_context = list_of_list_to_csv(entites_section_list)
+    # Sources table: the "content" of each text chunk returned for those edges.
+    text_units_section_list = [["id", "content"]]
+    for i, t in enumerate(use_text_units):
+        text_units_section_list.append([i, t["content"]])
+    text_units_context = list_of_list_to_csv(text_units_section_list)
+    # The three tables are returned as fenced csv sections inside one string.
+    return f"""
+-----Entities-----
+```csv
+{entities_context}
+```
+-----Relationships-----
+```csv
+{relations_context}
+```
+-----Sources-----
+```csv
+{text_units_context}
+```
+"""
+
+async def _find_most_related_entities_from_relationships(
+    edge_datas: list[dict],
+    query_param: QueryParam,
+    knowledge_graph_inst: BaseGraphStorage,
+):
+    """Return node records (plus name and degree rank) for all edge endpoints.
+
+    Endpoints missing from the graph are logged and skipped instead of raising;
+    the list is truncated via ``query_param.max_token_for_local_context``.
+    """
+    # dict.fromkeys de-duplicates but keeps first-seen order, so truncation no
+    # longer depends on the arbitrary iteration order of a set of strings.
+    endpoints = (n for e in edge_datas for n in (e["src_id"], e["tgt_id"]))
+    entity_names = list(dict.fromkeys(endpoints))
+    node_datas = await asyncio.gather(
+        *[knowledge_graph_inst.get_node(name) for name in entity_names]
+    )
+    known = [(k, n) for k, n in zip(entity_names, node_datas) if n is not None]
+    if len(known) != len(entity_names):
+        logger.warning("Some nodes are missing, maybe the storage is damaged")
+    node_degrees = await asyncio.gather(
+        *[knowledge_graph_inst.node_degree(k) for k, _ in known]
+    )
+    node_datas = [{**n, "entity_name": k, "rank": d} for (k, n), d in zip(known, node_degrees)]
+    return truncate_list_by_token_size(
+        node_datas,
+        key=lambda x: x.get("description", "UNKNOWN"),
+        max_token_size=query_param.max_token_for_local_context,
+    )
+
+async def _find_related_text_unit_from_relationships(
+    edge_datas: list[dict],
+    query_param: QueryParam,
+    text_chunks_db: BaseKVStorage[TextChunkSchema],
+    knowledge_graph_inst: BaseGraphStorage,
+):
+    """Collect the text chunks referenced by ``edge_datas``, in edge order.
+
+    Each edge's ``source_id`` is split on ``GRAPH_FIELD_SEP`` into chunk ids; a
+    chunk is fetched once and ordered by the first edge citing it. Chunks that
+    ``text_chunks_db`` cannot find are logged and skipped.
+    """
+    text_units = [
+        split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
+        for dp in edge_datas
+    ]
+    all_text_units_lookup = {}
+    for index, unit_list in enumerate(text_units):
+        for c_id in unit_list:
+            if c_id not in all_text_units_lookup:
+                all_text_units_lookup[c_id] = {
+                    "data": await text_chunks_db.get_by_id(c_id),
+                    "order": index,
+                }
+    # The lookup values are wrapper dicts and never None themselves; a missing
+    # chunk shows up as a None "data" field, which is what must be tested.
+    if any(v["data"] is None for v in all_text_units_lookup.values()):
+        logger.warning("Text chunks are missing, maybe the storage is damaged")
+    all_text_units = [
+        {"id": k, **v} for k, v in all_text_units_lookup.items() if v["data"] is not None
+    ]
+    all_text_units.sort(key=lambda x: x["order"])
+    all_text_units = truncate_list_by_token_size(
+        all_text_units,
+        key=lambda x: x["data"]["content"],
+        max_token_size=query_param.max_token_for_text_unit,
+    )
+    return [t["data"] for t in all_text_units]
+
+async def hybird_query(
+    query,
+    knowledge_graph_inst: BaseGraphStorage,
+    entities_vdb: BaseVectorStorage,
+    relationships_vdb: BaseVectorStorage,
+    text_chunks_db: BaseKVStorage[TextChunkSchema],
+    query_param: QueryParam,
+    global_config: dict,
+) -> str:
+    """Answer ``query`` from both entity-level and relationship-level context.
+
+    Keywords are extracted with the LLM. A side that yields no context is
+    skipped; the fail response is returned only when both sides are empty.
+    """
+    use_model_func = global_config["llm_model_func"]
+    kw_prompt = PROMPTS["keywords_extraction"].format(query=query)
+    result = await use_model_func(kw_prompt)
+    try:
+        keywords_data = json.loads(result)
+        hl_keywords = ", ".join(keywords_data.get("high_level_keywords", []))
+        ll_keywords = ", ".join(keywords_data.get("low_level_keywords", []))
+    except json.JSONDecodeError as e:
+        logger.error(f"JSON parsing error: {e}")
+        return PROMPTS["fail_response"]
+
+    low_level_context = await _build_local_query_context(
+        ll_keywords,
+        knowledge_graph_inst,
+        entities_vdb,
+        text_chunks_db,
+        query_param,
+    )
+    high_level_context = await _build_global_query_context(
+        hl_keywords,
+        knowledge_graph_inst,
+        entities_vdb,
+        relationships_vdb,
+        text_chunks_db,
+        query_param,
+    )
+    # Either builder may return None (the global one does on an empty vector
+    # search) and combine_contexts() needs two strings: merge only when both exist.
+    if high_level_context is None or low_level_context is None:
+        context = high_level_context or low_level_context
+    else:
+        context = combine_contexts(high_level_context, low_level_context)
+
+    if query_param.only_need_context:
+        return context
+    if context is None:
+        return PROMPTS["fail_response"]
+    sys_prompt = PROMPTS["rag_response"].format(
+        context_data=context, response_type=query_param.response_type
+    )
+    response = await use_model_func(
+        query,
+        system_prompt=sys_prompt,
+    )
+    return response
+
+def combine_contexts(high_level_context, low_level_context):
+    """Merge two context strings section by section, dropping duplicate rows.
+
+    Both inputs use the ``-----Entities-----`` / ``-----Relationships-----`` /
+    ``-----Sources-----`` layout with fenced ``csv`` bodies; ``None`` or a
+    missing section counts as empty. Rows keep first-seen order (high-level
+    first), so each table's header row stays on top.
+    """
+    def extract_rows(context, title):
+        # Lines of the fenced csv block that follows the given section title.
+        if not context:
+            return []
+        match = re.search(rf"-----{title}-----\s*```csv\s*(.*?)\s*```", context, re.DOTALL)
+        return match.group(1).strip().split("\n") if match else []
+
+    def merge(title):
+        rows = extract_rows(high_level_context, title) + extract_rows(
+            low_level_context, title
+        )
+        # dict.fromkeys removes duplicates but, unlike set(), keeps the order.
+        return "\n".join(dict.fromkeys(filter(None, rows)))
+
+    combined_entities = merge("Entities")
+    combined_relationships = merge("Relationships")
+    combined_sources = merge("Sources")
+    return f"""
+-----Entities-----
+```csv
+{combined_entities}
+```
+-----Relationships-----
+```csv
+{combined_relationships}
+```
+-----Sources-----
+```csv
+{combined_sources}
+```
+"""
+
+async def naive_query(
+    query,
+    chunks_vdb: BaseVectorStorage,
+    text_chunks_db: BaseKVStorage[TextChunkSchema],
+    query_param: QueryParam,
+    global_config: dict,
+):
+    """Answer ``query`` from the most similar text chunks only (no graph lookup)."""
+    use_model_func = global_config["llm_model_func"]
+    results = await chunks_vdb.query(query, top_k=query_param.top_k)
+    if not results:
+        return PROMPTS["fail_response"]
+    chunks_ids = [r["id"] for r in results]
+    # get_by_ids() may return None for ids missing from the KV store; drop them
+    # so the "content" lookups below cannot fail on a None entry.
+    chunks = [c for c in await text_chunks_db.get_by_ids(chunks_ids) if c is not None]
+    if not chunks:
+        return PROMPTS["fail_response"]
+
+    maybe_trun_chunks = truncate_list_by_token_size(
+        chunks,
+        key=lambda x: x["content"],
+        max_token_size=query_param.max_token_for_text_unit,
+    )
+    logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
+    section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
+    if query_param.only_need_context:
+        return section
+    sys_prompt = PROMPTS["naive_rag_response"].format(
+        content_data=section, response_type=query_param.response_type
+    )
+    return await use_model_func(query, system_prompt=sys_prompt)
+
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
new file mode 100644
index 00000000..5d28e49c
--- /dev/null
+++ b/lightrag/prompt.py
@@ -0,0 +1,256 @@
+# Joins/splits multiple ids stored in one graph field (e.g. an edge's "source_id").
+GRAPH_FIELD_SEP = "<SEP>"
+
+PROMPTS = {}
+# Delimiters that the entity-extraction prompt tells the model to emit.
+PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
+PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
+PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
+PROMPTS["process_tickers"] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
+PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event"]
+
+PROMPTS[
+ "entity_extraction"
+] = """-Goal-
+Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.
+
+-Steps-
+1. Identify all entities. For each identified entity, extract the following information:
+- entity_name: Name of the entity, capitalized
+- entity_type: One of the following types: [{entity_types}]
+- entity_description: Comprehensive description of the entity's attributes and activities
+Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>)
+
+2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
+For each pair of related entities, extract the following information:
+- source_entity: name of the source entity, as identified in step 1
+- target_entity: name of the target entity, as identified in step 1
+- relationship_description: explanation as to why you think the source entity and the target entity are related to each other
+- relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
+- relationship_keywords: one or more high-level key words that summarize the overarching nature of the relationship, focusing on concepts or themes rather than specific details
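+Format each relationship as ("relationship"{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_description>{tuple_delimiter}<relationship_keywords>{tuple_delimiter}<relationship_strength>)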
+
+3. Identify high-level key words that summarize the main concepts, themes, or topics of the entire text. These should capture the overarching ideas present in the document.
+Format the content-level key words as ("content_keywords"{tuple_delimiter}<high_level_keywords>)
+
+4. Return output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
+
+5. When finished, output {completion_delimiter}
+
+######################
+-Examples-
+######################
+Example 1:
+
+Entity_types: [person, technology, mission, organization, location]
+Text:
+while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
+
+Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. “If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us.”
+
+The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
+
+It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths
+################
+Output:
+("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is a character who experiences frustration and is observant of the dynamics among other characters."){record_delimiter}
+("entity"{tuple_delimiter}"Taylor"{tuple_delimiter}"person"{tuple_delimiter}"Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective."){record_delimiter}
+("entity"{tuple_delimiter}"Jordan"{tuple_delimiter}"person"{tuple_delimiter}"Jordan shares a commitment to discovery and has a significant interaction with Taylor regarding a device."){record_delimiter}
+("entity"{tuple_delimiter}"Cruz"{tuple_delimiter}"person"{tuple_delimiter}"Cruz is associated with a vision of control and order, influencing the dynamics among other characters."){record_delimiter}
+("entity"{tuple_delimiter}"The Device"{tuple_delimiter}"technology"{tuple_delimiter}"The Device is central to the story, with potential game-changing implications, and is revered by Taylor."){record_delimiter}
+("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Taylor"{tuple_delimiter}"Alex is affected by Taylor's authoritarian certainty and observes changes in Taylor's attitude towards the device."{tuple_delimiter}"power dynamics, perspective shift"{tuple_delimiter}7){record_delimiter}
+("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Jordan"{tuple_delimiter}"Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision."{tuple_delimiter}"shared goals, rebellion"{tuple_delimiter}6){record_delimiter}
+("relationship"{tuple_delimiter}"Taylor"{tuple_delimiter}"Jordan"{tuple_delimiter}"Taylor and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy truce."{tuple_delimiter}"conflict resolution, mutual respect"{tuple_delimiter}8){record_delimiter}
+("relationship"{tuple_delimiter}"Jordan"{tuple_delimiter}"Cruz"{tuple_delimiter}"Jordan's commitment to discovery is in rebellion against Cruz's vision of control and order."{tuple_delimiter}"ideological conflict, rebellion"{tuple_delimiter}5){record_delimiter}
+("relationship"{tuple_delimiter}"Taylor"{tuple_delimiter}"The Device"{tuple_delimiter}"Taylor shows reverence towards the device, indicating its importance and potential impact."{tuple_delimiter}"reverence, technological significance"{tuple_delimiter}9){record_delimiter}
+("content_keywords"{tuple_delimiter}"power dynamics, ideological conflict, discovery, rebellion"){completion_delimiter}
+#############################
+Example 2:
+
+Entity_types: [person, technology, mission, organization, location]
+Text:
+They were no longer mere operatives; they had become guardians of a threshold, keepers of a message from a realm beyond stars and stripes. This elevation in their mission could not be shackled by regulations and established protocols—it demanded a new perspective, a new resolve.
+
+Tension threaded through the dialogue of beeps and static as communications with Washington buzzed in the background. The team stood, a portentous air enveloping them. It was clear that the decisions they made in the ensuing hours could redefine humanity's place in the cosmos or condemn them to ignorance and potential peril.
+
+Their connection to the stars solidified, the group moved to address the crystallizing warning, shifting from passive recipients to active participants. Mercer's latter instincts gained precedence— the team's mandate had evolved, no longer solely to observe and report but to interact and prepare. A metamorphosis had begun, and Operation: Dulce hummed with the newfound frequency of their daring, a tone set not by the earthly
+#############
+Output:
+("entity"{tuple_delimiter}"Washington"{tuple_delimiter}"location"{tuple_delimiter}"Washington is a location where communications are being received, indicating its importance in the decision-making process."){record_delimiter}
+("entity"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"mission"{tuple_delimiter}"Operation: Dulce is described as a mission that has evolved to interact and prepare, indicating a significant shift in objectives and activities."){record_delimiter}
+("entity"{tuple_delimiter}"The team"{tuple_delimiter}"organization"{tuple_delimiter}"The team is portrayed as a group of individuals who have transitioned from passive observers to active participants in a mission, showing a dynamic change in their role."){record_delimiter}
+("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Washington"{tuple_delimiter}"The team receives communications from Washington, which influences their decision-making process."{tuple_delimiter}"decision-making, external influence"{tuple_delimiter}7){record_delimiter}
+("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"The team is directly involved in Operation: Dulce, executing its evolved objectives and activities."{tuple_delimiter}"mission evolution, active participation"{tuple_delimiter}9){record_delimiter}
+("content_keywords"{tuple_delimiter}"mission evolution, decision-making, active participation, cosmic significance"){completion_delimiter}
+#############################
+Example 3:
+
+Entity_types: [person, role, technology, organization, event, location, concept]
+Text:
+their voice slicing through the buzz of activity. "Control may be an illusion when facing an intelligence that literally writes its own rules," they stated stoically, casting a watchful eye over the flurry of data.
+
+"It's like it's learning to communicate," offered Sam Rivera from a nearby interface, their youthful energy boding a mix of awe and anxiety. "This gives talking to strangers' a whole new meaning."
+
+Alex surveyed his team—each face a study in concentration, determination, and not a small measure of trepidation. "This might well be our first contact," he acknowledged, "And we need to be ready for whatever answers back."
+
+Together, they stood on the edge of the unknown, forging humanity's response to a message from the heavens. The ensuing silence was palpable—a collective introspection about their role in this grand cosmic play, one that could rewrite human history.
+
+The encrypted dialogue continued to unfold, its intricate patterns showing an almost uncanny anticipation
+#############
+Output:
+("entity"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"person"{tuple_delimiter}"Sam Rivera is a member of a team working on communicating with an unknown intelligence, showing a mix of awe and anxiety."){record_delimiter}
+("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is the leader of a team attempting first contact with an unknown intelligence, acknowledging the significance of their task."){record_delimiter}
+("entity"{tuple_delimiter}"Control"{tuple_delimiter}"concept"{tuple_delimiter}"Control refers to the ability to manage or govern, which is challenged by an intelligence that writes its own rules."){record_delimiter}
+("entity"{tuple_delimiter}"Intelligence"{tuple_delimiter}"concept"{tuple_delimiter}"Intelligence here refers to an unknown entity capable of writing its own rules and learning to communicate."){record_delimiter}
+("entity"{tuple_delimiter}"First Contact"{tuple_delimiter}"event"{tuple_delimiter}"First Contact is the potential initial communication between humanity and an unknown intelligence."){record_delimiter}
+("entity"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"event"{tuple_delimiter}"Humanity's Response is the collective action taken by Alex's team in response to a message from an unknown intelligence."){record_delimiter}
+("relationship"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"Intelligence"{tuple_delimiter}"Sam Rivera is directly involved in the process of learning to communicate with the unknown intelligence."{tuple_delimiter}"communication, learning process"{tuple_delimiter}9){record_delimiter}
+("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"First Contact"{tuple_delimiter}"Alex leads the team that might be making the First Contact with the unknown intelligence."{tuple_delimiter}"leadership, exploration"{tuple_delimiter}10){record_delimiter}
+("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"Alex and his team are the key figures in Humanity's Response to the unknown intelligence."{tuple_delimiter}"collective action, cosmic significance"{tuple_delimiter}8){record_delimiter}
+("relationship"{tuple_delimiter}"Control"{tuple_delimiter}"Intelligence"{tuple_delimiter}"The concept of Control is challenged by the Intelligence that writes its own rules."{tuple_delimiter}"power dynamics, autonomy"{tuple_delimiter}7){record_delimiter}
+("content_keywords"{tuple_delimiter}"first contact, control, communication, cosmic significance"){completion_delimiter}
+#############################
+-Real Data-
+######################
+Entity_types: {entity_types}
+Text: {input_text}
+######################
+Output:
+"""
+
+PROMPTS[
+ "summarize_entity_descriptions"
+] = """You are a helpful assistant responsible for generating a comprehensive summary of the data provided below.
+Given one or two entities, and a list of descriptions, all related to the same entity or group of entities.
+Please concatenate all of these into a single, comprehensive description. Make sure to include information collected from all the descriptions.
+If the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary.
+Make sure it is written in third person, and include the entity names so we have the full context.
+
+#######
+-Data-
+Entities: {entity_name}
+Description List: {description_list}
+#######
+Output:
+"""
+
+PROMPTS[
+ "entiti_continue_extraction"
+] = """MANY entities were missed in the last extraction. Add them below using the same format:
+"""
+
+PROMPTS[
+ "entiti_if_loop_extraction"
+] = """It appears some entities may have still been missed. Answer YES | NO if there are still entities that need to be added.
+"""
+
+PROMPTS["fail_response"] = "Sorry, I'm not able to provide an answer to that question."
+
+PROMPTS[
+ "rag_response"
+] = """---Role---
+
+You are a helpful assistant responding to questions about data in the tables provided.
+
+
+---Goal---
+
+Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
+If you don't know the answer, just say so. Do not make anything up.
+Do not include information where the supporting evidence for it is not provided.
+
+---Target response length and format---
+
+{response_type}
+
+
+---Data tables---
+
+{context_data}
+
+
+---Goal---
+
+Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
+
+If you don't know the answer, just say so. Do not make anything up.
+
+Do not include information where the supporting evidence for it is not provided.
+
+
+---Target response length and format---
+
+{response_type}
+
+Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown.
+"""
+
+PROMPTS["keywords_extraction"] = """---Role---
+
+You are a helpful assistant tasked with identifying both high-level and low-level keywords in the user's query.
+
+---Goal---
+
+Given the query, list both high-level and low-level keywords. High-level keywords focus on overarching concepts or themes, while low-level keywords focus on specific entities, details, or concrete terms.
+
+---Instructions---
+
+- Output the keywords in JSON format.
+- The JSON should have two keys:
+ - "high_level_keywords" for overarching concepts or themes.
+ - "low_level_keywords" for specific entities or details.
+
+######################
+-Examples-
+######################
+Example 1:
+
+Query: "How does international trade influence global economic stability?"
+################
+Output:
+{{
+ "high_level_keywords": ["International trade", "Global economic stability", "Economic impact"],
+ "low_level_keywords": ["Trade agreements", "Tariffs", "Currency exchange", "Imports", "Exports"]
+}}
+#############################
+Example 2:
+
+Query: "What are the environmental consequences of deforestation on biodiversity?"
+################
+Output:
+{{
+ "high_level_keywords": ["Environmental consequences", "Deforestation", "Biodiversity loss"],
+ "low_level_keywords": ["Species extinction", "Habitat destruction", "Carbon emissions", "Rainforest", "Ecosystem"]
+}}
+#############################
+Example 3:
+
+Query: "What is the role of education in reducing poverty?"
+################
+Output:
+{{
+ "high_level_keywords": ["Education", "Poverty reduction", "Socioeconomic development"],
+ "low_level_keywords": ["School access", "Literacy rates", "Job training", "Income inequality"]
+}}
+#############################
+-Real Data-
+######################
+Query: {query}
+######################
+Output:
+
+"""
+
+PROMPTS[
+ "naive_rag_response"
+] = """You're a helpful assistant
+Below is the knowledge you have:
+{content_data}
+---
+If you don't know the answer or if the provided knowledge does not contain sufficient information to provide an answer, just say so. Do not make anything up.
+Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
+If you don't know the answer, just say so. Do not make anything up.
+Do not include information where the supporting evidence for it is not provided.
+---Target response length and format---
+{response_type}
+"""
diff --git a/lightrag/storage.py b/lightrag/storage.py
new file mode 100644
index 00000000..2f2bb7d8
--- /dev/null
+++ b/lightrag/storage.py
@@ -0,0 +1,246 @@
+import asyncio
+import html
+import json
+import os
+from collections import defaultdict
+from dataclasses import dataclass, field
+from typing import Any, Union, cast
+import pickle
+import hnswlib
+import networkx as nx
+import numpy as np
+from nano_vectordb import NanoVectorDB
+import xxhash
+
+from .utils import load_json, logger, write_json
+from .base import (
+ BaseGraphStorage,
+ BaseKVStorage,
+ BaseVectorStorage,
+)
+
+@dataclass
+class JsonKVStorage(BaseKVStorage):  # Key-value store held in a dict and saved as one JSON file.
+    def __post_init__(self):
+        working_dir = self.global_config["working_dir"]
+        self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
+        self._data = load_json(self._file_name) or {}  # empty dict when load_json returns a falsy value
+        logger.info(f"Load KV {self.namespace} with {len(self._data)} data")
+
+    async def all_keys(self) -> list[str]:  # keys currently held in memory
+        return list(self._data.keys())
+
+    async def index_done_callback(self):  # hand the in-memory dict to write_json for this store's file
+        write_json(self._data, self._file_name)
+
+    async def get_by_id(self, id):  # record stored under ``id``, or None
+        return self._data.get(id, None)
+
+    async def get_by_ids(self, ids, fields=None):  # one result per id, in the order given
+        if fields is None:
+            return [self._data.get(id, None) for id in ids]
+        return [
+            (
+                {k: v for k, v in self._data[id].items() if k in fields}  # keep only the requested fields
+                if self._data.get(id, None)  # a missing or empty record maps to None
+                else None
+            )
+            for id in ids
+        ]
+
+    async def filter_keys(self, data: list[str]) -> set[str]:  # members of ``data`` that are not stored keys
+        return set([s for s in data if s not in self._data])
+
+    async def upsert(self, data: dict[str, dict]):  # adds new keys only; existing records are not updated
+        left_data = {k: v for k, v in data.items() if k not in self._data}
+        self._data.update(left_data)
+        return left_data  # the entries that were actually inserted
+
+    async def drop(self):  # clears the in-memory dict only; nothing is written here
+        self._data = {}
+
+@dataclass
+class NanoVectorDBStorage(BaseVectorStorage):  # Vector storage delegating to a NanoVectorDB client.
+    cosine_better_than_threshold: float = 0.2  # default; global_config may override it in __post_init__
+
+    def __post_init__(self):
+        # One client storage file per namespace inside the working directory.
+        self._client_file_name = os.path.join(
+            self.global_config["working_dir"], f"vdb_{self.namespace}.json"
+        )
+        self._max_batch_size = self.global_config["embedding_batch_num"]
+        self._client = NanoVectorDB(
+            self.embedding_func.embedding_dim, storage_file=self._client_file_name
+        )
+        self.cosine_better_than_threshold = self.global_config.get(
+            "cosine_better_than_threshold", self.cosine_better_than_threshold
+        )
+
+    async def upsert(self, data: dict[str, dict]):  # embeds each value's "content" and upserts it under its key
+        logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
+        if not len(data):
+            logger.warning("You insert an empty data to vector DB")
+            return []
+        list_data = [
+            {
+                "__id__": k,
+                **{k1: v1 for k1, v1 in v.items() if k1 in self.meta_fields},  # only declared meta fields
+            }
+            for k, v in data.items()
+        ]
+        contents = [v["content"] for v in data.values()]
+        batches = [  # slices of at most _max_batch_size texts
+            contents[i : i + self._max_batch_size]
+            for i in range(0, len(contents), self._max_batch_size)
+        ]
+        embeddings_list = await asyncio.gather(
+            *[self.embedding_func(batch) for batch in batches]
+        )
+        embeddings = np.concatenate(embeddings_list)  # gather keeps batch order, so rows line up with list_data
+        for i, d in enumerate(list_data):
+            d["__vector__"] = embeddings[i]
+        results = self._client.upsert(datas=list_data)
+        return results
+
+    async def query(self, query: str, top_k=5):  # embeds ``query`` and asks the client for up to ``top_k`` hits
+        embedding = await self.embedding_func([query])
+        embedding = embedding[0]
+        results = self._client.query(
+            query=embedding,
+            top_k=top_k,
+            better_than_threshold=self.cosine_better_than_threshold,
+        )
+        results = [  # expose "__id__" / "__metrics__" under the keys "id" / "distance"
+            {**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
+        ]
+        return results
+
+    async def index_done_callback(self):  # asks the client to save (presumably to its storage file)
+        self._client.save()
+
+@dataclass
+class NetworkXStorage(BaseGraphStorage):  # Graph storage on top of a networkx graph, saved as GraphML.
+    @staticmethod
+    def load_nx_graph(file_name) -> nx.Graph:
+        if os.path.exists(file_name):
+            return nx.read_graphml(file_name)
+        return None  # no file at that path (despite the nx.Graph return annotation)
+
+    @staticmethod
+    def write_nx_graph(graph: nx.Graph, file_name):
+        logger.info(
+            f"Writing graph with {graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges"
+        )
+        nx.write_graphml(graph, file_name)
+
+    @staticmethod
+    def stable_largest_connected_component(graph: nx.Graph) -> nx.Graph:
+        """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
+        Return the largest connected component of the graph, with nodes and edges sorted in a stable way.
+        """
+        from graspologic.utils import largest_connected_component
+
+        graph = graph.copy()
+        graph = cast(nx.Graph, largest_connected_component(graph))
+        node_mapping = {node: html.unescape(node.upper().strip()) for node in graph.nodes()}  # type: ignore
+        graph = nx.relabel_nodes(graph, node_mapping)
+        return NetworkXStorage._stabilize_graph(graph)
+
+    @staticmethod
+    def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
+        """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
+        Ensure an undirected graph with the same relationships will always be read the same way.
+        """
+        fixed_graph = nx.DiGraph() if graph.is_directed() else nx.Graph()
+
+        sorted_nodes = graph.nodes(data=True)
+        sorted_nodes = sorted(sorted_nodes, key=lambda x: x[0])  # order nodes by node key
+
+        fixed_graph.add_nodes_from(sorted_nodes)
+        edges = list(graph.edges(data=True))
+
+        if not graph.is_directed():
+
+            def _sort_source_target(edge):
+                source, target, edge_data = edge
+                if source > target:  # put the smaller endpoint first
+                    temp = source
+                    source = target
+                    target = temp
+                return source, target, edge_data
+
+            edges = [_sort_source_target(edge) for edge in edges]
+
+        def _get_edge_key(source: Any, target: Any) -> str:
+            return f"{source} -> {target}"
+
+        edges = sorted(edges, key=lambda x: _get_edge_key(x[0], x[1]))  # order edges by their "a -> b" key
+
+        fixed_graph.add_edges_from(edges)
+        return fixed_graph
+
+    def __post_init__(self):
+        self._graphml_xml_file = os.path.join(
+            self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
+        )
+        preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
+        if preloaded_graph is not None:
+            logger.info(
+                f"Loaded graph from {self._graphml_xml_file} with {preloaded_graph.number_of_nodes()} nodes, {preloaded_graph.number_of_edges()} edges"
+            )
+        self._graph = preloaded_graph or nx.Graph()  # NOTE(review): a falsy (e.g. empty) loaded graph is replaced too — confirm
+        self._node_embed_algorithms = {  # name -> coroutine method, looked up by embed_nodes()
+            "node2vec": self._node2vec_embed,
+        }
+
+    async def index_done_callback(self):  # write the current graph to the GraphML file
+        NetworkXStorage.write_nx_graph(self._graph, self._graphml_xml_file)
+
+    async def has_node(self, node_id: str) -> bool:
+        return self._graph.has_node(node_id)
+
+    async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
+        return self._graph.has_edge(source_node_id, target_node_id)
+
+    async def get_node(self, node_id: str) -> Union[dict, None]:  # attribute dict of the node, or None
+        return self._graph.nodes.get(node_id)
+
+    async def node_degree(self, node_id: str) -> int:
+        return self._graph.degree(node_id)
+
+    async def edge_degree(self, src_id: str, tgt_id: str) -> int:  # sum of the two endpoint degrees
+        return self._graph.degree(src_id) + self._graph.degree(tgt_id)
+
+    async def get_edge(
+        self, source_node_id: str, target_node_id: str
+    ) -> Union[dict, None]:  # attribute dict of the edge, or None
+        return self._graph.edges.get((source_node_id, target_node_id))
+
+    async def get_node_edges(self, source_node_id: str):
+        if self._graph.has_node(source_node_id):
+            return list(self._graph.edges(source_node_id))
+        return None  # the node is not in the graph
+
+    async def upsert_node(self, node_id: str, node_data: dict[str, str]):  # node_data becomes node attributes
+        self._graph.add_node(node_id, **node_data)
+
+    async def upsert_edge(
+        self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
+    ):  # edge_data becomes edge attributes
+        self._graph.add_edge(source_node_id, target_node_id, **edge_data)
+
+    async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
+        if algorithm not in self._node_embed_algorithms:
+            raise ValueError(f"Node embedding algorithm {algorithm} not supported")
+        return await self._node_embed_algorithms[algorithm]()
+
+    async def _node2vec_embed(self):
+        from graspologic import embed
+
+        embeddings, nodes = embed.node2vec_embed(
+            self._graph,
+            **self.global_config["node2vec_params"],
+        )
+
+        nodes_ids = [self._graph.nodes[node_id]["id"] for node_id in nodes]  # assumes every node has an "id" attribute — TODO confirm
+        return embeddings, nodes_ids
diff --git a/lightrag/utils.py b/lightrag/utils.py
new file mode 100644
index 00000000..c75b4270
--- /dev/null
+++ b/lightrag/utils.py
@@ -0,0 +1,165 @@
+import asyncio
+import html
+import json
+import logging
+import os
+import re
+from dataclasses import dataclass
+from functools import wraps
+from hashlib import md5
+from typing import Any, Union
+
+import numpy as np
+import tiktoken
+
+ENCODER = None
+
+logger = logging.getLogger("lightrag")
+
+def set_logger(log_file: str):
+ logger.setLevel(logging.DEBUG)
+
+ file_handler = logging.FileHandler(log_file)
+ file_handler.setLevel(logging.DEBUG)
+
+ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ file_handler.setFormatter(formatter)
+
+ if not logger.handlers:
+ logger.addHandler(file_handler)
+
+@dataclass
+class EmbeddingFunc:
+ embedding_dim: int
+ max_token_size: int
+ func: callable
+
+ async def __call__(self, *args, **kwargs) -> np.ndarray:
+ return await self.func(*args, **kwargs)
+
+def locate_json_string_body_from_string(content: str) -> Union[str, None]:
+ """Locate the JSON string body from a string"""
+ maybe_json_str = re.search(r"{.*}", content, re.DOTALL)
+ if maybe_json_str is not None:
+ return maybe_json_str.group(0)
+ else:
+ return None
+
+def convert_response_to_json(response: str) -> dict:
+ json_str = locate_json_string_body_from_string(response)
+ assert json_str is not None, f"Unable to parse JSON from response: {response}"
+ try:
+ data = json.loads(json_str)
+ return data
+ except json.JSONDecodeError as e:
+ logger.error(f"Failed to parse JSON: {json_str}")
+ raise e from None
+
+def compute_args_hash(*args):
+ return md5(str(args).encode()).hexdigest()
+
+def compute_mdhash_id(content, prefix: str = ""):
+ return prefix + md5(content.encode()).hexdigest()
+
+def limit_async_func_call(max_size: int, waitting_time: float = 0.0001):
+ """Add restriction of maximum async calling times for a async func"""
+
+ def final_decro(func):
+ """Not using async.Semaphore to aovid use nest-asyncio"""
+ __current_size = 0
+
+ @wraps(func)
+ async def wait_func(*args, **kwargs):
+ nonlocal __current_size
+ while __current_size >= max_size:
+ await asyncio.sleep(waitting_time)
+ __current_size += 1
+ result = await func(*args, **kwargs)
+ __current_size -= 1
+ return result
+
+ return wait_func
+
+ return final_decro
+
+def wrap_embedding_func_with_attrs(**kwargs):
+ """Wrap a function with attributes"""
+
+ def final_decro(func) -> EmbeddingFunc:
+ new_func = EmbeddingFunc(**kwargs, func=func)
+ return new_func
+
+ return final_decro
+
+def load_json(file_name):
+ if not os.path.exists(file_name):
+ return None
+ with open(file_name) as f:
+ return json.load(f)
+
+def write_json(json_obj, file_name):
+ with open(file_name, "w") as f:
+ json.dump(json_obj, f, indent=2, ensure_ascii=False)
+
+def encode_string_by_tiktoken(content: str, model_name: str = "gpt-4o"):
+ global ENCODER
+ if ENCODER is None:
+ ENCODER = tiktoken.encoding_for_model(model_name)
+ tokens = ENCODER.encode(content)
+ return tokens
+
+
+def decode_tokens_by_tiktoken(tokens: list[int], model_name: str = "gpt-4o"):
+ global ENCODER
+ if ENCODER is None:
+ ENCODER = tiktoken.encoding_for_model(model_name)
+ content = ENCODER.decode(tokens)
+ return content
+
+def pack_user_ass_to_openai_messages(*args: str):
+ roles = ["user", "assistant"]
+ return [
+ {"role": roles[i % 2], "content": content} for i, content in enumerate(args)
+ ]
+
+def split_string_by_multi_markers(content: str, markers: list[str]) -> list[str]:
+ """Split a string by multiple markers"""
+ if not markers:
+ return [content]
+ results = re.split("|".join(re.escape(marker) for marker in markers), content)
+ return [r.strip() for r in results if r.strip()]
+
+# Refer the utils functions of the official GraphRAG implementation:
+# https://github.com/microsoft/graphrag
+def clean_str(input: Any) -> str:
+ """Clean an input string by removing HTML escapes, control characters, and other unwanted characters."""
+ # If we get non-string input, just give it back
+ if not isinstance(input, str):
+ return input
+
+ result = html.unescape(input.strip())
+ # https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python
+ return re.sub(r"[\x00-\x1f\x7f-\x9f]", "", result)
+
+def is_float_regex(value):
+ return bool(re.match(r"^[-+]?[0-9]*\.?[0-9]+$", value))
+
+def truncate_list_by_token_size(list_data: list, key: callable, max_token_size: int):
+ """Truncate a list of data by token size"""
+ if max_token_size <= 0:
+ return []
+ tokens = 0
+ for i, data in enumerate(list_data):
+ tokens += len(encode_string_by_tiktoken(key(data)))
+ if tokens > max_token_size:
+ return list_data[:i]
+ return list_data
+
+def list_of_list_to_csv(data: list[list]):
+ return "\n".join(
+ [",\t".join([str(data_dd) for data_dd in data_d]) for data_d in data]
+ )
+
+def save_data_to_file(data, file_name):
+ with open(file_name, 'w', encoding='utf-8') as f:
+ json.dump(data, f, ensure_ascii=False, indent=4)
\ No newline at end of file
From 2425ede64fff575c89c58b85bd0ca1b695320fd3 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Thu, 10 Oct 2024 15:17:03 +0800
Subject: [PATCH 011/258] update
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 42de1c1c..60b655ab 100644
--- a/README.md
+++ b/README.md
@@ -186,7 +186,7 @@ Output your evaluation in the following JSON format:
```
## Citation
-```
+```python
@article{guo2024lightrag,
title={LightRAG: Simple and Fast Retrieval-Augmented Generation},
author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang},
From bf84cf18a11006cb1767c550d84f4b393dfb32a8 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Fri, 11 Oct 2024 11:22:17 +0800
Subject: [PATCH 012/258] update utils.py
---
lightrag/utils.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lightrag/utils.py b/lightrag/utils.py
index c75b4270..9496cf34 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -94,11 +94,11 @@ def final_decro(func) -> EmbeddingFunc:
def load_json(file_name):
if not os.path.exists(file_name):
return None
- with open(file_name) as f:
+ with open(file_name, encoding="utf-8") as f:
return json.load(f)
def write_json(json_obj, file_name):
- with open(file_name, "w") as f:
+ with open(file_name, "w", encoding="utf-8") as f:
json.dump(json_obj, f, indent=2, ensure_ascii=False)
def encode_string_by_tiktoken(content: str, model_name: str = "gpt-4o"):
From 2d2085e6f92ac636a58e014c56df6f65319fd313 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Fri, 11 Oct 2024 11:24:42 +0800
Subject: [PATCH 013/258] update utils.py
---
lightrag/utils.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lightrag/utils.py b/lightrag/utils.py
index c75b4270..9496cf34 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -94,11 +94,11 @@ def final_decro(func) -> EmbeddingFunc:
def load_json(file_name):
if not os.path.exists(file_name):
return None
- with open(file_name) as f:
+ with open(file_name, encoding="utf-8") as f:
return json.load(f)
def write_json(json_obj, file_name):
- with open(file_name, "w") as f:
+ with open(file_name, "w", encoding="utf-8") as f:
json.dump(json_obj, f, indent=2, ensure_ascii=False)
def encode_string_by_tiktoken(content: str, model_name: str = "gpt-4o"):
From 2e364fb665bd2dacfa355c29b1a584deaa0bbe47 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Fri, 11 Oct 2024 11:39:31 +0800
Subject: [PATCH 014/258] update __init__.py
---
lightrag/__init__.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lightrag/__init__.py b/lightrag/__init__.py
index dc497cd4..0b279096 100644
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,5 +1,5 @@
from .lightrag import LightRAG, QueryParam
-__version__ = "0.0.2"
+__version__ = "0.0.3"
__author__ = "Zirui Guo"
-__url__ = "https://github.com/HKUDS/GraphEdit"
+__url__ = "https://github.com/HKUDS/LightRAG"
From 050ff77dafb78c5ad8d68945926cc0bd2407308c Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Fri, 11 Oct 2024 11:43:54 +0800
Subject: [PATCH 015/258] update README.md
---
README.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/README.md b/README.md
index 60b655ab..9c588804 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,8 @@
+
+
This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
![请添加图片描述](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
From 4011aaf8be69fbf7a7d8eb90708d8cfba2d71682 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Fri, 11 Oct 2024 11:46:57 +0800
Subject: [PATCH 016/258] update README.md
---
README.md | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 9c588804..ee5535e6 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
# LightRAG: Simple and Fast Retrieval-Augmented Generation
-![请添加图片描述](https://i-blog.csdnimg.cn/direct/567139f1a36e4564abc63ce5c12b6271.jpeg)
+
+![请添加图片描述](https://i-blog.csdnimg.cn/direct/567139f1a36e4564abc63ce5c12b6271.jpeg)
@@ -10,6 +11,8 @@
This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
![请添加图片描述](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
+
+
This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
![请添加图片描述](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
From 6713017df9e070414449a01d055b92587c2a43df Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Fri, 11 Oct 2024 15:16:43 +0800
Subject: [PATCH 019/258] update reproduce
---
README.md | 114 +++++++++++++++++++++++++++++++++++++++++++-
reproduce/Step_0.py | 63 ++++++++++++++++++++++++
reproduce/Step_1.py | 32 +++++++++++++
reproduce/Step_2.py | 76 +++++++++++++++++++++++++++++
reproduce/Step_3.py | 62 ++++++++++++++++++++++++
5 files changed, 346 insertions(+), 1 deletion(-)
create mode 100644 reproduce/Step_0.py
create mode 100644 reproduce/Step_1.py
create mode 100644 reproduce/Step_2.py
create mode 100644 reproduce/Step_3.py
diff --git a/README.md b/README.md
index 693f60cf..f70b9d58 100644
--- a/README.md
+++ b/README.md
@@ -149,7 +149,6 @@ Output your evaluation in the following JSON format:
}}
```
### Overall Performance Table
-### Overall Performance Table
| | **Agriculture** | | **CS** | | **Legal** | | **Mix** | |
|----------------------|-------------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|
| | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** |
@@ -173,6 +172,114 @@ Output your evaluation in the following JSON format:
| **Empowerment** | 36.69% | **63.31%** | 45.09% | **54.91%** | 42.81% | **57.19%** | **52.94%** | 47.06% |
| **Overall** | 43.62% | **56.38%** | 45.98% | **54.02%** | 45.70% | **54.30%** | **51.86%** | 48.14% |
+## Reproduce
+All the code can be found in the `./reproduce` directory.
+### Step-0 Extract Unique Contexts
+First, we need to extract unique contexts in the datasets.
+```python
+def extract_unique_contexts(input_directory, output_directory):
+
+ os.makedirs(output_directory, exist_ok=True)
+
+ jsonl_files = glob.glob(os.path.join(input_directory, '*.jsonl'))
+ print(f"Found {len(jsonl_files)} JSONL files.")
+
+ for file_path in jsonl_files:
+ filename = os.path.basename(file_path)
+ name, ext = os.path.splitext(filename)
+ output_filename = f"{name}_unique_contexts.json"
+ output_path = os.path.join(output_directory, output_filename)
+
+ unique_contexts_dict = {}
+
+ print(f"Processing file: {filename}")
+
+ try:
+ with open(file_path, 'r', encoding='utf-8') as infile:
+ for line_number, line in enumerate(infile, start=1):
+ line = line.strip()
+ if not line:
+ continue
+ try:
+ json_obj = json.loads(line)
+ context = json_obj.get('context')
+ if context and context not in unique_contexts_dict:
+ unique_contexts_dict[context] = None
+ except json.JSONDecodeError as e:
+ print(f"JSON decoding error in file {filename} at line {line_number}: {e}")
+ except FileNotFoundError:
+ print(f"File not found: {filename}")
+ continue
+ except Exception as e:
+ print(f"An error occurred while processing file {filename}: {e}")
+ continue
+
+ unique_contexts_list = list(unique_contexts_dict.keys())
+ print(f"There are {len(unique_contexts_list)} unique `context` entries in the file {filename}.")
+
+ try:
+ with open(output_path, 'w', encoding='utf-8') as outfile:
+ json.dump(unique_contexts_list, outfile, ensure_ascii=False, indent=4)
+ print(f"Unique `context` entries have been saved to: {output_filename}")
+ except Exception as e:
+ print(f"An error occurred while saving to the file {output_filename}: {e}")
+
+ print("All files have been processed.")
+
+```
+### Step-1 Insert Contexts
+For the extracted contexts, we insert them into the LightRAG system.
+
+```python
+def insert_text(rag, file_path):
+ with open(file_path, mode='r') as f:
+ unique_contexts = json.load(f)
+
+ retries = 0
+ max_retries = 3
+ while retries < max_retries:
+ try:
+ rag.insert(unique_contexts)
+ break
+ except Exception as e:
+ retries += 1
+ print(f"Insertion failed, retrying ({retries}/{max_retries}), error: {e}")
+ time.sleep(10)
+ if retries == max_retries:
+ print("Insertion failed after exceeding the maximum number of retries")
+```
+### Step-2 Generate Queries
+
+We extract tokens from both the first half and the second half of each context in the dataset, then combine them to generate queries for dataset descriptions.
+```python
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+
+def get_summary(context, tot_tokens=2000):
+ tokens = tokenizer.tokenize(context)
+ half_tokens = tot_tokens // 2
+
+ start_tokens = tokens[1000:1000 + half_tokens]
+ end_tokens = tokens[-(1000 + half_tokens):1000]
+
+ summary_tokens = start_tokens + end_tokens
+ summary = tokenizer.convert_tokens_to_string(summary_tokens)
+
+ return summary
+```
+
+### Step-3 Query
+For the queries generated in Step-2, we will extract them and query LightRAG.
+```python
+def extract_queries(file_path):
+ with open(file_path, 'r') as f:
+ data = f.read()
+
+ data = data.replace('**', '')
+
+ queries = re.findall(r'- Question \d+: (.+)', data)
+
+ return queries
+```
## Code Structure
```python
@@ -191,6 +298,11 @@ Output your evaluation in the following JSON format:
│ ├── prompt.py
│ ├── storage.py
│ └── utils.jpeg
+├── reproduce
+│ ├── Step_0.py
+│ ├── Step_1.py
+│ ├── Step_2.py
+│ └── Step_3.py
├── LICENSE
├── README.md
├── requirements.txt
diff --git a/reproduce/Step_0.py b/reproduce/Step_0.py
new file mode 100644
index 00000000..9053aa40
--- /dev/null
+++ b/reproduce/Step_0.py
@@ -0,0 +1,63 @@
+import os
+import json
+import glob
+import argparse
+
+def extract_unique_contexts(input_directory, output_directory):
+
+ os.makedirs(output_directory, exist_ok=True)
+
+ jsonl_files = glob.glob(os.path.join(input_directory, '*.jsonl'))
+ print(f"Found {len(jsonl_files)} JSONL files.")
+
+ for file_path in jsonl_files:
+ filename = os.path.basename(file_path)
+ name, ext = os.path.splitext(filename)
+ output_filename = f"{name}_unique_contexts.json"
+ output_path = os.path.join(output_directory, output_filename)
+
+ unique_contexts_dict = {}
+
+ print(f"Processing file: {filename}")
+
+ try:
+ with open(file_path, 'r', encoding='utf-8') as infile:
+ for line_number, line in enumerate(infile, start=1):
+ line = line.strip()
+ if not line:
+ continue
+ try:
+ json_obj = json.loads(line)
+ context = json_obj.get('context')
+ if context and context not in unique_contexts_dict:
+ unique_contexts_dict[context] = None
+ except json.JSONDecodeError as e:
+ print(f"JSON decoding error in file {filename} at line {line_number}: {e}")
+ except FileNotFoundError:
+ print(f"File not found: {filename}")
+ continue
+ except Exception as e:
+ print(f"An error occurred while processing file {filename}: {e}")
+ continue
+
+ unique_contexts_list = list(unique_contexts_dict.keys())
+ print(f"There are {len(unique_contexts_list)} unique `context` entries in the file {filename}.")
+
+ try:
+ with open(output_path, 'w', encoding='utf-8') as outfile:
+ json.dump(unique_contexts_list, outfile, ensure_ascii=False, indent=4)
+ print(f"Unique `context` entries have been saved to: {output_filename}")
+ except Exception as e:
+ print(f"An error occurred while saving to the file {output_filename}: {e}")
+
+ print("All files have been processed.")
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-i', '--input_dir', type=str, default='../datasets')
+ parser.add_argument('-o', '--output_dir', type=str, default='../datasets/unique_contexts')
+
+ args = parser.parse_args()
+
+ extract_unique_contexts(args.input_dir, args.output_dir)
diff --git a/reproduce/Step_1.py b/reproduce/Step_1.py
new file mode 100644
index 00000000..08e497cb
--- /dev/null
+++ b/reproduce/Step_1.py
@@ -0,0 +1,32 @@
+import os
+import json
+import time
+
+from lightrag import LightRAG
+
+def insert_text(rag, file_path):
+ with open(file_path, mode='r') as f:
+ unique_contexts = json.load(f)
+
+ retries = 0
+ max_retries = 3
+ while retries < max_retries:
+ try:
+ rag.insert(unique_contexts)
+ break
+ except Exception as e:
+ retries += 1
+ print(f"Insertion failed, retrying ({retries}/{max_retries}), error: {e}")
+ time.sleep(10)
+ if retries == max_retries:
+ print("Insertion failed after exceeding the maximum number of retries")
+
+cls = "agriculture"
+WORKING_DIR = "../{cls}"
+
+if not os.path.exists(WORKING_DIR):
+ os.mkdir(WORKING_DIR)
+
+rag = LightRAG(working_dir=WORKING_DIR)
+
+insert_text(rag, f"../datasets/unique_contexts/{cls}_unique_contexts.json")
\ No newline at end of file
diff --git a/reproduce/Step_2.py b/reproduce/Step_2.py
new file mode 100644
index 00000000..b00c19b8
--- /dev/null
+++ b/reproduce/Step_2.py
@@ -0,0 +1,76 @@
+import os
+import json
+from openai import OpenAI
+from transformers import GPT2Tokenizer
+
+def openai_complete_if_cache(
+ model="gpt-4o", prompt=None, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ openai_client = OpenAI()
+
+ messages = []
+ if system_prompt:
+ messages.append({"role": "system", "content": system_prompt})
+ messages.extend(history_messages)
+ messages.append({"role": "user", "content": prompt})
+
+ response = openai_client.chat.completions.create(
+ model=model, messages=messages, **kwargs
+ )
+ return response.choices[0].message.content
+
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+
+def get_summary(context, tot_tokens=2000):
+ tokens = tokenizer.tokenize(context)
+ half_tokens = tot_tokens // 2
+
+ start_tokens = tokens[1000:1000 + half_tokens]
+ end_tokens = tokens[-(1000 + half_tokens):1000]
+
+ summary_tokens = start_tokens + end_tokens
+ summary = tokenizer.convert_tokens_to_string(summary_tokens)
+
+ return summary
+
+
+clses = ['agriculture']
+for cls in clses:
+ with open(f'../datasets/unique_contexts/{cls}_unique_contexts.json', mode='r') as f:
+ unique_contexts = json.load(f)
+
+ summaries = [get_summary(context) for context in unique_contexts]
+
+ total_description = "\n\n".join(summaries)
+
+ prompt = f"""
+ Given the following description of a dataset:
+
+ {total_description}
+
+ Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset.
+
+ Output the results in the following structure:
+ - User 1: [user description]
+ - Task 1: [task description]
+ - Question 1:
+ - Question 2:
+ - Question 3:
+ - Question 4:
+ - Question 5:
+ - Task 2: [task description]
+ ...
+ - Task 5: [task description]
+ - User 2: [user description]
+ ...
+ - User 5: [user description]
+ ...
+ """
+
+ result = openai_complete_if_cache(model='gpt-4o', prompt=prompt)
+
+ file_path = f"../datasets/questions/{cls}_questions.txt"
+ with open(file_path, "w") as file:
+ file.write(result)
+
+ print(f"{cls}_questions written to {file_path}")
\ No newline at end of file
diff --git a/reproduce/Step_3.py b/reproduce/Step_3.py
new file mode 100644
index 00000000..f7f7ee30
--- /dev/null
+++ b/reproduce/Step_3.py
@@ -0,0 +1,62 @@
+import re
+import json
+import asyncio
+from lightrag import LightRAG, QueryParam
+from tqdm import tqdm
+
+def extract_queries(file_path):
+ with open(file_path, 'r') as f:
+ data = f.read()
+
+ data = data.replace('**', '')
+
+ queries = re.findall(r'- Question \d+: (.+)', data)
+
+ return queries
+
+async def process_query(query_text, rag_instance, query_param):
+ try:
+ result, context = await rag_instance.aquery(query_text, param=query_param)
+ return {"query": query_text, "result": result, "context": context}, None
+ except Exception as e:
+ return None, {"query": query_text, "error": str(e)}
+
+def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
+ try:
+ loop = asyncio.get_event_loop()
+ except RuntimeError:
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+ return loop
+
+def run_queries_and_save_to_json(queries, rag_instance, query_param, output_file, error_file):
+ loop = always_get_an_event_loop()
+
+ with open(output_file, 'a', encoding='utf-8') as result_file, open(error_file, 'a', encoding='utf-8') as err_file:
+ result_file.write("[\n")
+ first_entry = True
+
+ for query_text in tqdm(queries, desc="Processing queries", unit="query"):
+ result, error = loop.run_until_complete(process_query(query_text, rag_instance, query_param))
+
+ if result:
+ if not first_entry:
+ result_file.write(",\n")
+ json.dump(result, result_file, ensure_ascii=False, indent=4)
+ first_entry = False
+ elif error:
+ json.dump(error, err_file, ensure_ascii=False, indent=4)
+ err_file.write("\n")
+
+ result_file.write("\n]")
+
+if __name__ == "__main__":
+ cls = "agriculture"
+ mode = "hybird"
+ WORKING_DIR = "../{cls}"
+
+ rag = LightRAG(working_dir=WORKING_DIR)
+ query_param = QueryParam(mode=mode)
+
+ queries = extract_queries(f"../datasets/questions/{cls}_questions.txt")
+ run_queries_and_save_to_json(queries, rag, query_param, "result.json", "errors.json")
\ No newline at end of file
From de9ff79a29ecc0d4644a967621c50f046cec2b15 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Fri, 11 Oct 2024 15:19:20 +0800
Subject: [PATCH 020/258] update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index f70b9d58..6d837c24 100644
--- a/README.md
+++ b/README.md
@@ -250,7 +250,7 @@ def insert_text(rag, file_path):
```
### Step-2 Generate Queries
-We extract tokens from both the first half and the second half of each context in the dataset, then combine them to generate queries for dataset descriptions.
+We extract tokens from both the first half and the second half of each context in the dataset, then combine them as the dataset description to generate queries.
```python
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
From b92e8c9df4d169f6b081c724ec164a412e1ad43f Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Fri, 11 Oct 2024 15:44:00 +0800
Subject: [PATCH 021/258] Update README.md
From 5b29ebdebcda92910187eed415dbfe28ee9e6102 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Fri, 11 Oct 2024 15:46:51 +0800
Subject: [PATCH 022/258] Update README.md
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 6d837c24..ee70cffb 100644
--- a/README.md
+++ b/README.md
@@ -12,8 +12,8 @@
-
-
+
+
This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
From 837dcf53e605b018a4e752a6899f5b60cd3940b2 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Fri, 11 Oct 2024 15:49:16 +0800
Subject: [PATCH 023/258] Update README.md
---
README.md | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index ee70cffb..da5f7ebf 100644
--- a/README.md
+++ b/README.md
@@ -10,10 +10,10 @@
-
+
-
-
+
+
This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
From a115b4b8ce1e18427432e80bf0f45762397ee1ac Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Sat, 12 Oct 2024 18:13:33 +0800
Subject: [PATCH 024/258] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index da5f7ebf..5d8734e8 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
+## 🎉 News
+- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports Hugging Face models!
+
## Install
* Install from source
@@ -35,17 +38,27 @@ pip install lightrag-hku
## Quick Start
-* Set OpenAI API key in environment: `export OPENAI_API_KEY="sk-...".`
-* Download the demo text "A Christmas Carol by Charles Dickens"
+* Set OpenAI API key in environment if using OpenAI models: `export OPENAI_API_KEY="sk-..."`.
+* Download the demo text "A Christmas Carol by Charles Dickens":
```bash
curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt
```
-Use the below python snippet:
+Use the below Python snippet to initialize LightRAG and perform queries:
```python
from lightrag import LightRAG, QueryParam
+from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
-rag = LightRAG(working_dir="./dickens")
+WORKING_DIR = "./dickens"
+
+if not os.path.exists(WORKING_DIR):
+ os.mkdir(WORKING_DIR)
+
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=gpt_4o_mini_complete # Use gpt_4o_mini_complete LLM model
+ # llm_model_func=gpt_4o_complete # Optionally, use a stronger model
+)
with open("./book.txt") as f:
rag.insert(f.read())
@@ -62,13 +75,31 @@ print(rag.query("What are the top themes in this story?", param=QueryParam(mode=
# Perform hybrid search
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
```
-Batch Insert
+### Using Hugging Face Models
+If you want to use Hugging Face models, you only need to set LightRAG as follows:
+```python
+from lightrag.llm import hf_model_complete, hf_embedding
+from transformers import AutoModel, AutoTokenizer
+
+# Initialize LightRAG with Hugging Face model
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=hf_model_complete, # Use Hugging Face complete model for text generation
+ llm_model_name='meta-llama/Llama-3.1-8B-Instruct', # Model name from Hugging Face
+ embedding_func=hf_embedding, # Use Hugging Face embedding function
+ tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
+ embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+)
+```
+### Batch Insert
```python
+# Batch Insert: Insert multiple texts at once
rag.insert(["TEXT1", "TEXT2",...])
```
-Incremental Insert
+### Incremental Insert
```python
+# Incremental Insert: Insert new documents into an existing LightRAG instance
rag = LightRAG(working_dir="./dickens")
with open("./newText.txt") as f:
From a92f7bfd619816c01a68d6d89aaddbf8651075f6 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Tue, 15 Oct 2024 20:06:59 +0800
Subject: [PATCH 030/258] update llm.py
---
lightrag/llm.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lightrag/llm.py b/lightrag/llm.py
index bc2ac1f3..87b156c5 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -118,9 +118,9 @@ async def gpt_4o_mini_complete(
async def hf_model_complete(
prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
- input_string = kwargs['hashing_kv'].global_config['llm_model_name']
+ model_name = kwargs['hashing_kv'].global_config['llm_model_name']
return await hf_model_if_cache(
- input_string,
+ model_name,
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
From 2190425d95bd4623b71896b03833a177a2558952 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Tue, 15 Oct 2024 21:11:12 +0800
Subject: [PATCH 031/258] fix bug
---
examples/lightrag_hf_demo.py | 11 ++++++++---
examples/lightrag_openai_demo.py | 6 +++---
lightrag/lightrag.py | 18 +++---------------
3 files changed, 14 insertions(+), 21 deletions(-)
diff --git a/examples/lightrag_hf_demo.py b/examples/lightrag_hf_demo.py
index f0e5fa99..4cd503b3 100644
--- a/examples/lightrag_hf_demo.py
+++ b/examples/lightrag_hf_demo.py
@@ -3,6 +3,7 @@
from lightrag import LightRAG, QueryParam
from lightrag.llm import hf_model_complete, hf_embedding
+from lightrag.utils import EmbeddingFunc
from transformers import AutoModel,AutoTokenizer
WORKING_DIR = "./dickens"
@@ -14,9 +15,13 @@
working_dir=WORKING_DIR,
llm_model_func=hf_model_complete,
llm_model_name='meta-llama/Llama-3.1-8B-Instruct',
- embedding_func=hf_embedding,
- tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
- embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+ embedding_func=EmbeddingFunc(
+ tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
+ embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
+ embedding_dim=384,
+ max_token_size=5000,
+ func=hf_embedding
+ ),
)
diff --git a/examples/lightrag_openai_demo.py b/examples/lightrag_openai_demo.py
index 677506c2..507b2eb2 100644
--- a/examples/lightrag_openai_demo.py
+++ b/examples/lightrag_openai_demo.py
@@ -5,15 +5,15 @@
from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
from transformers import AutoModel,AutoTokenizer
-WORKING_DIR = "./dickens"
+WORKING_DIR = "/home/zrguo/code/myrag/agriculture"
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
rag = LightRAG(
working_dir=WORKING_DIR,
- llm_model_func=gpt_4o_complete
- # llm_model_func=gpt_4o_mini_complete
+ llm_model_func=gpt_4o_mini_complete
+ # llm_model_func=gpt_4o_complete
)
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 329bfd12..0d50a13d 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -76,12 +76,8 @@ class LightRAG:
}
)
- # text embedding
- tokenizer: Any = None
- embed_model: Any = None
-
# embedding_func: EmbeddingFunc = field(default_factory=lambda:hf_embedding)
- embedding_func: EmbeddingFunc = field(default_factory=lambda:openai_embedding)#
+ embedding_func: EmbeddingFunc = field(default_factory=lambda:openai_embedding)
embedding_batch_num: int = 32
embedding_func_max_async: int = 16
@@ -103,13 +99,6 @@ class LightRAG:
convert_response_to_json_func: callable = convert_response_to_json
def __post_init__(self):
- if callable(self.embedding_func) and self.embedding_func.__name__ == 'hf_embedding':
- if self.tokenizer is None:
- self.tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
- if self.embed_model is None:
- self.embed_model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
-
-
log_file = os.path.join(self.working_dir, "lightrag.log")
set_logger(log_file)
logger.info(f"Logger initialized for working directory: {self.working_dir}")
@@ -139,10 +128,9 @@ def __post_init__(self):
self.chunk_entity_relation_graph = self.graph_storage_cls(
namespace="chunk_entity_relation", global_config=asdict(self)
)
+
self.embedding_func = limit_async_func_call(self.embedding_func_max_async)(
- lambda texts: self.embedding_func(texts, self.tokenizer, self.embed_model)
- if callable(self.embedding_func) and self.embedding_func.__name__ == 'hf_embedding'
- else self.embedding_func(texts)
+ self.embedding_func
)
self.entities_vdb = (
From 756133512ee1898ef2e774a6bb0f2130dd2720e2 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Tue, 15 Oct 2024 21:21:57 +0800
Subject: [PATCH 032/258] fix bug
---
examples/lightrag_hf_demo.py | 8 +++++---
examples/lightrag_openai_demo.py | 2 +-
lightrag/__init__.py | 2 +-
lightrag/llm.py | 5 -----
4 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/examples/lightrag_hf_demo.py b/examples/lightrag_hf_demo.py
index 4cd503b3..baf62bdb 100644
--- a/examples/lightrag_hf_demo.py
+++ b/examples/lightrag_hf_demo.py
@@ -16,11 +16,13 @@
llm_model_func=hf_model_complete,
llm_model_name='meta-llama/Llama-3.1-8B-Instruct',
embedding_func=EmbeddingFunc(
- tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
- embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
embedding_dim=384,
max_token_size=5000,
- func=hf_embedding
+ func=lambda texts: hf_embedding(
+ texts,
+ tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
+ embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+ )
),
)
diff --git a/examples/lightrag_openai_demo.py b/examples/lightrag_openai_demo.py
index 507b2eb2..fb1f055c 100644
--- a/examples/lightrag_openai_demo.py
+++ b/examples/lightrag_openai_demo.py
@@ -5,7 +5,7 @@
from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
from transformers import AutoModel,AutoTokenizer
-WORKING_DIR = "/home/zrguo/code/myrag/agriculture"
+WORKING_DIR = "./dickens"
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
diff --git a/lightrag/__init__.py b/lightrag/__init__.py
index b3d1d4ca..dc8faa6a 100644
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,5 +1,5 @@
from .lightrag import LightRAG, QueryParam
-__version__ = "0.0.4"
+__version__ = "0.0.5"
__author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/LightRAG"
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 87b156c5..bcb7e495 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -141,11 +141,6 @@ async def openai_embedding(texts: list[str]) -> np.ndarray:
return np.array([dp.embedding for dp in response.data])
-
-@wrap_embedding_func_with_attrs(
- embedding_dim=384,
- max_token_size=5000,
-)
async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray:
input_ids = tokenizer(texts, return_tensors='pt', padding=True, truncation=True).input_ids
with torch.no_grad():
From 7409668f9f2726c9f702a5ac9ed01e415e9b97b3 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Tue, 15 Oct 2024 21:23:03 +0800
Subject: [PATCH 033/258] update README.md
---
README.md | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index cc282c27..12a422f6 100644
--- a/README.md
+++ b/README.md
@@ -86,9 +86,16 @@ rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=hf_model_complete, # Use Hugging Face complete model for text generation
llm_model_name='meta-llama/Llama-3.1-8B-Instruct', # Model name from Hugging Face
- embedding_func=hf_embedding, # Use Hugging Face embedding function
- tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
- embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+ # Use Hugging Face embedding function
+ embedding_func=EmbeddingFunc(
+ embedding_dim=384,
+ max_token_size=5000,
+ func=lambda texts: hf_embedding(
+ texts,
+ tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
+ embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+ )
+ ),
)
```
### Batch Insert
From 6389baef681477c89d653d776c7546d18c8a94b6 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Tue, 15 Oct 2024 22:30:16 +0800
Subject: [PATCH 034/258] Add Star History
---
README.md | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/README.md b/README.md
index 12a422f6..f594f789 100644
--- a/README.md
+++ b/README.md
@@ -318,6 +318,17 @@ def extract_queries(file_path):
return queries
```
+
+## Star History
+
+
+
+
+
## Code Structure
```python
From c3a5c8a8ff3777c61a7bb87ae64c12a3859dad6c Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Tue, 15 Oct 2024 22:31:41 +0800
Subject: [PATCH 035/258] Add Star History
---
README.md | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/README.md b/README.md
index 12a422f6..af79eec2 100644
--- a/README.md
+++ b/README.md
@@ -318,6 +318,7 @@ def extract_queries(file_path):
return queries
```
+
## Code Structure
```python
@@ -346,6 +347,17 @@ def extract_queries(file_path):
├── requirements.txt
└── setup.py
```
+
+## Star History
+
+
+
+
+
## Citation
```python
From c8a7c4fc79f4b89d2311b089e6183688783cc987 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Tue, 15 Oct 2024 22:34:02 +0800
Subject: [PATCH 036/258] Add Star history
---
README.md | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/README.md b/README.md
index f594f789..af79eec2 100644
--- a/README.md
+++ b/README.md
@@ -319,16 +319,6 @@ def extract_queries(file_path):
return queries
```
-## Star History
-
-
-
-
-
## Code Structure
```python
@@ -357,6 +347,17 @@ def extract_queries(file_path):
├── requirements.txt
└── setup.py
```
+
+## Star History
+
+
+
+
+
## Citation
```python
From b651a2ebaa26d3cf6763076ad776d7e9499ec138 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=9C=A8Data=20Intelligence=20Lab=40HKU=E2=9C=A8?=
<118165258+HKUDS@users.noreply.github.com>
Date: Tue, 15 Oct 2024 23:48:34 +0800
Subject: [PATCH 037/258] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index af79eec2..8d688cc9 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# LightRAG: Simple and Fast Retrieval-Augmented Generation
+# 🚀 LightRAG: Simple and Fast Retrieval-Augmented Generation
![请添加图片描述](https://i-blog.csdnimg.cn/direct/567139f1a36e4564abc63ce5c12b6271.jpeg)
From a1e3ca4a3352604e68f0235821faa531a42376d5 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Tue, 15 Oct 2024 23:57:57 +0800
Subject: [PATCH 038/258] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 8d688cc9..39de81bc 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# 🚀 LightRAG: Simple and Fast Retrieval-Augmented Generation
+
🚀 LightRAG: Simple and Fast Retrieval-Augmented Generation
![请添加图片描述](https://i-blog.csdnimg.cn/direct/567139f1a36e4564abc63ce5c12b6271.jpeg)
From b0ad8775f49accb8bb69ef4976856e889e88734e Mon Sep 17 00:00:00 2001
From: Sung Kim
Date: Tue, 15 Oct 2024 12:55:05 -0700
Subject: [PATCH 039/258] Added OpenAI compatible options and examples
---
examples/lightrag_openai_compatible_demo.py | 69 +++++++++++++++++++++
lightrag/llm.py | 16 +++--
2 files changed, 80 insertions(+), 5 deletions(-)
create mode 100644 examples/lightrag_openai_compatible_demo.py
diff --git a/examples/lightrag_openai_compatible_demo.py b/examples/lightrag_openai_compatible_demo.py
new file mode 100644
index 00000000..75ecc118
--- /dev/null
+++ b/examples/lightrag_openai_compatible_demo.py
@@ -0,0 +1,69 @@
+import os
+import asyncio
+from lightrag import LightRAG, QueryParam
+from lightrag.llm import openai_complete_if_cache, openai_embedding
+from lightrag.utils import EmbeddingFunc
+import numpy as np
+
+WORKING_DIR = "./dickens"
+
+if not os.path.exists(WORKING_DIR):
+ os.mkdir(WORKING_DIR)
+
+async def llm_model_func(
+    prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+    """Complete `prompt` with the "solar-mini" model through the Upstage OpenAI-compatible API."""
+    upstage_options = {
+        "api_key": os.getenv("UPSTAGE_API_KEY"),
+        "base_url": "https://api.upstage.ai/v1/solar",
+    }
+    return await openai_complete_if_cache(
+        "solar-mini", prompt, system_prompt=system_prompt,
+        history_messages=history_messages, **upstage_options, **kwargs
+    )
+
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    """Embed `texts` with the "solar-embedding-1-large-query" model on the Upstage endpoint."""
+    upstage_key = os.getenv("UPSTAGE_API_KEY")
+    return await openai_embedding(
+        texts, model="solar-embedding-1-large-query",
+        api_key=upstage_key, base_url="https://api.upstage.ai/v1/solar"
+    )
+
+# function test
+async def test_funcs():
+    """Call both helper coroutines once with a fixed prompt and print what each returns."""
+    greeting = "How are you?"
+    completion = await llm_model_func(greeting)
+    print("llm_model_func: ", completion)
+    print("embedding_func: ", await embedding_func([greeting]))
+
+asyncio.run(test_funcs())
+
+
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=llm_model_func,
+    embedding_func=EmbeddingFunc(
+        embedding_dim=4096,
+        max_token_size=8192,
+        func=embedding_func
+    )
+)
+
+# Read the corpus as UTF-8 explicitly so decoding does not depend on the platform locale.
+with open("./book.txt", encoding="utf-8") as f:
+    rag.insert(f.read())
+
+# Perform naive search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+
+# Perform local search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+
+# Perform global search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+
+# Perform hybrid search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
diff --git a/lightrag/llm.py b/lightrag/llm.py
index bcb7e495..d2ca5344 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -19,9 +19,12 @@
retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
)
async def openai_complete_if_cache(
- model, prompt, system_prompt=None, history_messages=[], **kwargs
+ model, prompt, system_prompt=None, history_messages=[], base_url=None, api_key=None, **kwargs
) -> str:
- openai_async_client = AsyncOpenAI()
+ if api_key:
+ os.environ["OPENAI_API_KEY"] = api_key
+
+ openai_async_client = AsyncOpenAI() if base_url is None else AsyncOpenAI(base_url=base_url)
hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
messages = []
if system_prompt:
@@ -133,10 +136,13 @@ async def hf_model_complete(
wait=wait_exponential(multiplier=1, min=4, max=10),
retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
)
-async def openai_embedding(texts: list[str]) -> np.ndarray:
-    openai_async_client = AsyncOpenAI()
+async def openai_embedding(texts: list[str], model: str = "text-embedding-3-small", base_url: str = None, api_key: str = None) -> np.ndarray:
+    """Embed `texts` via an OpenAI-compatible embeddings endpoint; returns one row per text."""
+    # Credentials go straight to the client instead of into os.environ, so one caller's key
+    # never leaks into later calls; None falls back to OPENAI_API_KEY / the default base URL.
+    openai_async_client = AsyncOpenAI(api_key=api_key or None, base_url=base_url)
     response = await openai_async_client.embeddings.create(
-        model="text-embedding-3-small", input=texts, encoding_format="float"
+        model=model, input=texts, encoding_format="float"
     )
     return np.array([dp.embedding for dp in response.data])
From 10d1ac48855adbcb9827e26cef6aa64972770901 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Wed, 16 Oct 2024 15:15:10 +0800
Subject: [PATCH 040/258] ollama test
---
examples/lightrag_ollama_demo.py | 40 +++++++++++++++++++++++++
lightrag/__init__.py | 2 +-
lightrag/lightrag.py | 2 +-
lightrag/llm.py | 50 ++++++++++++++++++++++++++++++--
requirements.txt | 3 ++
setup.py | 2 +-
6 files changed, 94 insertions(+), 5 deletions(-)
create mode 100644 examples/lightrag_ollama_demo.py
diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
new file mode 100644
index 00000000..a2d04aa6
--- /dev/null
+++ b/examples/lightrag_ollama_demo.py
@@ -0,0 +1,40 @@
+import os
+
+from lightrag import LightRAG, QueryParam
+from lightrag.llm import ollama_model_complete, ollama_embedding
+from lightrag.utils import EmbeddingFunc
+
+WORKING_DIR = "./dickens"
+
+if not os.path.exists(WORKING_DIR):
+ os.mkdir(WORKING_DIR)
+
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=ollama_model_complete,
+    llm_model_name='your_model_name',
+    embedding_func=EmbeddingFunc(
+        embedding_dim=768,
+        max_token_size=8192,
+        func=lambda texts: ollama_embedding(
+            texts,
+            embed_model="nomic-embed-text"
+        )
+    ),
+)
+
+# Read the corpus as UTF-8 explicitly so decoding does not depend on the platform locale.
+with open("./book.txt", encoding="utf-8") as f:
+    rag.insert(f.read())
+
+# Perform naive search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+
+# Perform local search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+
+# Perform global search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+
+# Perform hybrid search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
diff --git a/lightrag/__init__.py b/lightrag/__init__.py
index dc8faa6a..b6b953f1 100644
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,5 +1,5 @@
from .lightrag import LightRAG, QueryParam
-__version__ = "0.0.5"
+__version__ = "0.0.6"
__author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/LightRAG"
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 0d50a13d..83312ef6 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -6,7 +6,7 @@
from typing import Type, cast, Any
from transformers import AutoModel,AutoTokenizer, AutoModelForCausalLM
-from .llm import gpt_4o_complete, gpt_4o_mini_complete, openai_embedding,hf_model_complete,hf_embedding
+from .llm import gpt_4o_complete, gpt_4o_mini_complete, openai_embedding, hf_model_complete, hf_embedding
from .operate import (
chunking_by_token_size,
extract_entities,
diff --git a/lightrag/llm.py b/lightrag/llm.py
index d2ca5344..7328a583 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -1,5 +1,6 @@
import os
import numpy as np
+import ollama
from openai import AsyncOpenAI, APIConnectionError, RateLimitError, Timeout
from tenacity import (
retry,
@@ -92,6 +93,34 @@ async def hf_model_if_cache(
)
return response_text
+async def ollama_model_if_cache(
+    model, prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+    """Chat with an Ollama model, returning the cached answer from `hashing_kv` when one exists."""
+    # These two options are dropped rather than forwarded to ollama's chat().
+    for dropped in ("max_tokens", "response_format"):
+        kwargs.pop(dropped, None)
+    ollama_client = ollama.AsyncClient()
+    cache: BaseKVStorage = kwargs.pop("hashing_kv", None)
+
+    # Message order: optional system prompt, prior history, then the new user prompt.
+    messages = [{"role": "system", "content": system_prompt}] if system_prompt else []
+    messages.extend(history_messages)
+    messages.append({"role": "user", "content": prompt})
+
+    if cache is not None:
+        # The cache key is computed from the model name and the full message list.
+        args_hash = compute_args_hash(model, messages)
+        cached = await cache.get_by_id(args_hash)
+        if cached is not None:
+            return cached["return"]
+
+    response = await ollama_client.chat(model=model, messages=messages, **kwargs)
+    result = response["message"]["content"]
+    if cache is not None:
+        # Store the fresh answer under the same key for later calls.
+        await cache.upsert({args_hash: {"return": result, "model": model}})
+    return result
async def gpt_4o_complete(
prompt, system_prompt=None, history_messages=[], **kwargs
@@ -116,8 +145,6 @@ async def gpt_4o_mini_complete(
**kwargs,
)
-
-
async def hf_model_complete(
prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
@@ -130,6 +157,18 @@ async def hf_model_complete(
**kwargs,
)
+async def ollama_model_complete(
+    prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+    """Complete `prompt` with the Ollama model named by `llm_model_name` in the global config."""
+    # The config is reached through the `hashing_kv` kwarg, which therefore must be supplied.
+    global_config = kwargs['hashing_kv'].global_config
+    return await ollama_model_if_cache(
+        global_config['llm_model_name'], prompt,
+        system_prompt=system_prompt, history_messages=history_messages,
+        **kwargs,
+    )
+
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
@retry(
stop=stop_after_attempt(3),
@@ -154,6 +193,13 @@ async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray:
embeddings = outputs.last_hidden_state.mean(dim=1)
return embeddings.detach().numpy()
+async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray:
+    """Embed each text with the Ollama model `embed_model`; returns an array with one row per text."""
+    # NOTE: ollama.embeddings is called without await, so this coroutine blocks while embedding.
+    embed_text = [
+        ollama.embeddings(model=embed_model, prompt=text)["embedding"] for text in texts
+    ]
+    return np.array(embed_text)  # match the annotated type and the other embedding functions
if __name__ == "__main__":
import asyncio
diff --git a/requirements.txt b/requirements.txt
index 8a74d5e2..52edd151 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,6 @@ nano-vectordb
hnswlib
xxhash
tenacity
+transformers
+torch
+ollama
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 849fabfe..47222420 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
import setuptools
-with open("README.md", "r") as fh:
+with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
From 2b49f6ecf53d8dc84b277de259b6e92e21862feb Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Wed, 16 Oct 2024 15:33:59 +0800
Subject: [PATCH 041/258] update README.md
---
README.md | 33 ++++++++++++++++++++++++++++-----
1 file changed, 28 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index 39de81bc..6dedff97 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,6 @@
-
@@ -21,6 +20,7 @@ This repository hosts the code of LightRAG. The structure of this code is based
## 🎉 News
+- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports Ollama models!
- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports Hugging Face models!
## Install
@@ -37,7 +37,7 @@ pip install lightrag-hku
```
## Quick Start
-
+* All the code can be found in the `examples`.
* Set OpenAI API key in environment if using OpenAI models: `export OPENAI_API_KEY="sk-...".`
* Download the demo text "A Christmas Carol by Charles Dickens":
```bash
@@ -84,7 +84,7 @@ from transformers import AutoModel, AutoTokenizer
# Initialize LightRAG with Hugging Face model
rag = LightRAG(
working_dir=WORKING_DIR,
- llm_model_func=hf_model_complete, # Use Hugging Face complete model for text generation
+ llm_model_func=hf_model_complete, # Use Hugging Face model for text generation
llm_model_name='meta-llama/Llama-3.1-8B-Instruct', # Model name from Hugging Face
# Use Hugging Face embedding function
embedding_func=EmbeddingFunc(
@@ -98,6 +98,27 @@ rag = LightRAG(
),
)
```
+### Using Ollama Models
+If you want to use Ollama models, you only need to set LightRAG as follows:
+```python
+from lightrag.llm import ollama_model_complete, ollama_embedding
+
+# Initialize LightRAG with Ollama model
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=ollama_model_complete, # Use Ollama model for text generation
+ llm_model_name='your_model_name', # Your model name
+ # Use Ollama embedding function
+ embedding_func=EmbeddingFunc(
+ embedding_dim=768,
+ max_token_size=8192,
+ func=lambda texts: ollama_embedding(
+ texts,
+ embed_model="nomic-embed-text"
+ )
+ ),
+)
+```
### Batch Insert
```python
# Batch Insert: Insert multiple texts at once
@@ -326,8 +347,10 @@ def extract_queries(file_path):
├── examples
│ ├── batch_eval.py
│ ├── generate_query.py
-│ ├── lightrag_openai_demo.py
-│ └── lightrag_hf_demo.py
+│ ├── lightrag_hf_demo.py
+│ ├── lightrag_ollama_demo.py
+│ ├── lightrag_openai_compatible_demo.py
+│ └── lightrag_openai_demo.py
├── lightrag
│ ├── __init__.py
│ ├── base.py
From 72200af1692c7b85edbacb295af6d19bc62d2192 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Wed, 16 Oct 2024 17:37:11 +0800
Subject: [PATCH 042/258] update requirements.txt
---
requirements.txt | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/requirements.txt b/requirements.txt
index 52edd151..f7dcd787 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@ xxhash
tenacity
transformers
torch
-ollama
\ No newline at end of file
+ollama
+accelerate
\ No newline at end of file
From cf869fc6803c248bd201f4031027d031caabaf32 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Wed, 16 Oct 2024 17:45:49 +0800
Subject: [PATCH 043/258] update README.md
---
README.md | 47 +++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 45 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 6dedff97..358115c0 100644
--- a/README.md
+++ b/README.md
@@ -20,8 +20,8 @@ This repository hosts the code of LightRAG. The structure of this code is based
## 🎉 News
-- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports Ollama models!
-- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports Hugging Face models!
+- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-ollama-models)!
+- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-hugging-face-models)!
## Install
@@ -75,6 +75,42 @@ print(rag.query("What are the top themes in this story?", param=QueryParam(mode=
# Perform hybrid search
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
```
+
+### Open AI-like APIs
+LightRAG also support Open AI-like chat/embeddings APIs:
+```python
+async def llm_model_func(
+ prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ return await openai_complete_if_cache(
+ "solar-mini",
+ prompt,
+ system_prompt=system_prompt,
+ history_messages=history_messages,
+ api_key=os.getenv("UPSTAGE_API_KEY"),
+ base_url="https://api.upstage.ai/v1/solar",
+ **kwargs
+ )
+
+async def embedding_func(texts: list[str]) -> np.ndarray:
+ return await openai_embedding(
+ texts,
+ model="solar-embedding-1-large-query",
+ api_key=os.getenv("UPSTAGE_API_KEY"),
+ base_url="https://api.upstage.ai/v1/solar"
+ )
+
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=llm_model_func,
+ embedding_func=EmbeddingFunc(
+ embedding_dim=4096,
+ max_token_size=8192,
+ func=embedding_func
+ )
+)
+```
+
### Using Hugging Face Models
If you want to use Hugging Face models, you only need to set LightRAG as follows:
```python
@@ -98,6 +134,7 @@ rag = LightRAG(
),
)
```
+
### Using Ollama Models
If you want to use Ollama models, you only need to set LightRAG as follows:
```python
@@ -119,11 +156,13 @@ rag = LightRAG(
),
)
```
+
### Batch Insert
```python
# Batch Insert: Insert multiple texts at once
rag.insert(["TEXT1", "TEXT2",...])
```
+
### Incremental Insert
```python
@@ -207,6 +246,7 @@ Output your evaluation in the following JSON format:
}}
}}
```
+
### Overall Performance Table
| | **Agriculture** | | **CS** | | **Legal** | | **Mix** | |
|----------------------|-------------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|
@@ -233,6 +273,7 @@ Output your evaluation in the following JSON format:
## Reproduce
All the code can be found in the `./reproduce` directory.
+
### Step-0 Extract Unique Contexts
First, we need to extract unique contexts in the datasets.
```python
@@ -286,6 +327,7 @@ def extract_unique_contexts(input_directory, output_directory):
print("All files have been processed.")
```
+
### Step-1 Insert Contexts
For the extracted contexts, we insert them into the LightRAG system.
@@ -307,6 +349,7 @@ def insert_text(rag, file_path):
if retries == max_retries:
print("Insertion failed after exceeding the maximum number of retries")
```
+
### Step-2 Generate Queries
We extract tokens from both the first half and the second half of each context in the dataset, then combine them as the dataset description to generate queries.
From 4bbe4f8230c9afeb560a045db0314d947a3c87b2 Mon Sep 17 00:00:00 2001
From: zhangjiawei
Date: Wed, 16 Oct 2024 18:10:28 +0800
Subject: [PATCH 044/258] setup encoding modify
---
setup.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/setup.py b/setup.py
index 849fabfe..47222420 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
import setuptools
-with open("README.md", "r") as fh:
+with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
From 1e74af59a7ab34d2c3ba9b662c370bfdc58780b1 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Wed, 16 Oct 2024 18:24:47 +0800
Subject: [PATCH 045/258] Update README.md
---
README.md | 48 +++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 43 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index 358115c0..fb29945b 100644
--- a/README.md
+++ b/README.md
@@ -20,8 +20,8 @@ This repository hosts the code of LightRAG. The structure of this code is based
## 🎉 News
-- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-ollama-models)!
-- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-hugging-face-models)!
+- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
+- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
## Install
@@ -76,7 +76,9 @@ print(rag.query("What are the top themes in this story?", param=QueryParam(mode=
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
```
-### Open AI-like APIs
+
+ Using Open AI-like APIs
+
LightRAG also support Open AI-like chat/embeddings APIs:
```python
async def llm_model_func(
@@ -110,8 +112,11 @@ rag = LightRAG(
)
)
```
+
-### Using Hugging Face Models
+
+ Using Hugging Face Models
+
If you want to use Hugging Face models, you only need to set LightRAG as follows:
```python
from lightrag.llm import hf_model_complete, hf_embedding
@@ -134,9 +139,12 @@ rag = LightRAG(
),
)
```
+
-### Using Ollama Models
+
+ Using Ollama Models
If you want to use Ollama models, you only need to set LightRAG as follows:
+
```python
from lightrag.llm import ollama_model_complete, ollama_embedding
@@ -156,6 +164,7 @@ rag = LightRAG(
),
)
```
+
### Batch Insert
```python
@@ -178,6 +187,10 @@ The dataset used in LightRAG can be download from [TommyChien/UltraDomain](https
### Generate Query
LightRAG uses the following prompt to generate high-level queries, with the corresponding code located in `example/generate_query.py`.
+
+
+ Prompt
+
```python
Given the following description of a dataset:
@@ -201,9 +214,14 @@ Output the results in the following structure:
- User 5: [user description]
...
```
+
### Batch Eval
To evaluate the performance of two RAG systems on high-level queries, LightRAG uses the following prompt, with the specific code available in `example/batch_eval.py`.
+
+
+ Prompt
+
```python
---Role---
You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
@@ -246,6 +264,7 @@ Output your evaluation in the following JSON format:
}}
}}
```
+
### Overall Performance Table
| | **Agriculture** | | **CS** | | **Legal** | | **Mix** | |
@@ -276,6 +295,10 @@ All the code can be found in the `./reproduce` directory.
### Step-0 Extract Unique Contexts
First, we need to extract unique contexts in the datasets.
+
+
+ Code
+
```python
def extract_unique_contexts(input_directory, output_directory):
@@ -327,10 +350,14 @@ def extract_unique_contexts(input_directory, output_directory):
print("All files have been processed.")
```
+
### Step-1 Insert Contexts
For the extracted contexts, we insert them into the LightRAG system.
+
+ Code
+
```python
def insert_text(rag, file_path):
with open(file_path, mode='r') as f:
@@ -349,10 +376,15 @@ def insert_text(rag, file_path):
if retries == max_retries:
print("Insertion failed after exceeding the maximum number of retries")
```
+
### Step-2 Generate Queries
We extract tokens from both the first half and the second half of each context in the dataset, then combine them as the dataset description to generate queries.
+
+
+ Code
+
```python
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
@@ -368,9 +400,14 @@ def get_summary(context, tot_tokens=2000):
return summary
```
+
### Step-3 Query
For the queries generated in Step-2, we will extract them and query LightRAG.
+
+
+ Code
+
```python
def extract_queries(file_path):
with open(file_path, 'r') as f:
@@ -382,6 +419,7 @@ def extract_queries(file_path):
return queries
```
+
## Code Structure
From 7ab699955e05d35ea89d4b46fb72138e15dcc877 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Thu, 17 Oct 2024 10:29:08 +0800
Subject: [PATCH 046/258] Update README.md
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index fb29945b..7ad8dd26 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ This repository hosts the code of LightRAG. The structure of this code is based
## Install
-* Install from source
+* Install from source (Recommended)
```bash
cd LightRAG
@@ -142,7 +142,7 @@ rag = LightRAG(
- Using Ollama Models
+ Using Ollama Models (There are some bugs. I'll fix them ASAP.)
If you want to use Ollama models, you only need to set LightRAG as follows:
```python
From 0e0a037a1d15743798286146c998e6cfa29ddc1e Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Thu, 17 Oct 2024 14:39:11 +0800
Subject: [PATCH 047/258] Add Discord channel link
---
README.md | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 7ad8dd26..ff6fe44a 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,10 @@
-
+
+
@@ -20,6 +21,7 @@ This repository hosts the code of LightRAG. The structure of this code is based
## 🎉 News
+- [x] [2024.10.17]🎯🎯📢📢We have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! 🎉🎉
- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
From a2f1654f4cc2eeb73b38ca6e1d2ff787bc514a34 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Thu, 17 Oct 2024 16:02:43 +0800
Subject: [PATCH 048/258] fix Ollama bugs
---
README.md | 2 +-
lightrag/operate.py | 81 ++++++++++++++++++++++++++-------------------
2 files changed, 48 insertions(+), 35 deletions(-)
diff --git a/README.md b/README.md
index ff6fe44a..fd85141b 100644
--- a/README.md
+++ b/README.md
@@ -144,7 +144,7 @@ rag = LightRAG(
@@ -21,6 +22,7 @@ This repository hosts the code of LightRAG. The structure of this code is based
## 🎉 News
+- [x] [2024.10.18]🎯🎯📢📢We’ve added a link to a [LightRAG explanatory video](https://youtu.be/oageL-1I0GE). Thanks to the author!
- [x] [2024.10.17]🎯🎯📢📢We have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! 🎉🎉
- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
From d04f70d4254eb024e8bd2347594d29149413363f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=9C=A8Data=20Intelligence=20Lab=40HKU=E2=9C=A8?=
<118165258+HKUDS@users.noreply.github.com>
Date: Fri, 18 Oct 2024 12:45:30 +0800
Subject: [PATCH 051/258] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 2987507d..d0ed8a35 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ This repository hosts the code of LightRAG. The structure of this code is based
## 🎉 News
-- [x] [2024.10.18]🎯🎯📢📢We’ve added a link to a [LightRAG explanatory video](https://youtu.be/oageL-1I0GE). Thanks to the author!
+- [x] [2024.10.18]🎯🎯📢📢We’ve added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author!
- [x] [2024.10.17]🎯🎯📢📢We have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! 🎉🎉
- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
From f576a28e0d66904a382f3eae076f1ff2699a6239 Mon Sep 17 00:00:00 2001
From: zrguo
Date: Fri, 18 Oct 2024 15:32:58 +0800
Subject: [PATCH 052/258] Create lightrag_azure_openai_demo.py
---
examples/lightrag_azure_openai_demo.py | 125 +++++++++++++++++++++++++
1 file changed, 125 insertions(+)
create mode 100644 examples/lightrag_azure_openai_demo.py
diff --git a/examples/lightrag_azure_openai_demo.py b/examples/lightrag_azure_openai_demo.py
new file mode 100644
index 00000000..62282a25
--- /dev/null
+++ b/examples/lightrag_azure_openai_demo.py
@@ -0,0 +1,125 @@
+import os
+import asyncio
+from lightrag import LightRAG, QueryParam
+from lightrag.utils import EmbeddingFunc
+import numpy as np
+from dotenv import load_dotenv
+import aiohttp
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+load_dotenv()
+
+AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
+AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
+AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
+AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
+
+AZURE_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_EMBEDDING_DEPLOYMENT")
+AZURE_EMBEDDING_API_VERSION = os.getenv("AZURE_EMBEDDING_API_VERSION")
+
+WORKING_DIR = "./dickens"
+
+if os.path.exists(WORKING_DIR):
+ import shutil
+
+ shutil.rmtree(WORKING_DIR)
+
+os.mkdir(WORKING_DIR)
+
+
+async def llm_model_func(
+ prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ headers = {
+ "Content-Type": "application/json",
+ "api-key": AZURE_OPENAI_API_KEY,
+ }
+ endpoint = f"{AZURE_OPENAI_ENDPOINT}openai/deployments/{AZURE_OPENAI_DEPLOYMENT}/chat/completions?api-version={AZURE_OPENAI_API_VERSION}"
+
+ messages = []
+ if system_prompt:
+ messages.append({"role": "system", "content": system_prompt})
+ if history_messages:
+ messages.extend(history_messages)
+ messages.append({"role": "user", "content": prompt})
+
+ payload = {
+ "messages": messages,
+ "temperature": kwargs.get("temperature", 0),
+ "top_p": kwargs.get("top_p", 1),
+ "n": kwargs.get("n", 1),
+ }
+
+ async with aiohttp.ClientSession() as session:
+ async with session.post(endpoint, headers=headers, json=payload) as response:
+ if response.status != 200:
+ raise ValueError(
+ f"Request failed with status {response.status}: {await response.text()}"
+ )
+ result = await response.json()
+ return result["choices"][0]["message"]["content"]
+
+
+async def embedding_func(texts: list[str]) -> np.ndarray:
+ headers = {
+ "Content-Type": "application/json",
+ "api-key": AZURE_OPENAI_API_KEY,
+ }
+ endpoint = f"{AZURE_OPENAI_ENDPOINT}openai/deployments/{AZURE_EMBEDDING_DEPLOYMENT}/embeddings?api-version={AZURE_EMBEDDING_API_VERSION}"
+
+ payload = {"input": texts}
+
+ async with aiohttp.ClientSession() as session:
+ async with session.post(endpoint, headers=headers, json=payload) as response:
+ if response.status != 200:
+ raise ValueError(
+ f"Request failed with status {response.status}: {await response.text()}"
+ )
+ result = await response.json()
+ embeddings = [item["embedding"] for item in result["data"]]
+ return np.array(embeddings)
+
+
+async def test_funcs():
+ result = await llm_model_func("How are you?")
+ print("Resposta do llm_model_func: ", result)
+
+ result = await embedding_func(["How are you?"])
+ print("Resultado do embedding_func: ", result.shape)
+ print("Dimensão da embedding: ", result.shape[1])
+
+
+asyncio.run(test_funcs())
+
+embedding_dimension = 3072
+
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=llm_model_func,
+ embedding_func=EmbeddingFunc(
+ embedding_dim=embedding_dimension,
+ max_token_size=8192,
+ func=embedding_func,
+ ),
+)
+
+book1 = open("./book_1.txt", encoding="utf-8")
+book2 = open("./book_2.txt", encoding="utf-8")
+
+rag.insert([book1.read(), book2.read()])
+
+query_text = "What are the main themes?"
+
+print("Result (Naive):")
+print(rag.query(query_text, param=QueryParam(mode="naive")))
+
+print("\nResult (Local):")
+print(rag.query(query_text, param=QueryParam(mode="local")))
+
+print("\nResult (Global):")
+print(rag.query(query_text, param=QueryParam(mode="global")))
+
+print("\nResult (Hybrid):")
+print(rag.query(query_text, param=QueryParam(mode="hybrid")))
\ No newline at end of file
From e7a7ff62b264ae7dde437c8dac3e32847090805a Mon Sep 17 00:00:00 2001
From: zrguo
Date: Fri, 18 Oct 2024 15:33:11 +0800
Subject: [PATCH 053/258] Update operate.py
---
lightrag/operate.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 3a17810a..930ceb2a 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -76,7 +76,7 @@ async def _handle_single_entity_extraction(
record_attributes: list[str],
chunk_key: str,
):
- if record_attributes[0] != '"entity"' or len(record_attributes) < 4:
+ if len(record_attributes) < 4 or record_attributes[0] != '"entity"':
return None
# add this record as a node in the G
entity_name = clean_str(record_attributes[1].upper())
@@ -97,7 +97,7 @@ async def _handle_single_relationship_extraction(
record_attributes: list[str],
chunk_key: str,
):
- if record_attributes[0] != '"relationship"' or len(record_attributes) < 5:
+ if len(record_attributes) < 5 or record_attributes[0] != '"relationship"':
return None
# add this record as edge
source = clean_str(record_attributes[1].upper())
From 705087529524ec96602435cd5eb736f0632e1d89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Galego?=
Date: Fri, 18 Oct 2024 14:17:14 +0100
Subject: [PATCH 054/258] Added support for Amazon Bedrock models
---
.gitignore | 4 +
examples/lightrag_bedrock_demo.py | 48 +++++++++++
lightrag/llm.py | 128 ++++++++++++++++++++++++++++++
requirements.txt | 1 +
4 files changed, 181 insertions(+)
create mode 100644 .gitignore
create mode 100644 examples/lightrag_bedrock_demo.py
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..cb457220
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+__pycache__
+*.egg-info
+dickens/
+book.txt
\ No newline at end of file
diff --git a/examples/lightrag_bedrock_demo.py b/examples/lightrag_bedrock_demo.py
new file mode 100644
index 00000000..36ec3857
--- /dev/null
+++ b/examples/lightrag_bedrock_demo.py
@@ -0,0 +1,48 @@
+"""
+LightRAG meets Amazon Bedrock ⛰️
+"""
+
+import os
+
+from lightrag import LightRAG, QueryParam
+from lightrag.llm import bedrock_complete, bedrock_embedding
+from lightrag.utils import EmbeddingFunc
+
+WORKING_DIR = "./dickens"
+
+if not os.path.exists(WORKING_DIR):
+ os.mkdir(WORKING_DIR)
+
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=bedrock_complete,
+ llm_model_name="anthropic.claude-3-haiku-20240307-v1:0",
+ node2vec_params = {
+ 'dimensions': 1024,
+ 'num_walks': 10,
+ 'walk_length': 40,
+ 'window_size': 2,
+ 'iterations': 3,
+ 'random_seed': 3
+ },
+ embedding_func=EmbeddingFunc(
+ embedding_dim=1024,
+ max_token_size=8192,
+ func=lambda texts: bedrock_embedding(texts)
+ )
+)
+
+with open("./book.txt") as f:
+ rag.insert(f.read())
+
+# Naive search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+
+# Local search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+
+# Global search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+
+# Hybrid search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 7328a583..8fc0da2e 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -1,4 +1,6 @@
import os
+import json
+import aioboto3
import numpy as np
import ollama
from openai import AsyncOpenAI, APIConnectionError, RateLimitError, Timeout
@@ -48,6 +50,54 @@ async def openai_complete_if_cache(
)
return response.choices[0].message.content
+@retry(
+ stop=stop_after_attempt(3),
+ wait=wait_exponential(multiplier=1, min=4, max=10),
+ retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+)
+async def bedrock_complete_if_cache(
+ model, prompt, system_prompt=None, history_messages=[], base_url=None,
+ aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, **kwargs
+) -> str:
+ os.environ['AWS_ACCESS_KEY_ID'] = os.environ.get('AWS_ACCESS_KEY_ID', aws_access_key_id)
+ os.environ['AWS_SECRET_ACCESS_KEY'] = os.environ.get('AWS_SECRET_ACCESS_KEY', aws_secret_access_key)
+ os.environ['AWS_SESSION_TOKEN'] = os.environ.get('AWS_SESSION_TOKEN', aws_session_token)
+
+ hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
+
+ messages = []
+ messages.extend(history_messages)
+ messages.append({'role': "user", 'content': [{'text': prompt}]})
+
+ args = {
+ 'modelId': model,
+ 'messages': messages
+ }
+
+ if system_prompt:
+ args['system'] = [{'text': system_prompt}]
+
+ if hashing_kv is not None:
+ args_hash = compute_args_hash(model, messages)
+ if_cache_return = await hashing_kv.get_by_id(args_hash)
+ if if_cache_return is not None:
+ return if_cache_return["return"]
+
+ session = aioboto3.Session()
+ async with session.client("bedrock-runtime") as bedrock_async_client:
+
+ response = await bedrock_async_client.converse(**args, **kwargs)
+
+ if hashing_kv is not None:
+ await hashing_kv.upsert({
+ args_hash: {
+ 'return': response['output']['message']['content'][0]['text'],
+ 'model': model
+ }
+ })
+
+ return response['output']['message']['content'][0]['text']
+
async def hf_model_if_cache(
model, prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
@@ -145,6 +195,19 @@ async def gpt_4o_mini_complete(
**kwargs,
)
+
+async def bedrock_complete(
+ prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ return await bedrock_complete_if_cache(
+ "anthropic.claude-3-sonnet-20240229-v1:0",
+ prompt,
+ system_prompt=system_prompt,
+ history_messages=history_messages,
+ **kwargs,
+ )
+
+
async def hf_model_complete(
prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
@@ -186,6 +249,71 @@ async def openai_embedding(texts: list[str], model: str = "text-embedding-3-smal
return np.array([dp.embedding for dp in response.data])
+# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
+# @retry(
+# stop=stop_after_attempt(3),
+# wait=wait_exponential(multiplier=1, min=4, max=10),
+# retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), # TODO: fix exceptions
+# )
+async def bedrock_embedding(
+ texts: list[str], model: str = "amazon.titan-embed-text-v2:0",
+ aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None) -> np.ndarray:
+ os.environ['AWS_ACCESS_KEY_ID'] = os.environ.get('AWS_ACCESS_KEY_ID', aws_access_key_id)
+ os.environ['AWS_SECRET_ACCESS_KEY'] = os.environ.get('AWS_SECRET_ACCESS_KEY', aws_secret_access_key)
+ os.environ['AWS_SESSION_TOKEN'] = os.environ.get('AWS_SESSION_TOKEN', aws_session_token)
+
+ session = aioboto3.Session()
+ async with session.client("bedrock-runtime") as bedrock_async_client:
+
+ if (model_provider := model.split(".")[0]) == "amazon":
+ embed_texts = []
+ for text in texts:
+ if "v2" in model:
+ body = json.dumps({
+ 'inputText': text,
+ # 'dimensions': embedding_dim,
+ 'embeddingTypes': ["float"]
+ })
+ elif "v1" in model:
+ body = json.dumps({
+ 'inputText': text
+ })
+ else:
+ raise ValueError(f"Model {model} is not supported!")
+
+ response = await bedrock_async_client.invoke_model(
+ modelId=model,
+ body=body,
+ accept="application/json",
+ contentType="application/json"
+ )
+
+ response_body = await response.get('body').json()
+
+ embed_texts.append(response_body['embedding'])
+ elif model_provider == "cohere":
+ body = json.dumps({
+ 'texts': texts,
+ 'input_type': "search_document",
+ 'truncate': "NONE"
+ })
+
+ response = await bedrock_async_client.invoke_model(
+ model=model,
+ body=body,
+ accept="application/json",
+ contentType="application/json"
+ )
+
+ response_body = json.loads(response.get('body').read())
+
+ embed_texts = response_body['embeddings']
+ else:
+ raise ValueError(f"Model provider '{model_provider}' is not supported!")
+
+ return np.array(embed_texts)
+
+
async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray:
input_ids = tokenizer(texts, return_tensors='pt', padding=True, truncation=True).input_ids
with torch.no_grad():
diff --git a/requirements.txt b/requirements.txt
index f7dcd787..a1054692 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+aioboto3
openai
tiktoken
networkx
From 75a91d9300aa62cf0e918003e430e391c8d69ccc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Galego?=
Date: Fri, 18 Oct 2024 16:50:02 +0100
Subject: [PATCH 055/258] Fixed retry strategy, message history and inference
params; Cleaned up Bedrock example
---
examples/lightrag_bedrock_demo.py | 39 +++++++++++--------------
lightrag/llm.py | 48 +++++++++++++++++++++++++------
2 files changed, 55 insertions(+), 32 deletions(-)
diff --git a/examples/lightrag_bedrock_demo.py b/examples/lightrag_bedrock_demo.py
index 36ec3857..c515922e 100644
--- a/examples/lightrag_bedrock_demo.py
+++ b/examples/lightrag_bedrock_demo.py
@@ -3,46 +3,39 @@
"""
import os
+import logging
from lightrag import LightRAG, QueryParam
from lightrag.llm import bedrock_complete, bedrock_embedding
from lightrag.utils import EmbeddingFunc
-WORKING_DIR = "./dickens"
+logging.getLogger("aiobotocore").setLevel(logging.WARNING)
+WORKING_DIR = "./dickens"
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=bedrock_complete,
- llm_model_name="anthropic.claude-3-haiku-20240307-v1:0",
- node2vec_params = {
- 'dimensions': 1024,
- 'num_walks': 10,
- 'walk_length': 40,
- 'window_size': 2,
- 'iterations': 3,
- 'random_seed': 3
- },
+ llm_model_name="Anthropic Claude 3 Haiku // Amazon Bedrock",
embedding_func=EmbeddingFunc(
embedding_dim=1024,
max_token_size=8192,
- func=lambda texts: bedrock_embedding(texts)
+ func=bedrock_embedding
)
)
-with open("./book.txt") as f:
+with open("./book.txt", 'r', encoding='utf-8') as f:
rag.insert(f.read())
-# Naive search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
-
-# Local search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
-
-# Global search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
-
-# Hybrid search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
+for mode in ["naive", "local", "global", "hybrid"]:
+ print("\n+-" + "-" * len(mode) + "-+")
+ print(f"| {mode.capitalize()} |")
+ print("+-" + "-" * len(mode) + "-+\n")
+ print(
+ rag.query(
+ "What are the top themes in this story?",
+ param=QueryParam(mode=mode)
+ )
+ )
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 8fc0da2e..48defb4d 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -1,6 +1,9 @@
import os
+import copy
import json
+import botocore
import aioboto3
+import botocore.errorfactory
import numpy as np
import ollama
from openai import AsyncOpenAI, APIConnectionError, RateLimitError, Timeout
@@ -50,43 +53,70 @@ async def openai_complete_if_cache(
)
return response.choices[0].message.content
+
+class BedrockError(Exception):
+ """Generic error for issues related to Amazon Bedrock"""
+
+
@retry(
- stop=stop_after_attempt(3),
- wait=wait_exponential(multiplier=1, min=4, max=10),
- retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+ stop=stop_after_attempt(5),
+ wait=wait_exponential(multiplier=1, max=60),
+ retry=retry_if_exception_type((BedrockError)),
)
async def bedrock_complete_if_cache(
- model, prompt, system_prompt=None, history_messages=[], base_url=None,
+ model, prompt, system_prompt=None, history_messages=[],
aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, **kwargs
) -> str:
os.environ['AWS_ACCESS_KEY_ID'] = os.environ.get('AWS_ACCESS_KEY_ID', aws_access_key_id)
os.environ['AWS_SECRET_ACCESS_KEY'] = os.environ.get('AWS_SECRET_ACCESS_KEY', aws_secret_access_key)
os.environ['AWS_SESSION_TOKEN'] = os.environ.get('AWS_SESSION_TOKEN', aws_session_token)
- hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
-
+ # Fix message history format
messages = []
- messages.extend(history_messages)
+ for history_message in history_messages:
+ message = copy.copy(history_message)
+ message['content'] = [{'text': message['content']}]
+ messages.append(message)
+
+ # Add user prompt
messages.append({'role': "user", 'content': [{'text': prompt}]})
+ # Initialize Converse API arguments
args = {
'modelId': model,
'messages': messages
}
+ # Define system prompt
if system_prompt:
args['system'] = [{'text': system_prompt}]
+ # Map and set up inference parameters
+ inference_params_map = {
+ 'max_tokens': "maxTokens",
+ 'top_p': "topP",
+ 'stop_sequences': "stopSequences"
+ }
+ if (inference_params := list(set(kwargs) & set(['max_tokens', 'temperature', 'top_p', 'stop_sequences']))):
+ args['inferenceConfig'] = {}
+ for param in inference_params:
+ args['inferenceConfig'][inference_params_map.get(param, param)] = kwargs.pop(param)
+
+ hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
if hashing_kv is not None:
args_hash = compute_args_hash(model, messages)
if_cache_return = await hashing_kv.get_by_id(args_hash)
if if_cache_return is not None:
return if_cache_return["return"]
+ # Call model via Converse API
session = aioboto3.Session()
async with session.client("bedrock-runtime") as bedrock_async_client:
- response = await bedrock_async_client.converse(**args, **kwargs)
+ try:
+ response = await bedrock_async_client.converse(**args, **kwargs)
+ except Exception as e:
+ raise BedrockError(e)
if hashing_kv is not None:
await hashing_kv.upsert({
@@ -200,7 +230,7 @@ async def bedrock_complete(
prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
return await bedrock_complete_if_cache(
- "anthropic.claude-3-sonnet-20240229-v1:0",
+ "anthropic.claude-3-haiku-20240307-v1:0",
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
From a7b43d27dbe2e77c7cf666ba0327e08ec60815b9 Mon Sep 17 00:00:00 2001
From: Wade Rosko <7385473+wrosko@users.noreply.github.com>
Date: Fri, 18 Oct 2024 18:09:48 -0600
Subject: [PATCH 056/258] Add comment specifying jupyter req
Add lines that can be uncommented if running in a jupyter notebook
---
README.md | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index d0ed8a35..bd226582 100644
--- a/README.md
+++ b/README.md
@@ -47,12 +47,21 @@ pip install lightrag-hku
```bash
curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt
```
-Use the below Python snippet to initialize LightRAG and perform queries:
+Use the below Python snippet (in a script) to initialize LightRAG and perform queries:
```python
from lightrag import LightRAG, QueryParam
from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
+#########
+# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
+# import nest_asyncio
+# nest_asyncio.apply()
+#########
+
+WORKING_DIR = "./dickens"
+
+
WORKING_DIR = "./dickens"
if not os.path.exists(WORKING_DIR):
From e2db7b6c45ac4b48d7026d69b3a770b42bad4dbe Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Sat, 19 Oct 2024 11:46:03 +0800
Subject: [PATCH 057/258] fix prompt.py
---
lightrag/prompt.py | 15 ---------------
1 file changed, 15 deletions(-)
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
index 5d28e49c..67d52d63 100644
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -163,25 +163,10 @@
{response_type}
-
---Data tables---
{context_data}
-
----Goal---
-
-Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
-
-If you don't know the answer, just say so. Do not make anything up.
-
-Do not include information where the supporting evidence for it is not provided.
-
-
----Target response length and format---
-
-{response_type}
-
Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown.
"""
From 744dad339d6b06505659ab5b1091180aecdc4c3b Mon Sep 17 00:00:00 2001
From: Sanketh Kumar
Date: Sat, 19 Oct 2024 09:43:17 +0530
Subject: [PATCH 058/258] chore: added pre-commit-hooks and ruff formatting for
commit-hooks
---
.gitignore | 3 +-
.pre-commit-config.yaml | 22 ++
README.md | 50 ++---
examples/batch_eval.py | 38 ++--
examples/generate_query.py | 9 +-
examples/lightrag_azure_openai_demo.py | 2 +-
examples/lightrag_bedrock_demo.py | 13 +-
examples/lightrag_hf_demo.py | 35 ++-
examples/lightrag_ollama_demo.py | 25 ++-
examples/lightrag_openai_compatible_demo.py | 32 ++-
examples/lightrag_openai_demo.py | 22 +-
lightrag/__init__.py | 2 +-
lightrag/base.py | 11 +-
lightrag/lightrag.py | 65 +++---
lightrag/llm.py | 223 ++++++++++++-------
lightrag/operate.py | 229 +++++++++++++-------
lightrag/prompt.py | 14 +-
lightrag/storage.py | 15 +-
lightrag/utils.py | 28 ++-
reproduce/Step_0.py | 24 +-
reproduce/Step_1.py | 8 +-
reproduce/Step_1_openai_compatible.py | 29 ++-
reproduce/Step_2.py | 20 +-
reproduce/Step_3.py | 29 ++-
reproduce/Step_3_openai_compatible.py | 54 +++--
requirements.txt | 16 +-
26 files changed, 630 insertions(+), 388 deletions(-)
create mode 100644 .pre-commit-config.yaml
diff --git a/.gitignore b/.gitignore
index cb457220..50f384ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
__pycache__
*.egg-info
dickens/
-book.txt
\ No newline at end of file
+book.txt
+lightrag-dev/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..db531bb6
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,22 @@
+repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v5.0.0
+ hooks:
+ - id: trailing-whitespace
+ - id: end-of-file-fixer
+ - id: requirements-txt-fixer
+
+
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.6.4
+ hooks:
+ - id: ruff-format
+ - id: ruff
+ args: [--fix]
+
+
+ - repo: https://github.com/mgedmin/check-manifest
+ rev: "0.49"
+ hooks:
+ - id: check-manifest
+ stages: [manual]
diff --git a/README.md b/README.md
index d0ed8a35..b3a04957 100644
--- a/README.md
+++ b/README.md
@@ -16,16 +16,16 @@
-
+
This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
![请添加图片描述](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
-## 🎉 News
+## 🎉 News
- [x] [2024.10.18]🎯🎯📢📢We’ve added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author!
- [x] [2024.10.17]🎯🎯📢📢We have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! 🎉🎉
-- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
-- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
+- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
+- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
## Install
@@ -83,7 +83,7 @@ print(rag.query("What are the top themes in this story?", param=QueryParam(mode=
Using Open AI-like APIs
-LightRAG also support Open AI-like chat/embeddings APIs:
+LightRAG also supports Open AI-like chat/embeddings APIs:
```python
async def llm_model_func(
prompt, system_prompt=None, history_messages=[], **kwargs
@@ -120,7 +120,7 @@ rag = LightRAG(
Using Hugging Face Models
-
+
If you want to use Hugging Face models, you only need to set LightRAG as follows:
```python
from lightrag.llm import hf_model_complete, hf_embedding
@@ -136,7 +136,7 @@ rag = LightRAG(
embedding_dim=384,
max_token_size=5000,
func=lambda texts: hf_embedding(
- texts,
+ texts,
tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
)
@@ -148,7 +148,7 @@ rag = LightRAG(
Using Ollama Models
If you want to use Ollama models, you only need to set LightRAG as follows:
-
+
```python
from lightrag.llm import ollama_model_complete, ollama_embedding
@@ -162,7 +162,7 @@ rag = LightRAG(
embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embedding(
- texts,
+ texts,
embed_model="nomic-embed-text"
)
),
@@ -187,14 +187,14 @@ with open("./newText.txt") as f:
```
## Evaluation
### Dataset
-The dataset used in LightRAG can be download from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain).
+The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain).
### Generate Query
-LightRAG uses the following prompt to generate high-level queries, with the corresponding code located in `example/generate_query.py`.
+LightRAG uses the following prompt to generate high-level queries, with the corresponding code in `example/generate_query.py`.
Prompt
-
+
```python
Given the following description of a dataset:
@@ -219,18 +219,18 @@ Output the results in the following structure:
...
```
-
+
### Batch Eval
To evaluate the performance of two RAG systems on high-level queries, LightRAG uses the following prompt, with the specific code available in `example/batch_eval.py`.
Prompt
-
+
```python
---Role---
You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
---Goal---
-You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
@@ -294,7 +294,7 @@ Output your evaluation in the following JSON format:
| **Empowerment** | 36.69% | **63.31%** | 45.09% | **54.91%** | 42.81% | **57.19%** | **52.94%** | 47.06% |
| **Overall** | 43.62% | **56.38%** | 45.98% | **54.02%** | 45.70% | **54.30%** | **51.86%** | 48.14% |
-## Reproduce
+## Reproduce
All the code can be found in the `./reproduce` directory.
### Step-0 Extract Unique Contexts
@@ -302,7 +302,7 @@ First, we need to extract unique contexts in the datasets.
Code
-
+
```python
def extract_unique_contexts(input_directory, output_directory):
@@ -361,12 +361,12 @@ For the extracted contexts, we insert them into the LightRAG system.
Code
-
+
```python
def insert_text(rag, file_path):
with open(file_path, mode='r') as f:
unique_contexts = json.load(f)
-
+
retries = 0
max_retries = 3
while retries < max_retries:
@@ -384,11 +384,11 @@ def insert_text(rag, file_path):
### Step-2 Generate Queries
-We extract tokens from both the first half and the second half of each context in the dataset, then combine them as the dataset description to generate queries.
+We extract tokens from the first and the second half of each context in the dataset, then combine them as dataset descriptions to generate queries.
Code
-
+
```python
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
@@ -401,7 +401,7 @@ def get_summary(context, tot_tokens=2000):
summary_tokens = start_tokens + end_tokens
summary = tokenizer.convert_tokens_to_string(summary_tokens)
-
+
return summary
```
@@ -411,12 +411,12 @@ For the queries generated in Step-2, we will extract them and query LightRAG.
Code
-
+
```python
def extract_queries(file_path):
with open(file_path, 'r') as f:
data = f.read()
-
+
data = data.replace('**', '')
queries = re.findall(r'- Question \d+: (.+)', data)
@@ -470,7 +470,7 @@ def extract_queries(file_path):
```python
@article{guo2024lightrag,
-title={LightRAG: Simple and Fast Retrieval-Augmented Generation},
+title={LightRAG: Simple and Fast Retrieval-Augmented Generation},
author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang},
year={2024},
eprint={2410.05779},
diff --git a/examples/batch_eval.py b/examples/batch_eval.py
index 4601d267..a85e1ede 100644
--- a/examples/batch_eval.py
+++ b/examples/batch_eval.py
@@ -1,4 +1,3 @@
-import os
import re
import json
import jsonlines
@@ -9,28 +8,28 @@
def batch_eval(query_file, result1_file, result2_file, output_file_path):
client = OpenAI()
- with open(query_file, 'r') as f:
+ with open(query_file, "r") as f:
data = f.read()
- queries = re.findall(r'- Question \d+: (.+)', data)
+ queries = re.findall(r"- Question \d+: (.+)", data)
- with open(result1_file, 'r') as f:
+ with open(result1_file, "r") as f:
answers1 = json.load(f)
- answers1 = [i['result'] for i in answers1]
+ answers1 = [i["result"] for i in answers1]
- with open(result2_file, 'r') as f:
+ with open(result2_file, "r") as f:
answers2 = json.load(f)
- answers2 = [i['result'] for i in answers2]
+ answers2 = [i["result"] for i in answers2]
requests = []
for i, (query, answer1, answer2) in enumerate(zip(queries, answers1, answers2)):
- sys_prompt = f"""
+ sys_prompt = """
---Role---
You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
"""
prompt = f"""
- You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+ You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
@@ -69,7 +68,6 @@ def batch_eval(query_file, result1_file, result2_file, output_file_path):
}}
"""
-
request_data = {
"custom_id": f"request-{i+1}",
"method": "POST",
@@ -78,22 +76,21 @@ def batch_eval(query_file, result1_file, result2_file, output_file_path):
"model": "gpt-4o-mini",
"messages": [
{"role": "system", "content": sys_prompt},
- {"role": "user", "content": prompt}
+ {"role": "user", "content": prompt},
],
- }
+ },
}
-
+
requests.append(request_data)
- with jsonlines.open(output_file_path, mode='w') as writer:
+ with jsonlines.open(output_file_path, mode="w") as writer:
for request in requests:
writer.write(request)
print(f"Batch API requests written to {output_file_path}")
batch_input_file = client.files.create(
- file=open(output_file_path, "rb"),
- purpose="batch"
+ file=open(output_file_path, "rb"), purpose="batch"
)
batch_input_file_id = batch_input_file.id
@@ -101,12 +98,11 @@ def batch_eval(query_file, result1_file, result2_file, output_file_path):
input_file_id=batch_input_file_id,
endpoint="/v1/chat/completions",
completion_window="24h",
- metadata={
- "description": "nightly eval job"
- }
+ metadata={"description": "nightly eval job"},
)
- print(f'Batch {batch.id} has been created.')
+ print(f"Batch {batch.id} has been created.")
+
if __name__ == "__main__":
- batch_eval()
\ No newline at end of file
+ batch_eval()
diff --git a/examples/generate_query.py b/examples/generate_query.py
index 0ae82f40..705b23d3 100644
--- a/examples/generate_query.py
+++ b/examples/generate_query.py
@@ -1,9 +1,8 @@
-import os
-
from openai import OpenAI
# os.environ["OPENAI_API_KEY"] = ""
+
def openai_complete_if_cache(
model="gpt-4o-mini", prompt=None, system_prompt=None, history_messages=[], **kwargs
) -> str:
@@ -47,10 +46,10 @@ def openai_complete_if_cache(
...
"""
- result = openai_complete_if_cache(model='gpt-4o-mini', prompt=prompt)
+ result = openai_complete_if_cache(model="gpt-4o-mini", prompt=prompt)
- file_path = f"./queries.txt"
+ file_path = "./queries.txt"
with open(file_path, "w") as file:
file.write(result)
- print(f"Queries written to {file_path}")
\ No newline at end of file
+ print(f"Queries written to {file_path}")
diff --git a/examples/lightrag_azure_openai_demo.py b/examples/lightrag_azure_openai_demo.py
index 62282a25..e29a6a9d 100644
--- a/examples/lightrag_azure_openai_demo.py
+++ b/examples/lightrag_azure_openai_demo.py
@@ -122,4 +122,4 @@ async def test_funcs():
print(rag.query(query_text, param=QueryParam(mode="global")))
print("\nResult (Hybrid):")
-print(rag.query(query_text, param=QueryParam(mode="hybrid")))
\ No newline at end of file
+print(rag.query(query_text, param=QueryParam(mode="hybrid")))
diff --git a/examples/lightrag_bedrock_demo.py b/examples/lightrag_bedrock_demo.py
index c515922e..7e18ea57 100644
--- a/examples/lightrag_bedrock_demo.py
+++ b/examples/lightrag_bedrock_demo.py
@@ -20,13 +20,11 @@
llm_model_func=bedrock_complete,
llm_model_name="Anthropic Claude 3 Haiku // Amazon Bedrock",
embedding_func=EmbeddingFunc(
- embedding_dim=1024,
- max_token_size=8192,
- func=bedrock_embedding
- )
+ embedding_dim=1024, max_token_size=8192, func=bedrock_embedding
+ ),
)
-with open("./book.txt", 'r', encoding='utf-8') as f:
+with open("./book.txt", "r", encoding="utf-8") as f:
rag.insert(f.read())
for mode in ["naive", "local", "global", "hybrid"]:
@@ -34,8 +32,5 @@
print(f"| {mode.capitalize()} |")
print("+-" + "-" * len(mode) + "-+\n")
print(
- rag.query(
- "What are the top themes in this story?",
- param=QueryParam(mode=mode)
- )
+ rag.query("What are the top themes in this story?", param=QueryParam(mode=mode))
)
diff --git a/examples/lightrag_hf_demo.py b/examples/lightrag_hf_demo.py
index baf62bdb..87312307 100644
--- a/examples/lightrag_hf_demo.py
+++ b/examples/lightrag_hf_demo.py
@@ -1,10 +1,9 @@
import os
-import sys
from lightrag import LightRAG, QueryParam
from lightrag.llm import hf_model_complete, hf_embedding
from lightrag.utils import EmbeddingFunc
-from transformers import AutoModel,AutoTokenizer
+from transformers import AutoModel, AutoTokenizer
WORKING_DIR = "./dickens"
@@ -13,16 +12,20 @@
rag = LightRAG(
working_dir=WORKING_DIR,
- llm_model_func=hf_model_complete,
- llm_model_name='meta-llama/Llama-3.1-8B-Instruct',
+ llm_model_func=hf_model_complete,
+ llm_model_name="meta-llama/Llama-3.1-8B-Instruct",
embedding_func=EmbeddingFunc(
embedding_dim=384,
max_token_size=5000,
func=lambda texts: hf_embedding(
- texts,
- tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
- embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
- )
+ texts,
+ tokenizer=AutoTokenizer.from_pretrained(
+ "sentence-transformers/all-MiniLM-L6-v2"
+ ),
+ embed_model=AutoModel.from_pretrained(
+ "sentence-transformers/all-MiniLM-L6-v2"
+ ),
+ ),
),
)
@@ -31,13 +34,21 @@
rag.insert(f.read())
# Perform naive search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+)
# Perform local search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+)
# Perform global search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+)
# Perform hybrid search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+)
diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index a2d04aa6..c61b71c0 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -11,15 +11,12 @@
rag = LightRAG(
working_dir=WORKING_DIR,
- llm_model_func=ollama_model_complete,
- llm_model_name='your_model_name',
+ llm_model_func=ollama_model_complete,
+ llm_model_name="your_model_name",
embedding_func=EmbeddingFunc(
embedding_dim=768,
max_token_size=8192,
- func=lambda texts: ollama_embedding(
- texts,
- embed_model="nomic-embed-text"
- )
+ func=lambda texts: ollama_embedding(texts, embed_model="nomic-embed-text"),
),
)
@@ -28,13 +25,21 @@
rag.insert(f.read())
# Perform naive search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+)
# Perform local search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+)
# Perform global search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+)
# Perform hybrid search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+)
diff --git a/examples/lightrag_openai_compatible_demo.py b/examples/lightrag_openai_compatible_demo.py
index 75ecc118..fbad1190 100644
--- a/examples/lightrag_openai_compatible_demo.py
+++ b/examples/lightrag_openai_compatible_demo.py
@@ -6,10 +6,11 @@
import numpy as np
WORKING_DIR = "./dickens"
-
+
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
+
async def llm_model_func(
prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
@@ -20,17 +21,19 @@ async def llm_model_func(
history_messages=history_messages,
api_key=os.getenv("UPSTAGE_API_KEY"),
base_url="https://api.upstage.ai/v1/solar",
- **kwargs
+ **kwargs,
)
+
async def embedding_func(texts: list[str]) -> np.ndarray:
return await openai_embedding(
texts,
model="solar-embedding-1-large-query",
api_key=os.getenv("UPSTAGE_API_KEY"),
- base_url="https://api.upstage.ai/v1/solar"
+ base_url="https://api.upstage.ai/v1/solar",
)
+
# function test
async def test_funcs():
result = await llm_model_func("How are you?")
@@ -39,6 +42,7 @@ async def test_funcs():
result = await embedding_func(["How are you?"])
print("embedding_func: ", result)
+
asyncio.run(test_funcs())
@@ -46,10 +50,8 @@ async def test_funcs():
working_dir=WORKING_DIR,
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
- embedding_dim=4096,
- max_token_size=8192,
- func=embedding_func
- )
+ embedding_dim=4096, max_token_size=8192, func=embedding_func
+ ),
)
@@ -57,13 +59,21 @@ async def test_funcs():
rag.insert(f.read())
# Perform naive search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+)
# Perform local search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+)
# Perform global search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+)
# Perform hybrid search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+)
diff --git a/examples/lightrag_openai_demo.py b/examples/lightrag_openai_demo.py
index fb1f055c..a6e7f3b2 100644
--- a/examples/lightrag_openai_demo.py
+++ b/examples/lightrag_openai_demo.py
@@ -1,9 +1,7 @@
import os
-import sys
from lightrag import LightRAG, QueryParam
-from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
-from transformers import AutoModel,AutoTokenizer
+from lightrag.llm import gpt_4o_mini_complete
WORKING_DIR = "./dickens"
@@ -12,7 +10,7 @@
rag = LightRAG(
working_dir=WORKING_DIR,
- llm_model_func=gpt_4o_mini_complete
+ llm_model_func=gpt_4o_mini_complete,
# llm_model_func=gpt_4o_complete
)
@@ -21,13 +19,21 @@
rag.insert(f.read())
# Perform naive search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+)
# Perform local search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+)
# Perform global search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+)
# Perform hybrid search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+)
diff --git a/lightrag/__init__.py b/lightrag/__init__.py
index b6b953f1..f208177f 100644
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,4 +1,4 @@
-from .lightrag import LightRAG, QueryParam
+from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
__version__ = "0.0.6"
__author__ = "Zirui Guo"
diff --git a/lightrag/base.py b/lightrag/base.py
index d677c406..50be4f62 100644
--- a/lightrag/base.py
+++ b/lightrag/base.py
@@ -12,15 +12,16 @@
T = TypeVar("T")
+
@dataclass
class QueryParam:
mode: Literal["local", "global", "hybrid", "naive"] = "global"
only_need_context: bool = False
response_type: str = "Multiple Paragraphs"
top_k: int = 60
- max_token_for_text_unit: int = 4000
+ max_token_for_text_unit: int = 4000
max_token_for_global_context: int = 4000
- max_token_for_local_context: int = 4000
+ max_token_for_local_context: int = 4000
@dataclass
@@ -36,6 +37,7 @@ async def query_done_callback(self):
"""commit the storage operations after querying"""
pass
+
@dataclass
class BaseVectorStorage(StorageNameSpace):
embedding_func: EmbeddingFunc
@@ -50,6 +52,7 @@ async def upsert(self, data: dict[str, dict]):
"""
raise NotImplementedError
+
@dataclass
class BaseKVStorage(Generic[T], StorageNameSpace):
async def all_keys(self) -> list[str]:
@@ -72,7 +75,7 @@ async def upsert(self, data: dict[str, T]):
async def drop(self):
raise NotImplementedError
-
+
@dataclass
class BaseGraphStorage(StorageNameSpace):
@@ -113,4 +116,4 @@ async def clustering(self, algorithm: str):
raise NotImplementedError
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
- raise NotImplementedError("Node embedding is not used in lightrag.")
\ No newline at end of file
+ raise NotImplementedError("Node embedding is not used in lightrag.")
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 83312ef6..5137af42 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -3,10 +3,12 @@
from dataclasses import asdict, dataclass, field
from datetime import datetime
from functools import partial
-from typing import Type, cast, Any
-from transformers import AutoModel,AutoTokenizer, AutoModelForCausalLM
+from typing import Type, cast
-from .llm import gpt_4o_complete, gpt_4o_mini_complete, openai_embedding, hf_model_complete, hf_embedding
+from .llm import (
+ gpt_4o_mini_complete,
+ openai_embedding,
+)
from .operate import (
chunking_by_token_size,
extract_entities,
@@ -37,6 +39,7 @@
QueryParam,
)
+
def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
try:
loop = asyncio.get_running_loop()
@@ -69,7 +72,6 @@ class LightRAG:
"dimensions": 1536,
"num_walks": 10,
"walk_length": 40,
- "num_walks": 10,
"window_size": 2,
"iterations": 3,
"random_seed": 3,
@@ -77,13 +79,13 @@ class LightRAG:
)
# embedding_func: EmbeddingFunc = field(default_factory=lambda:hf_embedding)
- embedding_func: EmbeddingFunc = field(default_factory=lambda:openai_embedding)
+ embedding_func: EmbeddingFunc = field(default_factory=lambda: openai_embedding)
embedding_batch_num: int = 32
embedding_func_max_async: int = 16
# LLM
- llm_model_func: callable = gpt_4o_mini_complete#hf_model_complete#
- llm_model_name: str = 'meta-llama/Llama-3.2-1B-Instruct'#'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
+ llm_model_func: callable = gpt_4o_mini_complete # hf_model_complete#
+ llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
llm_model_max_token_size: int = 32768
llm_model_max_async: int = 16
@@ -98,11 +100,11 @@ class LightRAG:
addon_params: dict = field(default_factory=dict)
convert_response_to_json_func: callable = convert_response_to_json
- def __post_init__(self):
+ def __post_init__(self):
log_file = os.path.join(self.working_dir, "lightrag.log")
set_logger(log_file)
logger.info(f"Logger initialized for working directory: {self.working_dir}")
-
+
_print_config = ",\n ".join([f"{k} = {v}" for k, v in asdict(self).items()])
logger.debug(f"LightRAG init with param:\n {_print_config}\n")
@@ -133,30 +135,24 @@ def __post_init__(self):
self.embedding_func
)
- self.entities_vdb = (
- self.vector_db_storage_cls(
- namespace="entities",
- global_config=asdict(self),
- embedding_func=self.embedding_func,
- meta_fields={"entity_name"}
- )
+ self.entities_vdb = self.vector_db_storage_cls(
+ namespace="entities",
+ global_config=asdict(self),
+ embedding_func=self.embedding_func,
+ meta_fields={"entity_name"},
)
- self.relationships_vdb = (
- self.vector_db_storage_cls(
- namespace="relationships",
- global_config=asdict(self),
- embedding_func=self.embedding_func,
- meta_fields={"src_id", "tgt_id"}
- )
+ self.relationships_vdb = self.vector_db_storage_cls(
+ namespace="relationships",
+ global_config=asdict(self),
+ embedding_func=self.embedding_func,
+ meta_fields={"src_id", "tgt_id"},
)
- self.chunks_vdb = (
- self.vector_db_storage_cls(
- namespace="chunks",
- global_config=asdict(self),
- embedding_func=self.embedding_func,
- )
+ self.chunks_vdb = self.vector_db_storage_cls(
+ namespace="chunks",
+ global_config=asdict(self),
+ embedding_func=self.embedding_func,
)
-
+
self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
partial(self.llm_model_func, hashing_kv=self.llm_response_cache)
)
@@ -177,7 +173,7 @@ async def ainsert(self, string_or_strings):
_add_doc_keys = await self.full_docs.filter_keys(list(new_docs.keys()))
new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
if not len(new_docs):
- logger.warning(f"All docs are already in the storage")
+ logger.warning("All docs are already in the storage")
return
logger.info(f"[New Docs] inserting {len(new_docs)} docs")
@@ -203,7 +199,7 @@ async def ainsert(self, string_or_strings):
k: v for k, v in inserting_chunks.items() if k in _add_chunk_keys
}
if not len(inserting_chunks):
- logger.warning(f"All chunks are already in the storage")
+ logger.warning("All chunks are already in the storage")
return
logger.info(f"[New Chunks] inserting {len(inserting_chunks)} chunks")
@@ -246,7 +242,7 @@ async def _insert_done(self):
def query(self, query: str, param: QueryParam = QueryParam()):
loop = always_get_an_event_loop()
return loop.run_until_complete(self.aquery(query, param))
-
+
async def aquery(self, query: str, param: QueryParam = QueryParam()):
if param.mode == "local":
response = await local_query(
@@ -290,7 +286,6 @@ async def aquery(self, query: str, param: QueryParam = QueryParam()):
raise ValueError(f"Unknown mode {param.mode}")
await self._query_done()
return response
-
async def _query_done(self):
tasks = []
@@ -299,5 +294,3 @@ async def _query_done(self):
continue
tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
await asyncio.gather(*tasks)
-
-
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 48defb4d..be801e0c 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -1,9 +1,7 @@
import os
import copy
import json
-import botocore
import aioboto3
-import botocore.errorfactory
import numpy as np
import ollama
from openai import AsyncOpenAI, APIConnectionError, RateLimitError, Timeout
@@ -13,24 +11,34 @@
wait_exponential,
retry_if_exception_type,
)
-from transformers import AutoModel,AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from .base import BaseKVStorage
from .utils import compute_args_hash, wrap_embedding_func_with_attrs
-import copy
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10),
retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
)
async def openai_complete_if_cache(
- model, prompt, system_prompt=None, history_messages=[], base_url=None, api_key=None, **kwargs
+ model,
+ prompt,
+ system_prompt=None,
+ history_messages=[],
+ base_url=None,
+ api_key=None,
+ **kwargs,
) -> str:
if api_key:
os.environ["OPENAI_API_KEY"] = api_key
- openai_async_client = AsyncOpenAI() if base_url is None else AsyncOpenAI(base_url=base_url)
+ openai_async_client = (
+ AsyncOpenAI() if base_url is None else AsyncOpenAI(base_url=base_url)
+ )
hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
messages = []
if system_prompt:
@@ -64,43 +72,56 @@ class BedrockError(Exception):
retry=retry_if_exception_type((BedrockError)),
)
async def bedrock_complete_if_cache(
- model, prompt, system_prompt=None, history_messages=[],
- aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, **kwargs
+ model,
+ prompt,
+ system_prompt=None,
+ history_messages=[],
+ aws_access_key_id=None,
+ aws_secret_access_key=None,
+ aws_session_token=None,
+ **kwargs,
) -> str:
- os.environ['AWS_ACCESS_KEY_ID'] = os.environ.get('AWS_ACCESS_KEY_ID', aws_access_key_id)
- os.environ['AWS_SECRET_ACCESS_KEY'] = os.environ.get('AWS_SECRET_ACCESS_KEY', aws_secret_access_key)
- os.environ['AWS_SESSION_TOKEN'] = os.environ.get('AWS_SESSION_TOKEN', aws_session_token)
+ os.environ["AWS_ACCESS_KEY_ID"] = os.environ.get(
+ "AWS_ACCESS_KEY_ID", aws_access_key_id
+ )
+ os.environ["AWS_SECRET_ACCESS_KEY"] = os.environ.get(
+ "AWS_SECRET_ACCESS_KEY", aws_secret_access_key
+ )
+ os.environ["AWS_SESSION_TOKEN"] = os.environ.get(
+ "AWS_SESSION_TOKEN", aws_session_token
+ )
# Fix message history format
messages = []
for history_message in history_messages:
message = copy.copy(history_message)
- message['content'] = [{'text': message['content']}]
+ message["content"] = [{"text": message["content"]}]
messages.append(message)
# Add user prompt
- messages.append({'role': "user", 'content': [{'text': prompt}]})
+ messages.append({"role": "user", "content": [{"text": prompt}]})
# Initialize Converse API arguments
- args = {
- 'modelId': model,
- 'messages': messages
- }
+ args = {"modelId": model, "messages": messages}
# Define system prompt
if system_prompt:
- args['system'] = [{'text': system_prompt}]
+ args["system"] = [{"text": system_prompt}]
# Map and set up inference parameters
inference_params_map = {
- 'max_tokens': "maxTokens",
- 'top_p': "topP",
- 'stop_sequences': "stopSequences"
+ "max_tokens": "maxTokens",
+ "top_p": "topP",
+ "stop_sequences": "stopSequences",
}
- if (inference_params := list(set(kwargs) & set(['max_tokens', 'temperature', 'top_p', 'stop_sequences']))):
- args['inferenceConfig'] = {}
+ if inference_params := list(
+ set(kwargs) & set(["max_tokens", "temperature", "top_p", "stop_sequences"])
+ ):
+ args["inferenceConfig"] = {}
for param in inference_params:
- args['inferenceConfig'][inference_params_map.get(param, param)] = kwargs.pop(param)
+ args["inferenceConfig"][inference_params_map.get(param, param)] = (
+ kwargs.pop(param)
+ )
hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
if hashing_kv is not None:
@@ -112,31 +133,33 @@ async def bedrock_complete_if_cache(
# Call model via Converse API
session = aioboto3.Session()
async with session.client("bedrock-runtime") as bedrock_async_client:
-
try:
response = await bedrock_async_client.converse(**args, **kwargs)
except Exception as e:
raise BedrockError(e)
if hashing_kv is not None:
- await hashing_kv.upsert({
- args_hash: {
- 'return': response['output']['message']['content'][0]['text'],
- 'model': model
+ await hashing_kv.upsert(
+ {
+ args_hash: {
+ "return": response["output"]["message"]["content"][0]["text"],
+ "model": model,
+ }
}
- })
+ )
+
+ return response["output"]["message"]["content"][0]["text"]
- return response['output']['message']['content'][0]['text']
async def hf_model_if_cache(
model, prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
model_name = model
- hf_tokenizer = AutoTokenizer.from_pretrained(model_name,device_map = 'auto')
- if hf_tokenizer.pad_token == None:
+ hf_tokenizer = AutoTokenizer.from_pretrained(model_name, device_map="auto")
+ if hf_tokenizer.pad_token is None:
# print("use eos token")
hf_tokenizer.pad_token = hf_tokenizer.eos_token
- hf_model = AutoModelForCausalLM.from_pretrained(model_name,device_map = 'auto')
+ hf_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
messages = []
if system_prompt:
@@ -149,30 +172,51 @@ async def hf_model_if_cache(
if_cache_return = await hashing_kv.get_by_id(args_hash)
if if_cache_return is not None:
return if_cache_return["return"]
- input_prompt = ''
+ input_prompt = ""
try:
- input_prompt = hf_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
- except:
+ input_prompt = hf_tokenizer.apply_chat_template(
+ messages, tokenize=False, add_generation_prompt=True
+ )
+ except Exception:
try:
ori_message = copy.deepcopy(messages)
- if messages[0]['role'] == "system":
- messages[1]['content'] = "" + messages[0]['content'] + "\n" + messages[1]['content']
+ if messages[0]["role"] == "system":
+ messages[1]["content"] = (
+ ""
+ + messages[0]["content"]
+ + "\n"
+ + messages[1]["content"]
+ )
messages = messages[1:]
- input_prompt = hf_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
- except:
+ input_prompt = hf_tokenizer.apply_chat_template(
+ messages, tokenize=False, add_generation_prompt=True
+ )
+ except Exception:
len_message = len(ori_message)
for msgid in range(len_message):
- input_prompt =input_prompt+ '<'+ori_message[msgid]['role']+'>'+ori_message[msgid]['content']+''+ori_message[msgid]['role']+'>\n'
-
- input_ids = hf_tokenizer(input_prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")
- output = hf_model.generate(**input_ids, max_new_tokens=200, num_return_sequences=1,early_stopping = True)
+ input_prompt = (
+ input_prompt
+ + "<"
+ + ori_message[msgid]["role"]
+ + ">"
+ + ori_message[msgid]["content"]
+ + ""
+ + ori_message[msgid]["role"]
+ + ">\n"
+ )
+
+ input_ids = hf_tokenizer(
+ input_prompt, return_tensors="pt", padding=True, truncation=True
+ ).to("cuda")
+ output = hf_model.generate(
+ **input_ids, max_new_tokens=200, num_return_sequences=1, early_stopping=True
+ )
response_text = hf_tokenizer.decode(output[0], skip_special_tokens=True)
if hashing_kv is not None:
- await hashing_kv.upsert(
- {args_hash: {"return": response_text, "model": model}}
- )
+ await hashing_kv.upsert({args_hash: {"return": response_text, "model": model}})
return response_text
+
async def ollama_model_if_cache(
model, prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
@@ -202,6 +246,7 @@ async def ollama_model_if_cache(
return result
+
async def gpt_4o_complete(
prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
@@ -241,7 +286,7 @@ async def bedrock_complete(
async def hf_model_complete(
prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
- model_name = kwargs['hashing_kv'].global_config['llm_model_name']
+ model_name = kwargs["hashing_kv"].global_config["llm_model_name"]
return await hf_model_if_cache(
model_name,
prompt,
@@ -250,10 +295,11 @@ async def hf_model_complete(
**kwargs,
)
+
async def ollama_model_complete(
prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
- model_name = kwargs['hashing_kv'].global_config['llm_model_name']
+ model_name = kwargs["hashing_kv"].global_config["llm_model_name"]
return await ollama_model_if_cache(
model_name,
prompt,
@@ -262,17 +308,25 @@ async def ollama_model_complete(
**kwargs,
)
+
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10),
retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
)
-async def openai_embedding(texts: list[str], model: str = "text-embedding-3-small", base_url: str = None, api_key: str = None) -> np.ndarray:
+async def openai_embedding(
+ texts: list[str],
+ model: str = "text-embedding-3-small",
+ base_url: str = None,
+ api_key: str = None,
+) -> np.ndarray:
if api_key:
os.environ["OPENAI_API_KEY"] = api_key
- openai_async_client = AsyncOpenAI() if base_url is None else AsyncOpenAI(base_url=base_url)
+ openai_async_client = (
+ AsyncOpenAI() if base_url is None else AsyncOpenAI(base_url=base_url)
+ )
response = await openai_async_client.embeddings.create(
model=model, input=texts, encoding_format="float"
)
@@ -286,28 +340,37 @@ async def openai_embedding(texts: list[str], model: str = "text-embedding-3-smal
# retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), # TODO: fix exceptions
# )
async def bedrock_embedding(
- texts: list[str], model: str = "amazon.titan-embed-text-v2:0",
- aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None) -> np.ndarray:
- os.environ['AWS_ACCESS_KEY_ID'] = os.environ.get('AWS_ACCESS_KEY_ID', aws_access_key_id)
- os.environ['AWS_SECRET_ACCESS_KEY'] = os.environ.get('AWS_SECRET_ACCESS_KEY', aws_secret_access_key)
- os.environ['AWS_SESSION_TOKEN'] = os.environ.get('AWS_SESSION_TOKEN', aws_session_token)
+ texts: list[str],
+ model: str = "amazon.titan-embed-text-v2:0",
+ aws_access_key_id=None,
+ aws_secret_access_key=None,
+ aws_session_token=None,
+) -> np.ndarray:
+ os.environ["AWS_ACCESS_KEY_ID"] = os.environ.get(
+ "AWS_ACCESS_KEY_ID", aws_access_key_id
+ )
+ os.environ["AWS_SECRET_ACCESS_KEY"] = os.environ.get(
+ "AWS_SECRET_ACCESS_KEY", aws_secret_access_key
+ )
+ os.environ["AWS_SESSION_TOKEN"] = os.environ.get(
+ "AWS_SESSION_TOKEN", aws_session_token
+ )
session = aioboto3.Session()
async with session.client("bedrock-runtime") as bedrock_async_client:
-
if (model_provider := model.split(".")[0]) == "amazon":
embed_texts = []
for text in texts:
if "v2" in model:
- body = json.dumps({
- 'inputText': text,
- # 'dimensions': embedding_dim,
- 'embeddingTypes': ["float"]
- })
+ body = json.dumps(
+ {
+ "inputText": text,
+ # 'dimensions': embedding_dim,
+ "embeddingTypes": ["float"],
+ }
+ )
elif "v1" in model:
- body = json.dumps({
- 'inputText': text
- })
+ body = json.dumps({"inputText": text})
else:
raise ValueError(f"Model {model} is not supported!")
@@ -315,29 +378,27 @@ async def bedrock_embedding(
modelId=model,
body=body,
accept="application/json",
- contentType="application/json"
+ contentType="application/json",
)
- response_body = await response.get('body').json()
+ response_body = await response.get("body").json()
- embed_texts.append(response_body['embedding'])
+ embed_texts.append(response_body["embedding"])
elif model_provider == "cohere":
- body = json.dumps({
- 'texts': texts,
- 'input_type': "search_document",
- 'truncate': "NONE"
- })
+ body = json.dumps(
+ {"texts": texts, "input_type": "search_document", "truncate": "NONE"}
+ )
response = await bedrock_async_client.invoke_model(
model=model,
body=body,
accept="application/json",
- contentType="application/json"
+ contentType="application/json",
)
- response_body = json.loads(response.get('body').read())
+ response_body = json.loads(response.get("body").read())
- embed_texts = response_body['embeddings']
+ embed_texts = response_body["embeddings"]
else:
raise ValueError(f"Model provider '{model_provider}' is not supported!")
@@ -345,12 +406,15 @@ async def bedrock_embedding(
async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray:
- input_ids = tokenizer(texts, return_tensors='pt', padding=True, truncation=True).input_ids
+ input_ids = tokenizer(
+ texts, return_tensors="pt", padding=True, truncation=True
+ ).input_ids
with torch.no_grad():
outputs = embed_model(input_ids)
embeddings = outputs.last_hidden_state.mean(dim=1)
return embeddings.detach().numpy()
+
async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray:
embed_text = []
for text in texts:
@@ -359,11 +423,12 @@ async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray:
return embed_text
+
if __name__ == "__main__":
import asyncio
async def main():
- result = await gpt_4o_mini_complete('How are you?')
+ result = await gpt_4o_mini_complete("How are you?")
print(result)
asyncio.run(main())
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 930ceb2a..a0729cd8 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -25,6 +25,7 @@
)
from .prompt import GRAPH_FIELD_SEP, PROMPTS
+
def chunking_by_token_size(
content: str, overlap_token_size=128, max_token_size=1024, tiktoken_model="gpt-4o"
):
@@ -45,6 +46,7 @@ def chunking_by_token_size(
)
return results
+
async def _handle_entity_relation_summary(
entity_or_relation_name: str,
description: str,
@@ -229,9 +231,10 @@ async def _merge_edges_then_upsert(
description=description,
keywords=keywords,
)
-
+
return edge_data
+
async def extract_entities(
chunks: dict[str, TextChunkSchema],
knwoledge_graph_inst: BaseGraphStorage,
@@ -352,7 +355,9 @@ async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
logger.warning("Didn't extract any entities, maybe your LLM is not working")
return None
if not len(all_relationships_data):
- logger.warning("Didn't extract any relationships, maybe your LLM is not working")
+ logger.warning(
+ "Didn't extract any relationships, maybe your LLM is not working"
+ )
return None
if entity_vdb is not None:
@@ -370,7 +375,10 @@ async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): {
"src_id": dp["src_id"],
"tgt_id": dp["tgt_id"],
- "content": dp["keywords"] + dp["src_id"] + dp["tgt_id"] + dp["description"],
+ "content": dp["keywords"]
+ + dp["src_id"]
+ + dp["tgt_id"]
+ + dp["description"],
}
for dp in all_relationships_data
}
@@ -378,6 +386,7 @@ async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
return knwoledge_graph_inst
+
async def local_query(
query,
knowledge_graph_inst: BaseGraphStorage,
@@ -393,19 +402,24 @@ async def local_query(
kw_prompt_temp = PROMPTS["keywords_extraction"]
kw_prompt = kw_prompt_temp.format(query=query)
result = await use_model_func(kw_prompt)
-
+
try:
keywords_data = json.loads(result)
keywords = keywords_data.get("low_level_keywords", [])
- keywords = ', '.join(keywords)
- except json.JSONDecodeError as e:
+ keywords = ", ".join(keywords)
+ except json.JSONDecodeError:
try:
- result = result.replace(kw_prompt[:-1],'').replace('user','').replace('model','').strip()
- result = '{' + result.split('{')[1].split('}')[0] + '}'
+ result = (
+ result.replace(kw_prompt[:-1], "")
+ .replace("user", "")
+ .replace("model", "")
+ .strip()
+ )
+ result = "{" + result.split("{")[1].split("}")[0] + "}"
keywords_data = json.loads(result)
keywords = keywords_data.get("low_level_keywords", [])
- keywords = ', '.join(keywords)
+ keywords = ", ".join(keywords)
# Handle parsing error
except json.JSONDecodeError as e:
print(f"JSON parsing error: {e}")
@@ -430,11 +444,20 @@ async def local_query(
query,
system_prompt=sys_prompt,
)
- if len(response)>len(sys_prompt):
- response = response.replace(sys_prompt,'').replace('user','').replace('model','').replace(query,'').replace('','').replace('','').strip()
-
+ if len(response) > len(sys_prompt):
+ response = (
+ response.replace(sys_prompt, "")
+ .replace("user", "")
+ .replace("model", "")
+ .replace(query, "")
+ .replace("", "")
+ .replace("", "")
+ .strip()
+ )
+
return response
+
async def _build_local_query_context(
query,
knowledge_graph_inst: BaseGraphStorage,
@@ -516,6 +539,7 @@ async def _build_local_query_context(
```
"""
+
async def _find_most_related_text_unit_from_entities(
node_datas: list[dict],
query_param: QueryParam,
@@ -576,6 +600,7 @@ async def _find_most_related_text_unit_from_entities(
all_text_units: list[TextChunkSchema] = [t["data"] for t in all_text_units]
return all_text_units
+
async def _find_most_related_edges_from_entities(
node_datas: list[dict],
query_param: QueryParam,
@@ -609,6 +634,7 @@ async def _find_most_related_edges_from_entities(
)
return all_edges_data
+
async def global_query(
query,
knowledge_graph_inst: BaseGraphStorage,
@@ -624,20 +650,25 @@ async def global_query(
kw_prompt_temp = PROMPTS["keywords_extraction"]
kw_prompt = kw_prompt_temp.format(query=query)
result = await use_model_func(kw_prompt)
-
+
try:
keywords_data = json.loads(result)
keywords = keywords_data.get("high_level_keywords", [])
- keywords = ', '.join(keywords)
- except json.JSONDecodeError as e:
+ keywords = ", ".join(keywords)
+ except json.JSONDecodeError:
try:
- result = result.replace(kw_prompt[:-1],'').replace('user','').replace('model','').strip()
- result = '{' + result.split('{')[1].split('}')[0] + '}'
+ result = (
+ result.replace(kw_prompt[:-1], "")
+ .replace("user", "")
+ .replace("model", "")
+ .strip()
+ )
+ result = "{" + result.split("{")[1].split("}")[0] + "}"
keywords_data = json.loads(result)
keywords = keywords_data.get("high_level_keywords", [])
- keywords = ', '.join(keywords)
-
+ keywords = ", ".join(keywords)
+
except json.JSONDecodeError as e:
# Handle parsing error
print(f"JSON parsing error: {e}")
@@ -651,12 +682,12 @@ async def global_query(
text_chunks_db,
query_param,
)
-
+
if query_param.only_need_context:
return context
if context is None:
return PROMPTS["fail_response"]
-
+
sys_prompt_temp = PROMPTS["rag_response"]
sys_prompt = sys_prompt_temp.format(
context_data=context, response_type=query_param.response_type
@@ -665,11 +696,20 @@ async def global_query(
query,
system_prompt=sys_prompt,
)
- if len(response)>len(sys_prompt):
- response = response.replace(sys_prompt,'').replace('user','').replace('model','').replace(query,'').replace('','').replace('','').strip()
-
+ if len(response) > len(sys_prompt):
+ response = (
+ response.replace(sys_prompt, "")
+ .replace("user", "")
+ .replace("model", "")
+ .replace(query, "")
+ .replace("", "")
+ .replace("", "")
+ .strip()
+ )
+
return response
+
async def _build_global_query_context(
keywords,
knowledge_graph_inst: BaseGraphStorage,
@@ -679,14 +719,14 @@ async def _build_global_query_context(
query_param: QueryParam,
):
results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
-
+
if not len(results):
return None
-
+
edge_datas = await asyncio.gather(
*[knowledge_graph_inst.get_edge(r["src_id"], r["tgt_id"]) for r in results]
)
-
+
if not all([n is not None for n in edge_datas]):
logger.warning("Some edges are missing, maybe the storage is damaged")
edge_degree = await asyncio.gather(
@@ -765,6 +805,7 @@ async def _build_global_query_context(
```
"""
+
async def _find_most_related_entities_from_relationships(
edge_datas: list[dict],
query_param: QueryParam,
@@ -774,7 +815,7 @@ async def _find_most_related_entities_from_relationships(
for e in edge_datas:
entity_names.add(e["src_id"])
entity_names.add(e["tgt_id"])
-
+
node_datas = await asyncio.gather(
*[knowledge_graph_inst.get_node(entity_name) for entity_name in entity_names]
)
@@ -795,13 +836,13 @@ async def _find_most_related_entities_from_relationships(
return node_datas
+
async def _find_related_text_unit_from_relationships(
edge_datas: list[dict],
query_param: QueryParam,
text_chunks_db: BaseKVStorage[TextChunkSchema],
knowledge_graph_inst: BaseGraphStorage,
):
-
text_units = [
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
for dp in edge_datas
@@ -816,15 +857,13 @@ async def _find_related_text_unit_from_relationships(
"data": await text_chunks_db.get_by_id(c_id),
"order": index,
}
-
+
if any([v is None for v in all_text_units_lookup.values()]):
logger.warning("Text chunks are missing, maybe the storage is damaged")
all_text_units = [
{"id": k, **v} for k, v in all_text_units_lookup.items() if v is not None
]
- all_text_units = sorted(
- all_text_units, key=lambda x: x["order"]
- )
+ all_text_units = sorted(all_text_units, key=lambda x: x["order"])
all_text_units = truncate_list_by_token_size(
all_text_units,
key=lambda x: x["data"]["content"],
@@ -834,6 +873,7 @@ async def _find_related_text_unit_from_relationships(
return all_text_units
+
async def hybrid_query(
query,
knowledge_graph_inst: BaseGraphStorage,
@@ -849,24 +889,29 @@ async def hybrid_query(
kw_prompt_temp = PROMPTS["keywords_extraction"]
kw_prompt = kw_prompt_temp.format(query=query)
-
+
result = await use_model_func(kw_prompt)
try:
keywords_data = json.loads(result)
hl_keywords = keywords_data.get("high_level_keywords", [])
ll_keywords = keywords_data.get("low_level_keywords", [])
- hl_keywords = ', '.join(hl_keywords)
- ll_keywords = ', '.join(ll_keywords)
- except json.JSONDecodeError as e:
+ hl_keywords = ", ".join(hl_keywords)
+ ll_keywords = ", ".join(ll_keywords)
+ except json.JSONDecodeError:
try:
- result = result.replace(kw_prompt[:-1],'').replace('user','').replace('model','').strip()
- result = '{' + result.split('{')[1].split('}')[0] + '}'
+ result = (
+ result.replace(kw_prompt[:-1], "")
+ .replace("user", "")
+ .replace("model", "")
+ .strip()
+ )
+ result = "{" + result.split("{")[1].split("}")[0] + "}"
keywords_data = json.loads(result)
hl_keywords = keywords_data.get("high_level_keywords", [])
ll_keywords = keywords_data.get("low_level_keywords", [])
- hl_keywords = ', '.join(hl_keywords)
- ll_keywords = ', '.join(ll_keywords)
+ hl_keywords = ", ".join(hl_keywords)
+ ll_keywords = ", ".join(ll_keywords)
# Handle parsing error
except json.JSONDecodeError as e:
print(f"JSON parsing error: {e}")
@@ -897,7 +942,7 @@ async def hybrid_query(
return context
if context is None:
return PROMPTS["fail_response"]
-
+
sys_prompt_temp = PROMPTS["rag_response"]
sys_prompt = sys_prompt_temp.format(
context_data=context, response_type=query_param.response_type
@@ -906,53 +951,78 @@ async def hybrid_query(
query,
system_prompt=sys_prompt,
)
- if len(response)>len(sys_prompt):
- response = response.replace(sys_prompt,'').replace('user','').replace('model','').replace(query,'').replace('','').replace('','').strip()
+ if len(response) > len(sys_prompt):
+ response = (
+ response.replace(sys_prompt, "")
+ .replace("user", "")
+ .replace("model", "")
+ .replace(query, "")
+ .replace("", "")
+ .replace("", "")
+ .strip()
+ )
return response
+
def combine_contexts(high_level_context, low_level_context):
# Function to extract entities, relationships, and sources from context strings
def extract_sections(context):
- entities_match = re.search(r'-----Entities-----\s*```csv\s*(.*?)\s*```', context, re.DOTALL)
- relationships_match = re.search(r'-----Relationships-----\s*```csv\s*(.*?)\s*```', context, re.DOTALL)
- sources_match = re.search(r'-----Sources-----\s*```csv\s*(.*?)\s*```', context, re.DOTALL)
-
- entities = entities_match.group(1) if entities_match else ''
- relationships = relationships_match.group(1) if relationships_match else ''
- sources = sources_match.group(1) if sources_match else ''
-
+ entities_match = re.search(
+ r"-----Entities-----\s*```csv\s*(.*?)\s*```", context, re.DOTALL
+ )
+ relationships_match = re.search(
+ r"-----Relationships-----\s*```csv\s*(.*?)\s*```", context, re.DOTALL
+ )
+ sources_match = re.search(
+ r"-----Sources-----\s*```csv\s*(.*?)\s*```", context, re.DOTALL
+ )
+
+ entities = entities_match.group(1) if entities_match else ""
+ relationships = relationships_match.group(1) if relationships_match else ""
+ sources = sources_match.group(1) if sources_match else ""
+
return entities, relationships, sources
-
+
# Extract sections from both contexts
- if high_level_context==None:
- warnings.warn("High Level context is None. Return empty High entity/relationship/source")
- hl_entities, hl_relationships, hl_sources = '','',''
+ if high_level_context is None:
+ warnings.warn(
+ "High Level context is None. Return empty High entity/relationship/source"
+ )
+ hl_entities, hl_relationships, hl_sources = "", "", ""
else:
hl_entities, hl_relationships, hl_sources = extract_sections(high_level_context)
-
- if low_level_context==None:
- warnings.warn("Low Level context is None. Return empty Low entity/relationship/source")
- ll_entities, ll_relationships, ll_sources = '','',''
+ if low_level_context is None:
+ warnings.warn(
+ "Low Level context is None. Return empty Low entity/relationship/source"
+ )
+ ll_entities, ll_relationships, ll_sources = "", "", ""
else:
ll_entities, ll_relationships, ll_sources = extract_sections(low_level_context)
-
-
# Combine and deduplicate the entities
- combined_entities_set = set(filter(None, hl_entities.strip().split('\n') + ll_entities.strip().split('\n')))
- combined_entities = '\n'.join(combined_entities_set)
-
+ combined_entities_set = set(
+ filter(None, hl_entities.strip().split("\n") + ll_entities.strip().split("\n"))
+ )
+ combined_entities = "\n".join(combined_entities_set)
+
# Combine and deduplicate the relationships
- combined_relationships_set = set(filter(None, hl_relationships.strip().split('\n') + ll_relationships.strip().split('\n')))
- combined_relationships = '\n'.join(combined_relationships_set)
-
+ combined_relationships_set = set(
+ filter(
+ None,
+ hl_relationships.strip().split("\n") + ll_relationships.strip().split("\n"),
+ )
+ )
+ combined_relationships = "\n".join(combined_relationships_set)
+
# Combine and deduplicate the sources
- combined_sources_set = set(filter(None, hl_sources.strip().split('\n') + ll_sources.strip().split('\n')))
- combined_sources = '\n'.join(combined_sources_set)
-
+ combined_sources_set = set(
+ filter(None, hl_sources.strip().split("\n") + ll_sources.strip().split("\n"))
+ )
+ combined_sources = "\n".join(combined_sources_set)
+
# Format the combined context
return f"""
-----Entities-----
@@ -964,6 +1034,7 @@ def extract_sections(context):
{combined_sources}
"""
+
async def naive_query(
query,
chunks_vdb: BaseVectorStorage,
@@ -996,8 +1067,16 @@ async def naive_query(
system_prompt=sys_prompt,
)
- if len(response)>len(sys_prompt):
- response = response[len(sys_prompt):].replace(sys_prompt,'').replace('user','').replace('model','').replace(query,'').replace('','').replace('','').strip()
-
- return response
+ if len(response) > len(sys_prompt):
+ response = (
+ response[len(sys_prompt) :]
+ .replace(sys_prompt, "")
+ .replace("user", "")
+ .replace("model", "")
+ .replace(query, "")
+ .replace("", "")
+ .replace("", "")
+ .strip()
+ )
+ return response
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
index 5d28e49c..6bd9b638 100644
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -9,9 +9,7 @@
PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event"]
-PROMPTS[
- "entity_extraction"
-] = """-Goal-
+PROMPTS["entity_extraction"] = """-Goal-
Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.
-Steps-
@@ -32,7 +30,7 @@
3. Identify high-level key words that summarize the main concepts, themes, or topics of the entire text. These should capture the overarching ideas present in the document.
Format the content-level key words as ("content_keywords"{tuple_delimiter})
-
+
4. Return output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
5. When finished, output {completion_delimiter}
@@ -146,9 +144,7 @@
PROMPTS["fail_response"] = "Sorry, I'm not able to provide an answer to that question."
-PROMPTS[
- "rag_response"
-] = """---Role---
+PROMPTS["rag_response"] = """---Role---
You are a helpful assistant responding to questions about data in the tables provided.
@@ -241,9 +237,7 @@
"""
-PROMPTS[
- "naive_rag_response"
-] = """You're a helpful assistant
+PROMPTS["naive_rag_response"] = """You're a helpful assistant
Below are the knowledge you know:
{content_data}
---
diff --git a/lightrag/storage.py b/lightrag/storage.py
index 2f2bb7d8..1f22fc56 100644
--- a/lightrag/storage.py
+++ b/lightrag/storage.py
@@ -1,16 +1,11 @@
import asyncio
import html
-import json
import os
-from collections import defaultdict
-from dataclasses import dataclass, field
+from dataclasses import dataclass
from typing import Any, Union, cast
-import pickle
-import hnswlib
import networkx as nx
import numpy as np
from nano_vectordb import NanoVectorDB
-import xxhash
from .utils import load_json, logger, write_json
from .base import (
@@ -19,6 +14,7 @@
BaseVectorStorage,
)
+
@dataclass
class JsonKVStorage(BaseKVStorage):
def __post_init__(self):
@@ -59,12 +55,12 @@ async def upsert(self, data: dict[str, dict]):
async def drop(self):
self._data = {}
+
@dataclass
class NanoVectorDBStorage(BaseVectorStorage):
cosine_better_than_threshold: float = 0.2
def __post_init__(self):
-
self._client_file_name = os.path.join(
self.global_config["working_dir"], f"vdb_{self.namespace}.json"
)
@@ -118,6 +114,7 @@ async def query(self, query: str, top_k=5):
async def index_done_callback(self):
self._client.save()
+
@dataclass
class NetworkXStorage(BaseGraphStorage):
@staticmethod
@@ -142,7 +139,9 @@ def stable_largest_connected_component(graph: nx.Graph) -> nx.Graph:
graph = graph.copy()
graph = cast(nx.Graph, largest_connected_component(graph))
- node_mapping = {node: html.unescape(node.upper().strip()) for node in graph.nodes()} # type: ignore
+ node_mapping = {
+ node: html.unescape(node.upper().strip()) for node in graph.nodes()
+ } # type: ignore
graph = nx.relabel_nodes(graph, node_mapping)
return NetworkXStorage._stabilize_graph(graph)
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 9496cf34..67d094c6 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -16,18 +16,22 @@
logger = logging.getLogger("lightrag")
+
def set_logger(log_file: str):
logger.setLevel(logging.DEBUG)
file_handler = logging.FileHandler(log_file)
file_handler.setLevel(logging.DEBUG)
- formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ formatter = logging.Formatter(
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+ )
file_handler.setFormatter(formatter)
if not logger.handlers:
logger.addHandler(file_handler)
+
@dataclass
class EmbeddingFunc:
embedding_dim: int
@@ -36,7 +40,8 @@ class EmbeddingFunc:
async def __call__(self, *args, **kwargs) -> np.ndarray:
return await self.func(*args, **kwargs)
-
+
+
def locate_json_string_body_from_string(content: str) -> Union[str, None]:
"""Locate the JSON string body from a string"""
maybe_json_str = re.search(r"{.*}", content, re.DOTALL)
@@ -45,6 +50,7 @@ def locate_json_string_body_from_string(content: str) -> Union[str, None]:
else:
return None
+
def convert_response_to_json(response: str) -> dict:
json_str = locate_json_string_body_from_string(response)
assert json_str is not None, f"Unable to parse JSON from response: {response}"
@@ -55,12 +61,15 @@ def convert_response_to_json(response: str) -> dict:
logger.error(f"Failed to parse JSON: {json_str}")
raise e from None
+
def compute_args_hash(*args):
return md5(str(args).encode()).hexdigest()
+
def compute_mdhash_id(content, prefix: str = ""):
return prefix + md5(content.encode()).hexdigest()
+
def limit_async_func_call(max_size: int, waitting_time: float = 0.0001):
"""Add restriction of maximum async calling times for a async func"""
@@ -82,6 +91,7 @@ async def wait_func(*args, **kwargs):
return final_decro
+
def wrap_embedding_func_with_attrs(**kwargs):
"""Wrap a function with attributes"""
@@ -91,16 +101,19 @@ def final_decro(func) -> EmbeddingFunc:
return final_decro
+
def load_json(file_name):
if not os.path.exists(file_name):
return None
with open(file_name, encoding="utf-8") as f:
return json.load(f)
+
def write_json(json_obj, file_name):
with open(file_name, "w", encoding="utf-8") as f:
json.dump(json_obj, f, indent=2, ensure_ascii=False)
+
def encode_string_by_tiktoken(content: str, model_name: str = "gpt-4o"):
global ENCODER
if ENCODER is None:
@@ -116,12 +129,14 @@ def decode_tokens_by_tiktoken(tokens: list[int], model_name: str = "gpt-4o"):
content = ENCODER.decode(tokens)
return content
+
def pack_user_ass_to_openai_messages(*args: str):
roles = ["user", "assistant"]
return [
{"role": roles[i % 2], "content": content} for i, content in enumerate(args)
]
+
def split_string_by_multi_markers(content: str, markers: list[str]) -> list[str]:
"""Split a string by multiple markers"""
if not markers:
@@ -129,6 +144,7 @@ def split_string_by_multi_markers(content: str, markers: list[str]) -> list[str]
results = re.split("|".join(re.escape(marker) for marker in markers), content)
return [r.strip() for r in results if r.strip()]
+
# Refer the utils functions of the official GraphRAG implementation:
# https://github.com/microsoft/graphrag
def clean_str(input: Any) -> str:
@@ -141,9 +157,11 @@ def clean_str(input: Any) -> str:
# https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python
return re.sub(r"[\x00-\x1f\x7f-\x9f]", "", result)
+
def is_float_regex(value):
return bool(re.match(r"^[-+]?[0-9]*\.?[0-9]+$", value))
+
def truncate_list_by_token_size(list_data: list, key: callable, max_token_size: int):
"""Truncate a list of data by token size"""
if max_token_size <= 0:
@@ -155,11 +173,13 @@ def truncate_list_by_token_size(list_data: list, key: callable, max_token_size:
return list_data[:i]
return list_data
+
def list_of_list_to_csv(data: list[list]):
return "\n".join(
[",\t".join([str(data_dd) for data_dd in data_d]) for data_d in data]
)
+
def save_data_to_file(data, file_name):
- with open(file_name, 'w', encoding='utf-8') as f:
- json.dump(data, f, ensure_ascii=False, indent=4)
\ No newline at end of file
+ with open(file_name, "w", encoding="utf-8") as f:
+ json.dump(data, f, ensure_ascii=False, indent=4)
diff --git a/reproduce/Step_0.py b/reproduce/Step_0.py
index 9053aa40..2d97bd14 100644
--- a/reproduce/Step_0.py
+++ b/reproduce/Step_0.py
@@ -3,11 +3,11 @@
import glob
import argparse
-def extract_unique_contexts(input_directory, output_directory):
+def extract_unique_contexts(input_directory, output_directory):
os.makedirs(output_directory, exist_ok=True)
- jsonl_files = glob.glob(os.path.join(input_directory, '*.jsonl'))
+ jsonl_files = glob.glob(os.path.join(input_directory, "*.jsonl"))
print(f"Found {len(jsonl_files)} JSONL files.")
for file_path in jsonl_files:
@@ -21,18 +21,20 @@ def extract_unique_contexts(input_directory, output_directory):
print(f"Processing file: {filename}")
try:
- with open(file_path, 'r', encoding='utf-8') as infile:
+ with open(file_path, "r", encoding="utf-8") as infile:
for line_number, line in enumerate(infile, start=1):
line = line.strip()
if not line:
continue
try:
json_obj = json.loads(line)
- context = json_obj.get('context')
+ context = json_obj.get("context")
if context and context not in unique_contexts_dict:
unique_contexts_dict[context] = None
except json.JSONDecodeError as e:
- print(f"JSON decoding error in file {filename} at line {line_number}: {e}")
+ print(
+ f"JSON decoding error in file {filename} at line {line_number}: {e}"
+ )
except FileNotFoundError:
print(f"File not found: {filename}")
continue
@@ -41,10 +43,12 @@ def extract_unique_contexts(input_directory, output_directory):
continue
unique_contexts_list = list(unique_contexts_dict.keys())
- print(f"There are {len(unique_contexts_list)} unique `context` entries in the file {filename}.")
+ print(
+ f"There are {len(unique_contexts_list)} unique `context` entries in the file {filename}."
+ )
try:
- with open(output_path, 'w', encoding='utf-8') as outfile:
+ with open(output_path, "w", encoding="utf-8") as outfile:
json.dump(unique_contexts_list, outfile, ensure_ascii=False, indent=4)
print(f"Unique `context` entries have been saved to: {output_filename}")
except Exception as e:
@@ -55,8 +59,10 @@ def extract_unique_contexts(input_directory, output_directory):
if __name__ == "__main__":
parser = argparse.ArgumentParser()
- parser.add_argument('-i', '--input_dir', type=str, default='../datasets')
- parser.add_argument('-o', '--output_dir', type=str, default='../datasets/unique_contexts')
+ parser.add_argument("-i", "--input_dir", type=str, default="../datasets")
+ parser.add_argument(
+ "-o", "--output_dir", type=str, default="../datasets/unique_contexts"
+ )
args = parser.parse_args()
diff --git a/reproduce/Step_1.py b/reproduce/Step_1.py
index 08e497cb..43c44056 100644
--- a/reproduce/Step_1.py
+++ b/reproduce/Step_1.py
@@ -4,10 +4,11 @@
from lightrag import LightRAG
+
def insert_text(rag, file_path):
- with open(file_path, mode='r') as f:
+ with open(file_path, mode="r") as f:
unique_contexts = json.load(f)
-
+
retries = 0
max_retries = 3
while retries < max_retries:
@@ -21,6 +22,7 @@ def insert_text(rag, file_path):
if retries == max_retries:
print("Insertion failed after exceeding the maximum number of retries")
+
cls = "agriculture"
WORKING_DIR = "../{cls}"
@@ -29,4 +31,4 @@ def insert_text(rag, file_path):
rag = LightRAG(working_dir=WORKING_DIR)
-insert_text(rag, f"../datasets/unique_contexts/{cls}_unique_contexts.json")
\ No newline at end of file
+insert_text(rag, f"../datasets/unique_contexts/{cls}_unique_contexts.json")
diff --git a/reproduce/Step_1_openai_compatible.py b/reproduce/Step_1_openai_compatible.py
index b5c6aef3..8e67cfb8 100644
--- a/reproduce/Step_1_openai_compatible.py
+++ b/reproduce/Step_1_openai_compatible.py
@@ -7,6 +7,7 @@
from lightrag.utils import EmbeddingFunc
from lightrag.llm import openai_complete_if_cache, openai_embedding
+
## For Upstage API
# please check if embedding_dim=4096 in lightrag.py and llm.py in lightrag direcotry
async def llm_model_func(
@@ -19,22 +20,26 @@ async def llm_model_func(
history_messages=history_messages,
api_key=os.getenv("UPSTAGE_API_KEY"),
base_url="https://api.upstage.ai/v1/solar",
- **kwargs
+ **kwargs,
)
+
async def embedding_func(texts: list[str]) -> np.ndarray:
return await openai_embedding(
texts,
model="solar-embedding-1-large-query",
api_key=os.getenv("UPSTAGE_API_KEY"),
- base_url="https://api.upstage.ai/v1/solar"
+ base_url="https://api.upstage.ai/v1/solar",
)
+
+
## /For Upstage API
+
def insert_text(rag, file_path):
- with open(file_path, mode='r') as f:
+ with open(file_path, mode="r") as f:
unique_contexts = json.load(f)
-
+
retries = 0
max_retries = 3
while retries < max_retries:
@@ -48,19 +53,19 @@ def insert_text(rag, file_path):
if retries == max_retries:
print("Insertion failed after exceeding the maximum number of retries")
+
cls = "mix"
WORKING_DIR = f"../{cls}"
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
-rag = LightRAG(working_dir=WORKING_DIR,
- llm_model_func=llm_model_func,
- embedding_func=EmbeddingFunc(
- embedding_dim=4096,
- max_token_size=8192,
- func=embedding_func
- )
- )
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=llm_model_func,
+ embedding_func=EmbeddingFunc(
+ embedding_dim=4096, max_token_size=8192, func=embedding_func
+ ),
+)
insert_text(rag, f"../datasets/unique_contexts/{cls}_unique_contexts.json")
diff --git a/reproduce/Step_2.py b/reproduce/Step_2.py
index b00c19b8..557c7714 100644
--- a/reproduce/Step_2.py
+++ b/reproduce/Step_2.py
@@ -1,8 +1,8 @@
-import os
import json
from openai import OpenAI
from transformers import GPT2Tokenizer
+
def openai_complete_if_cache(
model="gpt-4o", prompt=None, system_prompt=None, history_messages=[], **kwargs
) -> str:
@@ -19,24 +19,26 @@ def openai_complete_if_cache(
)
return response.choices[0].message.content
-tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+
def get_summary(context, tot_tokens=2000):
tokens = tokenizer.tokenize(context)
half_tokens = tot_tokens // 2
- start_tokens = tokens[1000:1000 + half_tokens]
- end_tokens = tokens[-(1000 + half_tokens):1000]
+ start_tokens = tokens[1000 : 1000 + half_tokens]
+ end_tokens = tokens[-(1000 + half_tokens) : 1000]
summary_tokens = start_tokens + end_tokens
summary = tokenizer.convert_tokens_to_string(summary_tokens)
-
+
return summary
-clses = ['agriculture']
+clses = ["agriculture"]
for cls in clses:
- with open(f'../datasets/unique_contexts/{cls}_unique_contexts.json', mode='r') as f:
+ with open(f"../datasets/unique_contexts/{cls}_unique_contexts.json", mode="r") as f:
unique_contexts = json.load(f)
summaries = [get_summary(context) for context in unique_contexts]
@@ -67,10 +69,10 @@ def get_summary(context, tot_tokens=2000):
...
"""
- result = openai_complete_if_cache(model='gpt-4o', prompt=prompt)
+ result = openai_complete_if_cache(model="gpt-4o", prompt=prompt)
file_path = f"../datasets/questions/{cls}_questions.txt"
with open(file_path, "w") as file:
file.write(result)
- print(f"{cls}_questions written to {file_path}")
\ No newline at end of file
+ print(f"{cls}_questions written to {file_path}")
diff --git a/reproduce/Step_3.py b/reproduce/Step_3.py
index a79ebd17..a56190fc 100644
--- a/reproduce/Step_3.py
+++ b/reproduce/Step_3.py
@@ -4,16 +4,18 @@
from lightrag import LightRAG, QueryParam
from tqdm import tqdm
+
def extract_queries(file_path):
- with open(file_path, 'r') as f:
+ with open(file_path, "r") as f:
data = f.read()
-
- data = data.replace('**', '')
- queries = re.findall(r'- Question \d+: (.+)', data)
+ data = data.replace("**", "")
+
+ queries = re.findall(r"- Question \d+: (.+)", data)
return queries
+
async def process_query(query_text, rag_instance, query_param):
try:
result, context = await rag_instance.aquery(query_text, param=query_param)
@@ -21,6 +23,7 @@ async def process_query(query_text, rag_instance, query_param):
except Exception as e:
return None, {"query": query_text, "error": str(e)}
+
def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
try:
loop = asyncio.get_event_loop()
@@ -29,15 +32,22 @@ def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
asyncio.set_event_loop(loop)
return loop
-def run_queries_and_save_to_json(queries, rag_instance, query_param, output_file, error_file):
+
+def run_queries_and_save_to_json(
+ queries, rag_instance, query_param, output_file, error_file
+):
loop = always_get_an_event_loop()
- with open(output_file, 'a', encoding='utf-8') as result_file, open(error_file, 'a', encoding='utf-8') as err_file:
+ with open(output_file, "a", encoding="utf-8") as result_file, open(
+ error_file, "a", encoding="utf-8"
+ ) as err_file:
result_file.write("[\n")
first_entry = True
for query_text in tqdm(queries, desc="Processing queries", unit="query"):
- result, error = loop.run_until_complete(process_query(query_text, rag_instance, query_param))
+ result, error = loop.run_until_complete(
+ process_query(query_text, rag_instance, query_param)
+ )
if result:
if not first_entry:
@@ -50,6 +60,7 @@ def run_queries_and_save_to_json(queries, rag_instance, query_param, output_file
result_file.write("\n]")
+
if __name__ == "__main__":
cls = "agriculture"
mode = "hybrid"
@@ -59,4 +70,6 @@ def run_queries_and_save_to_json(queries, rag_instance, query_param, output_file
query_param = QueryParam(mode=mode)
queries = extract_queries(f"../datasets/questions/{cls}_questions.txt")
- run_queries_and_save_to_json(queries, rag, query_param, f"{cls}_result.json", f"{cls}_errors.json")
+ run_queries_and_save_to_json(
+ queries, rag, query_param, f"{cls}_result.json", f"{cls}_errors.json"
+ )
diff --git a/reproduce/Step_3_openai_compatible.py b/reproduce/Step_3_openai_compatible.py
index 7b3079a9..2be5ea5c 100644
--- a/reproduce/Step_3_openai_compatible.py
+++ b/reproduce/Step_3_openai_compatible.py
@@ -8,6 +8,7 @@
from lightrag.utils import EmbeddingFunc
import numpy as np
+
## For Upstage API
# please check if embedding_dim=4096 in lightrag.py and llm.py in lightrag direcotry
async def llm_model_func(
@@ -20,28 +21,33 @@ async def llm_model_func(
history_messages=history_messages,
api_key=os.getenv("UPSTAGE_API_KEY"),
base_url="https://api.upstage.ai/v1/solar",
- **kwargs
+ **kwargs,
)
+
async def embedding_func(texts: list[str]) -> np.ndarray:
return await openai_embedding(
texts,
model="solar-embedding-1-large-query",
api_key=os.getenv("UPSTAGE_API_KEY"),
- base_url="https://api.upstage.ai/v1/solar"
+ base_url="https://api.upstage.ai/v1/solar",
)
+
+
## /For Upstage API
+
def extract_queries(file_path):
- with open(file_path, 'r') as f:
+ with open(file_path, "r") as f:
data = f.read()
-
- data = data.replace('**', '')
- queries = re.findall(r'- Question \d+: (.+)', data)
+ data = data.replace("**", "")
+
+ queries = re.findall(r"- Question \d+: (.+)", data)
return queries
+
async def process_query(query_text, rag_instance, query_param):
try:
result, context = await rag_instance.aquery(query_text, param=query_param)
@@ -49,6 +55,7 @@ async def process_query(query_text, rag_instance, query_param):
except Exception as e:
return None, {"query": query_text, "error": str(e)}
+
def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
try:
loop = asyncio.get_event_loop()
@@ -57,15 +64,22 @@ def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
asyncio.set_event_loop(loop)
return loop
-def run_queries_and_save_to_json(queries, rag_instance, query_param, output_file, error_file):
+
+def run_queries_and_save_to_json(
+ queries, rag_instance, query_param, output_file, error_file
+):
loop = always_get_an_event_loop()
- with open(output_file, 'a', encoding='utf-8') as result_file, open(error_file, 'a', encoding='utf-8') as err_file:
+ with open(output_file, "a", encoding="utf-8") as result_file, open(
+ error_file, "a", encoding="utf-8"
+ ) as err_file:
result_file.write("[\n")
first_entry = True
for query_text in tqdm(queries, desc="Processing queries", unit="query"):
- result, error = loop.run_until_complete(process_query(query_text, rag_instance, query_param))
+ result, error = loop.run_until_complete(
+ process_query(query_text, rag_instance, query_param)
+ )
if result:
if not first_entry:
@@ -78,22 +92,24 @@ def run_queries_and_save_to_json(queries, rag_instance, query_param, output_file
result_file.write("\n]")
+
if __name__ == "__main__":
cls = "mix"
mode = "hybrid"
WORKING_DIR = f"../{cls}"
rag = LightRAG(working_dir=WORKING_DIR)
- rag = LightRAG(working_dir=WORKING_DIR,
- llm_model_func=llm_model_func,
- embedding_func=EmbeddingFunc(
- embedding_dim=4096,
- max_token_size=8192,
- func=embedding_func
- )
- )
+ rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=llm_model_func,
+ embedding_func=EmbeddingFunc(
+ embedding_dim=4096, max_token_size=8192, func=embedding_func
+ ),
+ )
query_param = QueryParam(mode=mode)
- base_dir='../datasets/questions'
+ base_dir = "../datasets/questions"
queries = extract_queries(f"{base_dir}/{cls}_questions.txt")
- run_queries_and_save_to_json(queries, rag, query_param, f"{base_dir}/result.json", f"{base_dir}/errors.json")
+ run_queries_and_save_to_json(
+ queries, rag, query_param, f"{base_dir}/result.json", f"{base_dir}/errors.json"
+ )
diff --git a/requirements.txt b/requirements.txt
index a1054692..d5479dab 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,13 +1,13 @@
+accelerate
aioboto3
-openai
-tiktoken
-networkx
graspologic
-nano-vectordb
hnswlib
-xxhash
+nano-vectordb
+networkx
+ollama
+openai
tenacity
-transformers
+tiktoken
torch
-ollama
-accelerate
\ No newline at end of file
+transformers
+xxhash
From 4945027dc025c73763ecc271017152273a81d86d Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Sat, 19 Oct 2024 21:35:50 +0800
Subject: [PATCH 059/258] Update README.md
---
README.md | 34 +++++++++++++++++++++++++++++++---
1 file changed, 31 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index a3e5c1b4..e2f7e81a 100644
--- a/README.md
+++ b/README.md
@@ -92,7 +92,7 @@ print(rag.query("What are the top themes in this story?", param=QueryParam(mode=
Using Open AI-like APIs
-LightRAG also supports Open AI-like chat/embeddings APIs:
+* LightRAG also supports Open AI-like chat/embeddings APIs:
```python
async def llm_model_func(
prompt, system_prompt=None, history_messages=[], **kwargs
@@ -130,7 +130,7 @@ rag = LightRAG(
Using Hugging Face Models
-If you want to use Hugging Face models, you only need to set LightRAG as follows:
+* If you want to use Hugging Face models, you only need to set LightRAG as follows:
```python
from lightrag.llm import hf_model_complete, hf_embedding
from transformers import AutoModel, AutoTokenizer
@@ -156,7 +156,8 @@ rag = LightRAG(
Using Ollama Models
-If you want to use Ollama models, you only need to set LightRAG as follows:
+
+* If you want to use Ollama models, you only need to set LightRAG as follows:
```python
from lightrag.llm import ollama_model_complete, ollama_embedding
@@ -177,6 +178,29 @@ rag = LightRAG(
),
)
```
+
+* Increasing the `num_ctx` parameter:
+
+1. Pull the model:
+```python
+ollama pull qwen2
+```
+
+2. Display the model file:
+```python
+ollama show --modelfile qwen2 > Modelfile
+```
+
+3. Edit the Modelfile by adding the following line:
+```python
+PARAMETER num_ctx 32768
+```
+
+4. Create the modified model:
+```python
+ollama create -f Modelfile qwen2m
+```
+
### Batch Insert
@@ -441,6 +465,8 @@ def extract_queries(file_path):
├── examples
│ ├── batch_eval.py
│ ├── generate_query.py
+│ ├── lightrag_azure_openai_demo.py
+│ ├── lightrag_bedrock_demo.py
│ ├── lightrag_hf_demo.py
│ ├── lightrag_ollama_demo.py
│ ├── lightrag_openai_compatible_demo.py
@@ -459,6 +485,8 @@ def extract_queries(file_path):
│ ├── Step_1.py
│ ├── Step_2.py
│ └── Step_3.py
+├── .gitignore
+├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── requirements.txt
From 263cde887156fa2d6108fa8463fdfd16b4b52fb1 Mon Sep 17 00:00:00 2001
From: nongbin
Date: Sun, 20 Oct 2024 09:55:52 +0800
Subject: [PATCH 060/258] add visualizing graph
---
.gitignore | 1 +
.idea/.gitignore | 8 ++++
.idea/LightRAG.iml | 12 ++++++
.idea/inspectionProfiles/Project_Default.xml | 38 +++++++++++++++++++
.../inspectionProfiles/profiles_settings.xml | 6 +++
.idea/misc.xml | 7 ++++
.idea/modules.xml | 8 ++++
.idea/vcs.xml | 6 +++
examples/graph_visual.py | 14 +++++++
requirements.txt | 1 +
10 files changed, 101 insertions(+)
create mode 100644 .idea/.gitignore
create mode 100644 .idea/LightRAG.iml
create mode 100644 .idea/inspectionProfiles/Project_Default.xml
create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
create mode 100644 .idea/misc.xml
create mode 100644 .idea/modules.xml
create mode 100644 .idea/vcs.xml
create mode 100644 examples/graph_visual.py
diff --git a/.gitignore b/.gitignore
index 50f384ec..208668c5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ __pycache__
dickens/
book.txt
lightrag-dev/
+*.idea
\ No newline at end of file
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 00000000..13566b81
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/LightRAG.iml b/.idea/LightRAG.iml
new file mode 100644
index 00000000..8b8c3954
--- /dev/null
+++ b/.idea/LightRAG.iml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 00000000..c41eaf20
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 00000000..105ce2da
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 00000000..676ac0f0
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 00000000..145d7086
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 00000000..35eb1ddf
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/examples/graph_visual.py b/examples/graph_visual.py
new file mode 100644
index 00000000..72c72bad
--- /dev/null
+++ b/examples/graph_visual.py
@@ -0,0 +1,14 @@
+import networkx as nx
+from pyvis.network import Network
+
+# Load the GraphML file
+G = nx.read_graphml('./dickens/graph_chunk_entity_relation.graphml')
+
+# Create a Pyvis network
+net = Network(notebook=True)
+
+# Convert NetworkX graph to Pyvis network
+net.from_nx(G)
+
+# Save and display the network
+net.show('knowledge_graph.html')
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index d5479dab..9cc5b7e9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,3 +11,4 @@ tiktoken
torch
transformers
xxhash
+pyvis
\ No newline at end of file
From a7e43406a5d6113a5a0483b187652c74868a21b2 Mon Sep 17 00:00:00 2001
From: nongbin
Date: Sun, 20 Oct 2024 09:57:14 +0800
Subject: [PATCH 061/258] delete not used files
---
.idea/.gitignore | 8 ----
.idea/LightRAG.iml | 12 ------
.idea/inspectionProfiles/Project_Default.xml | 38 -------------------
.../inspectionProfiles/profiles_settings.xml | 6 ---
.idea/misc.xml | 7 ----
.idea/modules.xml | 8 ----
.idea/vcs.xml | 6 ---
7 files changed, 85 deletions(-)
delete mode 100644 .idea/.gitignore
delete mode 100644 .idea/LightRAG.iml
delete mode 100644 .idea/inspectionProfiles/Project_Default.xml
delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml
delete mode 100644 .idea/misc.xml
delete mode 100644 .idea/modules.xml
delete mode 100644 .idea/vcs.xml
diff --git a/.idea/.gitignore b/.idea/.gitignore
deleted file mode 100644
index 13566b81..00000000
--- a/.idea/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
-# Editor-based HTTP Client requests
-/httpRequests/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml
diff --git a/.idea/LightRAG.iml b/.idea/LightRAG.iml
deleted file mode 100644
index 8b8c3954..00000000
--- a/.idea/LightRAG.iml
+++ /dev/null
@@ -1,12 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
deleted file mode 100644
index c41eaf20..00000000
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ /dev/null
@@ -1,38 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2da..00000000
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index 676ac0f0..00000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
deleted file mode 100644
index 145d7086..00000000
--- a/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 35eb1ddf..00000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
From c6585ff89f858b8d39de3eb5d4b71d59a0771a47 Mon Sep 17 00:00:00 2001
From: nongbin
Date: Sun, 20 Oct 2024 10:04:34 +0800
Subject: [PATCH 062/258] ignore idea files
---
.gitignore | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.gitignore b/.gitignore
index 208668c5..edfbfbfc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,4 @@ __pycache__
dickens/
book.txt
lightrag-dev/
-*.idea
\ No newline at end of file
+.idea/
\ No newline at end of file
From 347e8a97be3ee4e1b87ad0b16f7060e4643132a6 Mon Sep 17 00:00:00 2001
From: hanbin49 <554066527@qq.com>
Date: Sun, 20 Oct 2024 11:27:47 +0800
Subject: [PATCH 063/258] 'update'
---
examples/vram_management_demo.py | 82 ++++++++++++++++++++++++++++++++
1 file changed, 82 insertions(+)
create mode 100644 examples/vram_management_demo.py
diff --git a/examples/vram_management_demo.py b/examples/vram_management_demo.py
new file mode 100644
index 00000000..505e4761
--- /dev/null
+++ b/examples/vram_management_demo.py
@@ -0,0 +1,82 @@
+import os
+import time
+from lightrag import LightRAG, QueryParam
+from lightrag.llm import ollama_model_complete, ollama_embedding
+from lightrag.utils import EmbeddingFunc
+
+# 工作目录和文本文件目录路径
+WORKING_DIR = "./dickens"
+TEXT_FILES_DIR = "/llm/mt"
+
+# 如果工作目录不存在,则创建该目录
+if not os.path.exists(WORKING_DIR):
+ os.mkdir(WORKING_DIR)
+
+# 初始化 LightRAG
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=ollama_model_complete,
+ llm_model_name="qwen2.5:3b-instruct-max-context",
+ embedding_func=EmbeddingFunc(
+ embedding_dim=768,
+ max_token_size=8192,
+ func=lambda texts: ollama_embedding(texts, embed_model="nomic-embed-text"),
+ ),
+)
+
+# 读取 TEXT_FILES_DIR 目录下所有的 .txt 文件
+texts = []
+for filename in os.listdir(TEXT_FILES_DIR):
+ if filename.endswith('.txt'):
+ file_path = os.path.join(TEXT_FILES_DIR, filename)
+ with open(file_path, 'r', encoding='utf-8') as file:
+ texts.append(file.read())
+
+# 批量插入文本到 LightRAG,带有重试机制
+def insert_texts_with_retry(rag, texts, retries=3, delay=5):
+ for _ in range(retries):
+ try:
+ rag.insert(texts)
+ return
+ except Exception as e:
+ print(f"Error occurred during insertion: {e}. Retrying in {delay} seconds...")
+ time.sleep(delay)
+ raise RuntimeError("Failed to insert texts after multiple retries.")
+
+insert_texts_with_retry(rag, texts)
+
+# 执行不同类型的查询,并处理潜在的错误
+try:
+ print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+except Exception as e:
+ print(f"Error performing naive search: {e}")
+
+try:
+ print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+except Exception as e:
+ print(f"Error performing local search: {e}")
+
+try:
+ print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+except Exception as e:
+ print(f"Error performing global search: {e}")
+
+try:
+ print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
+except Exception as e:
+ print(f"Error performing hybrid search: {e}")
+
+# 清理 VRAM 资源的函数
+def clear_vram():
+ os.system("sudo nvidia-smi --gpu-reset")
+
+# 定期清理 VRAM 以防止溢出
+clear_vram_interval = 3600 # 每小时清理一次
+start_time = time.time()
+
+while True:
+ current_time = time.time()
+ if current_time - start_time > clear_vram_interval:
+ clear_vram()
+ start_time = current_time
+ time.sleep(60) # 每分钟检查一次时间
\ No newline at end of file
From a716e628e370719e0fdcb847e4cd9b4212cc72eb Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Sun, 20 Oct 2024 18:08:49 +0800
Subject: [PATCH 064/258] Add vram_management_demo.py
---
examples/vram_management_demo.py | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/examples/vram_management_demo.py b/examples/vram_management_demo.py
index 505e4761..ec750254 100644
--- a/examples/vram_management_demo.py
+++ b/examples/vram_management_demo.py
@@ -4,15 +4,15 @@
from lightrag.llm import ollama_model_complete, ollama_embedding
from lightrag.utils import EmbeddingFunc
-# 工作目录和文本文件目录路径
+# Working directory and the directory path for text files
WORKING_DIR = "./dickens"
TEXT_FILES_DIR = "/llm/mt"
-# 如果工作目录不存在,则创建该目录
+# Create the working directory if it doesn't exist
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
-# 初始化 LightRAG
+# Initialize LightRAG
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=ollama_model_complete,
@@ -24,7 +24,7 @@
),
)
-# 读取 TEXT_FILES_DIR 目录下所有的 .txt 文件
+# Read all .txt files from the TEXT_FILES_DIR directory
texts = []
for filename in os.listdir(TEXT_FILES_DIR):
if filename.endswith('.txt'):
@@ -32,7 +32,7 @@
with open(file_path, 'r', encoding='utf-8') as file:
texts.append(file.read())
-# 批量插入文本到 LightRAG,带有重试机制
+# Batch insert texts into LightRAG with a retry mechanism
def insert_texts_with_retry(rag, texts, retries=3, delay=5):
for _ in range(retries):
try:
@@ -45,7 +45,7 @@ def insert_texts_with_retry(rag, texts, retries=3, delay=5):
insert_texts_with_retry(rag, texts)
-# 执行不同类型的查询,并处理潜在的错误
+# Perform different types of queries and handle potential errors
try:
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
except Exception as e:
@@ -66,12 +66,12 @@ def insert_texts_with_retry(rag, texts, retries=3, delay=5):
except Exception as e:
print(f"Error performing hybrid search: {e}")
-# 清理 VRAM 资源的函数
+# Function to clear VRAM resources
def clear_vram():
os.system("sudo nvidia-smi --gpu-reset")
-# 定期清理 VRAM 以防止溢出
-clear_vram_interval = 3600 # 每小时清理一次
+# Regularly clear VRAM to prevent overflow
+clear_vram_interval = 3600 # Clear once every hour
start_time = time.time()
while True:
@@ -79,4 +79,4 @@ def clear_vram():
if current_time - start_time > clear_vram_interval:
clear_vram()
start_time = current_time
- time.sleep(60) # 每分钟检查一次时间
\ No newline at end of file
+ time.sleep(60) # Check the time every minute
From ae4aafb525b2366499b1d9cf5dd2e92731464569 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Sun, 20 Oct 2024 18:10:00 +0800
Subject: [PATCH 065/258] Update README.md
---
README.md | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index e2f7e81a..bf996f82 100644
--- a/README.md
+++ b/README.md
@@ -470,7 +470,8 @@ def extract_queries(file_path):
│ ├── lightrag_hf_demo.py
│ ├── lightrag_ollama_demo.py
│ ├── lightrag_openai_compatible_demo.py
-│ └── lightrag_openai_demo.py
+│ ├── lightrag_openai_demo.py
+│ └── vram_management_demo.py
├── lightrag
│ ├── __init__.py
│ ├── base.py
From c800fa48435fab8d2aca945e68d5f9f52c988f9e Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Sun, 20 Oct 2024 18:22:43 +0800
Subject: [PATCH 066/258] Update README.md
---
README.md | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/README.md b/README.md
index bf996f82..c8d6e312 100644
--- a/README.md
+++ b/README.md
@@ -218,6 +218,26 @@ rag = LightRAG(working_dir="./dickens")
with open("./newText.txt") as f:
rag.insert(f.read())
```
+
+### Graph Visualization
+
+* Generate html file
+```python
+import networkx as nx
+from pyvis.network import Network
+
+# Load the GraphML file
+G = nx.read_graphml('./dickens/graph_chunk_entity_relation.graphml')
+
+# Create a Pyvis network
+net = Network(notebook=True)
+
+# Convert NetworkX graph to Pyvis network
+net.from_nx(G)
+
+# Save and display the network
+net.show('knowledge_graph.html')
+```
## Evaluation
### Dataset
The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain).
@@ -465,6 +485,7 @@ def extract_queries(file_path):
├── examples
│ ├── batch_eval.py
│ ├── generate_query.py
+│ ├── graph_visual.py
│ ├── lightrag_azure_openai_demo.py
│ ├── lightrag_bedrock_demo.py
│ ├── lightrag_hf_demo.py
From f400b02b0f23401907a1aab004ab7bbc39615364 Mon Sep 17 00:00:00 2001
From: nongbin
Date: Sun, 20 Oct 2024 21:17:09 +0800
Subject: [PATCH 067/258] make graph visualization become colorful
---
examples/graph_visual.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/examples/graph_visual.py b/examples/graph_visual.py
index 72c72bad..b455e6de 100644
--- a/examples/graph_visual.py
+++ b/examples/graph_visual.py
@@ -1,5 +1,6 @@
import networkx as nx
from pyvis.network import Network
+import random
# Load the GraphML file
G = nx.read_graphml('./dickens/graph_chunk_entity_relation.graphml')
@@ -10,5 +11,9 @@
# Convert NetworkX graph to Pyvis network
net.from_nx(G)
+# Add colors to nodes
+for node in net.nodes:
+ node['color'] = "#{:06x}".format(random.randint(0, 0xFFFFFF))
+
# Save and display the network
net.show('knowledge_graph.html')
\ No newline at end of file
From 8e9005baad5a3fba1324ddc9e11060f00e9a1b29 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Sun, 20 Oct 2024 23:08:26 +0800
Subject: [PATCH 068/258] Add visualization methods
---
.gitignore | 3 +-
README.md | 141 +++++++++++++++++-
...ph_visual.py => graph_visual_with_html.py} | 0
examples/graph_visual_with_neo4j.py | 118 +++++++++++++++
lightrag/utils.py | 49 ++++++
5 files changed, 308 insertions(+), 3 deletions(-)
rename examples/{graph_visual.py => graph_visual_with_html.py} (100%)
create mode 100644 examples/graph_visual_with_neo4j.py
diff --git a/.gitignore b/.gitignore
index edfbfbfc..5a41ae32 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,5 @@ __pycache__
dickens/
book.txt
lightrag-dev/
-.idea/
\ No newline at end of file
+.idea/
+dist/
\ No newline at end of file
diff --git a/README.md b/README.md
index c8d6e312..89e50aa0 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,7 @@ This repository hosts the code of LightRAG. The structure of this code is based
## 🎉 News
+- [x] [2024.10.20]🎯🎯📢📢We add two methods to visualize the graph.
- [x] [2024.10.18]🎯🎯📢📢We’ve added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author!
- [x] [2024.10.17]🎯🎯📢📢We have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! 🎉🎉
- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
@@ -221,7 +222,11 @@ with open("./newText.txt") as f:
### Graph Visualization
-* Generate html file
+
+ Graph visualization with html
+
+* The following code can be found in `examples/graph_visual_with_html.py`
+
```python
import networkx as nx
from pyvis.network import Network
@@ -238,6 +243,137 @@ net.from_nx(G)
# Save and display the network
net.show('knowledge_graph.html')
```
+
+
+
+
+ Graph visualization with Neo4j
+
+* The following code can be found in `examples/graph_visual_with_neo4j.py`
+
+```python
+import os
+import json
+from lightrag.utils import xml_to_json
+from neo4j import GraphDatabase
+
+# Constants
+WORKING_DIR = "./dickens"
+BATCH_SIZE_NODES = 500
+BATCH_SIZE_EDGES = 100
+
+# Neo4j connection credentials
+NEO4J_URI = "bolt://localhost:7687"
+NEO4J_USERNAME = "neo4j"
+NEO4J_PASSWORD = "your_password"
+
+def convert_xml_to_json(xml_path, output_path):
+ """Converts XML file to JSON and saves the output."""
+ if not os.path.exists(xml_path):
+ print(f"Error: File not found - {xml_path}")
+ return None
+
+ json_data = xml_to_json(xml_path)
+ if json_data:
+ with open(output_path, 'w', encoding='utf-8') as f:
+ json.dump(json_data, f, ensure_ascii=False, indent=2)
+ print(f"JSON file created: {output_path}")
+ return json_data
+ else:
+ print("Failed to create JSON data")
+ return None
+
+def process_in_batches(tx, query, data, batch_size):
+ """Process data in batches and execute the given query."""
+ for i in range(0, len(data), batch_size):
+ batch = data[i:i + batch_size]
+ tx.run(query, {"nodes": batch} if "nodes" in query else {"edges": batch})
+
+def main():
+ # Paths
+ xml_file = os.path.join(WORKING_DIR, 'graph_chunk_entity_relation.graphml')
+ json_file = os.path.join(WORKING_DIR, 'graph_data.json')
+
+ # Convert XML to JSON
+ json_data = convert_xml_to_json(xml_file, json_file)
+ if json_data is None:
+ return
+
+ # Load nodes and edges
+ nodes = json_data.get('nodes', [])
+ edges = json_data.get('edges', [])
+
+ # Neo4j queries
+ create_nodes_query = """
+ UNWIND $nodes AS node
+ MERGE (e:Entity {id: node.id})
+ SET e.entity_type = node.entity_type,
+ e.description = node.description,
+ e.source_id = node.source_id,
+ e.displayName = node.id
+ REMOVE e:Entity
+ WITH e, node
+ CALL apoc.create.addLabels(e, [node.entity_type]) YIELD node AS labeledNode
+ RETURN count(*)
+ """
+
+ create_edges_query = """
+ UNWIND $edges AS edge
+ MATCH (source {id: edge.source})
+ MATCH (target {id: edge.target})
+ WITH source, target, edge,
+ CASE
+ WHEN edge.keywords CONTAINS 'lead' THEN 'lead'
+ WHEN edge.keywords CONTAINS 'participate' THEN 'participate'
+ WHEN edge.keywords CONTAINS 'uses' THEN 'uses'
+ WHEN edge.keywords CONTAINS 'located' THEN 'located'
+ WHEN edge.keywords CONTAINS 'occurs' THEN 'occurs'
+ ELSE REPLACE(SPLIT(edge.keywords, ',')[0], '\"', '')
+ END AS relType
+ CALL apoc.create.relationship(source, relType, {
+ weight: edge.weight,
+ description: edge.description,
+ keywords: edge.keywords,
+ source_id: edge.source_id
+ }, target) YIELD rel
+ RETURN count(*)
+ """
+
+ set_displayname_and_labels_query = """
+ MATCH (n)
+ SET n.displayName = n.id
+ WITH n
+ CALL apoc.create.setLabels(n, [n.entity_type]) YIELD node
+ RETURN count(*)
+ """
+
+ # Create a Neo4j driver
+ driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
+
+ try:
+ # Execute queries in batches
+ with driver.session() as session:
+ # Insert nodes in batches
+ session.execute_write(process_in_batches, create_nodes_query, nodes, BATCH_SIZE_NODES)
+
+ # Insert edges in batches
+ session.execute_write(process_in_batches, create_edges_query, edges, BATCH_SIZE_EDGES)
+
+ # Set displayName and labels
+ session.run(set_displayname_and_labels_query)
+
+ except Exception as e:
+ print(f"Error occurred: {e}")
+
+ finally:
+ driver.close()
+
+if __name__ == "__main__":
+ main()
+```
+
+
+
## Evaluation
### Dataset
The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain).
@@ -484,8 +620,9 @@ def extract_queries(file_path):
.
├── examples
│ ├── batch_eval.py
+│ ├── graph_visual_with_html.py
+│ ├── graph_visual_with_neo4j.py
│ ├── generate_query.py
-│ ├── graph_visual.py
│ ├── lightrag_azure_openai_demo.py
│ ├── lightrag_bedrock_demo.py
│ ├── lightrag_hf_demo.py
diff --git a/examples/graph_visual.py b/examples/graph_visual_with_html.py
similarity index 100%
rename from examples/graph_visual.py
rename to examples/graph_visual_with_html.py
diff --git a/examples/graph_visual_with_neo4j.py b/examples/graph_visual_with_neo4j.py
new file mode 100644
index 00000000..22dde368
--- /dev/null
+++ b/examples/graph_visual_with_neo4j.py
@@ -0,0 +1,118 @@
+import os
+import json
+from lightrag.utils import xml_to_json
+from neo4j import GraphDatabase
+
+# Constants
+WORKING_DIR = "./dickens"
+BATCH_SIZE_NODES = 500
+BATCH_SIZE_EDGES = 100
+
+# Neo4j connection credentials
+NEO4J_URI = "bolt://localhost:7687"
+NEO4J_USERNAME = "neo4j"
+NEO4J_PASSWORD = "your_password"
+
+def convert_xml_to_json(xml_path, output_path):
+ """Converts XML file to JSON and saves the output."""
+ if not os.path.exists(xml_path):
+ print(f"Error: File not found - {xml_path}")
+ return None
+
+ json_data = xml_to_json(xml_path)
+ if json_data:
+ with open(output_path, 'w', encoding='utf-8') as f:
+ json.dump(json_data, f, ensure_ascii=False, indent=2)
+ print(f"JSON file created: {output_path}")
+ return json_data
+ else:
+ print("Failed to create JSON data")
+ return None
+
+def process_in_batches(tx, query, data, batch_size):
+ """Process data in batches and execute the given query."""
+ for i in range(0, len(data), batch_size):
+ batch = data[i:i + batch_size]
+ tx.run(query, {"nodes": batch} if "nodes" in query else {"edges": batch})
+
+def main():
+ # Paths
+ xml_file = os.path.join(WORKING_DIR, 'graph_chunk_entity_relation.graphml')
+ json_file = os.path.join(WORKING_DIR, 'graph_data.json')
+
+ # Convert XML to JSON
+ json_data = convert_xml_to_json(xml_file, json_file)
+ if json_data is None:
+ return
+
+ # Load nodes and edges
+ nodes = json_data.get('nodes', [])
+ edges = json_data.get('edges', [])
+
+ # Neo4j queries
+ create_nodes_query = """
+ UNWIND $nodes AS node
+ MERGE (e:Entity {id: node.id})
+ SET e.entity_type = node.entity_type,
+ e.description = node.description,
+ e.source_id = node.source_id,
+ e.displayName = node.id
+ REMOVE e:Entity
+ WITH e, node
+ CALL apoc.create.addLabels(e, [node.entity_type]) YIELD node AS labeledNode
+ RETURN count(*)
+ """
+
+ create_edges_query = """
+ UNWIND $edges AS edge
+ MATCH (source {id: edge.source})
+ MATCH (target {id: edge.target})
+ WITH source, target, edge,
+ CASE
+ WHEN edge.keywords CONTAINS 'lead' THEN 'lead'
+ WHEN edge.keywords CONTAINS 'participate' THEN 'participate'
+ WHEN edge.keywords CONTAINS 'uses' THEN 'uses'
+ WHEN edge.keywords CONTAINS 'located' THEN 'located'
+ WHEN edge.keywords CONTAINS 'occurs' THEN 'occurs'
+ ELSE REPLACE(SPLIT(edge.keywords, ',')[0], '\"', '')
+ END AS relType
+ CALL apoc.create.relationship(source, relType, {
+ weight: edge.weight,
+ description: edge.description,
+ keywords: edge.keywords,
+ source_id: edge.source_id
+ }, target) YIELD rel
+ RETURN count(*)
+ """
+
+ set_displayname_and_labels_query = """
+ MATCH (n)
+ SET n.displayName = n.id
+ WITH n
+ CALL apoc.create.setLabels(n, [n.entity_type]) YIELD node
+ RETURN count(*)
+ """
+
+ # Create a Neo4j driver
+ driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
+
+ try:
+ # Execute queries in batches
+ with driver.session() as session:
+ # Insert nodes in batches
+ session.execute_write(process_in_batches, create_nodes_query, nodes, BATCH_SIZE_NODES)
+
+ # Insert edges in batches
+ session.execute_write(process_in_batches, create_edges_query, edges, BATCH_SIZE_EDGES)
+
+ # Set displayName and labels
+ session.run(set_displayname_and_labels_query)
+
+ except Exception as e:
+ print(f"Error occurred: {e}")
+
+ finally:
+ driver.close()
+
+if __name__ == "__main__":
+ main()
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 67d094c6..9a68c16b 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -8,6 +8,7 @@
from functools import wraps
from hashlib import md5
from typing import Any, Union
+import xml.etree.ElementTree as ET
import numpy as np
import tiktoken
@@ -183,3 +184,51 @@ def list_of_list_to_csv(data: list[list]):
def save_data_to_file(data, file_name):
with open(file_name, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=4)
+
+def xml_to_json(xml_file):
+ try:
+ tree = ET.parse(xml_file)
+ root = tree.getroot()
+
+ # Print the root element's tag and attributes to confirm the file has been correctly loaded
+ print(f"Root element: {root.tag}")
+ print(f"Root attributes: {root.attrib}")
+
+ data = {
+ "nodes": [],
+ "edges": []
+ }
+
+ # Use namespace
+ namespace = {'': 'http://graphml.graphdrawing.org/xmlns'}
+
+ for node in root.findall('.//node', namespace):
+ node_data = {
+ "id": node.get('id').strip('"'),
+ "entity_type": node.find("./data[@key='d0']", namespace).text.strip('"') if node.find("./data[@key='d0']", namespace) is not None else "",
+ "description": node.find("./data[@key='d1']", namespace).text if node.find("./data[@key='d1']", namespace) is not None else "",
+ "source_id": node.find("./data[@key='d2']", namespace).text if node.find("./data[@key='d2']", namespace) is not None else ""
+ }
+ data["nodes"].append(node_data)
+
+ for edge in root.findall('.//edge', namespace):
+ edge_data = {
+ "source": edge.get('source').strip('"'),
+ "target": edge.get('target').strip('"'),
+ "weight": float(edge.find("./data[@key='d3']", namespace).text) if edge.find("./data[@key='d3']", namespace) is not None else 0.0,
+ "description": edge.find("./data[@key='d4']", namespace).text if edge.find("./data[@key='d4']", namespace) is not None else "",
+ "keywords": edge.find("./data[@key='d5']", namespace).text if edge.find("./data[@key='d5']", namespace) is not None else "",
+ "source_id": edge.find("./data[@key='d6']", namespace).text if edge.find("./data[@key='d6']", namespace) is not None else ""
+ }
+ data["edges"].append(edge_data)
+
+ # Print the number of nodes and edges found
+ print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")
+
+ return data
+ except ET.ParseError as e:
+ print(f"Error parsing XML file: {e}")
+ return None
+ except Exception as e:
+ print(f"An error occurred: {e}")
+ return None
From 95c5ffef5a130a5949924d2c33ba9cf7e559fd97 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Sun, 20 Oct 2024 23:10:07 +0800
Subject: [PATCH 069/258] Update __init__.py
---
lightrag/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lightrag/__init__.py b/lightrag/__init__.py
index f208177f..db81e005 100644
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,5 +1,5 @@
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
-__version__ = "0.0.6"
+__version__ = "0.0.7"
__author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/LightRAG"
From 57e9604ce6526a48a7f60281962c2f14c0cbea76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=9C=A8Data=20Intelligence=20Lab=40HKU=E2=9C=A8?=
<118165258+HKUDS@users.noreply.github.com>
Date: Mon, 21 Oct 2024 01:18:46 +0800
Subject: [PATCH 070/258] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 89e50aa0..b345c1d1 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ This repository hosts the code of LightRAG. The structure of this code is based
## 🎉 News
-- [x] [2024.10.20]🎯🎯📢📢We add two methods to visualize the graph.
+- [x] [2024.10.20]🎯🎯📢📢We’ve added a new feature to LightRAG: Graph Visualization.
- [x] [2024.10.18]🎯🎯📢📢We’ve added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author!
- [x] [2024.10.17]🎯🎯📢📢We have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! 🎉🎉
- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
From 32188feb4d4391c86923af064a1e38a36dc03ed0 Mon Sep 17 00:00:00 2001
From: Sebastian Schramm
Date: Mon, 21 Oct 2024 11:23:12 +0200
Subject: [PATCH 071/258] add import and rm duplicate working dir in README
---
README.md | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/README.md b/README.md
index b345c1d1..2ea4f7e9 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,7 @@ curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_d
Use the below Python snippet (in a script) to initialize LightRAG and perform queries:
```python
+import os
from lightrag import LightRAG, QueryParam
from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
@@ -63,8 +64,6 @@ from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
WORKING_DIR = "./dickens"
-WORKING_DIR = "./dickens"
-
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
From 6941f3e30942d1215991de2ac5907ecc3ae5ff39 Mon Sep 17 00:00:00 2001
From: Andrii Lazarchuk
Date: Mon, 21 Oct 2024 11:53:06 +0000
Subject: [PATCH 072/258] Add ability to pass additional parameters to ollama
library like host and timeout
---
.gitignore | 121 +++++++++++++++++++++++++++++++
examples/lightrag_ollama_demo.py | 31 +++++---
lightrag/lightrag.py | 3 +-
lightrag/llm.py | 9 ++-
4 files changed, 151 insertions(+), 13 deletions(-)
create mode 100644 .gitignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..422c67ce
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,121 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+*.egg
+*.egg-info/
+dist/
+build/
+*.whl
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.env.*
+.venv
+.venv.*
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyderworkspace
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# Example files
+book.txt
+dickens/
diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index a2d04aa6..dfda26e6 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -1,4 +1,7 @@
import os
+import logging
+
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)
from lightrag import LightRAG, QueryParam
from lightrag.llm import ollama_model_complete, ollama_embedding
@@ -11,15 +14,17 @@
rag = LightRAG(
working_dir=WORKING_DIR,
- llm_model_func=ollama_model_complete,
- llm_model_name='your_model_name',
+ tiktoken_model_name="mistral:7b",
+ llm_model_func=ollama_model_complete,
+ llm_model_name="mistral:7b",
+ llm_model_max_async=2,
+ llm_model_kwargs={"host": "http://localhost:11434"},
embedding_func=EmbeddingFunc(
embedding_dim=768,
max_token_size=8192,
func=lambda texts: ollama_embedding(
- texts,
- embed_model="nomic-embed-text"
- )
+ texts, embed_model="nomic-embed-text", host="http://localhost:11434"
+ ),
),
)
@@ -28,13 +33,21 @@
rag.insert(f.read())
# Perform naive search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+)
# Perform local search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+)
# Perform global search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+)
# Perform hybrid search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+)
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 83312ef6..c3e5cdab 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -86,6 +86,7 @@ class LightRAG:
llm_model_name: str = 'meta-llama/Llama-3.2-1B-Instruct'#'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
llm_model_max_token_size: int = 32768
llm_model_max_async: int = 16
+ llm_model_kwargs: dict = field(default_factory=dict)
# storage
key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage
@@ -158,7 +159,7 @@ def __post_init__(self):
)
self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
- partial(self.llm_model_func, hashing_kv=self.llm_response_cache)
+ partial(self.llm_model_func, hashing_kv=self.llm_response_cache, **self.llm_model_kwargs)
)
def insert(self, string_or_strings):
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 7328a583..aac384d9 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -98,8 +98,10 @@ async def ollama_model_if_cache(
) -> str:
kwargs.pop("max_tokens", None)
kwargs.pop("response_format", None)
+ host = kwargs.pop("host", None)
+ timeout = kwargs.pop("timeout", None)
- ollama_client = ollama.AsyncClient()
+ ollama_client = ollama.AsyncClient(host=host, timeout=timeout)
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
@@ -193,10 +195,11 @@ async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray:
embeddings = outputs.last_hidden_state.mean(dim=1)
return embeddings.detach().numpy()
-async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray:
+async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarray:
embed_text = []
+ ollama_client = ollama.Client(**kwargs)
for text in texts:
- data = ollama.embeddings(model=embed_model, prompt=text)
+ data = ollama_client.embeddings(model=embed_model, prompt=text)
embed_text.append(data["embedding"])
return embed_text
From 216813c300db9bda9d976946a652b820d2f0fad1 Mon Sep 17 00:00:00 2001
From: Andrii Lazarchuk
Date: Mon, 21 Oct 2024 11:53:06 +0000
Subject: [PATCH 073/258] Add ability to pass additional parameters to ollama
library like host and timeout
---
.gitignore | 3 ++-
examples/lightrag_ollama_demo.py | 3 +++
lightrag/lightrag.py | 3 ++-
lightrag/llm.py | 9 ++++++---
4 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/.gitignore b/.gitignore
index 5a41ae32..9ce353de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,5 @@ dickens/
book.txt
lightrag-dev/
.idea/
-dist/
\ No newline at end of file
+dist/
+.venv/
\ No newline at end of file
diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index c61b71c0..f968d26e 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -1,4 +1,7 @@
import os
+import logging
+
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)
from lightrag import LightRAG, QueryParam
from lightrag.llm import ollama_model_complete, ollama_embedding
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 5137af42..d4b1eaa1 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -88,6 +88,7 @@ class LightRAG:
llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
llm_model_max_token_size: int = 32768
llm_model_max_async: int = 16
+ llm_model_kwargs: dict = field(default_factory=dict)
# storage
key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage
@@ -154,7 +155,7 @@ def __post_init__(self):
)
self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
- partial(self.llm_model_func, hashing_kv=self.llm_response_cache)
+ partial(self.llm_model_func, hashing_kv=self.llm_response_cache, **self.llm_model_kwargs)
)
def insert(self, string_or_strings):
diff --git a/lightrag/llm.py b/lightrag/llm.py
index be801e0c..aa818995 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -222,8 +222,10 @@ async def ollama_model_if_cache(
) -> str:
kwargs.pop("max_tokens", None)
kwargs.pop("response_format", None)
+ host = kwargs.pop("host", None)
+ timeout = kwargs.pop("timeout", None)
- ollama_client = ollama.AsyncClient()
+ ollama_client = ollama.AsyncClient(host=host, timeout=timeout)
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
@@ -415,10 +417,11 @@ async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray:
return embeddings.detach().numpy()
-async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray:
+async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarray:
embed_text = []
+ ollama_client = ollama.Client(**kwargs)
for text in texts:
- data = ollama.embeddings(model=embed_model, prompt=text)
+ data = ollama_client.embeddings(model=embed_model, prompt=text)
embed_text.append(data["embedding"])
return embed_text
From 272044289367ad0a7bb732fd517d97b7871fc46f Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Mon, 21 Oct 2024 21:10:19 +0800
Subject: [PATCH 074/258] Update README.md
---
README.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/README.md b/README.md
index 2ea4f7e9..76535d19 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,7 @@ pip install lightrag-hku
```
## Quick Start
+* [Video demo](https://www.youtube.com/watch?v=g21royNJ4fw) of running LightRAG locally.
* All the code can be found in the `examples`.
* Set OpenAI API key in environment if using OpenAI models: `export OPENAI_API_KEY="sk-...".`
* Download the demo text "A Christmas Carol by Charles Dickens":
From 0d4430898781b7df4c955e66625f41d67f169ea8 Mon Sep 17 00:00:00 2001
From: Andrii Lazarchuk
Date: Mon, 21 Oct 2024 13:53:28 +0000
Subject: [PATCH 075/258] Small fix on demo
---
examples/lightrag_ollama_demo.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index dfda26e6..93196066 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -1,7 +1,7 @@
import os
import logging
-logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
from lightrag import LightRAG, QueryParam
from lightrag.llm import ollama_model_complete, ollama_embedding
@@ -14,7 +14,6 @@
rag = LightRAG(
working_dir=WORKING_DIR,
- tiktoken_model_name="mistral:7b",
llm_model_func=ollama_model_complete,
llm_model_name="mistral:7b",
llm_model_max_async=2,
From d517ef9c209b96dc61ff7f3fb860a6f7e2b6d714 Mon Sep 17 00:00:00 2001
From: Soumil
Date: Mon, 21 Oct 2024 18:34:43 +0100
Subject: [PATCH 076/258] added a class to use multiple models
---
lightrag/llm.py | 69 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 69 insertions(+)
diff --git a/lightrag/llm.py b/lightrag/llm.py
index be801e0c..d820766d 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -13,6 +13,8 @@
)
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
+from pydantic import BaseModel, Field
+from typing import List, Dict, Callable, Any
from .base import BaseKVStorage
from .utils import compute_args_hash, wrap_embedding_func_with_attrs
@@ -423,6 +425,73 @@ async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray:
return embed_text
+class Model(BaseModel):
+ """
+ This is a Pydantic model class named 'Model' that is used to define a custom language model.
+
+ Attributes:
+ gen_func (Callable[[Any], str]): A callable function that generates the response from the language model.
+ The function should take any argument and return a string.
+ kwargs (Dict[str, Any]): A dictionary that contains the arguments to pass to the callable function.
+ This could include parameters such as the model name, API key, etc.
+
+ Example usage:
+ Model(gen_func=openai_complete_if_cache, kwargs={"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY_1"]})
+
+ In this example, 'openai_complete_if_cache' is the callable function that generates the response from the OpenAI model.
+ The 'kwargs' dictionary contains the model name and API key to be passed to the function.
+ """
+
+ gen_func: Callable[[Any], str] = Field(..., description="A function that generates the response from the llm. The response must be a string")
+ kwargs: Dict[str, Any] = Field(..., description="The arguments to pass to the callable function. Eg. the api key, model name, etc")
+
+ class Config:
+ arbitrary_types_allowed = True
+
+
+class MultiModel():
+ """
+ Distributes the load across multiple language models. Useful for circumventing low rate limits with certain API providers, especially if you are on the free tier.
+ Could also be used for splitting across different models or providers.
+
+ Attributes:
+ models (List[Model]): A list of language models to be used.
+
+ Usage example:
+ ```python
+ models = [
+ Model(gen_func=openai_complete_if_cache, kwargs={"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY_1"]}),
+ Model(gen_func=openai_complete_if_cache, kwargs={"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY_2"]}),
+ Model(gen_func=openai_complete_if_cache, kwargs={"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY_3"]}),
+ Model(gen_func=openai_complete_if_cache, kwargs={"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY_4"]}),
+ Model(gen_func=openai_complete_if_cache, kwargs={"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY_5"]}),
+ ]
+ multi_model = MultiModel(models)
+ rag = LightRAG(
+ llm_model_func=multi_model.llm_model_func,
+ # ...other args
+ )
+ ```
+ """
+ def __init__(self, models: List[Model]):
+ self._models = models
+ self._current_model = 0
+
+ def _next_model(self):
+ self._current_model = (self._current_model + 1) % len(self._models)
+ return self._models[self._current_model]
+
+ async def llm_model_func(
+ self,
+ prompt, system_prompt=None, history_messages=[], **kwargs
+ ) -> str:
+ kwargs.pop("model", None) # stop from overwriting the custom model name
+ next_model = self._next_model()
+ args = dict(prompt=prompt, system_prompt=system_prompt, history_messages=history_messages, **kwargs, **next_model.kwargs)
+
+ return await next_model.gen_func(
+ **args
+ )
if __name__ == "__main__":
import asyncio
From c69a3606c6c7b48a5adcdfd8e6c5c8e8a353c63e Mon Sep 17 00:00:00 2001
From: Abyl Ikhsanov
Date: Mon, 21 Oct 2024 20:40:49 +0200
Subject: [PATCH 077/258] Update llm.py
---
lightrag/llm.py | 81 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 80 insertions(+), 1 deletion(-)
diff --git a/lightrag/llm.py b/lightrag/llm.py
index be801e0c..51c48b84 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -4,7 +4,7 @@
import aioboto3
import numpy as np
import ollama
-from openai import AsyncOpenAI, APIConnectionError, RateLimitError, Timeout
+from openai import AsyncOpenAI, APIConnectionError, RateLimitError, Timeout, AsyncAzureOpenAI
from tenacity import (
retry,
stop_after_attempt,
@@ -61,6 +61,49 @@ async def openai_complete_if_cache(
)
return response.choices[0].message.content
+@retry(
+ stop=stop_after_attempt(3),
+ wait=wait_exponential(multiplier=1, min=4, max=10),
+ retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+)
+async def azure_openai_complete_if_cache(model,
+ prompt,
+ system_prompt=None,
+ history_messages=[],
+ base_url=None,
+ api_key=None,
+ **kwargs):
+ if api_key:
+ os.environ["AZURE_OPENAI_API_KEY"] = api_key
+ if base_url:
+ os.environ["AZURE_OPENAI_ENDPOINT"] = base_url
+
+ openai_async_client = AsyncAzureOpenAI(azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+ api_key=os.getenv("AZURE_OPENAI_API_KEY"),
+ api_version=os.getenv("AZURE_OPENAI_API_VERSION"))
+
+ hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
+ messages = []
+ if system_prompt:
+ messages.append({"role": "system", "content": system_prompt})
+ messages.extend(history_messages)
+ if prompt is not None:
+ messages.append({"role": "user", "content": prompt})
+ if hashing_kv is not None:
+ args_hash = compute_args_hash(model, messages)
+ if_cache_return = await hashing_kv.get_by_id(args_hash)
+ if if_cache_return is not None:
+ return if_cache_return["return"]
+
+ response = await openai_async_client.chat.completions.create(
+ model=model, messages=messages, **kwargs
+ )
+
+ if hashing_kv is not None:
+ await hashing_kv.upsert(
+ {args_hash: {"return": response.choices[0].message.content, "model": model}}
+ )
+ return response.choices[0].message.content
class BedrockError(Exception):
"""Generic error for issues related to Amazon Bedrock"""
@@ -270,6 +313,16 @@ async def gpt_4o_mini_complete(
**kwargs,
)
+async def azure_openai_complete(
+ prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ return await azure_openai_complete_if_cache(
+ "conversation-4o-mini",
+ prompt,
+ system_prompt=system_prompt,
+ history_messages=history_messages,
+ **kwargs,
+ )
async def bedrock_complete(
prompt, system_prompt=None, history_messages=[], **kwargs
@@ -332,6 +385,32 @@ async def openai_embedding(
)
return np.array([dp.embedding for dp in response.data])
+@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+@retry(
+ stop=stop_after_attempt(3),
+ wait=wait_exponential(multiplier=1, min=4, max=10),
+ retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+)
+async def azure_openai_embedding(
+ texts: list[str],
+ model: str = "text-embedding-3-small",
+ base_url: str = None,
+ api_key: str = None,
+) -> np.ndarray:
+ if api_key:
+ os.environ["AZURE_OPENAI_API_KEY"] = api_key
+ if base_url:
+ os.environ["AZURE_OPENAI_ENDPOINT"] = base_url
+
+ openai_async_client = AsyncAzureOpenAI(azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+ api_key=os.getenv("AZURE_OPENAI_API_KEY"),
+ api_version=os.getenv("AZURE_OPENAI_API_VERSION"))
+
+ response = await openai_async_client.embeddings.create(
+ model=model, input=texts, encoding_format="float"
+ )
+ return np.array([dp.embedding for dp in response.data])
+
# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
# @retry(
From 274d0fcc92f0f77d30d34da5d9fbb4a0b9a11fd0 Mon Sep 17 00:00:00 2001
From: tpoisonooo
Date: Tue, 22 Oct 2024 15:16:57 +0800
Subject: [PATCH 078/258] feat(examples): support siliconcloud free API
---
README.md | 1 +
examples/lightrag_siliconcloud_demo.py | 79 ++++++++++++++++++++++++++
lightrag/llm.py | 48 +++++++++++++++-
requirements.txt | 3 +-
4 files changed, 129 insertions(+), 2 deletions(-)
create mode 100644 examples/lightrag_siliconcloud_demo.py
diff --git a/README.md b/README.md
index 76535d19..87335f1f 100644
--- a/README.md
+++ b/README.md
@@ -629,6 +629,7 @@ def extract_queries(file_path):
│ ├── lightrag_ollama_demo.py
│ ├── lightrag_openai_compatible_demo.py
│ ├── lightrag_openai_demo.py
+│ ├── lightrag_siliconcloud_demo.py
│ └── vram_management_demo.py
├── lightrag
│ ├── __init__.py
diff --git a/examples/lightrag_siliconcloud_demo.py b/examples/lightrag_siliconcloud_demo.py
new file mode 100644
index 00000000..e3f5e67e
--- /dev/null
+++ b/examples/lightrag_siliconcloud_demo.py
@@ -0,0 +1,79 @@
+import os
+import asyncio
+from lightrag import LightRAG, QueryParam
+from lightrag.llm import openai_complete_if_cache, siliconcloud_embedding
+from lightrag.utils import EmbeddingFunc
+import numpy as np
+
+WORKING_DIR = "./dickens"
+
+if not os.path.exists(WORKING_DIR):
+ os.mkdir(WORKING_DIR)
+
+
+async def llm_model_func(
+ prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ return await openai_complete_if_cache(
+ "Qwen/Qwen2.5-7B-Instruct",
+ prompt,
+ system_prompt=system_prompt,
+ history_messages=history_messages,
+ api_key=os.getenv("UPSTAGE_API_KEY"),
+ base_url="https://api.siliconflow.cn/v1/",
+ **kwargs,
+ )
+
+
+async def embedding_func(texts: list[str]) -> np.ndarray:
+ return await siliconcloud_embedding(
+ texts,
+ model="netease-youdao/bce-embedding-base_v1",
+ api_key=os.getenv("UPSTAGE_API_KEY"),
+ max_token_size=int(512 * 1.5)
+ )
+
+
+# function test
+async def test_funcs():
+ result = await llm_model_func("How are you?")
+ print("llm_model_func: ", result)
+
+ result = await embedding_func(["How are you?"])
+ print("embedding_func: ", result)
+
+
+asyncio.run(test_funcs())
+
+
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=llm_model_func,
+ embedding_func=EmbeddingFunc(
+ embedding_dim=768, max_token_size=512, func=embedding_func
+ ),
+)
+
+
+with open("./book.txt") as f:
+ rag.insert(f.read())
+
+# Perform naive search
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+)
+
+# Perform local search
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+)
+
+# Perform global search
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+)
+
+# Perform hybrid search
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+)
diff --git a/lightrag/llm.py b/lightrag/llm.py
index be801e0c..06d75d01 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -2,8 +2,11 @@
import copy
import json
import aioboto3
+import aiohttp
import numpy as np
import ollama
+import base64
+import struct
from openai import AsyncOpenAI, APIConnectionError, RateLimitError, Timeout
from tenacity import (
retry,
@@ -312,7 +315,7 @@ async def ollama_model_complete(
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
@retry(
stop=stop_after_attempt(3),
- wait=wait_exponential(multiplier=1, min=4, max=10),
+ wait=wait_exponential(multiplier=1, min=4, max=60),
retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
)
async def openai_embedding(
@@ -332,6 +335,49 @@ async def openai_embedding(
)
return np.array([dp.embedding for dp in response.data])
+@retry(
+ stop=stop_after_attempt(3),
+ wait=wait_exponential(multiplier=1, min=4, max=60),
+ retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+)
+async def siliconcloud_embedding(
+ texts: list[str],
+ model: str = "netease-youdao/bce-embedding-base_v1",
+ base_url: str = "https://api.siliconflow.cn/v1/embeddings",
+ max_token_size: int = 512,
+ api_key: str = None,
+) -> np.ndarray:
+ if api_key and not api_key.startswith('Bearer '):
+ api_key = 'Bearer ' + api_key
+
+ headers = {
+ "Authorization": api_key,
+ "Content-Type": "application/json"
+ }
+
+ truncate_texts = [text[0:max_token_size] for text in texts]
+
+ payload = {
+ "model": model,
+ "input": truncate_texts,
+ "encoding_format": "base64"
+ }
+
+ base64_strings = []
+ async with aiohttp.ClientSession() as session:
+ async with session.post(base_url, headers=headers, json=payload) as response:
+ content = await response.json()
+ if 'code' in content:
+ raise ValueError(content)
+ base64_strings = [item['embedding'] for item in content['data']]
+
+ embeddings = []
+ for string in base64_strings:
+ decode_bytes = base64.b64decode(string)
+ n = len(decode_bytes) // 4
+ float_array = struct.unpack('<' + 'f' * n, decode_bytes)
+ embeddings.append(float_array)
+ return np.array(embeddings)
# @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
# @retry(
diff --git a/requirements.txt b/requirements.txt
index 9cc5b7e9..5b3396fb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,4 +11,5 @@ tiktoken
torch
transformers
xxhash
-pyvis
\ No newline at end of file
+pyvis
+aiohttp
\ No newline at end of file
From 64124005939dceb7b2c2e52bb4f75112fba1a7ff Mon Sep 17 00:00:00 2001
From: zhangjiawei
Date: Tue, 22 Oct 2024 16:01:40 +0800
Subject: [PATCH 079/258] set encoding as utf-8 when reading ./book.txt in
examples
---
examples/lightrag_hf_demo.py | 2 +-
examples/lightrag_ollama_demo.py | 2 +-
examples/lightrag_openai_compatible_demo.py | 2 +-
examples/lightrag_openai_demo.py | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/examples/lightrag_hf_demo.py b/examples/lightrag_hf_demo.py
index 87312307..91033e50 100644
--- a/examples/lightrag_hf_demo.py
+++ b/examples/lightrag_hf_demo.py
@@ -30,7 +30,7 @@
)
-with open("./book.txt") as f:
+with open("./book.txt", "r", encoding="utf-8") as f:
rag.insert(f.read())
# Perform naive search
diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index c61b71c0..98f1521c 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -21,7 +21,7 @@
)
-with open("./book.txt") as f:
+with open("./book.txt", "r", encoding="utf-8") as f:
rag.insert(f.read())
# Perform naive search
diff --git a/examples/lightrag_openai_compatible_demo.py b/examples/lightrag_openai_compatible_demo.py
index fbad1190..aae56821 100644
--- a/examples/lightrag_openai_compatible_demo.py
+++ b/examples/lightrag_openai_compatible_demo.py
@@ -55,7 +55,7 @@ async def test_funcs():
)
-with open("./book.txt") as f:
+with open("./book.txt", "r", encoding="utf-8") as f:
rag.insert(f.read())
# Perform naive search
diff --git a/examples/lightrag_openai_demo.py b/examples/lightrag_openai_demo.py
index a6e7f3b2..29bc75ca 100644
--- a/examples/lightrag_openai_demo.py
+++ b/examples/lightrag_openai_demo.py
@@ -15,7 +15,7 @@
)
-with open("./book.txt") as f:
+with open("./book.txt", "r", encoding="utf-8") as f:
rag.insert(f.read())
# Perform naive search
From c9baa4ebeb75c4b8920f302a985476fee92d9127 Mon Sep 17 00:00:00 2001
From: Andrii Lazarchuk
Date: Tue, 22 Oct 2024 14:35:42 +0000
Subject: [PATCH 080/258] Finetune example to be able to run ollama example
without need to tweak context size in Modelfile
---
examples/lightrag_ollama_demo.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index 93196066..6070131f 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -15,9 +15,10 @@
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=ollama_model_complete,
- llm_model_name="mistral:7b",
- llm_model_max_async=2,
- llm_model_kwargs={"host": "http://localhost:11434"},
+ llm_model_name="gemma2:2b",
+ llm_model_max_async=4,
+ llm_model_max_token_size=32768,
+ llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}},
embedding_func=EmbeddingFunc(
embedding_dim=768,
max_token_size=8192,
@@ -27,7 +28,6 @@
),
)
-
with open("./book.txt") as f:
rag.insert(f.read())
From 7fa7bd546396f6414be4fafc937eb6a307b04404 Mon Sep 17 00:00:00 2001
From: tpoisonooo
Date: Wed, 23 Oct 2024 11:24:52 +0800
Subject: [PATCH 081/258] Update lightrag_siliconcloud_demo.py
---
examples/lightrag_siliconcloud_demo.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/lightrag_siliconcloud_demo.py b/examples/lightrag_siliconcloud_demo.py
index e3f5e67e..8be6ae7a 100644
--- a/examples/lightrag_siliconcloud_demo.py
+++ b/examples/lightrag_siliconcloud_demo.py
@@ -30,7 +30,7 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
texts,
model="netease-youdao/bce-embedding-base_v1",
api_key=os.getenv("UPSTAGE_API_KEY"),
- max_token_size=int(512 * 1.5)
+ max_token_size=512
)
From e20d2a040863d58f670b6ef5eff1c67f007fd4d6 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Wed, 23 Oct 2024 11:50:29 +0800
Subject: [PATCH 082/258] Update base.py
---
lightrag/base.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lightrag/base.py b/lightrag/base.py
index 50be4f62..cecd5edd 100644
--- a/lightrag/base.py
+++ b/lightrag/base.py
@@ -18,9 +18,13 @@ class QueryParam:
mode: Literal["local", "global", "hybrid", "naive"] = "global"
only_need_context: bool = False
response_type: str = "Multiple Paragraphs"
+ # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
top_k: int = 60
+ # Number of tokens for the original chunks.
max_token_for_text_unit: int = 4000
+ # Number of tokens for the relationship descriptions
max_token_for_global_context: int = 4000
+ # Number of tokens for the entity descriptions
max_token_for_local_context: int = 4000
From 0bfcc00bdf2fb569cb9e191ae4bb5212b735c96c Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Wed, 23 Oct 2024 11:53:43 +0800
Subject: [PATCH 083/258] Update README.md
---
README.md | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/README.md b/README.md
index 87335f1f..42a7d5db 100644
--- a/README.md
+++ b/README.md
@@ -203,6 +203,21 @@ ollama create -f Modelfile qwen2m
```
+### Query Param
+```python
+class QueryParam:
+ mode: Literal["local", "global", "hybrid", "naive"] = "global"
+ only_need_context: bool = False
+ response_type: str = "Multiple Paragraphs"
+ # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
+ top_k: int = 60
+ # Number of tokens for the original chunks.
+ max_token_for_text_unit: int = 4000
+ # Number of tokens for the relationship descriptions
+ max_token_for_global_context: int = 4000
+ # Number of tokens for the entity descriptions
+ max_token_for_local_context: int = 4000
+```
### Batch Insert
```python
From 2fb3fd25b018fc2a4c1b7a075a298453186a792b Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Wed, 23 Oct 2024 11:54:22 +0800
Subject: [PATCH 084/258] Update README.md
---
README.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/README.md b/README.md
index 42a7d5db..41cb4362 100644
--- a/README.md
+++ b/README.md
@@ -204,6 +204,7 @@ ollama create -f Modelfile qwen2m
### Query Param
+
```python
class QueryParam:
mode: Literal["local", "global", "hybrid", "naive"] = "global"
@@ -220,6 +221,7 @@ class QueryParam:
```
### Batch Insert
+
```python
# Batch Insert: Insert multiple texts at once
rag.insert(["TEXT1", "TEXT2",...])
From 5972958e79d42e9770c4a4b2de64d577bad3bcac Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Wed, 23 Oct 2024 12:15:23 +0800
Subject: [PATCH 085/258] Update README.md
---
README.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/README.md b/README.md
index 41cb4362..dbabcb56 100644
--- a/README.md
+++ b/README.md
@@ -203,6 +203,7 @@ ollama create -f Modelfile qwen2m
```
+
### Query Param
```python
From 63c0283514954fc6f4c1f429cfcd4015136c750c Mon Sep 17 00:00:00 2001
From: tackhwa
Date: Wed, 23 Oct 2024 15:02:28 +0800
Subject: [PATCH 086/258] fix hf bug
---
examples/lightrag_siliconcloud_demo.py | 4 ++--
lightrag/llm.py | 12 ++++++++++--
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/examples/lightrag_siliconcloud_demo.py b/examples/lightrag_siliconcloud_demo.py
index 8be6ae7a..82cab228 100644
--- a/examples/lightrag_siliconcloud_demo.py
+++ b/examples/lightrag_siliconcloud_demo.py
@@ -19,7 +19,7 @@ async def llm_model_func(
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
- api_key=os.getenv("UPSTAGE_API_KEY"),
+ api_key=os.getenv("SILICONFLOW_API_KEY"),
base_url="https://api.siliconflow.cn/v1/",
**kwargs,
)
@@ -29,7 +29,7 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
return await siliconcloud_embedding(
texts,
model="netease-youdao/bce-embedding-base_v1",
- api_key=os.getenv("UPSTAGE_API_KEY"),
+ api_key=os.getenv("SILICONFLOW_API_KEY"),
max_token_size=512
)
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 67f547ea..76adec26 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -1,5 +1,6 @@
import os
import copy
+from functools import lru_cache
import json
import aioboto3
import aiohttp
@@ -202,15 +203,22 @@ async def bedrock_complete_if_cache(
return response["output"]["message"]["content"][0]["text"]
+@lru_cache(maxsize=1)
+def initialize_hf_model(model_name):
+ hf_tokenizer = AutoTokenizer.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
+ hf_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
+
+ return hf_model, hf_tokenizer
+
+
async def hf_model_if_cache(
model, prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
model_name = model
- hf_tokenizer = AutoTokenizer.from_pretrained(model_name, device_map="auto")
+ hf_model, hf_tokenizer = initialize_hf_model(model_name)
if hf_tokenizer.pad_token is None:
# print("use eos token")
hf_tokenizer.pad_token = hf_tokenizer.eos_token
- hf_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
messages = []
if system_prompt:
From fd30ae4e4587946bea4edd8a6289cef4bf5a58e3 Mon Sep 17 00:00:00 2001
From: tackhwa
Date: Wed, 23 Oct 2024 15:25:46 +0800
Subject: [PATCH 087/258] move_code
---
lightrag/llm.py | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 76adec26..4dcf535c 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -207,6 +207,8 @@ async def bedrock_complete_if_cache(
def initialize_hf_model(model_name):
hf_tokenizer = AutoTokenizer.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
hf_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
+ if hf_tokenizer.pad_token is None:
+ hf_tokenizer.pad_token = hf_tokenizer.eos_token
return hf_model, hf_tokenizer
@@ -216,9 +218,6 @@ async def hf_model_if_cache(
) -> str:
model_name = model
hf_model, hf_tokenizer = initialize_hf_model(model_name)
- if hf_tokenizer.pad_token is None:
- # print("use eos token")
- hf_tokenizer.pad_token = hf_tokenizer.eos_token
hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
messages = []
if system_prompt:
From f0856b918bc100f496272fb6ae4951d6f8620da4 Mon Sep 17 00:00:00 2001
From: Zhenyu Pan <120090196@link.cuhk.edu.cn>
Date: Thu, 24 Oct 2024 00:58:52 +0800
Subject: [PATCH 088/258] [hotfix-#75][embedding] Fix the potential embedding
problem
---
examples/lightrag_openai_compatible_demo.py | 68 +++++++++++++--------
1 file changed, 42 insertions(+), 26 deletions(-)
diff --git a/examples/lightrag_openai_compatible_demo.py b/examples/lightrag_openai_compatible_demo.py
index aae56821..25d3722c 100644
--- a/examples/lightrag_openai_compatible_demo.py
+++ b/examples/lightrag_openai_compatible_demo.py
@@ -34,6 +34,13 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
)
+async def get_embedding_dim():
+ test_text = ["This is a test sentence."]
+ embedding = await embedding_func(test_text)
+ embedding_dim = embedding.shape[1]
+ return embedding_dim
+
+
# function test
async def test_funcs():
result = await llm_model_func("How are you?")
@@ -43,37 +50,46 @@ async def test_funcs():
print("embedding_func: ", result)
-asyncio.run(test_funcs())
+# asyncio.run(test_funcs())
+
+async def main():
+ try:
+ embedding_dimension = await get_embedding_dim()
+ print(f"Detected embedding dimension: {embedding_dimension}")
+ rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=llm_model_func,
+ embedding_func=EmbeddingFunc(
+ embedding_dim=embedding_dimension, max_token_size=8192, func=embedding_func
+ ),
+ )
-rag = LightRAG(
- working_dir=WORKING_DIR,
- llm_model_func=llm_model_func,
- embedding_func=EmbeddingFunc(
- embedding_dim=4096, max_token_size=8192, func=embedding_func
- ),
-)
+ with open("./book.txt", "r", encoding="utf-8") as f:
+ rag.insert(f.read())
-with open("./book.txt", "r", encoding="utf-8") as f:
- rag.insert(f.read())
+ # Perform naive search
+ print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+ )
-# Perform naive search
-print(
- rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
-)
+ # Perform local search
+ print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+ )
-# Perform local search
-print(
- rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
-)
+ # Perform global search
+ print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+ )
-# Perform global search
-print(
- rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
-)
+ # Perform hybrid search
+ print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+ )
+ except Exception as e:
+ print(f"An error occurred: {e}")
-# Perform hybrid search
-print(
- rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
-)
+if __name__ == "__main__":
+ asyncio.run(main())
\ No newline at end of file
From 516b4dfb22afec7d686e64d04534790affa22b1c Mon Sep 17 00:00:00 2001
From: tpoisonooo
Date: Fri, 25 Oct 2024 14:14:36 +0800
Subject: [PATCH 089/258] Update lightrag.py
---
lightrag/lightrag.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 5137af42..b84e22ef 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -208,7 +208,7 @@ async def ainsert(self, string_or_strings):
logger.info("[Entity Extraction]...")
maybe_new_kg = await extract_entities(
inserting_chunks,
- knwoledge_graph_inst=self.chunk_entity_relation_graph,
+ knowledge_graph_inst=self.chunk_entity_relation_graph,
entity_vdb=self.entities_vdb,
relationships_vdb=self.relationships_vdb,
global_config=asdict(self),
From ef41871b88c177584a08aba2bb9ab0dcfb612e5b Mon Sep 17 00:00:00 2001
From: tpoisonooo
Date: Fri, 25 Oct 2024 14:15:31 +0800
Subject: [PATCH 090/258] Update operate.py
---
lightrag/operate.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lightrag/operate.py b/lightrag/operate.py
index a0729cd8..b90a1ca1 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -124,14 +124,14 @@ async def _handle_single_relationship_extraction(
async def _merge_nodes_then_upsert(
entity_name: str,
nodes_data: list[dict],
- knwoledge_graph_inst: BaseGraphStorage,
+ knowledge_graph_inst: BaseGraphStorage,
global_config: dict,
):
already_entitiy_types = []
already_source_ids = []
already_description = []
- already_node = await knwoledge_graph_inst.get_node(entity_name)
+ already_node = await knowledge_graph_inst.get_node(entity_name)
if already_node is not None:
already_entitiy_types.append(already_node["entity_type"])
already_source_ids.extend(
From 8fbbf70a8311423ad585f54389ae895d78aa0a6f Mon Sep 17 00:00:00 2001
From: Sanketh Kumar
Date: Fri, 25 Oct 2024 13:23:08 +0530
Subject: [PATCH 091/258] Added linting actions for pull request
---
.github/workflows/linting.yaml | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
create mode 100644 .github/workflows/linting.yaml
diff --git a/.github/workflows/linting.yaml b/.github/workflows/linting.yaml
new file mode 100644
index 00000000..32886cb0
--- /dev/null
+++ b/.github/workflows/linting.yaml
@@ -0,0 +1,30 @@
+name: Linting and Formatting
+
+on:
+ push:
+ branches:
+ - main
+ pull_request:
+ branches:
+ - main
+
+jobs:
+ lint-and-format:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v2
+
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: '3.x'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install pre-commit
+
+ - name: Run pre-commit
+ run: pre-commit run --all-files
\ No newline at end of file
From 5e3ab98d8321f436c313cd1f8d0b1d410e8b91aa Mon Sep 17 00:00:00 2001
From: Sanketh Kumar
Date: Fri, 25 Oct 2024 13:32:25 +0530
Subject: [PATCH 092/258] Manually reformatted files
---
.github/workflows/linting.yaml | 4 +-
.gitignore | 2 +-
README.md | 12 +--
examples/graph_visual_with_html.py | 6 +-
examples/graph_visual_with_neo4j.py | 30 +++---
examples/lightrag_openai_compatible_demo.py | 27 ++++--
examples/lightrag_siliconcloud_demo.py | 2 +-
examples/vram_management_demo.py | 36 +++++--
lightrag/llm.py | 101 ++++++++++++--------
lightrag/utils.py | 46 +++++----
requirements.txt | 4 +-
11 files changed, 175 insertions(+), 95 deletions(-)
diff --git a/.github/workflows/linting.yaml b/.github/workflows/linting.yaml
index 32886cb0..7c12e0a2 100644
--- a/.github/workflows/linting.yaml
+++ b/.github/workflows/linting.yaml
@@ -15,7 +15,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v2
-
+
- name: Set up Python
uses: actions/setup-python@v2
with:
@@ -27,4 +27,4 @@ jobs:
pip install pre-commit
- name: Run pre-commit
- run: pre-commit run --all-files
\ No newline at end of file
+ run: pre-commit run --all-files
diff --git a/.gitignore b/.gitignore
index 5a41ae32..fd4bd830 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,4 @@ dickens/
book.txt
lightrag-dev/
.idea/
-dist/
\ No newline at end of file
+dist/
diff --git a/README.md b/README.md
index dbabcb56..abd7ceb9 100644
--- a/README.md
+++ b/README.md
@@ -58,8 +58,8 @@ from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
#########
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
-# import nest_asyncio
-# nest_asyncio.apply()
+# import nest_asyncio
+# nest_asyncio.apply()
#########
WORKING_DIR = "./dickens"
@@ -157,7 +157,7 @@ rag = LightRAG(
Using Ollama Models
-
+
* If you want to use Ollama models, you only need to set LightRAG as follows:
```python
@@ -328,8 +328,8 @@ def main():
SET e.entity_type = node.entity_type,
e.description = node.description,
e.source_id = node.source_id,
- e.displayName = node.id
- REMOVE e:Entity
+ e.displayName = node.id
+ REMOVE e:Entity
WITH e, node
CALL apoc.create.addLabels(e, [node.entity_type]) YIELD node AS labeledNode
RETURN count(*)
@@ -382,7 +382,7 @@ def main():
except Exception as e:
print(f"Error occurred: {e}")
-
+
finally:
driver.close()
diff --git a/examples/graph_visual_with_html.py b/examples/graph_visual_with_html.py
index b455e6de..e4337a54 100644
--- a/examples/graph_visual_with_html.py
+++ b/examples/graph_visual_with_html.py
@@ -3,7 +3,7 @@
import random
# Load the GraphML file
-G = nx.read_graphml('./dickens/graph_chunk_entity_relation.graphml')
+G = nx.read_graphml("./dickens/graph_chunk_entity_relation.graphml")
# Create a Pyvis network
net = Network(notebook=True)
@@ -13,7 +13,7 @@
# Add colors to nodes
for node in net.nodes:
- node['color'] = "#{:06x}".format(random.randint(0, 0xFFFFFF))
+ node["color"] = "#{:06x}".format(random.randint(0, 0xFFFFFF))
# Save and display the network
-net.show('knowledge_graph.html')
\ No newline at end of file
+net.show("knowledge_graph.html")
diff --git a/examples/graph_visual_with_neo4j.py b/examples/graph_visual_with_neo4j.py
index 22dde368..7377f21c 100644
--- a/examples/graph_visual_with_neo4j.py
+++ b/examples/graph_visual_with_neo4j.py
@@ -13,6 +13,7 @@
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = "your_password"
+
def convert_xml_to_json(xml_path, output_path):
"""Converts XML file to JSON and saves the output."""
if not os.path.exists(xml_path):
@@ -21,7 +22,7 @@ def convert_xml_to_json(xml_path, output_path):
json_data = xml_to_json(xml_path)
if json_data:
- with open(output_path, 'w', encoding='utf-8') as f:
+ with open(output_path, "w", encoding="utf-8") as f:
json.dump(json_data, f, ensure_ascii=False, indent=2)
print(f"JSON file created: {output_path}")
return json_data
@@ -29,16 +30,18 @@ def convert_xml_to_json(xml_path, output_path):
print("Failed to create JSON data")
return None
+
def process_in_batches(tx, query, data, batch_size):
"""Process data in batches and execute the given query."""
for i in range(0, len(data), batch_size):
- batch = data[i:i + batch_size]
+ batch = data[i : i + batch_size]
tx.run(query, {"nodes": batch} if "nodes" in query else {"edges": batch})
+
def main():
# Paths
- xml_file = os.path.join(WORKING_DIR, 'graph_chunk_entity_relation.graphml')
- json_file = os.path.join(WORKING_DIR, 'graph_data.json')
+ xml_file = os.path.join(WORKING_DIR, "graph_chunk_entity_relation.graphml")
+ json_file = os.path.join(WORKING_DIR, "graph_data.json")
# Convert XML to JSON
json_data = convert_xml_to_json(xml_file, json_file)
@@ -46,8 +49,8 @@ def main():
return
# Load nodes and edges
- nodes = json_data.get('nodes', [])
- edges = json_data.get('edges', [])
+ nodes = json_data.get("nodes", [])
+ edges = json_data.get("edges", [])
# Neo4j queries
create_nodes_query = """
@@ -56,8 +59,8 @@ def main():
SET e.entity_type = node.entity_type,
e.description = node.description,
e.source_id = node.source_id,
- e.displayName = node.id
- REMOVE e:Entity
+ e.displayName = node.id
+ REMOVE e:Entity
WITH e, node
CALL apoc.create.addLabels(e, [node.entity_type]) YIELD node AS labeledNode
RETURN count(*)
@@ -100,19 +103,24 @@ def main():
# Execute queries in batches
with driver.session() as session:
# Insert nodes in batches
- session.execute_write(process_in_batches, create_nodes_query, nodes, BATCH_SIZE_NODES)
+ session.execute_write(
+ process_in_batches, create_nodes_query, nodes, BATCH_SIZE_NODES
+ )
# Insert edges in batches
- session.execute_write(process_in_batches, create_edges_query, edges, BATCH_SIZE_EDGES)
+ session.execute_write(
+ process_in_batches, create_edges_query, edges, BATCH_SIZE_EDGES
+ )
# Set displayName and labels
session.run(set_displayname_and_labels_query)
except Exception as e:
print(f"Error occurred: {e}")
-
+
finally:
driver.close()
+
if __name__ == "__main__":
main()
diff --git a/examples/lightrag_openai_compatible_demo.py b/examples/lightrag_openai_compatible_demo.py
index 25d3722c..2470fc00 100644
--- a/examples/lightrag_openai_compatible_demo.py
+++ b/examples/lightrag_openai_compatible_demo.py
@@ -52,6 +52,7 @@ async def test_funcs():
# asyncio.run(test_funcs())
+
async def main():
try:
embedding_dimension = await get_embedding_dim()
@@ -61,35 +62,47 @@ async def main():
working_dir=WORKING_DIR,
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
- embedding_dim=embedding_dimension, max_token_size=8192, func=embedding_func
+ embedding_dim=embedding_dimension,
+ max_token_size=8192,
+ func=embedding_func,
),
)
-
with open("./book.txt", "r", encoding="utf-8") as f:
rag.insert(f.read())
# Perform naive search
print(
- rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+ rag.query(
+ "What are the top themes in this story?", param=QueryParam(mode="naive")
+ )
)
# Perform local search
print(
- rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+ rag.query(
+ "What are the top themes in this story?", param=QueryParam(mode="local")
+ )
)
# Perform global search
print(
- rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+ rag.query(
+ "What are the top themes in this story?",
+ param=QueryParam(mode="global"),
+ )
)
# Perform hybrid search
print(
- rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+ rag.query(
+ "What are the top themes in this story?",
+ param=QueryParam(mode="hybrid"),
+ )
)
except Exception as e:
print(f"An error occurred: {e}")
+
if __name__ == "__main__":
- asyncio.run(main())
\ No newline at end of file
+ asyncio.run(main())
diff --git a/examples/lightrag_siliconcloud_demo.py b/examples/lightrag_siliconcloud_demo.py
index 82cab228..a73f16c5 100644
--- a/examples/lightrag_siliconcloud_demo.py
+++ b/examples/lightrag_siliconcloud_demo.py
@@ -30,7 +30,7 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
texts,
model="netease-youdao/bce-embedding-base_v1",
api_key=os.getenv("SILICONFLOW_API_KEY"),
- max_token_size=512
+ max_token_size=512,
)
diff --git a/examples/vram_management_demo.py b/examples/vram_management_demo.py
index ec750254..c173b913 100644
--- a/examples/vram_management_demo.py
+++ b/examples/vram_management_demo.py
@@ -27,11 +27,12 @@
# Read all .txt files from the TEXT_FILES_DIR directory
texts = []
for filename in os.listdir(TEXT_FILES_DIR):
- if filename.endswith('.txt'):
+ if filename.endswith(".txt"):
file_path = os.path.join(TEXT_FILES_DIR, filename)
- with open(file_path, 'r', encoding='utf-8') as file:
+ with open(file_path, "r", encoding="utf-8") as file:
texts.append(file.read())
+
# Batch insert texts into LightRAG with a retry mechanism
def insert_texts_with_retry(rag, texts, retries=3, delay=5):
for _ in range(retries):
@@ -39,37 +40,58 @@ def insert_texts_with_retry(rag, texts, retries=3, delay=5):
rag.insert(texts)
return
except Exception as e:
- print(f"Error occurred during insertion: {e}. Retrying in {delay} seconds...")
+ print(
+ f"Error occurred during insertion: {e}. Retrying in {delay} seconds..."
+ )
time.sleep(delay)
raise RuntimeError("Failed to insert texts after multiple retries.")
+
insert_texts_with_retry(rag, texts)
# Perform different types of queries and handle potential errors
try:
- print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+ print(
+ rag.query(
+ "What are the top themes in this story?", param=QueryParam(mode="naive")
+ )
+ )
except Exception as e:
print(f"Error performing naive search: {e}")
try:
- print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+ print(
+ rag.query(
+ "What are the top themes in this story?", param=QueryParam(mode="local")
+ )
+ )
except Exception as e:
print(f"Error performing local search: {e}")
try:
- print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+ print(
+ rag.query(
+ "What are the top themes in this story?", param=QueryParam(mode="global")
+ )
+ )
except Exception as e:
print(f"Error performing global search: {e}")
try:
- print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
+ print(
+ rag.query(
+ "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+ )
+ )
except Exception as e:
print(f"Error performing hybrid search: {e}")
+
# Function to clear VRAM resources
def clear_vram():
os.system("sudo nvidia-smi --gpu-reset")
+
# Regularly clear VRAM to prevent overflow
clear_vram_interval = 3600 # Clear once every hour
start_time = time.time()
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 4dcf535c..eaaa2b75 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -7,7 +7,13 @@
import numpy as np
import ollama
-from openai import AsyncOpenAI, APIConnectionError, RateLimitError, Timeout, AsyncAzureOpenAI
+from openai import (
+ AsyncOpenAI,
+ APIConnectionError,
+ RateLimitError,
+ Timeout,
+ AsyncAzureOpenAI,
+)
import base64
import struct
@@ -70,26 +76,31 @@ async def openai_complete_if_cache(
)
return response.choices[0].message.content
+
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10),
retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
)
-async def azure_openai_complete_if_cache(model,
+async def azure_openai_complete_if_cache(
+ model,
prompt,
system_prompt=None,
history_messages=[],
base_url=None,
api_key=None,
- **kwargs):
+ **kwargs,
+):
if api_key:
os.environ["AZURE_OPENAI_API_KEY"] = api_key
if base_url:
os.environ["AZURE_OPENAI_ENDPOINT"] = base_url
- openai_async_client = AsyncAzureOpenAI(azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
- api_key=os.getenv("AZURE_OPENAI_API_KEY"),
- api_version=os.getenv("AZURE_OPENAI_API_VERSION"))
+ openai_async_client = AsyncAzureOpenAI(
+ azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+ api_key=os.getenv("AZURE_OPENAI_API_KEY"),
+ api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
+ )
hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
messages = []
@@ -114,6 +125,7 @@ async def azure_openai_complete_if_cache(model,
)
return response.choices[0].message.content
+
class BedrockError(Exception):
"""Generic error for issues related to Amazon Bedrock"""
@@ -205,8 +217,12 @@ async def bedrock_complete_if_cache(
@lru_cache(maxsize=1)
def initialize_hf_model(model_name):
- hf_tokenizer = AutoTokenizer.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
- hf_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
+ hf_tokenizer = AutoTokenizer.from_pretrained(
+ model_name, device_map="auto", trust_remote_code=True
+ )
+ hf_model = AutoModelForCausalLM.from_pretrained(
+ model_name, device_map="auto", trust_remote_code=True
+ )
if hf_tokenizer.pad_token is None:
hf_tokenizer.pad_token = hf_tokenizer.eos_token
@@ -328,8 +344,9 @@ async def gpt_4o_mini_complete(
**kwargs,
)
+
async def azure_openai_complete(
- prompt, system_prompt=None, history_messages=[], **kwargs
+ prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
return await azure_openai_complete_if_cache(
"conversation-4o-mini",
@@ -339,6 +356,7 @@ async def azure_openai_complete(
**kwargs,
)
+
async def bedrock_complete(
prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
@@ -418,9 +436,11 @@ async def azure_openai_embedding(
if base_url:
os.environ["AZURE_OPENAI_ENDPOINT"] = base_url
- openai_async_client = AsyncAzureOpenAI(azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
- api_key=os.getenv("AZURE_OPENAI_API_KEY"),
- api_version=os.getenv("AZURE_OPENAI_API_VERSION"))
+ openai_async_client = AsyncAzureOpenAI(
+ azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+ api_key=os.getenv("AZURE_OPENAI_API_KEY"),
+ api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
+ )
response = await openai_async_client.embeddings.create(
model=model, input=texts, encoding_format="float"
@@ -440,35 +460,28 @@ async def siliconcloud_embedding(
max_token_size: int = 512,
api_key: str = None,
) -> np.ndarray:
- if api_key and not api_key.startswith('Bearer '):
- api_key = 'Bearer ' + api_key
+ if api_key and not api_key.startswith("Bearer "):
+ api_key = "Bearer " + api_key
- headers = {
- "Authorization": api_key,
- "Content-Type": "application/json"
- }
+ headers = {"Authorization": api_key, "Content-Type": "application/json"}
truncate_texts = [text[0:max_token_size] for text in texts]
- payload = {
- "model": model,
- "input": truncate_texts,
- "encoding_format": "base64"
- }
+ payload = {"model": model, "input": truncate_texts, "encoding_format": "base64"}
base64_strings = []
async with aiohttp.ClientSession() as session:
async with session.post(base_url, headers=headers, json=payload) as response:
content = await response.json()
- if 'code' in content:
+ if "code" in content:
raise ValueError(content)
- base64_strings = [item['embedding'] for item in content['data']]
-
+ base64_strings = [item["embedding"] for item in content["data"]]
+
embeddings = []
for string in base64_strings:
decode_bytes = base64.b64decode(string)
n = len(decode_bytes) // 4
- float_array = struct.unpack('<' + 'f' * n, decode_bytes)
+ float_array = struct.unpack("<" + "f" * n, decode_bytes)
embeddings.append(float_array)
return np.array(embeddings)
@@ -563,6 +576,7 @@ async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray:
return embed_text
+
class Model(BaseModel):
"""
This is a Pydantic model class named 'Model' that is used to define a custom language model.
@@ -580,14 +594,20 @@ class Model(BaseModel):
The 'kwargs' dictionary contains the model name and API key to be passed to the function.
"""
- gen_func: Callable[[Any], str] = Field(..., description="A function that generates the response from the llm. The response must be a string")
- kwargs: Dict[str, Any] = Field(..., description="The arguments to pass to the callable function. Eg. the api key, model name, etc")
+ gen_func: Callable[[Any], str] = Field(
+ ...,
+ description="A function that generates the response from the llm. The response must be a string",
+ )
+ kwargs: Dict[str, Any] = Field(
+ ...,
+ description="The arguments to pass to the callable function. Eg. the api key, model name, etc",
+ )
class Config:
arbitrary_types_allowed = True
-class MultiModel():
+class MultiModel:
"""
Distributes the load across multiple language models. Useful for circumventing low rate limits with certain api providers especially if you are on the free tier.
Could also be used for spliting across diffrent models or providers.
@@ -611,26 +631,31 @@ class MultiModel():
)
```
"""
+
def __init__(self, models: List[Model]):
self._models = models
self._current_model = 0
-
+
def _next_model(self):
self._current_model = (self._current_model + 1) % len(self._models)
return self._models[self._current_model]
async def llm_model_func(
- self,
- prompt, system_prompt=None, history_messages=[], **kwargs
+ self, prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
- kwargs.pop("model", None) # stop from overwriting the custom model name
+ kwargs.pop("model", None) # stop from overwriting the custom model name
next_model = self._next_model()
- args = dict(prompt=prompt, system_prompt=system_prompt, history_messages=history_messages, **kwargs, **next_model.kwargs)
-
- return await next_model.gen_func(
- **args
+ args = dict(
+ prompt=prompt,
+ system_prompt=system_prompt,
+ history_messages=history_messages,
+ **kwargs,
+ **next_model.kwargs,
)
+ return await next_model.gen_func(**args)
+
+
if __name__ == "__main__":
import asyncio
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 9a68c16b..0da4a51a 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -185,6 +185,7 @@ def save_data_to_file(data, file_name):
with open(file_name, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=4)
+
def xml_to_json(xml_file):
try:
tree = ET.parse(xml_file)
@@ -194,31 +195,42 @@ def xml_to_json(xml_file):
print(f"Root element: {root.tag}")
print(f"Root attributes: {root.attrib}")
- data = {
- "nodes": [],
- "edges": []
- }
+ data = {"nodes": [], "edges": []}
# Use namespace
- namespace = {'': 'http://graphml.graphdrawing.org/xmlns'}
+ namespace = {"": "http://graphml.graphdrawing.org/xmlns"}
- for node in root.findall('.//node', namespace):
+ for node in root.findall(".//node", namespace):
node_data = {
- "id": node.get('id').strip('"'),
- "entity_type": node.find("./data[@key='d0']", namespace).text.strip('"') if node.find("./data[@key='d0']", namespace) is not None else "",
- "description": node.find("./data[@key='d1']", namespace).text if node.find("./data[@key='d1']", namespace) is not None else "",
- "source_id": node.find("./data[@key='d2']", namespace).text if node.find("./data[@key='d2']", namespace) is not None else ""
+ "id": node.get("id").strip('"'),
+ "entity_type": node.find("./data[@key='d0']", namespace).text.strip('"')
+ if node.find("./data[@key='d0']", namespace) is not None
+ else "",
+ "description": node.find("./data[@key='d1']", namespace).text
+ if node.find("./data[@key='d1']", namespace) is not None
+ else "",
+ "source_id": node.find("./data[@key='d2']", namespace).text
+ if node.find("./data[@key='d2']", namespace) is not None
+ else "",
}
data["nodes"].append(node_data)
- for edge in root.findall('.//edge', namespace):
+ for edge in root.findall(".//edge", namespace):
edge_data = {
- "source": edge.get('source').strip('"'),
- "target": edge.get('target').strip('"'),
- "weight": float(edge.find("./data[@key='d3']", namespace).text) if edge.find("./data[@key='d3']", namespace) is not None else 0.0,
- "description": edge.find("./data[@key='d4']", namespace).text if edge.find("./data[@key='d4']", namespace) is not None else "",
- "keywords": edge.find("./data[@key='d5']", namespace).text if edge.find("./data[@key='d5']", namespace) is not None else "",
- "source_id": edge.find("./data[@key='d6']", namespace).text if edge.find("./data[@key='d6']", namespace) is not None else ""
+ "source": edge.get("source").strip('"'),
+ "target": edge.get("target").strip('"'),
+ "weight": float(edge.find("./data[@key='d3']", namespace).text)
+ if edge.find("./data[@key='d3']", namespace) is not None
+ else 0.0,
+ "description": edge.find("./data[@key='d4']", namespace).text
+ if edge.find("./data[@key='d4']", namespace) is not None
+ else "",
+ "keywords": edge.find("./data[@key='d5']", namespace).text
+ if edge.find("./data[@key='d5']", namespace) is not None
+ else "",
+ "source_id": edge.find("./data[@key='d6']", namespace).text
+ if edge.find("./data[@key='d6']", namespace) is not None
+ else "",
}
data["edges"].append(edge_data)
diff --git a/requirements.txt b/requirements.txt
index 5b3396fb..98f32b0a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,15 +1,15 @@
accelerate
aioboto3
+aiohttp
graspologic
hnswlib
nano-vectordb
networkx
ollama
openai
+pyvis
tenacity
tiktoken
torch
transformers
xxhash
-pyvis
-aiohttp
\ No newline at end of file
From a16831616ee7b745ffdf7db3ee846c942a516f31 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Fri, 25 Oct 2024 19:25:26 +0800
Subject: [PATCH 093/258] fix Step_3_openai_compatible.py
---
reproduce/Step_3_openai_compatible.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/reproduce/Step_3_openai_compatible.py b/reproduce/Step_3_openai_compatible.py
index 2be5ea5c..5e2ef778 100644
--- a/reproduce/Step_3_openai_compatible.py
+++ b/reproduce/Step_3_openai_compatible.py
@@ -50,8 +50,8 @@ def extract_queries(file_path):
async def process_query(query_text, rag_instance, query_param):
try:
- result, context = await rag_instance.aquery(query_text, param=query_param)
- return {"query": query_text, "result": result, "context": context}, None
+ result = await rag_instance.aquery(query_text, param=query_param)
+ return {"query": query_text, "result": result}, None
except Exception as e:
return None, {"query": query_text, "error": str(e)}
From 72ce8b85f4e6e8144bb3ee2d690df9368bba351c Mon Sep 17 00:00:00 2001
From: jatin009v
Date: Fri, 25 Oct 2024 18:39:55 +0530
Subject: [PATCH 094/258] Key Enhancements: Error Handling:
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Handled potential FileNotFoundError for README.md and requirements.txt.
Checked for missing required metadata and raised an informative error if any are missing.
Automated Package Discovery:
Replaced packages=["lightrag"] with setuptools.find_packages() to automatically find sub-packages and exclude test or documentation directories.
Additional Metadata:
Added Development Status in classifiers to indicate a "Beta" release (modify based on the project's maturity).
Used project_urls to link documentation, source code, and an issue tracker, which are standard for open-source projects.
Compatibility:
Included include_package_data=True to include additional files specified in MANIFEST.in.
These changes enhance the readability, reliability, and openness of the code, making it more contributor-friendly and ensuring it’s ready for open-source distribution.
---
setup.py | 74 ++++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 53 insertions(+), 21 deletions(-)
diff --git a/setup.py b/setup.py
index 47222420..bdf49f02 100644
--- a/setup.py
+++ b/setup.py
@@ -1,39 +1,71 @@
import setuptools
+from pathlib import Path
-with open("README.md", "r", encoding="utf-8") as fh:
- long_description = fh.read()
+# Reading the long description from README.md
+def read_long_description():
+ try:
+ return Path("README.md").read_text(encoding="utf-8")
+ except FileNotFoundError:
+ return "A description of LightRAG is currently unavailable."
+# Retrieving metadata from __init__.py
+def retrieve_metadata():
+ vars2find = ["__author__", "__version__", "__url__"]
+ vars2readme = {}
+ try:
+ with open("./lightrag/__init__.py") as f:
+ for line in f.readlines():
+ for v in vars2find:
+ if line.startswith(v):
+ line = line.replace(" ", "").replace('"', "").replace("'", "").strip()
+ vars2readme[v] = line.split("=")[1]
+ except FileNotFoundError:
+ raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.")
+
+ # Checking if all required variables are found
+ missing_vars = [v for v in vars2find if v not in vars2readme]
+ if missing_vars:
+ raise ValueError(f"Missing required metadata variables in __init__.py: {missing_vars}")
+
+ return vars2readme
-vars2find = ["__author__", "__version__", "__url__"]
-vars2readme = {}
-with open("./lightrag/__init__.py") as f:
- for line in f.readlines():
- for v in vars2find:
- if line.startswith(v):
- line = line.replace(" ", "").replace('"', "").replace("'", "").strip()
- vars2readme[v] = line.split("=")[1]
+# Reading dependencies from requirements.txt
+def read_requirements():
+ deps = []
+ try:
+ with open("./requirements.txt") as f:
+ deps = [line.strip() for line in f if line.strip()]
+ except FileNotFoundError:
+ print("Warning: 'requirements.txt' not found. No dependencies will be installed.")
+ return deps
-deps = []
-with open("./requirements.txt") as f:
- for line in f.readlines():
- if not line.strip():
- continue
- deps.append(line.strip())
+metadata = retrieve_metadata()
+long_description = read_long_description()
+requirements = read_requirements()
setuptools.setup(
name="lightrag-hku",
- url=vars2readme["__url__"],
- version=vars2readme["__version__"],
- author=vars2readme["__author__"],
+ url=metadata["__url__"],
+ version=metadata["__version__"],
+ author=metadata["__author__"],
description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
long_description=long_description,
long_description_content_type="text/markdown",
- packages=["lightrag"],
+ packages=setuptools.find_packages(exclude=("tests*", "docs*")), # Automatically find packages
classifiers=[
+ "Development Status :: 4 - Beta",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
+ "Intended Audience :: Developers",
+ "Topic :: Software Development :: Libraries :: Python Modules",
],
python_requires=">=3.9",
- install_requires=deps,
+ install_requires=requirements,
+ include_package_data=True, # Includes non-code files from MANIFEST.in
+ project_urls={ # Additional project metadata
+ "Documentation": metadata.get("__url__", ""),
+ "Source": metadata.get("__url__", ""),
+ "Tracker": f"{metadata.get('__url__', '')}/issues" if metadata.get("__url__") else ""
+ },
)
From 526cc06e9781d8a4d4e9fb8d15873fcc9c9cbc99 Mon Sep 17 00:00:00 2001
From: Ken Wiltshire
Date: Fri, 25 Oct 2024 11:28:41 -0400
Subject: [PATCH 095/258] adding neo4j integration
---
lightrag/storage.py | 243 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 240 insertions(+), 3 deletions(-)
diff --git a/lightrag/storage.py b/lightrag/storage.py
index 1f22fc56..704dc4e8 100644
--- a/lightrag/storage.py
+++ b/lightrag/storage.py
@@ -97,14 +97,66 @@ async def upsert(self, data: dict[str, dict]):
d["__vector__"] = embeddings[i]
results = self._client.upsert(datas=list_data)
return results
+
+
+@dataclass
+class PineConeVectorDBStorage(BaseVectorStorage):
+ cosine_better_than_threshold: float = 0.2
+
+ def __post_init__(self):
+ self._client_file_name = os.path.join(
+ self.global_config["working_dir"], f"vdb_{self.namespace}.json"
+ )
+ self._max_batch_size = self.global_config["embedding_batch_num"]
+ self._client = NanoVectorDB(
+ self.embedding_func.embedding_dim, storage_file=self._client_file_name
+ )
+ import os
+ from pinecone import Pinecone
+
+ pc = Pinecone() #api_key=os.environ.get('PINECONE_API_KEY'))
+ # From here on, everything is identical to the REST-based SDK.
+ self._client = pc.Index(host=self._client_pinecone_host)#'my-index-8833ca1.svc.us-east1-gcp.pinecone.io')
+
+ self.cosine_better_than_threshold = self.global_config.get(
+ "cosine_better_than_threshold", self.cosine_better_than_threshold
+ )
+
+ async def upsert(self, data: dict[str, dict]):
+ logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
+ if not len(data):
+ logger.warning("You insert an empty data to vector DB")
+ return []
+ list_data = [
+ {
+ "__id__": k,
+ **{k1: v1 for k1, v1 in v.items() if k1 in self.meta_fields},
+ }
+ for k, v in data.items()
+ ]
+ contents = [v["content"] for v in data.values()]
+ batches = [
+ contents[i : i + self._max_batch_size]
+ for i in range(0, len(contents), self._max_batch_size)
+ ]
+ embeddings_list = await asyncio.gather(
+ *[self.embedding_func(batch) for batch in batches]
+ )
+ embeddings = np.concatenate(embeddings_list)
+ for i, d in enumerate(list_data):
+ d["__vector__"] = embeddings[i]
+ # self._client.upsert(vectors=[]) pinecone
+ results = self._client.upsert(datas=list_data)
+ return results
async def query(self, query: str, top_k=5):
embedding = await self.embedding_func([query])
embedding = embedding[0]
+ # self._client.query(vector=[...], top_key=10) pinecone
results = self._client.query(
- query=embedding,
+ vector=embedding,
top_k=top_k,
- better_than_threshold=self.cosine_better_than_threshold,
+ better_than_threshold=self.cosine_better_than_threshold, ???
)
results = [
{**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
@@ -112,7 +164,8 @@ async def query(self, query: str, top_k=5):
return results
async def index_done_callback(self):
- self._client.save()
+ print("self._client.save()")
+ # self._client.save()
@dataclass
@@ -243,3 +296,187 @@ async def _node2vec_embed(self):
nodes_ids = [self._graph.nodes[node_id]["id"] for node_id in nodes]
return embeddings, nodes_ids
+
+
+@dataclass
+class Neo4JStorage(BaseGraphStorage):
+ @staticmethod
+ def load_nx_graph(file_name) -> nx.Graph:
+ if os.path.exists(file_name):
+ return nx.read_graphml(file_name)
+ return None
+
+ # @staticmethod
+ # def write_nx_graph(graph: nx.Graph, file_name):
+ # logger.info(
+ # f"Writing graph with {graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges"
+ # )
+ # nx.write_graphml(graph, file_name)
+
+
+ def __post_init__(self):
+ self._graphml_xml_file = os.path.join(
+ self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
+ )
+ preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
+ if preloaded_graph is not None:
+ logger.info(
+ f"Loaded graph from {self._graphml_xml_file} with {preloaded_graph.number_of_nodes()} nodes, {preloaded_graph.number_of_edges()} edges"
+ )
+ self._graph = preloaded_graph or nx.Graph()
+ self._node_embed_algorithms = {
+ "node2vec": self._node2vec_embed,
+ }
+
+ async def index_done_callback(self):
+ print ("KG successfully indexed.")
+ # Neo4JStorage.write_nx_graph(self._graph, self._graphml_xml_file)
+ async def has_node(self, node_id: str) -> bool:
+ entity_name_label = node_id
+ with self.driver.session() as session:
+ return session.read_transaction(self._check_node_exists, entity_name_label)
+
+ @staticmethod
+ def _check_node_exists(tx, label):
+ query = f"MATCH (n:{label}) RETURN count(n) > 0 AS node_exists"
+ result = tx.run(query)
+ return result.single()["node_exists"]
+
+ async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
+ entity_name_label_source = source_node_id
+ entity_name_label_target = target_node_id
+ #hard code relaitionship type
+ with self.driver.session() as session:
+ result = session.read_transaction(self._check_edge_existence, entity_name_label_source, entity_name_label_target)
+ return result
+
+ @staticmethod
+ def _check_edge_existence(tx, label1, label2):
+ query = (
+ f"MATCH (a:{label1})-[r]-(b:{label2}) "
+ "RETURN COUNT(r) > 0 AS edgeExists"
+ )
+ result = tx.run(query)
+ return result.single()["edgeExists"]
+ def close(self):
+ self.driver.close()
+
+
+
+ async def get_node(self, node_id: str) -> Union[dict, None]:
+ entity_name_label = node_id
+ with driver.session() as session:
+ result = session.run(
+ "MATCH (n) WHERE n.name = $name RETURN n",
+ name=node_name
+ )
+
+ for record in result:
+ return record["n"] # Return the first matching node
+
+
+
+ async def node_degree(self, node_id: str) -> int:
+ entity_name_label = node_id
+ neo4j = Neo4j("bolt://localhost:7687", "neo4j", "password")
+ with neo4j.driver.session() as session:
+ degree = Neo4j.find_node_degree(session, entity_name_label)
+ return degree
+
+ @staticmethod
+ def find_node_degree(session, label):
+ with session.begin_transaction() as tx:
+ result = tx.run("MATCH (n:`{label}`) RETURN n, size((n)--()) AS degree".format(label=label))
+ record = result.single()
+ if record:
+ return record["degree"]
+ else:
+ return None
+
+# edge_degree
+ # from neo4j import GraphDatabase
+
+ # driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
+
+ #
+ #
+ # def edge_degree(tx, source_id, target_id):
+ # result = tx.run("""
+ # MATCH (source) WHERE ID(source) = $source_id
+ # MATCH (target) WHERE ID(target) = $target_id
+ # MATCH (source)-[r]-(target)
+ # RETURN COUNT(r) AS degree
+ # """, source_id=source_id, target_id=target_id)
+
+ # return result.single()["degree"]
+
+ # with driver.session() as session:
+ # degree = session.read_transaction(get_edge_degree, 1, 2)
+ # print("Degree of edge between source and target:", degree)
+
+
+
+ #get_edge
+ # def get_edge(driver, node_id):
+ # with driver.session() as session:
+ # result = session.run(
+ # """
+ # MATCH (n)-[r]-(m)
+ # WHERE id(n) = $node_id
+ # RETURN r
+ # """,
+ # node_id=node_id
+ # )
+ # return [record["r"] for record in result]
+
+ # driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
+
+ # edges = get_node_edges(driver, 123) # Replace 123 with the actual node ID
+
+ # for edge in edges:
+ # print(f"Edge ID: {edge.id}, Type: {edge.type}, Start: {edge.start_node.id}, End: {edge.end_node.id}")
+
+ # driver.close()
+
+
+#upsert_node
+ #add_node, upsert_node
+ # async def upsert_node(self, node_id: str, node_data: dict[str, str]):
+ # node_name = node_id
+ # with driver.session() as session:
+ # session.run("CREATE (p:$node_name $node_data)", node_name=node_name, node_data=**node_data)
+
+ # with GraphDatabase.driver(URI, auth=AUTH) as driver:
+ # add_node(driver, entity, data)
+
+#async def upsert_edge(self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]):
+ # def add_edge_with_data(tx, source_node_id, target_node_id, relationship_type, edge_data: dict[str, str]):
+ # source_node_name = source_node_id
+ # target_node_name = target_node_id
+ # tx.run("MATCH (s), (t) WHERE id(s) = $source_node_id AND id(t) = $target_node_id "
+ # "CREATE (s)-[r:$relationship_type]->(t) SET r = $data",
+ # source_node_id=source_node_id, target_node_id=target_node_id,
+ # relationship_type=relationship_type, data=edge_data)
+
+ # with driver.session() as session:
+ # session.write_transaction(add_edge_with_data, 1, 2, "KNOWS", {"since": 2020, "strength": 5})
+
+
+#async def _node2vec_embed(self):
+ # # async def _node2vec_embed(self):
+ # with driver.session() as session:
+ # #Define the Cypher query
+ # options = self.global_config["node2vec_params"]
+ # query = f"""CALL gds.node2vec.stream('myGraph', {**options})
+ # YIELD nodeId, embedding
+ # RETURN nodeId, embedding"""
+ # # Run the query and process the results
+ # results = session.run(query)
+ # for record in results:
+ # node_id = record["nodeId"]
+ # embedding = record["embedding"]
+ # print(f"Node ID: {node_id}, Embedding: {embedding}")
+ # #need to return two lists here.
+
+
+
From 542f8835f807f2f99ddad1be83f30523a5e82996 Mon Sep 17 00:00:00 2001
From: "zhenjie.ye"
Date: Sat, 26 Oct 2024 00:37:03 +0800
Subject: [PATCH 096/258] add Algorithm Flowchart
---
README.md | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/README.md b/README.md
index dbabcb56..f2f5c20e 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,10 @@ This repository hosts the code of LightRAG. The structure of this code is based
- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
+## Algorithm Flowchart
+
+
+
## Install
* Install from source (Recommend)
From e5cb01b16b92b2473f5dc2e7ad327b60466fbe3c Mon Sep 17 00:00:00 2001
From: "zhenjie.ye"
Date: Sat, 26 Oct 2024 00:37:46 +0800
Subject: [PATCH 097/258] add Algorithm FLowchart
---
README.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/README.md b/README.md
index f2f5c20e..0f8659b1 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,7 @@ This repository hosts the code of LightRAG. The structure of this code is based
## Algorithm Flowchart
+![LightRAG_Self excalidraw](https://github.com/user-attachments/assets/aa5c4892-2e44-49e6-a116-2403ed80a1a3)
## Install
From d9054c6e4f71147dafe071702512b5498224009b Mon Sep 17 00:00:00 2001
From: tackhwa
Date: Sat, 26 Oct 2024 02:20:23 +0800
Subject: [PATCH 098/258] fix hf output bug
---
lightrag/llm.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 4dcf535c..692937fb 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -266,10 +266,11 @@ async def hf_model_if_cache(
input_ids = hf_tokenizer(
input_prompt, return_tensors="pt", padding=True, truncation=True
).to("cuda")
+ inputs = {k: v.to(hf_model.device) for k, v in input_ids.items()}
output = hf_model.generate(
**input_ids, max_new_tokens=200, num_return_sequences=1, early_stopping=True
)
- response_text = hf_tokenizer.decode(output[0], skip_special_tokens=True)
+ response_text = hf_tokenizer.decode(output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)
if hashing_kv is not None:
await hashing_kv.upsert({args_hash: {"return": response_text, "model": model}})
return response_text
From 5bfd107f5ebd38dbe7e31d4f3d6bf9d3c25389fa Mon Sep 17 00:00:00 2001
From: tackhwa <55059307+tackhwa@users.noreply.github.com>
Date: Sat, 26 Oct 2024 02:42:40 +0800
Subject: [PATCH 099/258] Update token length
---
lightrag/llm.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 692937fb..ab459fc7 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -268,7 +268,7 @@ async def hf_model_if_cache(
).to("cuda")
inputs = {k: v.to(hf_model.device) for k, v in input_ids.items()}
output = hf_model.generate(
- **input_ids, max_new_tokens=200, num_return_sequences=1, early_stopping=True
+ **input_ids, max_new_tokens=512, num_return_sequences=1, early_stopping=True
)
response_text = hf_tokenizer.decode(output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)
if hashing_kv is not None:
From 7b589d41cfbd3dfccc7cdf3f1183487afd0dc39c Mon Sep 17 00:00:00 2001
From: Ken Wiltshire
Date: Fri, 25 Oct 2024 17:45:59 -0400
Subject: [PATCH 100/258] adding neo4j
---
lightrag/storage.py | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/lightrag/storage.py b/lightrag/storage.py
index 704dc4e8..cd313397 100644
--- a/lightrag/storage.py
+++ b/lightrag/storage.py
@@ -393,9 +393,19 @@ def find_node_degree(session, label):
else:
return None
-# edge_degree
- # from neo4j import GraphDatabase
-
+ # edge_degree
+ # from neo4j import GraphDatabase
+ async def edge_degree(self, src_id: str, tgt_id: str) -> int:
+ entity_name__label_source = src_id
+ entity_name_label_target = tgt_id
+ with graph_db.session() as session:
+ result = session.run(
+ """MATCH (n1:{node_label1})-[r]-(n2:{node_label2})
+ RETURN count(r) AS degree"""
+ .format(node_label1=node_label1, node_label2=node_label2)
+ )
+ record = result.single()
+ return record["degree"]
# driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
#
From e3d978d331cca8c48df09618fde5d74a0711c285 Mon Sep 17 00:00:00 2001
From: Yazington
Date: Sat, 26 Oct 2024 00:11:21 -0400
Subject: [PATCH 101/258] fixing bug
---
lightrag/lightrag.py | 6 ++++--
lightrag/operate.py | 26 +++++++++++++-------------
2 files changed, 17 insertions(+), 15 deletions(-)
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 5137af42..3004f5ed 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -85,7 +85,9 @@ class LightRAG:
# LLM
llm_model_func: callable = gpt_4o_mini_complete # hf_model_complete#
- llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
+ llm_model_name: str = (
+ "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
+ )
llm_model_max_token_size: int = 32768
llm_model_max_async: int = 16
@@ -208,7 +210,7 @@ async def ainsert(self, string_or_strings):
logger.info("[Entity Extraction]...")
maybe_new_kg = await extract_entities(
inserting_chunks,
- knwoledge_graph_inst=self.chunk_entity_relation_graph,
+ knowledge_graph_inst=self.chunk_entity_relation_graph,
entity_vdb=self.entities_vdb,
relationships_vdb=self.relationships_vdb,
global_config=asdict(self),
diff --git a/lightrag/operate.py b/lightrag/operate.py
index a0729cd8..8a6820f5 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -124,14 +124,14 @@ async def _handle_single_relationship_extraction(
async def _merge_nodes_then_upsert(
entity_name: str,
nodes_data: list[dict],
- knwoledge_graph_inst: BaseGraphStorage,
+ knowledge_graph_inst: BaseGraphStorage,
global_config: dict,
):
already_entitiy_types = []
already_source_ids = []
already_description = []
- already_node = await knwoledge_graph_inst.get_node(entity_name)
+ already_node = await knowledge_graph_inst.get_node(entity_name)
if already_node is not None:
already_entitiy_types.append(already_node["entity_type"])
already_source_ids.extend(
@@ -160,7 +160,7 @@ async def _merge_nodes_then_upsert(
description=description,
source_id=source_id,
)
- await knwoledge_graph_inst.upsert_node(
+ await knowledge_graph_inst.upsert_node(
entity_name,
node_data=node_data,
)
@@ -172,7 +172,7 @@ async def _merge_edges_then_upsert(
src_id: str,
tgt_id: str,
edges_data: list[dict],
- knwoledge_graph_inst: BaseGraphStorage,
+ knowledge_graph_inst: BaseGraphStorage,
global_config: dict,
):
already_weights = []
@@ -180,8 +180,8 @@ async def _merge_edges_then_upsert(
already_description = []
already_keywords = []
- if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
- already_edge = await knwoledge_graph_inst.get_edge(src_id, tgt_id)
+ if await knowledge_graph_inst.has_edge(src_id, tgt_id):
+ already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id)
already_weights.append(already_edge["weight"])
already_source_ids.extend(
split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
@@ -202,8 +202,8 @@ async def _merge_edges_then_upsert(
set([dp["source_id"] for dp in edges_data] + already_source_ids)
)
for need_insert_id in [src_id, tgt_id]:
- if not (await knwoledge_graph_inst.has_node(need_insert_id)):
- await knwoledge_graph_inst.upsert_node(
+ if not (await knowledge_graph_inst.has_node(need_insert_id)):
+ await knowledge_graph_inst.upsert_node(
need_insert_id,
node_data={
"source_id": source_id,
@@ -214,7 +214,7 @@ async def _merge_edges_then_upsert(
description = await _handle_entity_relation_summary(
(src_id, tgt_id), description, global_config
)
- await knwoledge_graph_inst.upsert_edge(
+ await knowledge_graph_inst.upsert_edge(
src_id,
tgt_id,
edge_data=dict(
@@ -237,7 +237,7 @@ async def _merge_edges_then_upsert(
async def extract_entities(
chunks: dict[str, TextChunkSchema],
- knwoledge_graph_inst: BaseGraphStorage,
+ knowledge_graph_inst: BaseGraphStorage,
entity_vdb: BaseVectorStorage,
relationships_vdb: BaseVectorStorage,
global_config: dict,
@@ -341,13 +341,13 @@ async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
maybe_edges[tuple(sorted(k))].extend(v)
all_entities_data = await asyncio.gather(
*[
- _merge_nodes_then_upsert(k, v, knwoledge_graph_inst, global_config)
+ _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
for k, v in maybe_nodes.items()
]
)
all_relationships_data = await asyncio.gather(
*[
- _merge_edges_then_upsert(k[0], k[1], v, knwoledge_graph_inst, global_config)
+ _merge_edges_then_upsert(k[0], k[1], v, knowledge_graph_inst, global_config)
for k, v in maybe_edges.items()
]
)
@@ -384,7 +384,7 @@ async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
}
await relationships_vdb.upsert(data_for_vdb)
- return knwoledge_graph_inst
+ return knowledge_graph_inst
async def local_query(
From f6e97c052813d216913cb00d84b668cf732bf6e3 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Sat, 26 Oct 2024 14:04:11 +0800
Subject: [PATCH 102/258] Update graph_visual_with_html.py
---
examples/graph_visual_with_html.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/graph_visual_with_html.py b/examples/graph_visual_with_html.py
index e4337a54..11279b3a 100644
--- a/examples/graph_visual_with_html.py
+++ b/examples/graph_visual_with_html.py
@@ -6,7 +6,7 @@
G = nx.read_graphml("./dickens/graph_chunk_entity_relation.graphml")
# Create a Pyvis network
-net = Network(notebook=True)
+net = Network(height="100vh", notebook=True)
# Convert NetworkX graph to Pyvis network
net.from_nx(G)
From 4d078e948f9f85eb50cedf178cca77b04a8df74c Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Sat, 26 Oct 2024 14:40:17 +0800
Subject: [PATCH 103/258] update version
---
lightrag/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lightrag/__init__.py b/lightrag/__init__.py
index db81e005..8e76a260 100644
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,5 +1,5 @@
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
-__version__ = "0.0.7"
+__version__ = "0.0.8"
__author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/LightRAG"
From 02f94ab228c17122833173aa4c9825bada4a176f Mon Sep 17 00:00:00 2001
From: "zhenjie.ye"
Date: Sat, 26 Oct 2024 15:56:48 +0800
Subject: [PATCH 104/258] [feat] Add API server implementation and endpoints
---
README.md | 119 ++++++++++++++
.../lightrag_api_openai_compatible_demo.py | 153 ++++++++++++++++++
2 files changed, 272 insertions(+)
create mode 100644 examples/lightrag_api_openai_compatible_demo.py
diff --git a/README.md b/README.md
index 7fab9a01..d11b1691 100644
--- a/README.md
+++ b/README.md
@@ -397,6 +397,125 @@ if __name__ == "__main__":
+## API Server Implementation
+
+LightRAG also provides a FastAPI-based server implementation for RESTful API access to RAG operations. This allows you to run LightRAG as a service and interact with it through HTTP requests.
+
+### Setting up the API Server
+
+Click to expand setup instructions
+
+1. First, ensure you have the required dependencies:
+```bash
+pip install fastapi uvicorn pydantic
+```
+
+2. Set up your environment variables:
+```bash
+export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default"
+```
+
+3. Run the API server:
+```bash
+python examples/lightrag_api_openai_compatible_demo.py
+```
+
+The server will start on `http://0.0.0.0:8020`.
+
+
+### API Endpoints
+
+The API server provides the following endpoints:
+
+#### 1. Query Endpoint
+
+Click to view Query endpoint details
+
+- **URL:** `/query`
+- **Method:** POST
+- **Body:**
+```json
+{
+ "query": "Your question here",
+ "mode": "hybrid" // Can be "naive", "local", "global", or "hybrid"
+}
+```
+- **Example:**
+```bash
+curl -X POST "http://127.0.0.1:8020/query" \
+ -H "Content-Type: application/json" \
+ -d '{"query": "What are the main themes?", "mode": "hybrid"}'
+```
+
+
+#### 2. Insert Text Endpoint
+
+Click to view Insert Text endpoint details
+
+- **URL:** `/insert`
+- **Method:** POST
+- **Body:**
+```json
+{
+ "text": "Your text content here"
+}
+```
+- **Example:**
+```bash
+curl -X POST "http://127.0.0.1:8020/insert" \
+ -H "Content-Type: application/json" \
+ -d '{"text": "Content to be inserted into RAG"}'
+```
+
+
+#### 3. Insert File Endpoint
+
+Click to view Insert File endpoint details
+
+- **URL:** `/insert_file`
+- **Method:** POST
+- **Body:**
+```json
+{
+ "file_path": "path/to/your/file.txt"
+}
+```
+- **Example:**
+```bash
+curl -X POST "http://127.0.0.1:8020/insert_file" \
+ -H "Content-Type: application/json" \
+ -d '{"file_path": "./book.txt"}'
+```
+
+
+#### 4. Health Check Endpoint
+
+Click to view Health Check endpoint details
+
+- **URL:** `/health`
+- **Method:** GET
+- **Example:**
+```bash
+curl -X GET "http://127.0.0.1:8020/health"
+```
+
+
+### Configuration
+
+The API server can be configured using environment variables:
+- `RAG_DIR`: Directory for storing the RAG index (default: "index_default")
+- API keys and base URLs should be configured in the code for your specific LLM and embedding model providers
+
+### Error Handling
+
+Click to view error handling details
+
+The API includes comprehensive error handling:
+- File not found errors (404)
+- Processing errors (500)
+- Supports multiple file encodings (UTF-8 and GBK)
+
+
## Evaluation
### Dataset
The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain).
diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py
new file mode 100644
index 00000000..f8d105ea
--- /dev/null
+++ b/examples/lightrag_api_openai_compatible_demo.py
@@ -0,0 +1,153 @@
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+import os
+from lightrag import LightRAG, QueryParam
+from lightrag.llm import openai_complete_if_cache, openai_embedding
+from lightrag.utils import EmbeddingFunc
+import numpy as np
+from typing import Optional
+import asyncio
+import nest_asyncio
+
+# Apply nest_asyncio to solve event loop issues
+nest_asyncio.apply()
+
+DEFAULT_RAG_DIR="index_default"
+app = FastAPI(title="LightRAG API", description="API for RAG operations")
+
+# Configure working directory
+WORKING_DIR = os.environ.get('RAG_DIR', f'{DEFAULT_RAG_DIR}')
+print(f"WORKING_DIR: {WORKING_DIR}")
+if not os.path.exists(WORKING_DIR):
+ os.mkdir(WORKING_DIR)
+
+# LLM model function
+async def llm_model_func(
+ prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ return await openai_complete_if_cache(
+ "gpt-4o-mini",
+ prompt,
+ system_prompt=system_prompt,
+ history_messages=history_messages,
+ api_key='YOUR_API_KEY',
+ base_url="YourURL/v1",
+ **kwargs,
+ )
+
+# Embedding function
+async def embedding_func(texts: list[str]) -> np.ndarray:
+ return await openai_embedding(
+ texts,
+ model="text-embedding-3-large",
+ api_key='YOUR_API_KEY',
+ base_url="YourURL/v1",
+ )
+
+# Initialize RAG instance
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=llm_model_func,
+ embedding_func=EmbeddingFunc(
+ embedding_dim=3072, max_token_size=8192, func=embedding_func
+ ),
+)
+
+# Data models
+class QueryRequest(BaseModel):
+ query: str
+ mode: str = "hybrid"
+
+class InsertRequest(BaseModel):
+ text: str
+
+class InsertFileRequest(BaseModel):
+ file_path: str
+
+class Response(BaseModel):
+ status: str
+ data: Optional[str] = None
+ message: Optional[str] = None
+
+# API routes
+@app.post("/query", response_model=Response)
+async def query_endpoint(request: QueryRequest):
+ try:
+ loop = asyncio.get_event_loop()
+ result = await loop.run_in_executor(
+ None,
+ lambda: rag.query(request.query, param=QueryParam(mode=request.mode))
+ )
+ return Response(
+ status="success",
+ data=result
+ )
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/insert", response_model=Response)
+async def insert_endpoint(request: InsertRequest):
+ try:
+ loop = asyncio.get_event_loop()
+ await loop.run_in_executor(None, lambda: rag.insert(request.text))
+ return Response(
+ status="success",
+ message="Text inserted successfully"
+ )
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/insert_file", response_model=Response)
+async def insert_file(request: InsertFileRequest):
+ try:
+ # Check if file exists
+ if not os.path.exists(request.file_path):
+ raise HTTPException(
+ status_code=404,
+ detail=f"File not found: {request.file_path}"
+ )
+
+ # Read file content
+ try:
+ with open(request.file_path, 'r', encoding='utf-8') as f:
+ content = f.read()
+ except UnicodeDecodeError:
+ # If UTF-8 decoding fails, try other encodings
+ with open(request.file_path, 'r', encoding='gbk') as f:
+ content = f.read()
+
+ # Insert file content
+ loop = asyncio.get_event_loop()
+ await loop.run_in_executor(None, lambda: rag.insert(content))
+
+ return Response(
+ status="success",
+ message=f"File content from {request.file_path} inserted successfully"
+ )
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/health")
+async def health_check():
+ return {"status": "healthy"}
+
+if __name__ == "__main__":
+ import uvicorn
+ uvicorn.run(app, host="0.0.0.0", port=8020)
+
+# Usage example
+# To run the server, use the following command in your terminal:
+# python lightrag_api_openai_compatible_demo.py
+
+# Example requests:
+# 1. Query:
+# curl -X POST "http://127.0.0.1:8020/query" -H "Content-Type: application/json" -d '{"query": "your query here", "mode": "hybrid"}'
+
+# 2. Insert text:
+# curl -X POST "http://127.0.0.1:8020/insert" -H "Content-Type: application/json" -d '{"text": "your text here"}'
+
+# 3. Insert file:
+# curl -X POST "http://127.0.0.1:8020/insert_file" -H "Content-Type: application/json" -d '{"file_path": "path/to/your/file.txt"}'
+
+# 4. Health check:
+# curl -X GET "http://127.0.0.1:8020/health"
\ No newline at end of file
From 08feac942ad0de01ccbe16253d7b7a2ad35b7621 Mon Sep 17 00:00:00 2001
From: "zhenjie.ye"
Date: Sat, 26 Oct 2024 16:00:30 +0800
Subject: [PATCH 105/258] Refactor code formatting in
lightrag_api_openai_compatible_demo.py
---
.../lightrag_api_openai_compatible_demo.py | 29 ++++++++++++++-----
1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py
index f8d105ea..ad9560dc 100644
--- a/examples/lightrag_api_openai_compatible_demo.py
+++ b/examples/lightrag_api_openai_compatible_demo.py
@@ -12,7 +12,7 @@
# Apply nest_asyncio to solve event loop issues
nest_asyncio.apply()
-DEFAULT_RAG_DIR="index_default"
+DEFAULT_RAG_DIR = "index_default"
app = FastAPI(title="LightRAG API", description="API for RAG operations")
# Configure working directory
@@ -22,6 +22,8 @@
os.mkdir(WORKING_DIR)
# LLM model function
+
+
async def llm_model_func(
prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
@@ -36,6 +38,8 @@ async def llm_model_func(
)
# Embedding function
+
+
async def embedding_func(texts: list[str]) -> np.ndarray:
return await openai_embedding(
texts,
@@ -54,29 +58,37 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
)
# Data models
+
+
class QueryRequest(BaseModel):
query: str
mode: str = "hybrid"
+
class InsertRequest(BaseModel):
text: str
+
class InsertFileRequest(BaseModel):
file_path: str
+
class Response(BaseModel):
status: str
data: Optional[str] = None
message: Optional[str] = None
# API routes
+
+
@app.post("/query", response_model=Response)
async def query_endpoint(request: QueryRequest):
try:
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(
- None,
- lambda: rag.query(request.query, param=QueryParam(mode=request.mode))
+ None,
+ lambda: rag.query(
+ request.query, param=QueryParam(mode=request.mode))
)
return Response(
status="success",
@@ -85,6 +97,7 @@ async def query_endpoint(request: QueryRequest):
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
+
@app.post("/insert", response_model=Response)
async def insert_endpoint(request: InsertRequest):
try:
@@ -97,6 +110,7 @@ async def insert_endpoint(request: InsertRequest):
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
+
@app.post("/insert_file", response_model=Response)
async def insert_file(request: InsertFileRequest):
try:
@@ -106,7 +120,7 @@ async def insert_file(request: InsertFileRequest):
status_code=404,
detail=f"File not found: {request.file_path}"
)
-
+
# Read file content
try:
with open(request.file_path, 'r', encoding='utf-8') as f:
@@ -115,11 +129,11 @@ async def insert_file(request: InsertFileRequest):
# If UTF-8 decoding fails, try other encodings
with open(request.file_path, 'r', encoding='gbk') as f:
content = f.read()
-
+
# Insert file content
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, lambda: rag.insert(content))
-
+
return Response(
status="success",
message=f"File content from {request.file_path} inserted successfully"
@@ -127,6 +141,7 @@ async def insert_file(request: InsertFileRequest):
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
+
@app.get("/health")
async def health_check():
return {"status": "healthy"}
@@ -150,4 +165,4 @@ async def health_check():
# curl -X POST "http://127.0.0.1:8020/insert_file" -H "Content-Type: application/json" -d '{"file_path": "path/to/your/file.txt"}'
# 4. Health check:
-# curl -X GET "http://127.0.0.1:8020/health"
\ No newline at end of file
+# curl -X GET "http://127.0.0.1:8020/health"
From fb84c1e5be3b6b5dc34ed96606194b93624f3900 Mon Sep 17 00:00:00 2001
From: "zhenjie.ye"
Date: Sat, 26 Oct 2024 16:09:36 +0800
Subject: [PATCH 106/258] Refactor code formatting in
lightrag_api_openai_compatible_demo.py
---
.../lightrag_api_openai_compatible_demo.py | 34 ++++++++-----------
1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py
index ad9560dc..2cd262bb 100644
--- a/examples/lightrag_api_openai_compatible_demo.py
+++ b/examples/lightrag_api_openai_compatible_demo.py
@@ -16,7 +16,7 @@
app = FastAPI(title="LightRAG API", description="API for RAG operations")
# Configure working directory
-WORKING_DIR = os.environ.get('RAG_DIR', f'{DEFAULT_RAG_DIR}')
+WORKING_DIR = os.environ.get("RAG_DIR", f"{DEFAULT_RAG_DIR}")
print(f"WORKING_DIR: {WORKING_DIR}")
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
@@ -32,11 +32,12 @@ async def llm_model_func(
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
- api_key='YOUR_API_KEY',
+ api_key="YOUR_API_KEY",
base_url="YourURL/v1",
**kwargs,
)
+
# Embedding function
@@ -44,10 +45,11 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
return await openai_embedding(
texts,
model="text-embedding-3-large",
- api_key='YOUR_API_KEY',
+ api_key="YOUR_API_KEY",
base_url="YourURL/v1",
)
+
# Initialize RAG instance
rag = LightRAG(
working_dir=WORKING_DIR,
@@ -78,6 +80,7 @@ class Response(BaseModel):
data: Optional[str] = None
message: Optional[str] = None
+
# API routes
@@ -86,14 +89,9 @@ async def query_endpoint(request: QueryRequest):
try:
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(
- None,
- lambda: rag.query(
- request.query, param=QueryParam(mode=request.mode))
- )
- return Response(
- status="success",
- data=result
+ None, lambda: rag.query(request.query, param=QueryParam(mode=request.mode))
)
+ return Response(status="success", data=result)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@@ -103,10 +101,7 @@ async def insert_endpoint(request: InsertRequest):
try:
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, lambda: rag.insert(request.text))
- return Response(
- status="success",
- message="Text inserted successfully"
- )
+ return Response(status="success", message="Text inserted successfully")
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@@ -117,17 +112,16 @@ async def insert_file(request: InsertFileRequest):
# Check if file exists
if not os.path.exists(request.file_path):
raise HTTPException(
- status_code=404,
- detail=f"File not found: {request.file_path}"
+ status_code=404, detail=f"File not found: {request.file_path}"
)
# Read file content
try:
- with open(request.file_path, 'r', encoding='utf-8') as f:
+ with open(request.file_path, "r", encoding="utf-8") as f:
content = f.read()
except UnicodeDecodeError:
# If UTF-8 decoding fails, try other encodings
- with open(request.file_path, 'r', encoding='gbk') as f:
+ with open(request.file_path, "r", encoding="gbk") as f:
content = f.read()
# Insert file content
@@ -136,7 +130,7 @@ async def insert_file(request: InsertFileRequest):
return Response(
status="success",
- message=f"File content from {request.file_path} inserted successfully"
+ message=f"File content from {request.file_path} inserted successfully",
)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@@ -146,8 +140,10 @@ async def insert_file(request: InsertFileRequest):
async def health_check():
return {"status": "healthy"}
+
if __name__ == "__main__":
import uvicorn
+
uvicorn.run(app, host="0.0.0.0", port=8020)
# Usage example
From 88f4e3452839e3b1f723c9688a888c8aefeb5f21 Mon Sep 17 00:00:00 2001
From: tackhwa
Date: Sat, 26 Oct 2024 16:11:15 +0800
Subject: [PATCH 107/258] support lmdeploy backend
---
examples/lightrag_lmdeploy_demo.py | 74 +++++++++++++++++++++
lightrag/llm.py | 100 +++++++++++++++++++++++++++++
requirements.txt | 1 +
3 files changed, 175 insertions(+)
create mode 100644 examples/lightrag_lmdeploy_demo.py
diff --git a/examples/lightrag_lmdeploy_demo.py b/examples/lightrag_lmdeploy_demo.py
new file mode 100644
index 00000000..ea7ace0e
--- /dev/null
+++ b/examples/lightrag_lmdeploy_demo.py
@@ -0,0 +1,74 @@
+import os
+
+from lightrag import LightRAG, QueryParam
+from lightrag.llm import lmdeploy_model_if_cache, hf_embedding
+from lightrag.utils import EmbeddingFunc
+from transformers import AutoModel, AutoTokenizer
+
+WORKING_DIR = "./dickens"
+
+if not os.path.exists(WORKING_DIR):
+ os.mkdir(WORKING_DIR)
+
+async def lmdeploy_model_complete(
+ prompt=None, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ model_name = kwargs["hashing_kv"].global_config["llm_model_name"]
+ return await lmdeploy_model_if_cache(
+ model_name,
+ prompt,
+ system_prompt=system_prompt,
+ history_messages=history_messages,
+ ## please specify chat_template if your local path does not follow original HF file name,
+ ## or model_name is a pytorch model on huggingface.co,
+ ## you can refer to https://github.com/InternLM/lmdeploy/blob/main/lmdeploy/model.py
+ ## for a list of chat_template available in lmdeploy.
+ chat_template = "llama3",
+ # model_format ='awq', # if you are using awq quantization model.
+ # quant_policy=8, # if you want to use online kv cache, 4=kv int4, 8=kv int8.
+ **kwargs,
+ )
+
+
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=lmdeploy_model_complete,
+ llm_model_name="meta-llama/Llama-3.1-8B-Instruct", # please use definite path for local model
+ embedding_func=EmbeddingFunc(
+ embedding_dim=384,
+ max_token_size=5000,
+ func=lambda texts: hf_embedding(
+ texts,
+ tokenizer=AutoTokenizer.from_pretrained(
+ "sentence-transformers/all-MiniLM-L6-v2"
+ ),
+ embed_model=AutoModel.from_pretrained(
+ "sentence-transformers/all-MiniLM-L6-v2"
+ ),
+ ),
+ ),
+)
+
+
+with open("./book.txt", "r", encoding="utf-8") as f:
+ rag.insert(f.read())
+
+# Perform naive search
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+)
+
+# Perform local search
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+)
+
+# Perform global search
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+)
+
+# Perform hybrid search
+print(
+ rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+)
diff --git a/lightrag/llm.py b/lightrag/llm.py
index bb0d6063..028084bd 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -322,6 +322,106 @@ async def ollama_model_if_cache(
return result
+@lru_cache(maxsize=1)
+def initialize_lmdeploy_pipeline(model, tp=1, chat_template=None, log_level='WARNING', model_format='hf', quant_policy=0):
+ from lmdeploy import pipeline, ChatTemplateConfig, TurbomindEngineConfig
+ lmdeploy_pipe = pipeline(
+ model_path=model,
+ backend_config=TurbomindEngineConfig(tp=tp, model_format=model_format, quant_policy=quant_policy),
+ chat_template_config=ChatTemplateConfig(model_name=chat_template) if chat_template else None,
+ log_level='WARNING')
+ return lmdeploy_pipe
+
+
+async def lmdeploy_model_if_cache(
+ model, prompt, system_prompt=None, history_messages=[],
+ chat_template=None, model_format='hf',quant_policy=0, **kwargs
+) -> str:
+ """
+ Args:
+ model (str): The path to the model.
+ It could be one of the following options:
+ - i) A local directory path of a turbomind model which is
+ converted by `lmdeploy convert` command or download
+ from ii) and iii).
+ - ii) The model_id of a lmdeploy-quantized model hosted
+ inside a model repo on huggingface.co, such as
+ "InternLM/internlm-chat-20b-4bit",
+ "lmdeploy/llama2-chat-70b-4bit", etc.
+ - iii) The model_id of a model hosted inside a model repo
+ on huggingface.co, such as "internlm/internlm-chat-7b",
+ "Qwen/Qwen-7B-Chat ", "baichuan-inc/Baichuan2-7B-Chat"
+ and so on.
+ chat_template (str): needed when model is a pytorch model on
+ huggingface.co, such as "internlm-chat-7b",
+ "Qwen-7B-Chat ", "Baichuan2-7B-Chat" and so on,
+ and when the model name of local path did not match the original model name in HF.
+ tp (int): tensor parallel
+ prompt (Union[str, List[str]]): input texts to be completed.
+ do_preprocess (bool): whether pre-process the messages. Default to
+ True, which means chat_template will be applied.
+ skip_special_tokens (bool): Whether or not to remove special tokens
+ in the decoding. Default to be False.
+ do_sample (bool): Whether or not to use sampling, use greedy decoding otherwise.
+ Default to be False, which means greedy decoding will be applied.
+ """
+ try:
+ import lmdeploy
+ from lmdeploy import version_info, GenerationConfig
+ except:
+ raise ImportError("Please install lmdeploy before intialize lmdeploy backend.")
+
+ kwargs.pop("response_format", None)
+ max_new_tokens = kwargs.pop("max_tokens", 512)
+ tp = kwargs.pop('tp', 1)
+ skip_special_tokens = kwargs.pop('skip_special_tokens', False)
+ do_preprocess = kwargs.pop('do_preprocess', True)
+ do_sample = kwargs.pop('do_sample', False)
+ gen_params = kwargs
+
+ version = version_info
+ if do_sample is not None and version < (0, 6, 0):
+ raise RuntimeError(
+ '`do_sample` parameter is not supported by lmdeploy until '
+ f'v0.6.0, but currently using lmdeloy {lmdeploy.__version__}')
+ else:
+ do_sample = True
+ gen_params.update(do_sample=do_sample)
+
+ lmdeploy_pipe = initialize_lmdeploy_pipeline(
+ model=model,
+ tp=tp,
+ chat_template=chat_template,
+ model_format=model_format,
+ quant_policy=quant_policy,
+ log_level='WARNING')
+
+ messages = []
+ if system_prompt:
+ messages.append({"role": "system", "content": system_prompt})
+
+ hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
+ messages.extend(history_messages)
+ messages.append({"role": "user", "content": prompt})
+ if hashing_kv is not None:
+ args_hash = compute_args_hash(model, messages)
+ if_cache_return = await hashing_kv.get_by_id(args_hash)
+ if if_cache_return is not None:
+ return if_cache_return["return"]
+
+ gen_config = GenerationConfig(
+ skip_special_tokens=skip_special_tokens, max_new_tokens=max_new_tokens, **gen_params)
+
+ response = ""
+ async for res in lmdeploy_pipe.generate(messages, gen_config=gen_config,
+ do_preprocess=do_preprocess, stream_response=False, session_id=1):
+ response += res.response
+
+ if hashing_kv is not None:
+ await hashing_kv.upsert({args_hash: {"return": response, "model": model}})
+ return response
+
+
async def gpt_4o_complete(
prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
diff --git a/requirements.txt b/requirements.txt
index 98f32b0a..6b0e025a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,3 +13,4 @@ tiktoken
torch
transformers
xxhash
+# lmdeploy[all]
From f71e389d5b2772b1cc381dada644b9118334d9dc Mon Sep 17 00:00:00 2001
From: "zhenjie.ye"
Date: Sat, 26 Oct 2024 16:12:10 +0800
Subject: [PATCH 108/258] Refactor code formatting in
lightrag_api_openai_compatible_demo.py
---
lightrag/lightrag.py | 4 +---
lightrag/llm.py | 4 +++-
setup.py | 31 ++++++++++++++++++++++++-------
3 files changed, 28 insertions(+), 11 deletions(-)
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 3004f5ed..b84e22ef 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -85,9 +85,7 @@ class LightRAG:
# LLM
llm_model_func: callable = gpt_4o_mini_complete # hf_model_complete#
- llm_model_name: str = (
- "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
- )
+ llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
llm_model_max_token_size: int = 32768
llm_model_max_async: int = 16
diff --git a/lightrag/llm.py b/lightrag/llm.py
index bb0d6063..fd6b72d6 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -286,7 +286,9 @@ async def hf_model_if_cache(
output = hf_model.generate(
**input_ids, max_new_tokens=512, num_return_sequences=1, early_stopping=True
)
- response_text = hf_tokenizer.decode(output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)
+ response_text = hf_tokenizer.decode(
+ output[0][len(inputs["input_ids"][0]) :], skip_special_tokens=True
+ )
if hashing_kv is not None:
await hashing_kv.upsert({args_hash: {"return": response_text, "model": model}})
return response_text
diff --git a/setup.py b/setup.py
index bdf49f02..1b1f65f0 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,7 @@
import setuptools
from pathlib import Path
+
# Reading the long description from README.md
def read_long_description():
try:
@@ -8,6 +9,7 @@ def read_long_description():
except FileNotFoundError:
return "A description of LightRAG is currently unavailable."
+
# Retrieving metadata from __init__.py
def retrieve_metadata():
vars2find = ["__author__", "__version__", "__url__"]
@@ -17,18 +19,26 @@ def retrieve_metadata():
for line in f.readlines():
for v in vars2find:
if line.startswith(v):
- line = line.replace(" ", "").replace('"', "").replace("'", "").strip()
+ line = (
+ line.replace(" ", "")
+ .replace('"', "")
+ .replace("'", "")
+ .strip()
+ )
vars2readme[v] = line.split("=")[1]
except FileNotFoundError:
raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.")
-
+
# Checking if all required variables are found
missing_vars = [v for v in vars2find if v not in vars2readme]
if missing_vars:
- raise ValueError(f"Missing required metadata variables in __init__.py: {missing_vars}")
-
+ raise ValueError(
+ f"Missing required metadata variables in __init__.py: {missing_vars}"
+ )
+
return vars2readme
+
# Reading dependencies from requirements.txt
def read_requirements():
deps = []
@@ -36,9 +46,12 @@ def read_requirements():
with open("./requirements.txt") as f:
deps = [line.strip() for line in f if line.strip()]
except FileNotFoundError:
- print("Warning: 'requirements.txt' not found. No dependencies will be installed.")
+ print(
+ "Warning: 'requirements.txt' not found. No dependencies will be installed."
+ )
return deps
+
metadata = retrieve_metadata()
long_description = read_long_description()
requirements = read_requirements()
@@ -51,7 +64,9 @@ def read_requirements():
description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
long_description=long_description,
long_description_content_type="text/markdown",
- packages=setuptools.find_packages(exclude=("tests*", "docs*")), # Automatically find packages
+ packages=setuptools.find_packages(
+ exclude=("tests*", "docs*")
+ ), # Automatically find packages
classifiers=[
"Development Status :: 4 - Beta",
"Programming Language :: Python :: 3",
@@ -66,6 +81,8 @@ def read_requirements():
project_urls={ # Additional project metadata
"Documentation": metadata.get("__url__", ""),
"Source": metadata.get("__url__", ""),
- "Tracker": f"{metadata.get('__url__', '')}/issues" if metadata.get("__url__") else ""
+ "Tracker": f"{metadata.get('__url__', '')}/issues"
+ if metadata.get("__url__")
+ else "",
},
)
From 2120a6dabb320f8a3a5f9388afda65b80d4093c8 Mon Sep 17 00:00:00 2001
From: tackhwa
Date: Sat, 26 Oct 2024 16:13:18 +0800
Subject: [PATCH 109/258] pre-commit
---
examples/lightrag_lmdeploy_demo.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/examples/lightrag_lmdeploy_demo.py b/examples/lightrag_lmdeploy_demo.py
index ea7ace0e..aeb96f71 100644
--- a/examples/lightrag_lmdeploy_demo.py
+++ b/examples/lightrag_lmdeploy_demo.py
@@ -10,10 +10,11 @@
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
+
async def lmdeploy_model_complete(
prompt=None, system_prompt=None, history_messages=[], **kwargs
) -> str:
- model_name = kwargs["hashing_kv"].global_config["llm_model_name"]
+ model_name = kwargs["hashing_kv"].global_config["llm_model_name"]
return await lmdeploy_model_if_cache(
model_name,
prompt,
@@ -23,7 +24,7 @@ async def lmdeploy_model_complete(
## or model_name is a pytorch model on huggingface.co,
## you can refer to https://github.com/InternLM/lmdeploy/blob/main/lmdeploy/model.py
## for a list of chat_template available in lmdeploy.
- chat_template = "llama3",
+ chat_template="llama3",
# model_format ='awq', # if you are using awq quantization model.
# quant_policy=8, # if you want to use online kv cache, 4=kv int4, 8=kv int8.
**kwargs,
@@ -33,7 +34,7 @@ async def lmdeploy_model_complete(
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=lmdeploy_model_complete,
- llm_model_name="meta-llama/Llama-3.1-8B-Instruct", # please use definite path for local model
+ llm_model_name="meta-llama/Llama-3.1-8B-Instruct", # please use definite path for local model
embedding_func=EmbeddingFunc(
embedding_dim=384,
max_token_size=5000,
From 81d5b904fbf06379047ba717869af111d2041333 Mon Sep 17 00:00:00 2001
From: tackhwa
Date: Sat, 26 Oct 2024 16:24:35 +0800
Subject: [PATCH 110/258] update do_preprocess
---
lightrag/llm.py | 77 ++++++++++++++++++++++++++++++++++---------------
1 file changed, 54 insertions(+), 23 deletions(-)
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 028084bd..d86886ea 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -286,7 +286,9 @@ async def hf_model_if_cache(
output = hf_model.generate(
**input_ids, max_new_tokens=512, num_return_sequences=1, early_stopping=True
)
- response_text = hf_tokenizer.decode(output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)
+ response_text = hf_tokenizer.decode(
+ output[0][len(inputs["input_ids"][0]) :], skip_special_tokens=True
+ )
if hashing_kv is not None:
await hashing_kv.upsert({args_hash: {"return": response_text, "model": model}})
return response_text
@@ -323,19 +325,38 @@ async def ollama_model_if_cache(
@lru_cache(maxsize=1)
-def initialize_lmdeploy_pipeline(model, tp=1, chat_template=None, log_level='WARNING', model_format='hf', quant_policy=0):
+def initialize_lmdeploy_pipeline(
+ model,
+ tp=1,
+ chat_template=None,
+ log_level="WARNING",
+ model_format="hf",
+ quant_policy=0,
+):
from lmdeploy import pipeline, ChatTemplateConfig, TurbomindEngineConfig
+
lmdeploy_pipe = pipeline(
model_path=model,
- backend_config=TurbomindEngineConfig(tp=tp, model_format=model_format, quant_policy=quant_policy),
- chat_template_config=ChatTemplateConfig(model_name=chat_template) if chat_template else None,
- log_level='WARNING')
+ backend_config=TurbomindEngineConfig(
+ tp=tp, model_format=model_format, quant_policy=quant_policy
+ ),
+ chat_template_config=ChatTemplateConfig(model_name=chat_template)
+ if chat_template
+ else None,
+ log_level="WARNING",
+ )
return lmdeploy_pipe
async def lmdeploy_model_if_cache(
- model, prompt, system_prompt=None, history_messages=[],
- chat_template=None, model_format='hf',quant_policy=0, **kwargs
+ model,
+ prompt,
+ system_prompt=None,
+ history_messages=[],
+ chat_template=None,
+ model_format="hf",
+ quant_policy=0,
+ **kwargs,
) -> str:
"""
Args:
@@ -354,36 +375,37 @@ async def lmdeploy_model_if_cache(
and so on.
chat_template (str): needed when model is a pytorch model on
huggingface.co, such as "internlm-chat-7b",
- "Qwen-7B-Chat ", "Baichuan2-7B-Chat" and so on,
+ "Qwen-7B-Chat ", "Baichuan2-7B-Chat" and so on,
and when the model name of local path did not match the original model name in HF.
tp (int): tensor parallel
prompt (Union[str, List[str]]): input texts to be completed.
do_preprocess (bool): whether pre-process the messages. Default to
True, which means chat_template will be applied.
skip_special_tokens (bool): Whether or not to remove special tokens
- in the decoding. Default to be False.
- do_sample (bool): Whether or not to use sampling, use greedy decoding otherwise.
+ in the decoding. Default to be True.
+ do_sample (bool): Whether or not to use sampling, use greedy decoding otherwise.
Default to be False, which means greedy decoding will be applied.
"""
try:
import lmdeploy
from lmdeploy import version_info, GenerationConfig
- except:
+ except Exception:
raise ImportError("Please install lmdeploy before intialize lmdeploy backend.")
-
+
kwargs.pop("response_format", None)
max_new_tokens = kwargs.pop("max_tokens", 512)
- tp = kwargs.pop('tp', 1)
- skip_special_tokens = kwargs.pop('skip_special_tokens', False)
- do_preprocess = kwargs.pop('do_preprocess', True)
- do_sample = kwargs.pop('do_sample', False)
+ tp = kwargs.pop("tp", 1)
+ skip_special_tokens = kwargs.pop("skip_special_tokens", True)
+ do_preprocess = kwargs.pop("do_preprocess", True)
+ do_sample = kwargs.pop("do_sample", False)
gen_params = kwargs
-
+
version = version_info
if do_sample is not None and version < (0, 6, 0):
raise RuntimeError(
- '`do_sample` parameter is not supported by lmdeploy until '
- f'v0.6.0, but currently using lmdeloy {lmdeploy.__version__}')
+ "`do_sample` parameter is not supported by lmdeploy until "
+ f"v0.6.0, but currently using lmdeloy {lmdeploy.__version__}"
+ )
else:
do_sample = True
gen_params.update(do_sample=do_sample)
@@ -394,7 +416,8 @@ async def lmdeploy_model_if_cache(
chat_template=chat_template,
model_format=model_format,
quant_policy=quant_policy,
- log_level='WARNING')
+ log_level="WARNING",
+ )
messages = []
if system_prompt:
@@ -410,11 +433,19 @@ async def lmdeploy_model_if_cache(
return if_cache_return["return"]
gen_config = GenerationConfig(
- skip_special_tokens=skip_special_tokens, max_new_tokens=max_new_tokens, **gen_params)
+ skip_special_tokens=skip_special_tokens,
+ max_new_tokens=max_new_tokens,
+ **gen_params,
+ )
response = ""
- async for res in lmdeploy_pipe.generate(messages, gen_config=gen_config,
- do_preprocess=do_preprocess, stream_response=False, session_id=1):
+ async for res in lmdeploy_pipe.generate(
+ messages,
+ gen_config=gen_config,
+ do_preprocess=do_preprocess,
+ stream_response=False,
+ session_id=1,
+ ):
response += res.response
if hashing_kv is not None:
From 627776d99be2e8a52cec1b30a826a91cb765cd44 Mon Sep 17 00:00:00 2001
From: Ken Wiltshire
Date: Sat, 26 Oct 2024 05:57:56 -0400
Subject: [PATCH 111/258] adding neo4j
---
lightrag/storage.py | 218 ++++++++++++++++++++++++++++++++------------
1 file changed, 161 insertions(+), 57 deletions(-)
diff --git a/lightrag/storage.py b/lightrag/storage.py
index cd313397..19c0ce92 100644
--- a/lightrag/storage.py
+++ b/lightrag/storage.py
@@ -425,68 +425,172 @@ async def edge_degree(self, src_id: str, tgt_id: str) -> int:
# print("Degree of edge between source and target:", degree)
-
- #get_edge
- # def get_edge(driver, node_id):
- # with driver.session() as session:
- # result = session.run(
- # """
- # MATCH (n)-[r]-(m)
- # WHERE id(n) = $node_id
- # RETURN r
- # """,
- # node_id=node_id
- # )
- # return [record["r"] for record in result]
-
- # driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
+ async def get_edge(self, source_node_id: str, target_node_id: str) -> Union[dict, None]:
+ entity_name__label_source = src_id
+ entity_name_label_target = tgt_id
+ """
+ Find all edges between nodes of two given labels
+
+ Args:
+ source_node_label (str): Label of the source nodes
+ target_node_label (str): Label of the target nodes
+
+ Returns:
+ list: List of all relationships/edges found
+ """
+ with self.driver.session() as session:
+ query = f"""
+ MATCH (source:{entity_name__label_source})-[r]-(target:{entity_name_label_target})
+ RETURN r
+ """
+
+ result = session.run(query)
+ return [record["r"] for record in result]
- # edges = get_node_edges(driver, 123) # Replace 123 with the actual node ID
- # for edge in edges:
- # print(f"Edge ID: {edge.id}, Type: {edge.type}, Start: {edge.start_node.id}, End: {edge.end_node.id}")
+#upsert_node
+ async def upsert_node(self, node_id: str, node_data: dict[str, str]):
+ label = node_id
+ properties = node_data
+ """
+ Upsert a node with the given label and properties within a transaction.
+ If a node with the same label exists, it will:
+ - Update existing properties with new values
+ - Add new properties that don't exist
+ If no node exists, creates a new node with all properties.
+
+ Args:
+ label: The node label to search for and apply
+ properties: Dictionary of node properties
+
+ Returns:
+ Dictionary containing the node's properties after upsert, or None if operation fails
+ """
+ with self.driver.session() as session:
+ # Execute the upsert within a transaction
+ result = session.execute_write(
+ self._do_upsert,
+ label,
+ properties
+ )
+ return result
+
- # driver.close()
+ @staticmethod
+ def _do_upsert(tx: Transaction, label: str, properties: Dict[str, Any]):
+ """
+ Static method to perform the actual upsert operation within a transaction
+
+ Args:
+ tx: Neo4j transaction object
+ label: The node label to search for and apply
+ properties: Dictionary of node properties
+
+ Returns:
+ Dictionary containing the node's properties after upsert, or None if operation fails
+ """
+ # Create the dynamic property string for SET clause
+ property_string = ", ".join([
+ f"n.{key} = ${key}"
+ for key in properties.keys()
+ ])
+
+ # Cypher query that either matches existing node or creates new one
+ query = f"""
+ MATCH (n:{label})
+ WITH n LIMIT 1
+ CALL {{
+ WITH n
+ WHERE n IS NOT NULL
+ SET {property_string}
+ RETURN n
+ UNION
+ WITH n
+ WHERE n IS NULL
+ CREATE (n:{label})
+ SET {property_string}
+ RETURN n
+ }}
+ RETURN n
+ """
+
+ # Execute the query with properties as parameters
+ result = tx.run(query, properties)
+ record = result.single()
+
+ if record:
+ return dict(record["n"])
+ return None
+
+
+ async def upsert_edge(self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]) -> None:
+ source_node_label = source_node_id
+ target_node_label = target_node_id
+ """
+ Upsert an edge and its properties between two nodes identified by their labels.
+
+ Args:
+ source_node_label (str): Label of the source node (used as identifier)
+ target_node_label (str): Label of the target node (used as identifier)
+ edge_properties (dict): Dictionary of properties to set on the edge
+ """
+ with self._driver.session() as session:
+ session.execute_write(
+ self._do_upsert_edge,
+ source_node_label,
+ target_node_label,
+ edge_data
+ )
-#upsert_node
- #add_node, upsert_node
- # async def upsert_node(self, node_id: str, node_data: dict[str, str]):
- # node_name = node_id
- # with driver.session() as session:
- # session.run("CREATE (p:$node_name $node_data)", node_name=node_name, node_data=**node_data)
-
- # with GraphDatabase.driver(URI, auth=AUTH) as driver:
- # add_node(driver, entity, data)
-
-#async def upsert_edge(self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]):
- # def add_edge_with_data(tx, source_node_id, target_node_id, relationship_type, edge_data: dict[str, str]):
- # source_node_name = source_node_id
- # target_node_name = target_node_id
- # tx.run("MATCH (s), (t) WHERE id(s) = $source_node_id AND id(t) = $target_node_id "
- # "CREATE (s)-[r:$relationship_type]->(t) SET r = $data",
- # source_node_id=source_node_id, target_node_id=target_node_id,
- # relationship_type=relationship_type, data=edge_data)
-
- # with driver.session() as session:
- # session.write_transaction(add_edge_with_data, 1, 2, "KNOWS", {"since": 2020, "strength": 5})
-
-
-#async def _node2vec_embed(self):
- # # async def _node2vec_embed(self):
- # with driver.session() as session:
- # #Define the Cypher query
- # options = self.global_config["node2vec_params"]
- # query = f"""CALL gds.node2vec.stream('myGraph', {**options})
- # YIELD nodeId, embedding
- # RETURN nodeId, embedding"""
- # # Run the query and process the results
- # results = session.run(query)
- # for record in results:
- # node_id = record["nodeId"]
- # embedding = record["embedding"]
- # print(f"Node ID: {node_id}, Embedding: {embedding}")
- # #need to return two lists here.
+ @staticmethod
+ def _do_upsert_edge(tx, source_node_label: str, target_node_label: str, edge_properties: Dict[str, Any]) -> None:
+ """
+ Static method to perform the edge upsert within a transaction.
+
+ The query will:
+ 1. Match the source and target nodes by their labels
+ 2. Merge the DIRECTED relationship
+ 3. Set all properties on the relationship, updating existing ones and adding new ones
+ """
+ # Convert edge properties to Cypher parameter string
+ props_string = ", ".join(f"r.{key} = ${key}" for key in edge_properties.keys())
+
+ query = """
+ MATCH (source)
+ WHERE source.label = $source_node_label
+ MATCH (target)
+ WHERE target.label = $target_node_label
+ MERGE (source)-[r:DIRECTED]->(target)
+ SET {}
+ """.format(props_string)
+
+ # Prepare parameters dictionary
+ params = {
+ "source_node_label": source_node_label,
+ "target_node_label": target_node_label,
+ **edge_properties
+ }
+
+ # Execute the query
+ tx.run(query, params)
+
+
+async def _node2vec_embed(self):
+ # async def _node2vec_embed(self):
+ with driver.session() as session:
+ #Define the Cypher query
+ options = self.global_config["node2vec_params"]
+ query = f"""CALL gds.node2vec.stream('myGraph', {**options})
+ YIELD nodeId, embedding
+ RETURN nodeId, embedding"""
+ # Run the query and process the results
+ results = session.run(query)
+ for record in results:
+ node_id = record["nodeId"]
+ embedding = record["embedding"]
+ print(f"Node ID: {node_id}, Embedding: {embedding}")
+ #need to return two lists here.
From 474fe59a79432e7fe770c0de12488a0b4c6fe970 Mon Sep 17 00:00:00 2001
From: Ken Wiltshire
Date: Sat, 26 Oct 2024 19:29:45 -0400
Subject: [PATCH 112/258] use separate module
---
lightrag/kg/__init__.py | 5 +
lightrag/kg/neo4j.py | 278 +++++++++++++++++++++++++++++++++++++
lightrag/storage.py | 298 +---------------------------------------
3 files changed, 285 insertions(+), 296 deletions(-)
create mode 100644 lightrag/kg/__init__.py
create mode 100644 lightrag/kg/neo4j.py
diff --git a/lightrag/kg/__init__.py b/lightrag/kg/__init__.py
new file mode 100644
index 00000000..db81e005
--- /dev/null
+++ b/lightrag/kg/__init__.py
@@ -0,0 +1,5 @@
+from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
+
+__version__ = "0.0.7"
+__author__ = "Zirui Guo"
+__url__ = "https://github.com/HKUDS/LightRAG"
diff --git a/lightrag/kg/neo4j.py b/lightrag/kg/neo4j.py
new file mode 100644
index 00000000..5ec5b0cc
--- /dev/null
+++ b/lightrag/kg/neo4j.py
@@ -0,0 +1,278 @@
+import asyncio
+import html
+import os
+from dataclasses import dataclass
+from typing import Any, Union, cast
+import networkx as nx
+import numpy as np
+from nano_vectordb import NanoVectorDB
+
+from .utils import load_json, logger, write_json
+from ..base import (
+ BaseGraphStorage
+)
+from neo4j import GraphDatabase
+# Replace with your actual URI, username, and password
+URI = "neo4j://localhost:7687"
+USERNAME = "neo4j"
+PASSWORD = "your_password"
+# Create a driver object
+
+
+@dataclass
+class GraphStorage(BaseGraphStorage):
+ @staticmethod
+ def load_nx_graph(file_name) -> nx.Graph:
+ if os.path.exists(file_name):
+ return nx.read_graphml(file_name)
+ return None
+
+ def __post_init__(self):
+ # self._graph = preloaded_graph or nx.Graph()
+ self._driver = GraphDatabase.driver(URI, auth=(USERNAME, PASSWORD))
+ self._node_embed_algorithms = {
+ "node2vec": self._node2vec_embed,
+ }
+
+ async def index_done_callback(self):
+ print ("KG successfully indexed.")
+ async def has_node(self, node_id: str) -> bool:
+ entity_name_label = node_id
+ with self._driver.session() as session:
+ return session.read_transaction(self._check_node_exists, entity_name_label)
+
+ @staticmethod
+ def _check_node_exists(tx, label):
+ query = f"MATCH (n:{label}) RETURN count(n) > 0 AS node_exists"
+ result = tx.run(query)
+ return result.single()["node_exists"]
+
+ async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
+ entity_name_label_source = source_node_id
+ entity_name_label_target = target_node_id
+ #hard code relaitionship type
+ with self._driver.session() as session:
+ result = session.read_transaction(self._check_edge_existence, entity_name_label_source, entity_name_label_target)
+ return result
+
+ @staticmethod
+ def _check_edge_existence(tx, label1, label2):
+ query = (
+ f"MATCH (a:{label1})-[r]-(b:{label2}) "
+ "RETURN COUNT(r) > 0 AS edgeExists"
+ )
+ result = tx.run(query)
+ return result.single()["edgeExists"]
+ def close(self):
+ self._driver.close()
+
+
+
+ async def get_node(self, node_id: str) -> Union[dict, None]:
+ entity_name_label = node_id
+ with self._driver.session() as session:
+ result = session.run("MATCH (n:{entity_name_label}) RETURN n".format(entity_name_label=entity_name_label))
+ for record in result:
+ return record["n"]
+
+
+
+ async def node_degree(self, node_id: str) -> int:
+ entity_name_label = node_id
+ with self._driver.session() as session:
+ degree = self._find_node_degree(session, entity_name_label)
+ return degree
+
+ @staticmethod
+ def _find_node_degree(session, label):
+ with session.begin_transaction() as tx:
+ result = tx.run("MATCH (n:`{label}`) RETURN n, size((n)--()) AS degree".format(label=label))
+ record = result.single()
+ if record:
+ return record["degree"]
+ else:
+ return None
+
+
+ # degree = session.read_transaction(get_edge_degree, 1, 2)
+ async def edge_degree(self, src_id: str, tgt_id: str) -> int:
+ entity_name__label_source = src_id
+ entity_name_label_target = tgt_id
+ with self._driver.session() as session:
+ result = session.run(
+ """MATCH (n1:{node_label1})-[r]-(n2:{node_label2})
+ RETURN count(r) AS degree"""
+ .format(node_label1=node_label1, node_label2=node_label2)
+ )
+ record = result.single()
+ return record["degree"]
+
+ async def get_edge(self, source_node_id: str, target_node_id: str) -> Union[dict, None]:
+ entity_name__label_source = source_node_id
+ entity_name_label_target = target_node_id
+ """
+ Find all edges between nodes of two given labels
+
+ Args:
+ source_node_label (str): Label of the source nodes
+ target_node_label (str): Label of the target nodes
+
+ Returns:
+ list: List of all relationships/edges found
+ """
+ with self._driver.session() as session:
+ query = f"""
+ MATCH (source:{entity_name__label_source})-[r]-(target:{entity_name_label_target})
+ RETURN r
+ """
+
+ result = session.run(query)
+ return [record["r"] for record in result]
+
+
+#upsert_node
+ async def upsert_node(self, node_id: str, node_data: dict[str, str]):
+ label = node_id
+ properties = node_data
+ """
+ Upsert a node with the given label and properties within a transaction.
+ If a node with the same label exists, it will:
+ - Update existing properties with new values
+ - Add new properties that don't exist
+ If no node exists, creates a new node with all properties.
+
+ Args:
+ label: The node label to search for and apply
+ properties: Dictionary of node properties
+
+ Returns:
+ Dictionary containing the node's properties after upsert, or None if operation fails
+ """
+ with self._driver.session() as session:
+ # Execute the upsert within a transaction
+ result = session.execute_write(
+ self._do_upsert,
+ label,
+ properties
+ )
+ return result
+
+
+ @staticmethod
+ def _do_upsert(tx: Transaction, label: str, properties: Dict[str, Any]):
+ """
+ Static method to perform the actual upsert operation within a transaction
+
+ Args:
+ tx: Neo4j transaction object
+ label: The node label to search for and apply
+ properties: Dictionary of node properties
+
+ Returns:
+ Dictionary containing the node's properties after upsert, or None if operation fails
+ """
+ # Create the dynamic property string for SET clause
+ property_string = ", ".join([
+ f"n.{key} = ${key}"
+ for key in properties.keys()
+ ])
+
+ # Cypher query that either matches existing node or creates new one
+ query = f"""
+ MATCH (n:{label})
+ WITH n LIMIT 1
+ CALL {{
+ WITH n
+ WHERE n IS NOT NULL
+ SET {property_string}
+ RETURN n
+ UNION
+ WITH n
+ WHERE n IS NULL
+ CREATE (n:{label})
+ SET {property_string}
+ RETURN n
+ }}
+ RETURN n
+ """
+
+ # Execute the query with properties as parameters
+ result = tx.run(query, properties)
+ record = result.single()
+
+ if record:
+ return dict(record["n"])
+ return None
+
+
+
+ async def upsert_edge(self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]) -> None:
+ source_node_label = source_node_id
+ target_node_label = target_node_id
+ """
+ Upsert an edge and its properties between two nodes identified by their labels.
+
+ Args:
+ source_node_label (str): Label of the source node (used as identifier)
+ target_node_label (str): Label of the target node (used as identifier)
+ edge_properties (dict): Dictionary of properties to set on the edge
+ """
+ with self._driver.session() as session:
+ session.execute_write(
+ self._do_upsert_edge,
+ source_node_label,
+ target_node_label,
+ edge_data
+ )
+
+ @staticmethod
+ def _do_upsert_edge(tx, source_node_label: str, target_node_label: str, edge_properties: Dict[str, Any]) -> None:
+ """
+ Static method to perform the edge upsert within a transaction.
+
+ The query will:
+ 1. Match the source and target nodes by their labels
+ 2. Merge the DIRECTED relationship
+ 3. Set all properties on the relationship, updating existing ones and adding new ones
+ """
+ # Convert edge properties to Cypher parameter string
+ props_string = ", ".join(f"r.{key} = ${key}" for key in edge_properties.keys())
+
+ query = """
+ MATCH (source)
+ WHERE source.label = $source_node_label
+ MATCH (target)
+ WHERE target.label = $target_node_label
+ MERGE (source)-[r:DIRECTED]->(target)
+ SET {}
+ """.format(props_string)
+
+ # Prepare parameters dictionary
+ params = {
+ "source_node_label": source_node_label,
+ "target_node_label": target_node_label,
+ **edge_properties
+ }
+
+ # Execute the query
+ tx.run(query, params)
+
+
+ async def _node2vec_embed(self):
+ # async def _node2vec_embed(self):
+ with self._driver.session() as session:
+ #Define the Cypher query
+ options = self.global_config["node2vec_params"]
+ query = f"""CALL gds.node2vec.stream('myGraph', {**options})
+ YIELD nodeId, embedding
+ RETURN nodeId, embedding"""
+ # Run the query and process the results
+ results = session.run(query)
+ for record in results:
+ node_id = record["nodeId"]
+ embedding = record["embedding"]
+ print(f"Node ID: {node_id}, Embedding: {embedding}")
+ #need to return two lists here.
+
+
+
diff --git a/lightrag/storage.py b/lightrag/storage.py
index 19c0ce92..85ba2aaa 100644
--- a/lightrag/storage.py
+++ b/lightrag/storage.py
@@ -6,6 +6,8 @@
import networkx as nx
import numpy as np
from nano_vectordb import NanoVectorDB
+from kg.neo4j import GraphStorage
+
from .utils import load_json, logger, write_json
from .base import (
@@ -298,299 +300,3 @@ async def _node2vec_embed(self):
return embeddings, nodes_ids
-@dataclass
-class Neo4JStorage(BaseGraphStorage):
- @staticmethod
- def load_nx_graph(file_name) -> nx.Graph:
- if os.path.exists(file_name):
- return nx.read_graphml(file_name)
- return None
-
- # @staticmethod
- # def write_nx_graph(graph: nx.Graph, file_name):
- # logger.info(
- # f"Writing graph with {graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges"
- # )
- # nx.write_graphml(graph, file_name)
-
-
- def __post_init__(self):
- self._graphml_xml_file = os.path.join(
- self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
- )
- preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
- if preloaded_graph is not None:
- logger.info(
- f"Loaded graph from {self._graphml_xml_file} with {preloaded_graph.number_of_nodes()} nodes, {preloaded_graph.number_of_edges()} edges"
- )
- self._graph = preloaded_graph or nx.Graph()
- self._node_embed_algorithms = {
- "node2vec": self._node2vec_embed,
- }
-
- async def index_done_callback(self):
- print ("KG successfully indexed.")
- # Neo4JStorage.write_nx_graph(self._graph, self._graphml_xml_file)
- async def has_node(self, node_id: str) -> bool:
- entity_name_label = node_id
- with self.driver.session() as session:
- return session.read_transaction(self._check_node_exists, entity_name_label)
-
- @staticmethod
- def _check_node_exists(tx, label):
- query = f"MATCH (n:{label}) RETURN count(n) > 0 AS node_exists"
- result = tx.run(query)
- return result.single()["node_exists"]
-
- async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
- entity_name_label_source = source_node_id
- entity_name_label_target = target_node_id
- #hard code relaitionship type
- with self.driver.session() as session:
- result = session.read_transaction(self._check_edge_existence, entity_name_label_source, entity_name_label_target)
- return result
-
- @staticmethod
- def _check_edge_existence(tx, label1, label2):
- query = (
- f"MATCH (a:{label1})-[r]-(b:{label2}) "
- "RETURN COUNT(r) > 0 AS edgeExists"
- )
- result = tx.run(query)
- return result.single()["edgeExists"]
- def close(self):
- self.driver.close()
-
-
-
- async def get_node(self, node_id: str) -> Union[dict, None]:
- entity_name_label = node_id
- with driver.session() as session:
- result = session.run(
- "MATCH (n) WHERE n.name = $name RETURN n",
- name=node_name
- )
-
- for record in result:
- return record["n"] # Return the first matching node
-
-
-
- async def node_degree(self, node_id: str) -> int:
- entity_name_label = node_id
- neo4j = Neo4j("bolt://localhost:7687", "neo4j", "password")
- with neo4j.driver.session() as session:
- degree = Neo4j.find_node_degree(session, entity_name_label)
- return degree
-
- @staticmethod
- def find_node_degree(session, label):
- with session.begin_transaction() as tx:
- result = tx.run("MATCH (n:`{label}`) RETURN n, size((n)--()) AS degree".format(label=label))
- record = result.single()
- if record:
- return record["degree"]
- else:
- return None
-
- # edge_degree
- # from neo4j import GraphDatabase
- async def edge_degree(self, src_id: str, tgt_id: str) -> int:
- entity_name__label_source = src_id
- entity_name_label_target = tgt_id
- with graph_db.session() as session:
- result = session.run(
- """MATCH (n1:{node_label1})-[r]-(n2:{node_label2})
- RETURN count(r) AS degree"""
- .format(node_label1=node_label1, node_label2=node_label2)
- )
- record = result.single()
- return record["degree"]
- # driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
-
- #
- #
- # def edge_degree(tx, source_id, target_id):
- # result = tx.run("""
- # MATCH (source) WHERE ID(source) = $source_id
- # MATCH (target) WHERE ID(target) = $target_id
- # MATCH (source)-[r]-(target)
- # RETURN COUNT(r) AS degree
- # """, source_id=source_id, target_id=target_id)
-
- # return result.single()["degree"]
-
- # with driver.session() as session:
- # degree = session.read_transaction(get_edge_degree, 1, 2)
- # print("Degree of edge between source and target:", degree)
-
-
- async def get_edge(self, source_node_id: str, target_node_id: str) -> Union[dict, None]:
- entity_name__label_source = src_id
- entity_name_label_target = tgt_id
- """
- Find all edges between nodes of two given labels
-
- Args:
- source_node_label (str): Label of the source nodes
- target_node_label (str): Label of the target nodes
-
- Returns:
- list: List of all relationships/edges found
- """
- with self.driver.session() as session:
- query = f"""
- MATCH (source:{entity_name__label_source})-[r]-(target:{entity_name_label_target})
- RETURN r
- """
-
- result = session.run(query)
- return [record["r"] for record in result]
-
-
-#upsert_node
- async def upsert_node(self, node_id: str, node_data: dict[str, str]):
- label = node_id
- properties = node_data
- """
- Upsert a node with the given label and properties within a transaction.
- If a node with the same label exists, it will:
- - Update existing properties with new values
- - Add new properties that don't exist
- If no node exists, creates a new node with all properties.
-
- Args:
- label: The node label to search for and apply
- properties: Dictionary of node properties
-
- Returns:
- Dictionary containing the node's properties after upsert, or None if operation fails
- """
- with self.driver.session() as session:
- # Execute the upsert within a transaction
- result = session.execute_write(
- self._do_upsert,
- label,
- properties
- )
- return result
-
-
- @staticmethod
- def _do_upsert(tx: Transaction, label: str, properties: Dict[str, Any]):
- """
- Static method to perform the actual upsert operation within a transaction
-
- Args:
- tx: Neo4j transaction object
- label: The node label to search for and apply
- properties: Dictionary of node properties
-
- Returns:
- Dictionary containing the node's properties after upsert, or None if operation fails
- """
- # Create the dynamic property string for SET clause
- property_string = ", ".join([
- f"n.{key} = ${key}"
- for key in properties.keys()
- ])
-
- # Cypher query that either matches existing node or creates new one
- query = f"""
- MATCH (n:{label})
- WITH n LIMIT 1
- CALL {{
- WITH n
- WHERE n IS NOT NULL
- SET {property_string}
- RETURN n
- UNION
- WITH n
- WHERE n IS NULL
- CREATE (n:{label})
- SET {property_string}
- RETURN n
- }}
- RETURN n
- """
-
- # Execute the query with properties as parameters
- result = tx.run(query, properties)
- record = result.single()
-
- if record:
- return dict(record["n"])
- return None
-
-
-
- async def upsert_edge(self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]) -> None:
- source_node_label = source_node_id
- target_node_label = target_node_id
- """
- Upsert an edge and its properties between two nodes identified by their labels.
-
- Args:
- source_node_label (str): Label of the source node (used as identifier)
- target_node_label (str): Label of the target node (used as identifier)
- edge_properties (dict): Dictionary of properties to set on the edge
- """
- with self._driver.session() as session:
- session.execute_write(
- self._do_upsert_edge,
- source_node_label,
- target_node_label,
- edge_data
- )
-
- @staticmethod
- def _do_upsert_edge(tx, source_node_label: str, target_node_label: str, edge_properties: Dict[str, Any]) -> None:
- """
- Static method to perform the edge upsert within a transaction.
-
- The query will:
- 1. Match the source and target nodes by their labels
- 2. Merge the DIRECTED relationship
- 3. Set all properties on the relationship, updating existing ones and adding new ones
- """
- # Convert edge properties to Cypher parameter string
- props_string = ", ".join(f"r.{key} = ${key}" for key in edge_properties.keys())
-
- query = """
- MATCH (source)
- WHERE source.label = $source_node_label
- MATCH (target)
- WHERE target.label = $target_node_label
- MERGE (source)-[r:DIRECTED]->(target)
- SET {}
- """.format(props_string)
-
- # Prepare parameters dictionary
- params = {
- "source_node_label": source_node_label,
- "target_node_label": target_node_label,
- **edge_properties
- }
-
- # Execute the query
- tx.run(query, params)
-
-
-async def _node2vec_embed(self):
- # async def _node2vec_embed(self):
- with driver.session() as session:
- #Define the Cypher query
- options = self.global_config["node2vec_params"]
- query = f"""CALL gds.node2vec.stream('myGraph', {**options})
- YIELD nodeId, embedding
- RETURN nodeId, embedding"""
- # Run the query and process the results
- results = session.run(query)
- for record in results:
- node_id = record["nodeId"]
- embedding = record["embedding"]
- print(f"Node ID: {node_id}, Embedding: {embedding}")
- #need to return two lists here.
-
-
-
From 0796d3d8e0b04657f2f0a69106bafa008b848c37 Mon Sep 17 00:00:00 2001
From: Ken Wiltshire
Date: Sun, 27 Oct 2024 15:37:41 -0400
Subject: [PATCH 113/258] inference running locally. use neo4j next
---
.gitignore | 3 +-
lightrag/kg/__init__.py | 30 +++++++++++++++++---
lightrag/kg/neo4j.py | 20 +++++++------
lightrag/lightrag.py | 15 +++++++++-
lightrag/llm.py | 4 ++-
lightrag/storage.py | 63 ++---------------------------------------
requirements.txt | 3 +-
test.py | 36 +++++++++++++++++++++++
8 files changed, 98 insertions(+), 76 deletions(-)
create mode 100644 test.py
diff --git a/.gitignore b/.gitignore
index 5a41ae32..5eabc386 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,5 @@ dickens/
book.txt
lightrag-dev/
.idea/
-dist/
\ No newline at end of file
+dist/
+env/
\ No newline at end of file
diff --git a/lightrag/kg/__init__.py b/lightrag/kg/__init__.py
index db81e005..ddd2bb79 100644
--- a/lightrag/kg/__init__.py
+++ b/lightrag/kg/__init__.py
@@ -1,5 +1,27 @@
-from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
+print ("init package vars here. ......")
+from .neo4j import GraphStorage as Neo4JStorage
+
+
+# import sys
+# import importlib
+# # Specify the path to the directory containing the module
+# # Add the directory to the system path
+# module_dir = '/Users/kenwiltshire/documents/dev/LightRag/lightrag/kg'
+# sys.path.append(module_dir)
+# # Specify the module name
+# module_name = 'neo4j'
+# # Import the module
+# spec = importlib.util.spec_from_file_location(module_name, f'{module_dir}/{module_name}.py')
+
+# Neo4JStorage = importlib.util.module_from_spec(spec)
+# spec.loader.exec_module(Neo4JStorage)
+
+
+
+# Relative imports are still possible by adding a leading period to the module name when using the from ... import form:
+
+# # Import names from pkg.string
+# from .string import name1, name2
+# # Import pkg.string
+# from . import string
-__version__ = "0.0.7"
-__author__ = "Zirui Guo"
-__url__ = "https://github.com/HKUDS/LightRAG"
diff --git a/lightrag/kg/neo4j.py b/lightrag/kg/neo4j.py
index 5ec5b0cc..7205d9dc 100644
--- a/lightrag/kg/neo4j.py
+++ b/lightrag/kg/neo4j.py
@@ -3,11 +3,15 @@
import os
from dataclasses import dataclass
from typing import Any, Union, cast
-import networkx as nx
import numpy as np
from nano_vectordb import NanoVectorDB
-from .utils import load_json, logger, write_json
+
+
+# import package.common.utils as utils
+
+
+from lightrag.utils import load_json, logger, write_json
from ..base import (
BaseGraphStorage
)
@@ -22,10 +26,10 @@
@dataclass
class GraphStorage(BaseGraphStorage):
@staticmethod
- def load_nx_graph(file_name) -> nx.Graph:
- if os.path.exists(file_name):
- return nx.read_graphml(file_name)
- return None
+ # def load_nx_graph(file_name) -> nx.Graph:
+ # if os.path.exists(file_name):
+ # return nx.read_graphml(file_name)
+ # return None
def __post_init__(self):
# self._graph = preloaded_graph or nx.Graph()
@@ -102,7 +106,7 @@ async def edge_degree(self, src_id: str, tgt_id: str) -> int:
result = session.run(
"""MATCH (n1:{node_label1})-[r]-(n2:{node_label2})
RETURN count(r) AS degree"""
- .format(node_label1=node_label1, node_label2=node_label2)
+ .format(entity_name__label_source=entity_name__label_source, entity_name_label_target=entity_name_label_target)
)
record = result.single()
return record["degree"]
@@ -263,7 +267,7 @@ async def _node2vec_embed(self):
with self._driver.session() as session:
#Define the Cypher query
options = self.global_config["node2vec_params"]
- query = f"""CALL gds.node2vec.stream('myGraph', {**options})
+ query = f"""CALL gds.node2vec.stream('myGraph', {options}) # **options
YIELD nodeId, embedding
RETURN nodeId, embedding"""
# Run the query and process the results
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 5137af42..28a4af19 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -1,5 +1,6 @@
import asyncio
import os
+import importlib
from dataclasses import asdict, dataclass, field
from datetime import datetime
from functools import partial
@@ -23,6 +24,11 @@
NanoVectorDBStorage,
NetworkXStorage,
)
+
+from .kg.neo4j import (
+ GraphStorage as Neo4JStorage
+)
+
from .utils import (
EmbeddingFunc,
compute_mdhash_id,
@@ -93,7 +99,14 @@ class LightRAG:
key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage
vector_db_storage_cls: Type[BaseVectorStorage] = NanoVectorDBStorage
vector_db_storage_cls_kwargs: dict = field(default_factory=dict)
- graph_storage_cls: Type[BaseGraphStorage] = NetworkXStorage
+
+ # module = importlib.import_module('kg.neo4j')
+ # Neo4JStorage = getattr(module, 'GraphStorage')
+
+ if True==False:
+ graph_storage_cls: Type[BaseGraphStorage] = Neo4JStorage
+ else:
+ graph_storage_cls: Type[BaseGraphStorage] = NetworkXStorage
enable_llm_cache: bool = True
# extension
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 4dcf535c..208b22a5 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -72,7 +72,9 @@ async def openai_complete_if_cache(
@retry(
stop=stop_after_attempt(3),
- wait=wait_exponential(multiplier=1, min=4, max=10),
+ #kw_
+ wait=wait_exponential(multiplier=1, min=4, max=60),
+ # wait=wait_exponential(multiplier=1, min=4, max=10),
retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
)
async def azure_openai_complete_if_cache(model,
diff --git a/lightrag/storage.py b/lightrag/storage.py
index 85ba2aaa..1f22fc56 100644
--- a/lightrag/storage.py
+++ b/lightrag/storage.py
@@ -6,8 +6,6 @@
import networkx as nx
import numpy as np
from nano_vectordb import NanoVectorDB
-from kg.neo4j import GraphStorage
-
from .utils import load_json, logger, write_json
from .base import (
@@ -99,66 +97,14 @@ async def upsert(self, data: dict[str, dict]):
d["__vector__"] = embeddings[i]
results = self._client.upsert(datas=list_data)
return results
-
-
-@dataclass
-class PineConeVectorDBStorage(BaseVectorStorage):
- cosine_better_than_threshold: float = 0.2
-
- def __post_init__(self):
- self._client_file_name = os.path.join(
- self.global_config["working_dir"], f"vdb_{self.namespace}.json"
- )
- self._max_batch_size = self.global_config["embedding_batch_num"]
- self._client = NanoVectorDB(
- self.embedding_func.embedding_dim, storage_file=self._client_file_name
- )
- import os
- from pinecone import Pinecone
-
- pc = Pinecone() #api_key=os.environ.get('PINECONE_API_KEY'))
- # From here on, everything is identical to the REST-based SDK.
- self._client = pc.Index(host=self._client_pinecone_host)#'my-index-8833ca1.svc.us-east1-gcp.pinecone.io')
-
- self.cosine_better_than_threshold = self.global_config.get(
- "cosine_better_than_threshold", self.cosine_better_than_threshold
- )
-
- async def upsert(self, data: dict[str, dict]):
- logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
- if not len(data):
- logger.warning("You insert an empty data to vector DB")
- return []
- list_data = [
- {
- "__id__": k,
- **{k1: v1 for k1, v1 in v.items() if k1 in self.meta_fields},
- }
- for k, v in data.items()
- ]
- contents = [v["content"] for v in data.values()]
- batches = [
- contents[i : i + self._max_batch_size]
- for i in range(0, len(contents), self._max_batch_size)
- ]
- embeddings_list = await asyncio.gather(
- *[self.embedding_func(batch) for batch in batches]
- )
- embeddings = np.concatenate(embeddings_list)
- for i, d in enumerate(list_data):
- d["__vector__"] = embeddings[i]
- # self._client.upsert(vectors=[]) pinecone
- results = self._client.upsert(datas=list_data)
- return results
async def query(self, query: str, top_k=5):
embedding = await self.embedding_func([query])
embedding = embedding[0]
- # self._client.query(vector=[...], top_key=10) pinecone
results = self._client.query(
- vector=embedding,
+ query=embedding,
top_k=top_k,
- better_than_threshold=self.cosine_better_than_threshold, ???
+ better_than_threshold=self.cosine_better_than_threshold,
)
results = [
{**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
@@ -166,8 +112,7 @@ async def query(self, query: str, top_k=5):
return results
async def index_done_callback(self):
- print("self._client.save()")
- # self._client.save()
+ self._client.save()
@dataclass
@@ -298,5 +243,3 @@ async def _node2vec_embed(self):
nodes_ids = [self._graph.nodes[node_id]["id"] for node_id in nodes]
return embeddings, nodes_ids
-
-
diff --git a/requirements.txt b/requirements.txt
index 5b3396fb..897c53f8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,4 +12,5 @@ torch
transformers
xxhash
pyvis
-aiohttp
\ No newline at end of file
+aiohttp
+neo4j
diff --git a/test.py b/test.py
new file mode 100644
index 00000000..4167222b
--- /dev/null
+++ b/test.py
@@ -0,0 +1,36 @@
+import os
+from lightrag import LightRAG, QueryParam
+from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
+
+#########
+# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
+# import nest_asyncio
+# nest_asyncio.apply()
+#########
+
+WORKING_DIR = "./dickens"
+
+
+if not os.path.exists(WORKING_DIR):
+ os.mkdir(WORKING_DIR)
+
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=gpt_4o_mini_complete # Use gpt_4o_mini_complete LLM model
+ # llm_model_func=gpt_4o_complete # Optionally, use a stronger model
+)
+
+with open("./book.txt") as f:
+ rag.insert(f.read())
+
+# Perform naive search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+
+# Perform local search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+
+# Perform global search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+
+# Perform hybrid search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
\ No newline at end of file
From 6fe468b4f45ea46e8e7fe0d5034baf083b155467 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Mon, 28 Oct 2024 09:59:40 +0800
Subject: [PATCH 114/258] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index d11b1691..bfdf920f 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
-
+
From b2021216e90644e515e8717fc32493b0bd17b54b Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Mon, 28 Oct 2024 15:08:41 +0800
Subject: [PATCH 115/258] Update README.md
---
README.md | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index bfdf920f..15696b57 100644
--- a/README.md
+++ b/README.md
@@ -237,7 +237,15 @@ rag.insert(["TEXT1", "TEXT2",...])
```python
# Incremental Insert: Insert new documents into an existing LightRAG instance
-rag = LightRAG(working_dir="./dickens")
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=llm_model_func,
+ embedding_func=EmbeddingFunc(
+ embedding_dim=embedding_dimension,
+ max_token_size=8192,
+ func=embedding_func,
+ ),
+)
with open("./newText.txt") as f:
rag.insert(f.read())
From 29bf41df4d26ae822bca89cc4e86cc84b44ba934 Mon Sep 17 00:00:00 2001
From: Andrii Lazarchuk
Date: Mon, 28 Oct 2024 17:05:38 +0200
Subject: [PATCH 116/258] Fix lint issue
---
examples/lightrag_ollama_demo.py | 5 ++---
lightrag/lightrag.py | 6 +++++-
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index 0a704024..1a320d13 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -1,14 +1,13 @@
import os
import logging
-
-logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
-
from lightrag import LightRAG, QueryParam
from lightrag.llm import ollama_model_complete, ollama_embedding
from lightrag.utils import EmbeddingFunc
WORKING_DIR = "./dickens"
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
+
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 955651fb..89ee1df5 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -155,7 +155,11 @@ def __post_init__(self):
)
self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
- partial(self.llm_model_func, hashing_kv=self.llm_response_cache, **self.llm_model_kwargs)
+ partial(
+ self.llm_model_func,
+ hashing_kv=self.llm_response_cache,
+ **self.llm_model_kwargs,
+ )
)
def insert(self, string_or_strings):
From 51c0788faa1c65114d264a8f60cd1557531e4940 Mon Sep 17 00:00:00 2001
From: Andrii Lazarchuk
Date: Mon, 28 Oct 2024 19:05:59 +0200
Subject: [PATCH 117/258] Update README with more details
---
README.md | 46 ++++++++++++++++++++++++++++++++++++++++------
1 file changed, 40 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index 15696b57..683dd0b2 100644
--- a/README.md
+++ b/README.md
@@ -163,7 +163,10 @@ rag = LightRAG(
Using Ollama Models
-* If you want to use Ollama models, you only need to set LightRAG as follows:
+### Overview
+If you want to use Ollama models, you need to pull model you plan to use and embedding model, for example `nomic-embed-text`.
+
+Then you only need to set LightRAG as follows:
```python
from lightrag.llm import ollama_model_complete, ollama_embedding
@@ -185,28 +188,59 @@ rag = LightRAG(
)
```
-* Increasing the `num_ctx` parameter:
+### Increasing context size
+In order for LightRAG to work context should be at least 32k tokens. By default Ollama models have context size of 8k. You can achieve this using one of two ways:
+
+#### Increasing the `num_ctx` parameter in Modelfile.
1. Pull the model:
-```python
+```bash
ollama pull qwen2
```
2. Display the model file:
-```python
+```bash
ollama show --modelfile qwen2 > Modelfile
```
3. Edit the Modelfile by adding the following line:
-```python
+```bash
PARAMETER num_ctx 32768
```
4. Create the modified model:
-```python
+```bash
ollama create -f Modelfile qwen2m
```
+#### Setup `num_ctx` via Ollama API.
+Tiy can use `llm_model_kwargs` param to configure ollama:
+
+```python
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=ollama_model_complete, # Use Ollama model for text generation
+ llm_model_name='your_model_name', # Your model name
+ llm_model_kwargs={"options": {"num_ctx": 32768}},
+ # Use Ollama embedding function
+ embedding_func=EmbeddingFunc(
+ embedding_dim=768,
+ max_token_size=8192,
+ func=lambda texts: ollama_embedding(
+ texts,
+ embed_model="nomic-embed-text"
+ )
+ ),
+)
+```
+#### Fully functional example
+
+There fully functional example `examples/lightrag_ollama_demo.py` that utilizes `gemma2:2b` model, runs only 4 requests in parallel and set context size to 32k.
+
+#### Low RAM GPUs
+
+In order to run this experiment on low RAM GPU you should select small model and tune context window (increasing context increase memory consumption). For example, running this ollama example on repurposed mining GPU with 6Gb of RAM required to set context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`.
+
### Query Param
From b6a216e40573b130879caf851bca16d8ff259860 Mon Sep 17 00:00:00 2001
From: MrGidea <98243922+MrGidea@users.noreply.github.com>
Date: Tue, 29 Oct 2024 15:44:41 +0800
Subject: [PATCH 118/258] Update README.md
---
README.md | 866 +-----------------------------------------------------
1 file changed, 11 insertions(+), 855 deletions(-)
diff --git a/README.md b/README.md
index 683dd0b2..c7dbf6b6 100644
--- a/README.md
+++ b/README.md
@@ -1,861 +1,17 @@
-
🚀 LightRAG: Simple and Fast Retrieval-Augmented Generation
-
-This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
-![请添加图片描述](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
-
-
-## 🎉 News
-- [x] [2024.10.20]🎯🎯📢📢We’ve added a new feature to LightRAG: Graph Visualization.
-- [x] [2024.10.18]🎯🎯📢📢We’ve added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author!
-- [x] [2024.10.17]🎯🎯📢📢We have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! 🎉🎉
-- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
-- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
-
-## Algorithm Flowchart
-
-![LightRAG_Self excalidraw](https://github.com/user-attachments/assets/aa5c4892-2e44-49e6-a116-2403ed80a1a3)
-
-
-## Install
-
-* Install from source (Recommend)
-
-```bash
-cd LightRAG
-pip install -e .
-```
-* Install from PyPI
-```bash
-pip install lightrag-hku
-```
-
-## Quick Start
-* [Video demo](https://www.youtube.com/watch?v=g21royNJ4fw) of running LightRAG locally.
-* All the code can be found in the `examples`.
-* Set OpenAI API key in environment if using OpenAI models: `export OPENAI_API_KEY="sk-...".`
-* Download the demo text "A Christmas Carol by Charles Dickens":
-```bash
-curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt
-```
-Use the below Python snippet (in a script) to initialize LightRAG and perform queries:
-
-```python
-import os
-from lightrag import LightRAG, QueryParam
-from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
-
-#########
-# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
-# import nest_asyncio
-# nest_asyncio.apply()
-#########
-
-WORKING_DIR = "./dickens"
-
-
-if not os.path.exists(WORKING_DIR):
- os.mkdir(WORKING_DIR)
-
-rag = LightRAG(
- working_dir=WORKING_DIR,
- llm_model_func=gpt_4o_mini_complete # Use gpt_4o_mini_complete LLM model
- # llm_model_func=gpt_4o_complete # Optionally, use a stronger model
-)
-
-with open("./book.txt") as f:
- rag.insert(f.read())
-
-# Perform naive search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
-
-# Perform local search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
-
-# Perform global search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
-
-# Perform hybrid search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
-```
-
-
- Using Open AI-like APIs
-
-* LightRAG also supports Open AI-like chat/embeddings APIs:
-```python
-async def llm_model_func(
- prompt, system_prompt=None, history_messages=[], **kwargs
-) -> str:
- return await openai_complete_if_cache(
- "solar-mini",
- prompt,
- system_prompt=system_prompt,
- history_messages=history_messages,
- api_key=os.getenv("UPSTAGE_API_KEY"),
- base_url="https://api.upstage.ai/v1/solar",
- **kwargs
- )
-
-async def embedding_func(texts: list[str]) -> np.ndarray:
- return await openai_embedding(
- texts,
- model="solar-embedding-1-large-query",
- api_key=os.getenv("UPSTAGE_API_KEY"),
- base_url="https://api.upstage.ai/v1/solar"
- )
-
-rag = LightRAG(
- working_dir=WORKING_DIR,
- llm_model_func=llm_model_func,
- embedding_func=EmbeddingFunc(
- embedding_dim=4096,
- max_token_size=8192,
- func=embedding_func
- )
-)
-```
-
-
-
- Using Hugging Face Models
-
-* If you want to use Hugging Face models, you only need to set LightRAG as follows:
-```python
-from lightrag.llm import hf_model_complete, hf_embedding
-from transformers import AutoModel, AutoTokenizer
-
-# Initialize LightRAG with Hugging Face model
-rag = LightRAG(
- working_dir=WORKING_DIR,
- llm_model_func=hf_model_complete, # Use Hugging Face model for text generation
- llm_model_name='meta-llama/Llama-3.1-8B-Instruct', # Model name from Hugging Face
- # Use Hugging Face embedding function
- embedding_func=EmbeddingFunc(
- embedding_dim=384,
- max_token_size=5000,
- func=lambda texts: hf_embedding(
- texts,
- tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
- embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
- )
- ),
-)
-```
-
-
-
- Using Ollama Models
-
-### Overview
-If you want to use Ollama models, you need to pull model you plan to use and embedding model, for example `nomic-embed-text`.
-
-Then you only need to set LightRAG as follows:
-
-```python
-from lightrag.llm import ollama_model_complete, ollama_embedding
-
-# Initialize LightRAG with Ollama model
-rag = LightRAG(
- working_dir=WORKING_DIR,
- llm_model_func=ollama_model_complete, # Use Ollama model for text generation
- llm_model_name='your_model_name', # Your model name
- # Use Ollama embedding function
- embedding_func=EmbeddingFunc(
- embedding_dim=768,
- max_token_size=8192,
- func=lambda texts: ollama_embedding(
- texts,
- embed_model="nomic-embed-text"
- )
- ),
-)
-```
-
-### Increasing context size
-In order for LightRAG to work context should be at least 32k tokens. By default Ollama models have context size of 8k. You can achieve this using one of two ways:
-
-#### Increasing the `num_ctx` parameter in Modelfile.
-
-1. Pull the model:
-```bash
-ollama pull qwen2
-```
-
-2. Display the model file:
-```bash
-ollama show --modelfile qwen2 > Modelfile
-```
-
-3. Edit the Modelfile by adding the following line:
+## Quick start
+* install textract
```bash
-PARAMETER num_ctx 32768
+pip install textract
```
-
-4. Create the modified model:
+*example
```bash
-ollama create -f Modelfile qwen2m
-```
-
-#### Setup `num_ctx` via Ollama API.
-Tiy can use `llm_model_kwargs` param to configure ollama:
-
-```python
-rag = LightRAG(
- working_dir=WORKING_DIR,
- llm_model_func=ollama_model_complete, # Use Ollama model for text generation
- llm_model_name='your_model_name', # Your model name
- llm_model_kwargs={"options": {"num_ctx": 32768}},
- # Use Ollama embedding function
- embedding_func=EmbeddingFunc(
- embedding_dim=768,
- max_token_size=8192,
- func=lambda texts: ollama_embedding(
- texts,
- embed_model="nomic-embed-text"
- )
- ),
-)
-```
-#### Fully functional example
-
-There fully functional example `examples/lightrag_ollama_demo.py` that utilizes `gemma2:2b` model, runs only 4 requests in parallel and set context size to 32k.
-
-#### Low RAM GPUs
-
-In order to run this experiment on low RAM GPU you should select small model and tune context window (increasing context increase memory consumption). For example, running this ollama example on repurposed mining GPU with 6Gb of RAM required to set context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`.
-
-
-
-### Query Param
-
-```python
-class QueryParam:
- mode: Literal["local", "global", "hybrid", "naive"] = "global"
- only_need_context: bool = False
- response_type: str = "Multiple Paragraphs"
- # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
- top_k: int = 60
- # Number of tokens for the original chunks.
- max_token_for_text_unit: int = 4000
- # Number of tokens for the relationship descriptions
- max_token_for_global_context: int = 4000
- # Number of tokens for the entity descriptions
- max_token_for_local_context: int = 4000
-```
-
-### Batch Insert
-
-```python
-# Batch Insert: Insert multiple texts at once
-rag.insert(["TEXT1", "TEXT2",...])
-```
-
-### Incremental Insert
-
-```python
-# Incremental Insert: Insert new documents into an existing LightRAG instance
-rag = LightRAG(
- working_dir=WORKING_DIR,
- llm_model_func=llm_model_func,
- embedding_func=EmbeddingFunc(
- embedding_dim=embedding_dimension,
- max_token_size=8192,
- func=embedding_func,
- ),
-)
-
-with open("./newText.txt") as f:
- rag.insert(f.read())
-```
-
-### Graph Visualization
-
-
- Graph visualization with html
-
-* The following code can be found in `examples/graph_visual_with_html.py`
-
-```python
-import networkx as nx
-from pyvis.network import Network
-
-# Load the GraphML file
-G = nx.read_graphml('./dickens/graph_chunk_entity_relation.graphml')
-
-# Create a Pyvis network
-net = Network(notebook=True)
-
-# Convert NetworkX graph to Pyvis network
-net.from_nx(G)
-
-# Save and display the network
-net.show('knowledge_graph.html')
-```
-
-
-
-
- Graph visualization with Neo4j
-
-* The following code can be found in `examples/graph_visual_with_neo4j.py`
-
-```python
-import os
-import json
-from lightrag.utils import xml_to_json
-from neo4j import GraphDatabase
-
-# Constants
-WORKING_DIR = "./dickens"
-BATCH_SIZE_NODES = 500
-BATCH_SIZE_EDGES = 100
-
-# Neo4j connection credentials
-NEO4J_URI = "bolt://localhost:7687"
-NEO4J_USERNAME = "neo4j"
-NEO4J_PASSWORD = "your_password"
-
-def convert_xml_to_json(xml_path, output_path):
- """Converts XML file to JSON and saves the output."""
- if not os.path.exists(xml_path):
- print(f"Error: File not found - {xml_path}")
- return None
-
- json_data = xml_to_json(xml_path)
- if json_data:
- with open(output_path, 'w', encoding='utf-8') as f:
- json.dump(json_data, f, ensure_ascii=False, indent=2)
- print(f"JSON file created: {output_path}")
- return json_data
- else:
- print("Failed to create JSON data")
- return None
-
-def process_in_batches(tx, query, data, batch_size):
- """Process data in batches and execute the given query."""
- for i in range(0, len(data), batch_size):
- batch = data[i:i + batch_size]
- tx.run(query, {"nodes": batch} if "nodes" in query else {"edges": batch})
-
-def main():
- # Paths
- xml_file = os.path.join(WORKING_DIR, 'graph_chunk_entity_relation.graphml')
- json_file = os.path.join(WORKING_DIR, 'graph_data.json')
-
- # Convert XML to JSON
- json_data = convert_xml_to_json(xml_file, json_file)
- if json_data is None:
- return
-
- # Load nodes and edges
- nodes = json_data.get('nodes', [])
- edges = json_data.get('edges', [])
-
- # Neo4j queries
- create_nodes_query = """
- UNWIND $nodes AS node
- MERGE (e:Entity {id: node.id})
- SET e.entity_type = node.entity_type,
- e.description = node.description,
- e.source_id = node.source_id,
- e.displayName = node.id
- REMOVE e:Entity
- WITH e, node
- CALL apoc.create.addLabels(e, [node.entity_type]) YIELD node AS labeledNode
- RETURN count(*)
- """
-
- create_edges_query = """
- UNWIND $edges AS edge
- MATCH (source {id: edge.source})
- MATCH (target {id: edge.target})
- WITH source, target, edge,
- CASE
- WHEN edge.keywords CONTAINS 'lead' THEN 'lead'
- WHEN edge.keywords CONTAINS 'participate' THEN 'participate'
- WHEN edge.keywords CONTAINS 'uses' THEN 'uses'
- WHEN edge.keywords CONTAINS 'located' THEN 'located'
- WHEN edge.keywords CONTAINS 'occurs' THEN 'occurs'
- ELSE REPLACE(SPLIT(edge.keywords, ',')[0], '\"', '')
- END AS relType
- CALL apoc.create.relationship(source, relType, {
- weight: edge.weight,
- description: edge.description,
- keywords: edge.keywords,
- source_id: edge.source_id
- }, target) YIELD rel
- RETURN count(*)
- """
-
- set_displayname_and_labels_query = """
- MATCH (n)
- SET n.displayName = n.id
- WITH n
- CALL apoc.create.setLabels(n, [n.entity_type]) YIELD node
- RETURN count(*)
- """
-
- # Create a Neo4j driver
- driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
-
- try:
- # Execute queries in batches
- with driver.session() as session:
- # Insert nodes in batches
- session.execute_write(process_in_batches, create_nodes_query, nodes, BATCH_SIZE_NODES)
-
- # Insert edges in batches
- session.execute_write(process_in_batches, create_edges_query, edges, BATCH_SIZE_EDGES)
-
- # Set displayName and labels
- session.run(set_displayname_and_labels_query)
-
- except Exception as e:
- print(f"Error occurred: {e}")
-
- finally:
- driver.close()
-
-if __name__ == "__main__":
- main()
+import textract
+# 指定要提取文本的文件路径
+file_path = 'path/to/your/file.pdf'
+# 从文件中提取文本
+text_content = textract.process(file_path)
+# 打印提取的文本
+print(text_content.decode('utf-8'))
```
-
-## API Server Implementation
-
-LightRAG also provides a FastAPI-based server implementation for RESTful API access to RAG operations. This allows you to run LightRAG as a service and interact with it through HTTP requests.
-
-### Setting up the API Server
-
-Click to expand setup instructions
-
-1. First, ensure you have the required dependencies:
-```bash
-pip install fastapi uvicorn pydantic
-```
-
-2. Set up your environment variables:
-```bash
-export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default"
-```
-
-3. Run the API server:
-```bash
-python examples/lightrag_api_openai_compatible_demo.py
-```
-
-The server will start on `http://0.0.0.0:8020`.
-
-
-### API Endpoints
-
-The API server provides the following endpoints:
-
-#### 1. Query Endpoint
-
-Click to view Query endpoint details
-
-- **URL:** `/query`
-- **Method:** POST
-- **Body:**
-```json
-{
- "query": "Your question here",
- "mode": "hybrid" // Can be "naive", "local", "global", or "hybrid"
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/query" \
- -H "Content-Type: application/json" \
- -d '{"query": "What are the main themes?", "mode": "hybrid"}'
-```
-
-
-#### 2. Insert Text Endpoint
-
-Click to view Insert Text endpoint details
-
-- **URL:** `/insert`
-- **Method:** POST
-- **Body:**
-```json
-{
- "text": "Your text content here"
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/insert" \
- -H "Content-Type: application/json" \
- -d '{"text": "Content to be inserted into RAG"}'
-```
-
-
-#### 3. Insert File Endpoint
-
-Click to view Insert File endpoint details
-
-- **URL:** `/insert_file`
-- **Method:** POST
-- **Body:**
-```json
-{
- "file_path": "path/to/your/file.txt"
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/insert_file" \
- -H "Content-Type: application/json" \
- -d '{"file_path": "./book.txt"}'
-```
-
-
-#### 4. Health Check Endpoint
-
-Click to view Health Check endpoint details
-
-- **URL:** `/health`
-- **Method:** GET
-- **Example:**
-```bash
-curl -X GET "http://127.0.0.1:8020/health"
-```
-
-
-### Configuration
-
-The API server can be configured using environment variables:
-- `RAG_DIR`: Directory for storing the RAG index (default: "index_default")
-- API keys and base URLs should be configured in the code for your specific LLM and embedding model providers
-
-### Error Handling
-
-Click to view error handling details
-
-The API includes comprehensive error handling:
-- File not found errors (404)
-- Processing errors (500)
-- Supports multiple file encodings (UTF-8 and GBK)
-
-
-## Evaluation
-### Dataset
-The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain).
-
-### Generate Query
-LightRAG uses the following prompt to generate high-level queries, with the corresponding code in `example/generate_query.py`.
-
-
- Prompt
-
-```python
-Given the following description of a dataset:
-
-{description}
-
-Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset.
-
-Output the results in the following structure:
-- User 1: [user description]
- - Task 1: [task description]
- - Question 1:
- - Question 2:
- - Question 3:
- - Question 4:
- - Question 5:
- - Task 2: [task description]
- ...
- - Task 5: [task description]
-- User 2: [user description]
- ...
-- User 5: [user description]
- ...
-```
-
-
- ### Batch Eval
-To evaluate the performance of two RAG systems on high-level queries, LightRAG uses the following prompt, with the specific code available in `example/batch_eval.py`.
-
-
- Prompt
-
-```python
----Role---
-You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
----Goal---
-You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
-
-- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
-- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
-- **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?
-
-For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.
-
-Here is the question:
-{query}
-
-Here are the two answers:
-
-**Answer 1:**
-{answer1}
-
-**Answer 2:**
-{answer2}
-
-Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.
-
-Output your evaluation in the following JSON format:
-
-{{
- "Comprehensiveness": {{
- "Winner": "[Answer 1 or Answer 2]",
- "Explanation": "[Provide explanation here]"
- }},
- "Empowerment": {{
- "Winner": "[Answer 1 or Answer 2]",
- "Explanation": "[Provide explanation here]"
- }},
- "Overall Winner": {{
- "Winner": "[Answer 1 or Answer 2]",
- "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"
- }}
-}}
-```
-
-
-### Overall Performance Table
-| | **Agriculture** | | **CS** | | **Legal** | | **Mix** | |
-|----------------------|-------------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|
-| | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** |
-| **Comprehensiveness** | 32.69% | **67.31%** | 35.44% | **64.56%** | 19.05% | **80.95%** | 36.36% | **63.64%** |
-| **Diversity** | 24.09% | **75.91%** | 35.24% | **64.76%** | 10.98% | **89.02%** | 30.76% | **69.24%** |
-| **Empowerment** | 31.35% | **68.65%** | 35.48% | **64.52%** | 17.59% | **82.41%** | 40.95% | **59.05%** |
-| **Overall** | 33.30% | **66.70%** | 34.76% | **65.24%** | 17.46% | **82.54%** | 37.59% | **62.40%** |
-| | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** |
-| **Comprehensiveness** | 32.05% | **67.95%** | 39.30% | **60.70%** | 18.57% | **81.43%** | 38.89% | **61.11%** |
-| **Diversity** | 29.44% | **70.56%** | 38.71% | **61.29%** | 15.14% | **84.86%** | 28.50% | **71.50%** |
-| **Empowerment** | 32.51% | **67.49%** | 37.52% | **62.48%** | 17.80% | **82.20%** | 43.96% | **56.04%** |
-| **Overall** | 33.29% | **66.71%** | 39.03% | **60.97%** | 17.80% | **82.20%** | 39.61% | **60.39%** |
-| | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** |
-| **Comprehensiveness** | 24.39% | **75.61%** | 36.49% | **63.51%** | 27.68% | **72.32%** | 42.17% | **57.83%** |
-| **Diversity** | 24.96% | **75.34%** | 37.41% | **62.59%** | 18.79% | **81.21%** | 30.88% | **69.12%** |
-| **Empowerment** | 24.89% | **75.11%** | 34.99% | **65.01%** | 26.99% | **73.01%** | **45.61%** | **54.39%** |
-| **Overall** | 23.17% | **76.83%** | 35.67% | **64.33%** | 27.68% | **72.32%** | 42.72% | **57.28%** |
-| | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** |
-| **Comprehensiveness** | 45.56% | **54.44%** | 45.98% | **54.02%** | 47.13% | **52.87%** | **51.86%** | 48.14% |
-| **Diversity** | 19.65% | **80.35%** | 39.64% | **60.36%** | 25.55% | **74.45%** | 35.87% | **64.13%** |
-| **Empowerment** | 36.69% | **63.31%** | 45.09% | **54.91%** | 42.81% | **57.19%** | **52.94%** | 47.06% |
-| **Overall** | 43.62% | **56.38%** | 45.98% | **54.02%** | 45.70% | **54.30%** | **51.86%** | 48.14% |
-
-## Reproduce
-All the code can be found in the `./reproduce` directory.
-
-### Step-0 Extract Unique Contexts
-First, we need to extract unique contexts in the datasets.
-
-
- Code
-
-```python
-def extract_unique_contexts(input_directory, output_directory):
-
- os.makedirs(output_directory, exist_ok=True)
-
- jsonl_files = glob.glob(os.path.join(input_directory, '*.jsonl'))
- print(f"Found {len(jsonl_files)} JSONL files.")
-
- for file_path in jsonl_files:
- filename = os.path.basename(file_path)
- name, ext = os.path.splitext(filename)
- output_filename = f"{name}_unique_contexts.json"
- output_path = os.path.join(output_directory, output_filename)
-
- unique_contexts_dict = {}
-
- print(f"Processing file: {filename}")
-
- try:
- with open(file_path, 'r', encoding='utf-8') as infile:
- for line_number, line in enumerate(infile, start=1):
- line = line.strip()
- if not line:
- continue
- try:
- json_obj = json.loads(line)
- context = json_obj.get('context')
- if context and context not in unique_contexts_dict:
- unique_contexts_dict[context] = None
- except json.JSONDecodeError as e:
- print(f"JSON decoding error in file {filename} at line {line_number}: {e}")
- except FileNotFoundError:
- print(f"File not found: {filename}")
- continue
- except Exception as e:
- print(f"An error occurred while processing file {filename}: {e}")
- continue
-
- unique_contexts_list = list(unique_contexts_dict.keys())
- print(f"There are {len(unique_contexts_list)} unique `context` entries in the file {filename}.")
-
- try:
- with open(output_path, 'w', encoding='utf-8') as outfile:
- json.dump(unique_contexts_list, outfile, ensure_ascii=False, indent=4)
- print(f"Unique `context` entries have been saved to: {output_filename}")
- except Exception as e:
- print(f"An error occurred while saving to the file {output_filename}: {e}")
-
- print("All files have been processed.")
-
-```
-
-
-### Step-1 Insert Contexts
-For the extracted contexts, we insert them into the LightRAG system.
-
-
- Code
-
-```python
-def insert_text(rag, file_path):
- with open(file_path, mode='r') as f:
- unique_contexts = json.load(f)
-
- retries = 0
- max_retries = 3
- while retries < max_retries:
- try:
- rag.insert(unique_contexts)
- break
- except Exception as e:
- retries += 1
- print(f"Insertion failed, retrying ({retries}/{max_retries}), error: {e}")
- time.sleep(10)
- if retries == max_retries:
- print("Insertion failed after exceeding the maximum number of retries")
-```
-
-
-### Step-2 Generate Queries
-
-We extract tokens from the first and the second half of each context in the dataset, then combine them as dataset descriptions to generate queries.
-
-
- Code
-
-```python
-tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
-
-def get_summary(context, tot_tokens=2000):
- tokens = tokenizer.tokenize(context)
- half_tokens = tot_tokens // 2
-
- start_tokens = tokens[1000:1000 + half_tokens]
- end_tokens = tokens[-(1000 + half_tokens):1000]
-
- summary_tokens = start_tokens + end_tokens
- summary = tokenizer.convert_tokens_to_string(summary_tokens)
-
- return summary
-```
-
-
-### Step-3 Query
-For the queries generated in Step-2, we will extract them and query LightRAG.
-
-
- Code
-
-```python
-def extract_queries(file_path):
- with open(file_path, 'r') as f:
- data = f.read()
-
- data = data.replace('**', '')
-
- queries = re.findall(r'- Question \d+: (.+)', data)
-
- return queries
-```
-
-
-## Code Structure
-
-```python
-.
-├── examples
-│ ├── batch_eval.py
-│ ├── graph_visual_with_html.py
-│ ├── graph_visual_with_neo4j.py
-│ ├── generate_query.py
-│ ├── lightrag_azure_openai_demo.py
-│ ├── lightrag_bedrock_demo.py
-│ ├── lightrag_hf_demo.py
-│ ├── lightrag_ollama_demo.py
-│ ├── lightrag_openai_compatible_demo.py
-│ ├── lightrag_openai_demo.py
-│ ├── lightrag_siliconcloud_demo.py
-│ └── vram_management_demo.py
-├── lightrag
-│ ├── __init__.py
-│ ├── base.py
-│ ├── lightrag.py
-│ ├── llm.py
-│ ├── operate.py
-│ ├── prompt.py
-│ ├── storage.py
-│ └── utils.py
-├── reproduce
-│ ├── Step_0.py
-│ ├── Step_1.py
-│ ├── Step_2.py
-│ └── Step_3.py
-├── .gitignore
-├── .pre-commit-config.yaml
-├── LICENSE
-├── README.md
-├── requirements.txt
-└── setup.py
-```
-
-## Star History
-
-
-
-
-
-## Citation
-
-```python
-@article{guo2024lightrag,
-title={LightRAG: Simple and Fast Retrieval-Augmented Generation},
-author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang},
-year={2024},
-eprint={2410.05779},
-archivePrefix={arXiv},
-primaryClass={cs.IR}
-}
-```
From 2294e401c706aba713e9adcc5fc0039b4a766013 Mon Sep 17 00:00:00 2001
From: MrGidea <98243922+MrGidea@users.noreply.github.com>
Date: Tue, 29 Oct 2024 15:45:00 +0800
Subject: [PATCH 119/258] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index c7dbf6b6..7f133490 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
```bash
pip install textract
```
-*example
+* example
```bash
import textract
# 指定要提取文本的文件路径
From 12a85bc4a3b6fa338e93823582ac2f8af4652413 Mon Sep 17 00:00:00 2001
From: MrGidea <98243922+MrGidea@users.noreply.github.com>
Date: Tue, 29 Oct 2024 15:46:48 +0800
Subject: [PATCH 120/258] Update README.md
---
README.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/README.md b/README.md
index 7f133490..921b44d8 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
## Quick start
+Currently, the test supports pptx, pdf, csv, word, txt file types
* install textract
```bash
pip install textract
From 9fd2add75535f41c12d6a7aabca0d0e940180d8d Mon Sep 17 00:00:00 2001
From: MrGidea <98243922+MrGidea@users.noreply.github.com>
Date: Tue, 29 Oct 2024 15:47:17 +0800
Subject: [PATCH 121/258] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 921b44d8..660c7b27 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
## Quick start
-Currently, the test supports pptx, pdf, csv, word, txt file types
+Currently, the test supports pptx, pdf, csv, docx, txt file types
* install textract
```bash
pip install textract
From 3e99d3f0e740e378db2fbc99b6728b697f44710b Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Tue, 29 Oct 2024 16:16:11 +0800
Subject: [PATCH 122/258] Update README.md
---
README.md | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 683dd0b2..870c2d6f 100644
--- a/README.md
+++ b/README.md
@@ -804,12 +804,14 @@ def extract_queries(file_path):
.
├── examples
│ ├── batch_eval.py
+│ ├── generate_query.py
│ ├── graph_visual_with_html.py
│ ├── graph_visual_with_neo4j.py
-│ ├── generate_query.py
+│ ├── lightrag_api_openai_compatible_demo.py
│ ├── lightrag_azure_openai_demo.py
│ ├── lightrag_bedrock_demo.py
│ ├── lightrag_hf_demo.py
+│ ├── lightrag_lmdeploy_demo.py
│ ├── lightrag_ollama_demo.py
│ ├── lightrag_openai_compatible_demo.py
│ ├── lightrag_openai_demo.py
@@ -826,8 +828,10 @@ def extract_queries(file_path):
│ └── utils.py
├── reproduce
│ ├── Step_0.py
+│ ├── Step_1_openai_compatible.py
│ ├── Step_1.py
│ ├── Step_2.py
+│ ├── Step_3_openai_compatible.py
│ └── Step_3.py
├── .gitignore
├── .pre-commit-config.yaml
From 31c61bd98a0e26200c2fc8a2d5c1ae9924300c2f Mon Sep 17 00:00:00 2001
From: MrGidea <98243922+MrGidea@users.noreply.github.com>
Date: Tue, 29 Oct 2024 16:36:04 +0800
Subject: [PATCH 123/258] Update README.md
---
README.md | 884 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 874 insertions(+), 10 deletions(-)
diff --git a/README.md b/README.md
index 660c7b27..acfd7c96 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,882 @@
-## Quick start
-Currently, the test supports pptx, pdf, csv, docx, txt file types
-* install textract
+
🚀 LightRAG: Simple and Fast Retrieval-Augmented Generation
+
+This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
+![请添加图片描述](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
+
+
+## 🎉 News
+- [x] [2024.10.29]🎯🎯📢📢Multi-file types are now supported by `textract`.
+- [x] [2024.10.20]🎯🎯📢📢We’ve added a new feature to LightRAG: Graph Visualization.
+- [x] [2024.10.18]🎯🎯📢📢We’ve added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author!
+- [x] [2024.10.17]🎯🎯📢📢We have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! 🎉🎉
+- [x] [2024.10.16]🎯🎯📢📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
+- [x] [2024.10.15]🎯🎯📢📢LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
+
+## Algorithm Flowchart
+
+![LightRAG_Self excalidraw](https://github.com/user-attachments/assets/aa5c4892-2e44-49e6-a116-2403ed80a1a3)
+
+
+## Install
+
+* Install from source (Recommend)
+
+```bash
+cd LightRAG
+pip install -e .
+```
+* Install from PyPI
+```bash
+pip install lightrag-hku
+```
+
+## Quick Start
+* [Video demo](https://www.youtube.com/watch?v=g21royNJ4fw) of running LightRAG locally.
+* All the code can be found in the `examples`.
+* Set OpenAI API key in environment if using OpenAI models: `export OPENAI_API_KEY="sk-...".`
+* Download the demo text "A Christmas Carol by Charles Dickens":
+```bash
+curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt
+```
+Use the below Python snippet (in a script) to initialize LightRAG and perform queries:
+
+```python
+import os
+from lightrag import LightRAG, QueryParam
+from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
+
+#########
+# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
+# import nest_asyncio
+# nest_asyncio.apply()
+#########
+
+WORKING_DIR = "./dickens"
+
+
+if not os.path.exists(WORKING_DIR):
+ os.mkdir(WORKING_DIR)
+
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=gpt_4o_mini_complete # Use gpt_4o_mini_complete LLM model
+ # llm_model_func=gpt_4o_complete # Optionally, use a stronger model
+)
+
+with open("./book.txt") as f:
+ rag.insert(f.read())
+
+# Perform naive search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+
+# Perform local search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+
+# Perform global search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+
+# Perform hybrid search
+print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
+```
+
+
+ Using Open AI-like APIs
+
+* LightRAG also supports Open AI-like chat/embeddings APIs:
+```python
+async def llm_model_func(
+ prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+ return await openai_complete_if_cache(
+ "solar-mini",
+ prompt,
+ system_prompt=system_prompt,
+ history_messages=history_messages,
+ api_key=os.getenv("UPSTAGE_API_KEY"),
+ base_url="https://api.upstage.ai/v1/solar",
+ **kwargs
+ )
+
+async def embedding_func(texts: list[str]) -> np.ndarray:
+ return await openai_embedding(
+ texts,
+ model="solar-embedding-1-large-query",
+ api_key=os.getenv("UPSTAGE_API_KEY"),
+ base_url="https://api.upstage.ai/v1/solar"
+ )
+
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=llm_model_func,
+ embedding_func=EmbeddingFunc(
+ embedding_dim=4096,
+ max_token_size=8192,
+ func=embedding_func
+ )
+)
+```
+
+
+
+ Using Hugging Face Models
+
+* If you want to use Hugging Face models, you only need to set LightRAG as follows:
+```python
+from lightrag.llm import hf_model_complete, hf_embedding
+from transformers import AutoModel, AutoTokenizer
+
+# Initialize LightRAG with Hugging Face model
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=hf_model_complete, # Use Hugging Face model for text generation
+ llm_model_name='meta-llama/Llama-3.1-8B-Instruct', # Model name from Hugging Face
+ # Use Hugging Face embedding function
+ embedding_func=EmbeddingFunc(
+ embedding_dim=384,
+ max_token_size=5000,
+ func=lambda texts: hf_embedding(
+ texts,
+ tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
+ embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+ )
+ ),
+)
+```
+
+
+
+ Using Ollama Models
+
+### Overview
+If you want to use Ollama models, you need to pull the model you plan to use as well as an embedding model, for example `nomic-embed-text`.
+
+Then you only need to set LightRAG as follows:
+
+```python
+from lightrag.llm import ollama_model_complete, ollama_embedding
+
+# Initialize LightRAG with Ollama model
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=ollama_model_complete, # Use Ollama model for text generation
+ llm_model_name='your_model_name', # Your model name
+ # Use Ollama embedding function
+ embedding_func=EmbeddingFunc(
+ embedding_dim=768,
+ max_token_size=8192,
+ func=lambda texts: ollama_embedding(
+ texts,
+ embed_model="nomic-embed-text"
+ )
+ ),
+)
+```
+
+### Increasing context size
+In order for LightRAG to work, the context should be at least 32k tokens. By default, Ollama models have a context size of 8k. You can increase it in one of two ways:
+
+#### Increasing the `num_ctx` parameter in Modelfile.
+
+1. Pull the model:
+```bash
+ollama pull qwen2
+```
+
+2. Display the model file:
+```bash
+ollama show --modelfile qwen2 > Modelfile
+```
+
+3. Edit the Modelfile by adding the following line:
```bash
-pip install textract
+PARAMETER num_ctx 32768
```
-* example
+
+4. Create the modified model:
```bash
+ollama create -f Modelfile qwen2m
+```
+
+#### Setup `num_ctx` via Ollama API.
+You can use the `llm_model_kwargs` param to configure Ollama:
+
+```python
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=ollama_model_complete, # Use Ollama model for text generation
+ llm_model_name='your_model_name', # Your model name
+ llm_model_kwargs={"options": {"num_ctx": 32768}},
+ # Use Ollama embedding function
+ embedding_func=EmbeddingFunc(
+ embedding_dim=768,
+ max_token_size=8192,
+ func=lambda texts: ollama_embedding(
+ texts,
+ embed_model="nomic-embed-text"
+ )
+ ),
+)
+```
+#### Fully functional example
+
+There is a fully functional example, `examples/lightrag_ollama_demo.py`, that utilizes the `gemma2:2b` model, runs only 4 requests in parallel, and sets the context size to 32k.
+
+#### Low RAM GPUs
+
+In order to run this experiment on a low-RAM GPU, you should select a small model and tune the context window (increasing the context increases memory consumption). For example, running this Ollama example on a repurposed mining GPU with 6 GB of RAM required setting the context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations in `book.txt`.
+
+
+
+### Query Param
+
+```python
+class QueryParam:
+ mode: Literal["local", "global", "hybrid", "naive"] = "global"
+ only_need_context: bool = False
+ response_type: str = "Multiple Paragraphs"
+ # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
+ top_k: int = 60
+ # Number of tokens for the original chunks.
+ max_token_for_text_unit: int = 4000
+ # Number of tokens for the relationship descriptions
+ max_token_for_global_context: int = 4000
+ # Number of tokens for the entity descriptions
+ max_token_for_local_context: int = 4000
+```
+
+### Batch Insert
+
+```python
+# Batch Insert: Insert multiple texts at once
+rag.insert(["TEXT1", "TEXT2",...])
+```
+
+### Incremental Insert
+
+```python
+# Incremental Insert: Insert new documents into an existing LightRAG instance
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=llm_model_func,
+ embedding_func=EmbeddingFunc(
+ embedding_dim=embedding_dimension,
+ max_token_size=8192,
+ func=embedding_func,
+ ),
+)
+
+with open("./newText.txt") as f:
+ rag.insert(f.read())
+```
+
+### Multi-file Type Support
+
+The `textract` library supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF.
+
+```python
import textract
-# 指定要提取文本的文件路径
-file_path = 'path/to/your/file.pdf'
-# 从文件中提取文本
+
+file_path = 'TEXT.pdf'
text_content = textract.process(file_path)
-# 打印提取的文本
-print(text_content.decode('utf-8'))
+
+rag.insert(text_content.decode('utf-8'))
+```
+
+### Graph Visualization
+
+
+ Graph visualization with html
+
+* The following code can be found in `examples/graph_visual_with_html.py`
+
+```python
+import networkx as nx
+from pyvis.network import Network
+
+# Load the GraphML file
+G = nx.read_graphml('./dickens/graph_chunk_entity_relation.graphml')
+
+# Create a Pyvis network
+net = Network(notebook=True)
+
+# Convert NetworkX graph to Pyvis network
+net.from_nx(G)
+
+# Save and display the network
+net.show('knowledge_graph.html')
+```
+
+
+
+
+ Graph visualization with Neo4j
+
+* The following code can be found in `examples/graph_visual_with_neo4j.py`
+
+```python
+import os
+import json
+from lightrag.utils import xml_to_json
+from neo4j import GraphDatabase
+
+# Constants
+WORKING_DIR = "./dickens"
+BATCH_SIZE_NODES = 500
+BATCH_SIZE_EDGES = 100
+
+# Neo4j connection credentials
+NEO4J_URI = "bolt://localhost:7687"
+NEO4J_USERNAME = "neo4j"
+NEO4J_PASSWORD = "your_password"
+
+def convert_xml_to_json(xml_path, output_path):
+ """Converts XML file to JSON and saves the output."""
+ if not os.path.exists(xml_path):
+ print(f"Error: File not found - {xml_path}")
+ return None
+
+ json_data = xml_to_json(xml_path)
+ if json_data:
+ with open(output_path, 'w', encoding='utf-8') as f:
+ json.dump(json_data, f, ensure_ascii=False, indent=2)
+ print(f"JSON file created: {output_path}")
+ return json_data
+ else:
+ print("Failed to create JSON data")
+ return None
+
+def process_in_batches(tx, query, data, batch_size):
+ """Process data in batches and execute the given query."""
+ for i in range(0, len(data), batch_size):
+ batch = data[i:i + batch_size]
+ tx.run(query, {"nodes": batch} if "nodes" in query else {"edges": batch})
+
+def main():
+ # Paths
+ xml_file = os.path.join(WORKING_DIR, 'graph_chunk_entity_relation.graphml')
+ json_file = os.path.join(WORKING_DIR, 'graph_data.json')
+
+ # Convert XML to JSON
+ json_data = convert_xml_to_json(xml_file, json_file)
+ if json_data is None:
+ return
+
+ # Load nodes and edges
+ nodes = json_data.get('nodes', [])
+ edges = json_data.get('edges', [])
+
+ # Neo4j queries
+ create_nodes_query = """
+ UNWIND $nodes AS node
+ MERGE (e:Entity {id: node.id})
+ SET e.entity_type = node.entity_type,
+ e.description = node.description,
+ e.source_id = node.source_id,
+ e.displayName = node.id
+ REMOVE e:Entity
+ WITH e, node
+ CALL apoc.create.addLabels(e, [node.entity_type]) YIELD node AS labeledNode
+ RETURN count(*)
+ """
+
+ create_edges_query = """
+ UNWIND $edges AS edge
+ MATCH (source {id: edge.source})
+ MATCH (target {id: edge.target})
+ WITH source, target, edge,
+ CASE
+ WHEN edge.keywords CONTAINS 'lead' THEN 'lead'
+ WHEN edge.keywords CONTAINS 'participate' THEN 'participate'
+ WHEN edge.keywords CONTAINS 'uses' THEN 'uses'
+ WHEN edge.keywords CONTAINS 'located' THEN 'located'
+ WHEN edge.keywords CONTAINS 'occurs' THEN 'occurs'
+ ELSE REPLACE(SPLIT(edge.keywords, ',')[0], '\"', '')
+ END AS relType
+ CALL apoc.create.relationship(source, relType, {
+ weight: edge.weight,
+ description: edge.description,
+ keywords: edge.keywords,
+ source_id: edge.source_id
+ }, target) YIELD rel
+ RETURN count(*)
+ """
+
+ set_displayname_and_labels_query = """
+ MATCH (n)
+ SET n.displayName = n.id
+ WITH n
+ CALL apoc.create.setLabels(n, [n.entity_type]) YIELD node
+ RETURN count(*)
+ """
+
+ # Create a Neo4j driver
+ driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
+
+ try:
+ # Execute queries in batches
+ with driver.session() as session:
+ # Insert nodes in batches
+ session.execute_write(process_in_batches, create_nodes_query, nodes, BATCH_SIZE_NODES)
+
+ # Insert edges in batches
+ session.execute_write(process_in_batches, create_edges_query, edges, BATCH_SIZE_EDGES)
+
+ # Set displayName and labels
+ session.run(set_displayname_and_labels_query)
+
+ except Exception as e:
+ print(f"Error occurred: {e}")
+
+ finally:
+ driver.close()
+
+if __name__ == "__main__":
+ main()
+```
+
+
+
+## API Server Implementation
+
+LightRAG also provides a FastAPI-based server implementation for RESTful API access to RAG operations. This allows you to run LightRAG as a service and interact with it through HTTP requests.
+
+### Setting up the API Server
+
+Click to expand setup instructions
+
+1. First, ensure you have the required dependencies:
+```bash
+pip install fastapi uvicorn pydantic
+```
+
+2. Set up your environment variables:
+```bash
+export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default"
+```
+
+3. Run the API server:
+```bash
+python examples/lightrag_api_openai_compatible_demo.py
+```
+
+The server will start on `http://0.0.0.0:8020`.
+
+
+### API Endpoints
+
+The API server provides the following endpoints:
+
+#### 1. Query Endpoint
+
+Click to view Query endpoint details
+
+- **URL:** `/query`
+- **Method:** POST
+- **Body:**
+```json
+{
+ "query": "Your question here",
+ "mode": "hybrid" // Can be "naive", "local", "global", or "hybrid"
+}
+```
+- **Example:**
+```bash
+curl -X POST "http://127.0.0.1:8020/query" \
+ -H "Content-Type: application/json" \
+ -d '{"query": "What are the main themes?", "mode": "hybrid"}'
+```
+
+
+#### 2. Insert Text Endpoint
+
+Click to view Insert Text endpoint details
+
+- **URL:** `/insert`
+- **Method:** POST
+- **Body:**
+```json
+{
+ "text": "Your text content here"
+}
+```
+- **Example:**
+```bash
+curl -X POST "http://127.0.0.1:8020/insert" \
+ -H "Content-Type: application/json" \
+ -d '{"text": "Content to be inserted into RAG"}'
+```
+
+
+#### 3. Insert File Endpoint
+
+Click to view Insert File endpoint details
+
+- **URL:** `/insert_file`
+- **Method:** POST
+- **Body:**
+```json
+{
+ "file_path": "path/to/your/file.txt"
+}
+```
+- **Example:**
+```bash
+curl -X POST "http://127.0.0.1:8020/insert_file" \
+ -H "Content-Type: application/json" \
+ -d '{"file_path": "./book.txt"}'
+```
+
+
+#### 4. Health Check Endpoint
+
+Click to view Health Check endpoint details
+
+- **URL:** `/health`
+- **Method:** GET
+- **Example:**
+```bash
+curl -X GET "http://127.0.0.1:8020/health"
+```
+
+
+### Configuration
+
+The API server can be configured using environment variables:
+- `RAG_DIR`: Directory for storing the RAG index (default: "index_default")
+- API keys and base URLs should be configured in the code for your specific LLM and embedding model providers
+
+### Error Handling
+
+Click to view error handling details
+
+The API includes comprehensive error handling:
+- File not found errors (404)
+- Processing errors (500)
+- Supports multiple file encodings (UTF-8 and GBK)
+
+
+## Evaluation
+### Dataset
+The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain).
+
+### Generate Query
+LightRAG uses the following prompt to generate high-level queries, with the corresponding code in `examples/generate_query.py`.
+
+
+ Prompt
+
+```python
+Given the following description of a dataset:
+
+{description}
+
+Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset.
+
+Output the results in the following structure:
+- User 1: [user description]
+ - Task 1: [task description]
+ - Question 1:
+ - Question 2:
+ - Question 3:
+ - Question 4:
+ - Question 5:
+ - Task 2: [task description]
+ ...
+ - Task 5: [task description]
+- User 2: [user description]
+ ...
+- User 5: [user description]
+ ...
+```
+
+
+ ### Batch Eval
+To evaluate the performance of two RAG systems on high-level queries, LightRAG uses the following prompt, with the specific code available in `examples/batch_eval.py`.
+
+
+ Prompt
+
+```python
+---Role---
+You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+---Goal---
+You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+
+- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
+- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
+- **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?
+
+For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.
+
+Here is the question:
+{query}
+
+Here are the two answers:
+
+**Answer 1:**
+{answer1}
+
+**Answer 2:**
+{answer2}
+
+Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.
+
+Output your evaluation in the following JSON format:
+
+{{
+ "Comprehensiveness": {{
+ "Winner": "[Answer 1 or Answer 2]",
+ "Explanation": "[Provide explanation here]"
+ }},
+ "Empowerment": {{
+ "Winner": "[Answer 1 or Answer 2]",
+ "Explanation": "[Provide explanation here]"
+ }},
+ "Overall Winner": {{
+ "Winner": "[Answer 1 or Answer 2]",
+ "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"
+ }}
+}}
+```
+
+
+### Overall Performance Table
+| | **Agriculture** | | **CS** | | **Legal** | | **Mix** | |
+|----------------------|-------------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|
+| | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** |
+| **Comprehensiveness** | 32.69% | **67.31%** | 35.44% | **64.56%** | 19.05% | **80.95%** | 36.36% | **63.64%** |
+| **Diversity** | 24.09% | **75.91%** | 35.24% | **64.76%** | 10.98% | **89.02%** | 30.76% | **69.24%** |
+| **Empowerment** | 31.35% | **68.65%** | 35.48% | **64.52%** | 17.59% | **82.41%** | 40.95% | **59.05%** |
+| **Overall** | 33.30% | **66.70%** | 34.76% | **65.24%** | 17.46% | **82.54%** | 37.59% | **62.40%** |
+| | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** |
+| **Comprehensiveness** | 32.05% | **67.95%** | 39.30% | **60.70%** | 18.57% | **81.43%** | 38.89% | **61.11%** |
+| **Diversity** | 29.44% | **70.56%** | 38.71% | **61.29%** | 15.14% | **84.86%** | 28.50% | **71.50%** |
+| **Empowerment** | 32.51% | **67.49%** | 37.52% | **62.48%** | 17.80% | **82.20%** | 43.96% | **56.04%** |
+| **Overall** | 33.29% | **66.71%** | 39.03% | **60.97%** | 17.80% | **82.20%** | 39.61% | **60.39%** |
+| | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** |
+| **Comprehensiveness** | 24.39% | **75.61%** | 36.49% | **63.51%** | 27.68% | **72.32%** | 42.17% | **57.83%** |
+| **Diversity** | 24.96% | **75.34%** | 37.41% | **62.59%** | 18.79% | **81.21%** | 30.88% | **69.12%** |
+| **Empowerment** | 24.89% | **75.11%** | 34.99% | **65.01%** | 26.99% | **73.01%** | **45.61%** | **54.39%** |
+| **Overall** | 23.17% | **76.83%** | 35.67% | **64.33%** | 27.68% | **72.32%** | 42.72% | **57.28%** |
+| | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** |
+| **Comprehensiveness** | 45.56% | **54.44%** | 45.98% | **54.02%** | 47.13% | **52.87%** | **51.86%** | 48.14% |
+| **Diversity** | 19.65% | **80.35%** | 39.64% | **60.36%** | 25.55% | **74.45%** | 35.87% | **64.13%** |
+| **Empowerment** | 36.69% | **63.31%** | 45.09% | **54.91%** | 42.81% | **57.19%** | **52.94%** | 47.06% |
+| **Overall** | 43.62% | **56.38%** | 45.98% | **54.02%** | 45.70% | **54.30%** | **51.86%** | 48.14% |
+
+## Reproduce
+All the code can be found in the `./reproduce` directory.
+
+### Step-0 Extract Unique Contexts
+First, we need to extract unique contexts in the datasets.
+
+
+ Code
+
+```python
+def extract_unique_contexts(input_directory, output_directory):
+
+ os.makedirs(output_directory, exist_ok=True)
+
+ jsonl_files = glob.glob(os.path.join(input_directory, '*.jsonl'))
+ print(f"Found {len(jsonl_files)} JSONL files.")
+
+ for file_path in jsonl_files:
+ filename = os.path.basename(file_path)
+ name, ext = os.path.splitext(filename)
+ output_filename = f"{name}_unique_contexts.json"
+ output_path = os.path.join(output_directory, output_filename)
+
+ unique_contexts_dict = {}
+
+ print(f"Processing file: {filename}")
+
+ try:
+ with open(file_path, 'r', encoding='utf-8') as infile:
+ for line_number, line in enumerate(infile, start=1):
+ line = line.strip()
+ if not line:
+ continue
+ try:
+ json_obj = json.loads(line)
+ context = json_obj.get('context')
+ if context and context not in unique_contexts_dict:
+ unique_contexts_dict[context] = None
+ except json.JSONDecodeError as e:
+ print(f"JSON decoding error in file {filename} at line {line_number}: {e}")
+ except FileNotFoundError:
+ print(f"File not found: {filename}")
+ continue
+ except Exception as e:
+ print(f"An error occurred while processing file {filename}: {e}")
+ continue
+
+ unique_contexts_list = list(unique_contexts_dict.keys())
+ print(f"There are {len(unique_contexts_list)} unique `context` entries in the file {filename}.")
+
+ try:
+ with open(output_path, 'w', encoding='utf-8') as outfile:
+ json.dump(unique_contexts_list, outfile, ensure_ascii=False, indent=4)
+ print(f"Unique `context` entries have been saved to: {output_filename}")
+ except Exception as e:
+ print(f"An error occurred while saving to the file {output_filename}: {e}")
+
+ print("All files have been processed.")
+
+```
+
+
+### Step-1 Insert Contexts
+For the extracted contexts, we insert them into the LightRAG system.
+
+
+ Code
+
+```python
+def insert_text(rag, file_path):
+ with open(file_path, mode='r') as f:
+ unique_contexts = json.load(f)
+
+ retries = 0
+ max_retries = 3
+ while retries < max_retries:
+ try:
+ rag.insert(unique_contexts)
+ break
+ except Exception as e:
+ retries += 1
+ print(f"Insertion failed, retrying ({retries}/{max_retries}), error: {e}")
+ time.sleep(10)
+ if retries == max_retries:
+ print("Insertion failed after exceeding the maximum number of retries")
+```
+
+
+### Step-2 Generate Queries
+
+We extract tokens from the first and the second half of each context in the dataset, then combine them as dataset descriptions to generate queries.
+
+
+ Code
+
+```python
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+
+def get_summary(context, tot_tokens=2000):
+ tokens = tokenizer.tokenize(context)
+ half_tokens = tot_tokens // 2
+
+ start_tokens = tokens[1000:1000 + half_tokens]
+ end_tokens = tokens[-(1000 + half_tokens):1000]
+
+ summary_tokens = start_tokens + end_tokens
+ summary = tokenizer.convert_tokens_to_string(summary_tokens)
+
+ return summary
```
+
+
+### Step-3 Query
+For the queries generated in Step-2, we will extract them and query LightRAG.
+
+
+ Code
+
+```python
+def extract_queries(file_path):
+ with open(file_path, 'r') as f:
+ data = f.read()
+
+ data = data.replace('**', '')
+
+ queries = re.findall(r'- Question \d+: (.+)', data)
+
+ return queries
+```
+
+
+## Code Structure
+
+```python
+.
+├── examples
+│ ├── batch_eval.py
+│ ├── generate_query.py
+│ ├── graph_visual_with_html.py
+│ ├── graph_visual_with_neo4j.py
+│ ├── lightrag_api_openai_compatible_demo.py
+│ ├── lightrag_azure_openai_demo.py
+│ ├── lightrag_bedrock_demo.py
+│ ├── lightrag_hf_demo.py
+│ ├── lightrag_lmdeploy_demo.py
+│ ├── lightrag_ollama_demo.py
+│ ├── lightrag_openai_compatible_demo.py
+│ ├── lightrag_openai_demo.py
+│ ├── lightrag_siliconcloud_demo.py
+│ └── vram_management_demo.py
+├── lightrag
+│ ├── __init__.py
+│ ├── base.py
+│ ├── lightrag.py
+│ ├── llm.py
+│ ├── operate.py
+│ ├── prompt.py
+│ ├── storage.py
+│ └── utils.py
+├── reproduce
+│ ├── Step_0.py
+│ ├── Step_1_openai_compatible.py
+│ ├── Step_1.py
+│ ├── Step_2.py
+│ ├── Step_3_openai_compatible.py
+│ └── Step_3.py
+├── .gitignore
+├── .pre-commit-config.yaml
+├── LICENSE
+├── README.md
+├── requirements.txt
+└── setup.py
+```
+
+## Star History
+
+
+
+
+
+## Citation
+
+```python
+@article{guo2024lightrag,
+title={LightRAG: Simple and Fast Retrieval-Augmented Generation},
+author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang},
+year={2024},
+eprint={2410.05779},
+archivePrefix={arXiv},
+primaryClass={cs.IR}
+}
+```
+
From 4761a736fd3dd0ba1e4512768dc53de7918a855c Mon Sep 17 00:00:00 2001
From: Zhenyu Pan <120090196@link.cuhk.edu.cn>
Date: Tue, 29 Oct 2024 23:29:47 +0800
Subject: [PATCH 124/258] [hotfix-#163] Fix asynchronous problem
---
examples/lightrag_openai_compatible_demo.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/examples/lightrag_openai_compatible_demo.py b/examples/lightrag_openai_compatible_demo.py
index 2470fc00..1422e2c2 100644
--- a/examples/lightrag_openai_compatible_demo.py
+++ b/examples/lightrag_openai_compatible_demo.py
@@ -69,25 +69,25 @@ async def main():
)
with open("./book.txt", "r", encoding="utf-8") as f:
- rag.insert(f.read())
+ await rag.ainsert(f.read())
# Perform naive search
print(
- rag.query(
+ await rag.aquery(
"What are the top themes in this story?", param=QueryParam(mode="naive")
)
)
# Perform local search
print(
- rag.query(
+ await rag.aquery(
"What are the top themes in this story?", param=QueryParam(mode="local")
)
)
# Perform global search
print(
- rag.query(
+ await rag.aquery(
"What are the top themes in this story?",
param=QueryParam(mode="global"),
)
@@ -95,7 +95,7 @@ async def main():
# Perform hybrid search
print(
- rag.query(
+ await rag.aquery(
"What are the top themes in this story?",
param=QueryParam(mode="hybrid"),
)
From 7364735df2fee1ae56bdb91acb0bc90038b8cae8 Mon Sep 17 00:00:00 2001
From: Ken Wiltshire
Date: Tue, 29 Oct 2024 15:36:07 -0400
Subject: [PATCH 125/258] edge degree next almost done
---
.DS_Store | Bin 0 -> 8196 bytes
lightrag/kg/neo4j.py | 374 +-
lightrag/lightrag.py | 2 +-
lightrag/llm.py | 2 +-
neo4jWorkDir/kv_store_full_docs.json | 5 +
neo4jWorkDir/kv_store_llm_response_cache.json | 354 +
neo4jWorkDir/kv_store_text_chunks.json | 254 +
neo4jWorkDir/lightrag.log | 10202 ++++++++++++++++
neo4jWorkDir/vdb_chunks.json | 1 +
neo4jWorkDir/vdb_entities.json | 1 +
neo4jWorkDir/vdb_relationships.json | 1 +
testkg.py | 36 +
12 files changed, 11101 insertions(+), 131 deletions(-)
create mode 100644 .DS_Store
create mode 100644 neo4jWorkDir/kv_store_full_docs.json
create mode 100644 neo4jWorkDir/kv_store_llm_response_cache.json
create mode 100644 neo4jWorkDir/kv_store_text_chunks.json
create mode 100644 neo4jWorkDir/lightrag.log
create mode 100644 neo4jWorkDir/vdb_chunks.json
create mode 100644 neo4jWorkDir/vdb_entities.json
create mode 100644 neo4jWorkDir/vdb_relationships.json
create mode 100644 testkg.py
diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..651e36edce1916e150c6c409938c130c393b622b
GIT binary patch
literal 8196
zcmeHMTWl0n82u@F6llA%T(YtT5LVq5sg_1q+AWq_irwBp*V&zcPMFS=o!MQa
zsWvgj#0y@ckD3@0AB+-sFd-TrOuXQW8YD!c4@P-1QC>9aoBugymP-o{B*sgalbrLP
z|28xKKi_}Oo>>6E?y}hi5CZ^}I;Ywq8m`f}p7(i82_^MJk^BKHaKSHB{i%X8&pHxB
z8Hh3vWgyBxlz}J%|Ah?Dp3R$joqb15pO%XMmSa1DIe#5e{E@e)kHbIbE1?Y(Ge+&!bXTzhGg5+Mq5{AIeR5
z)m&f%Wxv}f?dRN}=Q)Mi+$ap&RyD6J@A2HguwAPp9Bk7flLI5RYkJjguk4z>@Xbku
z%2bxuV$;)`+B#E-dp4zKQin~Z>3UDgKrmbkpm9!M&&-d?
z@hd$AheuV{Qj6;AT2zcJ(uYSz$MTw1_N@WSF#@|{WsSh{^V$-h0^C{j+IK?sLo_ML0Ib&1x*ma%cliyPPnZL2h9_(jV#ZTHYn)i#5Pk&5lx
z1;-k6jY*3&VUC?^X^l1NaeZY?B@Hh-s#;PVEf?$~WjlCO++($#UJ0dxE>NV}a<4Tc`gx;^JWBh{KP17>ZHMYiD9vslgL$lqAO&F$EAfc?C
zRaG^a*&v}LGMmh#CAWMIzW8pXLkhOS4j8A(aiI#&!gFvOUWGT|6ub`~!e{U~d;w?S
zEBFpBz)$ci{06_nABcDhwqgw9xB_p-G`3+oc3}p$V-IF=FAiW1hj1A4XyHMeKpRVV
z45x7hAH%2c1$+@-##it)d5BMWqz@PCKyoi4)iqfpKDltV@
z;!09kuh7>%-jrI+j7z>YD%~`jvl39v(3d=3Gs=-`)l7WH1AROHm74ixv8a-z%a%8<
zPOiVRy`w902~84yHN;(sV*%c)ATPix-t`>Raro$QwsKXio-S9CtgInIy>(RO(A643
z>)Y0-gnDg3$gW*S>~3wLWaArCDpi=K2$sduZH($js|(qUn~6oqu+A-vDoU#l*{$0c
zFLDh8ACLEB=}Kp`zX7hV!&`6?PQyn8*Yof#d=D21qCgPUutmUh1+K!ixDMCj229~*
z?7(}mlK`8=Ufh9wxQpOANPr!|F)UyaO#-clQ+Nb@EE8lO$60&=pTy(%Jc0M65WH{T
z+Y)$};JB*>L`m_3`Eab{O0MI%he%Tc@zqWdcS9Mdlh!eQ{_nm1_x~HZW@H>?Aj-i1
zD+5@c&1L!af4x;zAzgy{Bh-20cC(TS7aF(;my6>#(Mx|A(mWCBG9jInlr+@-@ecvf
R{(r^K)oA}0AiN2OzW_aM8~y+Q
literal 0
HcmV?d00001
diff --git a/lightrag/kg/neo4j.py b/lightrag/kg/neo4j.py
index 7205d9dc..374d9352 100644
--- a/lightrag/kg/neo4j.py
+++ b/lightrag/kg/neo4j.py
@@ -5,6 +5,8 @@
from typing import Any, Union, cast
import numpy as np
from nano_vectordb import NanoVectorDB
+import inspect
+
@@ -26,14 +28,12 @@
@dataclass
class GraphStorage(BaseGraphStorage):
@staticmethod
- # def load_nx_graph(file_name) -> nx.Graph:
- # if os.path.exists(file_name):
- # return nx.read_graphml(file_name)
- # return None
+ def load_nx_graph(file_name):
+ print ("no preloading of graph with neo4j in production")
def __post_init__(self):
# self._graph = preloaded_graph or nx.Graph()
- self._driver = GraphDatabase.driver(URI, auth=(USERNAME, PASSWORD))
+ self._driver = GraphDatabase.driver("neo4j+s://91fbae6c.databases.neo4j.io", auth=("neo4j", "KWKPXfXcClDbUlmDdGgIQhU5mL1N4E_2CJp2BDFbEbw"))
self._node_embed_algorithms = {
"node2vec": self._node2vec_embed,
}
@@ -41,79 +41,111 @@ def __post_init__(self):
async def index_done_callback(self):
print ("KG successfully indexed.")
async def has_node(self, node_id: str) -> bool:
- entity_name_label = node_id
- with self._driver.session() as session:
- return session.read_transaction(self._check_node_exists, entity_name_label)
+ entity_name_label = node_id.strip('\"')
- @staticmethod
def _check_node_exists(tx, label):
- query = f"MATCH (n:{label}) RETURN count(n) > 0 AS node_exists"
+ query = f"MATCH (n:`{label}`) RETURN count(n) > 0 AS node_exists"
result = tx.run(query)
- return result.single()["node_exists"]
+ single_result = result.single()
+ logger.info(
+ f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{single_result["node_exists"]}'
+ )
+
+ return single_result["node_exists"]
- async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
- entity_name_label_source = source_node_id
- entity_name_label_target = target_node_id
- #hard code relaitionship type
with self._driver.session() as session:
- result = session.read_transaction(self._check_edge_existence, entity_name_label_source, entity_name_label_target)
- return result
+ return session.read_transaction(_check_node_exists, entity_name_label)
+
+
+ async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
+ entity_name_label_source = source_node_id.strip('\"')
+ entity_name_label_target = target_node_id.strip('\"')
+
- @staticmethod
def _check_edge_existence(tx, label1, label2):
query = (
- f"MATCH (a:{label1})-[r]-(b:{label2}) "
+ f"MATCH (a:`{label1}`)-[r]-(b:`{label2}`) "
"RETURN COUNT(r) > 0 AS edgeExists"
)
result = tx.run(query)
- return result.single()["edgeExists"]
+ single_result = result.single()
+ # if result.single() == None:
+ # print (f"this should not happen: ---- {label1}/{label2} {query}")
+
+ logger.info(
+ f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{single_result["edgeExists"]}'
+ )
+
+ return single_result["edgeExists"]
def close(self):
- self._driver.close()
+ self._driver.close()
+ #hard code relaitionship type
+ with self._driver.session() as session:
+ result = session.read_transaction(_check_edge_existence, entity_name_label_source, entity_name_label_target)
+ return result
async def get_node(self, node_id: str) -> Union[dict, None]:
- entity_name_label = node_id
+ entity_name_label = node_id.strip('\"')
with self._driver.session() as session:
- result = session.run("MATCH (n:{entity_name_label}) RETURN n".format(entity_name_label=entity_name_label))
+ query = "MATCH (n:`{entity_name_label}`) RETURN n".format(entity_name_label=entity_name_label)
+ result = session.run(query)
for record in result:
- return record["n"]
+ result = record["n"]
+ logger.info(
+ f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{result}'
+ )
+ return result
async def node_degree(self, node_id: str) -> int:
- entity_name_label = node_id
- with self._driver.session() as session:
- degree = self._find_node_degree(session, entity_name_label)
- return degree
+ entity_name_label = node_id.strip('\"')
+
- @staticmethod
def _find_node_degree(session, label):
with session.begin_transaction() as tx:
- result = tx.run("MATCH (n:`{label}`) RETURN n, size((n)--()) AS degree".format(label=label))
+ # query = "MATCH (n:`{label}`) RETURN n, size((n)--()) AS degree".format(label=label)
+ query = f"""
+ MATCH (n:`{label}`)
+ RETURN COUNT{{ (n)--() }} AS totalEdgeCount
+ """
+ result = tx.run(query)
record = result.single()
- if record:
- return record["degree"]
+ if record:
+ edge_count = record["totalEdgeCount"]
+ logger.info(
+ f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{edge_count}'
+ )
+ return edge_count
else:
return None
+
+ with self._driver.session() as session:
+ degree = _find_node_degree(session, entity_name_label)
+ return degree
# degree = session.read_transaction(get_edge_degree, 1, 2)
async def edge_degree(self, src_id: str, tgt_id: str) -> int:
- entity_name__label_source = src_id
- entity_name_label_target = tgt_id
+ entity_name__label_source = src_id.strip('\"')
+ entity_name_label_target = tgt_id.strip('\"')
with self._driver.session() as session:
- result = session.run(
- """MATCH (n1:{node_label1})-[r]-(n2:{node_label2})
- RETURN count(r) AS degree"""
- .format(entity_name__label_source=entity_name__label_source, entity_name_label_target=entity_name_label_target)
- )
- record = result.single()
+ query = """MATCH (n1:`{node_label1}`)-[r]-(n2:`{node_label2}`)
+ RETURN count(r) AS degree""".format(entity_name__label_source=entity_name__label_source,
+ entity_name_label_target=entity_name_label_target)
+
+ result = session.run(query)
+ record = result.single()
+ logger.info(
+ f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{record["degree"]}'
+ )
return record["degree"]
async def get_edge(self, source_node_id: str, target_node_id: str) -> Union[dict, None]:
- entity_name__label_source = source_node_id
- entity_name_label_target = target_node_id
+ entity_name__label_source = source_node_id.strip('\"')
+ entity_name_label_target = target_node_id.strip('\"')
"""
Find all edges between nodes of two given labels
@@ -126,17 +158,109 @@ async def get_edge(self, source_node_id: str, target_node_id: str) -> Union[dict
"""
with self._driver.session() as session:
query = f"""
- MATCH (source:{entity_name__label_source})-[r]-(target:{entity_name_label_target})
+ MATCH (source:`{entity_name__label_source}`)-[r]-(target:`{entity_name_label_target}`)
RETURN r
"""
result = session.run(query)
+ for logrecord in result:
+ logger.info(
+ f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{logrecord["r"]}'
+ )
+
+
return [record["r"] for record in result]
+
+
+
+ async def get_node_edges(self, source_node_id: str):
+ if self._graph.has_node(source_node_id):
+ return list(self._graph.edges(source_node_id))
+ return None
+
+ async def get_node_edges(self, source_node_id: str):
+ node_label = source_node_id.strip('\"')
+
+ """
+ Retrieves all edges (relationships) for a particular node identified by its label and ID.
+
+ :param uri: Neo4j database URI
+ :param username: Neo4j username
+ :param password: Neo4j password
+ :param node_label: Label of the node
+ :param node_id: ID property of the node
+ :return: List of dictionaries containing edge information
+ """
+
+ def fetch_edges(tx, label):
+ query = f"""MATCH (n:`{label}`)
+ OPTIONAL MATCH (n)-[r]-(connected)
+ RETURN n, r, connected"""
+
+ results = tx.run(query)
+
+ edges = []
+ for record in results:
+ source_node = record['n']
+ connected_node = record['connected']
+
+ source_label = list(source_node.labels)[0] if source_node.labels else None
+ target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None
+
+ if source_label and target_label:
+ print (f"appending: {[source_label, target_label]}")
+ edges.append([source_label, target_label])
+
+ return edges
+
+ with self._driver.session() as session:
+ edges = session.read_transaction(fetch_edges,node_label)
+ return edges
+
+
+ # try:
+ # with self._driver.session() as session:
+ # if self.has_node(node_label):
+ # edges = session.read_transaction(fetch_edges,node_label)
+ # return list(edges)
+ # return edges
+ # finally:
+ # print ("consider closign driver here")
+ # # driver.close()
+
+ from typing import List, Tuple
+ async def get_node_connections(driver: GraphDatabase.driver, label: str) -> List[Tuple[str, str]]:
+ def run_query(tx):
+ query = f"""
+ MATCH (n:`{label}`)
+ OPTIONAL MATCH (n)-[r]-(connected)
+ RETURN n, r, connected
+ """
+ results = tx.run(query)
+
+ connections = []
+ for record in results:
+ source_node = record['n']
+ connected_node = record['connected']
+
+ source_label = list(source_node.labels)[0] if source_node.labels else None
+ target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None
+
+ if source_label and target_label:
+ connections.append((source_label, target_label))
+
+ return connections
+ with driver.session() as session:
+ return session.read_transaction(run_query)
-#upsert_node
+
+
+
+
+ #upsert_node
async def upsert_node(self, node_id: str, node_data: dict[str, str]):
- label = node_id
+ label = node_id.strip('\"')
properties = node_data
"""
Upsert a node with the given label and properties within a transaction.
@@ -152,21 +276,9 @@ async def upsert_node(self, node_id: str, node_data: dict[str, str]):
Returns:
Dictionary containing the node's properties after upsert, or None if operation fails
"""
- with self._driver.session() as session:
- # Execute the upsert within a transaction
- result = session.execute_write(
- self._do_upsert,
- label,
- properties
- )
- return result
-
+ def _do_upsert(tx, label: str, properties: dict[str, Any]):
- @staticmethod
- def _do_upsert(tx: Transaction, label: str, properties: Dict[str, Any]):
- """
- Static method to perform the actual upsert operation within a transaction
-
+ """
Args:
tx: Neo4j transaction object
label: The node label to search for and apply
@@ -175,44 +287,39 @@ def _do_upsert(tx: Transaction, label: str, properties: Dict[str, Any]):
Returns:
Dictionary containing the node's properties after upsert, or None if operation fails
"""
- # Create the dynamic property string for SET clause
- property_string = ", ".join([
- f"n.{key} = ${key}"
- for key in properties.keys()
- ])
-
- # Cypher query that either matches existing node or creates new one
+
query = f"""
- MATCH (n:{label})
- WITH n LIMIT 1
- CALL {{
- WITH n
- WHERE n IS NOT NULL
- SET {property_string}
- RETURN n
- UNION
- WITH n
- WHERE n IS NULL
- CREATE (n:{label})
- SET {property_string}
- RETURN n
- }}
+ MERGE (n:`{label}`)
+ SET n += $properties
RETURN n
"""
-
- # Execute the query with properties as parameters
- result = tx.run(query, properties)
- record = result.single()
-
- if record:
- return dict(record["n"])
- return None
-
-
+ # Execute the query with properties as parameters
+ # with session.begin_transaction() as tx:
+ result = tx.run(query, properties=properties)
+ record = result.single()
+ if record:
+ logger.info(
+ f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{dict(record["n"])}'
+ )
+ return dict(record["n"])
+ return None
+
+
+ with self._driver.session() as session:
+ with session.begin_transaction() as tx:
+ try:
+ result = _do_upsert(tx,label,properties)
+ tx.commit()
+ return result
+ except Exception as e:
+ raise # roll back
+
+
async def upsert_edge(self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]) -> None:
- source_node_label = source_node_id
- target_node_label = target_node_id
+ source_node_label = source_node_id.strip('\"')
+ target_node_label = target_node_id.strip('\"')
+ edge_properties = edge_data
"""
Upsert an edge and its properties between two nodes identified by their labels.
@@ -221,16 +328,10 @@ async def upsert_edge(self, source_node_id: str, target_node_id: str, edge_data:
target_node_label (str): Label of the target node (used as identifier)
edge_properties (dict): Dictionary of properties to set on the edge
"""
- with self._driver.session() as session:
- session.execute_write(
- self._do_upsert_edge,
- source_node_label,
- target_node_label,
- edge_data
- )
+
- @staticmethod
- def _do_upsert_edge(tx, source_node_label: str, target_node_label: str, edge_properties: Dict[str, Any]) -> None:
+
+ def _do_upsert_edge(tx, source_node_label: str, target_node_label: str, edge_properties: dict[str, Any]) -> None:
"""
Static method to perform the edge upsert within a transaction.
@@ -240,43 +341,58 @@ def _do_upsert_edge(tx, source_node_label: str, target_node_label: str, edge_pro
3. Set all properties on the relationship, updating existing ones and adding new ones
"""
# Convert edge properties to Cypher parameter string
- props_string = ", ".join(f"r.{key} = ${key}" for key in edge_properties.keys())
-
- query = """
- MATCH (source)
- WHERE source.label = $source_node_label
- MATCH (target)
- WHERE target.label = $target_node_label
+ # props_string = ", ".join(f"r.{key} = ${key}" for key in edge_properties.keys())
+
+ # """.format(props_string)
+ query = f"""
+ MATCH (source:`{source_node_label}`)
+ WITH source
+ MATCH (target:`{target_node_label}`)
MERGE (source)-[r:DIRECTED]->(target)
- SET {}
- """.format(props_string)
-
- # Prepare parameters dictionary
- params = {
- "source_node_label": source_node_label,
- "target_node_label": target_node_label,
- **edge_properties
- }
-
- # Execute the query
- tx.run(query, params)
+ SET r += $properties
+ RETURN r
+ """
+ result = tx.run(query, properties=edge_properties)
+ logger.info(
+ f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{None}'
+ )
+ return result.single()
+
+ with self._driver.session() as session:
+ session.execute_write(
+ _do_upsert_edge,
+ source_node_label,
+ target_node_label,
+ edge_properties
+ )
+ # return result
async def _node2vec_embed(self):
# async def _node2vec_embed(self):
with self._driver.session() as session:
#Define the Cypher query
options = self.global_config["node2vec_params"]
- query = f"""CALL gds.node2vec.stream('myGraph', {options}) # **options
- YIELD nodeId, embedding
- RETURN nodeId, embedding"""
+ logger.info(f"building embeddings with options {options}")
+ query = f"""CALL gds.node2vec.write('91fbae6c', {
+ options
+ })
+ YIELD nodeId, labels, embedding
+ RETURN
+ nodeId AS id,
+ labels[0] AS distinctLabel,
+ embedding AS nodeToVecEmbedding
+ """
# Run the query and process the results
results = session.run(query)
+ embeddings = []
+ node_labels = []
for record in results:
- node_id = record["nodeId"]
- embedding = record["embedding"]
- print(f"Node ID: {node_id}, Embedding: {embedding}")
- #need to return two lists here.
-
-
+ node_id = record["id"]
+ embedding = record["nodeToVecEmbedding"]
+ label = record["distinctLabel"]
+ print(f"Node id/label: {label}/{node_id}, Embedding: {embedding}")
+ embeddings.append(embedding)
+ node_labels.append(label)
+ return embeddings, node_labels
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 28a4af19..61e4e7a2 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -103,7 +103,7 @@ class LightRAG:
# module = importlib.import_module('kg.neo4j')
# Neo4JStorage = getattr(module, 'GraphStorage')
- if True==False:
+ if True==True:
graph_storage_cls: Type[BaseGraphStorage] = Neo4JStorage
else:
graph_storage_cls: Type[BaseGraphStorage] = NetworkXStorage
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 208b22a5..9baa6997 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -73,7 +73,7 @@ async def openai_complete_if_cache(
@retry(
stop=stop_after_attempt(3),
#kw_
- wait=wait_exponential(multiplier=1, min=4, max=60),
+ wait=wait_exponential(multiplier=1, min=10, max=60),
# wait=wait_exponential(multiplier=1, min=4, max=10),
retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
)
diff --git a/neo4jWorkDir/kv_store_full_docs.json b/neo4jWorkDir/kv_store_full_docs.json
new file mode 100644
index 00000000..eff31337
--- /dev/null
+++ b/neo4jWorkDir/kv_store_full_docs.json
@@ -0,0 +1,5 @@
+{
+ "doc-addb4618e1697da0445ec72a648e1f92": {
+ "content": "The Project Gutenberg eBook of A Christmas Carol\n \nThis ebook is for the use of anyone anywhere in the United States and\nmost other parts of the world at no cost and with almost no restrictions\nwhatsoever. You may copy it, give it away or re-use it under the terms\nof the Project Gutenberg License included with this ebook or online\nat www.gutenberg.org. If you are not located in the United States,\nyou will have to check the laws of the country where you are located\nbefore using this eBook.\n\nTitle: A Christmas Carol\n\nAuthor: Charles Dickens\n\nIllustrator: Arthur Rackham\n\nRelease date: December 24, 2007 [eBook #24022]\n\nLanguage: English\n\nOriginal publication: Philadelphia and New York: J. B. Lippincott Company,, 1915\n\nCredits: Produced by Suzanne Shell, Janet Blenkinship and the Online\n Distributed Proofreading Team at http://www.pgdp.net\n\n\n*** START OF THE PROJECT GUTENBERG EBOOK A CHRISTMAS CAROL ***\n\n\n\n\nProduced by Suzanne Shell, Janet Blenkinship and the Online\nDistributed Proofreading Team at http://www.pgdp.net\n\n\n\n\n\n\n\n\n\n\n\n A CHRISTMAS CAROL\n\n [Illustration: _\"How now?\" said Scrooge, caustic and cold as ever.\n \"What do you want with me?\"_]\n\n\n A CHRISTMAS CAROL\n\n [Illustration]\n\n BY\n\n CHARLES DICKENS\n\n [Illustration]\n\n ILLUSTRATED BY ARTHUR RACKHAM\n\n [Illustration]\n\n J. B. LIPPINCOTT COMPANY PHILADELPHIA AND NEW YORK\n\n FIRST PUBLISHED 1915\n\n REPRINTED 1923, 1927, 1932, 1933, 1934, 1935, 1947, 1948, 1952, 1958,\n 1962, 1964, 1966, 1967, 1969, 1971, 1972, 1973\n\n ISBN: 0-397-00033-2\n\n PRINTED IN GREAT BRITAIN\n\n\n\n\n PREFACE\n\n I have endeavoured in this Ghostly little book to raise the Ghost of an\n Idea which shall not put my readers out of humour with themselves, with\n each other, with the season, or with me. May it haunt their house\n pleasantly, and no one wish to lay it.\n\n Their faithful Friend and Servant,\n\n C. 
D.\n\n _December, 1843._\n\n\n\n\n CHARACTERS\n\n Bob Cratchit, clerk to Ebenezer Scrooge.\n Peter Cratchit, a son of the preceding.\n Tim Cratchit (\"Tiny Tim\"), a cripple, youngest son of Bob Cratchit.\n Mr. Fezziwig, a kind-hearted, jovial old merchant.\n Fred, Scrooge's nephew.\n Ghost of Christmas Past, a phantom showing things past.\n Ghost of Christmas Present, a spirit of a kind, generous,\n and hearty nature.\n Ghost of Christmas Yet to Come, an apparition showing the shadows\n of things which yet may happen.\n Ghost of Jacob Marley, a spectre of Scrooge's former partner in business.\n Joe, a marine-store dealer and receiver of stolen goods.\n Ebenezer Scrooge, a grasping, covetous old man, the surviving partner\n of the firm of Scrooge and Marley.\n Mr. Topper, a bachelor.\n Dick Wilkins, a fellow apprentice of Scrooge's.\n\n Belle, a comely matron, an old sweetheart of Scrooge's.\n Caroline, wife of one of Scrooge's debtors.\n Mrs. Cratchit, wife of Bob Cratchit.\n Belinda and Martha Cratchit, daughters of the preceding.\n\n Mrs. Dilber, a laundress.\n Fan, the sister of Scrooge.\n Mrs. Fezziwig, the worthy partner of Mr. Fezziwig.\n\n\n\n\n CONTENTS\n\n STAVE ONE--MARLEY'S GHOST 3\n STAVE TWO--THE FIRST OF THE THREE SPIRITS 37\n STAVE THREE--THE SECOND OF THE THREE SPIRITS 69\n STAVE FOUR--THE LAST OF THE SPIRITS 111\n STAVE FIVE--THE END OF IT 137\n\n\n LIST OF ILLUSTRATIONS\n\n _IN COLOUR_\n\n\n \"How now?\" said Scrooge, caustic\n and cold as ever. \"What do you\n want with me?\" _Frontispiece_\n\n Bob Cratchit went down a slide on\n Cornhill, at the end of a lane of\n boys, twenty times, in honour of\n its being Christmas Eve 16\n\n Nobody under the bed; nobody in\n the closet; nobody in his dressing-gown,\n which was hanging up\n in a suspicious attitude against\n the wall 20\n\n The air was filled with phantoms,\n wandering hither and thither in\n restless haste and moaning as\n they went 32\n\n Then old Fezziwig stood out to\n dance with Mrs. 
Fezziwig 54\n\n A flushed and boisterous group 62\n\n Laden with Christmas toys and\n presents 64\n\n The way he went after that plump\n sister in the lace tucker! 100\n\n \"How are you?\" said one.\n \"How are you?\" returned the other.\n \"Well!\" said the first. \"Old\n Scratch has got his own at last,\n hey?\" 114\n\n \"What do you call this?\" said Joe.\n \"Bed-curtains!\" \"Ah!\" returned\n the woman, laughing....\n \"Bed-curtains!\"\n\n \"You don't mean to say you took\n 'em down, rings and all, with him\n lying there?\" said Joe.\n\n \"Yes, I do,\" replied the woman.\n \"Why not?\" 120\n\n \"It's I, your uncle Scrooge. I have\n come to dinner. Will you let\n me in, Fred?\" 144\n\n \"Now, I'll tell you what, my friend,\"\n said Scrooge. \"I am not going\n to stand this sort of thing any\n longer.\" 146\n\n[Illustration]\n\n_IN BLACK AND WHITE_\n\n\n Tailpiece vi\n Tailpiece to List of Coloured Illustrations x\n Tailpiece to List of Black and White Illustrations xi\n Heading to Stave One 3\n They were portly gentlemen, pleasant to behold 12\n On the wings of the wind 28-29\n Tailpiece to Stave One 34\n Heading to Stave Two 37\n He produced a decanter of curiously\n light wine and a block of curiously heavy cake 50\n She left him, and they parted 60\n Tailpiece to Stave Two 65\n Heading to Stave Three 69\n There was nothing very cheerful in the climate 75\n He had been Tim's blood-horse all the way from church 84-85\n With the pudding 88\n Heading to Stave Four 111\n Heading to Stave Five 137\n Tailpiece to Stave Five 147\n\n[Illustration]\n\n\nSTAVE ONE\n\n\n[Illustration]\n\n\n\n\nMARLEY'S GHOST\n\n\nMarley was dead, to begin with. There is no doubt whatever about that.\nThe register of his burial was signed by the clergyman, the clerk, the\nundertaker, and the chief mourner. Scrooge signed it. And Scrooge's name\nwas good upon 'Change for anything he chose to put his hand to. Old\nMarley was as dead as a door-nail.\n\nMind! 
I don't mean to say that I know of my own knowledge, what there is\nparticularly dead about a door-nail. I might have been inclined, myself,\nto regard a coffin-nail as the deadest piece of ironmongery in the\ntrade. But the wisdom of our ancestors is in the simile; and my\nunhallowed hands shall not disturb it, or the country's done for. You\nwill, therefore, permit me to repeat, emphatically, that Marley was as\ndead as a door-nail.\n\nScrooge knew he was dead? Of course he did. How could it be otherwise?\nScrooge and he were partners for I don't know how many years. Scrooge\nwas his sole executor, his sole administrator, his sole assign, his sole\nresiduary legatee, his sole friend, and sole mourner. And even Scrooge\nwas not so dreadfully cut up by the sad event but that he was an\nexcellent man of business on the very day of the funeral, and solemnised\nit with an undoubted bargain.\n\nThe mention of Marley's funeral brings me back to the point I started\nfrom. There is no doubt that Marley was dead. This must be distinctly\nunderstood, or nothing wonderful can come of the story I am going to\nrelate. If we were not perfectly convinced that Hamlet's father died\nbefore the play began, there would be nothing more remarkable in his\ntaking a stroll at night, in an easterly wind, upon his own ramparts,\nthan there would be in any other middle-aged gentleman rashly turning\nout after dark in a breezy spot--say St. Paul's Churchyard, for\ninstance--literally to astonish his son's weak mind.\n\nScrooge never painted out Old Marley's name. There it stood, years\nafterwards, above the warehouse door: Scrooge and Marley. The firm was\nknown as Scrooge and Marley. Sometimes people new to the business called\nScrooge Scrooge, and sometimes Marley, but he answered to both names. It\nwas all the same to him.\n\nOh! but he was a tight-fisted hand at the grindstone, Scrooge! a\nsqueezing, wrenching, grasping, scraping, clutching, covetous old\nsinner! 
Hard and sharp as flint, from which no steel had ever struck out\ngenerous fire; secret, and self-contained, and solitary as an oyster.\nThe cold within him froze his old features, nipped his pointed nose,\nshrivelled his cheek, stiffened his gait; made his eyes red, his thin\nlips blue; and spoke out shrewdly in his grating voice. A frosty rime\nwas on his head, and on his eyebrows, and his wiry chin. He carried his\nown low temperature always about with him; he iced his office in the\ndog-days, and didn't thaw it one degree at Christmas.\n\nExternal heat and cold had little influence on Scrooge. No warmth could\nwarm, no wintry weather chill him. No wind that blew was bitterer than\nhe, no falling snow was more intent upon its purpose, no pelting rain\nless open to entreaty. Foul weather didn't know where to have him. The\nheaviest rain, and snow, and hail, and sleet could boast of the\nadvantage over him in only one respect. They often 'came down'\nhandsomely, and Scrooge never did.\n\nNobody ever stopped him in the street to say, with gladsome looks, 'My\ndear Scrooge, how are you? When will you come to see me?' No beggars\nimplored him to bestow a trifle, no children asked him what it was\no'clock, no man or woman ever once in all his life inquired the way to\nsuch and such a place, of Scrooge. Even the blind men's dogs appeared to\nknow him; and, when they saw him coming on, would tug their owners into\ndoorways and up courts; and then would wag their tails as though they\nsaid, 'No eye at all is better than an evil eye, dark master!'\n\nBut what did Scrooge care? It was the very thing he liked. To edge his\nway along the crowded paths of life, warning all human sympathy to keep\nits distance, was what the knowing ones call 'nuts' to Scrooge.\n\nOnce upon a time--of all the good days in the year, on Christmas\nEve--old Scrooge sat busy in his counting-house. 
It was cold, bleak,\nbiting weather; foggy withal; and he could hear the people in the court\noutside go wheezing up and down, beating their hands upon their breasts,\nand stamping their feet upon the pavement stones to warm them. The City\nclocks had only just gone three, but it was quite dark already--it had\nnot been light all day--and candles were flaring in the windows of the\nneighbouring offices, like ruddy smears upon the palpable brown air. The\nfog came pouring in at every chink and keyhole, and was so dense\nwithout, that, although the court was of the narrowest, the houses\nopposite were mere phantoms. To see the dingy cloud come drooping down,\nobscuring everything, one might have thought that nature lived hard by,\nand was brewing on a large scale.\n\nThe door of Scrooge's counting-house was open, that he might keep his\neye upon his clerk, who in a dismal little cell beyond, a sort of tank,\nwas copying letters. Scrooge had a very small fire, but the clerk's fire\nwas so very much smaller that it looked like one coal. But he couldn't\nreplenish it, for Scrooge kept the coal-box in his own room; and so\nsurely as the clerk came in with the shovel, the master predicted that\nit would be necessary for them to part. Wherefore the clerk put on his\nwhite comforter, and tried to warm himself at the candle; in which\neffort, not being a man of strong imagination, he failed.\n\n'A merry Christmas, uncle! God save you!' cried a cheerful voice. It was\nthe voice of Scrooge's nephew, who came upon him so quickly that this\nwas the first intimation he had of his approach.\n\n'Bah!' said Scrooge. 'Humbug!'\n\nHe had so heated himself with rapid walking in the fog and frost, this\nnephew of Scrooge's, that he was all in a glow; his face was ruddy and\nhandsome; his eyes sparkled, and his breath smoked again.\n\n'Christmas a humbug, uncle!' said Scrooge's nephew. 'You don't mean\nthat, I am sure?'\n\n'I do,' said Scrooge. 'Merry Christmas! 
What right have you to be merry?\nWhat reason have you to be merry? You're poor enough.'\n\n'Come, then,' returned the nephew gaily. 'What right have you to be\ndismal? What reason have you to be morose? You're rich enough.'\n\nScrooge, having no better answer ready on the spur of the moment, said,\n'Bah!' again; and followed it up with 'Humbug!'\n\n'Don't be cross, uncle!' said the nephew.\n\n'What else can I be,' returned the uncle, 'when I live in such a world\nof fools as this? Merry Christmas! Out upon merry Christmas! What's\nChristmas-time to you but a time for paying bills without money; a time\nfor finding yourself a year older, and not an hour richer; a time for\nbalancing your books, and having every item in 'em through a round dozen\nof months presented dead against you? If I could work my will,' said\nScrooge indignantly, 'every idiot who goes about with \"Merry Christmas\"\non his lips should be boiled with his own pudding, and buried with a\nstake of holly through his heart. He should!'\n\n'Uncle!' pleaded the nephew.\n\n'Nephew!' returned the uncle sternly, 'keep Christmas in your own way,\nand let me keep it in mine.'\n\n'Keep it!' repeated Scrooge's nephew. 'But you don't keep it.'\n\n'Let me leave it alone, then,' said Scrooge. 'Much good may it do you!\nMuch good it has ever done you!'\n\n'There are many things from which I might have derived good, by which I\nhave not profited, I dare say,' returned the nephew; 'Christmas among\nthe rest. 
But I am sure I have always thought of Christmas-time, when\nit has come round--apart from the veneration due to its sacred name and\norigin, if anything belonging to it can be apart from that--as a good\ntime; a kind, forgiving, charitable, pleasant time; the only time I know\nof, in the long calendar of the year, when men and women seem by one\nconsent to open their shut-up hearts freely, and to think of people\nbelow them as if they really were fellow-passengers to the grave, and\nnot another race of creatures bound on other journeys. And therefore,\nuncle, though it has never put a scrap of gold or silver in my pocket, I\nbelieve that it _has_ done me good and _will_ do me good; and I say, God\nbless it!'\n\nThe clerk in the tank involuntarily applauded. Becoming immediately\nsensible of the impropriety, he poked the fire, and extinguished the\nlast frail spark for ever.\n\n'Let me hear another sound from _you_,' said Scrooge, 'and you'll keep\nyour Christmas by losing your situation! You're quite a powerful\nspeaker, sir,' he added, turning to his nephew. 'I wonder you don't go\ninto Parliament.'\n\n'Don't be angry, uncle. Come! Dine with us to-morrow.'\n\nScrooge said that he would see him----Yes, indeed he did. He went the\nwhole length of the expression, and said that he would see him in that\nextremity first.\n\n'But why?' cried Scrooge's nephew. 'Why?'\n\n'Why did you get married?' said Scrooge.\n\n'Because I fell in love.'\n\n'Because you fell in love!' growled Scrooge, as if that were the only\none thing in the world more ridiculous than a merry Christmas. 'Good\nafternoon!'\n\n'Nay, uncle, but you never came to see me before that happened. Why give\nit as a reason for not coming now?'\n\n'Good afternoon,' said Scrooge.\n\n'I want nothing from you; I ask nothing of you; why cannot we be\nfriends?'\n\n'Good afternoon!' said Scrooge.\n\n'I am sorry, with all my heart, to find you so resolute. We have never\nhad any quarrel to which I have been a party. 
But I have made the trial\nin homage to Christmas, and I'll keep my Christmas humour to the last.\nSo A Merry Christmas, uncle!'\n\n'Good afternoon,' said Scrooge.\n\n'And A Happy New Year!'\n\n'Good afternoon!' said Scrooge.\n\nHis nephew left the room without an angry word, notwithstanding. He\nstopped at the outer door to bestow the greetings of the season on the\nclerk, who, cold as he was, was warmer than Scrooge; for he returned\nthem cordially.\n\n'There's another fellow,' muttered Scrooge, who overheard him: 'my\nclerk, with fifteen shillings a week, and a wife and family, talking\nabout a merry Christmas. I'll retire to Bedlam.'\n\nThis lunatic, in letting Scrooge's nephew out, had let two other people\nin. They were portly gentlemen, pleasant to behold, and now stood, with\ntheir hats off, in Scrooge's office. They had books and papers in their\nhands, and bowed to him.\n\n'Scrooge and Marley's, I believe,' said one of the gentlemen, referring\nto his list. 'Have I the pleasure of addressing Mr. Scrooge, or Mr.\nMarley?'\n\n'Mr. Marley has been dead these seven years,' Scrooge replied. 'He died\nseven years ago, this very night.'\n\n'We have no doubt his liberality is well represented by his surviving\npartner,' said the gentleman, presenting his credentials.\n\n[Illustration: THEY WERE PORTLY GENTLEMEN, PLEASANT TO BEHOLD]\n\nIt certainly was; for they had been two kindred spirits. At the ominous\nword 'liberality' Scrooge frowned, and shook his head, and handed the\ncredentials back.\n\n'At this festive season of the year, Mr. Scrooge,' said the gentleman,\ntaking up a pen, 'it is more than usually desirable that we should make\nsome slight provision for the poor and destitute, who suffer greatly at\nthe present time. Many thousands are in want of common necessaries;\nhundreds of thousands are in want of common comforts, sir.'\n\n'Are there no prisons?' 
asked Scrooge.\n\n'Plenty of prisons,' said the gentleman, laying down the pen again.\n\n'And the Union workhouses?' demanded Scrooge. 'Are they still in\noperation?'\n\n'They are. Still,' returned the gentleman, 'I wish I could say they were\nnot.'\n\n'The Treadmill and the Poor Law are in full vigour, then?' said Scrooge.\n\n'Both very busy, sir.'\n\n'Oh! I was afraid, from what you said at first, that something had\noccurred to stop them in their useful course,' said Scrooge. 'I am very\nglad to hear it.'\n\n'Under the impression that they scarcely furnish Christian cheer of mind\nor body to the multitude,' returned the gentleman, 'a few of us are\nendeavouring to raise a fund to buy the Poor some meat and drink, and\nmeans of warmth. We choose this time, because it is a time, of all\nothers, when Want is keenly felt, and Abundance rejoices. What shall I\nput you down for?'\n\n'Nothing!' Scrooge replied.\n\n'You wish to be anonymous?'\n\n'I wish to be left alone,' said Scrooge. 'Since you ask me what I wish,\ngentlemen, that is my answer. I don't make merry myself at Christmas,\nand I can't afford to make idle people merry. I help to support the\nestablishments I have mentioned--they cost enough: and those who are\nbadly off must go there.'\n\n'Many can't go there; and many would rather die.'\n\n'If they would rather die,' said Scrooge, 'they had better do it, and\ndecrease the surplus population. Besides--excuse me--I don't know that.'\n\n'But you might know it,' observed the gentleman.\n\n'It's not my business,' Scrooge returned. 'It's enough for a man to\nunderstand his own business, and not to interfere with other people's.\nMine occupies me constantly. Good afternoon, gentlemen!'\n\nSeeing clearly that it would be useless to pursue their point, the\ngentlemen withdrew. 
Scrooge resumed his labours with an improved opinion\nof himself, and in a more facetious temper than was usual with him.\n\nMeanwhile the fog and darkness thickened so, that people ran about with\nflaring links, proffering their services to go before horses in\ncarriages, and conduct them on their way. The ancient tower of a church,\nwhose gruff old bell was always peeping slyly down at Scrooge out of a\nGothic window in the wall, became invisible, and struck the hours and\nquarters in the clouds, with tremulous vibrations afterwards, as if its\nteeth were chattering in its frozen head up there. The cold became\nintense. In the main street, at the corner of the court, some labourers\nwere repairing the gas-pipes, and had lighted a great fire in a brazier,\nround which a party of ragged men and boys were gathered: warming their\nhands and winking their eyes before the blaze in rapture. The water-plug\nbeing left in solitude, its overflowings suddenly congealed, and turned\nto misanthropic ice. The brightness of the shops, where holly sprigs and\nberries crackled in the lamp heat of the windows, made pale faces ruddy\nas they passed. Poulterers' and grocers' trades became a splendid joke:\na glorious pageant, with which it was next to impossible to believe that\nsuch dull principles as bargain and sale had anything to do. The Lord\nMayor, in the stronghold of the mighty Mansion House, gave orders to his\nfifty cooks and butlers to keep Christmas as a Lord Mayor's household\nshould; and even the little tailor, whom he had fined five shillings on\nthe previous Monday for being drunk and bloodthirsty in the streets,\nstirred up to-morrow's pudding in his garret, while his lean wife and\nthe baby sallied out to buy the beef.\n\nFoggier yet, and colder! Piercing, searching, biting cold. If the good\nSt. 
Dunstan had but nipped the Evil Spirit's nose with a touch of such\nweather as that, instead of using his familiar weapons, then indeed he\nwould have roared to lusty purpose. The owner of one scant young nose,\ngnawed and mumbled by the hungry cold as bones are gnawed by dogs,\nstooped down at Scrooge's keyhole to regale him with a Christmas carol;\nbut, at the first sound of\n\n 'God bless you, merry gentleman,\n May nothing you dismay!'\n\nScrooge seized the ruler with such energy of action that the singer fled\nin terror, leaving the keyhole to the fog, and even more congenial\nfrost.\n\nAt length the hour of shutting up the counting-house arrived. With an\nill-will Scrooge dismounted from his stool, and tacitly admitted the\nfact to the expectant clerk in the tank, who instantly snuffed his\ncandle out, and put on his hat.\n\n'You'll want all day to-morrow, I suppose?' said Scrooge.\n\n'If quite convenient, sir.'\n\n'It's not convenient,' said Scrooge, 'and it's not fair. If I was to\nstop half-a-crown for it, you'd think yourself ill used, I'll be bound?'\n\nThe clerk smiled faintly.\n\n'And yet,' said Scrooge, 'you don't think _me_ ill used when I pay a\nday's wages for no work.'\n\n[Illustration: _Bob Cratchit went down a slide on Cornhill, at the end\nof a lane of boys, twenty times, in honour of its being Christmas\nEve_]\n\nThe clerk observed that it was only once a year.\n\n'A poor excuse for picking a man's pocket every twenty-fifth of\nDecember!' said Scrooge, buttoning his greatcoat to the chin. 'But I\nsuppose you must have the whole day. 
Be here all the earlier next\nmorning.'\n\nThe clerk promised that he would; and Scrooge walked out with a growl.\nThe office was closed in a twinkling, and the clerk, with the long ends\nof his white comforter dangling below his waist (for he boasted no\ngreatcoat), went down a slide on Cornhill, at the end of a lane of boys,\ntwenty times, in honour of its being Christmas Eve, and then ran home to\nCamden Town as hard as he could pelt, to play at blind man's-buff.\n\nScrooge took his melancholy dinner in his usual melancholy tavern; and\nhaving read all the newspapers, and beguiled the rest of the evening\nwith his banker's book, went home to bed. He lived in chambers which had\nonce belonged to his deceased partner. They were a gloomy suite of\nrooms, in a lowering pile of building up a yard, where it had so little\nbusiness to be, that one could scarcely help fancying it must have run\nthere when it was a young house, playing at hide-and-seek with other\nhouses, and have forgotten the way out again. It was old enough now, and\ndreary enough; for nobody lived in it but Scrooge, the other rooms\nbeing all let out as offices. The yard was so dark that even Scrooge,\nwho knew its every stone, was fain to grope with his hands. The fog and\nfrost so hung about the black old gateway of the house, that it seemed\nas if the Genius of the Weather sat in mournful meditation on the\nthreshold.\n\nNow, it is a fact that there was nothing at all particular about the\nknocker on the door, except that it was very large. It is also a fact\nthat Scrooge had seen it, night and morning, during his whole residence\nin that place; also that Scrooge had as little of what is called fancy\nabout him as any man in the City of London, even including--which is a\nbold word--the corporation, aldermen, and livery. Let it also be borne\nin mind that Scrooge had not bestowed one thought on Marley since his\nlast mention of his seven-years'-dead partner that afternoon. 
And then\nlet any man explain to me, if he can, how it happened that Scrooge,\nhaving his key in the lock of the door, saw in the knocker, without its\nundergoing any intermediate process of change--not a knocker, but\nMarley's face.\n\nMarley's face. It was not in impenetrable shadow, as the other objects\nin the yard were, but had a dismal light about it, like a bad lobster in\na dark cellar. It was not angry or ferocious, but looked at Scrooge as\nMarley used to look; with ghostly spectacles turned up on its ghostly\nforehead. The hair was curiously stirred, as if by breath or hot air;\nand, though the eyes were wide open, they were perfectly motionless.\nThat, and its livid colour, made it horrible; but its horror seemed to\nbe in spite of the face, and beyond its control, rather than a part of\nits own expression.\n\nAs Scrooge looked fixedly at this phenomenon, it was a knocker again.\n\nTo say that he was not startled, or that his blood was not conscious of\na terrible sensation to which it had been a stranger from infancy, would\nbe untrue. But he put his hand upon the key he had relinquished, turned\nit sturdily, walked in, and lighted his candle.\n\nHe _did_ pause, with a moment's irresolution, before he shut the door;\nand he _did_ look cautiously behind it first, as if he half expected to\nbe terrified with the sight of Marley's pigtail sticking out into the\nhall. But there was nothing on the back of the door, except the screws\nand nuts that held the knocker on, so he said, 'Pooh, pooh!' and closed\nit with a bang.\n\nThe sound resounded through the house like thunder. Every room above,\nand every cask in the wine-merchant's cellars below, appeared to have a\nseparate peal of echoes of its own. Scrooge was not a man to be\nfrightened by echoes. 
He fastened the door, and walked across the hall,\nand up the stairs: slowly, too: trimming his candle as he went.\n\nYou may talk vaguely about driving a coach and six up a good old flight\nof stairs, or through a bad young Act of Parliament; but I mean to say\nyou might have got a hearse up that staircase, and taken it broadwise,\nwith the splinter-bar towards the wall, and the door towards the\nbalustrades: and done it easy. There was plenty of width for that, and\nroom to spare; which is perhaps the reason why Scrooge thought he saw a\nlocomotive hearse going on before him in the gloom. Half-a-dozen\ngas-lamps out of the street wouldn't have lighted the entry too well, so\nyou may suppose that it was pretty dark with Scrooge's dip.\n\nUp Scrooge went, not caring a button for that. Darkness is cheap, and\nScrooge liked it. But, before he shut his heavy door, he walked through\nhis rooms to see that all was right. He had just enough recollection of\nthe face to desire to do that.\n\nSitting-room, bedroom, lumber-room. All as they should be. Nobody under\nthe table, nobody under the sofa; a small fire in the grate; spoon and\nbasin ready; and the little saucepan of gruel (Scrooge had a cold in his\nhead) upon the hob. Nobody under the bed; nobody in the closet; nobody\nin his dressing-gown, which was hanging up in a suspicious attitude\nagainst the wall. Lumber-room as usual. Old fire-guard, old shoes, two\nfish baskets, washing-stand on three legs, and a poker.\n\n[Illustration: _Nobody under the bed; nobody in the closet; nobody in\nhis dressing-gown, which was hanging up in a suspicious attitude against\nthe wall_]\n\nQuite satisfied, he closed his door, and locked himself in; double\nlocked himself in, which was not his custom. Thus secured against\nsurprise, he took off his cravat; put on his dressing-gown and slippers,\nand his nightcap; and sat down before the fire to take his gruel.\n\nIt was a very low fire indeed; nothing on such a bitter night. 
He was\nobliged to sit close to it, and brood over it, before he could extract\nthe least sensation of warmth from such a handful of fuel. The fireplace\nwas an old one, built by some Dutch merchant long ago, and paved all\nround with quaint Dutch tiles, designed to illustrate the Scriptures.\nThere were Cains and Abels, Pharaoh's daughters, Queens of Sheba,\nAngelic messengers descending through the air on clouds like\nfeather-beds, Abrahams, Belshazzars, Apostles putting off to sea in\nbutter-boats, hundreds of figures to attract his thoughts; and yet that\nface of Marley, seven years dead, came like the ancient Prophet's rod,\nand swallowed up the whole. If each smooth tile had been a blank at\nfirst, with power to shape some picture on its surface from the\ndisjointed fragments of his thoughts, there would have been a copy of\nold Marley's head on every one.\n\n'Humbug!' said Scrooge; and walked across the room.\n\nAfter several turns he sat down again. As he threw his head back in the\nchair, his glance happened to rest upon a bell, a disused bell, that\nhung in the room, and communicated, for some purpose now forgotten, with\na chamber in the highest storey of the building. It was with great\nastonishment, and with a strange, inexplicable dread, that, as he\nlooked, he saw this bell begin to swing. It swung so softly in the\noutset that it scarcely made a sound; but soon it rang out loudly, and\nso did every bell in the house.\n\nThis might have lasted half a minute, or a minute, but it seemed an\nhour. The bells ceased, as they had begun, together. They were succeeded\nby a clanking noise deep down below as if some person were dragging a\nheavy chain over the casks in the wine-merchant's cellar. 
Scrooge then\nremembered to have heard that ghosts in haunted houses were described as\ndragging chains.\n\nThe cellar door flew open with a booming sound, and then he heard the\nnoise much louder on the floors below; then coming up the stairs; then\ncoming straight towards his door.\n\n'It's humbug still!' said Scrooge. 'I won't believe it.'\n\nHis colour changed, though, when, without a pause, it came on through\nthe heavy door and passed into the room before his eyes. Upon its coming\nin, the dying flame leaped up, as though it cried, 'I know him! Marley's\nGhost!' and fell again.\n\nThe same face: the very same. Marley in his pigtail, usual waistcoat,\ntights, and boots; the tassels on the latter bristling, like his\npigtail, and his coat-skirts, and the hair upon his head. The chain he\ndrew was clasped about his middle. It was long, and wound about him like\na tail; and it was made (for Scrooge observed it closely) of cash-boxes,\nkeys, padlocks, ledgers, deeds, and heavy purses wrought in steel. His\nbody was transparent: so that Scrooge, observing him, and looking\nthrough his waistcoat, could see the two buttons on his coat behind.\n\nScrooge had often heard it said that Marley had no bowels, but he had\nnever believed it until now.\n\nNo, nor did he believe it even now. Though he looked the phantom through\nand through, and saw it standing before him; though he felt the chilling\ninfluence of its death-cold eyes, and marked the very texture of the\nfolded kerchief bound about its head and chin, which wrapper he had not\nobserved before, he was still incredulous, and fought against his\nsenses.\n\n'How now!' said Scrooge, caustic and cold as ever. 'What do you want\nwith me?'\n\n'Much!'--Marley's voice; no doubt about it.\n\n'Who are you?'\n\n'Ask me who I _was_.'\n\n'Who _were_ you, then?' said Scrooge, raising his voice. 'You're\nparticular, for a shade.' 
He was going to say '_to_ a shade,' but\nsubstituted this, as more appropriate.\n\n'In life I was your partner, Jacob Marley.'\n\n'Can you--can you sit down?' asked Scrooge, looking doubtfully at him.\n\n'I can.'\n\n'Do it, then.'\n\nScrooge asked the question, because he didn't know whether a ghost so\ntransparent might find himself in a condition to take a chair; and felt\nthat in the event of its being impossible, it might involve the\nnecessity of an embarrassing explanation. But the Ghost sat down on the\nopposite side of the fireplace, as if he were quite used to it.\n\n'You don't believe in me,' observed the Ghost.\n\n'I don't,' said Scrooge.\n\n'What evidence would you have of my reality beyond that of your own\nsenses?'\n\n'I don't know,' said Scrooge.\n\n'Why do you doubt your senses?'\n\n'Because,' said Scrooge, 'a little thing affects them. A slight disorder\nof the stomach makes them cheats. You may be an undigested bit of beef,\na blot of mustard, a crumb of cheese, a fragment of an underdone potato.\nThere's more of gravy than of grave about you, whatever you are!'\n\nScrooge was not much in the habit of cracking jokes, nor did he feel in\nhis heart by any means waggish then. The truth is, that he tried to be\nsmart, as a means of distracting his own attention, and keeping down his\nterror; for the spectre's voice disturbed the very marrow in his bones.\n\nTo sit staring at those fixed, glazed eyes in silence, for a moment,\nwould play, Scrooge felt, the very deuce with him. There was something\nvery awful, too, in the spectre's being provided with an infernal\natmosphere of his own. Scrooge could not feel it himself, but this was\nclearly the case; for though the Ghost sat perfectly motionless, its\nhair, and skirts, and tassels were still agitated as by the hot vapour\nfrom an oven.\n\n'You see this toothpick?' 
said Scrooge, returning quickly to the charge,\nfor the reason just assigned; and wishing, though it were only for a\nsecond, to divert the vision's stony gaze from himself.\n\n'I do,' replied the Ghost.\n\n'You are not looking at it,' said Scrooge.\n\n'But I see it,' said the Ghost, 'notwithstanding.'\n\n'Well!' returned Scrooge, 'I have but to swallow this, and be for the\nrest of my days persecuted by a legion of goblins, all of my own\ncreation. Humbug, I tell you: humbug!'\n\nAt this the spirit raised a frightful cry, and shook its chain with such\na dismal and appalling noise, that Scrooge held on tight to his chair,\nto save himself from falling in a swoon. But how much greater was his\nhorror when the phantom, taking off the bandage round his head, as if it\nwere too warm to wear indoors, its lower jaw dropped down upon its\nbreast!\n\nScrooge fell upon his knees, and clasped his hands before his face.\n\n'Mercy!' he said. 'Dreadful apparition, why do you trouble me?'\n\n'Man of the worldly mind!' replied the Ghost, 'do you believe in me or\nnot?'\n\n'I do,' said Scrooge; 'I must. But why do spirits walk the earth, and\nwhy do they come to me?'\n\n'It is required of every man,' the Ghost returned, 'that the spirit\nwithin him should walk abroad among his fellow-men, and travel far and\nwide; and, if that spirit goes not forth in life, it is condemned to do\nso after death. It is doomed to wander through the world--oh, woe is\nme!--and witness what it cannot share, but might have shared on earth,\nand turned to happiness!'\n\nAgain the spectre raised a cry, and shook its chain and wrung its\nshadowy hands.\n\n'You are fettered,' said Scrooge, trembling. 'Tell me why?'\n\n'I wear the chain I forged in life,' replied the Ghost. 'I made it link\nby link, and yard by yard; I girded it on of my own free will, and of\nmy own free will I wore it. 
Is its pattern strange to _you_?'\n\nScrooge trembled more and more.\n\n'Or would you know,' pursued the Ghost, 'the weight and length of the\nstrong coil you bear yourself? It was full as heavy and as long as this\nseven Christmas Eves ago. You have laboured on it since. It is a\nponderous chain!'\n\nScrooge glanced about him on the floor, in the expectation of finding\nhimself surrounded by some fifty or sixty fathoms of iron cable; but he\ncould see nothing.\n\n'Jacob!' he said imploringly. 'Old Jacob Marley, tell me more! Speak\ncomfort to me, Jacob!'\n\n'I have none to give,' the Ghost replied. 'It comes from other regions,\nEbenezer Scrooge, and is conveyed by other ministers, to other kinds of\nmen. Nor can I tell you what I would. A very little more is all\npermitted to me. I cannot rest, I cannot stay, I cannot linger anywhere.\nMy spirit never walked beyond our counting-house--mark me;--in life my\nspirit never roved beyond the narrow limits of our money-changing hole;\nand weary journeys lie before me!'\n\nIt was a habit with Scrooge, whenever he became thoughtful, to put his\nhands in his breeches pockets. Pondering on what the Ghost had said, he\ndid so now, but without lifting up his eyes, or getting off his knees.\n\n[Illustration: ON THE WINGS OF THE WIND]\n\n'You must have been very slow about it, Jacob,' Scrooge observed in a\nbusiness-like manner, though with humility and deference.\n\n'Slow!' the Ghost repeated.\n\n'Seven years dead,' mused Scrooge. 'And travelling all the time?'\n\n'The whole time,' said the Ghost. 'No rest, no peace. Incessant torture\nof remorse.'\n\n'You travel fast?' 
said Scrooge.\n\n[Illustration]\n\n'On the wings of the wind,' replied the Ghost.\n\n'You might have got over a great quantity of ground in seven years,'\nsaid Scrooge.\n\nThe Ghost, on hearing this, set up another cry, and clanked its chain so\nhideously in the dead silence of the night, that the Ward would have\nbeen justified in indicting it for a nuisance.\n\n'Oh! captive, bound, and double-ironed,' cried the phantom, 'not to know\nthat ages of incessant labour, by immortal creatures, for this earth\nmust pass into eternity before the good of which it is susceptible is\nall developed! Not to know that any Christian spirit working kindly in\nits little sphere, whatever it may be, will find its mortal life too\nshort for its vast means of usefulness! Not to know that no space of\nregret can make amends for one life's opportunities misused! Yet such\nwas I! Oh, such was I!'\n\n'But you were always a good man of business, Jacob,' faltered Scrooge,\nwho now began to apply this to himself.\n\n'Business!' cried the Ghost, wringing its hands again. 'Mankind was my\nbusiness. The common welfare was my business; charity, mercy,\nforbearance, and benevolence were, all, my business. The dealings of my\ntrade were but a drop of water in the comprehensive ocean of my\nbusiness!'\n\nIt held up its chain at arm's-length, as if that were the cause of all\nits unavailing grief, and flung it heavily upon the ground again.\n\n'At this time of the rolling year,' the spectre said, 'I suffer most.\nWhy did I walk through crowds of fellow-beings with my eyes turned down,\nand never raise them to that blessed Star which led the Wise Men to a\npoor abode? Were there no poor homes to which its light would have\nconducted _me_?'\n\nScrooge was very much dismayed to hear the spectre going on at this\nrate, and began to quake exceedingly.\n\n'Hear me!' cried the Ghost. 'My time is nearly gone.'\n\n'I will,' said Scrooge. 'But don't be hard upon me! Don't be flowery,\nJacob! 
Pray!'\n\n'How it is that I appear before you in a shape that you can see, I may\nnot tell. I have sat invisible beside you many and many a day.'\n\nIt was not an agreeable idea. Scrooge shivered, and wiped the\nperspiration from his brow.\n\n'That is no light part of my penance,' pursued the Ghost. 'I am here\nto-night to warn you that you have yet a chance and hope of escaping my\nfate. A chance and hope of my procuring, Ebenezer.'\n\n'You were always a good friend to me,' said Scrooge. 'Thankee!'\n\n'You will be haunted,' resumed the Ghost, 'by Three Spirits.'\n\nScrooge's countenance fell almost as low as the Ghost's had done.\n\n'Is that the chance and hope you mentioned, Jacob?' he demanded in a\nfaltering voice.\n\n'It is.'\n\n'I--I think I'd rather not,' said Scrooge.\n\n'Without their visits,' said the Ghost, 'you cannot hope to shun the\npath I tread. Expect the first to-morrow when the bell tolls One.'\n\n'Couldn't I take 'em all at once, and have it over, Jacob?' hinted\nScrooge.\n\n'Expect the second on the next night at the same hour. The third, upon\nthe next night when the last stroke of Twelve has ceased to vibrate.\nLook to see me no more; and look that, for your own sake, you remember\nwhat has passed between us!'\n\nWhen it had said these words, the spectre took its wrapper from the\ntable, and bound it round its head as before. Scrooge knew this by the\nsmart sound its teeth made when the jaws were brought together by the\nbandage. He ventured to raise his eyes again, and found his supernatural\nvisitor confronting him in an erect attitude, with its chain wound over\nand about its arm.\n\n[Illustration: _The air was filled with phantoms, wandering hither and\nthither in restless haste and moaning as they went_]\n\nThe apparition walked backward from him; and, at every step it took, the\nwindow raised itself a little, so that, when the spectre reached it, it\nwas wide open. It beckoned Scrooge to approach, which he did. 
When they\nwere within two paces of each other, Marley's Ghost held up its hand,\nwarning him to come no nearer. Scrooge stopped.\n\nNot so much in obedience as in surprise and fear; for, on the raising of\nthe hand, he became sensible of confused noises in the air; incoherent\nsounds of lamentation and regret; wailings inexpressibly sorrowful and\nself-accusatory. The spectre, after listening for a moment, joined in\nthe mournful dirge; and floated out upon the bleak, dark night.\n\nScrooge followed to the window: desperate in his curiosity. He looked\nout.\n\nThe air was filled with phantoms, wandering hither and thither in\nrestless haste, and moaning as they went. Every one of them wore chains\nlike Marley's Ghost; some few (they might be guilty governments) were\nlinked together; none were free. Many had been personally known to\nScrooge in their lives. He had been quite familiar with one old ghost in\na white waistcoat, with a monstrous iron safe attached to its ankle, who\ncried piteously at being unable to assist a wretched woman with an\ninfant, whom it saw below upon a doorstep. The misery with them all was\nclearly, that they sought to interfere, for good, in human matters, and\nhad lost the power for ever.\n\nWhether these creatures faded into mist, or mist enshrouded them, he\ncould not tell. But they and their spirit voices faded together; and\nthe night became as it had been when he walked home.\n\nScrooge closed the window, and examined the door by which the Ghost had\nentered. It was double locked, as he had locked it with his own hands,\nand the bolts were undisturbed. He tried to say 'Humbug!' but stopped at\nthe first syllable. 
And being, from the emotions he had undergone, or\nthe fatigues of the day, or his glimpse of the Invisible World, or the\ndull conversation of the Ghost, or the lateness of the hour, much in\nneed of repose, went straight to bed without undressing, and fell asleep\nupon the instant.\n\n[Illustration]\n\n\nSTAVE TWO\n\n[Illustration]\n\n\n\n\nTHE FIRST OF THE THREE SPIRITS\n\n\nWhen Scrooge awoke it was so dark, that, looking out of bed, he could\nscarcely distinguish the transparent window from the opaque walls of his\nchamber. He was endeavouring to pierce the darkness with his ferret\neyes, when the chimes of a neighbouring church struck the four quarters.\nSo he listened for the hour.\n\nTo his great astonishment, the heavy bell went on from six to seven, and\nfrom seven to eight, and regularly up to twelve; then stopped. Twelve!\nIt was past two when he went to bed. The clock was wrong. An icicle must\nhave got into the works. Twelve!\n\nHe touched the spring of his repeater, to correct this most preposterous\nclock. Its rapid little pulse beat twelve, and stopped.\n\n'Why, it isn't possible,' said Scrooge, 'that I can have slept through a\nwhole day and far into another night. It isn't possible that anything\nhas happened to the sun, and this is twelve at noon!'\n\nThe idea being an alarming one, he scrambled out of bed, and groped his\nway to the window. He was obliged to rub the frost off with the sleeve\nof his dressing-gown before he could see anything; and could see very\nlittle then. All he could make out was, that it was still very foggy and\nextremely cold, and that there was no noise of people running to and\nfro, and making a great stir, as there unquestionably would have been if\nnight had beaten off bright day, and taken possession of the world. This\nwas a great relief, because 'Three days after sight of this First of\nExchange pay to Mr. 
Ebenezer Scrooge or his order,' and so forth, would\nhave become a mere United States security if there were no days to count\nby.\n\nScrooge went to bed again, and thought, and thought, and thought it over\nand over, and could make nothing of it. The more he thought, the more\nperplexed he was; and, the more he endeavoured not to think, the more he\nthought.\n\nMarley's Ghost bothered him exceedingly. Every time he resolved within\nhimself, after mature inquiry, that it was all a dream, his mind flew\nback again, like a strong spring released, to its first position, and\npresented the same problem to be worked all through, 'Was it a dream or\nnot?'\n\nScrooge lay in this state until the chime had gone three-quarters more,\nwhen he remembered, on a sudden, that the Ghost had warned him of a\nvisitation when the bell tolled one. He resolved to lie awake until the\nhour was passed; and, considering that he could no more go to sleep than\ngo to heaven, this was, perhaps, the wisest resolution in his power.\n\nThe quarter was so long, that he was more than once convinced he must\nhave sunk into a doze unconsciously, and missed the clock. At length it\nbroke upon his listening ear.\n\n'Ding, dong!'\n\n'A quarter past,' said Scrooge, counting.\n\n'Ding, dong!'\n\n'Half past,' said Scrooge.\n\n'Ding, dong!'\n\n'A quarter to it,' said Scrooge.\n\n'Ding, dong!'\n\n'The hour itself,' said Scrooge triumphantly, 'and nothing else!'\n\nHe spoke before the hour bell sounded, which it now did with a deep,\ndull, hollow, melancholy ONE. Light flashed up in the room upon the\ninstant, and the curtains of his bed were drawn.\n\nThe curtains of his bed were drawn aside, I tell you, by a hand. Not\nthe curtains at his feet, nor the curtains at his back, but those to\nwhich his face was addressed. 
The curtains of his bed were drawn aside;\nand Scrooge, starting up into a half-recumbent attitude, found himself\nface to face with the unearthly visitor who drew them: as close to it as\nI am now to you, and I am standing in the spirit at your elbow.\n\nIt was a strange figure--like a child; yet not so like a child as like\nan old man, viewed through some supernatural medium, which gave him the\nappearance of having receded from the view, and being diminished to a\nchild's proportions. Its hair, which hung about its neck and down its\nback, was white, as if with age; and yet the face had not a wrinkle in\nit, and the tenderest bloom was on the skin. The arms were very long and\nmuscular; the hands the same, as if its hold were of uncommon strength.\nIts legs and feet, most delicately formed, were, like those upper\nmembers, bare. It wore a tunic of the purest white; and round its waist\nwas bound a lustrous belt, the sheen of which was beautiful. It held a\nbranch of fresh green holly in its hand; and, in singular contradiction\nof that wintry emblem, had its dress trimmed with summer flowers. But\nthe strangest thing about it was, that from the crown of its head there\nsprang a bright clear jet of light, by which all this was visible; and\nwhich was doubtless the occasion of its using, in its duller moments, a\ngreat extinguisher for a cap, which it now held under its arm.\n\nEven this, though, when Scrooge looked at it with increasing steadiness,\nwas _not_ its strangest quality. For, as its belt sparkled and\nglittered, now in one part and now in another, and what was light one\ninstant at another time was dark, so the figure itself fluctuated in its\ndistinctness; being now a thing with one arm, now with one leg, now with\ntwenty legs, now a pair of legs without a head, now a head without a\nbody: of which dissolving parts no outline would be visible in the dense\ngloom wherein they melted away. 
And, in the very wonder of this, it\nwould be itself again; distinct and clear as ever.\n\n'Are you the Spirit, sir, whose coming was foretold to me?' asked\nScrooge.\n\n'I am!'\n\nThe voice was soft and gentle. Singularly low, as if, instead of being\nso close behind him, it were at a distance.\n\n'Who and what are you?' Scrooge demanded.\n\n'I am the Ghost of Christmas Past.'\n\n'Long Past?' inquired Scrooge, observant of its dwarfish stature.\n\n'No. Your past.'\n\nPerhaps Scrooge could not have told anybody why, if anybody could have\nasked him; but he had a special desire to see the Spirit in his cap,\nand begged him to be covered.\n\n'What!' exclaimed the Ghost, 'would you so soon put out, with worldly\nhands, the light I give? Is it not enough that you are one of those\nwhose passions made this cap, and force me through whole trains of years\nto wear it low upon my brow?'\n\nScrooge reverently disclaimed all intention to offend or any knowledge\nof having wilfully 'bonneted' the Spirit at any period of his life. He\nthen made bold to inquire what business brought him there.\n\n'Your welfare!' said the Ghost.\n\nScrooge expressed himself much obliged, but could not help thinking that\na night of unbroken rest would have been more conducive to that end. The\nSpirit must have heard him thinking, for it said immediately--\n\n'Your reclamation, then. Take heed!'\n\nIt put out its strong hand as it spoke, and clasped him gently by the\narm.\n\n'Rise! and walk with me!'\n\nIt would have been in vain for Scrooge to plead that the weather and the\nhour were not adapted to pedestrian purposes; that bed was warm, and the\nthermometer a long way below freezing; that he was clad but lightly in\nhis slippers, dressing-gown, and nightcap; and that he had a cold upon\nhim at that time. The grasp, though gentle as a woman's hand, was not\nto be resisted. 
He rose; but, finding that the Spirit made towards the\nwindow, clasped its robe in supplication.\n\n'I am a mortal,' Scrooge remonstrated, 'and liable to fall.'\n\n'Bear but a touch of my hand _there_,' said the Spirit, laying it upon\nhis heart, 'and you shall be upheld in more than this!'\n\nAs the words were spoken, they passed through the wall, and stood upon\nan open country road, with fields on either hand. The city had entirely\nvanished. Not a vestige of it was to be seen. The darkness and the mist\nhad vanished with it, for it was a clear, cold, winter day, with snow\nupon the ground.\n\n'Good Heaven!' said Scrooge, clasping his hands together, as he looked\nabout him. 'I was bred in this place. I was a boy here!'\n\nThe Spirit gazed upon him mildly. Its gentle touch, though it had been\nlight and instantaneous, appeared still present to the old man's sense\nof feeling. He was conscious of a thousand odours floating in the air,\neach one connected with a thousand thoughts, and hopes, and joys, and\ncares long, long forgotten!\n\n'Your lip is trembling,' said the Ghost. 'And what is that upon your\ncheek?'\n\nScrooge muttered, with an unusual catching in his voice, that it was a\npimple; and begged the Ghost to lead him where he would.\n\n'You recollect the way?' inquired the Spirit.\n\n'Remember it!' cried Scrooge with fervour; 'I could walk it blindfold.'\n\n'Strange to have forgotten it for so many years!' observed the Ghost.\n'Let us go on.'\n\nThey walked along the road, Scrooge recognising every gate, and post,\nand tree, until a little market-town appeared in the distance, with its\nbridge, its church, and winding river. Some shaggy ponies now were seen\ntrotting towards them with boys upon their backs, who called to other\nboys in country gigs and carts, driven by farmers. 
All these boys were\nin great spirits, and shouted to each other, until the broad fields were\nso full of merry music, that the crisp air laughed to hear it.\n\n'These are but shadows of the things that have been,' said the Ghost.\n'They have no consciousness of us.'\n\nThe jocund travellers came on; and as they came, Scrooge knew and named\nthem every one. Why was he rejoiced beyond all bounds to see them? Why\ndid his cold eye glisten, and his heart leap up as they went past? Why\nwas he filled with gladness when he heard them give each other Merry\nChristmas, as they parted at cross-roads and by-ways for their several\nhomes? What was merry Christmas to Scrooge? Out upon merry Christmas!\nWhat good had it ever done to him?\n\n'The school is not quite deserted,' said the Ghost. 'A solitary child,\nneglected by his friends, is left there still.'\n\nScrooge said he knew it. And he sobbed.\n\nThey left the high-road by a well-remembered lane and soon approached a\nmansion of dull red brick, with a little weather-cock surmounted cupola\non the roof, and a bell hanging in it. It was a large house, but one of\nbroken fortunes; for the spacious offices were little used, their walls\nwere damp and mossy, their windows broken, and their gates decayed.\nFowls clucked and strutted in the stables; and the coach-houses and\nsheds were overrun with grass. Nor was it more retentive of its ancient\nstate within; for, entering the dreary hall, and glancing through the\nopen doors of many rooms, they found them poorly furnished, cold, and\nvast. There was an earthy savour in the air, a chilly bareness in the\nplace, which associated itself somehow with too much getting up by\ncandle light and not too much to eat.\n\nThey went, the Ghost and Scrooge, across the hall, to a door at the back\nof the house. It opened before them, and disclosed a long, bare,\nmelancholy room, made barer still by lines of plain deal forms and\ndesks. 
At one of these a lonely boy was reading near a feeble fire; and\nScrooge sat down upon a form, and wept to see his poor forgotten self as\nhe had used to be.\n\nNot a latent echo in the house, not a squeak and scuffle from the mice\nbehind the panelling, not a drip from the half-thawed waterspout in the\ndull yard behind, not a sigh among the leafless boughs of one despondent\npoplar, not the idle swinging of an empty storehouse door, no, not a\nclicking in the fire, but fell upon the heart of Scrooge with softening\ninfluence, and gave a freer passage to his tears.\n\nThe Spirit touched him on the arm, and pointed to his younger self,\nintent upon his reading. Suddenly a man in foreign garments, wonderfully\nreal and distinct to look at, stood outside the window, with an axe\nstuck in his belt, and leading by the bridle an ass laden with wood.\n\n'Why, it's Ali Baba!' Scrooge exclaimed in ecstasy. 'It's dear old\nhonest Ali Baba! Yes, yes, I know. One Christmas-time, when yonder\nsolitary child was left here all alone, he _did_ come, for the first\ntime, just like that. Poor boy! And Valentine,' said Scrooge, 'and his\nwild brother, Orson; there they go! And what's his name, who was put\ndown in his drawers, asleep, at the gate of Damascus; don't you see him?\nAnd the Sultan's Groom turned upside down by the Genii; there he is upon\nhis head! Serve him right! I'm glad of it. What business had he to be\nmarried to the Princess?'\n\nTo hear Scrooge expending all the earnestness of his nature on such\nsubjects, in a most extraordinary voice between laughing and crying; and\nto see his heightened and excited face; would have been a surprise to\nhis business friends in the City, indeed.\n\n'There's the Parrot!' cried Scrooge. 'Green body and yellow tail, with a\nthing like a lettuce growing out of the top of his head; there he is!\nPoor Robin Crusoe he called him, when he came home again after sailing\nround the island. 
\"Poor Robin Crusoe, where have you been, Robin\nCrusoe?\" The man thought he was dreaming, but he wasn't. It was the\nParrot, you know. There goes Friday, running for his life to the little\ncreek! Halloa! Hoop! Halloo!'\n\nThen, with a rapidity of transition very foreign to his usual character,\nhe said, in pity for his former self, 'Poor boy!' and cried again.\n\n'I wish,' Scrooge muttered, putting his hand in his pocket, and looking\nabout him, after drying his eyes with his cuff; 'but it's too late now.'\n\n'What is the matter?' asked the Spirit.\n\n'Nothing,' said Scrooge. 'Nothing. There was a boy singing a Christmas\ncarol at my door last night. I should like to have given him something:\nthat's all.'\n\nThe Ghost smiled thoughtfully, and waved its hand, saying as it did so,\n'Let us see another Christmas!'\n\nScrooge's former self grew larger at the words, and the room became a\nlittle darker and more dirty. The panels shrunk, the windows cracked;\nfragments of plaster fell out of the ceiling, and the naked laths were\nshown instead; but how all this was brought about Scrooge knew no more\nthan you do. He only knew that it was quite correct; that everything had\nhappened so; that there he was, alone again, when all the other boys had\ngone home for the jolly holidays.\n\nHe was not reading now, but walking up and down despairingly. Scrooge\nlooked at the Ghost, and, with a mournful shaking of his head, glanced\nanxiously towards the door.\n\nIt opened; and a little girl, much younger than the boy, came darting\nin, and, putting her arms about his neck, and often kissing him,\naddressed him as her 'dear, dear brother.'\n\n'I have come to bring you home, dear brother!' said the child, clapping\nher tiny hands, and bending down to laugh. 'To bring you home, home,\nhome!'\n\n'Home, little Fan?' returned the boy.\n\n'Yes!' said the child, brimful of glee. 'Home for good and all. Home for\never and ever. 
Father is so much kinder than he used to be, that home's\nlike heaven! He spoke so gently to me one dear night when I was going to\nbed, that I was not afraid to ask him once more if you might come home;\nand he said Yes, you should; and sent me in a coach to bring you. And\nyou're to be a man!' said the child, opening her eyes; 'and are never to\ncome back here; but first we're to be together all the Christmas long,\nand have the merriest time in all the world.'\n\n'You are quite a woman, little Fan!' exclaimed the boy.\n\nShe clapped her hands and laughed, and tried to touch his head; but,\nbeing too little, laughed again, and stood on tiptoe to embrace him. Then\nshe began to drag him, in her childish eagerness, towards the door; and\nhe, nothing loath to go, accompanied her.\n\nA terrible voice in the hall cried, 'Bring down Master Scrooge's box,\nthere!' and in the hall appeared the schoolmaster himself, who glared on\nMaster Scrooge with a ferocious condescension, and threw him into a\ndreadful state of mind by shaking hands with him. He then conveyed him\nand his sister into the veriest old well of a shivering best parlour\nthat ever was seen, where the maps upon the wall, and the celestial and\nterrestrial globes in the windows, were waxy with cold. 
Here he produced\na decanter of curiously light wine, and a block of curiously heavy cake,\nand administered instalments of those dainties to the young people; at\nthe same time sending out a meagre servant to offer a glass of\n'something' to the postboy, who answered that he thanked the gentleman,\nbut, if it was the same tap as he had tasted before, he had rather not.\nMaster Scrooge's trunk being by this time tied on to the top of the\nchaise, the children bade the schoolmaster good-bye right willingly;\nand, getting into it, drove gaily down the garden sweep; the quick\nwheels dashing the hoar-frost and snow from off the dark leaves of the\nevergreens like spray.\n\n[Illustration: HE PRODUCED A DECANTER OF CURIOUSLY LIGHT WINE, AND A\nBLOCK OF CURIOUSLY HEAVY CAKE]\n\n'Always a delicate creature, whom a breath might have withered,' said\nthe Ghost. 'But she had a large heart!'\n\n'So she had,' cried Scrooge. 'You're right. I will not gainsay it,\nSpirit. God forbid!'\n\n'She died a woman,' said the Ghost, 'and had, as I think, children.'\n\n'One child,' Scrooge returned.\n\n'True,' said the Ghost. 'Your nephew!'\n\nScrooge seemed uneasy in his mind, and answered briefly, 'Yes.'\n\nAlthough they had but that moment left the school behind them, they were\nnow in the busy thoroughfares of a city, where shadowy passengers passed\nand re-passed; where shadowy carts and coaches battled for the way, and\nall the strife and tumult of a real city were. It was made plain enough,\nby the dressing of the shops, that here, too, it was Christmas-time\nagain; but it was evening, and the streets were lighted up.\n\nThe Ghost stopped at a certain warehouse door, and asked Scrooge if he\nknew it.\n\n'Know it!' said Scrooge. 'Was I apprenticed here?'\n\nThey went in. 
At sight of an old gentleman in a Welsh wig, sitting\nbehind such a high desk, that if he had been two inches taller, he must\nhave knocked his head against the ceiling, Scrooge cried in great\nexcitement--\n\n'Why, it's old Fezziwig! Bless his heart, it's Fezziwig alive again!'\n\nOld Fezziwig laid down his pen, and looked up at the clock, which\npointed to the hour of seven. He rubbed his hands; adjusted his\ncapacious waistcoat; laughed all over himself, from his shoes to his\norgan of benevolence; and called out, in a comfortable, oily, rich, fat,\njovial voice--\n\n'Yo ho, there! Ebenezer! Dick!'\n\nScrooge's former self, now grown a young man, came briskly in,\naccompanied by his fellow-'prentice.\n\n'Dick Wilkins, to be sure!' said Scrooge to the Ghost. 'Bless me, yes.\nThere he is. He was very much attached to me, was Dick. Poor Dick! Dear,\ndear!'\n\n'Yo ho, my boys!' said Fezziwig. 'No more work to-night. Christmas Eve,\nDick. Christmas, Ebenezer! Let's have the shutters up,' cried old\nFezziwig, with a sharp clap of his hands, 'before a man can say Jack\nRobinson!'\n\nYou wouldn't believe how those two fellows went at it! They charged into\nthe street with the shutters--one, two, three--had 'em up in their\nplaces--four, five, six--barred 'em and pinned 'em--seven, eight,\nnine--and came back before you could have got to twelve, panting like\nracehorses.\n\n'Hilli-ho!' cried old Fezziwig, skipping down from the high desk with\nwonderful agility. 'Clear away, my lads, and let's have lots of room\nhere! Hilli-ho, Dick! Chirrup, Ebenezer!'\n\nClear away! There was nothing they wouldn't have cleared away, or\ncouldn't have cleared away, with old Fezziwig looking on. It was done in\na minute. 
Every movable was packed off, as if it were dismissed from\npublic life for evermore; the floor was swept and watered, the lamps\nwere trimmed, fuel was heaped upon the fire; and the warehouse was as\nsnug, and warm, and dry, and bright a ball-room as you would desire to\nsee upon a winter's night.\n\nIn came a fiddler with a music-book, and went up to the lofty desk, and\nmade an orchestra of it, and tuned like fifty stomach-aches. In came\nMrs. Fezziwig, one vast substantial smile. In came the three Miss\nFezziwigs, beaming and lovable. In came the six young followers whose\nhearts they broke. In came all the young men and women employed in the\nbusiness. In came the housemaid, with her cousin the baker. In came the\ncook with her brother's particular friend the milkman. In came the boy\nfrom over the way, who was suspected of not having board enough from his\nmaster; trying to hide himself behind the girl from next door but one,\nwho was proved to have had her ears pulled by her mistress. In they all\ncame, one after another; some shyly, some boldly, some gracefully, some\nawkwardly, some pushing, some pulling; in they all came, any how and\nevery how. Away they all went, twenty couple at once; hands half round\nand back again the other way; down the middle and up again; round and\nround in various stages of affectionate grouping; old top couple always\nturning up in the wrong place; new top couple starting off again as soon\nas they got there; all top couples at last, and not a bottom one to help\nthem! When this result was brought about, old Fezziwig, clapping his\nhands to stop the dance, cried out, 'Well done!' 
and the fiddler plunged\nhis hot face into a pot of porter, especially provided for that purpose.\nBut, scorning rest upon his reappearance, he instantly began again,\nthough there were no dancers yet, as if the other fiddler had been\ncarried home, exhausted, on a shutter, and he were a bran-new man\nresolved to beat him out of sight, or perish.\n\n[Illustration: _Then old Fezziwig stood out to dance with Mrs.\nFezziwig_]\n\nThere were more dances, and there were forfeits, and more dances, and\nthere was cake, and there was negus, and there was a great piece of Cold\nRoast, and there was a great piece of Cold Boiled, and there were\nmince-pies, and plenty of beer. But the great effect of the evening came\nafter the Roast and Boiled, when the fiddler (an artful dog, mind! The\nsort of man who knew his business better than you or I could have told\nit him!) struck up 'Sir Roger de Coverley.' Then old Fezziwig stood\nout to dance with Mrs. Fezziwig. Top couple, too; with a good stiff\npiece of work cut out for them; three or four and twenty pair of\npartners; people who were not to be trifled with; people who would\ndance, and had no notion of walking.\n\nBut if they had been twice as many--ah! four times--old Fezziwig would\nhave been a match for them, and so would Mrs. Fezziwig. As to _her_, she\nwas worthy to be his partner in every sense of the term. If that's not\nhigh praise, tell me higher, and I'll use it. A positive light appeared\nto issue from Fezziwig's calves. They shone in every part of the dance\nlike moons. You couldn't have predicted, at any given time, what would\nbecome of them next. And when old Fezziwig and Mrs. 
Fezziwig had gone\nall through the dance; advance and retire, both hands to your partner,\nbow and curtsy, cork-screw, thread-the-needle, and back again to your\nplace: Fezziwig 'cut'--cut so deftly, that he appeared to wink with his\nlegs, and came upon his feet again without a stagger.\n\nWhen the clock struck eleven, this domestic ball broke up. Mr. and Mrs.\nFezziwig took their stations, one on either side the door, and, shaking\nhands with every person individually as he or she went out, wished him\nor her a Merry Christmas. When everybody had retired but the two\n'prentices, they did the same to them; and thus the cheerful voices died\naway, and the lads were left to their beds; which were under a counter\nin the back-shop.\n\nDuring the whole of this time Scrooge had acted like a man out of his\nwits. His heart and soul were in the scene, and with his former self. He\ncorroborated everything, remembered everything, enjoyed everything, and\nunderwent the strangest agitation. It was not until now, when the bright\nfaces of his former self and Dick were turned from them, that he\nremembered the Ghost, and became conscious that it was looking full upon\nhim, while the light upon its head burnt very clear.\n\n'A small matter,' said the Ghost, 'to make these silly folks so full of\ngratitude.'\n\n'Small!' echoed Scrooge.\n\nThe Spirit signed to him to listen to the two apprentices, who were\npouring out their hearts in praise of Fezziwig; and when he had done so,\nsaid:\n\n'Why! Is it not? He has spent but a few pounds of your mortal money:\nthree or four, perhaps. Is that so much that he deserves this praise?'\n\n'It isn't that,' said Scrooge, heated by the remark, and speaking\nunconsciously like his former, not his latter self. 'It isn't that,\nSpirit. He has the power to render us happy or unhappy; to make our\nservice light or burdensome; a pleasure or a toil. 
Say that his power\nlies in words and looks; in things so slight and insignificant that it\nis impossible to add and count 'em up: what then? The happiness he gives\nis quite as great as if it cost a fortune.'\n\nHe felt the Spirit's glance, and stopped.\n\n'What is the matter?' asked the Ghost.\n\n'Nothing particular,' said Scrooge.\n\n'Something, I think?' the Ghost insisted.\n\n'No,' said Scrooge, 'no. I should like to be able to say a word or two\nto my clerk just now. That's all.'\n\nHis former self turned down the lamps as he gave utterance to the wish;\nand Scrooge and the Ghost again stood side by side in the open air.\n\n'My time grows short,' observed the Spirit. 'Quick!'\n\nThis was not addressed to Scrooge, or to any one whom he could see, but\nit produced an immediate effect. For again Scrooge saw himself. He was\nolder now; a man in the prime of life. His face had not the harsh and\nrigid lines of later years; but it had begun to wear the signs of care\nand avarice. There was an eager, greedy, restless motion in the eye,\nwhich showed the passion that had taken root, and where the shadow of\nthe growing tree would fall.\n\nHe was not alone, but sat by the side of a fair young girl in a mourning\ndress: in whose eyes there were tears, which sparkled in the light that\nshone out of the Ghost of Christmas Past.\n\n'It matters little,' she said softly. 'To you, very little. Another idol\nhas displaced me; and, if it can cheer and comfort you in time to come\nas I would have tried to do, I have no just cause to grieve.'\n\n'What Idol has displaced you?' he rejoined.\n\n'A golden one.'\n\n'This is the even-handed dealing of the world!' he said. 'There is\nnothing on which it is so hard as poverty; and there is nothing it\nprofesses to condemn with such severity as the pursuit of wealth!'\n\n'You fear the world too much,' she answered gently. 'All your other\nhopes have merged into the hope of being beyond the chance of its sordid\nreproach. 
I have seen your nobler aspirations fall off one by one, until\nthe master passion, Gain, engrosses you. Have I not?'\n\n'What then?' he retorted. 'Even if I have grown so much wiser, what\nthen? I am not changed towards you.'\n\nShe shook her head.\n\n'Am I?'\n\n'Our contract is an old one. It was made when we were both poor, and\ncontent to be so, until, in good season, we could improve our worldly\nfortune by our patient industry. You _are_ changed. When it was made you\nwere another man.'\n\n'I was a boy,' he said impatiently.\n\n'Your own feeling tells you that you were not what you are,' she\nreturned. 'I am. That which promised happiness when we were one in heart\nis fraught with misery now that we are two. How often and how keenly I\nhave thought of this I will not say. It is enough that I _have_ thought\nof it, and can release you.'\n\n'Have I ever sought release?'\n\n'In words. No. Never.'\n\n'In what, then?'\n\n'In a changed nature; in an altered spirit; in another atmosphere of\nlife; another Hope as its great end. In everything that made my love of\nany worth or value in your sight. If this had never been between us,'\nsaid the girl, looking mildly, but with steadiness, upon him; 'tell me,\nwould you seek me out and try to win me now? Ah, no!'\n\nHe seemed to yield to the justice of this supposition in spite of\nhimself. But he said, with a struggle, 'You think not.'\n\n'I would gladly think otherwise if I could,' she answered. 'Heaven\nknows! When _I_ have learned a Truth like this, I know how strong and\nirresistible it must be. But if you were free to-day, to-morrow,\nyesterday, can even I believe that you would choose a dowerless\ngirl--you who, in your very confidence with her, weigh everything by\nGain: or, choosing her, if for a moment you were false enough to your\none guiding principle to do so, do I not know that your repentance and\nregret would surely follow? I do; and I release you. 
With a full heart,\nfor the love of him you once were.'\n\n[Illustration: SHE LEFT HIM, AND THEY PARTED]\n\nHe was about to speak; but, with her head turned from him, she resumed:\n\n'You may--the memory of what is past half makes me hope you will--have\npain in this. A very, very brief time, and you will dismiss the\nrecollection of it gladly, as an unprofitable dream, from which it\nhappened well that you awoke. May you be happy in the life you have\nchosen!'\n\nShe left him, and they parted.\n\n'Spirit!' said Scrooge, 'show me no more! Conduct me home. Why do you\ndelight to torture me?'\n\n'One shadow more!' exclaimed the Ghost.\n\n'No more!' cried Scrooge. 'No more! I don't wish to see it. Show me no\nmore!'\n\nBut the relentless Ghost pinioned him in both his arms, and forced him\nto observe what happened next.\n\nThey were in another scene and place; a room, not very large or\nhandsome, but full of comfort. Near to the winter fire sat a beautiful\nyoung girl, so like that last that Scrooge believed it was the same,\nuntil he saw _her_, now a comely matron, sitting opposite her daughter.\nThe noise in this room was perfectly tumultuous, for there were more\nchildren there than Scrooge in his agitated state of mind could count;\nand, unlike the celebrated herd in the poem, they were not forty\nchildren conducting themselves like one, but every child was conducting\nitself like forty. The consequences were uproarious beyond belief; but\nno one seemed to care; on the contrary, the mother and daughter laughed\nheartily, and enjoyed it very much; and the latter, soon beginning to\nmingle in the sports, got pillaged by the young brigands most\nruthlessly. What would I not have given to be one of them! Though I\nnever could have been so rude, no, no! I wouldn't for the wealth of all\nthe world have crushed that braided hair, and torn it down; and for the\nprecious little shoe, I wouldn't have plucked it off, God bless my soul!\nto save my life. 
As to measuring her waist in sport, as they did, bold\nyoung brood, I couldn't have done it; I should have expected my arm to\nhave grown round it for a punishment, and never come straight again. And\nyet I should have dearly liked, I own, to have touched her lips; to have\nquestioned her, that she might have opened them; to have looked upon the\nlashes of her downcast eyes, and never raised a blush; to have let loose\nwaves of hair, an inch of which would be a keepsake beyond price: in\nshort, I should have liked, I do confess, to have had the lightest\nlicense of a child, and yet to have been man enough to know its value.\n\n[Illustration: _A flushed and boisterous group_]\n\nBut now a knocking at the door was heard, and such a rush immediately\nensued that she, with laughing face and plundered dress, was borne\ntowards it the centre of a flushed and boisterous group, just in time to\ngreet the father, who came home attended by a man laden with Christmas\ntoys and presents. Then the shouting and the struggling, and the\nonslaught that was made on the defenceless porter! The scaling him, with\nchairs for ladders, to dive into his pockets, despoil him of\nbrown-paper parcels, hold on tight by his cravat, hug him round his\nneck, pummel his back, and kick his legs in irrepressible affection! The\nshouts of wonder and delight with which the development of every package\nwas received! The terrible announcement that the baby had been taken in\nthe act of putting a doll's frying pan into his mouth, and was more than\nsuspected of having swallowed a fictitious turkey, glued on a wooden\nplatter! The immense relief of finding this a false alarm! The joy, and\ngratitude, and ecstasy! They are all indescribable alike. 
It is enough\nthat, by degrees, the children and their emotions got out of the\nparlour, and, by one stair at a time, up to the top of the house, where\nthey went to bed, and so subsided.\n\nAnd now Scrooge looked on more attentively than ever, when the master of\nthe house, having his daughter leaning fondly on him, sat down with her\nand her mother at his own fireside; and when he thought that such\nanother creature, quite as graceful and as full of promise, might have\ncalled him father, and been a spring-time in the haggard winter of his\nlife, his sight grew very dim indeed.\n\n'Belle,' said the husband, turning to his wife with a smile, 'I saw an\nold friend of yours this afternoon.'\n\n'Who was it?'\n\n'Guess!'\n\n'How can I? Tut, don't I know?' she added in the same breath, laughing\nas he laughed. 'Mr. Scrooge.'\n\n'Mr. Scrooge it was. I passed his office window; and as it was not shut\nup, and he had a candle inside, I could scarcely help seeing him. His\npartner lies upon the point of death, I hear; and there he sat alone.\nQuite alone in the world, I do believe.'\n\n'Spirit!' said Scrooge in a broken voice, 'remove me from this place.'\n\n'I told you these were shadows of the things that have been,' said the\nGhost. 'That they are what they are do not blame me!'\n\n'Remove me!' Scrooge exclaimed, 'I cannot bear it!'\n\nHe turned upon the Ghost, and seeing that it looked upon him with a\nface, in which in some strange way there were fragments of all the faces\nit had shown him, wrestled with it.\n\n'Leave me! Take me back. 
Haunt me no longer!'\n\nIn the struggle, if that can be called a struggle in which the Ghost\nwith no visible resistance on its own part was undisturbed by any effort\nof its adversary, Scrooge observed that its light was burning high and\nbright; and dimly connecting that with its influence over him, he seized\nthe extinguisher-cap, and by a sudden action pressed it down upon its\nhead.\n\n[Illustration: _Laden with Christmas toys and presents_]\n\nThe Spirit dropped beneath it, so that the extinguisher covered its\nwhole form; but though Scrooge pressed it down with all his force, he\ncould not hide the light, which streamed from under it, in an unbroken\nflood upon the ground.\n\nHe was conscious of being exhausted, and overcome by an irresistible\ndrowsiness; and, further, of being in his own bedroom. He gave the cap a\nparting squeeze, in which his hand relaxed; and had barely time to reel\nto bed, before he sank into a heavy sleep.\n\n[Illustration]\n\n\nSTAVE THREE\n\n\n[Illustration]\n\n\n\n\nTHE SECOND OF THE THREE SPIRITS\n\n\nAwaking in the middle of a prodigiously tough snore, and sitting up in\nbed to get his thoughts together, Scrooge had no occasion to be told\nthat the bell was again upon the stroke of One. He felt that he was\nrestored to consciousness in the right nick of time, for the especial\npurpose of holding a conference with the second messenger despatched to\nhim through Jacob Marley's intervention. But finding that he turned\nuncomfortably cold when he began to wonder which of his curtains this\nnew spectre would draw back, he put them every one aside with his own\nhands, and, lying down again, established a sharp look-out all round the\nbed. 
For he wished to challenge the Spirit on the moment of its\nappearance, and did not wish to be taken by surprise and made nervous.\n\nGentlemen of the free-and-easy sort, who plume themselves on being\nacquainted with a move or two, and being usually equal to the time of\nday, express the wide range of their capacity for adventure by observing\nthat they are good for anything from pitch-and-toss to manslaughter;\nbetween which opposite extremes, no doubt, there lies a tolerably wide\nand comprehensive range of subjects. Without venturing for Scrooge quite\nas hardily as this, I don't mind calling on you to believe that he was\nready for a good broad field of strange appearances, and that nothing\nbetween a baby and a rhinoceros would have astonished him very much.\n\nNow, being prepared for almost anything, he was not by any means\nprepared for nothing; and consequently, when the bell struck One, and no\nshape appeared, he was taken with a violent fit of trembling. Five\nminutes, ten minutes, a quarter of an hour went by, yet nothing came.\nAll this time he lay upon his bed, the very core and centre of a blaze\nof ruddy light, which streamed upon it when the clock proclaimed the\nhour; and which, being only light, was more alarming than a dozen\nghosts, as he was powerless to make out what it meant, or would be at;\nand was sometimes apprehensive that he might be at that very moment an\ninteresting case of spontaneous combustion, without having the\nconsolation of knowing it. At last, however, he began to think--as you\nor I would have thought at first; for it is always the person not in the\npredicament who knows what ought to have been done in it, and would\nunquestionably have done it too--at last, I say, he began to think that\nthe source and secret of this ghostly light might be in the adjoining\nroom, from whence, on further tracing it, it seemed to shine. 
This idea\ntaking full possession of his mind, he got up softly, and shuffled in\nhis slippers to the door.\n\nThe moment Scrooge's hand was on the lock a strange voice called him by\nhis name, and bade him enter. He obeyed.\n\nIt was his own room. There was no doubt about that. But it had undergone\na surprising transformation. The walls and ceiling were so hung with\nliving green, that it looked a perfect grove; from every part of which\nbright gleaming berries glistened. The crisp leaves of holly, mistletoe,\nand ivy reflected back the light, as if so many little mirrors had been\nscattered there; and such a mighty blaze went roaring up the chimney as\nthat dull petrification of a hearth had never known in Scrooge's time,\nor Marley's, or for many and many a winter season gone. Heaped up on the\nfloor, to form a kind of throne, were turkeys, geese, game, poultry,\nbrawn, great joints of meat, sucking-pigs, long wreaths of sausages,\nmince-pies, plum-puddings, barrels of oysters, red-hot chestnuts,\ncherry-cheeked apples, juicy oranges, luscious pears, immense\ntwelfth-cakes, and seething bowls of punch, that made the chamber dim\nwith their delicious steam. In easy state upon this couch there sat a\njolly Giant, glorious to see; who bore a glowing torch, in shape not\nunlike Plenty's horn, and held it up, high up, to shed its light on\nScrooge as he came peeping round the door.\n\n'Come in!' exclaimed the Ghost. 'Come in! and know me better, man!'\n\nScrooge entered timidly, and hung his head before this Spirit. He was\nnot the dogged Scrooge he had been; and though the Spirit's eyes were\nclear and kind, he did not like to meet them.\n\n'I am the Ghost of Christmas Present,' said the Spirit. 'Look upon me!'\n\nScrooge reverently did so. It was clothed in one simple deep green robe,\nor mantle, bordered with white fur. This garment hung so loosely on the\nfigure, that its capacious breast was bare, as if disdaining to be\nwarded or concealed by any artifice. 
Its feet, observable beneath the\nample folds of the garment, were also bare; and on its head it wore no\nother covering than a holly wreath, set here and there with shining\nicicles. Its dark-brown curls were long and free; free as its genial\nface, its sparkling eye, its open hand, its cheery voice, its\nunconstrained demeanour, and its joyful air. Girded round its middle was\nan antique scabbard: but no sword was in it, and the ancient sheath was\neaten up with rust.\n\n'You have never seen the like of me before!' exclaimed the Spirit.\n\n'Never,' Scrooge made answer to it.\n\n'Have never walked forth with the younger members of my family; meaning\n(for I am very young) my elder brothers born in these later years?'\npursued the Phantom.\n\n'I don't think I have,' said Scrooge. 'I am afraid I have not. Have you\nhad many brothers, Spirit?'\n\n'More than eighteen hundred,' said the Ghost.\n\n'A tremendous family to provide for,' muttered Scrooge.\n\nThe Ghost of Christmas Present rose.\n\n'Spirit,' said Scrooge submissively, 'conduct me where you will. I went\nforth last night on compulsion, and I learned a lesson which is working\nnow. To-night if you have aught to teach me, let me profit by it.'\n\n'Touch my robe!'\n\nScrooge did as he was told, and held it fast.\n\nHolly, mistletoe, red berries, ivy, turkeys, geese, game, poultry,\nbrawn, meat, pigs, sausages, oysters, pies, puddings, fruit, and punch,\nall vanished instantly. 
So did the room, the fire, the ruddy glow, the\nhour of night, and they stood in the city streets on Christmas morning,\nwhere (for the weather was severe) the people made a rough, but brisk\nand not unpleasant kind of music, in scraping the snow from the pavement\nin front of their dwellings, and from the tops of their houses, whence\nit was mad delight to the boys to see it come plumping down into the\nroad below, and splitting into artificial little snowstorms.\n\nThe house-fronts looked black enough, and the windows blacker,\ncontrasting with the smooth white sheet of snow upon the roofs, and with\nthe dirtier snow upon the ground; which last deposit had been ploughed\nup in deep furrows by the heavy wheels of carts and waggons: furrows\nthat crossed and recrossed each other hundreds of times where the great\nstreets branched off; and made intricate channels, hard to trace in the\nthick yellow mud and icy water. The sky was gloomy, and the shortest\nstreets were choked up with a dingy mist, half thawed, half frozen,\nwhose heavier particles descended in a shower of sooty atoms, as if all\nthe chimneys in Great Britain had, by one consent, caught fire, and were\nblazing away to their dear heart's content. There was nothing very\ncheerful in the climate or the town, and yet was there an air of\ncheerfulness abroad that the clearest summer air and brightest summer\nsun might have endeavoured to diffuse in vain.\n\n[Illustration: THERE WAS NOTHING VERY CHEERFUL IN THE CLIMATE]\n\nFor the people who were shovelling away on the house-tops were jovial\nand full of glee; calling out to one another from the parapets, and now\nand then exchanging a facetious snowball--better-natured missile far\nthan many a wordy jest--laughing heartily if it went right, and not less\nheartily if it went wrong. The poulterers' shops were still half open,\nand the fruiterers' were radiant in their glory. 
There were great,\nround, pot-bellied baskets of chestnuts, shaped like the waistcoats of\njolly old gentlemen, lolling at the doors, and tumbling out into the\nstreet in their apoplectic opulence: There were ruddy, brown-faced,\nbroad-girthed Spanish onions, shining in the fatness of their growth\nlike Spanish friars, and winking from their shelves in wanton slyness at\nthe girls as they went by, and glanced demurely at the hung-up\nmistletoe. There were pears and apples clustered high in blooming\npyramids; there were bunches of grapes, made, in the shopkeepers'\nbenevolence, to dangle from conspicuous hooks that people's mouths might\nwater gratis as they passed; there were piles of filberts, mossy and\nbrown, recalling, in their fragrance, ancient walks among the woods, and\npleasant shufflings ankle deep through withered leaves; there were\nNorfolk Biffins, squab and swarthy, setting off the yellow of the\noranges and lemons, and, in the great compactness of their juicy\npersons, urgently entreating and beseeching to be carried home in paper\nbags and eaten after dinner. The very gold and silver fish, set forth\namong these choice fruits in a bowl, though members of a dull and\nstagnant-blooded race, appeared to know that there was something going\non; and, to a fish, went gasping round and round their little world in\nslow and passionless excitement.\n\nThe Grocers'! oh, the Grocers'! nearly closed, with perhaps two shutters\ndown, or one; but through those gaps such glimpses! 
It was not alone\nthat the scales descending on the counter made a merry sound, or that\nthe twine and roller parted company so briskly, or that the canisters\nwere rattled up and down like juggling tricks, or even that the blended\nscents of tea and coffee were so grateful to the nose, or even that the\nraisins were so plentiful and rare, the almonds so extremely white, the\nsticks of cinnamon so long and straight, the other spices so delicious,\nthe candied fruits so caked and spotted with molten sugar as to make the\ncoldest lookers-on feel faint, and subsequently bilious. Nor was it that\nthe figs were moist and pulpy, or that the French plums blushed in\nmodest tartness from their highly-decorated boxes, or that everything\nwas good to eat and in its Christmas dress; but the customers were all\nso hurried and so eager in the hopeful promise of the day, that they\ntumbled up against each other at the door, crashing their wicker baskets\nwildly, and left their purchases upon the counter, and came running\nback to fetch them, and committed hundreds of the like mistakes, in the\nbest humour possible; while the grocer and his people were so frank and\nfresh, that the polished hearts with which they fastened their aprons\nbehind might have been their own, worn outside for general inspection,\nand for Christmas daws to peck at if they chose.\n\nBut soon the steeples called good people all to church and chapel, and\naway they came, flocking through the streets in their best clothes and\nwith their gayest faces. And at the same time there emerged, from scores\nof by-streets, lanes, and nameless turnings, innumerable people,\ncarrying their dinners to the bakers' shops. 
The sight of these poor\nrevellers appeared to interest the Spirit very much, for he stood with\nScrooge beside him in a baker's doorway, and, taking off the covers as\ntheir bearers passed, sprinkled incense on their dinners from his torch.\nAnd it was a very uncommon kind of torch, for once or twice, when there\nwere angry words between some dinner-carriers who had jostled each\nother, he shed a few drops of water on them from it, and their\ngood-humour was restored directly. For they said, it was a shame to\nquarrel upon Christmas Day. And so it was! God love it, so it was!\n\nIn time the bells ceased, and the bakers were shut up; and yet there was\na genial shadowing forth of all these dinners, and the progress of their\ncooking, in the thawed blotch of wet above each baker's oven, where the\npavement smoked as if its stones were cooking too.\n\n'Is there a peculiar flavour in what you sprinkle from your torch?'\nasked Scrooge.\n\n'There is. My own.'\n\n'Would it apply to any kind of dinner on this day?' asked Scrooge.\n\n'To any kindly given. To a poor one most.'\n\n'Why to a poor one most?' asked Scrooge.\n\n'Because it needs it most.'\n\n'Spirit!' said Scrooge, after a moment's thought, 'I wonder you, of all\nthe beings in the many worlds about us, should desire to cramp these\npeople's opportunities of innocent enjoyment.'\n\n'I!' cried the Spirit.\n\n'You would deprive them of their means of dining every seventh day,\noften the only day on which they can be said to dine at all,' said\nScrooge; 'wouldn't you?'\n\n'I!' cried the Spirit.\n\n'You seek to close these places on the Seventh Day,' said Scrooge. 'And\nit comes to the same thing.'\n\n'I seek!' exclaimed the Spirit.\n\n'Forgive me if I am wrong. 
It has been done in your name, or at least in\nthat of your family,' said Scrooge.\n\n'There are some upon this earth of yours,' returned the Spirit, 'who\nlay claim to know us, and who do their deeds of passion, pride,\nill-will, hatred, envy, bigotry, and selfishness in our name, who are as\nstrange to us, and all our kith and kin, as if they had never lived.\nRemember that, and charge their doings on themselves, not us.'\n\nScrooge promised that he would; and they went on, invisible, as they had\nbeen before, into the suburbs of the town. It was a remarkable quality\nof the Ghost (which Scrooge had observed at the baker's), that\nnotwithstanding his gigantic size, he could accommodate himself to any\nplace with ease; and that he stood beneath a low roof quite as\ngracefully and like a supernatural creature as it was possible he could\nhave done in any lofty hall.\n\nAnd perhaps it was the pleasure the good Spirit had in showing off this\npower of his, or else it was his own kind, generous, hearty nature, and\nhis sympathy with all poor men, that led him straight to Scrooge's\nclerk's; for there he went, and took Scrooge with him, holding to his\nrobe; and on the threshold of the door the Spirit smiled, and stopped to\nbless Bob Cratchit's dwelling with the sprinklings of his torch. Think\nof that! Bob had but fifteen 'Bob' a week himself; he pocketed on\nSaturdays but fifteen copies of his Christian name; and yet the Ghost of\nChristmas Present blessed his four-roomed house!\n\nThen up rose Mrs. 
Cratchit, Cratchit's wife, dressed out but poorly in a\ntwice-turned gown, but brave in ribbons, which are cheap, and make a\ngoodly show for sixpence; and she laid the cloth, assisted by Belinda\nCratchit, second of her daughters, also brave in ribbons; while Master\nPeter Cratchit plunged a fork into the saucepan of potatoes, and getting\nthe corners of his monstrous shirt-collar (Bob's private property,\nconferred upon his son and heir in honour of the day,) into his mouth,\nrejoiced to find himself so gallantly attired, and yearned to show his\nlinen in the fashionable Parks. And now two smaller Cratchits, boy and\ngirl, came tearing in, screaming that outside the baker's they had smelt\nthe goose, and known it for their own; and basking in luxurious thoughts\nof sage and onion, these young Cratchits danced about the table, and\nexalted Master Peter Cratchit to the skies, while he (not proud,\nalthough his collars nearly choked him) blew the fire, until the slow\npotatoes, bubbling up, knocked loudly at the saucepan-lid to be let out\nand peeled.\n\n'What has ever got your precious father, then?' said Mrs. Cratchit. 'And\nyour brother, Tiny Tim? And Martha warn't as late last Christmas Day by\nhalf an hour!'\n\n'Here's Martha, mother!' said a girl, appearing as she spoke.\n\n'Here's Martha, mother!' cried the two young Cratchits. 'Hurrah! There's\n_such_ a goose, Martha!'\n\n'Why, bless your heart alive, my dear, how late you are!' said Mrs.\nCratchit, kissing her a dozen times, and taking off her shawl and bonnet\nfor her with officious zeal.\n\n'We'd a deal of work to finish up last night,' replied the girl, 'and\nhad to clear away this morning, mother!'\n\n'Well! never mind so long as you are come,' said Mrs. Cratchit. 'Sit ye\ndown before the fire, my dear, and have a warm, Lord bless ye!'\n\n'No, no! There's father coming,' cried the two young Cratchits, who were\neverywhere at once. 
'Hide, Martha, hide!'\n\nSo Martha hid herself, and in came little Bob, the father, with at least\nthree feet of comforter, exclusive of the fringe, hanging down before\nhim, and his threadbare clothes darned up and brushed to look\nseasonable, and Tiny Tim upon his shoulder. Alas for Tiny Tim, he bore a\nlittle crutch, and had his limbs supported by an iron frame!\n\n'Why, where's our Martha?' cried Bob Cratchit, looking round.\n\n'Not coming,' said Mrs. Cratchit.\n\n'Not coming!' said Bob, with a sudden declension in his high spirits;\nfor he had been Tim's blood-horse all the way from church, and had come\nhome rampant. 'Not coming upon Christmas Day!'\n\nMartha didn't like to see him disappointed, if it were only in joke; so\nshe came out prematurely from behind the closet door, and ran into his\narms, while the two young Cratchits hustled Tiny Tim, and bore him off\ninto the wash-house, that he might hear the pudding singing in the\ncopper.\n\n'And how did little Tim behave?' asked Mrs. Cratchit when she had\nrallied Bob on his credulity, and Bob had hugged his daughter to his\nheart's content.\n\n'As good as gold,' said Bob, 'and better. Somehow, he gets thoughtful,\nsitting by himself so much, and thinks the strangest things you ever\nheard. 
He told me, coming home, that he hoped the people saw him in the\nchurch, because he was a cripple, and it might be pleasant to them to\nremember upon Christmas Day who made lame beggars walk and blind men\nsee.'\n\nBob's voice was tremulous when he told them this, and trembled more when\nhe said that Tiny Tim was growing strong and hearty.\n\nHis active little crutch was heard upon the floor, and back came Tiny\nTim before another word was spoken, escorted by his brother and\nsister to his stool beside the fire; and while Bob, turning up his\ncuffs--as if, poor fellow, they were capable of being made more\nshabby--compounded some hot mixture in a jug with gin and lemons, and\nstirred it round and round, and put it on the hob to simmer, Master\nPeter and the two ubiquitous young Cratchits went to fetch the goose,\nwith which they soon returned in high procession.\n\n[Illustration]\n\nSuch a bustle ensued that you might have thought a goose the rarest of\nall birds; a feathered phenomenon, to which a black swan was a matter of\ncourse--and, in truth, it was something very like it in that house. Mrs.\nCratchit made the gravy (ready beforehand in a little saucepan) hissing\nhot; Master Peter mashed the potatoes with incredible vigour; Miss\nBelinda sweetened up the apple sauce; Martha dusted the hot plates; Bob\ntook Tiny Tim beside him in a tiny corner at the table; the two young\nCratchits set chairs for everybody, not forgetting themselves, and,\nmounting guard upon their posts, crammed spoons into their mouths, lest\nthey should shriek for goose before their turn came to be helped. At\nlast the dishes were set on, and grace was said. It was succeeded by a\nbreathless pause, as Mrs. 
Cratchit, looking slowly all along the\ncarving-knife, prepared to plunge it in the breast; but when she did,\nand when the long-expected gush of stuffing issued forth, one murmur of\ndelight arose all round the board, and even Tiny Tim, excited by the two\nyoung Cratchits, beat on the table with the handle of his knife and\nfeebly cried Hurrah!\n\n[Illustration: HE HAD BEEN TIM'S BLOOD-HORSE ALL THE WAY FROM CHURCH]\n\nThere never was such a goose. Bob said he didn't believe there ever was\nsuch a goose cooked. Its tenderness and flavour, size and cheapness,\nwere the themes of universal admiration. Eked out by apple sauce and\nmashed potatoes, it was a sufficient dinner for the whole family;\nindeed, as Mrs. Cratchit said with great delight (surveying one small\natom of a bone upon the dish), they hadn't ate it all at last! Yet every\none had had enough, and the youngest Cratchits, in particular, were\nsteeped in sage and onion to the eyebrows! But now, the plates being\nchanged by Miss Belinda, Mrs. Cratchit left the room alone--too nervous\nto bear witnesses--to take the pudding up, and bring it in.\n\nSuppose it should not be done enough! Suppose it should break in turning\nout! Suppose somebody should have got over the wall of the back-yard and\nstolen it, while they were merry with the goose--a supposition at which\nthe two young Cratchits became livid! All sorts of horrors were\nsupposed.\n\nHallo! A great deal of steam! The pudding was out of the copper. A smell\nlike a washing-day! That was the cloth. A smell like an eating-house and\na pastry-cook's next door to each other, with a laundress's next door to\nthat! That was the pudding! In half a minute Mrs. Cratchit\nentered--flushed, but smiling proudly--with the pudding, like a speckled\ncannon-ball, so hard and firm, blazing in half of half-a-quartern of\nignited brandy, and bedight with Christmas holly stuck into the top.\n\nOh, a wonderful pudding! 
Bob Cratchit said, and calmly too, that he\nregarded it as the greatest success achieved by Mrs. Cratchit since\ntheir marriage. Mrs. Cratchit said that, now the weight was off her\nmind, she would confess she had her doubts about the quantity of flour.\nEverybody had something to say about it, but nobody said or thought it\nwas at all a small pudding for a large family. It would have been flat\nheresy to do so. Any Cratchit would have blushed to hint at such a\nthing.\n\n[Illustration: WITH THE PUDDING]\n\nAt last the dinner was all done, the cloth was cleared, the hearth\nswept, and the fire made up. The compound in the jug being tasted and\nconsidered perfect, apples and oranges were put upon the table, and a\nshovel full of chestnuts on the fire. Then all the Cratchit family\ndrew round the hearth in what Bob Cratchit called a circle, meaning half\na one; and at Bob Cratchit's elbow stood the family display of glass.\nTwo tumblers and a custard cup without a handle.\n\nThese held the hot stuff from the jug, however, as well as golden\ngoblets would have done; and Bob served it out with beaming looks, while\nthe chestnuts on the fire sputtered and cracked noisily. Then Bob\nproposed:\n\n'A merry Christmas to us all, my dears. God bless us!'\n\nWhich all the family re-echoed.\n\n'God bless us every one!' said Tiny Tim, the last of all.\n\nHe sat very close to his father's side, upon his little stool. Bob held\nhis withered little hand to his, as if he loved the child, and wished to\nkeep him by his side, and dreaded that he might be taken from him.\n\n'Spirit,' said Scrooge, with an interest he had never felt before, 'tell\nme if Tiny Tim will live.'\n\n'I see a vacant seat,' replied the Ghost, 'in the poor chimney corner,\nand a crutch without an owner, carefully preserved. If these shadows\nremain unaltered by the Future, the child will die.'\n\n'No, no,' said Scrooge. 'Oh no, kind Spirit! 
say he will be spared.'\n\n'If these shadows remain unaltered by the Future none other of my race,'\nreturned the Ghost, 'will find him here. What then? If he be like to\ndie, he had better do it, and decrease the surplus population.'\n\nScrooge hung his head to hear his own words quoted by the Spirit, and\nwas overcome with penitence and grief.\n\n'Man,' said the Ghost, 'if man you be in heart, not adamant, forbear\nthat wicked cant until you have discovered what the surplus is, and\nwhere it is. Will you decide what men shall live, what men shall die? It\nmay be that, in the sight of Heaven, you are more worthless and less fit\nto live than millions like this poor man's child. O God! to hear the\ninsect on the leaf pronouncing on the too much life among his hungry\nbrothers in the dust!'\n\nScrooge bent before the Ghost's rebuke, and, trembling, cast his eyes\nupon the ground. But he raised them speedily on hearing his own name.\n\n'Mr. Scrooge!' said Bob. 'I'll give you Mr. Scrooge, the Founder of the\nFeast!'\n\n'The Founder of the Feast, indeed!' cried Mrs. Cratchit, reddening. 'I\nwish I had him here. I'd give him a piece of my mind to feast upon, and\nI hope he'd have a good appetite for it.'\n\n'My dear,' said Bob, 'the children! Christmas Day.'\n\n'It should be Christmas Day, I am sure,' said she, 'on which one drinks\nthe health of such an odious, stingy, hard, unfeeling man as Mr.\nScrooge. You know he is, Robert! Nobody knows it better than you do,\npoor fellow!'\n\n'My dear!' was Bob's mild answer. 'Christmas Day.'\n\n'I'll drink his health for your sake and the Day's,' said Mrs. Cratchit,\n'not for his. Long life to him! A merry Christmas and a happy New Year!\nHe'll be very merry and very happy, I have no doubt!'\n\nThe children drank the toast after her. It was the first of their\nproceedings which had no heartiness in it. Tiny Tim drank it last of\nall, but he didn't care twopence for it. Scrooge was the Ogre of the\nfamily. 
The mention of his name cast a dark shadow on the party, which\nwas not dispelled for full five minutes.\n\nAfter it had passed away they were ten times merrier than before, from\nthe mere relief of Scrooge the Baleful being done with. Bob Cratchit\ntold them how he had a situation in his eye for Master Peter, which\nwould bring in, if obtained, full five-and-sixpence weekly. The two\nyoung Cratchits laughed tremendously at the idea of Peter's being a man\nof business; and Peter himself looked thoughtfully at the fire from\nbetween his collars, as if he were deliberating what particular\ninvestments he should favour when he came into the receipt of that\nbewildering income. Martha, who was a poor apprentice at a milliner's,\nthen told them what kind of work she had to do, and how many hours she\nworked at a stretch and how she meant to lie abed to-morrow morning for\na good long rest; to-morrow being a holiday she passed at home. Also how\nshe had seen a countess and a lord some days before, and how the lord\n'was much about as tall as Peter'; at which Peter pulled up his collar\nso high that you couldn't have seen his head if you had been there. All\nthis time the chestnuts and the jug went round and round; and by-and-by\nthey had a song, about a lost child travelling in the snow, from Tiny\nTim, who had a plaintive little voice, and sang it very well indeed.\n\nThere was nothing of high mark in this. They were not a handsome family;\nthey were not well dressed; their shoes were far from being waterproof;\ntheir clothes were scanty; and Peter might have known, and very likely\ndid, the inside of a pawnbroker's. 
But they were happy, grateful,\npleased with one another, and contented with the time; and when they\nfaded, and looked happier yet in the bright sprinklings of the Spirit's\ntorch at parting, Scrooge had his eye upon them, and especially on Tiny\nTim, until the last.\n\nBy this time it was getting dark, and snowing pretty heavily; and as\nScrooge and the Spirit went along the streets, the brightness of the\nroaring fires in kitchens, parlours, and all sorts of rooms was\nwonderful. Here, the flickering of the blaze showed preparations for a\ncosy dinner, with hot plates baking through and through before the fire,\nand deep red curtains, ready to be drawn to shut out cold and darkness.\nThere, all the children of the house were running out into the snow to\nmeet their married sisters, brothers, cousins, uncles, aunts, and be the\nfirst to greet them. Here, again, were shadows on the window-blinds of\nguests assembling; and there a group of handsome girls, all hooded and\nfur-booted, and all chattering at once, tripped lightly off to some near\nneighbour's house; where, woe upon the single man who saw them\nenter--artful witches, well they knew it--in a glow!\n\nBut, if you had judged from the numbers of people on their way to\nfriendly gatherings, you might have thought that no one was at home to\ngive them welcome when they got there, instead of every house expecting\ncompany, and piling up its fires half-chimney high. Blessings on it, how\nthe Ghost exulted! How it bared its breadth of breast, and opened its\ncapacious palm, and floated on, outpouring with a generous hand its\nbright and harmless mirth on everything within its reach! 
The very\nlamplighter, who ran on before, dotting the dusky street with specks of\nlight, and who was dressed to spend the evening somewhere, laughed out\nloudly as the Spirit passed, though little kenned the lamplighter that\nhe had any company but Christmas.\n\nAnd now, without a word of warning from the Ghost, they stood upon a\nbleak and desert moor, where monstrous masses of rude stone were cast\nabout, as though it were the burial-place of giants; and water spread\nitself wheresoever it listed; or would have done so, but for the frost\nthat held it prisoner; and nothing grew but moss and furze, and coarse,\nrank grass. Down in the west the setting sun had left a streak of fiery\nred, which glared upon the desolation for an instant, like a sullen eye,\nand frowning lower, lower, lower yet, was lost in the thick gloom of\ndarkest night.\n\n'What place is this?' asked Scrooge.\n\n'A place where miners live, who labour in the bowels of the earth,'\nreturned the Spirit. 'But they know me. See!'\n\nA light shone from the window of a hut, and swiftly they advanced\ntowards it. Passing through the wall of mud and stone, they found a\ncheerful company assembled round a glowing fire. An old, old man and\nwoman, with their children and their children's children, and another\ngeneration beyond that, all decked out gaily in their holiday attire.\nThe old man, in a voice that seldom rose above the howling of the wind\nupon the barren waste, was singing them a Christmas song; it had been a\nvery old song when he was a boy; and from time to time they all joined\nin the chorus. So surely as they raised their voices, the old man got\nquite blithe and loud; and so surely as they stopped, his vigour sank\nagain.\n\nThe Spirit did not tarry here, but bade Scrooge hold his robe, and,\npassing on above the moor, sped whither? Not to sea? To sea. 
To\nScrooge's horror, looking back, he saw the last of the land, a frightful\nrange of rocks, behind them; and his ears were deafened by the\nthundering of water, as it rolled and roared, and raged among the\ndreadful caverns it had worn, and fiercely tried to undermine the earth.\n\nBuilt upon a dismal reef of sunken rocks, some league or so from shore,\non which the waters chafed and dashed, the wild year through, there\nstood a solitary lighthouse. Great heaps of seaweed clung to its base,\nand storm-birds--born of the wind, one might suppose, as seaweed of the\nwater--rose and fell about it, like the waves they skimmed.\n\nBut, even here, two men who watched the light had made a fire, that\nthrough the loophole in the thick stone wall shed out a ray of\nbrightness on the awful sea. Joining their horny hands over the rough\ntable at which they sat, they wished each other Merry Christmas in their\ncan of grog; and one of them--the elder too, with his face all damaged\nand scarred with hard weather, as the figure-head of an old ship might\nbe--struck up a sturdy song that was like a gale in itself.\n\nAgain the Ghost sped on, above the black and heaving sea--on, on--until\nbeing far away, as he told Scrooge, from any shore, they lighted on a\nship. They stood beside the helmsman at the wheel, the look-out in the\nbow, the officers who had the watch; dark, ghostly figures in their\nseveral stations; but every man among them hummed a Christmas tune, or\nhad a Christmas thought, or spoke below his breath to his companion of\nsome bygone Christmas Day, with homeward hopes belonging to it. 
And\nevery man on board, waking or sleeping, good or bad, had had a kinder\nword for one another on that day than on any day in the year; and had\nshared to some extent in its festivities; and had remembered those he\ncared for at a distance, and had known that they delighted to remember\nhim.\n\nIt was a great surprise to Scrooge, while listening to the moaning of\nthe wind, and thinking what a solemn thing it was to move on through the\nlonely darkness over an unknown abyss, whose depths were secrets as\nprofound as death: it was a great surprise to Scrooge, while thus\nengaged, to hear a hearty laugh. It was a much greater surprise to\nScrooge to recognise it as his own nephew's and to find himself in a\nbright, dry, gleaming room, with the Spirit standing smiling by his\nside, and looking at that same nephew with approving affability!\n\n'Ha, ha!' laughed Scrooge's nephew. 'Ha, ha, ha!'\n\nIf you should happen, by any unlikely chance, to know a man more blessed\nin a laugh than Scrooge's nephew, all I can say is, I should like to\nknow him too. Introduce him to me, and I'll cultivate his acquaintance.\n\nIt is a fair, even-handed, noble adjustment of things, that while there\nis infection in disease and sorrow, there is nothing in the world so\nirresistibly contagious as laughter and good-humour. When Scrooge's\nnephew laughed in this way--holding his sides, rolling his head, and\ntwisting his face into the most extravagant contortions--Scrooge's\nniece, by marriage, laughed as heartily as he. And their assembled\nfriends, being not a bit behindhand, roared out lustily.\n\n'Ha, ha! Ha, ha, ha, ha!'\n\n'He said that Christmas was a humbug, as I live!' cried Scrooge's\nnephew. 'He believed it, too!'\n\n'More shame for him, Fred!' said Scrooge's niece indignantly. Bless\nthose women! they never do anything by halves. They are always in\nearnest.\n\nShe was very pretty; exceedingly pretty. 
With a dimpled,\nsurprised-looking, capital face; a ripe little mouth, that seemed made\nto be kissed--as no doubt it was; all kinds of good little dots about\nher chin, that melted into one another when she laughed; and the\nsunniest pair of eyes you ever saw in any little creature's head.\nAltogether she was what you would have called provoking, you know; but\nsatisfactory, too. Oh, perfectly satisfactory!\n\n'He's a comical old fellow,' said Scrooge's nephew, 'that's the truth;\nand not so pleasant as he might be. However, his offences carry their\nown punishment, and I have nothing to say against him.'\n\n'I'm sure he is very rich, Fred,' hinted Scrooge's niece. 'At least, you\nalways tell _me_ so.'\n\n'What of that, my dear?' said Scrooge's nephew. 'His wealth is of no use\nto him. He don't do any good with it. He don't make himself comfortable\nwith it. He hasn't the satisfaction of thinking--ha, ha, ha!--that he is\never going to benefit Us with it.'\n\n'I have no patience with him,' observed Scrooge's niece. Scrooge's\nniece's sisters, and all the other ladies, expressed the same opinion.\n\n'Oh, I have!' said Scrooge's nephew. 'I am sorry for him; I couldn't be\nangry with him if I tried. Who suffers by his ill whims? Himself always.\nHere he takes it into his head to dislike us, and he won't come and dine\nwith us. What's the consequence? He don't lose much of a dinner.'\n\n'Indeed, I think he loses a very good dinner,' interrupted Scrooge's\nniece. Everybody else said the same, and they must be allowed to have\nbeen competent judges, because they had just had dinner; and with the\ndessert upon the table, were clustered round the fire, by lamplight.\n\n'Well! I am very glad to hear it,' said Scrooge's nephew, 'because I\nhaven't any great faith in these young housekeepers. 
What do _you_ say,\nTopper?'\n\nTopper had clearly got his eye upon one of Scrooge's niece's sisters,\nfor he answered that a bachelor was a wretched outcast, who had no right\nto express an opinion on the subject. Whereat Scrooge's niece's\nsister--the plump one with the lace tucker: not the one with the\nroses--blushed.\n\n'Do go on, Fred,' said Scrooge's niece, clapping her hands. 'He never\nfinishes what he begins to say! He is such a ridiculous fellow!'\n\nScrooge's nephew revelled in another laugh, and as it was impossible to\nkeep the infection off, though the plump sister tried hard to do it with\naromatic vinegar, his example was unanimously followed.\n\n'I was only going to say,' said Scrooge's nephew, 'that the consequence\nof his taking a dislike to us, and not making merry with us, is, as I\nthink, that he loses some pleasant moments, which could do him no harm.\nI am sure he loses pleasanter companions than he can find in his own\nthoughts, either in his mouldy old office or his dusty chambers. I mean\nto give him the same chance every year, whether he likes it or not, for\nI pity him. He may rail at Christmas till he dies, but he can't help\nthinking better of it--I defy him--if he finds me going there, in good\ntemper, year after year, and saying, \"Uncle Scrooge, how are you?\" If it\nonly put him in the vein to leave his poor clerk fifty pounds, _that's_\nsomething; and I think I shook him yesterday.'\n\nIt was their turn to laugh now, at the notion of his shaking Scrooge.\nBut being thoroughly good-natured, and not much caring what they laughed\nat, so that they laughed at any rate, he encouraged them in their\nmerriment, and passed the bottle, joyously.\n\nAfter tea they had some music. 
For they were a musical family, and knew\nwhat they were about when they sung a Glee or Catch, I can assure you:\nespecially Topper, who could growl away in the bass like a good one, and\nnever swell the large veins in his forehead, or get red in the face over\nit. Scrooge's niece played well upon the harp; and played, among other\ntunes, a simple little air (a mere nothing: you might learn to whistle\nit in two minutes) which had been familiar to the child who fetched\nScrooge from the boarding-school, as he had been reminded by the Ghost\nof Christmas Past. When this strain of music sounded, all the things\nthat Ghost had shown him came upon his mind; he softened more and more;\nand thought that if he could have listened to it often, years ago, he\nmight have cultivated the kindnesses of life for his own happiness with\nhis own hands, without resorting to the sexton's spade that buried Jacob\nMarley.\n\n[Illustration: _The way he went after that plump sister in the lace\ntucker!_]\n\nBut they didn't devote the whole evening to music. After a while they\nplayed at forfeits; for it is good to be children sometimes, and never\nbetter than at Christmas, when its mighty Founder was a child himself.\nStop! There was first a game at blind man's-buff. Of course there was.\nAnd I no more believe Topper was really blind than I believe he had eyes\nin his boots. My opinion is, that it was a done thing between him and\nScrooge's nephew; and that the Ghost of Christmas Present knew it. The\nway he went after that plump sister in the lace tucker was an outrage on\nthe credulity of human nature. Knocking down the fire-irons, tumbling\nover the chairs, bumping up against the piano, smothering himself\namongst the curtains, wherever she went, there went he! He always knew\nwhere the plump sister was. He wouldn't catch anybody else. 
If you had\nfallen up against him (as some of them did) on purpose, he would have\nmade a feint of endeavouring to seize you, which would have been an\naffront to your understanding, and would instantly have sidled off in\nthe direction of the plump sister. She often cried out that it wasn't\nfair; and it really was not. But when, at last, he caught her; when, in\nspite of all her silken rustlings, and her rapid flutterings past him,\nhe got her into a corner whence there was no escape; then his conduct\nwas the most execrable. For his pretending not to know her; his\npretending that it was necessary to touch her head-dress, and further to\nassure himself of her identity by pressing a certain ring upon her\nfinger, and a certain chain about her neck; was vile, monstrous! No\ndoubt she told him her opinion of it when, another blind man being in\noffice, they were so very confidential together behind the curtains.\n\nScrooge's niece was not one of the blind man's-buff party, but was made\ncomfortable with a large chair and a footstool, in a snug corner where\nthe Ghost and Scrooge were close behind her. But she joined in the\nforfeits, and loved her love to admiration with all the letters of the\nalphabet. 
Likewise at the game of How, When, and Where, she was very\ngreat, and, to the secret joy of Scrooge's nephew, beat her sisters\nhollow; though they were sharp girls too, as Topper could have told you.\nThere might have been twenty people there, young and old, but they all\nplayed, and so did Scrooge; for wholly forgetting, in the interest he\nhad in what was going on, that his voice made no sound in their ears, he\nsometimes came out with his guess quite loud, and very often guessed\nright, too; for the sharpest needle, best Whitechapel, warranted not to\ncut in the eye, was not sharper than Scrooge, blunt as he took it in\nhis head to be.\n\nThe Ghost was greatly pleased to find him in this mood, and looked upon\nhim with such favour that he begged like a boy to be allowed to stay\nuntil the guests departed. But this the Spirit said could not be done.\n\n'Here is a new game,' said Scrooge. 'One half-hour, Spirit, only one!'\n\nIt was a game called Yes and No, where Scrooge's nephew had to think of\nsomething, and the rest must find out what, he only answering to their\nquestions yes or no, as the case was. The brisk fire of questioning to\nwhich he was exposed elicited from him that he was thinking of an\nanimal, a live animal, rather a disagreeable animal, a savage animal, an\nanimal that growled and grunted sometimes, and talked sometimes and\nlived in London, and walked about the streets, and wasn't made a show\nof, and wasn't led by anybody, and didn't live in a menagerie, and was\nnever killed in a market, and was not a horse, or an ass, or a cow, or a\nbull, or a tiger, or a dog, or a pig, or a cat, or a bear. At every\nfresh question that was put to him, this nephew burst into a fresh roar\nof laughter; and was so inexpressibly tickled, that he was obliged to\nget up off the sofa and stamp. At last the plump sister, falling into a\nsimilar state, cried out:\n\n'I have found it out! I know what it is, Fred! I know what it is!'\n\n'What is it?' 
cried Fred.\n\n'It's your uncle Scro-o-o-o-oge.'\n\nWhich it certainly was. Admiration was the universal sentiment, though\nsome objected that the reply to 'Is it a bear?' ought to have been\n'Yes'; inasmuch as an answer in the negative was sufficient to have\ndiverted their thoughts from Mr. Scrooge, supposing they had ever had\nany tendency that way.\n\n'He has given us plenty of merriment, I am sure,' said Fred, 'and it\nwould be ungrateful not to drink his health. Here is a glass of mulled\nwine ready to our hand at the moment; and I say, \"Uncle Scrooge!\"'\n\n'Well! Uncle Scrooge!' they cried.\n\n'A merry Christmas and a happy New Year to the old man, whatever he is!'\nsaid Scrooge's nephew. 'He wouldn't take it from me, but may he have it,\nnevertheless. Uncle Scrooge!'\n\nUncle Scrooge had imperceptibly become so gay and light of heart, that\nhe would have pledged the unconscious company in return, and thanked\nthem in an inaudible speech, if the Ghost had given him time. But the\nwhole scene passed off in the breath of the last word spoken by his\nnephew; and he and the Spirit were again upon their travels.\n\nMuch they saw, and far they went, and many homes they visited, but\nalways with a happy end. The Spirit stood beside sick-beds, and they\nwere cheerful; on foreign lands, and they were close at home; by\nstruggling men, and they were patient in their greater hope; by poverty,\nand it was rich. In almshouse, hospital, and gaol, in misery's every\nrefuge, where vain man in his little brief authority had not made fast\nthe door, and barred the Spirit out, he left his blessing and taught\nScrooge his precepts.\n\nIt was a long night, if it were only a night; but Scrooge had his doubts\nof this, because the Christmas holidays appeared to be condensed into\nthe space of time they passed together. It was strange, too, that, while\nScrooge remained unaltered in his outward form, the Ghost grew older,\nclearly older. 
Scrooge had observed this change, but never spoke of it\nuntil they left a children's Twelfth-Night party, when, looking at the\nSpirit as they stood together in an open place, he noticed that its hair\nwas grey.\n\n'Are spirits' lives so short?' asked Scrooge.\n\n'My life upon this globe is very brief,' replied the Ghost. 'It ends\nto-night.'\n\n'To-night!' cried Scrooge.\n\n'To-night at midnight. Hark! The time is drawing near.'\n\nThe chimes were ringing the three-quarters past eleven at that moment.\n\n'Forgive me if I am not justified in what I ask,' said Scrooge, looking\nintently at the Spirit's robe, 'but I see something strange, and not\nbelonging to yourself, protruding from your skirts. Is it a foot or a\nclaw?'\n\n'It might be a claw, for the flesh there is upon it,' was the Spirit's\nsorrowful reply. 'Look here!'\n\nFrom the foldings of its robe it brought two children, wretched, abject,\nfrightful, hideous, miserable. They knelt down at its feet, and clung\nupon the outside of its garment.\n\n'O Man! look here! Look, look down here!' exclaimed the Ghost.\n\nThey were a boy and girl. Yellow, meagre, ragged, scowling, wolfish, but\nprostrate, too, in their humility. Where graceful youth should have\nfilled their features out, and touched them with its freshest tints, a\nstale and shrivelled hand, like that of age, had pinched and twisted\nthem, and pulled them into shreds. Where angels might have sat\nenthroned, devils lurked, and glared out menacing. No change, no\ndegradation, no perversion of humanity in any grade, through all the\nmysteries of wonderful creation, has monsters half so horrible and\ndread.\n\nScrooge started back, appalled. Having them shown to him in this way, he\ntried to say they were fine children, but the words choked themselves,\nrather than be parties to a lie of such enormous magnitude.\n\n'Spirit! are they yours?' Scrooge could say no more.\n\n'They are Man's,' said the Spirit, looking down upon them. 
'And they\ncling to me, appealing from their fathers. This boy is Ignorance. This\ngirl is Want. Beware of them both, and all of their degree, but most of\nall beware this boy, for on his brow I see that written which is Doom,\nunless the writing be erased. Deny it!' cried the Spirit, stretching out\nhis hand towards the city. 'Slander those who tell it ye! Admit it for\nyour factious purposes, and make it worse! And bide the end!'\n\n'Have they no refuge or resource?' cried Scrooge.\n\n'Are there no prisons?' said the Spirit, turning on him for the last\ntime with his own words. 'Are there no workhouses?'\n\nThe bell struck Twelve.\n\nScrooge looked about him for the Ghost, and saw it not. As the last\nstroke ceased to vibrate, he remembered the prediction of old Jacob\nMarley, and, lifting up his eyes, beheld a solemn Phantom, draped and\nhooded, coming like a mist along the ground towards him.\n\n\nSTAVE FOUR\n\n\n\n\nTHE LAST OF THE SPIRITS\n\n\nThe Phantom slowly, gravely, silently approached. When it came near him,\nScrooge bent down upon his knee; for in the very air through which this\nSpirit moved it seemed to scatter gloom and mystery.\n\nIt was shrouded in a deep black garment, which concealed its head, its\nface, its form, and left nothing of it visible, save one outstretched\nhand. But for this, it would have been difficult to detach its figure\nfrom the night, and separate it from the darkness by which it was\nsurrounded.\n\nHe felt that it was tall and stately when it came beside him, and that\nits mysterious presence filled him with a solemn dread. He knew no more,\nfor the Spirit neither spoke nor moved.\n\n'I am in the presence of the Ghost of Christmas Yet to Come?' said\nScrooge.\n\nThe Spirit answered not, but pointed onward with its hand.\n\n'You are about to show me shadows of the things that have not happened,\nbut will happen in the time before us,' Scrooge pursued. 
'Is that so,\nSpirit?'\n\nThe upper portion of the garment was contracted for an instant in its\nfolds, as if the Spirit had inclined its head. That was the only answer\nhe received.\n\nAlthough well used to ghostly company by this time, Scrooge feared the\nsilent shape so much that his legs trembled beneath him, and he found\nthat he could hardly stand when he prepared to follow it. The Spirit\npaused a moment, as observing his condition, and giving him time to\nrecover.\n\nBut Scrooge was all the worse for this. It thrilled him with a vague,\nuncertain horror to know that, behind the dusky shroud, there were\nghostly eyes intently fixed upon him, while he, though he stretched his\nown to the utmost, could see nothing but a spectral hand and one great\nheap of black.\n\n'Ghost of the Future!' he exclaimed, 'I fear you more than any spectre\nI have seen. But as I know your purpose is to do me good, and as I hope\nto live to be another man from what I was, I am prepared to bear your\ncompany, and do it with a thankful heart. Will you not speak to me?'\n\nIt gave him no reply. The hand was pointed straight before them.\n\n'Lead on!' said Scrooge. 'Lead on! The night is waning fast, and it is\nprecious time to me, I know. Lead on, Spirit!'\n\nThe Phantom moved away as it had come towards him. Scrooge followed in\nthe shadow of its dress, which bore him up, he thought, and carried him\nalong.\n\nThey scarcely seemed to enter the City; for the City rather seemed to\nspring up about them, and encompass them of its own act. But there they\nwere in the heart of it; on 'Change, amongst the merchants, who hurried\nup and down, and chinked the money in their pockets, and conversed in\ngroups, and looked at their watches, and trifled thoughtfully with their\ngreat gold seals, and so forth, as Scrooge had seen them often.\n\nThe Spirit stopped beside one little knot of business men. 
Observing\nthat the hand was pointed to them, Scrooge advanced to listen to their\ntalk.\n\n'No,' said a great fat man with a monstrous chin, 'I don't know much\nabout it either way. I only know he's dead.'\n\n'When did he die?' inquired another.\n\n'Last night, I believe.'\n\n'Why, what was the matter with him?' asked a third, taking a vast\nquantity of snuff out of a very large snuff-box. 'I thought he'd never\ndie.'\n\n'God knows,' said the first, with a yawn.\n\n'What has he done with his money?' asked a red-faced gentleman with a\npendulous excrescence on the end of his nose, that shook like the gills\nof a turkey-cock.\n\n'I haven't heard,' said the man with the large chin, yawning again.\n'Left it to his company, perhaps. He hasn't left it to _me_. That's all\nI know.'\n\nThis pleasantry was received with a general laugh.\n\n'It's likely to be a very cheap funeral,' said the same speaker; 'for,\nupon my life, I don't know of anybody to go to it. Suppose we make up a\nparty, and volunteer?'\n\n'I don't mind going if a lunch is provided,' observed the gentleman with\nthe excrescence on his nose. 'But I must be fed if I make one.'\n\nAnother laugh.\n\n[Illustration:\n\n _\"How are you?\" said one.\n \"How are you?\" returned the other.\n \"Well!\" said the first. \"Old Scratch has got his own at last, hey?\"_\n\n]\n\n'Well, I am the most disinterested among you, after all,' said the first\nspeaker, 'for I never wear black gloves, and I never eat lunch. But I'll\noffer to go if anybody else will. When I come to think of it, I'm not\nat all sure that I wasn't his most particular friend; for we used to\nstop and speak whenever we met. Bye, bye!'\n\nSpeakers and listeners strolled away, and mixed with other groups.\nScrooge knew the men, and looked towards the Spirit for an explanation.\n\nThe phantom glided on into a street. Its finger pointed to two persons\nmeeting. 
Scrooge listened again, thinking that the explanation might lie\nhere.\n\nHe knew these men, also, perfectly. They were men of business: very\nwealthy, and of great importance. He had made a point always of standing\nwell in their esteem in a business point of view, that is; strictly in a\nbusiness point of view.\n\n'How are you?' said one.\n\n'How are you?' returned the other.\n\n'Well!' said the first, 'old Scratch has got his own at last, hey?'\n\n'So I am told,' returned the second. 'Cold, isn't it?'\n\n'Seasonable for Christmas-time. You are not a skater, I suppose?'\n\n'No, no. Something else to think of. Good-morning!'\n\nNot another word. That was their meeting, their conversation, and their\nparting.\n\nScrooge was at first inclined to be surprised that the Spirit should\nattach importance to conversations apparently so trivial; but feeling\nassured that they must have some hidden purpose, he set himself to\nconsider what it was likely to be. They could scarcely be supposed to\nhave any bearing on the death of Jacob, his old partner, for that was\nPast, and this Ghost's province was the Future. Nor could he think of\nany one immediately connected with himself to whom he could apply them.\nBut nothing doubting that, to whomsoever they applied, they had some\nlatent moral for his own improvement, he resolved to treasure up every\nword he heard, and everything he saw; and especially to observe the\nshadow of himself when it appeared. For he had an expectation that the\nconduct of his future self would give him the clue he missed, and would\nrender the solution of these riddles easy.\n\nHe looked about in that very place for his own image, but another man\nstood in his accustomed corner; and though the clock pointed to his\nusual time of day for being there, he saw no likeness of himself among\nthe multitudes that poured in through the Porch. 
It gave him little\nsurprise, however; for he had been revolving in his mind a change of\nlife, and thought and hoped he saw his new-born resolutions carried out\nin this.\n\nQuiet and dark, beside him stood the Phantom, with its outstretched\nhand. When he roused himself from his thoughtful quest, he fancied,\nfrom the turn of the hand, and its situation in reference to himself,\nthat the Unseen Eyes were looking at him keenly. It made him shudder,\nand feel very cold.\n\nThey left the busy scene, and went into an obscure part of the town,\nwhere Scrooge had never penetrated before, although he recognised its\nsituation and its bad repute. The ways were foul and narrow; the shops\nand houses wretched; the people half naked, drunken, slipshod, ugly.\nAlleys and archways, like so many cesspools, disgorged their offences of\nsmell and dirt, and life upon the straggling streets; and the whole\nquarter reeked with crime, with filth, and misery.\n\nFar in this den of infamous resort, there was a low-browed, beetling\nshop, below a penthouse roof, where iron, old rags, bottles, bones, and\ngreasy offal were bought. Upon the floor within were piled up heaps of\nrusty keys, nails, chains, hinges, files, scales, weights, and refuse\niron of all kinds. Secrets that few would like to scrutinise were bred\nand hidden in mountains of unseemly rags, masses of corrupted fat, and\nsepulchres of bones. Sitting in among the wares he dealt in, by a\ncharcoal stove made of old bricks, was a grey-haired rascal, nearly\nseventy years of age, who had screened himself from the cold air without\nby a frouzy curtaining of miscellaneous tatters hung upon a line and\nsmoked his pipe in all the luxury of calm retirement.\n\nScrooge and the Phantom came into the presence of this man, just as a\nwoman with a heavy bundle slunk into the shop. 
But she had scarcely\nentered, when another woman, similarly laden, came in too; and she was\nclosely followed by a man in faded black, who was no less startled by\nthe sight of them than they had been upon the recognition of each other.\nAfter a short period of blank astonishment, in which the old man with\nthe pipe had joined them, they all three burst into a laugh.\n\n'Let the charwoman alone to be the first!' cried she who had entered\nfirst. 'Let the laundress alone to be the second; and let the\nundertaker's man alone to be the third. Look here, old Joe, here's a\nchance! If we haven't all three met here without meaning it!'\n\n'You couldn't have met in a better place,' said old Joe, removing his\npipe from his mouth. 'Come into the parlour. You were made free of it\nlong ago, you know; and the other two an't strangers. Stop till I shut\nthe door of the shop. Ah! how it skreeks! There an't such a rusty bit of\nmetal in the place as its own hinges, I believe; and I'm sure there's no\nsuch old bones here as mine. Ha! ha! We're all suitable to our calling,\nwe're well matched. Come into the parlour. Come into the parlour.'\n\nThe parlour was the space behind the screen of rags. The old man raked\nthe fire together with an old stair-rod, and having trimmed his smoky\nlamp (for it was night) with the stem of his pipe, put it into his mouth\nagain.\n\nWhile he did this, the woman who had already spoken threw her bundle on\nthe floor, and sat down in a flaunting manner on a stool, crossing her\nelbows on her knees, and looking with a bold defiance at the other two.\n\n'What odds, then? What odds, Mrs. Dilber?' said the woman. 'Every person\nhas a right to take care of themselves. _He_ always did!'\n\n'That's true, indeed!' said the laundress. 'No man more so.'\n\n'Why, then, don't stand staring as if you was afraid, woman! Who's the\nwiser? We're not going to pick holes in each other's coats, I suppose?'\n\n'No, indeed!' said Mrs. Dilber and the man together. 
'We should hope\nnot.'\n\n'Very well then!' cried the woman. 'That's enough. Who's the worse for\nthe loss of a few things like these? Not a dead man, I suppose?'\n\n'No, indeed,' said Mrs. Dilber, laughing.\n\n'If he wanted to keep 'em after he was dead, a wicked old screw,'\npursued the woman, 'why wasn't he natural in his lifetime? If he had\nbeen, he'd have had somebody to look after him when he was struck with\nDeath, instead of lying gasping out his last there, alone by himself.'\n\n'It's the truest word that ever was spoke,' said Mrs. Dilber. 'It's a\njudgment on him.'\n\n'I wish it was a little heavier judgment,' replied the woman: 'and it\nshould have been, you may depend upon it, if I could have laid my hands\non anything else. Open that bundle, old Joe, and let me know the value\nof it. Speak out plain. I'm not afraid to be the first, nor afraid for\nthem to see it. We knew pretty well that we were helping ourselves\nbefore we met here, I believe. It's no sin. Open the bundle, Joe.'\n\nBut the gallantry of her friends would not allow of this; and the man in\nfaded black, mounting the breach first, produced _his_ plunder. It was\nnot extensive. A seal or two, a pencil-case, a pair of sleeve-buttons,\nand a brooch of no great value, were all. They were severally examined\nand appraised by old Joe, who chalked the sums he was disposed to give\nfor each upon the wall, and added them up into a total when he found\nthat there was nothing more to come.\n\n'That's your account,' said Joe, 'and I wouldn't give another sixpence,\nif I was to be boiled for not doing it. Who's next?'\n\n\n[Illustration: _\"What do you call this?\" said Joe. \"Bed-curtains.\"_]\n\nMrs. Dilber was next. Sheets and towels, a little wearing apparel, two\nold fashioned silver teaspoons, a pair of sugar-tongs, and a few\nboots. Her account was stated on the wall in the same manner.\n\n'I always give too much to ladies. 
It's a weakness of mine, and that's\nthe way I ruin myself,' said old Joe. 'That's your account. If you asked\nme for another penny, and made it an open question, I'd repent of being\nso liberal, and knock off half-a-crown.'\n\n'And now undo _my_ bundle, Joe,' said the first woman.\n\nJoe went down on his knees for the greater convenience of opening it,\nand, having unfastened a great many knots, dragged out a large heavy\nroll of some dark stuff.\n\n'What do you call this?' said Joe. 'Bed-curtains?'\n\n'Ah!' returned the woman, laughing and leaning forward on her crossed\narms. 'Bed-curtains!'\n\n'You don't mean to say you took 'em down, rings and all, with him lying\nthere?' said Joe.\n\n'Yes, I do,' replied the woman. 'Why not?'\n\n'You were born to make your fortune,' said Joe, 'and you'll certainly do\nit.'\n\n'I certainly shan't hold my hand, when I can get anything in it by\nreaching it out, for the sake of such a man as he was, I promise you,\nJoe,' returned the woman coolly. 'Don't drop that oil upon the blankets,\nnow.'\n\n'His blankets?' asked Joe.\n\n'Whose else's do you think?' replied the woman. 'He isn't likely to take\ncold without 'em, I dare say.'\n\n'I hope he didn't die of anything catching? Eh?' said old Joe, stopping\nin his work, and looking up.\n\n'Don't you be afraid of that,' returned the woman. 'I an't so fond of\nhis company that I'd loiter about him for such things, if he did. Ah!\nyou may look through that shirt till your eyes ache, but you won't find\na hole in it, nor a threadbare place. It's the best he had, and a fine\none too. They'd have wasted it, if it hadn't been for me.'\n\n'What do you call wasting of it?' asked old Joe.\n\n'Putting it on him to be buried in, to be sure,' replied the woman, with\na laugh. 'Somebody was fool enough to do it, but I took it off again. If\ncalico an't good enough for such a purpose, it isn't good enough for\nanything. It's quite as becoming to the body. 
He can't look uglier than\nhe did in that one.'\n\nScrooge listened to this dialogue in horror. As they sat grouped about\ntheir spoil, in the scanty light afforded by the old man's lamp, he\nviewed them with a detestation and disgust which could hardly have been\ngreater, though they had been obscene demons marketing the corpse\nitself.\n\n'Ha, ha!' laughed the same woman, when old Joe, producing a flannel bag\nwith money in it, told out their several gains upon the ground. 'This\nis the end of it, you see! He frightened every one away from him when he\nwas alive, to profit us when he was dead! Ha, ha, ha!'\n\n'Spirit!' said Scrooge, shuddering from head to foot. 'I see, I see. The\ncase of this unhappy man might be my own. My life tends that way now.\nMerciful heaven, what is this?'\n\nHe recoiled in terror, for the scene had changed, and now he almost\ntouched a bed--a bare, uncurtained bed--on which, beneath a ragged\nsheet, there lay a something covered up, which, though it was dumb,\nannounced itself in awful language.\n\nThe room was very dark, too dark to be observed with any accuracy,\nthough Scrooge glanced round it in obedience to a secret impulse,\nanxious to know what kind of room it was. A pale light, rising in the\nouter air, fell straight upon the bed; and on it, plundered and bereft,\nunwatched, unwept, uncared for, was the body of this man.\n\nScrooge glanced towards the Phantom. Its steady hand was pointed to the\nhead. The cover was so carelessly adjusted that the slightest raising of\nit, the motion of a finger upon Scrooge's part, would have disclosed the\nface. He thought of it, felt how easy it would be to do, and longed to\ndo it; but he had no more power to withdraw the veil than to dismiss the\nspectre at his side.\n\nOh, cold, cold, rigid, dreadful Death, set up thine altar here, and\ndress it with such terrors as thou hast at thy command; for this is thy\ndominion! 
But of the loved, revered, and honoured head thou canst not\nturn one hair to thy dread purposes, or make one feature odious. It is\nnot that the hand is heavy, and will fall down when released; it is not\nthat the heart and pulse are still; but that the hand was open,\ngenerous, and true; the heart brave, warm, and tender, and the pulse a\nman's. Strike, Shadow, strike! And see his good deeds springing from the\nwound, to sow the world with life immortal!\n\nNo voice pronounced these words in Scrooge's ears, and yet he heard them\nwhen he looked upon the bed. He thought, if this man could be raised up\nnow, what would be his foremost thoughts? Avarice, hard dealing, griping\ncares? They have brought him to a rich end, truly!\n\nHe lay in the dark, empty house, with not a man, a woman, or a child to\nsay he was kind to me in this or that, and for the memory of one kind\nword I will be kind to him. A cat was tearing at the door, and there was\na sound of gnawing rats beneath the hearthstone. What _they_ wanted in\nthe room of death, and why they were so restless and disturbed, Scrooge\ndid not dare to think.\n\n'Spirit!' he said, 'this is a fearful place. In leaving it, I shall not\nleave its lesson, trust me. Let us go!'\n\nStill the Ghost pointed with an unmoved finger to the head.\n\n'I understand you,' Scrooge returned, 'and I would do it if I could. But\nI have not the power, Spirit. 
I have not the power.'\n\nAgain it seemed to look upon him.\n\n'If there is any person in the town who feels emotion caused by this\nman's death,' said Scrooge, quite agonised, 'show that person to me,\nSpirit, I beseech you!'\n\nThe Phantom spread its dark robe before him for a moment, like a wing;\nand, withdrawing it, revealed a room by daylight, where a mother and her\nchildren were.\n\nShe was expecting some one, and with anxious eagerness; for she walked\nup and down the room, started at every sound, looked out from the\nwindow, glanced at the clock, tried, but in vain, to work with her\nneedle, and could hardly bear the voices of her children in their play.\n\nAt length the long-expected knock was heard. She hurried to the door,\nand met her husband; a man whose face was careworn and depressed, though\nhe was young. There was a remarkable expression in it now, a kind of\nserious delight of which he felt ashamed, and which he struggled to\nrepress.\n\nHe sat down to the dinner that had been hoarding for him by the fire,\nand when she asked him faintly what news (which was not until after a\nlong silence), he appeared embarrassed how to answer.\n\n'Is it good,' she said, 'or bad?' to help him.\n\n'Bad,' he answered.\n\n'We are quite ruined?'\n\n'No. There is hope yet, Caroline.'\n\n'If _he_ relents,' she said, amazed, 'there is! Nothing is past hope, if\nsuch a miracle has happened.'\n\n'He is past relenting,' said her husband. 'He is dead.'\n\nShe was a mild and patient creature, if her face spoke truth; but she\nwas thankful in her soul to hear it, and she said so with clasped hands.\nShe prayed forgiveness the next moment, and was sorry; but the first was\nthe emotion of her heart.\n\n'What the half-drunken woman, whom I told you of last night, said to me\nwhen I tried to see him and obtain a week's delay--and what I thought\nwas a mere excuse to avoid me--turns out to have been quite true. 
He was\nnot only very ill, but dying, then.'\n\n'To whom will our debt be transferred?'\n\n'I don't know. But, before that time, we shall be ready with the money;\nand even though we were not, it would be bad fortune indeed to find so\nmerciless a creditor in his successor. We may sleep to-night with light\nhearts, Caroline!'\n\nYes. Soften it as they would, their hearts were lighter. The children's\nfaces, hushed and clustered round to hear what they so little\nunderstood, were brighter; and it was a happier house for this man's\ndeath! The only emotion that the Ghost could show him, caused by the\nevent, was one of pleasure.\n\n'Let me see some tenderness connected with a death,' said Scrooge; 'or\nthat dark chamber, Spirit, which we left just now, will be for ever\npresent to me.'\n\nThe Ghost conducted him through several streets familiar to his feet;\nand as they went along, Scrooge looked here and there to find himself,\nbut nowhere was he to be seen. They entered poor Bob Cratchit's house;\nthe dwelling he had visited before; and found the mother and the\nchildren seated round the fire.\n\nQuiet. Very quiet. The noisy little Cratchits were as still as statues\nin one corner, and sat looking up at Peter, who had a book before him.\nThe mother and her daughters were engaged in sewing. But surely they\nwere very quiet!\n\n'\"And he took a child, and set him in the midst of them.\"'\n\nWhere had Scrooge heard those words? He had not dreamed them. The boy\nmust have read them out as he and the Spirit crossed the threshold. Why\ndid he not go on?\n\nThe mother laid her work upon the table, and put her hand up to her\nface.\n\n'The colour hurts my eyes,' she said.\n\nThe colour? Ah, poor Tiny Tim!\n\n'They're better now again,' said Cratchit's wife. 'It makes them weak by\ncandle-light; and I wouldn't show weak eyes to your father when he comes\nhome for the world. It must be near his time.'\n\n'Past it rather,' Peter answered, shutting up his book. 
'But I think he\nhas walked a little slower than he used, these few last evenings,\nmother.'\n\nThey were very quiet again. At last she said, and in a steady, cheerful\nvoice, that only faltered once:\n\n'I have known him walk with--I have known him walk with Tiny Tim upon\nhis shoulder very fast indeed.'\n\n'And so have I,' cried Peter. 'Often.'\n\n'And so have I,' exclaimed another. So had all.\n\n'But he was very light to carry,' she resumed, intent upon her work,\n'and his father loved him so, that it was no trouble, no trouble. And\nthere is your father at the door!'\n\nShe hurried out to meet him; and little Bob in his comforter--he had\nneed of it, poor fellow--came in. His tea was ready for him on the hob,\nand they all tried who should help him to it most. Then the two young\nCratchits got upon his knees, and laid, each child, a little cheek\nagainst his face, as if they said, 'Don't mind it, father. Don't be\ngrieved!'\n\nBob was very cheerful with them, and spoke pleasantly to all the family.\nHe looked at the work upon the table, and praised the industry and speed\nof Mrs. Cratchit and the girls. They would be done long before Sunday,\nhe said.\n\n'Sunday! You went to-day, then, Robert?' said his wife.\n\n'Yes, my dear,' returned Bob. 'I wish you could have gone. It would have\ndone you good to see how green a place it is. But you'll see it often. I\npromised him that I would walk there on a Sunday. My little, little\nchild!' cried Bob. 'My little child!'\n\nHe broke down all at once. He couldn't help it. If he could have helped\nit, he and his child would have been farther apart, perhaps, than they\nwere.\n\nHe left the room, and went upstairs into the room above, which was\nlighted cheerfully, and hung with Christmas. There was a chair set close\nbeside the child, and there were signs of some one having been there\nlately. Poor Bob sat down in it, and when he had thought a little and\ncomposed himself, he kissed the little face. 
He was reconciled to what\nhad happened, and went down again quite happy.\n\nThey drew about the fire, and talked, the girls and mother working\nstill. Bob told them of the extraordinary kindness of Mr. Scrooge's\nnephew, whom he had scarcely seen but once, and who, meeting him in the\nstreet that day, and seeing that he looked a little--'just a little\ndown, you know,' said Bob, inquired what had happened to distress him.\n'On which,' said Bob, 'for he is the pleasantest-spoken gentleman you\never heard, I told him. \"I am heartily sorry for it, Mr. Cratchit,\" he\nsaid, \"and heartily sorry for your good wife.\" By-the-bye, how he ever\nknew _that_ I don't know.'\n\n'Knew what, my dear?'\n\n'Why, that you were a good wife,' replied Bob.\n\n'Everybody knows that,' said Peter.\n\n'Very well observed, my boy!' cried Bob. 'I hope they do. \"Heartily\nsorry,\" he said, \"for your good wife. If I can be of service to you in\nany way,\" he said, giving me his card, \"that's where I live. Pray come\nto me.\" Now, it wasn't,' cried Bob, 'for the sake of anything he might\nbe able to do for us, so much as for his kind way, that this was quite\ndelightful. It really seemed as if he had known our Tiny Tim, and felt\nwith us.'\n\n'I'm sure he's a good soul!' said Mrs. Cratchit.\n\n'You would be sure of it, my dear,' returned Bob, 'if you saw and spoke\nto him. I shouldn't be at all surprised--mark what I say!--if he got\nPeter a better situation.'\n\n'Only hear that, Peter,' said Mrs. Cratchit.\n\n'And then,' cried one of the girls, 'Peter will be keeping company with\nsome one, and setting up for himself.'\n\n'Get along with you!' retorted Peter, grinning.\n\n'It's just as likely as not,' said Bob, 'one of these days; though\nthere's plenty of time for that, my dear. But, however and whenever we\npart from one another, I am sure we shall none of us forget poor Tiny\nTim--shall we--or this first parting that there was among us?'\n\n'Never, father!' 
cried they all.\n\n'And I know,' said Bob, 'I know, my dears, that when we recollect how\npatient and how mild he was; although he was a little, little child; we\nshall not quarrel easily among ourselves, and forget poor Tiny Tim in\ndoing it.'\n\n'No, never, father!' they all cried again.\n\n'I am very happy,' said little Bob, 'I am very happy!'\n\nMrs. Cratchit kissed him, his daughters kissed him, the two young\nCratchits kissed him, and Peter and himself shook hands. Spirit of Tiny\nTim, thy childish essence was from God!\n\n'Spectre,' said Scrooge, 'something informs me that our parting moment\nis at hand. I know it but I know not how. Tell me what man that was whom\nwe saw lying dead?'\n\nThe Ghost of Christmas Yet to Come conveyed him, as before--though at a\ndifferent time, he thought: indeed there seemed no order in these latter\nvisions, save that they were in the Future--into the resorts of business\nmen, but showed him not himself. Indeed, the Spirit did not stay for\nanything, but went straight on, as to the end just now desired, until\nbesought by Scrooge to tarry for a moment.\n\n'This court,' said Scrooge, 'through which we hurry now, is where my\nplace of occupation is, and has been for a length of time. I see the\nhouse. Let me behold what I shall be in days to come.'\n\nThe Spirit stopped; the hand was pointed elsewhere.\n\n'The house is yonder,' Scrooge exclaimed. 'Why do you point away?'\n\nThe inexorable finger underwent no change.\n\nScrooge hastened to the window of his office, and looked in. It was an\noffice still, but not his. The furniture was not the same, and the\nfigure in the chair was not himself. The Phantom pointed as before.\n\nHe joined it once again, and, wondering why and whither he had gone,\naccompanied it until they reached an iron gate. He paused to look round\nbefore entering.\n\nA churchyard. Here, then, the wretched man, whose name he had now to\nlearn, lay underneath the ground. It was a worthy place. 
Walled in by\nhouses; overrun by grass and weeds, the growth of vegetation's death,\nnot life; choked up with too much burying; fat with repleted appetite. A\nworthy place!\n\nThe Spirit stood among the graves, and pointed down to One. He advanced\ntowards it trembling. The Phantom was exactly as it had been, but he\ndreaded that he saw new meaning in its solemn shape.\n\n'Before I draw nearer to that stone to which you point,' said Scrooge,\n'answer me one question. Are these the shadows of the things that Will\nbe, or are they shadows of the things that May be only?'\n\nStill the Ghost pointed downward to the grave by which it stood.\n\n'Men's courses will foreshadow certain ends, to which, if persevered in,\nthey must lead,' said Scrooge. 'But if the courses be departed from, the\nends will change. Say it is thus with what you show me!'\n\nThe Spirit was immovable as ever.\n\nScrooge crept towards it, trembling as he went; and, following the\nfinger, read upon the stone of the neglected grave his own name,\nEBENEZER SCROOGE.\n\n'Am I that man who lay upon the bed?' he cried upon his knees.\n\nThe finger pointed from the grave to him, and back again.\n\n'No, Spirit! Oh no, no!'\n\nThe finger still was there.\n\n'Spirit!' he cried, tight clutching at its robe, 'hear me! I am not the\nman I was. I will not be the man I must have been but for this\nintercourse. Why show me this, if I am past all hope?'\n\nFor the first time the hand appeared to shake.\n\n'Good Spirit,' he pursued, as down upon the ground he fell before it,\n'your nature intercedes for me, and pities me. Assure me that I yet may\nchange these shadows you have shown me by an altered life?'\n\nThe kind hand trembled.\n\n'I will honour Christmas in my heart, and try to keep it all the year. I\nwill live in the Past, the Present, and the Future. The Spirits of all\nThree shall strive within me. I will not shut out the lessons that they\nteach. 
Oh, tell me I may sponge away the writing on this stone!'\n\nIn his agony he caught the spectral hand. It sought to free itself, but\nhe was strong in his entreaty, and detained it. The Spirit, stronger yet,\nrepulsed him.\n\nHolding up his hands in a last prayer to have his fate reversed, he saw\nan alteration in the Phantom's hood and dress. It shrunk, collapsed, and\ndwindled down into a bedpost.\n\n\nSTAVE FIVE\n\n\n[Illustration]\n\n\n\n\nTHE END OF IT\n\n\nYes! and the bedpost was his own. The bed was his own, the room was his\nown. Best and happiest of all, the Time before him was his own, to make\namends in!\n\n'I will live in the Past, the Present, and the Future!' Scrooge repeated\nas he scrambled out of bed. 'The Spirits of all Three shall strive\nwithin me. O Jacob Marley! Heaven and the Christmas Time be praised for\nthis! I say it on my knees, old Jacob; on my knees!'\n\nHe was so fluttered and so glowing with his good intentions, that his\nbroken voice would scarcely answer to his call. He had been sobbing\nviolently in his conflict with the Spirit, and his face was wet with\ntears.\n\n'They are not torn down,' cried Scrooge, folding one of his bed-curtains\nin his arms, 'They are not torn down, rings and all. They are here--I am\nhere--the shadows of the things that would have been may be dispelled.\nThey will be. I know they will!'\n\nHis hands were busy with his garments all this time: turning them inside\nout, putting them on upside down, tearing them, mislaying them, making\nthem parties to every kind of extravagance.\n\n'I don't know what to do!' cried Scrooge, laughing and crying in the\nsame breath, and making a perfect Laocoon of himself with his stockings.\n'I am as light as a feather, I am as happy as an angel, I am as merry as\na schoolboy, I am as giddy as a drunken man. A merry Christmas to\neverybody! A happy New Year to all the world! Hallo here! Whoop! 
Hallo!'\n\nHe had frisked into the sitting-room, and was now standing there,\nperfectly winded.\n\n'There's the saucepan that the gruel was in!' cried Scrooge, starting\noff again, and going round the fireplace. 'There's the door by which the\nGhost of Jacob Marley entered! There's the corner where the Ghost of\nChristmas Present sat! There's the window where I saw the wandering\nSpirits! It's all right, it's all true, it all happened. Ha, ha, ha!'\n\nReally, for a man who had been out of practice for so many years, it was\na splendid laugh, a most illustrious laugh. The father of a long, long\nline of brilliant laughs!\n\n'I don't know what day of the month it is,' said Scrooge. 'I don't know\nhow long I have been among the Spirits. I don't know anything. I'm quite\na baby. Never mind. I don't care. I'd rather be a baby. Hallo! Whoop!\nHallo here!'\n\nHe was checked in his transports by the churches ringing out the\nlustiest peals he had ever heard. Clash, clash, hammer; ding, dong,\nbell! Bell, dong, ding; hammer, clash, clash! Oh, glorious, glorious!\n\nRunning to the window, he opened it, and put out his head. No fog, no\nmist; clear, bright, jovial, stirring, cold; cold, piping for the blood\nto dance to; golden sunlight; heavenly sky; sweet fresh air; merry\nbells. Oh, glorious! Glorious!\n\n'What's to-day?' cried Scrooge, calling downward to a boy in Sunday\nclothes, who perhaps had loitered in to look about him.\n\n'EH?' returned the boy with all his might of wonder.\n\n'What's to-day, my fine fellow?' said Scrooge.\n\n'To-day!' replied the boy. 'Why, CHRISTMAS DAY.'\n\n'It's Christmas Day!' said Scrooge to himself. 'I haven't missed it. The\nSpirits have done it all in one night. They can do anything they like.\nOf course they can. Of course they can. Hallo, my fine fellow!'\n\n'Hallo!' 
returned the boy.\n\n'Do you know the poulterer's in the next street but one, at the corner?'\nScrooge inquired.\n\n'I should hope I did,' replied the lad.\n\n'An intelligent boy!' said Scrooge. 'A remarkable boy! Do you know\nwhether they've sold the prize turkey that was hanging up there?--Not\nthe little prize turkey: the big one?'\n\n'What! the one as big as me?' returned the boy.\n\n'What a delightful boy!' said Scrooge. 'It's a pleasure to talk to him.\nYes, my buck!'\n\n'It's hanging there now,' replied the boy.\n\n'Is it?' said Scrooge. 'Go and buy it.'\n\n'Walk-ER!' exclaimed the boy.\n\n'No, no,' said Scrooge. 'I am in earnest. Go and buy it, and tell 'em to\nbring it here, that I may give them the directions where to take it.\nCome back with the man, and I'll give you a shilling. Come back with him\nin less than five minutes, and I'll give you half-a-crown!'\n\nThe boy was off like a shot. He must have had a steady hand at a trigger\nwho could have got a shot off half as fast.\n\n'I'll send it to Bob Cratchit's,' whispered Scrooge, rubbing his hands,\nand splitting with a laugh. 'He shan't know who sends it. It's twice the\nsize of Tiny Tim. Joe Miller never made such a joke as sending it to\nBob's will be!'\n\nThe hand in which he wrote the address was not a steady one; but write\nit he did, somehow, and went downstairs to open the street-door, ready\nfor the coming of the poulterer's man. As he stood there, waiting his\narrival, the knocker caught his eye.\n\n'I shall love it as long as I live!' cried Scrooge, patting it with his\nhand. 'I scarcely ever looked at it before. What an honest expression it\nhas in its face! It's a wonderful knocker!--Here's the turkey. Hallo!\nWhoop! How are you! Merry Christmas!'\n\nIt _was_ a turkey! He never could have stood upon his legs, that bird.\nHe would have snapped 'em short off in a minute, like sticks of\nsealing-wax.\n\n'Why, it's impossible to carry that to Camden Town,' said Scrooge. 
'You\nmust have a cab.'\n\nThe chuckle with which he said this, and the chuckle with which he paid\nfor the turkey, and the chuckle with which he paid for the cab, and the\nchuckle with which he recompensed the boy, were only to be exceeded by\nthe chuckle with which he sat down breathless in his chair again, and\nchuckled till he cried.\n\nShaving was not an easy task, for his hand continued to shake very much;\nand shaving requires attention, even when you don't dance while you are\nat it. But if he had cut the end of his nose off, he would have put a\npiece of sticking-plaster over it, and been quite satisfied.\n\nHe dressed himself 'all in his best,' and at last got out into the\nstreets. The people were by this time pouring forth, as he had seen them\nwith the Ghost of Christmas Present; and, walking with his hands behind\nhim, Scrooge regarded every one with a delighted smile. He looked so\nirresistibly pleasant, in a word, that three or four good-humoured\nfellows said, 'Good-morning, sir! A merry Christmas to you!' And Scrooge\nsaid often afterwards that, of all the blithe sounds he had ever heard,\nthose were the blithest in his ears.\n\nHe had not gone far when, coming on towards him, he beheld the portly\ngentleman who had walked into his counting-house the day before, and\nsaid, 'Scrooge and Marley's, I believe?' It sent a pang across his heart\nto think how this old gentleman would look upon him when they met; but\nhe knew what path lay straight before him, and he took it.\n\n'My dear sir,' said Scrooge, quickening his pace, and taking the old\ngentleman by both his hands, 'how do you do? I hope you succeeded\nyesterday. It was very kind of you. A merry Christmas to you, sir!'\n\n'Mr. Scrooge?'\n\n'Yes,' said Scrooge. 'That is my name, and I fear it may not be pleasant\nto you. Allow me to ask your pardon. And will you have the goodness----'\nHere Scrooge whispered in his ear.\n\n'Lord bless me!' 
cried the gentleman, as if his breath were taken away.\n'My dear Mr. Scrooge, are you serious?'\n\n'If you please,' said Scrooge. 'Not a farthing less. A great many\nback-payments are included in it, I assure you. Will you do me that\nfavour?'\n\n'My dear sir,' said the other, shaking hands with him, 'I don't know\nwhat to say to such munifi----'\n\n'Don't say anything, please,' retorted Scrooge. 'Come and see me. Will\nyou come and see me?'\n\n'I will!' cried the old gentleman. And it was clear he meant to do it.\n\n'Thankee,' said Scrooge. 'I am much obliged to you. I thank you fifty\ntimes. Bless you!'\n\nHe went to church, and walked about the streets, and watched the people\nhurrying to and fro, and patted the children on the head, and questioned\nbeggars, and looked down into the kitchens of houses, and up to the\nwindows; and found that everything could yield him pleasure. He had\nnever dreamed that any walk--that anything--could give him so much\nhappiness. In the afternoon he turned his steps towards his nephew's\nhouse.\n\nHe passed the door a dozen times before he had the courage to go up and\nknock. But he made a dash and did it.\n\n'Is your master at home, my dear?' said Scrooge to the girl. 'Nice girl!\nVery.'\n\n'Yes, sir.'\n\n'Where is he, my love?' said Scrooge.\n\n'He's in the dining-room, sir, along with mistress. I'll show you\nupstairs, if you please.'\n\n'Thankee. He knows me,' said Scrooge, with his hand already on the\ndining-room lock. 'I'll go in here, my dear.'\n\nHe turned it gently, and sidled his face in round the door. They were\nlooking at the table (which was spread out in great array); for these\nyoung housekeepers are always nervous on such points, and like to see\nthat everything is right.\n\n'Fred!' said Scrooge.\n\nDear heart alive, how his niece by marriage started! Scrooge had\nforgotten, for the moment, about her sitting in the corner with the\nfootstool, or he wouldn't have done it on any account.\n\n'Why, bless my soul!' 
cried Fred, 'who's that?'\n\n[Illustration: _\"It's I, your uncle Scrooge. I have come to dinner. Will\nyou let me in, Fred?\"_]\n\n'It's I. Your uncle Scrooge. I have come to dinner. Will you let me in,\nFred?'\n\nLet him in! It is a mercy he didn't shake his arm off. He was at home in\nfive minutes. Nothing could be heartier. His niece looked just the same.\nSo did Topper when _he_ came. So did the plump sister when _she_ came.\nSo did every one when _they_ came. Wonderful party, wonderful games,\nwonderful unanimity, won-der-ful happiness!\n\nBut he was early at the office next morning. Oh, he was early there! If\nhe could only be there first, and catch Bob Cratchit coming late! That\nwas the thing he had set his heart upon.\n\nAnd he did it; yes, he did! The clock struck nine. No Bob. A quarter\npast. No Bob. He was full eighteen minutes and a half behind his time.\nScrooge sat with his door wide open, that he might see him come into the\ntank.\n\nHis hat was off before he opened the door; his comforter too. He was on\nhis stool in a jiffy, driving away with his pen, as if he were trying to\novertake nine o'clock.\n\n'Hallo!' growled Scrooge in his accustomed voice as near as he could\nfeign it. 'What do you mean by coming here at this time of day?'\n\n'I am very sorry, sir,' said Bob. 'I _am_ behind my time.'\n\n'You are!' repeated Scrooge. 'Yes, I think you are. Step this way, sir,\nif you please.'\n\n'It's only once a year, sir,' pleaded Bob, appearing from the tank. 'It\nshall not be repeated. I was making rather merry yesterday, sir.'\n\n'Now, I'll tell you what, my friend,' said Scrooge. 'I am not going to\nstand this sort of thing any longer. And therefore,' he continued,\nleaping from his stool, and giving Bob such a dig in the waistcoat that\nhe staggered back into the tank again--'and therefore I am about to\nraise your salary!'\n\nBob trembled, and got a little nearer to the ruler. 
He had a momentary\nidea of knocking Scrooge down with it, holding him, and calling to the\npeople in the court for help and a strait-waistcoat.\n\n'A merry Christmas, Bob!' said Scrooge, with an earnestness that could\nnot be mistaken, as he clapped him on the back. 'A merrier Christmas,\nBob, my good fellow, than I have given you for many a year! I'll raise\nyour salary, and endeavour to assist your struggling family, and we will\ndiscuss your affairs this very afternoon, over a Christmas bowl of\nsmoking bishop, Bob! Make up the fires and buy another coal-scuttle\nbefore you dot another i, Bob Cratchit!'\n\n[Illustration: _\"Now, I'll tell you what, my friend,\" said Scrooge. \"I\nam not going to stand this sort of thing any longer.\"_]\n\nScrooge was better than his word. He did it all, and infinitely more;\nand to Tiny Tim, who did NOT die, he was a second father. He became as\ngood a friend, as good a master, and as good a man as the good old\nCity knew, or any other good old city, town, or borough in the good old\nworld. Some people laughed to see the alteration in him, but he let them\nlaugh, and little heeded them; for he was wise enough to know that\nnothing ever happened on this globe, for good, at which some people did\nnot have their fill of laughter in the outset; and knowing that such as\nthese would be blind anyway, he thought it quite as well that they\nshould wrinkle up their eyes in grins as have the malady in less\nattractive forms. His own heart laughed, and that was quite enough for\nhim.\n\nHe had no further intercourse with Spirits, but lived upon the\nTotal-Abstinence Principle ever afterwards; and it was always said of\nhim that he knew how to keep Christmas well, if any man alive possessed\nthe knowledge. May that be truly said of us, and all of us! 
And so, as\nTiny Tim observed, God bless Us, Every One!\n\n[Illustration]\n\n+---------------------------------------------------------------+\n|Transcriber's note: The Contents were added by the transcriber.|\n+---------------------------------------------------------------+\n\n\n\n\n\n\n\n*** END OF THE PROJECT GUTENBERG EBOOK A CHRISTMAS CAROL ***\n\n\n \n\nUpdated editions will replace the previous one—the old editions will\nbe renamed.\n\nCreating the works from print editions not protected by U.S. copyright\nlaw means that no one owns a United States copyright in these works,\nso the Foundation (and you!) can copy and distribute it in the United\nStates without permission and without paying copyright\nroyalties. Special rules, set forth in the General Terms of Use part\nof this license, apply to copying and distributing Project\nGutenberg™ electronic works to protect the PROJECT GUTENBERG™\nconcept and trademark. Project Gutenberg is a registered trademark,\nand may not be used if you charge for an eBook, except by following\nthe terms of the trademark license, including paying royalties for use\nof the Project Gutenberg trademark. If you do not charge anything for\ncopies of this eBook, complying with the trademark license is very\neasy. You may use this eBook for nearly any purpose such as creation\nof derivative works, reports, performances and research. Project\nGutenberg eBooks may be modified and printed and given away—you may\ndo practically ANYTHING in the United States with eBooks not protected\nby U.S. copyright law. 
Redistribution is subject to the trademark\nlicense, especially commercial redistribution.\n\n\nSTART: FULL LICENSE\n\nTHE FULL PROJECT GUTENBERG LICENSE\n\nPLEASE READ THIS BEFORE YOU DISTRIBUTE OR USE THIS WORK\n\nTo protect the Project Gutenberg™ mission of promoting the free\ndistribution of electronic works, by using or distributing this work\n(or any other work associated in any way with the phrase “Project\nGutenberg”), you agree to comply with all the terms of the Full\nProject Gutenberg™ License available with this file or online at\nwww.gutenberg.org/license.\n\nSection 1. General Terms of Use and Redistributing Project Gutenberg™\nelectronic works\n\n1.A. By reading or using any part of this Project Gutenberg™\nelectronic work, you indicate that you have read, understand, agree to\nand accept all the terms of this license and intellectual property\n(trademark/copyright) agreement. If you do not agree to abide by all\nthe terms of this agreement, you must cease using and return or\ndestroy all copies of Project Gutenberg™ electronic works in your\npossession. If you paid a fee for obtaining a copy of or access to a\nProject Gutenberg™ electronic work and you do not agree to be bound\nby the terms of this agreement, you may obtain a refund from the person\nor entity to whom you paid the fee as set forth in paragraph 1.E.8.\n\n1.B. “Project Gutenberg” is a registered trademark. It may only be\nused on or associated in any way with an electronic work by people who\nagree to be bound by the terms of this agreement. There are a few\nthings that you can do with most Project Gutenberg™ electronic works\neven without complying with the full terms of this agreement. See\nparagraph 1.C below. There are a lot of things you can do with Project\nGutenberg™ electronic works if you follow the terms of this\nagreement and help preserve free future access to Project Gutenberg™\nelectronic works. See paragraph 1.E below.\n\n1.C. 
The Project Gutenberg Literary Archive Foundation (“the\nFoundation” or PGLAF), owns a compilation copyright in the collection\nof Project Gutenberg™ electronic works. Nearly all the individual\nworks in the collection are in the public domain in the United\nStates. If an individual work is unprotected by copyright law in the\nUnited States and you are located in the United States, we do not\nclaim a right to prevent you from copying, distributing, performing,\ndisplaying or creating derivative works based on the work as long as\nall references to Project Gutenberg are removed. Of course, we hope\nthat you will support the Project Gutenberg™ mission of promoting\nfree access to electronic works by freely sharing Project Gutenberg™\nworks in compliance with the terms of this agreement for keeping the\nProject Gutenberg™ name associated with the work. You can easily\ncomply with the terms of this agreement by keeping this work in the\nsame format with its attached full Project Gutenberg™ License when\nyou share it without charge with others.\n\n1.D. The copyright laws of the place where you are located also govern\nwhat you can do with this work. Copyright laws in most countries are\nin a constant state of change. If you are outside the United States,\ncheck the laws of your country in addition to the terms of this\nagreement before downloading, copying, displaying, performing,\ndistributing or creating derivative works based on this work or any\nother Project Gutenberg™ work. The Foundation makes no\nrepresentations concerning the copyright status of any work in any\ncountry other than the United States.\n\n1.E. Unless you have removed all references to Project Gutenberg:\n\n1.E.1. 
The following sentence, with active links to, or other\nimmediate access to, the full Project Gutenberg™ License must appear\nprominently whenever any copy of a Project Gutenberg™ work (any work\non which the phrase “Project Gutenberg” appears, or with which the\nphrase “Project Gutenberg” is associated) is accessed, displayed,\nperformed, viewed, copied or distributed:\n\n This eBook is for the use of anyone anywhere in the United States and most\n other parts of the world at no cost and with almost no restrictions\n whatsoever. You may copy it, give it away or re-use it under the terms\n of the Project Gutenberg License included with this eBook or online\n at www.gutenberg.org. If you\n are not located in the United States, you will have to check the laws\n of the country where you are located before using this eBook.\n \n1.E.2. If an individual Project Gutenberg™ electronic work is\nderived from texts not protected by U.S. copyright law (does not\ncontain a notice indicating that it is posted with permission of the\ncopyright holder), the work can be copied and distributed to anyone in\nthe United States without paying any fees or charges. If you are\nredistributing or providing access to a work with the phrase “Project\nGutenberg” associated with or appearing on the work, you must comply\neither with the requirements of paragraphs 1.E.1 through 1.E.7 or\nobtain permission for the use of the work and the Project Gutenberg™\ntrademark as set forth in paragraphs 1.E.8 or 1.E.9.\n\n1.E.3. If an individual Project Gutenberg™ electronic work is posted\nwith the permission of the copyright holder, your use and distribution\nmust comply with both paragraphs 1.E.1 through 1.E.7 and any\nadditional terms imposed by the copyright holder. Additional terms\nwill be linked to the Project Gutenberg™ License for all works\nposted with the permission of the copyright holder found at the\nbeginning of this work.\n\n1.E.4. 
Do not unlink or detach or remove the full Project Gutenberg™\nLicense terms from this work, or any files containing a part of this\nwork or any other work associated with Project Gutenberg™.\n\n1.E.5. Do not copy, display, perform, distribute or redistribute this\nelectronic work, or any part of this electronic work, without\nprominently displaying the sentence set forth in paragraph 1.E.1 with\nactive links or immediate access to the full terms of the Project\nGutenberg™ License.\n\n1.E.6. You may convert to and distribute this work in any binary,\ncompressed, marked up, nonproprietary or proprietary form, including\nany word processing or hypertext form. However, if you provide access\nto or distribute copies of a Project Gutenberg™ work in a format\nother than “Plain Vanilla ASCII” or other format used in the official\nversion posted on the official Project Gutenberg™ website\n(www.gutenberg.org), you must, at no additional cost, fee or expense\nto the user, provide a copy, a means of exporting a copy, or a means\nof obtaining a copy upon request, of the work in its original “Plain\nVanilla ASCII” or other form. Any alternate format must include the\nfull Project Gutenberg™ License as specified in paragraph 1.E.1.\n\n1.E.7. Do not charge a fee for access to, viewing, displaying,\nperforming, copying or distributing any Project Gutenberg™ works\nunless you comply with paragraph 1.E.8 or 1.E.9.\n\n1.E.8. You may charge a reasonable fee for copies of or providing\naccess to or distributing Project Gutenberg™ electronic works\nprovided that:\n\n • You pay a royalty fee of 20% of the gross profits you derive from\n the use of Project Gutenberg™ works calculated using the method\n you already use to calculate your applicable taxes. The fee is owed\n to the owner of the Project Gutenberg™ trademark, but he has\n agreed to donate royalties under this paragraph to the Project\n Gutenberg Literary Archive Foundation. 
Royalty payments must be paid\n within 60 days following each date on which you prepare (or are\n legally required to prepare) your periodic tax returns. Royalty\n payments should be clearly marked as such and sent to the Project\n Gutenberg Literary Archive Foundation at the address specified in\n Section 4, “Information about donations to the Project Gutenberg\n Literary Archive Foundation.”\n \n • You provide a full refund of any money paid by a user who notifies\n you in writing (or by e-mail) within 30 days of receipt that s/he\n does not agree to the terms of the full Project Gutenberg™\n License. You must require such a user to return or destroy all\n copies of the works possessed in a physical medium and discontinue\n all use of and all access to other copies of Project Gutenberg™\n works.\n \n • You provide, in accordance with paragraph 1.F.3, a full refund of\n any money paid for a work or a replacement copy, if a defect in the\n electronic work is discovered and reported to you within 90 days of\n receipt of the work.\n \n • You comply with all other terms of this agreement for free\n distribution of Project Gutenberg™ works.\n \n\n1.E.9. If you wish to charge a fee or distribute a Project\nGutenberg™ electronic work or group of works on different terms than\nare set forth in this agreement, you must obtain permission in writing\nfrom the Project Gutenberg Literary Archive Foundation, the manager of\nthe Project Gutenberg™ trademark. Contact the Foundation as set\nforth in Section 3 below.\n\n1.F.\n\n1.F.1. Project Gutenberg volunteers and employees expend considerable\neffort to identify, do copyright research on, transcribe and proofread\nworks not protected by U.S. copyright law in creating the Project\nGutenberg™ collection. 
Despite these efforts, Project Gutenberg™\nelectronic works, and the medium on which they may be stored, may\ncontain “Defects,” such as, but not limited to, incomplete, inaccurate\nor corrupt data, transcription errors, a copyright or other\nintellectual property infringement, a defective or damaged disk or\nother medium, a computer virus, or computer codes that damage or\ncannot be read by your equipment.\n\n1.F.2. LIMITED WARRANTY, DISCLAIMER OF DAMAGES - Except for the “Right\nof Replacement or Refund” described in paragraph 1.F.3, the Project\nGutenberg Literary Archive Foundation, the owner of the Project\nGutenberg™ trademark, and any other party distributing a Project\nGutenberg™ electronic work under this agreement, disclaim all\nliability to you for damages, costs and expenses, including legal\nfees. YOU AGREE THAT YOU HAVE NO REMEDIES FOR NEGLIGENCE, STRICT\nLIABILITY, BREACH OF WARRANTY OR BREACH OF CONTRACT EXCEPT THOSE\nPROVIDED IN PARAGRAPH 1.F.3. YOU AGREE THAT THE FOUNDATION, THE\nTRADEMARK OWNER, AND ANY DISTRIBUTOR UNDER THIS AGREEMENT WILL NOT BE\nLIABLE TO YOU FOR ACTUAL, DIRECT, INDIRECT, CONSEQUENTIAL, PUNITIVE OR\nINCIDENTAL DAMAGES EVEN IF YOU GIVE NOTICE OF THE POSSIBILITY OF SUCH\nDAMAGE.\n\n1.F.3. LIMITED RIGHT OF REPLACEMENT OR REFUND - If you discover a\ndefect in this electronic work within 90 days of receiving it, you can\nreceive a refund of the money (if any) you paid for it by sending a\nwritten explanation to the person you received the work from. If you\nreceived the work on a physical medium, you must return the medium\nwith your written explanation. The person or entity that provided you\nwith the defective work may elect to provide a replacement copy in\nlieu of a refund. If you received the work electronically, the person\nor entity providing it to you may choose to give you a second\nopportunity to receive the work electronically in lieu of a refund. 
If\nthe second copy is also defective, you may demand a refund in writing\nwithout further opportunities to fix the problem.\n\n1.F.4. Except for the limited right of replacement or refund set forth\nin paragraph 1.F.3, this work is provided to you ‘AS-IS’, WITH NO\nOTHER WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT\nLIMITED TO WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PURPOSE.\n\n1.F.5. Some states do not allow disclaimers of certain implied\nwarranties or the exclusion or limitation of certain types of\ndamages. If any disclaimer or limitation set forth in this agreement\nviolates the law of the state applicable to this agreement, the\nagreement shall be interpreted to make the maximum disclaimer or\nlimitation permitted by the applicable state law. The invalidity or\nunenforceability of any provision of this agreement shall not void the\nremaining provisions.\n\n1.F.6. INDEMNITY - You agree to indemnify and hold the Foundation, the\ntrademark owner, any agent or employee of the Foundation, anyone\nproviding copies of Project Gutenberg™ electronic works in\naccordance with this agreement, and any volunteers associated with the\nproduction, promotion and distribution of Project Gutenberg™\nelectronic works, harmless from all liability, costs and expenses,\nincluding legal fees, that arise directly or indirectly from any of\nthe following which you do or cause to occur: (a) distribution of this\nor any Project Gutenberg™ work, (b) alteration, modification, or\nadditions or deletions to any Project Gutenberg™ work, and (c) any\nDefect you cause.\n\nSection 2. Information about the Mission of Project Gutenberg™\n\nProject Gutenberg™ is synonymous with the free distribution of\nelectronic works in formats readable by the widest variety of\ncomputers including obsolete, old, middle-aged and new computers. 
It\nexists because of the efforts of hundreds of volunteers and donations\nfrom people in all walks of life.\n\nVolunteers and financial support to provide volunteers with the\nassistance they need are critical to reaching Project Gutenberg™’s\ngoals and ensuring that the Project Gutenberg™ collection will\nremain freely available for generations to come. In 2001, the Project\nGutenberg Literary Archive Foundation was created to provide a secure\nand permanent future for Project Gutenberg™ and future\ngenerations. To learn more about the Project Gutenberg Literary\nArchive Foundation and how your efforts and donations can help, see\nSections 3 and 4 and the Foundation information page at www.gutenberg.org.\n\nSection 3. Information about the Project Gutenberg Literary Archive Foundation\n\nThe Project Gutenberg Literary Archive Foundation is a non-profit\n501(c)(3) educational corporation organized under the laws of the\nstate of Mississippi and granted tax exempt status by the Internal\nRevenue Service. The Foundation’s EIN or federal tax identification\nnumber is 64-6221541. Contributions to the Project Gutenberg Literary\nArchive Foundation are tax deductible to the full extent permitted by\nU.S. federal laws and your state’s laws.\n\nThe Foundation’s business office is located at 809 North 1500 West,\nSalt Lake City, UT 84116, (801) 596-1887. Email contact links and up\nto date contact information can be found at the Foundation’s website\nand official page at www.gutenberg.org/contact\n\nSection 4. Information about Donations to the Project Gutenberg\nLiterary Archive Foundation\n\nProject Gutenberg™ depends upon and cannot survive without widespread\npublic support and donations to carry out its mission of\nincreasing the number of public domain and licensed works that can be\nfreely distributed in machine-readable form accessible by the widest\narray of equipment including outdated equipment. 
Many small donations\n($1 to $5,000) are particularly important to maintaining tax exempt\nstatus with the IRS.\n\nThe Foundation is committed to complying with the laws regulating\ncharities and charitable donations in all 50 states of the United\nStates. Compliance requirements are not uniform and it takes a\nconsiderable effort, much paperwork and many fees to meet and keep up\nwith these requirements. We do not solicit donations in locations\nwhere we have not received written confirmation of compliance. To SEND\nDONATIONS or determine the status of compliance for any particular state\nvisit www.gutenberg.org/donate.\n\nWhile we cannot and do not solicit contributions from states where we\nhave not met the solicitation requirements, we know of no prohibition\nagainst accepting unsolicited donations from donors in such states who\napproach us with offers to donate.\n\nInternational donations are gratefully accepted, but we cannot make\nany statements concerning tax treatment of donations received from\noutside the United States. U.S. laws alone swamp our small staff.\n\nPlease check the Project Gutenberg web pages for current donation\nmethods and addresses. Donations are accepted in a number of other\nways including checks, online payments and credit card donations. To\ndonate, please visit: www.gutenberg.org/donate.\n\nSection 5. General Information About Project Gutenberg™ electronic works\n\nProfessor Michael S. Hart was the originator of the Project\nGutenberg™ concept of a library of electronic works that could be\nfreely shared with anyone. For forty years, he produced and\ndistributed Project Gutenberg™ eBooks with only a loose network of\nvolunteer support.\n\nProject Gutenberg™ eBooks are often created from several printed\neditions, all of which are confirmed as not protected by copyright in\nthe U.S. unless a copyright notice is included. 
Thus, we do not\nnecessarily keep eBooks in compliance with any particular paper\nedition.\n\nMost people start at our website which has the main PG search\nfacility: www.gutenberg.org.\n\nThis website includes information about Project Gutenberg™,\nincluding how to make donations to the Project Gutenberg Literary\nArchive Foundation, how to help produce our new eBooks, and how to\nsubscribe to our email newsletter to hear about new eBooks."
+ }
+}
\ No newline at end of file
diff --git a/neo4jWorkDir/kv_store_llm_response_cache.json b/neo4jWorkDir/kv_store_llm_response_cache.json
new file mode 100644
index 00000000..5758b7e9
--- /dev/null
+++ b/neo4jWorkDir/kv_store_llm_response_cache.json
@@ -0,0 +1,354 @@
+{
+ "ef7a5529fddead830ae9f9fdfb9e47cf": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is the main character, portrayed as a solitary, cold-hearted man experiencing supernatural events related to Marley’s ghost.\")##\n(\"entity\"<|>\"Marley's Ghost\"<|>\"event\"<|>\"Marley's Ghost represents the supernatural apparition of Marley, who has been dead for seven years and symbolizes regret and warning to Scrooge.\")##\n(\"entity\"<|>\"The House\"<|>\"geo\"<|>\"The House is the setting where Scrooge experiences the haunting, with a description of its features contributing to the eerie atmosphere.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Marley's Ghost\"<|>\"Scrooge is haunted by Marley's Ghost, which confronts him as a part of a supernatural occurrence meant to incite fear and reflection on his life choices.\"<|>\"supernatural confrontation, regret\"<|>10)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The House\"<|>\"Scrooge's experiences with Marley's Ghost occur within The House, which plays a crucial role in the haunting atmosphere.\"<|>\"haunting, setting influence\"<|>8)##\n(\"content_keywords\"<|>\"haunting, supernatural, regret, isolation\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "09c3e65acef33139a3334086692f6727": {
+ "return": "(\"entity\"<|>\"Jacob Marley\"<|>\"person\"<|>\"Jacob Marley is a ghost who appears to Scrooge, having been his business partner in life, and is now bound by chains that signify his earthly ties and regrets.\")##\n(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a cold-hearted, skeptical man who encounters the ghost of Jacob Marley and struggles to believe in the supernatural.\")##\n(\"entity\"<|>\"The Ghost\"<|>\"event\"<|>\"The Ghost is a spectral manifestation of Jacob Marley, appearing to communicate a vital message to Scrooge about the spiritual consequences of his life choices.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Jacob Marley\"<|>\"Scrooge confronts Jacob Marley, his former business partner, who returns as a ghost to influence Scrooge's perspective on life and his actions.\"<|>\"supernatural encounter, past ties\"<|>9)##\n(\"relationship\"<|>\"The Ghost\"<|>\"Jacob Marley\"<|>\"The Ghost is the spectral form of Jacob Marley, representing his ongoing existence and his desire to communicate a warning to Scrooge.\"<|>\"spectral representation, message delivery\"<|>10)##\n(\"content_keywords\"<|>\"supernatural, redemption, spectral message, skepticism\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "ac87fabbb84a6d5cca16454aa74afa60": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a miserly character, cold-hearted and solitary, known for his disdain for Christmas and lack of human sympathy.\")##\n(\"entity\"<|>\"Scrooge's Nephew\"<|>\"person\"<|>\"Scrooge's Nephew is a cheerful and optimistic character who represents the spirit of Christmas, opposing Scrooge's views.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is portrayed as a festive time that Scrooge dismisses, emphasizing the contrast between joy and his miserable attitude.\")##\n(\"entity\"<|>\"The Counting-House\"<|>\"organization\"<|>\"The Counting-House is where Scrooge works and is characterized by its cold and unwelcoming environment.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Scrooge's Nephew\"<|>\"Scrooge's Nephew tries to cheer Scrooge up and promote the joy of Christmas, but is met with rejection and negativity.\"<|>\"family dynamics, opposing views\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Christmas\"<|>\"Scrooge expresses disdain for Christmas, viewing it as a burden rather than a celebration.\"<|>\"conflict, holiday spirit\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Counting-House\"<|>\"Scrooge's work environment reflects his personality, being cold and uninviting, mirroring his discontent with life.\"<|>\"work environment, personality reflection\"<|>7)##\n(\"content_keywords\"<|>\"disdain for Christmas, family conflict, isolation, work ethic\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "00d7b6433965192b078c10dfaf4206e0": {
+ "return": "(\"entity\"<|>\"Ebenezer Scrooge\"<|>\"person\"<|>\"Ebenezer Scrooge is a protagonist who experiences confusion and fear regarding the passage of time and supernatural events, especially concerned about a ghostly visitation.\")##\n(\"entity\"<|>\"Marley's Ghost\"<|>\"event\"<|>\"Marley's Ghost is a spectral visitation that deeply troubles Scrooge and causes him great perplexity, leading him to question the reality of his experiences.\")##\n(\"entity\"<|>\"The Clock\"<|>\"technology\"<|>\"The Clock is a device that malfunctions, contributing to Scrooge's confusion about time and reality in the narrative.\")##\n(\"entity\"<|>\"The Visitor\"<|>\"person\"<|>\"The Visitor is a mysterious figure that appears to Scrooge; it possesses qualities of both youth and age, signifying the blend of time and supernatural elements.\")##\n(\"relationship\"<|>\"Ebenezer Scrooge\"<|>\"Marley's Ghost\"<|>\"Scrooge is troubled by Marley's Ghost, which leads him to question the nature of his reality and whether he has been dreaming.\"<|>\"supernatural influence, distress\"<|>8)##\n(\"relationship\"<|>\"Ebenezer Scrooge\"<|>\"The Visitor\"<|>\"Scrooge comes face to face with The Visitor, indicating a significant encounter that affects his understanding of life and death.\"<|>\"supernatural encounter, revelation\"<|>9)##\n(\"relationship\"<|>\"Ebenezer Scrooge\"<|>\"The Clock\"<|>\"Scrooge's confusion about time is exacerbated by The Clock's malfunction, heightening his sense of disorientation.\"<|>\"temporal confusion, malfunction\"<|>7)##\n(\"content_keywords\"<|>\"supernatural, time, confusion, spectral visitation\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "4498cd83d1404d5f1a79e9d76791f24a": {
+ "return": "(\"entity\"<|>\"Fezziwig\"<|>\"person\"<|>\"Fezziwig is depicted as a joyful character who dances with his wife and embodies the spirit of Christmas.\")##\n(\"entity\"<|>\"Mrs. Fezziwig\"<|>\"person\"<|>\"Mrs. Fezziwig is the wife of Fezziwig, participating in the dance, representing companionship and festive cheer.\")##\n(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a central character, known for his miserly nature and single-minded focus on business, who also mourns Marley.\")##\n(\"entity\"<|>\"Marley\"<|>\"person\"<|>\"Marley is Scrooge's deceased partner whose death is emphasized, and who impacts Scrooge's story through memories and reflections.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is the season during which festive activities and celebrations are occurring, including dances and gatherings.\")##\n(\"relationship\"<|>\"Fezziwig\"<|>\"Mrs. Fezziwig\"<|>\"Fezziwig and Mrs. Fezziwig dance together, showcasing their joyful relationship and the spirit of togetherness during Christmas.\"<|>\"festivity, companionship\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Marley\"<|>\"Scrooge is deeply affected by Marley's death, illustrated through his role as the sole executor and mourner of Marley.\"<|>\"mourning, partnership\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Christmas\"<|>\"Scrooge's character contrasts sharply with the joy of Christmas, highlighting his disdain for the festive season.\"<|>\"contrast, seasonal significance\"<|>7)##\n(\"content_keywords\"<|>\"festivity, mourning, companionship, Christmas spirit\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "b6d217776b520bf85bf936916835d0b1": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a character who appears to have a complex relationship with his past, showing a mix of longing and resentment as he encounters the Spirit.\")##\n(\"entity\"<|>\"Ghost of Christmas Past\"<|>\"event\"<|>\"The Ghost of Christmas Past is a supernatural entity that aids Scrooge in reflecting on his past experiences and emotions.\")##\n(\"entity\"<|>\"The City\"<|>\"geo\"<|>\"The City, from which Scrooge is transported, represents his past life and is contrasted with the open country road that signifies his memories.\")##\n(\"entity\"<|>\"Market-Town\"<|>\"geo\"<|>\"The Market-Town is a nostalgic location from Scrooge's youth, evoking memories and emotions tied to his childhood.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Ghost of Christmas Past\"<|>\"Scrooge engages with the Ghost of Christmas Past to explore his earlier life, seeking insight into his current state.\"<|>\"reflection, past influence\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Market-Town\"<|>\"Scrooge recognizes the Market-Town, eliciting fond memories from his childhood and emphasizing his emotional response to the past.\"<|>\"nostalgia, memory\"<|>7)##\n(\"relationship\"<|>\"Ghost of Christmas Past\"<|>\"The City\"<|>\"The Ghost of Christmas Past helps transport Scrooge from The City to a memory of his past, highlighting the contrast between present and memory.\"<|>\"transportation, memory\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The City\"<|>\"Scrooge's relationship with The City is marked by detachment, representing his lost connection to his past self and experiences.\"<|>\"disconnection, personal evolution\"<|>5)##\n(\"content_keywords\"<|>\"reflection, nostalgia, personal evolution, memory\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "2b05f17e6e7f73aa7dc5f2c4d1ef948c": {
+ "return": "(\"entity\"<|>\"Ebenezer\"<|>\"person\"<|>\"Ebenezer is a character who reflects on his past with fondness, remembering his former self and his connection to Dick Wilkins.\")##\n(\"entity\"<|>\"Dick Wilkins\"<|>\"person\"<|>\"Dick Wilkins is Ebenezer's fellow apprentice, with whom he shared a strong attachment during their youth.\")##\n(\"entity\"<|>\"Fezziwig\"<|>\"organization\"<|>\"Fezziwig is a figure representing a cheerful and jovial employer who organizes lively Christmas festivities for his employees.\")##\n(\"entity\"<|>\"Mrs. Fezziwig\"<|>\"person\"<|>\"Mrs. Fezziwig is the wife of Fezziwig, joining him in celebrations and dances, embodying warmth and partnership.\")##\n(\"entity\"<|>\"The Dance\"<|>\"event\"<|>\"The Dance organized by Fezziwig is a festive gathering involving various participants, showcasing joy and celebration during Christmas Eve.\")##\n(\"entity\"<|>\"Christmas Eve\"<|>\"event\"<|>\"Christmas Eve is the night before Christmas, serving as the backdrop for the lively celebrations and dances hosted by Fezziwig.\")##\n(\"relationship\"<|>\"Ebenezer\"<|>\"Dick Wilkins\"<|>\"Ebenezer reflects on his fond memories and attachment to Dick Wilkins from their days as apprentices.\"<|>\"nostalgia, apprenticeship\"<|>8)##\n(\"relationship\"<|>\"Fezziwig\"<|>\"The Dance\"<|>\"Fezziwig organizes The Dance, impacting the morale and enjoyment of his employees on Christmas Eve.\"<|>\"celebration, leadership\"<|>9)##\n(\"relationship\"<|>\"Mrs. Fezziwig\"<|>\"Fezziwig\"<|>\"Mrs. Fezziwig partners with Fezziwig in the festivities, highlighting their companionship during the celebrations.\"<|>\"partnership, celebration\"<|>8)##\n(\"relationship\"<|>\"The Dance\"<|>\"Christmas Eve\"<|>\"The Dance takes place on Christmas Eve, marking a significant festive event in the story.\"<|>\"festivity, holiday celebration\"<|>10)##\n(\"content_keywords\"<|>\"nostalgia, celebration, Christmas, companionship\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "cc95e0e21f91f9d1b6f767e6b0166221": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a character who engages in a profound conversation with a ghost about the nature of existence and remorse, showcasing his struggle with his past actions.\")##\n(\"entity\"<|>\"Jacob Marley\"<|>\"person\"<|>\"Jacob Marley is the ghost of Scrooge's former business partner, who seeks to impart a lesson about the consequences of a life lived without compassion or connection to humanity.\")##\n(\"entity\"<|>\"The Ghost\"<|>\"event\"<|>\"The Ghost represents the spirit of Jacob Marley, warning Scrooge about the aftereffects of neglecting human virtue and kindness during life.\")##\n(\"entity\"<|>\"Mankind\"<|>\"concept\"<|>\"Mankind signifies the broader human community, which Marley claims should be the focus of compassionate effort rather than mere business pursuits.\")##\n(\"entity\"<|>\"Business\"<|>\"concept\"<|>\"Business refers to the practices of trade and commerce that Marley criticizes for overshadowing more significant moral duties like benevolence and charity.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Jacob Marley\"<|>\"Scrooge converses with the ghost of Jacob Marley, who tries to guide him toward understanding the importance of compassion and the consequences of his actions.\"<|>\"redemption, moral lesson\"<|>9)##\n(\"relationship\"<|>\"Jacob Marley\"<|>\"The Ghost\"<|>\"Jacob Marley is embodied by The Ghost, which serves to relay messages about regret and the imperative of kindness to Scrooge.\"<|>\"spiritual guidance, past connections\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Mankind\"<|>\"Scrooge's actions and neglect of mankind are highlighted in Marley's admonitions, emphasizing the need for connection and care for others.\"<|>\"moral responsibility, social awareness\"<|>7)##\n(\"relationship\"<|>\"Jacob Marley\"<|>\"Business\"<|>\"Marley critiques Scrooge's concern for business over the well-being of mankind, highlighting a contrast between economic pursuits and ethical 
obligations.\"<|>\"critique of materialism, ethical duty\"<|>8)##\n(\"content_keywords\"<|>\"redemption, human connection, moral lesson, consequences of actions\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "371f793d5f1464f976f67054a62f95be": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a central character who reflects on his past and is visited by a Ghost, indicating a journey of self-discovery and redemption.\")## \n(\"entity\"<|>\"Ghost\"<|>\"person\"<|>\"The Ghost serves as a supernatural guide for Scrooge, prompting him to reflect on his life and choices.\")## \n(\"entity\"<|>\"Little Fan\"<|>\"person\"<|>\"Little Fan is Scrooge's sister, who brings joy and an invitation to come home, representing innocence and familial love.\")## \n(\"entity\"<|>\"Fezziwig\"<|>\"person\"<|>\"Fezziwig is a jovial figure from Scrooge's past, known for his kind-heartedness and as a former employer who embodies the spirit of Christmas.\")## \n(\"entity\"<|>\"Dick Wilkins\"<|>\"person\"<|>\"Dick Wilkins is Scrooge's fellow apprentice and close companion during his younger years, indicating camaraderie during their apprenticeship.\")## \n(\"entity\"<|>\"Christmas-time\"<|>\"event\"<|>\"Christmas-time represents the festive season, a recurring theme that evokes memories and emotions in Scrooge’s reflections.\")## \n(\"relationship\"<|>\"Scrooge\"<|>\"Ghost\"<|>\"The Ghost guides Scrooge through reflections of his past, influencing his journey toward redemption.\"<|>\"reflection, self-discovery\"<|>8)## \n(\"relationship\"<|>\"Scrooge\"<|>\"Little Fan\"<|>\"Little Fan's joyful visit emphasizes family love and the nostalgic warmth of childhood memories for Scrooge.\"<|>\"family, nostalgia\"<|>9)## \n(\"relationship\"<|>\"Scrooge\"<|>\"Fezziwig\"<|>\"Scrooge reminisces about Fezziwig's kindness during his apprenticeship, comparing his past self to his current state.\"<|>\"kindness, mentorship\"<|>8)## \n(\"relationship\"<|>\"Scrooge\"<|>\"Dick Wilkins\"<|>\"Scrooge recalls his friendship with Dick Wilkins, highlighting the connections of youth and support during their apprenticeship.\"<|>\"friendship, apprenticeship\"<|>7)## \n(\"content_keywords\"<|>\"reflection, redemption, family ties, Christmas 
spirit, nostalgia\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "634f685c0c2c226df03cfd9c92ac5482": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a central character in the story, facing his own mortality and the consequences of his past actions through the visits of spirits.\")##\n(\"entity\"<|>\"Marley's Ghost\"<|>\"person\"<|>\"Marley's Ghost is a spectral figure who appears to Scrooge to warn him of the impending visits from three spirits, representing remorse for his past deeds.\")##\n(\"entity\"<|>\"The Three Spirits\"<|>\"event\"<|>\"The Three Spirits refer to the supernatural beings that are prophesied to visit Scrooge, each representing different aspects of time and morality.\")##\n(\"entity\"<|>\"The Invisible World\"<|>\"concept\"<|>\"The Invisible World refers to the realm of spirits and supernatural influences that Scrooge glimpses during his encounter with Marley's Ghost.\")##\n(\"entity\"<|>\"The Clock\"<|>\"technology\"<|>\"The Clock is mentioned as malfunctioning, which symbolizes the confusion and distortion of time that Scrooge experiences during his nighttime visitations.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Marley's Ghost\"<|>\"Marley's Ghost warns Scrooge about the consequences of his actions and the visits of the three spirits, setting the stage for his transformation.\"<|>\"warning, fate\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Three Spirits\"<|>\"Scrooge is directly involved in the narrative concerning the Three Spirits who are meant to guide him toward redemption.\"<|>\"redemption, transformation\"<|>9)##\n(\"relationship\"<|>\"Marley's Ghost\"<|>\"The Invisible World\"<|>\"Marley's Ghost illustrates the existence of the Invisible World and its impact on human lives as he warns Scrooge about his fate.\"<|>\"supernatural, morality\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Clock\"<|>\"Scrooge's interaction with the faulty clock symbolizes his disconnection from time and reality, as he struggles to comprehend his experiences.\"<|>\"time distortion, 
confusion\"<|>6)##\n(\"content_keywords\"<|>\"morality, redemption, supernatural, time distortion\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "6abaeba2769933c3d9f1bf8dd66f0db8": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a central character who is portrayed as miserly and unyielding, exhibiting a lack of consideration for others and a deep sense of melancholy.\")##\n(\"entity\"<|>\"Bob Cratchit\"<|>\"person\"<|>\"Bob Cratchit is Scrooge's clerk, who demonstrates a sense of hope and joy, particularly highlighted during Christmas Eve.\")##\n(\"entity\"<|>\"Marley\"<|>\"person\"<|>\"Marley is Scrooge's deceased business partner, whose spectral presence looms over Scrooge, evoking themes of regret and the supernatural.\")##\n(\"entity\"<|>\"Christmas Eve\"<|>\"event\"<|>\"Christmas Eve is depicted as a significant time, bringing about joyful activities like sliding down snow hills, contrasting with Scrooge's dour demeanor.\")##\n(\"entity\"<|>\"London\"<|>\"geo\"<|>\"London serves as the geographical setting for the story, encapsulating the harsh winter atmosphere and the life of its inhabitants during the festive season.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Bob Cratchit\"<|>\"Scrooge is the employer of Bob Cratchit, demonstrating a power dynamic characterized by Scrooge's harsh treatment and Bob's subservient role.\"<|>\"employment, power dynamic\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Marley\"<|>\"Scrooge reflects on Marley, revealing his past connection and the haunting aspect of Marley's death, which influences Scrooge's character arc.\"<|>\"supernatural connection, past relationships\"<|>8)##\n(\"relationship\"<|>\"Bob Cratchit\"<|>\"Christmas Eve\"<|>\"Bob Cratchit embodies the joy of Christmas Eve while Scrooge's perspective starkly contrasts with Bob's happiness during the holiday season.\"<|>\"celebration, contrast\"<|>5)##\n(\"relationship\"<|>\"Scrooge\"<|>\"London\"<|>\"Scrooge's experiences reflect the larger context of life in London during the festive season, emphasizing the mood of the city at that time.\"<|>\"setting, seasonal context\"<|>7)##\n(\"content_keywords\"<|>\"miserliness, 
Christmas, supernatural, contrast between joy and melancholy\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "fe9aaca3a0fa3a437655b0148e15b0c1": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a miserly character who despises Christmas and is cynical about the holiday's significance.\")##\n(\"entity\"<|>\"Scrooge's Nephew\"<|>\"person\"<|>\"Scrooge's Nephew is portrayed as optimistic about Christmas, emphasizing its positive qualities despite Scrooge's negativity.\")##\n(\"entity\"<|>\"Mr. Marley\"<|>\"person\"<|>\"Mr. Marley is Scrooge's deceased business partner who died seven years ago, representing a connection to Scrooge's past and his current miserly outlook.\")##\n(\"entity\"<|>\"The Clerk\"<|>\"person\"<|>\"The Clerk is a subordinate to Scrooge who serves as a contrast to his boss's character, showing warmth and holiday spirit.\")##\n(\"entity\"<|>\"Liberty\"<|>\"event\"<|>\"The discussion around liberality represents a call to generosity and charity during the festive season, which Scrooge rebuffs.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is central to the text, symbolizing generosity, charity, and warmth that Scrooge rejects.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Scrooge's Nephew\"<|>\"Scrooge's Nephew tries to convince Scrooge of the joys of Christmas, illustrating the conflict between their differing views on the holiday.\"<|>\"family conflict, differing perspectives\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Mr. Marley\"<|>\"Scrooge reflects on Mr. 
Marley's death, highlighting his isolation and connection to past values that he now rejects.\"<|>\"past connection, isolation\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Clerk\"<|>\"The Clerk represents the festive spirit of Christmas that Scrooge dismisses, showcasing Scrooge's coldness.\"<|>\"contrast in attitudes, workplace dynamics\"<|>6)##\n(\"relationship\"<|>\"Scrooge's Nephew\"<|>\"Christmas\"<|>\"Scrooge's Nephew defends the positive aspects of Christmas, contrasting Scrooge's view that the holiday is pointless.\"<|>\"advocacy for holiday spirit, family dynamics\"<|>9)##\n(\"relationship\"<|>\"Mr. Marley\"<|>\"Liberty\"<|>\"Mr. Marley’s past generosity is invoked during a discussion about the need for liberality, showing a connection to Scrooge’s current attitudes.\"<|>\"past values, moral expectations\"<|>5)##\n(\"content_keywords\"<|>\"Christmas, family conflict, charity, isolation\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "95e3e9ed4ebba90196292e83422ede45": {
+ "return": "(\"entity\"<|>\"Fezziwig\"<|>\"organization\"<|>\"Fezziwig is a character noted for his role during a domestic ball, providing a joyful atmosphere and exemplifying kindness and generosity.\")##\n(\"entity\"<|>\"Mrs. Fezziwig\"<|>\"person\"<|>\"Mrs. Fezziwig is Fezziwig's partner at the domestic ball, contributing to the festive atmosphere alongside her husband.\")##\n(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a character experiencing a transformation as he reflects on his past and the impact of Fezziwig's generosity during the ball.\")##\n(\"entity\"<|>\"The Ghost of Christmas Past\"<|>\"event\"<|>\"The Ghost of Christmas Past is a supernatural entity that shows Scrooge moments from his past, eliciting reflection and emotion.\")##\n(\"entity\"<|>\"The Domestic Ball\"<|>\"event\"<|>\"The Domestic Ball is a joyful celebration organized by Fezziwig, where guests enjoy dancing and merry exchanges.\")##\n(\"entity\"<|>\"Dick\"<|>\"person\"<|>\"Dick is a character from Scrooge's past, reminisced about during the scene, representing the camaraderie shared during the ball.\")##\n(\"relationship\"<|>\"Fezziwig\"<|>\"Mrs. Fezziwig\"<|>\"Fezziwig and Mrs. 
Fezziwig work together to create a festive and welcoming environment during the domestic ball.\"<|>\"partnership, celebration\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Fezziwig\"<|>\"Scrooge reflects positively on Fezziwig's past actions, acknowledging the happiness and gratitude he inspired in people.\"<|>\"reflection, influence\"<|>8)##\n(\"relationship\"<|>\"The Ghost of Christmas Past\"<|>\"Scrooge\"<|>\"The Ghost of Christmas Past guides Scrooge through recollections, prompting his introspection about his former self and choices.\"<|>\"introspection, memory\"<|>10)##\n(\"relationship\"<|>\"Fezziwig\"<|>\"The Domestic Ball\"<|>\"Fezziwig organizes the Domestic Ball, showcasing his ability to create joy and camaraderie among guests.\"<|>\"event organization, joy\"<|>10)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Dick\"<|>\"Scrooge remembers Dick as a part of his past, highlighting their shared experiences during happier times.\"<|>\"nostalgia, friendship\"<|>7)##\n(\"content_keywords\"<|>\"transformation, joy, memory, generosity\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "fb5cc30455a081e6a6571a0627c00c7d": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a character known for his miserly disposition and reluctance to aid the poor, particularly during the Christmas season.\")##\n(\"entity\"<|>\"The Gentleman\"<|>\"person\"<|>\"The Gentleman is a character who advocates for helping the poor and trying to raise a fund for those in need during Christmas.\")##\n(\"entity\"<|>\"The Union Workhouses\"<|>\"organization\"<|>\"The Union Workhouses are institutions mentioned as a means of supporting the poor, still operational during Scrooge's time.\")##\n(\"entity\"<|>\"The Treadmill\"<|>\"organization\"<|>\"The Treadmill is an institution referenced as part of the welfare system that Scrooge acknowledges.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is depicted as a festive season when charity and social duty are emphasized, contrasting with Scrooge's values.\")##\n(\"entity\"<|>\"The Main Street\"<|>\"geo\"<|>\"The Main Street is a location described where activity is bustling during the cold season, reflecting the social dynamics of the area.\")##\n(\"entity\"<|>\"The Mansion House\"<|>\"organization\"<|>\"The Mansion House is identified as the residence of the Lord Mayor, a place associated with Christmas festivities and order.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Gentleman\"<|>\"Scrooge and The Gentleman have opposing views on charity and helping the poor during Christmas, showing a stark contrast in values.\"<|>\"charity, conflict of values\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Christmas\"<|>\"Scrooge's attitude towards Christmas highlights his disconnection from the festive spirit and charity associated with the holiday.\"<|>\"disconnection, festive spirit\"<|>7)##\n(\"relationship\"<|>\"The Gentleman\"<|>\"The Union Workhouses\"<|>\"The Gentleman mentions The Union Workhouses as alternatives for the poor, suggesting a connection to his advocacy for helping them.\"<|>\"advocacy, support 
systems\"<|>6)##\n(\"relationship\"<|>\"The Treadmill\"<|>\"The Union Workhouses\"<|>\"Both The Treadmill and The Union Workhouses are part of the system intended to manage the poor, referenced in Scrooge's conversation.\"<|>\"welfare system, social management\"<|>5)##\n(\"relationship\"<|>\"The Mansion House\"<|>\"Christmas\"<|>\"The Mansion House is referenced in the context of Christmas celebrations, illustrating its role in festive festivities and governance.\"<|>\"festivities, governance\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Main Street\"<|>\"Scrooge's experiences on The Main Street during Christmas time reflect his discontent with the surrounding joy and celebrations.\"<|>\"social dynamics, personal struggle\"<|>6)##\n(\"content_keywords\"<|>\"charity, Christmas spirit, conflict of values, social dynamics\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "427f3f0888e677aa2216716b8b911c69": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a central character who reflects on his past and is deeply affected by the memories of his childhood and the loneliness he experienced.\")##\n(\"entity\"<|>\"Ali Baba\"<|>\"person\"<|>\"Ali Baba is mentioned as a character from a story that impacts Scrooge during his reflection on Christmas and childhood.\")##\n(\"entity\"<|>\"Valentine\"<|>\"person\"<|>\"Valentine is referenced as a brother of a character, and his presence contributes to the holiday memories Scrooge recalls.\")##\n(\"entity\"<|>\"Orson\"<|>\"person\"<|>\"Orson is mentioned as another brother mentioned by Scrooge, recalling his childhood tales and adventures.\")##\n(\"entity\"<|>\"Robin Crusoe\"<|>\"person\"<|>\"Robin Crusoe is a character referenced by Scrooge as part of his memories, signifying tales of adventure and nostalgia.\")##\n(\"entity\"<|>\"Friday\"<|>\"person\"<|>\"Friday is another character from a story mentioned by Scrooge during his nostalgic recollections of childhood.\")##\n(\"entity\"<|>\"The Ghost\"<|>\"organization\"<|>\"The Ghost serves as a guiding presence that helps Scrooge confront and reflect on his past during Christmas.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is a significant holiday that triggers Scrooge's memories and reflections on joy, loneliness, and his past.\")##\n(\"entity\"<|>\"Christmas Carol\"<|>\"event\"<|>\"A Christmas Carol is referenced as a song that Scrooge recalls when thinking about giving and community.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Ali Baba\"<|>\"Scrooge recalls Ali Baba while reflecting on memories of Christmas, indicating a strong personal connection to the story and its impact on him.\"<|>\"nostalgia, childhood stories\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Valentine\"<|>\"Scrooge's mention of Valentine signifies his memories of familial ties and childhood experiences during Christmas.\"<|>\"family memories, 
nostalgia\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Orson\"<|>\"Same as above, Orson is part of Scrooge's memories of his brothers during Christmas time, emphasizing themes of togetherness and nostalgia.\"<|>\"family memories, nostalgia\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Robin Crusoe\"<|>\"Scrooge's reference to Robin Crusoe during his reflections shows how literary characters shaped his childhood thoughts and feelings.\"<|>\"literary influence, nostalgia\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Friday\"<|>\"Scrooge's mention of Friday signifies his recollection of childhood stories and influences during Christmas time.\"<|>\"storytelling, childhood memories\"<|>5)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Ghost\"<|>\"The Ghost serves as a catalyst for Scrooge's reflections, guiding him to confront his past and feelings about Christmas.\"<|>\"reflection, guidance\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Christmas\"<|>\"Christmas is a central theme that invokes Scrooge's memories of his past, his feelings of loneliness, and his desire to connect.\"<|>\"holiday significance, reflection\"<|>10)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Christmas Carol\"<|>\"Scrooge recalls a Christmas carol, wishing to give to a boy, which signifies his longing for connection and generosity.\"<|>\"generosity, connection\"<|>8)##\n(\"content_keywords\"<|>\"nostalgia, childhood memories, reflection, Christmas\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "0fa945ad99c54bf67214b27830b72e75": {
+ "return": "(\"entity\"<|>\"cash-boxes\"<|>\"concept\"<|>\"Cash-boxes are items represented in the ghost's chain, symbolizing Marley’s greed and the burdens of material wealth in life.\")##\n(\"entity\"<|>\"keys\"<|>\"concept\"<|>\"Keys represent control and access, also included in the ghost's chain, indicating the constraints of Marley’s earthly desires.\")##\n(\"entity\"<|>\"padlocks\"<|>\"concept\"<|>\"Padlocks are part of the ghost's chain, signifying confinement and the lock on Marley’s spirit due to his selfish actions in life.\")##\n(\"entity\"<|>\"ledgers\"<|>\"concept\"<|>\"Ledgers symbolize records of transactions, reflecting Marley’s focus on business and financial dealings during his life.\")##\n(\"entity\"<|>\"deeds\"<|>\"concept\"<|>\"Deeds represent property ownership, indicating the material attachments that bound Marley in life and continue to do so in death.\")##\n(\"entity\"<|>\"heavy purses\"<|>\"concept\"<|>\"Heavy purses in the ghost's chain symbolize the weight of wealth and the burden of not sharing that wealth with others.\")##\n(\"entity\"<|>\"the heavy door\"<|>\"location\"<|>\"The heavy door is a physical boundary encountered by Scrooge, marking the transition between the ordinary world and the supernatural visitation.\")##\n(\"entity\"<|>\"the fireplace\"<|>\"location\"<|>\"The fireplace serves as a significant setting for the interaction between Scrooge and the ghost, representing warmth but also the stark contrast to the ghost's cold presence.\")##\n(\"entity\"<|>\"spirit\"<|>\"concept\"<|>\"Spirit represents the essence of Marley that is attempting to interact with Scrooge, indicating themes of afterlife and unresolved issues.\")##\n(\"entity\"<|>\"the bandage\"<|>\"concept\"<|>\"The bandage is an item used by the ghost, symbolizing decay and the remnants of Marley’s struggle with his own existence post-death.\")##\n(\"entity\"<|>\"the cosmic significance\"<|>\"concept\"<|>\"The cosmic significance refers to the broader themes of 
morality and the impact of one’s actions on others, reflected in Marley’s message to Scrooge.\")##",
+ "model": "gpt-4o-mini"
+ },
+ "750b5539066571350b27897c23316142": {
+ "return": "(\"entity\"<|>\"The Cold\"<|>\"concept\"<|>\"The Cold represents the emotional and physical chill that surrounds Scrooge, symbolizing his heartless nature and isolation from others.\")##\n(\"entity\"<|>\"Fog\"<|>\"geo\"<|>\"Fog is a weather phenomenon that adds to the bleak atmosphere, reflecting the obscurity and confusion in Scrooge's life.\")##\n(\"entity\"<|>\"The Clerk\"<|>\"person\"<|>\"The Clerk works in Scrooge's counting-house, depicted as miserable and diminished under Scrooge's strict management.\")##\n(\"entity\"<|>\"Christmas Eve\"<|>\"event\"<|>\"Christmas Eve is the specific time setting in the story, highlighting the contrast between the joy of the holiday and Scrooge's negativity.\")##\n(\"entity\"<|>\"The City\"<|>\"geo\"<|>\"The City provides the backdrop for Scrooge's story, representing the bustling environment that underscores his loneliness and detachment from society.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Clerk\"<|>\"Scrooge's treatment of the Clerk highlights his miserly and cruel nature, contributing to an oppressive work environment.\"<|>\"unfair treatment, workplace dynamics\"<|>7)##\n(\"relationship\"<|>\"The Cold\"<|>\"Scrooge\"<|>\"The Cold serves as a metaphor for Scrooge's emotional state, indicating his lack of warmth and compassion.\"<|>\"metaphor, emotional state\"<|>8)##\n(\"relationship\"<|>\"Fog\"<|>\"The City\"<|>\"The fog in The City creates a dreary atmosphere, signifying confusion and obscured visibility, paralleling Scrooge's cold perspective on life.\"<|>\"atmosphere, symbolism\"<|>6)##\n(\"relationship\"<|>\"Christmas Eve\"<|>\"Christmas\"<|>\"Christmas Eve sets the stage for the events leading to Christmas, amplifying the significance of the holiday in Scrooge's transformation.\"<|>\"holiday significance, festive anticipation\"<|>8)<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "f9419be8687d4e03b1b64a53803c4a11": {
+ "return": "(\"entity\"<|>\"Spirit\"<|>\"person\"<|>\"The Spirit is the manifestation of Scrooge's past that guides him through his memories, representing his former self and the influence of time.\")##\n(\"entity\"<|>\"Winter Day\"<|>\"event\"<|>\"The Winter Day signifies the seasonal setting during Scrooge's journey through his memories, highlighting a clear, cold atmosphere filled with nostalgia.\")##\n(\"entity\"<|>\"Children\"<|>\"person\"<|>\"The Children represent the joyful, carefree aspects of Scrooge's past, embodying happiness and the innocence of youth that he has long forgotten.\")##\n(\"entity\"<|>\"School\"<|>\"organization\"<|>\"The School is a location associated with Scrooge's neglected childhood, symbolizing loneliness and lost opportunities for connection and joy.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Spirit\"<|>\"Scrooge’s interaction with the Spirit emphasizes the emotional weight of his past decisions and feelings as they relate to his transformation.\"<|>\"guidance, emotional journey\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Children\"<|>\"Scrooge's reaction to the Children he observes in his memories reveals his longing for the happiness he once knew but has since lost.\"<|>\"longing, innocence\"<|>7)##\n(\"relationship\"<|>\"Spirit\"<|>\"School\"<|>\"The Spirit leads Scrooge to the School, invoking memories of solitary times that deeply impact his emotional state.\"<|>\"reflection, nostalgia\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"School\"<|>\"The School serves as a poignant reminder of Scrooge's childhood neglect, influencing his character development and emotional barriers.\"<|>\"neglect, formative experiences\"<|>7)##\n(\"content_keywords\"<|>\"nostalgia, childhood, emotional impact, guidance\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "9983a71b129178e931ae0c14a77570b5": {
+ "return": "(\"entity\"<|>\"The Spring\"<|>\"technology\"<|>\"The Spring is a mechanism within The Clock that Scrooge touches to correct its malfunction, highlighting his engagement with technology amid his confusion about time.\")##\n(\"entity\"<|>\"Bed Curtains\"<|>\"technology\"<|>\"The Bed Curtains are part of Scrooge's bedroom that are drawn aside, heralding the appearance of The Visitor and creating a pivotal moment in the narrative.\")##\n(\"entity\"<|>\"Light\"<|>\"concept\"<|>\"Light emanates from The Visitor, symbolizing clarity, revelation, and the supernatural, contrasting with the darkness around Scrooge's situation.\")##\n(\"entity\"<|>\"Holly Branch\"<|>\"technology\"<|>\"The Holly Branch is held by The Visitor, representing the duality of winter and spring, life and death, embodying the hope of renewal.\")##\n(\"entity\"<|>\"Dream\"<|>\"concept\"<|>\"Dream refers to the possibility that Scrooge's experiences are a figment of his imagination, reflecting his internal struggle with reality and perception.\")##\n(\"relationship\"<|>\"Marley's Ghost\"<|>\"The Visitor\"<|>\"Both Marley's Ghost and The Visitor are supernatural figures that interact with Scrooge, influencing his understanding of his life's choices.\"<|>\"supernatural connection, influence\"<|>8)##\n(\"relationship\"<|>\"The Visitor\"<|>\"Light\"<|>\"The Visitor is associated with Light, which represents enlightenment and understanding for Scrooge during his existential crisis.\"<|>\"enlightenment, revelation\"<|>9)##\n(\"relationship\"<|>\"The Visitor\"<|>\"Holly Branch\"<|>\"The Holly Branch held by The Visitor signifies themes of life, renewal, and the intersection of winter and spring in their encounter.\"<|>\"symbolism, renewal\"<|>7)##\n(\"relationship\"<|>\"Ebenezer Scrooge\"<|>\"Dream\"<|>\"Scrooge contemplates whether his experiences are merely a Dream, showcasing his inner conflict with reality and imagination.\"<|>\"reality vs. 
illusion, introspection\"<|>6)##\n(\"content_keywords\"<|>\"supernatural beings, conflict with reality, symbolism, light and dark\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "c6bc5075c23f7eabe3fe35ad6ef0dfd1": {
+ "return": "(\"entity\"<|>\"Joe\"<|>\"person\"<|>\"Joe is a character engaged in a conversation, displaying curiosity and a sense of humor regarding a situation related to 'bed-curtains'.\")##\n(\"entity\"<|>\"The Plump Sister\"<|>\"person\"<|>\"The Plump Sister is depicted in a light-hearted and humorous context, illustrating the fun of Christmas gatherings.\")##\n(\"entity\"<|>\"The Woman\"<|>\"person\"<|>\"The Woman engages in a humorous exchange with Joe about taking down bed-curtains, showcasing lively communication among characters.\")##\n(\"entity\"<|>\"Fred\"<|>\"person\"<|>\"Fred is Scrooge's nephew, representing the warmth and joy of family that Scrooge initially rejects.\")##\n(\"entity\"<|>\"Old Scratch\"<|>\"person\"<|>\"Old Scratch is mentioned as a figure related to the conversation, symbolizing mischief and humor.\")##\n(\"entity\"<|>\"The Register\"<|>\"event\"<|>\"The Register documents Marley's burial, highlighting the formal acknowledgment of his death.\")##\n(\"entity\"<|>\"The Funeral\"<|>\"event\"<|>\"The Funeral serves as a significant event reflecting on Marley's death, noted for its business-like atmosphere as perceived by Scrooge.\")##\n(\"relationship\"<|>\"Joe\"<|>\"The Woman\"<|>\"Joe and The Woman share a humorous exchange about the bed-curtains, reflecting the jovial spirit of the gathering.\"<|>\"humor, interaction\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Fred\"<|>\"Scrooge's interaction with Fred emphasizes his rejection of familial warmth during the Christmas season.\"<|>\"rejection, familial dynamics\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Register\"<|>\"Scrooge's connection to The Register reinforces his involvement in Marley's death and the responsibilities that follow.\"<|>\"responsibility, mourning\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Funeral\"<|>\"Scrooge treats The Funeral as a business transaction, showing his cold approach to personal loss and death.\"<|>\"business, emotional 
detachment\"<|>7)##\n(\"content_keywords\"<|>\"humor, family, interaction, mourning\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "749b6e8b484fd12302eff82136755095": {
+ "return": "(\"entity\"<|>\"Project Gutenberg\"<|>\"organization\"<|>\"Project Gutenberg is a digital library offering free eBooks, allowing anyone to access literature in various formats without restrictions.\")##\n(\"entity\"<|>\"A Christmas Carol\"<|>\"event\"<|>\"A Christmas Carol is a novella by Charles Dickens that explores themes of redemption, compassion, and the spirit of Christmas, first published in 1843.\")##\n(\"entity\"<|>\"Charles Dickens\"<|>\"person\"<|>\"Charles Dickens is a renowned English writer and social critic, best known for his novels depicting Victorian society, including A Christmas Carol.\")##\n(\"entity\"<|>\"Arthur Rackham\"<|>\"person\"<|>\"Arthur Rackham was an English illustrator known for his imaginative and detailed illustrations for children's books, including A Christmas Carol.\")##\n(\"entity\"<|>\"Ebenezer Scrooge\"<|>\"person\"<|>\"Ebenezer Scrooge is the main character of A Christmas Carol, portrayed as a miserly and greedy old man who experiences a transformation through the visitation of spirits.\")##\n(\"entity\"<|>\"Bob Cratchit\"<|>\"person\"<|>\"Bob Cratchit is the dedicated clerk of Ebenezer Scrooge, representing the struggles of the working class and the importance of family in A Christmas Carol.\")##\n(\"entity\"<|>\"Tiny Tim\"<|>\"person\"<|>\"Tiny Tim, the son of Bob Cratchit, is a sickly child whose positive outlook on life symbolizes hope and compassion in A Christmas Carol.\")##\n(\"entity\"<|>\"Ghost of Christmas Past\"<|>\"event\"<|>\"The Ghost of Christmas Past is a supernatural character that shows Scrooge his past, helping him understand the roots of his current miserliness.\")##\n(\"entity\"<|>\"Ghost of Christmas Present\"<|>\"event\"<|>\"The Ghost of Christmas Present is a spirit that reveals to Scrooge the joys and struggles of the present Christmas season, emphasizing generosity and compassion.\")##\n(\"entity\"<|>\"Ghost of Christmas Yet to Come\"<|>\"event\"<|>\"The Ghost of Christmas Yet to Come 
is an apparition that presents Scrooge with a grim vision of his possible future if he doesn't change his ways.\")##\n(\"relationship\"<|>\"A Christmas Carol\"<|>\"Project Gutenberg\"<|>\"A Christmas Carol is published and distributed freely through Project Gutenberg, making it accessible to a global audience.\"<|>\"publication, accessibility\"<|>9)##\n(\"relationship\"<|>\"Charles Dickens\"<|>\"A Christmas Carol\"<|>\"Charles Dickens authored A Christmas Carol, which reflects his views on Victorian society and social issues.\"<|>\"author, literary impact\"<|>10)##\n(\"relationship\"<|>\"Arthur Rackham\"<|>\"A Christmas Carol\"<|>\"Arthur Rackham illustrated A Christmas Carol, enhancing the visual experience of Dickens' narrative.\"<|>\"illustration, artistic contribution\"<|>8)##\n(\"relationship\"<|>\"Ebenezer Scrooge\"<|>\"Tiny Tim\"<|>\"Ebenezer Scrooge, through his interactions with Tiny Tim, learns about compassion and the importance of family and community.\"<|>\"redemption, compassion\"<|>9)##\n(\"relationship\"<|>\"Bob Cratchit\"<|>\"Ebenezer Scrooge\"<|>\"Bob Cratchit works for Ebenezer Scrooge, embodying the struggles of the working class in contrast to Scrooge's miserliness.\"<|>\"workplace dynamics, social contrast\"<|>7)##\n(\"relationship\"<|>\"Ghost of Christmas Past\"<|>\"Ebenezer Scrooge\"<|>\"The Ghost of Christmas Past helps Scrooge reflect on his earlier life decisions, sparking his journey of transformation.\"<|>\"reflection, transformation\"<|>9)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Ebenezer Scrooge\"<|>\"The Ghost of Christmas Present reveals to Scrooge the reality of his current life, highlighting the need for change.\"<|>\"revelation, current state\"<|>9)##\n(\"relationship\"<|>\"Ghost of Christmas Yet to Come\"<|>\"Ebenezer Scrooge\"<|>\"The Ghost of Christmas Yet to Come shows Scrooge a dire future, which acts as a catalyst for his transformation.\"<|>\"future reflection, 
transformation\"<|>10)##\n(\"content_keywords\"<|>\"redemption, compassion, social critique, transformation\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "180e01b6576aa73addeda5a221eed6b6": {
+ "return": "(\"entity\"<|>\"The Chain\"<|>\"concept\"<|>\"The Chain refers to the symbolic burden that Marley carries, representing the consequences of his actions and the remorse that traps him in his spectral form.\")##\n(\"entity\"<|>\"Crowd of Fellow-Beings\"<|>\"concept\"<|>\"The Crowd of Fellow-Beings signifies the people Marley neglected throughout his life, highlighting the need for social responsibility and community engagement.\")##\n(\"entity\"<|>\"Christmas Eves\"<|>\"event\"<|>\"Christmas Eves refers to the recurring time periods that Marley reflects upon, indicating the weight of his past decisions leading to regret.\")##\n(\"entity\"<|>\"The Blessed Star\"<|>\"concept\"<|>\"The Blessed Star symbolizes hope and guidance, representing the moral path that Marley believes Scrooge should have followed and acknowledged.\")##\n(\"entity\"<|>\"Iron Cable\"<|>\"concept\"<|>\"Iron Cable is a metaphorical representation of the heavy burdens of regret and unfulfilled opportunities carried by Marley in the afterlife.\")##\n(\"entity\"<|>\"The Common Welfare\"<|>\"concept\"<|>\"The Common Welfare emphasizes the idea that collective well-being should be a primary concern for individuals, contrasting with selfish pursuits.\")##\n(\"entity\"<|>\"Other Regions\"<|>\"concept\"<|>\"Other Regions alludes to the spiritual realm and existence beyond earthly life that Marley refers to, highlighting the interaction between the physical and spiritual worlds.\")##\n(\"entity\"<|>\"Seven Years\"<|>\"event\"<|>\"Seven Years denotes the duration since Marley passed away, a significant timeframe during which he reflects on his unending labor and remorse.\")##\n(\"entity\"<|>\"Counting-House\"<|>\"location\"<|>\"The Counting-House is a physical space representing the business environment where Scrooge and Marley engaged in trade but also symbolizes their limited worldview.\")##\n(\"entity\"<|>\"Fellow-Men\"<|>\"concept\"<|>\"Fellow-Men refers to individuals with whom Marley and 
Scrooge share a human connection, underscoring the importance of empathy and human relationships.\")##\n(\"entity\"<|>\"Ponderous Chain\"<|>\"concept\"<|>\"Ponderous Chain symbolizes the heavy emotional and moral burden that Marley carries as a consequence of his choices in life, particularly relating to his business practices.\")##\n(\"content_keywords\"<|>\"remorse, moral awakening, spiritual burden, societal responsibility\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "79d0cc76f9ccaf3cc5c0c4770762d329": {
+ "return": "(\"entity\"<|>\"The Schoolmaster\"<|>\"person\"<|>\"The Schoolmaster is an authoritative figure who oversees Scrooge and the other boys, representing the harshness of education and discipline.\")## \n(\"entity\"<|>\"Master Scrooge\"<|>\"person\"<|>\"Master Scrooge is the young version of Scrooge who is influenced by his environment and experiences during his schooling.\")## \n(\"entity\"<|>\"Father\"<|>\"person\"<|>\"Father refers to Scrooge's father, who is noted for being kinder than before, allowing Scrooge to return home.\")## \n(\"entity\"<|>\"The Chaise\"<|>\"organization\"<|>\"The Chaise is the mode of transportation that takes Scrooge and Little Fan away from the school, symbolizing transition and change.\")## \n(\"entity\"<|>\"The City\"<|>\"geo\"<|>\"The City represents the bustling urban environment where Scrooge reflects on his past, filled with activity and shadowy figures.\")## \n(\"entity\"<|>\"The Warehouse\"<|>\"location\"<|>\"The Warehouse is a specific location where Scrooge had previously apprenticed and holds significant memories for him.\")## \n(\"relationship\"<|>\"Scrooge\"<|>\"The Schoolmaster\"<|>\"The Schoolmaster represents authority and discipline in Scrooge's early life, influencing his development.\"<|>\"authority, discipline\"<|>6)## \n(\"relationship\"<|>\"Little Fan\"<|>\"Father\"<|>\"Little Fan discusses the kindness of their Father, indicating a change in the family dynamics that impacts Scrooge.\"<|>\"family, change\"<|>7)## \n(\"relationship\"<|>\"Scrooge\"<|>\"The City\"<|>\"Scrooge's reflections in the City highlight his relationship with his past and the contrast of his current life.\"<|>\"reflection, urban life\"<|>5)## \n(\"relationship\"<|>\"Scrooge\"<|>\"The Warehouse\"<|>\"Scrooge's connection to the Warehouse emphasizes his memories of apprenticeship and his past experiences.\"<|>\"memories, apprenticeship\"<|>8)## \n(\"relationship\"<|>\"Scrooge\"<|>\"Master Scrooge\"<|>\"The young Master Scrooge 
represents Scrooge's earlier aspirations and innocence, contrasting with his current self.\"<|>\"nostalgia, identity\"<|>9)## \n(\"content_keywords\"<|>\"authority, family dynamics, urban life, memories, identity\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "0b2c60d67fd1327eaf6b8ff21a5a2f79": {
+ "return": "(\"entity\"<|>\"The Poor\"<|>\"person\"<|>\"The Poor refers to the individuals in society who are destitute and in need of assistance, often highlighted in discussions about charity during Christmas.\")##\n(\"entity\"<|>\"The Clerk\"<|>\"person\"<|>\"The Clerk is an employee of Scrooge, who quietly awaits the end of the workday and represents the working class.\")##\n(\"entity\"<|>\"Christmas Carol\"<|>\"event\"<|>\"The Christmas Carol is a musical tradition performed during the Christmas season, symbolizing goodwill and festive spirit.\")##\n(\"entity\"<|>\"The Fog\"<|>\"geo\"<|>\"The Fog is a weather condition described as thickening around Scrooge, metaphorically representing the gloomy atmosphere surrounding him.\")##\n(\"entity\"<|>\"The Gas-pipes\"<|>\"organization\"<|>\"The Gas-pipes are mentioned as undergoing repairs in The Main Street, indicating the infrastructure's role in the community during winter.\")##\n(\"entity\"<|>\"The Labourers\"<|>\"organization\"<|>\"The Labourers refer to the workers repairing the gas-pipes, reflecting the lower working-class experiencing harsh winter conditions.\")##\n(\"entity\"<|>\"The Lord Mayor\"<|>\"person\"<|>\"The Lord Mayor is a civic leader responsible for overseeing Christmas festivities and ensuring proper celebrations in the town.\")##\n(\"entity\"<|>\"The Tailor\"<|>\"person\"<|>\"The Tailor is a character referenced as preparing for Christmas by stirring pudding, illustrating the engagement of tradespeople in holiday traditions.\")##\n(\"entity\"<|>\"The Pudding\"<|>\"event\"<|>\"The Pudding is a traditional dish prepared for Christmas, symbolizing festivity and the warmth of family gatherings.\")##\n(\"entity\"<|>\"St. Dunstan\"<|>\"person\"<|>\"St. Dunstan is a historical figure mentioned metaphorically, associated with a legend of confronting evil, reflecting the theme of good versus evil.\")##\n\n##",
+ "model": "gpt-4o-mini"
+ },
+ "e8321a26bab0df2d45d23b47ecd1f74c": {
+ "return": "(\"entity\"<|>\"Portly Gentlemen\"<|>\"person\"<|>\"The Portly Gentlemen are representatives who seek to gather funds for the poor during Christmas, embodying the spirit of generosity that Scrooge rejects.\")##\n(\"entity\"<|>\"Credentials\"<|>\"event\"<|>\"The Credentials refer to the documents presented by the Portly Gentlemen, indicating their legitimacy and purpose in seeking donations from Scrooge.\")##\n(\"entity\"<|>\"Bedlam\"<|>\"geo\"<|>\"Bedlam is referenced as a metaphorical place for madness, illustrating Scrooge's disdain for the cheerful nature of Christmas and his sense of isolation.\")##\n(\"entity\"<|>\"Christmas-time\"<|>\"event\"<|>\"Christmas-time is characterized as a period filled with festivities, joy, and community engagement, which Scrooge regards cynically.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Portly Gentlemen\"<|>\"Scrooge dismisses the Portly Gentlemen's request for donations, rejecting their appeal for charity.\"<|>\"charity rejection, conflict\"<|>8)##\n(\"relationship\"<|>\"Portly Gentlemen\"<|>\"Credentials\"<|>\"The Portly Gentlemen present their Credentials to establish their purpose and authority to solicit donations from Scrooge.\"<|>\"authorization, fundraising\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Christmas-time\"<|>\"Scrooge views Christmas-time as a burden rather than a celebration, contrasting with the feelings of others around him.\"<|>\"cynicism, holiday spirit conflict\"<|>9)##\n(\"relationship\"<|>\"Scrooge's Nephew\"<|>\"Bedlam\"<|>\"Scrooge references Bedlam in expressing his frustration with the jovial attitudes of others, indicating his desire to distance himself from them.\"<|>\"isolation, metaphorical reference\"<|>6)##\n(\"content_keywords\"<|>\"generosity, rejection of charity, holiday spirit, generosity\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "18ae0495a82ddbf3ce5051da174a25c5": {
+ "return": "(\"entity\"<|>\"The Two Apprentices\"<|>\"person\"<|>\"The Two Apprentices are characters from Fezziwig's establishment who express gratitude and admiration for Fezziwig's positive influence during the Domestic Ball.\")##\n(\"entity\"<|>\"The Spirit\"<|>\"concept\"<|>\"The Spirit refers to the supernatural essence of the Ghost of Christmas Past, embodying the idea of reflection and memory.\")##\n(\"entity\"<|>\"Merry Christmas\"<|>\"event\"<|>\"Merry Christmas is a common greeting shared during the Domestic Ball by Fezziwig and Mrs. Fezziwig, representing the spirit of the holiday.\")##\n(\"entity\"<|>\"Wealth\"<|>\"concept\"<|>\"Wealth is referred to as a 'golden idol' that has displaced genuine happiness in Scrooge's life, highlighting themes of materialism versus emotional fulfillment.\")##\n(\"entity\"<|>\"Poverty\"<|>\"concept\"<|>\"Poverty is presented in contrast to wealth, representing struggles and rejections of superficial values in Scrooge's past.\")##\n(\"relationship\"<|>\"Fezziwig\"<|>\"The Two Apprentices\"<|>\"Fezziwig inspires deep gratitude and respect from his apprentices, who appreciate his kindness during their interactions at the ball.\"<|>\"gratitude, mentorship\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Spirit\"<|>\"Scrooge engages with the Spirit as it reveals memories that prompt significant self-reflection and consideration of change.\"<|>\"self-reflection, transformation\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Wealth\"<|>\"Scrooge confronts the obsession with wealth which has led him away from meaningful connections and happiness.\"<|>\"materialism, character development\"<|>8)##\n(\"relationship\"<|>\"Wealth\"<|>\"Poverty\"<|>\"Wealth is juxtaposed against poverty, emphasizing the harsh realities faced by those consumed by the pursuit of money.\"<|>\"contrast, societal norms\"<|>7)##\n(\"content_keywords\"<|>\"gratitude, introspection, materialism, holiday spirit\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "f1ab5d1e23b02aec6e22e3ebbbdaaea6": {
+ "return": "(\"entity\"<|>\"The Window\"<|>\"concept\"<|>\"The Window represents a threshold between the visible and invisible worlds, allowing Scrooge a view into the realm of spirits and their lamentations.\")##\n(\"entity\"<|>\"The Bell\"<|>\"technology\"<|>\"The Bell is a church bell that signifies the time passing and marks the moments of Scrooge's encounters with the ghosts.\")##\n(\"entity\"<|>\"The Air\"<|>\"concept\"<|>\"The Air is described as filled with phantoms, representing the haunting nature of Scrooge's past choices and regrets.\")##\n(\"entity\"<|>\"The Phantoms\"<|>\"person\"<|>\"The Phantoms are spirits that wander restlessly, symbolizing lost opportunities and unresolved issues from their past lives.\")##\n(\"entity\"<|>\"The Dark Night\"<|>\"concept\"<|>\"The Dark Night signifies the ominous and mysterious setting during which Scrooge faces his fears and experiences the supernatural.\")##\n(\"entity\"<|>\"Wailings\"<|>\"event\"<|>\"Wailings represent the sorrowful expressions of the phantoms, illustrating the pain and regret of spirits bound by their earthly actions.\")##\n(\"entity\"<|>\"The Chain\"<|>\"concept\"<|>\"The Chain symbolizes the burdens of the past that haunt the spirits, showcasing the consequences of one's actions in life.\")##\n(\"entity\"<|>\"The Door\"<|>\"concept\"<|>\"The Door is described as being double locked, representing Scrooge’s isolation and the barriers he has built around himself in life.\")##\n(\"entity\"<|>\"The Spirits\"<|>\"concept\"<|>\"The Spirits collectively represent the supernatural forces that seek to guide and teach Scrooge about the impact of his life choices.\")##\n(\"entity\"<|>\"The Ghostly Visitations\"<|>\"event\"<|>\"The Ghostly Visitations are the anticipated encounters that Scrooge must endure as part of his journey toward redemption.\")##\n(\"entity\"<|>\"Ebenezer\"<|>\"person\"<|>\"Ebenezer refers to Scrooge's full name, emphasizing his identity and character as a miser who learns valuable 
life lessons.\")##\n(\"entity\"<|>\"The Penance\"<|>\"concept\"<|>\"The Penance refers to the moral and emotional suffering that Marley's Ghost must endure due to his past actions, prompting Scrooge's journey of redemption.\")##\n(\"entity\"<|>\"The Transparent Window\"<|>\"concept\"<|>\"The Transparent Window symbolizes the blurred line between the known and the supernatural, reflecting Scrooge’s struggle to understand his experiences.\")##\n(\"entity\"<|>\"The Darkness\"<|>\"concept\"<|>\"The Darkness refers to the absence of light, signifying uncertainty and fear in Scrooge's transformation journey.\")##\n(\"content_keywords\"<|>\"ghostly visitations, consequences, moral revelation, isolation\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "c250789d45734212bf94b46504d5afcf": {
+ "return": "(\"entity\"<|>\"Marley\"<|>\"person\"<|>\"Marley is a deceased character whose ghost appears to Scrooge, representing regret and the consequences of a life led without compassion.\")##\n(\"entity\"<|>\"The Knocker\"<|>\"geo\"<|>\"The Knocker on the door serves as a symbolic gateway to the supernatural events, being the first point of eerie interaction for Scrooge.\")##\n(\"entity\"<|>\"The Bell\"<|>\"geo\"<|>\"The Bell is described as a disused bell that starts to swing and ring during the haunting, signifying the onset of supernatural occurrences.\")##\n(\"entity\"<|>\"The Wine-Merchant's Cellar\"<|>\"geo\"<|>\"The Wine-Merchant's Cellar is a location that holds significance in the story, where sounds of dragging chains are heard, adding to the ghostly ambiance.\")##\n(\"entity\"<|>\"The Grate\"<|>\"geo\"<|>\"The Grate is part of Scrooge's room where a small fire burns, contributing to the bleak and cold atmosphere of his setting.\")##\n(\"entity\"<|>\"The Cask\"<|>\"geo\"<|>\"The Cask is part of the cellar, contributing to the sounds associated with Marley's Ghost and aiding in creating a sense of dread.\")##\n(\"entity\"<|>\"The Dressing-Gown\"<|>\"geo\"<|>\"The Dressing-Gown is worn by Scrooge, illustrating his solitary, night-time routine and emphasizing his isolation.\")##\n(\"entity\"<|>\"The Candle\"<|>\"geo\"<|>\"The Candle represents a source of light in Scrooge's dark life, emphasizing the contrast between warmth and isolation.\")##\n(\"entity\"<|>\"Darkness\"<|>\"concept\"<|>\"Darkness symbolizes the oppressive loneliness and despair in Scrooge's life, as well as the supernatural elements of the story.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Marley\"<|>\"Scrooge's encounter with Marley’s Ghost serves as a confrontation with his past self and the life choices he regrets, depicting the theme of redemption.\"<|>\"redemption, confrontation\"<|>9)##\n(\"relationship\"<|>\"Marley\"<|>\"The Knocker\"<|>\"Marley is connected to the Knocker as the 
ghostly presence that Scrooge anticipates as he approaches his door, setting up the eerie tone of the haunting.\"<|>\"supernatural symbolism, foreshadowing\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Bell\"<|>\"The Bell's movement and sound foreshadow Marley's Ghost's appearance, enhancing the sense of impending supernatural activity in Scrooge's environment.\"<|>\"foreboding, supernatural\"<|>8)##\n(\"relationship\"<|>\"Marley's Ghost\"<|>\"The Wine-Merchant's Cellar\"<|>\"The sounds coming from The Wine-Merchant's Cellar amplify the haunting experience for Scrooge, linking the ghostly presence to a tangible location.\"<|>\"atmosphere, haunting\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Grate\"<|>\"The Grate's cold fire adds to the bleak setting of Scrooge's home, illustrating his disconnection from warmth and comfort before the supernatural events unfold.\"<|>\"isolation, setting mood\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Dressing-Gown\"<|>\"The Dressing-Gown emphasizes Scrooge's character as a solitary, reclusive man, reinforcing his isolation during the haunting.\"<|>\"characterization, solitude\"<|>7)##\n(\"relationship\"<|>\"Darkness\"<|>\"Scrooge\"<|>\"Darkness symbolizes Scrooge's internal feelings of isolation and regret, presenting the overall mood of his journey towards potential redemption.\"<|>\"themes, isolation\"<|>9)##\n(\"relationship\"<|>\"Marley's Ghost\"<|>\"The Candle\"<|>\"The Candle's light contrasts with the darkness of the haunting, representing the flicker of hope amidst Scrooge's despair.\"<|>\"hope, contrast\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Cask\"<|>\"The Cask is associated with the sounds of chains during the haunting, heightening Scrooge's fear and trepidation about the ghostly visit.\"<|>\"fear, tension\"<|>8)##\n<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "75c99644e77c82502e8543b94d61350d": {
+ "return": "(\"entity\"<|>\"Joe\"<|>\"person\"<|>\"Joe is portrayed as a character engaged in a conversation about the deceased, displaying a mix of humor and practicality in dealing with the possessions of the dead.\")##\n(\"entity\"<|>\"The Woman\"<|>\"person\"<|>\"The Woman is a character involved in the dialogue about the deceased's belongings, showing a carefree attitude towards their former owner.\")##\n(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a central character who experiences horror and reflection as he witnesses the events surrounding the deceased and contemplates his own life.\")##\n(\"entity\"<|>\"The Deceased\"<|>\"person\"<|>\"The Deceased is a character discussed among Joe and the Woman, representing themes of avarice and death, although his identity is not revealed.\")##\n(\"entity\"<|>\"Death\"<|>\"concept\"<|>\"Death is described as a powerful force in the narrative, represented through the imagery of a cold, rigid presence and the contemplation of mortality.\")##\n(\"relationship\"<|>\"Joe\"<|>\"The Woman\"<|>\"Joe and The Woman engage in dialogue about the deceased's belongings, showcasing their interactions and differing attitudes towards death.\"<|>\"dialogue, humor, death\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Deceased\"<|>\"Scrooge reflects on the condition and life of The Deceased, equating his own life with the man's fate, indicating a profound contemplation of mortality.\"<|>\"reflection, mortality\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Death\"<|>\"Scrooge grapples with the idea of Death as he witnesses the aftermath of The Deceased's life, leading to an inner turmoil.\"<|>\"existential crisis, contemplation\"<|>9)##\n(\"content_keywords\"<|>\"mortality, dialogue, reflection, death, avarice\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "933bd23f0f260c19afa06c88bc306289": {
+ "return": "(\"entity\"<|>\"The Mansion\"<|>\"geo\"<|>\"The Mansion is described as a large house of dull red brick, associated with Scrooge's childhood memories and reflecting the state of neglect.\")##\n(\"entity\"<|>\"The Spirit\"<|>\"organization\"<|>\"The Spirit is the entity that guides Scrooge through memories, prompting reflections on his past during the Christmas season.\")##\n(\"entity\"<|>\"The Solitary Child\"<|>\"person\"<|>\"The Solitary Child is a representation of Scrooge's younger self, neglected and alone in the school during Christmas, evoking emotions of sympathy and regret.\")##\n(\"entity\"<|>\"The Little Girl\"<|>\"person\"<|>\"The Little Girl is mentioned as appearing at the door, representing innocence and the enduring spirit of Christmas that contrasts with Scrooge's past.\")##\n(\"entity\"<|>\"The Stables\"<|>\"geo\"<|>\"The Stables are part of the Mansion's property, depicting the state of the neglected estate and surroundings.\")##\n(\"entity\"<|>\"The Chilly Hall\"<|>\"geo\"<|>\"The Chilly Hall is the entrance area of the Mansion where Scrooge begins to reflect on his past.\")##\n(\"entity\"<|>\"The Empty Storehouse\"<|>\"geo\"<|>\"The Empty Storehouse is mentioned as part of the Mansion, symbolizing decay and forgotten memories.\")##\n(\"content_keywords\"<|>\"remembrance, neglect, childhood neglect, emotional reflection\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "6f98316fbf0ffe67c9577902ce5acb18": {
+ "return": "(\"entity\"<|>\"The Counting-House\"<|>\"organization\"<|>\"The Counting-House is Scrooge's place of work, characterized by a gloomy demeanor and an atmosphere that reflects Scrooge's character.\")##\n(\"entity\"<|>\"Clerk\"<|>\"person\"<|>\"The Clerk is an unnamed character who serves Scrooge, exhibiting a hopeful disposition despite Scrooge's ill-tempered management.\")##\n(\"entity\"<|>\"Fog\"<|>\"geo\"<|>\"Fog is a weather condition mentioned that enhances the dreary atmosphere surrounding Scrooge's home and workplace.\")##\n(\"entity\"<|>\"Frost\"<|>\"geo\"<|>\"Frost is another atmospheric condition depicted, contributing to the cold and somber feeling of the environment during the story.\")##\n(\"entity\"<|>\"Camden Town\"<|>\"geo\"<|>\"Camden Town is a location where Bob Cratchit runs home to celebrate Christmas Eve, representing a more joyful and lively setting.\")##\n(\"entity\"<|>\"The Ghost of Marley\"<|>\"event\"<|>\"The Ghost of Marley is an event that is anticipated in the narrative, representing Scrooge's confrontation with his past and the supernatural.\")##\n(\"entity\"<|>\"Keyhole\"<|>\"concept\"<|>\"The Keyhole is a metaphorical representation of Scrooge's perspective and the barriers to his understanding of joy and connection.\")##\n(\"entity\"<|>\"The Knocker\"<|>\"concept\"<|>\"The Knocker is a physical object that becomes a symbol of Scrooge's psyche, transforming into Marley's face, representing his guilt and past.\")##\n(\"relationship\"<|>\"The Counting-House\"<|>\"Scrooge\"<|>\"The Counting-House is where Scrooge works, illustrating the environment that shapes his bleak personality.\"<|>\"work environment, character development\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Clerk\"<|>\"The Clerk communicates with Scrooge about his work, highlighting the lack of mutual respect in their employer-employee relationship.\"<|>\"employment relationship, respect\"<|>5)##\n(\"relationship\"<|>\"Bob Cratchit\"<|>\"The 
Counting-House\"<|>\"Bob Cratchit works at The Counting-House, emphasizing his servitude and the dismal working conditions under Scrooge.\"<|>\"employment, servitude\"<|>6)##\n(\"relationship\"<|>\"Fog\"<|>\"Scrooge\"<|>\"Fog represents the obscured vision of Scrooge's soul and emotions, aligning with his unknowable nature.\"<|>\"atmosphere, emotional reflection\"<|>8)##\n(\"relationship\"<|>\"Frost\"<|>\"Scrooge\"<|>\"Frost enhances the chilling atmosphere around Scrooge's life and mindset, reflecting his cold character.\"<|>\"atmospheric condition, characterization\"<|>8)##\n(\"relationship\"<|>\"Camden Town\"<|>\"Bob Cratchit\"<|>\"Camden Town serves as a location representing warmth and family contrast to Scrooge's loneliness.\"<|>\"contrast, warmth\"<|>6)##\n(\"relationship\"<|>\"The Ghost of Marley\"<|>\"Scrooge\"<|>\"The Ghost of Marley symbolizes Scrooge's past mistakes and serves as a catalyst for his transformation.\"<|>\"supernatural, moral reckoning\"<|>9)##\n(\"relationship\"<|>\"The Knocker\"<|>\"Scrooge\"<|>\"The Knocker, transforming into Marley's face, symbolizes Scrooge's ghostly past and impending redemption challenges.\"<|>\"symbolism, transformation\"<|>9)##\n(\"content_keywords\"<|>\"supernatural influence, work dynamics, familial warmth, transformation\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "09bcfbc45e8dca5a2c7d3008a6d4d077": {
+ "return": "(\"entity\"<|>\"Scrooge's Nephew\"<|>\"person\"<|>\"Scrooge's Nephew is characterized by his contagious laughter and good humor, contrasting with his uncle's miserliness.\")##\n(\"entity\"<|>\"Scrooge's Niece\"<|>\"person\"<|>\"Scrooge's Niece is depicted as a beautiful and lively character who expresses strong opinions about her uncle Scrooge.\")##\n(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is described as a comical old fellow who has a reputation for being miserly and dislikes Christmas.\")##\n(\"entity\"<|>\"Topper\"<|>\"person\"<|>\"Topper is a character who participates in the dinner and seems to have an interest in one of Scrooge's niece's sisters.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is presented as a time for merriment and social gatherings, which Scrooge's nephew advocates for despite his uncle's opposition.\")##\n(\"relationship\"<|>\"Scrooge's Nephew\"<|>\"Scrooge\"<|>\"Scrooge's Nephew contrasts with Scrooge, advocating for merriment and showing pity for his uncle's ill tempers.\"<|>\"family dynamics, contrasting views\"<|>8)##\n(\"relationship\"<|>\"Scrooge's Niece\"<|>\"Scrooge\"<|>\"Scrooge's Niece expresses indignation towards Scrooge's belief that Christmas is a humbug, demonstrating her strong opinions against his views.\"<|>\"ideological conflict, family dynamics\"<|>7)##\n(\"relationship\"<|>\"Scrooge's Nephew\"<|>\"Scrooge's Niece\"<|>\"Scrooge's Nephew and Scrooge's Niece share a humorous dynamic, both laughing and contrasting with Scrooge's grim outlook.\"<|>\"shared humor, family bonding\"<|>8)##\n(\"relationship\"<|>\"Topper\"<|>\"Scrooge's Niece\"<|>\"Topper shows interest in Scrooge's Niece, indicating a budding romantic interest during the family gathering.\"<|>\"romantic interest, social dynamics\"<|>6)##\n(\"content_keywords\"<|>\"family dynamics, humor, Christmas, ideological conflict\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "50af407f80d189a9540c6254bfc56182": {
+ "return": "(\"entity\"<|>\"The Boy\"<|>\"person\"<|>\"The Boy is a character mentioned briefly, who tries to hide behind another girl, indicating shyness among the festive crowd.\")##\n(\"entity\"<|>\"The Girl\"<|>\"person\"<|>\"The Girl is referenced in relation to The Boy, highlighting the dynamics of the young attendees at the Fezziwig's celebrations.\")##\n(\"entity\"<|>\"The Fiddler\"<|>\"person\"<|>\"The Fiddler is a musician who plays during the festivities, contributing to the atmosphere of joy and celebration.\")##\n(\"entity\"<|>\"Mrs. Fezziwig's Three Misses\"<|>\"person\"<|>\"Mrs. Fezziwig's Three Misses are her daughters, described as beaming and lovable, participating in the Christmas festivities.\")##\n(\"entity\"<|>\"The Young Followers\"<|>\"person\"<|>\"The Young Followers are young men and women employed by Fezziwig, who interact during the dances, showing youthful exuberance.\")##\n(\"entity\"<|>\"The Housemaid\"<|>\"person\"<|>\"The Housemaid is mentioned as part of the domestic staff involved in the celebration, contributing to the jovial atmosphere.\")##\n(\"entity\"<|>\"The Cook\"<|>\"person\"<|>\"The Cook is part of the household and participates in the festive gathering, alongside other staff.\")##\n(\"entity\"<|>\"The Baker\"<|>\"person\"<|>\"The Baker is a cousin to the housemaid, indicating the interconnectedness of the household staff during the festivities.\")##\n(\"entity\"<|>\"Cold Roast\"<|>\"food\"<|>\"Cold Roast is a dish served during the celebration, adding to the festive feast prepared by Fezziwig.\")##\n(\"entity\"<|>\"Cold Boiled\"<|>\"food\"<|>\"Cold Boiled is another dish mentioned, part of the spread during the festivities at Fezziwig's celebration.\")##\n(\"entity\"<|>\"Mince-Pies\"<|>\"food\"<|>\"Mince-Pies are a type of dessert featured at the celebration, contributing to the festive feast.\")##\n(\"entity\"<|>\"Negus\"<|>\"food\"<|>\"Negus is a drink served during the festivities, adding to the array of refreshments 
available.\")##\n(\"entity\"<|>\"The Warehouse\"<|>\"location\"<|>\"The Warehouse is the setting for Fezziwig's celebrations, transformed into a ballroom for the festivities.\")##\n(\"relationship\"<|>\"The Fiddler\"<|>\"The Dance\"<|>\"The Fiddler provides music for The Dance, enhancing the celebratory atmosphere of the event.\"<|>\"music, celebration\"<|>9)##\n(\"relationship\"<|>\"Fezziwig\"<|>\"The Young Followers\"<|>\"Fezziwig's celebration involves the Young Followers, highlighting the festive and communal aspect of the gathering.\"<|>\"festivity, community\"<|>8)##\n(\"relationship\"<|>\"Mrs. Fezziwig\"<|>\"The Young Followers\"<|>\"Mrs. Fezziwig interacts with the Young Followers during the festivities, showcasing warmth and hospitality.\"<|>\"hospitality, warmth\"<|>7)##\n(\"relationship\"<|>\"The Housemaid\"<|>\"The Bake\"<|>\"The Housemaid and The Baker are connected through their roles in the household during the festive gathering.\"<|>\"staff dynamics, celebration\"<|>6)##\n(\"relationship\"<|>\"The Dance\"<|>\"Cold Roast\"<|>\"Cold Roast is part of the festivities during The Dance, providing nourishment to the guests.\"<|>\"food, celebration\"<|>8)##\n(\"relationship\"<|>\"The Dance\"<|>\"Mince-Pies\"<|>\"Mince-Pies are served during The Dance, adding to the festive spirit of the gathering.\"<|>\"food, celebration\"<|>7)##\n(\"relationship\"<|>\"The Dance\"<|>\"Negus\"<|>\"Negus complements the festivities of The Dance, contributing to the overall celebration.\"<|>\"beverages, festivity\"<|>8)##\n(\"content_keywords\"<|>\"joviality, community, festive atmosphere, traditional foods\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "fe25dacdf5d1e0b2f5fb48141271e005": {
+ "return": "(\"entity\"<|>\"Fred\"<|>\"person\"<|>\"Fred is Ebenezer Scrooge's lively nephew, who embodies the spirit of Christmas and contrasts with Scrooge's misanthropy by inviting him to family gatherings.\")##\n(\"entity\"<|>\"Mr. Fezziwig\"<|>\"person\"<|>\"Mr. Fezziwig is a kind-hearted old merchant who served as a mentor to Scrooge, representing generosity and the joy of Christmas through his festive parties.\")##\n(\"entity\"<|>\"Mrs. Cratchit\"<|>\"person\"<|>\"Mrs. Cratchit is Bob Cratchit's supportive wife, who strives to provide for her family despite their financial struggles and embodies resilience.\")##\n(\"entity\"<|>\"Belinda Cratchit\"<|>\"person\"<|>\"Belinda is a daughter of Bob and Mrs. Cratchit, representing the Cratchit family's love and unity during difficult times.\")##\n(\"entity\"<|>\"Martha Cratchit\"<|>\"person\"<|>\"Martha is another daughter of Bob and Mrs. Cratchit, who, like her sister Belinda, symbolizes the warmth and togetherness in the Cratchit household.\")##\n(\"entity\"<|>\"Joe\"<|>\"person\"<|>\"Joe is a marine-store dealer depicted as receiving stolen goods, illustrating the consequences of crime and the darker sides of society.\")##\n(\"entity\"<|>\"Ghost of Jacob Marley\"<|>\"event\"<|>\"The Ghost of Jacob Marley is Scrooge's deceased business partner who returns as a spirit to warn Scrooge about the consequences of his life choices and to encourage change.\")##\n(\"entity\"<|>\"Fan\"<|>\"person\"<|>\"Fan is Scrooge's caring sister who represents innocence and love, showcasing a more tender side of Scrooge’s past.\")##\n(\"entity\"<|>\"Mrs. Fezziwig\"<|>\"person\"<|>\"Mrs. Fezziwig is the kind partner of Mr. Fezziwig, contributing to the festive atmosphere of their celebrations, emphasizing the importance of joyous gatherings.\")##\n(\"content_keywords\"<|>\"Christmas spirit, family, generosity, social dynamics\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "455c4a7b963c1fcad74e0724d83d7b3a": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a central character experiencing a transformative journey as he reflects on death and its impact on others, showcasing his internal struggle and growth.\")##\n(\"entity\"<|>\"Spirit\"<|>\"concept\"<|>\"The Spirit represents a supernatural entity guiding Scrooge through his reflections on life, death, and human emotions, prompting self-awareness.\")##\n(\"entity\"<|>\"Caroline\"<|>\"person\"<|>\"Caroline is a character representing hope and emotional depth, reacting to her husband's news about their financial situation with a mixture of dread and optimism.\")##\n(\"entity\"<|>\"Bob Cratchit\"<|>\"person\"<|>\"Bob Cratchit is depicted as a struggling father, representative of familial love and concern, particularly for his children, including Tiny Tim.\")##\n(\"entity\"<|>\"Tiny Tim\"<|>\"person\"<|>\"Tiny Tim is a symbol of innocence and vulnerability within the Cratchit family, evoking compassion and highlighting the harsh realities of illness and poverty.\")##\n(\"entity\"<|>\"Cratchit Family\"<|>\"organization\"<|>\"The Cratchit family represents a unit bound by love and resilience in the face of adversity, dealing with emotional and financial struggles.\")##\n(\"entity\"<|>\"Death\"<|>\"event\"<|>\"Death refers to the passing of a character that triggers a range of emotions and reflections among the living, influencing their lives and relationships.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Spirit\"<|>\"Scrooge learns from the Spirit about the consequences of his actions and the sentiments evoked by death, leading to personal growth.\"<|>\"reflection, guidance\"<|>8)##\n(\"relationship\"<|>\"Caroline\"<|>\"Bob Cratchit\"<|>\"Caroline expresses hope combined with anxiety regarding their financial situation, affecting her relationship with Bob Cratchit.\"<|>\"hope, financial strain\"<|>7)##\n(\"relationship\"<|>\"Bob Cratchit\"<|>\"Tiny Tim\"<|>\"Bob Cratchit's concern for Tiny Tim illustrates the 
deep familial love and the challenges facing the Cratchit family.\"<|>\"family love, vulnerability\"<|>9)##\n(\"relationship\"<|>\"Cratchit Family\"<|>\"Death\"<|>\"The death event impacts the Cratchit family deeply, influencing their emotional state and perceptions of hope and kindness.\"<|>\"grief, family dynamics\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Cratchit Family\"<|>\"Scrooge's reflections on the Cratchit family's situation symbolize his evolving understanding of compassion and social responsibility.\"<|>\"compassion, social awareness\"<|>7)##\n(\"content_keywords\"<|>\"transformation, family dynamics, death, compassion\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "116ac1ae1f082545ac96dbcd9c9b9366": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a character who experiences a transformation, feeling joyful and generous as he interacts with others after Christmas.\")##\n(\"entity\"<|>\"Bob\"<|>\"person\"<|>\"Bob is referenced humorously by Scrooge in the context of a joke regarding a large turkey.\")##\n(\"entity\"<|>\"Camden Town\"<|>\"geo\"<|>\"Camden Town is a location mentioned in relation to the logistics of transporting a large turkey.\" )##\n(\"entity\"<|>\"Fred\"<|>\"person\"<|>\"Fred is Scrooge's nephew, mentioned in the context of Scrooge's visit to his house.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is a festive occasion that has led to a joyful transformation in Scrooge's character and actions.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Bob\"<|>\"Scrooge makes a humorous comment about sending a large turkey to Bob, indicating a lighthearted relationship.\" <|>\"humor, generosity\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Fred\"<|>\"Scrooge expresses a desire to visit Fred, which signifies a reconciliatory gesture toward his family.\"<|>\"family connection, reconciliation\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Christmas\"<|>\"Scrooge's transformation and newfound joy are directly linked to the spirit of Christmas.\" <|>\"transformation, festive spirit\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Camden Town\"<|>\"Scrooge mentions Camden Town in relation to the impracticality of delivering a large turkey there.\" <|>\"logistics, festive preparation\"<|>5)##\n(\"content_keywords\"<|>\"transformation, joy, Christmas spirit, family connection\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "984442fe3bdd9f4f0ef6e22dbcef8654": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a character who undergoes a transformation, filled with joy and determination to make amends during Christmas.\")##\n(\"entity\"<|>\"Jacob Marley\"<|>\"person\"<|>\"Jacob Marley is the ghost who represents Scrooge's past and serves as a catalyst for his change.\")##\n(\"entity\"<|>\"Christmas Day\"<|>\"event\"<|>\"Christmas Day is a significant holiday that marks the time of Scrooge's revelation and intent to change.\")##\n(\"entity\"<|>\"Bob Cratchit\"<|>\"person\"<|>\"Bob Cratchit is a character associated with Scrooge, representing the family Scrooge intends to support by sending a turkey.\")##\n(\"entity\"<|>\"Tiny Tim\"<|>\"person\"<|>\"Tiny Tim is a member of the Cratchit family, who represents the innocence and impact of Scrooge's generosity.\")##\n(\"entity\"<|>\"The Poulterer's\"<|>\"organization\"<|>\"The Poulterer's is a shop where Scrooge intends to buy a turkey for the Cratchit family.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Jacob Marley\"<|>\"Scrooge's transformation is prompted by the appearance and message of Jacob Marley, his former business partner.\"<|>\"transformation, influence\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Christmas Day\"<|>\"Scrooge's change in attitude and actions occur on Christmas Day, making it a pivotal event in his story.\"<|>\"seasonal significance, renewal\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Bob Cratchit\"<|>\"Scrooge decides to send a turkey to Bob Cratchit, indicating a newfound kindness towards him and his family.\"<|>\"generosity, relationship change\"<|>7)##\n(\"relationship\"<|>\"Bob Cratchit\"<|>\"Tiny Tim\"<|>\"Bob Cratchit is Tiny Tim's father, and the turkey Scrooge plans to send will impact Tiny Tim's well-being.\"<|>\"family, support\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Poulterer's\"<|>\"Scrooge interacts with The Poulterer's to purchase a turkey as an act of generosity toward Bob Cratchit's family.\"<|>\"generosity, 
commerce\"<|>8)##\n(\"content_keywords\"<|>\"transformation, generosity, Christmas spirit, family\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "cfc3e5735472a18ba0f2a89f51843d86": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a central character who is initially depicted as dogged and fearful, but experiences a transformation through his encounters with spirits.\")##\n(\"entity\"<|>\"Ghost of Christmas Present\"<|>\"event\"<|>\"The Ghost of Christmas Present is a spirit that embodies generosity and abundance, leading Scrooge to self-reflection and understanding.\")##\n(\"entity\"<|>\"Turkey\"<|>\"organization\"<|>\"Turkey here represents a food item that symbolizes abundance and festive celebration, present in the spirit's demonstration of cheer.\")##\n(\"entity\"<|>\"Holly\"<|>\"organization\"<|>\"Holly is a plant associated with Christmas, featured prominently in the Spirit's attire, symbolizing joy and celebration during the festive season.\")##\n(\"entity\"<|>\"Ivy\"<|>\"organization\"<|>\"Ivy is another plant associated with the festive decor, symbolizing eternity and fidelity, adding to the spirit of the occasion.\")##\n(\"entity\"<|>\"City\"<|>\"geo\"<|>\"The City represents the urban setting where Scrooge's transformation takes place, symbolizing the broader society impacted by his changes.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Ghost of Christmas Present\"<|>\"Scrooge is led by the Ghost of Christmas Present, who teaches him valuable lessons about generosity and compassion.\"<|>\"transformation, mentoring\"<|>10)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Turkey\"<|>\"The Ghost showcases the Turkey as a representation of abundance during the Christmas season, emphasizing the spirit of generosity.\"<|>\"abundance, symbolism\"<|>8)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Holly\"<|>\"Holly is part of the Ghost's attire, reinforcing the themes of joy and celebration during Christmas.\"<|>\"festive spirit, symbolism\"<|>9)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Ivy\"<|>\"Ivy, like Holly, symbolizes continuity and joy, complementing the festive imagery presented 
by the Ghost.\"<|>\"festive spirit, symbolism\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"City\"<|>\"Scrooge's transformation is set against the backdrop of the City, reflecting the societal implications of his personal changes.\"<|>\"societal impact, personal change\"<|>7)##\n(\"content_keywords\"<|>\"transformation, Christmas spirit, generosity, abundance\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "596ac46576474fa7c137975ee2b863b8": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a character who experiences a transformation during the Christmas holidays and receives lessons from a Spirit regarding humanity and compassion.\")##\n(\"entity\"<|>\"Fred\"<|>\"person\"<|>\"Fred is Scrooge's nephew who expresses admiration for his uncle and proposes a toast in his honor during a Christmas gathering.\")##\n(\"entity\"<|>\"Ghost\"<|>\"organization\"<|>\"The Ghost represents a spiritual guide that takes Scrooge on a journey to learn important lessons about compassion and humanity.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is a significant holiday that serves as the backdrop for Scrooge's transformation and the events that unfold during the narrative.\")##\n(\"entity\"<|>\"Ignorance\"<|>\"concept\"<|>\"Ignorance is personified as a boy shown to Scrooge by the Spirit, representing a societal issue that appeals for attention and change.\")##\n(\"entity\"<|>\"Humanity\"<|>\"concept\"<|>\"Humanity’s relationship with its most vulnerable members is central to the lessons presented to Scrooge through the Spirit's journey.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Fred\"<|>\"Scrooge and Fred share a familial bond, with Fred expressing goodwill and celebrating Scrooge despite his uncle's previous behavior.\"<|>\"family connection, goodwill\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Ghost\"<|>\"The Ghost takes Scrooge on a journey to teach him lessons about generosity and compassion towards others.\"<|>\"spiritual guidance, transformation\"<|>9)##\n(\"relationship\"<|>\"Christmas\"<|>\"Scrooge\"<|>\"Christmas serves as the catalyst for Scrooge's transformation, highlighting themes of giving and compassion.\"<|>\"transformation, holiday significance\"<|>10)##\n(\"relationship\"<|>\"Ghost\"<|>\"Ignorance\"<|>\"The Ghost presents Ignorance to Scrooge as a representation of societal neglect and a call for responsibility from humanity.\"<|>\"societal issues, 
responsibility\"<|>8)##\n(\"relationship\"<|>\"Ghost\"<|>\"Humanity\"<|>\"The Ghost symbolizes humanity's need for introspection and change, urging Scrooge to recognize and act upon social issues.\"<|>\"social consciousness, humanitarian appeal\"<|>9)##\n(\"content_keywords\"<|>\"transformation, compassion, family, societal neglect\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "9480a8c953892d44bd64f1e51589df33": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a key character who experiences a transformation throughout the narrative, beginning from apathy to a desire for redemption and reflection on his life choices.\")##\n(\"entity\"<|>\"Spirit\"<|>\"organization\"<|>\"The Spirit represents a ghostly entity, guiding Scrooge through visions of potential future events and urging him to acknowledge societal issues.\")##\n(\"entity\"<|>\"Ignorance\"<|>\"concept\"<|>\"Ignorance is personified as a boy that symbolizes the dangers of ignoring social issues and the consequences of a lack of awareness.\")##\n(\"entity\"<|>\"Want\"<|>\"concept\"<|>\"Want is depicted as a girl, representing the consequences of poverty and desire in society, warning against neglecting these issues.\")##\n(\"entity\"<|>\"Ghost of Christmas Yet to Come\"<|>\"event\"<|>\"The Ghost of Christmas Yet to Come is an event within the story that signifies Scrooge's confrontation with the future consequences of his actions.\")##\n(\"entity\"<|>\"City\"<|>\"geo\"<|>\"The City represents the urban setting where Scrooge witnesses the indifference of its inhabitants regarding his death.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Spirit\"<|>\"Scrooge engages with the Spirit, who guides him through reflections on his life and warns him of societal issues.\"<|>\"guidance, transformation\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Ignorance\"<|>\"Scrooge is warned about the dangers posed by Ignorance, which embodies significant societal issues he has ignored.\"<|>\"social awareness, warning\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Want\"<|>\"Scrooge is shown the consequences of Want, reflecting on the impacts of desire and neglect in society.\"<|>\"social critique, consequences\"<|>9)##\n(\"relationship\"<|>\"Spirit\"<|>\"Ghost of Christmas Yet to Come\"<|>\"The Spirit is connected to the Ghost of Christmas Yet to Come as both serve to illustrate potential future outcomes for 
Scrooge.\"<|>\"foreshadowing, guidance\"<|>7)##\n(\"relationship\"<|>\"City\"<|>\"Scrooge\"<|>\"Scrooge experiences the indifference of the City towards his death, illustrating societal apathy.\"<|>\"indifference, societal critique\"<|>8)##\n(\"content_keywords\"<|>\"transformation, social awareness, consequences, indifference\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "fcf8d38c5fa7849c04b307de5329e635": {
+ "return": "(\"entity\"<|>\"Tiny Tim\"<|>\"person\"<|>\"Tiny Tim is a young child in the Cratchit family, whose health and well-being are of great concern to his family, and he is remembered fondly.\")##\n(\"entity\"<|>\"Bob Cratchit\"<|>\"person\"<|>\"Bob Cratchit is the father of Tiny Tim, characterized by his kindness and emotional depth, especially in relation to his family's struggles.\")##\n(\"entity\"<|>\"Mrs. Cratchit\"<|>\"person\"<|>\"Mrs. Cratchit is Bob's wife, a supportive mother who manages the household while showing concern for her children's well-being.\")##\n(\"entity\"<|>\"Peter\"<|>\"person\"<|>\"Peter is one of the Cratchit children, who occasionally expresses a sense of duty and future aspirations, contributing to family discussions.\")##\n(\"entity\"<|>\"Mr. Scrooge's Nephew\"<|>\"person\"<|>\"Mr. Scrooge's Nephew is a kind gentleman who shows concern for Bob Cratchit and offers support, symbolizing generosity and compassion.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is a festive occasion that brings the Cratchit family together, highlighting themes of love, loss, and remembrance.\")##\n(\"relationship\"<|>\"Bob Cratchit\"<|>\"Tiny Tim\"<|>\"Bob Cratchit deeply cares for Tiny Tim, demonstrating emotional support and love, especially in moments of distress.\"<|>\"family bond, love\"<|>9)##\n(\"relationship\"<|>\"Mrs. Cratchit\"<|>\"Tiny Tim\"<|>\"Mrs. Cratchit shows concern for Tiny Tim's health and well-being, reflecting her role as a nurturing mother.\"<|>\"maternal love, support\"<|>9)##\n(\"relationship\"<|>\"Bob Cratchit\"<|>\"Mr. Scrooge's Nephew\"<|>\"Bob Cratchit shares a positive interaction with Mr. 
Scrooge's Nephew, who expresses kindness and concern for his family.\"<|>\"kindness, support\"<|>7)##\n(\"relationship\"<|>\"Peter\"<|>\"Bob Cratchit\"<|>\"Peter interacts with Bob Cratchit, showing the dynamics of a supportive family unit while discussing aspirations.\"<|>\"family discussions, support\"<|>6)##\n(\"relationship\"<|>\"Mrs. Cratchit\"<|>\"Bob Cratchit\"<|>\"Mrs. Cratchit supports Bob Cratchit in his parenting efforts and encourages the family’s unity during challenging times.\"<|>\"partnership, family unity\"<|>8)##\n(\"content_keywords\"<|>\"family, love, kindness, remembrance, Christmas\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "703928b09650c3af9aa88f22dde5aa52": {
+ "return": "(\"entity\"<|>\"Old Joe\"<|>\"person\"<|>\"Old Joe is a grey-haired man, nearly seventy years old, who runs a shop and conducts business in a rather nonchalant manner, showcasing a certain charm amidst his questionable dealings.\")## \n(\"entity\"<|>\"Mrs. Dilber\"<|>\"person\"<|>\"Mrs. Dilber is a laundress who is involved in the discussion about the belongings they are handling, exhibiting a pragmatic attitude towards the situation.\")## \n(\"entity\"<|>\"The Woman\"<|>\"person\"<|>\"The Woman is a character who enters the shop with a heavy bundle and demonstrates a bold and defiant demeanor, eager to take part in the conversation about the deceased's belongings.\")## \n(\"entity\"<|>\"The Shop\"<|>\"organization\"<|>\"The Shop is Old Joe's place of business and serves as the meeting location for the characters, where they evaluate and exchange stolen items.\")## \n(\"entity\"<|>\"The Bundle\"<|>\"event\"<|>\"The Bundle refers to the collection of stolen items that the characters present to Old Joe for appraisal.\")## \n(\"relationship\"<|>\"Old Joe\"<|>\"The Woman\"<|>\"Old Joe interacts with The Woman as she encourages him to evaluate her stolen bundle, indicating a business-like relationship based on appraisal and exchange.\"<|>\"business interaction, appraisal\"<|>7)## \n(\"relationship\"<|>\"Old Joe\"<|>\"Mrs. Dilber\"<|>\"Old Joe assesses the belongings of Mrs. Dilber, reinforcing his role as an appraiser of stolen goods and demonstrating a tendency to be generous towards women.\"<|>\"business interaction, appraisal\"<|>6)## \n(\"relationship\"<|>\"Mrs. Dilber\"<|>\"The Woman\"<|>\"Mrs. 
Dilber and The Woman share a conversational dynamic that touches on the moral implications of their actions, creating a camaraderie as they discuss the deceased.\"<|>\"moral discussion, camaraderie\"<|>5)## \n(\"relationship\"<|>\"Old Joe\"<|>\"The Shop\"<|>\"Old Joe's activities, including the appraisal of stolen items, solidify the shop's role as a hub for unscrupulous dealings.\"<|>\"business hub, unscrupulous dealings\"<|>8)## \n(\"content_keywords\"<|>\"moral ambiguity, camaraderie, valuation, stolen goods\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "10c3ebd5e67ddaefba0e8292c3ba4c3e": {
+ "return": "(\"entity\"<|>\"Project Gutenberg\"<|>\"organization\"<|>\"Project Gutenberg is an organization dedicated to the free distribution of electronic works and the preservation of the cultural heritage by providing access to public domain literature.\")##\n(\"entity\"<|>\"The Foundation\"<|>\"organization\"<|>\"The Foundation, referred to as the Project Gutenberg Literary Archive Foundation, owns the compilation copyright of Project Gutenberg's collection.\")##\n(\"entity\"<|>\"United States\"<|>\"geo\"<|>\"The United States is a geographical location with specific copyright laws affecting the distribution of electronic works.\")##\n(\"entity\"<|>\"Project Gutenberg License\"<|>\"event\"<|>\"The Project Gutenberg License is a set of terms that govern the use and distribution of works associated with Project Gutenberg.\")##\n(\"relationship\"<|>\"Project Gutenberg\"<|>\"The Foundation\"<|>\"Project Gutenberg is operated and managed by The Foundation, which governs its copyright and licensing aspects.\"<|>\"organizational governance, copyright management\"<|>9)##\n(\"relationship\"<|>\"Project Gutenberg\"<|>\"United States\"<|>\"Project Gutenberg operates within the legal framework of the United States, influencing its copyright policies and user access.\"<|>\"legal framework, copyright law\"<|>8)##\n(\"relationship\"<|>\"The Foundation\"<|>\"Project Gutenberg License\"<|>\"The Foundation authorizes the terms of the Project Gutenberg License, which regulates the distribution of its works.\"<|>\"licensing, organizational policy\"<|>9)##\n(\"relationship\"<|>\"Project Gutenberg License\"<|>\"United States\"<|>\"The Project Gutenberg License is compliant with the copyright laws of the United States, affecting how works can be shared.\"<|>\"legal compliance, distribution terms\"<|>8)##\n(\"content_keywords\"<|>\"free distribution, copyright, licensing, public domain\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "218340b48cdbb62185bf8a16e4b92aeb": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is depicted as a stingy and unfeeling man, who is criticized during a Christmas feast and is viewed negatively by his family.\")##\n(\"entity\"<|>\"Bob Cratchit\"<|>\"person\"<|>\"Bob Cratchit is a character who appears to be the voice of moderation and kindness, attempting to maintain positivity during the family Christmas feast despite Scrooge's presence.\")##\n(\"entity\"<|>\"Mrs. Cratchit\"<|>\"person\"<|>\"Mrs. Cratchit expresses strong disdain for Scrooge, highlighting her feelings towards him during the Christmas festivities.\")##\n(\"entity\"<|>\"Tiny Tim\"<|>\"person\"<|>\"Tiny Tim is depicted as a vulnerable child who participates in the Christmas toast but seems indifferent to it, symbolizing the family's struggles.\")##\n(\"entity\"<|>\"Christmas Day\"<|>\"event\"<|>\"Christmas Day serves as the central event around which the family gathers and reflects on their feelings towards Scrooge.\")##\n(\"entity\"<|>\"Cratchit Family\"<|>\"organization\"<|>\"The Cratchit Family is portrayed as a close-knit group who, despite their hardships, find joy and gratitude during the Christmas gathering.\")##\n(\"entity\"<|>\"The Spirit\"<|>\"organization\"<|>\"The Spirit represents the essence of Christmas cheer and generosity, influencing the atmosphere and morale during the feast.\")##\n(\"relationship\"<|>\"Bob Cratchit\"<|>\"Scrooge\"<|>\"Bob Cratchit tries to maintain a positive atmosphere during the Christmas feast despite the negative association with Scrooge.\"<|>\"family dynamics, acknowledgment\"<|>6)##\n(\"relationship\"<|>\"Mrs. Cratchit\"<|>\"Scrooge\"<|>\"Mrs. 
Cratchit openly criticizes Scrooge during the Christmas dinner, expressing her feelings towards him and emphasizing the family's resentment.\"<|>\"criticism, emotional conflict\"<|>8)##\n(\"relationship\"<|>\"Tiny Tim\"<|>\"Cratchit Family\"<|>\"Tiny Tim, as part of the Cratchit Family, represents the innocence and vulnerability of the family, affecting their dynamics during Christmas.\"<|>\"family ties, innocence\"<|>7)##\n(\"relationship\"<|>\"Cratchit Family\"<|>\"Christmas Day\"<|>\"The Cratchit Family gathers to celebrate Christmas Day, reflecting their togetherness and joy despite their hardships.\"<|>\"celebration, tradition\"<|>9)##\n(\"relationship\"<|>\"The Spirit\"<|>\"Christmas Day\"<|>\"The Spirit embodies the joy and generosity of Christmas Day, enhancing the celebratory atmosphere for the Cratchit Family.\"<|>\"festivity, cheer\"<|>10)##\n(\"content_keywords\"<|>\"Christmas, family dynamics, generosity, celebration\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "0d3cba455a41514103ad7f64e7073f3a": {
+ "return": "(\"entity\"<|>\"Scrooge's Nephew's Friends\"<|>\"organization\"<|>\"Scrooge's Nephew's Friends are a group of individuals gathered around the dinner table, characterized by their shared laughter and merriment in contrast to Scrooge.\")##\n(\"entity\"<|>\"Scrooge's Niece's Sisters\"<|>\"organization\"<|>\"Scrooge's Niece's Sisters are part of the family gathering, contributing to the lively atmosphere and expressing opinions about Scrooge.\")##\n(\"entity\"<|>\"The Dinner\"<|>\"event\"<|>\"The Dinner is a social event characterized by laughter, music, and discussions about Scrooge, portraying a festive Christmas gathering.\")##\n(\"entity\"<|>\"The Fire\"<|>\"concept\"<|>\"The Fire represents warmth and comfort during the family gathering, enhancing the atmosphere of camaraderie.\")##\n(\"entity\"<|>\"The Bottle\"<|>\"concept\"<|>\"The Bottle symbolizes celebration and the shared merriment among the characters during their Christmas gathering.\")##\n(\"entity\"<|>\"Music\"<|>\"event\"<|>\"Music is an activity enjoyed by the family, contributing to the festive environment of their gathering.\")##\n(\"entity\"<|>\"Laughter\"<|>\"concept\"<|>\"Laughter symbolizes joy and the contagious nature of good humor among the characters, especially in the context of Christmas.\")##\n(\"content_keywords\"<|>\"joy, celebration, family gathering, contrast, festive atmosphere\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "21c6a49f73df0dd3242f406476537037": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a central character who undergoes emotional changes and reflects on his past during a Christmas gathering.\")##\n(\"entity\"<|>\"Scrooge's niece\"<|>\"person\"<|>\"Scrooge's niece is depicted as a joyful participant in the evening's festivities, engaging in games and music.\")##\n(\"entity\"<|>\"Topper\"<|>\"person\"<|>\"Topper is a character known for his lively and mischievous behavior during the games at the festive gathering.\")##\n(\"entity\"<|>\"The Ghost of Christmas Present\"<|>\"event\"<|>\"The Ghost of Christmas Present is a spirit that guides Scrooge through his memories and the joy of the present Christmas celebration.\")##\n(\"entity\"<|>\"Blind Man's-Buff\"<|>\"event\"<|>\"Blind Man's-Buff is a game played during the gathering, showcasing the playful and carefree nature of the participants.\")##\n(\"entity\"<|>\"Forfeits\"<|>\"event\"<|>\"Forfeits is another game played that allows participants to engage in fun activities, emphasizing the spirit of the gathering.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Scrooge's niece\"<|>\"Scrooge's niece embraces the joyful atmosphere of the celebration, engaging with family and inviting good-natured fun.\"<|>\"family bonds, celebration\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Topper\"<|>\"Scrooge observes Topper's antics and expressions of joy during the games, revealing a reflection on human nature and merriment.\"<|>\"observation, merriment\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Ghost of Christmas Present\"<|>\"The Ghost of Christmas Present observes Scrooge's transformation and emotional engagement with his memories of Christmas.\"<|>\"transformation, guidance\"<|>9)##\n(\"relationship\"<|>\"Topper\"<|>\"Blind Man's-Buff\"<|>\"Topper participates in the game of Blind Man's-Buff, contributing to the laughter and enjoyment of the gathering.\"<|>\"participation, joy\"<|>7)##\n(\"relationship\"<|>\"Scrooge's 
niece\"<|>\"Forfeits\"<|>\"Scrooge's niece actively engages in the game of Forfeits, highlighting her enjoyment and spirited nature during the Christmas celebration.\"<|>\"active participation, enjoyment\"<|>8)##\n(\"content_keywords\"<|>\"Christmas celebration, family interaction, joy, transformation\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "0f6fd6369fa3a1fc4eaaa76ac7cdd24d": {
+ "return": "(\"entity\"<|>\"The Bed\"<|>\"geo\"<|>\"The Bed represents a central object in the scene, symbolizing the final resting place of The Deceased and contributing to the themes of mortality and neglect.\")##\n(\"entity\"<|>\"The Blankets\"<|>\"geo\"<|>\"The Blankets are discussed in the context of the deceased's possessions, highlighting themes of care and abandonment after death.\")##\n(\"entity\"<|>\"The Dark Room\"<|>\"geo\"<|>\"The Dark Room serves as the setting for the scene, enhancing the atmosphere of dread and the theme of impending death.\")##\n(\"entity\"<|>\"The Flannel Bag\"<|>\"geo\"<|>\"The Flannel Bag contains money and symbolizes the materialistic aspects of life and the relationships surrounding The Deceased’s possessions.\")##\n(\"entity\"<|>\"The Spirit\"<|>\"concept\"<|>\"The Spirit represents the supernatural presence urging Scrooge to reflect deeply on the consequences of his life choices, particularly in relation to Death.\")##\n(\"entity\"<|>\"Avarice\"<|>\"concept\"<|>\"Avarice is a theme personified in Scrooge's contemplation of The Deceased, symbolizing greed and its ultimate consequences in death.\")##\n(\"relationship\"<|>\"Joe\"<|>\"The Deceased\"<|>\"Joe discusses the belongings and implications of The Deceased's life, indicating a financial and material relationship with the dead.\"<|>\"materialism, death\"<|>5)##\n(\"relationship\"<|>\"The Woman\"<|>\"The Deceased\"<|>\"The Woman's carefree attitude while discussing The Deceased reflects a stark contrast to how their life was valued in death.\"<|>\"indifference, mortality\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Blankets\"<|>\"Scrooge contemplates the significance of The Blankets, linking them to the neglected state of The Deceased in death.\"<|>\"neglect, reflection\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Bed\"<|>\"Scrooge observes The Bed where The Deceased lies, prompting profound thoughts on mortality and regret.\"<|>\"contemplation, 
mortality\"<|>9)##\n(\"relationship\"<|>\"The Spirit\"<|>\"Scrooge\"<|>\"The Spirit challenges Scrooge's perspective on life, urging him to consider the implications of his actions in light of death.\"<|>\"supernatural guidance, self-reflection\"<|>10)##\n(\"content_keywords\"<|>\"mortality, reflection, avarice, supernatural, neglect\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "5f787a11337980d80fd28d5e22074583": {
+ "return": "(\"entity\"<|>\"Cratchit Family\"<|>\"organization\"<|>\"The Cratchit Family is portrayed as a loving unit celebrating Christmas together, with various members actively participating in the festivities.\")##\n(\"entity\"<|>\"Bob Cratchit\"<|>\"person\"<|>\"Bob Cratchit is the father of the Cratchit family, characterized by his warmth and dedication to his family, particularly in caring for Tiny Tim.\")##\n(\"entity\"<|>\"Tiny Tim\"<|>\"person\"<|>\"Tiny Tim is a young boy in the Cratchit family, characterized by his spirit and resilience despite being a cripple, symbolizing hope and kindness during Christmas.\")##\n(\"entity\"<|>\"Martha Cratchit\"<|>\"person\"<|>\"Martha Cratchit is a daughter in the Cratchit family who contributes to the household and shows affection for her family, particularly her father Bob.\")##\n(\"entity\"<|>\"Christmas Day\"<|>\"event\"<|>\"Christmas Day is celebrated by the Cratchit family with a communal meal and expressions of joy and togetherness, highlighting the importance of family and gratitude.\")##\n(\"relationship\"<|>\"Bob Cratchit\"<|>\"Tiny Tim\"<|>\"Bob Cratchit shows deep care for Tiny Tim, helping him and expressing pride in his strength and character despite challenges.\"<|>\"father-son bond, care\"<|>9)##\n(\"relationship\"<|>\"Tiny Tim\"<|>\"Cratchit Family\"<|>\"Tiny Tim's presence in the Cratchit family adds to their joy and highlights the family's spirit of togetherness during Christmas.\"<|>\"family bond, celebration\"<|>8)##\n(\"relationship\"<|>\"Martha Cratchit\"<|>\"Bob Cratchit\"<|>\"Martha shows affection for Bob Cratchit upon her arrival, indicating a warm father-daughter relationship within the family.\"<|>\"family bond, affection\"<|>7)##\n(\"relationship\"<|>\"Cratchit Family\"<|>\"Christmas Day\"<|>\"The Cratchit Family's celebration on Christmas Day emphasizes their unity and joy in sharing the holiday together.\"<|>\"celebration, unity\"<|>10)##\n(\"content_keywords\"<|>\"family, Christmas, 
celebration, togetherness\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "dc71d9f6de748681ea5f44140f2e5530": {
+ "return": "(\"entity\"<|>\"The Cratchit Family\"<|>\"organization\"<|>\"The Cratchit Family consists of Bob, Mrs. Cratchit, and their children, highlighting themes of familial love and support amidst hardship.\")##\n(\"entity\"<|>\"The Two Young Cratchits\"<|>\"person\"<|>\"The Two Young Cratchits are the younger children in the Cratchit family, who express affection towards their father, Bob.\")##\n(\"entity\"<|>\"Christmas Dinner\"<|>\"event\"<|>\"Christmas Dinner is an important family gathering that symbolizes togetherness and joy despite hardships.\")##\n(\"entity\"<|>\"Sunday\"<|>\"event\"<|>\"Sunday is presented as a day of walking to a green place, signifying hope and connection to nature in the Cratchit family's lives.\")##\n(\"entity\"<|>\"The Street\"<|>\"geo\"<|>\"The Street is where Mr. Scrooge's Nephew encounters Bob Cratchit, facilitating a significant moment of kindness in the narrative.\")##\n(\"entity\"<|>\"The Work on the Table\"<|>\"concept\"<|>\"The Work on the Table represents the industriousness and efforts of the Cratchit family to provide and bond together.\")##\n(\"content_keywords\"<|>\"family struggles, kindness, support, hope, togetherness\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "f36cccb2f6122a29414c9ed057806f4e": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a character who expresses concern about social issues related to dining opportunities on Christmas Day.\")##\n(\"entity\"<|>\"Bob Cratchit\"<|>\"person\"<|>\"Bob Cratchit is Scrooge's clerk, depicted as a humble family man with limited income but a warm home atmosphere.\")##\n(\"entity\"<|>\"Mrs. Cratchit\"<|>\"person\"<|>\"Mrs. Cratchit is Bob's wife, portrayed as making an effort to prepare a festive atmosphere despite their financial struggles.\")##\n(\"entity\"<|>\"Belinda Cratchit\"<|>\"person\"<|>\"Belinda Cratchit is one of Bob Cratchit's daughters, assisting her mother in laying the dining cloth.\")##\n(\"entity\"<|>\"Peter Cratchit\"<|>\"person\"<|>\"Peter Cratchit is Bob's son, excited about dressing up and participating in the festive meal preparation.\")##\n(\"entity\"<|>\"The Ghost of Christmas Present\"<|>\"event\"<|>\"The Ghost of Christmas Present is a supernatural being that shows Scrooge the true spirit of Christmas, including kindness towards the poor.\")##\n(\"entity\"<|>\"Christmas Day\"<|>\"event\"<|>\"Christmas Day is a holiday during which the characters express joy and the significance of communal dining.\")##\n(\"entity\"<|>\"Baker's Shops\"<|>\"geo\"<|>\"Baker's Shops are locations where people bring their dinners, showcasing the community's festive preparation.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Bob Cratchit\"<|>\"Scrooge is Bob Cratchit's employer, and he reflects on the challenges Bob faces regarding family and food on Christmas Day.\".\"<|>\"employment, social reflection\"<|>6)##\n(\"relationship\"<|>\"Bob Cratchit\"<|>\"Mrs. Cratchit\"<|>\"Bob and Mrs. Cratchit represent a family working together to create a festive and warm environment despite their limited means.\"<|>\"family unity, celebration\"<|>8)##\n(\"relationship\"<|>\"Peter Cratchit\"<|>\"Mrs. 
Cratchit\"<|>\"Peter Cratchit assists his mother, reflecting the family dynamic and contribution of children during festive preparations.\"<|>\"family participation, dynamic\"<|>7)##\n(\"relationship\"<|>\"The Ghost of Christmas Present\"<|>\"Scrooge\"<|>\"The Ghost of Christmas Present shows Scrooge the joy and importance of kindness during Christmas, impacting his perspective on social issues.\"<|>\"supernatural guidance, social impact\"<|>9)##\n(\"relationship\"<|>\"Christmas Day\"<|>\"Baker's Shops\"<|>\"Christmas Day is a significant time when the community comes together, evident in the activities around Baker's Shops.\"<|>\"community gathering, festivity\"<|>8)##\n(\"content_keywords\"<|>\"social issues, family celebration, Christmas spirit, kindness\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "7dc7f73e19232313a8e0e47e4aa8ebc5": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a central character who experiences transformative encounters with a Spirit that leads him through different scenes depicting joy and hardship during Christmas.\")##\n(\"entity\"<|>\"The Spirit\"<|>\"person\"<|>\"The Spirit is a ghostly figure guiding Scrooge on a journey to witness the moments of cheer and resilience among people during Christmas, influencing his perspective.\")##\n(\"entity\"<|>\"The Lamplighter\"<|>\"person\"<|>\"The Lamplighter is a character who fills the street with light, representing the warmth and joy of the holiday season.\")##\n(\"entity\"<|>\"The Miners\"<|>\"organization\"<|>\"The Miners refer to a group of laborers living in a bleak landscape, embodying the struggles and resilience of working-class life.\")##\n(\"entity\"<|>\"The Old Man\"<|>\"person\"<|>\"The Old Man is portrayed as a figure of light and joy among his family, singing a Christmas song that connects generations.\")##\n(\"entity\"<|>\"The Lighthouse\"<|>\"geo\"<|>\"The Lighthouse is a solitary structure standing on a reef, symbolizing hope and guidance amidst the tumultuous sea.\")##\n(\"entity\"<|>\"The Ship\"<|>\"geo\"<|>\"The Ship is described as a vessel on the open sea, where the crew reflects on Christmas, showcasing camaraderie and a sense of belonging.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is portrayed as a time of joy, reflection, and connection, emphasizing the positive impact of the holiday spirit on individuals and communities.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Spirit\"<|>\"Scrooge is guided by the Spirit, who shows him different scenes related to the joy and struggles of Christmas, aiming to change his heart.\"<|>\"guidance, transformation\"<|>9)##\n(\"relationship\"<|>\"The Old Man\"<|>\"The Miners\"<|>\"The Old Man's singing brings together the miners and their families, creating a moment of collective joy during Christmas.\"<|>\"community, 
celebration\"<|>8)##\n(\"relationship\"<|>\"The Lamplighter\"<|>\"Christmas\"<|>\"The Lamplighter's actions of spreading light symbolize the joy and warmth associated with Christmas.\"<|>\"symbolism, holiday spirit\"<|>7)##\n(\"relationship\"<|>\"The Lighthouse\"<|>\"The Ship\"<|>\"The Lighthouse serves as a guiding light for the Ship, representing hope and safety on a tumultuous sea.\"<|>\"guidance, hope\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Christmas\"<|>\"Scrooge's journey with the Spirit ultimately leads him to reflect on the meaning of Christmas and its impact on human connections.\"<|>\"reflection, redemption\"<|>10)##\n(\"content_keywords\"<|>\"redemption, joy, resilience, holiday spirit\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "e3393b2470d607ce5b8f6e798ef29820": {
+ "return": "(\"entity\"<|>\"The Turkey\"<|>\"event\"<|>\"The Turkey is a large bird that Scrooge has humorously commented on sending to Bob, symbolizing generosity and Christmas spirit.\")##\n(\"entity\"<|>\"The Knocker\"<|>\"event\"<|>\"The Knocker is an ornate door knocker that Scrooge admires, reflecting his newfound appreciation for simple joys.\")##\n(\"entity\"<|>\"The Boy\"<|>\"person\"<|>\"The Boy is a young messenger who interacts with Scrooge during his purchase of the turkey.\")##\n(\"entity\"<|>\"The Girl\"<|>\"person\"<|>\"The Girl is a young servant who assists Scrooge at Fred's house.\")##\n(\"entity\"<|>\"The Church\"<|>\"organization\"<|>\"The Church is a place Scrooge visits, indicating his desire to engage in the Christmas spirit and community.\")##\n(\"entity\"<|>\"The People\"<|>\"organization\"<|>\"The People represent the various individuals Scrooge encounters during his joyful walk, symbolizing community and shared happiness.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Turkey\"<|>\"Scrooge expresses joy and excitement about the turkey he plans to send, highlighting his generous transformation.\"<|>\"generosity, celebration\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Knocker\"<|>\"Scrooge admires the knocker at his door, marking a positive change in his attitude towards life and surroundings.\"<|>\"appreciation, transformation\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Boy\"<|>\"Scrooge interacts with The Boy to deliver the turkey, showing a newfound generosity and connection to others.\"<|>\"generosity, kindness\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Girl\"<|>\"Scrooge's pleasant interaction with The Girl at Fred's house shows his desire for family connection and warmth.\"<|>\"family ties, kindness\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Church\"<|>\"Scrooge's visit to The Church reflects his desire to embrace the spirit of Christmas and community engagement.\"<|>\"community, 
spirituality\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The People\"<|>\"Scrooge greets The People he encounters with joy and a newfound delight, signaling his welcome transformation.\"<|>\"community, joy\"<|>8)##\n<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "7b6a06103c40899ec90c34bda42b3f73": {
+ "return": "(\"entity\"<|>\"Project Gutenberg\"<|>\"organization\"<|>\"Project Gutenberg is a digital library providing free access to a wide range of eBooks, primarily in the United States and other regions, under specific licensing terms.\")##\n(\"entity\"<|>\"Project Gutenberg License\"<|>\"event\"<|>\"The Project Gutenberg License governs the use, copying, and distribution of the works contained within the Project Gutenberg digital library.\")##\n(\"entity\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"organization\"<|>\"The Project Gutenberg Literary Archive Foundation manages the Project Gutenberg trademark and oversees rights and licensing for the organization.\")##\n(\"relationship\"<|>\"Project Gutenberg\"<|>\"Project Gutenberg License\"<|>\"The Project Gutenberg License is associated with the usage and distribution of works from Project Gutenberg, establishing the terms under which they may be accessed and utilized.\"<|>\"licensing, access regulations\"<|>9)##\n(\"relationship\"<|>\"Project Gutenberg\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"The Project Gutenberg Literary Archive Foundation is responsible for managing the trademark of Project Gutenberg and ensuring compliance with its licensing terms.\"<|>\"management, oversight\"<|>8)##\n(\"content_keywords\"<|>\"digital library, free access, copyright compliance, licensing terms\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "805f7e8c7afaf4c559b64b7f038605f1": {
+ "return": "(\"entity\"<|>\"The Dark Chamber\"<|>\"concept\"<|>\"The Dark Chamber represents a metaphorical space associated with death, despair, and the consequences of one's actions, highlighted during Scrooge's reflections.\")##\n(\"entity\"<|>\"The Room by Daylight\"<|>\"concept\"<|>\"The Room by Daylight symbolizes hope and normalcy amidst adversity, contrasting with the earlier dark and fearful atmosphere.\")##\n(\"entity\"<|>\"The Mother\"<|>\"person\"<|>\"The Mother exemplifies apprehension and hope regarding her husband's return, depicting the emotional burden of waiting and uncertainty.\")##\n(\"entity\"<|>\"Peter\"<|>\"person\"<|>\"Peter is a member of the Cratchit family, characterized by his quiet demeanor while being engaged in reading, representing the next generation's concern.\")##\n(\"entity\"<|>\"Ferocious Cat\"<|>\"concept\"<|>\"The Ferocious Cat represents untamed instincts and survival in a harsh environment, providing a sense of discomfort and disturbance in Scrooge's reflections.\")##\n(\"entity\"<|>\"Fearful Place\"<|>\"concept\"<|>\"The Fearful Place embodies Scrooge's dread of confronting his past mistakes and the implications of death, showcasing his internal struggles.\")##\n(\"entity\"<|>\"The Long-Expected Knock\"<|>\"event\"<|>\"The Long-Expected Knock signifies a pivotal moment of anticipation related to the return of the mother’s husband, laden with emotional significance.\")##\n(\"entity\"<|>\"The Candle-Light\"<|>\"concept\"<|>\"The Candle-Light represents fragility and vulnerability, affecting the perception of Tiny Tim and reflecting the family's struggles with health and poverty.\")##\n(\"entity\"<|>\"Debt\"<|>\"concept\"<|>\"Debt signifies a financial burden underpinning the Cratchit family's tension and expectations regarding their future after a death event.\")##\n(\"entity\"<|>\"Merciless Creditor\"<|>\"concept\"<|>\"The Merciless Creditor symbolizes the harsh realities of financial struggles faced by the Cratchit family, 
representing the societal pressures they endure.\")##\n(\"entity\"<|>\"Emotion\"<|>\"concept\"<|>\"Emotion encompasses the various feelings experienced by the characters in response to death, hope, and familial bonds, underscoring the human condition.\")##\n(\"entity\"<|>\"Kind Word\"<|>\"concept\"<|>\"A Kind Word illustrates the theme of empathy and how small gestures can significantly impact one's emotional state, as recalled by Scrooge.\")##\n(\"entity\"<|>\"Clasped Hands\"<|>\"concept\"<|>\"Clasped Hands symbolize prayer or hope, reflecting the mother's reaction and the emotional weight carried by her and the Cratchit family.\")##\n(\"content_keywords\"<|>\"futility, emotion, hope, family sacrifice\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "15cd51f4dc0fb941a5dbfd8ce2db3faa": {
+ "return": "(\"entity\"<|>\"The Charwoman\"<|>\"person\"<|>\"The Charwoman is a character who enters the shop first, contributing to the conversation about their collective situation concerning the deceased.\")## \n(\"entity\"<|>\"The Undertaker's Man\"<|>\"person\"<|>\"The Undertaker's Man is a character who appears after The Charwoman, joining the group in discussing their encounters and the deceased's possessions.\")## \n(\"entity\"<|>\"The Clothes and Items\"<|>\"event\"<|>\"The Clothes and Items refer to the various possessions being appraised, representing the themes of loss and exploitation.\")## \n(\"entity\"<|>\"The Dead Man\"<|>\"person\"<|>\"The Dead Man is mentioned as the previous owner of the items being appraised, embodying the consequences of greed and isolation.\")## \n(\"relationship\"<|>\"The Charwoman\"<|>\"The Undertaker's Man\"<|>\"The Charwoman and The Undertaker's Man interact directly while sharing their experiences, indicating a familiarity and common ground in their circumstances.\"<|>\"shared experiences, camaraderie\"<|>6)## \n(\"relationship\"<|>\"Old Joe\"<|>\"The Dead Man\"<|>\"Old Joe's valuation of the stolen items highlights the worthlessness attributed to The Dead Man's belongings posthumously.\"<|>\"valuation, moral judgment\"<|>7)## \n(\"relationship\"<|>\"The Woman\"<|>\"The Charwoman\"<|>\"The Woman and The Charwoman engage in conversation that underscores their desire to disregard moral implications and focus on personal gain.\"<|>\"self-interest, moral disregard\"<|>5)## \n(\"relationship\"<|>\"Mrs. Dilber\"<|>\"The Dead Man\"<|>\"Mrs. Dilber's remarks imply judgment regarding The Dead Man's life choices and the neglect he faced in death.\"<|>\"judgment, reflection\"<|>6)## \n(\"content_keywords\"<|>\"exploitation, death, appraisal, moral judgments\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "12c820b3b26f89138846abc45f15f4cf": {
+ "return": "(\"entity\"<|>\"The Spirits\"<|>\"concept\"<|>\"The Spirits represent the past, present, and future in Scrooge's life, guiding him toward redemption and self-reflection.\")##\n(\"entity\"<|>\"The Ghost of Jacob Marley\"<|>\"event\"<|>\"The Ghost of Jacob Marley is the supernatural being that visits Scrooge to warn him and set the stage for his transformation.\")##\n(\"entity\"<|>\"The Ghost of Christmas Present\"<|>\"event\"<|>\"The Ghost of Christmas Present signifies abundance and the spirit of joy that Scrooge is encouraged to embrace.\")##\n(\"entity\"<|>\"The Ghost of Christmas Future\"<|>\"event\"<|>\"The Ghost of Christmas Future represents the potential consequences of Scrooge's actions and choices.\")##\n(\"entity\"<|>\"Sunday Clothes Boy\"<|>\"person\"<|>\"The boy in Sunday clothes serves as a character who interacts with Scrooge, demonstrating innocence and curiosity.\")##\n(\"entity\"<|>\"The Sitting Room\"<|>\"location\"<|>\"The Sitting Room is a significant space in Scrooge's home where he experiences emotional revelations and transformations.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Spirits\"<|>\"The Spirits embody the lessons Scrooge must learn to change his life and embrace kindness.\"<|>\"redemption, guidance\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Ghost of Jacob Marley\"<|>\"Scrooge's encounter with the Ghost of Jacob Marley initiates his journey of self-reflection and realization.\"<|>\"transformation, warning\"<|>10)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Ghost of Christmas Present\"<|>\"The Ghost of Christmas Present exposes Scrooge to the joy and generosity he has been missing.\"<|>\"awareness, joy\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Ghost of Christmas Future\"<|>\"The Ghost of Christmas Future reveals the dire consequences of Scrooge's current behavior, prompting a desire for change.\"<|>\"consequences, transformation\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Sunday Clothes 
Boy\"<|>\"Scrooge's interaction with the Sunday Clothes Boy highlights his evolving attitude towards generosity and kindness.\"<|>\"kindness, generosity\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Sitting Room\"<|>\"The Sitting Room is where Scrooge reflects on his past and contemplates his future, crucial to his transformation.\"<|>\"reflection, transformation\"<|>8)##\n(\"content_keywords\"<|>\"redemption, lessons, transformation, reflection\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "a320cf7e0627f1687d10e7f235ec7bf4": {
+ "return": "(\"entity\"<|>\"Jacob Marley\"<|>\"person\"<|>\"Jacob Marley is Scrooge's deceased business partner, who appears as a ghost to warn Scrooge of his impending fate and the need for change.\")##\n(\"entity\"<|>\"The Ghost\"<|>\"organization\"<|>\"The Ghost represents various spectral entities that visit Scrooge to facilitate his transformation by highlighting the consequences of his actions.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas serves as a thematic backdrop for the story, symbolizing redemption, generosity, and the spirit of giving.\")##\n(\"entity\"<|>\"The Future\"<|>\"concept\"<|>\"The Future represents the potential outcomes of Scrooge’s life choices, depicted through the visions shown by the Spirits.\")##\n(\"entity\"<|>\"The Businessmen\"<|>\"organization\"<|>\"The Businessmen are a group of individuals in the City who display a lack of concern about Scrooge's death, reflecting societal apathy toward individual loss.\")##\n(\"entity\"<|>\"Death\"<|>\"event\"<|>\"Death in the story is a significant event that triggers discussion among the Businessmen and prompts Scrooge's reflections.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Jacob Marley\"<|>\"Scrooge interacts with Jacob Marley, who serves as a critical warning for Scrooge to change his ways before it's too late.\"<|>\"warning, transformation\"<|>10)##\n(\"relationship\"<|>\"The Ghost\"<|>\"Christmas\"<|>\"The Ghosts represent the essence of Christmas, highlighting the themes of generosity and change during this festive time.\"<|>\"spiritual guide, thematic connection\"<|>8)##\n(\"relationship\"<|>\"Jacob Marley\"<|>\"The Future\"<|>\"Jacob Marley's warning about the consequences of ignorance reflects on the potential dark Future that awaits Scrooge.\"<|>\"foreshadowing, caution\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Businessmen\"<|>\"Scrooge’s death is discussed among the Businessmen, who show a lack of concern, exemplifying the indifference of 
society.\"<|>\"societal apathy, lack of empathy\"<|>8)##\n(\"relationship\"<|>\"The Future\"<|>\"Death\"<|>\"The concept of The Future is intricately linked to discussions about Death, pondering the consequences of life's choices.\"<|>\"consequences of choices, reflection\"<|>7)##\n(\"content_keywords\"<|>\"redemption, apathy, warnings, societal critique\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "d266f37a6283d07b56094f9791118f2e": {
+ "return": "(\"entity\"<|>\"Master Peter Cratchit\"<|>\"person\"<|>\"Master Peter Cratchit is one of the young sons in the Cratchit family, actively contributes to the family's Christmas preparations with energy and enthusiasm.\")##\n(\"entity\"<|>\"Belinda Cratchit\"<|>\"person\"<|>\"Belinda Cratchit is another daughter in the Cratchit family, depicted as participating in the festive meal by sweetening the apple sauce, showcasing her involvement in family traditions.\")##\n(\"entity\"<|>\"The Goose\"<|>\"event\"<|>\"The Goose is a central part of the Cratchit family Christmas dinner, which they celebrate with excitement and anticipation, symbolizing abundance and joy.\")##\n(\"entity\"<|>\"Mrs. Cratchit\"<|>\"person\"<|>\"Mrs. Cratchit is the mother in the family who prepares the meal and shows warmth and attentiveness towards her family members during their Christmas celebration.\")##\n(\"relationship\"<|>\"Master Peter Cratchit\"<|>\"Cratchit Family\"<|>\"Master Peter Cratchit is a part of the Cratchit family and contributes to their joyful Christmas celebration with enthusiasm.\"<|>\"family participation, joy\"<|>8)##\n(\"relationship\"<|>\"Belinda Cratchit\"<|>\"Cratchit Family\"<|>\"Belinda Cratchit's role in sweetening the apple sauce shows her active involvement in family traditions during Christmas.\"<|>\"family participation, tradition\"<|>8)##\n(\"relationship\"<|>\"Mrs. Cratchit\"<|>\"Cratchit Family\"<|>\"Mrs. Cratchit plays a significant role in nurturing the family atmosphere during Christmas by preparing and caring for everyone.\"<|>\"nurturing, familial care\"<|>9)##\n(\"relationship\"<|>\"The Goose\"<|>\"Cratchit Family\"<|>\"The Goose represents the festive meal that the Cratchit family cherishes during their Christmas gathering, enhancing their celebration.\"<|>\"symbol of abundance, celebration\"<|>10)##\n(\"content_keywords\"<|>\"family roles, Christmas dinner, festive traditions, joy\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "4153111ded591e4cc8dfcafce17f9c44": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a character who engages with a spiritual entity and witnesses the transformation of his surroundings during Christmas.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is depicted as a time of joy and communal spirit, marked by people's activities and interactions in the streets.\")##\n(\"entity\"<|>\"Great Britain\"<|>\"geo\"<|>\"Great Britain is referenced as a location suffering from severe weather, which contrasts with the cheerful activities of its people during Christmas.\")##\n(\"entity\"<|>\"The Grocers\"<|>\"organization\"<|>\"The Grocers are a social hub where customers eagerly purchase goods, contributing to the festive atmosphere of the day.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Christmas\"<|>\"Scrooge's experience during the Christmas event highlights the transformation of his character and the spirit of the holiday.\"<|>\"transformation, holiday spirit\"<|>8)##\n(\"relationship\"<|>\"Christmas\"<|>\"Great Britain\"<|>\"The celebration of Christmas brings a cheerful atmosphere to Great Britain despite the gloomy weather conditions.\"<|>\"celebration, societal mood\"<|>7)##\n(\"relationship\"<|>\"The Grocers\"<|>\"Christmas\"<|>\"The Grocers contribute to the festive Christmas atmosphere by providing goods and serving eager customers.\"<|>\"community engagement, festive spirit\"<|>9)##\n(\"content_keywords\"<|>\"Christmas, transformation, festive spirit, community\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "0ead7f4b5608c9f8f6389a53fbf84467": {
+ "return": "(\"entity\"<|>\"Tiny Tim\"<|>\"person\"<|>\"Tiny Tim is a young boy in the Cratchit family, who is portrayed as frail yet joyous and embodies the spirit of Christmas.\")##\n(\"entity\"<|>\"Bob Cratchit\"<|>\"person\"<|>\"Bob Cratchit is the father of Tiny Tim and the head of the Cratchit family, who expresses joy and gratitude during Christmas dinner.\")##\n(\"entity\"<|>\"Mrs. Cratchit\"<|>\"person\"<|>\"Mrs. Cratchit is Bob's wife who prepares the Christmas pudding, showcasing her pride and care in family traditions.\")##\n(\"entity\"<|>\"Cratchit Family\"<|>\"organization\"<|>\"The Cratchit Family is a close-knit family characterized by love and togetherness, celebrating Christmas with simple but joyful traditions.\")##\n(\"entity\"<|>\"Christmas Pudding\"<|>\"event\"<|>\"The Christmas Pudding is a significant dish prepared by Mrs. Cratchit, symbolizing the family's festive celebration and unity.\")##\n(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a central character who undergoes a transformation after reflecting on his past and attitude towards the Cratchit family, particularly Tiny Tim.\")##\n(\"relationship\"<|>\"Bob Cratchit\"<|>\"Tiny Tim\"<|>\"Bob Cratchit expresses love and concern for his son Tiny Tim during the Christmas dinner, highlighting a strong familial bond.\"<|>\"family bond, care\"<|>9)##\n(\"relationship\"<|>\"Mrs. Cratchit\"<|>\"Christmas Pudding\"<|>\"Mrs. 
Cratchit takes pride in serving the Christmas Pudding, marking it as a highlight of the family's festive gathering.\"<|>\"family tradition, celebration\"<|>8)##\n(\"relationship\"<|>\"Cratchit Family\"<|>\"Christmas Pudding\"<|>\"The Cratchit Family gathers around to enjoy the Christmas Pudding, demonstrating their togetherness and festive spirit.\"<|>\"family unity, celebration\"<|>10)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Tiny Tim\"<|>\"Scrooge reflects on Tiny Tim and expresses concern for his wellbeing, indicating a pivotal change in his character and attitudes towards the less fortunate.\"<|>\"personal transformation, empathy\"<|>8)##\n(\"relationship\"<|>\"Bob Cratchit\"<|>\"Scrooge\"<|>\"Bob praises Scrooge as 'The Founder of the Feast', showing appreciation despite Scrooge’s previous miserliness.\"<|>\"gratitude, redemption\"<|>7)##\n(\"content_keywords\"<|>\"family unity, Christmas spirit, personal transformation, compassion\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "6c24aa07a7cb508ee61487a63b4627d8": {
+ "return": "(\"entity\"<|>\"The Spirit\"<|>\"person\"<|>\"The Spirit refers to the Ghost of Christmas Present, a supernatural figure that guides Scrooge through lessons about generosity and community.\")##\n(\"entity\"<|>\"Wicker Baskets\"<|>\"geo\"<|>\"Wicker Baskets are mentioned as items that characters use to carry their purchases, symbolizing the festive preparations of the community.\")##\n(\"entity\"<|>\"Dinners\"<|>\"event\"<|>\"Dinners represent the communal and familial gatherings that occur during Christmas, emphasized in the text as significant for enjoyment and sharing.\")##\n(\"entity\"<|>\"Christmas Goose\"<|>\"food\"<|>\"The Christmas Goose is a traditional dish that symbolizes festive feasting and joy during the holiday.\")##\n(\"entity\"<|>\"Four-Roomed House\"<|>\"geo\"<|>\"Bob Cratchit's four-roomed house serves as a setting that reflects the modest means yet warm atmosphere of the Cratchit family.\")##\n(\"entity\"<|>\"The Baker's Doorway\"<|>\"geo\"<|>\"The Baker's Doorway is a place where the Spirit and Scrooge observe dinner carriers, indicating the communal effort in preparing for Christmas.\")##\n(\"entity\"<|>\"Angry Words\"<|>\"event\"<|>\"Angry Words refer to conflicts that arise between dinner carriers, showcasing human interactions and the spirit of camaraderie needed during the holiday.\")##\n(\"entity\"<|>\"Bakers\"<|>\"organization\"<|>\"Bakers are individuals contributing to the community's ability to celebrate by providing food for Christmas.\")##\n(\"entity\"<|>\"Sage and Onion\"<|>\"food\"<|>\"Sage and Onion are ingredients associated with the traditional preparation of Christmas Goose, evoking the culinary aspects of the holiday.\")##\n(\"content_keywords\"<|>\"community, festivities, generosity, family dynamics\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "f77d8d11b2e499cc9287be7a5000801b": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a central character who experiences transformation and reflection on his past decisions and relationships.\")##\n(\"entity\"<|>\"The Ghost\"<|>\"event\"<|>\"The Ghost is a supernatural entity that shows Scrooge scenes from his past to provoke realization and remorse.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is depicted as a time associated with joy and abundance, contrasting with Scrooge's initial views.\")##\n(\"entity\"<|>\"The Young Girl\"<|>\"person\"<|>\"The Young Girl is a significant figure from Scrooge's past, representing lost opportunities and innocence.\")##\n(\"entity\"<|>\"The Mother\"<|>\"person\"<|>\"The Mother is depicted as a comely matron, who enjoys the laughter and chaos with her children, showing warmth and family value.\")##\n(\"entity\"<|>\"The Daughter\"<|>\"person\"<|>\"The Daughter is involved in the scene of merriment and childhood play, representing the continuation of family joy.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Young Girl\"<|>\"Scrooge reflects on his past feelings for the Young Girl, recognizing the change in his own spirit and desires over time.\"<|>\"reflection, lost love\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Ghost\"<|>\"The Ghost guides Scrooge through visions of his past, forcing him to confront his choices and their consequences.\"<|>\"guidance, transformation\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Christmas\"<|>\"Scrooge's views on Christmas evolve as he witnesses the joy and familial love associated with the holiday, marking a shift in his character.\"<|>\"character development, festive spirit\"<|>8)##\n(\"relationship\"<|>\"The Mother\"<|>\"The Daughter\"<|>\"The Mother and Daughter share a loving and playful relationship, showcasing familial bonds and joy during the Christmas season.\"<|>\"familial love, joy\"<|>7)##\n(\"content_keywords\"<|>\"transformation, reflection, Christmas spirit, familial 
bonds\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "c48d46dbd6ce06dc16be08d332209205": {
+ "return": "(\"entity\"<|>\"Master Peter\"<|>\"person\"<|>\"Master Peter is a member of the Cratchit family, who is considered for a job opportunity and reflects on his future prospects during the Christmas gathering.\")##\n(\"entity\"<|>\"Martha\"<|>\"person\"<|>\"Martha is a poor apprentice at a milliner's and shares her experience with the family during their Christmas gathering, contributing to the family atmosphere.\")##\n(\"entity\"<|>\"The Lord\"<|>\"person\"<|>\"The Lord is mentioned by Martha as someone she encountered, illustrating her experiences outside her family's struggles.\")##\n(\"entity\"<|>\"The Countess\"<|>\"person\"<|>\"The Countess is referenced by Martha, showcasing her interaction with figures of higher social status, contrasting with the Cratchit family's situation.\")##\n(\"entity\"<|>\"Christmas Feast\"<|>\"event\"<|>\"The Christmas Feast is the central gathering for the Cratchit family, where various interactions occur, highlighting their feelings towards Scrooge.\")##\n(\"relationship\"<|>\"Master Peter\"<|>\"Bob Cratchit\"<|>\"Bob Cratchit is considering job opportunities for Master Peter, reflecting the family's aspirations and hopes for betterment.\"<|>\"aspiration, family support\"<|>6)##\n(\"relationship\"<|>\"Martha\"<|>\"Cratchit Family\"<|>\"Martha contributes to the family dynamic during their Christmas gathering by sharing her experiences and adding to the festive atmosphere.\"<|>\"family interaction, celebration\"<|>7)##\n(\"relationship\"<|>\"Martha\"<|>\"Tiny Tim\"<|>\"Martha's presence and conversation at the gathering impacts Tiny Tim's experience and highlights family connections during Christmas.\"<|>\"family ties, support\"<|>5)##\n(\"relationship\"<|>\"The Lord\"<|>\"Martha\"<|>\"Martha's reference to The Lord indicates her exposure to higher social circles, which contrasts her family's financial struggles.\"<|>\"social status, contrast\"<|>4)##\n(\"relationship\"<|>\"The Countess\"<|>\"Martha\"<|>\"Martha's 
encounter with The Countess reflects her aspirations and experiences outside her lower socio-economic status.\"<|>\"social contrast, aspirations\"<|>4)##\n(\"relationship\"<|>\"Christmas Feast\"<|>\"Cratchit Family\"<|>\"The Christmas Feast is a pivotal event for the Cratchit Family where they express their feelings and celebrate together.\"<|>\"family gathering, significance\"<|>9)##\n(\"content_keywords\"<|>\"family aspirations, social contrast, nurturing relationships, holiday spirit\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "402a24595ac910e991e1e4049b397dec": {
+ "return": "(\"entity\"<|>\"The Cheerful Company\"<|>\"organization\"<|>\"The Cheerful Company refers to the group assembled around the fire, representing family togetherness and joy during Christmas.\")##\n(\"entity\"<|>\"The Setting Sun\"<|>\"geo\"<|>\"The Setting Sun is depicted as leaving a streak of fiery red, symbolizing transition and the passage of time against the desolation of the moor.\")##\n(\"entity\"<|>\"The Frozen Moor\"<|>\"geo\"<|>\"The Frozen Moor is a bleak and desolate landscape, representing hardship and the struggles faced by those who live there.\")##\n(\"entity\"<|>\"The Hut\"<|>\"geo\"<|>\"The Hut is a shelter for the miners that provides warmth and a gathering space for family during the winter, serving as a symbol of refuge.\")##\n(\"entity\"<|>\"Storm-Birds\"<|>\"geo\"<|>\"Storm-Birds are depicted as creatures that rise and fall with the waves, symbolizing the harshness of the environment and the connection to the sea.\")##\n(\"entity\"<|>\"The Hearth\"<|>\"geo\"<|>\"The Hearth is the gathering place around which the old man and his family sing, signifying warmth, comfort, and familial bonds during the festive season.\")##\n(\"entity\"<|>\"Christmas Song\"<|>\"event\"<|>\"The Christmas Song is the traditional tune sung by the old man and his family, highlighting themes of nostalgia and familial joy during the holiday season.\")##\n(\"relationship\"<|>\"The Old Man\"<|>\"The Cheerful Company\"<|>\"The Old Man leads the Cheerful Company in song, promoting joy and togetherness during their Christmas gathering.\"<|>\"family, togetherness\"<|>8)##\n(\"relationship\"<|>\"The Frozen Moor\"<|>\"The Miners\"<|>\"The Frozen Moor serves as the living environment for the Miners, reflecting the harsh living conditions they endure.\"<|>\"environment, hardship\"<|>7)##\n(\"relationship\"<|>\"The Setting Sun\"<|>\"The Frozen Moor\"<|>\"The Setting Sun casts a dramatic light on the Frozen Moor, highlighting the stark beauty and desolation of the 
landscape.\"<|>\"nature, contrast\"<|>6)##\n(\"relationship\"<|>\"The Hut\"<|>\"The Cheerful Company\"<|>\"The Hut provides shelter and warmth to the Cheerful Company, facilitating their celebration during a cold season.\"<|>\"refuge, celebration\"<|>9)##\n(\"relationship\"<|>\"The Lighthouse\"<|>\"The Frozen Moor\"<|>\"The Lighthouse stands solitary and contrasting against the harshness of the Frozen Moor, symbolizing hope amidst desolation.\"<|>\"symbolism, contrast\"<|>8)##\n(\"content_keywords\"<|>\"family, hardship, hope, nostalgia\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "d47101395ee9b0b7d3415f5eca4cb88b": {
+ "return": "(\"entity\"<|>\"Scrooge's nephew\"<|>\"person\"<|>\"Scrooge's nephew is a character present at the gathering who actively participates in the games and engages with his family.\")##\n(\"entity\"<|>\"The plump sister\"<|>\"person\"<|>\"The plump sister is a character involved in the games, specifically blind man's-buff, and captures Topper's attention during the festivities.\")##\n(\"entity\"<|>\"Ghost\"<|>\"event\"<|>\"The Ghost refers to the spirit guiding Scrooge, embodying the themes of memory and reflection on the joys of Christmas.\")##\n(\"entity\"<|>\"London\"<|>\"geo\"<|>\"London serves as the setting where the events take place, adding a cultural backdrop to the Christmas gathering.\")##\n(\"entity\"<|>\"Games\"<|>\"event\"<|>\"The Games represent various activities played during the gathering, reinforcing themes of merriment and family unity.\")##\n(\"relationship\"<|>\"Scrooge's nephew\"<|>\"Scrooge's niece\"<|>\"Scrooge's nephew and niece engage in playful banter and competition during the games, enhancing their familial bond.\"<|>\"family interaction, playful rivalry\"<|>7)##\n(\"relationship\"<|>\"Topper\"<|>\"The plump sister\"<|>\"Topper shows a clear interest in the plump sister during the games, indicating a playful dynamic between the two.\"<|>\"flirtation, playful attention\"<|>6)##\n(\"relationship\"<|>\"The Ghost\"<|>\"Scrooge\"<|>\"The Ghost influences Scrooge's reflections on his past and his current emotions regarding Christmas festivities.\"<|>\"memory, influence\"<|>8)##\n(\"relationship\"<|>\"The plump sister\"<|>\"Blind Man's-Buff\"<|>\"The plump sister is central to the game of Blind Man's-Buff, as Topper pursues her, highlighting personal dynamics during the playing.\"<|>\"game participation, personal dynamics\"<|>7)##\n(\"relationship\"<|>\"London\"<|>\"Scrooge\"<|>\"London as the setting contextualizes Scrooge's experiences and transformations regarding Christmas celebrations.\"<|>\"setting, 
transformation\"<|>5)##\n(\"content_keywords\"<|>\"family games, festive spirit, playful interactions, cultural setting\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "155d334f9fdac73f94da7c5f075ae662": {
+ "return": "(\"entity\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"organization\"<|>\"The Project Gutenberg Literary Archive Foundation is a 501(c)(3) non-profit educational corporation dedicated to increasing access to public domain and licensed works that can be freely distributed in machine-readable form.\")##\n(\"entity\"<|>\"Salt Lake City\"<|>\"geo\"<|>\"Salt Lake City is the location of the Foundation's business office, indicating its operational base in the state of Utah.\")##\n(\"entity\"<|>\"Michael S. Hart\"<|>\"person\"<|>\"Michael S. Hart is the founder of Project Gutenberg, known for creating a library of electronic works that can be freely shared, playing a significant role in the development of eBooks.\")##\n(\"entity\"<|>\"eBooks\"<|>\"event\"<|>\"eBooks refer to the electronic books produced and distributed by Project Gutenberg, often created from printed editions confirmed as not protected by copyright.\")##\n(\"relationship\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"Michael S. Hart\"<|>\"Michael S. Hart is the originator of the Project Gutenberg concept, instrumental in establishing its mission and operations.\"<|>\"founder, mission creation\"<|>9)##\n(\"relationship\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"Salt Lake City\"<|>\"The Foundation has its business office in Salt Lake City, establishing a geographic link to its operations.\"<|>\"operational base, location\"<|>8)##\n(\"relationship\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"eBooks\"<|>\"The Foundation is dedicated to increasing the number of eBooks available, which directly relates to its mission.\"<|>\"mission, eBook distribution\"<|>10)##\n(\"content_keywords\"<|>\"non-profit, eBooks, accessibility, donation compliance\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "5edd0e3325656990b98ea9b9186264d6": {
+ "return": "(\"entity\"<|>\"Transcriber's Note\"<|>\"event\"<|>\"Transcriber's Note refers to the added content by the transcriber, indicating additional information or context regarding the work.\")##\n(\"entity\"<|>\"END OF THE PROJECT GUTENBERG EBOOK A CHRISTMAS CAROL\"<|>\"event\"<|>\"END OF THE PROJECT GUTENBERG EBOOK A CHRISTMAS CAROL marks the conclusion of a particular eBook within the Project Gutenberg collection.\")##\n(\"entity\"<|>\"old editions\"<|>\"organization\"<|>\"Old editions refer to previous versions of eBooks that are being renamed and replaced, part of Project Gutenberg's updating process.\")##\n(\"entity\"<|>\"special rules\"<|>\"concept\"<|>\"Special rules outline specific guidelines that govern the copying and distribution of Project Gutenberg eBooks to protect its mission and trademark.\")##\n(\"entity\"<|>\"General Terms of Use\"<|>\"concept\"<|>\"General Terms of Use details the overall framework and guidelines for using Project Gutenberg's works and services.\")##\n(\"entity\"<|>\"copyright law\"<|>\"concept\"<|>\"Copyright law encompasses the legal rules governing the rights and usage of literary and artistic works, relevant to Project Gutenberg's operations.\")##\n(\"entity\"<|>\"trademark license\"<|>\"concept\"<|>\"Trademark license describes the regulations for using the Project Gutenberg trademark and ensures its protection in distributions.\")##\n(\"entity\"<|>\"derivative works\"<|>\"concept\"<|>\"Derivative works refer to adaptations or creations based on original texts, allowed under certain conditions set by Project Gutenberg.\")##\n(\"relationship\"<|>\"Project Gutenberg\"<|>\"Transcriber's Note\"<|>\"The transcriber's note provides additional context about the eBook created and distributed by Project Gutenberg.\"<|>\"contextual enhancement, editorial contribution\"<|>6)##\n(\"relationship\"<|>\"Project Gutenberg\"<|>\"old editions\"<|>\"Project Gutenberg manages the updating and renaming of old editions to align with 
current standards and offerings.\"<|>\"updating process, content management\"<|>7)##\n(\"relationship\"<|>\"special rules\"<|>\"Project Gutenberg License\"<|>\"Special rules are a part of the guidelines established by the Project Gutenberg License to ensure compliance in distribution.\"<|>\"guidelines, compliance\"<|>8)##\n(\"relationship\"<|>\"General Terms of Use\"<|>\"Project Gutenberg License\"<|>\"The General Terms of Use provides the foundational legal framework for the Project Gutenberg License.\"<|>\"legal structure, user guidelines\"<|>9)##\n(\"relationship\"<|>\"copyright law\"<|>\"Project Gutenberg License\"<|>\"The Project Gutenberg License operates within the confines of copyright law, affecting all distributions of its works.\"<|>\"legal compliance, distribution rights\"<|>9)##\n(\"relationship\"<|>\"trademark license\"<|>\"Project Gutenberg License\"<|>\"The trademark license governs the usage of the Project Gutenberg name in association with its electronic works.\"<|>\"brand protection, governance\"<|>8)##\n(\"relationship\"<|>\"derivative works\"<|>\"General Terms of Use\"<|>\"The General Terms of Use outlines the policies regarding the creation of derivative works based on Project Gutenberg content.\"<|>\"adaptation rights, user permissions\"<|>7)##\n(\"content_keywords\"<|>\"Project Gutenberg eBooks, updating process, copyright governance\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "ae027fae1a5f9d6a056ff95c9e55fd51": {
+ "return": "(\"entity\"<|>\"Bob\"<|>\"person\"<|>\"Bob is a character who reflects on the importance of unity and remembering Tiny Tim among his family.\")##\n(\"entity\"<|>\"Mrs. Cratchit\"<|>\"person\"<|>\"Mrs. Cratchit is a character who shows affection towards Bob, enhancing the family dynamic.\")##\n(\"entity\"<|>\"Tiny Tim\"<|>\"person\"<|>\"Tiny Tim is a symbolic character representing innocence and the impact of loss on family cohesion.\")##\n(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a character who undergoes a transformation, contemplating his own future and the significance of his past actions.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas represents a time of reflection and change for Scrooge, emphasizing themes of redemption.\")##\n(\"entity\"<|>\"The Ghost of Christmas Yet to Come\"<|>\"organization\"<|>\"The Ghost of Christmas Yet to Come is a spectral being that guides Scrooge through visions of his potential future.\")##\n(\"entity\"<|>\"The Future\"<|>\"geo\"<|>\"The Future is represented as a realm of possibility and consequence that concerns Scrooge's fate and choices.\")##\n(\"relationship\"<|>\"Bob\"<|>\"Tiny Tim\"<|>\"Bob reflects on Tiny Tim's memory, highlighting the impact of his loss on the family dynamics.\"<|>\"family dynamics, loss\"<|>8)##\n(\"relationship\"<|>\"Mrs. Cratchit\"<|>\"Bob\"<|>\"Mrs. 
Cratchit supports Bob emotionally, exemplifying love within the Cratchit family.\"<|>\"emotional support, family love\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Tiny Tim\"<|>\"Scrooge's transformation is influenced by his reflections on the impact of Tiny Tim's life and death.\"<|>\"transformation, redemption\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Ghost of Christmas Yet to Come\"<|>\"The Ghost guides Scrooge to confront his potential future that threatens his existence.\"<|>\"guidance, self-confrontation\"<|>10)##\n(\"relationship\"<|>\"The Ghost of Christmas Yet to Come\"<|>\"Scrooge\"<|>\"The Ghost is critical in revealing the consequences of Scrooge's life choices, pushing for change.\"<|>\"consequences, change\"<|>9)##\n(\"content_keywords\"<|>\"redemption, family, transformation, reflection\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "12f98d3ad5f87ad4f6411a0b36056ce4": {
+ "return": "(\"entity\"<|>\"Spirit\"<|>\"concept\"<|>\"The Spirit embodies the lessons of compassion and social responsibility, guiding Scrooge through his reflections and realizations.\")##\n(\"entity\"<|>\"Christmas Holidays\"<|>\"event\"<|>\"The Christmas Holidays serve as a crucial backdrop for the narrative, representing themes of generosity, family, and redemption.\")##\n(\"entity\"<|>\"Twelfth-Night Party\"<|>\"event\"<|>\"The Twelfth-Night Party is mentioned as a festive gathering that Scrooge and the Spirit visit, symbolizing celebration and community.\")##\n(\"entity\"<|>\"Almshouse\"<|>\"location\"<|>\"The Almshouse is a place symbolizing human suffering and neglect, where the Spirit shows Scrooge the harsh realities faced by the needy.\")##\n(\"entity\"<|>\"Hospital\"<|>\"location\"<|>\"The Hospital represents a location of care and compassion, highlighting the human condition and the need for kindness.\")##\n(\"entity\"<|>\"Gaol\"<|>\"location\"<|>\"The Gaol symbolizes imprisonment and despair, representing the societal failings and the need for mercy and understanding.\")##\n(\"entity\"<|>\"Man\"<|>\"concept\"<|>\"Man reflects the collective responsibility of humanity towards its most vulnerable, portrayed through the metaphor of Ignorance and Want.\")##\n(\"entity\"<|>\"Children\"<|>\"person\"<|>\"The Children represent innocence and purity, highlighted as victims of societal neglect and calling for compassion from Scrooge.\")##\n(\"entity\"<|>\"Misery's Refuge\"<|>\"concept\"<|>\"Misery's Refuge represents the places where society fails to provide support and care for those in need, emphasizing social responsibility.\")##\n(\"entity\"<|>\"Mulled Wine\"<|>\"concept\"<|>\"Mulled Wine symbolizes warmth and cheer, often associated with celebrations during the holiday season, particularly in the context of family gatherings.\")##\n(\"entity\"<|>\"Health\"<|>\"concept\"<|>\"Health is a theme represented through the need for care and the role of the 
community in supporting those who are suffering.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Spirit\"<|>\"The Spirit directly influences Scrooge’s transformation and highlights the importance of compassion in human life.\"<|>\"personal growth, spiritual journey\"<|>10)##\n(\"relationship\"<|>\"Spirit\"<|>\"Children\"<|>\"The Spirit introduces Scrooge to the Children as representations of societal neglect, urging recognition and action.\"<|>\"social issues, innocence\"<|>9)##\n(\"relationship\"<|>\"Twelfth-Night Party\"<|>\"Scrooge\"<|>\"The Twelfth-Night Party serves as a representation of festive joy that contrasts with Scrooge's initial isolation.\"<|>\"celebration, contrast\"<|>8)##\n(\"relationship\"<|>\"Almshouse\"<|>\"Spirit\"<|>\"The Spirit shows Scrooge the Almshouse to illustrate the plight of the poor and the societal neglect present during the holiday season.\"<|>\"social critique, awareness\"<|>9)##\n(\"relationship\"<|>\"Hospital\"<|>\"Spirit\"<|>\"The Spirit uses the Hospital to demonstrate humanity's capacity for care and its responsibility towards the sick and needy.\"<|>\"care, compassion\"<|>8)##\n(\"relationship\"<|>\"Gaol\"<|>\"Spirit\"<|>\"The Gaol represents the consequences of societal indifference, highlighted by the Spirit to show the need for empathy and reform.\"<|>\"empowerment, societal change\"<|>8)##\n(\"relationship\"<|>\"Man\"<|>\"Ignorance\"<|>\"The concept of Man is tied to Ignorance and Want, illustrating the failures of society in addressing poverty and need.\"<|>\"societal responsibility, advocacy\"<|>9)##\n(\"relationship\"<|>\"Misery's Refuge\"<|>\"Spirit\"<|>\"The Spirit's presence in Misery's Refuge underscores the necessity for societal compassion during difficult times.\"<|>\"social awareness, empathy\"<|>10)##\n(\"content_keywords\"<|>\"compassion, societal failure, redemption, social responsibility\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "e3d312cd2f7557186eea60b2055b49cd": {
+ "return": "(\"entity\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"organization\"<|>\"The Project Gutenberg Literary Archive Foundation is a non-profit organization established to ensure the free distribution of electronic works and maintain the Project Gutenberg™ collection for future generations.\")##\n(\"entity\"<|>\"Project Gutenberg™\"<|>\"organization\"<|>\"Project Gutenberg™ is synonymous with the free distribution of electronic works, supported by volunteers and donations to provide accessible formats for a variety of computers.\")##\n(\"entity\"<|>\"Mississippi\"<|>\"geo\"<|>\"Mississippi is the U.S. state where the Project Gutenberg Literary Archive Foundation is organized under local laws.\")##\n(\"entity\"<|>\"Salt Lake City\"<|>\"geo\"<|>\"Salt Lake City is the location of the business office of the Project Gutenberg Literary Archive Foundation.\")##\n(\"entity\"<|>\"Project Gutenberg™ Collection\"<|>\"event\"<|>\"The Project Gutenberg™ Collection refers to the collection of electronic works created through the efforts of volunteers, aimed at free access to literature.\")##\n(\"relationship\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"Project Gutenberg™\"<|>\"The Project Gutenberg Literary Archive Foundation oversees and manages Project Gutenberg™, ensuring the continuation and accessibility of its electronic works.\"<|>\"management, oversight\"<|>8)##\n(\"relationship\"<|>\"Project Gutenberg™\"<|>\"Project Gutenberg™ Collection\"<|>\"Project Gutenberg™ is responsible for the creation and distribution of the Project Gutenberg™ Collection, making literary works accessible electronically.\"<|>\"literary access, distribution\"<|>9)##\n(\"relationship\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"Salt Lake City\"<|>\"The Project Gutenberg Literary Archive Foundation has its business office located in Salt Lake City, establishing a physical presence there.\"<|>\"organizational presence, 
location\"<|>7)##\n(\"relationship\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"Mississippi\"<|>\"The Project Gutenberg Literary Archive Foundation is organized under the laws of Mississippi, linking its operations to the state.\"<|>\"legal identity, location\"<|>6)##\n(\"content_keywords\"<|>\"free distribution, electronic works, volunteer support, accessibility\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "bc02ec5761f825a4a1064c51b11f5f28": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a main character who undergoes a transformation, initially embodying greed and later embracing generosity and kindness.\")##\n(\"entity\"<|>\"Fred\"<|>\"person\"<|>\"Fred is Scrooge's niece by marriage who expresses surprise and warmth at Scrooge's unexpected visit for dinner.\")##\n(\"entity\"<|>\"Bob Cratchit\"<|>\"person\"<|>\"Bob Cratchit is Scrooge's employee who arrives late to work after celebrating Christmas, displaying a sense of trepidation toward Scrooge.\")##\n(\"entity\"<|>\"Tiny Tim\"<|>\"person\"<|>\"Tiny Tim is Bob Cratchit's son, who represents innocence and the impact of Scrooge's change of heart.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is the central holiday during which the events unfold, leading to significant character transformations and themes of generosity.\")##\n(\"entity\"<|>\"City\"<|>\"geo\"<|>\"The City refers to a generic location embodying the community and societal norms, with which Scrooge has a complex relationship.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Fred\"<|>\"Scrooge surprises Fred with his unexpected visit, indicating a shift in their familial relationship and Scrooge's attitude toward family.\"<|>\"family, transformation\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Bob Cratchit\"<|>\"Scrooge reprimands Bob for arriving late, but ultimately decides to raise his salary, signaling a significant change in their employer-employee relationship.\"<|>\"employment, generosity\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Tiny Tim\"<|>\"Scrooge becomes a father figure to Tiny Tim, showing a deep commitment to his well-being after realizing the importance of family and caring for others.\"<|>\"transformation, mentorship\"<|>9)##\n(\"relationship\"<|>\"Bob Cratchit\"<|>\"Tiny Tim\"<|>\"Bob Cratchit is Tiny Tim's father, and their relationship highlights the theme of family struggle and love, especially during Christmas.\"<|>\"family, 
struggle\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Christmas\"<|>\"Scrooge's transformation is centered around the spirit of Christmas, which he learns to embrace and celebrate.\"<|>\"transformation, holiday spirit\"<|>10)##\n(\"content_keywords\"<|>\"transformation, family, generosity, Christmas, kindness\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "124515245e07a8287850be12cf0c6fa8": {
+ "return": "(\"entity\"<|>\"The Man\"<|>\"person\"<|>\"The Man is a character who brings Christmas toys and presents, representing the spirit of giving associated with the holiday season.\")##\n(\"entity\"<|>\"Christmas Toys and Presents\"<|>\"event\"<|>\"Christmas Toys and Presents are associated with joy and celebration, marking the festive occasion and the love shared among family.\")##\n(\"entity\"<|>\"The Boisterous Group\"<|>\"organization\"<|>\"The Boisterous Group consists of children engaged in playful and lively activities, embodying the joy and chaos of familial festivities.\")##\n(\"entity\"<|>\"The Defenceless Porter\"<|>\"person\"<|>\"The Defenceless Porter is a character who is humorously overrun by children excited for the Christmas celebrations and gifts.\")##\n(\"content_keywords\"<|>\"joy, chaos, holiday spirit, reflection on past choices\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "da9373c0b0c596fde597d43a3eab0185": {
+ "return": "(\"entity\"<|>\"Topper\"<|>\"person\"<|>\"Topper is a guest at the Christmas party, representing the jovial and festive spirit of the gathering with Fred and Scrooge.\")##\n(\"entity\"<|>\"The Party\"<|>\"event\"<|>\"The Party is a festive gathering that celebrates Christmas, filled with games and joyful unity among friends and family.\")##\n(\"entity\"<|>\"The Tank\"<|>\"location\"<|>\"The Tank refers to Bob Cratchit's work area, where he encounters Scrooge and faces the consequences of arriving late.\")\n(\"entity\"<|>\"Christmas Bowl of Smoking Bishop\"<|>\"event\"<|>\"The Christmas Bowl of Smoking Bishop refers to a traditional holiday drink that Scrooge invites Bob Cratchit to share, symbolizing camaraderie and celebration.\")##\n(\"content_keywords\"<|>\"festive spirit, unity, workplace dynamics, holiday traditions\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "b6d15714a948777d8db0132513bccee9": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a character who experiences deep emotions and reflections on his life, including grief and regret for what he has lost.\")##\n(\"entity\"<|>\"Belle\"<|>\"person\"<|>\"Belle is Scrooge's former love whose presence evokes strong feelings of longing and sorrow in Scrooge.\")##\n(\"entity\"<|>\"Jacob Marley\"<|>\"person\"<|>\"Jacob Marley is Scrooge's deceased business partner who plays a significant role in introducing the spirits to Scrooge.\")##\n(\"entity\"<|>\"The Spirit\"<|>\"event\"<|>\"The Spirit refers to the supernatural influences that visit Scrooge, particularly the second of the Three Spirits.\")##\n(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is depicted as a time of joy and generosity, contrasting sharply with Scrooge's past behavior and current solitude.\")##\n(\"entity\"<|>\"The Bedroom\"<|>\"geo\"<|>\"The Bedroom is the personal space of Scrooge where he undergoes significant emotional transitions and encounters the spirits.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Belle\"<|>\"Scrooge reflects on his lost relationship with Belle, highlighting his feelings of regret and emotional pain.\" <|>\"lost love, regret\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Jacob Marley\"<|>\"Jacob Marley is the spirit who brings forth the need for Scrooge to change his ways, impacting his character development.\"<|>\"catalyst for change, supernatural guidance\"<|>9)##\n(\"relationship\"<|>\"The Spirit\"<|>\"Scrooge\"<|>\"The Spirit represents an upcoming change in Scrooge's life, aiming to guide him towards redemption.\"<|>\"supernatural intervention, guidance\"<|>9)##\n(\"relationship\"<|>\"Christmas\"<|>\"Scrooge\"<|>\"Christmas serves as a focal point for Scrooge's emotional reflections on joy and his disconnect from societal celebrations.\"<|>\"seasonal significance, emotional reflection\"<|>7)##\n(\"relationship\"<|>\"The Bedroom\"<|>\"The Spirit\"<|>\"The Spirit visits Scrooge in his 
bedroom, representing a profound moment of transformation and introspection for Scrooge.\"<|>\"introspection, transformation\"<|>8)##\n(\"content_keywords\"<|>\"redemption, loss, supernatural influences, emotional transformation\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "8d71e0c5ec953f8d4d22525f4be94532": {
+ "return": "(\"entity\"<|>\"Scrooge\"<|>\"person\"<|>\"Scrooge is a character who reflects on his life and interactions, particularly regarding his partner Jacob and the consequences of his past actions.\")##\n(\"entity\"<|>\"Jacob\"<|>\"person\"<|>\"Jacob is referenced as Scrooge's old partner, significant for his recent death and the impact of this event on Scrooge's reflections.\")##\n(\"entity\"<|>\"Christmas-time\"<|>\"event\"<|>\"Christmas-time serves as a backdrop for the narrative, emphasizing themes of reflection, mortality, and social interactions.\")##\n(\"entity\"<|>\"The Phantom\"<|>\"organization\"<|>\"The Phantom represents the supernatural element in Scrooge's journey of self-reflection and moral awakening.\")##\n(\"entity\"<|>\"Businessmen\"<|>\"organization\"<|>\"The businessmen are characterized as wealthy and important individuals in Scrooge's past, highlighting themes of social status and superficiality in interactions.\")##\n(\"entity\"<|>\"The Obscure Part of Town\"<|>\"geo\"<|>\"The Obscure Part of Town is a location described as having a bad reputation, symbolizing the darker aspects of society that Scrooge must confront.\")##\n(\"entity\"<|>\"The Low-Browed Shop\"<|>\"organization\"<|>\"The Low-Browed Shop is a setting that symbolizes neglect and degradation, filled with discarded items and representing societal decay.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Jacob\"<|>\"Scrooge reflects on his past relationship with Jacob, influenced by feelings of guilt and loss after Jacob's death.\"<|>\"reflection, past relationship\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Phantom\"<|>\"Scrooge interacts with the Phantom, which guides him through reflections on his life and future, emphasizing his journey of change.\"<|>\"guidance, self-reflection\"<|>9)##\n(\"relationship\"<|>\"The Phantom\"<|>\"Christmas-time\"<|>\"The Phantom's visits coincide with Christmas time, a time symbolizing reflection, redemption, and moral 
awakening.\"<|>\"supernatural influence, time of reflection\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Businessmen\"<|>\"Scrooge's past interactions with the businessmen are superficial, reflecting his focus on wealth and status rather than meaningful connections.\"<|>\"social interactions, superficiality\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Obscure Part of Town\"<|>\"Scrooge's journey leads him to the Obscure Part of Town, representing his confrontation with the darker aspects of life and society.\"<|>\"confrontation, societal reflection\"<|>9)##\n(\"relationship\"<|>\"The Low-Browed Shop\"<|>\"The Obscure Part of Town\"<|>\"The Low-Browed Shop is located in The Obscure Part of Town, symbolizing the decay and neglect found in that area.\"<|>\"symbolism, decay\"<|>6)##\n(\"content_keywords\"<|>\"reflection, redemption, societal decay, Christmas-time\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "e7695338ca043e0be110d22ebc4020b7": {
+ "return": "(\"entity\"<|>\"Internal Revenue Service\"<|>\"organization\"<|>\"The Internal Revenue Service (IRS) is the U.S. government agency responsible for tax collection and tax law enforcement, granting tax-exempt status to organizations like the Project Gutenberg Literary Archive Foundation.\")##\n(\"entity\"<|>\"Mississippi\"<|>\"geo\"<|>\"Mississippi is the state under whose laws the Project Gutenberg Literary Archive Foundation is organized, relevant to its legal status.\")##\n(\"entity\"<|>\"Contributions\"<|>\"event\"<|>\"Contributions refer to the donations made to the Project Gutenberg Literary Archive Foundation, vital for its operational funding and tax-exempt status.\")##\n(\"entity\"<|>\"Donations\"<|>\"event\"<|>\"Donations represent the financial support provided to the Project Gutenberg Literary Archive Foundation, essential for carrying out its mission.\")##\n(\"entity\"<|>\"U.S. Federal Laws\"<|>\"concept\"<|>\"U.S. federal laws set the framework and conditions under which non-profit organizations like the Project Gutenberg Literary Archive Foundation operate and maintain tax-exempt status.\")##\n(\"entity\"<|>\"Website\"<|>\"concept\"<|>\"The Project Gutenberg website serves as the central platform for accessing eBooks, making donations, and providing information about the Foundation.\")##\n(\"relationship\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"Internal Revenue Service\"<|>\"The Foundation's tax-exempt status is granted by the IRS, creating a regulatory relationship.\"<|>\"tax exemption, regulatory\"<|>8)##\n(\"relationship\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"Mississippi\"<|>\"The Foundation is organized under the laws of Mississippi, establishing its legal framework.\"<|>\"legal framework, organizational base\"<|>7)##\n(\"relationship\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"Contributions\"<|>\"Contributions are essential for the operational viability of the Foundation, directly 
supporting its mission.\"<|>\"funding, operational sustainability\"<|>9)##\n(\"relationship\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"Donations\"<|>\"Donations are critical financial support for the Foundation, enabling its initiatives and projects.\"<|>\"financial support, project funding\"<|>9)##\n(\"relationship\"<|>\"Project Gutenberg Literary Archive Foundation\"<|>\"Website\"<|>\"The Foundation uses its website to facilitate donations and disseminate information about its services and eBooks.\"<|>\"information dissemination, online presence\"<|>8)##\n(\"content_keywords\"<|>\"non-profit, tax exemption, contributions, financial support, online access\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "9d0523eeca12adfe2cad7e11da5946e7": {
+ "return": "(\"entity\"<|>\"U.S. Copyright Law\"<|>\"concept\"<|>\"U.S. Copyright Law refers to the legal framework governing copyright protection in the United States, which impacts the distribution and usage of literary works and other creative content.\")##\n(\"entity\"<|>\"Electronic Work\"<|>\"concept\"<|>\"An Electronic Work refers to the digital version of text or literature that is provided by Project Gutenberg, which may be freely used under certain conditions.\")##\n(\"entity\"<|>\"Users\"<|>\"person\"<|>\"Users refer to individuals accessing, viewing, copying, or distributing Project Gutenberg works, subject to the terms of the Project Gutenberg License.\")##\n(\"entity\"<|>\"Royalty Fees\"<|>\"concept\"<|>\"Royalty Fees are payments due to the owner of the Project Gutenberg trademark based on the profits derived from the distribution or use of Project Gutenberg works.\")##\n(\"entity\"<|>\"Permission\"<|>\"concept\"<|>\"Permission is the consent required to use or distribute Project Gutenberg works under terms that differ from the established Project Gutenberg License.\")##\n(\"relationship\"<|>\"Users\"<|>\"U.S. Copyright Law\"<|>\"Users must check U.S. 
Copyright Law to ensure that their usage of Project Gutenberg works complies with legal requirements, especially when outside the United States.\"<|>\"compliance, legal requirements\"<|>6)##\n(\"relationship\"<|>\"Project Gutenberg License\"<|>\"Royalty Fees\"<|>\"The Project Gutenberg License stipulates the conditions under which royalty fees must be paid when using Project Gutenberg works for profit.\"<|>\"financial obligations, licensing terms\"<|>7)##\n(\"relationship\"<|>\"Users\"<|>\"Electronic Work\"<|>\"Users interact with Electronic Works provided by Project Gutenberg according to the terms outlined in the Project Gutenberg License.\"<|>\"access, digital interactions\"<|>8)##\n(\"relationship\"<|>\"Permission\"<|>\"Project Gutenberg License\"<|>\"Permission must be obtained if an individual wishes to use Project Gutenberg works under different terms than those specified in the Project Gutenberg License.\"<|>\"usage terms, licensing\"<|>9)##\n(\"content_keywords\"<|>\"copyright compliance, user access, distribution rights, digital works\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "9028cc06f39a83b4843e700e601a1553": {
+ "return": "(\"entity\"<|>\"Marley\"<|>\"person\"<|>\"Marley is a character who, although only mentioned, has had a significant influence on Scrooge's life and perspective, representing his past connections.\")##\n(\"entity\"<|>\"Turkeys\"<|>\"organization\"<|>\"Turkeys are described as part of a lavish spread that symbolizes the abundance and joy of the Christmas season.\")##\n(\"entity\"<|>\"Geese\"<|>\"organization\"<|>\"Geese are included in the festive feast, representing the bounty and celebration associated with Christmas.\")##\n(\"entity\"<|>\"Game\"<|>\"organization\"<|>\"Game refers to meat served during the Christmas season, signifying abundance and celebration.\")##\n(\"entity\"<|>\"Brawn\"<|>\"organization\"<|>\"Brawn is a type of meat included in the festive gathering, representing the richness of the Christmas feast.\")##\n(\"entity\"<|>\"Sucking-pigs\"<|>\"organization\"<|>\"Sucking-pigs represent a traditional dish during Christmas, showcasing the festive abundance.\")##\n(\"entity\"<|>\"Sausages\"<|>\"organization\"<|>\"Sausages contribute to the rich variety of foods present at the celebration, reflecting festive abundance.\")##\n(\"entity\"<|>\"Mince-pies\"<|>\"organization\"<|>\"Mince-pies are a traditional Christmas dessert, symbolizing the sweet and festive aspects of the holiday season.\")##\n(\"entity\"<|>\"Plum-puddings\"<|>\"organization\"<|>\"Plum-puddings are another festive dessert, symbolizing celebration and joy during Christmas.\")##\n(\"entity\"<|>\"Barrels of oysters\"<|>\"organization\"<|>\"Barrels of oysters symbolize luxury and indulgence in the holiday feast.\")##\n(\"entity\"<|>\"Red-hot chestnuts\"<|> \"organization\"<|>\"Red-hot chestnuts are a warm snack commonly enjoyed during the Christmas season, adding to the festive atmosphere.\")##\n(\"entity\"<|>\"Cherry-cheeked apples\"<|>\"organization\"<|>\"Cherry-cheeked apples add to the feast's festive decoration and represent the sweetness of the 
season.\")##\n(\"entity\"<|>\"Juicy oranges\"<|>\"organization\"<|>\"Juicy oranges symbolize freshness and abundance, often associated with Christmas festivities.\")##\n(\"entity\"<|>\"Luscious pears\"<|>\"organization\"<|>\"Luscious pears contribute to the variety of festive foods, symbolizing richness during the holiday season.\")##\n(\"entity\"<|>\"Immense twelfth-cakes\"<|>\"organization\"<|>\"Immense twelfth-cakes represent a traditional centerpiece of the Christmas feast, symbolizing celebration.\")##\n(\"entity\"<|>\"Seething bowls of punch\"<|>\"organization\"<|>\"Seething bowls of punch are indicative of the festive beverages enjoyed during Christmas, adding to the celebratory atmosphere.\")##\n(\"entity\"<|>\"Spirit\"<|>\"event\"<|>\"The Spirit represents the embodiment of the Christmas present, personifying the themes of generosity and merriment.\")##\n(\"relationship\"<|>\"Scrooge\"<|>\"Marley\"<|>\"Marley’s influence and past relationship with Scrooge is indicated, suggesting a connection that impacts Scrooge's character growth.\"<|>\"past influence, personal history\"<|>6)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Turkeys\"<|>\"The Ghost highlights the presence of Turkeys at the feast, emphasizing the spirit of generosity during Christmas.\"<|>\"feasting, abundance\"<|>8)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Geese\"<|>\"The inclusion of Geese in the holiday spread reinforces the theme of abundance and joy during Christmas.\"<|>\"celebration, abundance\"<|>8)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Game\"<|>\"The Ghost showcases Game as part of the feast, emphasizing the wealth of food associated with Christmas.\"<|>\"feasting, abundance\"<|>8)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Brawn\"<|>\"The presence of Brawn at the feast illustrates the richness and variety of the holiday season.\"<|>\"celebration, abundance\"<|>7)##\n(\"relationship\"<|>\"Ghost of Christmas 
Present\"<|>\"Sucking-pigs\"<|>\"Sucking-pigs represent the extravagant and joyous food available during the Christmas season.\"<|>\"feasting, celebration\"<|>8)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Sausages\"<|>\"Sausages contribute to the lavish spread, symbolizing the abundance of the Christmas feast.\"<|>\"celebration, abundance\"<|>7)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Mince-pies\"<|>\"Mince-pies are presented by the Ghost, symbolizing the sweetness and joy of Christmas feasting.\"<|>\"festive spirit, sweet traditions\"<|>8)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Plum-puddings\"<|>\"Plum-puddings highlight traditional festive desserts, showcasing the richness of the holiday spirit.\"<|>\"celebration, sweetness\"<|>8)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Barrels of oysters\"<|>\"Barrels of oysters symbolize indulgence and luxury, enhancing the festive atmosphere presented by the Spirit.\"<|>\"luxury, feasting\"<|>8)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Red-hot chestnuts\"<|>\"Red-hot chestnuts are part of traditional Christmas snacks, enhancing the celebratory mood of the feast.\"<|>\"warmth, festive atmosphere\"<|>7)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Cherry-cheeked apples\"<|>\"Cherry-cheeked apples add to the feast's vibrancy, contributing to the festive decorations and spirit.\"<|>\"festive spirit, abundance\"<|>7)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Juicy oranges\"<|>\"Juicy oranges symbolize freshness in the extravagant Christmas spread demonstrated by the Ghost.\"<|>\"celebration, freshness\"<|>7)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Luscious pears\"<|>\"Luscious pears contribute to the rich variety of fruits in the festive spread, symbolizing abundance.\"<|>\"feasting, abundance\"<|>7)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Immense twelfth-cakes\"<|>\"Immense 
twelfth-cakes represent the grandeur of Christmas celebrations that the Ghost highlights.\"<|>\"celebration, festivity\"<|>9)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Seething bowls of punch\"<|>\"Seething bowls of punch contribute to the warm and inviting atmosphere of the Christmas feast showcased by the Spirit.\"<|>\"beverage, festive atmosphere\"<|>8)##\n(\"relationship\"<|>\"Ghost of Christmas Present\"<|>\"Spirit\"<|>\"The Ghost embodies the essence of Christmas present, revealing the themes of joy and abundance to Scrooge.\"<|>\"mentoring, generosity\"<|>10)##\n<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "90bdac63539b82902ce063b858cf465d": {
+ "return": "(\"entity\"<|>\"The Father\"<|>\"person\"<|>\"The Father is a character who arrives home with Christmas presents, symbolizing familial joy and festive spirit.\")##\n(\"entity\"<|>\"The Children\"<|>\"person\"<|>\"The Children refer to the group of young individuals filled with wonder and delight during the Christmas festivities.\")##\n(\"entity\"<|>\"The Porter\"<|>\"person\"<|>\"The Porter is a character who becomes the target of playful affection from the children, showcasing innocence and chaos during celebrations.\")##\n(\"entity\"<|>\"The House\"<|>\"geo\"<|>\"The House is the setting for the Christmas celebration and interactions among the characters, embodying warmth and family togetherness.\")##\n(\"entity\"<|>\"The Mother\"<|>\"person\"<|>\"The Mother is a character associated with the family dynamics during the Christmas scene, though her actions are not explicitly detailed.\")##\n(\"entity\"<|>\"The Doll's Frying Pan\"<|>\"event\"<|>\"The Doll's Frying Pan is part of a humorous incident involving the baby, which reflects the playful and chaotic ambiance of childhood.\")##\n(\"entity\"<|>\"The Ghost\"<|>\"event\"<|>\"The Ghost refers specifically to the supernatural presence guiding Scrooge through his reflections and past experiences.\")##\n(\"relationship\"<|>\"The Father\"<|>\"The Children\"<|>\"The Father engages with the children, highlighting familial bonds and the excitement of Christmas celebrations.\"<|>\"familial joy, celebration\"<|>8)##\n(\"relationship\"<|>\"The Porter\"<|>\"The Children\"<|>\"The Children playfully interact with the Porter, creating a scene of joyous chaos and affection.\"<|>\"playfulness, joy\"<|>9)##\n(\"relationship\"<|>\"The House\"<|>\"The Children\"<|>\"The House serves as the setting for the children's joyous activities and emotional expressions during the Christmas event.\"<|>\"setting, celebration\"<|>7)##\n(\"relationship\"<|>\"The Mother\"<|>\"The Father\"<|>\"The Mother and Father are depicted 
together, emphasizing family dynamics during the festive occasion.\"<|>\"family dynamics, togetherness\"<|>6)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Ghost\"<|>\"Scrooge's engagement with The Ghost prompts a significant emotional and psychological transformation as he confronts his past.\"<|>\"self-reflection, transformation\"<|>9)##\n(\"relationship\"<|>\"The Ghost\"<|>\"The Spirit\"<|>\"The Ghost acts as a precursor to The Spirit, both influencing Scrooge's journey of enlightenment and redemption.\"<|>\"supernatural influence, guidance\"<|>8)##\n(\"content_keywords\"<|>\"family dynamics, Christmas celebration, children's joy, supernatural guidance\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "e6e2db88f809aa18ddd99f5536d1412d": {
+ "return": "(\"entity\"<|>\"Christmas\"<|>\"event\"<|>\"Christmas is a festive time celebrated by the Cratchit family, symbolizing joy, family togetherness, and generosity.\")##\n(\"entity\"<|>\"Ghost\"<|>\"person\"<|>\"The Ghost is a supernatural entity that provides guidance and reflection on Scrooge's past, representing moral and societal themes.\")##\n(\"entity\"<|>\"Cratchit Dinner\"<|>\"event\"<|>\"The Cratchit Dinner is a gathering of the Cratchit family for their Christmas feast, embodying tradition and familial love.\")##\n(\"entity\"<|>\"Sage and Onion\"<|>\"technology\"<|>\"Sage and Onion is an ingredient used in the Cratchit family's Christmas dinner, symbolizing traditional cooking and festive meals.\")##\n(\"entity\"<|>\"Chimney Corner\"<|>\"geo\"<|>\"The Chimney Corner refers to a specific location in the Cratchit household associated with warmth and family gatherings during the holiday season.\")##\n(\"entity\"<|>\"Surplus Population\"<|>\"concept\"<|>\"Surplus Population is a concept referenced in Scrooge's reflection on societal values and the consequences of neglect, particularly concerning the vulnerable.\")##\n(\"entity\"<|>\"Feast\"<|>\"event\"<|>\"The Feast refers to the Christmas celebration involving food and family, symbolizing gratitude and festive traditions.\")##\n(\"entity\"<|>\"Handle of Knife\"<|>\"object\"<|>\"The Handle of Knife used by Tiny Tim during Christmas dinner signifies the child's involvement and excitement in the festive celebrations.\")##\n(\"entity\"<|>\"Plates\"<|>\"object\"<|>\"The Plates represent the meal served and the act of sharing food, highlighting the importance of gathering around the table as a family.\")##\n(\"entity\"<|>\"Hearth\"<|>\"geo\"<|>\"The Hearth represents the heart of the home where the family gathers, symbolizing warmth, comfort, and family ties.\")##\n(\"entity\"<|>\"Apple Sauce\"<|>\"object\"<|>\"Apple Sauce is a dish served with the Cratchit family's Christmas dinner, contributing to the 
festive meal.\")##\n(\"entity\"<|>\"Mashed Potatoes\"<|>\"object\"<|>\"Mashed Potatoes are another dish served during the Cratchit family's Christmas dinner, showcasing the simplicity and warmth of their celebration.\")## \n(\"content_keywords\"<|>\"family tradition, holiday celebration, social commentary, compassion for the needy\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "e8398bd91d13a7d06df89dfac2fee7fd": {
+ "return": "(\"entity\"<|>\"Project Gutenberg™ trademark\"<|>\"organization\"<|>\"The Project Gutenberg™ trademark is the intellectual property associated with Project Gutenberg™, which signifies its dedication to the free distribution of electronic works.\")##\n(\"entity\"<|>\"EIN 64-6221541\"<|>\"organization\"<|>\"The EIN 64-6221541 is the federal tax identification number for the Project Gutenberg Literary Archive Foundation, which identifies it for tax purposes.\")##\n(\"entity\"<|>\"Defects\"<|>\"concept\"<|>\"Defects refer to potential issues that may arise within Project Gutenberg™ electronic works, including incomplete or corrupt data and other errors.\")##\n(\"entity\"<|>\"limited warranty\"<|>\"concept\"<|>\"Limited warranty refers to the disclaimer provided by the Project Gutenberg Literary Archive Foundation regarding the condition and reliability of electronic works provided.\")##\n(\"entity\"<|>\"Right of Replacement or Refund\"<|>\"concept\"<|>\"The Right of Replacement or Refund is a provision allowing users to request a refund or replacement for defective Project Gutenberg™ works within a specified timeframe.\")##\n(\"entity\"<|>\"Section 3 and 4\"<|>\"event\"<|>\"Sections 3 and 4 of the document provide information on how contributions and support can help Project Gutenberg™ continue its mission.\")##\n(\"entity\"<|>\"volunteers\"<|>\"person\"<|>\"Volunteers play a crucial role in the operation of Project Gutenberg™, contributing to copyright research, transcription, and proofreading of works to ensure quality and availability.\")##\n(\"entity\"<|>\"donations\"<|>\"concept\"<|>\"Donations are financial contributions that provide essential support to Project Gutenberg™, facilitating the efforts of volunteers and the overall project.\")##\n(\"entity\"<|>\"copyright research\"<|>\"event\"<|>\"Copyright research refers to the process undertaken by Project Gutenberg™ volunteers to ensure that works are not protected by U.S. 
copyright law before distribution.\")##\n(\"entity\"<|>\"U.S. copyright law\"<|>\"concept\"<|>\"U.S. copyright law governs the protection of intellectual property in the United States, impacting how works can be distributed through Project Gutenberg™.\")##\n(\"entity\"<|>\"legal fees\"<|>\"concept\"<|>\"Legal fees refer to costs associated with legal actions or advice that may arise in the context of using or distributing Project Gutenberg™ works.\")##\n\n##",
+ "model": "gpt-4o-mini"
+ },
+ "b007fc600f0390dc5b16c9aa362f2fbc": {
+ "return": "(\"entity\"<|>\"Holly\"<|>\"geo\"<|>\"Holly is a plant associated with Christmas, mentioned among other decorations that contribute to the festive atmosphere.\")##\n(\"entity\"<|>\"Mistletoe\"<|>\"geo\"<|>\"Mistletoe is a traditional plant used for decoration during Christmas, symbolizing joy and a festive spirit.\")##\n(\"entity\"<|>\"Turkeys\"<|>\"geo\"<|>\"Turkeys represent a common food item associated with the Christmas feast, highlighting the culinary aspects of the celebration.\")##\n(\"entity\"<|>\"Geese\"<|>\"geo\"<|>\"Geese are another traditional food item during Christmas, contributing to the festive dining experiences.\")##\n(\"entity\"<|>\"Poultry\"<|>\"geo\"<|>\"Poultry refers to various birds used for Christmas meals, representing the culinary traditions of the holiday.\")##\n(\"entity\"<|>\"Chestnuts\"<|>\"geo\"<|>\"Chestnuts are a festive food mentioned among the goods sold during Christmas, adding to the holiday's sensory experience.\")##\n(\"entity\"<|>\"Spanish Onions\"<|>\"geo\"<|>\"Spanish Onions are a vegetable featured in the festive markets, enhancing the richness of Christmas cooking.\")##\n(\"entity\"<|>\"Apples\"<|>\"geo\"<|>\"Apples are a seasonal fruit displayed prominently during Christmas markets, symbolizing traditional holiday fare.\")##\n(\"entity\"<|>\"Pears\"<|>\"geo\"<|>\"Pears are another fruit contributing to the vibrant offerings in markets during the Christmas season.\")##\n(\"entity\"<|>\"Grapes\"<|>\"geo\"<|>\"Grapes are included in the festive market displays, representing holiday indulgence.\")##\n(\"entity\"<|>\"Filberts\"<|>\"geo\"<|>\"Filberts are nuts that evoke nostalgia and contribute to the traditional foods associated with Christmas.\")##\n(\"entity\"<|>\"Norfolk Biffins\"<|>\"geo\"<|>\"Norfolk Biffins are a type of apple referenced in the context of Christmas fruit offerings.\")##\n(\"entity\"<|>\"Figs\"<|>\"geo\"<|>\"Figs are sweet fruits that are particularly mentioned as part of the festive 
market offerings during Christmas.\")##\n(\"entity\"<|>\"French Plums\"<|>\"geo\"<|>\"French Plums are fruits showcased in the Christmas markets, adding to the eclectic array of festive produce.\")##\n\n## \n(\"relationship\"<|>\"Christmas\"<|>\"Holly\"<|>\"Holly contributes to the festive decorations associated with the Christmas celebration.\"<|>\"festive decorations, holiday spirit\"<|>8)##\n(\"relationship\"<|>\"Christmas\"<|>\"Mistletoe\"<|>\"Mistletoe is a traditional decoration that enhances the joy and atmosphere of Christmas.\"<|>\"festive symbols, joy\"<|>7)##\n(\"relationship\"<|>\"Christmas\"<|>\"Turkeys\"<|>\"Turkeys are a traditional dish served during Christmas, contributing to the celebratory meal.\"<|>\"culinary tradition, celebration\"<|>9)##\n(\"relationship\"<|>\"Christmas\"<|>\"Geese\"<|>\"Geese are commonly served at Christmas dinners, forming part of the holiday feast.\"<|>\"culinary tradition, celebration\"<|>9)##\n(\"relationship\"<|>\"The Grocers\"<|>\"Holly\"<|>\"The Grocers sell Holly and other festive items, enhancing the Christmas shopping experience.\"<|>\"market offerings, festive supplies\"<|>8)##\n(\"relationship\"<|>\"The Grocers\"<|>\"Mistletoe\"<|>\"The Grocers provide Mistletoe among their offerings, reflecting the holiday spirit.\"<|>\"market offerings, holiday spirit\"<|>8)##\n(\"relationship\"<|>\"The Grocers\"<|>\"Turkeys\"<|>\"The Grocers supply Turkeys for Christmas dinners, contributing to the festive meal preparations.\"<|>\"holiday meals, supply chain\"<|>9)##\n(\"relationship\"<|>\"The Grocers\"<|>\"Geese\"<|>\"The Grocers help provide Geese for the Christmas feast, supporting culinary traditions.\"<|>\"holiday meals, supply chain\"<|>9)##\n(\"relationship\"<|>\"Christmas\"<|>\"Apples\"<|>\"Apples are featured in markets during Christmas, indicating traditional holiday fare.\"<|>\"seasonal offerings, tradition\"<|>6)##\n(\"relationship\"<|>\"Christmas\"<|>\"Pears\"<|>\"Pears are part of the fruits offered during the 
Christmas season, enhancing festive markets.\"<|>\"seasonal offerings, tradition\"<|>6)##\n(\"relationship\"<|>\"The Grocers\"<|>\"Apples\"<|>\"The Grocers offer Apples as part of their Christmas produce, reflecting seasonal traditions.\"<|>\"market offerings, seasonal tradition\"<|>7)##\n(\"relationship\"<|>\"The Grocers\"<|>\"Pears\"<|>\"The Grocers supply Pears during Christmas, contributing to the holiday's culinary delights.\"<|>\"market offerings, festive spirit\"<|>7)##\n(\"relationship\"<|>\"Christmas\"<|>\"Chestnuts\"<|>\"Chestnuts are sold during Christmas, contributing to the sensory experience of the holiday.\"<|>\"festive foods, sensory experiences\"<|>8)##\n(\"relationship\"<|>\"The Grocers\"<|>\"Chestnuts\"<|>\"The Grocers feature Chestnuts in their market offerings during Christmas, enhancing the festive atmosphere.\"<|>\"market offerings, festive spirit\"<|>8)##\n(\"relationship\"<|>\"Christmas\"<|>\"Spanish Onions\"<|>\"Spanish Onions are part of the seasonal offerings that contribute to the culinary landscape of Christmas.\"<|>\"culinary tradition, seasonal offerings\"<|>7)##\n(\"relationship\"<|>\"The Grocers\"<|>\"Spanish Onions\"<|>\"The Grocers provide Spanish Onions, enriching the food selections during Christmas.\"<|>\"market offerings, food supply\"<|>7)##\n(\"relationship\"<|>\"Christmas\"<|>\"Figs\"<|>\"Figs are highlighted among the festive foods offered during Christmas, enhancing markets.\"<|>\"seasonal offerings, festive foods\"<|>6)##\n(\"relationship\"<|>\"The Grocers\"<|>\"Figs\"<|>\"The Grocers feature Figs in the Christmas markets, capturing the essence of holiday indulgence.\"<|>\"market offerings, seasonal treats\"<|>7)##\n(\"relationship\"<|>\"Christmas\"<|>\"French Plums\"<|>\"French Plums are included in the festive offerings, creating a rich holiday atmosphere.\"<|>\"seasonal offerings, festive foods\"<|>6)##\n(\"relationship\"<|>\"The Grocers\"<|>\"French Plums\"<|>\"The Grocers sell French Plums during Christmas, 
showcasing holiday produce.\"<|>\"market offerings, culinary delights\"<|>7)##\n(\"content_keywords\"<|>\"Christmas, festive decorations, culinary traditions, seasonal offerings\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "cb2a450472979815fb1376af3c2d6d61": {
+ "return": "(\"entity\"<|>\"Gentleman with Excrescence\"<|>\"person\"<|>\"The Gentleman with Excrescence is characterized by a distinctive physical feature and expresses opinions about a funeral with a humorous outlook.\")##\n(\"entity\"<|>\"Large Chin Man\"<|>\"person\"<|>\"The Large Chin Man is a character who displays disinterest and engages in conversation about the deceased, revealing a lack of genuine concern.\")##\n(\"entity\"<|>\"Cold Weather\"<|>\"event\"<|>\"Cold Weather is noted in the conversation between the businessmen, serving as a backdrop to their trivial interactions.\")##\n(\"entity\"<|>\"The Party\"<|>\"event\"<|>\"The Party refers to the potential gathering for the funeral, which the characters consider with amusement rather than sincerity.\")##\n(\"entity\"<|>\"Rich Men\"<|>\"organization\"<|>\"Rich Men are depicted as individuals of considerable wealth and status, reflecting the social hierarchy and indifference towards the less fortunate.\")##\n(\"entity\"<|>\"Street\"<|>\"geo\"<|>\"The Street refers to the location where Scrooge observes the businessmen, symbolizing the busy yet impersonal urban environment.\")##\n(\"entity\"<|>\"Multitudes\"<|>\"concept\"<|>\"Multitudes refer to the crowd in the city, illustrating the anonymity and disconnect among people in urban life.\")##\n(\"entity\"<|>\"Shadow of Himself\"<|>\"concept\"<|>\"Shadow of Himself represents Scrooge's introspection and anticipation of his future actions based on his reflections.\")##\n(\"entity\"<|>\"Bad Repute\"<|>\"concept\"<|>\"Bad Repute refers to the negative perception attached to certain areas of the town, underscoring themes of moral decay and neglect.\")##\n(\"entity\"<|>\"Frouzy Curtaining\"<|>\"concept\"<|>\"Frouzy Curtaining signifies the poor living conditions and the sense of disarray found in the low-browed shop and its surroundings.\")##\n(\"entity\"<|>\"Rusty Keys, Nails, Chains, Hinges\"<|>\"concept\"<|>\"Rusty Keys, Nails, Chains, Hinges are symbolic 
items representing the remnants of past lives and the detritus of a forgotten society.\")##\n\n## \n(\"relationship\"<|>\"Gentleman with Excrescence\"<|>\"Large Chin Man\"<|>\"The Gentleman with Excrescence and the Large Chin Man engage in light-hearted banter about the deceased, showcasing their superficial camaraderie.\"<|>\"humor, social interaction\"<|>5)##\n(\"relationship\"<|>\"Gentleman with Excrescence\"<|>\"The Party\"<|>\"The Gentleman with Excrescence seeks a lunch provision for attending the party, indicating a lack of genuine interest in the funeral itself.\"<|>\"indifference, social obligation\"<|>6)##\n(\"relationship\"<|>\"Large Chin Man\"<|>\"Businessmen\"<|>\"The Large Chin Man interacts with the businessmen, demonstrating a shared triviality in their concerns and reinforcing social hierarchies.\"<|>\"social dynamics, triviality\"<|>5)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Street\"<|>\"Scrooge observes the Street's inhabitants and their interactions, reflecting on his own status and societal detachment.\"<|>\"observation, societal critique\"<|>7)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Multitudes\"<|>\"Scrooge's feelings of alienation are heightened as he witnesses the Multitudes, illustrating his inner turmoil and societal disconnect.\"<|>\"alienation, introspection\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Shadow of Himself\"<|>\"Scrooge hopes to learn from the Shadow of Himself, signifying his desire for personal growth and understanding of his life choices.\"<|>\"introspection, personal growth\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Bad Repute\"<|>\"Scrooge's experience in areas of Bad Repute forces him to confront the realities of society and his own role within it.\"<|>\"self-reflection, moral awakening\"<|>9)##\n(\"relationship\"<|>\"The Low-Browed Shop\"<|>\"Frouzy Curtaining\"<|>\"The Frouzy Curtaining serves as a descriptor of the Low-Browed Shop, symbolizing neglect and the harsh realities of the environment.\"<|>\"symbolism, 
decay\"<|>6)##\n(\"relationship\"<|>\"The Low-Browed Shop\"<|>\"Rusty Keys, Nails, Chains, Hinges\"<|>\"The Rusty Keys, Nails, Chains, and Hinges within the Low-Browed Shop highlight the decay and abandonment of valued items, metaphorically reflecting societal neglect.\"<|>\"symbolism, societal neglect\"<|>7)##\n<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "5b16ce1999bd217b54628b839f8038fe": {
+ "return": "(\"entity\"<|>\"Peter Cratchit\"<|>\"person\"<|>\"Peter Cratchit is a member of the Cratchit family who participates in expressing their unity and happiness.\")##\n(\"entity\"<|>\"Two Young Cratchits\"<|>\"person\"<|>\"The Two Young Cratchits symbolize the innocence and joy of childhood within the Cratchit family.\")##\n(\"entity\"<|>\"The Spirit\"<|>\"organization\"<|>\"The Spirit refers to an embodiment of the Christmas message that represents moral guidance and introspection in Scrooge's life.\")##\n(\"entity\"<|>\"The Grave\"<|>\"geo\"<|>\"The Grave symbolizes the ultimate consequence of Scrooge's actions and the life he leads, representing mortality.\")##\n(\"entity\"<|>\"Jacob Marley\"<|>\"person\"<|>\"Jacob Marley is a deceased character who symbolizes the consequences of a life led without compassion, influencing Scrooge's redemption.\")##\n(\"entity\"<|>\"The Past\"<|>\"geo\"<|>\"The Past is a concept that reflects Scrooge's memories and previous actions that influence his current state and future decisions.\")##\n(\"entity\"<|>\"The Present\"<|>\"geo\"<|>\"The Present represents the current moment in Scrooge's life that he must recognize and change to achieve redemption.\")##\n(\"relationship\"<|>\"Bob\"<|>\"Peter Cratchit\"<|>\"Bob expresses feelings of happiness to Peter, showcasing the close family ties and the importance of togetherness.\"<|>\"family ties, happiness\"<|>8)##\n(\"relationship\"<|>\"The Spirit\"<|>\"Scrooge\"<|>\"The Spirit represents moral lessons and urges Scrooge to change his ways for a better future.\"<|>\"moral guidance, transformation\"<|>10)##\n(\"relationship\"<|>\"Scrooge\"<|>\"Jacob Marley\"<|>\"Scrooge reflects on Jacob Marley’s past warnings, which serve as a catalyst for his transformation.\"<|>\"past influence, warning\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Grave\"<|>\"Scrooge’s need to confront his own grave signifies the urgency to change his life before it is too late.\"<|>\"mortality, 
urgency\"<|>8)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Past\"<|>\"Scrooge must acknowledge The Past to understand his current situation and guide his choices for the future.\"<|>\"self-reflection, choices\"<|>9)##\n(\"relationship\"<|>\"Scrooge\"<|>\"The Present\"<|>\"Scrooge emphasizes the importance of living fully in The Present to change his fate going forward.\"<|>\"change, awareness\"<|>9)##\n(\"content_keywords\"<|>\"family unity, moral lessons, introspection, life choices\")<|COMPLETE|>",
+ "model": "gpt-4o-mini"
+ },
+ "e66d39e87f590a51bd35dd2f7d69e462": {
+ "return": "Scrooge is a central character known for his miserly disposition, cold-hearted nature, and disdain for Christmas. He initially embodies greed, focusing solely on business and exhibiting a lack of compassion for others. Throughout the story, Scrooge undergoes a profound transformation as he reflects on his life and the impact of his past decisions, particularly through the visits of spirits, including the ghost of his former partner, Jacob Marley.\n\nScrooge is depicted as a solitary figure who experiences a journey of self-discovery and redemption. He grapples with feelings of horror and remorse as he witnesses events related to the deceased and contemplates his own mortality, illustrating his internal struggle. His reflections often lead him to recall his lonely childhood and the mistakes that have shaped his pessimistic worldview.\n\nThe encounters with the spirits guide Scrooge through different scenes, revealing both joy and hardship during the Christmas season. These transformative experiences prompt him to reconsider his relationships, especially in relation to the Cratchit family and Tiny Tim, evoking deep emotions such as grief, regret, and longing for connection. Scrooge's character evolves from a skeptical, apathetic man to someone who embraces generosity, compassion, and a desire to make amends.\n\nBy the end of the narrative, Scrooge's transformation is marked by a newfound joy and determination, as he seeks to contribute positively to the lives of those around him, ultimately reshaping his previous views on Christmas and community.",
+ "model": "gpt-4o-mini"
+ },
+ "a88aa22110316b0a82d181d878dfcadb": {
+ "return": "The story prominently features several key themes that resonate throughout its narrative. One of the most significant themes is **redemption**. The protagonist, Ebenezer Scrooge, undergoes a profound transformation as he is confronted with the consequences of his past actions, ultimately leading to his resolution to change and embrace a better, more compassionate life. This theme underscores the idea that it is never too late to alter one's path and seek forgiveness for past wrongs.\n\nAnother central theme is **the importance of kindness and generosity**. Scrooge’s initial miserliness contrasts sharply with the warmth and joy displayed by characters like the Cratchit family. Their humble celebration of Christmas, despite their struggles, highlights the value of familial love, community spirit, and the act of giving, which serves as a reminder that wealth is not measured solely in material possessions.\n\nThe theme of **time** is also critical, as the narrative explores the impact of the past, present, and future on individual lives. Scrooge's journey with the three spirits emphasizes how experiences shape one's character and choices, reinforcing the notion that the present moment is vital for making positive changes. Through his reflections on lost opportunities and potential futures, Scrooge learns that acknowledging time's lessons is essential for personal growth.\n\nLastly, **isolation versus community** is a significant theme. Scrooge's initial solitude reflects his disconnection from humanity, which is contrasted by the stronger bonds of friendship and familial ties depicted in the lives of those around him. This theme highlights the necessity of nurturing relationships and the adverse effects of alienation. 
Ultimately, the story advocates for the interconnectedness of people and the joy that comes from engaging with others.\n\nIn summary, the themes of redemption, kindness, the significance of time, and the contrast between isolation and community weave together to create a rich moral tapestry that encourages readers to embrace empathy and change.",
+ "model": "gpt-4o-mini"
+ },
+ "393cba6de95d05eff27a695ea77494f6": {
+ "return": "{\n \"high_level_keywords\": [\"Themes\", \"Story analysis\", \"Literary elements\"],\n \"low_level_keywords\": [\"Character development\", \"Conflict\", \"Symbolism\", \"Narrative structure\", \"Moral lessons\"]\n}",
+ "model": "gpt-4o-mini"
+ },
+ "a983995d27621826c2c34b3086cc20b3": {
+ "return": "The novella \"A Christmas Carol\" by Charles Dickens is rich with themes that resonate with readers and audiences across generations. Several key themes emerge prominently, each contributing to the moral fabric of the narrative.\n\n### Transformation and Redemption\n\nAt the heart of the story lies the theme of transformation, particularly as embodied by the character of Ebenezer Scrooge. Initially depicted as miserly and unyielding, Scrooge undergoes a profound change facilitated by the supernatural visits of the three spirits. This metamorphosis underscores the transformative power of reflection, compassion, and the spirit of giving typically embodied by the Christmas season. Scrooge’s journey from a cold-hearted miser to a kind and generous man illustrates the possibility of redemption, regardless of one's past.\n\n### The Consequences of Avarice\n\nAvarice, or excessive greed, is another central theme depicted through Scrooge's character. His initial cold-heartedness is a direct consequence of his avaricious nature; he focuses solely on material wealth and business dealings at the expense of relationships and joy. The narrative highlights the ultimate emptiness that such a life leads to, culminating in feelings of loneliness and despair. This theme serves as a cautionary tale about the dangers of prioritizing wealth over human connection and empathy.\n\n### The Importance of Family and Community\n\nFamily and community play crucial roles throughout the story, serving as a stark contrast to Scrooge’s lonely existence. The interactions of the Cratchit family during Christmas exemplify warmth, love, and togetherness. Through the lens of the Cratchits, especially Tiny Tim’s character, readers witness the importance of familial bonds and social responsibility. 
The emotional connections and support displayed in these relationships portray a sense of hope and joy that transcends material circumstances.\n\n### Mortality and Reflection\n\nMortality is a recurring theme that compels both Scrooge and the reader to contemplate the fragility of life. The visits from the spirits, particularly the Ghost of Christmas Yet to Come, force Scrooge to confront the reality of his actions and their ultimate consequences—his own death and legacy. The theme of mortality drives home the message that it is essential to live a life marked by compassion and kindness, as these legacy elements are what endure beyond one's physical existence.\n\n### The Supernatural and Symbolism\n\nThe influence of supernatural elements and symbolism plays a significant role in conveying the story's themes. The three spirits serve not only as guides to Scrooge’s past, present, and future but also symbolize the moral choices that individuals must confront. For example, the chains worn by Marley's Ghost symbolize the burdens of regret that accompany a life devoid of compassion. Darkness and light are depicted throughout the novella, representing the struggles between ignorance and knowledge, despair and hope.\n\n### Conclusion \n\nIn summary, \"A Christmas Carol\" encompasses profound themes that engage with the complexities of the human experience. Transformation and redemption, avarice, the significance of family and community, mortality, and the role of the supernatural come together to form a rich tapestry of moral lessons. These themes not only elevate the narrative but also invite readers to reflect on their own lives and relationships, making the novella a timeless classic in discussions of human values and societal responsibilities. The story illustrates how one's choices can create ripples of impact throughout their life and the lives of others.",
+ "model": "gpt-4o-mini"
+ }
+}
\ No newline at end of file
diff --git a/neo4jWorkDir/kv_store_text_chunks.json b/neo4jWorkDir/kv_store_text_chunks.json
new file mode 100644
index 00000000..f737052e
--- /dev/null
+++ b/neo4jWorkDir/kv_store_text_chunks.json
@@ -0,0 +1,254 @@
+{
+ "chunk-9e3921da66da5d761ab73cd849af6c43": {
+ "tokens": 1200,
+ "content": "The Project Gutenberg eBook of A Christmas Carol\n \nThis ebook is for the use of anyone anywhere in the United States and\nmost other parts of the world at no cost and with almost no restrictions\nwhatsoever. You may copy it, give it away or re-use it under the terms\nof the Project Gutenberg License included with this ebook or online\nat www.gutenberg.org. If you are not located in the United States,\nyou will have to check the laws of the country where you are located\nbefore using this eBook.\n\nTitle: A Christmas Carol\n\nAuthor: Charles Dickens\n\nIllustrator: Arthur Rackham\n\nRelease date: December 24, 2007 [eBook #24022]\n\nLanguage: English\n\nOriginal publication: Philadelphia and New York: J. B. Lippincott Company,, 1915\n\nCredits: Produced by Suzanne Shell, Janet Blenkinship and the Online\n Distributed Proofreading Team at http://www.pgdp.net\n\n\n*** START OF THE PROJECT GUTENBERG EBOOK A CHRISTMAS CAROL ***\n\n\n\n\nProduced by Suzanne Shell, Janet Blenkinship and the Online\nDistributed Proofreading Team at http://www.pgdp.net\n\n\n\n\n\n\n\n\n\n\n\n A CHRISTMAS CAROL\n\n [Illustration: _\"How now?\" said Scrooge, caustic and cold as ever.\n \"What do you want with me?\"_]\n\n\n A CHRISTMAS CAROL\n\n [Illustration]\n\n BY\n\n CHARLES DICKENS\n\n [Illustration]\n\n ILLUSTRATED BY ARTHUR RACKHAM\n\n [Illustration]\n\n J. B. LIPPINCOTT COMPANY PHILADELPHIA AND NEW YORK\n\n FIRST PUBLISHED 1915\n\n REPRINTED 1923, 1927, 1932, 1933, 1934, 1935, 1947, 1948, 1952, 1958,\n 1962, 1964, 1966, 1967, 1969, 1971, 1972, 1973\n\n ISBN: 0-397-00033-2\n\n PRINTED IN GREAT BRITAIN\n\n\n\n\n PREFACE\n\n I have endeavoured in this Ghostly little book to raise the Ghost of an\n Idea which shall not put my readers out of humour with themselves, with\n each other, with the season, or with me. May it haunt their house\n pleasantly, and no one wish to lay it.\n\n Their faithful Friend and Servant,\n\n C. 
D.\n\n _December, 1843._\n\n\n\n\n CHARACTERS\n\n Bob Cratchit, clerk to Ebenezer Scrooge.\n Peter Cratchit, a son of the preceding.\n Tim Cratchit (\"Tiny Tim\"), a cripple, youngest son of Bob Cratchit.\n Mr. Fezziwig, a kind-hearted, jovial old merchant.\n Fred, Scrooge's nephew.\n Ghost of Christmas Past, a phantom showing things past.\n Ghost of Christmas Present, a spirit of a kind, generous,\n and hearty nature.\n Ghost of Christmas Yet to Come, an apparition showing the shadows\n of things which yet may happen.\n Ghost of Jacob Marley, a spectre of Scrooge's former partner in business.\n Joe, a marine-store dealer and receiver of stolen goods.\n Ebenezer Scrooge, a grasping, covetous old man, the surviving partner\n of the firm of Scrooge and Marley.\n Mr. Topper, a bachelor.\n Dick Wilkins, a fellow apprentice of Scrooge's.\n\n Belle, a comely matron, an old sweetheart of Scrooge's.\n Caroline, wife of one of Scrooge's debtors.\n Mrs. Cratchit, wife of Bob Cratchit.\n Belinda and Martha Cratchit, daughters of the preceding.\n\n Mrs. Dilber, a laundress.\n Fan, the sister of Scrooge.\n Mrs. Fezziwig, the worthy partner of Mr. Fezziwig.\n\n\n\n\n CONTENTS\n\n STAVE ONE--MARLEY'S GHOST 3\n STAVE TWO--THE FIRST OF THE THREE SPIRITS 37\n STAVE THREE--THE SECOND OF THE THREE SPIRITS 69\n STAVE FOUR--THE LAST OF THE SPIRITS 111\n STAVE FIVE--THE END OF IT 137\n\n\n LIST OF ILLUSTRATIONS\n\n _IN COLOUR_\n\n\n \"How now?\" said Scrooge, caustic\n and cold as ever. \"What do you\n want with me?\" _Frontispiece_\n\n Bob Cratchit went down a slide on\n Cornhill, at the end of a lane of\n boys, twenty times, in honour of\n its being Christmas Eve 16\n\n Nobody under the bed; nobody in\n the closet; nobody in his dressing-gown,\n which was hanging up\n in a suspicious attitude against\n the wall 20\n\n The air was filled with phantoms,\n wandering hither and thither in\n restless haste and moaning as\n they went 32\n\n Then old Fezziwig stood out to\n dance with Mrs. 
Fezziwig 54\n\n A flushed and boisterous group 62\n\n Laden with Christmas toys and\n presents 64\n\n The way he went after that plump\n sister in the lace tucker! 100\n\n \"How are you?\" said one.\n \"How are you?\"",
+ "chunk_order_index": 0,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-89777b838d5447c7bd1ec11282c4ee89": {
+ "tokens": 1200,
+ "content": "restless haste and moaning as\n they went 32\n\n Then old Fezziwig stood out to\n dance with Mrs. Fezziwig 54\n\n A flushed and boisterous group 62\n\n Laden with Christmas toys and\n presents 64\n\n The way he went after that plump\n sister in the lace tucker! 100\n\n \"How are you?\" said one.\n \"How are you?\" returned the other.\n \"Well!\" said the first. \"Old\n Scratch has got his own at last,\n hey?\" 114\n\n \"What do you call this?\" said Joe.\n \"Bed-curtains!\" \"Ah!\" returned\n the woman, laughing....\n \"Bed-curtains!\"\n\n \"You don't mean to say you took\n 'em down, rings and all, with him\n lying there?\" said Joe.\n\n \"Yes, I do,\" replied the woman.\n \"Why not?\" 120\n\n \"It's I, your uncle Scrooge. I have\n come to dinner. Will you let\n me in, Fred?\" 144\n\n \"Now, I'll tell you what, my friend,\"\n said Scrooge. \"I am not going\n to stand this sort of thing any\n longer.\" 146\n\n[Illustration]\n\n_IN BLACK AND WHITE_\n\n\n Tailpiece vi\n Tailpiece to List of Coloured Illustrations x\n Tailpiece to List of Black and White Illustrations xi\n Heading to Stave One 3\n They were portly gentlemen, pleasant to behold 12\n On the wings of the wind 28-29\n Tailpiece to Stave One 34\n Heading to Stave Two 37\n He produced a decanter of curiously\n light wine and a block of curiously heavy cake 50\n She left him, and they parted 60\n Tailpiece to Stave Two 65\n Heading to Stave Three 69\n There was nothing very cheerful in the climate 75\n He had been Tim's blood-horse all the way from church 84-85\n With the pudding 88\n Heading to Stave Four 111\n Heading to Stave Five 137\n Tailpiece to Stave Five 147\n\n[Illustration]\n\n\nSTAVE ONE\n\n\n[Illustration]\n\n\n\n\nMARLEY'S GHOST\n\n\nMarley was dead, to begin with. There is no doubt whatever about that.\nThe register of his burial was signed by the clergyman, the clerk, the\nundertaker, and the chief mourner. Scrooge signed it. 
And Scrooge's name\nwas good upon 'Change for anything he chose to put his hand to. Old\nMarley was as dead as a door-nail.\n\nMind! I don't mean to say that I know of my own knowledge, what there is\nparticularly dead about a door-nail. I might have been inclined, myself,\nto regard a coffin-nail as the deadest piece of ironmongery in the\ntrade. But the wisdom of our ancestors is in the simile; and my\nunhallowed hands shall not disturb it, or the country's done for. You\nwill, therefore, permit me to repeat, emphatically, that Marley was as\ndead as a door-nail.\n\nScrooge knew he was dead? Of course he did. How could it be otherwise?\nScrooge and he were partners for I don't know how many years. Scrooge\nwas his sole executor, his sole administrator, his sole assign, his sole\nresiduary legatee, his sole friend, and sole mourner. And even Scrooge\nwas not so dreadfully cut up by the sad event but that he was an\nexcellent man of business on the very day of the funeral, and solemnised\nit with an undoubted bargain.\n\nThe mention of Marley's funeral brings me back to the point I started\nfrom. There is no doubt that Marley was dead. This must be distinctly\nunderstood, or nothing wonderful can come of the story I am going to\nrelate. If we were not perfectly convinced that Hamlet's father died\nbefore the play began, there would be nothing more remarkable in his\ntaking a stroll at night, in an easterly wind, upon his own ramparts,\nthan there would be in any other middle-aged gentleman rashly turning\nout after dark in a breezy spot--say St. Paul's Churchyard, for\ninstance--literally to astonish his son's weak mind.\n\nScrooge never painted out Old Marley's name. There it stood, years\nafterwards, above the warehouse door: Scrooge and Marley. The firm was\nknown as Scrooge and Marley. Sometimes people new to the business called\nScrooge Scrooge, and sometimes Marley, but he answered to both names. It\nwas all the same to him.\n\nOh! 
but he was a tight-fisted hand at the grindstone, Scrooge! a\nsqueezing, wrenching, grasping, scraping, clutching, covetous old\nsinner! Hard and sharp as flint, from which no steel had ever struck out\ngenerous fire; secret, and self-contained, and solitary as an oyster.\nThe cold within him froze his old features, nipped his pointed nose,\nshrivelled his cheek, stiffened his gait; made his eyes red, his thin\nlips blue; and spoke out shrewdly in his grating voice. A frosty rime",
+ "chunk_order_index": 1,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-74e2466de2f67fd710ef2f20c0a8d9e0": {
+ "tokens": 1200,
+ "content": "clutching, covetous old\nsinner! Hard and sharp as flint, from which no steel had ever struck out\ngenerous fire; secret, and self-contained, and solitary as an oyster.\nThe cold within him froze his old features, nipped his pointed nose,\nshrivelled his cheek, stiffened his gait; made his eyes red, his thin\nlips blue; and spoke out shrewdly in his grating voice. A frosty rime\nwas on his head, and on his eyebrows, and his wiry chin. He carried his\nown low temperature always about with him; he iced his office in the\ndog-days, and didn't thaw it one degree at Christmas.\n\nExternal heat and cold had little influence on Scrooge. No warmth could\nwarm, no wintry weather chill him. No wind that blew was bitterer than\nhe, no falling snow was more intent upon its purpose, no pelting rain\nless open to entreaty. Foul weather didn't know where to have him. The\nheaviest rain, and snow, and hail, and sleet could boast of the\nadvantage over him in only one respect. They often 'came down'\nhandsomely, and Scrooge never did.\n\nNobody ever stopped him in the street to say, with gladsome looks, 'My\ndear Scrooge, how are you? When will you come to see me?' No beggars\nimplored him to bestow a trifle, no children asked him what it was\no'clock, no man or woman ever once in all his life inquired the way to\nsuch and such a place, of Scrooge. Even the blind men's dogs appeared to\nknow him; and, when they saw him coming on, would tug their owners into\ndoorways and up courts; and then would wag their tails as though they\nsaid, 'No eye at all is better than an evil eye, dark master!'\n\nBut what did Scrooge care? It was the very thing he liked. To edge his\nway along the crowded paths of life, warning all human sympathy to keep\nits distance, was what the knowing ones call 'nuts' to Scrooge.\n\nOnce upon a time--of all the good days in the year, on Christmas\nEve--old Scrooge sat busy in his counting-house. 
It was cold, bleak,\nbiting weather; foggy withal; and he could hear the people in the court\noutside go wheezing up and down, beating their hands upon their breasts,\nand stamping their feet upon the pavement stones to warm them. The City\nclocks had only just gone three, but it was quite dark already--it had\nnot been light all day--and candles were flaring in the windows of the\nneighbouring offices, like ruddy smears upon the palpable brown air. The\nfog came pouring in at every chink and keyhole, and was so dense\nwithout, that, although the court was of the narrowest, the houses\nopposite were mere phantoms. To see the dingy cloud come drooping down,\nobscuring everything, one might have thought that nature lived hard by,\nand was brewing on a large scale.\n\nThe door of Scrooge's counting-house was open, that he might keep his\neye upon his clerk, who in a dismal little cell beyond, a sort of tank,\nwas copying letters. Scrooge had a very small fire, but the clerk's fire\nwas so very much smaller that it looked like one coal. But he couldn't\nreplenish it, for Scrooge kept the coal-box in his own room; and so\nsurely as the clerk came in with the shovel, the master predicted that\nit would be necessary for them to part. Wherefore the clerk put on his\nwhite comforter, and tried to warm himself at the candle; in which\neffort, not being a man of strong imagination, he failed.\n\n'A merry Christmas, uncle! God save you!' cried a cheerful voice. It was\nthe voice of Scrooge's nephew, who came upon him so quickly that this\nwas the first intimation he had of his approach.\n\n'Bah!' said Scrooge. 'Humbug!'\n\nHe had so heated himself with rapid walking in the fog and frost, this\nnephew of Scrooge's, that he was all in a glow; his face was ruddy and\nhandsome; his eyes sparkled, and his breath smoked again.\n\n'Christmas a humbug, uncle!' said Scrooge's nephew. 'You don't mean\nthat, I am sure?'\n\n'I do,' said Scrooge. 'Merry Christmas! 
What right have you to be merry?\nWhat reason have you to be merry? You're poor enough.'\n\n'Come, then,' returned the nephew gaily. 'What right have you to be\ndismal? What reason have you to be morose? You're rich enough.'\n\nScrooge, having no better answer ready on the spur of the moment, said,\n'Bah!' again; and followed it up with 'Humbug!'\n\n'Don't be cross, uncle!' said the nephew.\n\n'What else can I be,' returned the uncle, 'when I live in such a world\nof fools as this? Merry Christmas! Out upon merry Christmas! What's\nChristmas-time to you but a time for paying bills without money; a time\nfor finding yourself a year older, and not an hour richer; a time for\nbalancing your books, and having every item in 'em through a round dozen\nof months presented dead against you? If I could work my will,' said\nScrooge indignantly, 'every idiot who goes",
+ "chunk_order_index": 2,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-5dac41b3f9eeaf794f0147400b1718cd": {
+ "tokens": 1200,
+ "content": "when I live in such a world\nof fools as this? Merry Christmas! Out upon merry Christmas! What's\nChristmas-time to you but a time for paying bills without money; a time\nfor finding yourself a year older, and not an hour richer; a time for\nbalancing your books, and having every item in 'em through a round dozen\nof months presented dead against you? If I could work my will,' said\nScrooge indignantly, 'every idiot who goes about with \"Merry Christmas\"\non his lips should be boiled with his own pudding, and buried with a\nstake of holly through his heart. He should!'\n\n'Uncle!' pleaded the nephew.\n\n'Nephew!' returned the uncle sternly, 'keep Christmas in your own way,\nand let me keep it in mine.'\n\n'Keep it!' repeated Scrooge's nephew. 'But you don't keep it.'\n\n'Let me leave it alone, then,' said Scrooge. 'Much good may it do you!\nMuch good it has ever done you!'\n\n'There are many things from which I might have derived good, by which I\nhave not profited, I dare say,' returned the nephew; 'Christmas among\nthe rest. But I am sure I have always thought of Christmas-time, when\nit has come round--apart from the veneration due to its sacred name and\norigin, if anything belonging to it can be apart from that--as a good\ntime; a kind, forgiving, charitable, pleasant time; the only time I know\nof, in the long calendar of the year, when men and women seem by one\nconsent to open their shut-up hearts freely, and to think of people\nbelow them as if they really were fellow-passengers to the grave, and\nnot another race of creatures bound on other journeys. And therefore,\nuncle, though it has never put a scrap of gold or silver in my pocket, I\nbelieve that it _has_ done me good and _will_ do me good; and I say, God\nbless it!'\n\nThe clerk in the tank involuntarily applauded. 
Becoming immediately\nsensible of the impropriety, he poked the fire, and extinguished the\nlast frail spark for ever.\n\n'Let me hear another sound from _you_,' said Scrooge, 'and you'll keep\nyour Christmas by losing your situation! You're quite a powerful\nspeaker, sir,' he added, turning to his nephew. 'I wonder you don't go\ninto Parliament.'\n\n'Don't be angry, uncle. Come! Dine with us to-morrow.'\n\nScrooge said that he would see him----Yes, indeed he did. He went the\nwhole length of the expression, and said that he would see him in that\nextremity first.\n\n'But why?' cried Scrooge's nephew. 'Why?'\n\n'Why did you get married?' said Scrooge.\n\n'Because I fell in love.'\n\n'Because you fell in love!' growled Scrooge, as if that were the only\none thing in the world more ridiculous than a merry Christmas. 'Good\nafternoon!'\n\n'Nay, uncle, but you never came to see me before that happened. Why give\nit as a reason for not coming now?'\n\n'Good afternoon,' said Scrooge.\n\n'I want nothing from you; I ask nothing of you; why cannot we be\nfriends?'\n\n'Good afternoon!' said Scrooge.\n\n'I am sorry, with all my heart, to find you so resolute. We have never\nhad any quarrel to which I have been a party. But I have made the trial\nin homage to Christmas, and I'll keep my Christmas humour to the last.\nSo A Merry Christmas, uncle!'\n\n'Good afternoon,' said Scrooge.\n\n'And A Happy New Year!'\n\n'Good afternoon!' said Scrooge.\n\nHis nephew left the room without an angry word, notwithstanding. He\nstopped at the outer door to bestow the greetings of the season on the\nclerk, who, cold as he was, was warmer than Scrooge; for he returned\nthem cordially.\n\n'There's another fellow,' muttered Scrooge, who overheard him: 'my\nclerk, with fifteen shillings a week, and a wife and family, talking\nabout a merry Christmas. I'll retire to Bedlam.'\n\nThis lunatic, in letting Scrooge's nephew out, had let two other people\nin. 
They were portly gentlemen, pleasant to behold, and now stood, with\ntheir hats off, in Scrooge's office. They had books and papers in their\nhands, and bowed to him.\n\n'Scrooge and Marley's, I believe,' said one of the gentlemen, referring\nto his list. 'Have I the pleasure of addressing Mr. Scrooge, or Mr.\nMarley?'\n\n'Mr. Marley has been dead these seven years,' Scrooge replied. 'He died\nseven years ago, this very night.'\n\n'We have no doubt his liberality is well represented by his surviving\npartner,' said the gentleman, presenting his credentials.\n\n[Illustration: THEY WERE PORTLY GENTLEMEN, PLEASANT TO BEHOLD]\n\nIt certainly was; for they had been two kindred spirits. At the ominous\nword 'liberality' Scrooge frowned, and shook his head, and handed the\ncredentials back.\n\n'At this festive season of the year, Mr. Scrooge,' said the gentleman,\ntaking up a pen, 'it is more than usually desirable that we should make\nsome slight provision for the poor and destitute, who suffer greatly at\nthe present time. Many thousands are",
+ "chunk_order_index": 3,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-90d5764e301321c087f5a8f78b73a145": {
+ "tokens": 1200,
+ "content": "It certainly was; for they had been two kindred spirits. At the ominous\nword 'liberality' Scrooge frowned, and shook his head, and handed the\ncredentials back.\n\n'At this festive season of the year, Mr. Scrooge,' said the gentleman,\ntaking up a pen, 'it is more than usually desirable that we should make\nsome slight provision for the poor and destitute, who suffer greatly at\nthe present time. Many thousands are in want of common necessaries;\nhundreds of thousands are in want of common comforts, sir.'\n\n'Are there no prisons?' asked Scrooge.\n\n'Plenty of prisons,' said the gentleman, laying down the pen again.\n\n'And the Union workhouses?' demanded Scrooge. 'Are they still in\noperation?'\n\n'They are. Still,' returned the gentleman, 'I wish I could say they were\nnot.'\n\n'The Treadmill and the Poor Law are in full vigour, then?' said Scrooge.\n\n'Both very busy, sir.'\n\n'Oh! I was afraid, from what you said at first, that something had\noccurred to stop them in their useful course,' said Scrooge. 'I am very\nglad to hear it.'\n\n'Under the impression that they scarcely furnish Christian cheer of mind\nor body to the multitude,' returned the gentleman, 'a few of us are\nendeavouring to raise a fund to buy the Poor some meat and drink, and\nmeans of warmth. We choose this time, because it is a time, of all\nothers, when Want is keenly felt, and Abundance rejoices. What shall I\nput you down for?'\n\n'Nothing!' Scrooge replied.\n\n'You wish to be anonymous?'\n\n'I wish to be left alone,' said Scrooge. 'Since you ask me what I wish,\ngentlemen, that is my answer. I don't make merry myself at Christmas,\nand I can't afford to make idle people merry. I help to support the\nestablishments I have mentioned--they cost enough: and those who are\nbadly off must go there.'\n\n'Many can't go there; and many would rather die.'\n\n'If they would rather die,' said Scrooge, 'they had better do it, and\ndecrease the surplus population. 
Besides--excuse me--I don't know that.'\n\n'But you might know it,' observed the gentleman.\n\n'It's not my business,' Scrooge returned. 'It's enough for a man to\nunderstand his own business, and not to interfere with other people's.\nMine occupies me constantly. Good afternoon, gentlemen!'\n\nSeeing clearly that it would be useless to pursue their point, the\ngentlemen withdrew. Scrooge resumed his labours with an improved opinion\nof himself, and in a more facetious temper than was usual with him.\n\nMeanwhile the fog and darkness thickened so, that people ran about with\nflaring links, proffering their services to go before horses in\ncarriages, and conduct them on their way. The ancient tower of a church,\nwhose gruff old bell was always peeping slyly down at Scrooge out of a\nGothic window in the wall, became invisible, and struck the hours and\nquarters in the clouds, with tremulous vibrations afterwards, as if its\nteeth were chattering in its frozen head up there. The cold became\nintense. In the main street, at the corner of the court, some labourers\nwere repairing the gas-pipes, and had lighted a great fire in a brazier,\nround which a party of ragged men and boys were gathered: warming their\nhands and winking their eyes before the blaze in rapture. The water-plug\nbeing left in solitude, its overflowings suddenly congealed, and turned\nto misanthropic ice. The brightness of the shops, where holly sprigs and\nberries crackled in the lamp heat of the windows, made pale faces ruddy\nas they passed. Poulterers' and grocers' trades became a splendid joke:\na glorious pageant, with which it was next to impossible to believe that\nsuch dull principles as bargain and sale had anything to do. 
The Lord\nMayor, in the stronghold of the mighty Mansion House, gave orders to his\nfifty cooks and butlers to keep Christmas as a Lord Mayor's household\nshould; and even the little tailor, whom he had fined five shillings on\nthe previous Monday for being drunk and bloodthirsty in the streets,\nstirred up to-morrow's pudding in his garret, while his lean wife and\nthe baby sallied out to buy the beef.\n\nFoggier yet, and colder! Piercing, searching, biting cold. If the good\nSt. Dunstan had but nipped the Evil Spirit's nose with a touch of such\nweather as that, instead of using his familiar weapons, then indeed he\nwould have roared to lusty purpose. The owner of one scant young nose,\ngnawed and mumbled by the hungry cold as bones are gnawed by dogs,\nstooped down at Scrooge's keyhole to regale him with a Christmas carol;\nbut, at the first sound of\n\n 'God bless you, merry gentleman,\n May nothing you dismay!'\n\nScrooge seized the ruler with such energy of action that the singer fled\nin terror, leaving the keyhole to the fog, and even more congenial\nfrost.\n\nAt length the hour of shutting up the counting-house arrived. With an\nill-will Scrooge dismounted from his stool, and tacitly admitted the\nfact to the expectant clerk in the tank, who instantly snuffed his",
+ "chunk_order_index": 4,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-d9aac3484185ac66045df92214d245d5": {
+ "tokens": 1200,
+ "content": "God bless you, merry gentleman,\n May nothing you dismay!'\n\nScrooge seized the ruler with such energy of action that the singer fled\nin terror, leaving the keyhole to the fog, and even more congenial\nfrost.\n\nAt length the hour of shutting up the counting-house arrived. With an\nill-will Scrooge dismounted from his stool, and tacitly admitted the\nfact to the expectant clerk in the tank, who instantly snuffed his\ncandle out, and put on his hat.\n\n'You'll want all day to-morrow, I suppose?' said Scrooge.\n\n'If quite convenient, sir.'\n\n'It's not convenient,' said Scrooge, 'and it's not fair. If I was to\nstop half-a-crown for it, you'd think yourself ill used, I'll be bound?'\n\nThe clerk smiled faintly.\n\n'And yet,' said Scrooge, 'you don't think _me_ ill used when I pay a\nday's wages for no work.'\n\n[Illustration: _Bob Cratchit went down a slide on Cornhill, at the end\nof a lane of boys, twenty times, in honour of its being Christmas\nEve_]\n\nThe clerk observed that it was only once a year.\n\n'A poor excuse for picking a man's pocket every twenty-fifth of\nDecember!' said Scrooge, buttoning his greatcoat to the chin. 'But I\nsuppose you must have the whole day. Be here all the earlier next\nmorning.'\n\nThe clerk promised that he would; and Scrooge walked out with a growl.\nThe office was closed in a twinkling, and the clerk, with the long ends\nof his white comforter dangling below his waist (for he boasted no\ngreatcoat), went down a slide on Cornhill, at the end of a lane of boys,\ntwenty times, in honour of its being Christmas Eve, and then ran home to\nCamden Town as hard as he could pelt, to play at blind man's-buff.\n\nScrooge took his melancholy dinner in his usual melancholy tavern; and\nhaving read all the newspapers, and beguiled the rest of the evening\nwith his banker's book, went home to bed. He lived in chambers which had\nonce belonged to his deceased partner. 
They were a gloomy suite of\nrooms, in a lowering pile of building up a yard, where it had so little\nbusiness to be, that one could scarcely help fancying it must have run\nthere when it was a young house, playing at hide-and-seek with other\nhouses, and have forgotten the way out again. It was old enough now, and\ndreary enough; for nobody lived in it but Scrooge, the other rooms\nbeing all let out as offices. The yard was so dark that even Scrooge,\nwho knew its every stone, was fain to grope with his hands. The fog and\nfrost so hung about the black old gateway of the house, that it seemed\nas if the Genius of the Weather sat in mournful meditation on the\nthreshold.\n\nNow, it is a fact that there was nothing at all particular about the\nknocker on the door, except that it was very large. It is also a fact\nthat Scrooge had seen it, night and morning, during his whole residence\nin that place; also that Scrooge had as little of what is called fancy\nabout him as any man in the City of London, even including--which is a\nbold word--the corporation, aldermen, and livery. Let it also be borne\nin mind that Scrooge had not bestowed one thought on Marley since his\nlast mention of his seven-years'-dead partner that afternoon. And then\nlet any man explain to me, if he can, how it happened that Scrooge,\nhaving his key in the lock of the door, saw in the knocker, without its\nundergoing any intermediate process of change--not a knocker, but\nMarley's face.\n\nMarley's face. It was not in impenetrable shadow, as the other objects\nin the yard were, but had a dismal light about it, like a bad lobster in\na dark cellar. It was not angry or ferocious, but looked at Scrooge as\nMarley used to look; with ghostly spectacles turned up on its ghostly\nforehead. 
The hair was curiously stirred, as if by breath or hot air;\nand, though the eyes were wide open, they were perfectly motionless.\nThat, and its livid colour, made it horrible; but its horror seemed to\nbe in spite of the face, and beyond its control, rather than a part of\nits own expression.\n\nAs Scrooge looked fixedly at this phenomenon, it was a knocker again.\n\nTo say that he was not startled, or that his blood was not conscious of\na terrible sensation to which it had been a stranger from infancy, would\nbe untrue. But he put his hand upon the key he had relinquished, turned\nit sturdily, walked in, and lighted his candle.\n\nHe _did_ pause, with a moment's irresolution, before he shut the door;\nand he _did_ look cautiously behind it first, as if he half expected to\nbe terrified with the sight of Marley's pigtail sticking out into the\nhall. But there was nothing on the back of the door, except the screws\nand nuts that held the knocker on, so he said, 'Pooh, pooh!' and closed\nit with a bang.\n\nThe sound resounded through the house like thunder. Every room above,\nand every cask in the wine-merchant's cellars below,",
+ "chunk_order_index": 5,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-9c43dac2fcd5b578242d907ea6f8fc7b": {
+ "tokens": 1200,
+ "content": "behind it first, as if he half expected to\nbe terrified with the sight of Marley's pigtail sticking out into the\nhall. But there was nothing on the back of the door, except the screws\nand nuts that held the knocker on, so he said, 'Pooh, pooh!' and closed\nit with a bang.\n\nThe sound resounded through the house like thunder. Every room above,\nand every cask in the wine-merchant's cellars below, appeared to have a\nseparate peal of echoes of its own. Scrooge was not a man to be\nfrightened by echoes. He fastened the door, and walked across the hall,\nand up the stairs: slowly, too: trimming his candle as he went.\n\nYou may talk vaguely about driving a coach and six up a good old flight\nof stairs, or through a bad young Act of Parliament; but I mean to say\nyou might have got a hearse up that staircase, and taken it broadwise,\nwith the splinter-bar towards the wall, and the door towards the\nbalustrades: and done it easy. There was plenty of width for that, and\nroom to spare; which is perhaps the reason why Scrooge thought he saw a\nlocomotive hearse going on before him in the gloom. Half-a-dozen\ngas-lamps out of the street wouldn't have lighted the entry too well, so\nyou may suppose that it was pretty dark with Scrooge's dip.\n\nUp Scrooge went, not caring a button for that. Darkness is cheap, and\nScrooge liked it. But, before he shut his heavy door, he walked through\nhis rooms to see that all was right. He had just enough recollection of\nthe face to desire to do that.\n\nSitting-room, bedroom, lumber-room. All as they should be. Nobody under\nthe table, nobody under the sofa; a small fire in the grate; spoon and\nbasin ready; and the little saucepan of gruel (Scrooge had a cold in his\nhead) upon the hob. Nobody under the bed; nobody in the closet; nobody\nin his dressing-gown, which was hanging up in a suspicious attitude\nagainst the wall. Lumber-room as usual. 
Old fire-guard, old shoes, two\nfish baskets, washing-stand on three legs, and a poker.\n\n[Illustration: _Nobody under the bed; nobody in the closet; nobody in\nhis dressing-gown, which was hanging up in a suspicious attitude against\nthe wall_]\n\nQuite satisfied, he closed his door, and locked himself in; double\nlocked himself in, which was not his custom. Thus secured against\nsurprise, he took off his cravat; put on his dressing-gown and slippers,\nand his nightcap; and sat down before the fire to take his gruel.\n\nIt was a very low fire indeed; nothing on such a bitter night. He was\nobliged to sit close to it, and brood over it, before he could extract\nthe least sensation of warmth from such a handful of fuel. The fireplace\nwas an old one, built by some Dutch merchant long ago, and paved all\nround with quaint Dutch tiles, designed to illustrate the Scriptures.\nThere were Cains and Abels, Pharaoh's daughters, Queens of Sheba,\nAngelic messengers descending through the air on clouds like\nfeather-beds, Abrahams, Belshazzars, Apostles putting off to sea in\nbutter-boats, hundreds of figures to attract his thoughts; and yet that\nface of Marley, seven years dead, came like the ancient Prophet's rod,\nand swallowed up the whole. If each smooth tile had been a blank at\nfirst, with power to shape some picture on its surface from the\ndisjointed fragments of his thoughts, there would have been a copy of\nold Marley's head on every one.\n\n'Humbug!' said Scrooge; and walked across the room.\n\nAfter several turns he sat down again. As he threw his head back in the\nchair, his glance happened to rest upon a bell, a disused bell, that\nhung in the room, and communicated, for some purpose now forgotten, with\na chamber in the highest storey of the building. It was with great\nastonishment, and with a strange, inexplicable dread, that, as he\nlooked, he saw this bell begin to swing. 
It swung so softly in the\noutset that it scarcely made a sound; but soon it rang out loudly, and\nso did every bell in the house.\n\nThis might have lasted half a minute, or a minute, but it seemed an\nhour. The bells ceased, as they had begun, together. They were succeeded\nby a clanking noise deep down below as if some person were dragging a\nheavy chain over the casks in the wine-merchant's cellar. Scrooge then\nremembered to have heard that ghosts in haunted houses were described as\ndragging chains.\n\nThe cellar door flew open with a booming sound, and then he heard the\nnoise much louder on the floors below; then coming up the stairs; then\ncoming straight towards his door.\n\n'It's humbug still!' said Scrooge. 'I won't believe it.'\n\nHis colour changed, though, when, without a pause, it came on through\nthe heavy door and passed into the room before his eyes. Upon its coming\nin, the dying flame leaped up, as though it cried, 'I know him! Marley's\nGhost!' and fell again.\n\nThe same face: the very same. Marley in his pigtail, usual waistcoat,\ntights, and boots; the tassels on the latter bristling",
+ "chunk_order_index": 6,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-1aa50206d02ed89418f65f97c6441d1c": {
+ "tokens": 1200,
+ "content": "I won't believe it.'\n\nHis colour changed, though, when, without a pause, it came on through\nthe heavy door and passed into the room before his eyes. Upon its coming\nin, the dying flame leaped up, as though it cried, 'I know him! Marley's\nGhost!' and fell again.\n\nThe same face: the very same. Marley in his pigtail, usual waistcoat,\ntights, and boots; the tassels on the latter bristling, like his\npigtail, and his coat-skirts, and the hair upon his head. The chain he\ndrew was clasped about his middle. It was long, and wound about him like\na tail; and it was made (for Scrooge observed it closely) of cash-boxes,\nkeys, padlocks, ledgers, deeds, and heavy purses wrought in steel. His\nbody was transparent: so that Scrooge, observing him, and looking\nthrough his waistcoat, could see the two buttons on his coat behind.\n\nScrooge had often heard it said that Marley had no bowels, but he had\nnever believed it until now.\n\nNo, nor did he believe it even now. Though he looked the phantom through\nand through, and saw it standing before him; though he felt the chilling\ninfluence of its death-cold eyes, and marked the very texture of the\nfolded kerchief bound about its head and chin, which wrapper he had not\nobserved before, he was still incredulous, and fought against his\nsenses.\n\n'How now!' said Scrooge, caustic and cold as ever. 'What do you want\nwith me?'\n\n'Much!'--Marley's voice; no doubt about it.\n\n'Who are you?'\n\n'Ask me who I _was_.'\n\n'Who _were_ you, then?' said Scrooge, raising his voice. 'You're\nparticular, for a shade.' He was going to say '_to_ a shade,' but\nsubstituted this, as more appropriate.\n\n'In life I was your partner, Jacob Marley.'\n\n'Can you--can you sit down?' 
asked Scrooge, looking doubtfully at him.\n\n'I can.'\n\n'Do it, then.'\n\nScrooge asked the question, because he didn't know whether a ghost so\ntransparent might find himself in a condition to take a chair; and felt\nthat in the event of its being impossible, it might involve the\nnecessity of an embarrassing explanation. But the Ghost sat down on the\nopposite side of the fireplace, as if he were quite used to it.\n\n'You don't believe in me,' observed the Ghost.\n\n'I don't,' said Scrooge.\n\n'What evidence would you have of my reality beyond that of your own\nsenses?'\n\n'I don't know,' said Scrooge.\n\n'Why do you doubt your senses?'\n\n'Because,' said Scrooge, 'a little thing affects them. A slight disorder\nof the stomach makes them cheats. You may be an undigested bit of beef,\na blot of mustard, a crumb of cheese, a fragment of an underdone potato.\nThere's more of gravy than of grave about you, whatever you are!'\n\nScrooge was not much in the habit of cracking jokes, nor did he feel in\nhis heart by any means waggish then. The truth is, that he tried to be\nsmart, as a means of distracting his own attention, and keeping down his\nterror; for the spectre's voice disturbed the very marrow in his bones.\n\nTo sit staring at those fixed, glazed eyes in silence, for a moment,\nwould play, Scrooge felt, the very deuce with him. There was something\nvery awful, too, in the spectre's being provided with an infernal\natmosphere of his own. Scrooge could not feel it himself, but this was\nclearly the case; for though the Ghost sat perfectly motionless, its\nhair, and skirts, and tassels were still agitated as by the hot vapour\nfrom an oven.\n\n'You see this toothpick?' 
said Scrooge, returning quickly to the charge,\nfor the reason just assigned; and wishing, though it were only for a\nsecond, to divert the vision's stony gaze from himself.\n\n'I do,' replied the Ghost.\n\n'You are not looking at it,' said Scrooge.\n\n'But I see it,' said the Ghost, 'notwithstanding.'\n\n'Well!' returned Scrooge, 'I have but to swallow this, and be for the\nrest of my days persecuted by a legion of goblins, all of my own\ncreation. Humbug, I tell you: humbug!'\n\nAt this the spirit raised a frightful cry, and shook its chain with such\na dismal and appalling noise, that Scrooge held on tight to his chair,\nto save himself from falling in a swoon. But how much greater was his\nhorror when the phantom, taking off the bandage round his head, as if it\nwere too warm to wear indoors, its lower jaw dropped down upon its\nbreast!\n\nScrooge fell upon his knees, and clasped his hands before his face.\n\n'Mercy!' he said. 'Dreadful apparition, why do you trouble me?'\n\n'Man of the worldly mind!' replied the Ghost, 'do you believe in me or\nnot?'\n\n'I do,' said Scrooge; 'I must. But why do spirits walk the earth, and\nwhy do they come to me?'\n\n'It is required of every man,' the Ghost returned, 'that the spirit\nwithin him should walk abroad among his fellow-men, and travel far and\nwide",
+ "chunk_order_index": 7,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-359187e7571bda4b0b08b2e0b6581e52": {
+ "tokens": 1200,
+ "content": "said. 'Dreadful apparition, why do you trouble me?'\n\n'Man of the worldly mind!' replied the Ghost, 'do you believe in me or\nnot?'\n\n'I do,' said Scrooge; 'I must. But why do spirits walk the earth, and\nwhy do they come to me?'\n\n'It is required of every man,' the Ghost returned, 'that the spirit\nwithin him should walk abroad among his fellow-men, and travel far and\nwide; and, if that spirit goes not forth in life, it is condemned to do\nso after death. It is doomed to wander through the world--oh, woe is\nme!--and witness what it cannot share, but might have shared on earth,\nand turned to happiness!'\n\nAgain the spectre raised a cry, and shook its chain and wrung its\nshadowy hands.\n\n'You are fettered,' said Scrooge, trembling. 'Tell me why?'\n\n'I wear the chain I forged in life,' replied the Ghost. 'I made it link\nby link, and yard by yard; I girded it on of my own free will, and of\nmy own free will I wore it. Is its pattern strange to _you_?'\n\nScrooge trembled more and more.\n\n'Or would you know,' pursued the Ghost, 'the weight and length of the\nstrong coil you bear yourself? It was full as heavy and as long as this\nseven Christmas Eves ago. You have laboured on it since. It is a\nponderous chain!'\n\nScrooge glanced about him on the floor, in the expectation of finding\nhimself surrounded by some fifty or sixty fathoms of iron cable; but he\ncould see nothing.\n\n'Jacob!' he said imploringly. 'Old Jacob Marley, tell me more! Speak\ncomfort to me, Jacob!'\n\n'I have none to give,' the Ghost replied. 'It comes from other regions,\nEbenezer Scrooge, and is conveyed by other ministers, to other kinds of\nmen. Nor can I tell you what I would. A very little more is all\npermitted to me. 
I cannot rest, I cannot stay, I cannot linger anywhere.\nMy spirit never walked beyond our counting-house--mark me;--in life my\nspirit never roved beyond the narrow limits of our money-changing hole;\nand weary journeys lie before me!'\n\nIt was a habit with Scrooge, whenever he became thoughtful, to put his\nhands in his breeches pockets. Pondering on what the Ghost had said, he\ndid so now, but without lifting up his eyes, or getting off his knees.\n\n[Illustration: ON THE WINGS OF THE WIND]\n\n'You must have been very slow about it, Jacob,' Scrooge observed in a\nbusiness-like manner, though with humility and deference.\n\n'Slow!' the Ghost repeated.\n\n'Seven years dead,' mused Scrooge. 'And travelling all the time?'\n\n'The whole time,' said the Ghost. 'No rest, no peace. Incessant torture\nof remorse.'\n\n'You travel fast?' said Scrooge.\n\n[Illustration]\n\n'On the wings of the wind,' replied the Ghost.\n\n'You might have got over a great quantity of ground in seven years,'\nsaid Scrooge.\n\nThe Ghost, on hearing this, set up another cry, and clanked its chain so\nhideously in the dead silence of the night, that the Ward would have\nbeen justified in indicting it for a nuisance.\n\n'Oh! captive, bound, and double-ironed,' cried the phantom, 'not to know\nthat ages of incessant labour, by immortal creatures, for this earth\nmust pass into eternity before the good of which it is susceptible is\nall developed! Not to know that any Christian spirit working kindly in\nits little sphere, whatever it may be, will find its mortal life too\nshort for its vast means of usefulness! Not to know that no space of\nregret can make amends for one life's opportunities misused! Yet such\nwas I! Oh, such was I!'\n\n'But you were always a good man of business, Jacob,' faltered Scrooge,\nwho now began to apply this to himself.\n\n'Business!' cried the Ghost, wringing its hands again. 'Mankind was my\nbusiness. 
The common welfare was my business; charity, mercy,\nforbearance, and benevolence were, all, my business. The dealings of my\ntrade were but a drop of water in the comprehensive ocean of my\nbusiness!'\n\nIt held up its chain at arm's-length, as if that were the cause of all\nits unavailing grief, and flung it heavily upon the ground again.\n\n'At this time of the rolling year,' the spectre said, 'I suffer most.\nWhy did I walk through crowds of fellow-beings with my eyes turned down,\nand never raise them to that blessed Star which led the Wise Men to a\npoor abode? Were there no poor homes to which its light would have\nconducted _me_?'\n\nScrooge was very much dismayed to hear the spectre going on at this\nrate, and began to quake exceedingly.\n\n'Hear me!' cried the Ghost. 'My time is nearly gone.'\n\n'I will,' said Scrooge. 'But don't be hard upon me! Don't be flowery,\nJacob! Pray!'\n\n'How it is that I appear before you in a shape that you can see, I may\nnot tell. I have sat invisible beside you many and many a day.'\n\nIt was not an agreeable idea. Scrooge shivered, and wiped the\nperspiration from his brow.\n\n'That is no light part of my",
+ "chunk_order_index": 8,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-cb661c37436355ccec2769b1d0350c5f": {
+ "tokens": 1200,
+ "content": "'My time is nearly gone.'\n\n'I will,' said Scrooge. 'But don't be hard upon me! Don't be flowery,\nJacob! Pray!'\n\n'How it is that I appear before you in a shape that you can see, I may\nnot tell. I have sat invisible beside you many and many a day.'\n\nIt was not an agreeable idea. Scrooge shivered, and wiped the\nperspiration from his brow.\n\n'That is no light part of my penance,' pursued the Ghost. 'I am here\nto-night to warn you that you have yet a chance and hope of escaping my\nfate. A chance and hope of my procuring, Ebenezer.'\n\n'You were always a good friend to me,' said Scrooge. 'Thankee!'\n\n'You will be haunted,' resumed the Ghost, 'by Three Spirits.'\n\nScrooge's countenance fell almost as low as the Ghost's had done.\n\n'Is that the chance and hope you mentioned, Jacob?' he demanded in a\nfaltering voice.\n\n'It is.'\n\n'I--I think I'd rather not,' said Scrooge.\n\n'Without their visits,' said the Ghost, 'you cannot hope to shun the\npath I tread. Expect the first to-morrow when the bell tolls One.'\n\n'Couldn't I take 'em all at once, and have it over, Jacob?' hinted\nScrooge.\n\n'Expect the second on the next night at the same hour. The third, upon\nthe next night when the last stroke of Twelve has ceased to vibrate.\nLook to see me no more; and look that, for your own sake, you remember\nwhat has passed between us!'\n\nWhen it had said these words, the spectre took its wrapper from the\ntable, and bound it round its head as before. Scrooge knew this by the\nsmart sound its teeth made when the jaws were brought together by the\nbandage. 
He ventured to raise his eyes again, and found his supernatural\nvisitor confronting him in an erect attitude, with its chain wound over\nand about its arm.\n\n[Illustration: _The air was filled with phantoms, wandering hither and\nthither in restless haste and moaning as they went_]\n\nThe apparition walked backward from him; and, at every step it took, the\nwindow raised itself a little, so that, when the spectre reached it, it\nwas wide open. It beckoned Scrooge to approach, which he did. When they\nwere within two paces of each other, Marley's Ghost held up its hand,\nwarning him to come no nearer. Scrooge stopped.\n\nNot so much in obedience as in surprise and fear; for, on the raising of\nthe hand, he became sensible of confused noises in the air; incoherent\nsounds of lamentation and regret; wailings inexpressibly sorrowful and\nself-accusatory. The spectre, after listening for a moment, joined in\nthe mournful dirge; and floated out upon the bleak, dark night.\n\nScrooge followed to the window: desperate in his curiosity. He looked\nout.\n\nThe air was filled with phantoms, wandering hither and thither in\nrestless haste, and moaning as they went. Every one of them wore chains\nlike Marley's Ghost; some few (they might be guilty governments) were\nlinked together; none were free. Many had been personally known to\nScrooge in their lives. He had been quite familiar with one old ghost in\na white waistcoat, with a monstrous iron safe attached to its ankle, who\ncried piteously at being unable to assist a wretched woman with an\ninfant, whom it saw below upon a doorstep. The misery with them all was\nclearly, that they sought to interfere, for good, in human matters, and\nhad lost the power for ever.\n\nWhether these creatures faded into mist, or mist enshrouded them, he\ncould not tell. 
But they and their spirit voices faded together; and\nthe night became as it had been when he walked home.\n\nScrooge closed the window, and examined the door by which the Ghost had\nentered. It was double locked, as he had locked it with his own hands,\nand the bolts were undisturbed. He tried to say 'Humbug!' but stopped at\nthe first syllable. And being, from the emotions he had undergone, or\nthe fatigues of the day, or his glimpse of the Invisible World, or the\ndull conversation of the Ghost, or the lateness of the hour, much in\nneed of repose, went straight to bed without undressing, and fell asleep\nupon the instant.\n\n[Illustration]\n\n\nSTAVE TWO\n\n[Illustration]\n\n\n\n\nTHE FIRST OF THE THREE SPIRITS\n\n\nWhen Scrooge awoke it was so dark, that, looking out of bed, he could\nscarcely distinguish the transparent window from the opaque walls of his\nchamber. He was endeavouring to pierce the darkness with his ferret\neyes, when the chimes of a neighbouring church struck the four quarters.\nSo he listened for the hour.\n\nTo his great astonishment, the heavy bell went on from six to seven, and\nfrom seven to eight, and regularly up to twelve; then stopped. Twelve!\nIt was past two when he went to bed. The clock was wrong. An icicle must\nhave got into the works. Twelve!\n\nHe touched the spring of his repeater, to correct this most preposterous\nclock. Its rapid little pulse beat twelve, and stopped.\n\n'Why, it isn't possible,' said Scrooge, 'that I can have slept through a\nwhole day and far into",
+ "chunk_order_index": 9,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-c7b10789a9cb0af6c553730b29bd9034": {
+ "tokens": 1200,
+ "content": "from seven to eight, and regularly up to twelve; then stopped. Twelve!\nIt was past two when he went to bed. The clock was wrong. An icicle must\nhave got into the works. Twelve!\n\nHe touched the spring of his repeater, to correct this most preposterous\nclock. Its rapid little pulse beat twelve, and stopped.\n\n'Why, it isn't possible,' said Scrooge, 'that I can have slept through a\nwhole day and far into another night. It isn't possible that anything\nhas happened to the sun, and this is twelve at noon!'\n\nThe idea being an alarming one, he scrambled out of bed, and groped his\nway to the window. He was obliged to rub the frost off with the sleeve\nof his dressing-gown before he could see anything; and could see very\nlittle then. All he could make out was, that it was still very foggy and\nextremely cold, and that there was no noise of people running to and\nfro, and making a great stir, as there unquestionably would have been if\nnight had beaten off bright day, and taken possession of the world. This\nwas a great relief, because 'Three days after sight of this First of\nExchange pay to Mr. Ebenezer Scrooge or his order,' and so forth, would\nhave become a mere United States security if there were no days to count\nby.\n\nScrooge went to bed again, and thought, and thought, and thought it over\nand over, and could make nothing of it. The more he thought, the more\nperplexed he was; and, the more he endeavoured not to think, the more he\nthought.\n\nMarley's Ghost bothered him exceedingly. Every time he resolved within\nhimself, after mature inquiry that it was all a dream, his mind flew\nback again, like a strong spring released, to its first position, and\npresented the same problem to be worked all through, 'Was it a dream or\nnot?'\n\nScrooge lay in this state until the chime had gone three-quarters more,\nwhen he remembered, on a sudden, that the Ghost had warned him of a\nvisitation when the bell tolled one. 
He resolved to lie awake until the\nhour was passed; and, considering that he could no more go to sleep than\ngo to heaven, this was, perhaps, the wisest resolution in his power.\n\nThe quarter was so long, that he was more than once convinced he must\nhave sunk into a doze unconsciously, and missed the clock. At length it\nbroke upon his listening ear.\n\n'Ding, dong!'\n\n'A quarter past,' said Scrooge, counting.\n\n'Ding, dong!'\n\n'Half past,' said Scrooge.\n\n'Ding, dong!'\n\n'A quarter to it.' said Scrooge.\n\n'Ding, dong!'\n\n'The hour itself,' said Scrooge triumphantly, 'and nothing else!'\n\nHe spoke before the hour bell sounded, which it now did with a deep,\ndull, hollow, melancholy ONE. Light flashed up in the room upon the\ninstant, and the curtains of his bed were drawn.\n\nThe curtains of his bed were drawn aside, I tell you, by a hand. Not\nthe curtains at his feet, nor the curtains at his back, but those to\nwhich his face was addressed. The curtains of his bed were drawn aside;\nand Scrooge, starting up into a half-recumbent attitude, found himself\nface to face with the unearthly visitor who drew them: as close to it as\nI am now to you, and I am standing in the spirit at your elbow.\n\nIt was a strange figure--like a child; yet not so like a child as like\nan old man, viewed through some supernatural medium, which gave him the\nappearance of having receded from the view, and being diminished to a\nchild's proportions. Its hair, which hung about its neck and down its\nback, was white, as if with age; and yet the face had not a wrinkle in\nit, and the tenderest bloom was on the skin. The arms were very long and\nmuscular; the hands the same, as if its hold were of uncommon strength.\nIts legs and feet, most delicately formed, were, like those upper\nmembers, bare. It wore a tunic of the purest white; and round its waist\nwas bound a lustrous belt, the sheen of which was beautiful. 
It held a\nbranch of fresh green holly in its hand; and, in singular contradiction\nof that wintry emblem, had its dress trimmed with summer flowers. But\nthe strangest thing about it was, that from the crown of its head there\nsprang a bright clear jet of light, by which all this was visible; and\nwhich was doubtless the occasion of its using, in its duller moments, a\ngreat extinguisher for a cap, which it now held under its arm.\n\nEven this, though, when Scrooge looked at it with increasing steadiness,\nwas _not_ its strangest quality. For, as its belt sparkled and\nglittered, now in one part and now in another, and what was light one\ninstant at another time was dark, so the figure itself fluctuated in its\ndistinctness; being now a thing with one arm, now with one leg, now with\ntwenty legs, now a pair of legs without a head, now a head without a\nbody: of which dissolving parts no outline would be visible in the dense\ngloom wherein they melted away. And, in the very wonder of this, it\nwould be itself again; distinct and clear as ever.",
+ "chunk_order_index": 10,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-bb21d58d36c6306fd8810ddd51c4a971": {
+ "tokens": 1200,
+ "content": "instant at another time was dark, so the figure itself fluctuated in its\ndistinctness; being now a thing with one arm, now with one leg, now with\ntwenty legs, now a pair of legs without a head, now a head without a\nbody: of which dissolving parts no outline would be visible in the dense\ngloom wherein they melted away. And, in the very wonder of this, it\nwould be itself again; distinct and clear as ever.\n\n'Are you the Spirit, sir, whose coming was foretold to me?' asked\nScrooge.\n\n'I am!'\n\nThe voice was soft and gentle. Singularly low, as if, instead of being\nso close behind him, it were at a distance.\n\n'Who and what are you?' Scrooge demanded.\n\n'I am the Ghost of Christmas Past.'\n\n'Long Past?' inquired Scrooge, observant of its dwarfish stature.\n\n'No. Your past.'\n\nPerhaps Scrooge could not have told anybody why, if anybody could have\nasked him; but he had a special desire to see the Spirit in his cap,\nand begged him to be covered.\n\n'What!' exclaimed the Ghost, 'would you so soon put out, with worldly\nhands, the light I give? Is it not enough that you are one of those\nwhose passions made this cap, and force me through whole trains of years\nto wear it low upon my brow?'\n\nScrooge reverently disclaimed all intention to offend or any knowledge\nof having wilfully 'bonneted' the Spirit at any period of his life. He\nthen made bold to inquire what business brought him there.\n\n'Your welfare!' said the Ghost.\n\nScrooge expressed himself much obliged, but could not help thinking that\na night of unbroken rest would have been more conducive to that end. The\nSpirit must have heard him thinking, for it said immediately--\n\n'Your reclamation, then. Take heed!'\n\nIt put out its strong hand as it spoke, and clasped him gently by the\narm.\n\n'Rise! 
and walk with me!'\n\nIt would have been in vain for Scrooge to plead that the weather and the\nhour were not adapted to pedestrian purposes; that bed was warm, and the\nthermometer a long way below freezing; that he was clad but lightly in\nhis slippers, dressing-gown, and nightcap; and that he had a cold upon\nhim at that time. The grasp, though gentle as a woman's hand, was not\nto be resisted. He rose; but, finding that the Spirit made towards the\nwindow, clasped its robe in supplication.\n\n'I am a mortal,' Scrooge remonstrated, 'and liable to fall.'\n\n'Bear but a touch of my hand _there_,' said the Spirit, laying it upon\nhis heart, 'and you shall be upheld in more than this!'\n\nAs the words were spoken, they passed through the wall, and stood upon\nan open country road, with fields on either hand. The city had entirely\nvanished. Not a vestige of it was to be seen. The darkness and the mist\nhad vanished with it, for it was a clear, cold, winter day, with snow\nupon the ground.\n\n'Good Heaven!' said Scrooge, clasping his hands together, as he looked\nabout him. 'I was bred in this place. I was a boy here!'\n\nThe Spirit gazed upon him mildly. Its gentle touch, though it had been\nlight and instantaneous, appeared still present to the old man's sense\nof feeling. He was conscious of a thousand odours floating in the air,\neach one connected with a thousand thoughts, and hopes, and joys, and\ncares long, long forgotten!\n\n'Your lip is trembling,' said the Ghost. 'And what is that upon your\ncheek?'\n\nScrooge muttered, with an unusual catching in his voice, that it was a\npimple; and begged the Ghost to lead him where he would.\n\n'You recollect the way?' inquired the Spirit.\n\n'Remember it!' cried Scrooge with fervour; 'I could walk it blindfold.'\n\n'Strange to have forgotten it for so many years!' 
observed the Ghost.\n'Let us go on.'\n\nThey walked along the road, Scrooge recognising every gate, and post,\nand tree, until a little market-town appeared in the distance, with its\nbridge, its church, and winding river. Some shaggy ponies now were seen\ntrotting towards them with boys upon their backs, who called to other\nboys in country gigs and carts, driven by farmers. All these boys were\nin great spirits, and shouted to each other, until the broad fields were\nso full of merry music, that the crisp air laughed to hear it.\n\n'These are but shadows of the things that have been,' said the Ghost.\n'They have no consciousness of us.'\n\nThe jocund travellers came on; and as they came, Scrooge knew and named\nthem every one. Why was he rejoiced beyond all bounds to see them? Why\ndid his cold eye glisten, and his heart leap up as they went past? Why\nwas he filled with gladness when he heard them give each other Merry\nChristmas, as they parted at cross-roads and by-ways for their several\nhomes? What was merry Christmas to Scrooge? Out upon merry Christmas!\nWhat good had it ever done to him?\n\n'The school is not quite deserted,' said the Ghost. 'A solitary child,\nneglected by his friends, is left there still.'\n\nScrooge said he knew it. And he sobbed.\n\nThey left the high-road by a well-remembered lane and soon approached",
+ "chunk_order_index": 11,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-8590d9263cd2cd4fa583c432fc54e6c3": {
+ "tokens": 1200,
+ "content": "Merry\nChristmas, as they parted at cross-roads and by-ways for their several\nhomes? What was merry Christmas to Scrooge? Out upon merry Christmas!\nWhat good had it ever done to him?\n\n'The school is not quite deserted,' said the Ghost. 'A solitary child,\nneglected by his friends, is left there still.'\n\nScrooge said he knew it. And he sobbed.\n\nThey left the high-road by a well-remembered lane and soon approached a\nmansion of dull red brick, with a little weather-cock surmounted cupola\non the roof, and a bell hanging in it. It was a large house, but one of\nbroken fortunes; for the spacious offices were little used, their walls\nwere damp and mossy, their windows broken, and their gates decayed.\nFowls clucked and strutted in the stables; and the coach-houses and\nsheds were overrun with grass. Nor was it more retentive of its ancient\nstate within; for, entering the dreary hall, and glancing through the\nopen doors of many rooms, they found them poorly furnished, cold, and\nvast. There was an earthy savour in the air, a chilly bareness in the\nplace, which associated itself somehow with too much getting up by\ncandle light and not too much to eat.\n\nThey went, the Ghost and Scrooge, across the hall, to a door at the back\nof the house. It opened before them, and disclosed a long, bare,\nmelancholy room, made barer still by lines of plain deal forms and\ndesks. 
At one of these a lonely boy was reading near a feeble fire; and\nScrooge sat down upon a form, and wept to see his poor forgotten self as\nhe had used to be.\n\nNot a latent echo in the house, not a squeak and scuffle from the mice\nbehind the panelling, not a drip from the half-thawed waterspout in the\ndull yard behind, not a sigh among the leafless boughs of one despondent\npoplar, not the idle swinging of an empty storehouse door, no, not a\nclicking in the fire, but fell upon the heart of Scrooge with softening\ninfluence, and gave a freer passage to his tears.\n\nThe Spirit touched him on the arm, and pointed to his younger self,\nintent upon his reading. Suddenly a man in foreign garments, wonderfully\nreal and distinct to look at, stood outside the window, with an axe\nstuck in his belt, and leading by the bridle an ass laden with wood.\n\n'Why, it's Ali Baba!' Scrooge exclaimed in ecstasy. 'It's dear old\nhonest Ali Baba! Yes, yes, I know. One Christmas-time, when yonder\nsolitary child was left here all alone, he _did_ come, for the first\ntime, just like that. Poor boy! And Valentine,' said Scrooge, 'and his\nwild brother, Orson; there they go! And what's his name, who was put\ndown in his drawers, asleep, at the gate of Damascus; don't you see him?\nAnd the Sultan's Groom turned upside down by the Genii; there he is upon\nhis head! Serve him right! I'm glad of it. What business had he to be\nmarried to the Princess?'\n\nTo hear Scrooge expending all the earnestness of his nature on such\nsubjects, in a most extraordinary voice between laughing and crying; and\nto see his heightened and excited face; would have been a surprise to\nhis business friends in the City, indeed.\n\n'There's the Parrot!' cried Scrooge. 'Green body and yellow tail, with a\nthing like a lettuce growing out of the top of his head; there he is!\nPoor Robin Crusoe he called him, when he came home again after sailing\nround the island. 
\"Poor Robin Crusoe, where have you been, Robin\nCrusoe?\" The man thought he was dreaming, but he wasn't. It was the\nParrot, you know. There goes Friday, running for his life to the little\ncreek! Halloa! Hoop! Halloo!'\n\nThen, with a rapidity of transition very foreign to his usual character,\nhe said, in pity for his former self, 'Poor boy!' and cried again.\n\n'I wish,' Scrooge muttered, putting his hand in his pocket, and looking\nabout him, after drying his eyes with his cuff; 'but it's too late now.'\n\n'What is the matter?' asked the Spirit.\n\n'Nothing,' said Scrooge. 'Nothing. There was a boy singing a Christmas\ncarol at my door last night. I should like to have given him something:\nthat's all.'\n\nThe Ghost smiled thoughtfully, and waved its hand, saying as it did so,\n'Let us see another Christmas!'\n\nScrooge's former self grew larger at the words, and the room became a\nlittle darker and more dirty. The panels shrunk, the windows cracked;\nfragments of plaster fell out of the ceiling, and the naked laths were\nshown instead; but how all this was brought about Scrooge knew no more\nthan you do. He only knew that it was quite correct; that everything had\nhappened so; that there he was, alone again, when all the other boys had\ngone home for the jolly holidays.\n\nHe was not reading now, but walking up and down despairingly. Scrooge\nlooked at the Ghost, and, with a mournful shaking of his head, glanced\nanxiously towards the door.\n\nIt opened; and a little girl, much",
+ "chunk_order_index": 12,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-c6c248858d1b13ab7ee9ca13c1aeed6b": {
+ "tokens": 1200,
+ "content": "more\nthan you do. He only knew that it was quite correct; that everything had\nhappened so; that there he was, alone again, when all the other boys had\ngone home for the jolly holidays.\n\nHe was not reading now, but walking up and down despairingly. Scrooge\nlooked at the Ghost, and, with a mournful shaking of his head, glanced\nanxiously towards the door.\n\nIt opened; and a little girl, much younger than the boy, came darting\nin, and, putting her arms about his neck, and often kissing him,\naddressed him as her 'dear, dear brother.'\n\n'I have come to bring you home, dear brother!' said the child, clapping\nher tiny hands, and bending down to laugh. 'To bring you home, home,\nhome!'\n\n'Home, little Fan?' returned the boy.\n\n'Yes!' said the child, brimful of glee. 'Home for good and all. Home for\never and ever. Father is so much kinder than he used to be, that home's\nlike heaven! He spoke so gently to me one dear night when I was going to\nbed, that I was not afraid to ask him once more if you might come home;\nand he said Yes, you should; and sent me in a coach to bring you. And\nyou're to be a man!' said the child, opening her eyes; 'and are never to\ncome back here; but first we're to be together all the Christmas long,\nand have the merriest time in all the world.'\n\n'You are quite a woman, little Fan!' exclaimed the boy.\n\nShe clapped her hands and laughed, and tried to touch his head; but,\nbeing too little laughed again, and stood on tiptoe to embrace him. Then\nshe began to drag him, in her childish eagerness, towards the door; and\nhe, nothing loath to go, accompanied her.\n\nA terrible voice in the hall cried, 'Bring down Master Scrooge's box,\nthere!' and in the hall appeared the schoolmaster himself, who glared on\nMaster Scrooge with a ferocious condescension, and threw him into a\ndreadful state of mind by shaking hands with him. 
He then conveyed him\nand his sister into the veriest old well of a shivering best parlour\nthat ever was seen, where the maps upon the wall, and the celestial and\nterrestrial globes in the windows, were waxy with cold. Here he produced\na decanter of curiously light wine, and a block of curiously heavy cake,\nand administered instalments of those dainties to the young people; at\nthe same time sending out a meagre servant to offer a glass of\n'something' to the postboy, who answered that he thanked the gentleman,\nbut, if it was the same tap as he had tasted before, he had rather not.\nMaster Scrooge's trunk being by this time tied on to the top of the\nchaise, the children bade the schoolmaster good-bye right willingly;\nand, getting into it, drove gaily down the garden sweep; the quick\nwheels dashing the hoar-frost and snow from off the dark leaves of the\nevergreens like spray.\n\n[Illustration: HE PRODUCED A DECANTER OF CURIOUSLY LIGHT WINE, AND A\nBLOCK OF CURIOUSLY HEAVY CAKE]\n\n'Always a delicate creature, whom a breath might have withered,' said\nthe Ghost. 'But she had a large heart!'\n\n'So she had,' cried Scrooge. 'You're right. I will not gainsay it,\nSpirit. God forbid!'\n\n'She died a woman,' said the Ghost, 'and had, as I think, children.'\n\n'One child,' Scrooge returned.\n\n'True,' said the Ghost. 'Your nephew!'\n\nScrooge seemed uneasy in his mind, and answered briefly, 'Yes.'\n\nAlthough they had but that moment left the school behind them, they were\nnow in the busy thoroughfares of a city, where shadowy passengers passed\nand re-passed; where shadowy carts and coaches battled for the way, and\nall the strife and tumult of a real city were. It was made plain enough,\nby the dressing of the shops, that here, too, it was Christmas-time\nagain; but it was evening, and the streets were lighted up.\n\nThe Ghost stopped at a certain warehouse door, and asked Scrooge if he\nknew it.\n\n'Know it!' said Scrooge. 
'Was I apprenticed here?'\n\nThey went in. At sight of an old gentleman in a Welsh wig, sitting\nbehind such a high desk, that if he had been two inches taller, he must\nhave knocked his head against the ceiling, Scrooge cried in great\nexcitement--\n\n'Why, it's old Fezziwig! Bless his heart, it's Fezziwig alive again!'\n\nOld Fezziwig laid down his pen, and looked up at the clock, which\npointed to the hour of seven. He rubbed his hands; adjusted his\ncapacious waistcoat; laughed all over himself, from his shoes to his\norgan of benevolence; and called out, in a comfortable, oily, rich, fat,\njovial voice--\n\n'Yo ho, there! Ebenezer! Dick!'\n\nScrooge's former self, now grown a young man, came briskly in,\naccompanied by his fellow-'prentice.\n\n'Dick Wilkins, to be sure!' said Scrooge to the Ghost. 'Bless me, yes.\nThere he is. He was very much attached to me, was Dick",
+ "chunk_order_index": 13,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-8bc1edd983869352d14cf0886a9175a7": {
+ "tokens": 1200,
+ "content": "benevolence; and called out, in a comfortable, oily, rich, fat,\njovial voice--\n\n'Yo ho, there! Ebenezer! Dick!'\n\nScrooge's former self, now grown a young man, came briskly in,\naccompanied by his fellow-'prentice.\n\n'Dick Wilkins, to be sure!' said Scrooge to the Ghost. 'Bless me, yes.\nThere he is. He was very much attached to me, was Dick. Poor Dick! Dear,\ndear!'\n\n'Yo ho, my boys!' said Fezziwig. 'No more work to-night. Christmas Eve,\nDick. Christmas, Ebenezer! Let's have the shutters up,' cried old\nFezziwig, with a sharp clap of his hands, 'before a man can say Jack\nRobinson!'\n\nYou wouldn't believe how those two fellows went at it! They charged into\nthe street with the shutters--one, two, three--had 'em up in their\nplaces--four, five, six--barred 'em and pinned 'em--seven, eight,\nnine--and came back before you could have got to twelve, panting like\nracehorses.\n\n'Hilli-ho!' cried old Fezziwig, skipping down from the high desk with\nwonderful agility. 'Clear away, my lads, and let's have lots of room\nhere! Hilli-ho, Dick! Chirrup, Ebenezer!'\n\nClear away! There was nothing they wouldn't have cleared away, or\ncouldn't have cleared away, with old Fezziwig looking on. It was done in\na minute. Every movable was packed off, as if it were dismissed from\npublic life for evermore; the floor was swept and watered, the lamps\nwere trimmed, fuel was heaped upon the fire; and the warehouse was as\nsnug, and warm, and dry, and bright a ball-room as you would desire to\nsee upon a winter's night.\n\nIn came a fiddler with a music-book, and went up to the lofty desk, and\nmade an orchestra of it, and tuned like fifty stomach-aches. In came\nMrs. Fezziwig, one vast substantial smile. In came the three Miss\nFezziwigs, beaming and lovable. In came the six young followers whose\nhearts they broke. In came all the young men and women employed in the\nbusiness. In came the housemaid, with her cousin the baker. 
In came the\ncook with her brother's particular friend the milkman. In came the boy\nfrom over the way, who was suspected of not having board enough from his\nmaster; trying to hide himself behind the girl from next door but one,\nwho was proved to have had her ears pulled by her mistress. In they all\ncame, one after another; some shyly, some boldly, some gracefully, some\nawkwardly, some pushing, some pulling; in they all came, any how and\nevery how. Away they all went, twenty couple at once; hands half round\nand back again the other way; down the middle and up again; round and\nround in various stages of affectionate grouping; old top couple always\nturning up in the wrong place; new top couple starting off again as soon\nas they got there; all top couples at last, and not a bottom one to help\nthem! When this result was brought about, old Fezziwig, clapping his\nhands to stop the dance, cried out, 'Well done!' and the fiddler plunged\nhis hot face into a pot of porter, especially provided for that purpose.\nBut, scorning rest upon his reappearance, he instantly began again,\nthough there were no dancers yet, as if the other fiddler had been\ncarried home, exhausted, on a shutter, and he were a bran-new man\nresolved to beat him out of sight, or perish.\n\n[Illustration: _Then old Fezziwig stood out to dance with Mrs.\nFezziwig_]\n\nThere were more dances, and there were forfeits, and more dances, and\nthere was cake, and there was negus, and there was a great piece of Cold\nRoast, and there was a great piece of Cold Boiled, and there were\nmince-pies, and plenty of beer. But the great effect of the evening came\nafter the Roast and Boiled, when the fiddler (an artful dog, mind! The\nsort of man who knew his business better than you or I could have told\nit him!) struck up 'Sir Roger de Coverley.' Then old Fezziwig stood\nout to dance with Mrs. Fezziwig. 
Top couple, too; with a good stiff\npiece of work cut out for them; three or four and twenty pair of\npartners; people who were not to be trifled with; people who would\ndance, and had no notion of walking.\n\nBut if they had been twice as many--ah! four times--old Fezziwig would\nhave been a match for them, and so would Mrs. Fezziwig. As to _her_, she\nwas worthy to be his partner in every sense of the term. If that's not\nhigh praise, tell me higher, and I'll use it. A positive light appeared\nto issue from Fezziwig's calves. They shone in every part of the dance\nlike moons. You couldn't have predicted, at any given time, what would\nbecome of them next. And when old Fezziwig and Mrs. Fezziwig had gone\nall through the dance; advance and retire, both hands to your partner,\nbow and curtsy, cork-screw, thread-the-needle",
+ "chunk_order_index": 14,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-b2f98bc7abb0c67d4c86055c7fd72218": {
+ "tokens": 1200,
+ "content": "me higher, and I'll use it. A positive light appeared\nto issue from Fezziwig's calves. They shone in every part of the dance\nlike moons. You couldn't have predicted, at any given time, what would\nbecome of them next. And when old Fezziwig and Mrs. Fezziwig had gone\nall through the dance; advance and retire, both hands to your partner,\nbow and curtsy, cork-screw, thread-the-needle, and back again to your\nplace: Fezziwig 'cut'--cut so deftly, that he appeared to wink with his\nlegs, and came upon his feet again without a stagger.\n\nWhen the clock struck eleven, this domestic ball broke up. Mr. and Mrs.\nFezziwig took their stations, one on either side the door, and, shaking\nhands with every person individually as he or she went out, wished him\nor her a Merry Christmas. When everybody had retired but the two\n'prentices, they did the same to them; and thus the cheerful voices died\naway, and the lads were left to their beds; which were under a counter\nin the back-shop.\n\nDuring the whole of this time Scrooge had acted like a man out of his\nwits. His heart and soul were in the scene, and with his former self. He\ncorroborated everything, remembered everything, enjoyed everything, and\nunderwent the strangest agitation. It was not until now, when the bright\nfaces of his former self and Dick were turned from them, that he\nremembered the Ghost, and became conscious that it was looking full upon\nhim, while the light upon its head burnt very clear.\n\n'A small matter,' said the Ghost, 'to make these silly folks so full of\ngratitude.'\n\n'Small!' echoed Scrooge.\n\nThe Spirit signed to him to listen to the two apprentices, who were\npouring out their hearts in praise of Fezziwig; and when he had done so,\nsaid:\n\n'Why! Is it not? He has spent but a few pounds of your mortal money:\nthree or four, perhaps. 
Is that so much that he deserves this praise?'\n\n'It isn't that,' said Scrooge, heated by the remark, and speaking\nunconsciously like his former, not his latter self. 'It isn't that,\nSpirit. He has the power to render us happy or unhappy; to make our\nservice light or burdensome; a pleasure or a toil. Say that his power\nlies in words and looks; in things so slight and insignificant that it\nis impossible to add and count 'em up: what then? The happiness he gives\nis quite as great as if it cost a fortune.'\n\nHe felt the Spirit's glance, and stopped.\n\n'What is the matter?' asked the Ghost.\n\n'Nothing particular,' said Scrooge.\n\n'Something, I think?' the Ghost insisted.\n\n'No,' said Scrooge, 'no. I should like to be able to say a word or two\nto my clerk just now. That's all.'\n\nHis former self turned down the lamps as he gave utterance to the wish;\nand Scrooge and the Ghost again stood side by side in the open air.\n\n'My time grows short,' observed the Spirit. 'Quick!'\n\nThis was not addressed to Scrooge, or to any one whom he could see, but\nit produced an immediate effect. For again Scrooge saw himself. He was\nolder now; a man in the prime of life. His face had not the harsh and\nrigid lines of later years; but it had begun to wear the signs of care\nand avarice. There was an eager, greedy, restless motion in the eye,\nwhich showed the passion that had taken root, and where the shadow of\nthe growing tree would fall.\n\nHe was not alone, but sat by the side of a fair young girl in a mourning\ndress: in whose eyes there were tears, which sparkled in the light that\nshone out of the Ghost of Christmas Past.\n\n'It matters little,' she said softly. 'To you, very little. Another idol\nhas displaced me; and, if it can cheer and comfort you in time to come\nas I would have tried to do, I have no just cause to grieve.'\n\n'What Idol has displaced you?' he rejoined.\n\n'A golden one.'\n\n'This is the even-handed dealing of the world!' he said. 
'There is\nnothing on which it is so hard as poverty; and there is nothing it\nprofesses to condemn with such severity as the pursuit of wealth!'\n\n'You fear the world too much,' she answered gently. 'All your other\nhopes have merged into the hope of being beyond the chance of its sordid\nreproach. I have seen your nobler aspirations fall off one by one, until\nthe master passion, Gain, engrosses you. Have I not?'\n\n'What then?' he retorted. 'Even if I have grown so much wiser, what\nthen? I am not changed towards you.'\n\nShe shook her head.\n\n'Am I?'\n\n'Our contract is an old one. It was made when we were both poor, and\ncontent to be so, until, in good season, we could improve our worldly\nfortune by our patient industry. You _are_ changed. When it was made you\nwere another man.'\n\n'I was a boy,' he said impatiently.\n\n'Your own feeling tells you that you were not what you are,' she\nreturned. 'I am. That which promised happiness when we were one in heart\nis fraught with misery now that we are two. How often and how keenly I\nhave thought of this I will",
+ "chunk_order_index": 15,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-54bdd22db1f84a965b5f4c06c9216d8d": {
+ "tokens": 1200,
+ "content": "in good season, we could improve our worldly\nfortune by our patient industry. You _are_ changed. When it was made you\nwere another man.'\n\n'I was a boy,' he said impatiently.\n\n'Your own feeling tells you that you were not what you are,' she\nreturned. 'I am. That which promised happiness when we were one in heart\nis fraught with misery now that we are two. How often and how keenly I\nhave thought of this I will not say. It is enough that I _have_ thought\nof it, and can release you.'\n\n'Have I ever sought release?'\n\n'In words. No. Never.'\n\n'In what, then?'\n\n'In a changed nature; in an altered spirit; in another atmosphere of\nlife; another Hope as its great end. In everything that made my love of\nany worth or value in your sight. If this had never been between us,'\nsaid the girl, looking mildly, but with steadiness, upon him; 'tell me,\nwould you seek me out and try to win me now? Ah, no!'\n\nHe seemed to yield to the justice of this supposition in spite of\nhimself. But he said, with a struggle, 'You think not.'\n\n'I would gladly think otherwise if I could,' she answered. 'Heaven\nknows! When _I_ have learned a Truth like this, I know how strong and\nirresistible it must be. But if you were free to-day, to-morrow,\nyesterday, can even I believe that you would choose a dowerless\ngirl--you who, in your very confidence with her, weigh everything by\nGain: or, choosing her, if for a moment you were false enough to your\none guiding principle to do so, do I not know that your repentance and\nregret would surely follow? I do; and I release you. With a full heart,\nfor the love of him you once were.'\n\n[Illustration: SHE LEFT HIM, AND THEY PARTED]\n\nHe was about to speak; but, with her head turned from him, she resumed:\n\n'You may--the memory of what is past half makes me hope you will--have\npain in this. 
A very, very brief time, and you will dismiss the\nrecollection of it gladly, as an unprofitable dream, from which it\nhappened well that you awoke. May you be happy in the life you have\nchosen!'\n\nShe left him, and they parted.\n\n'Spirit!' said Scrooge, 'show me no more! Conduct me home. Why do you\ndelight to torture me?'\n\n'One shadow more!' exclaimed the Ghost.\n\n'No more!' cried Scrooge. 'No more! I don't wish to see it. Show me no\nmore!'\n\nBut the relentless Ghost pinioned him in both his arms, and forced him\nto observe what happened next.\n\nThey were in another scene and place; a room, not very large or\nhandsome, but full of comfort. Near to the winter fire sat a beautiful\nyoung girl, so like that last that Scrooge believed it was the same,\nuntil he saw _her_, now a comely matron, sitting opposite her daughter.\nThe noise in this room was perfectly tumultuous, for there were more\nchildren there than Scrooge in his agitated state of mind could count;\nand, unlike the celebrated herd in the poem, they were not forty\nchildren conducting themselves like one, but every child was conducting\nitself like forty. The consequences were uproarious beyond belief; but\nno one seemed to care; on the contrary, the mother and daughter laughed\nheartily, and enjoyed it very much; and the latter, soon beginning to\nmingle in the sports, got pillaged by the young brigands most\nruthlessly. What would I not have given to be one of them! Though I\nnever could have been so rude, no, no! I wouldn't for the wealth of all\nthe world have crushed that braided hair, and torn it down; and for the\nprecious little shoe, I wouldn't have plucked it off, God bless my soul!\nto save my life. As to measuring her waist in sport, as they did, bold\nyoung brood, I couldn't have done it; I should have expected my arm to\nhave grown round it for a punishment, and never come straight again. 
And\nyet I should have dearly liked, I own, to have touched her lips; to have\nquestioned her, that she might have opened them; to have looked upon the\nlashes of her downcast eyes, and never raised a blush; to have let loose\nwaves of hair, an inch of which would be a keepsake beyond price: in\nshort, I should have liked, I do confess, to have had the lightest\nlicense of a child, and yet to have been man enough to know its value.\n\n[Illustration: _A flushed and boisterous group_]\n\nBut now a knocking at the door was heard, and such a rush immediately\nensued that she, with laughing face and plundered dress, was borne\ntowards it the centre of a flushed and boisterous group, just in time to\ngreet the father, who came home attended by a man laden with Christmas\ntoys and presents. Then the shouting and the struggling, and the\nonslaught that was made on the defenceless porter! The scaling him, with\nchairs for ladders, to dive into his pockets, despoil him of\nbrown-paper parcels, hold on tight by his cravat, hug him round his\nneck, pummel his back, and kick his legs in irrepressible",
+ "chunk_order_index": 16,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-da2472b4ef2a535b62908f14d0fb0ca9": {
+ "tokens": 1200,
+ "content": "greet the father, who came home attended by a man laden with Christmas\ntoys and presents. Then the shouting and the struggling, and the\nonslaught that was made on the defenceless porter! The scaling him, with\nchairs for ladders, to dive into his pockets, despoil him of\nbrown-paper parcels, hold on tight by his cravat, hug him round his\nneck, pummel his back, and kick his legs in irrepressible affection! The\nshouts of wonder and delight with which the development of every package\nwas received! The terrible announcement that the baby had been taken in\nthe act of putting a doll's frying pan into his mouth, and was more than\nsuspected of having swallowed a fictitious turkey, glued on a wooden\nplatter! The immense relief of finding this a false alarm! The joy, and\ngratitude, and ecstasy! They are all indescribable alike. It is enough\nthat, by degrees, the children and their emotions got out of the\nparlour, and, by one stair at a time, up to the top of the house, where\nthey went to bed, and so subsided.\n\nAnd now Scrooge looked on more attentively than ever, when the master of\nthe house, having his daughter leaning fondly on him, sat down with her\nand her mother at his own fireside; and when he thought that such\nanother creature, quite as graceful and as full of promise, might have\ncalled him father, and been a spring-time in the haggard winter of his\nlife, his sight grew very dim indeed.\n\n'Belle,' said the husband, turning to his wife with a smile, 'I saw an\nold friend of yours this afternoon.'\n\n'Who was it?'\n\n'Guess!'\n\n'How can I? Tut, don't I know?' she added in the same breath, laughing\nas he laughed. 'Mr. Scrooge.'\n\n'Mr. Scrooge it was. I passed his office window; and as it was not shut\nup, and he had a candle inside, I could scarcely help seeing him. His\npartner lies upon the point of death, I hear; and there he sat alone.\nQuite alone in the world, I do believe.'\n\n'Spirit!' 
said Scrooge in a broken voice, 'remove me from this place.'\n\n'I told you these were shadows of the things that have been,' said the\nGhost. 'That they are what they are do not blame me!'\n\n'Remove me!' Scrooge exclaimed, 'I cannot bear it!'\n\nHe turned upon the Ghost, and seeing that it looked upon him with a\nface, in which in some strange way there were fragments of all the faces\nit had shown him, wrestled with it.\n\n'Leave me! Take me back. Haunt me no longer!'\n\nIn the struggle, if that can be called a struggle in which the Ghost\nwith no visible resistance on its own part was undisturbed by any effort\nof its adversary, Scrooge observed that its light was burning high and\nbright; and dimly connecting that with its influence over him, he seized\nthe extinguisher-cap, and by a sudden action pressed it down upon its\nhead.\n\n[Illustration: _Laden with Christmas toys and presents_]\n\nThe Spirit dropped beneath it, so that the extinguisher covered its\nwhole form; but though Scrooge pressed it down with all his force, he\ncould not hide the light, which streamed from under it, in an unbroken\nflood upon the ground.\n\nHe was conscious of being exhausted, and overcome by an irresistible\ndrowsiness; and, further, of being in his own bedroom. He gave the cap a\nparting squeeze, in which his hand relaxed; and had barely time to reel\nto bed, before he sank into a heavy sleep.\n\n[Illustration]\n\n\nSTAVE THREE\n\n\n[Illustration]\n\n\n\n\nTHE SECOND OF THE THREE SPIRITS\n\n\nAwaking in the middle of a prodigiously tough snore, and sitting up in\nbed to get his thoughts together, Scrooge had no occasion to be told\nthat the bell was again upon the stroke of One. He felt that he was\nrestored to consciousness in the right nick of time, for the especial\npurpose of holding a conference with the second messenger despatched to\nhim through Jacob Marley's intervention. 
But finding that he turned\nuncomfortably cold when he began to wonder which of his curtains this\nnew spectre would draw back, he put them every one aside with his own\nhands, and, lying down again, established a sharp look-out all round the\nbed. For he wished to challenge the Spirit on the moment of its\nappearance, and did not wish to be taken by surprise and made nervous.\n\nGentlemen of the free-and-easy sort, who plume themselves on being\nacquainted with a move or two, and being usually equal to the time of\nday, express the wide range of their capacity for adventure by observing\nthat they are good for anything from pitch-and-toss to manslaughter;\nbetween which opposite extremes, no doubt, there lies a tolerably wide\nand comprehensive range of subjects. Without venturing for Scrooge quite\nas hardily as this, I don't mind calling on you to believe that he was\nready for a good broad field of strange appearances, and that nothing\nbetween a baby and a rhinoceros would have astonished him very much.\n\nNow, being prepared for almost anything, he was not by any means\nprepared for nothing; and consequently, when the bell struck One, and no\nshape appeared, he was taken with a violent fit of trembling. Five",
+ "chunk_order_index": 17,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-843b5b2f944c7413d9ca876e2e2f151f": {
+ "tokens": 1200,
+ "content": "uring for Scrooge quite\nas hardily as this, I don't mind calling on you to believe that he was\nready for a good broad field of strange appearances, and that nothing\nbetween a baby and a rhinoceros would have astonished him very much.\n\nNow, being prepared for almost anything, he was not by any means\nprepared for nothing; and consequently, when the bell struck One, and no\nshape appeared, he was taken with a violent fit of trembling. Five\nminutes, ten minutes, a quarter of an hour went by, yet nothing came.\nAll this time he lay upon his bed, the very core and centre of a blaze\nof ruddy light, which streamed upon it when the clock proclaimed the\nhour; and which, being only light, was more alarming than a dozen\nghosts, as he was powerless to make out what it meant, or would be at;\nand was sometimes apprehensive that he might be at that very moment an\ninteresting case of spontaneous combustion, without having the\nconsolation of knowing it. At last, however, he began to think--as you\nor I would have thought at first; for it is always the person not in the\npredicament who knows what ought to have been done in it, and would\nunquestionably have done it too--at last, I say, he began to think that\nthe source and secret of this ghostly light might be in the adjoining\nroom, from whence, on further tracing it, it seemed to shine. This idea\ntaking full possession of his mind, he got up softly, and shuffled in\nhis slippers to the door.\n\nThe moment Scrooge's hand was on the lock a strange voice called him by\nhis name, and bade him enter. He obeyed.\n\nIt was his own room. There was no doubt about that. But it had undergone\na surprising transformation. The walls and ceiling were so hung with\nliving green, that it looked a perfect grove; from every part of which\nbright gleaming berries glistened. 
The crisp leaves of holly, mistletoe,\nand ivy reflected back the light, as if so many little mirrors had been\nscattered there; and such a mighty blaze went roaring up the chimney as\nthat dull petrification of a hearth had never known in Scrooge's time,\nor Marley's, or for many and many a winter season gone. Heaped up on the\nfloor, to form a kind of throne, were turkeys, geese, game, poultry,\nbrawn, great joints of meat, sucking-pigs, long wreaths of sausages,\nmince-pies, plum-puddings, barrels of oysters, red-hot chestnuts,\ncherry-cheeked apples, juicy oranges, luscious pears, immense\ntwelfth-cakes, and seething bowls of punch, that made the chamber dim\nwith their delicious steam. In easy state upon this couch there sat a\njolly Giant, glorious to see; who bore a glowing torch, in shape not\nunlike Plenty's horn, and held it up, high up, to shed its light on\nScrooge as he came peeping round the door.\n\n'Come in!' exclaimed the Ghost. 'Come in! and know me better, man!'\n\nScrooge entered timidly, and hung his head before this Spirit. He was\nnot the dogged Scrooge he had been; and though the Spirit's eyes were\nclear and kind, he did not like to meet them.\n\n'I am the Ghost of Christmas Present,' said the Spirit. 'Look upon me!'\n\nScrooge reverently did so. It was clothed in one simple deep green robe,\nor mantle, bordered with white fur. This garment hung so loosely on the\nfigure, that its capacious breast was bare, as if disdaining to be\nwarded or concealed by any artifice. Its feet, observable beneath the\nample folds of the garment, were also bare; and on its head it wore no\nother covering than a holly wreath, set here and there with shining\nicicles. Its dark-brown curls were long and free; free as its genial\nface, its sparkling eye, its open hand, its cheery voice, its\nunconstrained demeanour, and its joyful air. 
Girded round its middle was\nan antique scabbard: but no sword was in it, and the ancient sheath was\neaten up with rust.\n\n'You have never seen the like of me before!' exclaimed the Spirit.\n\n'Never,' Scrooge made answer to it.\n\n'Have never walked forth with the younger members of my family; meaning\n(for I am very young) my elder brothers born in these later years?'\npursued the Phantom.\n\n'I don't think I have,' said Scrooge. 'I am afraid I have not. Have you\nhad many brothers, Spirit?'\n\n'More than eighteen hundred,' said the Ghost.\n\n'A tremendous family to provide for,' muttered Scrooge.\n\nThe Ghost of Christmas Present rose.\n\n'Spirit,' said Scrooge submissively, 'conduct me where you will. I went\nforth last night on compulsion, and I learned a lesson which is working\nnow. To-night if you have aught to teach me, let me profit by it.'\n\n'Touch my robe!'\n\nScrooge did as he was told, and held it fast.\n\nHolly, mistletoe, red berries, ivy, turkeys, geese, game, poultry,\nbrawn, meat, pigs, sausages, oysters, pies, puddings, fruit, and punch,\nall vanished instantly. So did the room, the fire, the ruddy glow, the\nhour of night, and they stood in the city",
+ "chunk_order_index": 18,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-99b0ec7f7e006561352b5d65e849611d": {
+ "tokens": 1200,
+ "content": ", let me profit by it.'\n\n'Touch my robe!'\n\nScrooge did as he was told, and held it fast.\n\nHolly, mistletoe, red berries, ivy, turkeys, geese, game, poultry,\nbrawn, meat, pigs, sausages, oysters, pies, puddings, fruit, and punch,\nall vanished instantly. So did the room, the fire, the ruddy glow, the\nhour of night, and they stood in the city streets on Christmas morning,\nwhere (for the weather was severe) the people made a rough, but brisk\nand not unpleasant kind of music, in scraping the snow from the pavement\nin front of their dwellings, and from the tops of their houses, whence\nit was mad delight to the boys to see it come plumping down into the\nroad below, and splitting into artificial little snowstorms.\n\nThe house-fronts looked black enough, and the windows blacker,\ncontrasting with the smooth white sheet of snow upon the roofs, and with\nthe dirtier snow upon the ground; which last deposit had been ploughed\nup in deep furrows by the heavy wheels of carts and waggons: furrows\nthat crossed and recrossed each other hundreds of times where the great\nstreets branched off; and made intricate channels, hard to trace in the\nthick yellow mud and icy water. The sky was gloomy, and the shortest\nstreets were choked up with a dingy mist, half thawed, half frozen,\nwhose heavier particles descended in a shower of sooty atoms, as if all\nthe chimneys in Great Britain had, by one consent, caught fire, and were\nblazing away to their dear heart's content. 
There was nothing very\ncheerful in the climate or the town, and yet was there an air of\ncheerfulness abroad that the clearest summer air and brightest summer\nsun might have endeavoured to diffuse in vain.\n\n[Illustration: THERE WAS NOTHING VERY CHEERFUL IN THE CLIMATE]\n\nFor the people who were shovelling away on the house-tops were jovial\nand full of glee; calling out to one another from the parapets, and now\nand then exchanging a facetious snowball--better-natured missile far\nthan many a wordy jest--laughing heartily if it went right, and not less\nheartily if it went wrong. The poulterers' shops were still half open,\nand the fruiterers' were radiant in their glory. There were great,\nround, pot-bellied baskets of chestnuts, shaped like the waistcoats of\njolly old gentlemen, lolling at the doors, and tumbling out into the\nstreet in their apoplectic opulence: There were ruddy, brown-faced,\nbroad-girthed Spanish onions, shining in the fatness of their growth\nlike Spanish friars, and winking from their shelves in wanton slyness at\nthe girls as they went by, and glanced demurely at the hung-up\nmistletoe. There were pears and apples clustered high in blooming\npyramids; there were bunches of grapes, made, in the shopkeepers'\nbenevolence, to dangle from conspicuous hooks that people's mouths might\nwater gratis as they passed; there were piles of filberts, mossy and\nbrown, recalling, in their fragrance, ancient walks among the woods, and\npleasant shufflings ankle deep through withered leaves; there were\nNorfolk Biffins, squab and swarthy, setting off the yellow of the\noranges and lemons, and, in the great compactness of their juicy\npersons, urgently entreating and beseeching to be carried home in paper\nbags and eaten after dinner. 
The very gold and silver fish, set forth\namong these choice fruits in a bowl, though members of a dull and\nstagnant-blooded race, appeared to know that there was something going\non; and, to a fish, went gasping round and round their little world in\nslow and passionless excitement.\n\nThe Grocers'! oh, the Grocers'! nearly closed, with perhaps two shutters\ndown, or one; but through those gaps such glimpses! It was not alone\nthat the scales descending on the counter made a merry sound, or that\nthe twine and roller parted company so briskly, or that the canisters\nwere rattled up and down like juggling tricks, or even that the blended\nscents of tea and coffee were so grateful to the nose, or even that the\nraisins were so plentiful and rare, the almonds so extremely white, the\nsticks of cinnamon so long and straight, the other spices so delicious,\nthe candied fruits so caked and spotted with molten sugar as to make the\ncoldest lookers-on feel faint, and subsequently bilious. Nor was it that\nthe figs were moist and pulpy, or that the French plums blushed in\nmodest tartness from their highly-decorated boxes, or that everything\nwas good to eat and in its Christmas dress; but the customers were all\nso hurried and so eager in the hopeful promise of the day, that they\ntumbled up against each other at the door, crashing their wicker baskets\nwildly, and left their purchases upon the counter, and came running\nback to fetch them, and committed hundreds of the like mistakes, in the\nbest humour possible; while the grocer and his people were so frank and\nfresh, that the polished hearts with which they fastened their aprons\nbehind might have been their own, worn outside for general inspection,\nand for Christmas daws to peck at if they chose.\n\nBut soon",
+ "chunk_order_index": 19,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-b0f459f5af1c3a5e0b92cbe4ee48b77b": {
+ "tokens": 1200,
+ "content": "other at the door, crashing their wicker baskets\nwildly, and left their purchases upon the counter, and came running\nback to fetch them, and committed hundreds of the like mistakes, in the\nbest humour possible; while the grocer and his people were so frank and\nfresh, that the polished hearts with which they fastened their aprons\nbehind might have been their own, worn outside for general inspection,\nand for Christmas daws to peck at if they chose.\n\nBut soon the steeples called good people all to church and chapel, and\naway they came, flocking through the streets in their best clothes and\nwith their gayest faces. And at the same time there emerged, from scores\nof by-streets, lanes, and nameless turnings, innumerable people,\ncarrying their dinners to the bakers' shops. The sight of these poor\nrevellers appeared to interest the Spirit very much, for he stood with\nScrooge beside him in a baker's doorway, and, taking off the covers as\ntheir bearers passed, sprinkled incense on their dinners from his torch.\nAnd it was a very uncommon kind of torch, for once or twice, when there\nwere angry words between some dinner-carriers who had jostled each\nother, he shed a few drops of water on them from it, and their\ngood-humour was restored directly. For they said, it was a shame to\nquarrel upon Christmas Day. And so it was! God love it, so it was!\n\nIn time the bells ceased, and the bakers were shut up; and yet there was\na genial shadowing forth of all these dinners, and the progress of their\ncooking, in the thawed blotch of wet above each baker's oven, where the\npavement smoked as if its stones were cooking too.\n\n'Is there a peculiar flavour in what you sprinkle from your torch?'\nasked Scrooge.\n\n'There is. My own.'\n\n'Would it apply to any kind of dinner on this day?' asked Scrooge.\n\n'To any kindly given. To a poor one most.'\n\n'Why to a poor one most?' asked Scrooge.\n\n'Because it needs it most.'\n\n'Spirit!' 
said Scrooge, after a moment's thought, 'I wonder you, of all\nthe beings in the many worlds about us, should desire to cramp these\npeople's opportunities of innocent enjoyment.\n\n'I!' cried the Spirit.\n\n'You would deprive them of their means of dining every seventh day,\noften the only day on which they can be said to dine at all,' said\nScrooge; 'wouldn't you?'\n\n'I!' cried the Spirit.\n\n'You seek to close these places on the Seventh Day,' said Scrooge. 'And\nit comes to the same thing.'\n\n'I seek!' exclaimed the Spirit.\n\n'Forgive me if I am wrong. It has been done in your name, or at least in\nthat of your family,' said Scrooge.\n\n'There are some upon this earth of yours,' returned the Spirit, 'who\nlay claim to know us, and who do their deeds of passion, pride,\nill-will, hatred, envy, bigotry, and selfishness in our name, who are as\nstrange to us, and all our kith and kin, as if they had never lived.\nRemember that, and charge their doings on themselves, not us.'\n\nScrooge promised that he would; and they went on, invisible, as they had\nbeen before, into the suburbs of the town. It was a remarkable quality\nof the Ghost (which Scrooge had observed at the baker's), that\nnotwithstanding his gigantic size, he could accommodate himself to any\nplace with ease; and that he stood beneath a low roof quite as\ngracefully and like a supernatural creature as it was possible he could\nhave done in any lofty hall.\n\nAnd perhaps it was the pleasure the good Spirit had in showing off this\npower of his, or else it was his own kind, generous, hearty nature, and\nhis sympathy with all poor men, that led him straight to Scrooge's\nclerk's; for there he went, and took Scrooge with him, holding to his\nrobe; and on the threshold of the door the Spirit smiled, and stopped to\nbless Bob Cratchit's dwelling with the sprinklings of his torch. Think\nof that! 
Bob had but fifteen 'Bob' a week himself; he pocketed on\nSaturdays but fifteen copies of his Christian name; and yet the Ghost of\nChristmas Present blessed his four-roomed house!\n\nThen up rose Mrs. Cratchit, Cratchit's wife, dressed out but poorly in a\ntwice-turned gown, but brave in ribbons, which are cheap, and make a\ngoodly show for sixpence; and she laid the cloth, assisted by Belinda\nCratchit, second of her daughters, also brave in ribbons; while Master\nPeter Cratchit plunged a fork into the saucepan of potatoes, and getting\nthe corners of his monstrous shirt-collar (Bob's private property,\nconferred upon his son and heir in honour of the day,) into his mouth,\nrejoiced to find himself so gallantly attired, and yearned to show his\nlinen in the fashionable Parks. And now two smaller Cratchits, boy and\ngirl, came tearing in, screaming that outside the baker's they had smelt\nthe goose, and known it for their own; and basking in luxurious thoughts\nof sage and onion, these young Cratchits danced about the table, and\nexalted Master Peter Cratchit to the skies, while he (not proud,\nalthough his collars nearly",
+ "chunk_order_index": 20,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-7e2e7ebcc19a53b399dc03aded4743e7": {
+ "tokens": 1200,
+ "content": "antly attired, and yearned to show his\nlinen in the fashionable Parks. And now two smaller Cratchits, boy and\ngirl, came tearing in, screaming that outside the baker's they had smelt\nthe goose, and known it for their own; and basking in luxurious thoughts\nof sage and onion, these young Cratchits danced about the table, and\nexalted Master Peter Cratchit to the skies, while he (not proud,\nalthough his collars nearly choked him) blew the fire, until the slow\npotatoes, bubbling up, knocked loudly at the saucepan-lid to be let out\nand peeled.\n\n'What has ever got your precious father, then?' said Mrs. Cratchit. 'And\nyour brother, Tiny Tim? And Martha warn't as late last Christmas Day by\nhalf an hour!'\n\n'Here's Martha, mother!' said a girl, appearing as she spoke.\n\n'Here's Martha, mother!' cried the two young Cratchits. 'Hurrah! There's\n_such_ a goose, Martha!'\n\n'Why, bless your heart alive, my dear, how late you are!' said Mrs.\nCratchit, kissing her a dozen times, and taking off her shawl and bonnet\nfor her with officious zeal.\n\n'We'd a deal of work to finish up last night,' replied the girl, 'and\nhad to clear away this morning, mother!'\n\n'Well! never mind so long as you are come,' said Mrs. Cratchit. 'Sit ye\ndown before the fire, my dear, and have a warm, Lord bless ye!'\n\n'No, no! There's father coming,' cried the two young Cratchits, who were\neverywhere at once. 'Hide, Martha, hide!'\n\nSo Martha hid herself, and in came little Bob, the father, with at least\nthree feet of comforter, exclusive of the fringe, hanging down before\nhim, and his threadbare clothes darned up and brushed to look\nseasonable, and Tiny Tim upon his shoulder. Alas for Tiny Tim, he bore a\nlittle crutch, and had his limbs supported by an iron frame!\n\n'Why, where's our Martha?' cried Bob Cratchit, looking round.\n\n'Not coming,' said Mrs. Cratchit.\n\n'Not coming!' 
said Bob, with a sudden declension in his high spirits;\nfor he had been Tim's blood-horse all the way from church, and had come\nhome rampant. 'Not coming upon Christmas Day!'\n\nMartha didn't like to see him disappointed, if it were only in joke; so\nshe came out prematurely from behind the closet door, and ran into his\narms, while the two young Cratchits hustled Tiny Tim, and bore him off\ninto the wash-house, that he might hear the pudding singing in the\ncopper.\n\n'And how did little Tim behave?' asked Mrs. Cratchit when she had\nrallied Bob on his credulity, and Bob had hugged his daughter to his\nheart's content.\n\n'As good as gold,' said Bob, 'and better. Somehow, he gets thoughtful,\nsitting by himself so much, and thinks the strangest things you ever\nheard. He told me, coming home, that he hoped the people saw him in the\nchurch, because he was a cripple, and it might be pleasant to them to\nremember upon Christmas Day who made lame beggars walk and blind men\nsee.'\n\nBob's voice was tremulous when he told them this, and trembled more when\nhe said that Tiny Tim was growing strong and hearty.\n\nHis active little crutch was heard upon the floor, and back came Tiny\nTim before another word was spoken, escorted by his brother and\nsister to his stool beside the fire; and while Bob, turning up his\ncuffs--as if, poor fellow, they were capable of being made more\nshabby--compounded some hot mixture in a jug with gin and lemons, and\nstirred it round and round, and put it on the hob to simmer, Master\nPeter and the two ubiquitous young Cratchits went to fetch the goose,\nwith which they soon returned in high procession.\n\n[Illustration]\n\nSuch a bustle ensued that you might have thought a goose the rarest of\nall birds; a feathered phenomenon, to which a black swan was a matter of\ncourse--and, in truth, it was something very like it in that house. 
Mrs.\nCratchit made the gravy (ready beforehand in a little saucepan) hissing\nhot; Master Peter mashed the potatoes with incredible vigour; Miss\nBelinda sweetened up the apple sauce; Martha dusted the hot plates; Bob\ntook Tiny Tim beside him in a tiny corner at the table; the two young\nCratchits set chairs for everybody, not forgetting themselves, and,\nmounting guard upon their posts, crammed spoons into their mouths, lest\nthey should shriek for goose before their turn came to be helped. At\nlast the dishes were set on, and grace was said. It was succeeded by a\nbreathless pause, as Mrs. Cratchit, looking slowly all along the\ncarving-knife, prepared to plunge it in the breast; but when she did,\nand when the long-expected gush of stuffing issued forth, one murmur of\ndelight arose all round the board, and even Tiny Tim, excited by the two\nyoung Cratchits, beat on the table with the handle of his knife and\nfeebly cried Hurrah!\n\n[Illustration: HE HAD BEEN TIM'S BLOOD-HORSE ALL THE WAY FROM CHURCH]\n\nThere never was such a goose. Bob said he didn't believe there ever was\nsuch",
+ "chunk_order_index": 21,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-503f0bfa5453467c7c61d160a3540eca": {
+ "tokens": 1200,
+ "content": "she did,\nand when the long-expected gush of stuffing issued forth, one murmur of\ndelight arose all round the board, and even Tiny Tim, excited by the two\nyoung Cratchits, beat on the table with the handle of his knife and\nfeebly cried Hurrah!\n\n[Illustration: HE HAD BEEN TIM'S BLOOD-HORSE ALL THE WAY FROM CHURCH]\n\nThere never was such a goose. Bob said he didn't believe there ever was\nsuch a goose cooked. Its tenderness and flavour, size and cheapness,\nwere the themes of universal admiration. Eked out by apple sauce and\nmashed potatoes, it was a sufficient dinner for the whole family;\nindeed, as Mrs. Cratchit said with great delight (surveying one small\natom of a bone upon the dish), they hadn't ate it all at last! Yet every\none had had enough, and the youngest Cratchits, in particular, were\nsteeped in sage and onion to the eyebrows! But now, the plates being\nchanged by Miss Belinda, Mrs. Cratchit left the room alone--too nervous\nto bear witnesses--to take the pudding up, and bring it in.\n\nSuppose it should not be done enough! Suppose it should break in turning\nout! Suppose somebody should have got over the wall of the back-yard and\nstolen it, while they were merry with the goose--a supposition at which\nthe two young Cratchits became livid! All sorts of horrors were\nsupposed.\n\nHallo! A great deal of steam! The pudding was out of the copper. A smell\nlike a washing-day! That was the cloth. A smell like an eating-house and\na pastry-cook's next door to each other, with a laundress's next door to\nthat! That was the pudding! In half a minute Mrs. Cratchit\nentered--flushed, but smiling proudly--with the pudding, like a speckled\ncannon-ball, so hard and firm, blazing in half of half-a-quartern of\nignited brandy, and bedight with Christmas holly stuck into the top.\n\nOh, a wonderful pudding! Bob Cratchit said, and calmly too, that he\nregarded it as the greatest success achieved by Mrs. Cratchit since\ntheir marriage. Mrs. 
Cratchit said that, now the weight was off her\nmind, she would confess she had her doubts about the quantity of flour.\nEverybody had something to say about it, but nobody said or thought it\nwas at all a small pudding for a large family. It would have been flat\nheresy to do so. Any Cratchit would have blushed to hint at such a\nthing.\n\n[Illustration: WITH THE PUDDING]\n\nAt last the dinner was all done, the cloth was cleared, the hearth\nswept, and the fire made up. The compound in the jug being tasted and\nconsidered perfect, apples and oranges were put upon the table, and a\nshovel full of chestnuts on the fire. Then all the Cratchit family\ndrew round the hearth in what Bob Cratchit called a circle, meaning half\na one; and at Bob Cratchit's elbow stood the family display of glass.\nTwo tumblers and a custard cup without a handle.\n\nThese held the hot stuff from the jug, however, as well as golden\ngoblets would have done; and Bob served it out with beaming looks, while\nthe chestnuts on the fire sputtered and cracked noisily. Then Bob\nproposed:\n\n'A merry Christmas to us all, my dears. God bless us!'\n\nWhich all the family re-echoed.\n\n'God bless us every one!' said Tiny Tim, the last of all.\n\nHe sat very close to his father's side, upon his little stool. Bob held\nhis withered little hand to his, as if he loved the child, and wished to\nkeep him by his side, and dreaded that he might be taken from him.\n\n'Spirit,' said Scrooge, with an interest he had never felt before, 'tell\nme if Tiny Tim will live.'\n\n'I see a vacant seat,' replied the Ghost, 'in the poor chimney corner,\nand a crutch without an owner, carefully preserved. If these shadows\nremain unaltered by the Future, the child will die.'\n\n'No, no,' said Scrooge. 'Oh no, kind Spirit! say he will be spared.'\n\n'If these shadows remain unaltered by the Future none other of my race,'\nreturned the Ghost, 'will find him here. What then? 
If he be like to\ndie, he had better do it, and decrease the surplus population.'\n\nScrooge hung his head to hear his own words quoted by the Spirit, and\nwas overcome with penitence and grief.\n\n'Man,' said the Ghost, 'if man you be in heart, not adamant, forbear\nthat wicked cant until you have discovered what the surplus is, and\nwhere it is. Will you decide what men shall live, what men shall die? It\nmay be that, in the sight of Heaven, you are more worthless and less fit\nto live than millions like this poor man's child. O God! to hear the\ninsect on the leaf pronouncing on the too much life among his hungry\nbrothers in the dust!'\n\nScrooge bent before the Ghost's rebuke, and, trembling, cast his eyes\nupon the ground. But he raised them speedily on hearing his own name.\n\n'Mr. Scrooge!' said Bob. 'I'll give you Mr. Scrooge, the Founder of the\nFeast!'\n\n'The Founder of the Feast",
+ "chunk_order_index": 22,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-19d72c57ae3408758b18d2568e86dc6b": {
+ "tokens": 1200,
+ "content": "O God! to hear the\ninsect on the leaf pronouncing on the too much life among his hungry\nbrothers in the dust!'\n\nScrooge bent before the Ghost's rebuke, and, trembling, cast his eyes\nupon the ground. But he raised them speedily on hearing his own name.\n\n'Mr. Scrooge!' said Bob. 'I'll give you Mr. Scrooge, the Founder of the\nFeast!'\n\n'The Founder of the Feast, indeed!' cried Mrs. Cratchit, reddening. 'I\nwish I had him here. I'd give him a piece of my mind to feast upon, and\nI hope he'd have a good appetite for it.'\n\n'My dear,' said Bob, 'the children! Christmas Day.'\n\n'It should be Christmas Day, I am sure,' said she, 'on which one drinks\nthe health of such an odious, stingy, hard, unfeeling man as Mr.\nScrooge. You know he is, Robert! Nobody knows it better than you do,\npoor fellow!'\n\n'My dear!' was Bob's mild answer. 'Christmas Day.'\n\n'I'll drink his health for your sake and the Day's,' said Mrs. Cratchit,\n'not for his. Long life to him! A merry Christmas and a happy New Year!\nHe'll be very merry and very happy, I have no doubt!'\n\nThe children drank the toast after her. It was the first of their\nproceedings which had no heartiness in it. Tiny Tim drank it last of\nall, but he didn't care twopence for it. Scrooge was the Ogre of the\nfamily. The mention of his name cast a dark shadow on the party, which\nwas not dispelled for full five minutes.\n\nAfter it had passed away they were ten times merrier than before, from\nthe mere relief of Scrooge the Baleful being done with. Bob Cratchit\ntold them how he had a situation in his eye for Master Peter, which\nwould bring in, if obtained, full five-and-sixpence weekly. The two\nyoung Cratchits laughed tremendously at the idea of Peter's being a man\nof business; and Peter himself looked thoughtfully at the fire from\nbetween his collars, as if he were deliberating what particular\ninvestments he should favour when he came into the receipt of that\nbewildering income. 
Martha, who was a poor apprentice at a milliner's,\nthen told them what kind of work she had to do, and how many hours she\nworked at a stretch and how she meant to lie abed to-morrow morning for\na good long rest; to-morrow being a holiday she passed at home. Also how\nshe had seen a countess and a lord some days before, and how the lord\n'was much about as tall as Peter'; at which Peter pulled up his collar\nso high that you couldn't have seen his head if you had been there. All\nthis time the chestnuts and the jug went round and round; and by-and-by\nthey had a song, about a lost child travelling in the snow, from Tiny\nTim, who had a plaintive little voice, and sang it very well indeed.\n\nThere was nothing of high mark in this. They were not a handsome family;\nthey were not well dressed; their shoes were far from being waterproof;\ntheir clothes were scanty; and Peter might have known, and very likely\ndid, the inside of a pawnbroker's. But they were happy, grateful,\npleased with one another, and contented with the time; and when they\nfaded, and looked happier yet in the bright sprinklings of the Spirit's\ntorch at parting, Scrooge had his eye upon them, and especially on Tiny\nTim, until the last.\n\nBy this time it was getting dark, and snowing pretty heavily; and as\nScrooge and the Spirit went along the streets, the brightness of the\nroaring fires in kitchens, parlours, and all sorts of rooms was\nwonderful. Here, the flickering of the blaze showed preparations for a\ncosy dinner, with hot plates baking through and through before the fire,\nand deep red curtains, ready to be drawn to shut out cold and darkness.\nThere, all the children of the house were running out into the snow to\nmeet their married sisters, brothers, cousins, uncles, aunts, and be the\nfirst to greet them. 
Here, again, were shadows on the window-blinds of\nguests assembling; and there a group of handsome girls, all hooded and\nfur-booted, and all chattering at once, tripped lightly off to some near\nneighbour's house; where, woe upon the single man who saw them\nenter--artful witches, well they knew it--in a glow!\n\nBut, if you had judged from the numbers of people on their way to\nfriendly gatherings, you might have thought that no one was at home to\ngive them welcome when they got there, instead of every house expecting\ncompany, and piling up its fires half-chimney high. Blessings on it, how\nthe Ghost exulted! How it bared its breadth of breast, and opened its\ncapacious palm, and floated on, outpouring with a generous hand its\nbright and harmless mirth on everything within its reach! The very\nlamplighter, who ran on before, dotting the dusky street with specks of\nlight, and who was dressed to spend the evening somewhere, laughed out\nloudly as the Spirit passed, though little kenned the lamplighter that\nhe had any company but Christmas.\n\nAnd now, without a word of warning from",
+ "chunk_order_index": 23,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-91614df5cb74cbe5e109a338a82041e9": {
+ "tokens": 1200,
+ "content": "capacious palm, and floated on, outpouring with a generous hand its\nbright and harmless mirth on everything within its reach! The very\nlamplighter, who ran on before, dotting the dusky street with specks of\nlight, and who was dressed to spend the evening somewhere, laughed out\nloudly as the Spirit passed, though little kenned the lamplighter that\nhe had any company but Christmas.\n\nAnd now, without a word of warning from the Ghost, they stood upon a\nbleak and desert moor, where monstrous masses of rude stone were cast\nabout, as though it were the burial-place of giants; and water spread\nitself wheresoever it listed; or would have done so, but for the frost\nthat held it prisoner; and nothing grew but moss and furze, and coarse,\nrank grass. Down in the west the setting sun had left a streak of fiery\nred, which glared upon the desolation for an instant, like a sullen eye,\nand frowning lower, lower, lower yet, was lost in the thick gloom of\ndarkest night.\n\n'What place is this?' asked Scrooge.\n\n'A place where miners live, who labour in the bowels of the earth,'\nreturned the Spirit. 'But they know me. See!'\n\nA light shone from the window of a hut, and swiftly they advanced\ntowards it. Passing through the wall of mud and stone, they found a\ncheerful company assembled round a glowing fire. An old, old man and\nwoman, with their children and their children's children, and another\ngeneration beyond that, all decked out gaily in their holiday attire.\nThe old man, in a voice that seldom rose above the howling of the wind\nupon the barren waste, was singing them a Christmas song; it had been a\nvery old song when he was a boy; and from time to time they all joined\nin the chorus. So surely as they raised their voices, the old man got\nquite blithe and loud; and so surely as they stopped, his vigour sank\nagain.\n\nThe Spirit did not tarry here, but bade Scrooge hold his robe, and,\npassing on above the moor, sped whither? Not to sea? 
To sea. To\nScrooge's horror, looking back, he saw the last of the land, a frightful\nrange of rocks, behind them; and his ears were deafened by the\nthundering of water, as it rolled and roared, and raged among the\ndreadful caverns it had worn, and fiercely tried to undermine the earth.\n\nBuilt upon a dismal reef of sunken rocks, some league or so from shore,\non which the waters chafed and dashed, the wild year through, there\nstood a solitary lighthouse. Great heaps of seaweed clung to its base,\nand storm-birds--born of the wind, one might suppose, as seaweed of the\nwater--rose and fell about it, like the waves they skimmed.\n\nBut, even here, two men who watched the light had made a fire, that\nthrough the loophole in the thick stone wall shed out a ray of\nbrightness on the awful sea. Joining their horny hands over the rough\ntable at which they sat, they wished each other Merry Christmas in their\ncan of grog; and one of them--the elder too, with his face all damaged\nand scarred with hard weather, as the figure-head of an old ship might\nbe--struck up a sturdy song that was like a gale in itself.\n\nAgain the Ghost sped on, above the black and heaving sea--on, on--until\nbeing far away, as he told Scrooge, from any shore, they lighted on a\nship. They stood beside the helmsman at the wheel, the look-out in the\nbow, the officers who had the watch; dark, ghostly figures in their\nseveral stations; but every man among them hummed a Christmas tune, or\nhad a Christmas thought, or spoke below his breath to his companion of\nsome bygone Christmas Day, with homeward hopes belonging to it. 
And\nevery man on board, waking or sleeping, good or bad, had had a kinder\nword for one another on that day than on any day in the year; and had\nshared to some extent in its festivities; and had remembered those he\ncared for at a distance, and had known that they delighted to remember\nhim.\n\nIt was a great surprise to Scrooge, while listening to the moaning of\nthe wind, and thinking what a solemn thing it was to move on through the\nlonely darkness over an unknown abyss, whose depths were secrets as\nprofound as death: it was a great surprise to Scrooge, while thus\nengaged, to hear a hearty laugh. It was a much greater surprise to\nScrooge to recognise it as his own nephew's and to find himself in a\nbright, dry, gleaming room, with the Spirit standing smiling by his\nside, and looking at that same nephew with approving affability!\n\n'Ha, ha!' laughed Scrooge's nephew. 'Ha, ha, ha!'\n\nIf you should happen, by any unlikely chance, to know a man more blessed\nin a laugh than Scrooge's nephew, all I can say is, I should like to\nknow him too. Introduce him to me, and I'll cultivate his acquaintance.\n\nIt is a fair, even-handed, noble adjustment of things, that while there\nis infection in disease and sorrow, there is nothing in the world so\nirresistibly contagious as laughter and good-humour.",
+ "chunk_order_index": 24,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-773d0df1dd25c356f7b771d780583e09": {
+ "tokens": 1200,
+ "content": "'\n\nIf you should happen, by any unlikely chance, to know a man more blessed\nin a laugh than Scrooge's nephew, all I can say is, I should like to\nknow him too. Introduce him to me, and I'll cultivate his acquaintance.\n\nIt is a fair, even-handed, noble adjustment of things, that while there\nis infection in disease and sorrow, there is nothing in the world so\nirresistibly contagious as laughter and good-humour. When Scrooge's\nnephew laughed in this way--holding his sides, rolling his head, and\ntwisting his face into the most extravagant contortions--Scrooge's\nniece, by marriage, laughed as heartily as he. And their assembled\nfriends, being not a bit behindhand, roared out lustily.\n\n'Ha, ha! Ha, ha, ha, ha!'\n\n'He said that Christmas was a humbug, as I live!' cried Scrooge's\nnephew. 'He believed it, too!'\n\n'More shame for him, Fred!' said Scrooge's niece indignantly. Bless\nthose women! they never do anything by halves. They are always in\nearnest.\n\nShe was very pretty; exceedingly pretty. With a dimpled,\nsurprised-looking, capital face; a ripe little mouth, that seemed made\nto be kissed--as no doubt it was; all kinds of good little dots about\nher chin, that melted into one another when she laughed; and the\nsunniest pair of eyes you ever saw in any little creature's head.\nAltogether she was what you would have called provoking, you know; but\nsatisfactory, too. Oh, perfectly satisfactory!\n\n'He's a comical old fellow,' said Scrooge's nephew, 'that's the truth;\nand not so pleasant as he might be. However, his offences carry their\nown punishment, and I have nothing to say against him.'\n\n'I'm sure he is very rich, Fred,' hinted Scrooge's niece. 'At least, you\nalways tell _me_ so.'\n\n'What of that, my dear?' said Scrooge's nephew. 'His wealth is of no use\nto him. He don't do any good with it. He don't make himself comfortable\nwith it. 
He hasn't the satisfaction of thinking--ha, ha, ha!--that he is\never going to benefit Us with it.'\n\n'I have no patience with him,' observed Scrooge's niece. Scrooge's\nniece's sisters, and all the other ladies, expressed the same opinion.\n\n'Oh, I have!' said Scrooge's nephew. 'I am sorry for him; I couldn't be\nangry with him if I tried. Who suffers by his ill whims? Himself always.\nHere he takes it into his head to dislike us, and he won't come and dine\nwith us. What's the consequence? He don't lose much of a dinner.'\n\n'Indeed, I think he loses a very good dinner,' interrupted Scrooge's\nniece. Everybody else said the same, and they must be allowed to have\nbeen competent judges, because they had just had dinner; and with the\ndessert upon the table, were clustered round the fire, by lamplight.\n\n'Well! I am very glad to hear it,' said Scrooge's nephew, 'because I\nhaven't any great faith in these young housekeepers. What do _you_ say,\nTopper?'\n\nTopper had clearly got his eye upon one of Scrooge's niece's sisters,\nfor he answered that a bachelor was a wretched outcast, who had no right\nto express an opinion on the subject. Whereat Scrooge's niece's\nsister--the plump one with the lace tucker: not the one with the\nroses--blushed.\n\n'Do go on, Fred,' said Scrooge's niece, clapping her hands. 'He never\nfinishes what he begins to say! He is such a ridiculous fellow!'\n\nScrooge's nephew revelled in another laugh, and as it was impossible to\nkeep the infection off, though the plump sister tried hard to do it with\naromatic vinegar, his example was unanimously followed.\n\n'I was only going to say,' said Scrooge's nephew, 'that the consequence\nof his taking a dislike to us, and not making merry with us, is, as I\nthink, that he loses some pleasant moments, which could do him no harm.\nI am sure he loses pleasanter companions than he can find in his own\nthoughts, either in his mouldy old office or his dusty chambers. 
I mean\nto give him the same chance every year, whether he likes it or not, for\nI pity him. He may rail at Christmas till he dies, but he can't help\nthinking better of it--I defy him--if he finds me going there, in good\ntemper, year after year, and saying, \"Uncle Scrooge, how are you?\" If it\nonly put him in the vein to leave his poor clerk fifty pounds, _that's_\nsomething; and I think I shook him yesterday.'\n\nIt was their turn to laugh now, at the notion of his shaking Scrooge.\nBut being thoroughly good-natured, and not much caring what they laughed\nat, so that they laughed at any rate, he encouraged them in their\nmerriment, and passed the bottle, joyously.\n\nAfter tea they had some music. For they were a musical family, and knew\nwhat they were about when they sung a Glee or Catch, I can assure you:\nespecially Topper, who could growl away in the bass like a good one",
+ "chunk_order_index": 25,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-43c9bee23adfa77aa9dedd22401bba7d": {
+ "tokens": 1200,
+ "content": "shaking Scrooge.\nBut being thoroughly good-natured, and not much caring what they laughed\nat, so that they laughed at any rate, he encouraged them in their\nmerriment, and passed the bottle, joyously.\n\nAfter tea they had some music. For they were a musical family, and knew\nwhat they were about when they sung a Glee or Catch, I can assure you:\nespecially Topper, who could growl away in the bass like a good one, and\nnever swell the large veins in his forehead, or get red in the face over\nit. Scrooge's niece played well upon the harp; and played, among other\ntunes, a simple little air (a mere nothing: you might learn to whistle\nit in two minutes) which had been familiar to the child who fetched\nScrooge from the boarding-school, as he had been reminded by the Ghost\nof Christmas Past. When this strain of music sounded, all the things\nthat Ghost had shown him came upon his mind; he softened more and more;\nand thought that if he could have listened to it often, years ago, he\nmight have cultivated the kindnesses of life for his own happiness with\nhis own hands, without resorting to the sexton's spade that buried Jacob\nMarley.\n\n[Illustration: _The way he went after that plump sister in the lace\ntucker!_]\n\nBut they didn't devote the whole evening to music. After a while they\nplayed at forfeits; for it is good to be children sometimes, and never\nbetter than at Christmas, when its mighty Founder was a child himself.\nStop! There was first a game at blind man's-buff. Of course there was.\nAnd I no more believe Topper was really blind than I believe he had eyes\nin his boots. My opinion is, that it was a done thing between him and\nScrooge's nephew; and that the Ghost of Christmas Present knew it. The\nway he went after that plump sister in the lace tucker was an outrage on\nthe credulity of human nature. 
Knocking down the fire-irons, tumbling\nover the chairs, bumping up against the piano, smothering himself\namongst the curtains, wherever she went, there went he! He always knew\nwhere the plump sister was. He wouldn't catch anybody else. If you had\nfallen up against him (as some of them did) on purpose, he would have\nmade a feint of endeavouring to seize you, which would have been an\naffront to your understanding, and would instantly have sidled off in\nthe direction of the plump sister. She often cried out that it wasn't\nfair; and it really was not. But when, at last, he caught her; when, in\nspite of all her silken rustlings, and her rapid flutterings past him,\nhe got her into a corner whence there was no escape; then his conduct\nwas the most execrable. For his pretending not to know her; his\npretending that it was necessary to touch her head-dress, and further to\nassure himself of her identity by pressing a certain ring upon her\nfinger, and a certain chain about her neck; was vile, monstrous! No\ndoubt she told him her opinion of it when, another blind man being in\noffice, they were so very confidential together behind the curtains.\n\nScrooge's niece was not one of the blind man's-buff party, but was made\ncomfortable with a large chair and a footstool, in a snug corner where\nthe Ghost and Scrooge were close behind her. But she joined in the\nforfeits, and loved her love to admiration with all the letters of the\nalphabet. 
Likewise at the game of How, When, and Where, she was very\ngreat, and, to the secret joy of Scrooge's nephew, beat her sisters\nhollow; though they were sharp girls too, as Topper could have told you.\nThere might have been twenty people there, young and old, but they all\nplayed, and so did Scrooge; for wholly forgetting, in the interest he\nhad in what was going on, that his voice made no sound in their ears, he\nsometimes came out with his guess quite loud, and very often guessed\nright, too; for the sharpest needle, best Whitechapel, warranted not to\ncut in the eye, was not sharper than Scrooge, blunt as he took it in\nhis head to be.\n\nThe Ghost was greatly pleased to find him in this mood, and looked upon\nhim with such favour that he begged like a boy to be allowed to stay\nuntil the guests departed. But this the Spirit said could not be done.\n\n'Here is a new game,' said Scrooge. 'One half-hour, Spirit, only one!'\n\nIt was a game called Yes and No, where Scrooge's nephew had to think of\nsomething, and the rest must find out what, he only answering to their\nquestions yes or no, as the case was. The brisk fire of questioning to\nwhich he was exposed elicited from him that he was thinking of an\nanimal, a live animal, rather a disagreeable animal, a savage animal, an\nanimal that growled and grunted sometimes, and talked sometimes and\nlived in London, and walked about the streets, and wasn't made a show\nof, and wasn't led by anybody, and didn't live in a menagerie, and was\nnever killed in a market, and was not a horse, or an ass, or a cow, or a\nbull, or a tiger, or a dog, or a pig, or a cat, or",
+ "chunk_order_index": 26,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-e7637ff18c5ded77a68cce0a87883fad": {
+ "tokens": 1200,
+ "content": "animal, a savage animal, an\nanimal that growled and grunted sometimes, and talked sometimes and\nlived in London, and walked about the streets, and wasn't made a show\nof, and wasn't led by anybody, and didn't live in a menagerie, and was\nnever killed in a market, and was not a horse, or an ass, or a cow, or a\nbull, or a tiger, or a dog, or a pig, or a cat, or a bear. At every\nfresh question that was put to him, this nephew burst into a fresh roar\nof laughter; and was so inexpressibly tickled, that he was obliged to\nget up off the sofa and stamp. At last the plump sister, falling into a\nsimilar state, cried out:\n\n'I have found it out! I know what it is, Fred! I know what it is!'\n\n'What is it?' cried Fred.\n\n'It's your uncle Scro-o-o-o-oge.'\n\nWhich it certainly was. Admiration was the universal sentiment, though\nsome objected that the reply to 'Is it a bear?' ought to have been\n'Yes'; inasmuch as an answer in the negative was sufficient to have\ndiverted their thoughts from Mr. Scrooge, supposing they had ever had\nany tendency that way.\n\n'He has given us plenty of merriment, I am sure,' said Fred, 'and it\nwould be ungrateful not to drink his health. Here is a glass of mulled\nwine ready to our hand at the moment; and I say, \"Uncle Scrooge!\"'\n\n'Well! Uncle Scrooge!' they cried.\n\n'A merry Christmas and a happy New Year to the old man, whatever he is!'\nsaid Scrooge's nephew. 'He wouldn't take it from me, but may he have it,\nnevertheless. Uncle Scrooge!'\n\nUncle Scrooge had imperceptibly become so gay and light of heart, that\nhe would have pledged the unconscious company in return, and thanked\nthem in an inaudible speech, if the Ghost had given him time. But the\nwhole scene passed off in the breath of the last word spoken by his\nnephew; and he and the Spirit were again upon their travels.\n\nMuch they saw, and far they went, and many homes they visited, but\nalways with a happy end. 
The Spirit stood beside sick-beds, and they\nwere cheerful; on foreign lands, and they were close at home; by\nstruggling men, and they were patient in their greater hope; by poverty,\nand it was rich. In almshouse, hospital, and gaol, in misery's every\nrefuge, where vain man in his little brief authority had not made fast\nthe door, and barred the Spirit out, he left his blessing and taught\nScrooge his precepts.\n\nIt was a long night, if it were only a night; but Scrooge had his doubts\nof this, because the Christmas holidays appeared to be condensed into\nthe space of time they passed together. It was strange, too, that, while\nScrooge remained unaltered in his outward form, the Ghost grew older,\nclearly older. Scrooge had observed this change, but never spoke of it\nuntil they left a children's Twelfth-Night party, when, looking at the\nSpirit as they stood together in an open place, he noticed that its hair\nwas grey.\n\n'Are spirits' lives so short?' asked Scrooge.\n\n'My life upon this globe is very brief,' replied the Ghost. 'It ends\nto-night.'\n\n'To-night!' cried Scrooge.\n\n'To-night at midnight. Hark! The time is drawing near.'\n\nThe chimes were ringing the three-quarters past eleven at that moment.\n\n'Forgive me if I am not justified in what I ask,' said Scrooge, looking\nintently at the Spirit's robe, 'but I see something strange, and not\nbelonging to yourself, protruding from your skirts. Is it a foot or a\nclaw?'\n\n'It might be a claw, for the flesh there is upon it,' was the Spirit's\nsorrowful reply. 'Look here!'\n\nFrom the foldings of its robe it brought two children, wretched, abject,\nfrightful, hideous, miserable. They knelt down at its feet, and clung\nupon the outside of its garment.\n\n'O Man! look here! Look, look down here!' exclaimed the Ghost.\n\nThey were a boy and girl. Yellow, meagre, ragged, scowling, wolfish, but\nprostrate, too, in their humility. 
Where graceful youth should have\nfilled their features out, and touched them with its freshest tints, a\nstale and shrivelled hand, like that of age, had pinched and twisted\nthem, and pulled them into shreds. Where angels might have sat\nenthroned, devils lurked, and glared out menacing. No change, no\ndegradation, no perversion of humanity in any grade, through all the\nmysteries of wonderful creation, has monsters half so horrible and\ndread.\n\nScrooge started back, appalled. Having them shown to him in this way, he\ntried to say they were fine children, but the words choked themselves,\nrather than be parties to a lie of such enormous magnitude.\n\n'Spirit! are they yours?' Scrooge could say no more.\n\n'They are Man's,' said the Spirit, looking down upon them. 'And they\ncling to me, appealing from their fathers. This boy is Ignorance. This\ngirl is",
+ "chunk_order_index": 27,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-02baee20cc9463dbe08170a8e1043e32": {
+ "tokens": 1200,
+ "content": "oge started back, appalled. Having them shown to him in this way, he\ntried to say they were fine children, but the words choked themselves,\nrather than be parties to a lie of such enormous magnitude.\n\n'Spirit! are they yours?' Scrooge could say no more.\n\n'They are Man's,' said the Spirit, looking down upon them. 'And they\ncling to me, appealing from their fathers. This boy is Ignorance. This\ngirl is Want. Beware of them both, and all of their degree, but most of\nall beware this boy, for on his brow I see that written which is Doom,\nunless the writing be erased. Deny it!' cried the Spirit, stretching out\nhis hand towards the city. 'Slander those who tell it ye! Admit it for\nyour factious purposes, and make it worse! And bide the end!'\n\n'Have they no refuge or resource?' cried Scrooge.\n\n'Are there no prisons?' said the Spirit, turning on him for the last\ntime with his own words. 'Are there no workhouses?'\n\nThe bell struck Twelve.\n\nScrooge looked about him for the Ghost, and saw it not. As the last\nstroke ceased to vibrate, he remembered the prediction of old Jacob\nMarley, and, lifting up his eyes, beheld a solemn Phantom, draped and\nhooded, coming like a mist along the ground towards him.\n\n\nSTAVE FOUR\n\n\n\n\nTHE LAST OF THE SPIRITS\n\n\nThe Phantom slowly, gravely, silently approached. When it came near him,\nScrooge bent down upon his knee; for in the very air through which this\nSpirit moved it seemed to scatter gloom and mystery.\n\nIt was shrouded in a deep black garment, which concealed its head, its\nface, its form, and left nothing of it visible, save one outstretched\nhand. But for this, it would have been difficult to detach its figure\nfrom the night, and separate it from the darkness by which it was\nsurrounded.\n\nHe felt that it was tall and stately when it came beside him, and that\nits mysterious presence filled him with a solemn dread. 
He knew no more,\nfor the Spirit neither spoke nor moved.\n\n'I am in the presence of the Ghost of Christmas Yet to Come?' said\nScrooge.\n\nThe Spirit answered not, but pointed onward with its hand.\n\n'You are about to show me shadows of the things that have not happened,\nbut will happen in the time before us,' Scrooge pursued. 'Is that so,\nSpirit?'\n\nThe upper portion of the garment was contracted for an instant in its\nfolds, as if the Spirit had inclined its head. That was the only answer\nhe received.\n\nAlthough well used to ghostly company by this time, Scrooge feared the\nsilent shape so much that his legs trembled beneath him, and he found\nthat he could hardly stand when he prepared to follow it. The Spirit\npaused a moment, as observing his condition, and giving him time to\nrecover.\n\nBut Scrooge was all the worse for this. It thrilled him with a vague,\nuncertain horror to know that, behind the dusky shroud, there were\nghostly eyes intently fixed upon him, while he, though he stretched his\nown to the utmost, could see nothing but a spectral hand and one great\nheap of black.\n\n'Ghost of the Future!' he exclaimed, 'I fear you more than any spectre\nI have seen. But as I know your purpose is to do me good, and as I hope\nto live to be another man from what I was, I am prepared to bear your\ncompany, and do it with a thankful heart. Will you not speak to me?'\n\nIt gave him no reply. The hand was pointed straight before them.\n\n'Lead on!' said Scrooge. 'Lead on! The night is waning fast, and it is\nprecious time to me, I know. Lead on, Spirit!'\n\nThe Phantom moved away as it had come towards him. Scrooge followed in\nthe shadow of its dress, which bore him up, he thought, and carried him\nalong.\n\nThey scarcely seemed to enter the City; for the City rather seemed to\nspring up about them, and encompass them of its own act. 
But there they\nwere in the heart of it; on 'Change, amongst the merchants, who hurried\nup and down, and chinked the money in their pockets, and conversed in\ngroups, and looked at their watches, and trifled thoughtfully with their\ngreat gold seals, and so forth, as Scrooge had seen them often.\n\nThe Spirit stopped beside one little knot of business men. Observing\nthat the hand was pointed to them, Scrooge advanced to listen to their\ntalk.\n\n'No,' said a great fat man with a monstrous chin, 'I don't know much\nabout it either way. I only know he's dead.'\n\n'When did he die?' inquired another.\n\n'Last night, I believe.'\n\n'Why, what was the matter with him?' asked a third, taking a vast\nquantity of snuff out of a very large snuff-box. 'I thought he'd never\ndie.'\n\n'God knows,' said the first, with a yawn.\n\n'What has he done with his money?' asked a red-faced gentleman with a\npendulous excrescence on the end of his nose, that shook like the gills\nof a turkey-cock.\n\n'I haven't heard,' said the man with the large chin, yawning again.\n'Left it to his company, perhaps. He hasn't left it to _me_. That's all\nI know.'\n\nThis pleasantry was received with a general",
+ "chunk_order_index": 28,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-f1a4fbcf1ed86864b9bfd5e8dba4c683": {
+ "tokens": 1200,
+ "content": ",' said the first, with a yawn.\n\n'What has he done with his money?' asked a red-faced gentleman with a\npendulous excrescence on the end of his nose, that shook like the gills\nof a turkey-cock.\n\n'I haven't heard,' said the man with the large chin, yawning again.\n'Left it to his company, perhaps. He hasn't left it to _me_. That's all\nI know.'\n\nThis pleasantry was received with a general laugh.\n\n'It's likely to be a very cheap funeral,' said the same speaker; 'for,\nupon my life, I don't know of anybody to go to it. Suppose we make up a\nparty, and volunteer?'\n\n'I don't mind going if a lunch is provided,' observed the gentleman with\nthe excrescence on his nose. 'But I must be fed if I make one.'\n\nAnother laugh.\n\n[Illustration:\n\n _\"How are you?\" said one.\n \"How are you?\" returned the other.\n \"Well!\" said the first. \"Old Scratch has got his own at last, hey?\"_\n\n]\n\n'Well, I am the most disinterested among you, after all,' said the first\nspeaker, 'for I never wear black gloves, and I never eat lunch. But I'll\noffer to go if anybody else will. When I come to think of it, I'm not\nat all sure that I wasn't his most particular friend; for we used to\nstop and speak whenever we met. Bye, bye!'\n\nSpeakers and listeners strolled away, and mixed with other groups.\nScrooge knew the men, and looked towards the Spirit for an explanation.\n\nThe phantom glided on into a street. Its finger pointed to two persons\nmeeting. Scrooge listened again, thinking that the explanation might lie\nhere.\n\nHe knew these men, also, perfectly. They were men of business: very\nwealthy, and of great importance. He had made a point always of standing\nwell in their esteem in a business point of view, that is; strictly in a\nbusiness point of view.\n\n'How are you?' said one.\n\n'How are you?' returned the other.\n\n'Well!' said the first, 'old Scratch has got his own at last, hey?'\n\n'So I am told,' returned the second. 
'Cold, isn't it?'\n\n'Seasonable for Christmas-time. You are not a skater, I suppose?'\n\n'No, no. Something else to think of. Good-morning!'\n\nNot another word. That was their meeting, their conversation, and their\nparting.\n\nScrooge was at first inclined to be surprised that the Spirit should\nattach importance to conversations apparently so trivial; but feeling\nassured that they must have some hidden purpose, he set himself to\nconsider what it was likely to be. They could scarcely be supposed to\nhave any bearing on the death of Jacob, his old partner, for that was\nPast, and this Ghost's province was the Future. Nor could he think of\nany one immediately connected with himself to whom he could apply them.\nBut nothing doubting that, to whomsoever they applied, they had some\nlatent moral for his own improvement, he resolved to treasure up every\nword he heard, and everything he saw; and especially to observe the\nshadow of himself when it appeared. For he had an expectation that the\nconduct of his future self would give him the clue he missed, and would\nrender the solution of these riddles easy.\n\nHe looked about in that very place for his own image, but another man\nstood in his accustomed corner; and though the clock pointed to his\nusual time of day for being there, he saw no likeness of himself among\nthe multitudes that poured in through the Porch. It gave him little\nsurprise, however; for he had been revolving in his mind a change of\nlife, and thought and hoped he saw his new-born resolutions carried out\nin this.\n\nQuiet and dark, beside him stood the Phantom, with its outstretched\nhand. When he roused himself from his thoughtful quest, he fancied,\nfrom the turn of the hand, and its situation in reference to himself,\nthat the Unseen Eyes were looking at him keenly. 
It made him shudder,\nand feel very cold.\n\nThey left the busy scene, and went into an obscure part of the town,\nwhere Scrooge had never penetrated before, although he recognised its\nsituation and its bad repute. The ways were foul and narrow; the shop\nand houses wretched; the people half naked, drunken, slipshod, ugly.\nAlleys and archways, like so many cesspools, disgorged their offences of\nsmell and dirt, and life upon the straggling streets; and the whole\nquarter reeked with crime, with filth, and misery.\n\nFar in this den of infamous resort, there was a low-browed, beetling\nshop, below a penthouse roof, where iron, old rags, bottles, bones, and\ngreasy offal were bought. Upon the floor within were piled up heaps of\nrusty keys, nails, chains, hinges, files, scales, weights, and refuse\niron of all kinds. Secrets that few would like to scrutinise were bred\nand hidden in mountains of unseemly rags, masses of corrupted fat, and\nsepulchres of bones. Sitting in among the wares he dealt in, by a\ncharcoal stove made of old bricks, was a grey-haired rascal, nearly\nseventy years of age, who had screened himself from the cold air without\nby a frouzy curtaining of miscellaneous tatters hung upon a line and\nsmoked his pipe in all",
+ "chunk_order_index": 29,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-60fc063c79fc0d4acc91a57e79de04dc": {
+ "tokens": 1200,
+ "content": "to scrutinise were bred\nand hidden in mountains of unseemly rags, masses of corrupted fat, and\nsepulchres of bones. Sitting in among the wares he dealt in, by a\ncharcoal stove made of old bricks, was a grey-haired rascal, nearly\nseventy years of age, who had screened himself from the cold air without\nby a frouzy curtaining of miscellaneous tatters hung upon a line and\nsmoked his pipe in all the luxury of calm retirement.\n\nScrooge and the Phantom came into the presence of this man, just as a\nwoman with a heavy bundle slunk into the shop. But she had scarcely\nentered, when another woman, similarly laden, came in too; and she was\nclosely followed by a man in faded black, who was no less startled by\nthe sight of them than they had been upon the recognition of each other.\nAfter a short period of blank astonishment, in which the old man with\nthe pipe had joined them, they all three burst into a laugh.\n\n'Let the charwoman alone to be the first!' cried she who had entered\nfirst. 'Let the laundress alone to be the second; and let the\nundertaker's man alone to be the third. Look here, old Joe, here's a\nchance! If we haven't all three met here without meaning it!'\n\n'You couldn't have met in a better place,' said old Joe, removing his\npipe from his mouth. 'Come into the parlour. You were made free of it\nlong ago, you know; and the other two an't strangers. Stop till I shut\nthe door of the shop. Ah! how it skreeks! There an't such a rusty bit of\nmetal in the place as its own hinges, I believe; and I'm sure there's no\nsuch old bones here as mine. Ha! ha! We're all suitable to our calling,\nwe're well matched. Come into the parlour. Come into the parlour.'\n\nThe parlour was the space behind the screen of rags. 
The old man raked\nthe fire together with an old stair-rod, and having trimmed his smoky\nlamp (for it was night) with the stem of his pipe, put it into his mouth\nagain.\n\nWhile he did this, the woman who had already spoken threw her bundle on\nthe floor, and sat down in a flaunting manner on a stool, crossing her\nelbows on her knees, and looking with a bold defiance at the other two.\n\n'What odds, then? What odds, Mrs. Dilber?' said the woman. 'Every person\nhas a right to take care of themselves. _He_ always did!'\n\n'That's true, indeed!' said the laundress. 'No man more so.'\n\n'Why, then, don't stand staring as if you was afraid, woman! Who's the\nwiser? We're not going to pick holes in each other's coats, I suppose?'\n\n'No, indeed!' said Mrs. Dilber and the man together. 'We should hope\nnot.'\n\n'Very well then!' cried the woman. 'That's enough. Who's the worse for\nthe loss of a few things like these? Not a dead man, I suppose?'\n\n'No, indeed,' said Mrs. Dilber, laughing.\n\n'If he wanted to keep 'em after he was dead, a wicked old screw,'\npursued the woman, 'why wasn't he natural in his lifetime? If he had\nbeen, he'd have had somebody to look after him when he was struck with\nDeath, instead of lying gasping out his last there, alone by himself.'\n\n'It's the truest word that ever was spoke,' said Mrs. Dilber. 'It's a\njudgment on him.'\n\n'I wish it was a little heavier judgment,' replied the woman: 'and it\nshould have been, you may depend upon it, if I could have laid my hands\non anything else. Open that bundle, old Joe, and let me know the value\nof it. Speak out plain. I'm not afraid to be the first, nor afraid for\nthem to see it. We knew pretty well that we were helping ourselves\nbefore we met here, I believe. It's no sin. Open the bundle, Joe.'\n\nBut the gallantry of her friends would not allow of this; and the man in\nfaded black, mounting the breach first, produced _his_ plunder. It was\nnot extensive. 
A seal or two, a pencil-case, a pair of sleeve-buttons,\nand a brooch of no great value, were all. They were severally examined\nand appraised by old Joe, who chalked the sums he was disposed to give\nfor each upon the wall, and added them up into a total when he found\nthat there was nothing more to come.\n\n'That's your account,' said Joe, 'and I wouldn't give another sixpence,\nif I was to be boiled for not doing it. Who's next?'\n\n\n[Illustration: _\"What do you call this?\" said Joe. \"Bed-curtains.\"_]\n\nMrs. Dilber was next. Sheets and towels, a little wearing apparel, two\nold fashioned silver teaspoons, a pair of sugar-tongs, and a few\nboots. Her account was stated on the wall in the same manner.\n\n'I always give too much to ladies. It's a weakness of mine, and that's\nthe way I ruin myself,' said old Joe. 'That's your account. If you asked\nme for another penny, and made it an open question, I'd repent of being\nso liberal, and knock off half-a-crown.'\n\n'And now undo _my_ bundle, Joe,' said the first woman.\n\nJoe went down on his knees for the",
+ "chunk_order_index": 30,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-e1f29eeadc1d8ab46dc8a0bbd3c56b64": {
+ "tokens": 1200,
+ "content": "Her account was stated on the wall in the same manner.\n\n'I always give too much to ladies. It's a weakness of mine, and that's\nthe way I ruin myself,' said old Joe. 'That's your account. If you asked\nme for another penny, and made it an open question, I'd repent of being\nso liberal, and knock off half-a-crown.'\n\n'And now undo _my_ bundle, Joe,' said the first woman.\n\nJoe went down on his knees for the greater convenience of opening it,\nand, having unfastened a great many knots, dragged out a large heavy\nroll of some dark stuff.\n\n'What do you call this?' said Joe. 'Bed-curtains?'\n\n'Ah!' returned the woman, laughing and leaning forward on her crossed\narms. 'Bed-curtains!'\n\n'You don't mean to say you took 'em down, rings and all, with him lying\nthere?' said Joe.\n\n'Yes, I do,' replied the woman. 'Why not?'\n\n'You were born to make your fortune,' said Joe, 'and you'll certainly do\nit.'\n\n'I certainly shan't hold my hand, when I can get anything in it by\nreaching it out, for the sake of such a man as he was, I promise you,\nJoe,' returned the woman coolly. 'Don't drop that oil upon the blankets,\nnow.'\n\n'His blankets?' asked Joe.\n\n'Whose else's do you think?' replied the woman. 'He isn't likely to take\ncold without 'em, I dare say.'\n\n'I hope he didn't die of anything catching? Eh?' said old Joe, stopping\nin his work, and looking up.\n\n'Don't you be afraid of that,' returned the woman. 'I an't so fond of\nhis company that I'd loiter about him for such things, if he did. Ah!\nyou may look through that shirt till your eyes ache, but you won't find\na hole in it, nor a threadbare place. It's the best he had, and a fine\none too. They'd have wasted it, if it hadn't been for me.'\n\n'What do you call wasting of it?' asked old Joe.\n\n'Putting it on him to be buried in, to be sure,' replied the woman, with\na laugh. 'Somebody was fool enough to do it, but I took it off again. 
If\ncalico an't good enough for such a purpose, it isn't good enough for\nanything. It's quite as becoming to the body. He can't look uglier than\nhe did in that one.'\n\nScrooge listened to this dialogue in horror. As they sat grouped about\ntheir spoil, in the scanty light afforded by the old man's lamp, he\nviewed them with a detestation and disgust which could hardly have been\ngreater, though they had been obscene demons marketing the corpse\nitself.\n\n'Ha, ha!' laughed the same woman when old Joe producing a flannel bag\nwith money in it, told out their several gains upon the ground. 'This\nis the end of it, you see! He frightened every one away from him when he\nwas alive, to profit us when he was dead! Ha, ha, ha!'\n\n'Spirit!' said Scrooge, shuddering from head to foot. 'I see, I see. The\ncase of this unhappy man might be my own. My life tends that way now.\nMerciful heaven, what is this?'\n\nHe recoiled in terror, for the scene had changed, and now he almost\ntouched a bed--a bare, uncurtained bed--on which, beneath a ragged\nsheet, there lay a something covered up, which, though it was dumb,\nannounced itself in awful language.\n\nThe room was very dark, too dark to be observed with any accuracy,\nthough Scrooge glanced round it in obedience to a secret impulse,\nanxious to know what kind of room it was. A pale light, rising in the\nouter air, fell straight upon the bed; and on it, plundered and bereft,\nunwatched, unwept, uncared for, was the body of this man.\n\nScrooge glanced towards the Phantom. Its steady hand was pointed to the\nhead. The cover was so carelessly adjusted that the slightest raising of\nit, the motion of a finger upon Scrooge's part, would have disclosed the\nface. 
He thought of it, felt how easy it would be to do, and longed to\ndo it; but he had no more power to withdraw the veil than to dismiss the\nspectre at his side.\n\nOh, cold, cold, rigid, dreadful Death, set up thine altar here, and\ndress it with such terrors as thou hast at thy command; for this is thy\ndominion! But of the loved, revered, and honoured head thou canst not\nturn one hair to thy dread purposes, or make one feature odious. It is\nnot that the hand is heavy, and will fall down when released; it is not\nthat the heart and pulse are still; but that the hand was open,\ngenerous, and true; the heart brave, warm, and tender, and the pulse a\nman's. Strike, Shadow, strike! And see his good deeds springing from the\nwound, to sow the world with life immortal!\n\nNo voice pronounced these words in Scrooge's ears, and yet he heard them\nwhen he looked upon the bed. He thought, if this man could be raised up\nnow, what would be his foremost thoughts? Avarice, hard dealing, griping\ncares? They have brought him to a rich end, truly!\n\nHe lay in the dark, empty house, with not",
+ "chunk_order_index": 31,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-83ac129dc9b56f84c46760d153d68e93": {
+ "tokens": 1200,
+ "content": "see his good deeds springing from the\nwound, to sow the world with life immortal!\n\nNo voice pronounced these words in Scrooge's ears, and yet he heard them\nwhen he looked upon the bed. He thought, if this man could be raised up\nnow, what would be his foremost thoughts? Avarice, hard dealing, griping\ncares? They have brought him to a rich end, truly!\n\nHe lay in the dark, empty house, with not a man, a woman, or a child to\nsay he was kind to me in this or that, and for the memory of one kind\nword I will be kind to him. A cat was tearing at the door, and there was\na sound of gnawing rats beneath the hearthstone. What _they_ wanted in\nthe room of death, and why they were so restless and disturbed, Scrooge\ndid not dare to think.\n\n'Spirit!' he said, 'this is a fearful place. In leaving it, I shall not\nleave its lesson, trust me. Let us go!'\n\nStill the Ghost pointed with an unmoved finger to the head.\n\n'I understand you,' Scrooge returned, 'and I would do it if I could. But\nI have not the power, Spirit. I have not the power.'\n\nAgain it seemed to look upon him.\n\n'If there is any person in the town who feels emotion caused by this\nman's death,' said Scrooge, quite agonised, 'show that person to me,\nSpirit, I beseech you!'\n\nThe Phantom spread its dark robe before him for a moment, like a wing;\nand, withdrawing it, revealed a room by daylight, where a mother and her\nchildren were.\n\nShe was expecting some one, and with anxious eagerness; for she walked\nup and down the room, started at every sound, looked out from the\nwindow, glanced at the clock, tried, but in vain, to work with her\nneedle, and could hardly bear the voices of her children in their play.\n\nAt length the long-expected knock was heard. She hurried to the door,\nand met her husband; a man whose face was careworn and depressed, though\nhe was young. 
There was a remarkable expression in it now, a kind of\nserious delight of which he felt ashamed, and which he struggled to\nrepress.\n\nHe sat down to the dinner that had been hoarding for him by the fire,\nand when she asked him faintly what news (which was not until after a\nlong silence), he appeared embarrassed how to answer.\n\n'Is it good,' she said, 'or bad?' to help him.\n\n'Bad,' he answered.\n\n'We are quite ruined?'\n\n'No. There is hope yet, Caroline.'\n\n'If _he_ relents,' she said, amazed, 'there is! Nothing is past hope, if\nsuch a miracle has happened.'\n\n'He is past relenting,' said her husband. 'He is dead.'\n\nShe was a mild and patient creature, if her face spoke truth; but she\nwas thankful in her soul to hear it, and she said so with clasped hands.\nShe prayed forgiveness the next moment, and was sorry; but the first was\nthe emotion of her heart.\n\n'What the half-drunken woman, whom I told you of last night, said to me\nwhen I tried to see him and obtain a week's delay--and what I thought\nwas a mere excuse to avoid me--turns out to have been quite true. He was\nnot only very ill, but dying, then.'\n\n'To whom will our debt be transferred?'\n\n'I don't know. But, before that time, we shall be ready with the money;\nand even though we were not, it would be bad fortune indeed to find so\nmerciless a creditor in his successor. We may sleep to-night with light\nhearts, Caroline!'\n\nYes. Soften it as they would, their hearts were lighter. The children's\nfaces, hushed and clustered round to hear what they so little\nunderstood, were brighter; and it was a happier house for this man's\ndeath! 
The only emotion that the Ghost could show him, caused by the\nevent, was one of pleasure.\n\n'Let me see some tenderness connected with a death,' said Scrooge; 'or\nthat dark chamber, Spirit, which we left just now, will be for ever\npresent to me.'\n\nThe Ghost conducted him through several streets familiar to his feet;\nand as they went along, Scrooge looked here and there to find himself,\nbut nowhere was he to be seen. They entered poor Bob Cratchit's house;\nthe dwelling he had visited before; and found the mother and the\nchildren seated round the fire.\n\nQuiet. Very quiet. The noisy little Cratchits were as still as statues\nin one corner, and sat looking up at Peter, who had a book before him.\nThe mother and her daughters were engaged in sewing. But surely they\nwere very quiet!\n\n'\"And he took a child, and set him in the midst of them.\"'\n\nWhere had Scrooge heard those words? He had not dreamed them. The boy\nmust have read them out as he and the Spirit crossed the threshold. Why\ndid he not go on?\n\nThe mother laid her work upon the table, and put her hand up to her\nface.\n\n'The colour hurts my eyes,' she said.\n\nThe colour? Ah, poor Tiny Tim!\n\n'They're better now again,' said Cratchit's wife. 'It makes them weak by\ncandle-light; and I wouldn't show weak eyes to your father when he comes\nhome for the world. It must be near his time.'\n\n'Past it rather,' Peter answered, shutting up his",
+ "chunk_order_index": 32,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-1d4b58de5429cd1261370c231c8673e8": {
+ "tokens": 1200,
+ "content": "go on?\n\nThe mother laid her work upon the table, and put her hand up to her\nface.\n\n'The colour hurts my eyes,' she said.\n\nThe colour? Ah, poor Tiny Tim!\n\n'They're better now again,' said Cratchit's wife. 'It makes them weak by\ncandle-light; and I wouldn't show weak eyes to your father when he comes\nhome for the world. It must be near his time.'\n\n'Past it rather,' Peter answered, shutting up his book. 'But I think he\nhas walked a little slower than he used, these few last evenings,\nmother.'\n\nThey were very quiet again. At last she said, and in a steady, cheerful\nvoice, that only faltered once:\n\n'I have known him walk with--I have known him walk with Tiny Tim upon\nhis shoulder very fast indeed.'\n\n'And so have I,' cried Peter. 'Often.'\n\n'And so have I,' exclaimed another. So had all.\n\n'But he was very light to carry,' she resumed, intent upon her work,\n'and his father loved him so, that it was no trouble, no trouble. And\nthere is your father at the door!'\n\nShe hurried out to meet him; and little Bob in his comforter--he had\nneed of it, poor fellow--came in. His tea was ready for him on the hob,\nand they all tried who should help him to it most. Then the two young\nCratchits got upon his knees, and laid, each child, a little cheek\nagainst his face, as if they said, 'Don't mind it, father. Don't be\ngrieved!'\n\nBob was very cheerful with them, and spoke pleasantly to all the family.\nHe looked at the work upon the table, and praised the industry and speed\nof Mrs. Cratchit and the girls. They would be done long before Sunday,\nhe said.\n\n'Sunday! You went to-day, then, Robert?' said his wife.\n\n'Yes, my dear,' returned Bob. 'I wish you could have gone. It would have\ndone you good to see how green a place it is. But you'll see it often. I\npromised him that I would walk there on a Sunday. My little, little\nchild!' cried Bob. 'My little child!'\n\nHe broke down all at once. He couldn't help it. 
If he could have helped\nit, he and his child would have been farther apart, perhaps, than they\nwere.\n\nHe left the room, and went upstairs into the room above, which was\nlighted cheerfully, and hung with Christmas. There was a chair set close\nbeside the child, and there were signs of some one having been there\nlately. Poor Bob sat down in it, and when he had thought a little and\ncomposed himself, he kissed the little face. He was reconciled to what\nhad happened, and went down again quite happy.\n\nThey drew about the fire, and talked, the girls and mother working\nstill. Bob told them of the extraordinary kindness of Mr. Scrooge's\nnephew, whom he had scarcely seen but once, and who, meeting him in the\nstreet that day, and seeing that he looked a little--'just a little\ndown, you know,' said Bob, inquired what had happened to distress him.\n'On which,' said Bob, 'for he is the pleasantest-spoken gentleman you\never heard, I told him. \"I am heartily sorry for it, Mr. Cratchit,\" he\nsaid, \"and heartily sorry for your good wife.\" By-the-bye, how he ever\nknew _that_ I don't know.'\n\n'Knew what, my dear?'\n\n'Why, that you were a good wife,' replied Bob.\n\n'Everybody knows that,' said Peter.\n\n'Very well observed, my boy!' cried Bob. 'I hope they do. \"Heartily\nsorry,\" he said, \"for your good wife. If I can be of service to you in\nany way,\" he said, giving me his card, \"that's where I live. Pray come\nto me.\" Now, it wasn't,' cried Bob, 'for the sake of anything he might\nbe able to do for us, so much as for his kind way, that this was quite\ndelightful. It really seemed as if he had known our Tiny Tim, and felt\nwith us.'\n\n'I'm sure he's a good soul!' said Mrs. Cratchit.\n\n'You would be sure of it, my dear,' returned Bob, 'if you saw and spoke\nto him. I shouldn't be at all surprised--mark what I say!--if he got\nPeter a better situation.'\n\n'Only hear that, Peter,' said Mrs. 
Cratchit.\n\n'And then,' cried one of the girls, 'Peter will be keeping company with\nsome one, and setting up for himself.'\n\n'Get along with you!' retorted Peter, grinning.\n\n'It's just as likely as not,' said Bob, 'one of these days; though\nthere's plenty of time for that, my dear. But, however and whenever we\npart from one another, I am sure we shall none of us forget poor Tiny\nTim--shall we--or this first parting that there was among us?'\n\n'Never, father!' cried they all.\n\n'And I know,' said Bob, 'I know, my dears, that when we recollect how\npatient and how mild he was; although he was a little, little child; we\nshall not quarrel easily among ourselves, and forget poor Tiny Tim in\ndoing it.'\n\n'No, never, father!' they all cried again.\n\n'I am very happy,' said little Bob, 'I am very happy",
+ "chunk_order_index": 33,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-91572b3cd42786ba4cb2f180ca25cbf4": {
+ "tokens": 1200,
+ "content": "there was among us?'\n\n'Never, father!' cried they all.\n\n'And I know,' said Bob, 'I know, my dears, that when we recollect how\npatient and how mild he was; although he was a little, little child; we\nshall not quarrel easily among ourselves, and forget poor Tiny Tim in\ndoing it.'\n\n'No, never, father!' they all cried again.\n\n'I am very happy,' said little Bob, 'I am very happy!'\n\nMrs. Cratchit kissed him, his daughters kissed him, the two young\nCratchits kissed him, and Peter and himself shook hands. Spirit of Tiny\nTim, thy childish essence was from God!\n\n'Spectre,' said Scrooge, 'something informs me that our parting moment\nis at hand. I know it but I know not how. Tell me what man that was whom\nwe saw lying dead?'\n\nThe Ghost of Christmas Yet to Come conveyed him, as before--though at a\ndifferent time, he thought: indeed there seemed no order in these latter\nvisions, save that they were in the Future--into the resorts of business\nmen, but showed him not himself. Indeed, the Spirit did not stay for\nanything, but went straight on, as to the end just now desired, until\nbesought by Scrooge to tarry for a moment.\n\n'This court,' said Scrooge, 'through which we hurry now, is where my\nplace of occupation is, and has been for a length of time. I see the\nhouse. Let me behold what I shall be in days to come.'\n\nThe Spirit stopped; the hand was pointed elsewhere.\n\n'The house is yonder,' Scrooge exclaimed. 'Why do you point away?'\n\nThe inexorable finger underwent no change.\n\nScrooge hastened to the window of his office, and looked in. It was an\noffice still, but not his. The furniture was not the same, and the\nfigure in the chair was not himself. The Phantom pointed as before.\n\nHe joined it once again, and, wondering why and whither he had gone,\naccompanied it until they reached an iron gate. He paused to look round\nbefore entering.\n\nA churchyard. 
Here, then, the wretched man, whose name he had now to\nlearn, lay underneath the ground. It was a worthy place. Walled in by\nhouses; overrun by grass and weeds, the growth of vegetation's death,\nnot life; choked up with too much burying; fat with repleted appetite. A\nworthy place!\n\nThe Spirit stood among the graves, and pointed down to One. He advanced\ntowards it trembling. The Phantom was exactly as it had been, but he\ndreaded that he saw new meaning in its solemn shape.\n\n'Before I draw nearer to that stone to which you point,' said Scrooge,\n'answer me one question. Are these the shadows of the things that Will\nbe, or are they shadows of the things that May be only?'\n\nStill the Ghost pointed downward to the grave by which it stood.\n\n'Men's courses will foreshadow certain ends, to which, if persevered in,\nthey must lead,' said Scrooge. 'But if the courses be departed from, the\nends will change. Say it is thus with what you show me!'\n\nThe Spirit was immovable as ever.\n\nScrooge crept towards it, trembling as he went; and, following the\nfinger, read upon the stone of the neglected grave his own name,\nEBENEZER SCROOGE.\n\n'Am I that man who lay upon the bed?' he cried upon his knees.\n\nThe finger pointed from the grave to him, and back again.\n\n'No, Spirit! Oh no, no!'\n\nThe finger still was there.\n\n'Spirit!' he cried, tight clutching at its robe, 'hear me! I am not the\nman I was. I will not be the man I must have been but for this\nintercourse. Why show me this, if I am past all hope?'\n\nFor the first time the hand appeared to shake.\n\n'Good Spirit,' he pursued, as down upon the ground he fell before it,\n'your nature intercedes for me, and pities me. Assure me that I yet may\nchange these shadows you have shown me by an altered life?'\n\nThe kind hand trembled.\n\n'I will honour Christmas in my heart, and try to keep it all the year. I\nwill live in the Past, the Present, and the Future. 
The Spirits of all\nThree shall strive within me. I will not shut out the lessons that they\nteach. Oh, tell me I may sponge away the writing on this stone!'\n\nIn his agony he caught the spectral hand. It sought to free itself, but\nhe was strong in his entreaty, and detained it. The Spirit stronger yet,\nrepulsed him.\n\nHolding up his hands in a last prayer to have his fate reversed, he saw\nan alteration in the Phantom's hood and dress. It shrunk, collapsed, and\ndwindled down into a bedpost.\n\n\nSTAVE FIVE\n\n\n[Illustration]\n\n\n\n\nTHE END OF IT\n\n\nYes! and the bedpost was his own. The bed was his own, the room was his\nown. Best and happiest of all, the Time before him was his own, to make\namends in!\n\n'I will live in the Past, the Present, and the Future!' Scrooge repeated\nas he scrambled out of bed. 'The Spirits of all Three shall strive\nwithin me. O Jacob Marley! Heaven and the Christmas Time be praised for\nthis! I say it on my knees, old Jacob; on my knees!'",
+ "chunk_order_index": 34,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-90b95db5c53e4364a6bee36b7aa4d70f": {
+ "tokens": 1200,
+ "content": "was his own, the room was his\nown. Best and happiest of all, the Time before him was his own, to make\namends in!\n\n'I will live in the Past, the Present, and the Future!' Scrooge repeated\nas he scrambled out of bed. 'The Spirits of all Three shall strive\nwithin me. O Jacob Marley! Heaven and the Christmas Time be praised for\nthis! I say it on my knees, old Jacob; on my knees!'\n\nHe was so fluttered and so glowing with his good intentions, that his\nbroken voice would scarcely answer to his call. He had been sobbing\nviolently in his conflict with the Spirit, and his face was wet with\ntears.\n\n'They are not torn down,' cried Scrooge, folding one of his bed-curtains\nin his arms, 'They are not torn down, rings and all. They are here--I am\nhere--the shadows of the things that would have been may be dispelled.\nThey will be. I know they will!'\n\nHis hands were busy with his garments all this time: turning them inside\nout, putting them on upside down, tearing them, mislaying them, making\nthem parties to every kind of extravagance.\n\n'I don't know what to do!' cried Scrooge, laughing and crying in the\nsame breath, and making a perfect Laocoon of himself with his stockings.\n'I am as light as a feather, I am as happy as an angel, I am as merry as\na schoolboy, I am as giddy as a drunken man. A merry Christmas to\neverybody! A happy New Year to all the world! Hallo here! Whoop! Hallo!'\n\nHe had frisked into the sitting-room, and was now standing there,\nperfectly winded.\n\n'There's the saucepan that the gruel was in!' cried Scrooge, starting\noff again, and going round the fireplace. 'There's the door by which the\nGhost of Jacob Marley entered! There's the corner where the Ghost of\nChristmas Present sat! There's the window where I saw the wandering\nSpirits! It's all right, it's all true, it all happened. Ha, ha, ha!'\n\nReally, for a man who had been out of practice for so many years, it was\na splendid laugh, a most illustrious laugh. 
The father of a long, long\nline of brilliant laughs!\n\n'I don't know what day of the month it is,' said Scrooge. 'I don't know\nhow long I have been among the Spirits. I don't know anything. I'm quite\na baby. Never mind. I don't care. I'd rather be a baby. Hallo! Whoop!\nHallo here!'\n\nHe was checked in his transports by the churches ringing out the\nlustiest peals he had ever heard. Clash, clash, hammer; ding, dong,\nbell! Bell, dong, ding; hammer, clash, clash! Oh, glorious, glorious!\n\nRunning to the window, he opened it, and put out his head. No fog, no\nmist; clear, bright, jovial, stirring, cold; cold, piping for the blood\nto dance to; golden sunlight; heavenly sky; sweet fresh air; merry\nbells. Oh, glorious! Glorious!\n\n'What's to-day?' cried Scrooge, calling downward to a boy in Sunday\nclothes, who perhaps had loitered in to look about him.\n\n'EH?' returned the boy with all his might of wonder.\n\n'What's to-day, my fine fellow?' said Scrooge.\n\n'To-day!' replied the boy. 'Why, CHRISTMAS DAY.'\n\n'It's Christmas Day!' said Scrooge to himself. 'I haven't missed it. The\nSpirits have done it all in one night. They can do anything they like.\nOf course they can. Of course they can. Hallo, my fine fellow!'\n\n'Hallo!' returned the boy.\n\n'Do you know the poulterer's in the next street but one, at the corner?'\nScrooge inquired.\n\n'I should hope I did,' replied the lad.\n\n'An intelligent boy!' said Scrooge. 'A remarkable boy! Do you know\nwhether they've sold the prize turkey that was hanging up there?--Not\nthe little prize turkey: the big one?'\n\n'What! the one as big as me?' returned the boy.\n\n'What a delightful boy!' said Scrooge. 'It's a pleasure to talk to him.\nYes, my buck!'\n\n'It's hanging there now,' replied the boy.\n\n'Is it?' said Scrooge. 'Go and buy it.'\n\n'Walk-ER!' exclaimed the boy.\n\n'No, no,' said Scrooge. 'I am in earnest. 
Go and buy it, and tell 'em to\nbring it here, that I may give them the directions where to take it.\nCome back with the man, and I'll give you a shilling. Come back with him\nin less than five minutes, and I'll give you half-a-crown!'\n\nThe boy was off like a shot. He must have had a steady hand at a trigger\nwho could have got a shot off half as fast.\n\n'I'll send it to Bob Cratchit's,' whispered Scrooge, rubbing his hands,\nand splitting with a laugh. 'He shan't know who sends it. It's twice the\nsize of Tiny Tim. Joe Miller never made such a joke as sending it to\nBob's will be!'\n\nThe hand in which he wrote the address was not a steady one; but write\nit he did, somehow, and went downstairs to open the street-door, ready\nfor the coming of the poulterer's man. As he stood there, waiting his\narrival, the",
+ "chunk_order_index": 35,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-8f738cb25d60a7dab782162d9d0a25ed": {
+ "tokens": 1200,
+ "content": "hands,\nand splitting with a laugh. 'He shan't know who sends it. It's twice the\nsize of Tiny Tim. Joe Miller never made such a joke as sending it to\nBob's will be!'\n\nThe hand in which he wrote the address was not a steady one; but write\nit he did, somehow, and went downstairs to open the street-door, ready\nfor the coming of the poulterer's man. As he stood there, waiting his\narrival, the knocker caught his eye.\n\n'I shall love it as long as I live!' cried Scrooge, patting it with his\nhand. 'I scarcely ever looked at it before. What an honest expression it\nhas in its face! It's a wonderful knocker!--Here's the turkey. Hallo!\nWhoop! How are you! Merry Christmas!'\n\nIt _was_ a turkey! He never could have stood upon his legs, that bird.\nHe would have snapped 'em short off in a minute, like sticks of\nsealing-wax.\n\n'Why, it's impossible to carry that to Camden Town,' said Scrooge. 'You\nmust have a cab.'\n\nThe chuckle with which he said this, and the chuckle with which he paid\nfor the turkey, and the chuckle with which he paid for the cab, and the\nchuckle with which he recompensed the boy, were only to be exceeded by\nthe chuckle with which he sat down breathless in his chair again, and\nchuckled till he cried.\n\nShaving was not an easy task, for his hand continued to shake very much;\nand shaving requires attention, even when you don't dance while you are\nat it. But if he had cut the end of his nose off, he would have put a\npiece of sticking-plaster over it, and been quite satisfied.\n\nHe dressed himself 'all in his best,' and at last got out into the\nstreets. The people were by this time pouring forth, as he had seen them\nwith the Ghost of Christmas Present; and, walking with his hands behind\nhim, Scrooge regarded every one with a delighted smile. He looked so\nirresistibly pleasant, in a word, that three or four good-humoured\nfellows said, 'Good-morning, sir! A merry Christmas to you!' 
And Scrooge\nsaid often afterwards that, of all the blithe sounds he had ever heard,\nthose were the blithest in his ears.\n\nHe had not gone far when, coming on towards him, he beheld the portly\ngentleman who had walked into his counting-house the day before, and\nsaid, 'Scrooge and Marley's, I believe?' It sent a pang across his heart\nto think how this old gentleman would look upon him when they met; but\nhe knew what path lay straight before him, and he took it.\n\n'My dear sir,' said Scrooge, quickening his pace, and taking the old\ngentleman by both his hands, 'how do you do? I hope you succeeded\nyesterday. It was very kind of you. A merry Christmas to you, sir!'\n\n'Mr. Scrooge?'\n\n'Yes,' said Scrooge. 'That is my name, and I fear it may not be pleasant\nto you. Allow me to ask your pardon. And will you have the goodness----'\nHere Scrooge whispered in his ear.\n\n'Lord bless me!' cried the gentleman, as if his breath were taken away.\n'My dear Mr. Scrooge, are you serious?'\n\n'If you please,' said Scrooge. 'Not a farthing less. A great many\nback-payments are included in it, I assure you. Will you do me that\nfavour?'\n\n'My dear sir,' said the other, shaking hands with him, 'I don't know\nwhat to say to such munifi----'\n\n'Don't say anything, please,' retorted Scrooge. 'Come and see me. Will\nyou come and see me?'\n\n'I will!' cried the old gentleman. And it was clear he meant to do it.\n\n'Thankee,' said Scrooge. 'I am much obliged to you. I thank you fifty\ntimes. Bless you!'\n\nHe went to church, and walked about the streets, and watched the people\nhurrying to and fro, and patted the children on the head, and questioned\nbeggars, and looked down into the kitchens of houses, and up to the\nwindows; and found that everything could yield him pleasure. He had\nnever dreamed that any walk--that anything--could give him so much\nhappiness. 
In the afternoon he turned his steps towards his nephew's\nhouse.\n\nHe passed the door a dozen times before he had the courage to go up and\nknock. But he made a dash and did it.\n\n'Is your master at home, my dear?' said Scrooge to the girl. 'Nice girl!\nVery.'\n\n'Yes, sir.'\n\n'Where is he, my love?' said Scrooge.\n\n'He's in the dining-room, sir, along with mistress. I'll show you\nupstairs, if you please.'\n\n'Thankee. He knows me,' said Scrooge, with his hand already on the\ndining-room lock. 'I'll go in here, my dear.'\n\nHe turned it gently, and sidled his face in round the door. They were\nlooking at the table (which was spread out in great array); for these\nyoung housekeepers are always nervous on such points, and like to see\nthat everything is right.\n\n'Fred!' said Scrooge.\n\nDear heart alive, how his niece by marriage started! Scrooge had\nforgotten,",
+ "chunk_order_index": 36,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-94bea83e7153fc2064aa382b494936a1": {
+ "tokens": 1200,
+ "content": "hand already on the\ndining-room lock. 'I'll go in here, my dear.'\n\nHe turned it gently, and sidled his face in round the door. They were\nlooking at the table (which was spread out in great array); for these\nyoung housekeepers are always nervous on such points, and like to see\nthat everything is right.\n\n'Fred!' said Scrooge.\n\nDear heart alive, how his niece by marriage started! Scrooge had\nforgotten, for the moment, about her sitting in the corner with the\nfootstool, or he wouldn't have done it on any account.\n\n'Why, bless my soul!' cried Fred, 'who's that?'\n\n[Illustration: _\"It's I, your uncle Scrooge. I have come to dinner. Will\nyou let me in, Fred?\"_]\n\n'It's I. Your uncle Scrooge. I have come to dinner. Will you let me in,\nFred?'\n\nLet him in! It is a mercy he didn't shake his arm off. He was at home in\nfive minutes. Nothing could be heartier. His niece looked just the same.\nSo did Topper when _he_ came. So did the plump sister when _she_ came.\nSo did every one when _they_ came. Wonderful party, wonderful games,\nwonderful unanimity, won-der-ful happiness!\n\nBut he was early at the office next morning. Oh, he was early there! If\nhe could only be there first, and catch Bob Cratchit coming late! That\nwas the thing he had set his heart upon.\n\nAnd he did it; yes, he did! The clock struck nine. No Bob. A quarter\npast. No Bob. He was full eighteen minutes and a half behind his time.\nScrooge sat with his door wide open, that he might see him come into the\ntank.\n\nHis hat was off before he opened the door; his comforter too. He was on\nhis stool in a jiffy, driving away with his pen, as if he were trying to\novertake nine o'clock.\n\n'Hallo!' growled Scrooge in his accustomed voice as near as he could\nfeign it. 'What do you mean by coming here at this time of day?'\n\n'I am very sorry, sir,' said Bob. 'I _am_ behind my time.'\n\n'You are!' repeated Scrooge. 'Yes, I think you are. 
Step this way, sir,\nif you please.'\n\n'It's only once a year, sir,' pleaded Bob, appearing from the tank. 'It\nshall not be repeated. I was making rather merry yesterday, sir.'\n\n'Now, I'll tell you what, my friend,' said Scrooge. 'I am not going to\nstand this sort of thing any longer. And therefore,' he continued,\nleaping from his stool, and giving Bob such a dig in the waistcoat that\nhe staggered back into the tank again--'and therefore I am about to\nraise your salary!'\n\nBob trembled, and got a little nearer to the ruler. He had a momentary\nidea of knocking Scrooge down with it, holding him, and calling to the\npeople in the court for help and a strait-waistcoat.\n\n'A merry Christmas, Bob!' said Scrooge, with an earnestness that could\nnot be mistaken, as he clapped him on the back. 'A merrier Christmas,\nBob, my good fellow, than I have given you for many a year! I'll raise\nyour salary, and endeavour to assist your struggling family, and we will\ndiscuss your affairs this very afternoon, over a Christmas bowl of\nsmoking bishop, Bob! Make up the fires and buy another coal-scuttle\nbefore you dot another i, Bob Cratchit!'\n\n[Illustration: _\"Now, I'll tell you what, my friend,\" said Scrooge. \"I\nam not going to stand this sort of thing any longer.\"_]\n\nScrooge was better than his word. He did it all, and infinitely more;\nand to Tiny Tim, who did NOT die, he was a second father. He became as\ngood a friend, as good a master, and as good a man as the good old\nCity knew, or any other good old city, town, or borough in the good old\nworld. 
Some people laughed to see the alteration in him, but he let them\nlaugh, and little heeded them; for he was wise enough to know that\nnothing ever happened on this globe, for good, at which some people did\nnot have their fill of laughter in the outset; and knowing that such as\nthese would be blind anyway, he thought it quite as well that they\nshould wrinkle up their eyes in grins as have the malady in less\nattractive forms. His own heart laughed, and that was quite enough for\nhim.\n\nHe had no further intercourse with Spirits, but lived upon the\nTotal-Abstinence Principle ever afterwards; and it was always said of\nhim that he knew how to keep Christmas well, if any man alive possessed\nthe knowledge. May that be truly said of us, and all of us! And so, as\nTiny Tim observed, God bless Us, Every One!\n\n[Illustration]\n\n+---------------------------------------------------------------+\n|Transcriber's note: The Contents were added by the transcriber.|\n+---------------------------------------------------------------+\n\n\n\n\n\n\n\n*** END OF THE PROJECT GUTENBERG EBOOK A CHRISTMAS CAROL ***\n\n\n \n\nUpdated editions will replace the previous one—the old editions will\nbe renamed.\n\nCreating the works from print editions not protected by U.S. copyright\nlaw means that no one owns a United States copyright in these works,\nso the Foundation (and you",
+ "chunk_order_index": 37,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-194cd9680f4967f48bea89678f7ece21": {
+ "tokens": 1200,
+ "content": "One!\n\n[Illustration]\n\n+---------------------------------------------------------------+\n|Transcriber's note: The Contents were added by the transcriber.|\n+---------------------------------------------------------------+\n\n\n\n\n\n\n\n*** END OF THE PROJECT GUTENBERG EBOOK A CHRISTMAS CAROL ***\n\n\n \n\nUpdated editions will replace the previous one—the old editions will\nbe renamed.\n\nCreating the works from print editions not protected by U.S. copyright\nlaw means that no one owns a United States copyright in these works,\nso the Foundation (and you!) can copy and distribute it in the United\nStates without permission and without paying copyright\nroyalties. Special rules, set forth in the General Terms of Use part\nof this license, apply to copying and distributing Project\nGutenberg™ electronic works to protect the PROJECT GUTENBERG™\nconcept and trademark. Project Gutenberg is a registered trademark,\nand may not be used if you charge for an eBook, except by following\nthe terms of the trademark license, including paying royalties for use\nof the Project Gutenberg trademark. If you do not charge anything for\ncopies of this eBook, complying with the trademark license is very\neasy. You may use this eBook for nearly any purpose such as creation\nof derivative works, reports, performances and research. Project\nGutenberg eBooks may be modified and printed and given away—you may\ndo practically ANYTHING in the United States with eBooks not protected\nby U.S. copyright law. 
Redistribution is subject to the trademark\nlicense, especially commercial redistribution.\n\n\nSTART: FULL LICENSE\n\nTHE FULL PROJECT GUTENBERG LICENSE\n\nPLEASE READ THIS BEFORE YOU DISTRIBUTE OR USE THIS WORK\n\nTo protect the Project Gutenberg™ mission of promoting the free\ndistribution of electronic works, by using or distributing this work\n(or any other work associated in any way with the phrase “Project\nGutenberg”), you agree to comply with all the terms of the Full\nProject Gutenberg™ License available with this file or online at\nwww.gutenberg.org/license.\n\nSection 1. General Terms of Use and Redistributing Project Gutenberg™\nelectronic works\n\n1.A. By reading or using any part of this Project Gutenberg™\nelectronic work, you indicate that you have read, understand, agree to\nand accept all the terms of this license and intellectual property\n(trademark/copyright) agreement. If you do not agree to abide by all\nthe terms of this agreement, you must cease using and return or\ndestroy all copies of Project Gutenberg™ electronic works in your\npossession. If you paid a fee for obtaining a copy of or access to a\nProject Gutenberg™ electronic work and you do not agree to be bound\nby the terms of this agreement, you may obtain a refund from the person\nor entity to whom you paid the fee as set forth in paragraph 1.E.8.\n\n1.B. “Project Gutenberg” is a registered trademark. It may only be\nused on or associated in any way with an electronic work by people who\nagree to be bound by the terms of this agreement. There are a few\nthings that you can do with most Project Gutenberg™ electronic works\neven without complying with the full terms of this agreement. See\nparagraph 1.C below. There are a lot of things you can do with Project\nGutenberg™ electronic works if you follow the terms of this\nagreement and help preserve free future access to Project Gutenberg™\nelectronic works. See paragraph 1.E below.\n\n1.C. 
The Project Gutenberg Literary Archive Foundation (“the\nFoundation” or PGLAF), owns a compilation copyright in the collection\nof Project Gutenberg™ electronic works. Nearly all the individual\nworks in the collection are in the public domain in the United\nStates. If an individual work is unprotected by copyright law in the\nUnited States and you are located in the United States, we do not\nclaim a right to prevent you from copying, distributing, performing,\ndisplaying or creating derivative works based on the work as long as\nall references to Project Gutenberg are removed. Of course, we hope\nthat you will support the Project Gutenberg™ mission of promoting\nfree access to electronic works by freely sharing Project Gutenberg™\nworks in compliance with the terms of this agreement for keeping the\nProject Gutenberg™ name associated with the work. You can easily\ncomply with the terms of this agreement by keeping this work in the\nsame format with its attached full Project Gutenberg™ License when\nyou share it without charge with others.\n\n1.D. The copyright laws of the place where you are located also govern\nwhat you can do with this work. Copyright laws in most countries are\nin a constant state of change. If you are outside the United States,\ncheck the laws of your country in addition to the terms of this\nagreement before downloading, copying, displaying, performing,\ndistributing or creating derivative works based on this work or any\nother Project Gutenberg™ work. The Foundation makes no\nrepresentations concerning the copyright status of any work in any\ncountry other than the United States.\n\n1.E. Unless you have removed all references to Project Gutenberg:\n\n1.E.1. 
The following sentence, with active links to, or other\nimmediate access to, the full Project Gutenberg™ License must appear\nprominently whenever any copy of a Project Gutenberg™ work (any work\non which the phrase “Project Gutenberg” appears, or with which the\nphrase “Project Gutenberg” is associated) is accessed, displayed,\nperformed, viewed, copied or distributed:\n\n This eBook is for the use of anyone anywhere in the United States and most\n other parts of the world at no cost and with almost no restrictions\n whatsoever. You may copy it, give it away or re-use it under the terms\n of the Project Gutenberg License included with this eBook or online\n at www.gutenberg.org",
+ "chunk_order_index": 38,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-af4ae173c561e4594c5aea96198cce9d": {
+ "tokens": 1200,
+ "content": ", or with which the\nphrase “Project Gutenberg” is associated) is accessed, displayed,\nperformed, viewed, copied or distributed:\n\n This eBook is for the use of anyone anywhere in the United States and most\n other parts of the world at no cost and with almost no restrictions\n whatsoever. You may copy it, give it away or re-use it under the terms\n of the Project Gutenberg License included with this eBook or online\n at www.gutenberg.org. If you\n are not located in the United States, you will have to check the laws\n of the country where you are located before using this eBook.\n \n1.E.2. If an individual Project Gutenberg™ electronic work is\nderived from texts not protected by U.S. copyright law (does not\ncontain a notice indicating that it is posted with permission of the\ncopyright holder), the work can be copied and distributed to anyone in\nthe United States without paying any fees or charges. If you are\nredistributing or providing access to a work with the phrase “Project\nGutenberg” associated with or appearing on the work, you must comply\neither with the requirements of paragraphs 1.E.1 through 1.E.7 or\nobtain permission for the use of the work and the Project Gutenberg™\ntrademark as set forth in paragraphs 1.E.8 or 1.E.9.\n\n1.E.3. If an individual Project Gutenberg™ electronic work is posted\nwith the permission of the copyright holder, your use and distribution\nmust comply with both paragraphs 1.E.1 through 1.E.7 and any\nadditional terms imposed by the copyright holder. Additional terms\nwill be linked to the Project Gutenberg™ License for all works\nposted with the permission of the copyright holder found at the\nbeginning of this work.\n\n1.E.4. Do not unlink or detach or remove the full Project Gutenberg™\nLicense terms from this work, or any files containing a part of this\nwork or any other work associated with Project Gutenberg™.\n\n1.E.5. 
Do not copy, display, perform, distribute or redistribute this\nelectronic work, or any part of this electronic work, without\nprominently displaying the sentence set forth in paragraph 1.E.1 with\nactive links or immediate access to the full terms of the Project\nGutenberg™ License.\n\n1.E.6. You may convert to and distribute this work in any binary,\ncompressed, marked up, nonproprietary or proprietary form, including\nany word processing or hypertext form. However, if you provide access\nto or distribute copies of a Project Gutenberg™ work in a format\nother than “Plain Vanilla ASCII” or other format used in the official\nversion posted on the official Project Gutenberg™ website\n(www.gutenberg.org), you must, at no additional cost, fee or expense\nto the user, provide a copy, a means of exporting a copy, or a means\nof obtaining a copy upon request, of the work in its original “Plain\nVanilla ASCII” or other form. Any alternate format must include the\nfull Project Gutenberg™ License as specified in paragraph 1.E.1.\n\n1.E.7. Do not charge a fee for access to, viewing, displaying,\nperforming, copying or distributing any Project Gutenberg™ works\nunless you comply with paragraph 1.E.8 or 1.E.9.\n\n1.E.8. You may charge a reasonable fee for copies of or providing\naccess to or distributing Project Gutenberg™ electronic works\nprovided that:\n\n • You pay a royalty fee of 20% of the gross profits you derive from\n the use of Project Gutenberg™ works calculated using the method\n you already use to calculate your applicable taxes. The fee is owed\n to the owner of the Project Gutenberg™ trademark, but he has\n agreed to donate royalties under this paragraph to the Project\n Gutenberg Literary Archive Foundation. Royalty payments must be paid\n within 60 days following each date on which you prepare (or are\n legally required to prepare) your periodic tax returns. 
Royalty\n payments should be clearly marked as such and sent to the Project\n Gutenberg Literary Archive Foundation at the address specified in\n Section 4, “Information about donations to the Project Gutenberg\n Literary Archive Foundation.”\n \n • You provide a full refund of any money paid by a user who notifies\n you in writing (or by e-mail) within 30 days of receipt that s/he\n does not agree to the terms of the full Project Gutenberg™\n License. You must require such a user to return or destroy all\n copies of the works possessed in a physical medium and discontinue\n all use of and all access to other copies of Project Gutenberg™\n works.\n \n • You provide, in accordance with paragraph 1.F.3, a full refund of\n any money paid for a work or a replacement copy, if a defect in the\n electronic work is discovered and reported to you within 90 days of\n receipt of the work.\n \n • You comply with all other terms of this agreement for free\n distribution of Project Gutenberg™ works.\n \n\n1.E.9. If you wish to charge a fee or distribute a Project\nGutenberg™ electronic work or group of works on different terms than\nare set forth in this agreement, you must obtain permission in writing\nfrom the Project Gutenberg Literary Archive Foundation, the manager of\nthe Project Gutenberg™ trademark. Contact the Foundation as set\nforth in Section 3 below.\n\n1.F.\n\n1.F.1. Project Gutenberg volunteers and employees expend considerable\neffort to identify, do copyright research on, transcribe and proofread\nworks not protected by U.S. copyright law",
+ "chunk_order_index": 39,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-c51b3dc560b408cce12cdfe5c9c94f57": {
+ "tokens": 1200,
+ "content": "utenberg™ electronic work or group of works on different terms than\nare set forth in this agreement, you must obtain permission in writing\nfrom the Project Gutenberg Literary Archive Foundation, the manager of\nthe Project Gutenberg™ trademark. Contact the Foundation as set\nforth in Section 3 below.\n\n1.F.\n\n1.F.1. Project Gutenberg volunteers and employees expend considerable\neffort to identify, do copyright research on, transcribe and proofread\nworks not protected by U.S. copyright law in creating the Project\nGutenberg™ collection. Despite these efforts, Project Gutenberg™\nelectronic works, and the medium on which they may be stored, may\ncontain “Defects,” such as, but not limited to, incomplete, inaccurate\nor corrupt data, transcription errors, a copyright or other\nintellectual property infringement, a defective or damaged disk or\nother medium, a computer virus, or computer codes that damage or\ncannot be read by your equipment.\n\n1.F.2. LIMITED WARRANTY, DISCLAIMER OF DAMAGES - Except for the “Right\nof Replacement or Refund” described in paragraph 1.F.3, the Project\nGutenberg Literary Archive Foundation, the owner of the Project\nGutenberg™ trademark, and any other party distributing a Project\nGutenberg™ electronic work under this agreement, disclaim all\nliability to you for damages, costs and expenses, including legal\nfees. YOU AGREE THAT YOU HAVE NO REMEDIES FOR NEGLIGENCE, STRICT\nLIABILITY, BREACH OF WARRANTY OR BREACH OF CONTRACT EXCEPT THOSE\nPROVIDED IN PARAGRAPH 1.F.3. YOU AGREE THAT THE FOUNDATION, THE\nTRADEMARK OWNER, AND ANY DISTRIBUTOR UNDER THIS AGREEMENT WILL NOT BE\nLIABLE TO YOU FOR ACTUAL, DIRECT, INDIRECT, CONSEQUENTIAL, PUNITIVE OR\nINCIDENTAL DAMAGES EVEN IF YOU GIVE NOTICE OF THE POSSIBILITY OF SUCH\nDAMAGE.\n\n1.F.3. 
LIMITED RIGHT OF REPLACEMENT OR REFUND - If you discover a\ndefect in this electronic work within 90 days of receiving it, you can\nreceive a refund of the money (if any) you paid for it by sending a\nwritten explanation to the person you received the work from. If you\nreceived the work on a physical medium, you must return the medium\nwith your written explanation. The person or entity that provided you\nwith the defective work may elect to provide a replacement copy in\nlieu of a refund. If you received the work electronically, the person\nor entity providing it to you may choose to give you a second\nopportunity to receive the work electronically in lieu of a refund. If\nthe second copy is also defective, you may demand a refund in writing\nwithout further opportunities to fix the problem.\n\n1.F.4. Except for the limited right of replacement or refund set forth\nin paragraph 1.F.3, this work is provided to you ‘AS-IS’, WITH NO\nOTHER WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT\nLIMITED TO WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PURPOSE.\n\n1.F.5. Some states do not allow disclaimers of certain implied\nwarranties or the exclusion or limitation of certain types of\ndamages. If any disclaimer or limitation set forth in this agreement\nviolates the law of the state applicable to this agreement, the\nagreement shall be interpreted to make the maximum disclaimer or\nlimitation permitted by the applicable state law. The invalidity or\nunenforceability of any provision of this agreement shall not void the\nremaining provisions.\n\n1.F.6. 
INDEMNITY - You agree to indemnify and hold the Foundation, the\ntrademark owner, any agent or employee of the Foundation, anyone\nproviding copies of Project Gutenberg™ electronic works in\naccordance with this agreement, and any volunteers associated with the\nproduction, promotion and distribution of Project Gutenberg™\nelectronic works, harmless from all liability, costs and expenses,\nincluding legal fees, that arise directly or indirectly from any of\nthe following which you do or cause to occur: (a) distribution of this\nor any Project Gutenberg™ work, (b) alteration, modification, or\nadditions or deletions to any Project Gutenberg™ work, and (c) any\nDefect you cause.\n\nSection 2. Information about the Mission of Project Gutenberg™\n\nProject Gutenberg™ is synonymous with the free distribution of\nelectronic works in formats readable by the widest variety of\ncomputers including obsolete, old, middle-aged and new computers. It\nexists because of the efforts of hundreds of volunteers and donations\nfrom people in all walks of life.\n\nVolunteers and financial support to provide volunteers with the\nassistance they need are critical to reaching Project Gutenberg™’s\ngoals and ensuring that the Project Gutenberg™ collection will\nremain freely available for generations to come. In 2001, the Project\nGutenberg Literary Archive Foundation was created to provide a secure\nand permanent future for Project Gutenberg™ and future\ngenerations. To learn more about the Project Gutenberg Literary\nArchive Foundation and how your efforts and donations can help, see\nSections 3 and 4 and the Foundation information page at www.gutenberg.org.\n\nSection 3. Information about the Project Gutenberg Literary Archive Foundation\n\nThe Project Gutenberg Literary Archive Foundation is a non-profit\n501(c)(3) educational corporation organized under the laws of the\nstate of Mississippi and granted tax exempt status by the Internal\nRevenue Service. 
The Foundation’s EIN or federal tax identification\nnumber is 64-6221541. Contributions to the Project Gutenberg Literary\nArchive Foundation are tax deductible to the full extent permitted by\nU.S. federal laws and your state’s laws.\n\nThe Foundation’s business office is located at 809 North 1500 West,\nSalt Lake City,",
+ "chunk_order_index": 40,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ },
+ "chunk-85f77f5e3a9aba01133555a2d20e2fd8": {
+ "tokens": 671,
+ "content": "non-profit\n501(c)(3) educational corporation organized under the laws of the\nstate of Mississippi and granted tax exempt status by the Internal\nRevenue Service. The Foundation’s EIN or federal tax identification\nnumber is 64-6221541. Contributions to the Project Gutenberg Literary\nArchive Foundation are tax deductible to the full extent permitted by\nU.S. federal laws and your state’s laws.\n\nThe Foundation’s business office is located at 809 North 1500 West,\nSalt Lake City, UT 84116, (801) 596-1887. Email contact links and up\nto date contact information can be found at the Foundation’s website\nand official page at www.gutenberg.org/contact\n\nSection 4. Information about Donations to the Project Gutenberg\nLiterary Archive Foundation\n\nProject Gutenberg™ depends upon and cannot survive without widespread\npublic support and donations to carry out its mission of\nincreasing the number of public domain and licensed works that can be\nfreely distributed in machine-readable form accessible by the widest\narray of equipment including outdated equipment. Many small donations\n($1 to $5,000) are particularly important to maintaining tax exempt\nstatus with the IRS.\n\nThe Foundation is committed to complying with the laws regulating\ncharities and charitable donations in all 50 states of the United\nStates. Compliance requirements are not uniform and it takes a\nconsiderable effort, much paperwork and many fees to meet and keep up\nwith these requirements. We do not solicit donations in locations\nwhere we have not received written confirmation of compliance. 
To SEND\nDONATIONS or determine the status of compliance for any particular state\nvisit www.gutenberg.org/donate.\n\nWhile we cannot and do not solicit contributions from states where we\nhave not met the solicitation requirements, we know of no prohibition\nagainst accepting unsolicited donations from donors in such states who\napproach us with offers to donate.\n\nInternational donations are gratefully accepted, but we cannot make\nany statements concerning tax treatment of donations received from\noutside the United States. U.S. laws alone swamp our small staff.\n\nPlease check the Project Gutenberg web pages for current donation\nmethods and addresses. Donations are accepted in a number of other\nways including checks, online payments and credit card donations. To\ndonate, please visit: www.gutenberg.org/donate.\n\nSection 5. General Information About Project Gutenberg™ electronic works\n\nProfessor Michael S. Hart was the originator of the Project\nGutenberg™ concept of a library of electronic works that could be\nfreely shared with anyone. For forty years, he produced and\ndistributed Project Gutenberg™ eBooks with only a loose network of\nvolunteer support.\n\nProject Gutenberg™ eBooks are often created from several printed\neditions, all of which are confirmed as not protected by copyright in\nthe U.S. unless a copyright notice is included. Thus, we do not\nnecessarily keep eBooks in compliance with any particular paper\nedition.\n\nMost people start at our website which has the main PG search\nfacility: www.gutenberg.org.\n\nThis website includes information about Project Gutenberg™,\nincluding how to make donations to the Project Gutenberg Literary\nArchive Foundation, how to help produce our new eBooks, and how to\nsubscribe to our email newsletter to hear about new eBooks.",
+ "chunk_order_index": 41,
+ "full_doc_id": "doc-addb4618e1697da0445ec72a648e1f92"
+ }
+}
\ No newline at end of file
diff --git a/neo4jWorkDir/lightrag.log b/neo4jWorkDir/lightrag.log
new file mode 100644
index 00000000..5dab2a0c
--- /dev/null
+++ b/neo4jWorkDir/lightrag.log
@@ -0,0 +1,10202 @@
+2024-10-29 13:32:26,188 - lightrag - INFO - Logger initialized for working directory: ./neo4jWorkDir
+2024-10-29 13:32:26,188 - lightrag - DEBUG - LightRAG init with param:
+ working_dir = ./neo4jWorkDir,
+ chunk_token_size = 1200,
+ chunk_overlap_token_size = 100,
+ tiktoken_model_name = gpt-4o-mini,
+ entity_extract_max_gleaning = 1,
+ entity_summary_to_max_tokens = 500,
+ node_embedding_algorithm = node2vec,
+ node2vec_params = {'dimensions': 1536, 'num_walks': 10, 'walk_length': 40, 'window_size': 2, 'iterations': 3, 'random_seed': 3},
+ embedding_func = {'embedding_dim': 1536, 'max_token_size': 8192, 'func': },
+ embedding_batch_num = 32,
+ embedding_func_max_async = 16,
+ llm_model_func = ,
+ llm_model_name = meta-llama/Llama-3.2-1B-Instruct,
+ llm_model_max_token_size = 32768,
+ llm_model_max_async = 16,
+ key_string_value_json_storage_cls = ,
+ vector_db_storage_cls = ,
+ vector_db_storage_cls_kwargs = {},
+ graph_storage_cls = ,
+ enable_llm_cache = True,
+ addon_params = {},
+ convert_response_to_json_func =
+
+2024-10-29 13:32:26,188 - lightrag - INFO - Load KV full_docs with 0 data
+2024-10-29 13:32:26,189 - lightrag - INFO - Load KV text_chunks with 0 data
+2024-10-29 13:32:26,189 - lightrag - INFO - Load KV llm_response_cache with 0 data
+2024-10-29 13:32:26,190 - lightrag - INFO - Creating a new event loop in a sub-thread.
+2024-10-29 13:32:26,190 - lightrag - INFO - [New Docs] inserting 1 docs
+2024-10-29 13:32:26,366 - lightrag - INFO - [New Chunks] inserting 42 chunks
+2024-10-29 13:32:26,366 - lightrag - INFO - Inserting 42 vectors to chunks
+2024-10-29 13:32:29,041 - lightrag - INFO - [Entity Extraction]...
+2024-10-29 13:33:22,411 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`PROJECT GUTENBERG`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"ORGANIZATION"', 'description': '"Project Gutenberg is a digital library offering free eBooks, allowing anyone to access literature in various formats without restrictions.""Project Gutenberg is a digital library providing free access to a wide range of eBooks, primarily in the United States and other regions, under specific licensing terms.""Project Gutenberg is an organization dedicated to the free distribution of electronic works and the preservation of the cultural heritage by providing access to public domain literature."', 'source_id': 'chunk-194cd9680f4967f48bea89678f7ece21chunk-af4ae173c561e4594c5aea96198cce9dchunk-9e3921da66da5d761ab73cd849af6c43'}
+2024-10-29 13:33:22,678 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`A CHRISTMAS CAROL`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"A Christmas Carol is a novella by Charles Dickens that explores themes of redemption, compassion, and the spirit of Christmas, first published in 1843."', 'source_id': 'chunk-9e3921da66da5d761ab73cd849af6c43'}
+2024-10-29 13:33:22,954 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`CHARLES DICKENS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Charles Dickens is a renowned English writer and social critic, best known for his novels depicting Victorian society, including A Christmas Carol."', 'source_id': 'chunk-9e3921da66da5d761ab73cd849af6c43'}
+2024-10-29 13:33:23,226 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`ARTHUR RACKHAM`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Arthur Rackham was an English illustrator known for his imaginative and detailed illustrations for children\'s books, including A Christmas Carol."', 'source_id': 'chunk-9e3921da66da5d761ab73cd849af6c43'}
+2024-10-29 13:33:23,497 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`EBENEZER SCROOGE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Ebenezer Scrooge is a protagonist who experiences confusion and fear regarding the passage of time and supernatural events, especially concerned about a ghostly visitation.""Ebenezer Scrooge is the main character of A Christmas Carol, portrayed as a miserly and greedy old man who experiences a transformation through the visitation of spirits."', 'source_id': 'chunk-c7b10789a9cb0af6c553730b29bd9034chunk-9e3921da66da5d761ab73cd849af6c43'}
+2024-10-29 13:33:23,767 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`BOB CRATCHIT`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Bob Cratchit is Scrooge\'s clerk, depicted as a humble family man with limited income but a warm home atmosphere.""Bob Cratchit is Scrooge\'s clerk, who demonstrates a sense of hope and joy, particularly highlighted during Christmas Eve.""Bob Cratchit is Scrooge\'s employee who arrives late to work after celebrating Christmas, displaying a sense of trepidation toward Scrooge.""Bob Cratchit is a character associated with Scrooge, representing the family Scrooge intends to support by sending a turkey.""Bob Cratchit is a character who appears to be the voice of moderation and kindness, attempting to maintain positivity during the family Christmas feast despite Scrooge\'s presence.""Bob Cratchit is depicted as a struggling father, representative of familial love and concern, particularly for his children, including Tiny Tim.""Bob Cratchit is the dedicated clerk of Ebenezer Scrooge, representing the struggles of the working class and the importance of family in A Christmas Carol.""Bob Cratchit is the father of Tiny Tim and the head of the Cratchit family, who expresses joy and gratitude during Christmas dinner.""Bob Cratchit is the father of Tiny Tim, characterized by his kindness and emotional depth, especially in relation to his family\'s struggles.""Bob Cratchit is the father of the Cratchit family, characterized by his warmth and dedication to his family, particularly in caring for Tiny Tim."', 'source_id': 'chunk-83ac129dc9b56f84c46760d153d68e93chunk-503f0bfa5453467c7c61d160a3540ecachunk-90b95db5c53e4364a6bee36b7aa4d70fchunk-b0f459f5af1c3a5e0b92cbe4ee48b77bchunk-1d4b58de5429cd1261370c231c8673e8chunk-d9aac3484185ac66045df92214d245d5chunk-94bea83e7153fc2064aa382b494936a1chunk-7e2e7ebcc19a53b399dc03aded4743e7chunk-9e3921da66da5d761ab73cd849af6c43chunk-19d72c57ae3408758b18d2568e86dc6b'}
+2024-10-29 13:33:24,038 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`TINY TIM`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Tiny Tim is Bob Cratchit\'s son, who represents innocence and the impact of Scrooge\'s change of heart.""Tiny Tim is a member of the Cratchit family, who represents the innocence and impact of Scrooge\'s generosity.""Tiny Tim is a symbol of innocence and vulnerability within the Cratchit family, evoking compassion and highlighting the harsh realities of illness and poverty.""Tiny Tim is a symbolic character representing innocence and the impact of loss on family cohesion.""Tiny Tim is a young boy in the Cratchit family, characterized by his spirit and resilience despite being a cripple, symbolizing hope and kindness during Christmas.""Tiny Tim is a young boy in the Cratchit family, who is portrayed as frail yet joyous and embodies the spirit of Christmas.""Tiny Tim is a young child in the Cratchit family, whose health and well-being are of great concern to his family, and he is remembered fondly.""Tiny Tim is depicted as a vulnerable child who participates in the Christmas toast but seems indifferent to it, symbolizing the family\'s struggles.""Tiny Tim, the son of Bob Cratchit, is a sickly child whose positive outlook on life symbolizes hope and compassion in A Christmas Carol."', 'source_id': 'chunk-83ac129dc9b56f84c46760d153d68e93chunk-503f0bfa5453467c7c61d160a3540ecachunk-91572b3cd42786ba4cb2f180ca25cbf4chunk-1d4b58de5429cd1261370c231c8673e8chunk-90b95db5c53e4364a6bee36b7aa4d70fchunk-94bea83e7153fc2064aa382b494936a1chunk-7e2e7ebcc19a53b399dc03aded4743e7chunk-9e3921da66da5d761ab73cd849af6c43chunk-19d72c57ae3408758b18d2568e86dc6b'}
+2024-10-29 13:33:24,309 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`GHOST OF CHRISTMAS PAST`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The Ghost of Christmas Past is a supernatural character that shows Scrooge his past, helping him understand the roots of his current miserliness.""The Ghost of Christmas Past is a supernatural entity that aids Scrooge in reflecting on his past experiences and emotions."', 'source_id': 'chunk-bb21d58d36c6306fd8810ddd51c4a971chunk-9e3921da66da5d761ab73cd849af6c43'}
+2024-10-29 13:33:24,576 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`GHOST OF CHRISTMAS PRESENT`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The Ghost of Christmas Present is a spirit that embodies generosity and abundance, leading Scrooge to self-reflection and understanding.""The Ghost of Christmas Present is a spirit that reveals to Scrooge the joys and struggles of the present Christmas season, emphasizing generosity and compassion."', 'source_id': 'chunk-843b5b2f944c7413d9ca876e2e2f151fchunk-9e3921da66da5d761ab73cd849af6c43'}
+2024-10-29 13:33:24,851 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`GHOST OF CHRISTMAS YET TO COME`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The Ghost of Christmas Yet to Come is an apparition that presents Scrooge with a grim vision of his possible future if he doesn\'t change his ways.""The Ghost of Christmas Yet to Come is an event within the story that signifies Scrooge\'s confrontation with the future consequences of his actions."', 'source_id': 'chunk-9e3921da66da5d761ab73cd849af6c43chunk-02baee20cc9463dbe08170a8e1043e32'}
+2024-10-29 13:33:25,124 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`FRED`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Fred is Ebenezer Scrooge\'s lively nephew, who embodies the spirit of Christmas and contrasts with Scrooge\'s misanthropy by inviting him to family gatherings.""Fred is Scrooge\'s nephew who expresses admiration for his uncle and proposes a toast in his honor during a Christmas gathering.""Fred is Scrooge\'s nephew, mentioned in the context of Scrooge\'s visit to his house.""Fred is Scrooge\'s nephew, representing the warmth and joy of family that Scrooge initially rejects.""Fred is Scrooge\'s niece by marriage who expresses surprise and warmth at Scrooge\'s unexpected visit for dinner."', 'source_id': 'chunk-8f738cb25d60a7dab782162d9d0a25edchunk-e7637ff18c5ded77a68cce0a87883fadchunk-94bea83e7153fc2064aa382b494936a1chunk-89777b838d5447c7bd1ec11282c4ee89chunk-9e3921da66da5d761ab73cd849af6c43'}
+2024-10-29 13:33:25,397 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`MR. FEZZIWIG`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Mr. Fezziwig is a kind-hearted old merchant who served as a mentor to Scrooge, representing generosity and the joy of Christmas through his festive parties."', 'source_id': 'chunk-9e3921da66da5d761ab73cd849af6c43'}
+2024-10-29 13:33:25,659 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`MRS. CRATCHIT`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Mrs. Cratchit expresses strong disdain for Scrooge, highlighting her feelings towards him during the Christmas festivities.""Mrs. Cratchit is Bob Cratchit\'s supportive wife, who strives to provide for her family despite their financial struggles and embodies resilience.""Mrs. Cratchit is Bob\'s wife who prepares the Christmas pudding, showcasing her pride and care in family traditions.""Mrs. Cratchit is Bob\'s wife, a supportive mother who manages the household while showing concern for her children\'s well-being.""Mrs. Cratchit is Bob\'s wife, portrayed as making an effort to prepare a festive atmosphere despite their financial struggles.""Mrs. Cratchit is a character who shows affection towards Bob, enhancing the family dynamic.""Mrs. Cratchit is the mother in the family who prepares the meal and shows warmth and attentiveness towards her family members during their Christmas celebration."', 'source_id': 'chunk-503f0bfa5453467c7c61d160a3540ecachunk-b0f459f5af1c3a5e0b92cbe4ee48b77bchunk-91572b3cd42786ba4cb2f180ca25cbf4chunk-1d4b58de5429cd1261370c231c8673e8chunk-7e2e7ebcc19a53b399dc03aded4743e7chunk-9e3921da66da5d761ab73cd849af6c43chunk-19d72c57ae3408758b18d2568e86dc6b'}
+2024-10-29 13:33:25,931 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`BELINDA CRATCHIT`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Belinda Cratchit is another daughter in the Cratchit family, depicted as participating in the festive meal by sweetening the apple sauce, showcasing her involvement in family traditions.""Belinda Cratchit is one of Bob Cratchit\'s daughters, assisting her mother in laying the dining cloth.""Belinda is a daughter of Bob and Mrs. Cratchit, representing the Cratchit family\'s love and unity during difficult times."', 'source_id': 'chunk-7e2e7ebcc19a53b399dc03aded4743e7chunk-9e3921da66da5d761ab73cd849af6c43chunk-b0f459f5af1c3a5e0b92cbe4ee48b77b'}
+2024-10-29 13:33:26,201 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`MARTHA CRATCHIT`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Martha Cratchit is a daughter in the Cratchit family who contributes to the household and shows affection for her family, particularly her father Bob.""Martha is another daughter of Bob and Mrs. Cratchit, who, like her sister Belinda, symbolizes the warmth and togetherness in the Cratchit household."', 'source_id': 'chunk-7e2e7ebcc19a53b399dc03aded4743e7chunk-9e3921da66da5d761ab73cd849af6c43'}
+2024-10-29 13:33:26,474 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`JOE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Joe is a character engaged in a conversation, displaying curiosity and a sense of humor regarding a situation related to \'bed-curtains\'.""Joe is a marine-store dealer depicted as receiving stolen goods, illustrating the consequences of crime and the darker sides of society.""Joe is portrayed as a character engaged in a conversation about the deceased, displaying a mix of humor and practicality in dealing with the possessions of the dead."', 'source_id': 'chunk-89777b838d5447c7bd1ec11282c4ee89chunk-9e3921da66da5d761ab73cd849af6c43chunk-e1f29eeadc1d8ab46dc8a0bbd3c56b64'}
+2024-10-29 13:33:26,752 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`GHOST OF JACOB MARLEY`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The Ghost of Jacob Marley is Scrooge\'s deceased business partner who returns as a spirit to warn Scrooge about the consequences of his life choices and to encourage change."', 'source_id': 'chunk-9e3921da66da5d761ab73cd849af6c43'}
+2024-10-29 13:33:27,022 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`FAN`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Fan is Scrooge\'s caring sister who represents innocence and love, showcasing a more tender side of Scrooge’s past."', 'source_id': 'chunk-9e3921da66da5d761ab73cd849af6c43'}
+2024-10-29 13:33:27,295 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`MRS. FEZZIWIG`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Mrs. Fezziwig is Fezziwig\'s partner at the domestic ball, contributing to the festive atmosphere alongside her husband.""Mrs. Fezziwig is the kind partner of Mr. Fezziwig, contributing to the festive atmosphere of their celebrations, emphasizing the importance of joyous gatherings.""Mrs. Fezziwig is the wife of Fezziwig, joining him in celebrations and dances, embodying warmth and partnership.""Mrs. Fezziwig is the wife of Fezziwig, participating in the dance, representing companionship and festive cheer."', 'source_id': 'chunk-b2f98bc7abb0c67d4c86055c7fd72218chunk-89777b838d5447c7bd1ec11282c4ee89chunk-9e3921da66da5d761ab73cd849af6c43chunk-8bc1edd983869352d14cf0886a9175a7'}
+2024-10-29 13:33:27,562 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`FEZZIWIG`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Fezziwig is a character noted for his role during a domestic ball, providing a joyful atmosphere and exemplifying kindness and generosity.""Fezziwig is a figure representing a cheerful and jovial employer who organizes lively Christmas festivities for his employees.""Fezziwig is a jovial figure from Scrooge\'s past, known for his kind-heartedness and as a former employer who embodies the spirit of Christmas.""Fezziwig is depicted as a joyful character who dances with his wife and embodies the spirit of Christmas."', 'source_id': 'chunk-b2f98bc7abb0c67d4c86055c7fd72218chunk-89777b838d5447c7bd1ec11282c4ee89chunk-c6c248858d1b13ab7ee9ca13c1aeed6bchunk-8bc1edd983869352d14cf0886a9175a7'}
+2024-10-29 13:33:27,700 - lightrag - DEBUG - Trigger summary: "SCROOGE"
+2024-10-29 13:33:27,941 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`MARLEY`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Marley is Scrooge\'s deceased business partner, whose spectral presence looms over Scrooge, evoking themes of regret and the supernatural.""Marley is Scrooge\'s deceased partner whose death is emphasized, and who impacts Scrooge\'s story through memories and reflections.""Marley is a character who, although only mentioned, has had a significant influence on Scrooge\'s life and perspective, representing his past connections.""Marley is a deceased character whose ghost appears to Scrooge, representing regret and the consequences of a life led without compassion."', 'source_id': 'chunk-d9aac3484185ac66045df92214d245d5chunk-843b5b2f944c7413d9ca876e2e2f151fchunk-89777b838d5447c7bd1ec11282c4ee89chunk-9c43dac2fcd5b578242d907ea6f8fc7b'}
+2024-10-29 13:33:28,214 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`CHRISTMAS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"Christmas is a festive occasion that brings the Cratchit family together, highlighting themes of love, loss, and remembrance.""Christmas is a festive occasion that has led to a joyful transformation in Scrooge\'s character and actions.""Christmas is a festive time celebrated by the Cratchit family, symbolizing joy, family togetherness, and generosity.""Christmas is a significant holiday that serves as the backdrop for Scrooge\'s transformation and the events that unfold during the narrative.""Christmas is a significant holiday that triggers Scrooge\'s memories and reflections on joy, loneliness, and his past.""Christmas is central to the text, symbolizing generosity, charity, and warmth that Scrooge rejects.""Christmas is depicted as a festive season when charity and social duty are emphasized, contrasting with Scrooge\'s values.""Christmas is depicted as a time associated with joy and abundance, contrasting with Scrooge\'s initial views.""Christmas is depicted as a time of joy and communal spirit, marked by people\'s activities and interactions in the streets.""Christmas is depicted as a time of joy and generosity, contrasting sharply with Scrooge\'s past behavior and current solitude.""Christmas is portrayed as a festive time that Scrooge dismisses, emphasizing the contrast between joy and his miserable attitude.""Christmas is portrayed as a time of joy, reflection, and connection, emphasizing the positive impact of the holiday spirit on individuals and communities.""Christmas is presented as a time for merriment and social gatherings, which Scrooge\'s nephew advocates for despite his uncle\'s opposition.""Christmas is the central holiday during which the events unfold, leading to significant character transformations and themes of generosity.""Christmas is the season during which festive activities and celebrations are occurring, including dances and gatherings.""Christmas represents a time of reflection and 
change for Scrooge, emphasizing themes of redemption.""Christmas serves as a thematic backdrop for the story, symbolizing redemption, generosity, and the spirit of giving."', 'source_id': 'chunk-91614df5cb74cbe5e109a338a82041e9chunk-503f0bfa5453467c7c61d160a3540ecachunk-773d0df1dd25c356f7b771d780583e09chunk-8f738cb25d60a7dab782162d9d0a25edchunk-e7637ff18c5ded77a68cce0a87883fadchunk-8590d9263cd2cd4fa583c432fc54e6c3chunk-54bdd22db1f84a965b5f4c06c9216d8dchunk-02baee20cc9463dbe08170a8e1043e32chunk-90d5764e301321c087f5a8f78b73a145chunk-91572b3cd42786ba4cb2f180ca25cbf4chunk-1d4b58de5429cd1261370c231c8673e8chunk-da2472b4ef2a535b62908f14d0fb0ca9chunk-94bea83e7153fc2064aa382b494936a1chunk-89777b838d5447c7bd1ec11282c4ee89chunk-99b0ec7f7e006561352b5d65e849611dchunk-74e2466de2f67fd710ef2f20c0a8d9e0chunk-5dac41b3f9eeaf794f0147400b1718cd'}
+2024-10-29 13:33:28,486 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE PLUMP SISTER`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"The Plump Sister is depicted in a light-hearted and humorous context, illustrating the fun of Christmas gatherings.""The plump sister is a character involved in the games, specifically blind man\'s-buff, and captures Topper\'s attention during the festivities."', 'source_id': 'chunk-89777b838d5447c7bd1ec11282c4ee89chunk-43c9bee23adfa77aa9dedd22401bba7d'}
+2024-10-29 13:33:28,756 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE WOMAN`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"The Woman engages in a humorous exchange with Joe about taking down bed-curtains, showcasing lively communication among characters.""The Woman is a character involved in the dialogue about the deceased\'s belongings, showing a carefree attitude towards their former owner.""The Woman is a character who enters the shop with a heavy bundle and demonstrates a bold and defiant demeanor, eager to take part in the conversation about the deceased\'s belongings."', 'source_id': 'chunk-60fc063c79fc0d4acc91a57e79de04dcchunk-89777b838d5447c7bd1ec11282c4ee89chunk-e1f29eeadc1d8ab46dc8a0bbd3c56b64'}
+2024-10-29 13:33:29,033 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`OLD SCRATCH`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Old Scratch is mentioned as a figure related to the conversation, symbolizing mischief and humor."', 'source_id': 'chunk-89777b838d5447c7bd1ec11282c4ee89'}
+2024-10-29 13:33:29,305 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE REGISTER`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The Register documents Marley\'s burial, highlighting the formal acknowledgment of his death."', 'source_id': 'chunk-89777b838d5447c7bd1ec11282c4ee89'}
+2024-10-29 13:33:29,579 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE FUNERAL`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The Funeral serves as a significant event reflecting on Marley\'s death, noted for its business-like atmosphere as perceived by Scrooge."', 'source_id': 'chunk-89777b838d5447c7bd1ec11282c4ee89'}
+2024-10-29 13:33:29,843 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`SCROOGE'S NEPHEW`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Scrooge\'s Nephew is a cheerful and optimistic character who represents the spirit of Christmas, opposing Scrooge\'s views.""Scrooge\'s Nephew is characterized by his contagious laughter and good humor, contrasting with his uncle\'s miserliness.""Scrooge\'s Nephew is portrayed as optimistic about Christmas, emphasizing its positive qualities despite Scrooge\'s negativity.""Scrooge\'s nephew is a character present at the gathering who actively participates in the games and engages with his family."', 'source_id': 'chunk-43c9bee23adfa77aa9dedd22401bba7dchunk-773d0df1dd25c356f7b771d780583e09chunk-74e2466de2f67fd710ef2f20c0a8d9e0chunk-5dac41b3f9eeaf794f0147400b1718cd'}
+2024-10-29 13:33:30,117 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE COUNTING-HOUSE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"ORGANIZATION"', 'description': '"The Counting-House is Scrooge\'s place of work, characterized by a gloomy demeanor and an atmosphere that reflects Scrooge\'s character.""The Counting-House is where Scrooge works and is characterized by its cold and unwelcoming environment."', 'source_id': 'chunk-d9aac3484185ac66045df92214d245d5chunk-74e2466de2f67fd710ef2f20c0a8d9e0'}
+2024-10-29 13:33:30,388 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE COLD`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Cold represents the emotional and physical chill that surrounds Scrooge, symbolizing his heartless nature and isolation from others."', 'source_id': 'chunk-74e2466de2f67fd710ef2f20c0a8d9e0'}
+2024-10-29 13:33:30,660 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`FOG`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"Fog is a weather condition mentioned that enhances the dreary atmosphere surrounding Scrooge\'s home and workplace.""Fog is a weather phenomenon that adds to the bleak atmosphere, reflecting the obscurity and confusion in Scrooge\'s life."', 'source_id': 'chunk-d9aac3484185ac66045df92214d245d5chunk-74e2466de2f67fd710ef2f20c0a8d9e0'}
+2024-10-29 13:33:30,935 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE CLERK`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"The Clerk is a subordinate to Scrooge who serves as a contrast to his boss\'s character, showing warmth and holiday spirit.""The Clerk is an employee of Scrooge, who quietly awaits the end of the workday and represents the working class.""The Clerk works in Scrooge\'s counting-house, depicted as miserable and diminished under Scrooge\'s strict management."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145chunk-74e2466de2f67fd710ef2f20c0a8d9e0chunk-5dac41b3f9eeaf794f0147400b1718cd'}
+2024-10-29 13:33:31,204 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`CHRISTMAS EVE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"Christmas Eve is depicted as a significant time, bringing about joyful activities like sliding down snow hills, contrasting with Scrooge\'s dour demeanor.""Christmas Eve is the night before Christmas, serving as the backdrop for the lively celebrations and dances hosted by Fezziwig.""Christmas Eve is the specific time setting in the story, highlighting the contrast between the joy of the holiday and Scrooge\'s negativity."', 'source_id': 'chunk-d9aac3484185ac66045df92214d245d5chunk-8bc1edd983869352d14cf0886a9175a7chunk-74e2466de2f67fd710ef2f20c0a8d9e0'}
+2024-10-29 13:33:31,477 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE CITY`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"The City provides the backdrop for Scrooge\'s story, representing the bustling environment that underscores his loneliness and detachment from society.""The City represents the bustling urban environment where Scrooge reflects on his past, filled with activity and shadowy figures.""The City, from which Scrooge is transported, represents his past life and is contrasted with the open country road that signifies his memories."', 'source_id': 'chunk-bb21d58d36c6306fd8810ddd51c4a971chunk-c6c248858d1b13ab7ee9ca13c1aeed6bchunk-74e2466de2f67fd710ef2f20c0a8d9e0'}
+2024-10-29 13:33:31,741 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`MR. MARLEY`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Mr. Marley is Scrooge\'s deceased business partner who died seven years ago, representing a connection to Scrooge\'s past and his current miserly outlook."', 'source_id': 'chunk-5dac41b3f9eeaf794f0147400b1718cd'}
+2024-10-29 13:33:32,024 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`LIBERTY`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The discussion around liberality represents a call to generosity and charity during the festive season, which Scrooge rebuffs."', 'source_id': 'chunk-5dac41b3f9eeaf794f0147400b1718cd'}
+2024-10-29 13:33:32,300 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`PORTLY GENTLEMEN`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"The Portly Gentlemen are representatives who seek to gather funds for the poor during Christmas, embodying the spirit of generosity that Scrooge rejects."', 'source_id': 'chunk-5dac41b3f9eeaf794f0147400b1718cd'}
+2024-10-29 13:33:32,574 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`CREDENTIALS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The Credentials refer to the documents presented by the Portly Gentlemen, indicating their legitimacy and purpose in seeking donations from Scrooge."', 'source_id': 'chunk-5dac41b3f9eeaf794f0147400b1718cd'}
+2024-10-29 13:33:32,847 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`BEDLAM`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"Bedlam is referenced as a metaphorical place for madness, illustrating Scrooge\'s disdain for the cheerful nature of Christmas and his sense of isolation."', 'source_id': 'chunk-5dac41b3f9eeaf794f0147400b1718cd'}
+2024-10-29 13:33:33,118 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`CHRISTMAS-TIME`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"Christmas-time is characterized as a period filled with festivities, joy, and community engagement, which Scrooge regards cynically.""Christmas-time represents the festive season, a recurring theme that evokes memories and emotions in Scrooge’s reflections.""Christmas-time serves as a backdrop for the narrative, emphasizing themes of reflection, mortality, and social interactions."', 'source_id': 'chunk-f1a4fbcf1ed86864b9bfd5e8dba4c683chunk-c6c248858d1b13ab7ee9ca13c1aeed6bchunk-5dac41b3f9eeaf794f0147400b1718cd'}
+2024-10-29 13:33:33,435 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE GENTLEMAN`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"The Gentleman is a character who advocates for helping the poor and trying to raise a fund for those in need during Christmas."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145'}
+2024-10-29 13:33:33,711 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE UNION WORKHOUSES`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"ORGANIZATION"', 'description': '"The Union Workhouses are institutions mentioned as a means of supporting the poor, still operational during Scrooge\'s time."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145'}
+2024-10-29 13:33:33,978 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE TREADMILL`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"ORGANIZATION"', 'description': '"The Treadmill is an institution referenced as part of the welfare system that Scrooge acknowledges."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145'}
+2024-10-29 13:33:34,258 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE MAIN STREET`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"The Main Street is a location described where activity is bustling during the cold season, reflecting the social dynamics of the area."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145'}
+2024-10-29 13:33:34,544 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE MANSION HOUSE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"ORGANIZATION"', 'description': '"The Mansion House is identified as the residence of the Lord Mayor, a place associated with Christmas festivities and order."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145'}
+2024-10-29 13:33:34,816 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE POOR`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"The Poor refers to the individuals in society who are destitute and in need of assistance, often highlighted in discussions about charity during Christmas."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145'}
+2024-10-29 13:33:35,092 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`CHRISTMAS CAROL`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"A Christmas Carol is referenced as a song that Scrooge recalls when thinking about giving and community.""The Christmas Carol is a musical tradition performed during the Christmas season, symbolizing goodwill and festive spirit."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145chunk-8590d9263cd2cd4fa583c432fc54e6c3'}
+2024-10-29 13:33:35,356 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE FOG`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"The Fog is a weather condition described as thickening around Scrooge, metaphorically representing the gloomy atmosphere surrounding him."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145'}
+2024-10-29 13:33:35,625 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE GAS-PIPES`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"ORGANIZATION"', 'description': '"The Gas-pipes are mentioned as undergoing repairs in The Main Street, indicating the infrastructure\'s role in the community during winter."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145'}
+2024-10-29 13:33:35,894 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE LABOURERS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"ORGANIZATION"', 'description': '"The Labourers refer to the workers repairing the gas-pipes, reflecting the lower working-class experiencing harsh winter conditions."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145'}
+2024-10-29 13:33:36,162 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE LORD MAYOR`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"The Lord Mayor is a civic leader responsible for overseeing Christmas festivities and ensuring proper celebrations in the town."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145'}
+2024-10-29 13:33:36,436 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE TAILOR`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"The Tailor is a character referenced as preparing for Christmas by stirring pudding, illustrating the engagement of tradespeople in holiday traditions."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145'}
+2024-10-29 13:33:36,712 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE PUDDING`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The Pudding is a traditional dish prepared for Christmas, symbolizing festivity and the warmth of family gatherings."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145'}
+2024-10-29 13:33:36,991 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`ST. DUNSTAN`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"St. Dunstan is a historical figure mentioned metaphorically, associated with a legend of confronting evil, reflecting the theme of good versus evil."', 'source_id': 'chunk-90d5764e301321c087f5a8f78b73a145'}
+2024-10-29 13:33:37,266 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`LONDON`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"London serves as the geographical setting for the story, encapsulating the harsh winter atmosphere and the life of its inhabitants during the festive season.""London serves as the setting where the events take place, adding a cultural backdrop to the Christmas gathering."', 'source_id': 'chunk-d9aac3484185ac66045df92214d245d5chunk-43c9bee23adfa77aa9dedd22401bba7d'}
+2024-10-29 13:33:37,547 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`CLERK`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"The Clerk is an unnamed character who serves Scrooge, exhibiting a hopeful disposition despite Scrooge\'s ill-tempered management."', 'source_id': 'chunk-d9aac3484185ac66045df92214d245d5'}
+2024-10-29 13:33:37,822 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`FROST`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"Frost is another atmospheric condition depicted, contributing to the cold and somber feeling of the environment during the story."', 'source_id': 'chunk-d9aac3484185ac66045df92214d245d5'}
+2024-10-29 13:33:38,089 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`CAMDEN TOWN`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"Camden Town is a location mentioned in relation to the logistics of transporting a large turkey.""Camden Town is a location where Bob Cratchit runs home to celebrate Christmas Eve, representing a more joyful and lively setting."', 'source_id': 'chunk-d9aac3484185ac66045df92214d245d5chunk-8f738cb25d60a7dab782162d9d0a25ed'}
+2024-10-29 13:33:38,360 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE GHOST OF MARLEY`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The Ghost of Marley is an event that is anticipated in the narrative, representing Scrooge\'s confrontation with his past and the supernatural."', 'source_id': 'chunk-d9aac3484185ac66045df92214d245d5'}
+2024-10-29 13:33:38,634 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`KEYHOLE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Keyhole is a metaphorical representation of Scrooge\'s perspective and the barriers to his understanding of joy and connection."', 'source_id': 'chunk-d9aac3484185ac66045df92214d245d5'}
+2024-10-29 13:33:38,904 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE KNOCKER`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Knocker is a physical object that becomes a symbol of Scrooge\'s psyche, transforming into Marley\'s face, representing his guilt and past.""The Knocker is an ornate door knocker that Scrooge admires, reflecting his newfound appreciation for simple joys.""The Knocker on the door serves as a symbolic gateway to the supernatural events, being the first point of eerie interaction for Scrooge."', 'source_id': 'chunk-d9aac3484185ac66045df92214d245d5chunk-8f738cb25d60a7dab782162d9d0a25edchunk-9c43dac2fcd5b578242d907ea6f8fc7b'}
+2024-10-29 13:33:39,173 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`MARLEY'S GHOST`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"Marley\'s Ghost is a spectral figure who appears to Scrooge to warn him of the impending visits from three spirits, representing remorse for his past deeds.""Marley\'s Ghost is a spectral visitation that deeply troubles Scrooge and causes him great perplexity, leading him to question the reality of his experiences.""Marley\'s Ghost represents the supernatural apparition of Marley, who has been dead for seven years and symbolizes regret and warning to Scrooge."', 'source_id': 'chunk-c7b10789a9cb0af6c553730b29bd9034chunk-9c43dac2fcd5b578242d907ea6f8fc7bchunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:39,448 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE HOUSE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"The House is the setting for the Christmas celebration and interactions among the characters, embodying warmth and family togetherness.""The House is the setting where Scrooge experiences the haunting, with a description of its features contributing to the eerie atmosphere."', 'source_id': 'chunk-da2472b4ef2a535b62908f14d0fb0ca9chunk-9c43dac2fcd5b578242d907ea6f8fc7b'}
+2024-10-29 13:33:39,724 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE BELL`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"The Bell is a church bell that signifies the time passing and marks the moments of Scrooge\'s encounters with the ghosts.""The Bell is described as a disused bell that starts to swing and ring during the haunting, signifying the onset of supernatural occurrences."', 'source_id': 'chunk-9c43dac2fcd5b578242d907ea6f8fc7bchunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:39,990 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE WINE-MERCHANT'S CELLAR`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"The Wine-Merchant\'s Cellar is a location that holds significance in the story, where sounds of dragging chains are heard, adding to the ghostly ambiance."', 'source_id': 'chunk-9c43dac2fcd5b578242d907ea6f8fc7b'}
+2024-10-29 13:33:40,264 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE GRATE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"The Grate is part of Scrooge\'s room where a small fire burns, contributing to the bleak and cold atmosphere of his setting."', 'source_id': 'chunk-9c43dac2fcd5b578242d907ea6f8fc7b'}
+2024-10-29 13:33:40,539 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE CASK`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"The Cask is part of the cellar, contributing to the sounds associated with Marley\'s Ghost and aiding in creating a sense of dread."', 'source_id': 'chunk-9c43dac2fcd5b578242d907ea6f8fc7b'}
+2024-10-29 13:33:40,814 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE DRESSING-GOWN`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"The Dressing-Gown is worn by Scrooge, illustrating his solitary, night-time routine and emphasizing his isolation."', 'source_id': 'chunk-9c43dac2fcd5b578242d907ea6f8fc7b'}
+2024-10-29 13:33:41,082 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE CANDLE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"The Candle represents a source of light in Scrooge\'s dark life, emphasizing the contrast between warmth and isolation."', 'source_id': 'chunk-9c43dac2fcd5b578242d907ea6f8fc7b'}
+2024-10-29 13:33:41,350 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`DARKNESS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Darkness symbolizes the oppressive loneliness and despair in Scrooge\'s life, as well as the supernatural elements of the story."', 'source_id': 'chunk-9c43dac2fcd5b578242d907ea6f8fc7b'}
+2024-10-29 13:33:41,622 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`JACOB MARLEY`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Jacob Marley is Scrooge\'s deceased business partner who plays a significant role in introducing the spirits to Scrooge.""Jacob Marley is Scrooge\'s deceased business partner, who appears as a ghost to warn Scrooge of his impending fate and the need for change.""Jacob Marley is a deceased character who symbolizes the consequences of a life led without compassion, influencing Scrooge\'s redemption.""Jacob Marley is a ghost who appears to Scrooge, having been his business partner in life, and is now bound by chains that signify his earthly ties and regrets.""Jacob Marley is the ghost of Scrooge\'s former business partner, who seeks to impart a lesson about the consequences of a life lived without compassion or connection to humanity.""Jacob Marley is the ghost who represents Scrooge\'s past and serves as a catalyst for his change."', 'source_id': 'chunk-90b95db5c53e4364a6bee36b7aa4d70fchunk-1aa50206d02ed89418f65f97c6441d1cchunk-02baee20cc9463dbe08170a8e1043e32chunk-91572b3cd42786ba4cb2f180ca25cbf4chunk-da2472b4ef2a535b62908f14d0fb0ca9chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:41,894 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE GHOST`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The Ghost is a spectral manifestation of Jacob Marley, appearing to communicate a vital message to Scrooge about the spiritual consequences of his life choices.""The Ghost is a supernatural entity that shows Scrooge scenes from his past to provoke realization and remorse.""The Ghost refers specifically to the supernatural presence guiding Scrooge through his reflections and past experiences.""The Ghost represents the spirit of Jacob Marley, warning Scrooge about the aftereffects of neglecting human virtue and kindness during life.""The Ghost represents various spectral entities that visit Scrooge to facilitate his transformation by highlighting the consequences of his actions.""The Ghost serves as a guiding presence that helps Scrooge confront and reflect on his past during Christmas."', 'source_id': 'chunk-8590d9263cd2cd4fa583c432fc54e6c3chunk-54bdd22db1f84a965b5f4c06c9216d8dchunk-1aa50206d02ed89418f65f97c6441d1cchunk-02baee20cc9463dbe08170a8e1043e32chunk-da2472b4ef2a535b62908f14d0fb0ca9chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:42,164 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`CASH-BOXES`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Cash-boxes are items represented in the ghost\'s chain, symbolizing Marley’s greed and the burdens of material wealth in life."', 'source_id': 'chunk-1aa50206d02ed89418f65f97c6441d1c'}
+2024-10-29 13:33:42,431 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`KEYS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Keys represent control and access, also included in the ghost\'s chain, indicating the constraints of Marley’s earthly desires."', 'source_id': 'chunk-1aa50206d02ed89418f65f97c6441d1c'}
+2024-10-29 13:33:42,701 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`PADLOCKS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Padlocks are part of the ghost\'s chain, signifying confinement and the lock on Marley’s spirit due to his selfish actions in life."', 'source_id': 'chunk-1aa50206d02ed89418f65f97c6441d1c'}
+2024-10-29 13:33:42,969 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`LEDGERS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Ledgers symbolize records of transactions, reflecting Marley’s focus on business and financial dealings during his life."', 'source_id': 'chunk-1aa50206d02ed89418f65f97c6441d1c'}
+2024-10-29 13:33:43,240 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`DEEDS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Deeds represent property ownership, indicating the material attachments that bound Marley in life and continue to do so in death."', 'source_id': 'chunk-1aa50206d02ed89418f65f97c6441d1c'}
+2024-10-29 13:33:43,513 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`HEAVY PURSES`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Heavy purses in the ghost\'s chain symbolize the weight of wealth and the burden of not sharing that wealth with others."', 'source_id': 'chunk-1aa50206d02ed89418f65f97c6441d1c'}
+2024-10-29 13:33:43,782 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE HEAVY DOOR`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"LOCATION"', 'description': '"The heavy door is a physical boundary encountered by Scrooge, marking the transition between the ordinary world and the supernatural visitation."', 'source_id': 'chunk-1aa50206d02ed89418f65f97c6441d1c'}
+2024-10-29 13:33:44,046 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE FIREPLACE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"LOCATION"', 'description': '"The fireplace serves as a significant setting for the interaction between Scrooge and the ghost, representing warmth but also the stark contrast to the ghost\'s cold presence."', 'source_id': 'chunk-1aa50206d02ed89418f65f97c6441d1c'}
+2024-10-29 13:33:44,314 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`SPIRIT`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Spirit represents the essence of Marley that is attempting to interact with Scrooge, indicating themes of afterlife and unresolved issues.""The Spirit embodies the lessons of compassion and social responsibility, guiding Scrooge through his reflections and realizations.""The Spirit is the manifestation of Scrooge\'s past that guides him through his memories, representing his former self and the influence of time.""The Spirit represents a ghostly entity, guiding Scrooge through visions of potential future events and urging him to acknowledge societal issues.""The Spirit represents a supernatural entity guiding Scrooge through his reflections on life, death, and human emotions, prompting self-awareness.""The Spirit represents the embodiment of the Christmas present, personifying the themes of generosity and merriment."', 'source_id': 'chunk-bb21d58d36c6306fd8810ddd51c4a971chunk-83ac129dc9b56f84c46760d153d68e93chunk-e7637ff18c5ded77a68cce0a87883fadchunk-843b5b2f944c7413d9ca876e2e2f151fchunk-1aa50206d02ed89418f65f97c6441d1cchunk-02baee20cc9463dbe08170a8e1043e32'}
+2024-10-29 13:33:44,590 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE BANDAGE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The bandage is an item used by the ghost, symbolizing decay and the remnants of Marley’s struggle with his own existence post-death."', 'source_id': 'chunk-1aa50206d02ed89418f65f97c6441d1c'}
+2024-10-29 13:33:44,872 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE COSMIC SIGNIFICANCE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The cosmic significance refers to the broader themes of morality and the impact of one’s actions on others, reflected in Marley’s message to Scrooge."', 'source_id': 'chunk-1aa50206d02ed89418f65f97c6441d1c'}
+2024-10-29 13:33:45,144 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`MANKIND`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Mankind signifies the broader human community, which Marley claims should be the focus of compassionate effort rather than mere business pursuits."', 'source_id': 'chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:45,422 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`BUSINESS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Business refers to the practices of trade and commerce that Marley criticizes for overshadowing more significant moral duties like benevolence and charity."', 'source_id': 'chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:45,701 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE CHAIN`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Chain refers to the symbolic burden that Marley carries, representing the consequences of his actions and the remorse that traps him in his spectral form.""The Chain symbolizes the burdens of the past that haunt the spirits, showcasing the consequences of one\'s actions in life."', 'source_id': 'chunk-359187e7571bda4b0b08b2e0b6581e52chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:45,968 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`CROWD OF FELLOW-BEINGS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Crowd of Fellow-Beings signifies the people Marley neglected throughout his life, highlighting the need for social responsibility and community engagement."', 'source_id': 'chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:46,239 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`CHRISTMAS EVES`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"Christmas Eves refers to the recurring time periods that Marley reflects upon, indicating the weight of his past decisions leading to regret."', 'source_id': 'chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:46,506 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE BLESSED STAR`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Blessed Star symbolizes hope and guidance, representing the moral path that Marley believes Scrooge should have followed and acknowledged."', 'source_id': 'chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:46,786 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`IRON CABLE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Iron Cable is a metaphorical representation of the heavy burdens of regret and unfulfilled opportunities carried by Marley in the afterlife."', 'source_id': 'chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:47,054 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE COMMON WELFARE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Common Welfare emphasizes the idea that collective well-being should be a primary concern for individuals, contrasting with selfish pursuits."', 'source_id': 'chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:47,324 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`OTHER REGIONS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Other Regions alludes to the spiritual realm and existence beyond earthly life that Marley refers to, highlighting the interaction between the physical and spiritual worlds."', 'source_id': 'chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:47,591 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`SEVEN YEARS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"Seven Years denotes the duration since Marley passed away, a significant timeframe during which he reflects on his unending labor and remorse."', 'source_id': 'chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:47,863 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`COUNTING-HOUSE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"LOCATION"', 'description': '"The Counting-House is a physical space representing the business environment where Scrooge and Marley engaged in trade but also symbolizes their limited worldview."', 'source_id': 'chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:48,139 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`FELLOW-MEN`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Fellow-Men refers to individuals with whom Marley and Scrooge share a human connection, underscoring the importance of empathy and human relationships."', 'source_id': 'chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:48,416 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`PONDEROUS CHAIN`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Ponderous Chain symbolizes the heavy emotional and moral burden that Marley carries as a consequence of his choices in life, particularly relating to his business practices."', 'source_id': 'chunk-359187e7571bda4b0b08b2e0b6581e52'}
+2024-10-29 13:33:48,681 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE THREE SPIRITS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The Three Spirits refer to the supernatural beings that are prophesied to visit Scrooge, each representing different aspects of time and morality."', 'source_id': 'chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:48,961 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE INVISIBLE WORLD`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Invisible World refers to the realm of spirits and supernatural influences that Scrooge glimpses during his encounter with Marley\'s Ghost."', 'source_id': 'chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:49,226 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE CLOCK`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"TECHNOLOGY"', 'description': '"The Clock is a device that malfunctions, contributing to Scrooge\'s confusion about time and reality in the narrative.""The Clock is mentioned as malfunctioning, which symbolizes the confusion and distortion of time that Scrooge experiences during his nighttime visitations."', 'source_id': 'chunk-c7b10789a9cb0af6c553730b29bd9034chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:49,502 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE WINDOW`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Window represents a threshold between the visible and invisible worlds, allowing Scrooge a view into the realm of spirits and their lamentations."', 'source_id': 'chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:49,775 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE AIR`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Air is described as filled with phantoms, representing the haunting nature of Scrooge\'s past choices and regrets."', 'source_id': 'chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:50,048 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE PHANTOMS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"The Phantoms are spirits that wander restlessly, symbolizing lost opportunities and unresolved issues from their past lives."', 'source_id': 'chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:50,322 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE DARK NIGHT`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Dark Night signifies the ominous and mysterious setting during which Scrooge faces his fears and experiences the supernatural."', 'source_id': 'chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:50,601 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`WAILINGS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"Wailings represent the sorrowful expressions of the phantoms, illustrating the pain and regret of spirits bound by their earthly actions."', 'source_id': 'chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:50,871 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE DOOR`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Door is described as being double locked, representing Scrooge’s isolation and the barriers he has built around himself in life."', 'source_id': 'chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:51,143 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE SPIRITS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Spirits collectively represent the supernatural forces that seek to guide and teach Scrooge about the impact of his life choices.""The Spirits represent the past, present, and future in Scrooge\'s life, guiding him toward redemption and self-reflection."', 'source_id': 'chunk-90b95db5c53e4364a6bee36b7aa4d70fchunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:51,416 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE GHOSTLY VISITATIONS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The Ghostly Visitations are the anticipated encounters that Scrooge must endure as part of his journey toward redemption."', 'source_id': 'chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:51,702 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`EBENEZER`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Ebenezer is a character who reflects on his past with fondness, remembering his former self and his connection to Dick Wilkins.""Ebenezer refers to Scrooge\'s full name, emphasizing his identity and character as a miser who learns valuable life lessons."', 'source_id': 'chunk-8bc1edd983869352d14cf0886a9175a7chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:51,981 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE PENANCE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Penance refers to the moral and emotional suffering that Marley\'s Ghost must endure due to his past actions, prompting Scrooge\'s journey of redemption."', 'source_id': 'chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:52,268 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE TRANSPARENT WINDOW`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Transparent Window symbolizes the blurred line between the known and the supernatural, reflecting Scrooge’s struggle to understand his experiences."', 'source_id': 'chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:52,546 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE DARKNESS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"The Darkness refers to the absence of light, signifying uncertainty and fear in Scrooge\'s transformation journey."', 'source_id': 'chunk-cb661c37436355ccec2769b1d0350c5f'}
+2024-10-29 13:33:52,822 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE VISITOR`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"The Visitor is a mysterious figure that appears to Scrooge; it possesses qualities of both youth and age, signifying the blend of time and supernatural elements."', 'source_id': 'chunk-c7b10789a9cb0af6c553730b29bd9034'}
+2024-10-29 13:33:53,098 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE SPRING`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"TECHNOLOGY"', 'description': '"The Spring is a mechanism within The Clock that Scrooge touches to correct its malfunction, highlighting his engagement with technology amid his confusion about time."', 'source_id': 'chunk-c7b10789a9cb0af6c553730b29bd9034'}
+2024-10-29 13:33:53,367 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`BED CURTAINS`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"TECHNOLOGY"', 'description': '"The Bed Curtains are part of Scrooge\'s bedroom that are drawn aside, heralding the appearance of The Visitor and creating a pivotal moment in the narrative."', 'source_id': 'chunk-c7b10789a9cb0af6c553730b29bd9034'}
+2024-10-29 13:33:53,638 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`LIGHT`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Light emanates from The Visitor, symbolizing clarity, revelation, and the supernatural, contrasting with the darkness around Scrooge\'s situation."', 'source_id': 'chunk-c7b10789a9cb0af6c553730b29bd9034'}
+2024-10-29 13:33:53,915 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`HOLLY BRANCH`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"TECHNOLOGY"', 'description': '"The Holly Branch is held by The Visitor, representing the duality of winter and spring, life and death, embodying the hope of renewal."', 'source_id': 'chunk-c7b10789a9cb0af6c553730b29bd9034'}
+2024-10-29 13:33:54,188 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`DREAM`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"CONCEPT"', 'description': '"Dream refers to the possibility that Scrooge\'s experiences are a figment of his imagination, reflecting his internal struggle with reality and perception."', 'source_id': 'chunk-c7b10789a9cb0af6c553730b29bd9034'}
+2024-10-29 13:33:54,456 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`MARKET-TOWN`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"The Market-Town is a nostalgic location from Scrooge\'s youth, evoking memories and emotions tied to his childhood."', 'source_id': 'chunk-bb21d58d36c6306fd8810ddd51c4a971'}
+2024-10-29 13:33:54,726 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`WINTER DAY`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"EVENT"', 'description': '"The Winter Day signifies the seasonal setting during Scrooge\'s journey through his memories, highlighting a clear, cold atmosphere filled with nostalgia."', 'source_id': 'chunk-bb21d58d36c6306fd8810ddd51c4a971'}
+2024-10-29 13:33:55,000 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`CHILDREN`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"The Children represent innocence and purity, highlighted as victims of societal neglect and calling for compassion from Scrooge.""The Children represent the joyful, carefree aspects of Scrooge\'s past, embodying happiness and the innocence of youth that he has long forgotten."', 'source_id': 'chunk-bb21d58d36c6306fd8810ddd51c4a971chunk-e7637ff18c5ded77a68cce0a87883fad'}
+2024-10-29 13:33:55,272 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`SCHOOL`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"ORGANIZATION"', 'description': '"The School is a location associated with Scrooge\'s neglected childhood, symbolizing loneliness and lost opportunities for connection and joy."', 'source_id': 'chunk-bb21d58d36c6306fd8810ddd51c4a971'}
+2024-10-29 13:33:55,547 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`ALI BABA`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Ali Baba is mentioned as a character from a story that impacts Scrooge during his reflection on Christmas and childhood."', 'source_id': 'chunk-8590d9263cd2cd4fa583c432fc54e6c3'}
+2024-10-29 13:33:55,841 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`VALENTINE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Valentine is referenced as a brother of a character, and his presence contributes to the holiday memories Scrooge recalls."', 'source_id': 'chunk-8590d9263cd2cd4fa583c432fc54e6c3'}
+2024-10-29 13:33:56,118 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`ORSON`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Orson is mentioned as another brother mentioned by Scrooge, recalling his childhood tales and adventures."', 'source_id': 'chunk-8590d9263cd2cd4fa583c432fc54e6c3'}
+2024-10-29 13:33:56,384 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`ROBIN CRUSOE`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Robin Crusoe is a character referenced by Scrooge as part of his memories, signifying tales of adventure and nostalgia."', 'source_id': 'chunk-8590d9263cd2cd4fa583c432fc54e6c3'}
+2024-10-29 13:33:56,998 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`FRIDAY`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"PERSON"', 'description': '"Friday is another character from a story mentioned by Scrooge during his nostalgic recollections of childhood."', 'source_id': 'chunk-8590d9263cd2cd4fa583c432fc54e6c3'}
+2024-10-29 13:33:57,977 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE MANSION`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"GEO"', 'description': '"The Mansion is described as a large house of dull red brick, associated with Scrooge\'s childhood memories and reflecting the state of neglect."', 'source_id': 'chunk-8590d9263cd2cd4fa583c432fc54e6c3'}
+2024-10-29 13:33:58,568 - lightrag - INFO - _do_upsert:query:
+ MERGE (n:`THE SPIRIT`)
+ SET n += $properties
+ RETURN n
+ :result:{'entity_type': '"ORGANIZATION"', 'description': '"The Spirit is a ghostly figure guiding Scrooge on a journey to witness the moments of cheer and resilience among people during Christmas, influencing his perspective.""The Spirit is the entity that guides Scrooge through memories, prompting reflections on his past during the Christmas season."