Merge branch 'main' into distil

instructor-ai · Oct 15, 2023 · 0149d5f · 0149d5f
2 parents 6d78cdf + 41c4636
commit 0149d5f
Show file tree

Hide file tree

Showing 8 changed files with 109 additions and 3 deletions.
diff --git a/examples/knowledge-graph/final.png b/examples/knowledge-graph/final.png
diff --git a/examples/knowledge-graph/iteration_0.png b/examples/knowledge-graph/iteration_0.png
diff --git a/examples/knowledge-graph/iteration_1.png b/examples/knowledge-graph/iteration_1.png
diff --git a/examples/knowledge-graph/iteration_2.png b/examples/knowledge-graph/iteration_2.png
diff --git a/examples/knowledge-graph/iteration_3.png b/examples/knowledge-graph/iteration_3.png
diff --git a/examples/knowledge-graph/run_stream.py b/examples/knowledge-graph/run_stream.py
@@ -0,0 +1,105 @@
+import openai
+import instructor
+
+from graphviz import Digraph
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+# Patch the openai client up front; presumably this is what lets
+# ChatCompletion.create accept the `response_model` argument used below.
+instructor.patch()
+
+
+class Node(BaseModel):
+    """A single entity in the knowledge graph.
+
+    Instances are hashable so that a graph can deduplicate nodes when
+    merging two node lists.
+    """
+
+    id: int
+    label: str
+    color: str
+
+    def __hash__(self) -> int:
+        # Use the node's own id: a bare `id` refers to the builtin function,
+        # which would make every node with the same label hash identically.
+        return hash((self.id, self.label))
+
+
+class Edge(BaseModel):
+    """A directed, labelled link between two nodes, referenced by node id."""
+
+    source: int
+    target: int
+    label: str
+    color: str = "black"
+
+    def __hash__(self) -> int:
+        # The hash key is (source, target, label); color is not part of it.
+        identity = (self.source, self.target, self.label)
+        return hash(identity)
+
+
+class KnowledgeGraph(BaseModel):
+    """A set of nodes and edges that can be merged and rendered with Graphviz."""
+
+    # Each field defaults to a fresh empty list, so `KnowledgeGraph()` is valid.
+    nodes: Optional[List[Node]] = Field(default_factory=list)
+    edges: Optional[List[Edge]] = Field(default_factory=list)
+
+    def update(self, other: "KnowledgeGraph") -> "KnowledgeGraph":
+        """Return a new graph combining this graph with ``other``.
+
+        Nodes and edges are deduplicated through their hash/equality. The
+        first occurrence is kept, so the resulting order is deterministic.
+        A field that is ``None`` (permitted by the ``Optional`` annotation)
+        is treated as empty. Neither input graph is modified.
+        """
+        merged_nodes = (self.nodes or []) + (other.nodes or [])
+        merged_edges = (self.edges or []) + (other.edges or [])
+        return KnowledgeGraph(
+            # dict.fromkeys dedups like set() but preserves insertion order.
+            nodes=list(dict.fromkeys(merged_nodes)),
+            edges=list(dict.fromkeys(merged_edges)),
+        )
+
+    def draw(self, prefix: Optional[str] = None):
+        """Render the graph to a PNG file and open it in a viewer.
+
+        Args:
+            prefix: File name handed to ``Digraph.render``; ``None`` lets
+                Graphviz pick its default name.
+        """
+        dot = Digraph(comment="Knowledge Graph")
+
+        # Nodes are keyed by their stringified id so edges can refer to them.
+        for node in self.nodes or []:
+            dot.node(str(node.id), node.label, color=node.color)
+
+        for edge in self.edges or []:
+            dot.edge(
+                str(edge.source), str(edge.target), label=edge.label, color=edge.color
+            )
+        dot.render(prefix, format="png", view=True)
+
+
+def generate_graph(input: List[str]) -> KnowledgeGraph:
+    """Build a knowledge graph incrementally from a list of text chunks.
+
+    Each chunk is sent to the chat model together with the current graph
+    state. The nodes and edges it returns are merged into the running graph,
+    which is rendered after every step as ``iteration_<i>``.
+
+    Args:
+        input: Text chunks to process, in order.
+
+    Returns:
+        The graph accumulated over all chunks (an empty graph when ``input``
+        is empty).
+    """
+    cur_state = KnowledgeGraph()
+
+    num_iterations = len(input)
+
+    for i, inp in enumerate(input):
+        new_updates = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo-16k",
+            messages=[
+                {
+                    "role": "system",
+                    "content": """You are an iterative knowledge graph builder.
+                    You are given the current state of the graph, and you must append the nodes and edges 
+                    to it. Do not provide any duplicates and try to reuse nodes as much as possible.""",
+                },
+                {
+                    "role": "user",
+                    # Human-facing part counter is 1-based ("Part 1/4" .. "Part 4/4").
+                    "content": f"""Extract any new nodes and edges from the following:
+                    # Part {i + 1}/{num_iterations} of the input:
+
+                    {inp}""",
+                },
+                {
+                    "role": "user",
+                    "content": f"""Here is the current state of the graph:
+                    {cur_state.model_dump_json(indent=2)}""",
+                },
+            ],
+            response_model=KnowledgeGraph,
+        )  # type: ignore
+
+        # Merge the newly extracted nodes/edges into the running graph.
+        cur_state = cur_state.update(new_updates)
+        # File names stay 0-based (iteration_0, iteration_1, ...).
+        cur_state.draw(prefix=f"iteration_{i}")
+    return cur_state
+
+
+# The text is fed in one chunk at a time, on the assumption that the whole
+# input may not fit into a single prompt.
+text_chunks = [
+    "Jason knows a lot about quantum mechanics. He is a physicist. He is a professor",
+    "Professors are smart.",
+    "Sarah knows Jason and is a student of his.",
+    "Sarah is a student at the University of Toronto. and UofT is in Canada.",
+]
+
+# Build the graph chunk by chunk, then render the final result.
+graph: KnowledgeGraph = generate_graph(text_chunks)
+
+graph.draw("final")
diff --git a/instructor/__init__.py b/instructor/__init__.py
@@ -1,7 +1,7 @@
 from .function_calls import OpenAISchema, openai_function, openai_schema
 from .distil import FinetuneFormat, Instructions
 from .dsl import MultiTask, Maybe, llm_validator, CitationMixin
-from .patch import patch
+from .patch import patch, unpatch
 
 __all__ = [
     "OpenAISchema",
@@ -14,4 +14,5 @@
     "llm_validator",
     "FinetuneFormat",
     "Instructions",
+    "unpatch",
 ]
diff --git a/instructor/patch.py b/instructor/patch.py
@@ -103,7 +103,7 @@ def wrap_chatcompletion(func: Callable) -> Callable:
 
     @wraps(func)
     async def new_chatcompletion_async(
-        response_model=None, validation_context=None, *args, max_retries=0, **kwargs
+        response_model=None, validation_context=None, *args, max_retries=1, **kwargs
     ):
         response_model, new_kwargs = handle_response_model(response_model, kwargs)  # type: ignore
         response, error = await retry_async(
@@ -120,7 +120,7 @@ async def new_chatcompletion_async(
 
     @wraps(func)
     def new_chatcompletion_sync(
-        response_model=None, validation_context=None, *args, max_retries=0, **kwargs
+        response_model=None, validation_context=None, *args, max_retries=1, **kwargs
     ):
         response_model, new_kwargs = handle_response_model(response_model, kwargs)  # type: ignore
         response, error = retry_sync(