molinfo-vienna · sedorfer · Jan 16, 2025 · Mar 29, 2024 · May 14, 2024 · May 14, 2024
diff --git a/docs/conf.py b/docs/conf.py
@@ -17,7 +17,7 @@
 import sys
 sys.path.insert(0, os.path.abspath('..'))
 
-import graphRAG
+import graphRAG_system
 
 
 # -- Project information -----------------------------------------------------

diff --git a/graphRAG/benchmark_questions.txt b/graphRAG/benchmark_questions.txt
diff --git a/graphRAG/benchmark_results/benchmark_results_1.json b/graphRAG/benchmark_results/benchmark_results_1.json
diff --git a/graphRAG/benchmark_results/benchmark_results_10.json b/graphRAG/benchmark_results/benchmark_results_10.json
diff --git a/graphRAG/benchmark_results/benchmark_results_100.json b/graphRAG/benchmark_results/benchmark_results_100.json
diff --git a/graphRAG/benchmark_results/benchmark_results_11.json b/graphRAG/benchmark_results/benchmark_results_11.json
diff --git a/graphRAG/benchmark_results/benchmark_results_12.json b/graphRAG/benchmark_results/benchmark_results_12.json
diff --git a/graphRAG/benchmark_results/benchmark_results_13.json b/graphRAG/benchmark_results/benchmark_results_13.json
diff --git a/graphRAG/benchmark_results/benchmark_results_14.json b/graphRAG/benchmark_results/benchmark_results_14.json
diff --git a/graphRAG/benchmark_results/benchmark_results_15.json b/graphRAG/benchmark_results/benchmark_results_15.json
diff --git a/graphRAG/benchmark_results/benchmark_results_16.json b/graphRAG/benchmark_results/benchmark_results_16.json
diff --git a/graphRAG/benchmark_results/benchmark_results_17.json b/graphRAG/benchmark_results/benchmark_results_17.json
diff --git a/graphRAG/benchmark_results/benchmark_results_18.json b/graphRAG/benchmark_results/benchmark_results_18.json
diff --git a/graphRAG/benchmark_results/benchmark_results_19.json b/graphRAG/benchmark_results/benchmark_results_19.json
diff --git a/graphRAG/benchmark_results/benchmark_results_2.json b/graphRAG/benchmark_results/benchmark_results_2.json
diff --git a/graphRAG/benchmark_results/benchmark_results_20.json b/graphRAG/benchmark_results/benchmark_results_20.json
diff --git a/graphRAG/benchmark_results/benchmark_results_21.json b/graphRAG/benchmark_results/benchmark_results_21.json
diff --git a/graphRAG/benchmark_results/benchmark_results_22.json b/graphRAG/benchmark_results/benchmark_results_22.json
diff --git a/graphRAG/benchmark_results/benchmark_results_23.json b/graphRAG/benchmark_results/benchmark_results_23.json
diff --git a/graphRAG/benchmark_results/benchmark_results_24.json b/graphRAG/benchmark_results/benchmark_results_24.json
diff --git a/graphRAG/benchmark_results/benchmark_results_25.json b/graphRAG/benchmark_results/benchmark_results_25.json
diff --git a/graphRAG/benchmark_results/benchmark_results_26.json b/graphRAG/benchmark_results/benchmark_results_26.json
diff --git a/graphRAG/benchmark_results/benchmark_results_27.json b/graphRAG/benchmark_results/benchmark_results_27.json
diff --git a/graphRAG/benchmark_results/benchmark_results_28.json b/graphRAG/benchmark_results/benchmark_results_28.json
diff --git a/graphRAG/benchmark_results/benchmark_results_29.json b/graphRAG/benchmark_results/benchmark_results_29.json
diff --git a/graphRAG/benchmark_results/benchmark_results_3.json b/graphRAG/benchmark_results/benchmark_results_3.json
diff --git a/graphRAG/benchmark_results/benchmark_results_30.json b/graphRAG/benchmark_results/benchmark_results_30.json
diff --git a/graphRAG/benchmark_results/benchmark_results_31.json b/graphRAG/benchmark_results/benchmark_results_31.json
diff --git a/graphRAG/benchmark_results/benchmark_results_32.json b/graphRAG/benchmark_results/benchmark_results_32.json
diff --git a/graphRAG/benchmark_results/benchmark_results_33.json b/graphRAG/benchmark_results/benchmark_results_33.json
diff --git a/graphRAG/benchmark_results/benchmark_results_34.json b/graphRAG/benchmark_results/benchmark_results_34.json
diff --git a/graphRAG/benchmark_results/benchmark_results_35.json b/graphRAG/benchmark_results/benchmark_results_35.json
diff --git a/graphRAG/benchmark_results/benchmark_results_36.json b/graphRAG/benchmark_results/benchmark_results_36.json
diff --git a/graphRAG/benchmark_results/benchmark_results_37.json b/graphRAG/benchmark_results/benchmark_results_37.json
diff --git a/graphRAG/benchmark_results/benchmark_results_38.json b/graphRAG/benchmark_results/benchmark_results_38.json
diff --git a/graphRAG/benchmark_results/benchmark_results_39.json b/graphRAG/benchmark_results/benchmark_results_39.json
diff --git a/graphRAG/benchmark_results/benchmark_results_4.json b/graphRAG/benchmark_results/benchmark_results_4.json
diff --git a/graphRAG/benchmark_results/benchmark_results_40.json b/graphRAG/benchmark_results/benchmark_results_40.json
diff --git a/graphRAG/benchmark_results/benchmark_results_41.json b/graphRAG/benchmark_results/benchmark_results_41.json
diff --git a/graphRAG/benchmark_results/benchmark_results_42.json b/graphRAG/benchmark_results/benchmark_results_42.json
diff --git a/graphRAG/benchmark_results/benchmark_results_43.json b/graphRAG/benchmark_results/benchmark_results_43.json
diff --git a/graphRAG/benchmark_results/benchmark_results_44.json b/graphRAG/benchmark_results/benchmark_results_44.json
diff --git a/graphRAG/benchmark_results/benchmark_results_45.json b/graphRAG/benchmark_results/benchmark_results_45.json
diff --git a/graphRAG/benchmark_results/benchmark_results_46.json b/graphRAG/benchmark_results/benchmark_results_46.json
diff --git a/graphRAG/benchmark_results/benchmark_results_47.json b/graphRAG/benchmark_results/benchmark_results_47.json
diff --git a/graphRAG/benchmark_results/benchmark_results_48.json b/graphRAG/benchmark_results/benchmark_results_48.json
diff --git a/graphRAG/benchmark_results/benchmark_results_49.json b/graphRAG/benchmark_results/benchmark_results_49.json
diff --git a/graphRAG/benchmark_results/benchmark_results_5.json b/graphRAG/benchmark_results/benchmark_results_5.json
diff --git a/graphRAG/benchmark_results/benchmark_results_50.json b/graphRAG/benchmark_results/benchmark_results_50.json
diff --git a/graphRAG/benchmark_results/benchmark_results_51.json b/graphRAG/benchmark_results/benchmark_results_51.json
diff --git a/graphRAG/benchmark_results/benchmark_results_52.json b/graphRAG/benchmark_results/benchmark_results_52.json
diff --git a/graphRAG/benchmark_results/benchmark_results_53.json b/graphRAG/benchmark_results/benchmark_results_53.json
diff --git a/graphRAG/benchmark_results/benchmark_results_54.json b/graphRAG/benchmark_results/benchmark_results_54.json
diff --git a/graphRAG/benchmark_results/benchmark_results_55.json b/graphRAG/benchmark_results/benchmark_results_55.json
diff --git a/graphRAG/benchmark_results/benchmark_results_56.json b/graphRAG/benchmark_results/benchmark_results_56.json
diff --git a/graphRAG/benchmark_results/benchmark_results_57.json b/graphRAG/benchmark_results/benchmark_results_57.json
diff --git a/graphRAG/benchmark_results/benchmark_results_58.json b/graphRAG/benchmark_results/benchmark_results_58.json
diff --git a/graphRAG/benchmark_results/benchmark_results_59.json b/graphRAG/benchmark_results/benchmark_results_59.json
diff --git a/graphRAG/benchmark_results/benchmark_results_6.json b/graphRAG/benchmark_results/benchmark_results_6.json
diff --git a/graphRAG/benchmark_results/benchmark_results_60.json b/graphRAG/benchmark_results/benchmark_results_60.json
diff --git a/graphRAG/benchmark_results/benchmark_results_61.json b/graphRAG/benchmark_results/benchmark_results_61.json
diff --git a/graphRAG/benchmark_results/benchmark_results_62.json b/graphRAG/benchmark_results/benchmark_results_62.json
diff --git a/graphRAG/benchmark_results/benchmark_results_63.json b/graphRAG/benchmark_results/benchmark_results_63.json
diff --git a/graphRAG/benchmark_results/benchmark_results_64.json b/graphRAG/benchmark_results/benchmark_results_64.json
diff --git a/graphRAG/benchmark_results/benchmark_results_65.json b/graphRAG/benchmark_results/benchmark_results_65.json
diff --git a/graphRAG/benchmark_results/benchmark_results_66.json b/graphRAG/benchmark_results/benchmark_results_66.json
diff --git a/graphRAG/benchmark_results/benchmark_results_67.json b/graphRAG/benchmark_results/benchmark_results_67.json
diff --git a/graphRAG/benchmark_results/benchmark_results_68.json b/graphRAG/benchmark_results/benchmark_results_68.json
diff --git a/graphRAG/benchmark_results/benchmark_results_69.json b/graphRAG/benchmark_results/benchmark_results_69.json
diff --git a/graphRAG/benchmark_results/benchmark_results_7.json b/graphRAG/benchmark_results/benchmark_results_7.json
diff --git a/graphRAG/benchmark_results/benchmark_results_70.json b/graphRAG/benchmark_results/benchmark_results_70.json
diff --git a/graphRAG/benchmark_results/benchmark_results_71.json b/graphRAG/benchmark_results/benchmark_results_71.json
diff --git a/graphRAG/benchmark_results/benchmark_results_72.json b/graphRAG/benchmark_results/benchmark_results_72.json
diff --git a/graphRAG/benchmark_results/benchmark_results_73.json b/graphRAG/benchmark_results/benchmark_results_73.json
diff --git a/graphRAG/benchmark_results/benchmark_results_74.json b/graphRAG/benchmark_results/benchmark_results_74.json
diff --git a/graphRAG/benchmark_results/benchmark_results_75.json b/graphRAG/benchmark_results/benchmark_results_75.json
diff --git a/graphRAG/benchmark_results/benchmark_results_76.json b/graphRAG/benchmark_results/benchmark_results_76.json
diff --git a/graphRAG/benchmark_results/benchmark_results_77.json b/graphRAG/benchmark_results/benchmark_results_77.json
diff --git a/graphRAG/benchmark_results/benchmark_results_78.json b/graphRAG/benchmark_results/benchmark_results_78.json
diff --git a/graphRAG/benchmark_results/benchmark_results_79.json b/graphRAG/benchmark_results/benchmark_results_79.json
diff --git a/graphRAG/benchmark_results/benchmark_results_8.json b/graphRAG/benchmark_results/benchmark_results_8.json
diff --git a/graphRAG/benchmark_results/benchmark_results_80.json b/graphRAG/benchmark_results/benchmark_results_80.json
diff --git a/graphRAG/benchmark_results/benchmark_results_81.json b/graphRAG/benchmark_results/benchmark_results_81.json
diff --git a/graphRAG/benchmark_results/benchmark_results_82.json b/graphRAG/benchmark_results/benchmark_results_82.json
diff --git a/graphRAG/benchmark_results/benchmark_results_83.json b/graphRAG/benchmark_results/benchmark_results_83.json
diff --git a/graphRAG/benchmark_results/benchmark_results_84.json b/graphRAG/benchmark_results/benchmark_results_84.json
diff --git a/graphRAG/benchmark_results/benchmark_results_85.json b/graphRAG/benchmark_results/benchmark_results_85.json
diff --git a/graphRAG/benchmark_results/benchmark_results_86.json b/graphRAG/benchmark_results/benchmark_results_86.json
diff --git a/graphRAG/benchmark_results/benchmark_results_87.json b/graphRAG/benchmark_results/benchmark_results_87.json
diff --git a/graphRAG/benchmark_results/benchmark_results_88.json b/graphRAG/benchmark_results/benchmark_results_88.json
diff --git a/graphRAG/benchmark_results/benchmark_results_89.json b/graphRAG/benchmark_results/benchmark_results_89.json
diff --git a/graphRAG/benchmark_results/benchmark_results_9.json b/graphRAG/benchmark_results/benchmark_results_9.json
diff --git a/graphRAG/benchmark_results/benchmark_results_90.json b/graphRAG/benchmark_results/benchmark_results_90.json
diff --git a/graphRAG/benchmark_results/benchmark_results_91.json b/graphRAG/benchmark_results/benchmark_results_91.json
diff --git a/graphRAG/benchmark_results/benchmark_results_92.json b/graphRAG/benchmark_results/benchmark_results_92.json
diff --git a/graphRAG/benchmark_results/benchmark_results_93.json b/graphRAG/benchmark_results/benchmark_results_93.json
diff --git a/graphRAG/benchmark_results/benchmark_results_94.json b/graphRAG/benchmark_results/benchmark_results_94.json
diff --git a/graphRAG/benchmark_results/benchmark_results_95.json b/graphRAG/benchmark_results/benchmark_results_95.json
diff --git a/graphRAG/benchmark_results/benchmark_results_96.json b/graphRAG/benchmark_results/benchmark_results_96.json
diff --git a/graphRAG/benchmark_results/benchmark_results_97.json b/graphRAG/benchmark_results/benchmark_results_97.json
diff --git a/graphRAG/benchmark_results/benchmark_results_98.json b/graphRAG/benchmark_results/benchmark_results_98.json
diff --git a/graphRAG/benchmark_results/benchmark_results_99.json b/graphRAG/benchmark_results/benchmark_results_99.json
diff --git a/graphRAG/config.py b/graphRAG/config.py
@@ -0,0 +1,7 @@
+import os 
+
+# Connection settings for the Neo4j instance. Every value can be overridden
+# through an environment variable so that no secret has to be committed.
+neo4j_uri = os.getenv("NEO4J_URI", "neo4j+s://4de35fba.databases.neo4j.io")
+neo4j_user = os.getenv("NEO4J_USER", "neo4j")
+# The password is read from NEO4J_PW and is None when that variable is unset.
+# NOTE(review): a password used to be committed here in plain text; it should
+# be treated as leaked and rotated on the database side.
+neo4j_password = os.getenv("NEO4J_PW")
diff --git a/graphRAG/data/README.md b/graphRAG/data/README.md
diff --git a/graphRAG/data/look_and_say.dat b/graphRAG/data/look_and_say.dat
diff --git a/graphRAG/evaluation.py b/graphRAG/evaluation.py
@@ -0,0 +1,74 @@
+import os
+import json
+import copy
+from utils.evaluation_utils import calculate_metrics, print_metrics, print_comparison_dict, compare_result
+
+
+def testset_evaluation(mode: str, directory: str) -> None:
+    """Print the test-set metrics per category for all result files in ``directory``.
+
+    ``mode`` selects which scores are evaluated and must be "manual" or
+    "automated"; any other value raises ``ValueError``. In "manual" mode only
+    the first 20 result files (ordered by their numeric suffix) are used.
+    Within each file the first 17 entries are handled as the specific
+    questions and the remaining ones as the general questions.
+    """
+    if mode not in ("manual", "automated"):
+        raise ValueError("Mode must be one of the following: manual, automated.")
+
+    categories = ["cypher", "context", "answer", "code", "overall"]
+
+    def empty_metrics() -> dict:
+        # one independent set of result lists per category
+        return {name: {"accuracy": [], "precision": [], "recall": [], "f1": [], "counts": []} for name in categories}
+
+    def run_number(filename: str) -> int:
+        # "benchmark_results_12.json" -> 12
+        return int(filename.split('_')[2].split('.')[0])
+
+    metrics_specific = empty_metrics()
+    metrics_general = empty_metrics()
+
+    result_files = [name for name in os.listdir(directory) if name.endswith('.json')]
+    result_files.sort(key=run_number)
+    if mode == "manual":
+        result_files = result_files[:20]
+
+    for name in result_files:
+        with open(os.path.join(directory, name), "r") as handle:
+            results = json.load(handle)
+
+        # specific questions first, general questions afterwards
+        calculate_metrics(results[:17], metrics_specific, categories, mode)
+        calculate_metrics(results[17:], metrics_general, categories, mode)
+
+    print_metrics("SPECIFIC", metrics_specific)
+    print_metrics("GENERAL", metrics_general)
+
+
+def _tally_score_pairs(questions: list, comparison: dict, categories: list) -> None:
+    """Pass the manual/automated score pair of every question to ``compare_result``."""
+    for q in questions:
+        # some entries carry the key with a stray trailing colon; mirror it
+        # under the intended name before it is looked up
+        if "score_context_manual:" in q:
+            q["score_context_manual"] = q["score_context_manual:"]
+        for cat in categories:
+            compare_result(cat, comparison, q[f"score_{cat}_manual"], q[f"score_{cat}_automated"])
+
+
+def compare_manual_automated(directory: str) -> None:
+    """Compare the automated scores with the manual ones for each category.
+
+    Reads the first 20 result files of ``directory`` (ordered by their numeric
+    suffix), treats the first 17 entries of each file as specific questions
+    and the rest as general questions, and prints one comparison table for
+    each of the two groups.
+
+    The manual evaluation serves as the ground truth: the tables count how
+    often a score was classified the same way or as something else by the
+    automated evaluation (the counting itself happens in ``compare_result``).
+    """
+    categories = ["cypher", "context", "answer", "code", "overall"]
+    outcomes = ("tp", "tn", "fp", "fn")
+
+    def empty_comparison() -> dict:
+        # category -> outcome -> outcome -> count, all starting at zero
+        return {cat: {outer: {inner: 0 for inner in outcomes} for outer in outcomes} for cat in categories}
+
+    comparison_specific = empty_comparison()
+    comparison_general = empty_comparison()
+
+    files = sorted([f for f in os.listdir(directory) if f.endswith('.json')], key=lambda x: int(x.split('_')[2].split('.')[0]))
+
+    for file in files[:20]:
+        file_path = os.path.join(directory, file)
+        with open(file_path, "r") as f:
+            results = json.load(f)
+
+        _tally_score_pairs(results[:17], comparison_specific, categories)
+        _tally_score_pairs(results[17:], comparison_general, categories)
+
+    print("#### SPECIFIC:")
+    print_comparison_dict(comparison_specific)
+    print("#### GENERAL:")
+    print_comparison_dict(comparison_general)
+
+
+
+
+if __name__ == "__main__":
+    # location of the benchmark_results_<n>.json files to evaluate
+    results_dir = "/data/shared/projects/graphRAG/graphRAG/graphRAG/evaluation/benchmark_results"
+    # alternative evaluations, currently switched off:
+    # print("######### MANUAL ########")
+    # testset_evaluation("manual", results_dir)
+    # print("######### AUTOMATED ########")
+    # testset_evaluation("automated", results_dir)
+    compare_manual_automated(results_dir)
diff --git a/graphRAG/generator.py b/graphRAG/generator.py
@@ -0,0 +1,48 @@
+from transformers.pipelines.text_generation import TextGenerationPipeline
+
+def generate_answer_qwen(user_prompt: str, system_prompt: str, pipe: TextGenerationPipeline, **kwargs: object) -> str:
+    """Generate an answer for ``user_prompt`` under ``system_prompt``.
+
+    The two prompts are combined into a single "System/User/Assistant" text
+    that is passed to ``pipe``; the text produced after the prompt is
+    returned with surrounding whitespace removed.
+
+    ``kwargs`` are forwarded to ``pipe`` and take precedence over the default
+    generation settings (``max_new_tokens=512``, ``do_sample=True``,
+    ``top_k=5``, ``top_p=0.9``, ``temperature=0.7``).
+
+    Prompt format based on:
+    https://medium.com/@silviaonofrei/code-llamas-knowledge-of-neo4j-s-cypher-query-language-54783d2ad421
+    """
+    marker = "Assistant:"
+    full_prompt = f"System: {system_prompt}\nUser: {user_prompt}\n{marker}"
+
+    generation_args = {
+        "max_new_tokens": 512,
+        "do_sample": True,  # enables sampling and a more varied generation
+        "top_k": 5,  # sample only from the 5 most likely tokens at each step
+        "top_p": 0.9,
+        "temperature": 0.7,  # controls randomness of sampling
+    }
+    # merging instead of passing both avoids "multiple values for keyword
+    # argument" when a caller overrides one of the defaults
+    generation_args.update(kwargs)
+
+    output = pipe(full_prompt, **generation_args)
+    generated_text = output[0]["generated_text"]
+
+    # Cut off the prompt itself rather than searching for the marker: the
+    # prompts (e.g. the retrieved context) may contain "Assistant:" too.
+    if generated_text.startswith(full_prompt):
+        return generated_text[len(full_prompt):].strip()
+    # The prompt was not echoed back verbatim: fall back to the marker, and
+    # to the whole text when there is no marker at all.
+    if marker in generated_text:
+        return generated_text.split(marker, 1)[1].strip()
+    return generated_text.strip()
+
+
+def generate_rag_prompt(retrieved_context: str, cypher_query: str) -> str:
+    """Build the system prompt for the answer model.
+
+    The prompt contains the instructions, the given Cypher query with its
+    retrieved context, and one worked example of a question and its answer.
+    """
+    # literal braces of the example query are doubled so they survive formatting
+    return f"""
+
+    You are a highly intelligent assistant. Your job is to answer user questions using *only* the information from the retrieved context provided from a Neo4j knowledge graph database. If appropriate, add a small python example for the retrieved context, but no cypher queries. Only add this Python code if it is appropriate for the question.  
+    The retrieved context is the result of a cypher query.
+    Ensure that your response strictly relies on the retrieved context, and do not add any information from other sources.
+
+    Cypher query: 
+    {cypher_query}
+    Retrieved Context: 
+    {retrieved_context}
+
+    ### Example 1: 
+    Q: What methods does AromaticSubstructure have?
+    Cypher query: MATCH (c:Class {{name: 'AromaticSubstructure'}})-[:HAS]->(f:Function) RETURN f.name, f.comment
+    Retrieved Context: [['f.name', 'f.comment'], ['__init__', 'Constructs an empty <tt>AromaticSubstructure</tt> instance.'], ['__init__', 'Construct a <tt>AromaticSubstructure</tt> instance that consists of the aromatic atoms and bonds of the molecular graph <em>molgraph</em>.'], ['perceive', 'Replaces the currently stored atoms and bonds by the set of aromatic atoms and bonds of the molecular graph <em>molgraph</em>.']]
+    A: AromaticSubstructure has the following methods:
+    - __init__: Constructs an empty <tt>AromaticSubstructure</tt> instance.
+    - __init__: Construct a <tt>AromaticSubstructure</tt> instance that consists of the aromatic atoms and bonds of the molecular graph <em>molgraph</em>.
+    - perceive: Replaces the currently stored atoms and bonds by the set of aromatic atoms and bonds of the molecular graph <em>molgraph</em>.
+    """
+
+
+
diff --git a/graphRAG/graphRAG.py b/graphRAG/graphRAG.py
@@ -1,29 +1,90 @@
-"""Provide the primary functions."""
+import os
+# Set before the imports that follow; presumably so the Hugging Face and
+# PyTorch libraries see these values when they are loaded (confirm).
+os.environ.update(
+    HF_HOME='/data/local/sschoendorfer',
+    PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True",
+)
 
+from utils.rag_utils import initialize_neo4j, get_kg_schema, get_pipeline_from_model
+from retriever import retrieve_context
+from generator import generate_rag_prompt, generate_answer_qwen
+import json
+import re 
+from transformers.pipelines.text_generation import TextGenerationPipeline
 
-def canvas(with_attribution=True):
-    """
-    Placeholder function to show example docstring (NumPy format).
+# Base idea from this article: 
+# https://medium.com/@silviaonofrei/code-llamas-knowledge-of-neo4j-s-cypher-query-language-54783d2ad421
 
-    Replace this function and doc string for your own project.
 
-    Parameters
-    ----------
-    with_attribution : bool, Optional, default: True
-        Set whether or not to display who the quote is from.
+def question_rag(user_prompt: str, pipe_cypher: TextGenerationPipeline, pipe_answer: TextGenerationPipeline) -> tuple[str, str, str]:
+    """Pass a user prompt through the Graph RAG system.
+
+    Calls ``retrieve_context`` with the Neo4j driver, the prompt, the Cypher
+    pipeline and the knowledge-graph schema, turns the result into a system
+    prompt and lets ``pipe_answer`` generate the final answer.
+
+    Returns ``(cypher_query, query_result, final_answer)``. If
+    ``retrieve_context`` raises, the exception is printed, ``cypher_query`` is
+    the string "None" and ``query_result`` is "Context could not be
+    retrieved"; an answer is still generated from that placeholder context.
+    ``query_result`` is annotated as ``str`` to match the parameter of
+    ``generate_rag_prompt`` - confirm against ``retrieve_context``.
+    """
+    # NOTE(review): the driver is created on every call and never closed here;
+    # check whether initialize_neo4j() hands out a shared instance.
+    driver = initialize_neo4j()  # driver used to talk to the knowledge graph
+    schema = get_kg_schema()  # get the KG schema
+    try:
+        query_result, cypher_query = retrieve_context(driver, user_prompt, pipe_cypher, schema)
+    except Exception as e:
+        print("Exception while retrieving context: ", e)
+        query_result = "Context could not be retrieved"  # the query was not functional
+        cypher_query = "None"  # the string "None" flags non-runnable queries during benchmarking
+
+    system_prompt_rag = generate_rag_prompt(query_result, cypher_query)  # final system prompt for the RAG
 
-    Returns
-    -------
-    quote : str
-        Compiled string including quote and optional attribution.
-    """
+    final_answer = generate_answer_qwen(user_prompt, system_prompt_rag, pipe_answer)  # generate the final answer
 
-    quote = "The code is but a canvas to our imagination."
-    if with_attribution:
-        quote += "\n\t- Adapted from Henry David Thoreau"
-    return quote
+    return cypher_query, query_result, final_answer
+
+
+def _parse_benchmark_questions(testset: str) -> list[dict[str, str]]:
+    """Split the benchmark text into one dict per ``Q:/Query:/C:/A:`` entry."""
+    pattern = r"Q:\s*(.+?)\nQuery:\s*(.+?)\nC:\s*(.+?)\nA:\s*(.+?)(?=\nQ:|\Z)"
+    return [
+        {"Question": question.strip(), "Query": query.strip(), "Context": context.strip(), "Answer": answer.strip()}
+        for question, query, context, answer in re.findall(pattern, testset, re.DOTALL)
+    ]
+
+
+def benchmark_rag(
+    pipe_cypher: TextGenerationPipeline,
+    pipe_answer: TextGenerationPipeline,
+    *,
+    questions_path: str = "/data/shared/projects/graphRAG/graphRAG/graphRAG/benchmark_questions.txt",
+    results_dir: str = "/data/shared/projects/graphRAG/graphRAG/graphRAG/benchmark_results",
+    run_numbers: tuple[int, ...] = (98, 99, 100),
+) -> None:
+    """Run the benchmark questions through the Graph RAG system.
+
+    For every entry of ``run_numbers`` all questions from ``questions_path``
+    are answered with ``question_rag`` and the outcome is written to
+    ``<results_dir>/benchmark_results_<run number>.json``. Each record holds
+    the generated query, context and answer, the reference ("model") query,
+    context and answer from the question file, and empty score fields for the
+    automated and the manual evaluation.
+
+    The keyword-only parameters default to the previously hard-coded paths
+    and runs (98 to 100), so existing calls behave as before.
+    """
+    with open(questions_path, "r") as f:
+        parsed_questions = _parse_benchmark_questions(f.read())
+
+    # empty placeholders, filled in later by the evaluation
+    score_fields = {
+        f"score_{category}_{evaluation}": ""
+        for evaluation in ("automated", "manual")
+        for category in ("cypher", "context", "answer", "code", "overall")
+    }
+
+    for run_number in run_numbers:
+        benchmark = []
+        for question in parsed_questions:
+            cypher_query, query_result, final_answer = question_rag(question["Question"], pipe_cypher, pipe_answer)
+            benchmark.append({
+                "user_prompt": question["Question"],
+                "cypher_query": cypher_query,
+                "retrieved_context": query_result,
+                "final_answer": final_answer,
+                "model_cypher": question["Query"],
+                "model_answer": question["Answer"],
+                "model_context": question["Context"],
+                **score_fields,
+            })
+
+        with open(os.path.join(results_dir, f"benchmark_results_{run_number}.json"), "w") as file:
+            json.dump(benchmark, file, indent=4)
 
 
 if __name__ == "__main__":
-    # Do something if this file is invoked on its own
-    print(canvas())
+    # one pipeline for generating the Cypher query, one for the final answer
+    cypher_pipeline = get_pipeline_from_model("codellama/CodeLlama-13b-Instruct-hf")
+    answer_pipeline = get_pipeline_from_model("Qwen/Qwen2.5-7B-Instruct")
+
+    # ask a single example question and show only the generated answer
+    *_, answer = question_rag("How can I read in molecules", cypher_pipeline, answer_pipeline)
+    print(answer)
+
+    # benchmark_rag(cypher_pipeline, answer_pipeline)
+
+