From 785db0a772d4d318a100e98288335ef5b610a2f0 Mon Sep 17 00:00:00 2001
From: Michael Ilie <mcilieg@gmail.com>
Date: Mon, 13 May 2024 15:43:00 -0400
Subject: [PATCH 1/5] Increase font size for consistency

---
 .../experiments/graph_dataset_citations.py           | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/prompt_systematic_review/experiments/graph_dataset_citations.py b/src/prompt_systematic_review/experiments/graph_dataset_citations.py
index 3c0463b..e3a6dc9 100644
--- a/src/prompt_systematic_review/experiments/graph_dataset_citations.py
+++ b/src/prompt_systematic_review/experiments/graph_dataset_citations.py
@@ -77,10 +77,11 @@ def graph_dataset_citations():
 
     plt.figure(figsize=(10, 6))
     plt.bar(datasets, counts, color="#2E8991")
-    plt.xlabel("Dataset Name")
-    plt.ylabel("Number of Mentions")
-    plt.title("Dataset Mentions in Papers")
-    plt.xticks(rotation=45, ha="right")
+    plt.xlabel("Dataset Name",fontsize=20)
+    plt.ylabel("Number of Mentions",fontsize=20)
+    plt.title("Dataset Mentions in Papers",fontsize=30)
+    plt.xticks(rotation=45, ha="right",fontsize=15)
+    plt.yticks(fontsize=15)
     plt.tight_layout()
 
     output_dir = os.path.join(DataFolderPath, "experiments_output")
@@ -95,6 +96,5 @@ class Experiment:
     def run():
         graph_dataset_citations()
 
-
 if __name__ == "__main__":
-    graph_dataset_citations()
+    graph_dataset_citations()
\ No newline at end of file

From e047148f6474bbe64c83a7bf53e03979871234b4 Mon Sep 17 00:00:00 2001
From: Michael Ilie <mcilieg@gmail.com>
Date: Wed, 29 May 2024 11:08:16 -0400
Subject: [PATCH 2/5] formatted

---
 .../experiments/graph_dataset_citations.py            | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/prompt_systematic_review/experiments/graph_dataset_citations.py b/src/prompt_systematic_review/experiments/graph_dataset_citations.py
index e3a6dc9..5e7adfd 100644
--- a/src/prompt_systematic_review/experiments/graph_dataset_citations.py
+++ b/src/prompt_systematic_review/experiments/graph_dataset_citations.py
@@ -77,10 +77,10 @@ def graph_dataset_citations():
 
     plt.figure(figsize=(10, 6))
     plt.bar(datasets, counts, color="#2E8991")
-    plt.xlabel("Dataset Name",fontsize=20)
-    plt.ylabel("Number of Mentions",fontsize=20)
-    plt.title("Dataset Mentions in Papers",fontsize=30)
-    plt.xticks(rotation=45, ha="right",fontsize=15)
+    plt.xlabel("Dataset Name", fontsize=20)
+    plt.ylabel("Number of Mentions", fontsize=20)
+    plt.title("Dataset Mentions in Papers", fontsize=30)
+    plt.xticks(rotation=45, ha="right", fontsize=15)
     plt.yticks(fontsize=15)
     plt.tight_layout()
 
@@ -96,5 +96,6 @@ class Experiment:
     def run():
         graph_dataset_citations()
 
+
 if __name__ == "__main__":
-    graph_dataset_citations()
\ No newline at end of file
+    graph_dataset_citations()

From 255332646a4d8b9034347b6c53f733d91d155674 Mon Sep 17 00:00:00 2001
From: hudssntao <hudsontao@gmail.com>
Date: Wed, 29 May 2024 11:16:25 -0400
Subject: [PATCH 3/5] fix: update experiments

---
 README.md                                     | 19 ++++-----
 .../experiments/__init__.py                   |  7 +++-
 .../experiments/download_mmlu.py              | 41 +++++++++++--------
 .../experiments/graph.py                      |  7 +++-
 .../experiments/graph_internal_references.py  |  8 +++-
 5 files changed, 49 insertions(+), 33 deletions(-)

diff --git a/README.md b/README.md
index 8e090e6..369839f 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ For HF: https://huggingface.co/docs/hub/security-tokens, also run `huggingface-c
 Put your key in like:
 
 `OPENAI_API_KEY=sk-...`
+`SEMANTIC_SCHOLAR_API_KEY=...`
 `HF_TOKEN=...`
 
 Then to load the .env file, type:
@@ -24,16 +25,14 @@ py.test --envfile path/to/.env
 In the case that you have several .env files, create a new env_files in the pytest config folder and type:
 
 env_files =
-    .env
-    .test.env
-    .deploy.env
-    
+    .env
+    .test.env
+    .deploy.env
+
 ## blacklist.csv
 
 Papers we should not include due to being poorly written or AI generated
 
-
-
 ## Notes
 
 - Sometimes a paper title may appear differently on the arXiv API. For example, "Visual Attention-Prompted Prediction and Learning" (arXiv:2310.08420), according to arXiv API is titled "A visual encoding model based on deep neural networks and transfer learning"
@@ -41,8 +40,6 @@ Papers we should not include due to being poorly written or AI generated
 - When testing APIs, there may be latency and aborted connections
 
 - Publication dates of papers from IEEE are missing the day about half the time. They also may come in any of the following formats
-    - "April 1988"
-    - "2-4 April 2002"
-    - "29 Nov.-2 Dec. 2022"
- 
-
+  - "April 1988"
+  - "2-4 April 2002"
+  - "29 Nov.-2 Dec. 2022"
diff --git a/src/prompt_systematic_review/experiments/__init__.py b/src/prompt_systematic_review/experiments/__init__.py
index 986de33..79cdf78 100644
--- a/src/prompt_systematic_review/experiments/__init__.py
+++ b/src/prompt_systematic_review/experiments/__init__.py
@@ -12,7 +12,9 @@
 from . import graph_gpt_3_5_benchmarks
 from . import run_tomotopy
 from . import topicgpt
-
+from . import download_mmlu
+from . import graph_internal_references
+from . import graph
 
 experiments = [
     count_tool_mentions.Experiment,
@@ -28,4 +30,7 @@
     graph_gpt_3_5_benchmarks.Experiment,
     run_tomotopy.Experiment,
     topicgpt.Experiment,
+    download_mmlu.Experiment,
+    graph_internal_references.Experiment,
+    graph.Experiment,
 ]
diff --git a/src/prompt_systematic_review/experiments/download_mmlu.py b/src/prompt_systematic_review/experiments/download_mmlu.py
index 854ca57..bbb9520 100644
--- a/src/prompt_systematic_review/experiments/download_mmlu.py
+++ b/src/prompt_systematic_review/experiments/download_mmlu.py
@@ -29,27 +29,32 @@ def move_and_rename_extracted_contents(extracted_folder, final_folder, new_folde
 
     return mmlu_folder
 
+def download_mmlu():
+    # URL of the .tar file
+    url = "https://people.eecs.berkeley.edu/~hendrycks/data.tar"
 
-# URL of the .tar file
-url = "https://people.eecs.berkeley.edu/~hendrycks/data.tar"
+    # Temporary paths
+    download_path = "./data.tar"
+    extract_path = "./extracted"
 
-# Temporary paths
-download_path = "./data.tar"
-extract_path = "./extracted"
+    # Final path
+    final_data_folder = "./data"
+    final_folder_name = "mmlu"
 
-# Final path
-final_data_folder = "./data"
-final_folder_name = "mmlu"
+    # Download and extract the file
+    download_and_extract(url, download_path)
+    extract_tar(download_path, extract_path)
 
-# Download and extract the file
-download_and_extract(url, download_path)
-extract_tar(download_path, extract_path)
+    # Move and rename the contents of the extracted folder
+    move_and_rename_extracted_contents(extract_path, final_data_folder, final_folder_name)
+
+    # Cleanup
+    if os.path.exists(download_path):
+        os.remove(download_path)
+    if os.path.exists(extract_path):
+        shutil.rmtree(extract_path)
 
-# Move and rename the contents of the extracted folder
-move_and_rename_extracted_contents(extract_path, final_data_folder, final_folder_name)
 
-# Cleanup
-if os.path.exists(download_path):
-    os.remove(download_path)
-if os.path.exists(extract_path):
-    shutil.rmtree(extract_path)
+class Experiment:
+    def run():
+        download_mmlu()
\ No newline at end of file
diff --git a/src/prompt_systematic_review/experiments/graph.py b/src/prompt_systematic_review/experiments/graph.py
index 6893695..5effa69 100644
--- a/src/prompt_systematic_review/experiments/graph.py
+++ b/src/prompt_systematic_review/experiments/graph.py
@@ -102,7 +102,7 @@ def run(self, csv_file_path, technique_to_title):
         )
 
 
-if __name__ == "__main__":
+def run_graph():
     main = Main()
     titles = [
         "Bounding the Capabilities of Large Language Models in Open Text Generation with Prompt Constraints",
@@ -208,3 +208,8 @@ def run(self, csv_file_path, technique_to_title):
 
     csv_file_path = "path_to_your_csv.csv"
     main.run(csv_file_path, technique_to_title)
+
+
+class Experiment:
+    def run():
+        run_graph()
\ No newline at end of file
diff --git a/src/prompt_systematic_review/experiments/graph_internal_references.py b/src/prompt_systematic_review/experiments/graph_internal_references.py
index a2a6795..d9d9f8d 100644
--- a/src/prompt_systematic_review/experiments/graph_internal_references.py
+++ b/src/prompt_systematic_review/experiments/graph_internal_references.py
@@ -7,7 +7,6 @@
 from dotenv import load_dotenv
 import csv
 import random
-import scipy
 import networkx as nx
 import matplotlib.pyplot as plt
 import textwrap
@@ -428,7 +427,7 @@ def visualize_chart(self, technique_to_title):
         )
 
 
-if __name__ == "__main__":
+def graph_internal_references():
     main = Main()
 
     titles = [
@@ -533,3 +532,8 @@ def visualize_chart(self, technique_to_title):
         "Rephrase and Respond: Let Large Language Models Ask Better Questions for Themselves": "Rephrase and Respond",
     }
     main.visualize_chart(technique_to_title)
+    
+    
+class Experiment:
+    def run():
+        graph_internal_references()

From 24068607df192d60d9751b59c169268378b31aff Mon Sep 17 00:00:00 2001
From: hudssntao <hudsontao@gmail.com>
Date: Wed, 29 May 2024 11:21:44 -0400
Subject: [PATCH 4/5] style: black

---
 .../experiments/download_mmlu.py                 |  7 +++++--
 .../experiments/graph.py                         |  2 +-
 .../experiments/graph_internal_references.py     | 16 ++++++++--------
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/prompt_systematic_review/experiments/download_mmlu.py b/src/prompt_systematic_review/experiments/download_mmlu.py
index bbb9520..45a143d 100644
--- a/src/prompt_systematic_review/experiments/download_mmlu.py
+++ b/src/prompt_systematic_review/experiments/download_mmlu.py
@@ -29,6 +29,7 @@ def move_and_rename_extracted_contents(extracted_folder, final_folder, new_folde
 
     return mmlu_folder
 
+
 def download_mmlu():
     # URL of the .tar file
     url = "https://people.eecs.berkeley.edu/~hendrycks/data.tar"
@@ -46,7 +47,9 @@ def download_mmlu():
     extract_tar(download_path, extract_path)
 
     # Move and rename the contents of the extracted folder
-    move_and_rename_extracted_contents(extract_path, final_data_folder, final_folder_name)
+    move_and_rename_extracted_contents(
+        extract_path, final_data_folder, final_folder_name
+    )
 
     # Cleanup
     if os.path.exists(download_path):
@@ -57,4 +60,4 @@ def download_mmlu():
 
 class Experiment:
     def run():
-        download_mmlu()
\ No newline at end of file
+        download_mmlu()
diff --git a/src/prompt_systematic_review/experiments/graph.py b/src/prompt_systematic_review/experiments/graph.py
index 5effa69..4832940 100644
--- a/src/prompt_systematic_review/experiments/graph.py
+++ b/src/prompt_systematic_review/experiments/graph.py
@@ -212,4 +212,4 @@ def run_graph():
 
 class Experiment:
     def run():
-        run_graph()
\ No newline at end of file
+        run_graph()
diff --git a/src/prompt_systematic_review/experiments/graph_internal_references.py b/src/prompt_systematic_review/experiments/graph_internal_references.py
index d9d9f8d..0270fc3 100644
--- a/src/prompt_systematic_review/experiments/graph_internal_references.py
+++ b/src/prompt_systematic_review/experiments/graph_internal_references.py
@@ -187,9 +187,9 @@ def process_papers(self, csv_file_path):
                             arxiv_paper_id
                         )
                     else:
-                        unmatched_papers[
-                            row.get("title", "").strip()
-                        ] = "Source not supported"
+                        unmatched_papers[row.get("title", "").strip()] = (
+                            "Source not supported"
+                        )
                         continue
 
                     if paper_id:
@@ -197,9 +197,9 @@ def process_papers(self, csv_file_path):
                         if references is not None:
                             paper_references[paper_id] = references
                         else:
-                            unmatched_papers[
-                                row["title"]
-                            ] = "No references found or error occurred"
+                            unmatched_papers[row["title"]] = (
+                                "No references found or error occurred"
+                            )
                     else:
                         print(f"Paper Id Could not be found for: {row}")
         else:
@@ -532,8 +532,8 @@ def graph_internal_references():
         "Rephrase and Respond: Let Large Language Models Ask Better Questions for Themselves": "Rephrase and Respond",
     }
     main.visualize_chart(technique_to_title)
-    
-    
+
+
 class Experiment:
     def run():
         graph_internal_references()

From 84fb279726c61ff967407c03ae52cfe7d46984bc Mon Sep 17 00:00:00 2001
From: hudssntao <hudsontao@gmail.com>
Date: Wed, 29 May 2024 11:39:59 -0400
Subject: [PATCH 5/5] style: exclude file

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ebfaa8f..375a5bf 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,4 +3,4 @@ repos:
     rev: 23.10.1
     hooks:
     -   id: black
-        exclude: src/prompt_systematic_review/experiments/find_internal_reference_count.py
\ No newline at end of file
+        exclude: src/prompt_systematic_review/experiments/(find_internal_reference_count|graph_internal_references)\.py
\ No newline at end of file