From 9199acaf9565e245395f7a723ce14d4e55d92f48 Mon Sep 17 00:00:00 2001
From: nbotti
Date: Sat, 28 Sep 2024 16:35:48 -0400
Subject: [PATCH 1/3] add batch script for generating reviews

---
 .gitignore         |   3 +++
 generate_review.py | 100 +++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt   |   2 ++
 3 files changed, 105 insertions(+)
 create mode 100644 generate_review.py

diff --git a/.gitignore b/.gitignore
index 4364d7e9..5b2af7c7 100755
--- a/.gitignore
+++ b/.gitignore
@@ -171,3 +171,6 @@ data/
 ICLR2022-OpenReviewData/
 templates/*/run_0/
 templates/*/*.png
+
+# outputs
+outputs/*
\ No newline at end of file
diff --git a/generate_review.py b/generate_review.py
new file mode 100644
index 00000000..94b7863a
--- /dev/null
+++ b/generate_review.py
@@ -0,0 +1,100 @@
+"""Generate LLM reviews for a single paper PDF.
+
+Each review is written as JSON to ``outputs/<paper name>_<i>.json``, where
+``<i>`` runs from 1 to the value of ``-n``.
+"""
+import os
+import json
+import tempfile
+from dotenv import load_dotenv
+import argparse
+import openai
+from ai_scientist.perform_review import load_paper, perform_review
+from PyPDF2 import PdfReader, PdfWriter
+
+OUTPUT_DIR = "outputs"
+
+
+def parse_args():
+    """Parse the command line arguments."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", default="gpt-4o-2024-05-13", help="Model name")
+    parser.add_argument("paper", help="Path to the PDF file")
+    parser.add_argument("-n", type=int, default=1, help="Number of reviews")
+    parser.add_argument("--num_reflections", type=int, default=5, help="Number of reflections")
+    parser.add_argument("--num_fs_examples", type=int, default=1, help="Number of FS examples")
+    parser.add_argument("--num_reviews_ensemble", type=int, default=5, help="Number of reviews in ensemble")
+    parser.add_argument("--temperature", type=float, default=0.1, help="Temperature")
+    parser.add_argument("--max-pages", type=int, default=0, help="Maximum number of pages of the paper to process. Useful to exclude appendixes. Will truncate any pages after the number you specify.")
+    parser.add_argument("--openai-api-key", type=str, default=os.getenv("OPENAI_API_KEY"))
+    return parser.parse_args()
+
+
+def write_working_copy(src_path, dest_path, max_pages):
+    """Write the PDF at src_path to dest_path, keeping at most max_pages pages.
+
+    A max_pages of 0 or less copies the file unchanged.
+    """
+    if max_pages > 0:
+        print(f"Truncating {src_path} to {max_pages} pages")
+        # Close the source as soon as its pages have been written out.
+        with open(src_path, "rb") as src_file:
+            input_pdf = PdfReader(src_file)
+            output_pdf = PdfWriter()
+
+            for page_num in range(min(max_pages, len(input_pdf.pages))):
+                output_pdf.add_page(input_pdf.pages[page_num])
+
+            with open(dest_path, "wb") as dest_file:
+                output_pdf.write(dest_file)
+    else:
+        # Copy the file as-is
+        with open(src_path, "rb") as src_file, open(dest_path, "wb") as dest_file:
+            dest_file.write(src_file.read())
+
+
+def main():
+    """Review the paper args.n times and save each review as JSON."""
+    load_dotenv()
+    args = parse_args()
+
+    openai.api_key = args.openai_api_key
+    # openai.OpenAI() does not read the module-level openai.api_key, so pass
+    # the key explicitly; otherwise --openai-api-key is silently ignored.
+    client = openai.OpenAI(api_key=args.openai_api_key)
+
+    # splitext drops only the final extension, whatever its spelling.
+    paper_name = os.path.splitext(os.path.basename(args.paper))[0]
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+    # A private temporary directory is removed even if a review fails and
+    # cannot collide with a temp.pdf left by another run.
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        tmp_pdf = os.path.join(tmp_dir, "temp.pdf")
+        write_working_copy(args.paper, tmp_pdf, args.max_pages)
+
+        # Repeat the perform_review function args.n times
+        for i in range(args.n):
+            print(f"Starting Review {i+1} of {args.n}")
+            # Get the review dict of the review
+            review = perform_review(
+                load_paper(tmp_pdf),
+                args.model,
+                client,
+                num_reflections=args.num_reflections,
+                num_fs_examples=args.num_fs_examples,
+                num_reviews_ensemble=args.num_reviews_ensemble,
+                temperature=args.temperature,
+            )
+
+            # Output review as JSON
+            output_file = os.path.join(OUTPUT_DIR, f"{paper_name}_{i+1}.json")
+            with open(output_file, "w") as f:
+                json.dump(review, f)
+
+        print("Cleaning up temporary files")
+    print("Done")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
index 77e755fe..90e11395 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,3 +15,5 @@ datasets
 tiktoken
 wandb
 tqdm
+# pdf tool
+PyPDF2
\ No newline at end of file
From 0cc46d113f6f9f0ba8f70c8c31ac238fba7a73ce Mon Sep 17 00:00:00 2001
From: nbotti
Date: Sat, 28 Sep 2024 16:43:08 -0400
Subject: [PATCH 2/3] update readme.md

---
 README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/README.md
b/README.md
index fe570d8b..19715135 100644
--- a/README.md
+++ b/README.md
@@ -214,6 +214,14 @@ review["Decision"] # ['Accept', 'Reject']
 review["Weaknesses"] # List of weaknesses (str)
 ```
 
+To generate a quick review for a single paper:
+
+```bash
+python genereate_review.py --model gpt-4o-2024-05-13 -n 1 --num_reflections 5 --num_fs_examples 1 --num_reviews_ensemble 5 --temperature 0.1 --max-pages 0 --openai-api-key sk-1234 ~/path/to/paper.pdf
+```
+
+This writes `n` reviews to `outputs/paper_1.json` through `outputs/paper_{n}.json`.
+
 To run batch analysis:
 
 ```bash
From e5c46894f2c04fccc7e78b1ae49ec026323ab32a Mon Sep 17 00:00:00 2001
From: nbotti
Date: Sat, 28 Sep 2024 16:45:13 -0400
Subject: [PATCH 3/3] fix typo

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 19715135..53a8f49e 100644
--- a/README.md
+++ b/README.md
@@ -217,7 +217,7 @@ review["Weaknesses"] # List of weaknesses (str)
 To generate a quick review for a single paper:
 
 ```bash
-python genereate_review.py --model gpt-4o-2024-05-13 -n 1 --num_reflections 5 --num_fs_examples 1 --num_reviews_ensemble 5 --temperature 0.1 --max-pages 0 --openai-api-key sk-1234 ~/path/to/paper.pdf
+python generate_review.py --model gpt-4o-2024-05-13 -n 1 --num_reflections 5 --num_fs_examples 1 --num_reviews_ensemble 5 --temperature 0.1 --max-pages 0 --openai-api-key sk-1234 ~/path/to/paper.pdf
 ```
 
 This writes `n` reviews to `outputs/paper_1.json` through `outputs/paper_{n}.json`.