Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

The segmented paper template allows weak LLMs to write articles #114

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 55 additions & 24 deletions ai_scientist/perform_writeup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import re
import shutil
import subprocess

from typing import Optional, Tuple

from ai_scientist.generate_ideas import search_for_papers
Expand All @@ -15,7 +16,7 @@
def generate_latex(coder, folder_name, pdf_file, timeout=30, num_error_corrections=5):
folder = osp.abspath(folder_name)
cwd = osp.join(folder, "latex") # Fixed potential issue with path
writeup_file = osp.join(cwd, "template.tex")
writeup_file = osp.join(cwd, "template_segments.tex")

# Check all references are valid and in the references.bib file
with open(writeup_file, "r") as f:
Expand All @@ -27,15 +28,15 @@ def generate_latex(coder, folder_name, pdf_file, timeout=30, num_error_correctio
re.DOTALL,
)
if references_bib is None:
print("No references.bib found in template.tex")
print("No references.bib found in template_segments.tex")
return
bib_text = references_bib.group(1)
cites = [cite.strip() for item in cites for cite in item.split(",")]
for cite in cites:
if cite not in bib_text:
print(f"Reference {cite} not found in references.")
prompt = f"""Reference {cite} not found in references.bib. Is this included under a different name?
If so, please modify the citation in template.tex to match the name in references.bib at the top. Otherwise, remove the cite."""
If so, please modify the citation in template_segments.tex to match the name in references.bib at the top. Otherwise, remove the cite."""
coder.run(prompt)

# Check all included figures are actually in the directory.
Expand Down Expand Up @@ -79,7 +80,7 @@ def generate_latex(coder, folder_name, pdf_file, timeout=30, num_error_correctio
# Filter trivial bugs in chktex
check_output = os.popen(f"chktex {writeup_file} -q -n2 -n24 -n13 -n1").read()
if check_output:
prompt = f"""Please fix the following LaTeX errors in `template.tex` guided by the output of `chktek`:
prompt = f"""Please fix the following LaTeX errors in `template_segments.tex` guided by the output of `chktek`:
{check_output}.

Make the minimal fix required and do not remove or change any packages.
Expand All @@ -95,10 +96,10 @@ def compile_latex(cwd, pdf_file, timeout=30):
print("GENERATING LATEX")

commands = [
["pdflatex", "-interaction=nonstopmode", "template.tex"],
["pdflatex", "-interaction=nonstopmode", "template_segments.tex"],
["bibtex", "template"],
["pdflatex", "-interaction=nonstopmode", "template.tex"],
["pdflatex", "-interaction=nonstopmode", "template.tex"],
["pdflatex", "-interaction=nonstopmode", "template_segments.tex"],
["pdflatex", "-interaction=nonstopmode", "template_segments.tex"],
]

for command in commands:
Expand All @@ -122,7 +123,7 @@ def compile_latex(cwd, pdf_file, timeout=30):

# Attempt to move the PDF to the desired location
try:
shutil.move(osp.join(cwd, "template.pdf"), pdf_file)
shutil.move(osp.join(cwd, "template_segments.pdf"), pdf_file)
except FileNotFoundError:
print("Failed to rename PDF.")

Expand Down Expand Up @@ -401,18 +402,48 @@ def get_citation_aider_prompt(
def perform_writeup(
idea, folder_name, coder, cite_client, cite_model, num_cite_rounds=20
):
# CURRENTLY ASSUMES LATEX
abstract_prompt = f"""We've provided the `latex/template.tex` file to the project. We will be filling it in section by section.
dic_section_files = {"TITLE": "TITLE_HERE.tex",
"ABSTRACT" : "ABSTRACT_HERE.tex",
"Introduction" : "INTRO_HERE.tex",
"Background" : "BACKGROUND_HERE.tex",
"Related work" : "RELATED_WORK_HERE.tex",
"Method" : "METHOD_HERE.tex",
"Experimental Setup" : "EXPERIMENTAL_SETUP_HERE.tex",
"Results" : "RESULTS_HERE.tex",
"Conclusion": "CONCLUSIONS_HERE.tex",
}

title_prompt = f"""We've provided the file to the project.
We will be filling it in section by section. Every section is located in a separate file.

First, please fill the "Title" sections of the writeup in file {dic_section_files["TITLE"]}.

Before every paragraph, please include a brief description of what you plan to write in that paragraph in a comment.

Be sure to first name the file and then filling.
"""
coder_out = coder.run(title_prompt)
coder_out = coder.run(
refinement_prompt.format(section="Title")
.replace(r"{{", "{")
.replace(r"}}", "}")
)

First, please fill in the "Title" and "Abstract" sections of the writeup.
# CURRENTLY ASSUMES LATEX
abstract_prompt = f"""We've provided the `latex/template_segments.tex` file to the project.
We will be filling it in section by section. Every section is located in a separate file.

Some tips are provided below:
{per_section_tips["Abstract"]}
First, please fill the "Abstract" sections of the writeup in file {dic_section_files["ABSTRACT"]}.

Some tips are provided below:
{per_section_tips["Abstract"]}

Before every paragraph, please include a brief description of what you plan to write in that paragraph in a comment.

Be sure to first name the file and then filling.
"""

Before every paragraph, please include a brief description of what you plan to write in that paragraph in a comment.

Be sure to first name the file and use *SEARCH/REPLACE* blocks to perform these edits.
"""
coder_out = coder.run(abstract_prompt)
coder_out = coder.run(
refinement_prompt.format(section="Abstract")
Expand All @@ -427,7 +458,7 @@ def perform_writeup(
"Results",
"Conclusion",
]:
section_prompt = f"""Please fill in the {section} of the writeup. Some tips are provided below:
section_prompt = f"""Please fill in the {section} of the writeup in file {dic_section_files[section]}. Some tips are provided below:
{per_section_tips[section]}

Be sure to use \cite or \citet where relevant, referring to the works provided in the file.
Expand All @@ -438,7 +469,7 @@ def perform_writeup(

Before every paragraph, please include a brief description of what you plan to write in that paragraph in a comment.

Be sure to first name the file and use *SEARCH/REPLACE* blocks to perform these edits.
Be sure to first name the file and then filling.
"""
coder_out = coder.run(section_prompt)
coder_out = coder.run(
Expand All @@ -448,7 +479,7 @@ def perform_writeup(
)

# SKETCH THE RELATED WORK
section_prompt = f"""Please fill in the Related Work of the writeup. Some tips are provided below:
section_prompt = f"""Please fill in the Related Work of the writeup in file {dic_section_files['Related work']}. Some tips are provided below:

{per_section_tips["Related Work"]}

Expand All @@ -457,13 +488,13 @@ def perform_writeup(
The related work should be concise, only plan to discuss the most relevant work.
Do not modify `references.bib` to add any new citations, this will be filled in at a later stage.

Be sure to first name the file and use *SEARCH/REPLACE* blocks to perform these edits.
Be sure to first name the file and then filling.
"""
coder_out = coder.run(section_prompt)

# Fill paper with cites.
for _ in range(num_cite_rounds):
with open(osp.join(folder_name, "latex", "template.tex"), "r") as f:
with open(osp.join(folder_name, "latex", "template_segments.tex"), "r") as f:
draft = f.read()
prompt, done = get_citation_aider_prompt(
cite_client, cite_model, draft, _, num_cite_rounds
Expand All @@ -476,7 +507,7 @@ def perform_writeup(
# insert this into draft before the "\end{filecontents}" line
search_str = r"\end{filecontents}"
draft = draft.replace(search_str, f"{bibtex_string}{search_str}")
with open(osp.join(folder_name, "latex", "template.tex"), "w") as f:
with open(osp.join(folder_name, "latex", "template_segments.tex"), "w") as f:
f.write(draft)
coder_out = coder.run(prompt)

Expand Down Expand Up @@ -537,7 +568,7 @@ def perform_writeup(
vis_file = osp.join(folder_name, "plot.py")
notes = osp.join(folder_name, "notes.txt")
model = args.model
writeup_file = osp.join(folder_name, "latex", "template.tex")
writeup_file = osp.join(folder_name, "latex", "template_segments.tex")
ideas_file = osp.join(folder_name, "ideas.json")
with open(ideas_file, "r") as f:
ideas = json.load(f)
Expand All @@ -561,7 +592,7 @@ def perform_writeup(
io=io,
stream=False,
use_git=False,
edit_format="diff",
edit_format="whole",
)
if args.no_writing:
generate_latex(coder, args.folder, f"{args.folder}/test.pdf")
Expand Down
18 changes: 16 additions & 2 deletions launch_scientist.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,22 @@ def do_idea(
print(f"*Starting Writeup*")
## PERFORM WRITEUP
if writeup == "latex":
writeup_file = osp.join(folder_name, "latex", "template.tex")
fnames = [exp_file, writeup_file, notes]
writeup_file = osp.join(folder_name, "latex", "template_segments.tex")
TITLE_file = osp.join(folder_name, "latex", "TITLE_HERE.tex")
ABSTRACT_file = osp.join(folder_name, "latex", "ABSTRACT_HERE.tex")


INTRO_file = osp.join(folder_name, "latex", "INTRO_HERE.tex")
RELATED_WORK_file = osp.join(folder_name, "latex", "RELATED_WORK_HERE.tex")
BACKGROUND_file = osp.join(folder_name, "latex", "BACKGROUND_HERE.tex")
METHOD_file = osp.join(folder_name, "latex", "METHOD_HERE.tex")
EXPERIMENTAL_SETUP_file = osp.join(folder_name, "latex", "EXPERIMENTAL_SETUP_HERE.tex")
RESULTS_file = osp.join(folder_name, "latex", "RESULTS_HERE.tex")
CONCLUSIONS_file = osp.join(folder_name, "latex", "CONCLUSIONS_HERE.tex")

fnames = [exp_file, writeup_file, notes,\
TITLE_file, ABSTRACT_file, INTRO_file, RELATED_WORK_file, BACKGROUND_file,\
METHOD_file, EXPERIMENTAL_SETUP_file, RESULTS_file, CONCLUSIONS_file]
if model == "deepseek-coder-v2-0724":
main_model = Model("deepseek/deepseek-coder")
elif model == "llama3.1-405b":
Expand Down
1 change: 1 addition & 0 deletions templates/2d_diffusion/latex/ABSTRACT_HERE.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ABSTRACT*HERE
1 change: 1 addition & 0 deletions templates/2d_diffusion/latex/BACKGROUND_HERE.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
BACKGROUND*HERE
1 change: 1 addition & 0 deletions templates/2d_diffusion/latex/CONCLUSIONS_HERE.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
CONCLUSIONS*HERE
12 changes: 12 additions & 0 deletions templates/2d_diffusion/latex/EXPERIMENTAL_SETUP_HERE.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
EXPERIMENTAL*SETUP*HERE

% EXAMPLE FIGURE: REPLACE AND ADD YOUR OWN FIGURES / CAPTIONS
\begin{figure}[t]
\centering
\begin{subfigure}{0.9\textwidth}
\includegraphics[width=\textwidth]{generated_images.png}
\label{fig:diffusion-samples}
\end{subfigure}
\caption{PLEASE FILL IN CAPTION HERE}
\label{fig:first_figure}
\end{figure}
1 change: 1 addition & 0 deletions templates/2d_diffusion/latex/INTRO_HERE.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
INTRO*HERE
1 change: 1 addition & 0 deletions templates/2d_diffusion/latex/METHOD_HERE.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
METHOD*HERE
1 change: 1 addition & 0 deletions templates/2d_diffusion/latex/RELATED_WORK_HERE.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
RELATED*WORK*HERE
1 change: 1 addition & 0 deletions templates/2d_diffusion/latex/RESULTS_HERE.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
RESULTS*HERE
1 change: 1 addition & 0 deletions templates/2d_diffusion/latex/TITLE_HERE.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
TITLE*HERE
Loading