From 4ce270031ee5501e99745a29cdd416b5e5268850 Mon Sep 17 00:00:00 2001 From: Amitayush Thakur Date: Mon, 10 Feb 2025 21:35:05 -0600 Subject: [PATCH] Added scripts to validate the solution. --- .github/workflows/lean.yml | 22 +++++--- lean4/scripts/insert_solutions.py | 14 +++++ lean4/scripts/score_solutions.py | 90 +++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 6 deletions(-) create mode 100644 lean4/scripts/score_solutions.py diff --git a/.github/workflows/lean.yml b/.github/workflows/lean.yml index a80a47e1..e4c32574 100644 --- a/.github/workflows/lean.yml +++ b/.github/workflows/lean.yml @@ -43,17 +43,27 @@ jobs: run: | lake exe check_docstrings - - name: Insert solutions + - name: Build problems with sorries working-directory: lean4 run: | - python scripts/rewrite_solutions.py + lake build putnam_competition_sorries > original_build.log - - name: Build + - name: Save olean cache working-directory: lean4 run: | - lake build putnam_with_solutions + lake exe cache pack - - name: Save olean cache + - name: Insert solutions + run: | + python lean4/scripts/insert_solutions.py + + - name: Build User submitted problems working-directory: lean4 + # Ignore the return code because we want to score the solutions even if the build fails + # Assuming someone submitted a solution which is not correct run: | - lake exe cache pack + lake build putnam_competition_solution > solution_build.log || true + + - name: Score User submitted problems + run: | + python lean4/scripts/score_solutions.py diff --git a/lean4/scripts/insert_solutions.py b/lean4/scripts/insert_solutions.py index 4791db6e..5885afd0 100644 --- a/lean4/scripts/insert_solutions.py +++ b/lean4/scripts/insert_solutions.py @@ -35,6 +35,18 @@ def main(): args = args.parse_args() assert os.path.exists(args.problem_with_sorries_folder) assert os.path.exists(args.json_solution_file) + # Make sure that the dump folder exists in lean4 folder + lean4_folder = os.path.abspath("lean4") + # lean_solution_dump_folder should be a subfolder of lean4 + abs_lean_solution_dump_folder = os.path.abspath(args.lean_solution_dump_folder) + assert abs_lean_solution_dump_folder.startswith(lean4_folder), f"lean_solution_dump_folder should be a subfolder of lean4 folder." + # lean_solution_dump_folder should not be a subfolder of problem_with_sorries_folder + abs_problem_with_sorries_folder = os.path.abspath(args.problem_with_sorries_folder) + assert not (abs_lean_solution_dump_folder.strip('/') + '/').startswith(abs_problem_with_sorries_folder.strip('/') + '/'), f"lean_solution_dump_folder should not be a subfolder of problem_with_sorries_folder. {abs_lean_solution_dump_folder} {abs_problem_with_sorries_folder}" + # lean_solution_dump_folder should be only one level below lean4 + assert abs_lean_solution_dump_folder.count(os.sep) == lean4_folder.count(os.sep) + 1, f"lean_solution_dump_folder should be only one level below lean4." + lean_soln_dump_folder_name = os.path.basename(abs_lean_solution_dump_folder) + problem_with_sorries_folder_name = os.path.basename(abs_problem_with_sorries_folder) if os.path.exists(args.lean_solution_dump_folder): os.system(f"rm -rf {args.lean_solution_dump_folder}") # Copy the problems folder to the dump folder @@ -79,6 +91,8 @@ def main(): "total_problems": problems_count, "problems_attempted_count": len(problem_id_attempted_set), "problems_not_attempted_count": problems_count - len(problem_id_attempted_set), + "lean_solution_dump_folder_name": lean_soln_dump_folder_name, + "problem_with_sorries_folder_name": problem_with_sorries_folder_name, "problems_attempted_list": sorted(list(problem_id_attempted_set)) } with open(args.summary_file, "w") as f: diff --git a/lean4/scripts/score_solutions.py b/lean4/scripts/score_solutions.py new file mode 100644 index 00000000..b086fde2 --- /dev/null +++ b/lean4/scripts/score_solutions.py @@ -0,0 +1,90 @@ +import os +import json +import argparse +import re + +# This script reads the build logs and scores the solutions. + +def main(): + args = argparse.ArgumentParser() + args.add_argument("--original_build_log", type=str, default="lean4/original_build.log", + help="Original build log.") + args.add_argument("--solution_build_log", type=str, default="lean4/solution_build.log", + help="Build log with solutions.") + args.add_argument("--insertion_summary_file", type=str, default="lean4/src_with_sorries_replaced/insert_solutions_summary.json", + help="JSON file containing summary of the insertion process.") + args.add_argument("--score_file", type=str, default="lean4/src_with_sorries_replaced/score.json", + help="JSON file containing scores.") + args = args.parse_args() + assert os.path.exists(args.original_build_log), f"Original build log {args.original_build_log} does not exist." + assert os.path.exists(args.solution_build_log), f"Solution build log {args.solution_build_log} does not exist." + assert os.path.exists(args.insertion_summary_file), f"Insertion summary file {args.insertion_summary_file} does not exist." + with open(args.original_build_log, "r") as f: + original_build_log = f.read() + with open(args.solution_build_log, "r") as f: + solution_build_log = f.read() + with open(args.insertion_summary_file, "r") as f: + insertion_summary = json.load(f) + lean_soln_dump_folder_name = insertion_summary["lean_solution_dump_folder_name"] + problem_with_sorries_folder_name = insertion_summary["problem_with_sorries_folder_name"] + # Count the number of problems from the original build log + rexp = rf"{problem_with_sorries_folder_name}\.([\s\S]*?)declaration uses 'sorry'" + original_probl_count_regex = re.compile(rexp, re.MULTILINE) + original_problems = original_probl_count_regex.findall(original_build_log) + all_problems_compiled = [] + for i, problem in enumerate(original_problems): + problem_id = problem.split(problem_with_sorries_folder_name)[-1].split(".lean")[0].strip("/") + all_problems_compiled.append(problem_id) + all_problems_mentioned = [] + for file in os.listdir(os.path.join("lean4", problem_with_sorries_folder_name)): + if file.endswith(".lean") and not file == "allimports.lean": + problem_id = file.split(".")[0] + all_problems_mentioned.append(problem_id) + assert len(all_problems_mentioned) == len(all_problems_compiled), f"Number of problems mentioned in the folder {len(all_problems_mentioned)} does not match the number of problems compiled {len(all_problems_compiled)}. Possibly some problems have their solutions leaked." + all_problems_mentioned = set(all_problems_mentioned) + all_problems_compiled = set(all_problems_compiled) + assert all_problems_mentioned.issubset(all_problems_compiled), f"Some problems have their solutions leaked. {all_problems_mentioned - all_problems_compiled}" + # Count the number of problems from the solution build log which have sorries + rexp = rf"{lean_soln_dump_folder_name}\.([\s\S]*?)declaration uses 'sorry'" + unsolved_probl_count_regex = re.compile(rexp, re.MULTILINE) + unsolved_problems = unsolved_probl_count_regex.findall(solution_build_log) + unsolved_problems_set = set() + for i, problem in enumerate(unsolved_problems): + problem_id = problem.split(lean_soln_dump_folder_name)[-1].split(".lean")[0].strip("/") + unsolved_problems_set.add(problem_id) + problems_attempted = set(insertion_summary["problems_attempted_list"]) + assert unsolved_problems_set.issubset(problems_attempted), f"Something wrong with insertion of proofs. Some problems have their solutions leaked. {unsolved_problems_set - problems_attempted}" + + # Count the problems for which the build failed + rexp = rf"✖([\s\S]*?){lean_soln_dump_folder_name}\.(\w+)" + failed_probl_count_regex = re.compile(rexp, re.MULTILINE) + failed_problems = failed_probl_count_regex.findall(solution_build_log) + failed_problems_set = set() + for i, problem in enumerate(failed_problems): + problem_id = problem[1] + failed_problems_set.add(problem_id) + assert failed_problems_set.issubset(problems_attempted), f"Something wrong with insertion of proofs. Some problems have their solutions leaked. {failed_problems_set - problems_attempted}" + solved_problems = problems_attempted - unsolved_problems_set - failed_problems_set + score = len(solved_problems) / len(all_problems_mentioned) + summary = { + "problems_attempted": len(problems_attempted), + "problems_solved": len(solved_problems), + "problems_unsolved": len(unsolved_problems_set), + "problems_failed_compilation": len(failed_problems_set), + "total_problems": len(all_problems_mentioned), + "score": score, + "problems_solved_list": sorted(list(solved_problems)), + "problems_attempted_list": sorted(list(problems_attempted)), + "problems_failed_compilation_list": sorted(list(failed_problems_set)), + "note": "If any problem has failed to compile (i.e. `problems_failed_compilation` > 0), then `problems_solved` may be less than the actual number of problems solved. " + + "Please submit only those problems which can be compiled successfully or else add sorries in your solutions to make compilation successful. " + + "The lean build will break at the first problem which fails to compile, and hence it is not possible to score the problems after that." + } + with open(args.score_file, "w") as f: + json.dump(summary, f, indent=4) + with open(args.score_file, "r") as f: + summary_str = f.read() + print(summary_str) + +if __name__ == "__main__": + main() \ No newline at end of file