diff --git a/yival_experiments/output/generate_report.py b/yival_experiments/output/generate_report.py
index e7c7fb0..1dead14 100644
--- a/yival_experiments/output/generate_report.py
+++ b/yival_experiments/output/generate_report.py
@@ -18,6 +18,7 @@
 # rs[1].asdict()
 
 # We will combine all pickle files in output dir and calculate final scores.
+# TODO: follow the format of the `Latest Results` sheet in the AI eval spreadsheet.
 # 1. Store all responses into excel file.
 output_list = []
 
@@ -45,6 +46,7 @@
 
 
 # 2. calculate a final score per model configuration
+# TODO: it should be possible to convert these into a YiVal Evaluator.
 def is_correct_p(round_results):
     c = Counter(round_results)
     top2 = c.most_common(2)