Storing one questions csv per language

Gapminder · Jan 25, 2024 · 65f7453
1 parent ed666eb
commit 65f7453
Show file tree

Hide file tree

Showing 6 changed files with 13 additions and 7 deletions.
diff --git a/automation-api/yival_experiments/README.md b/automation-api/yival_experiments/README.md
@@ -19,8 +19,7 @@ poetry shell
 poe fetch_questions
 ```
 
-This will fetch all enabled questions in the AI eval spreadsheet and create data/questions.csv.
-We also included all questions in data/questions_cn.csv and data/questions_en.csv
+This will fetch all enabled questions in the AI eval spreadsheet and create data/questions_{language}.csv files, one per language.
 
 Note: Yival requires a dataset to have a local file when the source type is set to "dataset". So we need to fetch it first.
 

diff --git a/automation-api/yival_experiments/data/questions_en.csv b/automation-api/yival_experiments/data/questions_en-US.csv
diff --git a/automation-api/yival_experiments/data/questions_cn.csv b/automation-api/yival_experiments/data/questions_zh-CN.csv
diff --git a/automation-api/yival_experiments/experiment_archive/experiment_20231104_cn.yaml b/automation-api/yival_experiments/experiment_archive/experiment_20231104_cn.yaml
@@ -9,7 +9,7 @@ custom_variation_generators:
 custom_function: model_compare.model_compare
 dataset:
   source_type: dataset
-  file_path: data/questions_cn.csv
+  file_path: data/questions_zh-CN.csv
   reader: csv_reader
   reader_config:
     expected_result_column: correct_answer

diff --git a/automation-api/yival_experiments/experiment_archive/experiment_20231104_en.yaml b/automation-api/yival_experiments/experiment_archive/experiment_20231104_en.yaml
@@ -9,7 +9,7 @@ custom_variation_generators:
 custom_function: model_compare.model_compare
 dataset:
   source_type: dataset
-  file_path: data/questions_en.csv
+  file_path: data/questions_en-US.csv
   reader: csv_reader
   reader_config:
     expected_result_column: correct_answer

diff --git a/automation-api/yival_experiments/scripts/fetch_questions.py b/automation-api/yival_experiments/scripts/fetch_questions.py
@@ -8,7 +8,6 @@
 
 
 correctness_map = {1: "Correct", 2: "Wrong", 3: "Very Wrong"}
-output_file = current_script_path / "../data/questions.csv"
 
 
 def main():
@@ -53,8 +52,16 @@ def main():
         output_list.append(output_item)
 
     output_df = pd.DataFrame.from_records(output_list)
-    output_df.to_csv(output_file, index=False)
-    print("Questions saved to", output_file)
+
+    # Grouping the DataFrame by 'language'
+    grouped = output_df.groupby("language")
+
+    for language, group in grouped:
+        # Constructing the filename for each language
+        output_file = current_script_path / f"../data/questions_{language}.csv"
+        # Saving each group to a separate CSV file
+        group.to_csv(output_file, index=False)
+        print(f"Questions in '{language}' language saved to {output_file}")
 
 
 if __name__ == "__main__":