troyfeng116/tests #45

Open · wants to merge 14 commits into base: main
12 changes: 6 additions & 6 deletions .github/workflows/main.yml
@@ -60,9 +60,9 @@ jobs:
    # runs-on: ubuntu-22.04

    # do not expose sensitive environment variables in this yaml
-   env:
+   # env:
    # TODO: dynamically choose GPUs if tests end up using them
-     CUDA_VISIBLE_DEVICES: 1
+   # CUDA_VISIBLE_DEVICES: 1

    steps:
      - uses: actions/checkout@v2
@@ -129,19 +129,19 @@ jobs:
        run: |
          . .venv/bin/activate
          ls -la
-         python -m unittest tests/test_datasets.py
+         python -m unittest -v tests/test_datasets.py

      - name: Test executors
        run: |
          . .venv/bin/activate
-         python -m unittest tests/test_executors.py
+         python -m unittest -v tests/test_executors.py

      - name: Test inference pipeline
        run: |
          . .venv/bin/activate
-         python -m unittest tests/test_inference_pipeline.py
+         python -m unittest -v tests/test_inference_pipeline.py

      - name: Test models
        run: |
          . .venv/bin/activate
-         python -m unittest tests/test_models.py
+         python -m unittest -v tests/test_models.py
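(The only change in these hunks is the -v flag, which makes unittest print each test name as it runs instead of a bare dot, so it is easier to see in CI logs which test hung or failed. A possible follow-up, untested and assuming the tests directory is an importable package using the default test_*.py naming, would be to collapse the four steps into a single "python -m unittest discover -v -s tests" step.)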
3 changes: 2 additions & 1 deletion finetuning/lightning_modules/datasets/mathqa_reader.py
@@ -13,7 +13,8 @@ def get_train_instance(self, example: Dict[str, Any]) -> List[Dict[str, Any]]:
    def get_test_instance(self, example: Dict[str, Any]) -> List[Dict[str, Any]]:
        # parse the answer and add the field
        example["original_answer"] = example["answer"]
-       example["answer"] = example["answer"].split("\n####")[-1].strip()
+       # TODO: in data/mathqa/val_dedup.jsonl, example["answer"] are floats
+       # example["answer"] = example["answer"].split("\n####")[-1].strip()

        return [self.get_example_dict(example, example["text"], "", train_mode=False)]

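The new TODO records that example["answer"] already arrives as a float in data/mathqa/val_dedup.jsonl, so the unconditional string split had to be disabled. One hypothetical way to restore the parsing without breaking on floats (a sketch, not code from this PR):

    # hypothetical: parse only raw string answers, pass float answers through
    if isinstance(example["answer"], str):
        example["answer"] = example["answer"].split("\n####")[-1].strip()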
5 changes: 5 additions & 0 deletions finetuning/lightning_modules/datasets/spider_reader.py
@@ -9,6 +9,11 @@

from finetuning.lightning_modules.datasets.base_reader import NL2CodeDataset, FewShotNL2CodeDataset

+ # DB_INFO_FILE = os.path.join(os.path.dirname(__file__), '../../../data/squall/db_info_wtq.json')
+ DB_INFO_FILE = os.path.join(os.path.dirname(__file__), f"{os.environ['NLP4CODE_TEST_DATA_PATH']}/squall/db_info_wtq.json")
+ with open(DB_INFO_FILE, "r") as f:
+     full_db_info = json.load(f)


class FewShotSpiderDataset(FewShotNL2CodeDataset):

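Reviewer note on the added module-level load: os.environ['NLP4CODE_TEST_DATA_PATH'] is resolved at import time, so importing spider_reader raises a KeyError whenever the variable is unset, and when it holds an absolute path the os.path.dirname(__file__) prefix is discarded by os.path.join anyway. A hypothetical, more forgiving variant (a sketch under those assumptions, not part of this PR):

    import json
    import os

    # hypothetical: fall back to the in-repo data directory when the env var is unset
    _data_root = os.environ.get(
        "NLP4CODE_TEST_DATA_PATH",
        os.path.join(os.path.dirname(__file__), "../../../data"),
    )
    DB_INFO_FILE = os.path.join(_data_root, "squall", "db_info_wtq.json")
    with open(DB_INFO_FILE, "r") as f:
        full_db_info = json.load(f)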
9 changes: 7 additions & 2 deletions finetuning/lightning_modules/models/seq2seq_model_util.py
@@ -83,14 +83,19 @@ def get_model(model_name: str,
            use_cache=not gradient_ckpt,
            **additional_init_args)
    elif model_name.startswith("Salesforce/codegen-"):
+       # TODO: using float32 here for tests
+       # RunTime error: "LayerNormKernelImpl" not implemented for 'Half' codegen
+       # https://github.com/huggingface/transformers/issues/21989
        tokenizer = AutoTokenizer.from_pretrained(model_name,
-           additional_special_tokens=additional_special_tokens)
+           additional_special_tokens=additional_special_tokens,
+           torch_dtype=torch.float32)

        tokenizer.pad_token = tokenizer.eos_token

        if not tokenizer_only:
            model = AutoModelForCausalLM.from_pretrained(model_name,
                pad_token_id=tokenizer.eos_token_id,
-               torch_dtype=torch.float16,
+               torch_dtype=torch.float32,
                # device_map="auto",
                use_cache=True)
    elif model_name.startswith("bigscience/bloom-"):
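The float16-to-float32 switch matches the linked transformers issue: PyTorch's CPU LayerNorm kernel has no half-precision implementation, so an fp16 codegen checkpoint crashes on a CPU-only CI runner while running fine on GPU. A hedged sketch of a device-aware choice (hypothetical, not part of this PR):

    import torch

    # hypothetical: keep fp16 on CUDA, fall back to fp32 for CPU-only runs
    def pick_torch_dtype() -> torch.dtype:
        return torch.float16 if torch.cuda.is_available() else torch.float32

Note also that torch_dtype is not a tokenizer parameter, so passing it to AutoTokenizer.from_pretrained above likely has no effect on tokenization; only the model call needs it.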
211 changes: 211 additions & 0 deletions tests/consts.py
@@ -0,0 +1,211 @@
import os
from typing import List, Dict, Tuple, Optional, Type

NLP4CODE_TEST_DATA_PATH = os.environ["NLP4CODE_TEST_DATA_PATH"]


from finetuning.lightning_modules.datasets.base_reader import (
    FewShotNL2CodeDataset,
    NL2CodeDataset,
)
from finetuning.lightning_modules.datasets.mathqa_reader import (
    FewShotMathQADataset,
    MathQADataset,
)
from finetuning.lightning_modules.datasets.mbpp_reader import (
    FewShotMBPPDataset,
)
from finetuning.lightning_modules.datasets.spider_reader import (
    FewShotSpiderDataset,
    SpiderDataset,
)

from execution.executors import (
    BaseExecutor,
    MathExecutor,
    MBPPExecutor,
    SpiderExecutor,
    WTQExecutor,
)


# TODO: use special test string for test transformer model name? (don't load model)
TEST_TRANSFORMER_MODEL_NAME = "EleutherAI/gpt-neo-125M"


# ======== datasets ========

# defines kwargs needed to initialize NL2CodeDataset
class TestDatasetInitKwargs:
    transformer_model_name: str
    file_path: str
    mode: str

    def __init__(
        self,
        file_path: str,
        mode: Optional[str] = "train",  # default to train
        transformer_model_name: Optional[str] = TEST_TRANSFORMER_MODEL_NAME,
    ):
        self.file_path = file_path
        self.mode = mode
        self.transformer_model_name = transformer_model_name


DATASETS: List[Tuple[Type[NL2CodeDataset], TestDatasetInitKwargs]] = [
    (
        MathQADataset,
        TestDatasetInitKwargs(
            file_path=f"{NLP4CODE_TEST_DATA_PATH}/mathqa/train_dedup.jsonl",
        ),
    ),
    # TODO: SpiderDataset prompt_function
    # (
    #     SpiderDataset,
    #     TestDatasetInitKwargs(
    #         file_path=f"{NLP4CODE_TEST_DATA_PATH}/spider/train_spider_processed_v2.jsonl",
    #     ),
    # ),
]


# defines kwargs needed to instantiate FewShotNL2CodeDataset
class TestFewShotDatasetInitKwargs(TestDatasetInitKwargs):
    transformer_model_name: str
    file_path: str
    exemplar_file_path: str
    mode: str = "test"

    def __init__(
        self,
        file_path: str,
        exemplar_file_path: str,
        transformer_model_name: Optional[str] = TEST_TRANSFORMER_MODEL_NAME,
    ):
        super().__init__(
            file_path=file_path,
            transformer_model_name=transformer_model_name,
            mode="test",
        )
        self.exemplar_file_path = exemplar_file_path


# TODO: better way to do this? (custom types for each kwargs?)
# TODO: make sure to keep dataset files up to date here
# list of (dataset, **init_kwargs) tuples
FEW_SHOT_DATASETS: List[Tuple[Type[FewShotNL2CodeDataset], TestFewShotDatasetInitKwargs]] = [
    (
        FewShotMathQADataset,
        TestFewShotDatasetInitKwargs(
            exemplar_file_path="prompt_files/mathqa-non_idiomatic_code-annotated-8_exemplars.jsonl",
            file_path=f"{NLP4CODE_TEST_DATA_PATH}/mathqa/val_dedup_init_val.jsonl",
        ),
    ),
    (
        FewShotMBPPDataset,
        TestFewShotDatasetInitKwargs(
            exemplar_file_path="prompt_files/mbpp-official_first_3-10_exemplars.jsonl",
            file_path=f"{NLP4CODE_TEST_DATA_PATH}/mbpp/mbpp_test.jsonl",
        ),
    ),
    (
        FewShotSpiderDataset,
        TestFewShotDatasetInitKwargs(
            exemplar_file_path="prompt_files/spider-8_exemplars.jsonl",
            file_path=f"{NLP4CODE_TEST_DATA_PATH}/spider/dev_processed_db_path.jsonl",
        ),
    ),
    (
        FewShotSpiderDataset,
        TestFewShotDatasetInitKwargs(
            exemplar_file_path="prompt_files/wtq-8_exemplars.jsonl",
            # TODO: why does wtq_restored_dev.jsonl error
            file_path=f"{NLP4CODE_TEST_DATA_PATH}/squall/wtq_restored_test.jsonl",
        ),
    ),
]


# ======== models ========

TEST_MODEL_TRANSFORMER_MODEL_NAMES: List[str] = [
    "EleutherAI/gpt-neo-125M",
    "Salesforce/codet5-small",
    "Salesforce/codegen-350M-multi",
]

TEST_MODEL_EXECUTOR_CLS = "execution.executors.MathExecutor"


# ======== executors ========

TEST_PROGRAM = "answer = 5"

# Tuple[ExecutorCls, program, example]
TEST_EXECUTORS: List[Tuple[Type[BaseExecutor], str, Dict]] = [
    (
        MathExecutor,
        TEST_PROGRAM,
        {
            "question": "some question",
            "answer": 5,
        },
    ),
    (
        MBPPExecutor,
        TEST_PROGRAM,
        {
            "question": "some question",
            "answer": 5,
            "code": "return 5",
            "task_id": "xyz",
            "test_setup_code": 'print("setup")',
            "test_list": ["assert 1+1 == 2", "assert 1+1 != 3"],
        },
    ),
    (
        SpiderExecutor,
        TEST_PROGRAM,
        {
            "question": "some question",
            "db_id": "my_db_id",
            "query": "SELECT * FROM table",
        },
    ),
    (
        WTQExecutor,
        TEST_PROGRAM,
        {
            "question": "some question",
            "db_id": "my_db_id",
            "db_path": "path/to/my/db",
            "original_answer": 5,
        },
    ),
]


# ======== integration ========

TEST_PIPELINE_YAML_CONFIG_FILE = "tests/test_configs/few_shot-pipeline.yaml"

# TODO: more datasets (see SummerTime matrix)
# each tuple contains model_name, Pytorch Lightning config YAML file, val_file_path
TEST_PIPELINE_INFO: List[Tuple[str, str, str]] = [
(
"EleutherAI/gpt-neo-125M",
TEST_PIPELINE_YAML_CONFIG_FILE,
"$NLP4CODE_TEST_DATA_PATH/gsmath/split_dev.jsonl",
),
# TODO: tensor dimension mismatch error for codet5-small (probably config file problem)
# (
# "Salesforce/codet5-small",
# TEST_PIPELINE_YAML_CONFIG_FILE,
# "$NLP4CODE_TEST_DATA_PATH/gsmath/split_dev.jsonl",
# ),
(
"Salesforce/codegen-350M-multi",
TEST_PIPELINE_YAML_CONFIG_FILE,
"$NLP4CODE_TEST_DATA_PATH/gsmath/split_dev.jsonl",
),
]
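For orientation, these constants are presumably consumed by parametrized loops in the test modules; a hypothetical sketch of such a test (assuming the reader classes accept exactly the kwargs stored above, which this PR does not show):

    import unittest

    from tests.consts import DATASETS

    class TestDatasetConstruction(unittest.TestCase):
        def test_datasets_instantiate(self):
            # hypothetical: build each dataset from its kwargs container
            for dataset_cls, init_kwargs in DATASETS:
                dataset = dataset_cls(
                    file_path=init_kwargs.file_path,
                    transformer_model_name=init_kwargs.transformer_model_name,
                    mode=init_kwargs.mode,
                )
                self.assertIsNotNone(dataset)

    if __name__ == "__main__":
        unittest.main()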