troyfeng116/tests #45

Open · wants to merge 14 commits into base: main
12 changes: 6 additions & 6 deletions .github/workflows/main.yml
@@ -60,9 +60,9 @@ jobs:
    # runs-on: ubuntu-22.04

    # do not expose sensitive environment variables in this yaml
-   env:
+   # env:
    # TODO: dynamically choose GPUs if tests end up using them
-     CUDA_VISIBLE_DEVICES: 1
+   # CUDA_VISIBLE_DEVICES: 1

    steps:
      - uses: actions/checkout@v2
@@ -129,19 +129,19 @@ jobs:
        run: |
          . .venv/bin/activate
          ls -la
-         python -m unittest tests/test_datasets.py
+         python -m unittest -v tests/test_datasets.py

      - name: Test executors
        run: |
          . .venv/bin/activate
-         python -m unittest tests/test_executors.py
+         python -m unittest -v tests/test_executors.py

      - name: Test inference pipeline
        run: |
          . .venv/bin/activate
-         python -m unittest tests/test_inference_pipeline.py
+         python -m unittest -v tests/test_inference_pipeline.py

      - name: Test models
        run: |
          . .venv/bin/activate
-         python -m unittest tests/test_models.py
+         python -m unittest -v tests/test_models.py
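(The only change in these hunks is the -v flag, which makes unittest print each test name as it runs instead of a bare dot, so it is easier to see in CI logs which test hung or failed. A possible follow-up, untested and assuming the tests directory is an importable package using the default test_*.py naming, would be to collapse the four steps into a single "python -m unittest discover -v -s tests" step.)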
3 changes: 2 additions & 1 deletion finetuning/lightning_modules/datasets/mathqa_reader.py
@@ -13,7 +13,8 @@ def get_train_instance(self, example: Dict[str, Any]) -> List[Dict[str, Any]]:
    def get_test_instance(self, example: Dict[str, Any]) -> List[Dict[str, Any]]:
        # parse the answer and add the field
        example["original_answer"] = example["answer"]
-       example["answer"] = example["answer"].split("\n####")[-1].strip()
+       # TODO: in data/mathqa/val_dedup.jsonl, example["answer"] are floats
+       # example["answer"] = example["answer"].split("\n####")[-1].strip()

        return [self.get_example_dict(example, example["text"], "", train_mode=False)]

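The new TODO records that example["answer"] already arrives as a float in data/mathqa/val_dedup.jsonl, so the unconditional string split had to be disabled. One hypothetical way to restore the parsing without breaking on floats (a sketch, not code from this PR):

    # hypothetical: parse only raw string answers, pass float answers through
    if isinstance(example["answer"], str):
        example["answer"] = example["answer"].split("\n####")[-1].strip()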
5 changes: 5 additions & 0 deletions finetuning/lightning_modules/datasets/spider_reader.py
@@ -9,6 +9,11 @@

from finetuning.lightning_modules.datasets.base_reader import NL2CodeDataset, FewShotNL2CodeDataset

+ # DB_INFO_FILE = os.path.join(os.path.dirname(__file__), '../../../data/squall/db_info_wtq.json')
+ DB_INFO_FILE = os.path.join(os.path.dirname(__file__), f"{os.environ['NLP4CODE_TEST_DATA_PATH']}/squall/db_info_wtq.json")
+ with open(DB_INFO_FILE, "r") as f:
+     full_db_info = json.load(f)


class FewShotSpiderDataset(FewShotNL2CodeDataset):

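Reviewer note on the added module-level load: os.environ['NLP4CODE_TEST_DATA_PATH'] is resolved at import time, so importing spider_reader raises a KeyError whenever the variable is unset, and when it holds an absolute path the os.path.dirname(__file__) prefix is discarded by os.path.join anyway. A hypothetical, more forgiving variant (a sketch under those assumptions, not part of this PR):

    import json
    import os

    # hypothetical: fall back to the in-repo data directory when the env var is unset
    _data_root = os.environ.get(
        "NLP4CODE_TEST_DATA_PATH",
        os.path.join(os.path.dirname(__file__), "../../../data"),
    )
    DB_INFO_FILE = os.path.join(_data_root, "squall", "db_info_wtq.json")
    with open(DB_INFO_FILE, "r") as f:
        full_db_info = json.load(f)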
9 changes: 7 additions & 2 deletions finetuning/lightning_modules/models/seq2seq_model_util.py
@@ -83,14 +83,19 @@ def get_model(model_name: str,
            use_cache=not gradient_ckpt,
            **additional_init_args)
    elif model_name.startswith("Salesforce/codegen-"):
+       # TODO: using float32 here for tests
+       # RunTime error: "LayerNormKernelImpl" not implemented for 'Half' codegen
+       # https://github.com/huggingface/transformers/issues/21989
        tokenizer = AutoTokenizer.from_pretrained(model_name,
-           additional_special_tokens=additional_special_tokens)
+           additional_special_tokens=additional_special_tokens,
+           torch_dtype=torch.float32)

        tokenizer.pad_token = tokenizer.eos_token

        if not tokenizer_only:
            model = AutoModelForCausalLM.from_pretrained(model_name,
                pad_token_id=tokenizer.eos_token_id,
-               torch_dtype=torch.float16,
+               torch_dtype=torch.float32,
                # device_map="auto",
                use_cache=True)
    elif model_name.startswith("bigscience/bloom-"):
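The float16-to-float32 switch matches the linked transformers issue: PyTorch's CPU LayerNorm kernel has no half-precision implementation, so an fp16 codegen checkpoint crashes on a CPU-only CI runner while running fine on GPU. A hedged sketch of a device-aware choice (hypothetical, not part of this PR):

    import torch

    # hypothetical: keep fp16 on CUDA, fall back to fp32 for CPU-only runs
    def pick_torch_dtype() -> torch.dtype:
        return torch.float16 if torch.cuda.is_available() else torch.float32

Note also that torch_dtype is not a tokenizer parameter, so passing it to AutoTokenizer.from_pretrained above likely has no effect on tokenization; only the model call needs it.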
211 changes: 211 additions & 0 deletions tests/consts.py
@@ -0,0 +1,211 @@
import os
from typing import List, Dict, Tuple, Optional, Type

NLP4CODE_TEST_DATA_PATH = os.environ["NLP4CODE_TEST_DATA_PATH"]


from finetuning.lightning_modules.datasets.base_reader import (
    FewShotNL2CodeDataset,
    NL2CodeDataset,
)
from finetuning.lightning_modules.datasets.mathqa_reader import (
    FewShotMathQADataset,
    MathQADataset,
)
from finetuning.lightning_modules.datasets.mbpp_reader import (
    FewShotMBPPDataset,
)
from finetuning.lightning_modules.datasets.spider_reader import (
    FewShotSpiderDataset,
    SpiderDataset,
)

from execution.executors import (
    BaseExecutor,
    MathExecutor,
    MBPPExecutor,
    SpiderExecutor,
    WTQExecutor,
)


# TODO: use special test string for test transformer model name? (don't load model)
TEST_TRANSFORMER_MODEL_NAME = "EleutherAI/gpt-neo-125M"


# ======== datasets ========

# defines kwargs needed to initialize NL2CodeDataset
class TestDatasetInitKwargs:
    transformer_model_name: str
    file_path: str
    mode: str

    def __init__(
        self,
        file_path: str,
        mode: Optional[str] = "train",  # default to train
        transformer_model_name: Optional[str] = TEST_TRANSFORMER_MODEL_NAME,
    ):
        self.file_path = file_path
        self.mode = mode
        self.transformer_model_name = transformer_model_name


DATASETS: List[Tuple[Type[NL2CodeDataset], TestDatasetInitKwargs]] = [
    (
        MathQADataset,
        TestDatasetInitKwargs(
            file_path=f"{NLP4CODE_TEST_DATA_PATH}/mathqa/train_dedup.jsonl",
        ),
    ),
    # TODO: SpiderDataset prompt_function
    # (
    #     SpiderDataset,
    #     TestDatasetInitKwargs(
    #         file_path=f"{NLP4CODE_TEST_DATA_PATH}/spider/train_spider_processed_v2.jsonl",
    #     ),
    # ),
]


# defines kwargs needed to instantiate FewShotNL2CodeDataset
class TestFewShotDatasetInitKwargs(TestDatasetInitKwargs):
    transformer_model_name: str
    file_path: str
    exemplar_file_path: str
    mode: str = "test"

    def __init__(
        self,
        file_path: str,
        exemplar_file_path: str,
        transformer_model_name: Optional[str] = TEST_TRANSFORMER_MODEL_NAME,
    ):
        super().__init__(
            file_path=file_path,
            transformer_model_name=transformer_model_name,
            mode="test",
        )
        self.exemplar_file_path = exemplar_file_path


# TODO: better way to do this? (custom types for each kwargs?)
# TODO: make sure to keep dataset files up to date here
# list of (dataset, **init_kwargs) tuples
FEW_SHOT_DATASETS: List[Tuple[Type[FewShotNL2CodeDataset], TestFewShotDatasetInitKwargs]] = [
    (
        FewShotMathQADataset,
        TestFewShotDatasetInitKwargs(
            exemplar_file_path="prompt_files/mathqa-non_idiomatic_code-annotated-8_exemplars.jsonl",
            file_path=f"{NLP4CODE_TEST_DATA_PATH}/mathqa/val_dedup_init_val.jsonl",
        ),
    ),
    (
        FewShotMBPPDataset,
        TestFewShotDatasetInitKwargs(
            exemplar_file_path="prompt_files/mbpp-official_first_3-10_exemplars.jsonl",
            file_path=f"{NLP4CODE_TEST_DATA_PATH}/mbpp/mbpp_test.jsonl",
        ),
    ),
    (
        FewShotSpiderDataset,
        TestFewShotDatasetInitKwargs(
            exemplar_file_path="prompt_files/spider-8_exemplars.jsonl",
            file_path=f"{NLP4CODE_TEST_DATA_PATH}/spider/dev_processed_db_path.jsonl",
        ),
    ),
    (
        FewShotSpiderDataset,
        TestFewShotDatasetInitKwargs(
            exemplar_file_path="prompt_files/wtq-8_exemplars.jsonl",
            # TODO: why does wtq_restored_dev.jsonl error
            file_path=f"{NLP4CODE_TEST_DATA_PATH}/squall/wtq_restored_test.jsonl",
        ),
    ),
]


# ======== models ========

TEST_MODEL_TRANSFORMER_MODEL_NAMES: List[str] = [
    "EleutherAI/gpt-neo-125M",
    "Salesforce/codet5-small",
    "Salesforce/codegen-350M-multi",
]

TEST_MODEL_EXECUTOR_CLS = "execution.executors.MathExecutor"


# ======== executors ========

TEST_PROGRAM = "answer = 5"

# Tuple[ExecutorCls, program, example]
TEST_EXECUTORS: List[Tuple[Type[BaseExecutor], str, Dict]] = [
    (
        MathExecutor,
        TEST_PROGRAM,
        {
            "question": "some question",
            "answer": 5,
        },
    ),
    (
        MBPPExecutor,
        TEST_PROGRAM,
        {
            "question": "some question",
            "answer": 5,
            "code": "return 5",
            "task_id": "xyz",
            "test_setup_code": 'print("setup")',
            "test_list": ["assert 1+1 == 2", "assert 1+1 != 3"],
        },
    ),
    (
        SpiderExecutor,
        TEST_PROGRAM,
        {
            "question": "some question",
            "db_id": "my_db_id",
            "query": "SELECT * FROM table",
        },
    ),
    (
        WTQExecutor,
        TEST_PROGRAM,
        {
            "question": "some question",
            "db_id": "my_db_id",
            "db_path": "path/to/my/db",
            "original_answer": 5,
        },
    ),
]


# ======== integration ========

TEST_PIPELINE_YAML_CONFIG_FILE = "tests/test_configs/few_shot-pipeline.yaml"

# TODO: more datasets (see SummerTime matrix)
# each tuple contains model_name, Pytorch Lightning config YAML file, val_file_path
TEST_PIPELINE_INFO: List[Tuple[str, str, str]] = [
(
"EleutherAI/gpt-neo-125M",
TEST_PIPELINE_YAML_CONFIG_FILE,
"$NLP4CODE_TEST_DATA_PATH/gsmath/split_dev.jsonl",
),
# TODO: tensor dimension mismatch error for codet5-small (probably config file problem)
# (
# "Salesforce/codet5-small",
# TEST_PIPELINE_YAML_CONFIG_FILE,
# "$NLP4CODE_TEST_DATA_PATH/gsmath/split_dev.jsonl",
# ),
(
"Salesforce/codegen-350M-multi",
TEST_PIPELINE_YAML_CONFIG_FILE,
"$NLP4CODE_TEST_DATA_PATH/gsmath/split_dev.jsonl",
),
]
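For orientation, these constants are presumably consumed by parametrized loops in the test modules; a hypothetical sketch of such a test (assuming the reader classes accept exactly the kwargs stored above, which this PR does not show):

    import unittest

    from tests.consts import DATASETS

    class TestDatasetConstruction(unittest.TestCase):
        def test_datasets_instantiate(self):
            # hypothetical: build each dataset from its kwargs container
            for dataset_cls, init_kwargs in DATASETS:
                dataset = dataset_cls(
                    file_path=init_kwargs.file_path,
                    transformer_model_name=init_kwargs.transformer_model_name,
                    mode=init_kwargs.mode,
                )
                self.assertIsNotNone(dataset)

    if __name__ == "__main__":
        unittest.main()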