Merge pull request #33 from alan-turing-institute/15-change-model-to-api
Change "model" to "api"
rchan26 authored Apr 26, 2024
2 parents bebeee7 + 412d142 commit b72cd31
Showing 11 changed files with 42 additions and 41 deletions.
2 changes: 1 addition & 1 deletion notebook/example/data/judge/settings.json
@@ -1 +1 @@
-{"gemini-1.0-pro": {"model":"gemini", "model_name":"gemini-1.0-pro-002", "parameters": {"temperature": 0}}}
+{"gemini-1.0-pro": {"api":"gemini", "model_name":"gemini-1.0-pro-002", "parameters": {"temperature": 0}}}
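For context, a minimal sketch of reading a judge settings file in this format, assuming only the path and structure shown in the diff above (the loop itself is illustrative, not part of the repository):

```python
import json

# Assumed path, taken from the diff above. Each judge entry now names the
# backend under "api" and the concrete model under "model_name".
with open("notebook/example/data/judge/settings.json") as f:
    judge_settings = json.load(f)

for judge, config in judge_settings.items():
    # e.g. judge="gemini-1.0-pro", api="gemini", model_name="gemini-1.0-pro-002"
    print(judge, config["api"], config["model_name"], config.get("parameters", {}))
```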
6 changes: 3 additions & 3 deletions notebook/example/data2/input/azureopenai.jsonl
@@ -1,3 +1,3 @@
-{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "model": "azure_openai", "model_name": "reginald-gpt4", "parameters": {"temperature": 1, "top_p": 0.8}}
-{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "model": "azure_openai", "parameters": {"temperature": 0.5, "top_p": 0.6}}
-{"id": 11, "prompt": "How many theaters are there in London's South End?", "model": "azure_openai"}
+{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "azure_openai", "model_name": "reginald-gpt4", "parameters": {"temperature": 1, "top_p": 0.8}}
+{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "azure_openai", "parameters": {"temperature": 0.5, "top_p": 0.6}}
+{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "azure_openai"}
3 changes: 3 additions & 0 deletions notebook/example/data2/input/openai.jsonl
@@ -0,0 +1,3 @@
+{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "openai", "model_name": "gpt-3.5-turbo-instruct", "parameters": {"temperature": 1, "top_p": 0.8}}
+{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "openai", "parameters": {"temperature": 0.5, "top_p": 0.6}}
+{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "openai"}
6 changes: 3 additions & 3 deletions notebook/example/data2/input/test.jsonl
@@ -1,3 +1,3 @@
-{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "model": "unknown-model-name", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}}
-{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "model": "unknown-model-name", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}}
-{"id": 11, "prompt": "How many theaters are there in London's South End?", "model": "unknown-model-name"}
+{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "unknown-model-name", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}}
+{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "unknown-model-name", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}}
+{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "unknown-model-name"}
6 changes: 3 additions & 3 deletions notebook/example/data2/input/test2.jsonl
@@ -1,3 +1,3 @@
-{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "model": "gemini", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}}
-{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "model": "gemini", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}}
-{"id": 11, "prompt": "How many theaters are there in London's South End?", "model": "gemini"}
+{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "gemini", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}}
+{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "gemini", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}}
+{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "gemini"}
6 changes: 3 additions & 3 deletions notebook/example/data3/input/ollama.jsonl
@@ -1,3 +1,3 @@
-{"id": 9, "prompt": "Hello, my name is Bob and I'm 6 years old. How old am I next year?", "model": "ollama", "model_name": "gemma", "parameters": {"temperature": 1}}
-{"id": 10, "prompt": "Can you give me a random number between 1-10?", "model": "ollama", "model_name": "llama", "parameters": {"temperature": 0.5}}
-{"id": 11, "prompt": "How many theaters are there in London's South End?", "model": "ollama"}
+{"id": 9, "prompt": "Hello, my name is Bob and I'm 6 years old. How old am I next year?", "api": "ollama", "model_name": "gemma", "parameters": {"temperature": 1}}
+{"id": 10, "prompt": "Can you give me a random number between 1-10?", "api": "ollama", "model_name": "llama", "parameters": {"temperature": 0.5}}
+{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "ollama"}
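The input JSONL files above all share one per-line format: a `prompt` (a string or a list of strings), the new `api` key naming the backend, and optional `model_name` and `parameters`. A small sketch of writing such a file, with made-up prompts and an assumed output path:

```python
import json

# Illustrative prompts in the updated input format: "api" names the backend,
# "model_name" optionally selects a specific model, "parameters" is optional.
prompts = [
    {"id": 1, "prompt": "Hello, how are you?", "api": "ollama", "model_name": "gemma"},
    {
        "id": 2,
        "prompt": ["Pick a number between 1-10", "Double it"],
        "api": "gemini",
        "parameters": {"temperature": 0.5},
    },
]

# Write one JSON object per line (file name is assumed, for illustration only).
with open("example_input.jsonl", "w") as f:
    for prompt_dict in prompts:
        f.write(json.dumps(prompt_dict) + "\n")
```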
16 changes: 7 additions & 9 deletions src/batch_llm/experiment_processing.py
@@ -90,7 +90,7 @@ def group_prompts_by_model(self) -> dict[str, list[dict]]:
 
         grouped_dict = {}
         for item in self.experiment_prompts:
-            model = item.get("model")
+            model = item.get("api")
             if model not in grouped_dict:
                 grouped_dict[model] = [item]
@@ -372,7 +372,7 @@ async def query_model_and_record_response(
     ----------
     prompt_dict : dict
         Dictionary containing the prompt and other parameters to be
-        used for text generation. Required keys are "prompt" and "model".
+        used for text generation. Required keys are "prompt" and "api".
         Some models may have other required keys.
     settings : Settings
         Settings for the pipeline
@@ -478,7 +478,7 @@ async def generate_text(
     ----------
     prompt_dict : dict
         Dictionary containing the prompt and other parameters to be
-        used for text generation. Required keys are "prompt" and "model".
+        used for text generation. Required keys are "prompt" and "api".
         Some models may have other required keys.
     settings : Settings
         Settings for the pipeline
@@ -497,19 +497,17 @@ async def generate_text(
     """
     if index is None:
         index = "NA"
-    if "model" not in prompt_dict:
-        raise KeyError(
-            "Model is not specified in the prompt_dict. Must have 'model' key"
-        )
+    if "api" not in prompt_dict:
+        raise KeyError("API is not specified in the prompt_dict. Must have 'api' key")
 
     # obtain model
     try:
-        model = ASYNC_MODELS[prompt_dict["model"]](
+        model = ASYNC_MODELS[prompt_dict["api"]](
             settings=settings, log_file=experiment.log_file
         )
     except KeyError:
         raise NotImplementedError(
-            f"Model {prompt_dict['model']} not recognised or implemented"
+            f"Model {prompt_dict['api']} not recognised or implemented"
         )
 
     # query the model
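The change in `generate_text` keeps the same dispatch pattern but keys the `ASYNC_MODELS` lookup on `prompt_dict["api"]`. A self-contained sketch of that pattern, using a stand-in class rather than the repository's real model implementations:

```python
# Stand-in for one of the repository's async model classes (not the real one).
class DummyGeminiModel:
    def __init__(self, settings=None, log_file=None):
        self.settings = settings
        self.log_file = log_file


# Maps the "api" value of a prompt_dict to the class that handles it.
ASYNC_MODELS = {"gemini": DummyGeminiModel}


def get_model(prompt_dict: dict, settings=None, log_file=None):
    if "api" not in prompt_dict:
        raise KeyError("API is not specified in the prompt_dict. Must have 'api' key")
    try:
        return ASYNC_MODELS[prompt_dict["api"]](settings=settings, log_file=log_file)
    except KeyError:
        raise NotImplementedError(
            f"Model {prompt_dict['api']} not recognised or implemented"
        )


model = get_model({"prompt": "Hello", "api": "gemini"})
print(type(model).__name__)  # DummyGeminiModel
```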
2 changes: 1 addition & 1 deletion src/batch_llm/scripts/create_judge_file.py
@@ -91,7 +91,7 @@ def main():
             {
                 "id": prompt_id,
                 "prompt": judge_prompt,
-                "model": judge_settings[judge]["model"],
+                "api": judge_settings[judge]["api"],
                 "model_name": judge_settings[judge]["model_name"],
                 "parameters": judge_settings[judge]["parameters"],
             }
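Since each judge prompt now carries the backend from the judge settings' `api` field, an entry built by this script would look roughly as follows (judge name, prompt text, and id are placeholders, not values from the repository):

```python
# Hypothetical judge settings, shaped like the settings.json diff above.
judge_settings = {
    "gemini-1.0-pro": {
        "api": "gemini",
        "model_name": "gemini-1.0-pro-002",
        "parameters": {"temperature": 0},
    }
}

judge = "gemini-1.0-pro"
judge_prompt = "Rate the following response on a scale of 1-10: ..."  # placeholder
prompt_id = 0

entry = {
    "id": prompt_id,
    "prompt": judge_prompt,
    "api": judge_settings[judge]["api"],  # previously judge_settings[judge]["model"]
    "model_name": judge_settings[judge]["model_name"],
    "parameters": judge_settings[judge]["parameters"],
}
print(entry)
```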
12 changes: 6 additions & 6 deletions src/batch_llm/scripts/run_checks.py
@@ -124,10 +124,10 @@ def is_valid_jsonl(
                 issues.append(KeyError('"prompt" key not found'))
                 valid_indicator = False
 
-            # check if "model" is a key in the json
-            if "model" not in data:
-                # if "model" is not a key, add index to list
-                issues.append(KeyError('"model" key not found'))
+            # check if "api" is a key in the json
+            if "api" not in data:
+                # if "api" is not a key, add index to list
+                issues.append(KeyError('"api" key not found'))
                 valid_indicator = False
 
             # if parameters is passed, check its a dictionary
@@ -149,9 +149,9 @@
                     multimedia_path_errors.add(path_errors)
 
             # model specific checks
-            issues.extend(ASYNC_MODELS[data["model"]].check_prompt_dict(data))
+            issues.extend(ASYNC_MODELS[data["api"]].check_prompt_dict(data))
             # add model to set of models to check environment variables for
-            model_environments_to_check.add(data["model"])
+            model_environments_to_check.add(data["api"])
         except json.JSONDecodeError as err:
             # if line is not a valid json, add index to list
             issues.append(err)
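A minimal sketch of the per-line check that `run_checks.py` now performs, reduced to the `prompt`/`api` key validation shown in the hunks above (the helper function is illustrative, not the repository's actual API):

```python
import json


def check_line(line: str) -> list[Exception]:
    """Return the issues found in a single JSONL line."""
    try:
        data = json.loads(line)
    except json.JSONDecodeError as err:
        # if the line is not valid JSON, report the decode error
        return [err]
    issues: list[Exception] = []
    if "prompt" not in data:
        issues.append(KeyError('"prompt" key not found'))
    if "api" not in data:
        issues.append(KeyError('"api" key not found'))
    return issues


print(check_line('{"id": 1, "prompt": "Hello", "api": "gemini"}'))  # []
print(check_line('{"id": 2, "prompt": "Hello"}'))  # [KeyError('"api" key not found')]
```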
16 changes: 8 additions & 8 deletions tests/conftest.py
@@ -45,18 +45,18 @@ def temporary_data_folders(tmp_path: Path):
 
     # create a jsonl file in the folder
     with open(Path(tmp_path / "test.jsonl"), "w") as f:
-        f.write('{"prompt": "test prompt", "model": "test"}\n')
+        f.write('{"prompt": "test prompt", "api": "test"}\n')
 
     # create utils folder which we use to test the sorting of files
     utils_dir = Path(tmp_path / "utils").mkdir()
     with open(Path(tmp_path / "utils" / "first.jsonl"), "w") as f:
-        f.write('{"prompt": "test prompt 1", "model": "test"}\n')
+        f.write('{"prompt": "test prompt 1", "api": "test"}\n')
     time.sleep(0.01)
     with open(Path(tmp_path / "utils" / "second.jsonl"), "w") as f:
-        f.write('{"prompt": "test prompt 2", "model": "test"}\n')
+        f.write('{"prompt": "test prompt 2", "api": "test"}\n')
     time.sleep(0.01)
     with open(Path(tmp_path / "utils" / "third.jsonl"), "w") as f:
-        f.write('{"prompt": "test prompt 3", "model": "test"}\n')
+        f.write('{"prompt": "test prompt 3", "api": "test"}\n')
 
     # create a folder for testing the experiment pipeline
     experiment_pipeline = Path(tmp_path / "experiment_pipeline").mkdir()
@@ -68,14 +68,14 @@ def temporary_data_folders(tmp_path: Path):
     with open(
         Path(tmp_path / "experiment_pipeline" / "input" / "first.jsonl"), "w"
     ) as f:
-        f.write('{"prompt": "test prompt 1", "model": "test"}\n')
-        f.write('{"prompt": "test prompt 2", "model": "test"}\n')
-        f.write('{"prompt": "test prompt 3", "model": "test"}\n')
+        f.write('{"prompt": "test prompt 1", "api": "test"}\n')
+        f.write('{"prompt": "test prompt 2", "api": "test"}\n')
+        f.write('{"prompt": "test prompt 3", "api": "test"}\n')
     time.sleep(0.01)
     with open(
         Path(tmp_path / "experiment_pipeline" / "input" / "second.jsonl"), "w"
     ) as f:
-        f.write('{"prompt": "test prompt 2", "model": "test"}\n')
+        f.write('{"prompt": "test prompt 2", "api": "test"}\n')
 
     # store current working directory
     cwd = os.getcwd()
8 changes: 4 additions & 4 deletions tests/core/test_experiment.py
@@ -35,8 +35,8 @@ def test_experiment_init(temporary_data_folders):
 
     # create a jsonl file in the input folder (which is created when initialising Settings object)
    with open("data/input/test_in_input.jsonl", "w") as f:
-        f.write('{"id": 0, "prompt": "test prompt 0", "model": "test"}\n')
-        f.write('{"id": 1, "prompt": "test prompt 1", "model": "test"}\n')
+        f.write('{"id": 0, "prompt": "test prompt 0", "api": "test"}\n')
+        f.write('{"id": 1, "prompt": "test prompt 1", "api": "test"}\n')
 
     # create an experiment object
     experiment = Experiment("test_in_input.jsonl", settings=settings)
@@ -56,8 +56,8 @@ def test_experiment_init(temporary_data_folders):
         == f"data/output/test_in_input/{experiment.creation_time}-input-test_in_input.jsonl"
     )
     assert experiment.experiment_prompts == [
-        {"id": 0, "prompt": "test prompt 0", "model": "test"},
-        {"id": 1, "prompt": "test prompt 1", "model": "test"},
+        {"id": 0, "prompt": "test prompt 0", "api": "test"},
+        {"id": 1, "prompt": "test prompt 1", "api": "test"},
     ]
     assert experiment.number_queries == 2
     assert isinstance(experiment.creation_time, str)
