Merge pull request #33 from alan-turing-institute/15-change-model-to-api
Change "model" to "api"
rchan26 authored Apr 26, 2024
2 parents bebeee7 + 412d142 commit b72cd31
Showing 11 changed files with 42 additions and 41 deletions.
2 changes: 1 addition & 1 deletion notebook/example/data/judge/settings.json
@@ -1 +1 @@
-{"gemini-1.0-pro": {"model":"gemini", "model_name":"gemini-1.0-pro-002", "parameters": {"temperature": 0}}}
+{"gemini-1.0-pro": {"api":"gemini", "model_name":"gemini-1.0-pro-002", "parameters": {"temperature": 0}}}
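For context, a minimal sketch of reading a judge settings file in this format, assuming only the path and structure shown in the diff above (the loop itself is illustrative, not part of the repository):

```python
import json

# Assumed path, taken from the diff above. Each judge entry now names the
# backend under "api" and the concrete model under "model_name".
with open("notebook/example/data/judge/settings.json") as f:
    judge_settings = json.load(f)

for judge, config in judge_settings.items():
    # e.g. judge="gemini-1.0-pro", api="gemini", model_name="gemini-1.0-pro-002"
    print(judge, config["api"], config["model_name"], config.get("parameters", {}))
```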
6 changes: 3 additions & 3 deletions notebook/example/data2/input/azureopenai.jsonl
@@ -1,3 +1,3 @@
-{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "model": "azure_openai", "model_name": "reginald-gpt4", "parameters": {"temperature": 1, "top_p": 0.8}}
-{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "model": "azure_openai", "parameters": {"temperature": 0.5, "top_p": 0.6}}
-{"id": 11, "prompt": "How many theaters are there in London's South End?", "model": "azure_openai"}
+{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "azure_openai", "model_name": "reginald-gpt4", "parameters": {"temperature": 1, "top_p": 0.8}}
+{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "azure_openai", "parameters": {"temperature": 0.5, "top_p": 0.6}}
+{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "azure_openai"}
3 changes: 3 additions & 0 deletions notebook/example/data2/input/openai.jsonl
@@ -0,0 +1,3 @@
+{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "openai", "model_name": "gpt-3.5-turbo-instruct", "parameters": {"temperature": 1, "top_p": 0.8}}
+{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "openai", "parameters": {"temperature": 0.5, "top_p": 0.6}}
+{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "openai"}
6 changes: 3 additions & 3 deletions notebook/example/data2/input/test.jsonl
@@ -1,3 +1,3 @@
-{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "model": "unknown-model-name", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}}
-{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "model": "unknown-model-name", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}}
-{"id": 11, "prompt": "How many theaters are there in London's South End?", "model": "unknown-model-name"}
+{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "unknown-model-name", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}}
+{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "unknown-model-name", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}}
+{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "unknown-model-name"}
6 changes: 3 additions & 3 deletions notebook/example/data2/input/test2.jsonl
@@ -1,3 +1,3 @@
-{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "model": "gemini", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}}
-{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "model": "gemini", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}}
-{"id": 11, "prompt": "How many theaters are there in London's South End?", "model": "gemini"}
+{"id": 9, "prompt": ["Hello", "My name is Bob and I'm 6 years old", "How old am I next year?"], "api": "gemini", "parameters": {"candidate_count": 1, "max_output_tokens": 64, "temperature": 1, "top_k": 40}}
+{"id": 10, "prompt": ["Can you give me a random number between 1-10?", "What is +5 of that number?", "What is half of that number?"], "api": "gemini", "parameters": {"candidate_count": 1, "max_output_tokens": 128, "temperature": 0.5, "top_k": 40}}
+{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "gemini"}
6 changes: 3 additions & 3 deletions notebook/example/data3/input/ollama.jsonl
@@ -1,3 +1,3 @@
-{"id": 9, "prompt": "Hello, my name is Bob and I'm 6 years old. How old am I next year?", "model": "ollama", "model_name": "gemma", "parameters": {"temperature": 1}}
-{"id": 10, "prompt": "Can you give me a random number between 1-10?", "model": "ollama", "model_name": "llama", "parameters": {"temperature": 0.5}}
-{"id": 11, "prompt": "How many theaters are there in London's South End?", "model": "ollama"}
+{"id": 9, "prompt": "Hello, my name is Bob and I'm 6 years old. How old am I next year?", "api": "ollama", "model_name": "gemma", "parameters": {"temperature": 1}}
+{"id": 10, "prompt": "Can you give me a random number between 1-10?", "api": "ollama", "model_name": "llama", "parameters": {"temperature": 0.5}}
+{"id": 11, "prompt": "How many theaters are there in London's South End?", "api": "ollama"}
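The input JSONL files above all share one per-line format: a `prompt` (a string or a list of strings), the new `api` key naming the backend, and optional `model_name` and `parameters`. A small sketch of writing such a file, with made-up prompts and an assumed output path:

```python
import json

# Illustrative prompts in the updated input format: "api" names the backend,
# "model_name" optionally selects a specific model, "parameters" is optional.
prompts = [
    {"id": 1, "prompt": "Hello, how are you?", "api": "ollama", "model_name": "gemma"},
    {
        "id": 2,
        "prompt": ["Pick a number between 1-10", "Double it"],
        "api": "gemini",
        "parameters": {"temperature": 0.5},
    },
]

# Write one JSON object per line (file name is assumed, for illustration only).
with open("example_input.jsonl", "w") as f:
    for prompt_dict in prompts:
        f.write(json.dumps(prompt_dict) + "\n")
```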
16 changes: 7 additions & 9 deletions src/batch_llm/experiment_processing.py
@@ -90,7 +90,7 @@ def group_prompts_by_model(self) -> dict[str, list[dict]]:
 
         grouped_dict = {}
         for item in self.experiment_prompts:
-            model = item.get("model")
+            model = item.get("api")
             if model not in grouped_dict:
                 grouped_dict[model] = [item]
@@ -372,7 +372,7 @@ async def query_model_and_record_response(
     ----------
     prompt_dict : dict
         Dictionary containing the prompt and other parameters to be
-        used for text generation. Required keys are "prompt" and "model".
+        used for text generation. Required keys are "prompt" and "api".
         Some models may have other required keys.
     settings : Settings
         Settings for the pipeline
@@ -478,7 +478,7 @@ async def generate_text(
     ----------
     prompt_dict : dict
         Dictionary containing the prompt and other parameters to be
-        used for text generation. Required keys are "prompt" and "model".
+        used for text generation. Required keys are "prompt" and "api".
         Some models may have other required keys.
     settings : Settings
         Settings for the pipeline
@@ -497,19 +497,17 @@ async def generate_text(
     """
     if index is None:
         index = "NA"
-    if "model" not in prompt_dict:
-        raise KeyError(
-            "Model is not specified in the prompt_dict. Must have 'model' key"
-        )
+    if "api" not in prompt_dict:
+        raise KeyError("API is not specified in the prompt_dict. Must have 'api' key")
 
     # obtain model
     try:
-        model = ASYNC_MODELS[prompt_dict["model"]](
+        model = ASYNC_MODELS[prompt_dict["api"]](
             settings=settings, log_file=experiment.log_file
         )
     except KeyError:
         raise NotImplementedError(
-            f"Model {prompt_dict['model']} not recognised or implemented"
+            f"Model {prompt_dict['api']} not recognised or implemented"
         )
 
     # query the model
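The change in `generate_text` keeps the same dispatch pattern but keys the `ASYNC_MODELS` lookup on `prompt_dict["api"]`. A self-contained sketch of that pattern, using a stand-in class rather than the repository's real model implementations:

```python
# Stand-in for one of the repository's async model classes (not the real one).
class DummyGeminiModel:
    def __init__(self, settings=None, log_file=None):
        self.settings = settings
        self.log_file = log_file


# Maps the "api" value of a prompt_dict to the class that handles it.
ASYNC_MODELS = {"gemini": DummyGeminiModel}


def get_model(prompt_dict: dict, settings=None, log_file=None):
    if "api" not in prompt_dict:
        raise KeyError("API is not specified in the prompt_dict. Must have 'api' key")
    try:
        return ASYNC_MODELS[prompt_dict["api"]](settings=settings, log_file=log_file)
    except KeyError:
        raise NotImplementedError(
            f"Model {prompt_dict['api']} not recognised or implemented"
        )


model = get_model({"prompt": "Hello", "api": "gemini"})
print(type(model).__name__)  # DummyGeminiModel
```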
2 changes: 1 addition & 1 deletion src/batch_llm/scripts/create_judge_file.py
@@ -91,7 +91,7 @@ def main():
             {
                 "id": prompt_id,
                 "prompt": judge_prompt,
-                "model": judge_settings[judge]["model"],
+                "api": judge_settings[judge]["api"],
                 "model_name": judge_settings[judge]["model_name"],
                 "parameters": judge_settings[judge]["parameters"],
             }
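Since each judge prompt now carries the backend from the judge settings' `api` field, an entry built by this script would look roughly as follows (judge name, prompt text, and id are placeholders, not values from the repository):

```python
# Hypothetical judge settings, shaped like the settings.json diff above.
judge_settings = {
    "gemini-1.0-pro": {
        "api": "gemini",
        "model_name": "gemini-1.0-pro-002",
        "parameters": {"temperature": 0},
    }
}

judge = "gemini-1.0-pro"
judge_prompt = "Rate the following response on a scale of 1-10: ..."  # placeholder
prompt_id = 0

entry = {
    "id": prompt_id,
    "prompt": judge_prompt,
    "api": judge_settings[judge]["api"],  # previously judge_settings[judge]["model"]
    "model_name": judge_settings[judge]["model_name"],
    "parameters": judge_settings[judge]["parameters"],
}
print(entry)
```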
12 changes: 6 additions & 6 deletions src/batch_llm/scripts/run_checks.py
@@ -124,10 +124,10 @@ def is_valid_jsonl(
                 issues.append(KeyError('"prompt" key not found'))
                 valid_indicator = False
 
-            # check if "model" is a key in the json
-            if "model" not in data:
-                # if "model" is not a key, add index to list
-                issues.append(KeyError('"model" key not found'))
+            # check if "api" is a key in the json
+            if "api" not in data:
+                # if "api" is not a key, add index to list
+                issues.append(KeyError('"api" key not found'))
                 valid_indicator = False
 
             # if parameters is passed, check its a dictionary
@@ -149,9 +149,9 @@
                     multimedia_path_errors.add(path_errors)
 
             # model specific checks
-            issues.extend(ASYNC_MODELS[data["model"]].check_prompt_dict(data))
+            issues.extend(ASYNC_MODELS[data["api"]].check_prompt_dict(data))
             # add model to set of models to check environment variables for
-            model_environments_to_check.add(data["model"])
+            model_environments_to_check.add(data["api"])
         except json.JSONDecodeError as err:
             # if line is not a valid json, add index to list
             issues.append(err)
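A minimal sketch of the per-line check that `run_checks.py` now performs, reduced to the `prompt`/`api` key validation shown in the hunks above (the helper function is illustrative, not the repository's actual API):

```python
import json


def check_line(line: str) -> list[Exception]:
    """Return the issues found in a single JSONL line."""
    try:
        data = json.loads(line)
    except json.JSONDecodeError as err:
        # if the line is not valid JSON, report the decode error
        return [err]
    issues: list[Exception] = []
    if "prompt" not in data:
        issues.append(KeyError('"prompt" key not found'))
    if "api" not in data:
        issues.append(KeyError('"api" key not found'))
    return issues


print(check_line('{"id": 1, "prompt": "Hello", "api": "gemini"}'))  # []
print(check_line('{"id": 2, "prompt": "Hello"}'))  # [KeyError('"api" key not found')]
```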
16 changes: 8 additions & 8 deletions tests/conftest.py
@@ -45,18 +45,18 @@ def temporary_data_folders(tmp_path: Path):
 
     # create a jsonl file in the folder
     with open(Path(tmp_path / "test.jsonl"), "w") as f:
-        f.write('{"prompt": "test prompt", "model": "test"}\n')
+        f.write('{"prompt": "test prompt", "api": "test"}\n')
 
     # create utils folder which we use to test the sorting of files
     utils_dir = Path(tmp_path / "utils").mkdir()
     with open(Path(tmp_path / "utils" / "first.jsonl"), "w") as f:
-        f.write('{"prompt": "test prompt 1", "model": "test"}\n')
+        f.write('{"prompt": "test prompt 1", "api": "test"}\n')
     time.sleep(0.01)
     with open(Path(tmp_path / "utils" / "second.jsonl"), "w") as f:
-        f.write('{"prompt": "test prompt 2", "model": "test"}\n')
+        f.write('{"prompt": "test prompt 2", "api": "test"}\n')
     time.sleep(0.01)
     with open(Path(tmp_path / "utils" / "third.jsonl"), "w") as f:
-        f.write('{"prompt": "test prompt 3", "model": "test"}\n')
+        f.write('{"prompt": "test prompt 3", "api": "test"}\n')
 
     # create a folder for testing the experiment pipeline
     experiment_pipeline = Path(tmp_path / "experiment_pipeline").mkdir()
@@ -68,14 +68,14 @@ def temporary_data_folders(tmp_path: Path):
     with open(
         Path(tmp_path / "experiment_pipeline" / "input" / "first.jsonl"), "w"
     ) as f:
-        f.write('{"prompt": "test prompt 1", "model": "test"}\n')
-        f.write('{"prompt": "test prompt 2", "model": "test"}\n')
-        f.write('{"prompt": "test prompt 3", "model": "test"}\n')
+        f.write('{"prompt": "test prompt 1", "api": "test"}\n')
+        f.write('{"prompt": "test prompt 2", "api": "test"}\n')
+        f.write('{"prompt": "test prompt 3", "api": "test"}\n')
     time.sleep(0.01)
     with open(
         Path(tmp_path / "experiment_pipeline" / "input" / "second.jsonl"), "w"
     ) as f:
-        f.write('{"prompt": "test prompt 2", "model": "test"}\n')
+        f.write('{"prompt": "test prompt 2", "api": "test"}\n')
 
     # store current working directory
     cwd = os.getcwd()
8 changes: 4 additions & 4 deletions tests/core/test_experiment.py
@@ -35,8 +35,8 @@ def test_experiment_init(temporary_data_folders):
 
     # create a jsonl file in the input folder (which is created when initialising Settings object)
    with open("data/input/test_in_input.jsonl", "w") as f:
-        f.write('{"id": 0, "prompt": "test prompt 0", "model": "test"}\n')
-        f.write('{"id": 1, "prompt": "test prompt 1", "model": "test"}\n')
+        f.write('{"id": 0, "prompt": "test prompt 0", "api": "test"}\n')
+        f.write('{"id": 1, "prompt": "test prompt 1", "api": "test"}\n')
 
     # create an experiment object
     experiment = Experiment("test_in_input.jsonl", settings=settings)
@@ -56,8 +56,8 @@ def test_experiment_init(temporary_data_folders):
         == f"data/output/test_in_input/{experiment.creation_time}-input-test_in_input.jsonl"
     )
     assert experiment.experiment_prompts == [
-        {"id": 0, "prompt": "test prompt 0", "model": "test"},
-        {"id": 1, "prompt": "test prompt 1", "model": "test"},
+        {"id": 0, "prompt": "test prompt 0", "api": "test"},
+        {"id": 1, "prompt": "test prompt 1", "api": "test"},
     ]
     assert experiment.number_queries == 2
     assert isinstance(experiment.creation_time, str)
