openvinotoolkit · ilya-lavrenov · Feb 27, 2025 · Feb 24, 2025 · Feb 25, 2025 · Feb 25, 2025
diff --git a/tests/python_tests/common.py b/tests/python_tests/common.py
@@ -71,59 +71,105 @@ def run_llm_pipeline(
                            ov_config=properties)
 
 
-def run_llm_pipeline_with_ref(model_id: str, 
-                              prompts: List[str], 
-                              generation_config: GenerationConfig | dict, 
-                              tmp_path: Path | TemporaryDirectory = TemporaryDirectory(), 
-                              use_cb : bool = False,
-                              streamer: StreamerWithResults | Callable | StreamerBase = None):
-    if type(generation_config) is dict:
-        generation_config = GenerationConfig(**generation_config)
+# def run_llm_pipeline_with_ref(model_id: str, 
+#                               prompts: List[str], 
+#                               generation_config: GenerationConfig | dict, 
+#                               tmp_path: Path | TemporaryDirectory = TemporaryDirectory(), 
+#                               use_cb : bool = False,
+#                               streamer: StreamerWithResults | Callable | StreamerBase = None):
+    # if type(generation_config) is dict:
+    #     generation_config = GenerationConfig(**generation_config)
 
-    opt_model, hf_tokenizer, models_path = download_and_convert_model(model_id, Path(tmp_path.name))
+    # opt_model, hf_tokenizer, models_path = download_and_convert_model(model_id, Path(tmp_path.name))
 
-    ov_results = run_llm_pipeline(models_path, prompts, generation_config, use_cb, streamer=streamer.accumulate if isinstance(streamer, StreamerWithResults) else streamer)
-    hf_results = run_hugging_face(opt_model, hf_tokenizer, prompts, generation_config)
+    # ov_results = run_llm_pipeline(models_path, prompts, generation_config, use_cb, streamer=streamer.accumulate if isinstance(streamer, StreamerWithResults) else streamer)
+    # hf_results = run_hugging_face(opt_model, hf_tokenizer, prompts, generation_config)
 
-    compare_generation_results(prompts, hf_results, ov_results, generation_config)
+    # compare_generation_results(prompts, hf_results, ov_results, generation_config)
 
 
-def run_cb_pipeline_with_ref(tmp_path: str,
-                             model_id: str,
-                             scheduler_params: dict = {},
-                             generation_config : GenerationConfig | dict = None):
-    prompts, generation_configs = get_test_dataset()
-    scheduler_config = dict_to_scheduler_config(scheduler_params)
+# def run_cb_pipeline_with_ref(tmp_path: str,
+#                              model_id: str,
+#                              scheduler_params: dict = {},
+#                              generation_config : GenerationConfig | dict = None):
+#     prompts, generation_configs = get_test_dataset()
+#     scheduler_config = dict_to_scheduler_config(scheduler_params)
 
-    # override dataset's generation config
-    if generation_config is not None:
-        if type(generation_config) is dict:
-            generation_config = GenerationConfig(**generation_config)
-        generation_configs = [generation_config] * len(prompts)
+#     # override dataset's generation config
+#     if generation_config is not None:
+#         if type(generation_config) is dict:
+#             generation_config = GenerationConfig(**generation_config)
+#         generation_configs = [generation_config] * len(prompts)
 
-    opt_model, hf_tokenizer, models_path = download_and_convert_model(model_id, tmp_path)
+#     opt_model, hf_tokenizer, models_path = download_and_convert_model(model_id, tmp_path)
 
-    hf_results = run_hugging_face(opt_model, hf_tokenizer, prompts, generation_configs)
-    ov_results = run_continuous_batching(models_path, scheduler_config, prompts, generation_configs)
+#     hf_results = run_hugging_face(opt_model, hf_tokenizer, prompts, generation_configs)
+#     ov_results = run_continuous_batching(models_path, scheduler_config, prompts, generation_configs)
 
-    compare_generation_results(prompts, hf_results, ov_results, generation_configs)
+#     compare_generation_results(prompts, hf_results, ov_results, generation_configs)
 
 
 # TODO: remove after Generator property is supported by LLMPipeline / VLMPipeline
-def generate_and_compare_with_reference_text(models_path: Path,
-                                             prompts: List[str],
-                                             reference_texts_per_prompt: List[List[str]],
-                                             generation_configs: List[GenerationConfig],
-                                             scheduler_config: SchedulerConfig):
-    ov_results : List[GenerationResult] = run_continuous_batching(models_path, scheduler_config, prompts, generation_configs)
-
-    assert len(prompts) == len(reference_texts_per_prompt)
-    assert len(prompts) == len(ov_results)
-
-    for prompt, ref_texts_for_this_prompt, ov_result in zip(prompts, reference_texts_per_prompt, ov_results):
-        print(f"Prompt = {prompt}\nref text = {ref_texts_for_this_prompt}\nOV result = {ov_result.m_generation_ids}")
-
-        assert len(ref_texts_for_this_prompt) == len(ov_result.m_generation_ids)
-        for ref_text, ov_text in zip(ref_texts_for_this_prompt, ov_result.m_generation_ids):
-            assert ref_text == ov_text
-
+# def generate_and_compare_with_reference_text(models_path: Path,
+#                                              prompts: List[str],
+#                                              reference_texts_per_prompt: List[List[str]],
+#                                              generation_configs: List[GenerationConfig],
+#                                              scheduler_config: SchedulerConfig):
+#     ov_results : List[GenerationResult] = run_continuous_batching(models_path, scheduler_config, prompts, generation_configs)
+
+#     assert len(prompts) == len(reference_texts_per_prompt)
+#     assert len(prompts) == len(ov_results)
+
+#     for prompt, ref_texts_for_this_prompt, ov_result in zip(prompts, reference_texts_per_prompt, ov_results):
+#         print(f"Prompt = {prompt}\nref text = {ref_texts_for_this_prompt}\nOV result = {ov_result.m_generation_ids}")
+
+#         assert len(ref_texts_for_this_prompt) == len(ov_result.m_generation_ids)
+#         for ref_text, ov_text in zip(ref_texts_for_this_prompt, ov_result.m_generation_ids):
+#             assert ref_text == ov_text
+
+
+def generate_and_compare(model: Path | str,
+                         prompts : List[str] | None = None,
+                         generation_config: List[GenerationConfig] | GenerationConfig | dict | None = None,
+                         pipeline_type: PipelineType = PipelineType.PAGED_ATTENTION,
+                         scheduler_config: SchedulerConfig | dict = SchedulerConfig(),
+                         ref : List[List[str]] = None,
+                         streamer: StreamerWithResults | Callable | StreamerBase = None,
+                         tmp_path: Path | TemporaryDirectory = TemporaryDirectory()) :
+    """Run an OpenVINO GenAI pipeline and check its output against a reference.
+
+    Args:
+        model: a ``Path`` is used as the models directory as-is; any other value is
+            handed to ``download_and_convert_model`` together with ``tmp_path``.
+        prompts: prompts to generate for. When omitted, prompts come from
+            ``get_test_dataset()`` (the helper the former ``run_cb_pipeline_with_ref``
+            used).
+        generation_config: a dict is turned into one ``GenerationConfig``; a single
+            ``GenerationConfig`` is repeated once per prompt; a list is used as-is.
+            When omitted together with ``prompts``, the dataset's configs are used.
+        pipeline_type: forwarded to ``run_ov_pipeline``.
+        scheduler_config: a ``SchedulerConfig`` or a dict accepted by
+            ``dict_to_scheduler_config``.
+        ref: expected texts per prompt. When ``None`` the reference is produced by
+            ``run_hugging_face`` instead.
+        streamer: forwarded to ``run_ov_pipeline``; for a ``StreamerWithResults`` its
+            ``accumulate`` method is passed.
+        tmp_path: directory for the converted model, either a ``TemporaryDirectory``
+            or a path such as pytest's ``tmp_path`` fixture.
+
+    Raises:
+        ValueError: if ``prompts`` is given without ``generation_config``, or if
+            ``model`` is a ``Path`` and no ``ref`` is given (there is no Hugging Face
+            model to generate a reference with).
+
+    Note:
+        The default ``scheduler_config`` and ``tmp_path`` objects are created once,
+        when the function is defined, and are shared by every call relying on them.
+    """
+    if prompts is None:
+        # Fall back to the shared test dataset, as run_cb_pipeline_with_ref used to.
+        prompts, dataset_configs = get_test_dataset()
+        if generation_config is None:
+            generation_config = dataset_configs
+    elif generation_config is None:
+        raise ValueError("generation_config must be provided together with prompts")
+
+    if isinstance(generation_config, dict):
+        gen_config = GenerationConfig(**generation_config)
+    elif isinstance(generation_config, GenerationConfig):
+        gen_config = [generation_config] * len(prompts)
+    else:
+        gen_config = generation_config
+
+    if isinstance(scheduler_config, SchedulerConfig):
+        scheduler_config_ = scheduler_config
+    else:
+        scheduler_config_ = dict_to_scheduler_config(scheduler_config)
+
+    if isinstance(model, Path):
+        if ref is None:
+            # Fail before running the pipeline: without a Hugging Face model there is
+            # nothing to compare against.
+            raise ValueError("ref must be provided when model is a path to an already converted model")
+        opt_model = hf_tokenizer = None
+        models_path = model
+    else:
+        # TemporaryDirectory keeps its full path in `.name`; for a Path, `.name` is only
+        # the last component, so a Path must be used as-is.
+        work_dir = Path(tmp_path.name) if isinstance(tmp_path, TemporaryDirectory) else Path(tmp_path)
+        opt_model, hf_tokenizer, models_path = download_and_convert_model(model, work_dir)
+
+    ov_results = run_ov_pipeline(models_path=models_path,
+                                 prompt=prompts,
+                                 generation_config=gen_config,
+                                 pipeline_type=pipeline_type,
+                                 streamer=streamer.accumulate if isinstance(streamer, StreamerWithResults) else streamer,
+                                 scheduler_config=scheduler_config_,
+                                 ov_config=get_default_llm_properties())
+    if ref is None:
+        # Use the normalised config: the raw argument may still be a plain dict.
+        ref_results = run_hugging_face(opt_model, hf_tokenizer, prompts, gen_config)
+        compare_generation_results(prompts, ref_results, ov_results, gen_config)
+    else:
+        assert len(prompts) == len(ref)
+        assert len(prompts) == len(ov_results)
+
+        for prompt, ref_texts_for_this_prompt, ov_result in zip(prompts, ref, ov_results):
+            print(f"Prompt = {prompt}\nref text = {ref_texts_for_this_prompt}\nOV result = {ov_result.m_generation_ids}")
+
+            assert len(ref_texts_for_this_prompt) == len(ov_result.m_generation_ids)
+            for ref_text, ov_text in zip(ref_texts_for_this_prompt, ov_result.m_generation_ids):
+                assert ref_text == ov_text
diff --git a/tests/python_tests/ov_genai_test_utils.py b/tests/python_tests/ov_genai_test_utils.py
diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py
@@ -11,7 +11,7 @@
 
 from openvino_genai import ContinuousBatchingPipeline, LLMPipeline, GenerationConfig, SchedulerConfig,  draft_model
 
-from common import generate_and_compare_with_reference_text, run_cb_pipeline_with_ref
+from common import generate_and_compare
 from test_sampling import RandomSamplingTestStruct, get_current_platform_ref_texts
 
 from utils.generation_config import get_greedy, get_beam_search, \
@@ -40,19 +40,19 @@ def read_models_list(file_name: str):
 @pytest.mark.precommit
 @pytest.mark.parametrize("model_id", read_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "precommit")))
 def test_e2e_precommit(tmp_path, model_id):
-    run_cb_pipeline_with_ref(tmp_path, model_id)
+    # No `ref` is passed, so generate_and_compare takes its run_hugging_face comparison branch.
+    # NOTE(review): prompts/generation_config are not passed - this call only works if
+    # generate_and_compare supplies defaults for them; confirm.
+    # NOTE(review): pipeline_type is left at its default (PipelineType.PAGED_ATTENTION), while the
+    # replaced helper called run_continuous_batching - confirm this is intended.
+    generate_and_compare(tmp_path=tmp_path, model=model_id)
 
 
 @pytest.mark.nightly
 @pytest.mark.parametrize("model_id", read_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "nightly")))
 def test_e2e_nightly(tmp_path, model_id):
-    run_cb_pipeline_with_ref(tmp_path, model_id)
+    # No `ref` is passed, so generate_and_compare takes its run_hugging_face comparison branch.
+    # NOTE(review): prompts/generation_config are not passed - this call only works if
+    # generate_and_compare supplies defaults for them; confirm.
+    generate_and_compare(tmp_path=tmp_path, model=model_id)
 
 
 @pytest.mark.real_models
 @pytest.mark.parametrize("model_id", read_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "real_models")))
 def test_e2e_real_models(tmp_path, model_id):
-    run_cb_pipeline_with_ref(tmp_path, model_id)
+    # No `ref` is passed, so generate_and_compare takes its run_hugging_face comparison branch.
+    # NOTE(review): prompts/generation_config are not passed - this call only works if
+    # generate_and_compare supplies defaults for them; confirm.
+    generate_and_compare(tmp_path=tmp_path, model=model_id)
 
 #
 # Comparison with stateful
@@ -208,8 +208,11 @@ def get_beam_search_seq_len_300() -> GenerationConfig:
 @pytest.mark.parametrize("params", scheduler_params_list)
 @pytest.mark.precommit
 def test_preemption(tmp_path, params):
-    run_cb_pipeline_with_ref(tmp_path, "facebook/opt-125m", scheduler_params=params[0], generation_config=params[1])
+    # Each `params` entry is indexed as (scheduler parameters, generation config).
+    model_id = "facebook/opt-125m"
+    scheduler_params = params[0]
+    generation_config = params[1]
 
+    # NOTE(review): `prompts` is not passed - this call only works if generate_and_compare
+    # supplies a default for it; confirm.
+    # NOTE(review): pipeline_type is left at its default (PipelineType.PAGED_ATTENTION), while the
+    # replaced helper called run_continuous_batching - confirm preemption is still exercised.
+    generate_and_compare(tmp_path=tmp_path, model=model_id, scheduler_config=scheduler_params, generation_config=generation_config)
 
 multinomial_params = RandomSamplingTestStruct(
     generation_config=[
@@ -261,7 +264,12 @@ def test_preemption_with_multinomial(tmp_path, dynamic_split_fuse):
     model, hf_tokenizer, models_path = download_and_convert_model(model_id, tmp_path)
 
     scheduler_config = dict_to_scheduler_config({"num_kv_blocks": 3, "dynamic_split_fuse": dynamic_split_fuse, "max_num_batched_tokens": 256, "max_num_seqs": 256})
-    generate_and_compare_with_reference_text(models_path, multinomial_params.prompts, multinomial_params.ref_texts, generation_configs, scheduler_config)
+    generate_and_compare(model=models_path,
+                         pipeline_type=PipelineType.CONTINIOUS_BATCHING,
+                         prompts=multinomial_params.prompts,
+                         ref=multinomial_params.ref_texts,
+                         generation_config=generation_configs,
+                         scheduler_config=scheduler_config)
 
 
 multinomial_params_n_seq = RandomSamplingTestStruct(
@@ -337,7 +345,12 @@ def test_preemption_with_multinomial_n_seq(tmp_path, dynamic_split_fuse):
 
     # needed kv_blocks - 16 (2 blocks per sequence (30 tokens to generated text + prompt (> 2 tokens)) * (1 + 3 + 4) seq )
     scheduler_config = dict_to_scheduler_config({"num_kv_blocks": 8, "dynamic_split_fuse": dynamic_split_fuse, "max_num_batched_tokens": 256, "max_num_seqs": 256})
-    generate_and_compare_with_reference_text(models_path, multinomial_params_n_seq.prompts, multinomial_params_n_seq.ref_texts, multinomial_params_n_seq.generation_config, scheduler_config)
+    generate_and_compare(model=models_path,
+                         pipeline_type=PipelineType.CONTINIOUS_BATCHING,
+                         prompts=multinomial_params_n_seq.prompts,
+                         ref=multinomial_params_n_seq.ref_texts,
+                         generation_config=multinomial_params_n_seq.generation_config,
+                         scheduler_config=scheduler_config)
 
 def get_data_by_pipeline_type(model_path: Path, pipeline_type: str, generation_config: GenerationConfig):
     device = "CPU"