diff --git a/defog_utils/utils_llm.py b/defog_utils/utils_llm.py
index bd7064b..998db60 100644
--- a/defog_utils/utils_llm.py
+++ b/defog_utils/utils_llm.py
@@ -170,6 +170,7 @@ def chat_openai(
     """
     Returns the response from the OpenAI API, the time taken to generate the response, the number of input tokens used, and the number of output tokens used.
     We use max_completion_tokens here, instead of using max_tokens. This is to support o1 models.
+    Note this function also supports DeepSeek models as it uses the same API. Simply use the base URL "https://api.deepseek.com"
     """
     from openai import OpenAI
 
@@ -242,10 +243,12 @@ async def chat_openai_async(
     timeout=100,
     base_url: str = "https://api.openai.com/v1/",
     api_key: str = os.environ.get("OPENAI_API_KEY", ""),
+    prediction: Dict[str,str] = None,
 ) -> LLMResponse:
     """
     Returns the response from the OpenAI API, the time taken to generate the response, the number of input tokens used, and the number of output tokens used.
     We use max_completion_tokens here, instead of using max_tokens. This is to support o1 models.
+    Note this function also supports DeepSeek models as it uses the same API. Simply use the base URL "https://api.deepseek.com"
     """
     from openai import AsyncOpenAI
 
@@ -274,6 +277,11 @@ async def chat_openai_async(
         "timeout": timeout,
         "response_format": response_format,
     }
+
+    if model in ["gpt-4o", "gpt-4o-mini"] and prediction:
+        request_params["prediction"] = prediction
+        del request_params["max_completion_tokens"]
+        del request_params["response_format"]  # completion with prediction output does not support max_completion_tokens and response_format
 
     if model in ["o1-mini", "o1-preview", "o1", "deepseek-chat", "deepseek-reasoner"]:
         del request_params["temperature"]
@@ -283,8 +291,6 @@ async def chat_openai_async(
 
     if "response_format" in request_params and request_params["response_format"]:
         del request_params["stop"]  # cannot have stop when using response_format, as that often leads to invalid JSON
-
-    if "response_format" in request_params and request_params["response_format"]:
         response = await client_openai.beta.chat.completions.parse(**request_params)
         print(response)
         content = response.choices[0].message.parsed
diff --git a/defog_utils/utils_multi_llm.py b/defog_utils/utils_multi_llm.py
index 10894a7..c1deccd 100644
--- a/defog_utils/utils_multi_llm.py
+++ b/defog_utils/utils_multi_llm.py
@@ -69,7 +69,8 @@ async def chat_async(
     store=True,
     metadata=None,
     timeout=100, # in seconds
-    backup_model=None
+    backup_model=None,
+    prediction=None
 ) -> LLMResponse:
     """
     Returns the response from the LLM API for a single model that is passed in.
@@ -89,18 +90,32 @@ async def chat_async(
         model = backup_model
     llm_function = map_model_to_chat_fn_async(model)
     if not model.startswith("deepseek"):
-        return await llm_function(
-            model=model,
-            messages=messages,
-            max_completion_tokens=max_completion_tokens,
-            temperature=temperature,
-            stop=stop,
-            response_format=response_format,
-            seed=seed,
-            store=store,
-            metadata=metadata,
-            timeout=timeout,
-        )
+        if prediction and "gpt-4o" in model:
+            # predicted output completion does not support response_format and max_completion_tokens
+            return await llm_function(
+                model=model,
+                messages=messages,
+                temperature=temperature,
+                stop=stop,
+                seed=seed,
+                store=store,
+                metadata=metadata,
+                timeout=timeout,
+                prediction=prediction
+            )
+        else:
+            return await llm_function(
+                model=model,
+                messages=messages,
+                max_completion_tokens=max_completion_tokens,
+                temperature=temperature,
+                stop=stop,
+                response_format=response_format,
+                seed=seed,
+                store=store,
+                metadata=metadata,
+                timeout=timeout,
+            )
     else:
         if not os.getenv("DEEPSEEK_API_KEY"):
             raise Exception("DEEPSEEK_API_KEY is not set")
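
Usage note for reviewers: the sketch below shows how the new `prediction` parameter could be exercised end to end. It is illustrative only and not part of the diff; it assumes the OpenAI predicted-outputs payload shape (`{"type": "content", "content": ...}`, consistent with the `Dict[str,str]` annotation above), that `chat_async` is importable from `defog_utils.utils_multi_llm`, and that `LLMResponse` exposes the generated text on a `content` attribute. The SQL string and prompt are made up for the example.

```python
# Hypothetical usage sketch (not part of the diff): exercises the new
# `prediction` parameter added to chat_async / chat_openai_async.
import asyncio

from defog_utils.utils_multi_llm import chat_async

# Text we expect the model to largely reproduce; the payload shape follows
# OpenAI's predicted-outputs format (an assumption in this sketch).
existing_sql = "SELECT id, name FROM users WHERE deleted_at IS NULL;"


async def main():
    response = await chat_async(
        model="gpt-4o",  # prediction is only forwarded for gpt-4o / gpt-4o-mini
        messages=[
            {
                "role": "user",
                "content": (
                    "Rename the `name` column to `full_name` in this query, "
                    "and return only the SQL:\n" + existing_sql
                ),
            }
        ],
        prediction={"type": "content", "content": existing_sql},
    )
    # response_format and max_completion_tokens are dropped by the library
    # when a prediction is supplied, so the reply arrives as plain text.
    print(response.content)  # assumes LLMResponse exposes a `content` field


asyncio.run(main())
```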