diff --git a/python/samples/concepts/README.md b/python/samples/concepts/README.md index d949d7200559..a7008d4b92af 100644 --- a/python/samples/concepts/README.md +++ b/python/samples/concepts/README.md @@ -5,6 +5,7 @@ This section contains code snippets that demonstrate the usage of Semantic Kerne | Features | Description | | -------- | ----------- | | Agents | Creating and using [agents](../../semantic_kernel/agents/) in Semantic Kernel | +| Audio | Using services that support audio-to-text and text-to-audio conversion | | AutoFunctionCalling | Using `Auto Function Calling` to allow function call capable models to invoke Kernel Functions automatically | | ChatCompletion | Using [`ChatCompletion`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/connectors/ai/chat_completion_client_base.py) messaging capable service with models | | ChatHistory | Using and serializing the [`ChatHistory`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/contents/chat_history.py) | diff --git a/python/samples/concepts/chat_completion/azure_chat_gpt_api.py b/python/samples/concepts/chat_completion/azure_chat_gpt_api.py deleted file mode 100644 index d2f372ec762f..000000000000 --- a/python/samples/concepts/chat_completion/azure_chat_gpt_api.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -import logging - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion -from semantic_kernel.contents import ChatHistory - -logging.basicConfig(level=logging.WARNING) - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. -""" - -kernel = Kernel() - -service_id = "chat-gpt" -chat_service = AzureChatCompletion( - service_id=service_id, -) -kernel.add_service(chat_service) - -## there are three ways to create the request settings in code: # noqa: E266 -# Note: the prompt_execution_settings are a dictionary with the service_id as the key and the request settings as the value. # noqa: E501 - -## 1. create the request settings from the base class: # noqa: E266 -# from semantic_kernel.connectors.ai.chat_completion_client_base import PromptExecutionSettings -# req_settings = PromptExecutionSettings(extension_data = { "max_tokens": 2000, "temperature": 0.7, "top_p": 0.8} ) -## This method (using the PromptExecutionSettings base class) is the most generic, and it allows you to store request settings for different services in the same extension_data field. There are two downsides to this approach: the specific request setting class will be created dynamically for each call, this is overhead when using just a single service. and the request settings are not type checked, so you will receive error messages once the dynamic creation of the request settings class fails. # noqa: E501 E266 - -## 2. 
create the request settings directly for the service you are using: # noqa: E266 -# req_settings = sk_oai.AzureChatPromptExecutionSettings(max_tokens=2000, temperature=0.7, top_p=0.8) - -## The second method is useful when you are using a single service, and you want to have type checking on the request settings or when you are using multiple instances of the same type of service, for instance gpt-35-turbo and gpt-4, both in openai and both for chat. # noqa: E501 E266 -## 3. create the request settings from the kernel based on the registered service class: # noqa: E266 -req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) -req_settings.max_tokens = 2000 -req_settings.temperature = 0.7 -req_settings.top_p = 0.8 -req_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(filters={"excluded_plugins": []}) -## The third method is the most specific as the returned request settings class is the one that is registered for the service and has some fields already filled in, like the service_id and ai_model_id. # noqa: E501 E266 - - -chat_function = kernel.add_function( - prompt=system_message + """{{$chat_history}}{{$user_input}}""", - function_name="chat", - plugin_name="chat", - prompt_execution_settings=req_settings, -) - -history = ChatHistory() -history.add_user_message("Hi there, who are you?") -history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - - stream = True - if stream: - chunks = kernel.invoke_stream( - chat_function, - user_input=user_input, - chat_history=history, - ) - print("Mosscap:> ", end="") - answer = "" - async for message in chunks: - print(str(message[0]), end="") - answer += str(message[0]) - print("\n") - else: - answer = await kernel.invoke( - chat_function, - user_input=user_input, - chat_history=history, - ) - print(f"Mosscap:> {answer}") - - history.add_user_message(user_input) - history.add_assistant_message(str(answer)) - return True - - -async def main() -> None: - chatting = True - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/azure_chat_image_input.py b/python/samples/concepts/chat_completion/azure_chat_image_input.py deleted file mode 100644 index 5a813ee13eba..000000000000 --- a/python/samples/concepts/chat_completion/azure_chat_image_input.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -import logging - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion -from semantic_kernel.contents import ChatHistory, ChatMessageContent, ImageContent, TextContent - -logging.basicConfig(level=logging.WARNING) - -system_message = """ -You are an image reviewing chat bot. Your name is Mosscap and you have one goal -critiquing images that are supplied. 
-""" - -kernel = Kernel() - -service_id = "chat-gpt" -chat_service = AzureChatCompletion(service_id=service_id) -kernel.add_service(chat_service) - -req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) -req_settings.max_tokens = 2000 -req_settings.temperature = 0.7 -req_settings.top_p = 0.8 -req_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(filters={"excluded_plugins": []}) - -chat_function = kernel.add_function( - prompt=system_message + """{{$chat_history}}""", - function_name="chat", - plugin_name="chat", - prompt_execution_settings=req_settings, -) - - -async def chat(uri: str | None = None, image_path: str | None = None) -> bool: - history = ChatHistory() - if uri: - history.add_message( - ChatMessageContent( - role="user", - items=[TextContent(text="What is in this image?"), ImageContent(uri=uri)], - ) - ) - elif image_path: - history.add_message( - ChatMessageContent( - role="user", - items=[TextContent(text="What is in this image?"), ImageContent.from_image_path(image_path)], - ) - ) - else: - history.add_user_message("Hi there, who are you?") - answer = kernel.invoke_stream( - chat_function, - chat_history=history, - ) - print("Mosscap:> ", end="") - async for message in answer: - print(str(message[0]), end="") - print("\n") - - -async def main() -> None: - print("Get a description of a image from a URL.") - await chat( - uri="https://upload.wikimedia.org/wikipedia/commons/d/d5/Half-timbered_mansion%2C_Zirkel%2C_East_view.jpg" - ) - print("Get a description of the same image but now from a local file!") - await chat(image_path="samples/concepts/resources/sample_image.jpg") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/chat_anthropic_api.py b/python/samples/concepts/chat_completion/chat_anthropic_api.py deleted file mode 100644 index 4494a07362c5..000000000000 --- a/python/samples/concepts/chat_completion/chat_anthropic_api.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion -from semantic_kernel.contents import ChatHistory - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. -""" - -kernel = Kernel() - -service_id = "mistral-ai-chat" -kernel.add_service(AnthropicChatCompletion(service_id=service_id, ai_model_id="claude-3-opus-20240229")) - -settings = kernel.get_prompt_execution_settings_from_service_id(service_id) -settings.system = system_message -settings.max_tokens = 2000 -settings.temperature = 0.7 -settings.top_p = 0.8 - -chat_function = kernel.add_function( - plugin_name="ChatBot", - function_name="Chat", - prompt="{{$chat_history}}{{$user_input}}", - template_format="semantic-kernel", - prompt_execution_settings=settings, -) - -chat_history = ChatHistory() -chat_history.add_user_message("Hi there, who are you?") -chat_history.add_assistant_message("I am Mosscap, a chat bot. 
I'm trying to figure out what people need") - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - - stream = True - if stream: - chunks = kernel.invoke_stream( - chat_function, - user_input=user_input, - chat_history=chat_history, - ) - print("Mosscap:> ", end="") - answer = "" - async for message in chunks: - print(str(message[0]), end="", flush=True) - answer += str(message[0]) - print("\n") - else: - answer = await kernel.invoke( - chat_function, - user_input=user_input, - chat_history=chat_history, - ) - print(f"Mosscap:> {answer}") - - chat_history.add_user_message(user_input) - chat_history.add_assistant_message(str(answer)) - return True - - -async def main() -> None: - chatting = True - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/chat_bedrock_api.py b/python/samples/concepts/chat_completion/chat_bedrock_api.py deleted file mode 100644 index cd56cefb7a47..000000000000 --- a/python/samples/concepts/chat_completion/chat_bedrock_api.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.bedrock import BedrockChatCompletion, BedrockChatPromptExecutionSettings -from semantic_kernel.contents import ChatHistory - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. -""" - -kernel = Kernel() - -service_id = "bedrock-chat" -kernel.add_service(BedrockChatCompletion(service_id=service_id, model_id="cohere.command-r-v1:0")) - -settings = BedrockChatPromptExecutionSettings( - max_tokens=2000, - temperature=0.7, - top_p=0.8, - # Cohere Command specific settings: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html - extension_data={ - "presence_penalty": 0.5, - "seed": 5, - }, -) - -chat_function = kernel.add_function( - plugin_name="ChatBot", - function_name="Chat", - prompt="{{$chat_history}}{{$user_input}}", - template_format="semantic-kernel", - prompt_execution_settings=settings, -) - -chat_history = ChatHistory() -chat_history.add_user_message("Hi there, who are you?") -chat_history.add_assistant_message("I am Mosscap, a chat bot. 
I'm trying to figure out what people need") - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - - stream = True - if stream: - chunks = kernel.invoke_stream( - chat_function, - user_input=user_input, - chat_history=chat_history, - ) - print("Mosscap:> ", end="") - answer = "" - async for message in chunks: - print(str(message[0]), end="", flush=True) - answer += str(message[0]) - print("\n") - else: - answer = await kernel.invoke( - chat_function, - user_input=user_input, - chat_history=chat_history, - ) - print(f"Mosscap:> {answer}") - - chat_history.add_user_message(user_input) - chat_history.add_assistant_message(str(answer)) - return True - - -async def main() -> None: - chatting = True - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/chat_gpt_api.py b/python/samples/concepts/chat_completion/chat_gpt_api.py deleted file mode 100644 index 66a3839800b8..000000000000 --- a/python/samples/concepts/chat_completion/chat_gpt_api.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion -from semantic_kernel.contents import ChatHistory -from semantic_kernel.functions import KernelArguments - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. -""" - -kernel = Kernel() - -service_id = "chat-gpt" -kernel.add_service(OpenAIChatCompletion(service_id=service_id, ai_model_id="gpt-3.5-turbo")) - -settings = kernel.get_prompt_execution_settings_from_service_id(service_id) -settings.max_tokens = 2000 -settings.temperature = 0.7 -settings.top_p = 0.8 - -chat_function = kernel.add_function( - plugin_name="ChatBot", - function_name="Chat", - prompt="{{$chat_history}}{{$user_input}}", - template_format="semantic-kernel", - prompt_execution_settings=settings, -) - -chat_history = ChatHistory(system_message=system_message) -chat_history.add_user_message("Hi there, who are you?") -chat_history.add_assistant_message("I am Mosscap, a chat bot. 
I'm trying to figure out what people need") -chat_history.add_user_message("I want to find a hotel in Seattle with free wifi and a pool.") - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - - answer = await kernel.invoke(chat_function, KernelArguments(user_input=user_input, chat_history=chat_history)) - chat_history.add_user_message(user_input) - chat_history.add_assistant_message(str(answer)) - print(f"Mosscap:> {answer}") - return True - - -async def main() -> None: - chatting = True - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/chat_mistral_api.py b/python/samples/concepts/chat_completion/chat_mistral_api.py deleted file mode 100644 index adada91a944c..000000000000 --- a/python/samples/concepts/chat_completion/chat_mistral_api.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion -from semantic_kernel.contents import ChatHistory - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. -""" - -kernel = Kernel() - -service_id = "mistral-ai-chat" -kernel.add_service(MistralAIChatCompletion(service_id=service_id)) - -settings = kernel.get_prompt_execution_settings_from_service_id(service_id) -settings.max_tokens = 2000 -settings.temperature = 0.7 -settings.top_p = 0.8 - -chat_function = kernel.add_function( - plugin_name="ChatBot", - function_name="Chat", - prompt="{{$chat_history}}{{$user_input}}", - template_format="semantic-kernel", - prompt_execution_settings=settings, -) - -chat_history = ChatHistory(system_message=system_message) -chat_history.add_user_message("Hi there, who are you?") -chat_history.add_assistant_message("I am Mosscap, a chat bot. 
I'm trying to figure out what people need") -chat_history.add_user_message("I want to find a hotel in Seattle with free wifi and a pool.") - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - - stream = True - if stream: - chunks = kernel.invoke_stream( - chat_function, - user_input=user_input, - chat_history=chat_history, - ) - print("Mosscap:> ", end="") - answer = "" - async for message in chunks: - print(str(message[0]), end="") - answer += str(message[0]) - print("\n") - else: - answer = await kernel.invoke( - chat_function, - user_input=user_input, - chat_history=chat_history, - ) - print(f"Mosscap:> {answer}") - - chat_history.add_user_message(user_input) - chat_history.add_assistant_message(str(answer)) - return True - - -async def main() -> None: - chatting = True - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/chat_streaming.py b/python/samples/concepts/chat_completion/chat_streaming.py deleted file mode 100644 index bad6e9ebd09a..000000000000 --- a/python/samples/concepts/chat_completion/chat_streaming.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -from functools import reduce - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion -from semantic_kernel.contents import ChatHistory -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent -from semantic_kernel.prompt_template import InputVariable, PromptTemplateConfig - -prompt = """ -ChatBot can have a conversation with you about any topic. -It can give explicit instructions or say 'I don't know' -when it doesn't know the answer. - -{{$chat_history}} - -User:> {{$user_input}} -ChatBot:> -""" - -kernel = Kernel() - -service_id = "chat" -kernel.add_service(OpenAIChatCompletion(service_id=service_id)) - -settings = kernel.get_prompt_execution_settings_from_service_id(service_id) -settings.max_tokens = 2000 -settings.temperature = 0.7 -settings.top_p = 0.8 - -prompt_template_config = PromptTemplateConfig( - template=prompt, - name="chat", - template_format="semantic-kernel", - input_variables=[ - InputVariable( - name="user_input", - description="The user input", - is_required=True, - default="", - ), - InputVariable( - name="chat_history", - description="The history of the conversation", - is_required=True, - ), - ], - execution_settings=settings, -) - -chat_history = ChatHistory() -chat_history.add_user_message("Hi there, who are you?") -chat_history.add_assistant_message("I am Mosscap, a chat bot. 
I'm trying to figure out what people need") - -chat_function = kernel.add_function( - plugin_name="ChatBot", function_name="Chat", prompt_template_config=prompt_template_config -) - - -async def chat(chat_history: ChatHistory) -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - - print("ChatBot:> ", end="") - streamed_chunks: list[StreamingChatMessageContent] = [] - responses = kernel.invoke_stream(chat_function, user_input=user_input, chat_history=chat_history) - async for message in responses: - streamed_chunks.append(message[0]) - print(str(message[0]), end="") - print("") - chat_history.add_user_message(user_input) - if streamed_chunks: - streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) - chat_history.add_message(streaming_chat_message) - return True - - -async def main() -> None: - chatting = True - while chatting: - chatting = await chat(chat_history) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/openai_logit_bias.py b/python/samples/concepts/chat_completion/openai_logit_bias.py deleted file mode 100644 index f39416d0370e..000000000000 --- a/python/samples/concepts/chat_completion/openai_logit_bias.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -from typing import Any - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai import PromptExecutionSettings -from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAITextCompletion -from semantic_kernel.contents import AuthorRole, ChatHistory -from semantic_kernel.functions import KernelArguments -from semantic_kernel.prompt_template import InputVariable, PromptTemplateConfig - -""" -Logit bias enables prioritizing certain tokens within a given output. -To utilize the logit bias function, you will need to know the token ids of the words you are using. -See the GPT Tokenizer to obtain token ids: https://platform.openai.com/tokenizer -Read more about logit bias and how to configure output: https://help.openai.com/en/articles/5247780-using-logit-bias-to-define-token-probability -""" - - -def _config_ban_tokens(settings: PromptExecutionSettings, keys: dict[Any, Any]): - if settings.logit_bias is None: - settings.logit_bias = {} - # Map each token in the keys list to a bias value from -100 (a potential ban) to 100 (exclusive selection) - for k in keys: - # -100 to potentially ban all tokens in the list - settings.logit_bias[k] = -100 - return settings - - -def _prepare_input_chat(chat: ChatHistory): - return "".join([f"{msg.role}: {msg.content}\n" for msg in chat]) - - -async def chat_request_example(kernel: Kernel): - service_id = "chat_service" - openai_chat_completion = OpenAIChatCompletion( - service_id=service_id, - ai_model_id="gpt-3.5-turbo", - ) - kernel.add_service(openai_chat_completion) - - # Spaces and capitalization affect the token ids. - # The following is the token ids of basketball related words. 
- keys = [ - 2032, - 680, - 9612, - 26675, - 3438, - 42483, - 21265, - 6057, - 11230, - 1404, - 2484, - 12494, - 35, - 822, - 11108, - ] - banned_words = [ - "swish", - "screen", - "score", - "dominant", - "basketball", - "game", - "GOAT", - "Shooting", - "Dribbling", - ] - - # Model will try its best to avoid using any of the above words - settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) - settings = _config_ban_tokens(settings, keys) - - prompt_template_config = PromptTemplateConfig( - template="{{$user_input}}", - name="chat", - template_format="semantic-kernel", - input_variables=[ - InputVariable( - name="user_input", description="The history of the conversation", is_required=True, default="" - ), - ], - execution_settings=settings, - ) - - chat = ChatHistory() - - chat.add_user_message("Hi there, who are you?") - chat.add_assistant_message("I am an AI assistant here to answer your questions.") - - chat_function = kernel.add_function( - plugin_name="ChatBot", function_name="Chat", prompt_template_config=prompt_template_config - ) - - chat.add_system_message("You are a basketball expert") - chat.add_user_message("I love the LA Lakers, tell me an interesting fact about LeBron James.") - - answer = await kernel.invoke(chat_function, KernelArguments(user_input=_prepare_input_chat(chat))) - chat.add_assistant_message(str(answer)) - - chat.add_user_message("What are his best all-time stats?") - answer = await kernel.invoke(chat_function, KernelArguments(user_input=_prepare_input_chat(chat))) - chat.add_assistant_message(str(answer)) - - print(chat) - - kernel.remove_all_services() - - return chat, banned_words - - -async def text_complete_request_example(kernel: Kernel): - service_id = "text_service" - openai_text_completion = OpenAITextCompletion( - service_id=service_id, - ai_model_id="gpt-3.5-turbo-instruct", - ) - kernel.add_service(openai_text_completion) - - # Spaces and capitalization affect the token ids. - # The following is the token ids of pie related words. 
- keys = [ - 18040, - 17180, - 16108, - 4196, - 79, - 931, - 5116, - 30089, - 36724, - 47, - 931, - 5116, - 431, - 5171, - 613, - 5171, - 350, - 721, - 272, - 47, - 721, - 272, - ] - banned_words = [ - "apple", - " apple", - "Apple", - " Apple", - "pumpkin", - " pumpkin", - " Pumpkin", - "pecan", - " pecan", - " Pecan", - "Pecan", - ] - - # Model will try its best to avoid using any of the above words - settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) - settings = _config_ban_tokens(settings, keys) - - prompt_template_config = PromptTemplateConfig( - template="{{$user_input}}", - name="chat", - template_format="semantic-kernel", - input_variables=[ - InputVariable( - name="user_input", description="The history of the conversation", is_required=True, default="" - ), - ], - execution_settings=settings, - ) - - chat = ChatHistory() - - chat.add_user_message("The best pie flavor to have in autumn is") - - text_function = kernel.add_function( - plugin_name="TextBot", function_name="TextCompletion", prompt_template_config=prompt_template_config - ) - - answer = await kernel.invoke(text_function, KernelArguments(user_input=_prepare_input_chat(chat))) - chat.add_assistant_message(str(answer)) - - print(chat) - - kernel.remove_all_services() - - return chat, banned_words - - -def _check_banned_words(banned_list, actual_list) -> bool: - passed = True - for word in banned_list: - if word in actual_list: - print(f'The banned word "{word}" was found in the answer') - passed = False - return passed - - -def _format_output(chat, banned_words) -> None: - print("--- Checking for banned words ---") - chat_bot_ans_words = [ - word for msg in chat.messages if msg.role == AuthorRole.ASSISTANT for word in msg.content.split() - ] - if _check_banned_words(banned_words, chat_bot_ans_words): - print("None of the banned words were found in the answer") - - -async def main() -> None: - kernel = Kernel() - - print("Chat completion example:") - print("------------------------") - chat, banned_words = await chat_request_example(kernel) - _format_output(chat, banned_words) - - print("------------------------") - - print("\nText completion example:") - print("------------------------") - chat, banned_words = await text_complete_request_example(kernel) - _format_output(chat, banned_words) - - return - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/simple_chatbot.py b/python/samples/concepts/chat_completion/simple_chatbot.py new file mode 100644 index 000000000000..a52b52aaace1 --- /dev/null +++ b/python/samples/concepts/chat_completion/simple_chatbot.py @@ -0,0 +1,89 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio + +from samples.concepts.setup.chat_completion_services import ( + Services, + get_chat_completion_service_and_request_settings, +) +from semantic_kernel.contents.chat_history import ChatHistory + +# This sample shows how to create a chatbot. This sample uses the following two main components: +# - a ChatCompletionService: This component is responsible for generating responses to user messages. +# - a ChatHistory: This component is responsible for keeping track of the chat history. +# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose. 
+
+
+# You can select from the following chat completion services:
+# - Services.OPENAI
+# - Services.AZURE_OPENAI
+# - Services.AZURE_AI_INFERENCE
+# - Services.ANTHROPIC
+# - Services.BEDROCK
+# - Services.GOOGLE_AI
+# - Services.MISTRAL_AI
+# - Services.OLLAMA
+# - Services.ONNX
+# - Services.VERTEX_AI
+# Please make sure you have configured your environment correctly for the selected chat completion service.
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)
+
+# This is the system message that gives the chatbot its personality.
+system_message = """
+You are a chat bot. Your name is Mosscap and
+you have one goal: figure out what people need.
+Your full name, should you need to know it, is
+Splendid Speckled Mosscap. You communicate
+effectively, but you tend to answer with long
+flowery prose.
+"""
+
+# Create a chat history object with the system message.
+chat_history = ChatHistory(system_message=system_message)
+
+
+async def chat() -> bool:
+    try:
+        user_input = input("User:> ")
+    except KeyboardInterrupt:
+        print("\n\nExiting chat...")
+        return False
+    except EOFError:
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    # Add the user message to the chat history so that the chatbot can respond to it.
+    chat_history.add_user_message(user_input)
+
+    # Get the chat message content from the chat completion service.
+    response = await chat_completion_service.get_chat_message_content(
+        chat_history=chat_history,
+        settings=request_settings,
+    )
+    print(f"Mosscap:> {response}")
+
+    # Add the chat message to the chat history to keep track of the conversation.
+    chat_history.add_assistant_message(str(response))
+
+    return True
+
+
+async def main() -> None:
+    # Start the chat loop. The chat loop will continue until the user types "exit".
+    chatting = True
+    while chatting:
+        chatting = await chat()
+
+    # Sample output:
+    # User:> Why is the sky blue in one sentence?
+    # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere,
+    #           a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more
+    #           prominent in our visual perception.
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py b/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py
new file mode 100644
index 000000000000..361e4e706d5d
--- /dev/null
+++ b/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py
@@ -0,0 +1,127 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from samples.concepts.setup.chat_completion_services import (
+    Services,
+    get_chat_completion_service_and_request_settings,
+)
+from semantic_kernel.contents.chat_history import ChatHistory
+from semantic_kernel.functions.kernel_arguments import KernelArguments
+from semantic_kernel.kernel import Kernel
+
+# This sample shows how to create a chatbot using a kernel function.
+# This sample uses the following three main components:
+# - a ChatCompletionService: This component is responsible for generating responses to user messages.
+# - a ChatHistory: This component is responsible for keeping track of the chat history.
+# - a KernelFunction: This function will be a prompt function, meaning the function is composed of
+#   a prompt and will be invoked by Semantic Kernel.
+# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose. + +# [NOTE] +# The purpose of this sample is to demonstrate how to use a kernel function. +# To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly. + +# You can select from the following chat completion services: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# This is the system message that gives the chatbot its personality. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. +""" + +# Create a chat history object with the system message. +chat_history = ChatHistory(system_message=system_message) + +# Create a kernel and register a prompt function. +# The prompt here contains two variables: chat_history and user_input. +# They will be replaced by the kernel with the actual values when the function is invoked. +# [NOTE] +# The chat_history, which is a ChatHistory object, will be serialized to a string internally +# to create/render the final prompt. +# Since this sample uses a chat completion service, the prompt will be deserialized back to +# a ChatHistory object that gets passed to the chat completion service. This new chat history +# object will contain the original messages and the user input. +kernel = Kernel() +chat_function = kernel.add_function( + plugin_name="ChatBot", + function_name="Chat", + prompt="{{$chat_history}}{{$user_input}}", + template_format="semantic-kernel", + # You can attach the request settings to the function or + # pass the settings to the kernel.invoke method via the kernel arguments. + # If you specify the settings in both places, the settings in the kernel arguments will + # take precedence given the same service id. + # prompt_execution_settings=request_settings, +) + +# Invoking a kernel function requires a service, so we add the chat completion service to the kernel. +kernel.add_service(chat_completion_service) + + +async def chat() -> bool: + try: + user_input = input("User:> ") + except KeyboardInterrupt: + print("\n\nExiting chat...") + return False + except EOFError: + print("\n\nExiting chat...") + return False + + if user_input == "exit": + print("\n\nExiting chat...") + return False + + # Get the chat message content from the chat completion service. + kernel_arguments = KernelArguments( + settings=request_settings, + # Use keyword arguments to pass the chat history and user input to the kernel function. + chat_history=chat_history, + user_input=user_input, + ) + + answer = await kernel.invoke(chat_function, kernel_arguments) + # Alternatively, you can invoke the function directly with the kernel as an argument: + # answer = await chat_function.invoke(kernel, kernel_arguments) + + print(f"Mosscap:> {answer}") + + # Add the chat message to the chat history to keep track of the conversation. 
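+    # Note that invoking the kernel function does not update the chat history object
+    # itself: the user input is passed in as a separate argument, so both the user
+    # message and the assistant response are appended here after the invocation.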
+    chat_history.add_user_message(user_input)
+    chat_history.add_assistant_message(str(answer))
+
+    return True
+
+
+async def main() -> None:
+    # Start the chat loop. The chat loop will continue until the user types "exit".
+    chatting = True
+    while chatting:
+        chatting = await chat()
+
+    # Sample output:
+    # User:> Why is the sky blue in one sentence?
+    # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere,
+    #           a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more
+    #           prominent in our visual perception.
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py b/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py
new file mode 100644
index 000000000000..63fa49e1dc4c
--- /dev/null
+++ b/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py
@@ -0,0 +1,111 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from samples.concepts.setup.chat_completion_services import (
+    Services,
+    get_chat_completion_service_and_request_settings,
+)
+from semantic_kernel.contents.chat_history import ChatHistory
+
+# This sample shows how to create a chatbot whose output can be biased using logit bias.
+# This sample uses the following three main components:
+# - a ChatCompletionService: This component is responsible for generating responses to user messages.
+# - a ChatHistory: This component is responsible for keeping track of the chat history.
+# - a list of tokens whose bias value will be reduced, meaning the likelihood of these tokens appearing
+#   in the output will be reduced.
+# The chatbot in this sample is called Mosscap, who is an expert in basketball.
+
+# To learn more about logit bias, see: https://help.openai.com/en/articles/5247780-using-logit-bias-to-define-token-probability
+
+
+# You can select from the following chat completion services:
+# - Services.OPENAI
+# - Services.AZURE_OPENAI
+# Please make sure you have configured your environment correctly for the selected chat completion service.
chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)
+
+# This is the system message that gives the chatbot its personality.
+system_message = """
+You are a chat bot whose expertise is basketball.
+Your name is Mosscap and you have one goal: to answer questions about basketball.
+"""
+
+# Create a chat history object with the system message.
+chat_history = ChatHistory(system_message=system_message)
+
+# Create a list of tokens whose bias value will be reduced.
+# The token ids of these words can be obtained using the GPT Tokenizer: https://platform.openai.com/tokenizer
+# The targeted model series is GPT-4o and GPT-4o mini.
+# banned_words = ["basketball", "NBA", "player", "career", "points"]
+banned_tokens = [
+    # "basketball"
+    106622,
+    5052,
+    # "NBA"
+    99915,
+    # " NBA"
+    32272,
+    # "player"
+    6450,
+    # " player"
+    5033,
+    # "career"
+    198069,
+    # " career"
+    8461,
+    # "points"
+    14011,
+    # " points"
+    5571,
+]
+# Configure the logit bias settings to minimize the likelihood of the
+# tokens in the banned_tokens list appearing in the output.
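+# Logit bias values range from -100 to 100: a value of -100 effectively bans a
+# token, while 100 effectively forces its selection. A milder value (for example,
+# -50) only discourages a token rather than banning it outright.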
+request_settings.logit_bias = {k: -100 for k in banned_tokens} + + +async def chat() -> bool: + try: + user_input = input("User:> ") + except KeyboardInterrupt: + print("\n\nExiting chat...") + return False + except EOFError: + print("\n\nExiting chat...") + return False + + if user_input == "exit": + print("\n\nExiting chat...") + return False + + # Add the user message to the chat history so that the chatbot can respond to it. + chat_history.add_user_message(user_input) + + # Get the chat message content from the chat completion service. + response = await chat_completion_service.get_chat_message_content( + chat_history=chat_history, + settings=request_settings, + ) + print(f"Mosscap:> {response}") + + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_assistant_message(str(response)) + + return True + + +async def main() -> None: + # Start the chat loop. The chat loop will continue until the user types "exit". + chatting = True + while chatting: + chatting = await chat() + + # Sample output: + # User:> Who has the most career points in NBA history? + # Mosscap:> As of October 2023, the all-time leader in total regular-season scoring in the history of the National + # Basketball Association (N.B.A.) is Kareem Abdul-Jabbar, who scored 38,387 total regular-seasonPoints + # during his illustrious 20-year playing Career. + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/simple_chatbot_streaming.py b/python/samples/concepts/chat_completion/simple_chatbot_streaming.py new file mode 100644 index 000000000000..21744ffd53c2 --- /dev/null +++ b/python/samples/concepts/chat_completion/simple_chatbot_streaming.py @@ -0,0 +1,102 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from functools import reduce + +from samples.concepts.setup.chat_completion_services import ( + Services, + get_chat_completion_service_and_request_settings, +) +from semantic_kernel.contents.chat_history import ChatHistory + +# This sample shows how to create a chatbot that streams responses. +# This sample uses the following two main components: +# - a ChatCompletionService: This component is responsible for generating responses to user messages. +# - a ChatHistory: This component is responsible for keeping track of the chat history. +# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose. + + +# You can select from the following chat completion services: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +# Please note that not all models support streaming responses. Make sure to select a model that supports streaming. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# This is the system message that gives the chatbot its personality. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. +""" + +# Create a chat history object with the system message. 
+chat_history = ChatHistory(system_message=system_message) + + +async def chat() -> bool: + try: + user_input = input("User:> ") + except KeyboardInterrupt: + print("\n\nExiting chat...") + return False + except EOFError: + print("\n\nExiting chat...") + return False + + if user_input == "exit": + print("\n\nExiting chat...") + return False + + # Add the user message to the chat history so that the chatbot can respond to it. + chat_history.add_user_message(user_input) + + # Get the chat message content from the chat completion service. + # The response is an async generator that streams the response in chunks. + response = chat_completion_service.get_streaming_chat_message_content( + chat_history=chat_history, + settings=request_settings, + ) + + # Capture the chunks of the response and print them as they come in. + chunks = [] + print("Mosscap:> ", end="") + async for chunk in response: + chunks.append(chunk) + print(chunk, end="") + print("") + + # Combine the chunks into a single message to add to the chat history. + full_message = reduce(lambda first, second: first + second, chunks) + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_message(full_message) + + return True + + +async def main() -> None: + # Start the chat loop. The chat loop will continue until the user types "exit". + chatting = True + while chatting: + chatting = await chat() + + # Sample output: + # User:> Why is the sky blue in one sentence? + # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere, + # a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more + # prominent in our visual perception. + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_image.py b/python/samples/concepts/chat_completion/simple_chatbot_with_image.py new file mode 100644 index 000000000000..f7fac3448816 --- /dev/null +++ b/python/samples/concepts/chat_completion/simple_chatbot_with_image.py @@ -0,0 +1,132 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio + +from samples.concepts.setup.chat_completion_services import ( + Services, + get_chat_completion_service_and_request_settings, +) +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.image_content import ImageContent +from semantic_kernel.contents.text_content import TextContent + +# This sample shows how to create a chatbot that responds to user messages with image input. +# This sample uses the following three main components: +# - a ChatCompletionService: This component is responsible for generating responses to user messages. +# - a ChatHistory: This component is responsible for keeping track of the chat history. +# - an ImageContent: This component is responsible for representing image content. +# The chatbot in this sample is called Mosscap. + +# You can select from the following chat completion services: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. 
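+# For example, to use the standard OpenAI service instead, pass Services.OPENAI below
+# (assuming OPENAI_API_KEY and OPENAI_CHAT_MODEL_ID are set in your environment or .env file):
+# chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.OPENAI)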
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +IMAGE_URI = "https://upload.wikimedia.org/wikipedia/commons/d/d5/Half-timbered_mansion%2C_Zirkel%2C_East_view.jpg" +IMAGE_PATH = "samples/concepts/resources/sample_image.jpg" + +# Create an image content with the image URI. +image_content_remote = ImageContent(uri=IMAGE_URI) +# You can also create an image content with a local image path. +image_content_local = ImageContent.from_image_file(IMAGE_PATH) + + +# [NOTE] +# Not all models support image input. Make sure to select a model that supports image input. +# Not all services support image input from an image URI. If your image is saved in a remote location, +# make sure to use a service that supports image input from a URI. + + +# This is the system message that gives the chatbot its personality. +system_message = """ +You are an image reviewing chat bot. Your name is Mosscap and you have one goal critiquing images that are supplied. +""" + +# Create a chat history object with the system message and an initial user message with an image input. +chat_history = ChatHistory(system_message=system_message) +chat_history.add_message( + ChatMessageContent( + role="user", + items=[TextContent(text="What is in this image?"), image_content_local], + ) +) + + +async def chat(skip_user_input: bool = False) -> bool: + """Chat with the chatbot. + + Args: + skip_user_input (bool): Whether to skip user input. Defaults to False. + """ + if not skip_user_input: + try: + user_input = input("User:> ") + except KeyboardInterrupt: + print("\n\nExiting chat...") + return False + except EOFError: + print("\n\nExiting chat...") + return False + + if user_input == "exit": + print("\n\nExiting chat...") + return False + + # Add the user message to the chat history so that the chatbot can respond to it. + chat_history.add_user_message(user_input) + + # Get the chat message content from the chat completion service. + response = await chat_completion_service.get_chat_message_content( + chat_history=chat_history, + settings=request_settings, + ) + print(f"Mosscap:> {response}") + + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_assistant_message(str(response)) + + return True + + +async def main() -> None: + # Start the chat with the image input. + await chat(skip_user_input=True) + # Continue the chat. The chat loop will continue until the user types "exit". + chatting = True + while chatting: + chatting = await chat() + + # Sample output: + # Mosscap:> The image features a large, historic building that exhibits a traditional half-timbered architectural + # style. The structure is located near a dense forest, characterized by lush green trees. The sky above + # is partly cloudy, suggesting a pleasant day. The building itself appears well-maintained, with distinct + # features such as a turret or spire and decorative wood framing, creating an elegant and charming + # appearance in its natural setting. + # User:> What do you think about the composition of the photo? + # Mosscap:> The composition of the photo is quite effective. Here are a few observations: + # 1. **Framing**: The building is positioned slightly off-center, which can create a more dynamic and + # engaging image. This drawing of attention to the structure, while still showcasing the surrounding + # landscape. + # 2. 
**Foreground and Background**: The green foliage and trees in the foreground provide a nice contrast + # to the building, enhancing its visual appeal. The dense forest in the background adds depth and context + # to the scene. + # 3. **Lighting**: The light appears to be favorable, suggesting a well-lit scene. The clouds add texture + # to the sky without overwhelming the overall brightness. + # 4. **Perspective**: The angle from which the photo is taken allows viewers to appreciate both the + # architecture of the building and its natural environment, creating a harmonious balance. + # Overall, the composition successfully highlights the building while incorporating its natural + # surroundings, inviting viewers to appreciate both elements together. + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/setup/chat_completion_services.py b/python/samples/concepts/setup/chat_completion_services.py new file mode 100644 index 000000000000..903b59f42928 --- /dev/null +++ b/python/samples/concepts/setup/chat_completion_services.py @@ -0,0 +1,299 @@ +# Copyright (c) Microsoft. All rights reserved. + +from enum import Enum + +from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion, AnthropicChatPromptExecutionSettings +from semantic_kernel.connectors.ai.azure_ai_inference import ( + AzureAIInferenceChatCompletion, + AzureAIInferenceChatPromptExecutionSettings, +) +from semantic_kernel.connectors.ai.bedrock import BedrockChatCompletion, BedrockChatPromptExecutionSettings +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.connectors.ai.google.google_ai import GoogleAIChatCompletion, GoogleAIChatPromptExecutionSettings +from semantic_kernel.connectors.ai.google.vertex_ai import VertexAIChatCompletion, VertexAIChatPromptExecutionSettings +from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion, MistralAIChatPromptExecutionSettings +from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings +from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings, ONNXTemplate +from semantic_kernel.connectors.ai.open_ai import ( + AzureChatCompletion, + AzureChatPromptExecutionSettings, + OpenAIChatCompletion, + OpenAIChatPromptExecutionSettings, +) +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + + +class Services(Enum): + """Enum for supported chat completion services. 
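+
+    Each member maps to a factory function below that returns a configured
+    (chat completion service, request settings) pair.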
+
+    For service specific settings, refer to this documentation:
+    https://github.com/microsoft/semantic-kernel/blob/main/python/samples/concepts/setup/ALL_SETTINGS.md
+    """
+
+    OPENAI = "openai"
+    AZURE_OPENAI = "azure_openai"
+    AZURE_AI_INFERENCE = "azure_ai_inference"
+    ANTHROPIC = "anthropic"
+    BEDROCK = "bedrock"
+    GOOGLE_AI = "google_ai"
+    MISTRAL_AI = "mistral_ai"
+    OLLAMA = "ollama"
+    ONNX = "onnx"
+    VERTEX_AI = "vertex_ai"
+
+
+def get_chat_completion_service_and_request_settings(
+    service_name: Services,
+) -> tuple[ChatCompletionClientBase, PromptExecutionSettings]:
+    """Return service and request settings."""
+    chat_services = {
+        Services.OPENAI: get_openai_chat_completion_service_and_request_settings,
+        Services.AZURE_OPENAI: get_azure_openai_chat_completion_service_and_request_settings,
+        Services.AZURE_AI_INFERENCE: get_azure_ai_inference_chat_completion_service_and_request_settings,
+        Services.ANTHROPIC: get_anthropic_chat_completion_service_and_request_settings,
+        Services.BEDROCK: get_bedrock_chat_completion_service_and_request_settings,
+        Services.GOOGLE_AI: get_google_ai_chat_completion_service_and_request_settings,
+        Services.MISTRAL_AI: get_mistral_ai_chat_completion_service_and_request_settings,
+        Services.OLLAMA: get_ollama_chat_completion_service_and_request_settings,
+        Services.ONNX: get_onnx_chat_completion_service_and_request_settings,
+        Services.VERTEX_AI: get_vertex_ai_chat_completion_service_and_request_settings,
+    }
+
+    return chat_services[service_name]()
+
+
+def get_openai_chat_completion_service_and_request_settings() -> tuple[
+    OpenAIChatCompletion, OpenAIChatPromptExecutionSettings
+]:
+    """Return OpenAI chat completion service and request settings.
+
+    The service credentials can be read in three ways:
+    1. Via the constructor
+    2. Via the environment variables
+    3. Via an environment file
+
+    The request settings control the behavior of the service. The default settings are sufficient to get started.
+    However, you can adjust the settings to suit your needs.
+    Note: Some of the settings are NOT meant to be set by the user.
+    Please refer to the Semantic Kernel Python documentation for more information:
+    https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel-python
+    """
+    chat_service = OpenAIChatCompletion()
+    request_settings = OpenAIChatPromptExecutionSettings(max_tokens=2000, temperature=0.7, top_p=0.8)
+
+    return chat_service, request_settings
+
+
+def get_azure_openai_chat_completion_service_and_request_settings() -> tuple[
+    AzureChatCompletion, AzureChatPromptExecutionSettings
+]:
+    """Return Azure OpenAI chat completion service and request settings.
+
+    The service credentials can be read in three ways:
+    1. Via the constructor
+    2. Via the environment variables
+    3. Via an environment file
+
+    The request settings control the behavior of the service. The default settings are sufficient to get started.
+    However, you can adjust the settings to suit your needs.
+    Note: Some of the settings are NOT meant to be set by the user.
+    Please refer to the Semantic Kernel Python documentation for more information:
+    https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
+    """
+    chat_service = AzureChatCompletion()
+    request_settings = AzureChatPromptExecutionSettings()
+
+    return chat_service, request_settings
+
+
+def get_azure_ai_inference_chat_completion_service_and_request_settings() -> tuple[
+    AzureAIInferenceChatCompletion, AzureAIInferenceChatPromptExecutionSettings
+]:
+    """Return Azure AI Inference chat completion service and request settings.
+
+    The service credentials can be read in three ways:
+    1. Via the constructor
+    2. Via the environment variables
+    3. Via an environment file
+
+    The request settings control the behavior of the service. The default settings are sufficient to get started.
+    However, you can adjust the settings to suit your needs.
+    Note: Some of the settings are NOT meant to be set by the user.
+    Please refer to the Semantic Kernel Python documentation for more information:
+    https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
+    """
+    chat_service = AzureAIInferenceChatCompletion(
+        ai_model_id="id",  # The model ID serves only as an identifier; the actual model ID cannot be obtained programmatically.
+    )
+    request_settings = AzureAIInferenceChatPromptExecutionSettings()
+
+    return chat_service, request_settings
+
+
+def get_anthropic_chat_completion_service_and_request_settings() -> tuple[
+    AnthropicChatCompletion, AnthropicChatPromptExecutionSettings
+]:
+    """Return Anthropic chat completion service and request settings.
+
+    The service credentials can be read in three ways:
+    1. Via the constructor
+    2. Via the environment variables
+    3. Via an environment file
+
+    The request settings control the behavior of the service. The default settings are sufficient to get started.
+    However, you can adjust the settings to suit your needs.
+    Note: Some of the settings are NOT meant to be set by the user.
+    Please refer to the Semantic Kernel Python documentation for more information:
+    https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
+    """
+    chat_service = AnthropicChatCompletion()
+    request_settings = AnthropicChatPromptExecutionSettings()
+
+    return chat_service, request_settings
+
+
+def get_bedrock_chat_completion_service_and_request_settings() -> tuple[
+    BedrockChatCompletion, BedrockChatPromptExecutionSettings
+]:
+    """Return Bedrock chat completion service and request settings.
+
+    The service credentials can be read in three ways:
+    1. Via the constructor
+    2. Via the environment variables
+    3. Via an environment file
+
+    The request settings control the behavior of the service. The default settings are sufficient to get started.
+    However, you can adjust the settings to suit your needs.
+    Note: Some of the settings are NOT meant to be set by the user.
+    Please refer to the Semantic Kernel Python documentation for more information:
+    https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
+    """
+    chat_service = BedrockChatCompletion(model_id="cohere.command-r-v1:0")
+    request_settings = BedrockChatPromptExecutionSettings(
+        # For model specific settings, specify them in the extension_data dictionary.
+def get_bedrock_chat_completion_service_and_request_settings() -> tuple[
+    BedrockChatCompletion, BedrockChatPromptExecutionSettings
+]:
+    """Return Bedrock chat completion service and request settings.
+
+    The service credentials can be read in 3 ways:
+    1. Via the constructor
+    2. Via environment variables
+    3. Via an environment file
+
+    The request settings control the behavior of the service. The default settings are sufficient to get started.
+    However, you can adjust the settings to suit your needs.
+    Note: Some of the settings are NOT meant to be set by the user.
+    Please refer to the Semantic Kernel Python documentation for more information:
+    https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
+    """
+    chat_service = BedrockChatCompletion(model_id="cohere.command-r-v1:0")
+    request_settings = BedrockChatPromptExecutionSettings(
+        # For model specific settings, specify them in the extension_data dictionary.
+        # For example, for Cohere Command specific settings, refer to:
+        # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html
+        extension_data={
+            "presence_penalty": 0.5,
+            "seed": 5,
+        },
+    )
+
+    return chat_service, request_settings
+
+
+def get_google_ai_chat_completion_service_and_request_settings() -> tuple[
+    GoogleAIChatCompletion, GoogleAIChatPromptExecutionSettings
+]:
+    """Return Google AI chat completion service and request settings.
+
+    The service credentials can be read in 3 ways:
+    1. Via the constructor
+    2. Via environment variables
+    3. Via an environment file
+
+    The request settings control the behavior of the service. The default settings are sufficient to get started.
+    However, you can adjust the settings to suit your needs.
+    Note: Some of the settings are NOT meant to be set by the user.
+    Please refer to the Semantic Kernel Python documentation for more information:
+    https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
+    """
+    chat_service = GoogleAIChatCompletion()
+    request_settings = GoogleAIChatPromptExecutionSettings()
+
+    return chat_service, request_settings
+
+
+def get_mistral_ai_chat_completion_service_and_request_settings() -> tuple[
+    MistralAIChatCompletion, MistralAIChatPromptExecutionSettings
+]:
+    """Return Mistral AI chat completion service and request settings.
+
+    The service credentials can be read in 3 ways:
+    1. Via the constructor
+    2. Via environment variables
+    3. Via an environment file
+
+    The request settings control the behavior of the service. The default settings are sufficient to get started.
+    However, you can adjust the settings to suit your needs.
+    Note: Some of the settings are NOT meant to be set by the user.
+    Please refer to the Semantic Kernel Python documentation for more information:
+    https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
+    """
+    chat_service = MistralAIChatCompletion()
+    request_settings = MistralAIChatPromptExecutionSettings()
+
+    return chat_service, request_settings
+
+
+def get_ollama_chat_completion_service_and_request_settings() -> tuple[
+    OllamaChatCompletion, OllamaChatPromptExecutionSettings
+]:
+    """Return Ollama chat completion service and request settings.
+
+    The service credentials can be read in 3 ways:
+    1. Via the constructor
+    2. Via environment variables
+    3. Via an environment file
+
+    The request settings control the behavior of the service. The default settings are sufficient to get started.
+    However, you can adjust the settings to suit your needs.
+    Note: Some of the settings are NOT meant to be set by the user.
+    Please refer to the Semantic Kernel Python documentation for more information:
+    https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
+    """
+    chat_service = OllamaChatCompletion()
+    request_settings = OllamaChatPromptExecutionSettings(
+        # For model specific settings, specify them in the options dictionary.
+        # For more information on the available options, refer to the Ollama API documentation:
+        # https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values
+        options={
+            "temperature": 0.8,
+        }
+    )
+
+    return chat_service, request_settings
+
+
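These factories also compose with the Kernel-based samples; a hedged sketch of registering the returned service on a `Kernel` and wrapping the chat in a prompt function (the "chat" plugin and function names are chosen here for illustration):

```python
# A minimal sketch, assuming the Ollama server and model are configured.
from semantic_kernel import Kernel

kernel = Kernel()
chat_service, request_settings = get_chat_completion_service_and_request_settings(Services.OLLAMA)
kernel.add_service(chat_service)

chat_function = kernel.add_function(
    prompt="{{$chat_history}}{{$user_input}}",
    function_name="chat",
    plugin_name="chat",
    prompt_execution_settings=request_settings,
)
# Then: await kernel.invoke(chat_function, user_input=..., chat_history=...)
```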
+def get_onnx_chat_completion_service_and_request_settings() -> tuple[
+    OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings
+]:
+    """Return ONNX chat completion service and request settings.
+
+    The service credentials can be read in 3 ways:
+    1. Via the constructor
+    2. Via environment variables
+    3. Via an environment file
+
+    The request settings control the behavior of the service. The default settings are sufficient to get started.
+    However, you can adjust the settings to suit your needs.
+    Note: Some of the settings are NOT meant to be set by the user.
+    Please refer to the Semantic Kernel Python documentation for more information:
+    https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
+    """
+    chat_service = OnnxGenAIChatCompletion(ONNXTemplate.PHI3)
+    request_settings = OnnxGenAIPromptExecutionSettings()
+
+    return chat_service, request_settings
+
+
+def get_vertex_ai_chat_completion_service_and_request_settings() -> tuple[
+    VertexAIChatCompletion, VertexAIChatPromptExecutionSettings
+]:
+    """Return Vertex AI chat completion service and request settings.
+
+    The service credentials can be read in 3 ways:
+    1. Via the constructor
+    2. Via environment variables
+    3. Via an environment file
+
+    The request settings control the behavior of the service. The default settings are sufficient to get started.
+    However, you can adjust the settings to suit your needs.
+    Note: Some of the settings are NOT meant to be set by the user.
+    Please refer to the Semantic Kernel Python documentation for more information:
+    https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
+    """
+    chat_service = VertexAIChatCompletion()
+    request_settings = VertexAIChatPromptExecutionSettings()
+
+    return chat_service, request_settings
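Because each `Services` value is a plain string, the provider can also be chosen at run time; a hedged sketch (`GLOBAL_LLM_SERVICE` is an assumed variable name for this illustration, not one Semantic Kernel defines):

```python
# A minimal sketch: choose the backing provider from an environment variable
# so one sample can run against any configured backend.
import os

service_name = Services(os.getenv("GLOBAL_LLM_SERVICE", "openai"))
chat_service, request_settings = get_chat_completion_service_and_request_settings(service_name)
```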
diff --git a/python/samples/concepts/setup/openai_env_setup.py b/python/samples/concepts/setup/openai_env_setup.py
index b5dd5875629c..390e795a815d 100644
--- a/python/samples/concepts/setup/openai_env_setup.py
+++ b/python/samples/concepts/setup/openai_env_setup.py
@@ -1,63 +1,44 @@
 # Copyright (c) Microsoft. All rights reserved.

-# Semantic Kernel allows you multiple ways to setup your connectors.
-# this sample shows that for OpenAI Connectors.
-
-# After installing the semantic-kernel package
-# you can use the following code to setup OpenAI Connector
-
-# From environment settings
-# using this method will try to find the required settings in the environment variables
-# this is done using pydantic settings, see the full docs of that here: https://docs.pydantic.dev/latest/concepts/pydantic_settings/#usage
-# We use a prefix for all the settings and then have names defined in the OpenAISettings class
-# for OpenAI that is OPENAI_ as the prefix, with the following settings:
-# - api_key (OPENAI_API_KEY): OpenAI API key, see https://platform.openai.com/account/api-keys
-# - org_id (OPENAI_ORG_ID): This is usually optional unless your account belongs to multiple organizations.
-# - chat_model_id (OPENAI_CHAT_MODEL_ID): The OpenAI chat model ID to use, for example, gpt-3.5-turbo or gpt-4,
-# this variable is used in the OpenAIChatCompletion class and get's passed to the ai_model_id there.
-# - text_model_id (OPENAI_TEXT_MODEL_ID): The OpenAI text model ID to use, for example, gpt-3.5-turbo-instruct,
-# this variable is used in the OpenAITextCompletion class and get's passed to the ai_model_id there.
-# - embedding_model_id (OPENAI_EMBEDDING_MODEL_ID): The embedding model ID to use, for example, text-embedding-ada-002,
-# this variable is used in the OpenAITextEmbedding class and get's passed to the ai_model_id there.
-
 import os

 from pydantic import ValidationError

 from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion

+# Semantic Kernel allows you to set up your connectors in multiple ways. This sample shows that for OpenAI connectors.
+# After installing the semantic-kernel package, you can use the following code to set up the OpenAI connector
+
+# 1. From environment settings
+# Using this method will try to find the required settings in the environment variables.
+# This is done using pydantic settings, see the full docs of that here: https://docs.pydantic.dev/latest/concepts/pydantic_settings/#usage
+# We use a prefix for all the settings and then have names defined in the OpenAISettings class.
+# For OpenAI that is OPENAI_ as the prefix. For a full list of OpenAI settings, refer to:
+# https://github.com/microsoft/semantic-kernel/blob/main/python/samples/concepts/setup/ALL_SETTINGS.md
 try:
-    # when nothing is passed to the constructor,
-    # it will use the above environment variable names to find the required settings,
-    # in this case it will only fail if the OPENAI_CHAT_MODEL_ID and OPENAI_API_KEY are not found
+    # When nothing is passed to the constructor, it will use the above environment variable names
+    # to find the required settings. In this case it will only fail if the OPENAI_CHAT_MODEL_ID and
+    # OPENAI_API_KEY are not found
     service = OpenAIChatCompletion(service_id="openai_chat_service")
 except ValidationError as e:
     print(e)

-# From a .env file
-# when you want to store and use your settings from a specific file (any file as long as it is in the .env format)
-# you can pass the path to the file to the constructor
-# this will still look at the same names of the settings as above, but will try to load them from the file
-
+# 2. From a .env file
+# When you want to store and use your settings from a specific file (any file as long as it is in the .env format),
+# you can pass the path to the file to the constructor. This will still look at the same names of the settings as above,
+# but will try to load them from the file
 try:
-    # this will try to load the settings from the file at the given path
+    # This will try to load the settings from the file at the given path
     service = OpenAIChatCompletion(service_id="openai_chat_service", env_file_path="path/to/env_file")
 except ValidationError as e:
     print(e)

-# From a different value
-# if you want to pass the settings yourself, you can do that by passing the values to the constructor
-# this will ignore the environment variables and the .env file
-# in this case our API_KEY is stored in a env variable called MY_API_KEY_VAR_NAME
-# if using a file for this value, then we first need to uncomment and
-# run the following code to load the .env file from the same folder as this file:
-# from dotenv import load_dotenv
-# dotenv_path = os.path.join(os.path.dirname(__file__), '.env')
-# load_dotenv(dotenv_path)
-# and after that pass the value directly to the constructor as shown below
-# we can also fix another value, in this case the ai_model_id,
-# which becomes chat_model_id in the settings, fixed to gpt-4o
-
+# 3. From a different value
+# If you want to pass the settings yourself, you can do that by passing the values to the constructor.
+# This will ignore the environment variables and the .env file.
+# In this case our API_KEY is stored in an env variable called MY_API_KEY_VAR_NAME.
+# We can also hardcode another value; in this case the ai_model_id (which becomes chat_model_id in the
+# settings) is set to gpt-4o
 try:
     # this will use the given values as the settings
     api_key = os.getenv("MY_API_KEY_VAR_NAME")
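The hunk context ends just after the key is read, so the constructor call for option 3 is elided from the diff. A hedged sketch of how it plausibly completes (parameter names per `OpenAIChatCompletion`; the `gpt-4o` value comes from the comment above, and the exact line in the file may differ):

```python
# A hedged completion of option 3: pass the values directly to the constructor,
# bypassing environment variables and .env files. MY_API_KEY_VAR_NAME is the
# placeholder name used in the sample.
try:
    api_key = os.getenv("MY_API_KEY_VAR_NAME")
    service = OpenAIChatCompletion(service_id="openai_chat_service", ai_model_id="gpt-4o", api_key=api_key)
except ValidationError as e:
    print(e)
```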
diff --git a/python/semantic_kernel/connectors/ai/onnx/__init__.py b/python/semantic_kernel/connectors/ai/onnx/__init__.py
index 3aa09740beef..ecce90d12615 100644
--- a/python/semantic_kernel/connectors/ai/onnx/__init__.py
+++ b/python/semantic_kernel/connectors/ai/onnx/__init__.py
@@ -1,9 +1,8 @@
 # Copyright (c) Microsoft. All rights reserved.

-from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import (
-    OnnxGenAIPromptExecutionSettings,
-)
+from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import OnnxGenAIPromptExecutionSettings
 from semantic_kernel.connectors.ai.onnx.services.onnx_gen_ai_chat_completion import OnnxGenAIChatCompletion
 from semantic_kernel.connectors.ai.onnx.services.onnx_gen_ai_text_completion import OnnxGenAITextCompletion
+from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate

-__all__ = ["OnnxGenAIChatCompletion", "OnnxGenAIPromptExecutionSettings", "OnnxGenAITextCompletion"]
+__all__ = ["ONNXTemplate", "OnnxGenAIChatCompletion", "OnnxGenAIPromptExecutionSettings", "OnnxGenAITextCompletion"]
diff --git a/python/tests/samples/test_concepts.py b/python/tests/samples/test_concepts.py
index 23b4e509c363..abce5d4018f8 100644
--- a/python/tests/samples/test_concepts.py
+++ b/python/tests/samples/test_concepts.py
@@ -12,11 +12,11 @@
 from samples.concepts.auto_function_calling.functions_defined_in_yaml_prompt import (
     main as function_defined_in_yaml_prompt,
 )
-from samples.concepts.chat_completion.azure_chat_gpt_api import main as azure_chat_gpt_api
-from samples.concepts.chat_completion.azure_chat_image_input import main as azure_chat_image_input
-from samples.concepts.chat_completion.chat_gpt_api import main as chat_gpt_api
-from samples.concepts.chat_completion.chat_streaming import main as chat_streaming
-from samples.concepts.chat_completion.openai_logit_bias import main as openai_logit_bias
+from samples.concepts.chat_completion.simple_chatbot import main as simple_chatbot
+from samples.concepts.chat_completion.simple_chatbot_kernel_function import main as simple_chatbot_kernel_function
+from samples.concepts.chat_completion.simple_chatbot_logit_bias import main as simple_chatbot_logit_bias
+from samples.concepts.chat_completion.simple_chatbot_streaming import main as simple_chatbot_streaming
+from samples.concepts.chat_completion.simple_chatbot_with_image import main as simple_chatbot_with_image
 from samples.concepts.filtering.auto_function_invoke_filters import main as auto_function_invoke_filters
 from samples.concepts.filtering.function_invocation_filters import main as function_invocation_filters
 from samples.concepts.filtering.function_invocation_filters_stream import main as function_invocation_filters_stream
@@ -58,10 +58,19 @@
 concepts = [
     param(chat_gpt_api_function_calling, ["What is 3+3?", "exit"], id="chat_gpt_api_function_calling"),
-    param(azure_chat_gpt_api, ["Why is the sky blue?", "exit"], id="azure_chat_gpt_api"),
-    param(chat_gpt_api, ["What is life?", "exit"], id="chat_gpt_api"),
-    param(chat_streaming, ["Why is the sun hot?", "exit"], id="chat_streaming"),
-    param(openai_logit_bias, [], id="openai_logit_bias"),
+    param(simple_chatbot, ["Why is the sky blue in one sentence?", "exit"], id="simple_chatbot"),
+    param(simple_chatbot_streaming, ["Why is the sky blue in one sentence?", "exit"], id="simple_chatbot_streaming"),
+    param(simple_chatbot_with_image, ["exit"], id="simple_chatbot_with_image"),
+    param(
+        simple_chatbot_logit_bias,
+        ["Who has the most career points in NBA history?", "exit"],
+        id="simple_chatbot_logit_bias",
+    ),
+    param(
+        simple_chatbot_kernel_function,
+        ["Why is the sky blue in one sentence?", "exit"],
+        id="simple_chatbot_kernel_function",
+    ),
     param(auto_function_invoke_filters, ["What is 3+3?", "exit"], id="auto_function_invoke_filters"),
     param(function_invocation_filters, ["What is 3+3?", "exit"], id="function_invocation_filters"),
     param(function_invocation_filters_stream, ["What is 3+3?", "exit"], id="function_invocation_filters_stream"),
@@ -100,7 +109,6 @@
         id="bing_search_plugin",
         marks=pytest.mark.skip(reason="Flaky test due to Azure OpenAI content policy"),
     ),
-    param(azure_chat_image_input, [], id="azure_chat_image_input"),
     param(custom_service_selector, [], id="custom_service_selector"),
     param(function_defined_in_json_prompt, ["What is 3+3?", "exit"], id="function_defined_in_json_prompt"),
     param(function_defined_in_yaml_prompt, ["What is 3+3?", "exit"], id="function_defined_in_yaml_prompt"),
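For orientation, each `param` pairs a sample's `main` coroutine with the scripted replies its `input()` prompts should receive; the samples exit when they read "exit". A hedged sketch of that mechanism (the real test file's retry/reset helpers are omitted, and an async-capable pytest plugin such as pytest-asyncio is assumed):

```python
# A minimal sketch of how the (sample, responses) pairs can drive interactive
# samples: patch builtins.input to replay the scripted responses in order.
import pytest


@pytest.mark.asyncio
@pytest.mark.parametrize(("sample", "responses"), concepts)
async def test_concept_sample(sample, responses, monkeypatch):
    replies = iter(responses)
    monkeypatch.setattr("builtins.input", lambda *_: next(replies))
    await sample()
```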