From f29c6e3ce7282288cd6085d6631912e08b505f92 Mon Sep 17 00:00:00 2001 From: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> Date: Mon, 25 Nov 2024 13:24:54 -0500 Subject: [PATCH 01/19] FEATURE: Enable stream overwrite for LLM Chat at the event level (#66) * Allowing stream overwrite at event level for LLM Chat * Added overwrite flag --- examples/llm/anthropic_chat.yaml | 31 +++++++- .../llm/langchain/langchain_chat_model.py | 48 +++++++++++- .../langchain/langchain_chat_model_base.py | 76 ++++++++++++++++++- .../langchain_chat_model_with_history.py | 51 ++----------- .../general/llm/litellm/litellm_base.py | 38 ---------- .../llm/litellm/litellm_chat_model_base.py | 63 ++++++++++++++- .../litellm_chat_model_with_history.py | 9 ++- .../llm/openai/openai_chat_model_base.py | 26 ++++++- .../openai/openai_chat_model_with_history.py | 10 ++- 9 files changed, 252 insertions(+), 100 deletions(-) diff --git a/examples/llm/anthropic_chat.yaml b/examples/llm/anthropic_chat.yaml index 76fe7ef0..60cf3963 100644 --- a/examples/llm/anthropic_chat.yaml +++ b/examples/llm/anthropic_chat.yaml @@ -5,7 +5,8 @@ # # The input message has the following schema: # { -# "text": "" +# "query": "", +# "stream": false # } # # It will then send an event back to Solace with the topic: `demo/question/response` @@ -66,17 +67,23 @@ flows: base_url: ${ANTHROPIC_API_ENDPOINT} model: ${MODEL_NAME} temperature: 0.01 + llm_mode: stream + allow_overwrite_llm_mode: true + stream_to_flow: stream_output input_transforms: - type: copy source_expression: | template:You are a helpful AI assistant. Please help with the user's request below: - {{text://input.payload:text}} + {{text://input.payload:query}} dest_expression: user_data.llm_input:messages.0.content - type: copy source_expression: static:user dest_expression: user_data.llm_input:messages.0.role + - type: copy + source_expression: input.payload:stream + dest_expression: user_data.llm_input:stream input_selection: source_expression: user_data.llm_input @@ -97,3 +104,23 @@ flows: dest_expression: user_data.output:topic input_selection: source_expression: user_data.output + + - name: stream_output + components: + # Send response back to broker + - component_name: send_response + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + - type: copy + source_expression: input.payload + dest_expression: user_data.output:payload + - type: copy + source_value: demo/question/stream + dest_expression: user_data.output:topic + input_selection: + source_expression: user_data.output diff --git a/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model.py b/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model.py index 85cd194e..510323e2 100644 --- a/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model.py +++ b/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model.py @@ -1,6 +1,8 @@ # This is a wrapper around all the LangChain chat models # The configuration will control dynamic loading of the chat models +from uuid import uuid4 from copy import deepcopy +from collections import namedtuple from .langchain_chat_model_base import ( LangChainChatModelBase, info_base, @@ -17,6 +19,48 @@ def __init__(self, **kwargs): super().__init__(info, **kwargs) def invoke_model( - self, input_message, messages, session_id=None, clear_history=False + self, + input_message, + messages, 
+ session_id=None, + clear_history=False, + stream=False, ): - return self.component.invoke(messages) + if not stream: + return self.component.invoke(messages) + + aggregate_result = "" + current_batch = "" + response_uuid = str(uuid4()) + first_chunk = True + + for chunk in self.component.stream(messages): + aggregate_result += chunk.content + current_batch += chunk.content + if len(current_batch) >= self.stream_batch_size: + if self.stream_to_flow: + self.send_streaming_message( + input_message, + current_batch, + aggregate_result, + response_uuid, + first_chunk, + ) + current_batch = "" + first_chunk = False + + if self.stream_to_flow: + self.send_streaming_message( + input_message, + current_batch, + aggregate_result, + response_uuid, + first_chunk, + True, + ) + + result = namedtuple("Result", ["content", "response_uuid"])( + aggregate_result, response_uuid + ) + + return result diff --git a/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model_base.py b/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model_base.py index 58c7ae5d..089ec083 100644 --- a/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model_base.py +++ b/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model_base.py @@ -5,6 +5,7 @@ from abc import abstractmethod from langchain_core.output_parsers import JsonOutputParser +from .....common.message import Message from .....common.utils import get_obj_text from langchain.schema.messages import ( HumanMessage, @@ -39,6 +40,28 @@ "description": "Model specific configuration for the chat model. " "See documentation for valid parameter names.", }, + { + "name": "llm_mode", + "required": False, + "description": "The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response.", + }, + { + "name": "allow_overwrite_llm_mode", + "required": False, + "description": "Whether to allow the llm_mode to be overwritten by the `stream` from the input message.", + }, + { + "name": "stream_to_flow", + "required": False, + "description": "Name the flow to stream the output to - this must be configured for llm_mode='stream'.", + "default": "", + }, + { + "name": "stream_batch_size", + "required": False, + "description": "The minimum number of words in a single streaming result. 
Default: 15.", + "default": 15, + }, { "name": "llm_response_format", "required": False, @@ -88,10 +111,18 @@ class LangChainChatModelBase(LangChainBase): + + def __init__(self, info, **kwargs): + super().__init__(info, **kwargs) + self.llm_mode = self.get_config("llm_mode", "none") + self.allow_overwrite_llm_mode = self.get_config("allow_overwrite_llm_mode") + self.stream_to_flow = self.get_config("stream_to_flow", "") + self.stream_batch_size = self.get_config("stream_batch_size", 15) + def invoke(self, message, data): messages = [] - for item in data["messages"]: + for item in data.get("messages"): if item["role"] == "system": messages.append(SystemMessage(content=item["content"])) elif item["role"] == "user" or item["role"] == "human": @@ -109,9 +140,22 @@ def invoke(self, message, data): session_id = data.get("session_id", None) clear_history = data.get("clear_history", False) + stream = data.get("stream") + + should_stream = self.llm_mode == "stream" + if ( + self.allow_overwrite_llm_mode + and stream is not None + and isinstance(stream, bool) + ): + should_stream = stream llm_res = self.invoke_model( - message, messages, session_id=session_id, clear_history=clear_history + message, + messages, + session_id=session_id, + clear_history=clear_history, + stream=should_stream, ) res_format = self.get_config("llm_response_format", "text") @@ -134,6 +178,32 @@ def invoke(self, message, data): @abstractmethod def invoke_model( - self, input_message, messages, session_id=None, clear_history=False + self, + input_message, + messages, + session_id=None, + clear_history=False, + stream=False, ): pass + + def send_streaming_message( + self, + input_message, + chunk, + aggregate_result, + response_uuid, + first_chunk=False, + last_chunk=False, + ): + message = Message( + payload={ + "chunk": chunk, + "content": aggregate_result, + "response_uuid": response_uuid, + "first_chunk": first_chunk, + "last_chunk": last_chunk, + }, + user_properties=input_message.get_user_properties(), + ) + self.send_to_flow(self.stream_to_flow, message) diff --git a/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model_with_history.py b/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model_with_history.py index 4569c30a..7e708dd7 100644 --- a/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model_with_history.py +++ b/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model_with_history.py @@ -16,7 +16,6 @@ SystemMessage, ) -from .....common.message import Message from .langchain_chat_model_base import ( LangChainChatModelBase, info_base, @@ -78,23 +77,6 @@ "description": "The configuration for the history class.", "type": "object", }, - { - "name": "stream_to_flow", - "required": False, - "description": "Name the flow to stream the output to - this must be configured for llm_mode='stream'.", - "default": "", - }, - { - "name": "llm_mode", - "required": False, - "description": "The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response.", - }, - { - "name": "stream_batch_size", - "required": False, - "description": "The minimum number of words in a single streaming result. 
Default: 15.", - "default": 15, - }, { "name": "set_response_uuid_in_user_properties", "required": False, @@ -128,15 +110,17 @@ def __init__(self, **kwargs): ) self.history_max_tokens = self.get_config("history_max_tokens", 8000) self.history_max_time = self.get_config("history_max_time", None) - self.stream_to_flow = self.get_config("stream_to_flow", "") - self.llm_mode = self.get_config("llm_mode", "none") - self.stream_batch_size = self.get_config("stream_batch_size", 15) self.set_response_uuid_in_user_properties = self.get_config( "set_response_uuid_in_user_properties", False ) def invoke_model( - self, input_message, messages, session_id=None, clear_history=False + self, + input_message, + messages, + session_id=None, + clear_history=False, + stream=False, ): if clear_history: @@ -171,7 +155,7 @@ def invoke_model( history_messages_key="chat_history", ) - if self.llm_mode == "none": + if not stream: return runnable.invoke( {"input": human_message}, config={ @@ -221,27 +205,6 @@ def invoke_model( return result - def send_streaming_message( - self, - input_message, - chunk, - aggregate_result, - response_uuid, - first_chunk=False, - last_chunk=False, - ): - message = Message( - payload={ - "chunk": chunk, - "content": aggregate_result, - "response_uuid": response_uuid, - "first_chunk": first_chunk, - "last_chunk": last_chunk, - }, - user_properties=input_message.get_user_properties(), - ) - self.send_to_flow(self.stream_to_flow, message) - def create_history(self): history_class = self.load_component( diff --git a/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py b/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py index f9d42823..4f8d5aaf 100644 --- a/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py +++ b/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py @@ -32,40 +32,6 @@ "description": "Sampling temperature to use", "default": 0.7, }, - { - "name": "stream_to_flow", - "required": False, - "description": ( - "Name the flow to stream the output to - this must be configured for " - "llm_mode='stream'. This is mutually exclusive with stream_to_next_component." - ), - "default": "", - }, - { - "name": "stream_to_next_component", - "required": False, - "description": ( - "Whether to stream the output to the next component in the flow. " - "This is mutually exclusive with stream_to_flow." - ), - "default": False, - }, - { - "name": "llm_mode", - "required": False, - "description": ( - "The mode for streaming results: 'sync' or 'stream'. 'stream' " - "will just stream the results to the named flow. 'none' will " - "wait for the full response." - ), - "default": "none", - }, - { - "name": "stream_batch_size", - "required": False, - "description": "The minimum number of words in a single streaming result. 
Default: 15.", - "default": 15, - }, { "name": "set_response_uuid_in_user_properties", "required": False, @@ -91,10 +57,6 @@ def __init__(self, module_info, **kwargs): def init(self): litellm.suppress_debug_info = True self.load_balancer = self.get_config("load_balancer") - self.stream_to_flow = self.get_config("stream_to_flow") - self.stream_to_next_component = self.get_config("stream_to_next_component") - self.llm_mode = self.get_config("llm_mode") - self.stream_batch_size = self.get_config("stream_batch_size") self.set_response_uuid_in_user_properties = self.get_config( "set_response_uuid_in_user_properties" ) diff --git a/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_base.py b/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_base.py index a1ecff51..6336a4bd 100644 --- a/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_base.py +++ b/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_base.py @@ -28,6 +28,10 @@ "required": ["role", "content"], }, }, + "stream": { + "type": "boolean", + "description": "Whether to stream the response - overwrites llm_mode", + }, }, "required": ["messages"], }, @@ -63,18 +67,75 @@ }, }, ) +litellm_chat_info_base["config_parameters"].extend( + [ + { + "name": "stream_to_flow", + "required": False, + "description": ( + "Name the flow to stream the output to - this must be configured for " + "llm_mode='stream'. This is mutually exclusive with stream_to_next_component." + ), + "default": "", + }, + { + "name": "stream_to_next_component", + "required": False, + "description": ( + "Whether to stream the output to the next component in the flow. " + "This is mutually exclusive with stream_to_flow." + ), + "default": False, + }, + { + "name": "llm_mode", + "required": False, + "description": ( + "The mode for streaming results: 'none' or 'stream'. 'stream' " + "will just stream the results to the named flow. 'none' will " + "wait for the full response." + ), + "default": "none", + }, + { + "name": "allow_overwrite_llm_mode", + "required": False, + "description": "Whether to allow the llm_mode to be overwritten by the `stream` from the input message.", + }, + { + "name": "stream_batch_size", + "required": False, + "description": "The minimum number of words in a single streaming result. 
Default: 15.", + "default": 15, + }, + ] +) class LiteLLMChatModelBase(LiteLLMBase): def __init__(self, info, **kwargs): super().__init__(info, **kwargs) + self.stream_to_flow = self.get_config("stream_to_flow") + self.stream_to_next_component = self.get_config("stream_to_next_component") + self.llm_mode = self.get_config("llm_mode") + self.allow_overwrite_llm_mode = self.get_config("allow_overwrite_llm_mode") + self.stream_batch_size = self.get_config("stream_batch_size") def invoke(self, message, data): """invoke the model""" messages = data.get("messages", []) + stream = data.get("stream") + + should_stream = self.llm_mode == "stream" + if ( + self.allow_overwrite_llm_mode + and stream is not None + and isinstance(stream, bool) + ): + should_stream = stream - if self.llm_mode == "stream": + if should_stream: return self.invoke_stream(message, messages) else: return self.invoke_non_stream(messages) diff --git a/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_with_history.py b/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_with_history.py index c98e3535..4ee0b02c 100644 --- a/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_with_history.py +++ b/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_with_history.py @@ -32,7 +32,9 @@ "description": "Clear history but keep the last N messages. If 0, clear all history. If not set, do not clear history.", } + class LiteLLMChatModelWithHistory(LiteLLMChatModelBase, ChatHistoryHandler): + def __init__(self, **kwargs): super().__init__(info, **kwargs) self.history_max_turns = self.get_config("history_max_turns", 10) @@ -45,7 +47,7 @@ def __init__(self, **kwargs): def invoke(self, message, data): session_id = data.get("session_id") if not session_id: - raise ValueError("session_id is not provided") + raise ValueError("session_id is not provided") clear_history_but_keep_depth = data.get("clear_history_but_keep_depth") try: @@ -55,6 +57,7 @@ def invoke(self, message, data): log.error("Invalid clear_history_but_keep_depth value. Defaulting to 0.") clear_history_but_keep_depth = 0 messages = data.get("messages", []) + stream = data.get("stream") with self.get_lock(self.history_key): history = self.kv_store_get(self.history_key) or {} @@ -88,7 +91,7 @@ def invoke(self, message, data): self.prune_history(session_id, history) response = super().invoke( - message, {"messages": history[session_id]["messages"]} + message, {"messages": history[session_id]["messages"], "stream": stream} ) # Add the assistant's response to the history @@ -102,4 +105,4 @@ def invoke(self, message, data): self.kv_store_set(self.history_key, history) log.debug(f"Updated history: {history}") - return response \ No newline at end of file + return response diff --git a/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py b/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py index beabd07f..3ac9bcd5 100755 --- a/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py +++ b/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py @@ -56,12 +56,17 @@ "name": "llm_mode", "required": False, "description": ( - "The mode for streaming results: 'sync' or 'stream'. 'stream' " + "The mode for streaming results: 'none' or 'stream'. 'stream' " "will just stream the results to the named flow. 'none' will " "wait for the full response." 
), "default": "none", }, + { + "name": "allow_overwrite_llm_mode", + "required": False, + "description": "Whether to allow the llm_mode to be overwritten by the `stream` from the input message.", + }, { "name": "stream_batch_size", "required": False, @@ -97,6 +102,10 @@ "required": ["role", "content"], }, }, + "stream": { + "type": "boolean", + "description": "Whether to stream the response - overwrites llm_mode", + }, }, "required": ["messages"], }, @@ -134,6 +143,7 @@ class OpenAIChatModelBase(ComponentBase): + def __init__(self, module_info, **kwargs): super().__init__(module_info, **kwargs) self.init() @@ -144,6 +154,7 @@ def init(self): self.stream_to_flow = self.get_config("stream_to_flow") self.stream_to_next_component = self.get_config("stream_to_next_component") self.llm_mode = self.get_config("llm_mode") + self.allow_overwrite_llm_mode = self.get_config("allow_overwrite_llm_mode") self.stream_batch_size = self.get_config("stream_batch_size") self.response_format = self.get_config("response_format", "text") self.set_response_uuid_in_user_properties = self.get_config( @@ -156,12 +167,21 @@ def init(self): def invoke(self, message, data): messages = data.get("messages", []) + stream = data.get("stream") client = OpenAI( api_key=self.get_config("api_key"), base_url=self.get_config("base_url") ) - if self.llm_mode == "stream": + should_stream = self.llm_mode == "stream" + if ( + self.allow_overwrite_llm_mode + and stream is not None + and isinstance(stream, bool) + ): + should_stream = stream + + if should_stream: return self.invoke_stream(client, message, messages) else: max_retries = 3 @@ -171,7 +191,7 @@ def invoke(self, message, data): messages=messages, model=self.model, temperature=self.temperature, - response_format={"type": self.response_format} + response_format={"type": self.response_format}, ) return {"content": response.choices[0].message.content} except Exception as e: diff --git a/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_with_history.py b/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_with_history.py index e9f0da80..fb164c77 100644 --- a/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_with_history.py +++ b/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_with_history.py @@ -35,6 +35,7 @@ class OpenAIChatModelWithHistory(OpenAIChatModelBase, ChatHistoryHandler): + def __init__(self, **kwargs): super().__init__(info, **kwargs) self.history_max_turns = self.get_config("history_max_turns", 10) @@ -47,8 +48,8 @@ def __init__(self, **kwargs): def invoke(self, message, data): session_id = data.get("session_id") if not session_id: - raise ValueError("session_id is not provided") - + raise ValueError("session_id is not provided") + clear_history_but_keep_depth = data.get("clear_history_but_keep_depth") try: if clear_history_but_keep_depth is not None: @@ -56,6 +57,7 @@ def invoke(self, message, data): except (TypeError, ValueError): clear_history_but_keep_depth = 0 messages = data.get("messages", []) + stream = data.get("stream") with self.get_lock(self.history_key): history = self.kv_store_get(self.history_key) or {} @@ -89,7 +91,7 @@ def invoke(self, message, data): self.prune_history(session_id, history) response = super().invoke( - message, {"messages": history[session_id]["messages"]} + message, {"messages": history[session_id]["messages"], "stream": stream} ) # Add the assistant's response to the history @@ -102,4 +104,4 @@ def invoke(self, message, data): 
self.kv_store_set(self.history_key, history) - return response \ No newline at end of file + return response From 1ca1c0e7580fab8f73fe0e993f0961e7aeb595ab Mon Sep 17 00:00:00 2001 From: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> Date: Tue, 26 Nov 2024 14:47:12 -0500 Subject: [PATCH 02/19] AI-95: Enhance request/response handling for streaming LLM access (#69) * Changes for request/response for streaming LLM access * Updated with main * update --------- Co-authored-by: Edward Funnekotter --- docs/components/broker_request_response.md | 4 ++-- .../langchain_chat_model_with_history.md | 6 ----- docs/components/litellm_chat_model.md | 8 +++++++ docs/components/litellm_embeddings.md | 8 +++++++ examples/llm/anthropic_chat.yaml | 1 - .../components/component_base.py | 10 ++++++++ .../langchain/langchain_chat_model_base.py | 18 ++------------- .../general/llm/litellm/litellm_base.py | 5 ---- .../llm/litellm/litellm_chat_model_base.py | 23 ++++++------------- .../llm/openai/openai_chat_model_base.py | 23 ++++--------------- .../inputs_outputs/broker_request_response.py | 8 +++---- .../flow/request_response_flow_controller.py | 7 ++++-- 12 files changed, 50 insertions(+), 71 deletions(-) mode change 100755 => 100644 src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py diff --git a/docs/components/broker_request_response.md b/docs/components/broker_request_response.md index 333b0504..30ee6167 100644 --- a/docs/components/broker_request_response.md +++ b/docs/components/broker_request_response.md @@ -17,7 +17,7 @@ component_config: payload_format: response_topic_prefix: response_topic_suffix: - reply_queue_prefix: + response_queue_prefix: request_expiry_ms: streaming: streaming_complete_expression: @@ -34,7 +34,7 @@ component_config: | payload_format | False | json | Format for the payload (json, yaml, text) | | response_topic_prefix | False | reply | Prefix for reply topics | | response_topic_suffix | False | | Suffix for reply topics | -| reply_queue_prefix | False | reply-queue | Prefix for reply queues | +| response_queue_prefix | False | reply-queue | Prefix for reply queues | | request_expiry_ms | False | 60000 | Expiry time for cached requests in milliseconds | | streaming | False | | The response will arrive in multiple pieces. If True, the streaming_complete_expression must be set and will be used to determine when the last piece has arrived. | | streaming_complete_expression | False | | The source expression to determine when the last piece of a streaming response has arrived. | diff --git a/docs/components/langchain_chat_model_with_history.md b/docs/components/langchain_chat_model_with_history.md index e66e9a6a..8686061e 100644 --- a/docs/components/langchain_chat_model_with_history.md +++ b/docs/components/langchain_chat_model_with_history.md @@ -19,9 +19,6 @@ component_config: history_module: history_class: history_config: - stream_to_flow: - llm_mode: - stream_batch_size: set_response_uuid_in_user_properties: ``` @@ -38,9 +35,6 @@ component_config: | history_module | False | langchain_community.chat_message_histories | The module that contains the history class. Default: 'langchain_community.chat_message_histories' | | history_class | False | ChatMessageHistory | The class to use for the history. Default: 'ChatMessageHistory' | | history_config | False | | The configuration for the history class. | -| stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. 
| -| llm_mode | False | | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | -| stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | | set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | diff --git a/docs/components/litellm_chat_model.md b/docs/components/litellm_chat_model.md index 556ee083..acd19851 100644 --- a/docs/components/litellm_chat_model.md +++ b/docs/components/litellm_chat_model.md @@ -20,6 +20,10 @@ component_config: history_max_time: history_max_turns: history_max_time: + stream_to_flow: + stream_to_next_component: + llm_mode: + stream_batch_size: ``` | Parameter | Required | Default | Description | @@ -36,6 +40,10 @@ component_config: | history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | | history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | +| stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | +| stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | +| llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | ## Component Input Schema diff --git a/docs/components/litellm_embeddings.md b/docs/components/litellm_embeddings.md index 6542c299..83930083 100644 --- a/docs/components/litellm_embeddings.md +++ b/docs/components/litellm_embeddings.md @@ -20,6 +20,10 @@ component_config: history_max_time: history_max_turns: history_max_time: + stream_to_flow: + stream_to_next_component: + llm_mode: + stream_batch_size: ``` | Parameter | Required | Default | Description | @@ -36,6 +40,10 @@ component_config: | history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | | history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | +| stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | +| stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | +| llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. 
| ## Component Input Schema diff --git a/examples/llm/anthropic_chat.yaml b/examples/llm/anthropic_chat.yaml index 60cf3963..cc6e8fd8 100644 --- a/examples/llm/anthropic_chat.yaml +++ b/examples/llm/anthropic_chat.yaml @@ -68,7 +68,6 @@ flows: model: ${MODEL_NAME} temperature: 0.01 llm_mode: stream - allow_overwrite_llm_mode: true stream_to_flow: stream_output input_transforms: - type: copy diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 0d04dae8..63dd8fb3 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -303,6 +303,16 @@ def setup_broker_request_response(self): "broker_config": broker_config, "request_expiry_ms": request_expiry_ms, } + + if "response_topic_prefix" in self.broker_request_response_config: + rrc_config["response_topic_prefix"] = self.broker_request_response_config[ + "response_topic_prefix" + ] + if "response_queue_prefix" in self.broker_request_response_config: + rrc_config["response_queue_prefix"] = self.broker_request_response_config[ + "response_queue_prefix" + ] + self.broker_request_response_controller = RequestResponseFlowController( config=rrc_config, connector=self.connector ) diff --git a/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model_base.py b/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model_base.py index 089ec083..86cc5c25 100644 --- a/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model_base.py +++ b/src/solace_ai_connector/components/general/llm/langchain/langchain_chat_model_base.py @@ -45,11 +45,6 @@ "required": False, "description": "The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 
'none' will wait for the full response.", }, - { - "name": "allow_overwrite_llm_mode", - "required": False, - "description": "Whether to allow the llm_mode to be overwritten by the `stream` from the input message.", - }, { "name": "stream_to_flow", "required": False, @@ -115,7 +110,6 @@ class LangChainChatModelBase(LangChainBase): def __init__(self, info, **kwargs): super().__init__(info, **kwargs) self.llm_mode = self.get_config("llm_mode", "none") - self.allow_overwrite_llm_mode = self.get_config("allow_overwrite_llm_mode") self.stream_to_flow = self.get_config("stream_to_flow", "") self.stream_batch_size = self.get_config("stream_batch_size", 15) @@ -140,22 +134,14 @@ def invoke(self, message, data): session_id = data.get("session_id", None) clear_history = data.get("clear_history", False) - stream = data.get("stream") - - should_stream = self.llm_mode == "stream" - if ( - self.allow_overwrite_llm_mode - and stream is not None - and isinstance(stream, bool) - ): - should_stream = stream + stream = data.get("stream", self.llm_mode == "stream") llm_res = self.invoke_model( message, messages, session_id=session_id, clear_history=clear_history, - stream=should_stream, + stream=stream, ) res_format = self.get_config("llm_response_format", "text") diff --git a/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py b/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py index 4f8d5aaf..22bb2e02 100644 --- a/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py +++ b/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py @@ -3,7 +3,6 @@ import litellm from ....component_base import ComponentBase -from .....common.message import Message from .....common.log import log litellm_info_base = { @@ -60,10 +59,6 @@ def init(self): self.set_response_uuid_in_user_properties = self.get_config( "set_response_uuid_in_user_properties" ) - if self.stream_to_flow and self.stream_to_next_component: - raise ValueError( - "stream_to_flow and stream_to_next_component are mutually exclusive" - ) self.router = None def init_load_balancer(self): diff --git a/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_base.py b/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_base.py index 6336a4bd..24358bde 100644 --- a/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_base.py +++ b/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_base.py @@ -97,11 +97,6 @@ ), "default": "none", }, - { - "name": "allow_overwrite_llm_mode", - "required": False, - "description": "Whether to allow the llm_mode to be overwritten by the `stream` from the input message.", - }, { "name": "stream_batch_size", "required": False, @@ -119,23 +114,19 @@ def __init__(self, info, **kwargs): self.stream_to_flow = self.get_config("stream_to_flow") self.stream_to_next_component = self.get_config("stream_to_next_component") self.llm_mode = self.get_config("llm_mode") - self.allow_overwrite_llm_mode = self.get_config("allow_overwrite_llm_mode") self.stream_batch_size = self.get_config("stream_batch_size") + if self.stream_to_flow and self.stream_to_next_component: + raise ValueError( + "stream_to_flow and stream_to_next_component are mutually exclusive" + ) + def invoke(self, message, data): """invoke the model""" messages = data.get("messages", []) - stream = data.get("stream") - - should_stream = self.llm_mode == "stream" - if ( - self.allow_overwrite_llm_mode - and stream is not None - and 
isinstance(stream, bool) - ): - should_stream = stream + stream = data.get("stream", self.llm_mode == "stream") - if should_stream: + if stream: return self.invoke_stream(message, messages) else: return self.invoke_non_stream(messages) diff --git a/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py b/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py old mode 100755 new mode 100644 index 3ac9bcd5..012d1dfe --- a/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py +++ b/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py @@ -62,11 +62,6 @@ ), "default": "none", }, - { - "name": "allow_overwrite_llm_mode", - "required": False, - "description": "Whether to allow the llm_mode to be overwritten by the `stream` from the input message.", - }, { "name": "stream_batch_size", "required": False, @@ -104,7 +99,8 @@ }, "stream": { "type": "boolean", - "description": "Whether to stream the response - overwrites llm_mode", + "description": "Whether to stream the response. It is is not provided, it will default to the value of llm_mode.", + "required": False, }, }, "required": ["messages"], @@ -154,9 +150,7 @@ def init(self): self.stream_to_flow = self.get_config("stream_to_flow") self.stream_to_next_component = self.get_config("stream_to_next_component") self.llm_mode = self.get_config("llm_mode") - self.allow_overwrite_llm_mode = self.get_config("allow_overwrite_llm_mode") self.stream_batch_size = self.get_config("stream_batch_size") - self.response_format = self.get_config("response_format", "text") self.set_response_uuid_in_user_properties = self.get_config( "set_response_uuid_in_user_properties" ) @@ -167,21 +161,13 @@ def init(self): def invoke(self, message, data): messages = data.get("messages", []) - stream = data.get("stream") + stream = data.get("stream", self.llm_mode == "stream") client = OpenAI( api_key=self.get_config("api_key"), base_url=self.get_config("base_url") ) - should_stream = self.llm_mode == "stream" - if ( - self.allow_overwrite_llm_mode - and stream is not None - and isinstance(stream, bool) - ): - should_stream = stream - - if should_stream: + if stream: return self.invoke_stream(client, message, messages) else: max_retries = 3 @@ -191,7 +177,6 @@ def invoke(self, message, data): messages=messages, model=self.model, temperature=self.temperature, - response_format={"type": self.response_format}, ) return {"content": response.choices[0].message.content} except Exception as e: diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py b/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py index 88068fdb..bdaea627 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py @@ -73,7 +73,7 @@ "default": "", }, { - "name": "reply_queue_prefix", + "name": "response_queue_prefix", "required": False, "description": "Prefix for reply queues", "default": "reply-queue", @@ -168,11 +168,11 @@ def __init__(self, **kwargs): self.response_topic_suffix = ensure_slash_on_start( self.get_config("response_topic_suffix") ) - self.reply_queue_prefix = ensure_slash_on_end( - self.get_config("reply_queue_prefix") + self.response_queue_prefix = ensure_slash_on_end( + self.get_config("response_queue_prefix") ) self.requestor_id = str(uuid.uuid4()) - self.reply_queue_name = f"{self.reply_queue_prefix}{self.requestor_id}" + 
self.reply_queue_name = f"{self.response_queue_prefix}{self.requestor_id}" self.response_topic = f"{self.response_topic_prefix}{self.requestor_id}{self.response_topic_suffix}" self.response_thread = None self.streaming = self.get_config("streaming") diff --git a/src/solace_ai_connector/flow/request_response_flow_controller.py b/src/solace_ai_connector/flow/request_response_flow_controller.py index 36dda712..37a4fe90 100644 --- a/src/solace_ai_connector/flow/request_response_flow_controller.py +++ b/src/solace_ai_connector/flow/request_response_flow_controller.py @@ -30,6 +30,7 @@ # This is a very basic component which will be stitched onto the final component in the flow class RequestResponseControllerOuputComponent: + def __init__(self, controller): self.controller = controller @@ -39,6 +40,7 @@ def enqueue(self, event): # This is the main class that will be used to send messages to a flow and receive the response class RequestResponseFlowController: + def __init__(self, config: Dict[str, Any], connector): self.config = config self.connector = connector @@ -55,14 +57,15 @@ def __init__(self, config: Dict[str, Any], connector): self.flow.run() def create_broker_request_response_flow(self): - self.broker_config["request_expiry_ms"] = self.request_expiry_ms + full_config = self.broker_config.copy() + full_config.update(self.config) config = { "name": "_internal_broker_request_response_flow", "components": [ { "component_name": "_internal_broker_request_response", "component_module": "broker_request_response", - "component_config": self.broker_config, + "component_config": full_config, } ], } From 264fce25f76bbbed60b4f11e927d12684b51cf35 Mon Sep 17 00:00:00 2001 From: John Corpuz Date: Tue, 26 Nov 2024 10:03:41 -0500 Subject: [PATCH 03/19] Chore: Enable Whitesource scan --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6acdaf68..691d9be7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,9 +17,13 @@ jobs: uses: SolaceDev/solace-public-workflows/.github/workflows/hatch_ci.yml@main with: min-python-version: "3.10" + whitesource_product_name: "solaceai" secrets: SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} SONAR_HOST_URL: ${{ vars.SONAR_HOST_URL }} + WHITESOURCE_API_KEY: ${{ secrets.WHITESOURCE_API_KEY }} + MANIFEST_AWS_ACCESS_KEY_ID: ${{ secrets.MANIFEST_READ_ONLY_AWS_ACCESS_KEY_ID }} + MANIFEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.MANIFEST_READ_ONLY_AWS_SECRET_ACCESS_KEY }} structure-test: name: Test Docker Image Structure runs-on: ubuntu-latest From b122cb2468e15f85f84b18040d0520f030bc8c63 Mon Sep 17 00:00:00 2001 From: Ali Parvizi <91437594+alimosaed@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:20:17 -0500 Subject: [PATCH 04/19] Alireza/none/hotfix bugs (#67) * fix: add dependencies to the toml file * fix: handled miss configurations * fix: resolve conflicts * FEATURE: Enable stream overwrite for LLM Chat at the event level (#66) * Allowing stream overwrite at event level for LLM Chat * Added overwrite flag * AI-95: Enhance request/response handling for streaming LLM access (#69) * Changes for request/response for streaming LLM access * Updated with main * update --------- Co-authored-by: Edward Funnekotter * fix: add exception handler * fix: add exception handler --------- Co-authored-by: Art Morozov Co-authored-by: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> Co-authored-by: Edward Funnekotter --- .../common/messaging/solace_messaging.py | 4 ++++ 
src/solace_ai_connector/main.py | 10 +++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index 4b03e7a5..875e677b 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -35,6 +35,7 @@ class MessageHandlerImpl(MessageHandler): + def __init__(self, persistent_receiver: PersistentMessageReceiver): self.receiver: PersistentMessageReceiver = persistent_receiver self.persistent_receiver: PersistentMessageReceiver = None @@ -54,6 +55,7 @@ def on_message(self, message: InboundMessage): class MessagePublishReceiptListenerImpl(MessagePublishReceiptListener): + def __init__(self, callback=None): self.callback = callback @@ -67,6 +69,7 @@ def on_publish_receipt(self, publish_receipt: PublishReceipt): class ServiceEventHandler( ReconnectionListener, ReconnectionAttemptListener, ServiceInterruptionListener ): + def on_reconnected(self, service_event: ServiceEvent): log.debug("Reconnected to broker: %s", service_event.get_cause()) log.debug("Message: %s", service_event.get_message()) @@ -92,6 +95,7 @@ def set_python_solace_log_level(level: str): # Create SolaceMessaging class inheriting from Messaging class SolaceMessaging(Messaging): + def __init__(self, broker_properties: dict): super().__init__(broker_properties) self.persistent_receivers = [] diff --git a/src/solace_ai_connector/main.py b/src/solace_ai_connector/main.py index fe2ccf62..24ef1007 100644 --- a/src/solace_ai_connector/main.py +++ b/src/solace_ai_connector/main.py @@ -108,20 +108,24 @@ def shutdown(): """Shutdown the application.""" print("Stopping Solace AI Connector") app.stop() - app.cleanup() + app.cleanup() print("Solace AI Connector exited successfully!") os._exit(0) + atexit.register(shutdown) # Start the application - app.run() + try: + app.run() + except KeyboardInterrupt: + shutdown() try: app.wait_for_flows() except KeyboardInterrupt: shutdown() + if __name__ == "__main__": # Read in the configuration yaml filenames from the args - main() From 4f3dfe9f5f0837fa6fd0965c715d48b57f11ab10 Mon Sep 17 00:00:00 2001 From: Ali Parvizi <91437594+alimosaed@users.noreply.github.com> Date: Mon, 9 Dec 2024 17:40:57 -0500 Subject: [PATCH 05/19] feat: drop error messages when the queue is full (#75) --- examples/error_handler.yaml | 3 +++ .../components/inputs_outputs/error_input.py | 27 ++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/examples/error_handler.yaml b/examples/error_handler.yaml index f2277949..8c4a97b4 100644 --- a/examples/error_handler.yaml +++ b/examples/error_handler.yaml @@ -10,6 +10,8 @@ # } # If value is not a number, the error will be caught, logged to file and send back to the Solace broker. # +# Subscribe to `ai_connector_error/*/*/*` to see the error messages. 
+# # required ENV variables: # - SOLACE_BROKER_URL # - SOLACE_BROKER_USERNAME @@ -38,6 +40,7 @@ flows: - component_name: error_input component_module: error_input component_config: + max_queue_depth: 100 - component_name: error_logger component_module: file_output input_transforms: diff --git a/src/solace_ai_connector/components/inputs_outputs/error_input.py b/src/solace_ai_connector/components/inputs_outputs/error_input.py index 62720134..2c71fcc2 100644 --- a/src/solace_ai_connector/components/inputs_outputs/error_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/error_input.py @@ -28,6 +28,15 @@ ), "default": None, }, + { + "name": "max_queue_depth", + "required": False, + "description": ( + "Maximum number of messages that can be queued in the input queue." + "If the queue is full, the new message is dropped." + ), + "default": 1000, + }, ], "output_schema": { "type": "object", @@ -100,9 +109,11 @@ class ErrorInput(ComponentBase): + def __init__(self, **kwargs): super().__init__(info, **kwargs) self.max_rate = self.get_config("max_rate") + self.max_queue_depth = self.get_config("max_queue_depth") self.error_count_in_last_second = 0 self.error_count_start_time = time.time() @@ -112,7 +123,10 @@ def __init__(self, **kwargs): self.error_queue = None def invoke(self, message, data): - if self.discard_message_due_to_input_rate(): + if ( + self.discard_message_due_to_input_rate() + or self.discard_message_due_to_full_queue() + ): return None return data @@ -135,5 +149,16 @@ def discard_message_due_to_input_rate(self): return True return False + def discard_message_due_to_full_queue(self): + if self.input_queue.qsize() < self.max_queue_depth: + return False + + log.warning( + "Discarding error message due to queue size. " + "Error queue reached max queue depth of %d.", + self.max_queue_depth, + ) + return True + def get_input_data(self, message): return message.get_data("input.payload") From b335985840d721065196b0ef895ffce626afa1bf Mon Sep 17 00:00:00 2001 From: Ali Parvizi <91437594+alimosaed@users.noreply.github.com> Date: Wed, 11 Dec 2024 10:11:12 -0500 Subject: [PATCH 06/19] Add a text splitter component (#76) * feat: drop error messages when the queue is full * feat: add a text splitter component * feat: updated docs * fix: return the original example --- docs/components/broker_request_response.md | 8 + docs/components/error_input.md | 2 + docs/components/index.md | 1 + docs/components/langchain_chat_model.md | 6 + .../langchain_chat_model_with_history.md | 6 + docs/components/langchain_split_text.md | 42 ++++ docs/components/litellm_chat_model.md | 8 +- .../litellm_chat_model_with_history.md | 8 +- docs/components/litellm_embeddings.md | 6 +- docs/components/openai_chat_model.md | 6 +- .../openai_chat_model_with_history.md | 4 +- examples/llm/openai_chroma_rag.yaml | 2 +- .../llm/openai_chroma_rag_with_splitter.yaml | 217 ++++++++++++++++++ .../llm/langchain/langchain_split_text.py | 90 ++++++++ 14 files changed, 393 insertions(+), 13 deletions(-) create mode 100644 docs/components/langchain_split_text.md create mode 100644 examples/llm/openai_chroma_rag_with_splitter.yaml create mode 100644 src/solace_ai_connector/components/general/llm/langchain/langchain_split_text.py diff --git a/docs/components/broker_request_response.md b/docs/components/broker_request_response.md index 30ee6167..a408e2e7 100644 --- a/docs/components/broker_request_response.md +++ b/docs/components/broker_request_response.md @@ -21,6 +21,10 @@ component_config: request_expiry_ms: streaming: 
streaming_complete_expression: + streaming: + streaming_complete_expression: + streaming: + streaming_complete_expression: ``` | Parameter | Required | Default | Description | @@ -38,6 +42,10 @@ component_config: | request_expiry_ms | False | 60000 | Expiry time for cached requests in milliseconds | | streaming | False | | The response will arrive in multiple pieces. If True, the streaming_complete_expression must be set and will be used to determine when the last piece has arrived. | | streaming_complete_expression | False | | The source expression to determine when the last piece of a streaming response has arrived. | +| streaming | False | | The response will arrive in multiple pieces. If True, the streaming_complete_expression must be set and will be used to determine when the last piece has arrived. | +| streaming_complete_expression | False | | The source expression to determine when the last piece of a streaming response has arrived. | +| streaming | False | | The response will arrive in multiple pieces. If True, the streaming_complete_expression must be set and will be used to determine when the last piece has arrived. | +| streaming_complete_expression | False | | The source expression to determine when the last piece of a streaming response has arrived. | ## Component Input Schema diff --git a/docs/components/error_input.md b/docs/components/error_input.md index de06a883..a4bb24cc 100644 --- a/docs/components/error_input.md +++ b/docs/components/error_input.md @@ -9,11 +9,13 @@ component_name: component_module: error_input component_config: max_rate: + max_queue_depth: ``` | Parameter | Required | Default | Description | | --- | --- | --- | --- | | max_rate | False | None | Maximum rate of errors to process per second. Any errors above this rate will be dropped. If not set, all errors will be processed. | +| max_queue_depth | False | 1000 | Maximum number of messages that can be queued in the input queue.If the queue is full, the new message is dropped. | diff --git a/docs/components/index.md b/docs/components/index.md index 1b3b8516..292ada2e 100644 --- a/docs/components/index.md +++ b/docs/components/index.md @@ -14,6 +14,7 @@ | [langchain_chat_model](langchain_chat_model.md) | Provide access to all the LangChain chat models via configuration | | [langchain_chat_model_with_history](langchain_chat_model_with_history.md) | A chat model based on LangChain that includes keeping per-session history of the conversation. Note that this component will only take the first system message and the first human message in the messages array. | | [langchain_embeddings](langchain_embeddings.md) | Provide access to all the LangChain Text Embeddings components via configuration | +| [langchain_split_text](langchain_split_text.md) | Split a long text into smaller parts using the LangChain text splitter module | | [langchain_vector_store_delete](langchain_vector_store_delete.md) | This component allows for entries in a LangChain Vector Store to be deleted. This is needed for the continued maintenance of the vector store. Due to the nature of langchain vector stores, you need to specify an embedding component even though it is not used in this component. | | [langchain_vector_store_embedding_index](langchain_vector_store_embedding_index.md) | Use LangChain Vector Stores to index text for later semantic searches. This will take text, run it through an embedding model and then store it in a vector database. 
| | [langchain_vector_store_embedding_search](langchain_vector_store_embedding_search.md) | Use LangChain Vector Stores to search a vector store with a semantic search. This will take text, run it through an embedding model with a query embedding and then find the closest matches in the store. | diff --git a/docs/components/langchain_chat_model.md b/docs/components/langchain_chat_model.md index b65dea59..fda44bf6 100644 --- a/docs/components/langchain_chat_model.md +++ b/docs/components/langchain_chat_model.md @@ -11,6 +11,9 @@ component_config: langchain_module: langchain_class: langchain_component_config: + llm_mode: + stream_to_flow: + stream_batch_size: llm_response_format: ``` @@ -19,6 +22,9 @@ component_config: | langchain_module | True | | The chat model module - e.g. 'langchain_openai.chat_models' | | langchain_class | True | | The chat model class to use - e.g. ChatOpenAI | | langchain_component_config | True | | Model specific configuration for the chat model. See documentation for valid parameter names. | +| llm_mode | False | | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. | +| stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | | llm_response_format | False | | The response format for this LLM request. This can be 'json', 'yaml', or 'text'. If set to 'json' or 'yaml', the response will be parsed by the appropriate parser and the fields will be available in the response object. If set to 'text', the response will be returned as a string. | diff --git a/docs/components/langchain_chat_model_with_history.md b/docs/components/langchain_chat_model_with_history.md index 8686061e..e7483225 100644 --- a/docs/components/langchain_chat_model_with_history.md +++ b/docs/components/langchain_chat_model_with_history.md @@ -11,6 +11,9 @@ component_config: langchain_module: langchain_class: langchain_component_config: + llm_mode: + stream_to_flow: + stream_batch_size: llm_response_format: history_max_turns: history_max_message_size: @@ -27,6 +30,9 @@ component_config: | langchain_module | True | | The chat model module - e.g. 'langchain_openai.chat_models' | | langchain_class | True | | The chat model class to use - e.g. ChatOpenAI | | langchain_component_config | True | | Model specific configuration for the chat model. See documentation for valid parameter names. | +| llm_mode | False | | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. | +| stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | | llm_response_format | False | | The response format for this LLM request. This can be 'json', 'yaml', or 'text'. If set to 'json' or 'yaml', the response will be parsed by the appropriate parser and the fields will be available in the response object. If set to 'text', the response will be returned as a string. | | history_max_turns | False | 20 | The maximum number of turns to keep in the history. If not set, the history will be limited to 20 turns. 
| | history_max_message_size | False | 1000 | The maximum amount of characters to keep in a single message in the history. | diff --git a/docs/components/langchain_split_text.md b/docs/components/langchain_split_text.md new file mode 100644 index 00000000..aa313039 --- /dev/null +++ b/docs/components/langchain_split_text.md @@ -0,0 +1,42 @@ +# LangChainTextSplitter + +Split a long text into smaller parts using the LangChain text splitter module + +## Configuration Parameters + +```yaml +component_name: +component_module: langchain_split_text +component_config: + langchain_module: + langchain_class: + langchain_component_config: +``` + +| Parameter | Required | Default | Description | +| --- | --- | --- | --- | +| langchain_module | True | | The text split module - e.g. 'langchain_text_splitters' | +| langchain_class | True | | The text split class to use - e.g. TokenTextSplitter | +| langchain_component_config | True | | Model specific configuration for the text splitting. See documentation for valid parameter names.https://python.langchain.com/docs/how_to/split_by_token/#nltk | + + +## Component Input Schema + +``` +{ + text: +} +``` +| Field | Required | Description | +| --- | --- | --- | +| text | True | | + + +## Component Output Schema + +``` +[ + , + ... +] +``` diff --git a/docs/components/litellm_chat_model.md b/docs/components/litellm_chat_model.md index acd19851..e617a772 100644 --- a/docs/components/litellm_chat_model.md +++ b/docs/components/litellm_chat_model.md @@ -11,11 +11,11 @@ component_config: load_balancer: embedding_params: temperature: + set_response_uuid_in_user_properties: stream_to_flow: stream_to_next_component: llm_mode: stream_batch_size: - set_response_uuid_in_user_properties: history_max_turns: history_max_time: history_max_turns: @@ -31,11 +31,11 @@ component_config: | load_balancer | False | | Add a list of models to load balancer. | | embedding_params | False | | LiteLLM model parameters. The model, api_key and base_url are mandatory.find more models at https://docs.litellm.ai/docs/providersfind more parameters at https://docs.litellm.ai/docs/completion/input | | temperature | False | 0.7 | Sampling temperature to use | +| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | -| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | | stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | -| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. 
| | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | | history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | @@ -57,6 +57,7 @@ component_config: }, ... ], + stream: , clear_history_but_keep_depth: } ``` @@ -65,6 +66,7 @@ component_config: | messages | True | | | messages[].role | True | | | messages[].content | True | | +| stream | False | Whether to stream the response - overwrites llm_mode | | clear_history_but_keep_depth | False | Clear history but keep the last N messages. If 0, clear all history. If not set, do not clear history. | diff --git a/docs/components/litellm_chat_model_with_history.md b/docs/components/litellm_chat_model_with_history.md index 29aa640c..67ca587b 100644 --- a/docs/components/litellm_chat_model_with_history.md +++ b/docs/components/litellm_chat_model_with_history.md @@ -11,11 +11,11 @@ component_config: load_balancer: embedding_params: temperature: + set_response_uuid_in_user_properties: stream_to_flow: stream_to_next_component: llm_mode: stream_batch_size: - set_response_uuid_in_user_properties: history_max_turns: history_max_time: history_max_turns: @@ -27,11 +27,11 @@ component_config: | load_balancer | False | | Add a list of models to load balancer. | | embedding_params | False | | LiteLLM model parameters. The model, api_key and base_url are mandatory.find more models at https://docs.litellm.ai/docs/providersfind more parameters at https://docs.litellm.ai/docs/completion/input | | temperature | False | 0.7 | Sampling temperature to use | +| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | -| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | | stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | -| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | | history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | @@ -49,6 +49,7 @@ component_config: }, ... 
], + stream: , clear_history_but_keep_depth: } ``` @@ -57,6 +58,7 @@ component_config: | messages | True | | | messages[].role | True | | | messages[].content | True | | +| stream | False | Whether to stream the response - overwrites llm_mode | | clear_history_but_keep_depth | False | Clear history but keep the last N messages. If 0, clear all history. If not set, do not clear history. | diff --git a/docs/components/litellm_embeddings.md b/docs/components/litellm_embeddings.md index 83930083..4e3e739e 100644 --- a/docs/components/litellm_embeddings.md +++ b/docs/components/litellm_embeddings.md @@ -11,11 +11,11 @@ component_config: load_balancer: embedding_params: temperature: + set_response_uuid_in_user_properties: stream_to_flow: stream_to_next_component: llm_mode: stream_batch_size: - set_response_uuid_in_user_properties: history_max_turns: history_max_time: history_max_turns: @@ -31,11 +31,11 @@ component_config: | load_balancer | False | | Add a list of models to load balancer. | | embedding_params | False | | LiteLLM model parameters. The model, api_key and base_url are mandatory.find more models at https://docs.litellm.ai/docs/providersfind more parameters at https://docs.litellm.ai/docs/completion/input | | temperature | False | 0.7 | Sampling temperature to use | +| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | -| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | | stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | -| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | | history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | diff --git a/docs/components/openai_chat_model.md b/docs/components/openai_chat_model.md index e41c6692..62120978 100644 --- a/docs/components/openai_chat_model.md +++ b/docs/components/openai_chat_model.md @@ -27,7 +27,7 @@ component_config: | base_url | False | None | Base URL for OpenAI API | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | -| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 
'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | | stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | | set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | @@ -42,7 +42,8 @@ component_config: content: }, ... - ] + ], + stream: } ``` | Field | Required | Description | @@ -50,6 +51,7 @@ component_config: | messages | True | | | messages[].role | True | | | messages[].content | True | | +| stream | False | Whether to stream the response. If it is not provided, it will default to the value of llm_mode. | ## Component Output Schema diff --git a/docs/components/openai_chat_model_with_history.md b/docs/components/openai_chat_model_with_history.md index 9c7c4dc3..ce306a5b 100644 --- a/docs/components/openai_chat_model_with_history.md +++ b/docs/components/openai_chat_model_with_history.md @@ -29,7 +29,7 @@ component_config: | base_url | False | None | Base URL for OpenAI API | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | -| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | | stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | | set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | @@ -47,6 +47,7 @@ component_config: }, ... ], + stream: , clear_history_but_keep_depth: } ``` @@ -55,6 +56,7 @@ component_config: | messages | True | | | messages[].role | True | | | messages[].content | True | | +| stream | False | Whether to stream the response. If it is not provided, it will default to the value of llm_mode. | | clear_history_but_keep_depth | False | Clear history but keep the last N messages. If 0, clear all history. If not set, do not clear history.
| diff --git a/examples/llm/openai_chroma_rag.yaml b/examples/llm/openai_chroma_rag.yaml index f78bfc09..1e0f0385 100644 --- a/examples/llm/openai_chroma_rag.yaml +++ b/examples/llm/openai_chroma_rag.yaml @@ -180,4 +180,4 @@ flows: source_expression: template:{{text://input.topic}}/response dest_expression: user_data.output:topic input_selection: - source_expression: user_data.output + source_expression: user_data.output \ No newline at end of file diff --git a/examples/llm/openai_chroma_rag_with_splitter.yaml b/examples/llm/openai_chroma_rag_with_splitter.yaml new file mode 100644 index 00000000..57c29d55 --- /dev/null +++ b/examples/llm/openai_chroma_rag_with_splitter.yaml @@ -0,0 +1,217 @@ +# OpenAI RAG (Retrieval Augmented Generation) example using ChromaDB +# This will create 2 flows like these: +# +# Solace[topic:demo/rag/data] -> embed and store in ChromaDB +# Solace[topic:demo/rag/query] -> search in ChromaDB -> OpenAI -> Solace[topic:demo/rag/query/response] +# +# Load Data: +# Send data to Solace topic `demo/rag/data` with the following payload format: +# { +# "text": text +# } +# +# RAG Query: +# Send query to Solace topic `demo/rag/query` with the following payload format: +# { +# "query": "" +# } +# The response will be sent to Solace topic `demo/rag/query/response` +# +# Dependencies: +# pip install -U langchain_openai openai chromadb langchain-chroma +# +# Required ENV variables: +# - OPENAI_API_KEY +# - OPENAI_API_ENDPOINT +# - OPENAI_EMBEDDING_MODEL_NAME +# - OPENAI_MODEL_NAME +# - SOLACE_BROKER_URL +# - SOLACE_BROKER_USERNAME +# - SOLACE_BROKER_PASSWORD +# - SOLACE_BROKER_VPN + +--- +log: + stdout_log_level: INFO + log_file_level: INFO + log_file: solace_ai_connector.log + +shared_config: + - broker_config: &broker_connection + broker_type: solace + broker_url: ${SOLACE_BROKER_URL} + broker_username: ${SOLACE_BROKER_USERNAME} + broker_password: ${SOLACE_BROKER_PASSWORD} + broker_vpn: ${SOLACE_BROKER_VPN} + +# Data ingestion and augmented inference flows +flows: + # Data ingestion to chromaDB for RAG + - name: chroma_ingest + components: + # Data Input from a Solace broker for ingestion + - component_name: solace_data_input + component_module: broker_input + component_config: + <<: *broker_connection + broker_queue_name: demo_rag_data + broker_subscriptions: + - topic: demo/rag/data + qos: 1 + payload_encoding: utf-8 + payload_format: json + + # Split text + - component_name: text_splitter + component_module: langchain_split_text + component_config: + langchain_module: langchain_text_splitters + langchain_class: TokenTextSplitter + langchain_component_config: + chunk_size: 10 + chunk_overlap: 1 + input_transforms: + - type: copy + source_expression: input.payload:text + dest_expression: user_data.input:text + input_selection: + source_expression: user_data.input + + # Embedding data & ChromaDB ingest + - component_name: chroma_embed + component_module: langchain_vector_store_embedding_index + component_config: + vector_store_component_path: langchain_chroma + vector_store_component_name: Chroma + vector_store_component_config: + persist_directory: ./chroma_data + collection_name: rag + embedding_component_path: langchain_openai + embedding_component_name: OpenAIEmbeddings + embedding_component_config: + api_key: ${OPENAI_API_KEY} + base_url: ${OPENAI_API_ENDPOINT} + model: ${OPENAI_EMBEDDING_MODEL_NAME} + input_transforms: + - type: copy + source_value: topic:demo/rag/data + dest_expression: user_data.vector_input:metadatas.source + - type: copy + source_expression: 
previous + dest_expression: user_data.vector_input:texts + input_selection: + source_expression: user_data.vector_input + + # Send response back to broker + - component_name: send_response + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + - type: copy + source_expression: previous + dest_expression: user_data.output:payload + - type: copy + source_expression: template:demo/rag/response + dest_expression: user_data.output:topic + input_selection: + source_expression: user_data.output + + # RAG Inference flow + - name: OpenAI_RAG + components: + # Inference Input from a Solace broker for completion + - component_name: solace_completion_broker + component_module: broker_input + component_config: + <<: *broker_connection + broker_queue_name: demo_rag_query + broker_subscriptions: + - topic: demo/rag/query + qos: 1 + payload_encoding: utf-8 + payload_format: json + + # Retrieve the top-k documents from ChromaDB + - component_name: chroma_search + component_module: langchain_vector_store_embedding_search + component_config: + vector_store_component_path: langchain_chroma + vector_store_component_name: Chroma + vector_store_component_config: + persist_directory: ./chroma_data + collection_name: rag + embedding_component_path: langchain_openai + embedding_component_name: OpenAIEmbeddings + embedding_component_config: + api_key: ${OPENAI_API_KEY} + base_url: ${OPENAI_API_ENDPOINT} + model: ${OPENAI_EMBEDDING_MODEL_NAME} + max_results: 5 + input_transforms: + - type: copy + source_expression: input.payload:query + dest_expression: user_data.vector_input:text + input_selection: + source_expression: user_data.vector_input + + # Generate response using the retrieved data + - component_name: llm_request + component_module: openai_chat_model + component_config: + api_key: ${OPENAI_API_KEY} + base_url: ${OPENAI_API_ENDPOINT} + model: ${OPENAI_MODEL_NAME} + temperature: 0.01 + input_transforms: + # Extract and format the retrieved data + - type: map + source_list_expression: previous:result + source_expression: | + template:{{text://item:text}}\n\n + dest_list_expression: user_data.retrieved_data + + - type: copy + source_expression: | + template:You are a helpful AI assistant. Using the provided context, help with the user's request below. Refrain from using any knowledge outside of the provided context. If the user's query cannot be answered using the provided context, reject the user's query.
+ + + {{text://user_data.retrieved_data}} + + + + {{text://input.payload:query}} + + dest_expression: user_data.llm_input:messages.0.content + - type: copy + source_expression: static:user + dest_expression: user_data.llm_input:messages.0.role + input_selection: + source_expression: user_data.llm_input + + # Send response back to broker with completion and retrieved data + - component_name: send_response + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + - type: copy + source_expression: previous:content + dest_expression: user_data.output:payload.response + - type: copy + source_expression: input.payload:query + dest_expression: user_data.output:payload.query + - type: copy + source_expression: user_data.retrieved_data + dest_expression: user_data.output:payload.retrieved_data + - type: copy + source_expression: template:{{text://input.topic}}/response + dest_expression: user_data.output:topic + input_selection: + source_expression: user_data.output diff --git a/src/solace_ai_connector/components/general/llm/langchain/langchain_split_text.py b/src/solace_ai_connector/components/general/llm/langchain/langchain_split_text.py new file mode 100644 index 00000000..c5eac049 --- /dev/null +++ b/src/solace_ai_connector/components/general/llm/langchain/langchain_split_text.py @@ -0,0 +1,90 @@ +# This component splits a long text into smaller parts using the LangChain text splitter module + +from .....common.log import log + +from .langchain_base import ( + LangChainBase, +) + + +info = { + "class_name": "LangChainTextSplitter", + "description": "Split a long text into smaller parts using the LangChain text splitter module", + "config_parameters": [ + { + "name": "langchain_module", + "required": True, + "description": "The text split module - e.g. 'langchain_text_splitters'", + }, + { + "name": "langchain_class", + "required": True, + "description": "The text split class to use - e.g. TokenTextSplitter", + }, + { + "name": "langchain_component_config", + "required": True, + "description": "Model specific configuration for the text splitting. " + "See documentation for valid parameter names." + "https://python.langchain.com/docs/how_to/split_by_token/#nltk", + }, + ], + "input_schema": { + "type": "object", + "properties": { + "text": { + "type": "string", + }, + }, + "required": ["text"], + }, + "output_schema": { + "type": "array", + "items": { + "type": "string", + }, + "description": ("A list of the split text"), + }, +} + + +class LangChainTextSplitter(LangChainBase): + """ + A class to split a long text into smaller parts using the LangChain text splitter module. + + This class inherits from LangChainBase and utilizes the LangChain text splitter module + to divide a given text into smaller segments based on the specified configuration. + """ + + def __init__(self, **kwargs): + """ + Initialize the LangChainTextSplitter with the provided configuration. + + Args: + **kwargs: Arbitrary keyword arguments containing configuration parameters. + """ + super().__init__(info, **kwargs) + + def invoke(self, message, data): + """ + Split the provided text into smaller parts using the LangChain text splitter module. + + Args: + message (Message): The message object containing metadata. + data (dict): A dictionary containing the input text to be split. + + Returns: + list: A list of strings representing the split text segments. 
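+ + Example (illustrative sketch only; the exact chunks depend entirely on the configured splitter class and its chunk_size/chunk_overlap): + invoke(message, {"text": "a long document to split"}) + -> a list of string chunks, e.g. ["a long docum", "ent to split"]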
+ """ + if "text" not in data: + log.error("Text not provided in input data") + return [] + + try: + text = data.get("text") + texts = self.component.split_text(text) + log.debug(f"Split text: {texts}") + return texts + except Exception as e: + log.error(f"Error splitting text: {str(e)}") + return [] From f04ad515d9f2d5c4eb820b0b4fb492ebcd6090fa Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Wed, 11 Dec 2024 10:38:45 -0500 Subject: [PATCH 07/19] AI-354: Add configuration for broker-request-response for placing the reply topic in the message (#74) * If requested, insert the response topic according to the response_topic_insertion_expression * More fixes after testing --- .../components/component_base.py | 11 ++--- .../inputs_outputs/broker_request_response.py | 48 +++++++++++++++++-- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 63dd8fb3..1930df09 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -304,14 +304,9 @@ def setup_broker_request_response(self): "request_expiry_ms": request_expiry_ms, } - if "response_topic_prefix" in self.broker_request_response_config: - rrc_config["response_topic_prefix"] = self.broker_request_response_config[ - "response_topic_prefix" - ] - if "response_queue_prefix" in self.broker_request_response_config: - rrc_config["response_queue_prefix"] = self.broker_request_response_config[ - "response_queue_prefix" - ] + for key in ["response_topic_prefix", "response_queue_prefix", "response_topic_insertion_expression"]: + if key in self.broker_request_response_config: + rrc_config[key] = self.broker_request_response_config[key] self.broker_request_response_controller = RequestResponseFlowController( config=rrc_config, connector=self.connector diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py b/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py index 55b004e5..2fdb05fc 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py @@ -6,14 +6,11 @@ import queue from copy import deepcopy -# from typing import Dict, Any - from ...common.log import log from .broker_base import BrokerBase from ...common.message import Message from ...common.utils import ensure_slash_on_end, ensure_slash_on_start -# from ...common.event import Event, EventType info = { "class_name": "BrokerRequestResponse", @@ -72,6 +69,21 @@ "description": "Suffix for reply topics", "default": "", }, + { + "name": "response_topic_insertion_expression", + "required": False, + "description": ( + "Expression to insert the reply topic into the " + "request message. " + "If not set, the reply topic will only be added to the " + "request_response_metadata. The expression uses the " + "same format as other data expressions: " + "(e.g input.payload:myObj.replyTopic). " + "If there is no object type in the expression, " + "it will default to 'input.payload'." 
+ ), + "default": "", + }, { "name": "response_queue_prefix", "required": False, @@ -183,6 +195,7 @@ class BrokerRequestResponse(BrokerBase): + """Request-Response broker component for the Solace AI Event Connector""" def __init__(self, **kwargs): super().__init__(info, **kwargs) @@ -220,6 +233,15 @@ def __init__(self, **kwargs): ] self.test_mode = False + self.response_topic_insertion_expression = self.get_config( + "response_topic_insertion_expression" + ) + if self.response_topic_insertion_expression: + if ":" not in self.response_topic_insertion_expression: + self.response_topic_insertion_expression = ( + f"input.payload:{self.response_topic_insertion_expression}" + ) + if self.broker_type == "test" or self.broker_type == "test_streaming": self.test_mode = True self.setup_test_pass_through() @@ -283,6 +305,10 @@ def process_response(self, broker_message): topic = broker_message.get("topic") user_properties = broker_message.get("user_properties", {}) + if not user_properties: + log.error("Received response without user properties: %s", payload) + return + streaming_complete_expression = None metadata_json = user_properties.get( "__solace_ai_connector_broker_request_reply_metadata__" @@ -416,7 +442,21 @@ def invoke(self, message, data): ] = json.dumps(metadata) data["user_properties"][ "__solace_ai_connector_broker_request_response_topic__" - ] = self.response_topic + ] = topic + + # If we are configured to also insert the response topic into the request message + # then create a temporary message to do so + if self.response_topic_insertion_expression: + tmp_message = Message( + payload=data["payload"], + user_properties=data["user_properties"], + topic=data["topic"], + ) + tmp_message.set_data( + self.response_topic_insertion_expression, self.response_topic + ) + data["payload"] = tmp_message.get_payload() + data["user_properties"] = tmp_message.get_user_properties() if self.test_mode: if self.broker_type == "test_streaming": From 27fe0215b9795c6d1333abd4fb7aa63cd8892b5a Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Thu, 12 Dec 2024 15:14:50 -0500 Subject: [PATCH 08/19] Add a traceback if there is an error during startup (#73) * FEATURE: Enable stream overwrite for LLM Chat at the event level (#66) * Allowing stream overwrite at event level for LLM Chat * Added overwrite flag * AI-95: Enhance request/response handling for streaming LLM access (#69) * Changes for request/response for streaming LLM access * Updated with main * update --------- Co-authored-by: Edward Funnekotter * Include stack dump if there is an error on startup --------- Co-authored-by: Art Morozov Co-authored-by: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> --- src/solace_ai_connector/solace_ai_connector.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index ec2d5f07..0153621f 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -2,6 +2,7 @@ import threading import queue +import traceback from datetime import datetime from typing import List @@ -47,6 +48,7 @@ def run(self): log.info("Solace AI Event Connector started successfully") except Exception as e: log.error("Error during Solace AI Event Connector startup: %s", str(e)) + log.error("Traceback: %s", traceback.format_exc()) self.stop() self.cleanup() raise From 299c04a9b20250cb5461920ee56d8708ecbf0bd8 Mon Sep 17 00:00:00 2001 From: Cyrus Mobini 
<68962752+cyrus2281@users.noreply.github.com> Date: Fri, 20 Dec 2024 09:56:36 -0500 Subject: [PATCH 09/19] JDE: Add MongoDB insert component. (#78) * Added mongodb insert component * type * added search component * applied comments * updated docs --- docs/components/index.md | 3 + docs/components/mongo_base.md | 42 ++++ docs/components/mongo_insert.md | 42 ++++ docs/components/mongo_search.md | 42 ++++ pyproject.toml | 1 + src/solace_ai_connector/common/utils.py | 1 + .../components/general/db/__init__.py | 0 .../components/general/db/mongo/__init__.py | 0 .../components/general/db/mongo/mongo_base.py | 88 +++++++++ .../general/db/mongo/mongo_handler.py | 179 ++++++++++++++++++ .../general/db/mongo/mongo_insert.py | 21 ++ .../general/db/mongo/mongo_search.py | 38 ++++ 12 files changed, 457 insertions(+) create mode 100644 docs/components/mongo_base.md create mode 100644 docs/components/mongo_insert.md create mode 100644 docs/components/mongo_search.md create mode 100644 src/solace_ai_connector/components/general/db/__init__.py create mode 100644 src/solace_ai_connector/components/general/db/mongo/__init__.py create mode 100644 src/solace_ai_connector/components/general/db/mongo/mongo_base.py create mode 100644 src/solace_ai_connector/components/general/db/mongo/mongo_handler.py create mode 100644 src/solace_ai_connector/components/general/db/mongo/mongo_insert.py create mode 100644 src/solace_ai_connector/components/general/db/mongo/mongo_search.py diff --git a/docs/components/index.md b/docs/components/index.md index 292ada2e..ba760a0b 100644 --- a/docs/components/index.md +++ b/docs/components/index.md @@ -22,6 +22,9 @@ | [litellm_chat_model_with_history](litellm_chat_model_with_history.md) | LiteLLM model handler component with conversation history | | [litellm_embeddings](litellm_embeddings.md) | Embed text using a LiteLLM model | | [message_filter](message_filter.md) | A filtering component. This will apply a user configurable expression. If the expression evaluates to True, the message will be passed on. If the expression evaluates to False, the message will be discarded. If the message is discarded, any previous components that require an acknowledgement will be acknowledged. | +| [mongo_base](mongo_base.md) | Base MongoDB database component | +| [mongo_insert](mongo_insert.md) | Inserts data into a MongoDB database. | +| [mongo_search](mongo_search.md) | Searches a MongoDB database. | | [openai_chat_model](openai_chat_model.md) | OpenAI chat model component | | [openai_chat_model_with_history](openai_chat_model_with_history.md) | OpenAI chat model component with conversation history | | [parser](parser.md) | Parse input from the given type to output type. 
| diff --git a/docs/components/mongo_base.md b/docs/components/mongo_base.md new file mode 100644 index 00000000..d0e977b5 --- /dev/null +++ b/docs/components/mongo_base.md @@ -0,0 +1,42 @@ +# MongoDBBaseComponent + +Base MongoDB database component + +## Configuration Parameters + +```yaml +component_name: +component_module: mongo_base +component_config: + database_host: + database_port: + database_user: + database_password: + database_name: + database_collection: +``` + +| Parameter | Required | Default | Description | +| --- | --- | --- | --- | +| database_host | True | | MongoDB host | +| database_port | True | | MongoDB port | +| database_user | False | | MongoDB user | +| database_password | False | | MongoDB password | +| database_name | True | | Database name | +| database_collection | False | | Collection name - if not provided, all collections will be used | + + +## Component Input Schema + +``` +{ + collection: , + query: { + + } +} +``` +| Field | Required | Description | +| --- | --- | --- | +| collection | False | The collection to search in. | +| query | False | The query pipeline to execute. if string is provided, it will be converted to JSON. | diff --git a/docs/components/mongo_insert.md b/docs/components/mongo_insert.md new file mode 100644 index 00000000..23b18be8 --- /dev/null +++ b/docs/components/mongo_insert.md @@ -0,0 +1,42 @@ +# MongoDBInsertComponent + +Inserts data into a MongoDB database. + +## Configuration Parameters + +```yaml +component_name: +component_module: mongo_insert +component_config: + database_host: + database_port: + database_user: + database_password: + database_name: + database_collection: +``` + +| Parameter | Required | Default | Description | +| --- | --- | --- | --- | +| database_host | True | | MongoDB host | +| database_port | True | | MongoDB port | +| database_user | False | | MongoDB user | +| database_password | False | | MongoDB password | +| database_name | True | | Database name | +| database_collection | False | | Collection name - if not provided, all collections will be used | + + +## Component Input Schema + +``` +{ + collection: , + query: { + + } +} +``` +| Field | Required | Description | +| --- | --- | --- | +| collection | False | The collection to search in. | +| query | False | The query pipeline to execute. if string is provided, it will be converted to JSON. | diff --git a/docs/components/mongo_search.md b/docs/components/mongo_search.md new file mode 100644 index 00000000..f27b6cab --- /dev/null +++ b/docs/components/mongo_search.md @@ -0,0 +1,42 @@ +# MongoDBSearchComponent + +Searches a MongoDB database. + +## Configuration Parameters + +```yaml +component_name: +component_module: mongo_search +component_config: + database_host: + database_port: + database_user: + database_password: + database_name: + database_collection: +``` + +| Parameter | Required | Default | Description | +| --- | --- | --- | --- | +| database_host | True | | MongoDB host | +| database_port | True | | MongoDB port | +| database_user | False | | MongoDB user | +| database_password | False | | MongoDB password | +| database_name | True | | Database name | +| database_collection | False | | Collection name - if not provided, all collections will be used | + + +## Component Input Schema + +``` +{ + collection: , + query: { + + } +} +``` +| Field | Required | Description | +| --- | --- | --- | +| collection | False | The collection to search in. | +| query | False | The query pipeline to execute. if string is provided, it will be converted to JSON. 
| diff --git a/pyproject.toml b/pyproject.toml index e24e4284..9f8820e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "Flask~=3.0.3", "Flask-SocketIO~=5.4.1", "build~=1.2.2.post1", + "pymongo~=4.10.1", ] [project.urls] diff --git a/src/solace_ai_connector/common/utils.py b/src/solace_ai_connector/common/utils.py index aaf9ee4f..19bbf3fd 100755 --- a/src/solace_ai_connector/common/utils.py +++ b/src/solace_ai_connector/common/utils.py @@ -128,6 +128,7 @@ def import_module(module, base_path=None, component_package=None): ".components.general.llm.langchain", ".components.general.llm.openai", ".components.general.llm.litellm", + ".components.general.db.mongo", ".components.general.websearch", ".components.inputs_outputs", ".transforms", diff --git a/src/solace_ai_connector/components/general/db/__init__.py b/src/solace_ai_connector/components/general/db/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/solace_ai_connector/components/general/db/mongo/__init__.py b/src/solace_ai_connector/components/general/db/mongo/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/solace_ai_connector/components/general/db/mongo/mongo_base.py b/src/solace_ai_connector/components/general/db/mongo/mongo_base.py new file mode 100644 index 00000000..fec56c22 --- /dev/null +++ b/src/solace_ai_connector/components/general/db/mongo/mongo_base.py @@ -0,0 +1,88 @@ +"""MongoDB Agent Component for handling database search.""" + +from .mongo_handler import MongoHandler +from ....component_base import ComponentBase + + +info = { + "class_name": "MongoDBBaseComponent", + "description": "Base MongoDB database component", + "config_parameters": [ + { + "name": "database_host", + "required": True, + "description": "MongoDB host", + "type": "string", + }, + { + "name": "database_port", + "required": True, + "description": "MongoDB port", + "type": "integer", + }, + { + "name": "database_user", + "required": False, + "description": "MongoDB user", + "type": "string", + }, + { + "name": "database_password", + "required": False, + "description": "MongoDB password", + "type": "string", + }, + { + "name": "database_name", + "required": True, + "description": "Database name", + "type": "string", + }, + { + "name": "database_collection", + "required": False, + "description": "Collection name - if not provided, all collections will be used", + }, + ], + "input_schema": { + "type": "object", + "properties": { + "collection": { + "type": "string", + "description": "The collection to search in.", + }, + "query": { + "type": "object", + "description": "The query pipeline to execute. if string is provided, it will be converted to JSON.", + } + }, + }, +} + + +class MongoDBBaseComponent(ComponentBase): + """Component for handling MongoDB database operations.""" + + def __init__(self, module_info, **kwargs): + """Initialize the MongoDB component. + + Args: + **kwargs: Additional keyword arguments. + + Raises: + ValueError: If required database configuration is missing. 
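+ + Example YAML configuration (illustrative values; the keys mirror this component's config_parameters): + component_module: mongo_insert + component_config: + database_host: localhost + database_port: 27017 + database_name: demo_db + database_collection: documents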
+ """ + super().__init__(module_info, **kwargs) + + # Initialize MongoDB handler + self.db_handler = MongoHandler( + self.get_config("database_host"), + self.get_config("database_port"), + self.get_config("database_user"), + self.get_config("database_password"), + self.get_config("database_collection"), + self.get_config("database_name"), + ) + + def invoke(self, message, data): + pass diff --git a/src/solace_ai_connector/components/general/db/mongo/mongo_handler.py b/src/solace_ai_connector/components/general/db/mongo/mongo_handler.py new file mode 100644 index 00000000..29716c76 --- /dev/null +++ b/src/solace_ai_connector/components/general/db/mongo/mongo_handler.py @@ -0,0 +1,179 @@ +"""MongoDB database handler for MongoDB agent.""" + +from pymongo import MongoClient +from typing import List, Dict, Any, Tuple +import threading + +from .....common.log import log + + +class MongoHandler: + """Handler for MongoDB database operations.""" + + def __init__(self, host, port, user, password, collection, database_name): + """Initialize the MongoDB handler. + + Args: + host: MongoDB host + port: MongoDB port + user: MongoDB user + password: MongoDB password + collection: Collection name + database_name: Database name + """ + self.host = host + self.port = port + self.user = user + self.password = password + self.collection = collection + self.database_name = database_name + self.local = threading.local() + + def get_connection(self): + """Get or create a thread-local database connection.""" + if not hasattr(self.local, 'client'): + try: + if self.user and self.password: + connection_string = f"mongodb://{self.user}:{self.password}@{self.host}:{self.port}" + else: + connection_string = f"mongodb://{self.host}:{self.port}" + + self.local.client = MongoClient(connection_string) + self.local.db = self.local.client[self.database_name] + log.info("Successfully connected to MongoDB database") + except Exception as e: + log.error("Error connecting to MongoDB database: %s", str(e)) + raise + return self.local.db + + def insert_documents(self, documents: List[Dict[str, Any]], collection: str = None) -> List[str]: + if not documents: + return [] + if not collection: + log.debug("No collection specified, using default collection: %s", self.collection) + collection = self.collection + if not isinstance(documents, dict) and not isinstance(documents, list): + log.error("Documents must be a dictionary or list of dictionaries") + raise ValueError("Documents must be a dictionary or list of dictionaries") + if isinstance(documents, dict): + documents = [documents] + if not documents or not isinstance(documents[0], dict): + log.error("Documents must be a dictionary or list of dictionaries") + raise ValueError("Documents must be a dictionary or list of dictionaries") + db = self.get_connection() + result = db[collection].insert_many(documents) + log.debug("Successfully inserted %d documents into %s", len(result.inserted_ids), collection) + return result.inserted_ids + + def execute_query(self, collection: str, pipeline: List[Dict]) -> List[Dict[str, Any]]: + """Execute an aggregation pipeline on MongoDB. + + Args: + collection: Name of the collection to query + pipeline: List of aggregation pipeline stages + + Returns: + List of dictionaries containing the query results. + + Raises: + Exception: If there's an error executing the pipeline. + ValueError: If pipeline is not a valid aggregation pipeline. 
+ """ + if not isinstance(pipeline, list): + raise ValueError("Pipeline must be a list of aggregation stages") + + # Validate each pipeline stage + for stage in pipeline: + if not isinstance(stage, dict) or not stage: + log.error("Each pipeline stage must be a non-empty dictionary") + raise ValueError("Each pipeline stage must be a non-empty dictionary") + if not any(key.startswith('$') for key in stage.keys()): + log.error("Invalid pipeline stage: %s. Each stage must start with '$'", stage) + raise ValueError(f"Invalid pipeline stage: {stage}. Each stage must start with '$'") + + try: + db = self.get_connection() + if not collection: + log.debug("No collection specified, using default collection: %s", self.collection) + collection = self.collection + cursor = db[collection].aggregate(pipeline) + result = list(cursor) + result = self._remove_object_ids(result) + return result + except Exception as e: + log.error("Error executing MongoDB query: %s", str(e)) + raise Exception(f"Failed to execute MongoDB query: {str(e)}") + + def get_collections(self) -> List[str]: + """Get all collection names in the database. + + Returns: + List of collection names. + """ + db = self.get_connection() + return db.list_collection_names() + + def get_fields(self, collection: str) -> List[str]: + """Get all field names for a given collection. + + Args: + collection: Name of the collection. + + Returns: + List of field names. + """ + db = self.get_connection() + # Sample a few documents to get field names + pipeline = [ + {"$sample": {"size": 100}}, + {"$project": {"arrayofkeyvalue": {"$objectToArray": "$$ROOT"}}}, + {"$unwind": "$arrayofkeyvalue"}, + {"$group": {"_id": None, "allkeys": {"$addToSet": "$arrayofkeyvalue.k"}}} + ] + result = list(db[collection].aggregate(pipeline)) + if result: + # Remove _id from fields list as it's always present + fields = [f for f in result[0]["allkeys"] if f != "_id"] + return sorted(fields) + return [] + + def get_sample_values(self, collection: str, field: str, min: int = 3, max: int = 10) -> Tuple[List[str], bool]: + """Get unique sample values for a given field in a collection. If the number of unique values is less than + the maximum, return all unique values. Otherwise, return a random sample of unique values up to the manimum. + + Args: + collection: Name of the collection. + field: Name of the field. + limit: Maximum number of unique values to return. + + Returns: + List of unique sample values as strings, + and a boolean indicating whether all unique values were returned. + """ + db = self.get_connection() + pipeline = [ + {"$match": {field: {"$exists": True}}}, + {"$group": {"_id": f"${field}"}}, + {"$sample": {"size": max+1}}, + {"$project": {"value": "$_id", "_id": 0}} + ] + + results = list(db[collection].aggregate(pipeline)) + if len(results) > max: + return [str(result["value"]) for result in results[:min]], False + + return [str(result["value"]) for result in results], True + + def _remove_object_ids(self, results: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Remove the _id field from a list of MongoDB documents. + + Args: + results: List of MongoDB documents. + + Returns: + List of MongoDB documents with the _id field removed. 
+ """ + for result in results: + if "_id" in result: + del result["_id"] + return results diff --git a/src/solace_ai_connector/components/general/db/mongo/mongo_insert.py b/src/solace_ai_connector/components/general/db/mongo/mongo_insert.py new file mode 100644 index 00000000..351b9657 --- /dev/null +++ b/src/solace_ai_connector/components/general/db/mongo/mongo_insert.py @@ -0,0 +1,21 @@ +"""MongoDB Agent Component for handling database insert.""" + +from .mongo_base import MongoDBBaseComponent, info as base_info + +info = base_info.copy() +info["class_name"] = "MongoDBInsertComponent" +info["description"] = "Inserts data into a MongoDB database." + + +class MongoDBInsertComponent(MongoDBBaseComponent): + """Component for handling MongoDB database operations.""" + + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + + def invoke(self, message, data): + if not data: + raise ValueError( + "Invalid data provided for MongoDB insert. Expected a dictionary or a list of dictionary." + ) + return self.db_handler.insert_documents(data) diff --git a/src/solace_ai_connector/components/general/db/mongo/mongo_search.py b/src/solace_ai_connector/components/general/db/mongo/mongo_search.py new file mode 100644 index 00000000..bdbc0e94 --- /dev/null +++ b/src/solace_ai_connector/components/general/db/mongo/mongo_search.py @@ -0,0 +1,38 @@ +"""MongoDB Agent Component for handling database search.""" + +import json + +from .mongo_base import MongoDBBaseComponent, info as base_info + +info = base_info.copy() +info["class_name"] = "MongoDBSearchComponent" +info["description"] = "Searches a MongoDB database." +info["input_schema"] = { + "type": "object", + "properties": { + "collection": { + "type": "string", + "description": "The collection to search in.", + }, + "query": { + "type": "object", + "description": "The query pipeline to execute. 
if string is provided, it will be converted to JSON.", + }, + }, +} + + +class MongoDBSearchComponent(MongoDBBaseComponent): + """Component for handling MongoDB database operations.""" + + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + + def invoke(self, message, data): + collection = data.get("collection") + query = data.get("query") + if not query: + raise ValueError("No query provided") + if isinstance(query, str): + query = json.loads(query) + return self.db_handler.execute_query(collection, query) From 8c42f7e8668f713bc0a28a3295882bd476d70797 Mon Sep 17 00:00:00 2001 From: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> Date: Fri, 3 Jan 2025 14:23:30 -0500 Subject: [PATCH 10/19] REQUEST-RESPONSE: Support custom keys for reply and metadata in request response user properties (#79) * Added the option to support custom keys for reply and metadata for request reponse user properties * fixed issue --- examples/request_reply.yaml | 5 +- pyproject.toml | 2 +- src/solace_ai_connector/common/message.py | 120 +----------- src/solace_ai_connector/common/utils.py | 182 ++++++++++++++++++ .../components/component_base.py | 10 +- .../inputs_outputs/broker_request_response.py | 82 ++++---- 6 files changed, 248 insertions(+), 153 deletions(-) diff --git a/examples/request_reply.yaml b/examples/request_reply.yaml index a6fb75cc..0480d700 100644 --- a/examples/request_reply.yaml +++ b/examples/request_reply.yaml @@ -33,6 +33,9 @@ flows: component_config: <<: *broker_connection request_expiry_ms: 30000 # 30 seconds + user_properties_reply_topic_key: :response.user.topic # nested with : + user_properties_reply_metadata_key: response.user.metadata # string literal + input_transforms: - type: copy source_expression: input.payload @@ -77,7 +80,7 @@ flows: source_expression: input.user_properties dest_expression: user_data.output:user_properties - type: copy - source_expression: input.user_properties:__solace_ai_connector_broker_request_response_topic__ + source_expression: input.user_properties:response.user.topic dest_expression: user_data.output:topic input_selection: source_expression: user_data.output diff --git a/pyproject.toml b/pyproject.toml index 9f8820e4..5d6a4a79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "PyYAML~=6.0.1", "Requests~=2.32.3", "solace_pubsubplus>=1.8.0", - "litellm~=1.51.3", + "litellm>=1.51.3", "Flask~=3.0.3", "Flask-SocketIO~=5.4.1", "build~=1.2.2.post1", diff --git a/src/solace_ai_connector/common/message.py b/src/solace_ai_connector/common/message.py index 73c0fbf6..45c1ce30 100644 --- a/src/solace_ai_connector/common/message.py +++ b/src/solace_ai_connector/common/message.py @@ -8,7 +8,7 @@ from .log import log from .trace_message import TraceMessage - +from .utils import set_data_value, get_data_value class Message: def __init__(self, payload=None, topic=None, user_properties=None): @@ -59,7 +59,7 @@ def get_data(self, expression, calling_object=None, data_type=None): if expression.startswith("static:"): return expression.split(":", 1)[1] data_object = self.get_data_object(expression, calling_object=calling_object) - data = self.get_data_value(data_object, expression) + data = get_data_value(data_object, expression) if data_type: data = self.convert_data_type(data, data_type) @@ -89,7 +89,7 @@ def set_data(self, expression, value): create_if_not_exists=True, create_value={} if not first_part.isdigit() else [], ) - self.set_data_value(data_object, expression, value) + set_data_value(data_object, 
expression, value) def get_data_object( self, @@ -158,120 +158,6 @@ def set_data_object(self, expression, value): f"Unknown data type '{data_type}' in expression '{expression}'" ) - def get_data_value(self, data_object, expression): - if ":" not in expression: - return data_object - - # If the data_object is a value, return it - if ( - not isinstance(data_object, dict) - and not isinstance(data_object, list) - and not isinstance(data_object, object) - ): - return data_object - - data_name = expression.split(":")[1] - - if data_name == "": - return data_object - - # Split the data_name by dots to get the path - path_parts = data_name.split(".") - - # Start with the entire data_object - current_data = data_object - - # Traverse the path - for part in path_parts: - # If the current data is a dictionary, get the value with the key 'part' - if isinstance(current_data, dict): - current_data = current_data.get(part) - # If the current data is a list and 'part' is a number, get the value at - # the index 'part' - elif isinstance(current_data, list) and part.isdigit(): - current_data = current_data[int(part)] - # If the current data is neither a dictionary nor a list, or if 'part' is - # not a number, return None - elif isinstance(current_data, object): - current_data = getattr(current_data, part, None) - else: - raise ValueError( - f"Could not get data value for expression '{expression}' - data " - "is not a dictionary or list" - ) - - # If at any point we get None, stop and return None - if current_data is None: - return None - - # Return the final data - return current_data - - # Similar to get_data_value, we need to use the expression to find the place to set the value - # except that we will create objects along the way if they don't exist - def set_data_value(self, data_object, expression, value): - data_name = expression.split(":")[1] - - # It is an error if the data_object is None or not a dictionary or list - if data_object is None: - raise ValueError( - f"Could not set data value for expression '{expression}' - data_object is None" - ) - if not isinstance(data_object, dict) and not isinstance(data_object, list): - raise ValueError( - f"Could not set data value for expression '{expression}' - data_object " - "is not a dictionary or list" - ) - - # It is an error if the data_name is empty - if data_name == "": - raise ValueError( - f"Could not set data value for expression '{expression}' - data_name is empty" - ) - - # Split the data_name by dots to get the path - path_parts = data_name.split(".") - - # Start with the entire data_object - current_data = data_object - - # Traverse the path - for i, part in enumerate(path_parts): - # If we're at the last part of the path, set the value - if i == len(path_parts) - 1: - if isinstance(current_data, dict): - current_data[part] = value - elif isinstance(current_data, list) and part.isdigit(): - while len(current_data) <= int(part): - current_data.append(None) - current_data[int(part)] = value - else: - log.error( - "Could not set data value for expression '%s' - " - "data is not a dictionary or list", - expression, - ) - # If we're not at the last part of the path, move to the next part - else: - next_part_is_digit = path_parts[i + 1].isdigit() - if isinstance(current_data, dict): - current_data = current_data.setdefault( - part, [] if next_part_is_digit else {} - ) - elif isinstance(current_data, list) and part.isdigit(): - while len(current_data) <= int(part): - current_data.append(None) - if current_data[int(part)] is None: - 
current_data[int(part)] = [] if next_part_is_digit else {} - current_data = current_data[int(part)] - else: - log.error( - "Could not set data value for expression '%s' - data " - "is not a dictionary or list", - expression, - ) - return - def set_iteration_data(self, item, index): self.iteration_data["item"] = item self.iteration_data["index"] = index diff --git a/src/solace_ai_connector/common/utils.py b/src/solace_ai_connector/common/utils.py index 19bbf3fd..5e5341ab 100755 --- a/src/solace_ai_connector/common/utils.py +++ b/src/solace_ai_connector/common/utils.py @@ -390,3 +390,185 @@ def decode_payload(payload, encoding, payload_format): payload = yaml.safe_load(payload) return payload + + +def get_data_value(data_object, expression, resolve_none_colon=False): + # If the data_object is a value, return it + if ( + not isinstance(data_object, dict) + and not isinstance(data_object, list) + and not isinstance(data_object, object) + ): + return data_object + + if ":" not in expression: + if resolve_none_colon: + return (data_object or {}).get(expression) + else: + return data_object + + data_name = expression.split(":")[1] + + if data_name == "": + return data_object + + # Split the data_name by dots to get the path + path_parts = data_name.split(".") + + # Start with the entire data_object + current_data = data_object + + # Traverse the path + for part in path_parts: + # If the current data is a dictionary, get the value with the key 'part' + if isinstance(current_data, dict): + current_data = current_data.get(part) + # If the current data is a list and 'part' is a number, get the value at + # the index 'part' + elif isinstance(current_data, list) and part.isdigit(): + current_data = current_data[int(part)] + # If the current data is neither a dictionary nor a list, or if 'part' is + # not a number, return None + elif isinstance(current_data, object): + current_data = getattr(current_data, part, None) + else: + raise ValueError( + f"Could not get data value for expression '{expression}' - data " + "is not a dictionary or list" + ) + + # If at any point we get None, stop and return None + if current_data is None: + return None + + # Return the final data + return current_data + +# Similar to get_data_value, we need to use the expression to find the place to set the value +# except that we will create objects along the way if they don't exist +def set_data_value(data_object, expression, value): + if ":" not in expression: + data_object[expression] = value + return + + data_name = expression.split(":")[1] + + # It is an error if the data_object is None or not a dictionary or list + if data_object is None: + raise ValueError( + f"Could not set data value for expression '{expression}' - data_object is None" + ) + if not isinstance(data_object, dict) and not isinstance(data_object, list): + raise ValueError( + f"Could not set data value for expression '{expression}' - data_object " + "is not a dictionary or list" + ) + + # It is an error if the data_name is empty + if data_name == "": + raise ValueError( + f"Could not set data value for expression '{expression}' - data_name is empty" + ) + + # Split the data_name by dots to get the path + path_parts = data_name.split(".") + + # Start with the entire data_object + current_data = data_object + + # Traverse the path + for i, part in enumerate(path_parts): + # If we're at the last part of the path, set the value + if i == len(path_parts) - 1: + if isinstance(current_data, dict): + current_data[part] = value + elif isinstance(current_data, list) 
and part.isdigit(): + while len(current_data) <= int(part): + current_data.append(None) + current_data[int(part)] = value + else: + log.error( + "Could not set data value for expression '%s' - " + "data is not a dictionary or list", + expression, + ) + # If we're not at the last part of the path, move to the next part + else: + next_part_is_digit = path_parts[i + 1].isdigit() + if isinstance(current_data, dict): + current_data = current_data.setdefault( + part, [] if next_part_is_digit else {} + ) + elif isinstance(current_data, list) and part.isdigit(): + while len(current_data) <= int(part): + current_data.append(None) + if current_data[int(part)] is None: + current_data[int(part)] = [] if next_part_is_digit else {} + current_data = current_data[int(part)] + else: + log.error( + "Could not set data value for expression '%s' - data " + "is not a dictionary or list", + expression, + ) + return + +def remove_data_value(data_object, expression): + if ":" not in expression: + data_object.pop(expression, None) + return + + data_name = expression.split(":")[1] + + # It is an error if the data_object is None or not a dictionary or list + if data_object is None: + raise ValueError( + f"Could not remove data value for expression '{expression}' - data_object is None" + ) + if not isinstance(data_object, dict) and not isinstance(data_object, list): + raise ValueError( + f"Could not remove data value for expression '{expression}' - data_object " + "is not a dictionary or list" + ) + + # It is an error if the data_name is empty + if data_name == "": + raise ValueError( + f"Could not remove data value for expression '{expression}' - data_name is empty" + ) + + # Split the data_name by dots to get the path + path_parts = data_name.split(".") + + # Start with the entire data_object + current_data = data_object + + # Traverse the path + for i, part in enumerate(path_parts): + # If we're at the last part of the path, remove the value + if i == len(path_parts) - 1: + if isinstance(current_data, dict): + current_data.pop(part, None) + elif isinstance(current_data, list) and part.isdigit(): + if len(current_data) > int(part): + current_data.pop(int(part)) + else: + log.error( + "Could not remove data value for expression '%s' - " + "data is not a dictionary or list", + expression, + ) + # If we're not at the last part of the path, move to the next part + else: + if isinstance(current_data, dict): + current_data = current_data.get(part, {}) + elif isinstance(current_data, list) and part.isdigit(): + if len(current_data) > int(part): + current_data = current_data[int(part)] + else: + log.error( + "Could not remove data value for expression '%s' - data " + "is not a dictionary or list", + expression, + ) + return \ No newline at end of file diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 1930df09..802252f3 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -304,7 +304,15 @@ def setup_broker_request_response(self): "request_expiry_ms": request_expiry_ms, } - for key in ["response_topic_prefix", "response_queue_prefix", "response_topic_insertion_expression"]: + optional_keys = [ + "response_topic_prefix", + "response_queue_prefix", + "user_properties_reply_topic_key", + "user_properties_reply_metadata_key", + "response_topic_insertion_expression", + ] + + for key in optional_keys: if key in self.broker_request_response_config: rrc_config[key] = 
self.broker_request_response_config[key] diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py b/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py index 2fdb05fc..f8888ad3 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py @@ -7,6 +7,7 @@ from copy import deepcopy from ...common.log import log +from ...common.utils import set_data_value, get_data_value, remove_data_value from .broker_base import BrokerBase from ...common.message import Message from ...common.utils import ensure_slash_on_end, ensure_slash_on_start @@ -90,6 +91,18 @@ "description": "Prefix for reply queues", "default": "reply-queue", }, + { + "name": "user_properties_reply_topic_key", + "required": False, + "description": "Key to store the reply topic in the user properties. Start with : for nested object", + "default": "__solace_ai_connector_broker_request_response_topic__", + }, + { + "name": "user_properties_reply_metadata_key", + "required": False, + "description": "Key to store the reply metadata in the user properties. Start with : for nested object", + "default": "__solace_ai_connector_broker_request_reply_metadata__", + }, { "name": "request_expiry_ms", "required": False, @@ -193,6 +206,9 @@ }, } +DEFAULT_REPLY_TOPIC_KEY = "__solace_ai_connector_broker_request_response_topic__" +DEFAULT_REPLY_METADATA_KEY = "__solace_ai_connector_broker_request_reply_metadata__" + class BrokerRequestResponse(BrokerBase): """Request-Response broker component for the Solace AI Event Connector""" @@ -210,6 +226,12 @@ def __init__(self, **kwargs): self.response_queue_prefix = ensure_slash_on_end( self.get_config("response_queue_prefix") ) + self.user_properties_reply_topic_key = self.get_config( + "user_properties_reply_topic_key", DEFAULT_REPLY_TOPIC_KEY + ) + self.user_properties_reply_metadata_key = self.get_config( + "user_properties_reply_metadata_key", DEFAULT_REPLY_METADATA_KEY + ) self.requestor_id = str(uuid.uuid4()) self.reply_queue_name = f"{self.response_queue_prefix}{self.requestor_id}" self.response_topic = f"{self.response_topic_prefix}{self.requestor_id}{self.response_topic_suffix}" @@ -310,9 +332,7 @@ def process_response(self, broker_message): return streaming_complete_expression = None - metadata_json = user_properties.get( - "__solace_ai_connector_broker_request_reply_metadata__" - ) + metadata_json = get_data_value(user_properties, self.user_properties_reply_metadata_key, True) if not metadata_json: log.error("Received response without metadata: %s", payload) return @@ -357,21 +377,21 @@ def process_response(self, broker_message): # Update the metadata in the response if metadata_stack: - response["user_properties"][ - "__solace_ai_connector_broker_request_reply_metadata__" - ] = json.dumps(metadata_stack) + set_data_value( + response["user_properties"], + self.user_properties_reply_metadata_key, + json.dumps(metadata_stack), + ) # Put the last reply topic back in the user properties - response["user_properties"][ - "__solace_ai_connector_broker_request_response_topic__" - ] = metadata_stack[-1]["response_topic"] + set_data_value( + response["user_properties"], + self.user_properties_reply_topic_key, + metadata_stack[-1]["response_topic"], + ) else: # Remove the metadata and reply topic from the user properties - response["user_properties"].pop( - "__solace_ai_connector_broker_request_reply_metadata__", None - ) - 
response["user_properties"].pop( - "__solace_ai_connector_broker_request_response_topic__", None - ) + remove_data_value(response["user_properties"], self.user_properties_reply_metadata_key) + remove_data_value(response["user_properties"], self.user_properties_reply_topic_key) message = Message( payload=payload, @@ -410,16 +430,14 @@ def invoke(self, message, data): metadata = {"request_id": request_id, "response_topic": topic} - if ( - "__solace_ai_connector_broker_request_reply_metadata__" - in data["user_properties"] - ): + existing_metadata_json = get_data_value( + data["user_properties"], + self.user_properties_reply_metadata_key, + True + ) + if existing_metadata_json: try: - existing_metadata = json.loads( - data["user_properties"][ - "__solace_ai_connector_broker_request_reply_metadata__" - ] - ) + existing_metadata = json.loads(existing_metadata_json) if isinstance(existing_metadata, list): existing_metadata.append(metadata) metadata = existing_metadata @@ -430,19 +448,17 @@ def invoke(self, message, data): except json.JSONDecodeError: log.warning( "Failed to decode existing metadata JSON: %s", - data["user_properties"][ - "__solace_ai_connector_broker_request_reply_metadata__" - ], + existing_metadata_json, ) else: metadata = [metadata] - data["user_properties"][ - "__solace_ai_connector_broker_request_reply_metadata__" - ] = json.dumps(metadata) - data["user_properties"][ - "__solace_ai_connector_broker_request_response_topic__" - ] = topic + set_data_value( + data["user_properties"], self.user_properties_reply_metadata_key, json.dumps(metadata) + ) + set_data_value( + data["user_properties"], self.user_properties_reply_topic_key, topic + ) # If we are configured to also insert the response topic into the request message # then create a temporary message to do so From e9968225dc3d6b3acf7d17a1066058eb812dc322 Mon Sep 17 00:00:00 2001 From: Reuben D'Souza <46090211+reubenjds@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:07:53 -0500 Subject: [PATCH 11/19] DATAGO-91907: Investigate Solace AI connector (other solace ai libs) whitesoure scan results. (#80) Investigate Solace AI connector (other solace ai libs) whitesoure scan results. (#80) --------- Co-authored-by: John Corpuz --- .github/workflows/ci.yml | 2 ++ wss-unified-agent.config | 47 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 wss-unified-agent.config diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 691d9be7..fa63931c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,6 +18,8 @@ jobs: with: min-python-version: "3.10" whitesource_product_name: "solaceai" + whitesource_config_file: "wss-unified-agent.config" + secrets: SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} SONAR_HOST_URL: ${{ vars.SONAR_HOST_URL }} diff --git a/wss-unified-agent.config b/wss-unified-agent.config new file mode 100644 index 00000000..bf2e0771 --- /dev/null +++ b/wss-unified-agent.config @@ -0,0 +1,47 @@ +############################################################### +# WhiteSource Unified-Agent configuration file +############################################################### +# GENERAL SCAN MODE: Files and Package Managers +############################################################### +# Organization vitals +###################### +apiKey=WS_APIKEY +projectName=__PROJECT_NAME__ +projectToken= +productName=__PROJECT_NAME__ +productToken= +# Change the below URL to your WhiteSource server. 
+# Use the 'WhiteSource Server URL' which can be retrieved +# from your 'Profile' page on the 'Server URLs' panel. +# Then, add the '/agent' path to it. +wss.url=https://saas.whitesourcesoftware.com/agent +############ +# Policies # +############ +checkPolicies=false +forceCheckAllDependencies=false +forceUpdate=false +forceUpdate.failBuildOnPolicyViolation=false +########### +# General # +########### +offline=false +######################################## +# Package Manager Dependency resolvers # +######################################## +resolveAllDependencies=true +python.resolveDependencies=true +python.ignoreSourceFiles=true +python.ignorePipInstallErrors=false +python.installVirtualenv=true +python.resolveHierarchyTree=false +python.requirementsFileIncludes=requirements.txt +python.resolveSetupPyFiles=false +python.runPipenvPreStep=false +python.pipenvDevDependencies=false +python.IgnorePipenvInstallErrors=false +includes=**/*.whl +#Exclude file extensions or specific directories by adding **/*. or **//** +excludes= **/.git/** **/.git **/*.config +case.sensitive.glob=false +followSymbolicLinks=true \ No newline at end of file From b657ae93f64096171a1cf3203b8dbbed69d072e6 Mon Sep 17 00:00:00 2001 From: Ali Parvizi <91437594+alimosaed@users.noreply.github.com> Date: Mon, 20 Jan 2025 16:16:35 -0500 Subject: [PATCH 12/19] Alireza/ai 278/add forever broker reconnection config (#77) * feat: add the forever retry * feat: keep connecting * feat: replace the reconnection * ref: moved settings to a new yaml file * feat: update documents * ref: move common settings to base broker * feat: generate documents * fix: retrieve litellm config --- docs/components/broker_input.md | 10 -- docs/components/broker_output.md | 10 -- docs/components/broker_request_response.md | 2 + examples/broker_input_output.yaml | 65 ++++++++ .../common/messaging/solace_messaging.py | 60 ++++++- .../components/inputs_outputs/broker_base.py | 60 ++++++- .../components/inputs_outputs/broker_input.py | 129 +++++++-------- .../inputs_outputs/broker_output.py | 150 ++++++++---------- 8 files changed, 298 insertions(+), 188 deletions(-) create mode 100644 examples/broker_input_output.yaml diff --git a/docs/components/broker_input.md b/docs/components/broker_input.md index ece65fbc..4556d13e 100644 --- a/docs/components/broker_input.md +++ b/docs/components/broker_input.md @@ -8,11 +8,6 @@ Connect to a messaging broker and receive messages from it. The component will o component_name: component_module: broker_input component_config: - broker_type: - broker_url: - broker_username: - broker_password: - broker_vpn: broker_queue_name: temporary_queue: broker_subscriptions: @@ -22,11 +17,6 @@ component_config: | Parameter | Required | Default | Description | | --- | --- | --- | --- | -| broker_type | True | | Type of broker (Solace, MQTT, etc.) | -| broker_url | True | | Broker URL (e.g. 
tcp://localhost:55555) | -| broker_username | True | | Client username for broker | -| broker_password | True | | Client password for broker | -| broker_vpn | True | | Client VPN for broker | | broker_queue_name | False | | Queue name for broker, if not provided it will use a temporary queue | | temporary_queue | False | False | Whether to create a temporary queue that will be deleted after disconnection, defaulted to True if broker_queue_name is not provided | | broker_subscriptions | True | | Subscriptions for broker | diff --git a/docs/components/broker_output.md b/docs/components/broker_output.md index 3389bfca..54b2fdea 100644 --- a/docs/components/broker_output.md +++ b/docs/components/broker_output.md @@ -8,11 +8,6 @@ Connect to a messaging broker and send messages to it. Note that this component component_name: component_module: broker_output component_config: - broker_type: - broker_url: - broker_username: - broker_password: - broker_vpn: payload_encoding: payload_format: propagate_acknowledgements: @@ -23,11 +18,6 @@ component_config: | Parameter | Required | Default | Description | | --- | --- | --- | --- | -| broker_type | True | | Type of broker (Solace, MQTT, etc.) | -| broker_url | True | | Broker URL (e.g. tcp://localhost:55555) | -| broker_username | True | | Client username for broker | -| broker_password | True | | Client password for broker | -| broker_vpn | True | | Client VPN for broker | | payload_encoding | False | utf-8 | Encoding for the payload (utf-8, base64, gzip, none) | | payload_format | False | json | Format for the payload (json, yaml, text) | | propagate_acknowledgements | False | True | Propagate acknowledgements from the broker to the previous components | diff --git a/docs/components/broker_request_response.md b/docs/components/broker_request_response.md index a408e2e7..c90614f9 100644 --- a/docs/components/broker_request_response.md +++ b/docs/components/broker_request_response.md @@ -17,6 +17,7 @@ component_config: payload_format: response_topic_prefix: response_topic_suffix: + response_topic_insertion_expression: response_queue_prefix: request_expiry_ms: streaming: @@ -38,6 +39,7 @@ component_config: | payload_format | False | json | Format for the payload (json, yaml, text) | | response_topic_prefix | False | reply | Prefix for reply topics | | response_topic_suffix | False | | Suffix for reply topics | +| response_topic_insertion_expression | False | | Expression to insert the reply topic into the request message. If not set, the reply topic will only be added to the request_response_metadata. The expression uses the same format as other data expressions: (e.g input.payload:myObj.replyTopic). If there is no object type in the expression, it will default to 'input.payload'. | | response_queue_prefix | False | reply-queue | Prefix for reply queues | | request_expiry_ms | False | 60000 | Expiry time for cached requests in milliseconds | | streaming | False | | The response will arrive in multiple pieces. If True, the streaming_complete_expression must be set and will be used to determine when the last piece has arrived. 
| diff --git a/examples/broker_input_output.yaml b/examples/broker_input_output.yaml new file mode 100644 index 00000000..0a5d0f23 --- /dev/null +++ b/examples/broker_input_output.yaml @@ -0,0 +1,65 @@ +# Example for the broker inputs and outputs +# +# It will subscribe to `demo/messages` and expect an event with the payload: +# +# The input message has the following schema: +# { +# "content": "", +# } +# +# It will then send an event back to Solace with the topic: `demo/messages/output` +# +# +# required ENV variables: +# - SOLACE_BROKER_URL +# - SOLACE_BROKER_USERNAME +# - SOLACE_BROKER_PASSWORD +# - SOLACE_BROKER_VPN + +--- +log: + stdout_log_level: INFO + log_file_level: INFO + log_file: solace_ai_connector.log + +shared_config: + - broker_config: &broker_connection + broker_type: solace + broker_url: ${SOLACE_BROKER_URL} + broker_username: ${SOLACE_BROKER_USERNAME} + broker_password: ${SOLACE_BROKER_PASSWORD} + broker_vpn: ${SOLACE_BROKER_VPN} + reconnection_strategy: forever_retry # options: forever_retry, parametrized_retry + retry_interval: 1000 # in milliseconds + + +flows: + - name: Simple input flow + components: + # Input from a Solace broker + - component_name: solace_input + component_module: broker_input + component_config: + <<: *broker_connection + broker_subscriptions: + - topic: demo/messages + payload_encoding: utf-8 + payload_format: json + + # Send messages back to broker + - component_name: solace_output + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + - type: copy + source_expression: previous:payload + dest_expression: user_data.output:payload + - type: copy + source_expression: template:{{text://input.topic}}/output + dest_expression: user_data.output:topic + input_selection: + source_expression: user_data.output diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index 875e677b..eac51c1c 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -130,15 +130,59 @@ def connect(self): or os.path.dirname(certifi.where()) or "/usr/share/ca-certificates/mozilla/", } - # print (f"Broker Properties: {self.broker_properties}") - self.messaging_service = ( - MessagingService.builder() - .from_properties(broker_props) - .with_reconnection_retry_strategy( - RetryStrategy.parametrized_retry(20, 3000) + strategy = self.broker_properties.get("reconnection_strategy") + if strategy and strategy == "forever_retry": + retry_interval = self.broker_properties.get("retry_interval") + if not retry_interval: + log.warning("retry_interval not provided, using default value of 3000") + retry_interval = 3000 + self.messaging_service = ( + MessagingService.builder() + .from_properties(broker_props) + .with_reconnection_retry_strategy( + RetryStrategy.forever_retry(retry_interval) + ) + .with_connection_retry_strategy( + RetryStrategy.forever_retry(retry_interval) + ) + .build() + ) + elif strategy and strategy == "parametrized_retry": + retry_count = self.broker_properties.get("retry_count") + retry_wait = self.broker_properties.get("retry_wait") + if not retry_count: + log.warning("retry_count not provided, using default value of 20") + retry_count = 20 + if not retry_wait: + log.warning("retry_wait not provided, using default value of 3000") + retry_wait = 3000 + self.messaging_service = ( + 
MessagingService.builder() + .from_properties(broker_props) + .with_reconnection_retry_strategy( + RetryStrategy.parametrized_retry(retry_count, retry_wait) + ) + .with_connection_retry_strategy( + RetryStrategy.parametrized_retry(retry_count, retry_wait) + ) + .build() + ) + else: + # default + log.info( + "Using default reconnection strategy. 20 retries with 3000ms interval" + ) + self.messaging_service = ( + MessagingService.builder() + .from_properties(broker_props) + .with_reconnection_retry_strategy( + RetryStrategy.parametrized_retry(20, 3000) + ) + .with_connection_retry_strategy( + RetryStrategy.parametrized_retry(20, 3000) + ) + .build() ) - .build() - ) # Blocking connect thread self.messaging_service.connect() diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index c312740b..550b262f 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -28,8 +28,59 @@ # queue binding and that object is used to retrieve the next message rather than # the message_service object. +base_info = { + "class_name": "BrokerBase", + "description": "Base class for broker input/output components", + "config_parameters": [ + { + "name": "broker_type", + "required": True, + "description": "Type of broker (Solace, MQTT, etc.)", + }, + { + "name": "broker_url", + "required": True, + "description": "Broker URL (e.g. tcp://localhost:55555)", + }, + { + "name": "broker_username", + "required": True, + "description": "Client username for broker", + }, + { + "name": "broker_password", + "required": True, + "description": "Client password for broker", + }, + { + "name": "broker_vpn", + "required": True, + "description": "Client VPN for broker", + }, + { + "name": "reconnection_strategy", + "required": False, + "description": "Reconnection strategy for the broker (forever_retry, parametrized_retry)", + "default": "forever_retry", + }, + { + "name": "retry_interval", + "required": False, + "description": "Reconnection retry interval in seconds for the broker", + "default": 10000, # in milliseconds + }, + { + "name": "retry_count", + "required": False, + "description": "Number of reconnection retries. 
Only used if reconnection_strategy is parametrized_retry", + "default": 10, + }, + ], +} + class BrokerBase(ComponentBase): + def __init__(self, module_info, **kwargs): super().__init__(module_info, **kwargs) self.broker_properties = self.get_broker_properties() @@ -43,6 +94,7 @@ def __init__(self, module_info, **kwargs): self.messages_to_ack = [] self.connected = False self.needs_acknowledgement = True + self.connection_repeat_sleep_time = 5 @abstractmethod def invoke(self, message, data): @@ -51,12 +103,12 @@ def invoke(self, message, data): def connect(self): if not self.connected: self.messaging_service.connect() - self.connected = True + self.connected = self.messaging_service.is_connected def disconnect(self): if self.connected: self.messaging_service.disconnect() - self.connected = False + self.connected = self.messaging_service.is_connected def stop_component(self): self.disconnect() @@ -94,6 +146,10 @@ def get_broker_properties(self): "subscriptions": self.get_config("broker_subscriptions"), "trust_store_path": self.get_config("trust_store_path"), "temporary_queue": self.get_config("temporary_queue"), + "reconnection_strategy": self.get_config("reconnection_strategy"), + "retry_interval": self.get_config("retry_interval"), + "retry_count": self.get_config("retry_count"), + "retry_interval": self.get_config("retry_interval"), } return broker_properties diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py index 38be39f4..a487d825 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -1,93 +1,74 @@ """Input broker component for the Solace AI Event Connector""" +import copy from ...common.log import log from .broker_base import BrokerBase +from .broker_base import base_info from ...common.message import Message -info = { - "class_name": "BrokerInput", - "description": ( - "Connect to a messaging broker and receive messages from it. " - "The component will output the payload, topic, and user properties of the message." - ), - "config_parameters": [ - { - "name": "broker_type", - "required": True, - "description": "Type of broker (Solace, MQTT, etc.)", - }, - { - "name": "broker_url", - "required": True, - "description": "Broker URL (e.g. 
tcp://localhost:55555)", - }, - { - "name": "broker_username", - "required": True, - "description": "Client username for broker", - }, - { - "name": "broker_password", - "required": True, - "description": "Client password for broker", - }, - { - "name": "broker_vpn", - "required": True, - "description": "Client VPN for broker", - }, - { - "name": "broker_queue_name", - "required": False, - "description": "Queue name for broker, if not provided it will use a temporary queue", - }, - { - "name": "temporary_queue", - "required": False, - "description": "Whether to create a temporary queue that will be deleted " - "after disconnection, defaulted to True if broker_queue_name is not provided", - "default": False, - }, - { - "name": "broker_subscriptions", - "required": True, - "description": "Subscriptions for broker", - }, - { - "name": "payload_encoding", - "required": False, - "description": "Encoding for the payload (utf-8, base64, gzip, none)", - "default": "utf-8", - }, - { - "name": "payload_format", - "required": False, - "description": "Format for the payload (json, yaml, text)", - "default": "json", - }, - ], - "output_schema": { - "type": "object", - "properties": { - "payload": { - "type": "string", +info = copy.deepcopy(base_info) +info.update( + { + "class_name": "BrokerInput", + "description": ( + "Connect to a messaging broker and receive messages from it. " + "The component will output the payload, topic, and user properties of the message." + ), + "config_parameters": [ + { + "name": "broker_queue_name", + "required": False, + "description": "Queue name for broker, if not provided it will use a temporary queue", + }, + { + "name": "temporary_queue", + "required": False, + "description": "Whether to create a temporary queue that will be deleted " + "after disconnection, defaulted to True if broker_queue_name is not provided", + "default": False, }, - "topic": { - "type": "string", + { + "name": "broker_subscriptions", + "required": True, + "description": "Subscriptions for broker", }, - "user_properties": { - "type": "object", + { + "name": "payload_encoding", + "required": False, + "description": "Encoding for the payload (utf-8, base64, gzip, none)", + "default": "utf-8", }, + { + "name": "payload_format", + "required": False, + "description": "Format for the payload (json, yaml, text)", + "default": "json", + }, + ], + "output_schema": { + "type": "object", + "properties": { + "payload": { + "type": "string", + }, + "topic": { + "type": "string", + }, + "user_properties": { + "type": "object", + }, + }, + "required": ["payload", "topic", "user_properties"], }, - "required": ["payload", "topic", "user_properties"], - }, -} + } +) # We always need a timeout so that we can check if we should stop DEFAULT_TIMEOUT_MS = 1000 class BrokerInput(BrokerBase): + def __init__(self, module_info=None, **kwargs): module_info = module_info or info super().__init__(module_info, **kwargs) diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_output.py b/src/solace_ai_connector/components/inputs_outputs/broker_output.py index 25809b80..dc43581f 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_output.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_output.py @@ -1,98 +1,80 @@ """Output broker component for sending messages from the Solace AI Event Connector to a broker""" +import copy from ...common.log import log -from .broker_base import ( - BrokerBase, -) +from .broker_base import BrokerBase +from .broker_base import base_info from 
...common.message import Message -info = { - "class_name": "BrokerOutput", - "description": ( - "Connect to a messaging broker and send messages to it. " - "Note that this component requires that the data is transformed into the input schema." - ), - "config_parameters": [ - { - "name": "broker_type", - "required": True, - "description": "Type of broker (Solace, MQTT, etc.)", - }, - { - "name": "broker_url", - "required": True, - "description": "Broker URL (e.g. tcp://localhost:55555)", - }, - { - "name": "broker_username", - "required": True, - "description": "Client username for broker", - }, - { - "name": "broker_password", - "required": True, - "description": "Client password for broker", - }, - { - "name": "broker_vpn", - "required": True, - "description": "Client VPN for broker", - }, - { - "name": "payload_encoding", - "required": False, - "description": "Encoding for the payload (utf-8, base64, gzip, none)", - "default": "utf-8", - }, - { - "name": "payload_format", - "required": False, - "description": "Format for the payload (json, yaml, text)", - "default": "json", - }, - { - "name": "propagate_acknowledgements", - "required": False, - "description": "Propagate acknowledgements from the broker to the previous components", - "default": True, - }, - { - "name": "copy_user_properties", - "required": False, - "description": "Copy user properties from the input message", - "default": False, - }, - { - "name": "decrement_ttl", - "required": False, - "description": "If present, decrement the user_properties.ttl by 1", - }, - { - "name": "discard_on_ttl_expiration", - "required": False, - "description": "If present, discard the message when the user_properties.ttl is 0", - "default": False, - }, - ], - "input_schema": { - "type": "object", - "properties": { - "payload": { - "type": "any", - "description": "Payload of the message sent to the broker", +info = copy.deepcopy(base_info) +info.update( + { + "class_name": "BrokerOutput", + "description": ( + "Connect to a messaging broker and send messages to it. " + "Note that this component requires that the data is transformed into the input schema." 
+ ), + "config_parameters": [ + { + "name": "payload_encoding", + "required": False, + "description": "Encoding for the payload (utf-8, base64, gzip, none)", + "default": "utf-8", + }, + { + "name": "payload_format", + "required": False, + "description": "Format for the payload (json, yaml, text)", + "default": "json", + }, + { + "name": "propagate_acknowledgements", + "required": False, + "description": "Propagate acknowledgements from the broker to the previous components", + "default": True, + }, + { + "name": "copy_user_properties", + "required": False, + "description": "Copy user properties from the input message", + "default": False, }, - "topic": {"type": "string", "description": "Topic to send the message to"}, - "user_properties": { - "type": "object", - "description": "User properties to send with the message", + { + "name": "decrement_ttl", + "required": False, + "description": "If present, decrement the user_properties.ttl by 1", }, + { + "name": "discard_on_ttl_expiration", + "required": False, + "description": "If present, discard the message when the user_properties.ttl is 0", + "default": False, + }, + ], + "input_schema": { + "type": "object", + "properties": { + "payload": { + "type": "any", + "description": "Payload of the message sent to the broker", + }, + "topic": { + "type": "string", + "description": "Topic to send the message to", + }, + "user_properties": { + "type": "object", + "description": "User properties to send with the message", + }, + }, + "required": ["payload", "topic"], }, - "required": ["payload", "topic"], - }, -} + } +) class BrokerOutput(BrokerBase): + def __init__(self, module_info=None, **kwargs): module_info = module_info or info super().__init__(module_info, **kwargs) From 2ab4d6340fba82393d6f4d7b87fdbe8dfe2ecfd2 Mon Sep 17 00:00:00 2001 From: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> Date: Tue, 21 Jan 2025 08:25:17 -0500 Subject: [PATCH 13/19] JDE: Added MongoDB insert action + example (#81) * Added mongodb insert component * type * added search component * applied comments * updated docs * Added the option to support custom keys for reply and metadata for request reponse user properties * fixed issue * Updated insert with type * added docs * added config value validation * added value check for mongo insert --- docs/components/broker_request_response.md | 4 + docs/components/mongo_insert.md | 2 + examples/db/mongodb_insert.yaml | 56 +++++++++++++ .../general/db/mongo/mongo_insert.py | 80 ++++++++++++++++++- 4 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 examples/db/mongodb_insert.yaml diff --git a/docs/components/broker_request_response.md b/docs/components/broker_request_response.md index c90614f9..74fd32fd 100644 --- a/docs/components/broker_request_response.md +++ b/docs/components/broker_request_response.md @@ -19,6 +19,8 @@ component_config: response_topic_suffix: response_topic_insertion_expression: response_queue_prefix: + user_properties_reply_topic_key: + user_properties_reply_metadata_key: request_expiry_ms: streaming: streaming_complete_expression: @@ -41,6 +43,8 @@ component_config: | response_topic_suffix | False | | Suffix for reply topics | | response_topic_insertion_expression | False | | Expression to insert the reply topic into the request message. If not set, the reply topic will only be added to the request_response_metadata. The expression uses the same format as other data expressions: (e.g input.payload:myObj.replyTopic). 
If there is no object type in the expression, it will default to 'input.payload'. | | response_queue_prefix | False | reply-queue | Prefix for reply queues | +| user_properties_reply_topic_key | False | __solace_ai_connector_broker_request_response_topic__ | Key to store the reply topic in the user properties. Start with : for nested object | +| user_properties_reply_metadata_key | False | __solace_ai_connector_broker_request_reply_metadata__ | Key to store the reply metadata in the user properties. Start with : for nested object | | request_expiry_ms | False | 60000 | Expiry time for cached requests in milliseconds | | streaming | False | | The response will arrive in multiple pieces. If True, the streaming_complete_expression must be set and will be used to determine when the last piece has arrived. | | streaming_complete_expression | False | | The source expression to determine when the last piece of a streaming response has arrived. | diff --git a/docs/components/mongo_insert.md b/docs/components/mongo_insert.md index 23b18be8..31deec67 100644 --- a/docs/components/mongo_insert.md +++ b/docs/components/mongo_insert.md @@ -14,6 +14,7 @@ component_config: database_password: database_name: database_collection: + data_types: ``` | Parameter | Required | Default | Description | @@ -24,6 +25,7 @@ component_config: | database_password | False | | MongoDB password | | database_name | True | | Database name | | database_collection | False | | Collection name - if not provided, all collections will be used | +| data_types | False | | An array of key value pairs to specify the data types for each field in the data. Used for non-JSON types like Date. Supports nested dotted names | ## Component Input Schema diff --git a/examples/db/mongodb_insert.yaml b/examples/db/mongodb_insert.yaml new file mode 100644 index 00000000..adf64438 --- /dev/null +++ b/examples/db/mongodb_insert.yaml @@ -0,0 +1,56 @@ +--- +log: + stdout_log_level: INFO + log_file_level: INFO + log_file: solace_ai_connector.log + +trace: + trace_file: solace_ai_connector_trace.log + +shared_config: + - broker_config: &broker_connection + broker_type: solace + broker_url: ${SOLACE_BROKER_URL} + broker_username: ${SOLACE_BROKER_USERNAME} + broker_password: ${SOLACE_BROKER_PASSWORD} + broker_vpn: ${SOLACE_BROKER_VPN} + +flows: + # Data ingestion to MongoDB for context mesh + - name: real_time_data_ingest + components: + # Data Input from Solace broker + - component_name: solace_data_input + component_module: broker_input + component_config: + <<: *broker_connection + broker_queue_name: demo_data_ingest + broker_subscriptions: + - topic: data/ingest + qos: 1 + payload_encoding: utf-8 + payload_format: json + + # Batch messages to avoid frequent calls to DB + - component_name: batch_aggregate + component_module: aggregate + component_config: + max_items: 100 + max_time_ms: 3000 + input_selection: + source_expression: input.payload:event + + # Insert into MongoDB + - component_name: mongo_insert + component_module: mongo_insert + component_config: + database_host: ${MONGO_HOST} + database_port: ${MONGO_PORT} + database_user: ${MONGO_USER} + database_password: ${MONGO_PASSWORD} + database_name: ${MONGO_DB} + database_collection: ${MONGO_COLLECTION} + data_types: + timestamp: Date + input_selection: + source_expression: previous diff --git a/src/solace_ai_connector/components/general/db/mongo/mongo_insert.py b/src/solace_ai_connector/components/general/db/mongo/mongo_insert.py index 351b9657..8c0425f5 100644 --- 
a/src/solace_ai_connector/components/general/db/mongo/mongo_insert.py +++ b/src/solace_ai_connector/components/general/db/mongo/mongo_insert.py @@ -1,21 +1,99 @@ """MongoDB Agent Component for handling database insert.""" +import datetime +import dateutil.parser from .mongo_base import MongoDBBaseComponent, info as base_info info = base_info.copy() info["class_name"] = "MongoDBInsertComponent" info["description"] = "Inserts data into a MongoDB database." +info["config_parameters"].extend([ + { + "name": "data_types", + "required": False, + "description": "Key value pairs to specify the data types for each field in the data. Used for non-JSON types like Date. Supports nested dotted names", + }, +]) +POSSIBLE_TYPES = ["date", "timestamp", "int", "int32", "int64", "float", "double", "bool", "string", "null"] class MongoDBInsertComponent(MongoDBBaseComponent): """Component for handling MongoDB database operations.""" def __init__(self, **kwargs): super().__init__(info, **kwargs) + self.data_types_map = self.get_config("data_types") + if self.data_types_map: + if not isinstance(self.data_types_map, dict): + raise ValueError( + "Invalid data types provided for MongoDB insert. Expected a dictionary." + ) + for key, field_type in self.data_types_map.items(): + if not isinstance(key, str) or not isinstance(field_type, str) or field_type.lower() not in POSSIBLE_TYPES: + raise ValueError( + "Invalid data types provided for MongoDB insert. Expected a dictionary with key value pairs where key is a string and value is a string from the following list: " + + ", ".join(POSSIBLE_TYPES) + ) + def invoke(self, message, data): - if not data: + if not data or not isinstance(data, dict) and not isinstance(data, list): raise ValueError( "Invalid data provided for MongoDB insert. Expected a dictionary or a list of dictionary." ) + + if self.data_types_map: + for key, field_type in self.data_types_map.items(): + if isinstance(data, list): + new_data = [] + for item in data: + new_data.append(self._convert_data_type(item, key, field_type)) + data = new_data + else: + data = self._convert_data_type(data, key, field_type) return self.db_handler.insert_documents(data) + + def _convert_data_type(self, data, key, field_type): + if not key or not field_type: + return data + if not isinstance(data, list) and not isinstance(data, dict): + return data + if "." 
in key: + segments = key.split(".") + segment = segments[0] + if segment not in data: + if key in data: + data[key] = self._convert_field_type(data[key], field_type) + return data + if len(segments) > 1: + data[segment] = self._convert_data_type(data[segment], ".".join(segments[1:]), field_type) + else: + data[segment] = self._convert_field_type(data[segment], field_type) + else: + if key in data: + data[key] = self._convert_field_type(data[key], field_type) + return data + + def _convert_field_type(self, value, field_type): + field_type = field_type.lower() + if field_type == "date" or field_type == "timestamp": + if isinstance(value, str): + return dateutil.parser.parse(value) + elif isinstance(value, int) or isinstance(value, float): + return datetime.datetime.fromtimestamp(value) + else: + return value + if field_type == "int" or field_type == "int32" or field_type == "int64": + return int(value) + if field_type == "float" or field_type == "double": + return float(value) + if field_type == "bool": + if isinstance(value, str) and value.lower() == "false": + return False + return bool(value) + if field_type == "string": + return str(value) + if field_type == "null": + return None + return value + From 5fb33ebe1566e3807a8354e5b21527ab2f8f6464 Mon Sep 17 00:00:00 2001 From: Ali Parvizi <91437594+alimosaed@users.noreply.github.com> Date: Wed, 22 Jan 2025 08:56:31 -0500 Subject: [PATCH 14/19] Stabilize the connector and add monitoring (#70) * feat: add monitring component * fix: resolve a bug * fix: add sleep time * fix: add sleep time * feat: add readiness and handle excessive logs * fix: handle sleep error * fix: handle sleep error * feat: gracefully exit * feat: set the log back * fix: rename log fields * fix: disabled monitoring * fix: resolve log naming * fix: resolved logging issues * fix: resolve log * fix: resolve log * feat: remove dependency to Langchain * feat: update monitoring * feat: drop error messages when the queue is full * feat: add a text splitter component * feat: updated docs * fix: resolve graceful termination issues * fix: remove payloads from logs * feat: add the forever retry * feat: keep connecting * Feat: add monitoring * feat: replace the reconnection * feat: refactor monitoring * feat: add connection metric * convert connection to async * get metrics enum * add types of metrics * use metrics rather than metric values * fix bug * update type * convert monitoring output to dictionary * fix bug * feat: add connection status * feat: add reconnecting status * feat: add reconnecting log and handled signals * fix: update status * fix: update log * fix: fix bug * fix: fix bug * fix: resolve connection logs * fix: handle threads * fix: update connection state machine * feat: add prefix to the broker logs * fix: synchronize logs with connection attempts * fix: remove datadog dependency * fix: cover an exception * ref: upgrade to latest pubsub and replace a metric * ref: capsulate some variables * ref: enable daemon for threads to close them safely * ref: remove useless variable --- examples/llm/anthropic_chat.yaml | 2 +- examples/llm/bedrock_anthropic_chat.yaml | 2 +- .../langchain_openai_with_history_chat.yaml | 2 +- examples/llm/litellm_chat.yaml | 13 +- examples/llm/mixture_of_agents.yaml | 2 +- examples/llm/openai_chat.yaml | 2 +- examples/llm/openai_chroma_rag.yaml | 40 +++- .../openai_component_request_response.yaml | 2 +- examples/llm/vertexai_chat.yaml | 2 +- pyproject.toml | 7 +- requirements.txt | 9 +- src/solace_ai_connector/common/log.py | 80 ++++++- 
.../common/messaging/messaging_builder.py | 9 +- .../common/messaging/solace_messaging.py | 194 +++++++++++++--- src/solace_ai_connector/common/monitoring.py | 207 ++++++++++++++++++ src/solace_ai_connector/common/utils.py | 4 +- .../components/__init__.py | 38 +--- .../components/component_base.py | 117 +++++++++- .../general/llm/litellm/litellm_base.py | 4 +- .../llm/openai/openai_chat_model_base.py | 4 +- .../components/inputs_outputs/broker_base.py | 20 +- .../components/inputs_outputs/broker_input.py | 26 ++- .../inputs_outputs/broker_output.py | 10 +- .../inputs_outputs/broker_request_response.py | 30 ++- .../components/inputs_outputs/timer_input.py | 3 +- .../inputs_outputs/websocket_output.py | 3 +- src/solace_ai_connector/flow/timer_manager.py | 4 +- src/solace_ai_connector/main.py | 14 +- .../services/cache_service.py | 23 +- .../solace_ai_connector.py | 41 +++- 30 files changed, 755 insertions(+), 159 deletions(-) create mode 100644 src/solace_ai_connector/common/monitoring.py diff --git a/examples/llm/anthropic_chat.yaml b/examples/llm/anthropic_chat.yaml index cc6e8fd8..fb58a585 100644 --- a/examples/llm/anthropic_chat.yaml +++ b/examples/llm/anthropic_chat.yaml @@ -12,7 +12,7 @@ # It will then send an event back to Solace with the topic: `demo/question/response` # # Dependencies: -# pip install -U langchain-anthropic +# pip install -U langchain-anthropic langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - ANTHROPIC_API_KEY diff --git a/examples/llm/bedrock_anthropic_chat.yaml b/examples/llm/bedrock_anthropic_chat.yaml index 421ce428..c6e69f9d 100644 --- a/examples/llm/bedrock_anthropic_chat.yaml +++ b/examples/llm/bedrock_anthropic_chat.yaml @@ -11,7 +11,7 @@ # } # # Dependencies: -# pip install langchain_aws +# pip install langchain_aws langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - AWS_BEDROCK_ANTHROPIC_CLAUDE_MODEL_ID diff --git a/examples/llm/langchain_openai_with_history_chat.yaml b/examples/llm/langchain_openai_with_history_chat.yaml index bef45afd..5eb672f2 100755 --- a/examples/llm/langchain_openai_with_history_chat.yaml +++ b/examples/llm/langchain_openai_with_history_chat.yaml @@ -12,7 +12,7 @@ # It will then send an event back to Solace with the topic: `demo/joke/subject/response` # # Dependencies: -# pip install -U langchain_openai openai +# pip install -U langchain_openai openai langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - OPENAI_API_KEY diff --git a/examples/llm/litellm_chat.yaml b/examples/llm/litellm_chat.yaml index 83ba283a..428a55aa 100644 --- a/examples/llm/litellm_chat.yaml +++ b/examples/llm/litellm_chat.yaml @@ -33,9 +33,16 @@ --- log: - stdout_log_level: INFO + stdout_log_level: DEBUG log_file_level: DEBUG - log_file: solace_ai_connector.log + log_file: ${LOG_FILE} + log_format: jsonl + logback: + rollingpolicy: + file-name-pattern: "${LOG_FILE}.%d{yyyy-MM-dd}.%i.gz" + max-file-size: 100MB + max-history: 5 + total-size-cap: 1GB shared_config: - broker_config: &broker_connection @@ -44,6 +51,8 @@ shared_config: broker_username: ${SOLACE_BROKER_USERNAME} broker_password: ${SOLACE_BROKER_PASSWORD} broker_vpn: ${SOLACE_BROKER_VPN} + reconnection_strategy: forever_retry # options: forever_retry, parametrized_retry + retry_interval: 10000 # in milliseconds # Take from input broker and publish back to Solace flows: diff --git a/examples/llm/mixture_of_agents.yaml b/examples/llm/mixture_of_agents.yaml index dd72e93e..cc92ee30 100644 --- a/examples/llm/mixture_of_agents.yaml +++ 
b/examples/llm/mixture_of_agents.yaml @@ -11,7 +11,7 @@ # NOTE: For horizontal scaling, partitioned queues must be used. This is not implemented in this example. # # Dependencies: -# pip install -U langchain-google-vertexai langchain_anthropic langchain_openai openai +# pip install -U langchain-google-vertexai langchain_anthropic langchain_openai openai langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - GOOGLE_APPLICATION_CREDENTIALS: the path to a service account JSON file diff --git a/examples/llm/openai_chat.yaml b/examples/llm/openai_chat.yaml index 54db782f..770fdedb 100755 --- a/examples/llm/openai_chat.yaml +++ b/examples/llm/openai_chat.yaml @@ -11,7 +11,7 @@ # It will then send an event back to Solace with the topic: `demo/question/response` # # Dependencies: -# pip install -U langchain_openai openai +# pip install -U langchain_openai openai langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - OPENAI_API_KEY diff --git a/examples/llm/openai_chroma_rag.yaml b/examples/llm/openai_chroma_rag.yaml index 1e0f0385..c8072499 100644 --- a/examples/llm/openai_chroma_rag.yaml +++ b/examples/llm/openai_chroma_rag.yaml @@ -7,7 +7,7 @@ # Load Data: # Send data to Solace topic `demo/rag/data` with the following payload format: # { -# "texts": [. , ...] +# "text": text # } # # RAG Query: @@ -18,7 +18,7 @@ # The response will be sent to Solace topic `demo/rag/query/response` # # Dependencies: -# pip install -U langchain_openai openai chromadb langchain-chroma +# pip install -U langchain_openai openai chromadb langchain-chroma langchain-core~=0.3.0 langchain~=0.3.0 # # Required ENV variables: # - OPENAI_API_KEY @@ -61,6 +61,22 @@ flows: payload_encoding: utf-8 payload_format: json + # Split text + - component_name: text_splitter + component_module: langchain_split_text + component_config: + langchain_module: langchain_text_splitters + langchain_class: TokenTextSplitter + langchain_component_config: + chunk_size: 10 + chunk_overlap: 1 + input_transforms: + - type: copy + source_expression: input.payload:text + dest_expression: user_data.input:text + input_selection: + source_expression: user_data.input + # Embedding data & ChromaDB ingest - component_name: chroma_embed component_module: langchain_vector_store_embedding_index @@ -81,11 +97,29 @@ flows: source_value: topic:demo/rag/data dest_expression: user_data.vector_input:metadatas.source - type: copy - source_expression: input.payload:texts + source_expression: previous dest_expression: user_data.vector_input:texts input_selection: source_expression: user_data.vector_input + # Send response back to broker + - component_name: send_response + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + - type: copy + source_expression: previous + dest_expression: user_data.output:payload + - type: copy + source_expression: template:demo/rag/response + dest_expression: user_data.output:topic + input_selection: + source_expression: user_data.output + # RAG Inference flow - name: OpenAI_RAG components: diff --git a/examples/llm/openai_component_request_response.yaml b/examples/llm/openai_component_request_response.yaml index f00ec8e3..1cb83d68 100644 --- a/examples/llm/openai_component_request_response.yaml +++ b/examples/llm/openai_component_request_response.yaml @@ -28,7 +28,7 @@ # It will then send an event back to Solace with the topic: `demo/question/response` # # Dependencies: -# pip install 
-U langchain_openai openai +# pip install -U langchain_openai openai langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - OPENAI_API_KEY diff --git a/examples/llm/vertexai_chat.yaml b/examples/llm/vertexai_chat.yaml index 19e77ece..c61578a0 100644 --- a/examples/llm/vertexai_chat.yaml +++ b/examples/llm/vertexai_chat.yaml @@ -11,7 +11,7 @@ # It will then send an event back to Solace with the topic: `demo/question/response` # # Dependencies: -# pip install -U langchain-google-vertexai +# pip install -U langchain-google-vertexai langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - GOOGLE_APPLICATION_CREDENTIALS: the path to a service account JSON file diff --git a/pyproject.toml b/pyproject.toml index 5d6a4a79..b61270a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,12 +19,9 @@ classifiers = [ ] dependencies = [ "boto3~=1.34.122", - "langchain-core~=0.3.0", - "langchain~=0.3.0", "PyYAML~=6.0.1", "Requests~=2.32.3", - "solace_pubsubplus>=1.8.0", - "litellm>=1.51.3", + "solace_pubsubplus>=1.9.0", "Flask~=3.0.3", "Flask-SocketIO~=5.4.1", "build~=1.2.2.post1", @@ -44,7 +41,7 @@ solace-ai-connector-gen-docs = "solace_ai_connector.tools.gen_component_docs:mai [tool.hatch.envs.hatch-test] installer = "pip" -# # Specify minimum and maximum Python versions to test +# Specify minimum and maximum Python versions to test [[tool.hatch.envs.hatch-test.matrix]] python = ["3.10", "3.12"] diff --git a/requirements.txt b/requirements.txt index c7e90475..1383f3f8 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,9 @@ boto3~=1.34.122 -langchain-core~=0.3.0 -langchain~=0.3.0 PyYAML~=6.0.1 Requests~=2.32.3 -solace_pubsubplus~=1.8.0 -litellm~=1.51.3 +solace_pubsubplus~=1.9.0 Flask~=3.0.3 Flask-SocketIO~=5.4.1 -build~=1.2.2.post1 \ No newline at end of file +build~=1.2.2.post1 +datadog~=0.50.2 +SQLAlchemy~=2.0.36 \ No newline at end of file diff --git a/src/solace_ai_connector/common/log.py b/src/solace_ai_connector/common/log.py index ac151513..9472b52c 100644 --- a/src/solace_ai_connector/common/log.py +++ b/src/solace_ai_connector/common/log.py @@ -2,6 +2,8 @@ import logging import logging.handlers import json +import os +from datetime import datetime log = logging.getLogger("solace_ai_connector") @@ -35,7 +37,22 @@ def format(self, record): return json.dumps(log_record) -def setup_log(logFilePath, stdOutLogLevel, fileLogLevel, logFormat): +def convert_to_bytes(size_str): + size_str = size_str.upper() + size_units = {"KB": 1024, "MB": 1024**2, "GB": 1024**3, "TB": 1024**4, "B": 1} + for unit in size_units: + if size_str.endswith(unit): + return int(size_str[: -len(unit)]) * size_units[unit] + return int(size_str) + + +def setup_log( + logFilePath, + stdOutLogLevel, + fileLogLevel, + logFormat, + logBack, +): """ Set up the configuration for the logger. @@ -44,8 +61,9 @@ def setup_log(logFilePath, stdOutLogLevel, fileLogLevel, logFormat): stdOutLogLevel (int): Logging level for standard output. fileLogLevel (int): Logging level for the log file. logFormat (str): Format of the log output ('jsonl' or 'pipe-delimited'). - + logBack (dict): Rolling log file configuration. 
""" + # Set the global logger level to the lowest of the two levels log.setLevel(min(stdOutLogLevel, fileLogLevel)) @@ -54,17 +72,61 @@ def setup_log(logFilePath, stdOutLogLevel, fileLogLevel, logFormat): stream_formatter = logging.Formatter("%(message)s") stream_handler.setFormatter(stream_formatter) - # Create an empty file at logFilePath (this will overwrite any existing content) - with open(logFilePath, "w") as file: - file.write("") - - # file_handler = logging.handlers.TimedRotatingFileHandler( - # filename=logFilePath, when='midnight', backupCount=30, mode='w') - file_handler = logging.FileHandler(filename=logFilePath, mode="a") if logFormat == "jsonl": file_formatter = JsonlFormatter() else: file_formatter = logging.Formatter("%(asctime)s | %(levelname)s: %(message)s") + + if logBack: + rollingpolicy = logBack.get("rollingpolicy", {}) + if rollingpolicy: + if "file-name-pattern" not in rollingpolicy: + log.warning( + "file-name-pattern is required in rollingpolicy. Continuing with default value '{LOG_FILE}.%d{yyyy-MM-dd}.%i'." + ) + file_name_pattern = rollingpolicy.get( + "file-name-pattern", "{LOG_FILE}.%d{yyyy-MM-dd}.%i.gz" + ) + + if "max-file-size" not in rollingpolicy: + log.warning( + "max-file-size is required in rollingpolicy. Continuing with default value '1GB'." + ) + max_file_size = rollingpolicy.get("max-file-size", "1GB") + + if "max-history" not in rollingpolicy: + log.warning( + "max-history is required in rollingpolicy. Continuing with default value '7'." + ) + max_history = rollingpolicy.get("max-history", 7) + + if "total-size-cap" not in rollingpolicy: + log.warning( + "total-size-cap is required in rollingpolicy. Continuing with default value '1TB'." + ) + total_size_cap = rollingpolicy.get("total-size-cap", "1TB") + + # Convert size strings to bytes + max_file_size = convert_to_bytes(max_file_size) + total_size_cap = convert_to_bytes(total_size_cap) + + # Generate the log file name using the pattern + log_file_name = logFilePath + + # Overwrite the file handler with a rotating file handler + file_handler = logging.handlers.RotatingFileHandler( + filename=log_file_name, + backupCount=max_history, + maxBytes=max_file_size, + ) + file_handler.namer = ( + lambda name: file_name_pattern.replace("${LOG_FILE}", logFilePath) + .replace("%d{yyyy-MM-dd}", datetime.now().strftime("%Y-%m-%d")) + .replace("%i", str(name.split(".")[-1])) + ) + else: + file_handler = logging.FileHandler(filename=logFilePath, mode="a") + file_handler.setFormatter(file_formatter) file_handler.setLevel(fileLogLevel) diff --git a/src/solace_ai_connector/common/messaging/messaging_builder.py b/src/solace_ai_connector/common/messaging/messaging_builder.py index 826cdd45..439ba7b2 100644 --- a/src/solace_ai_connector/common/messaging/messaging_builder.py +++ b/src/solace_ai_connector/common/messaging/messaging_builder.py @@ -6,10 +6,13 @@ # Make a Messaging Service builder - this is a factory for Messaging Service objects class MessagingServiceBuilder: - def __init__(self, flow_lock_manager, flow_kv_store): + + def __init__(self, flow_lock_manager, flow_kv_store, broker_name, stop_signal): self.broker_properties = {} self.flow_lock_manager = flow_lock_manager self.flow_kv_store = flow_kv_store + self.stop_signal = stop_signal + self.broker_name = broker_name def from_properties(self, broker_properties: dict): self.broker_properties = broker_properties @@ -17,7 +20,9 @@ def from_properties(self, broker_properties: dict): def build(self): if self.broker_properties["broker_type"] == "solace": - return 
SolaceMessaging(self.broker_properties) + return SolaceMessaging( + self.broker_properties, self.broker_name, self.stop_signal + ) elif self.broker_properties["broker_type"] == "dev_broker": return DevBroker( self.broker_properties, self.flow_lock_manager, self.flow_kv_store diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index eac51c1c..68bfb821 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -3,6 +3,8 @@ import logging import os import certifi +import threading +from enum import Enum from solace.messaging.messaging_service import ( MessagingService, @@ -34,6 +36,17 @@ from ..log import log +class ConnectionStatus(Enum): + RECONNECTING = 2 + CONNECTED = 1 + DISCONNECTED = 0 + + +def change_connection_status(connection_properties: dict, status): + with connection_properties["lock"]: + connection_properties["status"] = status + + class MessageHandlerImpl(MessageHandler): def __init__(self, persistent_receiver: PersistentMessageReceiver): @@ -70,17 +83,77 @@ class ServiceEventHandler( ReconnectionListener, ReconnectionAttemptListener, ServiceInterruptionListener ): + def __init__( + self, + stop_signal, + strategy, + retry_count, + retry_interval, + connection_properties, + error_prefix="", + ): + self.stop_signal = stop_signal + self.error_prefix = error_prefix + self.strategy = strategy + self.retry_count = retry_count + self.retry_interval = retry_interval + self.connection_properties = connection_properties + def on_reconnected(self, service_event: ServiceEvent): - log.debug("Reconnected to broker: %s", service_event.get_cause()) - log.debug("Message: %s", service_event.get_message()) + change_connection_status(self.connection_properties, ConnectionStatus.CONNECTED) + log.error( + f"{self.error_prefix} Reconnected to broker: %s", + service_event.get_cause(), + ) + log.error( + f"{self.error_prefix} Message: %s", + service_event.get_message(), + ) def on_reconnecting(self, event: "ServiceEvent"): - log.debug("Reconnecting - Error cause: %s", event.get_cause()) - log.debug("Message: %s", event.get_message()) + change_connection_status( + self.connection_properties, ConnectionStatus.RECONNECTING + ) + + def log_reconnecting(): + + while ( + not self.stop_signal.is_set() + and self.connection_properties["status"] + == ConnectionStatus.RECONNECTING + ): + # update retry count + if self.strategy and self.strategy == "parametrized_retry": + if self.retry_count <= 0: + log.error( + f"{self.error_prefix} Reconnection attempts exhausted. Stopping..." 
+ ) + break + else: + self.retry_count -= 1 + + log.error( + f"{self.error_prefix} Reconnecting to broker: %s", + event.get_cause(), + ) + log.error( + f"{self.error_prefix} Message: %s", + event.get_message(), + ) + self.stop_signal.wait(timeout=self.retry_interval / 1000) + + log_thread = threading.Thread(target=log_reconnecting, daemon=True) + log_thread.start() def on_service_interrupted(self, event: "ServiceEvent"): - log.debug("Service interrupted - Error cause: %s", event.get_cause()) - log.debug("Message: %s", event.get_message()) + change_connection_status( + self.connection_properties, ConnectionStatus.DISCONNECTED + ) + log.debug( + f"{self.error_prefix} Service interrupted - Error cause: %s", + event.get_cause(), + ) + log.debug(f"{self.error_prefix} Message: %s", event.get_message()) def set_python_solace_log_level(level: str): @@ -96,19 +169,29 @@ def set_python_solace_log_level(level: str): # Create SolaceMessaging class inheriting from Messaging class SolaceMessaging(Messaging): - def __init__(self, broker_properties: dict): + def __init__(self, broker_properties: dict, broker_name, stop_signal): super().__init__(broker_properties) self.persistent_receivers = [] self.messaging_service = None self.service_handler = None self.publisher = None self.persistent_receiver: PersistentMessageReceiver = None + self.stop_signal = stop_signal + self.connection_properties = { + "status": ConnectionStatus.DISCONNECTED, + "lock": threading.Lock(), + } + + self.error_prefix = f"broker[{broker_name}]:" # MessagingService.set_core_messaging_log_level( # level="DEBUG", file="/home/efunnekotter/core.log" # ) # set_python_solace_log_level("DEBUG") def __del__(self): + change_connection_status( + self.connection_properties, ConnectionStatus.DISCONNECTED + ) self.disconnect() def connect(self): @@ -131,10 +214,14 @@ def connect(self): or "/usr/share/ca-certificates/mozilla/", } strategy = self.broker_properties.get("reconnection_strategy") + retry_interval = 3000 # default + retry_count = 20 # default if strategy and strategy == "forever_retry": retry_interval = self.broker_properties.get("retry_interval") if not retry_interval: - log.warning("retry_interval not provided, using default value of 3000") + log.warning( + f"{self.error_prefix} retry_interval not provided, using default value of 3000 milliseconds" + ) retry_interval = 3000 self.messaging_service = ( MessagingService.builder() @@ -149,46 +236,92 @@ def connect(self): ) elif strategy and strategy == "parametrized_retry": retry_count = self.broker_properties.get("retry_count") - retry_wait = self.broker_properties.get("retry_wait") + retry_interval = self.broker_properties.get("retry_wait") if not retry_count: - log.warning("retry_count not provided, using default value of 20") + log.warning( + f"{self.error_prefix} retry_count not provided, using default value of 20" + ) retry_count = 20 - if not retry_wait: - log.warning("retry_wait not provided, using default value of 3000") - retry_wait = 3000 + if not retry_interval: + log.warning( + f"{self.error_prefix} retry_wait not provided, using default value of 3000" + ) + retry_interval = 3000 self.messaging_service = ( MessagingService.builder() .from_properties(broker_props) .with_reconnection_retry_strategy( - RetryStrategy.parametrized_retry(retry_count, retry_wait) + RetryStrategy.parametrized_retry(retry_count, retry_interval) ) .with_connection_retry_strategy( - RetryStrategy.parametrized_retry(retry_count, retry_wait) + RetryStrategy.parametrized_retry(retry_count, retry_interval) ) 
.build() ) else: - # default + # set default log.info( - "Using default reconnection strategy. 20 retries with 3000ms interval" + f"{self.error_prefix} Using default reconnection strategy. 20 retries with 3000ms interval" ) + strategy = "parametrized_retry" self.messaging_service = ( MessagingService.builder() .from_properties(broker_props) .with_reconnection_retry_strategy( - RetryStrategy.parametrized_retry(20, 3000) + RetryStrategy.parametrized_retry(retry_count, retry_interval) ) .with_connection_retry_strategy( - RetryStrategy.parametrized_retry(20, 3000) + RetryStrategy.parametrized_retry(retry_count, retry_interval) ) .build() ) # Blocking connect thread - self.messaging_service.connect() + result = self.messaging_service.connect_async() + + # log connection attempts + # note: the connection/reconnection handler API does not log connection attempts + self.stop_connection_log = threading.Event() + + def log_connecting(): + temp_retry_count = retry_count + while not ( + self.stop_signal.is_set() + or self.stop_connection_log.is_set() + or result.done() + ): + # update retry count + if strategy and strategy == "parametrized_retry": + if temp_retry_count <= 0: + log.error( + f"{self.error_prefix} Connection attempts exhausted. Stopping..." + ) + break + else: + temp_retry_count -= 1 + + log.info(f"{self.error_prefix} Connecting to broker...") + self.stop_signal.wait(timeout=retry_interval / 1000) + + log_thread = threading.Thread(target=log_connecting, daemon=True) + log_thread.start() + + if result.result() is None: + log.error(f"{self.error_prefix} Failed to connect to broker") + return False + self.stop_connection_log.set() + + change_connection_status(self.connection_properties, ConnectionStatus.CONNECTED) # Event Handling for the messaging service - self.service_handler = ServiceEventHandler() + self.service_handler = ServiceEventHandler( + self.stop_signal, + strategy, + retry_count, + retry_interval, + self.connection_properties, + self.error_prefix, + ) self.messaging_service.add_reconnection_listener(self.service_handler) self.messaging_service.add_reconnection_attempt_listener(self.service_handler) self.messaging_service.add_service_interruption_listener(self.service_handler) @@ -234,7 +367,7 @@ def bind_to_queue( self.persistent_receiver.start() log.debug( - "Persistent receiver started... Bound to Queue [%s] (Temporary: %s)", + f"{self.error_prefix} Persistent receiver started... 
Bound to Queue [%s] (Temporary: %s)", queue.get_name(), temporary, ) @@ -242,7 +375,7 @@ def bind_to_queue( # Handle API exception except PubSubPlusClientError as exception: log.warning( - "Error creating persistent receiver for queue [%s], %s", + f"{self.error_prefix} Error creating persistent receiver for queue [%s], %s", queue_name, exception, ) @@ -255,18 +388,21 @@ def bind_to_queue( for subscription in subscriptions: sub = TopicSubscription.of(subscription.get("topic")) self.persistent_receiver.add_subscription(sub) - log.debug("Subscribed to topic: %s", subscription) + log.debug(f"{self.error_prefix} Subscribed to topic: %s", subscription) return self.persistent_receiver def disconnect(self): try: self.messaging_service.disconnect() + change_connection_status( + self.connection_properties, ConnectionStatus.DISCONNECTED + ) except Exception as exception: # pylint: disable=broad-except - log.debug("Error disconnecting: %s", exception) + log.debug(f"{self.error_prefix} Error disconnecting: %s", exception) - def is_connected(self): - return self.messaging_service.is_connected() + def get_connection_status(self): + return self.connection_properties["status"] def send_message( self, @@ -318,4 +454,6 @@ def ack_message(self, broker_message): if "_original_message" in broker_message: self.persistent_receiver.ack(broker_message["_original_message"]) else: - log.warning("Cannot acknowledge message: original Solace message not found") + log.warning( + f"{self.error_prefix} Cannot acknowledge message: original Solace message not found" + ) diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py new file mode 100644 index 00000000..09a2af5c --- /dev/null +++ b/src/solace_ai_connector/common/monitoring.py @@ -0,0 +1,207 @@ +from typing import Any, List +from enum import Enum +from threading import Lock + +from ..common.messaging.solace_messaging import ConnectionStatus + + +class Metrics(Enum): + SOLCLIENT_STATS_RX_SETTLE_ACCEPTED = "SOLCLIENT_STATS_RX_SETTLE_ACCEPTED" + SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS = ( + "SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS" + ) + + @staticmethod + def get_type(metric: "Metrics") -> str: + """ + Get the type of the metric. + + :param metric: Metric + :return: Type of the metric + """ + if metric in [ + Metrics.SOLCLIENT_STATS_RX_SETTLE_ACCEPTED, + Metrics.SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS, + ]: + return "integer" + # Add more cases here if needed + return "unknown" + + +class Monitoring: + """ + A singleton class to collect and send metrics. + """ + + _instance = None + _initialized = False + _interval = 10 + + def __new__(cls, *args, **kwargs): + if not cls._instance: + cls._instance = super(Monitoring, cls).__new__(cls) + return cls._instance + + def __init__(self, config: dict[str, Any] = None) -> None: + """ + Initialize the MetricCollector with Datadog configuration. + + :param config: Configuration for Datadog + """ + + if self._initialized: + return + + self._initialized = True + self._collected_metrics = {} + self._connection_status = {} + self._lock = Lock() + self._initialize_metrics() + + def _initialize_metrics(self) -> None: + """ + Initialize the MetricCollector. + """ + self._required_metrics = [metric for metric in Metrics] + + def get_required_metrics(self) -> List[Metrics]: + """ + Get the required metrics for the MetricCollector. 
+
+        :return: List of required metrics
+        """
+        return self._required_metrics
+
+    def set_required_metrics(self, required_metrics: List[Metrics]) -> None:
+        """
+        Set the required metrics for the MetricCollector.
+
+        :param required_metrics: List of required metrics
+        """
+        self._required_metrics = [metric for metric in required_metrics]
+
+    def set_interval(self, interval: int) -> None:
+        """
+        Set the interval for the MetricCollector.
+
+        :param interval: Interval
+        """
+        self._interval = interval
+
+    def get_interval(self) -> int:
+        """
+        Get the interval for the MetricCollector.
+
+        :return: Interval
+        """
+        return self._interval
+
+    def set_connection_status(self, key, value: ConnectionStatus) -> None:
+        """
+        Set the connection status of a broker component.
+
+        :param key: Key identifying the broker component
+        """
+        self._connection_status[key] = value
+
+    def get_connection_status(self) -> ConnectionStatus:
+        """
+        Get the aggregated connection status across all broker components.
+        """
+        started = True
+        # default status is disconnected
+        status = ConnectionStatus.DISCONNECTED
+        for _, value in self._connection_status.items():
+            if started:
+                status = value
+                started = False
+
+            # if any component is reconnecting, the overall status is reconnecting
+            if (
+                status == ConnectionStatus.CONNECTED
+                and value == ConnectionStatus.RECONNECTING
+            ):
+                status = ConnectionStatus.RECONNECTING
+
+            # if any component is disconnected, the overall status is disconnected
+            if value == ConnectionStatus.DISCONNECTED:
+                status = ConnectionStatus.DISCONNECTED
+                break
+
+        return status
+
+    def collect_metrics(self, metrics: dict[Metrics, dict[Metrics, Any]]) -> None:
+        """
+        Collect metrics.
+
+        :param metrics: Dictionary of metrics
+        """
+        with self._lock:
+            for key, value in metrics.items():
+                self._collected_metrics[key] = value
+
+    def get_detailed_metrics(self) -> dict[Any, Any]:
+        """
+        Retrieve collected metrics.
+
+        :return: Dictionary of collected metrics
+        """
+        return self._collected_metrics
+
+    def get_aggregated_metrics(
+        self, required_metrics: List[Metrics] = None
+    ) -> List[dict[str, Any]]:
+        """
+        Aggregate collected metrics across flows and components.
+
+        :return: List of aggregated metrics
+        """
+        aggregated_metrics = {}
+        for key, value in self._collected_metrics.items():
+            # get metric
+            metric = next(item[1] for item in key if item[0] == "metric")
+
+            # skip metrics that are not required
+            if required_metrics and metric not in required_metrics:
+                continue
+
+            # filter flow, flow_index, component_module, component_index from key
+            new_key = tuple(
+                item
+                for item in key
+                if item[0]
+                not in ["flow", "flow_index", "component_module", "component_index"]
+            )
+
+            if new_key not in aggregated_metrics:
+                aggregated_metrics[new_key] = dict(value)  # copy to avoid mutating collected metrics
+            else:
+                # aggregate metrics: sum
+                aggregated_timestamp = aggregated_metrics[new_key]["timestamp"]
+                metric_value = value["value"]
+                metric_timestamp = value["timestamp"]
+
+                if metric in [
+                    Metrics.SOLCLIENT_STATS_RX_SETTLE_ACCEPTED,
+                    Metrics.SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS,
+                ]:  # add metrics that need to be aggregated by sum
+                    aggregated_metrics[new_key]["value"] += metric_value
+
+                # set timestamp to the latest
+                if metric_timestamp > aggregated_timestamp:
+                    aggregated_metrics[new_key]["timestamp"] = metric_timestamp
+
+        # convert to a list of dictionaries
+        formatted_metrics = []
+        for key, value in aggregated_metrics.items():
+            metric_dict = dict(key)
+            formatted_metrics.append(
+                {
+                    "component": metric_dict.get("component"),
+                    "metric": metric_dict.get("metric"),
+                    "timestamp": value["timestamp"],
+                    "value": value["value"],
+                }
+            )
+
+        return formatted_metrics
diff --git a/src/solace_ai_connector/common/utils.py b/src/solace_ai_connector/common/utils.py
index 5e5341ab..09735536 100755
--- a/src/solace_ai_connector/common/utils.py
+++ b/src/solace_ai_connector/common/utils.py
@@ -73,7 +73,7 @@ def get_subdirectories(path=None):
 def resolve_config_values(config, allow_source_expression=False):
     """Resolve any config module values in the config by processing 'invoke' entries"""
-    log.debug("Resolving config values in %s", config)
+    # log.debug("Resolving config values in %s", config)
     if not isinstance(config, (dict, list)):
         return config
     if isinstance(config, list):
@@ -382,7 +382,7 @@ def decode_payload(payload, encoding, payload_format):
     ):
         payload = payload.decode("utf-8")
     elif encoding == "unicode_escape":
-        payload = payload.decode('unicode_escape')
+        payload = payload.decode("unicode_escape")
     if payload_format == "json":
         payload = json.loads(payload)
diff --git a/src/solace_ai_connector/components/__init__.py b/src/solace_ai_connector/components/__init__.py
index d20da981..bfc358bc 100755
--- a/src/solace_ai_connector/components/__init__.py
+++ b/src/solace_ai_connector/components/__init__.py
@@ -16,7 +16,6 @@
     delay,
     iterate,
     message_filter,
-    parser,
 )

 from .general.for_testing import (
@@ -25,26 +24,7 @@
     give_ack_output,
 )

-from .general.llm.langchain import (
-    langchain_embeddings,
-    langchain_vector_store_delete,
-    langchain_chat_model,
-    langchain_chat_model_with_history,
-    langchain_vector_store_embedding_index,
-    langchain_vector_store_embedding_search,
-)
-
-from .general.llm.litellm import (
-    litellm_chat_model,
-    litellm_embeddings,
-    litellm_chat_model_with_history,
-)
-
-from .general.websearch import (
-    websearch_duckduckgo,
-    websearch_google,
-    websearch_bing
-)
+from .general.websearch import (websearch_duckduckgo, websearch_google, websearch_bing)

 # Also import the components from the submodules
 from .inputs_outputs.error_input import ErrorInput
@@ -62,20 +42,6 @@
 from .general.delay import Delay
 from .general.iterate import Iterate
 from .general.message_filter import
MessageFilter -from .general.parser import Parser -from .general.llm.langchain.langchain_base import LangChainBase -from .general.llm.langchain.langchain_embeddings import LangChainEmbeddings -from .general.llm.langchain.langchain_vector_store_delete import LangChainVectorStoreDelete -from .general.llm.langchain.langchain_chat_model import LangChainChatModel -from .general.llm.langchain.langchain_chat_model_with_history import ( - LangChainChatModelWithHistory, -) -from .general.llm.langchain.langchain_vector_store_embedding_index import ( - LangChainVectorStoreEmbeddingsIndex, -) -from .general.llm.langchain.langchain_vector_store_embedding_search import ( - LangChainVectorStoreEmbeddingsSearch, -) from .general.websearch.websearch_duckduckgo import WebSearchDuckDuckGo from .general.websearch.websearch_google import WebSearchGoogle -from .general.websearch.websearch_bing import WebSearchBing \ No newline at end of file +from .general.websearch.websearch_bing import WebSearchBing diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 802252f3..4f456bbf 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -2,15 +2,20 @@ import queue import traceback import pprint +import time from abc import abstractmethod +from typing import Any from ..common.log import log from ..common.utils import resolve_config_values from ..common.utils import get_source_expression from ..transforms.transforms import Transforms from ..common.message import Message +from ..common.messaging.solace_messaging import ConnectionStatus from ..common.trace_message import TraceMessage from ..common.event import Event, EventType from ..flow.request_response_flow_controller import RequestResponseFlowController +from ..common.monitoring import Monitoring +from ..common.monitoring import Metrics DEFAULT_QUEUE_TIMEOUT_MS = 1000 DEFAULT_QUEUE_MAX_DEPTH = 5 @@ -51,6 +56,7 @@ def __init__(self, module_info, **kwargs): self.stop_thread_event = threading.Event() self.current_message = None self.current_message_has_been_discarded = False + self.event_message_repeat_sleep_time = 1 self.log_identifier = f"[{self.instance_name}.{self.flow_name}.{self.name}] " @@ -59,24 +65,56 @@ def __init__(self, module_info, **kwargs): self.setup_communications() self.setup_broker_request_response() + self.monitoring = Monitoring() + + def grow_sleep_time(self): + if self.event_message_repeat_sleep_time < 60: + self.event_message_repeat_sleep_time *= 2 + + def reset_sleep_time(self): + self.event_message_repeat_sleep_time = 1 + def create_thread_and_run(self): - self.thread = threading.Thread(target=self.run) + self.thread = threading.Thread(target=self.run, daemon=True) self.thread.start() return self.thread def run(self): + # Start the micro monitoring thread + monitoring_thread = threading.Thread( + target=self.run_micro_monitoring, daemon=True + ) + connection_status_thread = threading.Thread( + target=self.run_connection_status_monitoring, daemon=True + ) + monitoring_thread.start() + connection_status_thread.start() + # Process events until the stop signal is set while not self.stop_signal.is_set(): event = None try: event = self.get_next_event() if event is not None: self.process_event_with_tracing(event) + self.reset_sleep_time() except AssertionError as e: - raise e + try: + self.stop_signal.wait(timeout=self.event_message_repeat_sleep_time) + except KeyboardInterrupt: + self.handle_component_error(e, 
event) + self.grow_sleep_time() + self.handle_component_error(e, event) except Exception as e: + try: + self.stop_signal.wait(timeout=self.event_message_repeat_sleep_time) + except KeyboardInterrupt: + self.handle_component_error(e, event) + self.grow_sleep_time() self.handle_component_error(e, event) self.stop_component() + monitoring_thread.join() + connection_status_thread.join() def process_event_with_tracing(self, event): if self.trace_queue: @@ -109,9 +147,7 @@ def get_next_event(self): timeout = self.queue_timeout_ms or DEFAULT_QUEUE_TIMEOUT_MS event = self.input_queue.get(timeout=timeout / 1000) log.debug( - "%sComponent received event %s from input queue", - self.log_identifier, - event, + "%sComponent received event from input queue", self.log_identifier ) return event except queue.Empty: @@ -161,9 +197,7 @@ def process_post_invoke(self, result, message): # Finally send the message to the next component - or if this is the last component, # the component will override send_message and do whatever it needs to do with the message - log.debug( - "%sSending message from %s: %s", self.log_identifier, self.name, message - ) + log.debug("%sSending message from %s", self.log_identifier, self.name) self.send_message(message) @abstractmethod @@ -455,3 +489,70 @@ def do_broker_request_response( raise ValueError( f"Broker request response controller not found for component {self.name}" ) + + def get_metrics_with_header(self) -> dict[dict[Metrics, Any], Any]: + metrics = {} + required_metrics = self.monitoring.get_required_metrics() + + pure_metrics = self.get_metrics() + for metric, value in pure_metrics.items(): + # filter metrics + if metric in required_metrics: + key = tuple( + [ + ("flow", self.flow_name), + ("flow_index", self.index), + ("component", self.name), + ("component_module", self.config.get("component_module")), + ("component_index", self.component_index), + ("metric", metric), + ] + ) + + value = {"value": value, "timestamp": int(time.time())} + + metrics[key] = value + return metrics + + def get_metrics(self) -> dict[Metrics, Any]: + return {} + + def get_connection_status(self) -> ConnectionStatus: + pass + + def run_connection_status_monitoring(self) -> None: + """ + Get connection status + """ + try: + if self.config.get("component_module") in {"broker_input", "broker_output"}: + while not self.stop_signal.is_set(): + key = tuple( + [ + ("flow", self.flow_name), + ("flow_index", self.index), + ("component", self.name), + ("component_index", self.component_index), + ] + ) + value = self.get_connection_status() + self.monitoring.set_connection_status(key, value) + # Wait 1 second for the next interval + self.stop_signal.wait(timeout=1) + except KeyboardInterrupt: + log.info("Monitoring connection status stopped.") + + def run_micro_monitoring(self) -> None: + """ + Start the metric collection process in a loop. 
+ """ + try: + while not self.stop_signal.is_set(): + # Collect metrics + metrics = self.get_metrics_with_header() + self.monitoring.collect_metrics(metrics) + # Wait for the next interval + sleep_interval = self.monitoring.get_interval() + self.stop_signal.wait(timeout=sleep_interval) + except KeyboardInterrupt: + log.info("Monitoring stopped.") diff --git a/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py b/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py index 22bb2e02..f56a2fd8 100644 --- a/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py +++ b/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py @@ -65,7 +65,7 @@ def init_load_balancer(self): """initialize a load balancer""" try: self.router = litellm.Router(model_list=self.load_balancer) - log.debug("Load balancer initialized with models: %s", self.load_balancer) + log.debug("Litellm Load balancer was initialized") except Exception as e: raise ValueError(f"Error initializing load balancer: {e}") @@ -74,7 +74,7 @@ def load_balance(self, messages, stream): response = self.router.completion( model=self.load_balancer[0]["model_name"], messages=messages, stream=stream ) - log.debug("Load balancer response: %s", response) + log.debug("Load balancer responded") return response def invoke(self, message, data): diff --git a/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py b/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py index 012d1dfe..3df74580 100644 --- a/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py +++ b/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py @@ -185,7 +185,7 @@ def invoke(self, message, data): if max_retries <= 0: raise e else: - time.sleep(1) + self.stop_signal.wait(timeout=1) def invoke_stream(self, client, message, messages): response_uuid = str(uuid.uuid4()) @@ -239,7 +239,7 @@ def invoke_stream(self, client, message, messages): raise e else: # Small delay before retrying - time.sleep(1) + self.stop_signal.wait(timeout=1) if self.stream_to_next_component: # Just return the last chunk diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index 550b262f..9866e8a6 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -1,11 +1,13 @@ """Base class for broker input/output components for the Solace AI Event Connector""" import uuid +from typing import List from abc import abstractmethod from ..component_base import ComponentBase from ...common.message import Message +from ...common.messaging.solace_messaging import ConnectionStatus from ...common.messaging.messaging_builder import MessagingServiceBuilder from ...common.utils import encode_payload, decode_payload @@ -86,29 +88,33 @@ def __init__(self, module_info, **kwargs): self.broker_properties = self.get_broker_properties() if self.broker_properties["broker_type"] not in ["test", "test_streaming"]: self.messaging_service = ( - MessagingServiceBuilder(self.flow_lock_manager, self.flow_kv_store) + MessagingServiceBuilder( + self.flow_lock_manager, + self.flow_kv_store, + self.name, + self.stop_signal, + ) .from_properties(self.broker_properties) .build() ) self.current_broker_message = None self.messages_to_ack = [] - self.connected = False + self.connected = 
ConnectionStatus.DISCONNECTED self.needs_acknowledgement = True - self.connection_repeat_sleep_time = 5 @abstractmethod def invoke(self, message, data): pass def connect(self): - if not self.connected: + if self.connected == ConnectionStatus.DISCONNECTED: self.messaging_service.connect() - self.connected = self.messaging_service.is_connected + self.connected = ConnectionStatus.CONNECTED def disconnect(self): - if self.connected: + if self.connected == ConnectionStatus.CONNECTED: self.messaging_service.disconnect() - self.connected = self.messaging_service.is_connected + self.connected = ConnectionStatus.DISCONNECTED def stop_component(self): self.disconnect() diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py index a487d825..a45326f1 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -1,10 +1,13 @@ """Input broker component for the Solace AI Event Connector""" import copy +from solace.messaging.utils.manageable import ApiMetrics, Metric as SolaceMetrics from ...common.log import log from .broker_base import BrokerBase from .broker_base import base_info from ...common.message import Message +from ...common.monitoring import Metrics + info = copy.deepcopy(base_info) info.update( @@ -104,12 +107,7 @@ def get_next_message(self, timeout_ms=None): topic = broker_message.get("topic") user_properties = broker_message.get("user_properties", {}) - log.debug( - "Received message from broker: topic=%s, user_properties=%s, payload length=%d", - topic, - user_properties, - len(payload) if payload is not None else 0, - ) + log.debug("Received message from broker: topic=%s", topic) return Message(payload=payload, topic=topic, user_properties=user_properties) def acknowledge_message(self, broker_message): @@ -118,3 +116,19 @@ def acknowledge_message(self, broker_message): def get_acknowledgement_callback(self): current_broker_message = self.current_broker_message return lambda: self.acknowledge_message(current_broker_message) + + def get_connection_status(self): + return self.messaging_service.get_connection_status() + + def get_metrics(self): + required_metrics = [ + Metrics.SOLCLIENT_STATS_RX_SETTLE_ACCEPTED, + Metrics.SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS, + ] + stats_dict = {} + metrics: "ApiMetrics" = self.messaging_service.messaging_service.metrics() + for metric_key in required_metrics: + metric = SolaceMetrics(metric_key.value) + stats_dict[metric_key] = metrics.get_value(SolaceMetrics(metric)) + + return stats_dict diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_output.py b/src/solace_ai_connector/components/inputs_outputs/broker_output.py index dc43581f..2892a468 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_output.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_output.py @@ -109,12 +109,7 @@ def send_message(self, message: Message): log.info("Discarding message due to TTL expiration: %s", message) return - log.debug( - "Sending message to broker: topic=%s, user_properties=%s, payload=%s", - topic, - user_properties, - payload, - ) + log.debug("Sending message to broker: topic=%s", topic) user_context = None if self.propagate_acknowledgements: user_context = { @@ -134,3 +129,6 @@ def handle_message_ack_from_broker(self, context): message.call_acknowledgements() else: log.error("No message found in context for acknowledgement") + + 
def get_metrics(self): + return {} diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py b/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py index f8888ad3..04d5a5da 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py @@ -287,10 +287,12 @@ def setup_test_pass_through(self): def start_response_thread(self): if self.test_mode: self.response_thread = threading.Thread( - target=self.handle_test_pass_through + target=self.handle_test_pass_through, daemon=True ) else: - self.response_thread = threading.Thread(target=self.handle_responses) + self.response_thread = threading.Thread( + target=self.handle_responses, daemon=True + ) self.response_thread.start() def handle_responses(self): @@ -332,7 +334,9 @@ def process_response(self, broker_message): return streaming_complete_expression = None - metadata_json = get_data_value(user_properties, self.user_properties_reply_metadata_key, True) + metadata_json = get_data_value( + user_properties, self.user_properties_reply_metadata_key, True + ) if not metadata_json: log.error("Received response without metadata: %s", payload) return @@ -390,8 +394,12 @@ def process_response(self, broker_message): ) else: # Remove the metadata and reply topic from the user properties - remove_data_value(response["user_properties"], self.user_properties_reply_metadata_key) - remove_data_value(response["user_properties"], self.user_properties_reply_topic_key) + remove_data_value( + response["user_properties"], self.user_properties_reply_metadata_key + ) + remove_data_value( + response["user_properties"], self.user_properties_reply_topic_key + ) message = Message( payload=payload, @@ -431,9 +439,7 @@ def invoke(self, message, data): metadata = {"request_id": request_id, "response_topic": topic} existing_metadata_json = get_data_value( - data["user_properties"], - self.user_properties_reply_metadata_key, - True + data["user_properties"], self.user_properties_reply_metadata_key, True ) if existing_metadata_json: try: @@ -454,7 +460,9 @@ def invoke(self, message, data): metadata = [metadata] set_data_value( - data["user_properties"], self.user_properties_reply_metadata_key, json.dumps(metadata) + data["user_properties"], + self.user_properties_reply_metadata_key, + json.dumps(metadata), ) set_data_value( data["user_properties"], self.user_properties_reply_topic_key, topic @@ -521,3 +529,7 @@ def cleanup(self): if self.response_thread: self.response_thread.join() super().cleanup() + + def get_metrics(self): + # override because it removes messaging_service from the BrokerBase + return {} diff --git a/src/solace_ai_connector/components/inputs_outputs/timer_input.py b/src/solace_ai_connector/components/inputs_outputs/timer_input.py index 756d44f3..38a52e58 100644 --- a/src/solace_ai_connector/components/inputs_outputs/timer_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/timer_input.py @@ -36,6 +36,7 @@ class TimerInput(ComponentBase): + def __init__(self, **kwargs): super().__init__(info, **kwargs) self.interval_ms = self.get_config("interval_ms") @@ -61,7 +62,7 @@ def get_next_message(self): else: # Sleep for the remaining time sleep_time = (self.interval_ms - delta_time) / 1000 - time.sleep(sleep_time) + self.stop_signal.wait(timeout=sleep_time) self.last_message_time = self.get_current_time() return Message(payload={}) diff --git 
a/src/solace_ai_connector/components/inputs_outputs/websocket_output.py b/src/solace_ai_connector/components/inputs_outputs/websocket_output.py index d7320645..7f69ac36 100644 --- a/src/solace_ai_connector/components/inputs_outputs/websocket_output.py +++ b/src/solace_ai_connector/components/inputs_outputs/websocket_output.py @@ -30,6 +30,7 @@ class WebsocketOutput(WebsocketBase): + def __init__(self, **kwargs): super().__init__(info, **kwargs) self.payload_encoding = self.get_config("payload_encoding") @@ -38,7 +39,7 @@ def __init__(self, **kwargs): def run(self): if self.listen_port: - self.server_thread = threading.Thread(target=self.run_server) + self.server_thread = threading.Thread(target=self.run_server, daemon=True) self.server_thread.start() super().run() diff --git a/src/solace_ai_connector/flow/timer_manager.py b/src/solace_ai_connector/flow/timer_manager.py index 6b091a74..94a400f3 100644 --- a/src/solace_ai_connector/flow/timer_manager.py +++ b/src/solace_ai_connector/flow/timer_manager.py @@ -6,6 +6,7 @@ class Timer: + def __init__(self, expiration, interval, component, timer_id, payload=None): self.expiration = expiration self.interval = interval @@ -18,12 +19,13 @@ def __lt__(self, other): class TimerManager: + def __init__(self, stop_signal): self.timers = [] self.lock = threading.Lock() self.stop_signal = stop_signal self.event = threading.Event() - self.thread = threading.Thread(target=self.run) + self.thread = threading.Thread(target=self.run, daemon=True) self.thread.start() def add_timer(self, delay_ms, component, timer_id, interval_ms=None, payload=None): diff --git a/src/solace_ai_connector/main.py b/src/solace_ai_connector/main.py index 24ef1007..2cb96e75 100644 --- a/src/solace_ai_connector/main.py +++ b/src/solace_ai_connector/main.py @@ -2,7 +2,7 @@ import sys import re import yaml -import atexit +import signal from .solace_ai_connector import SolaceAiConnector @@ -110,18 +110,18 @@ def shutdown(): app.stop() app.cleanup() print("Solace AI Connector exited successfully!") - os._exit(0) + sys.exit(0) - atexit.register(shutdown) + signal.signal(signal.SIGINT, lambda s, f: shutdown()) + signal.signal(signal.SIGTERM, lambda s, f: shutdown()) # Start the application try: app.run() - except KeyboardInterrupt: - shutdown() - - try: app.wait_for_flows() + except Exception as e: + print(f"Error running Solace AI Connector: {e}", file=sys.stderr) + shutdown() except KeyboardInterrupt: shutdown() diff --git a/src/solace_ai_connector/services/cache_service.py b/src/solace_ai_connector/services/cache_service.py index 0b0ff3f0..135d40d9 100644 --- a/src/solace_ai_connector/services/cache_service.py +++ b/src/solace_ai_connector/services/cache_service.py @@ -11,6 +11,7 @@ class CacheStorageBackend(ABC): + @abstractmethod def get(self, key: str, include_meta=False) -> Any: pass @@ -35,6 +36,7 @@ def get_all(self) -> Dict[str, Tuple[Any, Optional[Dict], Optional[float]]]: class InMemoryStorage(CacheStorageBackend): + def __init__(self): self.store: Dict[str, Dict[str, Any]] = {} self.lock = Lock() @@ -97,6 +99,7 @@ class CacheItem(Base): class SQLAlchemyStorage(CacheStorageBackend): + def __init__(self, connection_string: str): self.engine = create_engine(connection_string) Base.metadata.create_all(self.engine) @@ -112,12 +115,16 @@ def get(self, key: str, include_meta=False) -> Any: session.delete(item) session.commit() return None - if include_meta: + if include_meta: return { "value": pickle.loads(item.value), - "metadata": pickle.loads(item.item_metadata) if 
item.item_metadata else None, + "metadata": pickle.loads(item.item_metadata) + if item.item_metadata + else None, "expiry": item.expiry, - "component": self._get_component_from_reference(item.component_reference), + "component": self._get_component_from_reference( + item.component_reference + ), } return pickle.loads(item.value), ( pickle.loads(item.item_metadata) if item.item_metadata else None @@ -191,12 +198,15 @@ def _get_component_from_reference(self, reference): class CacheService: + def __init__(self, storage_backend: CacheStorageBackend): self.storage = storage_backend self.next_expiry = None self.expiry_event = threading.Event() self.stop_event = threading.Event() - self.expiry_thread = threading.Thread(target=self._expiry_check_loop) + self.expiry_thread = threading.Thread( + target=self._expiry_check_loop, daemon=True + ) self.expiry_thread.start() self.lock = Lock() @@ -269,11 +279,12 @@ def _check_expirations(self): self.storage.delete(key) self.next_expiry = next_expiry - + for key, metadata, component, value in expired_keys: if component: event = Event( - EventType.CACHE_EXPIRY, {"key": key, "metadata": metadata, "expired_data": value} + EventType.CACHE_EXPIRY, + {"key": key, "metadata": metadata, "expired_data": value}, ) component.enqueue(event) diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index 0153621f..1790ee80 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -12,6 +12,7 @@ from .flow.timer_manager import TimerManager from .common.event import Event, EventType from .services.cache_service import CacheService, create_storage_backend +from .common.monitoring import Monitoring class SolaceAiConnector: @@ -33,6 +34,7 @@ def __init__(self, config, event_handlers=None, error_queue=None): self.instance_name = self.config.get("instance_name", "solace_ai_connector") self.timer_manager = TimerManager(self.stop_signal) self.cache_service = self.setup_cache_service() + self.monitoring = Monitoring(config) def run(self): """Run the Solace AI Event Connector""" @@ -107,24 +109,52 @@ def cleanup(self): """Clean up resources and ensure all threads are properly joined""" log.info("Cleaning up Solace AI Event Connector") for flow in self.flows: - flow.cleanup() + try: + flow.cleanup() + except Exception as e: + log.error(f"Error cleaning up flow: {e}") self.flows.clear() + + # Clean up queues + for queue_name, queue in self.flow_input_queues.items(): + try: + while not queue.empty(): + queue.get_nowait() + except Exception as e: + log.error(f"Error cleaning queue {queue_name}: {e}") + self.flow_input_queues.clear() + if hasattr(self, "trace_queue") and self.trace_queue: self.trace_queue.put(None) # Signal the trace thread to stop if self.trace_thread: self.trace_thread.join() if hasattr(self, "cache_check_thread"): self.cache_check_thread.join() + if hasattr(self, "error_queue"): + self.error_queue.put(None) + self.timer_manager.cleanup() + log.info("Cleanup completed") def setup_logging(self): """Setup logging""" + log_config = self.config.get("log", {}) stdout_log_level = log_config.get("stdout_log_level", "INFO") - log_file_level = log_config.get("log_file_level", "DEBUG") + log_file_level = log_config.get("log_file_level", "INFO") log_file = log_config.get("log_file", "solace_ai_connector.log") log_format = log_config.get("log_format", "pipe-delimited") - setup_log(log_file, stdout_log_level, log_file_level, log_format) + + # Get logback values + logback 
= log_config.get("logback", {}) + + setup_log( + log_file, + stdout_log_level, + log_file_level, + log_format, + logback, + ) def setup_trace(self): """Setup trace""" @@ -136,7 +166,7 @@ def setup_trace(self): self.trace_queue = queue.Queue() # Start a new thread to handle trace messages self.trace_thread = threading.Thread( - target=self.handle_trace, args=(trace_file,) + target=self.handle_trace, args=(trace_file,), daemon=True ) self.trace_thread.start() @@ -217,7 +247,10 @@ def stop(self): """Stop the Solace AI Event Connector""" log.info("Stopping Solace AI Event Connector") self.stop_signal.set() + + # Stop core services first self.timer_manager.stop() # Stop the timer manager first self.cache_service.stop() # Stop the cache service + if self.trace_thread: self.trace_thread.join() From 5eacb8270ca2cc23a6b343256a4f4d80e24651de Mon Sep 17 00:00:00 2001 From: Ali Parvizi <91437594+alimosaed@users.noreply.github.com> Date: Fri, 24 Jan 2025 15:34:26 -0500 Subject: [PATCH 15/19] DATAGO-90835:add nack (#83) * feat: add monitring component * fix: resolve a bug * fix: add sleep time * fix: add sleep time * feat: add readiness and handle excessive logs * fix: handle sleep error * fix: handle sleep error * feat: gracefully exit * feat: set the log back * fix: rename log fields * fix: disabled monitoring * fix: resolve log naming * fix: resolved logging issues * fix: resolve log * fix: resolve log * feat: remove dependency to Langchain * feat: update monitoring * feat: drop error messages when the queue is full * feat: add a text splitter component * feat: updated docs * fix: resolve graceful termination issues * fix: remove payloads from logs * feat: add the forever retry * feat: keep connecting * Feat: add monitoring * feat: replace the reconnection * feat: refactor monitoring * feat: add connection metric * convert connection to async * get metrics enum * add types of metrics * use metrics rather than metric values * fix bug * update type * convert monitoring output to dictionary * fix bug * feat: add connection status * feat: add reconnecting status * feat: add reconnecting log and handled signals * fix: update status * fix: update log * fix: fix bug * fix: fix bug * fix: resolve connection logs * fix: handle threads * fix: update connection state machine * feat: add prefix to the broker logs * fix: synchronize logs with connection attempts * fix: remove datadog dependency * fix: cover an exception * ref: upgrade to latest pubsub and replace a metric * feat: add retry and timeout to litellm * feat: add nack * fix: replace exception with exception type * fix: remove useless exceptions * Create pull_request_template.md * fix: update the default nack * ref: replace nack string status with enumerations * ref: generate docs * ref: remove default value * ref: move common imports to a module * ref: update imports * ref: update import --- .github/pull_request_template.md | 5 + docs/components/litellm_chat_model.md | 10 +- .../litellm_chat_model_with_history.md | 10 +- docs/components/litellm_embeddings.md | 10 +- docs/components/mongo_insert.md | 2 +- examples/llm/litellm_chat.yaml | 14 +++ src/solace_ai_connector/common/__init__.py | 3 +- src/solace_ai_connector/common/message.py | 16 ++- .../common/messaging/dev_broker_messaging.py | 17 ++++ .../common/messaging/solace_messaging.py | 25 +++++ .../components/component_base.py | 39 +++++++- .../general/llm/litellm/litellm_base.py | 99 ++++++++++++++++++- .../llm/litellm/litellm_chat_model_base.py | 93 ++++++++--------- 
.../components/inputs_outputs/broker_base.py | 8 ++ .../components/inputs_outputs/broker_input.py | 33 ++++++- 15 files changed, 310 insertions(+), 74 deletions(-) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..f860b828 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,5 @@ +### What is the purpose of this change? + +### How is this accomplished? + +### Anything reviews should focus on/be aware of? diff --git a/docs/components/litellm_chat_model.md b/docs/components/litellm_chat_model.md index e617a772..2af7e92d 100644 --- a/docs/components/litellm_chat_model.md +++ b/docs/components/litellm_chat_model.md @@ -12,14 +12,15 @@ component_config: embedding_params: temperature: set_response_uuid_in_user_properties: + timeout: + retry_policy: + allowed_fails_policy: stream_to_flow: stream_to_next_component: llm_mode: stream_batch_size: history_max_turns: history_max_time: - history_max_turns: - history_max_time: stream_to_flow: stream_to_next_component: llm_mode: @@ -32,14 +33,15 @@ component_config: | embedding_params | False | | LiteLLM model parameters. The model, api_key and base_url are mandatory.find more models at https://docs.litellm.ai/docs/providersfind more parameters at https://docs.litellm.ai/docs/completion/input | | temperature | False | 0.7 | Sampling temperature to use | | set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | +| timeout | False | 60 | Request timeout in seconds | +| retry_policy | False | | Retry policy for the load balancer. Find more at https://docs.litellm.ai/docs/routing#cooldowns | +| allowed_fails_policy | False | | Allowed fails policy for the load balancer. Find more at https://docs.litellm.ai/docs/routing#cooldowns | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | | llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | | stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | | history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | -| history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | -| history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | | llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. 
| diff --git a/docs/components/litellm_chat_model_with_history.md b/docs/components/litellm_chat_model_with_history.md index 67ca587b..16706cd0 100644 --- a/docs/components/litellm_chat_model_with_history.md +++ b/docs/components/litellm_chat_model_with_history.md @@ -12,14 +12,15 @@ component_config: embedding_params: temperature: set_response_uuid_in_user_properties: + timeout: + retry_policy: + allowed_fails_policy: stream_to_flow: stream_to_next_component: llm_mode: stream_batch_size: history_max_turns: history_max_time: - history_max_turns: - history_max_time: ``` | Parameter | Required | Default | Description | @@ -28,14 +29,15 @@ component_config: | embedding_params | False | | LiteLLM model parameters. The model, api_key and base_url are mandatory.find more models at https://docs.litellm.ai/docs/providersfind more parameters at https://docs.litellm.ai/docs/completion/input | | temperature | False | 0.7 | Sampling temperature to use | | set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | +| timeout | False | 60 | Request timeout in seconds | +| retry_policy | False | | Retry policy for the load balancer. Find more at https://docs.litellm.ai/docs/routing#cooldowns | +| allowed_fails_policy | False | | Allowed fails policy for the load balancer. Find more at https://docs.litellm.ai/docs/routing#cooldowns | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | | llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | | stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | | history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | -| history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | -| history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | ## Component Input Schema diff --git a/docs/components/litellm_embeddings.md b/docs/components/litellm_embeddings.md index 4e3e739e..7e59c5ca 100644 --- a/docs/components/litellm_embeddings.md +++ b/docs/components/litellm_embeddings.md @@ -12,14 +12,15 @@ component_config: embedding_params: temperature: set_response_uuid_in_user_properties: + timeout: + retry_policy: + allowed_fails_policy: stream_to_flow: stream_to_next_component: llm_mode: stream_batch_size: history_max_turns: history_max_time: - history_max_turns: - history_max_time: stream_to_flow: stream_to_next_component: llm_mode: @@ -32,14 +33,15 @@ component_config: | embedding_params | False | | LiteLLM model parameters. 
The model, api_key and base_url are mandatory.find more models at https://docs.litellm.ai/docs/providersfind more parameters at https://docs.litellm.ai/docs/completion/input | | temperature | False | 0.7 | Sampling temperature to use | | set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | +| timeout | False | 60 | Request timeout in seconds | +| retry_policy | False | | Retry policy for the load balancer. Find more at https://docs.litellm.ai/docs/routing#cooldowns | +| allowed_fails_policy | False | | Allowed fails policy for the load balancer. Find more at https://docs.litellm.ai/docs/routing#cooldowns | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | | llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | | stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | | history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | -| history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | -| history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | | llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | diff --git a/docs/components/mongo_insert.md b/docs/components/mongo_insert.md index 31deec67..5b47e2b4 100644 --- a/docs/components/mongo_insert.md +++ b/docs/components/mongo_insert.md @@ -25,7 +25,7 @@ component_config: | database_password | False | | MongoDB password | | database_name | True | | Database name | | database_collection | False | | Collection name - if not provided, all collections will be used | -| data_types | False | | An array of key value pairs to specify the data types for each field in the data. Used for non-JSON types like Date. Supports nested dotted names | +| data_types | False | | Key value pairs to specify the data types for each field in the data. Used for non-JSON types like Date. 
Supports nested dotted names | ## Component Input Schema diff --git a/examples/llm/litellm_chat.yaml b/examples/llm/litellm_chat.yaml index 428a55aa..03eb6f1a 100644 --- a/examples/llm/litellm_chat.yaml +++ b/examples/llm/litellm_chat.yaml @@ -78,6 +78,20 @@ flows: component_module: litellm_chat_model component_config: llm_mode: none # options: none or stream + retry_policy: # retry the request per error type + ContentPolicyViolationErrorRetries: 1 + AuthenticationErrorRetries: 1 + BadRequestErrorRetries: 1 + TimeoutErrorRetries: 1 + RateLimitErrorRetries: 1 + InternalServerErrorRetries: 1 + allowed_fails_policy: # allow X failures per minute before cooling down + ContentPolicyViolationErrorAllowedFails: 1000 + RateLimitErrorAllowedFails: 1000 + AuthenticationErrorAllowedFails: 1000 + TimeoutErrorAllowedFails: 1000 + InternalServerErrorAllowedFails: 1000 + timeout: 10 # in second load_balancer: - model_name: "gpt-4o" # model alias litellm_params: diff --git a/src/solace_ai_connector/common/__init__.py b/src/solace_ai_connector/common/__init__.py index 1ae4a76e..252c795e 100644 --- a/src/solace_ai_connector/common/__init__.py +++ b/src/solace_ai_connector/common/__init__.py @@ -1 +1,2 @@ -# Directory for all common code \ No newline at end of file +# Directory for all common code +from solace.messaging.config.message_acknowledgement_configuration import Outcome as Message_NACK_Outcome diff --git a/src/solace_ai_connector/common/message.py b/src/solace_ai_connector/common/message.py index 45c1ce30..ac79138b 100644 --- a/src/solace_ai_connector/common/message.py +++ b/src/solace_ai_connector/common/message.py @@ -5,17 +5,20 @@ import yaml import pprint - from .log import log from .trace_message import TraceMessage from .utils import set_data_value, get_data_value +from ..common import Message_NACK_Outcome + class Message: + def __init__(self, payload=None, topic=None, user_properties=None): self.payload = payload self.topic = topic self.user_properties = user_properties or {} self.ack_callbacks = [] + self.nack_callbacks = [] self.topic_delimiter = "/" self.private_data = {} self.iteration_data = {} @@ -274,6 +277,9 @@ def get_previous(self): def add_acknowledgement(self, callback): self.ack_callbacks.append(callback) + def add_negative_acknowledgements(self, callback): + self.nack_callbacks.append(callback) + def call_acknowledgements(self): """Call all the ack callbacks. This is used to notify the previous components that the message has been acknowledged.""" @@ -282,6 +288,14 @@ def call_acknowledgements(self): for callback in ack_callbacks: callback() + def call_negative_acknowledgements(self, nack=Message_NACK_Outcome.REJECTED): + """Call all the ack callbacks. 
This is used to notify the previous components that the
+        message has been negatively acknowledged (nacked)."""
+        nack_callbacks = self.nack_callbacks
+        self.nack_callbacks = []
+        for callback in nack_callbacks:
+            callback(nack)
+
     def set_topic_delimiter(self, topic_delimiter):
         self.topic_delimiter = topic_delimiter

diff --git a/src/solace_ai_connector/common/messaging/dev_broker_messaging.py b/src/solace_ai_connector/common/messaging/dev_broker_messaging.py
index 0d10cd6b..61811eb4 100644
--- a/src/solace_ai_connector/common/messaging/dev_broker_messaging.py
+++ b/src/solace_ai_connector/common/messaging/dev_broker_messaging.py
@@ -5,10 +5,13 @@
 import queue
 import re
 from copy import deepcopy
+
 from .messaging import Messaging
+from ...common import Message_NACK_Outcome


 class DevBroker(Messaging):
+
     def __init__(self, broker_properties: dict, flow_lock_manager, flow_kv_store):
         super().__init__(broker_properties)
         self.flow_lock_manager = flow_lock_manager
@@ -87,6 +90,20 @@ def subscribe(self, subscription: str, queue_name: str):
     def ack_message(self, message):
         pass

+    def nack_message(self, broker_message, outcome: Message_NACK_Outcome):
+        """
+        Negatively acknowledge (nack) a broker message.
+
+        The dev broker does not track delivery state, so this is a no-op. It exists
+        only to keep the DevBroker interface compatible with SolaceMessaging, which
+        settles the original Solace message with the given outcome.
+
+        Args:
+            broker_message (dict): The broker message to be nacked.
+            outcome (Message_NACK_Outcome): The outcome that would be used for settling the message.
+        """
+        pass
+
     def _get_matching_queue_names(self, topic: str) -> List[str]:
         matching_queue_names = []
         with self.subscriptions_lock:
diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py
index 68bfb821..eb0deca0 100644
--- a/src/solace_ai_connector/common/messaging/solace_messaging.py
+++ b/src/solace_ai_connector/common/messaging/solace_messaging.py
@@ -34,6 +34,7 @@
 from .messaging import Messaging
 from ..log import log
+from ...common import Message_NACK_Outcome


 class ConnectionStatus(Enum):
@@ -362,6 +363,9 @@ def bind_to_queue(
             .with_missing_resources_creation_strategy(
                 MissingResourcesCreationStrategy.CREATE_ON_START
             )
+            .with_required_message_outcome_support(
+                Message_NACK_Outcome.FAILED, Message_NACK_Outcome.REJECTED
+            )
             .build(queue)
         )
         self.persistent_receiver.start()
@@ -457,3 +461,24 @@ def ack_message(self, broker_message):
             log.warning(
                 f"{self.error_prefix} Cannot acknowledge message: original Solace message not found"
             )
+
+    def nack_message(self, broker_message, outcome: Message_NACK_Outcome):
+        """
+        This method handles the negative acknowledgment (nack) of a broker message.
+        If the broker message contains an "_original_message" key, it settles the message
+        with the given outcome using the persistent receiver. If the "_original_message"
+        key is not found, it logs a warning indicating that the original Solace message
+        could not be found and therefore cannot be dropped.
+
+        Args:
+            broker_message (dict): The broker message to be nacked.
+            outcome (Message_NACK_Outcome): The outcome to be used for settling the message.
+ """ + if "_original_message" in broker_message: + self.persistent_receiver.settle( + broker_message["_original_message"], outcome + ) + else: + log.warning( + f"{self.error_prefix} Cannot drop message: original Solace message not found" + ) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 4f456bbf..a90c727e 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -5,6 +5,7 @@ import time from abc import abstractmethod from typing import Any + from ..common.log import log from ..common.utils import resolve_config_values from ..common.utils import get_source_expression @@ -16,6 +17,7 @@ from ..flow.request_response_flow_controller import RequestResponseFlowController from ..common.monitoring import Monitoring from ..common.monitoring import Metrics +from ..common import Message_NACK_Outcome DEFAULT_QUEUE_TIMEOUT_MS = 1000 DEFAULT_QUEUE_MAX_DEPTH = 5 @@ -167,7 +169,14 @@ def process_event(self, event): self.trace_data(data) self.current_message_has_been_discarded = False - result = self.invoke(message, data) + try: + result = self.invoke(message, data) + except Exception as e: + self.current_message = None + self.handle_negative_acknowledgements(message, e) + raise e + finally: + self.current_message = None if self.current_message_has_been_discarded: message.call_acknowledgements() @@ -184,6 +193,11 @@ def process_event(self, event): ) def process_pre_invoke(self, message): + # add nack callback to the message + callback = self.get_negative_acknowledgement_callback() # pylint: disable=assignment-from-none + if callback is not None: + message.add_negative_acknowledgements(callback) + self.apply_input_transforms(message) return self.get_input_data(message) @@ -490,6 +504,29 @@ def do_broker_request_response( f"Broker request response controller not found for component {self.name}" ) + def handle_negative_acknowledgements(self, message, exception): + """Handle NACK for the message.""" + log.error( + "%sComponent failed to process message: %s\n%s", + self.log_identifier, + exception, + traceback.format_exc(), + ) + nack = self.nack_reaction_to_exception(type(exception)) + message.call_negative_acknowledgements(nack) + self.handle_error(exception, Event(EventType.MESSAGE, message)) + + @abstractmethod + def get_negative_acknowledgement_callback(self): + """This should be overridden by the component if it needs to NACK messages.""" + return None + + @abstractmethod + def nack_reaction_to_exception(self, exception_type): + """This should be overridden by the component if it needs to determine + NACK reaction regarding the exception type.""" + return Message_NACK_Outcome.REJECTED + def get_metrics_with_header(self) -> dict[dict[Metrics, Any], Any]: metrics = {} required_metrics = self.monitoring.get_required_metrics() diff --git a/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py b/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py index f56a2fd8..974aac0d 100644 --- a/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py +++ b/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py @@ -2,8 +2,13 @@ import litellm +from litellm.exceptions import APIConnectionError +from litellm.router import RetryPolicy +from litellm.router import AllowedFailsPolicy + from ....component_base import ComponentBase from .....common.log import log +from .....common import Message_NACK_Outcome 
litellm_info_base = { "class_name": "LiteLLMChatModelBase", @@ -42,6 +47,28 @@ "default": False, "type": "boolean", }, + { + "name": "timeout", + "required": False, + "description": "Request timeout in seconds", + "default": 60, + }, + { + "name": "retry_policy", + "required": False, + "description": ( + "Retry policy for the load balancer. " + "Find more at https://docs.litellm.ai/docs/routing#cooldowns" + ), + }, + { + "name": "allowed_fails_policy", + "required": False, + "description": ( + "Allowed fails policy for the load balancer. " + "Find more at https://docs.litellm.ai/docs/routing#cooldowns" + ), + }, ], } @@ -55,7 +82,10 @@ def __init__(self, module_info, **kwargs): def init(self): litellm.suppress_debug_info = True - self.load_balancer = self.get_config("load_balancer") + self.timeout = self.get_config("timeout") + self.retry_policy_config = self.get_config("retry_policy") + self.allowed_fails_policy_config = self.get_config("allowed_fails_policy") + self.load_balancer_config = self.get_config("load_balancer") self.set_response_uuid_in_user_properties = self.get_config( "set_response_uuid_in_user_properties" ) @@ -64,7 +94,61 @@ def init(self): def init_load_balancer(self): """initialize a load balancer""" try: - self.router = litellm.Router(model_list=self.load_balancer) + + if self.retry_policy_config: + retry_policy = RetryPolicy( + ContentPolicyViolationErrorRetries=self.retry_policy_config.get( + "ContentPolicyViolationErrorRetries", None + ), + AuthenticationErrorRetries=self.retry_policy_config.get( + "AuthenticationErrorRetries", None + ), + BadRequestErrorRetries=self.retry_policy_config.get( + "BadRequestErrorRetries", None + ), + TimeoutErrorRetries=self.retry_policy_config.get( + "TimeoutErrorRetries", None + ), + RateLimitErrorRetries=self.retry_policy_config.get( + "RateLimitErrorRetries", None + ), + InternalServerErrorRetries=self.retry_policy_config.get( + "InternalServerErrorRetries", None + ), + ) + else: + retry_policy = RetryPolicy() + + if self.allowed_fails_policy_config: + allowed_fails_policy = AllowedFailsPolicy( + ContentPolicyViolationErrorAllowedFails=self.allowed_fails_policy_config.get( + "ContentPolicyViolationErrorAllowedFails", None + ), + RateLimitErrorAllowedFails=self.allowed_fails_policy_config.get( + "RateLimitErrorAllowedFails", None + ), + BadRequestErrorAllowedFails=self.allowed_fails_policy_config.get( + "BadRequestErrorAllowedFails", None + ), + AuthenticationErrorAllowedFails=self.allowed_fails_policy_config.get( + "AuthenticationErrorAllowedFails", None + ), + TimeoutErrorAllowedFails=self.allowed_fails_policy_config.get( + "TimeoutErrorAllowedFails", None + ), + InternalServerErrorAllowedFails=self.allowed_fails_policy_config.get( + "InternalServerErrorAllowedFails", None + ), + ) + else: + allowed_fails_policy = AllowedFailsPolicy() + + self.router = litellm.Router( + model_list=self.load_balancer_config, + retry_policy=retry_policy, + allowed_fails_policy=allowed_fails_policy, + timeout=self.timeout, + ) log.debug("Litellm Load balancer was initialized") except Exception as e: raise ValueError(f"Error initializing load balancer: {e}") @@ -72,7 +156,9 @@ def init_load_balancer(self): def load_balance(self, messages, stream): """load balance the messages""" response = self.router.completion( - model=self.load_balancer[0]["model_name"], messages=messages, stream=stream + model=self.load_balancer_config[0]["model_name"], + messages=messages, + stream=stream, ) log.debug("Load balancer responded") return response @@ -80,3 +166,10 @@ 
def load_balance(self, messages, stream): def invoke(self, message, data): """invoke the model""" pass + + def nack_reaction_to_exception(self, exception_type): + """get the nack reaction to an exception""" + if exception_type in {APIConnectionError}: + return Message_NACK_Outcome.FAILED + else: + return Message_NACK_Outcome.REJECTED diff --git a/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_base.py b/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_base.py index 24358bde..2341f18b 100644 --- a/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_base.py +++ b/src/solace_ai_connector/components/general/llm/litellm/litellm_chat_model_base.py @@ -1,8 +1,8 @@ """LiteLLM chat model component""" -import time import uuid -from .litellm_base import LiteLLMBase, litellm_info_base +from .litellm_base import LiteLLMBase +from .litellm_base import litellm_info_base from .....common.message import Message from .....common.log import log @@ -133,18 +133,12 @@ def invoke(self, message, data): def invoke_non_stream(self, messages): """invoke the model without streaming""" - max_retries = 3 - while max_retries > 0: - try: - response = self.load_balance(messages, stream=False) - return {"content": response.choices[0].message.content} - except Exception as e: - log.error("Error invoking LiteLLM: %s", e) - max_retries -= 1 - if max_retries <= 0: - raise e - else: - time.sleep(1) + try: + response = self.load_balance(messages, stream=False) + return {"content": response.choices[0].message.content} + except Exception as e: + log.error("Error invoking LiteLLM: %s", e) + raise e def invoke_stream(self, message, messages): """invoke the model with streaming""" @@ -156,47 +150,38 @@ def invoke_stream(self, message, messages): current_batch = "" first_chunk = True - max_retries = 3 - while max_retries > 0: - try: - response = self.load_balance(messages, stream=True) + try: + response = self.load_balance(messages, stream=True) - for chunk in response: - # If we get any response, then don't retry - max_retries = 0 - if chunk.choices[0].delta.content is not None: - content = chunk.choices[0].delta.content - aggregate_result += content - current_batch += content - if len(current_batch.split()) >= self.stream_batch_size: - if self.stream_to_flow: - self.send_streaming_message( - message, - current_batch, - aggregate_result, - response_uuid, - first_chunk, - False, - ) - elif self.stream_to_next_component: - self.send_to_next_component( - message, - current_batch, - aggregate_result, - response_uuid, - first_chunk, - False, - ) - current_batch = "" - first_chunk = False - except Exception as e: - log.error("Error invoking LiteLLM: %s", e) - max_retries -= 1 - if max_retries <= 0: - raise e - else: - # Small delay before retrying - time.sleep(1) + for chunk in response: + if chunk.choices[0].delta.content is not None: + content = chunk.choices[0].delta.content + aggregate_result += content + current_batch += content + if len(current_batch.split()) >= self.stream_batch_size: + if self.stream_to_flow: + self.send_streaming_message( + message, + current_batch, + aggregate_result, + response_uuid, + first_chunk, + False, + ) + elif self.stream_to_next_component: + self.send_to_next_component( + message, + current_batch, + aggregate_result, + response_uuid, + first_chunk, + False, + ) + current_batch = "" + first_chunk = False + except Exception as e: + log.error("Error invoking LiteLLM: %s", e) + raise e if self.stream_to_next_component: # Just return 
the last chunk diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index 9866e8a6..bccb5c37 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -141,6 +141,9 @@ def get_egress_user_properties(self, message: Message): def acknowledge_message(self, broker_message): pass + def negative_acknowledge_message(self, broker_message, nack): + pass + def get_broker_properties(self): broker_properties = { "broker_type": self.get_config("broker_type"), @@ -162,6 +165,11 @@ def get_broker_properties(self): def get_acknowledgement_callback(self): pass + @abstractmethod + def get_negative_acknowledgement_callback(self): + """Base method for getting NACK callback""" + return None + def start(self): pass diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py index a45326f1..7934d15a 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -2,11 +2,13 @@ import copy from solace.messaging.utils.manageable import ApiMetrics, Metric as SolaceMetrics -from ...common.log import log + from .broker_base import BrokerBase from .broker_base import base_info +from ...common.log import log from ...common.message import Message from ...common.monitoring import Metrics +from ...common import Message_NACK_Outcome info = copy.deepcopy(base_info) @@ -113,10 +115,39 @@ def get_next_message(self, timeout_ms=None): def acknowledge_message(self, broker_message): self.messaging_service.ack_message(broker_message) + def negative_acknowledge_message( + self, broker_message, nack=Message_NACK_Outcome.REJECTED + ): + """ + Negative acknowledge a message + Args: + broker_message: The message to NACK + nack: The type of NACK to send (FAILED or REJECTED) + """ + if nack == Message_NACK_Outcome.FAILED: + self.messaging_service.nack_message( + broker_message, Message_NACK_Outcome.FAILED + ) + else: + self.messaging_service.nack_message( + broker_message, Message_NACK_Outcome.REJECTED + ) + def get_acknowledgement_callback(self): current_broker_message = self.current_broker_message return lambda: self.acknowledge_message(current_broker_message) + def get_negative_acknowledgement_callback(self): + """ + Get a callback function for negative acknowledgement + """ + current_broker_message = self.current_broker_message + + def callback(nack): + return self.negative_acknowledge_message(current_broker_message, nack) + + return callback + def get_connection_status(self): return self.messaging_service.get_connection_status() From d95f53b8af650f114682d0a105d471e48691e2e5 Mon Sep 17 00:00:00 2001 From: Greg Meldrum Date: Tue, 28 Jan 2025 09:29:09 -0500 Subject: [PATCH 16/19] Add missing functionality to dev broker that was causing error logs --- .../common/messaging/dev_broker_messaging.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/solace_ai_connector/common/messaging/dev_broker_messaging.py b/src/solace_ai_connector/common/messaging/dev_broker_messaging.py index 61811eb4..40d69a46 100644 --- a/src/solace_ai_connector/common/messaging/dev_broker_messaging.py +++ b/src/solace_ai_connector/common/messaging/dev_broker_messaging.py @@ -5,10 +5,24 @@ import queue import re from copy import deepcopy +from enum import Enum from .messaging import 
Messaging from ...common import Message_NACK_Outcome +class DevConnectionStatus(Enum): + CONNECTED = "CONNECTED" + DISCONNECTED = "DISCONNECTED" + +class DevMetricValue: + def get_value(self, metric_name): + # Return 0 for all metrics + return 0 + +class DevMessagingService: + + def metrics(self): + return DevMetricValue() class DevBroker(Messaging): @@ -17,6 +31,7 @@ def __init__(self, broker_properties: dict, flow_lock_manager, flow_kv_store): self.flow_lock_manager = flow_lock_manager self.flow_kv_store = flow_kv_store self.connected = False + self.messaging_service = DevMessagingService() self.subscriptions_lock = self.flow_lock_manager.get_lock("subscriptions") with self.subscriptions_lock: self.subscriptions = self.flow_kv_store.get("dev_broker:subscriptions") @@ -40,6 +55,9 @@ def connect(self): def disconnect(self): self.connected = False + def get_connection_status(self): + return DevConnectionStatus.CONNECTED if self.connected else DevConnectionStatus.DISCONNECTED + def receive_message(self, timeout_ms, queue_name: str): if not self.connected: raise RuntimeError("DevBroker is not connected") From 15924de20d0c5997f02a4f0a113900239c9e86c2 Mon Sep 17 00:00:00 2001 From: Ali Parvizi <91437594+alimosaed@users.noreply.github.com> Date: Sun, 2 Feb 2025 21:10:40 -0500 Subject: [PATCH 17/19] fix: remove security vulnerabilities (#87) --- examples/websocket/websocket_example_app.html | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/websocket/websocket_example_app.html b/examples/websocket/websocket_example_app.html index 02d54dbc..3331e94d 100644 --- a/examples/websocket/websocket_example_app.html +++ b/examples/websocket/websocket_example_app.html @@ -177,7 +177,7 @@

Received Messages

[the removed and added lines of this hunk lost their HTML markup during extraction and are not recoverable]
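
For illustration of the NACK hooks introduced in component_base.py above, the sketch below shows how a custom component might opt in. It is only a sketch under stated assumptions: ExampleEnrichComponent and call_downstream_service are hypothetical names and the exception mapping is an example policy, but the hook name, its exception-type argument, and the Message_NACK_Outcome values come from the patch.

from solace_ai_connector.components.component_base import ComponentBase
from solace_ai_connector.common import Message_NACK_Outcome


class ExampleEnrichComponent(ComponentBase):
    """Hypothetical component used only to illustrate the NACK hooks."""

    def invoke(self, message, data):
        # Any exception raised here is caught by process_event(), which calls
        # handle_negative_acknowledgements() and then re-raises.
        return call_downstream_service(data)  # hypothetical helper

    def nack_reaction_to_exception(self, exception_type):
        # Called with type(exception); treat transient network failures as
        # redeliverable, reject everything else.
        if issubclass(exception_type, (ConnectionError, TimeoutError)):
            return Message_NACK_Outcome.FAILED
        return Message_NACK_Outcome.REJECTED

Broadly, FAILED signals a transient failure so the broker can redeliver the message, while REJECTED settles it as undeliverable; these are the two outcomes the queue receiver is bound with in bind_to_queue above.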