diff --git a/app/jobs/get_next_ai_message_job.rb b/app/jobs/get_next_ai_message_job.rb
index 076d4274..8ad4aeb2 100644
--- a/app/jobs/get_next_ai_message_job.rb
+++ b/app/jobs/get_next_ai_message_job.rb
@@ -1,3 +1,4 @@
+require "open-uri"
 include ActionView::RecordIdentifier
 require "nokogiri/xml/node"
 class ::Gemini::Errors::ConfigurationError < ::Gemini::Errors::GeminiError; end
@@ -204,6 +205,7 @@ def call_tools_before_wrapping_up
     end
 
     index = @message.index
+    url_of_dalle_generated_image = nil
     msgs.each do |tool_message| # one message for each tool executed
       @conversation.messages.create!(
         assistant: @assistant,
@@ -215,6 +217,13 @@
         index: index += 1,
         processed_at: Time.current,
       )
+
+      parsed = JSON.parse(tool_message[:content]) rescue nil
+
+      if parsed.is_a?(Hash) && parsed.has_key?("url_of_dalle_generated_image")
+        url_of_dalle_generated_image = parsed["url_of_dalle_generated_image"]
+      end
+
     end
 
     assistant_reply = @conversation.messages.create!(
@@ -225,6 +234,12 @@
       index: index += 1
     )
 
+    unless url_of_dalle_generated_image.nil?
+      d = Document.new
+      d.file.attach(io: URI.open(url_of_dalle_generated_image), filename: "image.png")
+      assistant_reply.documents << d
+    end
+
     GetNextAIMessageJob.perform_later(
       @user.id,
       assistant_reply.id,
diff --git a/app/services/toolbox.rb b/app/services/toolbox.rb
index c1617d6d..35ea76eb 100644
--- a/app/services/toolbox.rb
+++ b/app/services/toolbox.rb
@@ -6,6 +6,7 @@ def self.descendants
     [
       test_env && Toolbox::HelloWorld,
       Toolbox::OpenMeteo,
+      Toolbox::Dalle,
       Toolbox::Memory,
       Toolbox::GoogleSearch,
       gmail_active && Toolbox::Gmail,
diff --git a/app/services/toolbox/dalle.rb b/app/services/toolbox/dalle.rb
new file mode 100644
index 00000000..8cf85c3d
--- /dev/null
+++ b/app/services/toolbox/dalle.rb
@@ -0,0 +1,33 @@
+class Toolbox::Dalle < Toolbox
+
+  describe :generate_an_image, <<~S
+    Generate an image based on what the user asks you to generate. You will pass the user's prompt and will get back a URL to an image.
+  S
+
+  def generate_an_image(image_generation_prompt_s:)
+    response = client.images.generate(
+      parameters: {
+        prompt: image_generation_prompt_s,
+        model: "dall-e-3",
+        size: "1024x1792",
+        quality: "standard"
+      }
+    )
+
+    dalle_url = response.dig("data", 0, "url")
+
+    {
+      prompt_given: image_generation_prompt_s,
+      url_of_dalle_generated_image: dalle_url,
+      note_to_assistant: "The image at the URL is already being shown on screen so reply with a nice message confirming the image has been generated, maybe re-describing it, but don't include the link to it."
+    }
+  end
+
+  private
+
+  def client
+    OpenAI::Client.new(
+      access_token: Current.message.assistant.api_service.effective_token
+    )
+  end
+end
diff --git a/test/fixtures/conversations.yml b/test/fixtures/conversations.yml
index fdaabe64..0f3b083a 100644
--- a/test/fixtures/conversations.yml
+++ b/test/fixtures/conversations.yml
@@ -86,6 +86,12 @@ weather:
   title: Weather
   last_assistant_message: weather_explained
 
+image_generation:
+  user: keith
+  assistant: samantha
+  title: Generating an image
+  last_assistant_message: image_generation_explained
+
 memorize:
   user: keith
   assistant: samantha
diff --git a/test/fixtures/messages.yml b/test/fixtures/messages.yml
index d442f24d..c7e09258 100644
--- a/test/fixtures/messages.yml
+++ b/test/fixtures/messages.yml
@@ -679,6 +679,45 @@ weather_explained:
   version: 1
 
+image_generation_tool_call:
+  assistant: samantha
+  conversation: image_generation
+  role: assistant
+  tool_call_id:
+  content_text:
+  content_tool_calls: '[{"index": 0, "id": "def456", "type": "function", "function": {"name": "generate_an_image", "arguments": {"image_generation_prompt_s": "World"}}}]'
+  content_document:
+  created_at: 2024-08-25 1:01:00
+  processed_at: 2024-08-25 1:01:00
+  index: 1
+  version: 1
+
+image_generation_tool_result:
+  assistant: samantha
+  conversation: image_generation
+  role: tool
+  tool_call_id: def456
+  content_text: weather is
+  content_tool_calls:
+  content_document:
+  created_at: 2024-08-25 1:02:00
+  processed_at: 2024-08-25 1:02:00
+  index: 2
+  version: 1
+
+image_generation_explained:
+  assistant: samantha
+  conversation: image_generation
+  role: assistant
+  tool_call_id:
+  content_text: The weather in Austin is
+  content_tool_calls:
+  content_document:
+  created_at: 2024-08-25 1:03:00
+  processed_at: 2024-08-25 1:03:00
+  index: 3
+  version: 1
+
 # Next conversation
 
 trees_explained:
diff --git a/test/jobs/get_next_ai_message_job_openai_test.rb b/test/jobs/get_next_ai_message_job_openai_test.rb
index 890e330d..9d9e8f27 100644
--- a/test/jobs/get_next_ai_message_job_openai_test.rb
+++ b/test/jobs/get_next_ai_message_job_openai_test.rb
@@ -56,6 +56,66 @@ class GetNextAIMessageJobOpenaiTest < ActiveJob::TestCase
     refute second_new_message.finished?, "This message SHOULD NOT be considered finished yet"
   end
 
+  test "properly handles a tool response call from the assistant when images are included" do
+    @image_generation = conversations(:image_generation)
+    @image_generation.messages.create! role: :user, content_text: "Generate an image", assistant: @image_generation.assistant
+    @image_generation_message = @image_generation.latest_message_for_version(:latest)
+
+    @image_generation.assistant.language_model.update!(supports_tools: true)
+
+    image_generation_prompt = "Kitten"
+
+    response = {
+      data: [{
+        url: "https://example.com/image.jpg"
+      }]
+    }
+
+    images_mock = Minitest::Mock.new
+    images_mock.expect :generate, response, parameters: {
+      prompt: image_generation_prompt,
+      model: "dall-e-3",
+      size: "1024x1792",
+      quality: "standard"
+    }
+
+    assert_difference "@image_generation.messages.reload.length", 2 do
+      OpenAI::Client.stub_any_instance :images, images_mock do
+        TestClient::OpenAI.stub :function, "dalle_generate_an_image" do
+          TestClient::OpenAI.stub :arguments, { :image_generation_prompt=>image_generation_prompt } do
+            assert GetNextAIMessageJob.perform_now(@user.id, @image_generation_message.id, @image_generation.assistant.id)
+          end
+        end
+      end
+    end
+
+    @image_generation_message.reload
+    assert @image_generation_message.content_text.blank?
+    assert @image_generation_message.tool_call_id.nil?
+    assert @image_generation_message.content_tool_calls.present?, "Assistant should have decided to call a tool"
+
+    @new_messages = @image_generation.messages.where("id > ?", @image_generation_message.id).order(:created_at)
+
+    # first
+    first_new_message = @new_messages.first
+    assert first_new_message.tool?
+    content_text = first_new_message.content_text
+    json_content_text = JSON.parse(content_text)
+    assert_equal image_generation_prompt, json_content_text["prompt_given"], "First new message should have the result of calling the tool"
+    assert first_new_message.tool_call_id.present?
+    assert first_new_message.content_tool_calls.present?
+    assert_equal @image_generation_message.content_tool_calls.dig(0, :id), first_new_message.tool_call_id, "ID of tool execution should have matched decision to call the tool"
+    assert first_new_message.finished?, "This message SHOULD HAVE been considered finished"
+
+    # second
+    second_new_message = @new_messages.second
+    assert second_new_message.assistant?, "Second new message should be queued up for the assistant to reply"
+    assert second_new_message.content_text.nil?, "The content should be nil to indicate that it hasn't even started processing"
+    assert second_new_message.tool_call_id.nil?
+    assert second_new_message.content_tool_calls.blank?
+    refute second_new_message.finished?, "This message SHOULD NOT be considered finished yet"
+  end
+
   test "returns early if the message id was invalid" do
     refute GetNextAIMessageJob.perform_now(@user.id, 0, @assistant.id)
   end