From 25c59b9eebc661b0945c554a1957430f23a9953c Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Tue, 9 Jul 2024 22:43:42 +0100
Subject: [PATCH 01/24] bedrock changelog

---
 changelog/unreleased/kong/ai-proxy-aws-bedrock.yml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 changelog/unreleased/kong/ai-proxy-aws-bedrock.yml

diff --git a/changelog/unreleased/kong/ai-proxy-aws-bedrock.yml b/changelog/unreleased/kong/ai-proxy-aws-bedrock.yml
new file mode 100644
index 000000000000..adc608b92b04
--- /dev/null
+++ b/changelog/unreleased/kong/ai-proxy-aws-bedrock.yml
@@ -0,0 +1,5 @@
+message: |
+  Kong AI Gateway (AI Proxy and associated plugin family) now supports
+  all AWS Bedrock "Converse API" models.
+type: feature
+scope: Plugin

From bdbbf9b22350d851b4a9c32dd7132560b0e48b97 Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Tue, 9 Jul 2024 22:44:30 +0100
Subject: [PATCH 02/24] add aws_stream library

---
 kong-3.8.0-0.rockspec     |   1 +
 kong/tools/aws_stream.lua | 186 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 187 insertions(+)
 create mode 100644 kong/tools/aws_stream.lua

diff --git a/kong-3.8.0-0.rockspec b/kong-3.8.0-0.rockspec
index 8581f9cf0f92..12859251c30c 100644
--- a/kong-3.8.0-0.rockspec
+++ b/kong-3.8.0-0.rockspec
@@ -203,6 +203,7 @@ build = {
     ["kong.tools.cjson"] = "kong/tools/cjson.lua",
     ["kong.tools.emmy_debugger"] = "kong/tools/emmy_debugger.lua",
     ["kong.tools.redis.schema"] = "kong/tools/redis/schema.lua",
+    ["kong.tools.aws_stream"] = "kong/tools/aws_stream.lua",

     ["kong.runloop.handler"] = "kong/runloop/handler.lua",
     ["kong.runloop.events"] = "kong/runloop/events.lua",
diff --git a/kong/tools/aws_stream.lua b/kong/tools/aws_stream.lua
new file mode 100644
index 000000000000..cee1c9ed4be0
--- /dev/null
+++ b/kong/tools/aws_stream.lua
@@ -0,0 +1,186 @@
+--- Stream class.
+-- Decodes AWS response-stream types, currently application/vnd.amazon.eventstream
+-- @classmod Stream
+
+local buf = require("string.buffer")
+local to_hex = require("resty.string").to_hex
+
+local Stream = {}
+Stream.__index = Stream
+
+
+local _HEADER_EXTRACTORS = {
+  -- bool true
+  [0] = function(stream)
+    return true, 0
+  end,
+
+  -- bool false
+  [1] = function(stream)
+    return false, 0
+  end,
+
+  -- string type
+  [7] = function(stream)
+    local header_value_len = stream:next_int(16)
+    return stream:next_utf_8(header_value_len), header_value_len + 2  -- add the 2 bytes read for the length
+  end,
+
+  -- TODO ADD THE REST OF THE DATA TYPES
+  -- EVEN THOUGH THEY'RE NOT REALLY USED
+}
+
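For orientation, the vnd.amazon.eventstream framing that the methods below walk through, field by field, looks like this (all integers big-endian; layout summarised from the parser itself, not from any external spec text):

    [4B total length][4B headers length][4B preamble CRC32]
    [headers: 1B key length | key | 1B value type | value ...]
    [body][4B message CRC32]
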
+--- Constructor.
+-- @function aws:Stream
+-- @param chunk string complete AWS response stream chunk for decoding
+-- @param is_hex boolean specify if the chunk bytes are already decoded to hex
+-- @usage
+-- local stream_parser = stream:new("00000120af0310f.......", true)
+-- local next, err = stream_parser:next_message()
+function Stream:new(chunk, is_hex)
+  local self = {}  -- override 'self' to be the new object/class
+  setmetatable(self, Stream)
+
+  if #chunk < ((is_hex and 32) or 16) then
+    return nil, "cannot parse a chunk less than 16 bytes long"
+  end
+
+  self.read_count = 0
+  self.chunk = buf.new()
+  self.chunk:put((is_hex and chunk) or to_hex(chunk))
+
+  return self
+end
+
+
+--- return the next `count` ascii bytes from the front of the chunk
+--- and then trims the chunk of those bytes
+-- @param count number whole utf-8 bytes to return
+-- @return string resulting utf-8 string
+function Stream:next_utf_8(count)
+  local utf_bytes = self:next_bytes(count)
+
+  local ascii_string = ""
+  for i = 1, #utf_bytes, 2 do
+    local hex_byte = utf_bytes:sub(i, i + 1)
+    local ascii_byte = string.char(tonumber(hex_byte, 16))
+    ascii_string = ascii_string .. ascii_byte
+  end
+  return ascii_string
+end
+
+--- returns the next `count` bytes from the front of the chunk
+--- and then trims the chunk of those bytes
+-- @param count number whole integer of bytes to return
+-- @return string hex-encoded next `count` bytes
+function Stream:next_bytes(count)
+  if not self.chunk then
+    return nil, "function cannot be called on its own - initialise a chunk reader with :new(chunk)"
+  end
+
+  local bytes = self.chunk:get(count * 2)
+  self.read_count = (count) + self.read_count
+
+  return bytes
+end
+
+--- returns the next unsigned int from the front of the chunk
+--- and then trims the chunk of those bytes
+-- @param size integer bit length (8, 16, 32, etc)
+-- @return number whole integer of size specified
+-- @return string the original bytes, for reference/checksums
+function Stream:next_int(size)
+  if not self.chunk then
+    return nil, nil, "function cannot be called on its own - initialise a chunk reader with :new(chunk)"
+  end
+
+  if size < 8 then
+    return nil, nil, "cannot work on integers smaller than 8 bits long"
+  end
+
+  local int, err = self:next_bytes(size / 8, trim)
+  if err then
+    return nil, nil, err
+  end
+
+  return tonumber(int, 16), int
+end
+
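A minimal usage sketch of the class as the later streaming code drives it (the `frame` variable here is a hypothetical raw chunk from the wire):

    local Stream = require("kong.tools.aws_stream")

    local parser = Stream:new(frame)
    while true do
      local msg = parser:next_message()
      if not msg then
        break
      end

      -- msg.headers is a decoded header table, msg.body the raw payload string
      ngx.log(ngx.DEBUG, msg.headers[":event-type"], ": ", msg.body)
    end
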
+--- returns the next message in the chunk, as a table.
+--- can be used as an iterator.
+-- @return table formatted next message from the given constructor chunk
+function Stream:next_message()
+  if not self.chunk then
+    return nil, "function cannot be called on its own - initialise a chunk reader with :new(chunk)"
+  end
+
+  if #self.chunk < 1 then
+    return false
+  end
+
+  -- get the message length and pull that many bytes
+  --
+  -- this is a chicken and egg problem, because we need to
+  -- read the message to get the length, to then re-read the
+  -- whole message at correct offset
+  local msg_len, orig_len, err = self:next_int(32)
+  if err then
+    return err
+  end
+
+  -- get the headers length
+  local headers_len, orig_headers_len, err = self:next_int(32)
+
+  -- get the preamble checksum
+  local preamble_checksum, orig_preamble_checksum, err = self:next_int(32)
+
+  -- TODO: calculate checksum
+  -- local result = crc32(orig_len .. origin_headers_len, preamble_checksum)
+  -- if not result then
+  --   return nil, "preamble checksum failed - message is corrupted"
+  -- end
+
+  -- pull the headers from the buf
+  local headers = {}
+  local headers_bytes_read = 0
+
+  while headers_bytes_read < headers_len do
+    -- the next 8-bit int is the "header key length"
+    local header_key_len = self:next_int(8)
+    local header_key = self:next_utf_8(header_key_len)
+    headers_bytes_read = 1 + header_key_len + headers_bytes_read
+
+    -- next 8-bits is the header type, which is an enum
+    local header_type = self:next_int(8)
+    headers_bytes_read = 1 + headers_bytes_read
+
+    -- depending on the header type, the value has a different maximum length
+    local header_value, header_value_len = _HEADER_EXTRACTORS[header_type](self)
+    headers_bytes_read = header_value_len + headers_bytes_read
+
+    headers[header_key] = header_value
+  end
+
+  -- finally, extract the body as a string by
+  -- subtracting what's read so far from the
+  -- total length obtained right at the start
+  local body = self:next_utf_8(msg_len - self.read_count - 4)
+
+  -- last 4 bytes is a body checksum
+  local msg_checksum = self:next_int(32)
+  -- TODO CHECK FULL MESSAGE CHECKSUM
+  -- local result = crc32(original_full_msg, msg_checksum)
+  -- if not result then
+  --   return nil, "preamble checksum failed - message is corrupted"
+  -- end
+
+  -- rewind the tape
+  self.read_count = 0
+
+  return {
+    headers = headers,
+    body = body,
+  }
+end
+
+return Stream
\ No newline at end of file

From cb0642bda5c06ae8a6a5c2c78c92f4906bf7e29b Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Tue, 9 Jul 2024 22:47:06 +0100
Subject: [PATCH 03/24] add bedrock shared-scaffolding

---
 kong/llm/drivers/shared.lua | 40 +++++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/kong/llm/drivers/shared.lua b/kong/llm/drivers/shared.lua
index 0e1d0d18a962..53be31c3552f 100644
--- a/kong/llm/drivers/shared.lua
+++ b/kong/llm/drivers/shared.lua
@@ -1,11 +1,12 @@
 local _M = {}

 -- imports
-local cjson = require("cjson.safe")
-local http = require("resty.http")
-local fmt = string.format
-local os = os
-local parse_url = require("socket.url").parse
+local cjson      = require("cjson.safe")
+local http       = require("resty.http")
+local fmt        = string.format
+local os         = os
+local parse_url  = require("socket.url").parse
+local aws_stream = require("kong.tools.aws_stream")
 --

 -- static
@@ -56,15 +57,25 @@ _M.streaming_has_token_counts = {
   ["llama2"] = true,
   ["anthropic"] = true,
   ["gemini"] = true,
+  ["bedrock"] = true,
+}
+
+_M.bedrock_unsupported_system_role_patterns = {
+  "amazon.titan.-.*",
+  "cohere.command.-text.-.*",
+  "cohere.command.-light.-text.-.*",
+  "mistral.mistral.-7b.-instruct.-.*",
+  "mistral.mixtral.-8x7b.-instruct.-.*",
 }

 _M.upstream_url_format = {
-  openai = fmt("%s://api.openai.com:%s", (openai_override and "http") or "https", (openai_override) or "443"),
-  anthropic = "https://api.anthropic.com:443",
-  cohere = "https://api.cohere.com:443",
-  azure = "https://%s.openai.azure.com:443/openai/deployments/%s",
-  gemini = "https://generativelanguage.googleapis.com",
+  openai        = fmt("%s://api.openai.com:%s", (openai_override and "http") or "https", (openai_override) or "443"),
+  anthropic     = "https://api.anthropic.com:443",
+  cohere        = "https://api.cohere.com:443",
+  azure         = "https://%s.openai.azure.com:443/openai/deployments/%s",
+  gemini        = "https://generativelanguage.googleapis.com",
   gemini_vertex = "https://%s",
+  bedrock       = "https://bedrock-runtime.%s.amazonaws.com",
 }
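Each driver's configure_request fills these templates in per request; for Bedrock the %s is the AWS region, so combined with the operation_map path added below, a chat call resolves to something like (region and model purely illustrative):

    https://bedrock-runtime.eu-west-1.amazonaws.com/model/amazon.titan-text-express-v1/converse
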

 _M.operation_map = {
@@ -120,6 +131,12 @@ _M.operation_map = {
       method = "POST",
     },
   },
+  bedrock = {
+    ["llm/v1/chat"] = {
+      path = "/model/%s/%s",
+      method = "POST",
+    },
+  },
 }

 _M.clear_response_headers = {
@@ -138,6 +155,9 @@ _M.clear_response_headers = {
   gemini = {
     "Set-Cookie",
   },
+  bedrock = {
+    "Set-Cookie",
+  },
 }

 ---

From c43d300976f49f7e254a93c96d50dd08fc4855a1 Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Tue, 9 Jul 2024 22:48:05 +0100
Subject: [PATCH 04/24] change stream parser framework to run by-provider; add
 bedrock stream parser

---
 kong/llm/drivers/shared.lua | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/kong/llm/drivers/shared.lua b/kong/llm/drivers/shared.lua
index 53be31c3552f..d5f7423c6bda 100644
--- a/kong/llm/drivers/shared.lua
+++ b/kong/llm/drivers/shared.lua
@@ -239,7 +239,7 @@ end
 -- @param {string} frame input string to format into SSE events
 -- @param {boolean} raw_json sets application/json byte-parser mode
 -- @return {table} n number of split SSE messages, or empty table
-function _M.frame_to_events(frame, raw_json_mode)
+function _M.frame_to_events(frame, provider)
   local events = {}

   if (not frame) or (#frame < 1) or (type(frame)) ~= "string" then
@@ -248,7 +248,7 @@ function _M.frame_to_events(frame, provider)

   -- some new LLMs return the JSON object-by-object,
   -- because that totally makes sense to parse?!
-  if raw_json_mode then
+  if provider == "gemini" then
     -- if this is the first frame, it will begin with array opener '['
     frame = (string.sub(str_ltrim(frame), 1, 1) == "[" and string.sub(str_ltrim(frame), 2)) or frame

@@ -263,6 +263,18 @@ function _M.frame_to_events(frame, provider)
       events[#events+1] = { data = v }
     end

+  elseif provider == "bedrock" then
+    local parser = aws_stream:new(frame)
+    while true do
+      local msg = parser:next_message()
+
+      if not msg then
+        break
+      end
+
+      events[#events+1] = { data = cjson.encode(msg) }
+    end
+
   -- check if it's raw json and just return the split up data frame
   -- Cohere / Other flat-JSON format parser
   -- just return the split up data frame
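frame_to_events now dispatches on the provider name instead of a boolean flag. A sketch of the three parsing regimes it distinguishes, under the same provider strings used above:

    local ai_shared = require("kong.llm.drivers.shared")

    -- "gemini":  truncated-JSON array framing, split on top-level commas
    -- "bedrock": binary eventstream, decoded via kong.tools.aws_stream
    -- otherwise: plain text/event-stream or flat-JSON frames
    local events = ai_shared.frame_to_events(chunk, "bedrock")
    for _, event in ipairs(events or {}) do
      -- for bedrock, each event.data is the JSON-encoded { headers, body } message
    end
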

From 119e1e8effc465924d5297024ea68ca807519c1b Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Tue, 9 Jul 2024 22:48:29 +0100
Subject: [PATCH 05/24] return nil on empty response json from all LLM

---
 kong/llm/drivers/shared.lua | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kong/llm/drivers/shared.lua b/kong/llm/drivers/shared.lua
index d5f7423c6bda..2336ffbee0c9 100644
--- a/kong/llm/drivers/shared.lua
+++ b/kong/llm/drivers/shared.lua
@@ -542,6 +542,10 @@ end
 function _M.post_request(conf, response_object)
   local body_string, err

+  if not response_object then
+    return
+  end
+
   if type(response_object) == "string" then
     -- set raw string body first, then decode
     body_string = response_object

From 2d04d6801f0f6f284654608ed2602e6c91d699c7 Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Tue, 9 Jul 2024 22:51:01 +0100
Subject: [PATCH 06/24] add complex provider signing support to ai-transformer
 plugin functions

---
 kong/llm/init.lua | 65 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 15 deletions(-)

diff --git a/kong/llm/init.lua b/kong/llm/init.lua
index 85802e54b9c7..5a3454d293db 100644
--- a/kong/llm/init.lua
+++ b/kong/llm/init.lua
@@ -91,20 +91,54 @@ do
   function LLM:ai_introspect_body(request, system_prompt, http_opts, response_regex_match)
     local err, _

-    -- set up the request
-    local ai_request = {
-      messages = {
-        [1] = {
-          role = "system",
-          content = system_prompt,
+    -- set up the LLM request for transformation instructions
+    local ai_request
+
+    -- mistral, cohere, titan (via Bedrock) don't support system commands
+    if self.driver == "bedrock" then
+      for _, p in ipairs(ai_shared.bedrock_unsupported_system_role_patterns) do
+        if request.model:find(p) then
+          ai_request = {
+            messages = {
+              [1] = {
+                role = "user",
+                content = system_prompt,
+              },
+              [2] = {
+                role = "assistant",
+                content = "What is the message?",
+              },
+              [3] = {
+                role = "user",
+                content = request,
+              }
+            },
+            stream = false,
+          }
+
+          break
+        end
+      end
+    end
+
+    -- not Bedrock, or didn't match banned pattern - continue as normal
+    if not ai_request then
+      ai_request = {
+        messages = {
+          [1] = {
+            role = "system",
+            content = system_prompt,
+          },
+          [2] = {
+            role = "user",
+            content = request,
+          }
         },
-        [2] = {
-          role = "user",
-          content = request,
-        }
-      },
-      stream = false,
-    }
+        stream = false,
+      }
+    end
+
+    -- needed for some drivers later
+    self.conf.model.source = "transformer-plugins"

     -- convert it to the specified driver format
     ai_request, _, err = self.driver.to_format(ai_request, self.conf.model, "llm/v1/chat")
@@ -204,8 +238,9 @@ do
     }
     setmetatable(self, LLM)

-    local provider = (self.conf.model or {}).provider or "NONE_SET"
-    local driver_module = "kong.llm.drivers." .. provider
+    self.provider = (self.conf.model or {}).provider or "NONE_SET"
+    local driver_module = "kong.llm.drivers." .. self.provider
+
     local ok
     ok, self.driver = pcall(require, driver_module)
     if not ok then
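For Bedrock models matching bedrock_unsupported_system_role_patterns, the transformer request is rebuilt so the instruction travels as an ordinary conversation turn. Roughly, instead of:

    { role = "system",    content = system_prompt },
    { role = "user",      content = request }

the introspection request becomes:

    { role = "user",      content = system_prompt },
    { role = "assistant", content = "What is the message?" },
    { role = "user",      content = request }
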
"requests to and from the specified backend compatible formats.", required = true, - one_of = { "openai", "azure", "anthropic", "cohere", "mistral", "llama2", "gemini" }}}, + one_of = { "openai", "azure", "anthropic", "cohere", "mistral", "llama2", "gemini", "bedrock" }}}, { name = { type = "string", description = "Model name to execute.", From 14e6fba02531120ae861c58662e02d36fdb361f5 Mon Sep 17 00:00:00 2001 From: Jack Tysoe Date: Tue, 9 Jul 2024 22:53:45 +0100 Subject: [PATCH 08/24] add aws provider to keybastion loose table --- kong/plugins/ai-proxy/handler.lua | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/kong/plugins/ai-proxy/handler.lua b/kong/plugins/ai-proxy/handler.lua index 5ff894c5e054..88edbf02aa3f 100644 --- a/kong/plugins/ai-proxy/handler.lua +++ b/kong/plugins/ai-proxy/handler.lua @@ -5,13 +5,24 @@ local kong_utils = require("kong.tools.gzip") local kong_meta = require("kong.meta") local buffer = require "string.buffer" local strip = require("kong.tools.utils").strip +local to_hex = require("resty.string").to_hex -- cloud auth/sdk providers local GCP_SERVICE_ACCOUNT do GCP_SERVICE_ACCOUNT = os.getenv("GCP_SERVICE_ACCOUNT") end +local AWS_REGION do + AWS_REGION = os.getenv("AWS_REGION") +end +local AWS_ACCESS_KEY_ID do + AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID") +end +local AWS_SECRET_ACCESS_KEY do + AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY") +end local GCP = require("resty.gcp.request.credentials.accesstoken") +local AWS = require("resty.aws") -- @@ -48,6 +59,39 @@ local _KEYBASTION = setmetatable({}, { end return { interface = nil, error = "cloud-authentication with GCP failed" } + + elseif plugin_config.model.provider == "bedrock" then + ngx.log(ngx.NOTICE, "loading aws sdk for plugin ", kong.plugin.get_id()) + local aws + + local region = plugin_config.model.options + and plugin_config.model.options.bedrock + and plugin_config.model.options.bedrock.aws_region + or AWS_REGION + + local access_key = (plugin_config.auth and plugin_config.auth.aws_access_key_id) + or AWS_ACCESS_KEY_ID + + local secret_key = (plugin_config.auth and plugin_config.auth.aws_secret_access_key) + or AWS_SECRET_ACCESS_KEY + + if access_key and secret_key then + aws = AWS({ + -- if any of these are nil, they either use the SDK default or + -- are deliberately null so that a different auth chain is used + region = region, + aws_access_key_id = access_key, + aws_secret_access_key = secret_key, + }) + else + aws = AWS({ + region = region, + }) + end + + this_cache[plugin_config] = { interface = aws, error = nil } + + return this_cache[plugin_config] end end, }) From 1942f0a95af28f81bbf623c8168cbf453cbbca94 Mon Sep 17 00:00:00 2001 From: Jack Tysoe Date: Tue, 9 Jul 2024 22:55:19 +0100 Subject: [PATCH 09/24] adjust sdk to use new streaming parser format --- kong/plugins/ai-proxy/handler.lua | 3 +-- spec/03-plugins/38-ai-proxy/01-unit_spec.lua | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/kong/plugins/ai-proxy/handler.lua b/kong/plugins/ai-proxy/handler.lua index 88edbf02aa3f..4bfa8ed97fd7 100644 --- a/kong/plugins/ai-proxy/handler.lua +++ b/kong/plugins/ai-proxy/handler.lua @@ -143,8 +143,7 @@ local function handle_streaming_frame(conf) chunk = kong_utils.inflate_gzip(ngx.arg[1]) end - local is_raw_json = conf.model.provider == "gemini" - local events = ai_shared.frame_to_events(chunk, is_raw_json ) + local events = ai_shared.frame_to_events(chunk, conf.model.provider) if not events then -- usually a 
not-supported-transformer or empty frames.
     return
   end

From 1942f0a95af28f81bbf623c8168cbf453cbbca94 Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Tue, 9 Jul 2024 22:55:19 +0100
Subject: [PATCH 09/24] adjust sdk to use new streaming parser format

---
 kong/plugins/ai-proxy/handler.lua            | 3 +--
 spec/03-plugins/38-ai-proxy/01-unit_spec.lua | 8 ++++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/kong/plugins/ai-proxy/handler.lua b/kong/plugins/ai-proxy/handler.lua
index 88edbf02aa3f..4bfa8ed97fd7 100644
--- a/kong/plugins/ai-proxy/handler.lua
+++ b/kong/plugins/ai-proxy/handler.lua
@@ -143,8 +143,7 @@ local function handle_streaming_frame(conf)
     chunk = kong_utils.inflate_gzip(ngx.arg[1])
   end

-  local is_raw_json = conf.model.provider == "gemini"
-  local events = ai_shared.frame_to_events(chunk, is_raw_json )
+  local events = ai_shared.frame_to_events(chunk, conf.model.provider)

   if not events then
     -- usually a not-supported-transformer or empty frames.
diff --git a/spec/03-plugins/38-ai-proxy/01-unit_spec.lua b/spec/03-plugins/38-ai-proxy/01-unit_spec.lua
index aeb42600d639..bb444a8b28b1 100644
--- a/spec/03-plugins/38-ai-proxy/01-unit_spec.lua
+++ b/spec/03-plugins/38-ai-proxy/01-unit_spec.lua
@@ -664,7 +664,7 @@ describe(PLUGIN_NAME .. ": (unit)", function()
     it("transforms truncated-json type (beginning of stream)", function()
       local input = pl_file.read(fmt("spec/fixtures/ai-proxy/unit/streaming-chunk-formats/partial-json-beginning/input.bin"))
-      local events = ai_shared.frame_to_events(input, true)
+      local events = ai_shared.frame_to_events(input, "gemini")

       local expected = pl_file.read(fmt("spec/fixtures/ai-proxy/unit/streaming-chunk-formats/partial-json-beginning/expected-output.json"))
       local expected_events = cjson.decode(expected)
@@ -674,7 +674,7 @@ describe(PLUGIN_NAME .. ": (unit)", function()
     it("transforms truncated-json type (end of stream)", function()
       local input = pl_file.read(fmt("spec/fixtures/ai-proxy/unit/streaming-chunk-formats/partial-json-end/input.bin"))
-      local events = ai_shared.frame_to_events(input, true)
+      local events = ai_shared.frame_to_events(input, "gemini")

       local expected = pl_file.read(fmt("spec/fixtures/ai-proxy/unit/streaming-chunk-formats/partial-json-end/expected-output.json"))
       local expected_events = cjson.decode(expected)
@@ -684,7 +684,7 @@ describe(PLUGIN_NAME .. ": (unit)", function()
     it("transforms complete-json type", function()
       local input = pl_file.read(fmt("spec/fixtures/ai-proxy/unit/streaming-chunk-formats/complete-json/input.bin"))
-      local events = ai_shared.frame_to_events(input, false)  -- not "truncated json mode" like Gemini
+      local events = ai_shared.frame_to_events(input, "cohere")  -- not "truncated json mode" like Gemini

       local expected = pl_file.read(fmt("spec/fixtures/ai-proxy/unit/streaming-chunk-formats/complete-json/expected-output.json"))
       local expected_events = cjson.decode(expected)
@@ -694,7 +694,7 @@ describe(PLUGIN_NAME .. ": (unit)", function()
     it("transforms text/event-stream type", function()
       local input = pl_file.read(fmt("spec/fixtures/ai-proxy/unit/streaming-chunk-formats/text-event-stream/input.bin"))
-      local events = ai_shared.frame_to_events(input, false)  -- not "truncated json mode" like Gemini
+      local events = ai_shared.frame_to_events(input, "openai")  -- not "truncated json mode" like Gemini

       local expected = pl_file.read(fmt("spec/fixtures/ai-proxy/unit/streaming-chunk-formats/text-event-stream/expected-output.json"))
       local expected_events = cjson.decode(expected)

From 810891e91c87b5b8bea9b0f003526973e56bb870 Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Tue, 9 Jul 2024 22:55:42 +0100
Subject: [PATCH 10/24] add bedrock format test scaffolding

---
 spec/03-plugins/38-ai-proxy/01-unit_spec.lua | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/spec/03-plugins/38-ai-proxy/01-unit_spec.lua b/spec/03-plugins/38-ai-proxy/01-unit_spec.lua
index bb444a8b28b1..0f3b15bbdee3 100644
--- a/spec/03-plugins/38-ai-proxy/01-unit_spec.lua
+++ b/spec/03-plugins/38-ai-proxy/01-unit_spec.lua
@@ -237,6 +237,20 @@ local FORMATS = {
       },
     },
   },
+  bedrock = {
+    ["llm/v1/chat"] = {
+      config = {
+        name = "bedrock",
+        provider = "bedrock",
+        options = {
+          max_tokens = 8192,
+          temperature = 0.8,
+          top_k = 1,
+          top_p = 0.6,
+        },
+      },
+    },
+  },
 }

 local STREAMS = {

From c0374808ae4fadc1ee0054bff04becdeda140858 Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Tue, 9 Jul 2024 22:56:20 +0100
Subject: [PATCH 11/24] add bedrock driver class itself

---
 kong/llm/drivers/bedrock.lua | 437 +++++++++++++++++++++++++++++++++++
 1 file changed, 437 insertions(+)
 create mode 100644 kong/llm/drivers/bedrock.lua

diff --git a/kong/llm/drivers/bedrock.lua b/kong/llm/drivers/bedrock.lua
new file mode 100644
index 000000000000..cdf03401ea34
--- /dev/null
+++ b/kong/llm/drivers/bedrock.lua
@@ -0,0 +1,437 @@
+local _M = {}
+
+-- imports
+local cjson = require("cjson.safe")
+local fmt = string.format
+local ai_shared = require("kong.llm.drivers.shared")
+local socket_url = require("socket.url")
+local string_gsub = string.gsub
+local buffer = require("string.buffer")
+local table_insert = table.insert
+local string_lower = string.lower
+local string_sub = string.sub
+local signer = require("resty.aws.request.sign")
+--
+
+-- globals
+local DRIVER_NAME = "bedrock"
+--
+
+local _OPENAI_ROLE_MAPPING = {
+  ["system"] = "assistant",
+  ["user"] = "user",
+  ["assistant"] = "assistant",
+}
+
+local function to_bedrock_generation_config(request_table)
+  return {
+    ["maxTokens"] = request_table.max_tokens,
+    ["stopSequences"] = request_table.stop,
+    ["temperature"] = request_table.temperature,
+    ["topP"] = request_table.top_p,
+  }
+end
+
+local function to_additional_request_fields(request_table)
+  return {
+    request_table.bedrock.additionalModelRequestFields
+  }
+end
+
+local function to_tool_config(request_table)
+  return {
+    request_table.bedrock.toolConfig
+  }
+end
+
+local function handle_stream_event(event_t, model_info, route_type)
+  local new_event, metadata
+
+  if (not event_t) or (not event_t.data) then
+    return "", nil, nil
+  end
+
+  -- decode and determine the event type
+  local event = cjson.decode(event_t.data)
+  local event_type = event and event.headers and event.headers[":event-type"]
+
+  if not event_type then
+    return "", nil, nil
+  end
+
+  local body = event.body and cjson.decode(event.body)
+
+  if not body then
+    return "", nil, nil
+  end
+
+  if event_type == "messageStart" then
+    new_event = {
+      choices = {
+        [1] = {
+          delta = {
+            content = "",
+            role = body.role,
+          },
+          index = 0,
+          logprobs = cjson.null,
+        },
+      },
+      model = model_info.name,
+      object = "chat.completion.chunk",
+      system_fingerprint = cjson.null,
+    }
+
+  elseif event_type == "contentBlockDelta" then
+    new_event = {
+      choices = {
+        [1] = {
+          delta = {
+            content = (body.delta
+                   and body.delta.text)
+                    or "",
+          },
+          index = 0,
+          finish_reason = cjson.null,
+          logprobs = cjson.null,
+        },
+      },
+      model = model_info.name,
+      object = "chat.completion.chunk",
+    }
+
+  elseif event_type == "messageStop" then
+    new_event = {
+      choices = {
+        [1] = {
+          delta = {},
+          index = 0,
+          finish_reason = body.stopReason,
+          logprobs = cjson.null,
+        },
+      },
+      model = model_info.name,
+      object = "chat.completion.chunk",
+    }
+
+  elseif event_type == "metadata" then
+    metadata = {
+      prompt_tokens = body.usage and body.usage.inputTokens or 0,
+      completion_tokens = body.usage and body.usage.outputTokens or 0,
+    }
+
+    new_event = "[DONE]"
+
+  elseif event_type == "contentBlockStop" then
+    -- placeholder - I don't think this does anything yet
+
+  end
+
+  if new_event then
+    if new_event ~= "[DONE]" then
+      new_event = cjson.encode(new_event)
+    end
+
+    return new_event, nil, metadata
+  else
+    return nil, nil, metadata  -- caller code will handle "unrecognised" event types
+  end
+end
": (unit)", function() it("transforms text/event-stream type", function() local input = pl_file.read(fmt("spec/fixtures/ai-proxy/unit/streaming-chunk-formats/text-event-stream/input.bin")) - local events = ai_shared.frame_to_events(input, false) -- not "truncated json mode" like Gemini + local events = ai_shared.frame_to_events(input, "openai") -- not "truncated json mode" like Gemini local expected = pl_file.read(fmt("spec/fixtures/ai-proxy/unit/streaming-chunk-formats/text-event-stream/expected-output.json")) local expected_events = cjson.decode(expected) From 810891e91c87b5b8bea9b0f003526973e56bb870 Mon Sep 17 00:00:00 2001 From: Jack Tysoe Date: Tue, 9 Jul 2024 22:55:42 +0100 Subject: [PATCH 10/24] add bedrock format test scaffolding --- spec/03-plugins/38-ai-proxy/01-unit_spec.lua | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/spec/03-plugins/38-ai-proxy/01-unit_spec.lua b/spec/03-plugins/38-ai-proxy/01-unit_spec.lua index bb444a8b28b1..0f3b15bbdee3 100644 --- a/spec/03-plugins/38-ai-proxy/01-unit_spec.lua +++ b/spec/03-plugins/38-ai-proxy/01-unit_spec.lua @@ -237,6 +237,20 @@ local FORMATS = { }, }, }, + bedrock = { + ["llm/v1/chat"] = { + config = { + name = "bedrock", + provider = "bedrock", + options = { + max_tokens = 8192, + temperature = 0.8, + top_k = 1, + top_p = 0.6, + }, + }, + }, + }, } local STREAMS = { From c0374808ae4fadc1ee0054bff04becdeda140858 Mon Sep 17 00:00:00 2001 From: Jack Tysoe Date: Tue, 9 Jul 2024 22:56:20 +0100 Subject: [PATCH 11/24] add bedrock driver class itself --- kong/llm/drivers/bedrock.lua | 437 +++++++++++++++++++++++++++++++++++ 1 file changed, 437 insertions(+) create mode 100644 kong/llm/drivers/bedrock.lua diff --git a/kong/llm/drivers/bedrock.lua b/kong/llm/drivers/bedrock.lua new file mode 100644 index 000000000000..cdf03401ea34 --- /dev/null +++ b/kong/llm/drivers/bedrock.lua @@ -0,0 +1,437 @@ +local _M = {} + +-- imports +local cjson = require("cjson.safe") +local fmt = string.format +local ai_shared = require("kong.llm.drivers.shared") +local socket_url = require("socket.url") +local string_gsub = string.gsub +local buffer = require("string.buffer") +local table_insert = table.insert +local string_lower = string.lower +local string_sub = string.sub +local signer = require("resty.aws.request.sign") +-- + +-- globals +local DRIVER_NAME = "bedrock" +-- + +local _OPENAI_ROLE_MAPPING = { + ["system"] = "assistant", + ["user"] = "user", + ["assistant"] = "assistant", +} + +local function to_bedrock_generation_config(request_table) + return { + ["maxTokens"] = request_table.max_tokens, + ["stopSequences"] = request_table.stop, + ["temperature"] = request_table.temperature, + ["topP"] = request_table.top_p, + } +end + +local function to_additional_request_fields(request_table) + return { + request_table.bedrock.additionalModelRequestFields + } +end + +local function to_tool_config(request_table) + return { + request_table.bedrock.toolConfig + } +end + +local function handle_stream_event(event_t, model_info, route_type) + local new_event, metadata + + if (not event_t) or (not event_t.data) then + return "", nil, nil + end + + -- decode and determine the event type + local event = cjson.decode(event_t.data) + local event_type = event and event.headers and event.headers[":event-type"] + + if not event_type then + return "", nil, nil + end + + local body = event.body and cjson.decode(event.body) + + if not body then + return "", nil, nil + end + + if event_type == "messageStart" then + new_event = { + choices = { + [1] = { + delta = 
+
+local function to_bedrock_chat_openai(request_table, model_info, route_type)
+  if not request_table then  -- try-catch type mechanism
+    local err = "empty request table received for transformation"
+    ngx.log(ngx.ERR, err)
+    return nil, nil, err
+  end
+
+  local new_r = {}
+
+  -- anthropic models support variable versions, just like self-hosted
+  new_r.anthropic_version = model_info.options and model_info.options.anthropic_version
+                         or "bedrock-2023-05-31"
+
+  if request_table.messages and #request_table.messages > 0 then
+    local system_prompts = {}
+
+    for i, v in ipairs(request_table.messages) do
+      -- for 'system', we just concat them all into one Gemini instruction
+      if v.role and v.role == "system" then
+        system_prompts[#system_prompts+1] = { text = v.content }
+
+      else
+        -- for any other role, just construct the chat history as 'parts.text' type
+        new_r.messages = new_r.messages or {}
+        table_insert(new_r.messages, {
+          role = _OPENAI_ROLE_MAPPING[v.role or "user"],  -- default to 'user'
+          content = {
+            {
+              text = v.content or ""
+            },
+          },
+        })
+      end
+    end
+
+    -- only works for some models
+    if #system_prompts > 0 then
+      for _, p in ipairs(ai_shared.bedrock_unsupported_system_role_patterns) do
+        if model_info.name:find(p) then
+          return nil, nil, "system prompts are unsupported for model '" .. model_info.name
+        end
+      end
+
+      new_r.system = system_prompts
+    end
+  end
+
+  new_r.inferenceConfig = to_bedrock_generation_config(request_table)
+
+  new_r.toolConfig = request_table.bedrock
+                 and request_table.bedrock.toolConfig
+                 and to_tool_config(request_table)
+
+  new_r.additionalModelRequestFields = request_table.bedrock
+                                   and request_table.bedrock.additionalModelRequestFields
+                                   and to_additional_request_fields(request_table)
+
+  return new_r, "application/json", nil
+end
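Putting that together, an OpenAI-shaped chat request such as:

    { "messages": [ { "role": "system", "content": "You are a mathematician." },
                    { "role": "user",   "content": "What is 1 + 2?" } ],
      "max_tokens": 256, "temperature": 0.8 }

leaves this transformer as a Converse-API body along the lines of:

    { "system":   [ { "text": "You are a mathematician." } ],
      "messages": [ { "role": "user", "content": [ { "text": "What is 1 + 2?" } ] } ],
      "inferenceConfig": { "maxTokens": 256, "temperature": 0.8 } }

(the unit-test fixtures added later in this series exercise exactly this shape).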
+
+local function from_bedrock_chat_openai(response, model_info, route_type)
+  local response, err = cjson.decode(response)
+
+  if err then
+    local err_client = "failed to decode response from Bedrock"
+    ngx.log(ngx.ERR, fmt("%s: %s", err_client, err))
+    return nil, err_client
+  end
+
+  -- messages/choices table is only 1 size, so don't need to static allocate
+  local client_response = {}
+  client_response.choices = {}
+
+  if response.output
+     and response.output.message
+     and response.output.message.content
+     and #response.output.message.content > 0
+     and response.output.message.content[1].text then
+
+    client_response.choices[1] = {
+      index = 0,
+      message = {
+        role = "assistant",
+        content = response.output.message.content[1].text,
+      },
+      finish_reason = string_lower(response.stopReason),
+    }
+    client_response.object = "chat.completion"
+    client_response.model = model_info.name
+
+  else  -- probably a server fault or other unexpected response
+    local err = "no generation candidates received from Bedrock, or max_tokens too short"
+    ngx.log(ngx.ERR, err)
+    return nil, err
+  end
+
+  -- process analytics
+  if response.usage then
+    client_response.usage = {
+      prompt_tokens = response.usage.inputTokens,
+      completion_tokens = response.usage.outputTokens,
+      total_tokens = response.usage.totalTokens,
+    }
+  end
+
+  return cjson.encode(client_response)
+end
+
+local transformers_to = {
+  ["llm/v1/chat"] = to_bedrock_chat_openai,
+}
+
+local transformers_from = {
+  ["llm/v1/chat"] = from_bedrock_chat_openai,
+  ["stream/llm/v1/chat"] = handle_stream_event,
+}
+
+function _M.from_format(response_string, model_info, route_type)
+  ngx.log(ngx.DEBUG, "converting from ", model_info.provider, "://", route_type, " type to kong")
+
+  -- MUST return a string, to set as the response body
+  if not transformers_from[route_type] then
+    return nil, fmt("no transformer available from format %s://%s", model_info.provider, route_type)
+  end
+
+  local ok, response_string, err, metadata = pcall(transformers_from[route_type], response_string, model_info, route_type)
+  if not ok or err then
+    return nil, fmt("transformation failed from type %s://%s: %s",
+                    model_info.provider,
+                    route_type,
+                    err or "unexpected_error"
+                   )
+  end
+
+  return response_string, nil, metadata
+end
+
+function _M.to_format(request_table, model_info, route_type)
+  ngx.log(ngx.DEBUG, "converting from kong type to ", model_info.provider, "/", route_type)
+
+  if route_type == "preserve" then
+    -- do nothing
+    return request_table, nil, nil
+  end
+
+  if not transformers_to[route_type] then
+    return nil, nil, fmt("no transformer for %s://%s", model_info.provider, route_type)
+  end
+
+  request_table = ai_shared.merge_config_defaults(request_table, model_info.options, model_info.route_type)
+
+  local ok, response_object, content_type, err = pcall(
+    transformers_to[route_type],
+    request_table,
+    model_info
+  )
+  if err or (not ok) then
+    return nil, nil, fmt("error transforming to %s://%s: %s", model_info.provider, route_type, err)
+  end
+
+  return response_object, content_type, nil
+end
+
+function _M.subrequest(body, conf, http_opts, return_res_table)
+  -- use shared/standard subrequest routine
+  local body_string, err
+
+  if type(body) == "table" then
+    body_string, err = cjson.encode(body)
+    if err then
+      return nil, nil, "failed to parse body to json: " .. err
+    end
+  elseif type(body) == "string" then
+    body_string = body
+  else
+    return nil, nil, "body must be table or string"
+  end
+
+  -- may be overridden
+  local url = (conf.model.options and conf.model.options.upstream_url)
+    or fmt(
+    "%s%s",
+    ai_shared.upstream_url_format[DRIVER_NAME],
+    ai_shared.operation_map[DRIVER_NAME][conf.route_type].path
+  )
+
+  local method = ai_shared.operation_map[DRIVER_NAME][conf.route_type].method
+
+  local headers = {
+    ["Accept"] = "application/json",
+    ["Content-Type"] = "application/json",
+  }
+
+  if conf.auth and conf.auth.header_name then
+    headers[conf.auth.header_name] = conf.auth.header_value
+  end
+
+  local res, err, httpc = ai_shared.http_request(url, body_string, method, headers, http_opts, return_res_table)
+  if err then
+    return nil, nil, "request to ai service failed: " .. err
+  end
+
+  if return_res_table then
+    return res, res.status, nil, httpc
+  else
+    -- At this point, the entire request / response is complete and the connection
+    -- will be closed or back on the connection pool.
+    local status = res.status
+    local body = res.body
+
+    if status > 299 then
+      return body, res.status, "status code " .. status
+    end
+
+    return body, res.status, nil
+  end
+end
+
+function _M.header_filter_hooks(body)
+  -- nothing to parse in header_filter phase
+end
+
+function _M.post_request(conf)
+  if ai_shared.clear_response_headers[DRIVER_NAME] then
+    for i, v in ipairs(ai_shared.clear_response_headers[DRIVER_NAME]) do
+      kong.response.clear_header(v)
+    end
+  end
+end
+
+function _M.pre_request(conf, body)
+  -- disable gzip for bedrock because it breaks streaming
+  kong.service.request.set_header("Accept-Encoding", "gzip, identity")
+
+  return true, nil
+end
+
+-- returns err or nil
+function _M.configure_request(conf, aws_sdk)
+  local operation = kong.ctx.shared.ai_proxy_streaming_mode and "converse-stream"
+                    or "converse"
+
+  local f_url = conf.model.options and conf.model.options.upstream_url
+
+  if not f_url then  -- upstream_url override is not set
+    local uri = fmt(ai_shared.upstream_url_format[DRIVER_NAME], aws_sdk.config.region)
+    local path = fmt(
+      ai_shared.operation_map[DRIVER_NAME][conf.route_type].path,
+      conf.model.name,
+      operation)
+
+    f_url = fmt("%s%s", uri, path)
+  end
+
+  local parsed_url = socket_url.parse(f_url)
+
+  if conf.model.options and conf.model.options.upstream_path then
+    -- upstream path override is set (or templated from request params)
+    parsed_url.path = conf.model.options.upstream_path
+  end
+
+  -- if the path is read from a URL capture, ensure that it is valid
+  parsed_url.path = string_gsub(parsed_url.path, "^/*", "/")
+
+  kong.service.request.set_path(parsed_url.path)
+  kong.service.request.set_scheme(parsed_url.scheme)
+  kong.service.set_target(parsed_url.host, (tonumber(parsed_url.port) or 443))
+
+  -- do the IAM auth and signature headers
+  aws_sdk.config.signatureVersion = "v4"
+  aws_sdk.config.endpointPrefix = "bedrock"
+
+  local r = {
+    headers = {},
+    method = ai_shared.operation_map[DRIVER_NAME][conf.route_type].method,
+    path = parsed_url.path,
+    host = parsed_url.host,
+    port = tonumber(parsed_url.port) or 443,
+    body = kong.request.get_raw_body()
+  }
+
+  local signature = signer(aws_sdk.config, r)
+
+  if not signature then
+    return nil, "failed to sign AWS request"
+  end
+
+  kong.service.request.set_header("Authorization", signature.headers["Authorization"])
+
+  if signature.headers["X-Amz-Security-Token"] then
+    kong.service.request.set_header("X-Amz-Security-Token", signature.headers["X-Amz-Security-Token"])
+  end
+
+  if signature.headers["X-Amz-Date"] then
+    kong.service.request.set_header("X-Amz-Date", signature.headers["X-Amz-Date"])
+  end
+
+  return true
+end
+
+return _M
\ No newline at end of file
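configure_request above signs the outgoing call with SigV4 through lua-resty-aws and copies the resulting headers onto the proxied request. A reduced sketch of that flow, using the same names the driver uses:

    local signer = require("resty.aws.request.sign")

    local signature = signer(aws_sdk.config, {
      method  = "POST",
      path    = parsed_url.path,
      host    = parsed_url.host,
      port    = 443,
      headers = {},
      body    = kong.request.get_raw_body(),
    })
    -- Authorization, X-Amz-Date and, for temporary credentials,
    -- X-Amz-Security-Token are then set as upstream request headers
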

From 0cb31f2416f0f1177075d4bb5db87d36ef182745 Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Tue, 9 Jul 2024 22:57:56 +0100
Subject: [PATCH 12/24] add bedrock drivers to rockspec

---
 kong-3.8.0-0.rockspec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kong-3.8.0-0.rockspec b/kong-3.8.0-0.rockspec
index 12859251c30c..f7ead8c8957b 100644
--- a/kong-3.8.0-0.rockspec
+++ b/kong-3.8.0-0.rockspec
@@ -613,8 +613,8 @@ build = {
     ["kong.llm.drivers.anthropic"] = "kong/llm/drivers/anthropic.lua",
     ["kong.llm.drivers.mistral"] = "kong/llm/drivers/mistral.lua",
     ["kong.llm.drivers.llama2"] = "kong/llm/drivers/llama2.lua",
-
     ["kong.llm.drivers.gemini"] = "kong/llm/drivers/gemini.lua",
+    ["kong.llm.drivers.bedrock"] = "kong/llm/drivers/bedrock.lua",

     ["kong.plugins.ai-prompt-decorator.handler"] = "kong/plugins/ai-prompt-decorator/handler.lua",
     ["kong.plugins.ai-prompt-decorator.schema"] = "kong/plugins/ai-prompt-decorator/schema.lua",

From 9252142ffe74aa593bbe9a95246da9b8b2a8b933 Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Tue, 9 Jul 2024 23:13:29 +0100
Subject: [PATCH 13/24] fix(ai-proxy)(general): body_filter should not run on
 failure

---
 kong/plugins/ai-proxy/handler.lua | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kong/plugins/ai-proxy/handler.lua b/kong/plugins/ai-proxy/handler.lua
index 4bfa8ed97fd7..8e46e66d07cc 100644
--- a/kong/plugins/ai-proxy/handler.lua
+++ b/kong/plugins/ai-proxy/handler.lua
@@ -339,6 +339,11 @@ function _M:body_filter(conf)
     return
   end

+  -- only act on 200 in first release - pass the unmodified response all the way through if any failure
+  if kong.response.get_status() ~= 200 then
+    return
+  end
+
   local route_type = conf.route_type

   if kong_ctx_shared.skip_response_transformer and (route_type ~= "preserve") then

From 7275453ff2a7c6c5c411eb07b271bab571e3d803 Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Wed, 10 Jul 2024 01:03:49 +0100
Subject: [PATCH 14/24] add aws streaming format parser test

---
 spec/03-plugins/38-ai-proxy/01-unit_spec.lua  | 14 ++++++++++++
 .../aws/expected-output.json                  | 20 ++++++++++++++++++
 .../streaming-chunk-formats/aws/input.bin     | Bin 0 -> 1506 bytes
 3 files changed, 34 insertions(+)
 create mode 100644 spec/fixtures/ai-proxy/unit/streaming-chunk-formats/aws/expected-output.json
 create mode 100644 spec/fixtures/ai-proxy/unit/streaming-chunk-formats/aws/input.bin

diff --git a/spec/03-plugins/38-ai-proxy/01-unit_spec.lua b/spec/03-plugins/38-ai-proxy/01-unit_spec.lua
index 0f3b15bbdee3..009f079195d0 100644
--- a/spec/03-plugins/38-ai-proxy/01-unit_spec.lua
+++ b/spec/03-plugins/38-ai-proxy/01-unit_spec.lua
@@ -716,6 +716,20 @@ describe(PLUGIN_NAME .. ": (unit)", function()
       assert.same(events, expected_events)
     end)
": (unit)", function() assert.same(events, expected_events) end) + it("transforms application/vnd.amazon.eventstream (AWS) type", function() + local input = pl_file.read(fmt("spec/fixtures/ai-proxy/unit/streaming-chunk-formats/aws/input.bin")) + local events = ai_shared.frame_to_events(input, "bedrock") + + local expected = pl_file.read(fmt("spec/fixtures/ai-proxy/unit/streaming-chunk-formats/aws/expected-output.json")) + local expected_events = cjson.decode(expected) + + assert.equal(#events, #expected_events) + for i, _ in ipairs(expected_events) do + -- tables are random ordered, so we need to compare each serialized event + assert.same(cjson.decode(events[i].data), cjson.decode(expected_events[i].data)) + end + end) + end) end) diff --git a/spec/fixtures/ai-proxy/unit/streaming-chunk-formats/aws/expected-output.json b/spec/fixtures/ai-proxy/unit/streaming-chunk-formats/aws/expected-output.json new file mode 100644 index 000000000000..8761c5593608 --- /dev/null +++ b/spec/fixtures/ai-proxy/unit/streaming-chunk-formats/aws/expected-output.json @@ -0,0 +1,20 @@ +[ + { + "data": "{\"body\":\"{\\\"p\\\":\\\"abcdefghijkl\\\",\\\"role\\\":\\\"assistant\\\"}\",\"headers\":{\":event-type\":\"messageStart\",\":content-type\":\"application\/json\",\":message-type\":\"event\"}}" + }, + { + "data": "{\"body\":\"{\\\"contentBlockIndex\\\":0,\\\"delta\\\":{\\\"text\\\":\\\"Hello! Relativity is a set of physical theories that are collectively known as special relativity and general relativity, proposed by Albert Einstein. These theories revolutionized our understanding of space, time, and gravity, and have had far-reach\\\"},\\\"p\\\":\\\"abcd\\\"}\",\"headers\":{\":event-type\":\"contentBlockDelta\",\":content-type\":\"application\\/json\",\":message-type\":\"event\"}}" + }, + { + "data": "{\"headers\":{\":event-type\":\"contentBlockDelta\",\":message-type\":\"event\",\":content-type\":\"application\\/json\"},\"body\":\"{\\\"contentBlockIndex\\\":0,\\\"delta\\\":{\\\"text\\\":\\\"ing implications in various scientific and technological fields. 
Special relativity applies to all physical phenomena in the absence of gravity, while general relativity explains the law of gravity and its effects on the nature of space, time, and matter.\\\"},\\\"p\\\":\\\"abcdefghijk\\\"}\"}" + }, + { + "data": "{\"body\":\"{\\\"contentBlockIndex\\\":0,\\\"p\\\":\\\"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQR\\\"}\",\"headers\":{\":content-type\":\"application\\/json\",\":event-type\":\"contentBlockStop\",\":message-type\":\"event\"}}" + }, + { + "data": "{\"body\":\"{\\\"p\\\":\\\"abcdefghijklm\\\",\\\"stopReason\\\":\\\"end_turn\\\"}\",\"headers\":{\":message-type\":\"event\",\":content-type\":\"application\\/json\",\":event-type\":\"messageStop\"}}" + }, + { + "data": "{\"headers\":{\":message-type\":\"event\",\":content-type\":\"application\\/json\",\":event-type\":\"metadata\"},\"body\":\"{\\\"metrics\\\":{\\\"latencyMs\\\":2613},\\\"p\\\":\\\"abcdefghijklmnopqrstuvwxyzABCDEF\\\",\\\"usage\\\":{\\\"inputTokens\\\":9,\\\"outputTokens\\\":97,\\\"totalTokens\\\":106}}\"}" + } +] \ No newline at end of file diff --git a/spec/fixtures/ai-proxy/unit/streaming-chunk-formats/aws/input.bin b/spec/fixtures/ai-proxy/unit/streaming-chunk-formats/aws/input.bin new file mode 100644 index 0000000000000000000000000000000000000000..8f9d03b4f7e02272e671c429836521d8db102676 GIT binary patch literal 1506 zcmc&!!A=xG5FHOj6C{B@XxpO;f+VOLJ%FH~g0c{ZiAri`YImCL?QXk!c6T820bVq6 z(Zr)S4+cGXF!2}s0ZsS-6XQWWTC*&$t9ud?4?UUb>3aRD-m40N;873+rF(C#lm?2} zK;=h#YVbsGs*cV<6_-6&KUfU4@`pc%z!)h)@ItF|8&0diV&`}#`gj{^iyL0#P!1!k zRAaGGlf1yKmYDB4C!7c6d1lrxr$KP+84+2#xHj>km&kDE>S?LN+6+n$f6b;FXEE7k8(_2+~OWvo|w&{l=?I)p``p8!lz6)2F#&ny24DHI?7x!AEUMc9ld~OV3CDd zN(?CmWy)$wHx&cNoWW4gd3%TlDq>YsXnVE`%vf!-!-$nrHYLkwJ)0HEc@%1tt;Fc? z@K%crEt6aTu}Kj+u`HpKY+lnysA|sD83?h!yr|Q&eW`^!p}mh78pvXZSOx5eMF?Y_ z6%G{R*^_sl?~*Jpb6Mo~kx&0wmOLj>Kd_x+La_!|p%bccD9D_mBEoi>9>Z8^sjF?F zDgtHXM%i*7A#xk%5^4rg9%^f1RJx7|@=Hi)24#mT#Js50{Teo7A+e8+3|mG5>DG>v z(Cmt8(-Yn?tW?MWNmox8sB^v7X z`?A;OA2-hL&0Sa8JR}1qjbg3=orDT8q2i?mz8ibFz*eA~?dh zApk2vmyZ@2CFGxUfj-Vpj!$&;+5TkgS3IJg Date: Wed, 24 Jul 2024 23:52:16 +0100 Subject: [PATCH 15/24] feat(ai-proxy): bedrock unit tests --- kong/llm/drivers/bedrock.lua | 2 +- kong/plugins/ai-proxy/handler.lua | 8 +-- .../02-openai_integration_spec.lua | 4 +- .../bedrock/llm-v1-chat.json | 55 +++++++++++++++++++ .../bedrock/llm-v1-chat.json | 19 +++++++ .../real-responses/bedrock/llm-v1-chat.json | 21 +++++++ 6 files changed, 101 insertions(+), 8 deletions(-) create mode 100644 spec/fixtures/ai-proxy/unit/expected-requests/bedrock/llm-v1-chat.json create mode 100644 spec/fixtures/ai-proxy/unit/expected-responses/bedrock/llm-v1-chat.json create mode 100644 spec/fixtures/ai-proxy/unit/real-responses/bedrock/llm-v1-chat.json diff --git a/kong/llm/drivers/bedrock.lua b/kong/llm/drivers/bedrock.lua index cdf03401ea34..ae981f87442d 100644 --- a/kong/llm/drivers/bedrock.lua +++ b/kong/llm/drivers/bedrock.lua @@ -154,7 +154,7 @@ local function to_bedrock_chat_openai(request_table, model_info, route_type) local system_prompts = {} for i, v in ipairs(request_table.messages) do - -- for 'system', we just concat them all into one Gemini instruction + -- for 'system', we just concat them all into one Bedrock instruction if v.role and v.role == "system" then system_prompts[#system_prompts+1] = { text = v.content } diff --git a/kong/plugins/ai-proxy/handler.lua b/kong/plugins/ai-proxy/handler.lua index 8e46e66d07cc..da3cb4f77ab8 100644 --- a/kong/plugins/ai-proxy/handler.lua +++ b/kong/plugins/ai-proxy/handler.lua @@ 
-339,17 +339,14 @@ function _M:body_filter(conf)
     return
   end

-  -- only act on 200 in first release - pass the unmodified response all the way through if any failure
-  if kong.response.get_status() ~= 200 then
-    return
-  end
-
   local route_type = conf.route_type

   if kong_ctx_shared.skip_response_transformer and (route_type ~= "preserve") then
     local response_body
+
     if kong_ctx_shared.parsed_response then
       response_body = kong_ctx_shared.parsed_response
+
+    elseif kong.response.get_status() == 200 then
       response_body = kong.service.response.get_raw_body()
       if not response_body then
@@ -368,6 +365,7 @@ function _M:body_filter(conf)

       if err then
         kong.log.warn("issue when transforming the response body for analytics in the body filter phase, ", err)
+
       elseif new_response_string then
         ai_shared.post_request(conf, new_response_string)
       end

diff --git a/spec/03-plugins/38-ai-proxy/02-openai_integration_spec.lua b/spec/03-plugins/38-ai-proxy/02-openai_integration_spec.lua
index b67d815fa07e..b1cd81295026 100644
--- a/spec/03-plugins/38-ai-proxy/02-openai_integration_spec.lua
+++ b/spec/03-plugins/38-ai-proxy/02-openai_integration_spec.lua
@@ -902,12 +902,12 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then
           },
           body = pl_file.read("spec/fixtures/ai-proxy/openai/llm-v1-chat/requests/good.json"),
         })
-        
+
         -- check we got internal server error
         local body = assert.res_status(500 , r)
         local json = cjson.decode(body)

        assert.is_truthy(json.error)
-        assert.equals(json.error.message, "transformation failed from type openai://llm/v1/chat: 'choices' not in llm/v1/chat response")
+        assert.same(json.error.message, "transformation failed from type openai://llm/v1/chat: 'choices' not in llm/v1/chat response")
       end)

       it("bad request", function()
diff --git a/spec/fixtures/ai-proxy/unit/expected-requests/bedrock/llm-v1-chat.json b/spec/fixtures/ai-proxy/unit/expected-requests/bedrock/llm-v1-chat.json
new file mode 100644
index 000000000000..ad68f6b28338
--- /dev/null
+++ b/spec/fixtures/ai-proxy/unit/expected-requests/bedrock/llm-v1-chat.json
@@ -0,0 +1,55 @@
+{
+  "system": [
+    {
+      "text": "You are a mathematician."
+    }
+  ],
+  "messages": [
+    {
+      "content": [
+        {
+          "text": "What is 1 + 2?"
+        }
+      ],
+      "role": "user"
+    },
+    {
+      "content": [
+        {
+          "text": "The sum of 1 + 2 is 3. If you have any more math questions or if there's anything else I can help you with, feel free to ask!"
+        }
+      ],
+      "role": "assistant"
+    },
+    {
+      "content": [
+        {
+          "text": "Multiply that by 2"
+        }
+      ],
+      "role": "user"
+    },
+    {
+      "content": [
+        {
+          "text": "Certainly! If you multiply 3 by 2, the result is 6. If you have any more questions or if there's anything else I can help you with, feel free to ask!"
+        }
+      ],
+      "role": "assistant"
+    },
+    {
+      "content": [
+        {
+          "text": "Why can't you divide by zero?"
+        }
+      ],
+      "role": "user"
+    }
+  ],
+  "inferenceConfig": {
+    "maxTokens": 8192,
+    "temperature": 0.8,
+    "topP": 0.6
+  },
+  "anthropic_version": "bedrock-2023-05-31"
+}
\ No newline at end of file
diff --git a/spec/fixtures/ai-proxy/unit/expected-responses/bedrock/llm-v1-chat.json b/spec/fixtures/ai-proxy/unit/expected-responses/bedrock/llm-v1-chat.json
new file mode 100644
index 000000000000..948d3fb47465
--- /dev/null
+++ b/spec/fixtures/ai-proxy/unit/expected-responses/bedrock/llm-v1-chat.json
@@ -0,0 +1,19 @@
+{
+  "choices": [
+    {
+      "finish_reason": "end_turn",
+      "index": 0,
+      "message": {
+        "content": "You cannot divide by zero because it is not a valid operation in mathematics.",
+        "role": "assistant"
+      }
+    }
+  ],
+  "object": "chat.completion",
+  "usage": {
+    "completion_tokens": 119,
+    "prompt_tokens": 19,
+    "total_tokens": 138
+  },
+  "model": "bedrock"
+}
\ No newline at end of file
diff --git a/spec/fixtures/ai-proxy/unit/real-responses/bedrock/llm-v1-chat.json b/spec/fixtures/ai-proxy/unit/real-responses/bedrock/llm-v1-chat.json
new file mode 100644
index 000000000000..e995bbd984d1
--- /dev/null
+++ b/spec/fixtures/ai-proxy/unit/real-responses/bedrock/llm-v1-chat.json
@@ -0,0 +1,21 @@
+{
+  "metrics": {
+    "latencyMs": 14767
+  },
+  "output": {
+    "message": {
+      "content": [
+        {
+          "text": "You cannot divide by zero because it is not a valid operation in mathematics."
+        }
+      ],
+      "role": "assistant"
+    }
+  },
+  "stopReason": "end_turn",
+  "usage": {
+    "completion_tokens": 119,
+    "prompt_tokens": 19,
+    "total_tokens": 138
+  }
+}
\ No newline at end of file

From d0447355b8a33f970be49637a9b0e6b006d492f1 Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Wed, 24 Jul 2024 23:52:38 +0100
Subject: [PATCH 16/24] fix(ai-proxy): fix gemini streaming; fix gemini
 analytics

---
 kong/llm/drivers/bedrock.lua                  |  5 +--
 kong/llm/drivers/gemini.lua                   | 59 +++++++---------
 kong/llm/drivers/shared.lua                   | 21 +++++-
 kong/llm/init.lua                             |  3 -
 kong/plugins/ai-proxy/handler.lua             | 24 ++++---
 kong/tools/aws_stream.lua                     | 27 +++----
 .../real-responses/gemini/llm-v1-chat.json    | 67 ++++++++++---------
 .../partial-json-end/expected-output.json     |  3 +
 8 files changed, 109 insertions(+), 100 deletions(-)

diff --git a/kong/llm/drivers/bedrock.lua b/kong/llm/drivers/bedrock.lua
index ae981f87442d..21690fa32f54 100644
--- a/kong/llm/drivers/bedrock.lua
+++ b/kong/llm/drivers/bedrock.lua
@@ -6,10 +6,8 @@ local fmt = string.format
 local ai_shared = require("kong.llm.drivers.shared")
 local socket_url = require("socket.url")
 local string_gsub = string.gsub
-local buffer = require("string.buffer")
 local table_insert = table.insert
 local string_lower = string.lower
-local string_sub = string.sub
 local signer = require("resty.aws.request.sign")
 --
@@ -122,8 +120,7 @@ local function handle_stream_event(event_t, model_info, route_type)

     new_event = "[DONE]"

-  elseif event_type == "contentBlockStop" then
-    -- placeholder - I don't think this does anything yet
+  -- "contentBlockStop" is absent because it is not used for anything here

   end

   if new_event then
diff --git a/kong/llm/drivers/gemini.lua b/kong/llm/drivers/gemini.lua
index 59296ee9160b..f76488dcb19c 100644
--- a/kong/llm/drivers/gemini.lua
+++ b/kong/llm/drivers/gemini.lua
@@ -41,30 +41,32 @@ local function is_response_content(content)
          and content.candidates[1].content.parts[1].text
 end

-local function is_response_finished(content)
-  return content
-         and content.candidates
-         and #content.candidates > 0
-         and content.candidates[1].finishReason
-end
-
 local function handle_stream_event(event_t, model_info, route_type)
   -- discard empty frames, it should either be a random new line, or comment
   if (not event_t.data) or (#event_t.data < 1) then
     return
   end
-  
+
+  if event_t.data == "[DONE]" then
+    return "[DONE]", nil, nil
+  end
+
   local event, err = cjson.decode(event_t.data)
   if err then
     ngx.log(ngx.WARN, "failed to decode stream event frame from gemini: " .. err)
     return nil, "failed to decode stream event frame from gemini", nil
   end

-  local new_event
-  local metadata = nil
-
   if is_response_content(event) then
-    new_event = {
+    local metadata = {}
+    metadata.finished_reason = event.candidates
+        and #event.candidates > 0
+        and event.candidates[1].finishReason
+        or "STOP"
+    metadata.completion_tokens = event.usageMetadata and event.usageMetadata.candidatesTokenCount or 0
+    metadata.prompt_tokens = event.usageMetadata and event.usageMetadata.promptTokenCount or 0
+
+    local new_event = {
       choices = {
        [1] = {
          delta = {
@@ -75,28 +77,8 @@ local function handle_stream_event(event_t, model_info, route_type)
         },
       },
     }
-  end

-  if is_response_finished(event) then
-    metadata = metadata or {}
-    metadata.finished_reason = event.candidates[1].finishReason
-    new_event = "[DONE]"
-  end
-
-  if event.usageMetadata then
-    metadata = metadata or {}
-    metadata.completion_tokens = event.usageMetadata.candidatesTokenCount or 0
-    metadata.prompt_tokens = event.usageMetadata.promptTokenCount or 0
-  end
-
-  if new_event then
-    if new_event ~= "[DONE]" then
-      new_event = cjson.encode(new_event)
-    end
-
-    return new_event, nil, metadata
-  else
-    return nil, nil, metadata  -- caller code will handle "unrecognised" event types
+    return cjson.encode(new_event), nil, metadata
   end
 end
@@ -206,6 +188,15 @@ local function from_gemini_chat_openai(response, model_info, route_type)
     messages.object = "chat.completion"
     messages.model = model_info.name

+    -- process analytics
+    if response.usageMetadata then
+      messages.usage = {
+        prompt_tokens = response.usageMetadata.promptTokenCount,
+        completion_tokens = response.usageMetadata.candidatesTokenCount,
+        total_tokens = response.usageMetadata.totalTokenCount,
+      }
+    end
+
   else  -- probably a server fault or other unexpected response
     local err = "no generation candidates received from Gemini, or max_tokens too short"
     ngx.log(ngx.ERR, err)
diff --git a/kong/llm/drivers/shared.lua b/kong/llm/drivers/shared.lua
index 2336ffbee0c9..6b3714ca72b0 100644
--- a/kong/llm/drivers/shared.lua
+++ b/kong/llm/drivers/shared.lua
@@ -19,6 +19,10 @@ local cycle_aware_deep_copy = require("kong.tools.table").cycle_aware_deep_copy
 local function str_ltrim(s)  -- remove leading whitespace from string.
   return type(s) == "string" and s:gsub("^%s*", "")
 end
+
+local function str_rtrim(s)  -- remove trailing whitespace from string.
+  return type(s) == "string" and s:match('^(.*%S)%s*$')
+end
 --

 local log_entry_keys = {
@@ -249,20 +253,31 @@ function _M.frame_to_events(frame, provider)
   -- some new LLMs return the JSON object-by-object,
   -- because that totally makes sense to parse?!
   if provider == "gemini" then
+    local done = false
+
     -- if this is the first frame, it will begin with array opener '['
     frame = (string.sub(str_ltrim(frame), 1, 1) == "[" and string.sub(str_ltrim(frame), 2)) or frame

     -- it may start with ',' which is the start of the new frame
     frame = (string.sub(str_ltrim(frame), 1, 1) == "," and string.sub(str_ltrim(frame), 2)) or frame

-    -- finally, it may end with the array terminator ']' indicating the finished stream
-    frame = (string.sub(str_ltrim(frame), -1) == "]" and string.sub(str_ltrim(frame), 1, -2)) or frame
+    -- it may end with the array terminator ']' indicating the finished stream
+    if string.sub(str_rtrim(frame), -1) == "]" then
+      frame = string.sub(str_rtrim(frame), 1, -2)
+      done = true
+    end

     -- for multiple events that arrive in the same frame, split by top-level comma
     for _, v in ipairs(split(frame, "\n,")) do
       events[#events+1] = { data = v }
     end

+    if done then
+      -- add the done signal here
+      -- but we have to retrieve the metadata from a previous filter run
+      events[#events+1] = { data = "[DONE]" }
+    end
+
   elseif provider == "bedrock" then
     local parser = aws_stream:new(frame)
     while true do
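The reworked branch above copes with Gemini's array-of-objects streaming: the first frame opens a JSON array, continuation frames begin with a comma, and the final frame closes the array. Illustrative frames (heavily shortened):

    frame 1:  [{"candidates": ...}
    frame 2:  ,{"candidates": ...}
    frame 3:  ,{"candidates": ..., "usageMetadata": {...}}]

Stripping the '[' / ',' / ']' and splitting on top-level '\n,' yields one event per object, and seeing the closing bracket now also emits the synthetic "[DONE]" event that downstream filters expect.
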
if provider == "gemini" then + local done = false + -- if this is the first frame, it will begin with array opener '[' frame = (string.sub(str_ltrim(frame), 1, 1) == "[" and string.sub(str_ltrim(frame), 2)) or frame -- it may start with ',' which is the start of the new frame frame = (string.sub(str_ltrim(frame), 1, 1) == "," and string.sub(str_ltrim(frame), 2)) or frame - -- finally, it may end with the array terminator ']' indicating the finished stream - frame = (string.sub(str_ltrim(frame), -1) == "]" and string.sub(str_ltrim(frame), 1, -2)) or frame + -- it may end with the array terminator ']' indicating the finished stream + if string.sub(str_rtrim(frame), -1) == "]" then + frame = string.sub(str_rtrim(frame), 1, -2) + done = true + end -- for multiple events that arrive in the same frame, split by top-level comma for _, v in ipairs(split(frame, "\n,")) do events[#events+1] = { data = v } end + if done then + -- add the done signal here + -- but we have to retrieve the metadata from a previous filter run + events[#events+1] = { data = "[DONE]" } + end + elseif provider == "bedrock" then local parser = aws_stream:new(frame) while true do @@ -609,7 +624,7 @@ function _M.post_request(conf, response_object) end if response_object.usage.prompt_tokens and response_object.usage.completion_tokens - and conf.model.options.input_cost and conf.model.options.output_cost then + and conf.model.options and conf.model.options.input_cost and conf.model.options.output_cost then request_analytics_plugin[log_entry_keys.USAGE_CONTAINER][log_entry_keys.COST] = (response_object.usage.prompt_tokens * conf.model.options.input_cost + response_object.usage.completion_tokens * conf.model.options.output_cost) / 1000000 -- 1 million diff --git a/kong/llm/init.lua b/kong/llm/init.lua index 5a3454d293db..266f5e355a5c 100644 --- a/kong/llm/init.lua +++ b/kong/llm/init.lua @@ -137,9 +137,6 @@ do } end - -- needed for some drivers later - self.conf.model.source = "transformer-plugins" - -- convert it to the specified driver format ai_request, _, err = self.driver.to_format(ai_request, self.conf.model, "llm/v1/chat") if err then diff --git a/kong/plugins/ai-proxy/handler.lua b/kong/plugins/ai-proxy/handler.lua index da3cb4f77ab8..fccb66545f8a 100644 --- a/kong/plugins/ai-proxy/handler.lua +++ b/kong/plugins/ai-proxy/handler.lua @@ -5,7 +5,6 @@ local kong_utils = require("kong.tools.gzip") local kong_meta = require("kong.meta") local buffer = require "string.buffer" local strip = require("kong.tools.utils").strip -local to_hex = require("resty.string").to_hex -- cloud auth/sdk providers local GCP_SERVICE_ACCOUNT do @@ -230,14 +229,21 @@ local function handle_streaming_frame(conf) end if conf.logging and conf.logging.log_statistics and metadata then - kong_ctx_plugin.ai_stream_completion_tokens = - (kong_ctx_plugin.ai_stream_completion_tokens or 0) + - (metadata.completion_tokens or 0) - or kong_ctx_plugin.ai_stream_completion_tokens - kong_ctx_plugin.ai_stream_prompt_tokens = - (kong_ctx_plugin.ai_stream_prompt_tokens or 0) + - (metadata.prompt_tokens or 0) - or kong_ctx_plugin.ai_stream_prompt_tokens + -- gemini metadata specifically, works differently + if conf.model.provider == "gemini" then + print(metadata.completion_tokens) + kong_ctx_plugin.ai_stream_completion_tokens = metadata.completion_tokens or 0 + kong_ctx_plugin.ai_stream_prompt_tokens = metadata.prompt_tokens or 0 + else + kong_ctx_plugin.ai_stream_completion_tokens = + (kong_ctx_plugin.ai_stream_completion_tokens or 0) + + (metadata.completion_tokens or 
0) + or kong_ctx_plugin.ai_stream_completion_tokens + kong_ctx_plugin.ai_stream_prompt_tokens = + (kong_ctx_plugin.ai_stream_prompt_tokens or 0) + + (metadata.prompt_tokens or 0) + or kong_ctx_plugin.ai_stream_prompt_tokens + end end end end diff --git a/kong/tools/aws_stream.lua b/kong/tools/aws_stream.lua index cee1c9ed4be0..ebefc2c26566 100644 --- a/kong/tools/aws_stream.lua +++ b/kong/tools/aws_stream.lua @@ -98,7 +98,7 @@ function Stream:next_int(size) return nil, nil, "cannot work on integers smaller than 8 bits long" end - local int, err = self:next_bytes(size / 8, trim) + local int, err = self:next_bytes(size / 8) if err then return nil, nil, err end @@ -123,22 +123,20 @@ function Stream:next_message() -- this is a chicken and egg problem, because we need to -- read the message to get the length, to then re-read the -- whole message at correct offset - local msg_len, orig_len, err = self:next_int(32) + local msg_len, _, err = self:next_int(32) if err then return err end -- get the headers length - local headers_len, orig_headers_len, err = self:next_int(32) + local headers_len, _, err = self:next_int(32) + if err then + return err + end -- get the preamble checksum - local preamble_checksum, orig_preamble_checksum, err = self:next_int(32) - - -- TODO: calculate checksum - -- local result = crc32(orig_len .. origin_headers_len, preamble_checksum) - -- if not result then - -- return nil, "preamble checksum failed - message is corrupted" - -- end + -- skip it because we're not using UDP + self:next_int(32) -- pull the headers from the buf local headers = {} @@ -167,12 +165,9 @@ function Stream:next_message() local body = self:next_utf_8(msg_len - self.read_count - 4) -- last 4 bytes is a body checksum - local msg_checksum = self:next_int(32) - -- TODO CHECK FULL MESSAGE CHECKSUM - -- local result = crc32(original_full_msg, msg_checksum) - -- if not result then - -- return nil, "preamble checksum failed - message is corrupted" - -- end + -- skip it because we're not using UDP + self:next_int(32) + -- rewind the tape self.read_count = 0 diff --git a/spec/fixtures/ai-proxy/unit/real-responses/gemini/llm-v1-chat.json b/spec/fixtures/ai-proxy/unit/real-responses/gemini/llm-v1-chat.json index 80781b6eb72a..96933d9835e6 100644 --- a/spec/fixtures/ai-proxy/unit/real-responses/gemini/llm-v1-chat.json +++ b/spec/fixtures/ai-proxy/unit/real-responses/gemini/llm-v1-chat.json @@ -1,34 +1,39 @@ { - "candidates": [ - { - "content": { - "parts": [ - { - "text": "Ah, vous voulez savoir le double de ce résultat ? Eh bien, le double de 2 est **4**. \n" - } - ], - "role": "model" - }, - "finishReason": "STOP", - "index": 0, - "safetyRatings": [ - { - "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", - "probability": "NEGLIGIBLE" - }, - { - "category": "HARM_CATEGORY_HATE_SPEECH", - "probability": "NEGLIGIBLE" - }, - { - "category": "HARM_CATEGORY_HARASSMENT", - "probability": "NEGLIGIBLE" - }, + "candidates": [ + { + "content": { + "parts": [ { - "category": "HARM_CATEGORY_DANGEROUS_CONTENT", - "probability": "NEGLIGIBLE" + "text": "Ah, vous voulez savoir le double de ce résultat ? Eh bien, le double de 2 est **4**. 
\n" } - ] - } - ] - } \ No newline at end of file + ], + "role": "model" + }, + "finishReason": "STOP", + "index": 0, + "safetyRatings": [ + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_HARASSMENT", + "probability": "NEGLIGIBLE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "probability": "NEGLIGIBLE" + } + ] + } + ], + "usageMetadata": { + "promptTokenCount": 14, + "candidatesTokenCount": 128, + "totalTokenCount": 142 + } +} diff --git a/spec/fixtures/ai-proxy/unit/streaming-chunk-formats/partial-json-end/expected-output.json b/spec/fixtures/ai-proxy/unit/streaming-chunk-formats/partial-json-end/expected-output.json index ba6a64384d95..f35aaf6f9dba 100644 --- a/spec/fixtures/ai-proxy/unit/streaming-chunk-formats/partial-json-end/expected-output.json +++ b/spec/fixtures/ai-proxy/unit/streaming-chunk-formats/partial-json-end/expected-output.json @@ -4,5 +4,8 @@ }, { "data": "\n{\n \"candidates\": [\n {\n \"content\": {\n \"parts\": [\n {\n \"text\": \" not a limit.\\n\\nIf you're interested in learning more about relativity, I encourage you to explore further resources online or in books. There are many excellent introductory materials available. \\n\"\n }\n ],\n \"role\": \"model\"\n },\n \"finishReason\": \"STOP\",\n \"index\": 0,\n \"safetyRatings\": [\n {\n \"category\": \"HARM_CATEGORY_SEXUALLY_EXPLICIT\",\n \"probability\": \"NEGLIGIBLE\"\n },\n {\n \"category\": \"HARM_CATEGORY_HATE_SPEECH\",\n \"probability\": \"NEGLIGIBLE\"\n },\n {\n \"category\": \"HARM_CATEGORY_HARASSMENT\",\n \"probability\": \"NEGLIGIBLE\"\n },\n {\n \"category\": \"HARM_CATEGORY_DANGEROUS_CONTENT\",\n \"probability\": \"NEGLIGIBLE\"\n }\n ]\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\": 6,\n \"candidatesTokenCount\": 547,\n \"totalTokenCount\": 553\n }\n}\n" + }, + { + "data": "[DONE]" } ] \ No newline at end of file From 154d31d521905f768d7736c5d3242c3f0eb157c3 Mon Sep 17 00:00:00 2001 From: Jack Tysoe Date: Thu, 25 Jul 2024 14:23:43 +0100 Subject: [PATCH 17/24] fix(ai-proxy): bedrock: cluster compat --- kong/clustering/compat/checkers.lua | 36 ++++++----- kong/clustering/compat/removed_fields.lua | 9 +++ .../09-hybrid_mode/09-config-compat_spec.lua | 62 ++++++++++++++++++- 3 files changed, 89 insertions(+), 18 deletions(-) diff --git a/kong/clustering/compat/checkers.lua b/kong/clustering/compat/checkers.lua index 7128b0f79078..b644e7182ce6 100644 --- a/kong/clustering/compat/checkers.lua +++ b/kong/clustering/compat/checkers.lua @@ -40,37 +40,43 @@ local compatible_checkers = { if plugin.name == 'ai-proxy' then local config = plugin.config - if config.model.provider == "gemini" then + if config.model.provider == "gemini" or config.model.provider == "bedrock" then + log_warn_message('configures ' .. plugin.name .. ' plugin with' .. + ' "openai preserve mode", because ' .. config.model.provider .. ' provider ' .. + ' is not supported in this release', + dp_version, log_suffix) + config.model.provider = "openai" config.route_type = "preserve" - log_warn_message('configures ' .. plugin.name .. ' plugin with' .. - ' "openai preserve mode", because gemini' .. 
- ' provider is not supported in this release', - dp_version, log_suffix) + has_update = true end end if plugin.name == 'ai-request-transformer' then local config = plugin.config - if config.llm.model.provider == "gemini" then - config.llm.model.provider = "openai" + if config.llm.model.provider == "gemini" or config.llm.model.provider == "bedrock" then log_warn_message('configures ' .. plugin.name .. ' plugin with' .. - ' "openai preserve mode", because gemini' .. - ' provider is not supported in this release', - dp_version, log_suffix) + ' "openai preserve mode", because ' .. config.llm.model.provider .. ' provider ' .. + ' is not supported in this release', + dp_version, log_suffix) + + config.llm.model.provider = "openai" + has_update = true end end if plugin.name == 'ai-response-transformer' then local config = plugin.config - if config.llm.model.provider == "gemini" then - config.llm.model.provider = "openai" + if config.llm.model.provider == "gemini" or config.llm.model.provider == "bedrock" then log_warn_message('configures ' .. plugin.name .. ' plugin with' .. - ' "openai preserve mode", because gemini' .. - ' provider is not supported in this release', - dp_version, log_suffix) + ' "openai preserve mode", because ' .. config.llm.model.provider .. ' provider ' .. + ' is not supported in this release', + dp_version, log_suffix) + + config.llm.model.provider = "openai" + has_update = true end end diff --git a/kong/clustering/compat/removed_fields.lua b/kong/clustering/compat/removed_fields.lua index f98965036f5d..ade547ae02d6 100644 --- a/kong/clustering/compat/removed_fields.lua +++ b/kong/clustering/compat/removed_fields.lua @@ -172,6 +172,9 @@ return { "model.options.gemini", "auth.gcp_use_service_account", "auth.gcp_service_account_json", + "model.options.bedrock", + "auth.aws_access_key_id", + "auth.aws_secret_access_key", }, ai_prompt_decorator = { "max_request_body_size", @@ -188,12 +191,18 @@ return { "llm.model.options.gemini", "llm.auth.gcp_use_service_account", "llm.auth.gcp_service_account_json", + "llm.model.options.bedrock", + "llm.auth.aws_access_key_id", + "llm.auth.aws_secret_access_key", }, ai_response_transformer = { "max_request_body_size", "llm.model.options.gemini", "llm.auth.gcp_use_service_account", "llm.auth.gcp_service_account_json", + "llm.model.options.bedrock", + "llm.auth.aws_access_key_id", + "llm.auth.aws_secret_access_key", }, prometheus = { "ai_metrics", diff --git a/spec/02-integration/09-hybrid_mode/09-config-compat_spec.lua b/spec/02-integration/09-hybrid_mode/09-config-compat_spec.lua index 955f1d73681a..a6844b92e493 100644 --- a/spec/02-integration/09-hybrid_mode/09-config-compat_spec.lua +++ b/spec/02-integration/09-hybrid_mode/09-config-compat_spec.lua @@ -482,7 +482,7 @@ describe("CP/DP config compat transformations #" .. strategy, function() end) describe("ai plugins supported providers", function() - it("[ai-proxy] tries to use unsupported gemini on older Kong versions", function() + it("[ai-proxy] tries to use unsupported providers on older Kong versions", function() -- [[ 3.8.x ]] -- local ai_proxy = admin.plugins:insert { name = "ai-proxy", @@ -516,10 +516,20 @@ describe("CP/DP config compat transformations #" .. 
strategy, function() local expected = cycle_aware_deep_copy(ai_proxy) + -- max body size expected.config.max_request_body_size = nil + + -- gemini fields expected.config.auth.gcp_service_account_json = nil expected.config.auth.gcp_use_service_account = nil expected.config.model.options.gemini = nil + + -- bedrock fields + expected.config.auth.aws_access_key_id = nil + expected.config.auth.aws_secret_access_key = nil + expected.config.model.options.bedrock = nil + + -- 'ai fallback' field sets expected.config.route_type = "preserve" expected.config.model.provider = "openai" @@ -535,7 +545,7 @@ describe("CP/DP config compat transformations #" .. strategy, function() admin.plugins:remove({ id = ai_proxy.id }) end) - it("[ai-request-transformer] tries to use unsupported gemini on older Kong versions", function() + it("[ai-request-transformer] tries to use unsupported providers on older Kong versions", function() -- [[ 3.8.x ]] -- local ai_request_transformer = admin.plugins:insert { name = "ai-request-transformer", @@ -571,10 +581,20 @@ describe("CP/DP config compat transformations #" .. strategy, function() local expected = cycle_aware_deep_copy(ai_request_transformer) + -- max body size expected.config.max_request_body_size = nil + + -- gemini fields expected.config.llm.auth.gcp_service_account_json = nil expected.config.llm.auth.gcp_use_service_account = nil expected.config.llm.model.options.gemini = nil + + -- bedrock fields + expected.config.llm.auth.aws_access_key_id = nil + expected.config.llm.auth.aws_secret_access_key = nil + expected.config.llm.model.options.bedrock = nil + + -- 'ai fallback' field sets expected.config.llm.model.provider = "openai" do_assert(uuid(), "3.7.0", expected) @@ -588,7 +608,7 @@ describe("CP/DP config compat transformations #" .. strategy, function() admin.plugins:remove({ id = ai_request_transformer.id }) end) - it("[ai-response-transformer] tries to use unsupported gemini on older Kong versions", function() + it("[ai-response-transformer] tries to use unsupported providers on older Kong versions", function() -- [[ 3.8.x ]] -- local ai_response_transformer = admin.plugins:insert { name = "ai-response-transformer", @@ -624,10 +644,20 @@ describe("CP/DP config compat transformations #" .. strategy, function() local expected = cycle_aware_deep_copy(ai_response_transformer) + -- max body size expected.config.max_request_body_size = nil + + -- gemini fields expected.config.llm.auth.gcp_service_account_json = nil expected.config.llm.auth.gcp_use_service_account = nil expected.config.llm.model.options.gemini = nil + + -- bedrock fields + expected.config.llm.auth.aws_access_key_id = nil + expected.config.llm.auth.aws_secret_access_key = nil + expected.config.llm.model.options.bedrock = nil + + -- 'ai fallback' field sets expected.config.llm.model.provider = "openai" do_assert(uuid(), "3.7.0", expected) @@ -671,11 +701,19 @@ describe("CP/DP config compat transformations #" .. 
strategy, function() local expected = cycle_aware_deep_copy(ai_proxy) + -- max body size expected.config.max_request_body_size = nil + + -- gemini fields expected.config.auth.gcp_service_account_json = nil expected.config.auth.gcp_use_service_account = nil expected.config.model.options.gemini = nil + -- bedrock fields + expected.config.auth.aws_access_key_id = nil + expected.config.auth.aws_secret_access_key = nil + expected.config.model.options.bedrock = nil + do_assert(uuid(), "3.7.0", expected) expected.config.response_streaming = nil @@ -720,11 +758,20 @@ describe("CP/DP config compat transformations #" .. strategy, function() -- ]] local expected = cycle_aware_deep_copy(ai_request_transformer) + + -- max body size expected.config.max_request_body_size = nil + + -- gemini fields expected.config.llm.auth.gcp_service_account_json = nil expected.config.llm.auth.gcp_use_service_account = nil expected.config.llm.model.options.gemini = nil + -- bedrock fields + expected.config.llm.auth.aws_access_key_id = nil + expected.config.llm.auth.aws_secret_access_key = nil + expected.config.llm.model.options.bedrock = nil + do_assert(uuid(), "3.7.0", expected) expected.config.llm.model.options.upstream_path = nil @@ -765,11 +812,20 @@ describe("CP/DP config compat transformations #" .. strategy, function() --]] local expected = cycle_aware_deep_copy(ai_response_transformer) + + -- max body size expected.config.max_request_body_size = nil + + -- gemini fields expected.config.llm.auth.gcp_service_account_json = nil expected.config.llm.auth.gcp_use_service_account = nil expected.config.llm.model.options.gemini = nil + -- bedrock fields + expected.config.llm.auth.aws_access_key_id = nil + expected.config.llm.auth.aws_secret_access_key = nil + expected.config.llm.model.options.bedrock = nil + do_assert(uuid(), "3.7.0", expected) expected.config.llm.model.options.upstream_path = nil From 5e004267e02546d8d2571c760179775e4ddc05e9 Mon Sep 17 00:00:00 2001 From: Jack Tysoe <91137069+tysoekong@users.noreply.github.com> Date: Thu, 25 Jul 2024 14:24:52 +0100 Subject: [PATCH 18/24] Update bedrock.lua --- kong/llm/drivers/bedrock.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kong/llm/drivers/bedrock.lua b/kong/llm/drivers/bedrock.lua index 21690fa32f54..df7483b7ac01 100644 --- a/kong/llm/drivers/bedrock.lua +++ b/kong/llm/drivers/bedrock.lua @@ -365,7 +365,7 @@ function _M.post_request(conf) end function _M.pre_request(conf, body) - -- disable gzip for bedrock because it breaks streaming + -- force gzip for bedrock because brotli and others break streaming kong.service.request.set_header("Accept-Encoding", "gzip, identity") return true, nil @@ -431,4 +431,4 @@ function _M.configure_request(conf, aws_sdk) return true end -return _M \ No newline at end of file +return _M From da4fcabe913d1dbe2dbaacd2263c0243bbf5d2a6 Mon Sep 17 00:00:00 2001 From: Jack Tysoe Date: Thu, 25 Jul 2024 20:01:20 +0100 Subject: [PATCH 19/24] fix(clustering): faster comparisons for unsupported AI providers --- kong/clustering/compat/checkers.lua | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/kong/clustering/compat/checkers.lua b/kong/clustering/compat/checkers.lua index b644e7182ce6..06cea810a873 100644 --- a/kong/clustering/compat/checkers.lua +++ b/kong/clustering/compat/checkers.lua @@ -19,8 +19,24 @@ do KONG_VERSION, hint, dp_version, action) ngx_log(ngx_WARN, _log_prefix, msg, log_suffix) end -end + _AI_PROVIDERS_ADDED = { + [3008000000] = { + "gemini", + "bedrock", 
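+      -- 3008000000 encodes version 3.8.0.0; providers listed under a
+      -- version key cannot be routed to data planes older than that version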
+ }, + } + + _AI_PROVIDER_INCOMPATIBLE = function(provider, ver) + for _, v in ipairs(_AI_PROVIDERS_ADDED[ver]) do + if v == provider then + return true + end + + return false + end + end +end local compatible_checkers = { { 3008000000, --[[ 3.8.0.0 ]] @@ -40,7 +56,7 @@ local compatible_checkers = { if plugin.name == 'ai-proxy' then local config = plugin.config - if config.model.provider == "gemini" or config.model.provider == "bedrock" then + if _AI_PROVIDER_INCOMPATIBLE(config.model.provider, 3008000000) then log_warn_message('configures ' .. plugin.name .. ' plugin with' .. ' "openai preserve mode", because ' .. config.model.provider .. ' provider ' .. ' is not supported in this release', @@ -55,7 +71,7 @@ local compatible_checkers = { if plugin.name == 'ai-request-transformer' then local config = plugin.config - if config.llm.model.provider == "gemini" or config.llm.model.provider == "bedrock" then + if _AI_PROVIDER_INCOMPATIBLE(config.llm.model.provider, 3008000000) then log_warn_message('configures ' .. plugin.name .. ' plugin with' .. ' "openai preserve mode", because ' .. config.llm.model.provider .. ' provider ' .. ' is not supported in this release', @@ -69,7 +85,7 @@ local compatible_checkers = { if plugin.name == 'ai-response-transformer' then local config = plugin.config - if config.llm.model.provider == "gemini" or config.llm.model.provider == "bedrock" then + if _AI_PROVIDER_INCOMPATIBLE(config.llm.model.provider, 3008000000) then log_warn_message('configures ' .. plugin.name .. ' plugin with' .. ' "openai preserve mode", because ' .. config.llm.model.provider .. ' provider ' .. ' is not supported in this release', From 682684200aad614853816f754afdc3ad2e5a113d Mon Sep 17 00:00:00 2001 From: Jack Tysoe Date: Thu, 25 Jul 2024 20:12:01 +0100 Subject: [PATCH 20/24] fix(clustering): faster comparisons for unsupported AI providers --- kong/clustering/compat/checkers.lua | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kong/clustering/compat/checkers.lua b/kong/clustering/compat/checkers.lua index 06cea810a873..55dcbbc2bd4a 100644 --- a/kong/clustering/compat/checkers.lua +++ b/kong/clustering/compat/checkers.lua @@ -2,7 +2,7 @@ local ipairs = ipairs local type = type -local log_warn_message +local log_warn_message, _AI_PROVIDER_INCOMPATIBLE do local ngx_log = ngx.log local ngx_WARN = ngx.WARN @@ -20,7 +20,7 @@ do ngx_log(ngx_WARN, _log_prefix, msg, log_suffix) end - _AI_PROVIDERS_ADDED = { + local _AI_PROVIDERS_ADDED = { [3008000000] = { "gemini", "bedrock", @@ -32,9 +32,9 @@ do if v == provider then return true end - - return false end + + return false end end From dcd1d21ec2414cf5ed27c3ca25a978d36280d95f Mon Sep 17 00:00:00 2001 From: Jack Tysoe Date: Fri, 26 Jul 2024 10:21:12 +0100 Subject: [PATCH 21/24] fix(ai-proxy): bedrock: refactor auth chain --- kong/llm/drivers/bedrock.lua | 6 +++--- kong/plugins/ai-proxy/handler.lua | 34 ++++++++++++++++--------------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/kong/llm/drivers/bedrock.lua b/kong/llm/drivers/bedrock.lua index df7483b7ac01..bf477c2a911e 100644 --- a/kong/llm/drivers/bedrock.lua +++ b/kong/llm/drivers/bedrock.lua @@ -415,11 +415,11 @@ function _M.configure_request(conf, aws_sdk) body = kong.request.get_raw_body() } - local signature = signer(aws_sdk.config, r) - + local signature, err = signer(aws_sdk.config, r) if not signature then - return nil, "failed to sign AWS request" + return nil, "failed to sign AWS request: " .. 
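+    -- signer() follows the resty.aws convention of returning the signed
+    -- request (or nil) plus an error string; the "NONE" fallback keeps the
+    -- concatenation safe when err is nil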
(err or "NONE") end + kong.service.request.set_header("Authorization", signature.headers["Authorization"]) if signature.headers["X-Amz-Security-Token"] then kong.service.request.set_header("X-Amz-Security-Token", signature.headers["X-Amz-Security-Token"]) diff --git a/kong/plugins/ai-proxy/handler.lua b/kong/plugins/ai-proxy/handler.lua index fccb66545f8a..c78133d4ba35 100644 --- a/kong/plugins/ai-proxy/handler.lua +++ b/kong/plugins/ai-proxy/handler.lua @@ -10,18 +10,13 @@ local strip = require("kong.tools.utils").strip local GCP_SERVICE_ACCOUNT do GCP_SERVICE_ACCOUNT = os.getenv("GCP_SERVICE_ACCOUNT") end -local AWS_REGION do - AWS_REGION = os.getenv("AWS_REGION") -end -local AWS_ACCESS_KEY_ID do - AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID") -end -local AWS_SECRET_ACCESS_KEY do - AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY") -end local GCP = require("resty.gcp.request.credentials.accesstoken") +local aws_config = require "resty.aws.config" -- reads environment variables whilst available local AWS = require("resty.aws") +local AWS_REGION do + AWS_REGION = os.getenv("AWS_REGION") or os.getenv("AWS_DEFAULT_REGION") +end -- @@ -68,20 +63,27 @@ local _KEYBASTION = setmetatable({}, { and plugin_config.model.options.bedrock.aws_region or AWS_REGION - local access_key = (plugin_config.auth and plugin_config.auth.aws_access_key_id) - or AWS_ACCESS_KEY_ID + if not region then + return { interface = nil, error = "AWS region not specified anywhere" } + end - local secret_key = (plugin_config.auth and plugin_config.auth.aws_secret_access_key) - or AWS_SECRET_ACCESS_KEY + local access_key_set = plugin_config.auth and plugin_config.auth.aws_access_key_id + local secret_key_set = plugin_config.auth and plugin_config.auth.aws_secret_access_key - if access_key and secret_key then + if access_key_set and secret_key_set then aws = AWS({ -- if any of these are nil, they either use the SDK default or -- are deliberately null so that a different auth chain is used region = region, - aws_access_key_id = access_key, - aws_secret_access_key = secret_key, }) + + -- Override credential config according to plugin config, if set + local creds = aws:Credentials { + accessKeyId = access_key_set, + secretAccessKey = secret_key_set, + } + + aws.config.credentials = creds else aws = AWS({ region = region, From 8d4ded7c418102d0e58589ab1a6f7638a1f520fd Mon Sep 17 00:00:00 2001 From: Jack Tysoe Date: Fri, 26 Jul 2024 10:32:04 +0100 Subject: [PATCH 22/24] lint --- kong/plugins/ai-proxy/handler.lua | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kong/plugins/ai-proxy/handler.lua b/kong/plugins/ai-proxy/handler.lua index c78133d4ba35..d206a834ef24 100644 --- a/kong/plugins/ai-proxy/handler.lua +++ b/kong/plugins/ai-proxy/handler.lua @@ -67,8 +67,10 @@ local _KEYBASTION = setmetatable({}, { return { interface = nil, error = "AWS region not specified anywhere" } end - local access_key_set = plugin_config.auth and plugin_config.auth.aws_access_key_id + local access_key_set = (plugin_config.auth and plugin_config.auth.aws_access_key_id) + or aws_config.global.AWS_ACCESS_KEY_ID local secret_key_set = plugin_config.auth and plugin_config.auth.aws_secret_access_key + or aws_config.global.AWS_SECRET_ACCESS_KEY if access_key_set and secret_key_set then aws = AWS({ From 334df7f2abcb1c9b225c7de738582faa3e178d34 Mon Sep 17 00:00:00 2001 From: Jack Tysoe Date: Fri, 26 Jul 2024 17:17:33 +0100 Subject: [PATCH 23/24] fix(ai-proxy): (bedrock, gemini): comments rollup --- kong/llm/drivers/anthropic.lua | 
2 +- kong/llm/drivers/bedrock.lua | 20 ++++++++++++++------ kong/llm/drivers/cohere.lua | 4 ++-- kong/llm/drivers/gemini.lua | 4 ++-- kong/llm/drivers/shared.lua | 16 ++++++---------- kong/llm/init.lua | 2 +- kong/llm/schemas/init.lua | 10 ++++++---- kong/plugins/ai-proxy/handler.lua | 6 +++--- 8 files changed, 35 insertions(+), 29 deletions(-) diff --git a/kong/llm/drivers/anthropic.lua b/kong/llm/drivers/anthropic.lua index fcc6419d33b8..77c9f363f9b6 100644 --- a/kong/llm/drivers/anthropic.lua +++ b/kong/llm/drivers/anthropic.lua @@ -225,7 +225,7 @@ local function handle_stream_event(event_t, model_info, route_type) return delta_to_event(event_data, model_info) elseif event_id == "message_stop" then - return "[DONE]", nil, nil + return ai_shared._CONST.SSE_TERMINATOR, nil, nil elseif event_id == "ping" then return nil, nil, nil diff --git a/kong/llm/drivers/bedrock.lua b/kong/llm/drivers/bedrock.lua index bf477c2a911e..372a57fa8276 100644 --- a/kong/llm/drivers/bedrock.lua +++ b/kong/llm/drivers/bedrock.lua @@ -21,6 +21,14 @@ local _OPENAI_ROLE_MAPPING = { ["assistant"] = "assistant", } +_M.bedrock_unsupported_system_role_patterns = { + "amazon.titan.-.*", + "cohere.command.-text.-.*", + "cohere.command.-light.-text.-.*", + "mistral.mistral.-7b.-instruct.-.*", + "mistral.mixtral.-8x7b.-instruct.-.*", +} + local function to_bedrock_generation_config(request_table) return { ["maxTokens"] = request_table.max_tokens, @@ -118,13 +126,13 @@ local function handle_stream_event(event_t, model_info, route_type) completion_tokens = body.usage and body.usage.outputTokens or 0, } - new_event = "[DONE]" + new_event = ai_shared._CONST.SSE_TERMINATOR -- "contentBlockStop" is absent because it is not used for anything here end if new_event then - if new_event ~= "[DONE]" then + if new_event ~= ai_shared._CONST.SSE_TERMINATOR then new_event = cjson.encode(new_event) end @@ -137,7 +145,7 @@ end local function to_bedrock_chat_openai(request_table, model_info, route_type) if not request_table then -- try-catch type mechanism local err = "empty request table received for transformation" - ngx.log(ngx.ERR, err) + ngx.log(ngx.ERR, "[bedrock] ", err) return nil, nil, err end @@ -171,7 +179,7 @@ local function to_bedrock_chat_openai(request_table, model_info, route_type) -- only works for some models if #system_prompts > 0 then - for _, p in ipairs(ai_shared.bedrock_unsupported_system_role_patterns) do + for _, p in ipairs(_M.bedrock_unsupported_system_role_patterns) do if model_info.name:find(p) then return nil, nil, "system prompts are unsupported for model '" .. 
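+        -- entries in bedrock_unsupported_system_role_patterns are Lua
+        -- patterns, so find() treats '.' as any character and '.-' as a
+        -- lazy wildcard run, not as literal text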
model_info.name end @@ -199,7 +207,7 @@ local function from_bedrock_chat_openai(response, model_info, route_type) if err then local err_client = "failed to decode response from Bedrock" - ngx.log(ngx.ERR, fmt("%s: %s", err_client, err)) + ngx.log(ngx.ERR, fmt("[bedrock] %s: %s", err_client, err)) return nil, err_client end @@ -226,7 +234,7 @@ local function from_bedrock_chat_openai(response, model_info, route_type) else -- probably a server fault or other unexpected response local err = "no generation candidates received from Bedrock, or max_tokens too short" - ngx.log(ngx.ERR, err) + ngx.log(ngx.ERR, "[bedrock] ", err) return nil, err end diff --git a/kong/llm/drivers/cohere.lua b/kong/llm/drivers/cohere.lua index b96cbbbc2d46..1aafc9405b0c 100644 --- a/kong/llm/drivers/cohere.lua +++ b/kong/llm/drivers/cohere.lua @@ -97,7 +97,7 @@ local function handle_stream_event(event_t, model_info, route_type) elseif event.event_type == "stream-end" then -- return a metadata object, with the OpenAI termination event - new_event = "[DONE]" + new_event = ai_shared._CONST.SSE_TERMINATOR metadata = { completion_tokens = event.response @@ -123,7 +123,7 @@ local function handle_stream_event(event_t, model_info, route_type) end if new_event then - if new_event ~= "[DONE]" then + if new_event ~= ai_shared._CONST.SSE_TERMINATOR then new_event = cjson.encode(new_event) end diff --git a/kong/llm/drivers/gemini.lua b/kong/llm/drivers/gemini.lua index f76488dcb19c..57ca7127ef29 100644 --- a/kong/llm/drivers/gemini.lua +++ b/kong/llm/drivers/gemini.lua @@ -47,8 +47,8 @@ local function handle_stream_event(event_t, model_info, route_type) return end - if event_t.data == "[DONE]" then - return "[DONE]", nil, nil + if event_t.data == ai_shared._CONST.SSE_TERMINATOR then + return ai_shared._CONST.SSE_TERMINATOR, nil, nil end local event, err = cjson.decode(event_t.data) diff --git a/kong/llm/drivers/shared.lua b/kong/llm/drivers/shared.lua index 6b3714ca72b0..6f9341884f25 100644 --- a/kong/llm/drivers/shared.lua +++ b/kong/llm/drivers/shared.lua @@ -56,6 +56,10 @@ local log_entry_keys = { local openai_override = os.getenv("OPENAI_TEST_PORT") +_M._CONST = { + ["SSE_TERMINATOR"] = "[DONE]", +} + _M.streaming_has_token_counts = { ["cohere"] = true, ["llama2"] = true, @@ -64,14 +68,6 @@ _M.streaming_has_token_counts = { ["bedrock"] = true, } -_M.bedrock_unsupported_system_role_patterns = { - "amazon.titan.-.*", - "cohere.command.-text.-.*", - "cohere.command.-light.-text.-.*", - "mistral.mistral.-7b.-instruct.-.*", - "mistral.mixtral.-8x7b.-instruct.-.*", -} - _M.upstream_url_format = { openai = fmt("%s://api.openai.com:%s", (openai_override and "http") or "https", (openai_override) or "443"), anthropic = "https://api.anthropic.com:443", @@ -275,7 +271,7 @@ function _M.frame_to_events(frame, provider) if done then -- add the done signal here -- but we have to retrieve the metadata from a previous filter run - events[#events+1] = { data = "[DONE]" } + events[#events+1] = { data = _M._CONST.SSE_TERMINATOR } end elseif provider == "bedrock" then @@ -448,7 +444,7 @@ function _M.from_ollama(response_string, model_info, route_type) end end - if output and output ~= "[DONE]" then + if output and output ~= _M._CONST.SSE_TERMINATOR then output, err = cjson.encode(output) end diff --git a/kong/llm/init.lua b/kong/llm/init.lua index 266f5e355a5c..b4b7bba5ae7a 100644 --- a/kong/llm/init.lua +++ b/kong/llm/init.lua @@ -96,7 +96,7 @@ do -- mistral, cohere, titan (via Bedrock) don't support system commands if self.driver == "bedrock" 
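+      -- the unsupported-pattern list now lives on the bedrock driver
+      -- module itself, so transformer plugins reach it via self.driver
+      -- rather than through ai_shared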
then - for _, p in ipairs(ai_shared.bedrock_unsupported_system_role_patterns) do + for _, p in ipairs(self.driver.bedrock_unsupported_system_role_patterns) do if request.model:find(p) then ai_request = { messages = { diff --git a/kong/llm/schemas/init.lua b/kong/llm/schemas/init.lua index b9f7c40cb488..c975c49c26f0 100644 --- a/kong/llm/schemas/init.lua +++ b/kong/llm/schemas/init.lua @@ -83,15 +83,17 @@ local auth_schema = { referenceable = true }}, { aws_access_key_id = { type = "string", - description = "Set this if you are using an AWS provider (Bedrock, SageMaker) and you are authenticating " .. - "using static IAM User credentials.", + description = "Set this if you are using an AWS provider (Bedrock) and you are authenticating " .. + "using static IAM User credentials. Setting this will override the AWS_ACCESS_KEY_ID " .. + "environment variable for this plugin instance.", required = false, encrypted = true, referenceable = true }}, { aws_secret_access_key = { type = "string", - description = "Set this if you are using an AWS provider (Bedrock, SageMaker) and you are authenticating " .. - "using static IAM User credentials.", + description = "Set this if you are using an AWS provider (Bedrock) and you are authenticating " .. + "using static IAM User credentials. Setting this will override the AWS_SECRET_ACCESS_KEY " .. + "environment variable for this plugin instance.", required = false, encrypted = true, referenceable = true }}, diff --git a/kong/plugins/ai-proxy/handler.lua b/kong/plugins/ai-proxy/handler.lua index d206a834ef24..7a09a1ea1c1f 100644 --- a/kong/plugins/ai-proxy/handler.lua +++ b/kong/plugins/ai-proxy/handler.lua @@ -188,7 +188,7 @@ local function handle_streaming_frame(conf) local err if formatted then -- only stream relevant frames back to the user - if conf.logging and conf.logging.log_payloads and (formatted ~= "[DONE]") then + if conf.logging and conf.logging.log_payloads and (formatted ~= ai_shared._CONST.SSE_TERMINATOR) then -- append the "choice" to the buffer, for logging later. this actually works! 
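+          -- decode each frame at most once: if this branch parses the
+          -- frame, the statistics branch below reuses the cached event_t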
if not event_t then event_t, err = cjson.decode(formatted) @@ -206,7 +206,7 @@ local function handle_streaming_frame(conf) -- handle event telemetry if conf.logging and conf.logging.log_statistics then if not ai_shared.streaming_has_token_counts[conf.model.provider] then - if formatted ~= "[DONE]" then + if formatted ~= ai_shared._CONST.SSE_TERMINATOR then if not event_t then event_t, err = cjson.decode(formatted) end @@ -229,7 +229,7 @@ local function handle_streaming_frame(conf) framebuffer:put("data: ") framebuffer:put(formatted or "") - framebuffer:put((formatted ~= "[DONE]") and "\n\n" or "") + framebuffer:put((formatted ~= ai_shared._CONST.SSE_TERMINATOR) and "\n\n" or "") end if conf.logging and conf.logging.log_statistics and metadata then From 8184ee0faaee6adf6ff71de0f8f5ce88fbfa326e Mon Sep 17 00:00:00 2001 From: Jack Tysoe <91137069+tysoekong@users.noreply.github.com> Date: Mon, 29 Jul 2024 15:33:59 +0100 Subject: [PATCH 24/24] Update kong/plugins/ai-proxy/handler.lua Co-authored-by: Keery Nie --- kong/plugins/ai-proxy/handler.lua | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kong/plugins/ai-proxy/handler.lua b/kong/plugins/ai-proxy/handler.lua index 7a09a1ea1c1f..bc7288d30075 100644 --- a/kong/plugins/ai-proxy/handler.lua +++ b/kong/plugins/ai-proxy/handler.lua @@ -72,13 +72,13 @@ local _KEYBASTION = setmetatable({}, { local secret_key_set = plugin_config.auth and plugin_config.auth.aws_secret_access_key or aws_config.global.AWS_SECRET_ACCESS_KEY - if access_key_set and secret_key_set then - aws = AWS({ - -- if any of these are nil, they either use the SDK default or - -- are deliberately null so that a different auth chain is used - region = region, - }) + aws = AWS({ + -- if any of these are nil, they either use the SDK default or + -- are deliberately null so that a different auth chain is used + region = region, + }) + if access_key_set and secret_key_set then -- Override credential config according to plugin config, if set local creds = aws:Credentials { accessKeyId = access_key_set, @@ -86,10 +86,6 @@ local _KEYBASTION = setmetatable({}, { } aws.config.credentials = creds - else - aws = AWS({ - region = region, - }) end this_cache[plugin_config] = { interface = aws, error = nil }
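
A closing note on the `_KEYBASTION` pattern that the last two patches refine: it is a table whose `__index` metamethod lazily constructs the cloud SDK interface the first time a given plugin configuration is looked up, then memoizes the result, so region and credential resolution happen once per configuration instead of once per proxied request. Below is a minimal sketch of that pattern in plain Lua; `build_client` is a hypothetical stand-in for the `AWS(...)` construction above, and the weak-keyed cache is an assumption of the sketch, not a claim about the plugin's actual lifetime management.

-- sketch only: build_client is a hypothetical stand-in for AWS()/Credentials
local function build_client(plugin_config)
  if not plugin_config.region then
    return nil, "AWS region not specified anywhere"
  end
  return { region = plugin_config.region }
end

local client_cache = setmetatable({}, {
  __mode = "k", -- assumption: weak keys let entries die with their config table
  __index = function(cache, plugin_config)
    local client, err = build_client(plugin_config)
    local entry = { interface = client, error = err }
    rawset(cache, plugin_config, entry) -- memoize for subsequent lookups
    return entry
  end,
})

-- first lookup constructs the client; later lookups return the same entry
local conf = { region = "eu-west-1" }
assert(client_cache[conf] == client_cache[conf])
assert(client_cache[{ auth = {} }].error == "AWS region not specified anywhere")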