From b79fa88ccec9db0af5bbff32a60be23195f362aa Mon Sep 17 00:00:00 2001
From: hanxiantao <601803023@qq.com>
Date: Thu, 30 Jan 2025 09:48:01 +0800
Subject: [PATCH] ai-proxy e2e: support Doubao and Qwen compatible mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 plugins/wasm-go/extensions/ai-proxy/README.md |   9 +-
 .../wasm-go/extensions/ai-proxy/README_EN.md  |   1 +
 .../e2e/conformance/tests/go-wasm-ai-proxy.go | 122 +++++++++++++++++-
 .../conformance/tests/go-wasm-ai-proxy.yaml   |  60 +++++++++
 4 files changed, 187 insertions(+), 5 deletions(-)

diff --git a/plugins/wasm-go/extensions/ai-proxy/README.md b/plugins/wasm-go/extensions/ai-proxy/README.md
index 8f2ae49a5d..f0af574922 100644
--- a/plugins/wasm-go/extensions/ai-proxy/README.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README.md
@@ -130,10 +130,11 @@ Azure OpenAI 所对应的 `type` 为 `azure`。它特有的配置字段如下:

 通义千问所对应的 `type` 为 `qwen`。它特有的配置字段如下:

-| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
-|--------------------|-----------------|------|-----|------------------------------------------------------------------|
-| `qwenEnableSearch` | boolean | 非必填 | - | 是否启用通义千问内置的互联网搜索功能。 |
-| `qwenFileIds` | array of string | 非必填 | - | 通过文件接口上传至Dashscope的文件 ID,其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+| ---------------------- | --------------- | -------- | ------ | ------------------------------------------------------------ |
+| `qwenEnableSearch` | boolean | 非必填 | - | 是否启用通义千问内置的互联网搜索功能。 |
+| `qwenFileIds` | array of string | 非必填 | - | 通过文件接口上传至Dashscope的文件 ID,其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |
+| `qwenEnableCompatible` | boolean | 非必填 | false | 开启通义千问兼容模式。启用通义千问兼容模式后,将调用千问的兼容模式接口,同时对请求/响应不做修改。 |

 #### 百川智能 (Baichuan AI)

diff --git a/plugins/wasm-go/extensions/ai-proxy/README_EN.md b/plugins/wasm-go/extensions/ai-proxy/README_EN.md
index 4400e248d0..891503a4fc 100644
--- a/plugins/wasm-go/extensions/ai-proxy/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README_EN.md
@@ -106,6 +106,7 @@ For Qwen (Tongyi Qwen), the corresponding `type` is `qwen`. Its unique configura
 |--------------------|-----------------|----------------------|---------------|------------------------------------------------------------------------------------------------------------------------|
 | `qwenEnableSearch` | boolean | Optional | - | Whether to enable the built-in Internet search function provided by Qwen. |
 | `qwenFileIds` | array of string | Optional | - | The file IDs uploaded via the Dashscope file interface, whose content will be used as context for AI conversations. Cannot be configured with the `context` field. |
+| `qwenEnableCompatible` | boolean | Optional | false | Enables Qwen compatibility mode. When enabled, the plugin calls Qwen's compatible-mode interface and passes the request and response through without modification. |

 #### Baichuan AI

diff --git a/test/e2e/conformance/tests/go-wasm-ai-proxy.go b/test/e2e/conformance/tests/go-wasm-ai-proxy.go
index 7baef22334..ebd590f12a 100644
--- a/test/e2e/conformance/tests/go-wasm-ai-proxy.go
+++ b/test/e2e/conformance/tests/go-wasm-ai-proxy.go
@@ -24,7 +24,7 @@ import (
 // The llm-mock service response has a fixed id of `chatcmpl-llm-mock`.
 // The created field is fixed to 10.
 // The response content is echoed back as the request content.
-// The usage field is fixed to {"prompt_tokens":9,"completion_tokens":1,"total_tokens":10} (specific values may vary based on the corresponding response fields). +// The usage field is fixed to `{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}` (specific values may vary based on the corresponding response fields). func init() { Register(WasmPluginsAiProxy) @@ -37,6 +37,66 @@ var WasmPluginsAiProxy = suite.ConformanceTest{ Manifests: []string{"tests/go-wasm-ai-proxy.yaml"}, Test: func(t *testing.T, suite *suite.ConformanceTestSuite) { testcases := []http.Assertion{ + { + Meta: http.AssertionMeta{ + TestCaseName: "doubao case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "ark.cn-beijing.volces.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "doubao case 2: streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "ark.cn-beijing.volces.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + +`), + }, + }, + }, { Meta: http.AssertionMeta{ TestCaseName: "minimax case 1: proxy completion V2 API, non-streaming request", @@ -153,6 +213,66 @@ data: {"choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistan data: 
{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"abab6.5s-chat","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}} +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "qwen case 1: compatible mode, non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "dashscope.aliyuncs.com-compatible-mode", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"qwen-turbo","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "qwen case 2: compatible mode, streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "dashscope.aliyuncs.com-compatible-mode", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + `), }, }, diff --git a/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml b/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml index 7693ab3559..74e42d954c 100644 --- a/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml +++ b/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml @@ -13,6 +13,25 @@ # limitations under the License. 
apiVersion: networking.k8s.io/v1 kind: Ingress +metadata: + name: wasmplugin-ai-proxy-doubao + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "ark.cn-beijing.volces.com" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress metadata: name: wasmplugin-ai-proxy-minimax-v2-api namespace: higress-conformance-ai-backend @@ -49,6 +68,25 @@ spec: port: number: 3000 --- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wasmplugin-ai-proxy-qwen-compatible-mode + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "dashscope.aliyuncs.com-compatible-mode" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- apiVersion: extensions.higress.io/v1alpha1 kind: WasmPlugin metadata: @@ -59,6 +97,15 @@ spec: phase: UNSPECIFIED_PHASE priority: 100 matchRules: + - config: + provider: + apiTokens: + - fake_token + modelMapping: + '*': fake_doubao_endpoint + type: doubao + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-doubao - config: provider: apiTokens: @@ -83,4 +130,17 @@ spec: minimaxGroupId: 1 ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-minimax-pro-api + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': qwen-turbo + 'gpt-35-turbo': qwen-plus + 'gpt-4-*': qwen-max + '*': qwen-turbo + type: qwen + qwenEnableCompatible: true + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-qwen-compatible-mode url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
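For readers who want to try the new option outside the e2e suite, the sketch below shows roughly what a standalone configuration could look like. It is only an illustration derived from the `matchRules` entry added in this patch: the plugin name, namespace, and placeholder API token are assumptions rather than required values, and it uses the plugin's global `defaultConfig` block instead of the per-ingress `matchRules` used by the test manifest.

```yaml
# Hypothetical standalone WasmPlugin enabling Qwen compatibility mode.
# Provider fields mirror the ai-proxy README; metadata values are illustrative only.
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata:
  name: ai-proxy-qwen-compatible        # assumed name
  namespace: higress-system             # assumed namespace
spec:
  defaultConfig:
    provider:
      type: qwen
      apiTokens:
        - "YOUR_DASHSCOPE_API_KEY"      # placeholder credential
      modelMapping:
        'gpt-3': qwen-turbo
        'gpt-35-turbo': qwen-plus
        'gpt-4-*': qwen-max
        '*': qwen-turbo
      qwenEnableCompatible: true        # forward requests to Qwen's compatible-mode interface unchanged
  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
```

With `qwenEnableCompatible` set, an OpenAI-style `POST /v1/chat/completions` body is passed through to Qwen's compatible-mode interface as-is, which is what the two new `dashscope.aliyuncs.com-compatible-mode` test cases assert for both the non-streaming and streaming responses.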