Commit b79fa88
ai-proxy e2e: support Doubao and Tongyi Qwen compatible mode
hanxiantao committed Jan 30, 2025
1 parent 586686c commit b79fa88
Showing 4 changed files with 187 additions and 5 deletions.
9 changes: 5 additions & 4 deletions plugins/wasm-go/extensions/ai-proxy/README.md
@@ -130,10 +130,11 @@ The `type` corresponding to Azure OpenAI is `azure`. Its unique configuration fields are as follows:

The `type` corresponding to Tongyi Qwen is `qwen`. Its unique configuration fields are as follows:

| Name | Data Type | Requirement | Default | Description |
|--------------------|-----------------|-------------|---------|------------------------------------------------------------------|
| `qwenEnableSearch` | boolean | Optional | - | Whether to enable the built-in Internet search function provided by Qwen. |
| `qwenFileIds` | array of string | Optional | - | The file IDs uploaded via the Dashscope file interface, whose content will be used as context for AI conversations. Cannot be configured together with the `context` field. |
| Name | Data Type | Requirement | Default | Description |
| ---------------------- | --------------- | ----------- | ------- | ------------------------------------------------------------ |
| `qwenEnableSearch` | boolean | Optional | - | Whether to enable the built-in Internet search function provided by Qwen. |
| `qwenFileIds` | array of string | Optional | - | The file IDs uploaded via the Dashscope file interface, whose content will be used as context for AI conversations. Cannot be configured together with the `context` field. |
| `qwenEnableCompatible` | boolean | Optional | false | Enable Tongyi Qwen compatible mode. When enabled, the plugin calls Qwen's compatible-mode (OpenAI-compatible) endpoint and leaves the request and response unmodified. |
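
As a quick reference, a minimal provider block that turns on compatible mode might look like the sketch below; the field names follow the e2e manifest added in this commit, and the API token value is a placeholder:

```yaml
provider:
  type: qwen
  apiTokens:
    - "YOUR_DASHSCOPE_API_KEY"   # placeholder; supply a real DashScope API key
  modelMapping:
    '*': qwen-turbo              # map any requested model name to qwen-turbo
  qwenEnableCompatible: true     # call Qwen's compatible-mode endpoint; request/response pass through unchanged
```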

#### 百川智能 (Baichuan AI)

1 change: 1 addition & 0 deletions plugins/wasm-go/extensions/ai-proxy/README_EN.md
@@ -106,6 +106,7 @@ For Qwen (Tongyi Qwen), the corresponding `type` is `qwen`. Its unique configuration fields are as follows:
|--------------------|-----------------|----------------------|---------------|------------------------------------------------------------------------------------------------------------------------|
| `qwenEnableSearch` | boolean | Optional | - | Whether to enable the built-in Internet search function provided by Qwen. |
| `qwenFileIds` | array of string | Optional | - | The file IDs uploaded via the Dashscope file interface, whose content will be used as context for AI conversations. Cannot be configured with the `context` field. |
| `qwenEnableCompatible` | boolean | Optional | false | Enable Qwen compatibility mode. When enabled, the plugin calls Qwen's compatible-mode endpoint and does not modify the request or response. |

#### Baichuan AI

122 changes: 121 additions & 1 deletion test/e2e/conformance/tests/go-wasm-ai-proxy.go
@@ -24,7 +24,7 @@ import (
// The llm-mock service response has a fixed id of `chatcmpl-llm-mock`.
// The created field is fixed to 10.
// The response content is echoed back as the request content.
// The usage field is fixed to {"prompt_tokens":9,"completion_tokens":1,"total_tokens":10} (specific values may vary based on the corresponding response fields).
// The usage field is fixed to `{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}` (specific values may vary based on the corresponding response fields).

func init() {
Register(WasmPluginsAiProxy)
@@ -37,6 +37,66 @@ var WasmPluginsAiProxy = suite.ConformanceTest{
Manifests: []string{"tests/go-wasm-ai-proxy.yaml"},
Test: func(t *testing.T, suite *suite.ConformanceTestSuite) {
testcases := []http.Assertion{
{
Meta: http.AssertionMeta{
TestCaseName: "doubao case 1: non-streaming request",
CompareTarget: http.CompareTargetResponse,
},
Request: http.AssertionRequest{
ActualRequest: http.Request{
Host: "ark.cn-beijing.volces.com",
Path: "/v1/chat/completions",
Method: "POST",
ContentType: http.ContentTypeApplicationJson,
Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`),
},
},
Response: http.AssertionResponse{
ExpectedResponse: http.Response{
StatusCode: 200,
ContentType: http.ContentTypeApplicationJson,
Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`),
},
},
},
{
Meta: http.AssertionMeta{
TestCaseName: "doubao case 2: streaming request",
CompareTarget: http.CompareTargetResponse,
},
Request: http.AssertionRequest{
ActualRequest: http.Request{
Host: "ark.cn-beijing.volces.com",
Path: "/v1/chat/completions",
Method: "POST",
ContentType: http.ContentTypeApplicationJson,
Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`),
},
},
Response: http.AssertionResponse{
ExpectedResponse: http.Response{
StatusCode: 200,
ContentType: http.ContentTypeTextEventStream,
Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}}
data: [DONE]
`),
},
},
},
{
Meta: http.AssertionMeta{
TestCaseName: "minimax case 1: proxy completion V2 API, non-streaming request",
@@ -153,6 +213,66 @@ data: {"choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistan
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"abab6.5s-chat","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}
`),
},
},
},
{
Meta: http.AssertionMeta{
TestCaseName: "qwen case 1: compatible mode, non-streaming request",
CompareTarget: http.CompareTargetResponse,
},
Request: http.AssertionRequest{
ActualRequest: http.Request{
Host: "dashscope.aliyuncs.com-compatible-mode",
Path: "/v1/chat/completions",
Method: "POST",
ContentType: http.ContentTypeApplicationJson,
Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`),
},
},
Response: http.AssertionResponse{
ExpectedResponse: http.Response{
StatusCode: 200,
ContentType: http.ContentTypeApplicationJson,
Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"qwen-turbo","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`),
},
},
},
{
Meta: http.AssertionMeta{
TestCaseName: "qwen case 2: compatible mode, streaming request",
CompareTarget: http.CompareTargetResponse,
},
Request: http.AssertionRequest{
ActualRequest: http.Request{
Host: "dashscope.aliyuncs.com-compatible-mode",
Path: "/v1/chat/completions",
Method: "POST",
ContentType: http.ContentTypeApplicationJson,
Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`),
},
},
Response: http.AssertionResponse{
ExpectedResponse: http.Response{
StatusCode: 200,
ContentType: http.ContentTypeTextEventStream,
Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}}
data: [DONE]
`),
},
},
60 changes: 60 additions & 0 deletions test/e2e/conformance/tests/go-wasm-ai-proxy.yaml
@@ -13,6 +13,25 @@
# limitations under the License.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: wasmplugin-ai-proxy-doubao
  namespace: higress-conformance-ai-backend
spec:
  ingressClassName: higress
  rules:
  - host: "ark.cn-beijing.volces.com"
    http:
      paths:
      - pathType: Prefix
        path: "/"
        backend:
          service:
            name: llm-mock-service
            port:
              number: 3000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: wasmplugin-ai-proxy-minimax-v2-api
  namespace: higress-conformance-ai-backend
@@ -49,6 +68,25 @@ spec:
            port:
              number: 3000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: wasmplugin-ai-proxy-qwen-compatible-mode
  namespace: higress-conformance-ai-backend
spec:
  ingressClassName: higress
  rules:
  - host: "dashscope.aliyuncs.com-compatible-mode"
    http:
      paths:
      - pathType: Prefix
        path: "/"
        backend:
          service:
            name: llm-mock-service
            port:
              number: 3000
---
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata:
@@ -59,6 +97,15 @@
  phase: UNSPECIFIED_PHASE
  priority: 100
  matchRules:
  - config:
      provider:
        apiTokens:
        - fake_token
        modelMapping:
          '*': fake_doubao_endpoint
        type: doubao
    ingress:
    - higress-conformance-ai-backend/wasmplugin-ai-proxy-doubao
  - config:
      provider:
        apiTokens:
Expand All @@ -83,4 +130,17 @@ spec:
        minimaxGroupId: 1
    ingress:
    - higress-conformance-ai-backend/wasmplugin-ai-proxy-minimax-pro-api
  - config:
      provider:
        apiTokens:
        - fake_token
        modelMapping:
          'gpt-3': qwen-turbo
          'gpt-35-turbo': qwen-plus
          'gpt-4-*': qwen-max
          '*': qwen-turbo
        type: qwen
        qwenEnableCompatible: true
    ingress:
    - higress-conformance-ai-backend/wasmplugin-ai-proxy-qwen-compatible-mode
  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
