From 5f170cf7e7f30717e46cafdca9e3a41863a04b68 Mon Sep 17 00:00:00 2001
From: zijiren <84728412+zijiren233@users.noreply.github.com>
Date: Fri, 22 Nov 2024 18:15:33 +0800
Subject: [PATCH] feat: qwen vl image (#5226)

---
 service/aiproxy/relay/adaptor/ali/adaptor.go |  11 +-
 service/aiproxy/relay/adaptor/ali/main.go    | 161 +------------------
 service/aiproxy/relay/adaptor/ali/model.go   |  73 ---------
 3 files changed, 6 insertions(+), 239 deletions(-)

diff --git a/service/aiproxy/relay/adaptor/ali/adaptor.go b/service/aiproxy/relay/adaptor/ali/adaptor.go
index e5d72dbe95a..40de93dc11e 100644
--- a/service/aiproxy/relay/adaptor/ali/adaptor.go
+++ b/service/aiproxy/relay/adaptor/ali/adaptor.go
@@ -4,10 +4,10 @@ import (
 	"errors"
 	"io"
 	"net/http"
-	"strings"
 
 	"github.com/gin-gonic/gin"
 	"github.com/labring/sealos/service/aiproxy/relay/adaptor"
+	"github.com/labring/sealos/service/aiproxy/relay/adaptor/openai"
 	"github.com/labring/sealos/service/aiproxy/relay/meta"
 	"github.com/labring/sealos/service/aiproxy/relay/model"
 	"github.com/labring/sealos/service/aiproxy/relay/relaymode"
@@ -30,10 +30,7 @@ func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) {
 	case relaymode.ImagesGenerations:
 		return meta.BaseURL + "/api/v1/services/aigc/text2image/image-synthesis", nil
 	default:
-		if strings.HasPrefix(meta.ActualModelName, "qwen-vl") {
-			return meta.BaseURL + "/api/v1/services/aigc/multimodal-generation/generation", nil
-		}
-		return meta.BaseURL + "/api/v1/services/aigc/text-generation/generation", nil
+		return meta.BaseURL + "/compatible-mode/v1/chat/completions", nil
 	}
 }
 
@@ -91,7 +88,7 @@ func (a *Adaptor) ConvertTTSRequest(*model.TextToSpeechRequest) (any, error) {
 
 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) {
 	if meta.IsStream {
-		err, usage = StreamHandler(c, resp)
+		err, _, usage = openai.StreamHandler(c, resp, meta.Mode)
 	} else {
 		switch meta.Mode {
 		case relaymode.Embeddings:
@@ -99,7 +96,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
 		case relaymode.ImagesGenerations:
 			err, usage = ImageHandler(c, resp, meta.APIKey)
 		default:
-			err, usage = Handler(c, resp)
+			err, usage = openai.Handler(c, resp, meta.PromptTokens, meta.ActualModelName)
 		}
 	}
 	return
diff --git a/service/aiproxy/relay/adaptor/ali/main.go b/service/aiproxy/relay/adaptor/ali/main.go
index be39baf7227..c9658ccca44 100644
--- a/service/aiproxy/relay/adaptor/ali/main.go
+++ b/service/aiproxy/relay/adaptor/ali/main.go
@@ -1,20 +1,13 @@
 package ali
 
 import (
-	"bufio"
 	"net/http"
-	"slices"
 	"strings"
 
 	json "github.com/json-iterator/go"
-	"github.com/labring/sealos/service/aiproxy/common/conv"
 	"github.com/labring/sealos/service/aiproxy/common/ctxkey"
-	"github.com/labring/sealos/service/aiproxy/common/render"
 
 	"github.com/gin-gonic/gin"
-	"github.com/labring/sealos/service/aiproxy/common"
-	"github.com/labring/sealos/service/aiproxy/common/helper"
-	"github.com/labring/sealos/service/aiproxy/common/logger"
 	"github.com/labring/sealos/service/aiproxy/relay/adaptor/openai"
 	"github.com/labring/sealos/service/aiproxy/relay/model"
 )
@@ -23,33 +16,11 @@ import (
 
 const EnableSearchModelSuffix = "-internet"
 
-func ConvertRequest(request *model.GeneralOpenAIRequest) *ChatRequest {
-	enableSearch := false
-	aliModel := request.Model
-	if strings.HasSuffix(aliModel, EnableSearchModelSuffix) {
-		enableSearch = true
-		aliModel = strings.TrimSuffix(aliModel, EnableSearchModelSuffix)
-	}
+func ConvertRequest(request *model.GeneralOpenAIRequest) *model.GeneralOpenAIRequest {
 	if request.TopP != nil && *request.TopP >= 1 {
 		*request.TopP = 0.9999
 	}
-	return &ChatRequest{
-		Model: aliModel,
-		Input: Input{
-			Messages: request.Messages,
-		},
-		Parameters: Parameters{
-			EnableSearch:      enableSearch,
-			IncrementalOutput: request.Stream,
-			Seed:              uint64(request.Seed),
-			MaxTokens:         request.MaxTokens,
-			Temperature:       request.Temperature,
-			TopP:              request.TopP,
-			TopK:              request.TopK,
-			ResultFormat:      "message",
-			Tools:             request.Tools,
-		},
-	}
+	return request
 }
 
 func ConvertEmbeddingRequest(request *model.GeneralOpenAIRequest) *EmbeddingRequest {
@@ -127,131 +98,3 @@ func embeddingResponseAli2OpenAI(response *EmbeddingResponse) *openai.EmbeddingR
 	}
 	return &openAIEmbeddingResponse
 }
-
-func responseAli2OpenAI(response *ChatResponse) *openai.TextResponse {
-	fullTextResponse := openai.TextResponse{
-		ID:      response.RequestID,
-		Object:  "chat.completion",
-		Created: helper.GetTimestamp(),
-		Choices: response.Output.Choices,
-		Usage: model.Usage{
-			PromptTokens:     response.Usage.InputTokens,
-			CompletionTokens: response.Usage.OutputTokens,
-			TotalTokens:      response.Usage.InputTokens + response.Usage.OutputTokens,
-		},
-	}
-	return &fullTextResponse
-}
-
-func streamResponseAli2OpenAI(aliResponse *ChatResponse) *openai.ChatCompletionsStreamResponse {
-	if len(aliResponse.Output.Choices) == 0 {
-		return nil
-	}
-	aliChoice := aliResponse.Output.Choices[0]
-	var choice openai.ChatCompletionsStreamResponseChoice
-	choice.Delta = aliChoice.Message
-	if aliChoice.FinishReason != "null" {
-		finishReason := aliChoice.FinishReason
-		choice.FinishReason = &finishReason
-	}
-	response := openai.ChatCompletionsStreamResponse{
-		ID:      aliResponse.RequestID,
-		Object:  "chat.completion.chunk",
-		Created: helper.GetTimestamp(),
-		Model:   "qwen",
-		Choices: []openai.ChatCompletionsStreamResponseChoice{choice},
-	}
-	return &response
-}
-
-func StreamHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusCode, *model.Usage) {
-	defer resp.Body.Close()
-
-	var usage model.Usage
-	scanner := bufio.NewScanner(resp.Body)
-	scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) {
-		if atEOF && len(data) == 0 {
-			return 0, nil, nil
-		}
-		if i := slices.Index(data, '\n'); i >= 0 {
-			return i + 1, data[0:i], nil
-		}
-		if atEOF {
-			return len(data), data, nil
-		}
-		return 0, nil, nil
-	})
-
-	common.SetEventStreamHeaders(c)
-
-	for scanner.Scan() {
-		data := scanner.Bytes()
-		if len(data) < 5 || conv.BytesToString(data[:5]) != "data:" {
-			continue
-		}
-		data = data[5:]
-
-		if conv.BytesToString(data) == "[DONE]" {
-			break
-		}
-
-		var aliResponse ChatResponse
-		err := json.Unmarshal(data, &aliResponse)
-		if err != nil {
-			logger.SysError("error unmarshalling stream response: " + err.Error())
-			continue
-		}
-		if aliResponse.Usage.OutputTokens != 0 {
-			usage.PromptTokens = aliResponse.Usage.InputTokens
-			usage.CompletionTokens = aliResponse.Usage.OutputTokens
-			usage.TotalTokens = aliResponse.Usage.InputTokens + aliResponse.Usage.OutputTokens
-		}
-		response := streamResponseAli2OpenAI(&aliResponse)
-		if response == nil {
-			continue
-		}
-		err = render.ObjectData(c, response)
-		if err != nil {
-			logger.SysError(err.Error())
-		}
-	}
-
-	if err := scanner.Err(); err != nil {
-		logger.SysError("error reading stream: " + err.Error())
-	}
-
-	render.Done(c)
-
-	return nil, &usage
-}
-
-func Handler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusCode, *model.Usage) {
-	defer resp.Body.Close()
-
-	var aliResponse ChatResponse
-	err := json.NewDecoder(resp.Body).Decode(&aliResponse)
-	if err != nil {
-		return openai.ErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil
-	}
-	if aliResponse.Code != "" {
-		return &model.ErrorWithStatusCode{
-			Error: model.Error{
-				Message: aliResponse.Message,
-				Type:    aliResponse.Code,
-				Param:   aliResponse.RequestID,
-				Code:    aliResponse.Code,
-			},
-			StatusCode: resp.StatusCode,
-		}, nil
-	}
-	fullTextResponse := responseAli2OpenAI(&aliResponse)
-	fullTextResponse.Model = "qwen"
-	jsonResponse, err := json.Marshal(fullTextResponse)
-	if err != nil {
-		return openai.ErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
-	}
-	c.Writer.Header().Set("Content-Type", "application/json")
-	c.Writer.WriteHeader(resp.StatusCode)
-	_, _ = c.Writer.Write(jsonResponse)
-	return nil, &fullTextResponse.Usage
-}
diff --git a/service/aiproxy/relay/adaptor/ali/model.go b/service/aiproxy/relay/adaptor/ali/model.go
index a7e858a7ff6..d1f1344670b 100644
--- a/service/aiproxy/relay/adaptor/ali/model.go
+++ b/service/aiproxy/relay/adaptor/ali/model.go
@@ -1,33 +1,5 @@
 package ali
 
-import (
-	"github.com/labring/sealos/service/aiproxy/relay/adaptor/openai"
-	"github.com/labring/sealos/service/aiproxy/relay/model"
-)
-
-type Input struct {
-	// Prompt   string       `json:"prompt"`
-	Messages []model.Message `json:"messages"`
-}
-
-type Parameters struct {
-	TopP              *float64     `json:"top_p,omitempty"`
-	Temperature       *float64     `json:"temperature,omitempty"`
-	ResultFormat      string       `json:"result_format,omitempty"`
-	Tools             []model.Tool `json:"tools,omitempty"`
-	TopK              int          `json:"top_k,omitempty"`
-	Seed              uint64       `json:"seed,omitempty"`
-	MaxTokens         int          `json:"max_tokens,omitempty"`
-	EnableSearch      bool         `json:"enable_search,omitempty"`
-	IncrementalOutput bool         `json:"incremental_output,omitempty"`
-}
-
-type ChatRequest struct {
-	Model      string     `json:"model"`
-	Input      Input      `json:"input"`
-	Parameters Parameters `json:"parameters,omitempty"`
-}
-
 type ImageRequest struct {
 	Input struct {
 		Prompt         string `json:"prompt"`
@@ -68,39 +40,6 @@ type TaskResponse struct {
 	StatusCode int   `json:"status_code,omitempty"`
 }
 
-type Header struct {
-	Attributes   any    `json:"attributes,omitempty"`
-	Action       string `json:"action,omitempty"`
-	Streaming    string `json:"streaming,omitempty"`
-	TaskID       string `json:"task_id,omitempty"`
-	Event        string `json:"event,omitempty"`
-	ErrorCode    string `json:"error_code,omitempty"`
-	ErrorMessage string `json:"error_message,omitempty"`
-}
-
-type Payload struct {
-	Model     string `json:"model,omitempty"`
-	Task      string `json:"task,omitempty"`
-	TaskGroup string `json:"task_group,omitempty"`
-	Function  string `json:"function,omitempty"`
-	Input     struct {
-		Text string `json:"text,omitempty"`
-	} `json:"input,omitempty"`
-	Parameters struct {
-		Format     string  `json:"format,omitempty"`
-		SampleRate int     `json:"sample_rate,omitempty"`
-		Rate       float64 `json:"rate,omitempty"`
-	} `json:"parameters,omitempty"`
-	Usage struct {
-		Characters int `json:"characters,omitempty"`
-	} `json:"usage,omitempty"`
-}
-
-type WSSMessage struct {
-	Header  Header  `json:"header,omitempty"`
-	Payload Payload `json:"payload,omitempty"`
-}
-
 type EmbeddingRequest struct {
 	Parameters *struct {
 		TextType string `json:"text_type,omitempty"`
@@ -135,15 +74,3 @@ type Usage struct {
 	OutputTokens int `json:"output_tokens"`
 	TotalTokens  int `json:"total_tokens"`
 }
-
-type Output struct {
-	// Text         string                      `json:"text"`
-	// FinishReason string                      `json:"finish_reason"`
-	Choices []openai.TextResponseChoice `json:"choices"`
-}
-
-type ChatResponse struct {
-	Error
-	Output Output `json:"output"`
-	Usage  Usage  `json:"usage"`
-}