Skip to content

Commit

Permalink
集成腾讯TMT接口, 由于官方QPS限制, 导致接口可能不太稳定;
Browse files Browse the repository at this point in the history
  • Loading branch information
speauty committed Mar 3, 2023
1 parent 789477f commit 2d9846f
Show file tree
Hide file tree
Showing 6 changed files with 325 additions and 11 deletions.
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ require (
github.com/alibabacloud-go/tea-utils/v2 v2.0.1
github.com/golang-module/carbon v1.7.3
github.com/lxn/walk v0.0.0-20210112085537-c389da54e794
github.com/lxn/win v0.0.0-20210218163916-a377121e959e
github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.608
)

require (
Expand All @@ -29,7 +31,6 @@ require (
github.com/gobuffalo/packr v1.30.1 // indirect
github.com/joho/godotenv v1.3.0 // indirect
github.com/json-iterator/go v1.1.10 // indirect
github.com/lxn/win v0.0.0-20210218163916-a377121e959e // indirect
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect
github.com/modern-go/reflect2 v1.0.1 // indirect
github.com/rogpeppe/go-internal v1.3.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.608 h1:yLiHPyhxJDEpNRCVF5hfA3e1eV9VJMvbCqLJq1VG5/U=
github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.608/go.mod h1:7sCQWVkxcsR38nffDW057DRGk8mUjK1Ing/EFOK8s8Y=
github.com/tjfoc/gmsm v1.3.2 h1:7JVkAn5bvUJ7HtU08iW6UiD+UTmJTIToHCfeFzkcCxM=
github.com/tjfoc/gmsm v1.3.2/go.mod h1:HaUcFuY0auTiaHB9MHFGCPx5IaLhTUd2atbCFBQXn9w=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
Expand Down
127 changes: 127 additions & 0 deletions src/logic/translate/translate.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"gui.subtitle/src/srv/mt"
aliyun2 "gui.subtitle/src/srv/mt/aliyun"
"gui.subtitle/src/srv/mt/bd"
"gui.subtitle/src/srv/mt/tencent"
"gui.subtitle/src/srv/mt/youdao"
"gui.subtitle/src/util"
"gui.subtitle/src/util/lang"
Expand Down Expand Up @@ -389,6 +390,110 @@ func Translate(ctx context.Context, mtEngine interface{}, contents []*Block, fro
}
}(ctx, coroutineCtrlCtx, coroutineCtrlCtxCancelFunc, wg, coroutineIdx, blockChan)
}
case mt.IdTencent:
blockChunked, cntBlockChunked := chunkBlocksForTencent(contents, 2000, fromLanguage)
if cntBlockChunked < maxCoroutine {
maxCoroutine = cntBlockChunked
}
if maxCoroutine > 4 { // TMT的QPS: 5
maxCoroutine = 4
}
blockChan := make(chan []string, maxCoroutine*3)
wg.Add(1)

go func(currentCtx context.Context, currentWG *sync.WaitGroup) {
defer currentWG.Done()
for _, block := range blockChunked {
blockChan <- block
}
close(blockChan)
}(ctx, wg)

for coroutineIdx := 0; coroutineIdx < maxCoroutine; coroutineIdx++ {
if util.IsCtxDone(coroutineCtrlCtx) {
results = append(results, fmt.Sprintf("[%s]%s失败, 错误: 协程出现中断信号, 停止继续创建协程", carbon.Now(), mtEngine.(mt.MT).GetName()))
break
}

wg.Add(1)
go func(localCtx context.Context, localCoroutineCtrlCtx context.Context, localCoroutineCtrlCtxCancelFunc context.CancelFunc, localWG *sync.WaitGroup, localCoroutineIdx int, localBlockChan chan []string) {
defer localWG.Done()
localCoroutineCntTranslated := 0
localCoroutineTimeStart := carbon.Now()
for {
select {
case block, isOpen := <-blockChan:
if !isOpen {
results = append(results, fmt.Sprintf(
"[%s]协程结束, 引擎: %s, 协程序号: %d, 处理字幕行数: %d, 运行时长(s): %d, 原因: 数据通道关闭, 无数据, 主动退出当前协程",
carbon.Now(), mtEngine.(mt.MT).GetName(), localCoroutineIdx,
localCoroutineCntTranslated, carbon.Now().DiffAbsInSeconds(localCoroutineTimeStart),
))
runtime.Goexit()
return
}
args := &tencent.TextBatchTranslateArg{
FromLanguage: fromLanguage.ToString(),
ToLanguage: toLanguage.ToString(),
TextList: block,
}
var translateResp []mt.TextTranslateResp
var err error

for failIdx := 0; failIdx < 3; failIdx++ {
translateResp, err = mtEngine.(mt.MT).TextBatchTranslate(ctx, args)
if err != nil || translateResp == nil {
err = fmt.Errorf("[%s]%s翻译失败, 协程序号: %d, 错误: %s", carbon.Now(), mtEngine.(mt.MT).GetName(), localCoroutineIdx, err)
time.Sleep(time.Second)
continue
}
break
}
if err != nil {
results = append(results, err.Error())
cntError.Add(1)
lastError = err

if bd.ErrSign.IsExit(err) {
localCoroutineCtrlCtxCancelFunc()
runtime.Goexit()
return
}
}
for _, translateRes := range translateResp {
for idx, content := range contents {
if fromLanguage == lang.ZH {
if content.TextZH == translateRes.Idx {
contents[idx].TextEN = translateRes.StrTranslated
cntBlockTranslated.Add(1)
localCoroutineCntTranslated++
}
} else if fromLanguage == lang.EN {
if content.TextEN == translateRes.Idx {
contents[idx].TextZH = translateRes.StrTranslated
cntBlockTranslated.Add(1)
localCoroutineCntTranslated++
}
}
}
}
default:
if util.IsCtxDone(localCoroutineCtrlCtx) {
results = append(results, fmt.Sprintf(
"[%s]协程结束, 引擎: %s, 协程序号: %d, 处理字幕行数: %d, 运行时长(s): %d, 错误: 协程出现中断信号, 强制退出",
carbon.Now(), mtEngine.(mt.MT).GetName(), localCoroutineIdx,
localCoroutineCntTranslated, carbon.Now().DiffAbsInSeconds(localCoroutineTimeStart),
))
runtime.Goexit()
return
}
}
}
}(ctx, coroutineCtrlCtx, coroutineCtrlCtxCancelFunc, wg, coroutineIdx, blockChan)
}
default:
lastError = fmt.Errorf("当前引擎[%s]暂未实现, 尽请期待", mtEngine.(mt.MT).GetName())
return nil, 0, lastError
}
wg.Wait()
resStr := "成功"
Expand Down Expand Up @@ -480,6 +585,28 @@ func chunkBlocksForBaiDu(contents []*Block, lenLimited int, fromLanguage lang.St
return blockChunked, len(blockChunked)
}

// chunkBlocksForTencent splits the subtitle blocks into batches of source
// texts for the Tencent translation engine.
//
// For every block the source text is TextZH when fromLanguage is lang.ZH and
// TextEN otherwise. Texts are appended to the current batch until adding the
// next one would bring the accumulated byte length to lenLimited or beyond;
// the batch is then closed and a new one is started with that text.
//
// A text that is by itself at least lenLimited bytes long is placed in a batch
// of its own. No empty batch is ever produced, so callers never send an empty
// request.
//
// It returns the batches in input order together with their count.
func chunkBlocksForTencent(contents []*Block, lenLimited int, fromLanguage lang.StrLang) (res [][]string, cnt int) {
	var (
		chunk    []string
		chunkLen int // running byte length of chunk, avoids re-joining it on every iteration
	)
	for _, content := range contents {
		sourceText := content.TextEN
		if fromLanguage == lang.ZH {
			sourceText = content.TextZH
		}
		// Only flush a batch that actually holds something; otherwise an
		// oversized first text would emit an empty batch.
		if len(chunk) > 0 && chunkLen+len(sourceText) >= lenLimited {
			res = append(res, chunk)
			chunk = nil
			chunkLen = 0
		}
		chunk = append(chunk, sourceText)
		chunkLen += len(sourceText)
	}
	if len(chunk) > 0 {
		res = append(res, chunk)
	}
	return res, len(res)
}

func hasZH(str string) bool {
for _, r := range str {
if unicode.Is(unicode.Scripts["Han"], r) || (regexp.MustCompile("[\u3002\uff1b\uff0c\uff1a\u201c\u201d\uff08\uff09\u3001\uff1f\u300a\u300b]").MatchString(string(r))) {
Expand Down
131 changes: 131 additions & 0 deletions src/srv/mt/tencent/tencent.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package tencent

import (
"context"
"encoding/json"
"errors"
"fmt"
"github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common"
tencentHttp "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common/http"
"github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common/profile"
"gui.subtitle/src/srv/mt"
)

// APIVersion is the API version string passed to WithApiInfo for "tmt" requests.
const APIVersion = "2018-03-21"

// Cfg holds the settings Init passes to common.NewClientWithSecretId.
type Cfg struct {
// SecretId is the first (secret id) argument of NewClientWithSecretId.
SecretId string
// SecretKey is the second (secret key) argument of NewClientWithSecretId.
SecretKey string
// Region is the third (region) argument of NewClientWithSecretId.
Region string
}

// MT is the Tencent machine-translation engine. Its fields are populated by Init.
type MT struct {
// tencentClient is the Tencent Cloud SDK client used to send requests.
tencentClient *common.Client
// cfg is the configuration supplied to Init.
cfg *Cfg
}

// GetId reports the identifier of this engine, which is always mt.IdTencent.
func (m *MT) GetId() mt.Id {
	var engineId mt.Id = mt.IdTencent
	return engineId
}

// GetName returns the Chinese display name of the Tencent engine, as provided
// by mt.EngineTencent.GetZH.
func (m *MT) GetName() string {
	engineName := mt.EngineTencent.GetZH()
	return engineName
}

// GetCfg returns the stored configuration (a *Cfg) wrapped in an empty
// interface. Before Init has run the wrapped pointer is nil.
func (m *MT) GetCfg() interface{} {
	var current interface{} = m.cfg
	return current
}

// Init validates cfg and creates the Tencent Cloud SDK client used by
// TextBatchTranslate.
//
// cfg must be a non-nil *Cfg; any other value (including a typed nil pointer)
// is rejected with an error. If the engine already holds a configuration or a
// client, Init returns nil without changing anything.
//
// The receiver is only modified after the client has been created
// successfully, so a failed Init leaves the engine untouched and can be
// retried.
func (m *MT) Init(_ context.Context, cfg interface{}) error {
	tencentCfg, ok := cfg.(*Cfg)
	if !ok || tencentCfg == nil {
		return fmt.Errorf("the cfg's mismatched")
	}
	if m.cfg != nil || m.tencentClient != nil {
		return nil
	}

	client, err := common.NewClientWithSecretId(tencentCfg.SecretId, tencentCfg.SecretKey, tencentCfg.Region)
	if err != nil {
		return err
	}
	client.WithProfile(profile.NewClientProfile())

	// Commit state last so that an error above never leaves cfg set without a client.
	m.cfg = tencentCfg
	m.tencentClient = client
	return nil
}

// TextTranslate is a stub: it performs no request and always returns a nil
// result together with a nil error.
func (m *MT) TextTranslate(ctx context.Context, args interface{}) ([]mt.TextTranslateResp, error) {
	var (
		noResult []mt.TextTranslateResp
		noError  error
	)
	return noResult, noError
}

// TextTranslateBatchRequest is the request payload sent for the "tmt"
// TextTranslateBatch action.
type TextTranslateBatchRequest struct {
*tencentHttp.BaseRequest
// Source is the source language code.
Source *string `json:"Source,omitempty" name:"Source"`
// Target is the target language code.
Target *string `json:"Target,omitempty" name:"Target"`
// ProjectId is the project identifier; TextBatchTranslate always sends 0.
ProjectId *int64 `json:"ProjectId,omitempty" name:"ProjectId"`
// SourceTextList holds the texts to translate, in request order.
SourceTextList []*string `json:"SourceTextList,omitempty" name:"SourceTextList"`
}

// TextTranslateBatchResponseParams is the "Response" object of a
// TextTranslateBatch reply.
type TextTranslateBatchResponseParams struct {
// Source is the source language code reported by the service.
Source *string `json:"Source,omitempty" name:"Source"`
// Target is the target language code reported by the service.
Target *string `json:"Target,omitempty" name:"Target"`
// TargetTextList holds the translated texts; TextBatchTranslate reads it by
// the same index as the submitted source texts.
TargetTextList []*string `json:"TargetTextList,omitempty" name:"TargetTextList"`
// RequestId is the request identifier returned by the service.
RequestId *string `json:"RequestId,omitempty" name:"RequestId"`
}

// TextTranslateBatchResponse is the response envelope handed to the SDK
// client's Send method for the TextTranslateBatch action.
type TextTranslateBatchResponse struct {
*tencentHttp.BaseResponse
// Response carries the decoded "Response" object of the reply.
Response *TextTranslateBatchResponseParams `json:"Response"`
}

// ToJsonString serialises the response to JSON text. A marshalling error is
// ignored, in which case the returned string is empty.
func (r *TextTranslateBatchResponse) ToJsonString() string {
	var encoded []byte
	encoded, _ = json.Marshal(r)
	return string(encoded)
}

// FromJsonString decodes the JSON text s into the receiver.
//
// The receiver pointer itself is handed to json.Unmarshal. Passing its address
// would let a nil receiver be silently replaced by a freshly allocated value
// that is discarded on return; with the pointer passed directly, a nil
// receiver yields an error instead of a false success.
func (r *TextTranslateBatchResponse) FromJsonString(s string) error {
	return json.Unmarshal([]byte(s), r)
}

// TextBatchTranslateArg is the argument type expected by MT.TextBatchTranslate.
type TextBatchTranslateArg struct {
// FromLanguage is sent as the request's Source language.
FromLanguage string
// ToLanguage is sent as the request's Target language.
ToLanguage string
// TextList holds the texts to translate; each one is also used as the Idx
// of its entry in the returned result.
TextList []string
}

// TextBatchTranslate translates a batch of texts with the Tencent TMT
// TextTranslateBatch action.
//
// args must be a non-nil *TextBatchTranslateArg. The engine must have been
// initialised with Init and the client must carry a credential.
//
// On success the result has one entry per element of TextList, in the same
// order; Idx is the source text and StrTranslated the translation found at the
// same position of the response.
//
// An error is returned when the arguments have the wrong type, the client or
// its credential is missing, the request fails, or the response is empty,
// shorter than the request, or contains a missing entry.
func (m *MT) TextBatchTranslate(ctx context.Context, args interface{}) ([]mt.TextTranslateResp, error) {
	arg, ok := args.(*TextBatchTranslateArg)
	if !ok || arg == nil {
		return nil, fmt.Errorf("the args for TencentMT.TextBatchTranslate mismatched")
	}
	if m.tencentClient == nil {
		return nil, errors.New("the tencent client has not been initialized")
	}
	if m.tencentClient.GetCredential() == nil {
		return nil, errors.New("文本批量翻译需要凭证")
	}

	projectId := int64(0)
	request := &TextTranslateBatchRequest{
		BaseRequest:    &tencentHttp.BaseRequest{},
		Source:         &arg.FromLanguage,
		Target:         &arg.ToLanguage,
		ProjectId:      &projectId,
		SourceTextList: make([]*string, 0, len(arg.TextList)),
	}
	// Point at the slice elements themselves rather than at a loop variable.
	for idx := range arg.TextList {
		request.SourceTextList = append(request.SourceTextList, &arg.TextList[idx])
	}
	request.Init().WithApiInfo("tmt", APIVersion, "TextTranslateBatch")
	request.SetContext(ctx)

	response := &TextTranslateBatchResponse{
		BaseResponse: &tencentHttp.BaseResponse{},
	}
	if err := m.tencentClient.Send(request, response); err != nil {
		return nil, err
	}

	// Validate the reply before indexing into it so a malformed response
	// becomes an error instead of a panic inside a worker goroutine.
	if response.Response == nil {
		return nil, errors.New("the response of TextTranslateBatch is empty")
	}
	translated := response.Response.TargetTextList
	if len(translated) < len(arg.TextList) {
		return nil, fmt.Errorf("the response of TextTranslateBatch has %d texts, expected %d", len(translated), len(arg.TextList))
	}

	resp := make([]mt.TextTranslateResp, 0, len(arg.TextList))
	for sourceIdx, sourceText := range arg.TextList {
		if translated[sourceIdx] == nil {
			return nil, fmt.Errorf("the response of TextTranslateBatch misses the text at index %d", sourceIdx)
		}
		resp = append(resp, mt.TextTranslateResp{
			Idx:           sourceText,
			StrTranslated: *translated[sourceIdx],
		})
	}
	return resp, nil
}
11 changes: 7 additions & 4 deletions src/srv/mt/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ type MT interface {
type Id string

const (
IdALiYun Id = "EngineALi"
IdBaiDu Id = "EngineBaiDu"
IdYouDao Id = "EngineYouDao"
IdALiYun Id = "EngineALi"
IdBaiDu Id = "EngineBaiDu"
IdYouDao Id = "EngineYouDao"
IdTencent Id = "EngineTencent"
)

type Engine int
Expand Down Expand Up @@ -51,9 +52,11 @@ const (
EngineBaiDu
// EngineYouDao 有道翻译
EngineYouDao
// EngineTencent 腾讯翻译
EngineTencent
)

var engineZHMaps = []string{"阿里云", "百度", "有道"}
var engineZHMaps = []string{"阿里云", "百度", "有道", "腾讯"}

const BlockSep string = "\n"
const BlockIdxContentSep string = "@<"
Expand Down
Loading

0 comments on commit 2d9846f

Please sign in to comment.