Skip to content

Commit

Permalink
Feat: kokoro tts channel
Browse files Browse the repository at this point in the history
  • Loading branch information
jianchang512 committed Feb 4, 2025
1 parent 354bc1c commit 8ce510b
Show file tree
Hide file tree
Showing 15 changed files with 703 additions and 394 deletions.
Binary file added test_openai.mp3
Binary file not shown.
4 changes: 2 additions & 2 deletions videotrans/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-

VERSION = "v3.47"
VERSION_NUM = 120347
VERSION = "v3.48"
VERSION_NUM = 120348
5 changes: 3 additions & 2 deletions videotrans/component/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
HebingsrtForm, DoubaoForm, FishTTSForm, CosyVoiceForm, AI302Form, SetINIForm, WatermarkForm, \
GetaudioForm, HunliuForm, VASForm, Fanyisrt, Recognform, Peiyinform, Videoandaudioform, Videoandsrtform, \
OpenAITTSForm, RecognAPIForm, OpenaiRecognAPIForm, DownloadModelForm, FormatcoverForm, SubtitlescoverForm, \
SubtitleEditer,SttAPIForm,VolcEngineTTSForm,F5TTSForm,DeepgramForm,ClaudeForm,LibreForm,AliForm,FreeAIForm
SubtitleEditer,SttAPIForm,VolcEngineTTSForm,F5TTSForm,DeepgramForm,ClaudeForm,LibreForm,AliForm,FreeAIForm,KokoroForm

__all__ = [
"BaiduForm",
Expand Down Expand Up @@ -58,5 +58,6 @@
"ClaudeForm",
"LibreForm",
"AliForm",
"FreeAIForm"
"FreeAIForm",
"KokoroForm"
]
7 changes: 7 additions & 0 deletions videotrans/component/set_form.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from videotrans.ui.gptsovits import Ui_gptsovitsform
from videotrans.ui.hunliu import Ui_hunliu
from videotrans.ui.info import Ui_infoform
from videotrans.ui.kokoro import Ui_kokoroform
from videotrans.ui.libretranslate import Ui_libretranslateform
from videotrans.ui.localllm import Ui_localllmform
from videotrans.ui.openairecognapi import Ui_openairecognapiform
Expand Down Expand Up @@ -273,6 +274,12 @@ def __init__(self, parent=None):
self.setupUi(self)
self.setWindowIcon(QIcon(f"{config.ROOT_DIR}/videotrans/styles/icon.ico"))

class KokoroForm(QDialog, Ui_kokoroform):  # <===
    """Dialog for the Kokoro TTS channel, laid out by ``Ui_kokoroform``."""

    def __init__(self, parent=None):
        """Build the dialog widgets and set the application window icon.

        Args:
            parent: Optional parent widget passed through to ``QDialog``.
        """
        super().__init__(parent)
        self.setupUi(self)
        icon_path = f"{config.ROOT_DIR}/videotrans/styles/icon.ico"
        self.setWindowIcon(QIcon(icon_path))


class ChatttsForm(QDialog, Ui_chatttsform): # <===
def __init__(self, parent=None):
Expand Down
2 changes: 2 additions & 0 deletions videotrans/configure/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,8 @@ def getset_params(obj=None):
"openaitts_model": "tts-1",
"openaitts_role": "alloy,echo,fable,onyx,nova,shimmer",

"kokoro_api":"",

"openairecognapi_url": "",
"openairecognapi_key": "",
"openairecognapi_prompt": "",
Expand Down
4 changes: 3 additions & 1 deletion videotrans/mainwin/_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def click_reglabel(self):

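# Whether the TTS channel is one whose voice roles change with the selected target language: Edge-TTS, Azure TTS, VolcEngine TTS, Kokoro TTS, or 302.AI (when ai302tts_model=azure)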
def change_by_lang(self, type):
    """Tell whether a TTS channel's voice roles follow the target language.

    Args:
        type: TTS channel constant from the ``tts`` module. The name shadows
            the builtin but is kept because it is part of the signature.

    Returns:
        True for Edge-TTS, Azure TTS, VolcEngine TTS, 302.AI TTS and
        Kokoro TTS; False for every other channel.
    """
    return type in (
        tts.EDGE_TTS,
        tts.AZURE_TTS,
        tts.VOLCENGINE_TTS,
        tts.AI302_TTS,
        tts.KOKORO_TTS,
    )

Expand Down Expand Up @@ -242,6 +242,8 @@ def set_voice_role(self, t):
tts_type = self.main.tts_type.currentIndex()
if tts_type == tts.EDGE_TTS:
show_rolelist = tools.get_edge_rolelist()
elif tts_type == tts.KOKORO_TTS:
show_rolelist = tools.get_kokoro_rolelist()
elif tts_type == tts.AI302_TTS:
show_rolelist = tools.get_302ai()
elif tts_type == tts.VOLCENGINE_TTS:
Expand Down
1 change: 1 addition & 0 deletions videotrans/mainwin/_main_win.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ def _start_subform(self):
self.actiondeepLX_address.triggered.connect(winform.deepLX.openwin)
self.actionott_address.triggered.connect(winform.ott.openwin)
self.actionclone_address.triggered.connect(winform.clone.openwin)
self.actionkokoro_address.triggered.connect(winform.kokoro.openwin)
self.actionchattts_address.triggered.connect(winform.chattts.openwin)
self.actiontts_api.triggered.connect(winform.ttsapi.openwin)
self.actionrecognapi.triggered.connect(winform.recognapi.openwin)
Expand Down
15 changes: 14 additions & 1 deletion videotrans/tts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
TTS_API = 11
VOLCENGINE_TTS = 12
F5_TTS = 13
KOKORO_TTS = 14

TTS_NAME_LIST = [
"Edge-TTS(免费)" if config.defaulelang=='zh' else 'Edge-TTS',
Expand All @@ -32,7 +33,8 @@
"Google TTS",
"自定义TTSAPI" if config.defaulelang == 'zh' else 'Customize API',
"字节火山语音合成" if config.defaulelang == 'zh' else 'VolcEngine TTS',
"F5-TTS(本地)" if config.defaulelang=='zh' else 'F5-TTS'
"F5-TTS(本地)" if config.defaulelang=='zh' else 'F5-TTS',
"kokoro-TTS(本地)" if config.defaulelang=='zh' else 'kokoro-TTS',
]

DOUBAO_302AI={
Expand Down Expand Up @@ -117,6 +119,8 @@ def is_allow_lang(langcode: str = None, tts_type: int = None):

if tts_type == VOLCENGINE_TTS and langcode[:2] not in ['zh', 'ja', 'en','pt','es','th','vi','id']:
return '字节火山语音合成 仅支持中、日、英、葡萄牙、西班牙、泰语、越南、印尼语言配音' if config.defaulelang == 'zh' else 'Byte VolcEngine TTS only supports Chinese, English, Japanese, Portuguese, Spanish, Thai, Vietnamese, Indonesian'
if tts_type == KOKORO_TTS and langcode[:2] not in ['zh', 'ja', 'en','pt','es','it','hi','fr']:
return 'kokoro tts 仅支持中、日、英、葡萄牙、西班牙、意大利、印度、法语配音' if config.defaulelang == 'zh' else 'Kokoro TTS only supports Chinese, English, Japanese, Portuguese, Spanish, it, hi, fr'
if tts_type == F5_TTS and langcode[:2] not in ['zh', 'en']:
return 'F5-TTS语音合成 仅支持中、英语言配音' if config.defaulelang == 'zh' else 'F5-TTS only supports Chinese, English'

Expand All @@ -132,6 +136,12 @@ def is_input_api(tts_type: int = None,return_str=False):
from videotrans.winform import openaitts as openaitts_win
openaitts_win.openwin()
return False
if tts_type == KOKORO_TTS and not config.params["kokoro_api"]:
if return_str:
return "Please configure the api information of the kokoro tts channel first."
from videotrans.winform import kokoro
kokoro.openwin()
return False
if tts_type == AI302_TTS and not config.params["ai302_key"]:
if return_str:
return "Please configure the api and key information of the 302.AI TTS channel first."
Expand Down Expand Up @@ -234,6 +244,9 @@ def run(*, queue_tts=None, language=None, inst=None, uuid=None, play=False, is_t
elif tts_type == FISHTTS:
from videotrans.tts._fishtts import FishTTS
FishTTS(**kwargs).run()
elif tts_type == KOKORO_TTS:
from videotrans.tts._kokoro import KokoroTTS
KokoroTTS(**kwargs).run()
elif tts_type == GPTSOVITS_TTS:
from videotrans.tts._gptsovits import GPTSoVITS
GPTSoVITS(**kwargs).run()
Expand Down
61 changes: 61 additions & 0 deletions videotrans/tts/_kokoro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import copy
import re
import time
from pathlib import Path

import requests
from pydub import AudioSegment

from videotrans.configure import config
from videotrans.tts._base import BaseTTS
from videotrans.util import tools


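# Thread-pool concurrency. NOTE(review): the original note says the API returns wav data that is converted to mp3, but the code below writes the response bytes to the target file unchanged - confirm the expected format.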

class KokoroTTS(BaseTTS):
    """TTS channel that sends each line of text to a Kokoro HTTP API.

    The endpoint is derived from ``config.params['kokoro_api']`` and always
    ends in ``/v1/audio/speech``.
    """

    _SPEECH_PATH = '/v1/audio/speech'

    def __init__(self, *args, **kwargs):
        """Initialise the base channel and resolve the speech endpoint URL."""
        super().__init__(*args, **kwargs)
        self.api_url = self._normalize_api_url(config.params['kokoro_api'])
        # Empty proxy entries are passed to requests; presumably so the
        # (usually local) API is not reached through a system proxy.
        self.proxies = {"http": "", "https": ""}

    @staticmethod
    def _normalize_api_url(raw):
        """Turn the configured address into the full speech endpoint URL.

        The address is trimmed, stripped of trailing slashes and lower-cased.
        An ``https://`` prefix is kept as-is; anything else is served over
        ``http://``. Previously every address was forced to ``http://``,
        which turned ``https://host`` into ``http://https://host``.

        Args:
            raw: Address as stored in the settings, with or without scheme.

        Returns:
            The URL ending in ``/v1/audio/speech``.
        """
        url = raw.strip().rstrip('/').lower()
        if url.startswith('https://'):
            scheme, rest = 'https://', url[len('https://'):]
        else:
            scheme, rest = 'http://', url.replace('http://', '')
        url = scheme + rest
        if not url.endswith(KokoroTTS._SPEECH_PATH):
            url += KokoroTTS._SPEECH_PATH
        return url

    def _exec(self):
        """Run the synthesis tasks through the base class helper."""
        self._local_mul_thread()

    def _item_task(self, data_item: dict = None):
        """Synthesize one item and write the returned audio to its file.

        Args:
            data_item: Mapping with at least ``text``, ``role`` and
                ``filename`` keys. Nothing happens when it is empty or when
                ``self._exit()`` is true.

        Errors are not raised: they are stored in ``self.error`` and sent
        through ``self._signal``.
        """
        if self._exit():
            return
        if not data_item:
            return
        try:
            text = data_item['text'].strip()
            # self.rate is a percentage string such as "+10%"; it is applied
            # as an offset to the neutral speed 1.0.
            speed = 1.0
            if self.rate:
                speed += float(self.rate.replace('%', '')) / 100
            payload = {"input": text, "voice": data_item['role'], "speed": speed}

            res = requests.post(self.api_url, json=payload, proxies=self.proxies, timeout=3600)
            res.raise_for_status()

            with open(data_item['filename'], 'wb') as f:
                f.write(res.content)
            if self.inst and self.inst.precent < 80:
                self.inst.precent += 0.1
            self.error = ''
            self.has_done += 1
        except (requests.ConnectionError, requests.Timeout):
            self.error = "连接失败,请检查是否启动了api服务" if config.defaulelang == 'zh' else 'Connection failed, please check if the api service is started'
        except Exception as e:
            # Remove any partially written audio so a broken file is not used.
            Path(data_item['filename']).unlink(missing_ok=True)
            self.error = str(e)
            config.logger.exception(e, exc_info=True)
        finally:
            if self.error:
                self._signal(text=self.error)
            else:
                self._signal(text=f'{config.transobj["kaishipeiyin"]} {self.has_done}/{self.len}')
5 changes: 5 additions & 0 deletions videotrans/ui/en.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,8 @@ def setupUi(self, MainWindow):

self.actionclone_address = QtGui.QAction(MainWindow)
self.actionclone_address.setObjectName("actionclone_address")
self.actionkokoro_address = QtGui.QAction(MainWindow)
self.actionkokoro_address.setObjectName("actionkokoro_address")
self.actionchattts_address = QtGui.QAction(MainWindow)
self.actionchattts_address.setObjectName("actionchattts_address")

Expand Down Expand Up @@ -852,6 +854,8 @@ def setupUi(self, MainWindow):

self.menu_TTS.addAction(self.actionclone_address)
self.menu_TTS.addSeparator()
self.menu_TTS.addAction(self.actionkokoro_address)
self.menu_TTS.addSeparator()
self.menu_TTS.addAction(self.actionchattts_address)
self.menu_TTS.addSeparator()
self.menu_TTS.addAction(self.actiontts_gptsovits)
Expand Down Expand Up @@ -1035,6 +1039,7 @@ def retranslateUi(self):
self.actiondeepLX_address.setText("DeepLX Api")
self.actionott_address.setText("OTT离线翻译Api" if config.defaulelang == 'zh' else "OTT Api")
self.actionclone_address.setText("clone-voice" if config.defaulelang == 'zh' else "Clone-Voice TTS")
self.actionkokoro_address.setText("Kokoro TTS")
self.actionchattts_address.setText("ChatTTS")
self.actiontts_api.setText("自定义TTS API" if config.defaulelang == 'zh' else "TTS API")
self.actiontrans_api.setText("自定义翻译API" if config.defaulelang == 'zh' else "Transate API")
Expand Down
91 changes: 91 additions & 0 deletions videotrans/ui/kokoro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Do not edit this file unless you know what you are doing.


from PySide6 import QtCore, QtWidgets
from PySide6.QtCore import Qt
from PySide6.QtWidgets import QLabel

from videotrans.configure import config
from videotrans.util import tools


class Ui_kokoroform(object):
    """Widget layout for the Kokoro TTS settings dialog.

    One form row (label + API address field) sits above a row of three
    buttons: save, test and a link to the tutorial page.
    """

    def setupUi(self, kokoro):
        """Create the widgets and layouts on the ``kokoro`` dialog."""

        def apply_fixed_policy(widget):
            # Fixed/Fixed size policy that keeps the widget's current
            # height-for-width flag.
            policy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed)
            policy.setHorizontalStretch(0)
            policy.setVerticalStretch(0)
            policy.setHeightForWidth(widget.sizePolicy().hasHeightForWidth())
            widget.setSizePolicy(policy)

        self.has_done = False

        # Dialog window
        kokoro.setObjectName("kokoro")
        kokoro.setWindowModality(QtCore.Qt.NonModal)
        kokoro.resize(500, 223)
        apply_fixed_policy(kokoro)
        kokoro.setMaximumSize(QtCore.QSize(500, 300))

        # Layouts for the address row
        self.verticalLayout = QtWidgets.QVBoxLayout(kokoro)
        self.verticalLayout.setObjectName("verticalLayout")
        self.formLayout_2 = QtWidgets.QFormLayout()
        self.formLayout_2.setSizeConstraint(QtWidgets.QLayout.SetMinimumSize)
        self.formLayout_2.setFormAlignment(QtCore.Qt.AlignJustify | QtCore.Qt.AlignVCenter)
        self.formLayout_2.setObjectName("formLayout_2")

        # Address label
        self.label = QtWidgets.QLabel(kokoro)
        apply_fixed_policy(self.label)
        self.label.setMinimumSize(QtCore.QSize(100, 35))
        self.label.setAlignment(QtCore.Qt.AlignJustify | QtCore.Qt.AlignVCenter)
        self.label.setObjectName("label")
        self.formLayout_2.setWidget(0, QtWidgets.QFormLayout.LabelRole, self.label)

        # Address input
        self.kokoro_address = QtWidgets.QLineEdit(kokoro)
        apply_fixed_policy(self.kokoro_address)
        self.kokoro_address.setMinimumSize(QtCore.QSize(400, 35))
        self.kokoro_address.setObjectName("kokoro_address")
        self.formLayout_2.setWidget(0, QtWidgets.QFormLayout.FieldRole, self.kokoro_address)

        self.verticalLayout.addLayout(self.formLayout_2)

        # Buttons
        self.set_kokoro = QtWidgets.QPushButton(kokoro)
        self.set_kokoro.setMinimumSize(QtCore.QSize(0, 35))
        self.set_kokoro.setObjectName("set_kokoro")

        self.test = QtWidgets.QPushButton(kokoro)
        self.test.setMinimumSize(QtCore.QSize(0, 30))
        self.test.setObjectName("test")

        tutorial_btn = QtWidgets.QPushButton()
        tutorial_btn.setMinimumSize(QtCore.QSize(0, 35))
        tutorial_btn.setStyleSheet("background-color: rgba(255, 255, 255,0)")
        tutorial_btn.setObjectName("help_btn")
        tutorial_btn.setCursor(Qt.PointingHandCursor)
        if config.defaulelang == 'zh':
            tutorial_btn.setText("查看填写教程")
        else:
            tutorial_btn.setText("Fill out the tutorial")
        tutorial_btn.clicked.connect(lambda: tools.open_url(url='https://pyvideotrans.com/kokorotts'))

        self.layout_btn = QtWidgets.QHBoxLayout()
        self.layout_btn.setObjectName("layout_btn")
        for button in (self.set_kokoro, self.test, tutorial_btn):
            self.layout_btn.addWidget(button)
        self.verticalLayout.addLayout(self.layout_btn)

        self.retranslateUi(kokoro)
        QtCore.QMetaObject.connectSlotsByName(kokoro)

    def retranslateUi(self, kokoro):
        """Set the window title and the texts of the label, field and buttons."""
        is_zh = config.defaulelang == 'zh'
        kokoro.setWindowTitle("Kokoro TTS")
        self.label.setText("http地址" if is_zh else 'kokoro api')
        if is_zh:
            placeholder = 'kokoro-uiapi启动后的地址,默认请填写 http://127.0.0.1:5066'
        else:
            placeholder = 'Fill in the HTTP address after the kokoro program starts'
        self.kokoro_address.setPlaceholderText(placeholder)
        self.set_kokoro.setText('保存' if is_zh else "Save")
        self.test.setText('测试' if is_zh else "Test")
Loading

0 comments on commit 8ce510b

Please sign in to comment.