diff --git a/README.md b/README.md index 1033d16be4..0a3bccca4e 100644 --- a/README.md +++ b/README.md @@ -154,7 +154,7 @@ The following extras allow the installation of optional dependencies: |------|-------------| | `all` | All optional dependencies, except `dev` and `docs` | | `dev` | Development dependencies | -| `dev` | Dependencies for building the documentation | +| `docs` | Dependencies for building the documentation | | `notebooks` | Dependencies only used in notebooks | | `server` | Dependencies to run the TTS server | | `bn` | Bangla G2P | @@ -270,11 +270,10 @@ You can find the language ISO codes [here](https://dl.fbaipublicfiles.com/mms/tt and learn about the Fairseq models [here](https://github.com/facebookresearch/fairseq/tree/main/examples/mms). ```python -# TTS with on the fly voice conversion +# TTS with fairseq models api = TTS("tts_models/deu/fairseq/vits") -api.tts_with_vc_to_file( +api.tts_to_file( "Wie sage ich auf Italienisch, dass ich dich liebe?", - speaker_wav="target/speaker.wav", file_path="output.wav" ) ``` diff --git a/TTS/__init__.py b/TTS/__init__.py index e69de29bb2..9e87bca4be 100644 --- a/TTS/__init__.py +++ b/TTS/__init__.py @@ -0,0 +1,3 @@ +import importlib.metadata + +__version__ = importlib.metadata.version("coqui-tts") diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py index dd74db6fae..a15df716e7 100644 --- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py +++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py @@ -3,6 +3,8 @@ import logging import re import subprocess +import tempfile +from pathlib import Path from typing import Optional from packaging.version import Version @@ -50,7 +52,7 @@ def get_espeakng_version() -> str: _DEF_ESPEAK_VER = None -def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[bytes]: +def _espeak_exe(espeak_lib: str, args: list) -> list[str]: """Run espeak with the given arguments.""" cmd = [ espeak_lib, @@ -59,32 +61,18 @@ def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[byte "1", # UTF8 text encoding ] cmd.extend(args) - logger.debug("espeakng: executing %s", repr(cmd)) - - with subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) as p: - res = iter(p.stdout.readline, b"") - err = iter(p.stderr.readline, b"") - for line in err: - logger.warning("espeakng: %s", line.decode("utf-8").strip()) - if not sync: - p.stdout.close() - if p.stderr: - p.stderr.close() - if p.stdin: - p.stdin.close() - return res - res2 = list(res) - p.stdout.close() - if p.stderr: - p.stderr.close() - if p.stdin: - p.stdin.close() - p.wait() - return res2 + logger.debug("Executing: %s", repr(cmd)) + + p = subprocess.run(cmd, capture_output=True, encoding="utf8", check=True) + for line in p.stderr.strip().split("\n"): + if line.strip() != "": + logger.warning("%s: %s", espeak_lib, line.strip()) + res = [] + for line in p.stdout.strip().split("\n"): + if line.strip() != "": + logger.debug("%s: %s", espeak_lib, line.strip()) + res.append(line.strip()) + return res class ESpeak(BasePhonemizer): @@ -198,12 +186,15 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False if tie: args.append("--tie=%s" % tie) - args.append(text) + tmp = tempfile.NamedTemporaryFile(mode="w+t", delete=False, encoding="utf8") + tmp.write(text) + tmp.close() + args.append("-f") + args.append(tmp.name) + # compute phonemes phonemes = "" - for line in _espeak_exe(self.backend, args, sync=True): - logger.debug("line: %s", repr(line)) - ph_decoded = line.decode("utf8").strip() + for line in _espeak_exe(self.backend, args): # espeak: # version 1.48.15: " p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n" # espeak-ng: @@ -213,9 +204,10 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False # "sɛʁtˈɛ̃ mˈo kɔm (en)fˈʊtbɔːl(fr) ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ." # phonemize needs to remove the language flags of the returned text: # "sɛʁtˈɛ̃ mˈo kɔm fˈʊtbɔːl ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ." - ph_decoded = re.sub(r"\(.+?\)", "", ph_decoded) + ph_decoded = re.sub(r"\(.+?\)", "", line) phonemes += ph_decoded.strip() + Path(tmp.name).unlink() return phonemes.replace("_", separator) def _phonemize(self, text: str, separator: str = "") -> str: @@ -232,14 +224,12 @@ def supported_languages() -> dict[str, str]: return {} args = ["--voices"] langs = {} - for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args, sync=True)): - line = line.decode("utf8").strip() + for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args)): if count > 0: cols = line.split() lang_code = cols[1] lang_name = cols[3] langs[lang_code] = lang_name - logger.debug("line: %s", repr(line)) return langs def version(self) -> str: diff --git a/docs/source/conf.py b/docs/source/conf.py index 2a9f62b3b0..e7d36c1f43 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -20,7 +20,7 @@ autodoc_mock_imports = ["soundfile"] # -- Project information ----------------------------------------------------- -project = "TTS" +project = "coqui-tts" copyright = "2021 Coqui GmbH, 2020 TTS authors" author = "Coqui GmbH" diff --git a/pyproject.toml b/pyproject.toml index 9b2fe41c0e..ff2ff32dd4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ include = ["TTS*"] [project] name = "coqui-tts" -version = "0.24.0" +version = "0.24.1" description = "Deep learning for Text to Speech." readme = "README.md" requires-python = ">=3.9, <3.13" @@ -69,7 +69,7 @@ dependencies = [ "gruut[de,es,fr]==2.2.3", # Tortoise "einops>=0.6.0", - "transformers>=4.33.0", + "transformers>=4.33.0,<4.41.0", # Bark "encodec>=0.1.1", # XTTS diff --git a/tests/text_tests/test_phonemizer.py b/tests/text_tests/test_phonemizer.py index ca25b302c5..f9067530e6 100644 --- a/tests/text_tests/test_phonemizer.py +++ b/tests/text_tests/test_phonemizer.py @@ -116,6 +116,12 @@ def setUp(self): output = self.phonemizer.phonemize(text, separator="") self.assertEqual(output, gt) + # UTF8 characters + text = "źrebię" + gt = "ʑrˈɛbjɛ" + output = ESpeak("pl").phonemize(text, separator="") + self.assertEqual(output, gt) + def test_name(self): self.assertEqual(self.phonemizer.name(), "espeak")