Skip to content

Commit

Permalink
Merge pull request #35 from idiap/dev
Browse files: browse the repository at this point in the history
v0.24.1
  • Loading branch information
eginhard authored May 29, 2024
2 parents 7652f0a + a682fa8 commit 439fb45
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 42 deletions.
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ The following extras allow the installation of optional dependencies:
|------|-------------|
| `all` | All optional dependencies, except `dev` and `docs` |
| `dev` | Development dependencies |
| `dev` | Dependencies for building the documentation |
| `docs` | Dependencies for building the documentation |
| `notebooks` | Dependencies only used in notebooks |
| `server` | Dependencies to run the TTS server |
| `bn` | Bangla G2P |
Expand Down Expand Up @@ -270,11 +270,10 @@ You can find the language ISO codes [here](https://dl.fbaipublicfiles.com/mms/tt
and learn about the Fairseq models [here](https://github.com/facebookresearch/fairseq/tree/main/examples/mms).

```python
# TTS with on the fly voice conversion
# TTS with fairseq models
api = TTS("tts_models/deu/fairseq/vits")
api.tts_with_vc_to_file(
api.tts_to_file(
"Wie sage ich auf Italienisch, dass ich dich liebe?",
speaker_wav="target/speaker.wav",
file_path="output.wav"
)
```
Expand Down
3 changes: 3 additions & 0 deletions TTS/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import importlib.metadata

__version__ = importlib.metadata.version("coqui-tts")
60 changes: 25 additions & 35 deletions TTS/tts/utils/text/phonemizers/espeak_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import logging
import re
import subprocess
import tempfile
from pathlib import Path
from typing import Optional

from packaging.version import Version
Expand Down Expand Up @@ -50,7 +52,7 @@ def get_espeakng_version() -> str:
_DEF_ESPEAK_VER = None


def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[bytes]:
def _espeak_exe(espeak_lib: str, args: list) -> list[str]:
"""Run espeak with the given arguments."""
cmd = [
espeak_lib,
Expand All @@ -59,32 +61,18 @@ def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[byte
"1", # UTF8 text encoding
]
cmd.extend(args)
logger.debug("espeakng: executing %s", repr(cmd))

with subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
) as p:
res = iter(p.stdout.readline, b"")
err = iter(p.stderr.readline, b"")
for line in err:
logger.warning("espeakng: %s", line.decode("utf-8").strip())
if not sync:
p.stdout.close()
if p.stderr:
p.stderr.close()
if p.stdin:
p.stdin.close()
return res
res2 = list(res)
p.stdout.close()
if p.stderr:
p.stderr.close()
if p.stdin:
p.stdin.close()
p.wait()
return res2
logger.debug("Executing: %s", repr(cmd))

p = subprocess.run(cmd, capture_output=True, encoding="utf8", check=True)
for line in p.stderr.strip().split("\n"):
if line.strip() != "":
logger.warning("%s: %s", espeak_lib, line.strip())
res = []
for line in p.stdout.strip().split("\n"):
if line.strip() != "":
logger.debug("%s: %s", espeak_lib, line.strip())
res.append(line.strip())
return res


class ESpeak(BasePhonemizer):
Expand Down Expand Up @@ -198,12 +186,15 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False
if tie:
args.append("--tie=%s" % tie)

args.append(text)
tmp = tempfile.NamedTemporaryFile(mode="w+t", delete=False, encoding="utf8")
tmp.write(text)
tmp.close()
args.append("-f")
args.append(tmp.name)

# compute phonemes
phonemes = ""
for line in _espeak_exe(self.backend, args, sync=True):
logger.debug("line: %s", repr(line))
ph_decoded = line.decode("utf8").strip()
for line in _espeak_exe(self.backend, args):
# espeak:
# version 1.48.15: " p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n"
# espeak-ng:
Expand All @@ -213,9 +204,10 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False
# "sɛʁtˈɛ̃ mˈo kɔm (en)fˈʊtbɔːl(fr) ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ."
# phonemize needs to remove the language flags of the returned text:
# "sɛʁtˈɛ̃ mˈo kɔm fˈʊtbɔːl ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ."
ph_decoded = re.sub(r"\(.+?\)", "", ph_decoded)
ph_decoded = re.sub(r"\(.+?\)", "", line)

phonemes += ph_decoded.strip()
Path(tmp.name).unlink()
return phonemes.replace("_", separator)

def _phonemize(self, text: str, separator: str = "") -> str:
Expand All @@ -232,14 +224,12 @@ def supported_languages() -> dict[str, str]:
return {}
args = ["--voices"]
langs = {}
for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args, sync=True)):
line = line.decode("utf8").strip()
for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args)):
if count > 0:
cols = line.split()
lang_code = cols[1]
lang_name = cols[3]
langs[lang_code] = lang_name
logger.debug("line: %s", repr(line))
return langs

def version(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
autodoc_mock_imports = ["soundfile"]

# -- Project information -----------------------------------------------------
project = "TTS"
project = "coqui-tts"
copyright = "2021 Coqui GmbH, 2020 TTS authors"
author = "Coqui GmbH"

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ include = ["TTS*"]

[project]
name = "coqui-tts"
version = "0.24.0"
version = "0.24.1"
description = "Deep learning for Text to Speech."
readme = "README.md"
requires-python = ">=3.9, <3.13"
Expand Down Expand Up @@ -69,7 +69,7 @@ dependencies = [
"gruut[de,es,fr]==2.2.3",
# Tortoise
"einops>=0.6.0",
"transformers>=4.33.0",
"transformers>=4.33.0,<4.41.0",
# Bark
"encodec>=0.1.1",
# XTTS
Expand Down
6 changes: 6 additions & 0 deletions tests/text_tests/test_phonemizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ def setUp(self):
output = self.phonemizer.phonemize(text, separator="")
self.assertEqual(output, gt)

# UTF8 characters
text = "źrebię"
gt = "ʑrˈɛbjɛ"
output = ESpeak("pl").phonemize(text, separator="")
self.assertEqual(output, gt)

def test_name(self):
self.assertEqual(self.phonemizer.name(), "espeak")

Expand Down

0 comments on commit 439fb45

Please sign in to comment.