diff --git a/client/tts.py b/client/tts.py index 1f69c77..bf991e6 100644 --- a/client/tts.py +++ b/client/tts.py @@ -9,7 +9,6 @@ """ import os import platform -import re import tempfile import subprocess import pipes @@ -103,15 +102,11 @@ def play_mp3(self, filename, remove=False): if output: self._logger.debug("Output was: '%s'", output) - @abstractmethod - def get_speech(self, phrase): - pass - def say(self, phrase, cache=False): self._logger.debug(u"Saying '%s' with '%s'", phrase, self.SLUG) - cache_file_path = dingdangpath.data( - 'audio', - self.SLUG + phrase + '.mp3' + cache_file_path = os.path.join( + dingdangpath.TEMP_PATH, + self.SLUG + phrase.replace(' ', '') + '.mp3' ) if cache and os.path.exists(cache_file_path): self._logger.info( @@ -140,317 +135,9 @@ class SimpleMp3Player(AbstractMp3TTSEngine): def is_available(cls): return True - def say(self, phrase): - self._logger.info(phrase) - - -class DummyTTS(AbstractTTSEngine): - """ - Dummy TTS engine that logs phrases with INFO level instead of synthesizing - speech. - """ - - SLUG = "dummy-tts" - - @classmethod - def is_available(cls): - return True - - def say(self, phrase): + def say(self, phrase, cache=False): self._logger.info(phrase) - def play(self, filename): - self._logger.debug("Playback of file '%s' requested") - pass - - -class EspeakTTS(AbstractTTSEngine): - """ - Uses the eSpeak speech synthesizer included in the Dingdang disk image - Requires espeak to be available - """ - - SLUG = "espeak-tts" - - def __init__(self, voice='default+m3', pitch_adjustment=40, - words_per_minute=160): - super(self.__class__, self).__init__() - self.voice = voice - self.pitch_adjustment = pitch_adjustment - self.words_per_minute = words_per_minute - - @classmethod - def get_config(cls): - # FIXME: Replace this as soon as we have a config module - config = {} - # HMM dir - # Try to get hmm_dir from config - profile_path = dingdangpath.config('profile.yml') - if os.path.exists(profile_path): - with open(profile_path, 'r') as f: - profile = yaml.safe_load(f) - if 'espeak-tts' in profile: - if 'voice' in profile['espeak-tts']: - config['voice'] = profile['espeak-tts']['voice'] - if 'pitch_adjustment' in profile['espeak-tts']: - config['pitch_adjustment'] = \ - profile['espeak-tts']['pitch_adjustment'] - if 'words_per_minute' in profile['espeak-tts']: - config['words_per_minute'] = \ - profile['espeak-tts']['words_per_minute'] - return config - - @classmethod - def is_available(cls): - return (super(cls, cls).is_available() and - diagnose.check_executable('espeak')) - - def say(self, phrase): - self._logger.debug("Saying '%s' with '%s'", phrase, self.SLUG) - with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f: - fname = f.name - cmd = ['espeak', '-v', self.voice, - '-p', self.pitch_adjustment, - '-s', self.words_per_minute, - '-w', fname, - phrase] - cmd = [str(x) for x in cmd] - self._logger.debug('Executing %s', ' '.join([pipes.quote(arg) - for arg in cmd])) - with tempfile.TemporaryFile() as f: - subprocess.call(cmd, stdout=f, stderr=f) - f.seek(0) - output = f.read() - if output: - self._logger.debug("Output was: '%s'", output) - self.play(fname) - os.remove(fname) - - -class FestivalTTS(AbstractTTSEngine): - """ - Uses the festival speech synthesizer - Requires festival (text2wave) to be available - """ - - SLUG = 'festival-tts' - - @classmethod - def is_available(cls): - if (super(cls, cls).is_available() and - diagnose.check_executable('text2wave') and - diagnose.check_executable('festival')): - - logger = logging.getLogger(__name__) - cmd = ['festival', '--pipe'] - with tempfile.SpooledTemporaryFile() as out_f: - with tempfile.SpooledTemporaryFile() as in_f: - logger.debug('Executing %s', ' '.join([pipes.quote(arg) - for arg in cmd])) - subprocess.call(cmd, stdin=in_f, stdout=out_f, - stderr=out_f) - out_f.seek(0) - output = out_f.read().strip() - if output: - logger.debug("Output was: '%s'", output) - return ('No default voice found' not in output) - return False - - def say(self, phrase): - self._logger.debug("Saying '%s' with '%s'", phrase, self.SLUG) - cmd = ['text2wave'] - with tempfile.NamedTemporaryFile(suffix='.wav') as out_f: - with tempfile.SpooledTemporaryFile() as in_f: - in_f.write(phrase) - in_f.seek(0) - with tempfile.SpooledTemporaryFile() as err_f: - self._logger.debug('Executing %s', - ' '.join([pipes.quote(arg) - for arg in cmd])) - subprocess.call(cmd, stdin=in_f, stdout=out_f, - stderr=err_f) - err_f.seek(0) - output = err_f.read() - if output: - self._logger.debug("Output was: '%s'", output) - self.play(out_f.name) - - -class FliteTTS(AbstractTTSEngine): - """ - Uses the flite speech synthesizer - Requires flite to be available - """ - - SLUG = 'flite-tts' - - def __init__(self, voice=''): - super(self.__class__, self).__init__() - self.voice = voice if voice and voice in self.get_voices() else '' - - @classmethod - def get_voices(cls): - cmd = ['flite', '-lv'] - voices = [] - with tempfile.SpooledTemporaryFile() as out_f: - subprocess.call(cmd, stdout=out_f) - out_f.seek(0) - for line in out_f: - if line.startswith('Voices available: '): - voices.extend([x.strip() for x in line[18:].split() - if x.strip()]) - return voices - - @classmethod - def get_config(cls): - # FIXME: Replace this as soon as we have a config module - config = {} - # HMM dir - # Try to get hmm_dir from config - profile_path = dingdangpath.config('profile.yml') - if os.path.exists(profile_path): - with open(profile_path, 'r') as f: - profile = yaml.safe_load(f) - if 'flite-tts' in profile: - if 'voice' in profile['flite-tts']: - config['voice'] = profile['flite-tts']['voice'] - return config - - @classmethod - def is_available(cls): - return (super(cls, cls).is_available() and - diagnose.check_executable('flite') and - len(cls.get_voices()) > 0) - - def say(self, phrase): - self._logger.debug("Saying '%s' with '%s'", phrase, self.SLUG) - cmd = ['flite'] - if self.voice: - cmd.extend(['-voice', self.voice]) - cmd.extend(['-t', phrase]) - with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f: - fname = f.name - cmd.append(fname) - with tempfile.SpooledTemporaryFile() as out_f: - self._logger.debug('Executing %s', - ' '.join([pipes.quote(arg) - for arg in cmd])) - subprocess.call(cmd, stdout=out_f, stderr=out_f) - out_f.seek(0) - output = out_f.read().strip() - if output: - self._logger.debug("Output was: '%s'", output) - self.play(fname) - os.remove(fname) - - -class MacOSXTTS(AbstractTTSEngine): - """ - Uses the OS X built-in 'say' command - """ - - SLUG = "osx-tts" - - @classmethod - def is_available(cls): - return (platform.system().lower() == 'darwin' and - diagnose.check_executable('say') and - diagnose.check_executable('afplay')) - - def say(self, phrase): - self._logger.debug("Saying '%s' with '%s'", phrase, self.SLUG) - cmd = ['say', str(phrase)] - self._logger.debug('Executing %s', ' '.join([pipes.quote(arg) - for arg in cmd])) - with tempfile.TemporaryFile() as f: - subprocess.call(cmd, stdout=f, stderr=f) - f.seek(0) - output = f.read() - if output: - self._logger.debug("Output was: '%s'", output) - - def play(self, filename): - cmd = ['aplay', str(filename)] - self._logger.debug('Executing %s', ' '.join([pipes.quote(arg) - for arg in cmd])) - with tempfile.TemporaryFile() as f: - subprocess.call(cmd, stdout=f, stderr=f) - f.seek(0) - output = f.read() - if output: - self._logger.debug("Output was: '%s'", output) - - -class PicoTTS(AbstractTTSEngine): - """ - Uses the svox-pico-tts speech synthesizer - Requires pico2wave to be available - """ - - SLUG = "pico-tts" - - def __init__(self, language="en-US"): - super(self.__class__, self).__init__() - self.language = language - - @classmethod - def is_available(cls): - return (super(cls, cls).is_available() and - diagnose.check_executable('pico2wave')) - - @classmethod - def get_config(cls): - # FIXME: Replace this as soon as we have a config module - config = {} - # HMM dir - # Try to get hmm_dir from config - profile_path = dingdangpath.config('profile.yml') - if os.path.exists(profile_path): - with open(profile_path, 'r') as f: - profile = yaml.safe_load(f) - if 'pico-tts' in profile and 'language' in profile['pico-tts']: - config['language'] = profile['pico-tts']['language'] - - return config - - @property - def languages(self): - cmd = ['pico2wave', '-l', 'NULL', - '-w', os.devnull, - 'NULL'] - with tempfile.SpooledTemporaryFile() as f: - subprocess.call(cmd, stderr=f) - f.seek(0) - output = f.read() - pattern = re.compile(r'Unknown language: NULL\nValid languages:\n' + - r'((?:[a-z]{2}-[A-Z]{2}\n)+)') - matchobj = pattern.match(output) - if not matchobj: - raise RuntimeError("pico2wave: valid languages not detected") - langs = matchobj.group(1).split() - return langs - - def say(self, phrase): - self._logger.debug("Saying '%s' with '%s'", phrase, self.SLUG) - with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f: - fname = f.name - cmd = ['pico2wave', '--wave', fname] - if self.language not in self.languages: - raise ValueError("Language '%s' not supported by '%s'", - self.language, self.SLUG) - cmd.extend(['-l', self.language]) - cmd.append(phrase) - self._logger.debug('Executing %s', ' '.join([pipes.quote(arg) - for arg in cmd])) - with tempfile.TemporaryFile() as f: - subprocess.call(cmd, stdout=f, stderr=f) - f.seek(0) - output = f.read() - if output: - self._logger.debug("Output was: '%s'", output) - self.play(fname) - os.remove(fname) - class BaiduTTS(AbstractMp3TTSEngine): """