From edf2125bb9200c8fc33d062afbe986717725e94d Mon Sep 17 00:00:00 2001 From: Daniel Schnell Date: Mon, 29 Jan 2024 16:26:31 +0000 Subject: [PATCH] TTSService: remove cache item only for RTF > 50 Delete an audio cache item after playing only if the real-time factor (RTF) of the utterance exceeds 50.0 (previously 20.0). We observe that it still makes sense to cache audio even for RTF == 25. Therefore, increase the limit to 50. Additionally, set the default latency to VERY_LOW, which ONNX voices keep; network and Torch voices remain at VERY_HIGH, and all other voice types are set to NORMAL (~50ms). Signed-off-by: Daniel Schnell --- .../com/grammatek/simaromur/TTSService.java | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/app/src/main/java/com/grammatek/simaromur/TTSService.java b/app/src/main/java/com/grammatek/simaromur/TTSService.java index 40997e9..6a697d9 100644 --- a/app/src/main/java/com/grammatek/simaromur/TTSService.java +++ b/app/src/main/java/com/grammatek/simaromur/TTSService.java @@ -291,7 +291,9 @@ private void handleProcessingResult(SynthesisCallback callback, CacheItem item, mRepository.getUtteranceCache().deleteCacheItem(item.getUuid()); } else if (mRmCacheItemForFastVoices && !isCached) { // if the voice is fast, we can delete the cache item after playing - if (rtf > 20.0f) { + // TODO: something we should think about more carefully: we need + // much bigger RTF for not needing to cache the item + if (rtf > 50.0f) { Log.v(LOG_TAG, "rm_cache_item_for_fast_voices: delete cache item " + rcvdTtsRequest.serialize()); mRepository.getUtteranceCache().deleteCacheItem(item.getUuid()); @@ -499,19 +501,27 @@ public List onGetVoices() List announcedVoiceList = new ArrayList<>(); for (final com.grammatek.simaromur.db.Voice voice : mRepository.getCachedVoices()) { - int quality = Voice.QUALITY_VERY_LOW; - int latency = Voice.LATENCY_LOW; + int quality = Voice.QUALITY_NORMAL; + // TODO: experiment with this setting: which impact does it have ? 
+ int latency = Voice.LATENCY_VERY_LOW; // this is for latency < 20ms, which we can + // barely reach for cached items boolean needsNetwork = false; Set features = new HashSet<>(); - if (voice.type.equals(com.grammatek.simaromur.db.Voice.TYPE_NETWORK)) { + switch(voice.type) { + case com.grammatek.simaromur.db.Voice.TYPE_NETWORK: latency = Voice.LATENCY_VERY_HIGH; - quality = Voice.QUALITY_HIGH; features.add(TextToSpeech.Engine.KEY_FEATURE_NETWORK_RETRIES_COUNT); needsNetwork = true; - } else if (voice.type.equals(com.grammatek.simaromur.db.Voice.TYPE_TORCH)) { - quality = Voice.QUALITY_VERY_HIGH; + break; + case com.grammatek.simaromur.db.Voice.TYPE_TORCH: latency = Voice.LATENCY_VERY_HIGH; + break; + case com.grammatek.simaromur.db.Voice.TYPE_ONNX: + break; + default: + latency = Voice.LATENCY_NORMAL; + break; } if (voice.needsDownload()) { features.add(TextToSpeech.Engine.KEY_FEATURE_NOT_INSTALLED);