Skip to content

Commit

Permalink
Merge pull request #147 from grammatek/master
Browse files Browse the repository at this point in the history
Merge v1.3.x into master
  • Loading branch information
lumpidu authored Jan 23, 2024
2 parents 28a53b6 + 56d7141 commit e4a8f9d
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 32 deletions.
5 changes: 4 additions & 1 deletion app/src/main/java/com/grammatek/simaromur/AppRepository.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import com.grammatek.simaromur.db.Voice;
import com.grammatek.simaromur.db.VoiceDao;
import com.grammatek.simaromur.device.DownloadVoiceManager;
import com.grammatek.simaromur.device.SymbolsLvLIs;
import com.grammatek.simaromur.device.TTSAudioControl;
import com.grammatek.simaromur.device.TTSEngineController;
import com.grammatek.simaromur.device.pojo.DeviceVoice;
Expand Down Expand Up @@ -901,8 +902,10 @@ public CacheItem executeFrontendAndSaveIntoCache(String text, CacheItem item, co
// Normalizing the text is always required; running G2P as well does no harm, although
// for network voices normalization alone is currently sufficient. A trailing "." causes
// an audible problem with these voices, so we remove it.
final String normalizedText = mFrontend.getNormalizationManager().process(text).replaceAll("\\.+$", "");
String normalizedText = mFrontend.getNormalizationManager().process(text).replaceAll("\\.+$", "");
final String phonemes = mFrontend.transcribe(normalizedText, voice.type, voice.version);
// prevent the network voices from pronouncing 'sil'
normalizedText = normalizedText.replaceAll(SymbolsLvLIs.TagPause, ",");
Log.v(LOG_TAG, "onSynthesizeText: original (\"" + text + "\"), normalized (\"" + normalizedText + "\"), phonemes (\"" + phonemes + "\")");
Utterance updatedUtterance = UtteranceCacheManager.newUtterance(text, normalizedText, List.of(phonemes));
item = mUtteranceCacheManager.saveUtterance(updatedUtterance);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ public enum Type {
private final static String SymbolsSpecial = SymbolShortPause + " " + SymbolSpokenNoise + " " +
SymbolSilence;

// tags
public final static String TagPause = "<sil>";

// IPA symbols as HashMap
private static final HashMap<String, Integer> IPASymbolMap;
static {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import android.content.Context;
import android.util.Log;

import com.grammatek.simaromur.device.SymbolsLvLIs;

import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;

Expand Down Expand Up @@ -75,11 +77,11 @@ private List<String> normalize(final List<String> tokenized) {
// Some very basic phrasing for longer sentences
// TODO: improve!
if (tags.length >= 10) {
postNormalized = postNormalized.replace(" og ", " <sil> og ");
postNormalized = postNormalized.replace(" en ", " <sil> en ");
postNormalized = postNormalized.replace(" þegar ", " <sil> þegar ");
postNormalized = postNormalized.replace(" sem ", " <sil> sem ");
postNormalized = postNormalized.replace(" ef ", " <sil> ef ");
postNormalized = postNormalized.replace(" og ", " " + SymbolsLvLIs.TagPause + " og ");
postNormalized = postNormalized.replace(" en ", " " + SymbolsLvLIs.TagPause + " en ");
postNormalized = postNormalized.replace(" þegar ", " " + SymbolsLvLIs.TagPause + " þegar ");
postNormalized = postNormalized.replace(" sem ", " " + SymbolsLvLIs.TagPause + " sem ");
postNormalized = postNormalized.replace(" ef ", " " + SymbolsLvLIs.TagPause + " ef ");
}
normalized.add(postNormalized);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,6 @@ public String convert(String transcribed, String fromAlphabet, String toAlphabet

@NonNull
private String transcribeString(String text, boolean isFlitev02) {
final String silToken = "<sil>";
String[] tokens = text.split(" ");
StringBuilder sb = new StringBuilder();
for (String tok : tokens) {
Expand All @@ -139,7 +138,7 @@ private String transcribeString(String text, boolean isFlitev02) {
else if (mPronDict.containsKey(tok)) {
transcr = mPronDict.get(tok).getTranscript().trim();
}
else if (tok.equals(silToken)){
else if (tok.equals(SymbolsLvLIs.TagPause)){
transcr = SymbolsLvLIs.SymbolShortPause;
}
else {
Expand All @@ -154,6 +153,10 @@ else if (tok.equals(silToken)){
if (isFlitev02 && transcr.matches(".+s I n s"))
transcr = transcr.replaceAll("s I n s", "s I n n s");

// Workaround for a bug in the Thrax grammar: insert the missing space before 'C',
// as in 'Vilhjálmsdóttur' -> 'v I lC au l m s t ou h t Y r' (note the fused 'lC').
// TODO: remove once the Thrax grammar is fixed!
transcr = transcr.replaceAll("([a-zA-Z])C", "$1 C");
sb.append(transcr).append(" ");
}
return sb.toString().trim();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

import androidx.annotation.NonNull;

import com.grammatek.simaromur.device.SymbolsLvLIs;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -297,7 +299,7 @@ else if (numberToken.matches(NumberHelper.FRACTION_PTRN)) {
part2 = normalizeThousandDigit(part2, nextTag);
else
part2 = normalizeNumber(part2, nextTag);
normalized = part1 + " <sil> " + part2;
normalized = part1 + " " + SymbolsLvLIs.TagPause + " " + part2;
}
// 01. (what kind of ordinal is this?)
else if (numberToken.matches("^0\\d\\.$")) {
Expand Down Expand Up @@ -473,7 +475,7 @@ private String normalizeDigitOrdinal(String token) {
* nominative representation, also '+', '/', ':' are replaced.
*/
private String normalizeDigits(String token) {
token = token.replaceAll(" ", "<sil> ");
token = token.replaceAll(" ", SymbolsLvLIs.TagPause + " ");
for (String digit : NumberHelper.DIGIT_NUMBERS.keySet()) {
final String replacement = NumberHelper.DIGIT_NUMBERS.get(digit);
if (replacement != null) {
Expand Down
Loading

0 comments on commit e4a8f9d

Please sign in to comment.