From 408ab6a6dfbb7d70d0c1b8384efb4370c85a8b83 Mon Sep 17 00:00:00 2001 From: Daniel Schnell Date: Sun, 4 Feb 2024 18:48:31 +0000 Subject: [PATCH] Optimize number handling runtime further frontend/CategoryTuple.java: - precompile all tuples - move String interpolation inside tuple initialization frontend/TTSNormalizer.java: - fillDict(): optimize lookups - avoid unnecessary operations - const correctness Signed-off-by: Daniel Schnell --- .../simaromur/frontend/CategoryTuple.java | 14 +++--- .../simaromur/frontend/TTSNormalizer.java | 49 +++++++++++-------- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/app/src/main/java/com/grammatek/simaromur/frontend/CategoryTuple.java b/app/src/main/java/com/grammatek/simaromur/frontend/CategoryTuple.java index 0e4f318a..8271c801 100644 --- a/app/src/main/java/com/grammatek/simaromur/frontend/CategoryTuple.java +++ b/app/src/main/java/com/grammatek/simaromur/frontend/CategoryTuple.java @@ -1,5 +1,7 @@ package com.grammatek.simaromur.frontend; +import java.util.regex.Pattern; + /** * A CategoryTuple typically holds information on the expansion of a certain regex pattern * belonging to a certain category, according to a rule (pos-tag pattern) @@ -17,22 +19,22 @@ public class CategoryTuple { - private final String numberPattern; - private final String rule; + private final Pattern numberPattern; + private final Pattern rule; private final String category; private final String expansion; public CategoryTuple(String pattern, String rule, String category, String expansion) { - this.numberPattern = pattern; - this.rule = rule; + this.numberPattern = Pattern.compile(".*" + pattern + ".*"); + this.rule = Pattern.compile(".*" + rule); this.category = category; this.expansion = expansion; } - public String getNumberPattern() { + public Pattern getNumberPattern() { return this.numberPattern; } - public String getRule() { + public Pattern getRule() { return this.rule; } public String getCategory() { diff --git 
a/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java b/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java index 726c338d..90f23c6c 100644 --- a/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java +++ b/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java @@ -519,7 +519,7 @@ private String normalizeDigits(String token) { * Initializes a map to hold the digit positions for a token, e.g.: * {token: {thousands: "", hundreds: "", dozens: "", ones: ""}} */ - private Map> makeDict(String token, String[] columns) { + private Map> makeDict(String token, final String[] columns) { Map> valueDict = new HashMap<>(); Map innerMap = new HashMap<>(); for (String s : columns) @@ -528,33 +528,42 @@ private Map> makeDict(String token, String[] columns return valueDict; } - /* + /** * Fills a map that holds the digit positions for a token, e.g.: - * {"1983": {thousands: "", hundreds: "nítján hundruð", dozens: " áttatíu og", ones: "þrjú"}} + * {"1983": {thousands: "", hundreds: "nítján hundruð", dozens: " áttatíu og", ones: "þrjú"}} * Returns a string combined of the values, e.g.: "nítján hundruð áttatíu og þrjú" + * + * @param token the token to fill the dictionary for + * @param tag the tag to fill the dictionary for + * @param tuples the list of tuples to use for filling the dictionary + * @param typeDict the dictionary to fill + * @param columns the columns to fill in the dictionary + * @return the string combined of the values */ - private String fillDict(String token, String tag, List tuples, Map> typeDict, String[] columns) { + private String fillDict(String token, String tag, final List tuples, Map> typeDict, final String[] columns) { StringBuilder result = new StringBuilder(); - for (int i = 0; i < tuples.size(); i++) { - String numberPattern = tuples.get(i).getNumberPattern(); - String rule = tuples.get(i).getRule(); - if (token.matches(".*" + numberPattern + ".*") && tag.matches(".*" + rule)) { - if 
(typeDict.containsKey(token)) { - final String category = tuples.get(i).getCategory(); - if (typeDict.get(token).containsKey(category)) { - Map tmp = typeDict.get(token); - tmp.put(category, tuples.get(i).getExpansion()); - // not really necessary, since the previous assignment updates the map in - // typeDict, but this is more clear - typeDict.put(token, tmp); - } - } + Map tokenMap = typeDict.get(token); + if (tokenMap == null) { + // this must have been done inside makeDict(), which needs to be called right before + // this method + throw new IllegalArgumentException("Token " + token + " not found in typeDict"); + } + + for (final CategoryTuple tuple : tuples) { + final Pattern numberPattern = tuple.getNumberPattern(); + final Pattern rule = tuple.getRule(); + if (numberPattern.matcher(token).matches() && rule.matcher(tag).matches()) { + tokenMap.put(tuple.getCategory(), tuple.getExpansion()); } } - for (String s : columns) - result.append(typeDict.get(token).get(s)); + for (final String column : columns) { + final String value = tokenMap.get(column); + if (value != null) { + result.append(value); + } + } return result.toString(); }