diff --git a/app/src/main/java/com/grammatek/simaromur/frontend/NumberHelper.java b/app/src/main/java/com/grammatek/simaromur/frontend/NumberHelper.java index f7863e9..9011f61 100644 --- a/app/src/main/java/com/grammatek/simaromur/frontend/NumberHelper.java +++ b/app/src/main/java/com/grammatek/simaromur/frontend/NumberHelper.java @@ -67,8 +67,8 @@ private NumberHelper() {} //1.234 or 1 or 12 or 123 public static final String CARDINAL_THOUSAND_PTRN = "^([1-9]\\.?\\d{3}|[1-9]\\d{0,2})$"; - //1.234 or 12.345 or 123.456 - public static final String CARDINAL_MILLION_PTRN = "^[1-9]\\d{0,2}\\.\\d{3}$"; + //1.234 or 12.345 or 123.456 or 123468 + public static final String CARDINAL_MILLION_PTRN = "^[1-9]\\d{0,2}\\.?\\d{3}$"; public static final String CARDINAL_BIG_PTRN = "^[1-9]\\d{0,2}(\\.\\d{3}){2,3}$"; //1.123,4 or 1232,4 or 123,4 or 12,42345 or 1,489 diff --git a/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java b/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java index 89fafe8..a9713cc 100644 --- a/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java +++ b/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java @@ -33,13 +33,12 @@ public class TTSNormalizer { CardinalThousandTuples.getTuples()) .flatMap(Collection::stream) .collect(Collectors.toList()); - private final List ThousandsMillionsTupleList = Stream.of(CardinalThousandTuples.getTuples(), CardinalMillionTuples.getTuples()) - .flatMap(Collection::stream) - .collect(Collectors.toList()); - private final List OnesThousandsCardinalTupleList = Stream.of(CardinalOnesTuples.getTuples(), CardinalThousandTuples.getTuples()) .flatMap(Collection::stream) .collect(Collectors.toList()); + private final List ThousandsMillionsTupleList = Stream.of(OnesThousandsCardinalTupleList, CardinalMillionTuples.getTuples()) + .flatMap(Collection::stream) + .collect(Collectors.toList()); private final List DecimalThousandsTupleList = 
Stream.of(OnesThousandsCardinalTupleList, DecimalThousandTuples.getTuples()) .flatMap(Collection::stream) .collect(Collectors.toList()); diff --git a/app/src/test/java/com/grammatek/simaromur/NormalizationManagerTest.java b/app/src/test/java/com/grammatek/simaromur/NormalizationManagerTest.java index be8232f..49bcee1 100644 --- a/app/src/test/java/com/grammatek/simaromur/NormalizationManagerTest.java +++ b/app/src/test/java/com/grammatek/simaromur/NormalizationManagerTest.java @@ -77,6 +77,14 @@ private Map getDigits() { Map digits = new HashMap<>(); + digits.put("7", "sjö ."); + digits.put("77", "sjötíu og sjö ."); + digits.put("777", "sjö hundruð sjötíu og sjö ."); + digits.put("7777", "sjö þúsund sjö hundruð sjötíu og sjö ."); + digits.put("77777", "sjötíu og sjö þúsund sjö hundruð sjötíu og sjö ."); + digits.put("119273", "hundrað og nítján þúsund tvö hundruð sjötíu og þrjú ."); + digits.put("77.777", "sjötíu og sjö þúsund sjö hundruð sjötíu og sjö ."); + digits.put("119.273", "hundrað og nítján þúsund tvö hundruð sjötíu og þrjú ."); // POS-tagger tags 'mínúta' as accusative, hence the wrong case for 32. // should be 'þrítugasta og önnur' (accusative is 'mínútu') digits.put("(32. mín)", " þrítugustu og aðra mínúta ."); digits.put("(37. mín)", " þrítugustu og sjöundu mínúta ."); digits.put("(24. mín)", " tuttugustu og fjórðu mínúta .");