From 98747b1037c05b5d57dd1ea3b1d357ddff4dfefc Mon Sep 17 00:00:00 2001 From: Isin Demirsahin Date: Wed, 15 May 2024 15:41:02 -0700 Subject: [PATCH] Add Feature pairing to Symbol. PiperOrigin-RevId: 634099532 --- .../script/inventories/latn.py | 29 ++++++-- .../script/inventories/latn_test.py | 6 ++ .../scripts/natural_translit/utils/feature.py | 9 +-- .../scripts/natural_translit/utils/symbol.py | 66 ++++++++++++++++++- 4 files changed, 100 insertions(+), 10 deletions(-) diff --git a/nisaba/scripts/natural_translit/script/inventories/latn.py b/nisaba/scripts/natural_translit/script/inventories/latn.py index fb08f4d1..db5c0ef3 100644 --- a/nisaba/scripts/natural_translit/script/inventories/latn.py +++ b/nisaba/scripts/natural_translit/script/inventories/latn.py @@ -24,25 +24,44 @@ def _build_inventory() -> grapheme.Grapheme.Inventory: g = grapheme.Grapheme grf = g.GR_FEATURES inventory = g.Inventory(g.GR_FEATURES.script.latn) - lowercase_features = f.Set( - grf.script.latn, - grf.case.lower, - ) lowercase_args = [ ['a', grf.ph_class.vwl], + ['b', grf.ph_class.cons], + ['c', grf.ph_class.cons], + ['d', grf.ph_class.cons], + ['e', grf.ph_class.vwl], + ['f', grf.ph_class.cons], + ['g', grf.ph_class.cons], + ['h', grf.ph_class.cons], + ['i', grf.ph_class.vwl], + ['j', grf.ph_class.cons], ['k', grf.ph_class.cons], ['l', grf.ph_class.cons], + ['m', grf.ph_class.cons], + ['n', grf.ph_class.cons], + ['o', grf.ph_class.vwl], ['p', grf.ph_class.cons], + ['q', grf.ph_class.cons], + ['r', grf.ph_class.cons], ['s', grf.ph_class.cons], + ['t', grf.ph_class.cons], ['u', grf.ph_class.vwl], + ['v', grf.ph_class.cons], + ['w', grf.ph_class.cons], + ['x', grf.ph_class.cons], + ['y', [grf.ph_class.cons, grf.ph_class.vwl]], + ['z', grf.ph_class.cons], ] inventory.add_graphemes( *[ - g.from_char(char, char, f.Set(lowercase_features, features)) + g.from_char(char, char, f.Set(grf.script.latn, features)) for char, features in lowercase_args ], list_alias='lower', ) + for lower in inventory.lower: + upper = g.from_char(lower.raw.upper(), lower.alias + '_uc', lower.features) + inventory.add_pairs(grf.case.lower, grf.case.upper, (lower, upper)) return inventory graphemes = _build_inventory() diff --git a/nisaba/scripts/natural_translit/script/inventories/latn_test.py b/nisaba/scripts/natural_translit/script/inventories/latn_test.py index 1a1f73fb..54d3941d 100644 --- a/nisaba/scripts/natural_translit/script/inventories/latn_test.py +++ b/nisaba/scripts/natural_translit/script/inventories/latn_test.py @@ -34,5 +34,11 @@ def test_in_raw_dict(self): def test_lowercase_list(self): self.assertIn(_latn.a, _latn.lower) + def test_uppercase(self): + self.assertEqual(_latn.text_lookup('A'), _latn.a_uc) + self.assertEqual(_latn.a.upper, _latn.a_uc) + self.assertEqual(_latn.a_uc.lower, _latn.a) + self.assertIn(_latn.a_uc, _latn.upper) + if __name__ == '__main__': absltest.main() diff --git a/nisaba/scripts/natural_translit/utils/feature.py b/nisaba/scripts/natural_translit/utils/feature.py index fd898b57..f39ca9ac 100644 --- a/nisaba/scripts/natural_translit/utils/feature.py +++ b/nisaba/scripts/natural_translit/utils/feature.py @@ -228,14 +228,15 @@ def aspect_dict( distances.update({feature.aspect: value_set.add(feature)}) return distances - def remove(self, *features) -> 'Feature.Set': + def remove(self, *features: 'Feature.ITERABLE') -> 'Feature.Set': for feature in Feature.Set(*features): self._item_set().discard(feature) return self def replace( - self, old: tuple['Feature.ITERABLE', ...], - new: tuple['Feature.ITERABLE', ...] + self, + old: 'Feature.ITERABLE', + new: 'Feature.ITERABLE', ) -> 'Feature.Set': for feature in Feature.Set(old): self.remove(feature) @@ -329,7 +330,7 @@ class ValueList(ty.IterableThing): isoceles: 0.00 round: 1.00 } - resulting in all troangular rooms to match in terms of shape, but + resulting in all triangular rooms to match in terms of shape, but there can be different rules for the precise shapes eg. if room.has_feature(equilateral): do x diff --git a/nisaba/scripts/natural_translit/utils/symbol.py b/nisaba/scripts/natural_translit/utils/symbol.py index 65765087..94f35c48 100644 --- a/nisaba/scripts/natural_translit/utils/symbol.py +++ b/nisaba/scripts/natural_translit/utils/symbol.py @@ -71,6 +71,7 @@ class Symbol(ty.Thing): value of the inventory is Symbol.Inventory.EMPTY. """ + OR_NOTHING = Union['Symbol', ty.Nothing] SYM_FEATURES = _symbol_features() class ReservedIndex(enum.IntEnum): @@ -142,6 +143,33 @@ def descriptions( + '\n' ) + # Stub functions to avoid rewrites when symbol features are profiles instead + # of sets. + def has_feature(self, feature: ft.Feature) -> bool: + return feature in self.features + + def add_features(self, *features: ft.Feature.ITERABLE) -> None: + self.features.add(features) + + def remove_features(self, *features: ft.Feature.ITERABLE) -> None: + self.features.remove(features) + + def replace_features( + self, old: ft.Feature.ITERABLE, new: ft.Feature.ITERABLE + ) -> None: + self.features.replace(old, new) + + def pair( + self, + from_feature: ft.Feature, + to_feature: ft.Feature, + symbol: 'Symbol', + ) -> None: + setattr(self, to_feature.text, symbol) + setattr(symbol, from_feature.text, self) + self.replace_features(to_feature, from_feature) + symbol.replace_features(from_feature, to_feature) + class Inventory(inventory.Inventory): """Symbol inventory. @@ -237,7 +265,7 @@ def lookup( self, key: ..., source_dict: Union[dict[Any, 'Symbol'], str], - default: Union['Symbol', ty.Nothing] = ty.UNSPECIFIED, + default: 'Symbol.OR_NOTHING' = ty.UNSPECIFIED, ) -> 'Symbol': """Get symbol by key from source_dict. @@ -272,6 +300,42 @@ def text_lookup(self, text: str) -> 'Symbol': """Get symbol by its text field.""" return log.dbg_return(self.lookup(text, self.text_dict)) + def add_pairs( + self, + from_feature: ft.Feature, + to_feature: ft.Feature, + *pairs: tuple['Symbol', 'Symbol'], + ) -> None: + self.make_suppl(from_feature.alias, []) + self.make_suppl(to_feature.alias, []) + from_list = self.get(from_feature.alias) + to_list = self.get(to_feature.alias) + for sym1, sym2 in pairs: + self.add_symbols(sym1, sym2) + sym1.pair(from_feature, to_feature, sym2) + if sym1 not in from_list: + from_list.append(sym1) + if sym2 not in to_list: + to_list.append(sym2) + + def pair_lookup(self, symbol: 'Symbol', feature: ft.Feature) -> 'Symbol': + """Gets feature pair of a symbol. + + Args: + symbol: The symbol whose pair is being looked up. + feature: The opposing feature that the target symbol differs from the + given symbol. + + Returns: + The symbol that is paired with the given symbol and feature. If no pair + is found, returns the symbol itself. + """ + if hasattr(symbol, feature.text): + pair = getattr(symbol, feature.text) + if isinstance(pair, Symbol): + return pair + return symbol + def raw_from_unknown(self, raw: str = '') -> 'Symbol': """Makes and adds a new raw symbol to the inventory from a string.""" self.unknown_count += 1