From bbe5603bcdaa7c2e9396cd6c1f0167d5dfa1f60f Mon Sep 17 00:00:00 2001 From: federicotdn Date: Thu, 3 Jan 2019 15:44:11 -0300 Subject: [PATCH] Agrega archivos iniciales del proyecto. --- .pylintrc | 567 ++++++++ .travis.yml | 7 + MANIFEST.in | 5 + Makefile | 10 + georef_ar_address.py | 309 ++++ grammars/address-ar.cfg | 79 + requirements-dev.txt | 2 + requirements.txt | 1 + setup.cfg | 2 + setup.py | 33 + tests/__init__.py | 0 tests/real_cases.json | 147 ++ tests/test_cases.json | 2416 +++++++++++++++++++++++++++++++ tests/test_georef_ar_address.py | 57 + 14 files changed, 3635 insertions(+) create mode 100644 .pylintrc create mode 100644 .travis.yml create mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 georef_ar_address.py create mode 100644 grammars/address-ar.cfg create mode 100644 requirements-dev.txt create mode 100644 requirements.txt create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 tests/__init__.py create mode 100644 tests/real_cases.json create mode 100644 tests/test_cases.json create mode 100644 tests/test_georef_ar_address.py diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..851f3b9 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,567 @@ +[MASTER] + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-whitelist= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. 
+jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Specify a configuration file. +#rcfile= + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". 
+disable=print-statement, + parameter-unpacking, + unpacking-in-except, + old-raise-syntax, + backtick, + long-suffix, + old-ne-operator, + old-octal-literal, + import-star-module-level, + non-ascii-bytes-literal, + raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + apply-builtin, + basestring-builtin, + buffer-builtin, + cmp-builtin, + coerce-builtin, + execfile-builtin, + file-builtin, + long-builtin, + raw_input-builtin, + reduce-builtin, + standarderror-builtin, + unicode-builtin, + xrange-builtin, + coerce-method, + delslice-method, + getslice-method, + setslice-method, + no-absolute-import, + old-division, + dict-iter-method, + dict-view-method, + next-method-called, + metaclass-assignment, + indexing-exception, + raising-string, + reload-builtin, + oct-method, + hex-method, + nonzero-method, + cmp-method, + input-builtin, + round-builtin, + intern-builtin, + unichr-builtin, + map-builtin-not-iterating, + zip-builtin-not-iterating, + range-builtin-not-iterating, + filter-builtin-not-iterating, + using-cmp-argument, + eq-without-hash, + div-method, + idiv-method, + rdiv-method, + exception-message-attribute, + invalid-str-codec, + sys-max-int, + bad-python3-import, + deprecated-string-function, + deprecated-str-translate-call, + deprecated-itertools-function, + deprecated-types-field, + next-method-defined, + dict-items-not-iterating, + dict-keys-not-iterating, + dict-values-not-iterating, + deprecated-operator-function, + deprecated-urllib-function, + xreadlines-attribute, + deprecated-sys-function, + exception-escape, + comprehension-escape, + # Agregados por georef-ar-address: + missing-docstring, + invalid-name, + too-few-public-methods, + fixme, + no-self-use + +# Enable the message, report, category or checker with the given id(s). 
You can +# either give multiple identifiers separated by comma (,) or put this option +# multiple times (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[REPORTS] + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables error, warning and statement, which +# respectively contain the number of error / warning messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +#msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete names of functions that never return. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit + + +[MISCELLANEOUS] + +# List of note tags to take into consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. 
+contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. 
+missing-member-hint-distance=1 + +# The total number of similar names that should be taken into consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains a private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary indicated in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )?<?https?://\S+>?$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma, + dict-separator + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. 
+single-line-if-stmt=no + + +[SIMILARITIES] + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[LOGGING] + +# Format style used to check logging format string. `old` means using % +# formatting, while `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. +#class-attribute-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. 
+function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. +#variable-rgx= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. 
+allow-global-unused-variables=yes + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[IMPORTS] + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules=optparse,tkinter.tix + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled). +ext-import-graph= + +# Create a graph of every (i.e. 
internal and external) dependencies in the +# given file (report RP0402 must not be disabled). +import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + + +[DESIGN] + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement. +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception". 
+overgeneral-exceptions=Exception diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..04ccef2 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,7 @@ +language: python +python: + - 3.6 +install: + - pip install -r requirements.txt -r requirements-dev.txt +script: + - make code_checks diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..0b1ec1d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,5 @@ +include georef_ar_address.py +recursive-include grammars *.cfg +include requirements.txt +include LICENSE +include README.md diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..71ad933 --- /dev/null +++ b/Makefile @@ -0,0 +1,10 @@ +# Makefile para georef-ar-address +# +# Contiene recetas para ejecutar tests y linters de código. + +test: + python -m unittest + +code_checks: + flake8 georef_ar_address.py tests/test_georef_ar_address.py + pylint georef_ar_address.py tests/test_georef_ar_address.py diff --git a/georef_ar_address.py b/georef_ar_address.py new file mode 100644 index 0000000..2831529 --- /dev/null +++ b/georef_ar_address.py @@ -0,0 +1,309 @@ +"""Módulo georef_ar_address.py + +Contiene clases y funciones utilizadas para extraer información de direcciones +de calles en Argentina. 
Los tipos de direcciones aceptadas son: + +<calle> +<calle> <altura> +<calle> y <calle> +<calle> <altura> y <calle> +<calle> y <calle> <altura> +<calle> entre <calle> y <calle> +<calle> <altura> entre <calle> y <calle> +<calle> entre <calle> y <calle> <altura> +""" + +import re +import os +import copy +import nltk + +GRAMMARS_DIR = 'grammars' +GRAMMAR_PATH = os.path.join(os.path.dirname(__file__), GRAMMARS_DIR, + 'address-ar.cfg') +START_PRODUCTION = 'address' + +ADDRESS_DATA_TEMPLATE = { + 'address': None, + 'street_names': [], + 'door_number': { + 'value': None, + 'unit': None + }, + 'floor': None, + 'type': 'none' +} + +_SEPARATION_REGEXP = r'([^\W\d]{2,}\.?)(\d)' + +_NORMALIZATION_REGEXPS = [ + r'\((ex|antes|frente|mano|(al\s)?lado).+?\)', # Remover aclaraciones + r'\([sneo]\)', # Remover aclaraciones de orientación + r',(\s|$)|\s,', # Comas utilizadas para separar texto + r'[()"?]', # Caracteres no deseados + r'-+$', # Guiones al final + r'\s-\s', # Guiones entre espacios + r'(b[°ºª]|barrio\s|bo\.\s).*', # Indicadores de barrio + r'\sal\s+(?=\d)' # Palabra 'al' antes de un número +] + +_TOKEN_TYPES = [ + ('AND_WORD', r'y\s(?=\D)|e\s(?=i)'), + ('AND_NUM', r'y\s(?=\d)'), + ('OF', r'de\s'), + ('FLOOR', r'piso(\s|$)'), + ('DOOR_TYPE', r'(d(e?p)?to\.?|departamento|oficina|of\.)\s'), + ('GROUNDL', r'(p\.?b\.?|planta\sbaja)(\s|$)'), + ('ISCT_SEP', r'esquina|esq\.|esq\s|esq/'), + ('BTWN_SEP', r'e/(calles)?|entre\scalles'), + ('BETWEEN', r'entre\s'), + ('KM', r'kil[oó]metro|km\.?'), + ('MISSING_NAME', r's/nombre'), + ('MISSING_NUM', r'(sin\s|s/)(n[uú]mero|n(ro\.?|[°º]))'), + ('S_N', r's[/-]n|sn(\s|$)'), + ('STREET_TYPE_S', r'(avda|av|bv|diag)[\s.]'), + ('STREET_TYPE_L', r'calle\s|avenida|bo?ulevard?|diagonal'), + ('ROUTE', r'ruta|(rta|rn|rp)[\s.]'), + ('NUM_LABEL_S', r'n\s?[°ºª*]|#|n(?=\d)'), + ('NUM_LABEL_L', r'nro[\s.]|n[uú]mero'), + ('DECIMAL', r'\d+[.,]\d+'), + ('NUM_RANGE', r'\d+[/-]\d+([/-]\d+)*'), + ('ORDINAL', r'\d+(era?|nd[oa]|[nmtvr][oa])(\s|$|\.)'), + ('NUM', r'\d+((\s|$)|[°º])'), + ('N', r'n\s'), + ('LETTER', r'[^\d\W](\s|$|\.)'), + ('WORD', r'(\w|\.|\'|`|´|:|/)+'), + ('WS', r'\s'), + ('UNKNOWN', '.+') +] + + +class 
InvalidTokenException(Exception): + pass + + +class InvalidGrammarException(Exception): + pass + + +def with_labels(labels): + return lambda t: t.label() in set(labels) + + +def load_grammar(grammar_path): + grammar = nltk.data.load('file:{}'.format(grammar_path)) + + if grammar.start().symbol() != START_PRODUCTION: + raise InvalidGrammarException('Start rule must be "{}"'.format( + START_PRODUCTION)) + + if not grammar.is_nonempty(): + raise InvalidGrammarException('Empty productions are not allowed') + + nonterminals = set() + terminals = {token_name for token_name, _ in _TOKEN_TYPES} + + for production in grammar.productions(): + nonterminals.add(production.lhs().symbol()) + + for production in grammar.productions(): + for element in production.rhs(): + symbol = str(element) + + if isinstance(element, nltk.Nonterminal): + if symbol not in nonterminals: + raise InvalidGrammarException( + 'Invalid nonterminal: {}'.format(symbol)) + elif symbol not in terminals: + raise InvalidGrammarException( + 'Invalid terminal: {}'.format(symbol)) + + return grammar + + +class TreeVisitor: + __slots__ = ['_tree', '_rank'] + + def __init__(self, tree): + self._tree = tree + self._rank = None + + def extract_data(self, tokens): + tree = self._tree.copy(deep=True) + + # Agregar valores reales a las hojas del árbol + # TODO: Cachear los índices de las hojas de cada subarbol y usar + # eso directamente. 
+ for i, tree_pos in enumerate(tree.treepositions('leaves')): + tree[tree_pos] = tokens[i][0] + + street_names = [] + door_number_value = None + door_number_unit = None + floor = None + condition = with_labels([ + 'street', + 'door_number_value', + 'door_number_unit', + 'floor' + ]) + + for subtree in tree.subtrees(condition): + label = subtree.label() + subtree_text = ' '.join(subtree.leaves()) + + if label == 'street': + street_names.append(subtree_text) + elif label == 'door_number_value': + door_number_value = subtree_text + elif label == 'door_number_unit': + door_number_unit = subtree_text + elif label == 'floor': + floor = subtree_text + + return street_names, door_number_value, door_number_unit, floor + + def _get_rank(self): + has_door_number = False + unnamed_streets = 0 + + condition = with_labels(['street_no_num', 'street_with_num']) + for subtree in self._tree.subtrees(condition): + if subtree.label() == 'street_with_num': + has_door_number = True + + if subtree[0][0].label() == 'unnamed_street': + unnamed_streets += 1 + + if has_door_number: + ranks = ['isct', 'simple', 'btwn'] + else: + ranks = ['simple', 'isct', 'btwn'] + + rank = ranks.index(self.address_type) + + return (unnamed_streets, int(has_door_number), rank) + + @property + def rank(self): + if not self._rank: + self._rank = self._get_rank() + + return self._rank + + @property + def address_type(self): + return self._tree.label() + + +class AddressParser: + def __init__(self, cache=None): + self._parser = nltk.EarleyChartParser(load_grammar(GRAMMAR_PATH)) + + self._token_regexp = re.compile( + '|'.join('(?P<{}>{})'.format(*tt) for tt in _TOKEN_TYPES), + re.IGNORECASE) + + self._separation_regexp = re.compile(_SEPARATION_REGEXP, re.IGNORECASE) + + self._normalization_regexp = re.compile( + '|'.join(_NORMALIZATION_REGEXPS), + re.IGNORECASE + ) + + self._cache = cache + + def _tokenize_address(self, address): + tokens = [] + for mo in self._token_regexp.finditer(address): + kind = mo.lastgroup 
+ value = mo.group().strip() + + if kind == 'UNKNOWN': + raise InvalidTokenException('Value: {}'.format(value)) + elif kind != 'WS': + tokens.append((value, kind)) + + return tokens + + def _preprocess_address(self, address): + # Reemplazar partes no deseadas por espacios + normalized = self._normalization_regexp.sub(' ', address) + + # Separar dos o más letras pegadas a números (en ese orden): + # Sí: 'hola123' -> 'hola 123' + # Sí: 'ruta nac.3' -> 'ruta nac. 3' + # No: '1ro de Mayo' -> '1ro de Mayo' + # No: 'Lote 14 M2' -> 'Lote 14 M2' + normalized = self._separation_regexp.sub(r'\1 \2', normalized) + + # Normalizar espacios (también remueve trailing/leading whitespace) + return ' '.join(normalized.split()) + + def _disambiguate_trees(self, visitors): + if len(visitors) > 1: + visitors.sort(key=lambda v: v.rank, reverse=True) + + # La lista de árboles ahora está ordenada de mejor a peor. Comparar + # el rank (puntaje) del primer elemento con el del segundo: si son + # iguales, entonces hay dos (o más) árboles con el mismo puntaje + # maximal. Esto quiere decir que todos estos árboles son una + # solución viable, pero no es posible distinguir cuál de ellos es + # el más adecuado. Si sucede esto, devolver None. 
+ if visitors[0].rank == visitors[1].rank: + return None + + return visitors[0] + + def _tokens_parse_tree(self, token_types): + visitors = [ + TreeVisitor(tree[0]) # tree['address'] + for tree in + self._parser.parse(token_types) + ] + + return self._disambiguate_trees(visitors) if visitors else None + + def _parse_token_types(self, token_types): + if self._cache is not None: + tokens_hash = hash(tuple(token_types)) + + if tokens_hash in self._cache: + return self._cache[tokens_hash] + + tree = self._tokens_parse_tree(token_types) + self._cache[tokens_hash] = tree + return tree + + return self._tokens_parse_tree(token_types) + + def parse(self, address): + # Remover espacios al comienzo y al final + address = address.strip() + processed = self._preprocess_address(address) + + data = copy.deepcopy(ADDRESS_DATA_TEMPLATE) + data['address'] = address + + if not processed: + return data + + try: + tokens = self._tokenize_address(processed) + except InvalidTokenException: + return data + + visitor = self._parse_token_types([ + t_type for _, t_type in tokens + ]) + + if visitor: + street_names, door_number_value, door_number_unit, floor = \ + visitor.extract_data(tokens) + data['type'] = visitor.address_type + data['street_names'] = street_names + data['door_number']['value'] = door_number_value + data['door_number']['unit'] = door_number_unit + data['floor'] = floor + + return data diff --git a/grammars/address-ar.cfg b/grammars/address-ar.cfg new file mode 100644 index 0000000..a49434d --- /dev/null +++ b/grammars/address-ar.cfg @@ -0,0 +1,79 @@ +# Start production + +address -> btwn | isct | simple + +# Address types + +and -> 'AND_WORD' | 'AND_NUM' +isct_separator -> 'ISCT_SEP' | 'AND_WORD' 'ISCT_SEP' | and +btwn_separator -> 'BTWN_SEP' | 'BETWEEN' + +btwn -> street_with_num btwn_separator street_no_num and street_no_num +btwn -> street_no_num btwn_separator street_no_num and street_with_num +btwn -> street_no_num btwn_separator street_no_num and street_no_num + 
+isct -> street_with_num isct_separator street_no_num +isct -> street_no_num isct_separator street_with_num +isct -> street_no_num isct_separator street_no_num + +simple -> street_with_num +simple -> street_no_num + +# Street name + door number + +street_no_num -> street +street_with_num -> street door_number | street door_number floor + +# Floor number + +floor_part -> 'FLOOR' | 'NUM' | 'ORDINAL' | 'GROUNDL' | 'LETTER' | 'DOOR_TYPE' +floor_part_recursive -> floor_part_recursive floor_part | floor_part +floor_long -> floor_part_recursive floor_part +floor_short -> 'GROUNDL' + +floor -> floor_long | floor_short + +# Street names + +street_name_part -> 'WORD' | 'NUM' | 'ORDINAL' | 'LETTER' +street_name_part_nonfinal -> 'N' | 'AND_WORD' | 'BETWEEN' | 'OF' +street_name_part_any -> street_name_part | street_name_part_nonfinal +street_name_part_any_recursive -> street_name_part_any_recursive street_name_part_any +street_name_part_any_recursive -> street_name_part_any + +street_name -> street_name_part +street_name -> street_name_part_any_recursive street_name_part + +street_type -> 'STREET_TYPE_S' | 'STREET_TYPE_L' +missing_name -> 'MISSING_NAME' | 'S_N' +number_indicator -> 'N' | 'NUM_LABEL_S' | 'NUM_LABEL_L' +street_number -> number_indicator 'NUM' | 'NUM' + +named_street -> street_name +named_street -> street_type street_name +named_street -> street_name street_type +named_street -> 'ROUTE' street_name + +numbered_street -> street_type street_number +numbered_street -> 'NUM' + +numbered_route -> 'ROUTE' street_number +numbered_route -> 'ROUTE' 'WORD' street_number +numbered_route -> street_name 'ROUTE' street_number + +unnamed_street -> numbered_street +unnamed_street -> numbered_route +unnamed_street -> street_type missing_name +unnamed_street -> missing_name + +street -> unnamed_street +street -> named_street + +# Door numbers + +door_number_value -> 'NUM' | 'DECIMAL' | 'NUM_RANGE' | 'NUM' 'NUM_RANGE' +door_number_value -> 'MISSING_NUM' | 'S_N' +door_number_unit -> 
'KM' | number_indicator + +door_number -> door_number_unit door_number_value +door_number -> door_number_value \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..803a235 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +pylint==2.2.2 +flake8==3.6.0 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d368e9e --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +nltk==3.4 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..224a779 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[metadata] +description-file = README.md \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..aa433b7 --- /dev/null +++ b/setup.py @@ -0,0 +1,33 @@ +from distutils.core import setup + +VERSION = '0.0.1' + + +with open('requirements.txt') as f: + requires = f.read().splitlines() + +with open('README.md') as f: + long_description = f.read() + +setup( + name='georef-ar-address', + py_modules=['georef_ar_address'], + version=VERSION, + description='Librería escrita en Python para la identificación de componentes de direcciones argentinas', + long_description=long_description, + long_description_content_type='text/markdown', + author='Datos Argentina', + author_email='datos@modernizacion.gob.ar', + install_requires=requires, + python_requires='>=3', + url='https://github.com/datosgobar/georef-ar-address', + download_url='https://github.com/datosgobar/georef-ar-address/archive/{}.tar.gz'.format(VERSION), + keywords=['georef', 'datos', 'argentina', 'direccion', 'calle', 'altura', 'json', 'nltk'], + license='MIT', + classifiers=[ + 'Programming Language :: Python :: 3 :: Only', + 'License :: OSI Approved :: MIT License', + 'Topic :: Text Processing', + 'Topic :: Software Development :: Libraries' + ] +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/real_cases.json 
b/tests/real_cases.json new file mode 100644 index 0000000..7092ab1 --- /dev/null +++ b/tests/real_cases.json @@ -0,0 +1,147 @@ +[ + { + "address": "Calle 12 Nro. 745-749 e/ 13 y 14", + "type": "btwn", + "street_names": [ + "Calle 12", + "13", + "14" + ], + "door_number": { + "unit": "Nro.", + "value": "745-749" + } + }, + { + "address": "Dr. Udo Bress e/44 y 45 S/N", + "type": "btwn", + "street_names": ["Dr. Udo Bress", "44", "45"], + "door_number": { + "unit": null, + "value": "S/N" + } + }, + { + "address": "CATAMARCA 1033 PISO 3° B", + "type": "simple", + "street_names": ["CATAMARCA"], + "door_number": { + "unit": null, + "value": "1033" + }, + "floor": "PISO 3° B" + }, + { + "address": "Calle 33 N° 434 1er. P", + "type": "simple", + "street_names": ["Calle 33"], + "door_number": { + "unit": "N°", + "value": "434" + }, + "floor": "1er. P" + }, + { + "address": "Av. Maipú N° 124 5to.Piso Dto. E", + "type": "simple", + "street_names": ["Av. Maipú"], + "door_number": { + "unit": "N°", + "value": "124" + }, + "floor": "5to. Piso Dto. E" + }, + { + "address": "av. arturo frondizi 2000 y av. 12 de septiembre", + "type": "isct", + "street_names": [ + "av. arturo frondizi", + "av. 12 de septiembre" + ], + "door_number": { + "unit": null, + "value": "2000" + } + }, + { + "address": "10 DE SEPTIEMBRE DE 1861 N° 4000 P. B", + "type": "simple", + "street_names": ["10 DE SEPTIEMBRE DE 1861"], + "door_number": { + "unit": "N°", + "value": "4000" + }, + "floor": "P. B" + }, + { + "address": "9 de Julio Nº 565, Piso 4º, Dpto. A", + "type": "simple", + "street_names": ["9 de Julio"], + "door_number": { + "unit": "Nº", + "value": "565" + }, + "floor": "Piso 4º Dpto. A" + }, + { + "address": "CHILE 3000 PB. DPTO. A", + "type": "simple", + "street_names": ["CHILE"], + "door_number": { + "unit": null, + "value": "3000" + }, + "floor": "PB. DPTO. A" + }, + { + "address": "Colombia e/R. de Esc. y San Lorenzo S/N", + "type": "btwn", + "street_names": [ + "Colombia", + "R. 
de Esc.", + "San Lorenzo" + ], + "door_number": { + "unit": null, + "value": "S/N" + } + }, + { + "address": "29 Y 11 NRO. 302", + "type": "isct", + "street_names": ["29", "11"], + "door_number": { + "unit": "NRO.", + "value": "302" + } + }, + { + "address": "Av. José León Suarez y esq. Canadá 4322", + "type": "isct", + "street_names": [ + "Av. José León Suarez", + "Canadá" + ], + "door_number": { + "unit": null, + "value": "4322" + } + }, + { + "address": "AV. QUESADA Y 2 DE ABRIL DE 1982", + "type": "isct", + "street_names": [ + "AV. QUESADA", + "2 DE ABRIL DE 1982" + ] + }, + { + "address": "Av. 104 (Ex - R. Balbin) 2000", + "type": "simple", + "street_names": ["Av. 104"], + "door_number": { + "unit": null, + "value": "2000" + } + } +] diff --git a/tests/test_cases.json b/tests/test_cases.json new file mode 100644 index 0000000..02f89c4 --- /dev/null +++ b/tests/test_cases.json @@ -0,0 +1,2416 @@ +[ + { + "address": "foo y bar y baz", + "type": "none", + "door_number": { + "value": null, + "unit": null + } + }, + { + "address": "foo entre bar y baz y foo", + "type": "none", + "door_number": { + "value": null, + "unit": null + } + }, + { + "address": "juan", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "juan" + ] + }, + { + "address": "juan E Gómez", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "juan E Gómez" + ] + }, + { + "address": "santa fe", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "entre ríos", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "entre ríos" + ] + }, + { + "address": "adolfo calle", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "adolfo calle" + ] + }, + { + "address": "9 de julio", + "type": "simple", + "door_number": { + "value": null, + "unit": 
null + }, + "street_names": [ + "9 de julio" + ] + }, + { + "address": "av. 9 de julio", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "av. 9 de julio" + ] + }, + { + "address": "av.9 de julio", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "av. 9 de julio" + ] + }, + { + "address": "av. formosa", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "av. formosa" + ] + }, + { + "address": "av.formosa", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "av. formosa" + ] + }, + { + "address": "o'higgins", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "o'higgins" + ] + }, + { + "address": "ruta nacional nro 3", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "ruta nacional nro 3" + ] + }, + { + "address": "ruta provincial N°3", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "ruta provincial N° 3" + ] + }, + { + "address": "ruta 3", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "ruta 3" + ] + }, + { + "address": "ruta de la tradicion", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "ruta de la tradicion" + ] + }, + { + "address": "rp 3", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "rp 3" + ] + }, + { + "address": "calle 10", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "calle 10" + ] + }, + { + "address": "calle N° 10", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "calle N° 10" + ] + }, + { + "address": "calle nro. 
10", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "calle nro. 10" + ] + }, + { + "address": "s/n", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "s/n" + ] + }, + { + "address": "calle s/n", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "calle s/n" + ] + }, + { + "address": "calle sn", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "calle sn" + ] + }, + { + "address": "bv. s/nombre", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "bv. s/nombre" + ] + }, + { + "address": "av. s/nombre", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "av. s/nombre" + ] + }, + { + "address": "foo al bar", + "type": "simple", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "foo al bar" + ] + }, + { + "address": "septiembre de 1999", + "type": "simple", + "street_names": [ + "septiembre de 1999" + ] + }, + { + "address": "vicente lopez y planes 2000", + "type": "simple", + "door_number": { + "value": "2000", + "unit": null + }, + "street_names": [ + "vicente lopez y planes" + ] + }, + { + "address": "ruta de la tradicion km 1", + "type": "simple", + "door_number": { + "value": "1", + "unit": "km" + }, + "street_names": [ + "ruta de la tradicion" + ] + }, + { + "address": "25 de mayo 2000", + "type": "simple", + "door_number": { + "value": "2000", + "unit": null + }, + "street_names": [ + "25 de mayo" + ] + }, + { + "address": "1° de mayo 100", + "type": "simple", + "door_number": { + "value": "100", + "unit": null + }, + "street_names": [ + "1° de mayo" + ] + }, + { + "address": "humberto 1° 100", + "type": "simple", + "door_number": { + "value": "100", + "unit": null + }, + "street_names": [ + "humberto 1°" + ] + }, + { + "address": "1ra junta 2000", + 
"type": "simple", + "door_number": { + "value": "2000", + "unit": null + }, + "street_names": [ + "1ra junta" + ] + }, + { + "address": "1ra. junta 2000", + "type": "simple", + "door_number": { + "value": "2000", + "unit": null + }, + "street_names": [ + "1ra. junta" + ] + }, + { + "address": "cordoba 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "cordoba" + ] + }, + { + "address": "adolfo calle 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "adolfo calle" + ] + }, + { + "address": "leandro n alem 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "leandro n alem" + ] + }, + { + "address": "leandro n alem n 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "n" + }, + "street_names": [ + "leandro n alem" + ] + }, + { + "address": "calle 99 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "calle juan 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle juan" + ] + }, + { + "address": "calle 99 1000 piso 1", + "floor": "piso 1", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "calle 99 1000 1° A", + "floor": "1° A", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "calle 99 1000 1er piso A", + "floor": "1er piso A", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "calle 99 1000 2ndo A", + "floor": "2ndo A", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "calle 99 1000 
piso 1° A", + "floor": "piso 1° A", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "calle 99 1000 1°A", + "floor": "1° A", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "calle 99 1000 2 A", + "floor": "2 A", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "calle 99 1000 2 E", + "floor": "2 E", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "calle 99 1000 1° piso depto A", + "floor": "1° piso depto A", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "calle 99 1000 1° piso A", + "floor": "1° piso A", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "calle 99 1000 PB", + "floor": "PB", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "calle 99 1000 P.b. C", + "floor": "P.b. 
C", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "calle 99 n 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "n" + }, + "street_names": [ + "calle 99" + ] + }, + { + "address": "99 n 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "n" + }, + "street_names": [ + "99" + ] + }, + { + "address": "99 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "99" + ] + }, + { + "address": "s/n 100", + "type": "simple", + "door_number": { + "value": "100", + "unit": null + }, + "street_names": [ + "s/n" + ] + }, + { + "address": "calle s/n s/n", + "type": "simple", + "door_number": { + "value": "s/n", + "unit": null + }, + "street_names": [ + "calle s/n" + ] + }, + { + "address": "calle 10 s/n", + "type": "simple", + "door_number": { + "value": "s/n", + "unit": null + }, + "street_names": [ + "calle 10" + ] + }, + { + "address": "av. 10 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": null + }, + "street_names": [ + "av. 
10" + ] + }, + { + "address": "avenida belgrano 100", + "type": "simple", + "door_number": { + "value": "100", + "unit": null + }, + "street_names": [ + "avenida belgrano" + ] + }, + { + "address": "belgrano avenida 100", + "type": "simple", + "door_number": { + "value": "100", + "unit": null + }, + "street_names": [ + "belgrano avenida" + ] + }, + { + "address": "calle s/n 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "calle s/n" + ] + }, + { + "address": "tucuman s/numero", + "type": "simple", + "door_number": { + "value": "s/numero", + "unit": null + }, + "street_names": [ + "tucuman" + ] + }, + { + "address": "tucuman sin numero", + "type": "simple", + "door_number": { + "value": "sin numero", + "unit": null + }, + "street_names": [ + "tucuman" + ] + }, + { + "address": "tucuman sin número", + "type": "simple", + "door_number": { + "value": "sin número", + "unit": null + }, + "street_names": [ + "tucuman" + ] + }, + { + "address": "av espora 100", + "type": "simple", + "door_number": { + "value": "100", + "unit": null + }, + "street_names": [ + "av espora" + ] + }, + { + "address": "av: espora 100", + "type": "simple", + "door_number": { + "value": "100", + "unit": null + }, + "street_names": [ + "av: espora" + ] + }, + { + "address": "av. yak 100", + "type": "simple", + "door_number": { + "value": "100", + "unit": null + }, + "street_names": [ + "av. 
yak" + ] + }, + { + "address": "santa fe 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 1000 B° foobar", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe al 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 1000 (N)", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe (E) 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe \"1000\"", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 1000,", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 1000 -", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe (ex rosario) 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe(ex rosario) 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe (ex rosario), 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe s/n", + "type": "simple", + "door_number": { 
+ "value": "s/n", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe sn", + "type": "simple", + "door_number": { + "value": "sn", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe s-n", + "type": "simple", + "door_number": { + "value": "s-n", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe s/nro", + "type": "simple", + "door_number": { + "value": "s/nro", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe s/numero", + "type": "simple", + "door_number": { + "value": "s/numero", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe s/n°", + "type": "simple", + "door_number": { + "value": "s/n°", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe s/nº", + "type": "simple", + "door_number": { + "value": "s/nº", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe sin nº", + "type": "simple", + "door_number": { + "value": "sin nº", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe sin nro.", + "type": "simple", + "door_number": { + "value": "sin nro.", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe s/nro.", + "type": "simple", + "door_number": { + "value": "s/nro.", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe N 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "N" + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe N1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "N" + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe nro 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "nro" + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe nro. 
1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "nro." + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe N° 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "N°" + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe N ° 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "N °" + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe N°1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "N°" + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe Nº 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "Nº" + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe #1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "#" + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe N* 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "N*" + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe N º 1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "N º" + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe Nº1000", + "type": "simple", + "door_number": { + "value": "1000", + "unit": "Nº" + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 1000.1", + "type": "simple", + "door_number": { + "value": "1000.1", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 1000,1", + "type": "simple", + "door_number": { + "value": "1000,1", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 1000/1001", + "type": "simple", + "door_number": { + "value": "1000/1001", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 1/2/3", + "type": "simple", + "door_number": { + "value": "1/2/3", + "unit": null + }, + "street_names": [ + 
"santa fe" + ] + }, + { + "address": "santa fe 1/2/3/4/5", + "type": "simple", + "door_number": { + "value": "1/2/3/4/5", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 100-101", + "type": "simple", + "door_number": { + "value": "100-101", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 1000 piso 1", + "floor": "piso 1", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 1000, piso 1", + "floor": "piso 1", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 1000 piso 1º", + "floor": "piso 1º", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 1000 piso 1°", + "floor": "piso 1°", + "type": "simple", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 211 PB", + "floor": "PB", + "type": "simple", + "door_number": { + "value": "211", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "santa fe 211 PB A", + "floor": "PB A", + "type": "simple", + "door_number": { + "value": "211", + "unit": null + }, + "street_names": [ + "santa fe" + ] + }, + { + "address": "ruta 8 km 123", + "type": "simple", + "door_number": { + "value": "123", + "unit": "km" + }, + "street_names": [ + "ruta 8" + ] + }, + { + "address": "ruta nac.8 km 123", + "type": "simple", + "door_number": { + "value": "123", + "unit": "km" + }, + "street_names": [ + "ruta nac. 
8" + ] + }, + { + "address": "ruta 8, km 123", + "type": "simple", + "door_number": { + "value": "123", + "unit": "km" + }, + "street_names": [ + "ruta 8" + ] + }, + { + "address": "ruta 8 km 123.3", + "type": "simple", + "door_number": { + "value": "123.3", + "unit": "km" + }, + "street_names": [ + "ruta 8" + ] + }, + { + "address": "ruta 8 km 123,3", + "type": "simple", + "door_number": { + "value": "123,3", + "unit": "km" + }, + "street_names": [ + "ruta 8" + ] + }, + { + "address": "ruta 8 km 123 1/2", + "type": "simple", + "door_number": { + "value": "123 1/2", + "unit": "km" + }, + "street_names": [ + "ruta 8" + ] + }, + { + "address": "ruta Nº 8 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "ruta Nº 8" + ] + }, + { + "address": "ruta N º 8 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "ruta N º 8" + ] + }, + { + "address": "ruta N° 8 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "ruta N° 8" + ] + }, + { + "address": "ruta N°8 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "ruta N° 8" + ] + }, + { + "address": "rn N° 10 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "rn N° 10" + ] + }, + { + "address": "rn N 10 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "rn N 10" + ] + }, + { + "address": "rn 10 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "rn 10" + ] + }, + { + "address": "ex ruta 11 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "ex ruta 11" + ] + }, + { + "address": "acceso ruta 11 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ 
+ "acceso ruta 11" + ] + }, + { + "address": "colectora oeste ruta 11 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "colectora oeste ruta 11" + ] + }, + { + "address": "ruta nacional N° 9 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "ruta nacional N° 9" + ] + }, + { + "address": "ruta nacional N ° 9 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "ruta nacional N ° 9" + ] + }, + { + "address": "ruta nacional N° 9 - km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "ruta nacional N° 9" + ] + }, + { + "address": "ruta nacional N 9 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "ruta nacional N 9" + ] + }, + { + "address": "ruta nacional nro 9 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "ruta nacional nro 9" + ] + }, + { + "address": "rta provincial n 9 km 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "km" + }, + "street_names": [ + "rta provincial n 9" + ] + }, + { + "address": "rta provincial n 9 kilometro 10", + "type": "simple", + "door_number": { + "value": "10", + "unit": "kilometro" + }, + "street_names": [ + "rta provincial n 9" + ] + }, + { + "address": "cordoba y jujuy", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "cordoba y 25 de mayo", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "25 de mayo" + ] + }, + { + "address": "cordoba y entre ríos", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "entre ríos" + ] + }, + { + "address": "cordoba y calle 10", + "type": 
"isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "calle 10" + ] + }, + { + "address": "cordoba e independencia", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "independencia" + ] + }, + { + "address": "cordoba esq jujuy", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "cordoba esquina jujuy", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "cordoba esq/ jujuy", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "cordoba esq/jujuy", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "cordoba esq. jujuy", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "cordoba (esq jujuy)", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "cordoba (esq. 
jujuy)", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "cordoba (esq.jujuy)", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "cordoba (esquina jujuy)", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "cordoba, esq jujuy", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "cordoba, esquina jujuy", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "cordoba, esq. jujuy", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "cordoba, esq.jujuy", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "calle 48 bis esq.100", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "calle 48 bis", + "100" + ] + }, + { + "address": "cordoba (Ex tucuman), esq. jujuy", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "cordoba", + "jujuy" + ] + }, + { + "address": "1 esq. 
2", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "1", + "2" + ] + }, + { + "address": "1 y 2", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "1", + "2" + ] + }, + { + "address": "1era junta y 2ndo de mayo", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "1era junta", + "2ndo de mayo" + ] + }, + { + "address": "calle s/n y cordoba", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "calle s/n", + "cordoba" + ] + }, + { + "address": "av. entre rios y cordoba", + "type": "isct", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "av. entre rios", + "cordoba" + ] + }, + { + "address": "santa fe 1000 y cordoba", + "type": "isct", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe", + "cordoba" + ] + }, + { + "address": "santa fe 1000 y avenida s/n", + "type": "isct", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "santa fe", + "avenida s/n" + ] + }, + { + "address": "1 y 2 S/N", + "type": "isct", + "door_number": { + "value": "S/N", + "unit": null + }, + "street_names": [ + "1", + "2" + ] + }, + { + "address": "cordoba y 12 de octubre 1000", + "type": "isct", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "cordoba", + "12 de octubre" + ] + }, + { + "address": "ruta 5 y ruta 2 km 8", + "type": "isct", + "door_number": { + "value": "8", + "unit": "km" + }, + "street_names": [ + "ruta 5", + "ruta 2" + ] + }, + { + "address": "santa fe s/n y cordoba", + "type": "isct", + "door_number": { + "value": "s/n", + "unit": null + }, + "street_names": [ + "santa fe", + "cordoba" + ] + }, + { + "address": "calle s/n 3000 y cordoba", + "type": "isct", + "door_number": { + "value": "3000", + "unit": null + }, + "street_names": [ + "calle s/n", + "cordoba" + ] + }, 
+ { + "address": "calle s/n s/n y cordoba", + "type": "isct", + "door_number": { + "value": "s/n", + "unit": null + }, + "street_names": [ + "calle s/n", + "cordoba" + ] + }, + { + "address": "1 3000 y 2", + "type": "isct", + "door_number": { + "value": "3000", + "unit": null + }, + "street_names": [ + "1", + "2" + ] + }, + { + "address": "ruta nacional n° 3 km 3 y tucuman", + "type": "isct", + "door_number": { + "value": "3", + "unit": "km" + }, + "street_names": [ + "ruta nacional n° 3", + "tucuman" + ] + }, + { + "address": "ruta nacional n° 3 (ex ruta 4) km 3 y tucuman", + "type": "isct", + "door_number": { + "value": "3", + "unit": "km" + }, + "street_names": [ + "ruta nacional n° 3", + "tucuman" + ] + }, + { + "address": "entre rios s/n 2 E e independencia", + "floor": "2 E", + "type": "isct", + "door_number": { + "value": "s/n", + "unit": null + }, + "street_names": [ + "entre rios", + "independencia" + ] + }, + { + "address": "salta entre sarmiento y mitre", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "salta", + "sarmiento", + "mitre" + ] + }, + { + "address": "salta entre entre ríos y mitre", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "salta", + "entre ríos", + "mitre" + ] + }, + { + "address": "salta entre sarmiento (EX santa fe) y mitre", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "salta", + "sarmiento", + "mitre" + ] + }, + { + "address": "salta e/ sarmiento y mitre", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "salta", + "sarmiento", + "mitre" + ] + }, + { + "address": "av. entre rios entre salta y mitre", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "av. 
entre rios", + "salta", + "mitre" + ] + }, + { + "address": "salta e/calles sarmiento y mitre", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "salta", + "sarmiento", + "mitre" + ] + }, + { + "address": "salta, e/calles sarmiento y mitre", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "salta", + "sarmiento", + "mitre" + ] + }, + { + "address": "10, e/calles 11 y 12", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "10", + "11", + "12" + ] + }, + { + "address": "calle 10, e/calles 11 y 12", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "calle 10", + "11", + "12" + ] + }, + { + "address": "10, e/ 11 y 12", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "10", + "11", + "12" + ] + }, + { + "address": "10, e/ 11 y 9 de julio", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "10", + "11", + "9 de julio" + ] + }, + { + "address": "9 de julio, entre 11 y 12", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "9 de julio", + "11", + "12" + ] + }, + { + "address": "9 de julio, entre 11 e independencia", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "9 de julio", + "11", + "independencia" + ] + }, + { + "address": "9 de julio e/ 25 de mayo y tucuman", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "9 de julio", + "25 de mayo", + "tucuman" + ] + }, + { + "address": "av.1 entre 2 y 3", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "av. 
1", + "2", + "3" + ] + }, + { + "address": "1 entre 2 y 3", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "1", + "2", + "3" + ] + }, + { + "address": "ruta N°3 entre ruta N° 4 y ruta nacional nro 33", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "ruta N° 3", + "ruta N° 4", + "ruta nacional nro 33" + ] + }, + { + "address": "ruta N°3 entre cordoba y tucuman", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "ruta N° 3", + "cordoba", + "tucuman" + ] + }, + { + "address": "mitre e/cordoba y tucuman", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "mitre", + "cordoba", + "tucuman" + ] + }, + { + "address": "29 e/1 y 9 de julio", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "29", + "1", + "9 de julio" + ] + }, + { + "address": "29 e/1 (ex 2) y 9", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "29", + "1", + "9" + ] + }, + { + "address": "29 e/ruta n°2 (ex ruta n°3) y 9", + "type": "btwn", + "door_number": { + "value": null, + "unit": null + }, + "street_names": [ + "29", + "ruta n° 2", + "9" + ] + }, + { + "address": "tucuman 100 entre cordoba y salta", + "type": "btwn", + "door_number": { + "value": "100", + "unit": null + }, + "street_names": [ + "tucuman", + "cordoba", + "salta" + ] + }, + { + "address": "tucuman s/numero entre cordoba y salta", + "type": "btwn", + "door_number": { + "value": "s/numero", + "unit": null + }, + "street_names": [ + "tucuman", + "cordoba", + "salta" + ] + }, + { + "address": "tucuman 100 entre 10 y 11", + "type": "btwn", + "door_number": { + "value": "100", + "unit": null + }, + "street_names": [ + "tucuman", + "10", + "11" + ] + }, + { + "address": "tucuman 100, entre 10 y 11", + "type": "btwn", + "door_number": { + "value": "100", + "unit": 
null + }, + "street_names": [ + "tucuman", + "10", + "11" + ] + }, + { + "address": "tucuman 100 entre 10 y 11 bis", + "type": "btwn", + "door_number": { + "value": "100", + "unit": null + }, + "street_names": [ + "tucuman", + "10", + "11 bis" + ] + }, + { + "address": "av. 7 n° 1000 entre calles 10 y 11", + "type": "btwn", + "door_number": { + "value": "1000", + "unit": "n°" + }, + "street_names": [ + "av. 7", + "10", + "11" + ] + }, + { + "address": "44 1000 entre 10 y 11", + "type": "btwn", + "door_number": { + "value": "1000", + "unit": null + }, + "street_names": [ + "44", + "10", + "11" + ] + }, + { + "address": "tucuman 100 e/ruta n°2 (ex ruta n°3) y 9", + "type": "btwn", + "door_number": { + "value": "100", + "unit": null + }, + "street_names": [ + "tucuman", + "ruta n° 2", + "9" + ] + }, + { + "address": "ruta nac. nro. 11 km 100 e/calles 10 y 11", + "type": "btwn", + "door_number": { + "value": "100", + "unit": "km" + }, + "street_names": [ + "ruta nac. nro. 11", + "10", + "11" + ] + }, + { + "address": "ruta nac. nro. 11 km 12 1/2 e/calles 10 y calle sn", + "type": "btwn", + "door_number": { + "value": "12 1/2", + "unit": "km" + }, + "street_names": [ + "ruta nac. nro. 11", + "10", + "calle sn" + ] + }, + { + "address": "ruta nac. nro. 11 (ex 12) - km 12 1/2 e/calles 10 y calle sn", + "type": "btwn", + "door_number": { + "value": "12 1/2", + "unit": "km" + }, + "street_names": [ + "ruta nac. nro. 11", + "10", + "calle sn" + ] + }, + { + "address": "vicente lopez y planes N°2000 2ndo E e/calle nro. 42 y entre ríos (ex cordoba)", + "type": "btwn", + "door_number": { + "value": "2000", + "unit": "N°" + }, + "floor": "2ndo E", + "street_names": [ + "vicente lopez y planes", + "calle nro. 
42", + "entre ríos" + ] + } +] diff --git a/tests/test_georef_ar_address.py b/tests/test_georef_ar_address.py new file mode 100644 index 0000000..e7ca644 --- /dev/null +++ b/tests/test_georef_ar_address.py @@ -0,0 +1,57 @@ +import json +import os +from unittest import TestCase +from georef_ar_address import AddressParser, ADDRESS_DATA_TEMPLATE + + +def test_file_path(filename): + return os.path.join(os.path.dirname(__file__), filename) + + +class BaseClasses: + class AddressParserTest(TestCase): + @classmethod + def setUpClass(cls): + cls._parser = AddressParser(cache={}) + + with open(cls._test_file) as f: # pylint: disable=no-member + cls._test_cases = json.load(f) + + assert cls._test_cases + + def test_none_cases(self): + self.assert_address_cases('none') + + def test_simple_cases(self): + self.assert_address_cases('simple') + + def test_isct_cases(self): + self.assert_address_cases('isct') + + def test_btwn_cases(self): + self.assert_address_cases('btwn') + + def assert_address_cases(self, address_type): + test_cases = [ + test_case for test_case in self._test_cases + if test_case['type'] == address_type + ] + + for test_case in test_cases: + for key in ADDRESS_DATA_TEMPLATE: + if key not in test_case: + test_case[key] = ADDRESS_DATA_TEMPLATE[key] + + self.assert_address_data(test_case['address'], test_case) + + def assert_address_data(self, address, data): + parsed = self._parser.parse(address) + self.assertDictEqual(parsed, data) + + +class MockAddressParserTest(BaseClasses.AddressParserTest): + _test_file = test_file_path('test_cases.json') + + +class RealAddressParserTest(BaseClasses.AddressParserTest): + _test_file = test_file_path('real_cases.json')