diff --git a/Dockerfile b/Dockerfile index 3e2bbbbf9..25efd3d76 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Using Ubuntu base for access to GDAL PPA -FROM ubuntu:22.04 +FROM public.ecr.aws/ubuntu/ubuntu:22.04 WORKDIR /smbackend # tzdata installation requires settings frontend diff --git a/requirements-dev.txt b/requirements-dev.txt index c5ffa75c8..b034df310 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,12 +1,12 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile requirements-dev.in # asttokens==2.4.1 # via stack-data -certifi==2024.2.2 +certifi==2024.6.2 # via requests cffi==1.16.0 # via @@ -14,58 +14,56 @@ cffi==1.16.0 # pynacl charset-normalizer==3.3.2 # via requests -cryptography==42.0.3 +cryptography==42.0.8 # via pyjwt decorator==5.1.1 # via ipython deprecated==1.2.14 # via pygithub -exceptiongroup==1.2.0 - # via ipython executing==2.0.1 # via stack-data -idna==3.6 +idna==3.7 # via requests -ipython==8.21.0 +ipython==8.25.0 # via -r requirements-dev.in jedi==0.19.1 # via ipython -matplotlib-inline==0.1.6 +matplotlib-inline==0.1.7 # via ipython -parso==0.8.3 +parso==0.8.4 # via jedi pexpect==4.9.0 # via ipython -prompt-toolkit==3.0.43 +prompt-toolkit==3.0.46 # via ipython ptyprocess==0.7.0 # via pexpect pure-eval==0.2.2 # via stack-data -pycparser==2.21 +pycparser==2.22 # via cffi -pygithub==2.2.0 +pygithub==2.3.0 # via -r requirements-dev.in -pygments==2.17.2 +pygments==2.18.0 # via ipython pyjwt[crypto]==2.8.0 - # via - # pygithub - # pyjwt + # via pygithub pynacl==1.5.0 # via pygithub -requests==2.31.0 +requests==2.32.3 # via pygithub six==1.16.0 # via asttokens stack-data==0.6.3 # via ipython -traitlets==5.14.1 +traitlets==5.14.3 # via # ipython # matplotlib-inline -typing-extensions==4.9.0 - # via pygithub +typing-extensions==4.12.1 + # via + # ipython + # pygithub urllib3==1.26.18 # via # -r requirements-dev.in diff --git a/requirements.in b/requirements.in index 916de754a..bc115a755 100644 --- a/requirements.in +++ b/requirements.in @@ -6,7 +6,7 @@ django-modeltranslation flake8 requests requests_cache -git+https://github.com/City-of-Helsinki/django-munigeo@v0.2.82#egg=django-munigeo +git+https://github.com/City-of-Helsinki/django-munigeo@v0.2.86#egg=django-munigeo pytz django-cors-headers django-extensions diff --git a/requirements.txt b/requirements.txt index 169356820..130ee0fbb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,25 +4,25 @@ # # pip-compile requirements.in # -asgiref==3.7.2 +asgiref==3.8.1 # via # django # django-cors-headers -attrs==23.2.0 +attrs==24.2.0 # via # cattrs # jsonschema # referencing # requests-cache -black==24.2.0 +black==24.8.0 # via -r requirements.in -bmi-arcgis-restapi==2.4.7 +bmi-arcgis-restapi==2.4.9 # via -r requirements.in -build==1.0.3 +build==1.2.1 # via pip-tools cattrs==23.2.3 # via requests-cache -certifi==2024.2.2 +certifi==2024.7.4 # via # requests # sentry-sdk @@ -32,11 +32,9 @@ click==8.1.7 # via # black # pip-tools -coverage[toml]==7.4.1 - # via - # coverage - # pytest-cov -django==5.0.2 +coverage[toml]==7.6.1 + # via pytest-cov +django==5.1 # via # -r requirements.in # django-cors-headers @@ -48,17 +46,17 @@ django==5.0.2 # django-polymorphic # djangorestframework # drf-spectacular -django-cors-headers==4.3.1 +django-cors-headers==4.4.0 # via -r requirements.in django-environ==0.11.2 # via -r requirements.in django-extensions==3.2.3 # via -r requirements.in -django-filter==23.5 +django-filter==24.3 # via -r requirements.in django-js-asset==2.2.0 # via django-mptt -django-modeltranslation==0.18.11 +django-modeltranslation==0.19.7 # via # -r requirements.in # django-munigeo @@ -66,23 +64,23 @@ django-mptt==0.16.0 # via # -r requirements.in # django-munigeo -django-munigeo @ git+https://github.com/City-of-Helsinki/django-munigeo@v0.2.82 +django-munigeo @ git+https://github.com/City-of-Helsinki/django-munigeo@v0.2.86 # via -r requirements.in django-polymorphic==3.1.0 # via -r requirements.in -djangorestframework==3.14.0 +djangorestframework==3.15.2 # via # -r requirements.in # drf-spectacular djangorestframework-jsonp==1.0.2 # via -r requirements.in -drf-spectacular==0.27.1 +drf-spectacular==0.27.2 # via -r requirements.in -exceptiongroup==1.2.0 +exceptiongroup==1.2.2 # via # cattrs # pytest -flake8==7.0.0 +flake8==7.1.1 # via # -r requirements.in # pep8-naming @@ -90,7 +88,7 @@ geographiclib==2.0 # via geopy geopy==2.4.1 # via -r requirements.in -idna==3.6 +idna==3.7 # via requests inflection==0.5.1 # via drf-spectacular @@ -100,13 +98,13 @@ isort==5.13.2 # via -r requirements.in jedi==0.19.1 # via -r requirements.in -jsonschema==4.21.1 +jsonschema==4.23.0 # via drf-spectacular jsonschema-specifications==2023.12.1 # via jsonschema libvoikko==4.3 # via -r requirements.in -lxml==5.1.0 +lxml==5.3.0 # via -r requirements.in mccabe==0.7.0 # via flake8 @@ -114,83 +112,80 @@ munch==4.0.0 # via bmi-arcgis-restapi mypy-extensions==1.0.0 # via black -packaging==23.2 +packaging==24.1 # via # black # build # pytest -parso==0.8.3 +parso==0.8.4 # via # -r requirements.in # jedi pathspec==0.12.1 # via black -pep8-naming==0.13.3 +pep8-naming==0.14.1 # via -r requirements.in -pip-tools==7.4.0 +pip-tools==7.4.1 # via -r requirements.in -platformdirs==4.2.0 +platformdirs==4.2.2 # via # black # requests-cache -pluggy==1.4.0 +pluggy==1.5.0 # via pytest psycopg2==2.9.9 # via -r requirements.in -pycodestyle==2.11.1 +pycodestyle==2.12.1 # via flake8 pyflakes==3.2.0 # via flake8 -pyproject-hooks==1.0.0 +pyproject-hooks==1.1.0 # via # build # pip-tools -pytest==8.0.1 +pytest==8.3.2 # via # pytest-cov # pytest-django -pytest-cov==4.1.0 +pytest-cov==5.0.0 # via -r requirements.in pytest-django==4.8.0 # via -r requirements.in -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via -r requirements.in pytz==2024.1 - # via - # -r requirements.in - # djangorestframework -pyyaml==6.0.1 + # via -r requirements.in +pyyaml==6.0.2 # via # django-munigeo # drf-spectacular -referencing==0.33.0 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications -requests==2.31.0 +requests==2.32.3 # via # -r requirements.in # bmi-arcgis-restapi # django-munigeo # requests-cache # requests-mock -requests-cache==1.2.0 +requests-cache==1.2.1 # via -r requirements.in -requests-mock==1.11.0 +requests-mock==1.12.1 # via -r requirements.in -rpds-py==0.18.0 +rpds-py==0.20.0 # via # jsonschema # referencing -sentry-sdk==1.40.5 +sentry-sdk==2.13.0 # via -r requirements.in six==1.16.0 # via # django-munigeo # python-dateutil - # requests-mock # url-normalize -sqlparse==0.4.4 +sqlparse==0.5.1 # via django tomli==2.0.1 # via @@ -198,11 +193,10 @@ tomli==2.0.1 # build # coverage # pip-tools - # pyproject-hooks # pytest -tqdm==4.66.2 +tqdm==4.66.5 # via -r requirements.in -typing-extensions==4.9.0 +typing-extensions==4.12.2 # via # asgiref # black @@ -214,16 +208,16 @@ uritemplate==4.1.1 # via drf-spectacular url-normalize==1.4.3 # via requests-cache -urllib3==1.26.18 +urllib3==1.26.19 # via # -r requirements.in # bmi-arcgis-restapi # requests # requests-cache # sentry-sdk -wheel==0.42.0 +wheel==0.44.0 # via pip-tools -whitenoise==6.6.0 +whitenoise==6.7.0 # via -r requirements.in # The following packages are considered to be unsafe in a requirements file: diff --git a/services/api.py b/services/api.py index bd7e3c6d0..f45a26f4f 100644 --- a/services/api.py +++ b/services/api.py @@ -140,7 +140,7 @@ def __init__(self, *args, **kwargs): self.translated_fields = [] return - self.translated_fields = trans_opts.fields.keys() + self.translated_fields = trans_opts.all_fields.keys() # Remove the pre-existing data in the bundle. for field_name in self.translated_fields: for lang in LANGUAGES: diff --git a/services/fixtures/exclusion_words.json b/services/fixtures/exclusion_words.json new file mode 100644 index 000000000..dd36151f8 --- /dev/null +++ b/services/fixtures/exclusion_words.json @@ -0,0 +1,18 @@ +[ + { + "model": "services.exclusionword", + "pk": 1, + "fields": { + "word": "katu", + "language_short": "fi" + } + }, + { + "model": "services.exclusionword", + "pk": 2, + "fields": { + "word": "tie", + "language_short": "fi" + } + } +] \ No newline at end of file diff --git a/services/management/commands/index_search_columns.py b/services/management/commands/index_search_columns.py index 11f9820c0..85fc05ba8 100644 --- a/services/management/commands/index_search_columns.py +++ b/services/management/commands/index_search_columns.py @@ -1,11 +1,14 @@ import logging +from datetime import datetime, timedelta from django.contrib.postgres.search import SearchVector from django.core.management.base import BaseCommand +from django.utils import timezone from munigeo.models import Address, AdministrativeDivision from services.models import Service, ServiceNode, Unit -from services.search.utils import hyphenate +from services.search.constants import HYPHENATE_ADDRESSES_MODIFIED_WITHIN_DAYS +from services.search.utils import get_foreign_key_attr, hyphenate logger = logging.getLogger("services.management") @@ -27,17 +30,29 @@ def get_search_column(model, lang): return search_column -def generate_syllables(model): +def generate_syllables( + model, hyphenate_all_addresses=False, hyphenate_addresses_from=None +): """ Generates syllables for the given model. """ # Disable sending of signals model._meta.auto_created = True + save_kwargs = {} num_populated = 0 - for row in model.objects.all(): + if model.__name__ == "Address" and not hyphenate_all_addresses: + save_kwargs["skip_modified_at"] = True + if not hyphenate_addresses_from: + hyphenate_addresses_from = Address.objects.latest( + "modified_at" + ).modified_at - timedelta(days=HYPHENATE_ADDRESSES_MODIFIED_WITHIN_DAYS) + qs = model.objects.filter(modified_at__gte=hyphenate_addresses_from) + else: + qs = model.objects.all() + for row in qs.iterator(chunk_size=10000): row.syllables_fi = [] for column in model.get_syllable_fi_columns(): - row_content = getattr(row, column, None) + row_content = get_foreign_key_attr(row, column) if row_content: # Rows might be of type str or Array, if str # cast to array by splitting. @@ -45,9 +60,10 @@ def generate_syllables(model): row_content = row_content.split() for word in row_content: syllables = hyphenate(word) - for s in syllables: - row.syllables_fi.append(s) - row.save() + if len(syllables) > 1: + for s in syllables: + row.syllables_fi.append(s) + row.save(**save_kwargs) num_populated += 1 # Enable sending of signals model._meta.auto_created = False @@ -85,13 +101,43 @@ def index_servicenodes(lang): class Command(BaseCommand): - def handle(self, *args, **kwargs): + def add_arguments(self, parser): + parser.add_argument( + "--hyphenate_addresses_from", + nargs="?", + type=str, + help="Hyphenate addresses whose modified_at timestamp starts at given timestamp YYYY-MM-DDTHH:MM:SS", + ) + + parser.add_argument( + "--hyphenate_all_addresses", + action="store_true", + help="Hyphenate all addresses", + ) + + def handle(self, *args, **options): + hyphenate_all_addresses = options.get("hyphenate_all_addresses", False) + hyphenate_addresses_from = options.get("hyphenate_addresses_from", None) + + if hyphenate_addresses_from: + try: + hyphenate_addresses_from = timezone.make_aware( + datetime.strptime(hyphenate_addresses_from, "%Y-%m-%dT%H:%M:%S") + ) + except ValueError as err: + raise ValueError(err) for lang in ["fi", "sv", "en"]: key = "search_column_%s" % lang # Only generate syllables for the finnish language if lang == "fi": logger.info(f"Generating syllables for language: {lang}.") logger.info(f"Syllables generated for {generate_syllables(Unit)} Units") + num_populated = generate_syllables( + Address, + hyphenate_all_addresses=hyphenate_all_addresses, + hyphenate_addresses_from=hyphenate_addresses_from, + ) + logger.info(f"Syllables generated for {num_populated} Addresses") logger.info( f"Syllables generated for {generate_syllables(Service)} Services" ) diff --git a/services/management/commands/lipas_import_3d.py b/services/management/commands/lipas_import_3d.py index b764efa04..e74e3c497 100644 --- a/services/management/commands/lipas_import_3d.py +++ b/services/management/commands/lipas_import_3d.py @@ -30,7 +30,7 @@ def _save_geometries(self, geometries, units_by_lipas_id): unit.geometry_3d = geometry unit.save() else: - logger.error( + logger.warning( f"Failed to save unit {unit.name_fi} because of a missing z coordinate.", ) diff --git a/services/management/commands/services_import/departments.py b/services/management/commands/services_import/departments.py index 2c0e8a831..0448a1d80 100644 --- a/services/management/commands/services_import/departments.py +++ b/services/management/commands/services_import/departments.py @@ -48,7 +48,7 @@ def import_departments(noop=False, logger=None, fetch_resource=pk_get): parent = Department.objects.get(uuid=parent_id) obj.parent_id = parent.id except Department.DoesNotExist: - logger and logger.error( + logger and logger.warning( "Department import: no parent with uuid {} found for {}".format( parent_id, d["id"] ) diff --git a/services/migrations/0117_exclusionword.py b/services/migrations/0117_exclusionword.py new file mode 100644 index 000000000..4fae5f0ad --- /dev/null +++ b/services/migrations/0117_exclusionword.py @@ -0,0 +1,37 @@ +# Generated by Django 5.0.6 on 2024-05-20 10:19 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("services", "0116_alter_unit_address_postal_full_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="ExclusionWord", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("word", models.CharField(max_length=100, verbose_name="Word")), + ( + "language_short", + models.CharField(max_length=2, verbose_name="Language short"), + ), + ], + options={ + "verbose_name": "Exclusion word", + "verbose_name_plural": "Exclusion words", + "ordering": ["-id"], + }, + ), + ] diff --git a/services/models/__init__.py b/services/models/__init__.py index 59683c18b..2b74047de 100644 --- a/services/models/__init__.py +++ b/services/models/__init__.py @@ -4,7 +4,7 @@ from .keyword import Keyword from .mobility import MobilityServiceNode from .notification import Announcement, ErrorMessage -from .search_rule import ExclusionRule +from .search_rule import ExclusionRule, ExclusionWord from .service import Service, UnitServiceDetails from .service_mapping import ServiceMapping from .service_node import ServiceNode diff --git a/services/models/search_rule.py b/services/models/search_rule.py index 78c9c32b9..e1f0d8fe1 100644 --- a/services/models/search_rule.py +++ b/services/models/search_rule.py @@ -13,3 +13,16 @@ class Meta: def __str__(self): return "%s : %s" % (self.word, self.exclusion) + + +class ExclusionWord(models.Model): + word = models.CharField(max_length=100, verbose_name=_("Word")) + language_short = models.CharField(max_length=2, verbose_name=_("Language short")) + + class Meta: + ordering = ["-id"] + verbose_name = _("Exclusion word") + verbose_name_plural = _("Exclusion words") + + def __str__(self): + return self.word diff --git a/services/search/api.py b/services/search/api.py index 3d5db1548..9d9bd1bf2 100644 --- a/services/search/api.py +++ b/services/search/api.py @@ -27,9 +27,10 @@ from drf_spectacular.utils import extend_schema, OpenApiParameter from munigeo import api as munigeo_api from munigeo.models import Address, AdministrativeDivision -from rest_framework import serializers +from rest_framework import serializers, status from rest_framework.exceptions import ParseError from rest_framework.generics import GenericAPIView +from rest_framework.response import Response from services.api import ( TranslatedModelSerializer, @@ -60,6 +61,7 @@ get_preserved_order, get_service_node_results, get_trigram_results, + has_exclusion_word_in_query, set_address_fields, set_service_node_unit_count, set_service_unit_count, @@ -166,8 +168,12 @@ def to_representation(self, obj): if self.context["geometry"]: if hasattr(obj, "geometry"): + if isinstance(obj, AdministrativeDivision): + geometry = obj.geometry.boundary + else: + geometry = obj.geometry representation["geometry"] = munigeo_api.geom_to_json( - obj.geometry, DEFAULT_SRS + geometry, DEFAULT_SRS ) else: representation["geometry"] = None @@ -191,6 +197,12 @@ def to_representation(self, obj): representation["connections"] = UnitConnectionSerializer( obj.connections, many=True ).data + elif "department" in include_field: + representation["department"] = DepartmentSerializer( + obj.department + ).data + elif "municipality" in include_field: + representation["municipality"] = obj.municipality.id else: if hasattr(obj, include_field): representation[include_field] = getattr( @@ -454,6 +466,10 @@ def get(self, request): config_language = LANGUAGES[language_short] search_query_str = None # Used in the raw sql + # Replace multiple consecutive vertical bars with a single vertical bar to be used as an OR operator. + q_val = re.sub(r"\|+", "|", q_val) + # Remove vertical bars that are not between words to avoid errors in the query. + q_val = re.sub(r"(?, "street__name_fi" + ) + """ + fields = field.split("__") + if len(fields) == 1: + return getattr(obj, fields[0], None) + else: + first_field = fields[0] + remaining_fields = "__".join(fields[1:]) + return get_foreign_key_attr(getattr(obj, first_field), remaining_fields) + + def is_compound_word(word): result = voikko.analyze(word) if len(result) == 0: @@ -21,7 +48,7 @@ def is_compound_word(word): def hyphenate(word): """ - Returns a list of syllables of the word if it is a compound word. + Returns a list of syllables of the word, if it is a compound word. """ word = word.strip() if is_compound_word(word): @@ -199,15 +226,42 @@ def get_preserved_order(ids): def get_trigram_results( model, model_name, field, q_val, threshold=DEFAULT_TRIGRAM_THRESHOLD ): - sql = f"""SELECT id, similarity({field}, '{q_val}') AS sml + sql = f"""SELECT id, similarity({field}, %s) AS sml FROM {model_name} - WHERE similarity({field}, '{q_val}') >= {threshold} + WHERE similarity({field}, %s) >= {threshold} ORDER BY sml DESC; """ cursor = connection.cursor() - cursor.execute(sql) + try: + cursor.execute(sql, [q_val, q_val]) + except Exception as e: + logger.error(f"Error in similarity query: {e}") + raise ParseError("Similarity query failed.") all_results = cursor.fetchall() - ids = [row[0] for row in all_results] objs = model.objects.filter(id__in=ids) return objs + + +def get_search_exclusions(q): + """ + To add/modify search exclusion rules edit: services/fixtures/exclusion_rules + To import rules: ./manage.py loaddata services/fixtures/exclusion_rules.json + """ + rule = ExclusionRule.objects.filter(word__iexact=q).first() + if rule: + return rule.exclusion + return "" + + +def has_exclusion_word_in_query(q_vals, language_short): + """ + To add/modify search exclusion words edit: services/fixtures/exclusion_words.json + To import words: ./manage.py loaddata services/fixtures/exclusion_words.json + """ + return ( + ExclusionWord.objects.filter(language_short=language_short) + .annotate(word_lower=Lower("word")) + .filter(word_lower__in=[q.lower() for q in q_vals]) + .exists() + ) diff --git a/setup.py b/setup.py index d1d4b408c..a3572bd37 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="smbackend", - version="240507", + version="240508", license="AGPLv3", packages=find_packages(), include_package_data=True,