diff --git a/integreat_cms/cms/forms/regions/region_form.py b/integreat_cms/cms/forms/regions/region_form.py index b3a29df3f3..8850265fa4 100644 --- a/integreat_cms/cms/forms/regions/region_form.py +++ b/integreat_cms/cms/forms/regions/region_form.py @@ -24,7 +24,7 @@ from ....nominatim_api.nominatim_api_client import NominatimApiClient from ...constants import duplicate_pbo_behaviors, status from ...models import LanguageTreeNode, OfferTemplate, Page, PageTranslation, Region -from ...models.regions.region import format_mt_help_text +from ...models.regions.region import format_mt_help_text, format_summ_ai_help_text from ...utils.slug_utils import generate_unique_slug_helper from ...utils.translation_utils import gettext_many_lazy as __ from ..custom_model_form import CustomModelForm @@ -131,6 +131,17 @@ class RegionForm(CustomModelForm): required=False, ) + summ_ai_midyear_start_enabled = forms.BooleanField( + required=False, + label=_("Budget year start differs from the renewal date"), + help_text=__( + _("Enable to set starting date differing from the renewal date."), + format_summ_ai_help_text( + _("Budget will be set as a monthly fraction of {} credits") + ), + ), + ) + mt_midyear_start_enabled = forms.BooleanField( required=False, label=_("Budget year start differs from the renewal date"), @@ -194,6 +205,9 @@ class Meta: "timezone", "fallback_translations_enabled", "summ_ai_enabled", + "summ_ai_renewal_month", + "summ_ai_midyear_start_enabled", + "summ_ai_midyear_start_month", "hix_enabled", "mt_renewal_month", "mt_addon_booked", @@ -231,6 +245,9 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self.fields["summ_ai_enabled"].disabled = True if not settings.TEXTLAB_API_ENABLED and not self.instance.hix_enabled: self.fields["hix_enabled"].disabled = True + self.fields["summ_ai_midyear_start_enabled"].initial = ( + self.instance.summ_ai_midyear_start_month is not None + ) self.fields["mt_midyear_start_enabled"].initial = ( 
self.instance.mt_midyear_start_month is not None ) @@ -348,7 +365,25 @@ def clean(self) -> dict[str, Any]: else: cleaned_data["matomo_id"] = None - # If MT budget year differs from renewal date is set, make sure a budget year start date is set + # If Summ AI budget year differs from the set renewal date, make sure a budget year start date is set + if ( + cleaned_data["summ_ai_midyear_start_enabled"] + and cleaned_data["summ_ai_midyear_start_month"] is None + ): + self.add_error( + "summ_ai_midyear_start_month", + _( + "Please provide a valid budget year start date for simplified language translation." + ), + ) + elif ( + not cleaned_data["summ_ai_midyear_start_enabled"] + or cleaned_data["summ_ai_midyear_start_month"] + == cleaned_data["summ_ai_renewal_month"] + ): + cleaned_data["summ_ai_midyear_start_month"] = None + + # If MT budget year differs from the set renewal date, make sure a budget year start date is set if ( cleaned_data["mt_midyear_start_enabled"] and cleaned_data["mt_midyear_start_month"] is None diff --git a/integreat_cms/cms/migrations/0103_region_summ_ai_budget_used_and_more.py b/integreat_cms/cms/migrations/0103_region_summ_ai_budget_used_and_more.py new file mode 100644 index 0000000000..4e0d28120e --- /dev/null +++ b/integreat_cms/cms/migrations/0103_region_summ_ai_budget_used_and_more.py @@ -0,0 +1,69 @@ +# Generated by Django 4.2.13 on 2024-08-22 10:48 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + """ + Add fields for tracking SUMM.AI budget + """ + + dependencies = [ + ("cms", "0102_alter_contact_poi"), + ] + + operations = [ + migrations.AddField( + model_name="region", + name="summ_ai_budget_used", + field=models.PositiveIntegerField(default=0, verbose_name="used budget"), + ), + migrations.AddField( + model_name="region", + name="summ_ai_midyear_start_month", + field=models.PositiveIntegerField( + blank=True, + choices=[ + (0, "January"), + (1, "February"), + (2, "March"), + (3, "April"), + (4, 
"May"), + (5, "June"), + (6, "July"), + (7, "August"), + (8, "September"), + (9, "October"), + (10, "November"), + (11, "December"), + ], + default=None, + help_text="Month from which SUMM.AI was booked", + null=True, + verbose_name="Budget year start date for simplified language translation", + ), + ), + migrations.AddField( + model_name="region", + name="summ_ai_renewal_month", + field=models.PositiveIntegerField( + choices=[ + (0, "January"), + (1, "February"), + (2, "March"), + (3, "April"), + (4, "May"), + (5, "June"), + (6, "July"), + (7, "August"), + (8, "September"), + (9, "October"), + (10, "November"), + (11, "December"), + ], + default=0, + help_text="Budget usage will be reset on the 1st of the month", + verbose_name="Credits renewal date for simplified language translation", + ), + ), + ] diff --git a/integreat_cms/cms/models/regions/region.py b/integreat_cms/cms/models/regions/region.py index aa1bb13f78..2be0b7bf04 100644 --- a/integreat_cms/cms/models/regions/region.py +++ b/integreat_cms/cms/models/regions/region.py @@ -46,6 +46,19 @@ logger = logging.getLogger(__name__) +@keep_lazy_text +def format_summ_ai_help_text(help_text: Promise) -> str: + """ + Helper function to lazily format help text with number separators + + :param help_text: MT field help text to format + :return: formatted help text + """ + return help_text.format( + floatformat(settings.SUMM_AI_CREDITS, "g"), + ) + + @keep_lazy_text def format_mt_help_text(help_text: Promise) -> str: """ @@ -358,6 +371,27 @@ class Region(AbstractBaseModel): ), ) + summ_ai_midyear_start_month = models.PositiveIntegerField( + default=None, + blank=True, + null=True, + choices=months.CHOICES, + verbose_name=_("Budget year start date for simplified language translation"), + help_text=_("Month from which SUMM.AI was booked"), + ) + + summ_ai_renewal_month = models.PositiveIntegerField( + choices=months.CHOICES, + default=months.JANUARY, + verbose_name=_("Credits renewal date for simplified language 
translation"), + help_text=_("Budget usage will be reset on the 1st of the month"), + ) + + summ_ai_budget_used = models.PositiveIntegerField( + default=0, + verbose_name=_("used budget"), + ) + mt_renewal_month = models.PositiveIntegerField( choices=months.CHOICES, default=months.JANUARY, @@ -805,6 +839,25 @@ def mt_budget(self) -> int: multiplier = (months_difference % 12) / 12 return int(multiplier * settings.MT_CREDITS_ADDON + settings.MT_CREDITS_FREE) + @property + def summ_ai_budget(self) -> int: + """ + Calculate the maximum translation credit budget (number of words) for simplified translations + + :return: The region's total budget for simplified translations + """ + # All regions which did not start mid-year get the full credits (compare against None, since January is stored as 0) + if self.summ_ai_midyear_start_month is None: + return settings.SUMM_AI_CREDITS + # All regions which booked the add-on in mid-year get a fraction of the add-on credits + # Calculate how many months lie between the renewal month and the start month of the add-on + months_difference = ( + self.summ_ai_renewal_month - self.summ_ai_midyear_start_month + ) + # Calculate the available fraction of the add-on + multiplier = (months_difference % 12) / 12 + return int(multiplier * settings.SUMM_AI_CREDITS) + @property def mt_budget_remaining(self) -> int: """ @@ -814,6 +867,15 @@ def mt_budget_remaining(self) -> int: """ return max(0, self.mt_budget - self.mt_budget_used) + @property + def summ_ai_budget_remaining(self) -> int: + """ + Calculate the remaining translation credit budget (number of words) for simplified translations + + :return: The region's remaining budget for simplified translations + """ + return max(0, self.summ_ai_budget - self.summ_ai_budget_used) + @cached_property def backend_edit_link(self) -> str: """ diff --git a/integreat_cms/cms/templates/regions/region_form.html b/integreat_cms/cms/templates/regions/region_form.html index e26eee7cb2..9aea81626f 100644 --- 
a/integreat_cms/cms/templates/regions/region_form.html +++ b/integreat_cms/cms/templates/regions/region_form.html @@ -275,7 +275,9 @@

- + {% render_field form.summ_ai_enabled class+="inline-block" %}
{% endif %} + {% if form.instance.id %} +
+
+ +

+ {{ form.instance.summ_ai_budget|intcomma }} +

+
+
+ +

+ {{ form.instance.summ_ai_budget_used|intcomma }} +

+
+
+ +

+ {{ form.instance.summ_ai_budget_remaining|intcomma }} +

+
+
+ {% endif %} + + {% render_field form.summ_ai_renewal_month %} +
+ {{ form.summ_ai_renewal_month.help_text }} +
+
+ {% render_field form.summ_ai_midyear_start_enabled %} + +
+ {{ form.summ_ai_midyear_start_enabled.help_text }} +
+
+ + {% render_field form.summ_ai_midyear_start_month %} +
+ {{ form.summ_ai_midyear_start_month.help_text }} +
+
+
diff --git a/integreat_cms/core/settings.py b/integreat_cms/core/settings.py index 9403a5e0a9..d15da0cc07 100644 --- a/integreat_cms/core/settings.py +++ b/integreat_cms/core/settings.py @@ -979,6 +979,9 @@ #: This is ``True`` if SUMM_AI_API_KEY is set, ``False`` otherwise. SUMM_AI_ENABLED: bool = bool(SUMM_AI_API_KEY) +#: An integer specifying the number of translation credits for simplified translations that can be bought as an add-on +SUMM_AI_CREDITS: Final[int] = int(os.environ.get("SUMM_AI_CREDITS", 10_000)) + #: Whether requests to the SUMM.AI are done with the ``is_test`` flag SUMM_AI_TEST_MODE: Final[bool] = strtobool( os.environ.get("INTEGREAT_CMS_SUMM_AI_TEST_MODE", str(DEBUG)) @@ -1041,6 +1044,13 @@ ).splitlines() ] +#: A floating point that specifies the percentage of SUMM_AI_CREDITS used as a soft margin +SUMM_AI_SOFT_MARGIN_FRACTION: Final[float] = float( + os.environ.get("INTEGREAT_CMS_SUMM_AI_SOFT_MARGIN", MT_SOFT_MARGIN_FRACTION) +) + +#: The actual number of words which are used as soft margin +SUMM_AI_SOFT_MARGIN: Final[int] = int(SUMM_AI_SOFT_MARGIN_FRACTION * SUMM_AI_CREDITS) ################ # STATIC FILES # diff --git a/integreat_cms/core/utils/machine_translation_api_client.py b/integreat_cms/core/utils/machine_translation_api_client.py index 58bdcde3a2..ff8b4d4085 100644 --- a/integreat_cms/core/utils/machine_translation_api_client.py +++ b/integreat_cms/core/utils/machine_translation_api_client.py @@ -17,13 +17,11 @@ from ...cms.models import ( Event, - EventTranslation, Page, - PageTranslation, POI, - POITranslation, Region, ) + from ...cms.models.abstract_content_translation import AbstractContentTranslation from .word_count import word_count @@ -79,16 +77,15 @@ def translate_object(self, obj: Event | Page | POI, language_slug: str) -> None: def check_usage( self, region: Region, - source_translation: EventTranslation | (PageTranslation | POITranslation), + source_translation: str | AbstractContentTranslation, ) -> tuple[bool, int]: """ 
This function checks if the attempted translation would exceed the region's word limit :param region: region for which to check usage :param source_translation: single content object - :return: translation would exceed limit, region budget, attempted translation word count + :return: translation would exceed limit, word count of attempted translation """ - words = word_count(source_translation) # Check if translation would exceed MT usage limit diff --git a/integreat_cms/core/utils/word_count.py b/integreat_cms/core/utils/word_count.py index 6997947561..d07f349e80 100644 --- a/integreat_cms/core/utils/word_count.py +++ b/integreat_cms/core/utils/word_count.py @@ -1,28 +1,31 @@ from __future__ import annotations from html import unescape -from typing import TYPE_CHECKING from django.utils.html import strip_tags -if TYPE_CHECKING: - - from ...cms.models import EventTranslation, PageTranslation, POITranslation +from ...cms.models.abstract_content_translation import AbstractContentTranslation def word_count( - translation: EventTranslation | (PageTranslation | POITranslation), + translation: str | AbstractContentTranslation, ) -> int: """ This function counts the number of words in a content translation """ - attributes = [ - getattr(translation, attr, None) - for attr in ["title", "content", "meta_description"] - ] + if isinstance(translation, AbstractContentTranslation): + attributes = [ + getattr(translation, attr, None) + for attr in ["title", "content", "meta_description"] + ] + + content_to_translate = [ + unescape(strip_tags(attr)) for attr in attributes if attr + ] + content_to_translate_str = " ".join(content_to_translate) + else: + content_to_translate_str = translation - content_to_translate = [unescape(strip_tags(attr)) for attr in attributes if attr] - content_to_translate_str = " ".join(content_to_translate) for char in "-;:,;!?\n": content_to_translate_str = content_to_translate_str.replace(char, " ") diff --git 
a/integreat_cms/locale/de/LC_MESSAGES/django.po b/integreat_cms/locale/de/LC_MESSAGES/django.po index 48c44ac241..33836c3d46 100644 --- a/integreat_cms/locale/de/LC_MESSAGES/django.po +++ b/integreat_cms/locale/de/LC_MESSAGES/django.po @@ -2261,15 +2261,21 @@ msgid "Budget year start differs from the renewal date" msgstr "Unterjähriger Start des Abrechnungszeitraums" #: cms/forms/regions/region_form.py -msgid "Enable to set an add-on starting date differing from the renewal date." +msgid "Enable to set starting date differing from the renewal date." msgstr "" -"Aktivieren, um ein Startdatum für das Add-On Paket anzugeben, welches sich " -"vom Zurücksetzungsdatum unterscheidet." +"Aktivieren, um ein Startdatum anzugeben, welches sich vom " +"Zurücksetzungsdatum unterscheidet." #: cms/forms/regions/region_form.py msgid "Budget will be set as a monthly fraction of {} credits" msgstr "Das Budget wird monatsanteilig von {} Credits berechnet." +#: cms/forms/regions/region_form.py +msgid "Enable to set an add-on starting date differing from the renewal date." +msgstr "" +"Aktivieren, um ein Startdatum für das Add-On Paket anzugeben, welches sich " +"vom Zurücksetzungsdatum unterscheidet." + #: cms/forms/regions/region_form.py msgid "Zammad forms" msgstr "Zammad Formulare" @@ -2292,6 +2298,14 @@ msgstr "" msgid "The provided access token is invalid." msgstr "Das eingegebene Zugangstoken ist ungültig." +#: cms/forms/regions/region_form.py +msgid "" +"Please provide a valid budget year start date for simplified language " +"translation." +msgstr "" +"Bitte geben Sie ein gültiges Startdatum für den Abrechnungszeitraum der " +"Übersetzung in Einfache Sprache an." 
+ #: cms/forms/regions/region_form.py msgid "" "Please provide a valid budget year start date for foreign language " @@ -4072,13 +4086,31 @@ msgstr "" "Ob automatische Übersetzungen in Leichte Sprache mit SUMM.AI aktiviert sind" #: cms/models/regions/region.py -msgid "Credits renewal date for foreign language translation" -msgstr "Credits Zurücksetzungsdatum für Fremdsprachenübersetzung" +msgid "Budget year start date for simplified language translation" +msgstr "" +"Unterjähriger Start des Abrechnungszeitraums für Übersetzung in Einfache " +"Sprache" + +#: cms/models/regions/region.py +msgid "Month from which SUMM.AI was booked" +msgstr "Monat ab welchem SUMM.AI gebucht wurde" + +#: cms/models/regions/region.py +msgid "Credits renewal date for simplified language translation" +msgstr "Credits Zurücksetzungsdatum für Übersetzung in Einfache Sprache" #: cms/models/regions/region.py msgid "Budget usage will be reset on the 1st of the month" msgstr "Das Budget wird zum 1. des angegebenen Monats zurückgesetzt" +#: cms/models/regions/region.py +msgid "used budget" +msgstr "verbrauchtes Budget" + +#: cms/models/regions/region.py +msgid "Credits renewal date for foreign language translation" +msgstr "Credits Zurücksetzungsdatum für Fremdsprachenübersetzung" + #: cms/models/regions/region.py msgid "Add-on package for foreign languages booked" msgstr "Add-On Paket für Fremdsprachenübersetzung gebucht" @@ -4100,10 +4132,6 @@ msgstr "" msgid "Month from which the add-on package was booked" msgstr "Monat ab welchem das Add-On Paket gebucht wurde" -#: cms/models/regions/region.py -msgid "used budget" -msgstr "verbrauchtes Budget" - #: cms/models/regions/region.py cms/templates/_base.html #: cms/templates/organizations/organization_form.html msgid "Pages" @@ -7822,12 +7850,8 @@ msgid "Features" msgstr "Funktionen" #: cms/templates/regions/region_form.html -msgid "Currently HIX is globally deactivated" -msgstr "Derzeit ist HIX global deaktiviert" - -#: 
cms/templates/regions/region_form.html -msgid "Foreign language machine translation" -msgstr "Maschinelle Übersetzungen für Fremdsprachen verwalten" +msgid "Simplified language machine translation" +msgstr "Maschinelle Übersetzungen für Einfache Sprache verwalten" #: cms/templates/regions/region_form.html msgid "Current total budget" @@ -7841,6 +7865,14 @@ msgstr "Bereits verbraucht" msgid "Remaining words" msgstr "Verbleibende Wörter" +#: cms/templates/regions/region_form.html +msgid "Currently HIX is globally deactivated" +msgstr "Derzeit ist HIX global deaktiviert" + +#: cms/templates/regions/region_form.html +msgid "Foreign language machine translation" +msgstr "Maschinelle Übersetzungen für Fremdsprachen verwalten" + #: cms/templates/regions/region_form.html msgid "" "An access token for this region exists, but is not shown here for security " @@ -10781,6 +10813,9 @@ msgstr "" #~ "Der Ort kann nicht archiviert werden, da er von einer Veranstaltung " #~ "verwendet wird." +#~ msgid "Add-on package for simplified language booked" +#~ msgstr "Add-On Paket für Übersetzung in Einfache Sprache gebucht" + #~ msgid "\"{} {}\" was not in translation process." 
#~ msgid_plural "The following \"{}\" were not in translation process: \"{}\"" #~ msgstr[0] "\"{} {}\" war nicht im Überestzungsprozess" diff --git a/integreat_cms/static/src/js/conditional-fields.ts b/integreat_cms/static/src/js/conditional-fields.ts index 03d936edde..767b7ced6f 100644 --- a/integreat_cms/static/src/js/conditional-fields.ts +++ b/integreat_cms/static/src/js/conditional-fields.ts @@ -2,6 +2,8 @@ window.addEventListener("load", () => { // event handler to toggle form fields const toggleables = [ ["id_statistics_enabled", "statistics-toggle-div"], + ["id_summ_ai_enabled", "summ-ai-toggle-div"], + ["id_summ_ai_midyear_start_enabled", "summ-ai-renewal-toggle-div"], ["id_mt_addon_booked", "mt-toggle-div"], ["id_mt_midyear_start_enabled", "mt-renewal-toggle-div"], ["id_automatic_translation", "language-options"], diff --git a/integreat_cms/summ_ai_api/summ_ai_api_client.py b/integreat_cms/summ_ai_api/summ_ai_api_client.py index 1ddd8d97b0..d031c4ee0b 100644 --- a/integreat_cms/summ_ai_api/summ_ai_api_client.py +++ b/integreat_cms/summ_ai_api/summ_ai_api_client.py @@ -11,6 +11,7 @@ from typing import TYPE_CHECKING import aiohttp +from asgiref.sync import sync_to_async from django.conf import settings from django.contrib import messages from django.utils.translation import gettext_lazy as _ @@ -20,6 +21,7 @@ from ..core.utils.machine_translation_api_client import MachineTranslationApiClient from ..core.utils.machine_translation_provider import MachineTranslationProvider from .utils import ( + BudgetEstimate, HTMLSegment, PatientTaskQueue, SummAiInvalidJSONError, @@ -39,7 +41,11 @@ from django.forms.models import ModelFormMetaclass from django.http import HttpRequest + from ..cms.models.abstract_content_translation import AbstractContentTranslation from ..cms.models.pages.page import Page + from ..cms.models.regions.region import Region + +from ..core.utils.word_count import word_count logger = logging.getLogger(__name__) @@ -69,6 +75,31 @@ def 
__init__(self, request: HttpRequest, form_class: ModelFormMetaclass) -> None if not self.region.summ_ai_enabled: raise RuntimeError(f"SUMM.AI is disabled in {self.region!r}.") + def check_usage( + self, + region: Region, + source_translation: str | AbstractContentTranslation, + allocated_budget: int = 0, + ) -> tuple[bool, int]: + """ + This function checks if the attempted translation would exceed the region's word limit + + :param region: region for which to check usage + :param source_translation: single content object + :param allocated_budget: how many additional words should be considered already spent + :return: translation would exceed limit, word count of attempted translation + """ + words = word_count(source_translation) + + region.refresh_from_db() + # Allow up to SUMM_AI_SOFT_MARGIN more words than the actual limit + word_count_leeway = max( + 1, words + allocated_budget - settings.SUMM_AI_SOFT_MARGIN + ) + translation_exceeds_limit = region.summ_ai_budget_remaining < word_count_leeway + + return (translation_exceeds_limit, words) + async def translate_text_field( self, session: ClientSession, text_field: TextField ) -> TextField: @@ -107,6 +138,7 @@ async def translate_text_field( # something must have gone wrong. # Raise an exception without immediately catching it! raise SummAiRuntimeError("Field to translate is None or empty") + try: async with session.post( settings.SUMM_AI_API_URL, @@ -138,7 +170,11 @@ async def translate_text_field( text_field.translate(response_data["translated_text"]) # If text is not in response, validate_response() # will raise exceptions - so we don't need an else branch. 
- except (aiohttp.ClientError, asyncio.TimeoutError, SummAiRuntimeError) as e: + except ( + aiohttp.ClientError, + asyncio.TimeoutError, + SummAiRuntimeError, + ) as e: logger.error( "SUMM.AI translation of %r failed because of %s: %s", text_field, @@ -149,7 +185,7 @@ async def translate_text_field( return text_field async def translate_text_fields( - self, loop: AbstractEventLoop, text_fields: Iterator[TextField] + self, loop: AbstractEventLoop, translation_helpers: list[TranslationHelper] ) -> chain[list[TextField]]: """ Translate a list of text fields from German into Easy German. @@ -158,24 +194,14 @@ async def translate_text_fields( for each entry. :param loop: The asyncio event loop - :param text_fields: The text fields to be translated + :param translation_helpers: The translation helper to be translated :returns: The list of completed text fields """ # Set a custom SUMM.AI timeout timeout = aiohttp.ClientTimeout(total=60 * settings.SUMM_AI_TIMEOUT) + translations = iter(translation_helpers) async with aiohttp.ClientSession(timeout=timeout) as session: - # Create tasks for each text field - tasks = [ - # translate_text_field() gives us a coroutine that can be executed - # asynchronously as a task. If we have to repeat the task - # (e.g. if we run into rate limiting and have to resend the request), - # we need a NEW coroutine object. - # For that case, we need a representation of our function which can be - # evaluated when needed, giving a new coroutine for the task each time. 
- partial(self.translate_text_field, session, text_field) - for text_field in text_fields - ] # If the translation is aborted, set the exception field # to both signal that this wasn't translated and to display a reason why @@ -186,7 +212,67 @@ def abort_function(task: partial, reason: Any) -> None: field.exception = f"Machine translation aborted: {reason}" # A "patient" task queue which only hands out sleep tasks after a task was reported as failed - task_generator = PatientTaskQueue(tasks, abort_function=abort_function) + task_generator: PatientTaskQueue[partial] = PatientTaskQueue( + [], abort_function=abort_function + ) + + async def manage() -> None: + """ + A management task to schedule more translation tasks, but only if the sum of all text fields + of the whole content translation won't exceed the region's remaining budget. + + The goal is to add all text fields of the next content translation if it fits in the budget, + as long as there are fewer than ``SUMM_AI_MAX_CONCURRENT_REQUESTS`` tasks in the :class:`~integreat_cms.summ_ai_api.utils.PatientTaskQueue` + – but at least one content translation worth of text fields per run (unless there are no more translation objects to try). + Finally, if successful and we were able to add any tasks, schedule another ``manage()`` to repeat this once the workers processed all the items. + + This tries to always keep the queue filled with at least as many tasks as there are workers, + such that there is no situation that a worker attempts to fetch a task from an empty queue + """ + # Telling the task_generator to stall workers asking for new tasks until we're done adding them + task_generator.more_tasks_pending += 1 + while translation_helper := next(translations, None): + # allocate_budget has to run in a sync context for django/db reasons, hence why it looks this ugly. 
+ # thread_sensitive does not technically need to be changed to False, + # but the way we test this django app would give us a + # RuntimeError: You cannot submit onto CurrentThreadExecutor from its own thread + # otherwise. + if not await sync_to_async( + translation_helper.allocate_budget, thread_sensitive=False + )(): + # This content translation object does not fit in the budget, but maybe the next will be smaller? + continue + + # Create tasks for each text field + task_generator.extend( + [ + # translate_text_field() gives us a coroutine that can be executed + # asynchronously as a task. If we have to repeat the task + # (e.g. if we run into rate limiting and have to resend the request), + # we need a NEW coroutine object. + # For that case, we need a representation of our function which can be + # evaluated when needed, giving a new coroutine for the task each time. + partial(self.translate_text_field, session, text_field) + for text_field in translation_helper.get_text_fields() + ] + ) + + # We're adding 1 because the next manage will also be a task + if ( + len(task_generator) + 1 + >= settings.SUMM_AI_MAX_CONCURRENT_REQUESTS + ): + task_generator.append(partial(manage)) + break + + # Telling the task_generator we're done adding new tasks and workers no longer need to be stalled + task_generator.more_tasks_pending -= 1 + + # We want to fill the queue before the worker can get to work. + # This is critical because if the queue starts out with only the first manage task, + # then while the first worker is busy creating the next tasks, the other workers + # might already find an empty queue and quit, leaving only the first worker to do all tasks. 
+ await manage() # Wait for all tasks to finish and collect the results worker_results = await asyncio.gather( @@ -207,33 +293,40 @@ def translate_queryset(self, queryset: list[Page], language_slug: str) -> None: :param queryset: The queryset which should be translated :param language_slug: The target language slug to translate into """ + region = self.request.region # Make sure both languages exist - self.request.region.get_language_or_404(settings.SUMM_AI_GERMAN_LANGUAGE_SLUG) - easy_german = self.request.region.get_language_or_404( + region.get_language_or_404(settings.SUMM_AI_GERMAN_LANGUAGE_SLUG) + easy_german = region.get_language_or_404( settings.SUMM_AI_EASY_GERMAN_LANGUAGE_SLUG ) + budget_estimate = BudgetEstimate( + check_usage=lambda source_translation, allocated_budget: self.check_usage( + region, source_translation, allocated_budget + ) + ) + # Initialize translation helpers for each object instance translation_helpers = [ - TranslationHelper(self.request, self.form_class, object_instance) + TranslationHelper( + self.request, + self.form_class, + object_instance, + budget_estimate=budget_estimate, + ) for object_instance in queryset ] - # Aggregate all strings that need to be translated - text_fields = chain( - *[ - translation_helper.get_text_fields() - for translation_helper in translation_helpers - ] - ) - # Initialize async event loop loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) # Translate queryset asynchronously in parallel - loop.run_until_complete(self.translate_text_fields(loop, text_fields)) + loop.run_until_complete(self.translate_text_fields(loop, translation_helpers)) + + # Refresh the region object in case the budget used changed in the meantime + region.refresh_from_db() # Commit changes to the database successes = [] @@ -247,6 +340,9 @@ def translate_queryset(self, queryset: list[Page], language_slug: str) -> None: else: errors.append(translation_helper.german_translation.title) + region.summ_ai_budget_used += 
translation_helper.word_count + region.save() + if translation_helpers: meta = type(translation_helpers[0].object_instance)._meta model_name = meta.verbose_name.title() diff --git a/integreat_cms/summ_ai_api/utils.py b/integreat_cms/summ_ai_api/utils.py index 58ede925c2..c295156c29 100644 --- a/integreat_cms/summ_ai_api/utils.py +++ b/integreat_cms/summ_ai_api/utils.py @@ -9,6 +9,7 @@ import logging import time from collections import deque +from collections.abc import Callable from html import unescape from typing import Generic, TYPE_CHECKING, TypeVar @@ -19,7 +20,6 @@ from lxml.html import fromstring, HtmlElement, tostring if TYPE_CHECKING: - from collections.abc import Callable from functools import partial from typing import Any @@ -217,19 +217,16 @@ class TranslationHelper: :param form_class: The subclass of the current content type :param object_instance: The current object instance to be translated :param german_translation: The German source translation of the object instance - :param valid: Wether or not the translation was successful :param text_fields: The text fields of this helper :param html_fields: The HTML fields of this helper """ - #: Wether or not the translation was successful - valid: bool = True - def __init__( self, request: HttpRequest, form_class: ModelFormMetaclass, object_instance: Page, + budget_estimate: BudgetEstimate, ) -> None: """ Constructor initializes the class variables @@ -253,7 +250,6 @@ def __init__( object_instance.best_translation.title, ), ) - self.valid = False return self.text_fields: list[TextField] = [ TextField(name=text_field, translation=self.german_translation) @@ -263,6 +259,56 @@ def __init__( HTMLField(name=html_field, translation=self.german_translation) for html_field in settings.SUMM_AI_HTML_FIELDS ] + self.budget_estimate = budget_estimate + + @property + def valid(self) -> bool: + """ + Whether a German source translation exists for the object instance + """ + return self.german_translation is not None + + def check_usage(self) 
-> tuple[bool, int]: + """ + This function checks if the attempted translation would exceed word limit. + + The result is not cached, every call re-evaluates the usage against the currently allocated budget. + + :return: translation would exceed limit, word count of attempted translation + """ + return self.budget_estimate.check_usage(self.plain_text) + + @property + def would_exceed_limit(self) -> bool: + """ + Whether an attempted translation would exceed the word limit + + This value is not cached, it is re-evaluated on every access. + + :return: whether translation would exceed limit + """ + return self.check_usage()[0] + + @property + def word_count(self) -> int: + """ + How many words need to be translated. + + This value is not cached, it is re-evaluated on every access. + + :return: word count of attempted translation + """ + return self.check_usage()[1] + + @property + def plain_text(self) -> str: + """ + All relevant fields to translate concatenated into a single string. + Useful for determining the word count required. 
+ + :return: all translatable content as a plain text string + """ + return "\n".join([x.text for x in self.get_text_fields()]) @property def fields(self) -> list[HTMLField | TextField]: @@ -273,6 +319,14 @@ def fields(self) -> list[HTMLField | TextField]: """ return self.text_fields + self.html_fields + def allocate_budget(self) -> bool: + """ + Allocate budget for the translation if it fits + + :returns: Whether the budget could be allocated or would have exceeded the limit + """ + return self.budget_estimate.allocate(self.plain_text) + def get_text_fields(self) -> list[HTMLSegment]: """ Get all text fields of this helper instance @@ -378,6 +432,38 @@ def __repr__(self) -> str: return f"" +class BudgetEstimate: + """ + A helper class to keep track of the allocated budget for asynchronous translations + """ + + def __init__(self, check_usage: Callable[[str, int], tuple[bool, int]]): + self._check_usage = check_usage + self.allocated = 0 + + def check_usage( + self, source_translation: str | AbstractContentTranslation + ) -> tuple[bool, int]: + """ + This function checks if the attempted translation would exceed word limit. 
+ + :return: translation would exceed limit, word count of attempted translation + """ + return self._check_usage(source_translation, self.allocated) + + def allocate(self, source_translation: str | AbstractContentTranslation) -> bool: + """ + Attempt to allocate the required budget for the translation + + :returns: ``True`` if budget was allocated, ``False`` if the budget limit would be exceeded + """ + (translation_exceeds_limit, budget) = self.check_usage(source_translation) + if translation_exceeds_limit: + return False + self.allocated += budget + return True + + T = TypeVar("T") @@ -401,6 +487,9 @@ class PatientTaskQueue(deque, Generic[T]): #: Maximum amount of retries for a string to translate before giving up max_retries: int = settings.SUMM_AI_MAX_RETRIES + #: When greater than 0, more tasks are being added and workers asking for tasks when we have run dry should be stalled until new tasks become available + more_tasks_pending: int = 0 + def __init__( self, tasks: list[T], @@ -458,6 +547,11 @@ async def __anext__(self) -> T: wait_time_remaining, ) await asyncio.sleep(wait_time_remaining) + + while not self and self.more_tasks_pending > 0: + # If we currently have no tasks but are promised more, wait just a moment + await asyncio.sleep(0.001) + try: task = self.popleft() self._in_progress.append(task) diff --git a/tests/cms/test_duplicate_regions.py b/tests/cms/test_duplicate_regions.py index f67ac12055..a3896c2a4a 100644 --- a/tests/cms/test_duplicate_regions.py +++ b/tests/cms/test_duplicate_regions.py @@ -57,6 +57,7 @@ def test_duplicate_regions( "zammad_url": "https://zammad-test.example.com", "timezone": "Europe/Berlin", "mt_renewal_month": 6, + "summ_ai_renewal_month": 6, }, ) print(response.headers) diff --git a/tests/cms/views/view_config.py b/tests/cms/views/view_config.py index 28626566b4..74690d78d8 100644 --- a/tests/cms/views/view_config.py +++ b/tests/cms/views/view_config.py @@ -784,6 +784,7 @@ "latitude": 1, "timezone": "Europe/Berlin", 
"mt_renewal_month": 6, + "summ_ai_renewal_month": 6, "offers": [3], "zammad_offers": [5], "zammad_url": "https://zammad-test.example.com", @@ -802,6 +803,7 @@ "latitude": 1, "timezone": "Europe/Berlin", "mt_renewal_month": 6, + "summ_ai_renewal_month": 6, "offers": [3], "zammad_offers": [5], "zammad_url": "https://zammad-test.example.com",