diff --git a/src/viur/core/bones/text.py b/src/viur/core/bones/text.py index 7e792a965..75f189fcf 100644 --- a/src/viur/core/bones/text.py +++ b/src/viur/core/bones/text.py @@ -2,51 +2,32 @@ The `text` module contains the `Textbone` and a custom HTML-Parser to validate and extract client data for the `TextBone`. """ +import html import string +import typing as t import warnings -from base64 import urlsafe_b64decode -from datetime import datetime -from html import entities as htmlentitydefs from html.parser import HTMLParser -import typing as t - from viur.core import db, conf from viur.core.bones.base import BaseBone, ReadFromClientError, ReadFromClientErrorSeverity -_defaultTags = { - "validTags": [ # List of HTML-Tags which are valid - 'b', 'a', 'i', 'u', 'span', 'div', 'p', 'img', 'ol', 'ul', 'li', 'abbr', 'sub', 'sup', - 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'table', 'thead', 'tbody', 'tfoot', 'tr', 'td', 'th', 'br', - 'hr', 'strong', 'blockquote', 'em'], - "validAttrs": { # Mapping of valid parameters for each tag (if a tag is not listed here: no parameters allowed) - "a": ["href", "target", "title"], - "abbr": ["title"], - "span": ["title"], - "img": ["src", "alt", "title"], # "srcset" must not be in this list. It will be injected by ViUR - "td": ["colspan", "rowspan"], - "p": ["data-indent"], - "blockquote": ["cite"] - }, - "validStyles": [ - "color" - ], # List of CSS-Directives we allow - "validClasses": ["vitxt-*", "viur-txt-*"], # List of valid class-names that are valid - "singleTags": ["br", "img", "hr"] # List of tags, which don't have a corresponding end tag -} -""" -A dictionary containing default configurations for handling HTML content in TextBone instances. - -- validTags (list[str]): - A list of valid HTML tags allowed in TextBone instances. -- validAttrs (dict[str, list[str]]): - A dictionary mapping valid attributes for each tag. If a tag is not listed, no attributes are allowed for that tag. -- validStyles (list[str]): - A list of allowed CSS directives for the TextBone instances. -- validClasses (list[str]): - A list of valid CSS class names allowed in TextBone instances. -- singleTags (list[str]): - A list of self-closing HTML tags that don't have corresponding end tags. -""" + +class HtmlBoneConfiguration(t.TypedDict): + """A dictionary containing configurations for handling HTML content in TextBone instances.""" + + validTags: list[str] + """A list of valid HTML tags allowed in TextBone instances.""" + + validAttrs: dict[str, list[str]] + """A dictionary mapping valid attributes for each tag. If a tag is not listed, this tag accepts no attributes.""" + + validStyles: list[str] + """A list of allowed CSS directives for the TextBone instances.""" + + validClasses: list[str] + """A list of valid CSS class names allowed in TextBone instances.""" + + singleTags: list[str] + """A list of self-closing HTML tags that don't have corresponding end tags.""" class CollectBlobKeys(HTMLParser): @@ -75,7 +56,7 @@ def handle_starttag(self, tag, attrs): self.blobs.add(filepath.dlkey) -class HtmlSerializer(HTMLParser): # html.parser.HTMLParser +class HtmlSerializer(HTMLParser): """ A custom HTML parser that extends the HTMLParser class to sanitize and serialize HTML content by removing invalid tags and attributes while retaining the valid ones. @@ -91,7 +72,7 @@ class HtmlSerializer(HTMLParser): # html.parser.HTMLParser "\n": "", "\0": ""}) - def __init__(self, validHtml=None, srcSet=None, convert_charrefs: bool = True): + def __init__(self, validHtml: HtmlBoneConfiguration = None, srcSet=None, convert_charrefs: bool = True): super().__init__(convert_charrefs=convert_charrefs) self.result = "" # The final result that will be returned self.openTagsList = [] # List of tags that still need to be closed @@ -127,7 +108,7 @@ def handle_entityref(self, name): # FIXME :param str name: The name of the entity reference. """ - if name in htmlentitydefs.entitydefs.keys(): + if name in html.entities.entitydefs.keys(): self.flushCache() self.result += f"&{name};" @@ -311,13 +292,13 @@ class TextBone(BaseBone): only specific HTML tags and attributes, and enforce a maximum length. Supports the use of srcset for embedded images. - :param Union[None, Dict] validHtml: A dictionary containing allowed HTML tags and their attributes. Defaults - to _defaultTags. Must be a structured like :prop:_defaultTags - :param int max_length: The maximum allowed length for the content. Defaults to 200000. + :param validHtml: A dictionary containing allowed HTML tags and their attributes. + Defaults to `conf.bone_html_default_allow`. + :param max_length: The maximum allowed length for the content. Defaults to 200000. :param languages: If set, this bone can store a different content for each language - :param Dict[str, List] srcSet: An optional dictionary containing width and height for srcset generation. + :param srcSet: An optional dictionary containing width and height for srcset generation. Must be a dict of "width": [List of Ints], "height": [List of Ints], eg {"height": [720, 1080]} - :param bool indexed: Whether the content should be indexed for searching. Defaults to False. + :param indexed: Whether the content should be indexed for searching. Defaults to False. :param kwargs: Additional keyword arguments to be passed to the base class constructor. """ @@ -329,14 +310,14 @@ class __undefinedC__: def __init__( self, *, - validHtml: None | dict = __undefinedC__, + validHtml: None | HtmlBoneConfiguration = __undefinedC__, max_length: int = 200000, srcSet: t.Optional[dict[str, list]] = None, indexed: bool = False, **kwargs ): """ - :param validHtml: If set, must be a structure like :prop:_defaultTags + :param validHtml: If set, must be a structure like `conf.bone_html_default_allow` :param languages: If set, this bone can store a different content for each language :param max_length: Limit content to max_length bytes :param indexed: Must not be set True, unless you limit max_length accordingly @@ -350,8 +331,7 @@ def __init__( super().__init__(indexed=indexed, **kwargs) if validHtml == TextBone.__undefinedC__: - global _defaultTags - validHtml = _defaultTags + validHtml = conf.bone_html_default_allow self.validHtml = validHtml self.max_length = max_length diff --git a/src/viur/core/config.py b/src/viur/core/config.py index d51e91404..2a1af93eb 100644 --- a/src/viur/core/config.py +++ b/src/viur/core/config.py @@ -11,11 +11,13 @@ from viur.core.version import __version__ if t.TYPE_CHECKING: # pragma: no cover + from viur.core.bones.text import HtmlBoneConfiguration from viur.core.email import EmailTransport from viur.core.skeleton import SkeletonInstance from viur.core.module import Module from viur.core.tasks import CustomEnvironmentHandler + # Construct an alias with a generic type to be able to write Multiple[str] # TODO: Backward compatible implementation, refactor when viur-core # becomes >= Python 3.12 with a type statement (PEP 695) @@ -642,6 +644,87 @@ class Conf(ConfigType): bone_boolean_str2true: Multiple[str | int] = ("true", "yes", "1") """Allowed values that define a str to evaluate to true""" + bone_html_default_allow: "HtmlBoneConfiguration" = { + "validTags": [ + "a", + "abbr", + "b", + "blockquote", + "br", + "div", + "em", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "hr", + "i", + "img", + "li", + "ol", + "p", + "span", + "strong", + "sub", + "sup", + "table", + "tbody", + "td", + "tfoot", + "th", + "thead", + "tr", + "u", + "ul", + ], + "validAttrs": { + "a": [ + "href", + "target", + "title", + ], + "abbr": [ + "title", + ], + "blockquote": [ + "cite", + ], + "img": [ + "src", + "alt", + "title", + ], + "p": [ + "data-indent", + ], + "span": [ + "title", + ], + "td": [ + "colspan", + "rowspan", + ], + + }, + "validStyles": [ + "color", + ], + "validClasses": [ + "vitxt-*", + "viur-txt-*" + ], + "singleTags": [ + "br", + "hr", + "img", + ] + } + """ + A dictionary containing default configurations for handling HTML content in TextBone instances. + """ + cache_environment_key: t.Optional[t.Callable[[], str]] = None """If set, this function will be called for each cache-attempt and the result will be included in the computed cache-key""" diff --git a/src/viur/core/modules/moduleconf.py b/src/viur/core/modules/moduleconf.py index c642110ae..79f76f930 100644 --- a/src/viur/core/modules/moduleconf.py +++ b/src/viur/core/modules/moduleconf.py @@ -1,12 +1,17 @@ import logging +import typing as t from viur.core import Module, conf, db, current, i18n, tasks, skeleton from viur.core.bones import StringBone, TextBone, SelectBone, TreeLeafBone -from viur.core.bones.text import _defaultTags +from viur.core.bones.text import HtmlBoneConfiguration from viur.core.prototypes import List MODULECONF_KINDNAME = "viur-module-conf" +_LIMITED_HTML: t.Final[HtmlBoneConfiguration] = conf.bone_html_default_allow | { + "validTags": "a abbr b blockquote br div em h1 h2 h3 h4 h5 h6 hr i li ol p span strong sub sup u ul".split(), +} + class ModuleConfScriptSkel(skeleton.RelSkel): @@ -59,11 +64,6 @@ class ModuleConfScriptSkel(skeleton.RelSkel): class ModuleConfSkel(skeleton.Skeleton): kindName = MODULECONF_KINDNAME - _valid_tags = ['b', 'a', 'i', 'u', 'span', 'div', 'p', 'ol', 'ul', 'li', 'abbr', 'sub', 'sup', 'h1', 'h2', 'h3', - 'h4', 'h5', 'h6', 'br', 'hr', 'strong', 'blockquote', 'em'] - _valid_html = _defaultTags.copy() - _valid_html["validTags"] = _valid_tags - name = StringBone( descr=i18n.translate("modulename"), readOnly=True, @@ -71,17 +71,17 @@ class ModuleConfSkel(skeleton.Skeleton): help_text = TextBone( descr=i18n.translate("module helptext"), - validHtml=_valid_html, + validHtml=_LIMITED_HTML, ) help_text_add = TextBone( descr=i18n.translate("add helptext"), - validHtml=_valid_html, + validHtml=_LIMITED_HTML, ) help_text_edit = TextBone( descr=i18n.translate("edit helptext"), - validHtml=_valid_html, + validHtml=_LIMITED_HTML, ) scripts = TreeLeafBone(