Skip to content

Commit

Permalink
feat: conf.bone_html_default_allow (#1278)
Browse files Browse the repository at this point in the history
Provide the HtmlBone (TextBone) "validHtml"-default as a config
variable, so it can be changed globally.

This feature is useful to some projects, and is a first improvement on
the messy TextBone class.

Relates to and partly replaces #631.

---------

Co-authored-by: Sven Eberth <[email protected]>
  • Loading branch information
phorward and sveneberth authored Oct 9, 2024
1 parent 763ab2e commit 77df9ee
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 60 deletions.
82 changes: 31 additions & 51 deletions src/viur/core/bones/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,51 +2,32 @@
The `text` module contains the `TextBone` and a custom HTML-Parser
to validate and extract client data for the `TextBone`.
"""
import html
import string
import typing as t
import warnings
from base64 import urlsafe_b64decode
from datetime import datetime
from html import entities as htmlentitydefs
from html.parser import HTMLParser
import typing as t

from viur.core import db, conf
from viur.core.bones.base import BaseBone, ReadFromClientError, ReadFromClientErrorSeverity

_defaultTags = {
    # HTML tags that are accepted
    "validTags": (
        "b a i u span div p img ol ul li abbr sub sup "
        "h1 h2 h3 h4 h5 h6 table thead tbody tfoot tr td th br "
        "hr strong blockquote em"
    ).split(),
    # Attributes accepted per tag (a tag that is not listed here accepts no attributes)
    "validAttrs": {
        "a": ["href", "target", "title"],
        "abbr": ["title"],
        "span": ["title"],
        # "srcset" must not be listed for "img"; it will be injected by ViUR
        "img": ["src", "alt", "title"],
        "td": ["colspan", "rowspan"],
        "p": ["data-indent"],
        "blockquote": ["cite"],
    },
    # CSS directives that are accepted
    "validStyles": ["color"],
    # CSS class names that are accepted
    "validClasses": ["vitxt-*", "viur-txt-*"],
    # Tags which don't have a corresponding end tag
    "singleTags": ["br", "img", "hr"],
}
"""
Default configuration for handling HTML content in TextBone instances.

- validTags (list[str]):
    HTML tags that are allowed in TextBone instances.
- validAttrs (dict[str, list[str]]):
    Allowed attributes for each tag. A tag that is not listed accepts no attributes.
- validStyles (list[str]):
    CSS directives that are allowed in TextBone instances.
- validClasses (list[str]):
    CSS class names that are allowed in TextBone instances.
- singleTags (list[str]):
    Self-closing HTML tags that don't have corresponding end tags.
"""

class HtmlBoneConfiguration(t.TypedDict):
    """Typed layout of the dictionary that configures HTML handling in TextBone instances."""

    validTags: list[str]
    """HTML tags that are allowed in TextBone instances."""

    validAttrs: dict[str, list[str]]
    """Allowed attributes for each tag. A tag that is not listed accepts no attributes."""

    validStyles: list[str]
    """CSS directives that are allowed in TextBone instances."""

    validClasses: list[str]
    """CSS class names that are allowed in TextBone instances."""

    singleTags: list[str]
    """Self-closing HTML tags that don't have corresponding end tags."""


class CollectBlobKeys(HTMLParser):
Expand Down Expand Up @@ -75,7 +56,7 @@ def handle_starttag(self, tag, attrs):
self.blobs.add(filepath.dlkey)


class HtmlSerializer(HTMLParser): # html.parser.HTMLParser
class HtmlSerializer(HTMLParser):
"""
A custom HTML parser that extends the HTMLParser class to sanitize and serialize HTML content
by removing invalid tags and attributes while retaining the valid ones.
Expand All @@ -91,7 +72,7 @@ class HtmlSerializer(HTMLParser): # html.parser.HTMLParser
"\n": "",
"\0": ""})

def __init__(self, validHtml=None, srcSet=None, convert_charrefs: bool = True):
def __init__(self, validHtml: HtmlBoneConfiguration = None, srcSet=None, convert_charrefs: bool = True):
super().__init__(convert_charrefs=convert_charrefs)
self.result = "" # The final result that will be returned
self.openTagsList = [] # List of tags that still need to be closed
Expand Down Expand Up @@ -127,7 +108,7 @@ def handle_entityref(self, name): # FIXME
:param str name: The name of the entity reference.
"""
if name in htmlentitydefs.entitydefs.keys():
if name in html.entities.entitydefs.keys():
self.flushCache()
self.result += f"&{name};"

Expand Down Expand Up @@ -311,13 +292,13 @@ class TextBone(BaseBone):
only specific HTML tags and attributes, and enforce a maximum length. Supports the use of
srcset for embedded images.
:param Union[None, Dict] validHtml: A dictionary containing allowed HTML tags and their attributes. Defaults
to _defaultTags. Must be structured like :prop:_defaultTags
:param int max_length: The maximum allowed length for the content. Defaults to 200000.
:param validHtml: A dictionary containing allowed HTML tags and their attributes.
Defaults to `conf.bone_html_default_allow`.
:param max_length: The maximum allowed length for the content. Defaults to 200000.
:param languages: If set, this bone can store a different content for each language
:param Dict[str, List] srcSet: An optional dictionary containing width and height for srcset generation.
:param srcSet: An optional dictionary containing width and height for srcset generation.
Must be a dict of "width": [List of Ints], "height": [List of Ints], e.g. {"height": [720, 1080]}
:param bool indexed: Whether the content should be indexed for searching. Defaults to False.
:param indexed: Whether the content should be indexed for searching. Defaults to False.
:param kwargs: Additional keyword arguments to be passed to the base class constructor.
"""

Expand All @@ -329,14 +310,14 @@ class __undefinedC__:
def __init__(
self,
*,
validHtml: None | dict = __undefinedC__,
validHtml: None | HtmlBoneConfiguration = __undefinedC__,
max_length: int = 200000,
srcSet: t.Optional[dict[str, list]] = None,
indexed: bool = False,
**kwargs
):
"""
:param validHtml: If set, must be a structure like :prop:_defaultTags
:param validHtml: If set, must be a structure like `conf.bone_html_default_allow`
:param languages: If set, this bone can store a different content for each language
:param max_length: Limit content to max_length bytes
:param indexed: Must not be set True, unless you limit max_length accordingly
Expand All @@ -350,8 +331,7 @@ def __init__(
super().__init__(indexed=indexed, **kwargs)

if validHtml == TextBone.__undefinedC__:
global _defaultTags
validHtml = _defaultTags
validHtml = conf.bone_html_default_allow

self.validHtml = validHtml
self.max_length = max_length
Expand Down
83 changes: 83 additions & 0 deletions src/viur/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
from viur.core.version import __version__

if t.TYPE_CHECKING: # pragma: no cover
from viur.core.bones.text import HtmlBoneConfiguration
from viur.core.email import EmailTransport
from viur.core.skeleton import SkeletonInstance
from viur.core.module import Module
from viur.core.tasks import CustomEnvironmentHandler


# Construct an alias with a generic type to be able to write Multiple[str]
# TODO: Backward compatible implementation, refactor when viur-core
# becomes >= Python 3.12 with a type statement (PEP 695)
Expand Down Expand Up @@ -642,6 +644,87 @@ class Conf(ConfigType):
bone_boolean_str2true: Multiple[str | int] = ("true", "yes", "1")
"""Allowed values that define a str to evaluate to true"""

bone_html_default_allow: "HtmlBoneConfiguration" = {
    "validTags": [
        "a", "abbr", "b", "blockquote", "br", "div", "em",
        "h1", "h2", "h3", "h4", "h5", "h6", "hr",
        "i", "img", "li", "ol", "p", "span", "strong", "sub", "sup",
        "table", "tbody", "td", "tfoot", "th", "thead", "tr",
        "u", "ul",
    ],
    "validAttrs": {
        "a": ["href", "target", "title"],
        "abbr": ["title"],
        "blockquote": ["cite"],
        "img": ["src", "alt", "title"],
        "p": ["data-indent"],
        "span": ["title"],
        "td": ["colspan", "rowspan"],
    },
    "validStyles": ["color"],
    "validClasses": ["vitxt-*", "viur-txt-*"],
    "singleTags": ["br", "hr", "img"],
}
"""
Default configuration for handling HTML content in TextBone instances:
the allowed tags, the allowed attributes per tag, the allowed CSS directives
and class names, and the tags without a corresponding end tag.
"""

cache_environment_key: t.Optional[t.Callable[[], str]] = None
"""If set, this function will be called for each cache-attempt
and the result will be included in the computed cache-key"""
Expand Down
18 changes: 9 additions & 9 deletions src/viur/core/modules/moduleconf.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import logging
import typing as t
from viur.core import Module, conf, db, current, i18n, tasks, skeleton
from viur.core.bones import StringBone, TextBone, SelectBone, TreeLeafBone
from viur.core.bones.text import _defaultTags
from viur.core.bones.text import HtmlBoneConfiguration
from viur.core.prototypes import List


MODULECONF_KINDNAME = "viur-module-conf"

# HTML configuration for the help texts: the configured default with "validTags"
# replaced by a shorter list (the default list minus "img" and the table tags).
_LIMITED_HTML: t.Final[HtmlBoneConfiguration] = {
    **conf.bone_html_default_allow,
    "validTags": [
        "a", "abbr", "b", "blockquote", "br", "div", "em",
        "h1", "h2", "h3", "h4", "h5", "h6", "hr",
        "i", "li", "ol", "p", "span", "strong", "sub", "sup",
        "u", "ul",
    ],
}


class ModuleConfScriptSkel(skeleton.RelSkel):

Expand Down Expand Up @@ -59,29 +64,24 @@ class ModuleConfScriptSkel(skeleton.RelSkel):
class ModuleConfSkel(skeleton.Skeleton):
kindName = MODULECONF_KINDNAME

_valid_tags = ['b', 'a', 'i', 'u', 'span', 'div', 'p', 'ol', 'ul', 'li', 'abbr', 'sub', 'sup', 'h1', 'h2', 'h3',
'h4', 'h5', 'h6', 'br', 'hr', 'strong', 'blockquote', 'em']
_valid_html = _defaultTags.copy()
_valid_html["validTags"] = _valid_tags

name = StringBone(
descr=i18n.translate("modulename"),
readOnly=True,
)

help_text = TextBone(
descr=i18n.translate("module helptext"),
validHtml=_valid_html,
validHtml=_LIMITED_HTML,
)

help_text_add = TextBone(
descr=i18n.translate("add helptext"),
validHtml=_valid_html,
validHtml=_LIMITED_HTML,
)

help_text_edit = TextBone(
descr=i18n.translate("edit helptext"),
validHtml=_valid_html,
validHtml=_LIMITED_HTML,
)

scripts = TreeLeafBone(
Expand Down

0 comments on commit 77df9ee

Please sign in to comment.