Skip to content

Commit

Permalink
Merge pull request #720 from SUNET/lundberg_remove_request_sanitation
Browse files Browse the repository at this point in the history
Remove request sanitation for every request
  • Loading branch information
helylle authored Dec 13, 2024
2 parents c67bf6a + b8a1489 commit b667eeb
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 325 deletions.
2 changes: 0 additions & 2 deletions src/eduid/webapp/common/api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
from eduid.webapp.common.api.debug import init_app_debug
from eduid.webapp.common.api.exceptions import init_exception_handlers, init_sentry
from eduid.webapp.common.api.middleware import PrefixMiddleware
from eduid.webapp.common.api.request import Request
from eduid.webapp.common.authn.utils import no_authn_views
from eduid.webapp.common.session.eduid_session import SessionFactory

Expand Down Expand Up @@ -82,7 +81,6 @@ def __init__(

# App setup
self.wsgi_app = ProxyFix(self.wsgi_app) # type: ignore[method-assign]
self.request_class = Request
# autocorrect location header means that redirects defaults to an absolute path
# werkzeug 2.1.0 changed default value to False
self.response_class.autocorrect_location_header = True
Expand Down
258 changes: 0 additions & 258 deletions src/eduid/webapp/common/api/request.py

This file was deleted.

41 changes: 39 additions & 2 deletions src/eduid/webapp/common/api/sanitation.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import logging
from typing import AnyStr
from collections.abc import Iterable, Mapping, Sequence
from typing import Any, AnyStr
from urllib.parse import quote, unquote

from bleach import clean
from werkzeug.exceptions import BadRequest

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -122,7 +124,8 @@ def _sanitize_input(

return cleaned_text

def _safe_clean(self, untrusted_text: str, strip_characters: bool = False) -> str:
@staticmethod
def _safe_clean(untrusted_text: str, strip_characters: bool = False) -> str:
"""
Wrapper for the clean function of bleach to be able
to catch when illegal UTF-8 is processed.
Expand All @@ -140,3 +143,37 @@ def _safe_clean(self, untrusted_text: str, strip_characters: bool = False) -> st
"user input."
)
raise SanitationProblem("Illegal UTF-8")


def sanitize_map(data: Mapping[str, Any]) -> dict[str, Any]:
    """
    Build a new dict from ``data`` with every key and value run through ``sanitize_item``.

    Each key is sanitized first and coerced to ``str``, then its value is sanitized.
    If two keys sanitize to the same string, the later entry wins.
    """
    cleaned: dict[str, Any] = {}
    for raw_key, raw_value in data.items():
        # Sanitize the key before the value, same order as a dict comprehension would.
        safe_key = str(sanitize_item(raw_key))
        cleaned[safe_key] = sanitize_item(raw_value)
    return cleaned


def sanitize_iter(data: Iterable[str] | Iterable[Sequence[Any]]) -> list[str | dict[str, Any] | list[Any] | None]:
return [sanitize_item(item) for item in data]


def sanitize_item(
data: str | dict[str, Any] | Sequence[Any] | list[Sequence[Any]] | None,
) -> str | dict[str, Any] | list[Any] | None:
match data:
case None:
return None
case dict():
return sanitize_map(data)
case list():
return sanitize_iter(data)
case str():
san = Sanitizer()
try:
assert isinstance(data, str)
safe_data = san.sanitize_input(data)
if safe_data != data:
logger.warning("Sanitized input from unsafe characters")
logger.debug(f"data: {data} -> safe_data: {safe_data}")
except SanitationProblem:
logger.exception("There was a problem sanitizing inputs")
raise BadRequest()
return str(safe_data)
case _:
raise SanitationProblem(f"incompatible type {type(data)}")
16 changes: 16 additions & 0 deletions src/eduid/webapp/common/api/schemas/sanitize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from collections.abc import Mapping
from typing import Any, AnyStr

from marshmallow.fields import String

from eduid.webapp.common.api.sanitation import Sanitizer

__author__ = "lundberg"


class SanitizedString(String):
    """
    Marshmallow ``String`` field that sanitizes incoming text before normal deserialization.

    ``str`` and ``bytes`` values are passed through ``Sanitizer.sanitize_input`` and the result is
    handed to ``String._deserialize``. Any other value is handed to ``String._deserialize``
    untouched, so marshmallow's own type validation reports it instead of the sanitizer being
    called with a value it was not written for.
    """

    # One sanitizer shared by every instance of the field; created when the class is defined.
    sanitizer = Sanitizer()

    def _deserialize(self, value: AnyStr, attr: str | None, data: Mapping[str, Any] | None, **kwargs: Any) -> str:
        """
        Sanitize ``value`` if it is text, then delegate to ``String._deserialize``.

        :param value: The raw value to deserialize.
        :param attr: The attribute/key in ``data`` being deserialized.
        :param data: The raw input data passed to the schema.
        :param kwargs: Field-specific keyword arguments, forwarded unchanged.
        :return: The sanitized, deserialized string.
        """
        candidate: Any = value
        # Only text can be sanitized; leave everything else for the parent class to reject.
        if isinstance(candidate, (str, bytes)):
            candidate = self.sanitizer.sanitize_input(untrusted_text=candidate)
        return super()._deserialize(candidate, attr, data, **kwargs)
Loading

0 comments on commit b667eeb

Please sign in to comment.