Skip to content

Commit

Permalink
Adds cleanVector helper
Browse files Browse the repository at this point in the history
  • Loading branch information
octoberapp committed Oct 2, 2023
1 parent 0203662 commit 336db5e
Showing 1 changed file with 43 additions and 2 deletions.
45 changes: 43 additions & 2 deletions src/Html/HtmlBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -505,8 +505,9 @@ public static function minify($html)

/**
* clean HTML to prevent most XSS attacks.
* @param string $html HTML
* @return string Cleaned HTML
* @todo shift to external library
* @param string $html
* @return string
*/
public static function clean($html)
{
Expand Down Expand Up @@ -542,6 +543,46 @@ public static function clean($html)
return $html;
}

/**
* clean XML to prevent most XSS attacks in vector files (SVGs). Same as clean except:
* - allowed tags: xml, title
* - allowed attributes: xmlns
* @todo shift to external library
*/
public static function cleanVector(string $html): string
{
do {
$oldHtml = $html;

// Fix &entity\n;
$html = str_replace(['&','<','>'], ['&','<','>'], $html);
$html = preg_replace('#(&\#*\w+)[\x00-\x20]+;#u', "$1;", $html);
$html = preg_replace('#(&\#x*)([0-9A-F]+);*#iu', "$1$2;", $html);
$html = html_entity_decode($html, ENT_COMPAT, 'UTF-8');

// Remove any attribute starting with "on" or xmlns
$html = preg_replace('#(<[^>]+[\x00-\x20\"\'\/])(on)[^>]*>#iUu', "$1>", $html);

// Remove javascript: and vbscript: protocols
$html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu', '$1=$2nojavascript...', $html);
$html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu', '$1=$2novbscript...', $html);
$html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*-moz-binding[\x00-\x20]*:#Uu', '$1=$2nomozbinding...', $html);
$html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*data[\x00-\x20]*:#Uu', '$1=$2nodata...', $html);

// Only works in IE: <span style="width: expression(alert('Ping!'));"></span>
$html = preg_replace('#(<[^>]+[\x00-\x20\"\'\/])style[^>]*>#iUu', "$1>", $html);

// Remove namespaced elements (we do not need them)
$html = preg_replace('#</*\w+:\w[^>]*>#i', "", $html);

// Remove really unwanted tags
$html = preg_replace('#</*(applet|meta|blink|link|style|script|embed|object|iframe|frame|frameset|ilayer|layer|bgsound|base)[^>]*>#i', "", $html);
}
while ($oldHtml !== $html);

return $html;
}

/**
* isValidColor determines if a given string is a valid CSS color value
*/
Expand Down

0 comments on commit 336db5e

Please sign in to comment.