From b7de2713b83eb345469c021902643798fe1f5249 Mon Sep 17 00:00:00 2001 From: Jager Mesh Date: Wed, 1 Jul 2015 14:42:15 -0400 Subject: [PATCH] Remove control characters from input HTML There is a problem with control characters on servers with libxml 2.6.7 (most current Linux servers). In some cases the HTML becomes incorrect (extra closing/opening body/html tags are added): Input: string(128) "BELnormal" Output: string(250) " normal " --- phpQuery/phpQuery.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/phpQuery/phpQuery.php b/phpQuery/phpQuery.php index 08f22fc..4298e81 100644 --- a/phpQuery/phpQuery.php +++ b/phpQuery/phpQuery.php @@ -32,8 +32,8 @@ */ abstract class phpQuery { /** - * XXX: Workaround for mbstring problems - * + * XXX: Workaround for mbstring problems + * * @var bool */ public static $mbstringSupport = true; @@ -270,6 +270,7 @@ public static function getDocument($id = null) { public static function newDocument($markup = null, $contentType = null) { if (! $markup) $markup = ''; + $markup = preg_replace('/[\x00-\x1F\x7F]/', '', $markup); $documentID = phpQuery::createDocumentWrapper($markup, $contentType); return new phpQueryObject($documentID); }