Skip to content

Commit

Permalink
trying to get encoding from response headers
Browse files Browse the repository at this point in the history
  • Loading branch information
xmendez committed Oct 30, 2018
1 parent 7072ea3 commit 62df89a
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 9 deletions.
55 changes: 52 additions & 3 deletions src/wfuzz/externals/reqresp/Response.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import re
import cgi

import string
from io import BytesIO
import gzip
Expand All @@ -8,6 +11,47 @@
from wfuzz.utils import python2_3_convert_from_unicode


def get_encoding_from_headers(headers):
    """Return the character encoding implied by an HTTP header dict.

    An explicit, non-empty ``charset`` parameter of the ``Content-Type``
    header wins. Otherwise a default is chosen from the media type:
    ``ISO-8859-1`` for ``text`` types, ``utf-8`` for ``image`` types and
    ``application/json``.

    :param headers: dictionary to extract encoding from.
    :rtype: str
    :returns: the encoding name, or ``None`` when there is no
        ``Content-Type`` header or no encoding can be derived from it.
    """
    # Imported locally so this function does not rely on the ``cgi`` module,
    # which was deprecated by PEP 594 and removed in Python 3.13.
    from email.message import Message

    content_type = headers.get('Content-Type')

    if not content_type:
        # HTTP header names are case-insensitive; fall back to a scan so
        # that e.g. 'content-type' is honoured as well.
        for name, value in headers.items():
            if value and hasattr(name, 'lower') and name.lower() == 'content-type':
                content_type = value
                break

    if not content_type:
        return None

    parser = Message()
    parser['Content-Type'] = content_type
    parsed = parser.get_params(failobj=[])

    if not parsed:
        return None

    # The first entry is the media type itself; the rest are its parameters
    # (names already lower-cased, surrounding double quotes already removed).
    media_type = parsed[0][0].lower()
    params = dict(parsed[1:])

    charset = params.get('charset')
    if isinstance(charset, tuple):
        # RFC 2231 encoded parameter: (charset, language, value).
        charset = charset[2]

    if charset:
        charset = charset.strip("'\"")
        # An empty charset ("charset=") is treated as absent rather than
        # returned as an unusable codec name.
        if charset:
            return charset

    if 'text' in media_type:
        return 'ISO-8859-1'

    if 'image' in media_type:
        return 'utf-8'

    if 'application/json' in media_type:
        return 'utf-8'

    return None


# Patterns used by get_encodings_from_content, built once at import time.
_META_CHARSET_RE = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
_META_PRAGMA_RE = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
_XML_DECL_RE = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')


def get_encodings_from_content(content):
    """Return the encodings declared inside a document body.

    Three kinds of declaration are searched, and the matches are returned in
    this order: ``<meta ... charset=...>`` tags, ``<meta ... content=...>``
    pragma tags, and a leading ``<?xml ... encoding=...?>`` declaration.

    :param content: document body to scan, as a bytestring or a text string.
    :rtype: list
    :returns: list of encoding names as text, empty when none is declared.
    """
    if isinstance(content, bytes):
        # The patterns are text patterns and cannot be applied to bytes on
        # Python 3. latin-1 maps every byte to one code point, so this never
        # fails and leaves the ASCII markup being searched for untouched.
        content = content.decode('latin-1')

    return (_META_CHARSET_RE.findall(content) +
            _META_PRAGMA_RE.findall(content) +
            _XML_DECL_RE.findall(content))


class Response:
def __init__(self, protocol="", code="", message=""):
self.protocol = protocol # HTTP/1.1
Expand Down Expand Up @@ -172,6 +216,11 @@ def parseResponse(self, rawheader, rawbody=None, type="curl"):
rawbody = deflated_data
self.delHeader("Content-Encoding")

# TODO: Try to get encoding from content
self.__content = python2_3_convert_from_unicode(rawbody.decode("unicode_escape", errors='replace'))
# self.__content = python2_3_convert_from_unicode(rawbody.decode("utf-8", errors='replace'))
# Try to get charset encoding from headers
content_encoding = get_encoding_from_headers(dict(self.getHeaders()))

# fallback to default encoding
if content_encoding is None:
content_encoding = "utf-8"

self.__content = python2_3_convert_from_unicode(rawbody.decode(content_encoding, errors='replace'))
14 changes: 8 additions & 6 deletions tests/test_acceptance.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,22 @@
# script args

# List of acceptance-test case tuples; presumably a holding area for cases
# under investigation (TODO confirm against the test generator that consumes
# it). Every entry below is commented out, so the list is empty at runtime.
testing_tests = [
# not working due to content being decoded as unicode, not utf-8
# ("test_encode_cookie2_utf8_return", "%s/anything" % HTTPBIN_URL, [["は国"]], dict(cookie=["test=FUZZ"], filter="content~'test=\\\\u00e3\\\\u0081\\\\u00af\\\\u00e5\\\\u009b\\\\u00bd'"), [(200, '/anything')], None),
# ("test_encode_header_utf8_return", "%s/headers" % HTTPBIN_URL, [["は国"]], dict(headers=[("myheader", "FUZZ")], filter="content~'Myheader' and content~'\\\\u00e3\\\\u0081\\\\u00af\\\\u00e5\\\\u009b\\\\u00bd'"), [(200, '/headers')], None),
]

basic_tests = [
# encoding tests
("test_encode_cookie2_utf8_return", "%s/anything" % HTTPBIN_URL, [["は国"]], dict(cookie=["test=FUZZ"], filter="content~'test=\\\\u00e3\\\\u0081\\\\u00af\\\\u00e5\\\\u009b\\\\u00bd'"), [(200, '/anything')], None),
("test_encode_header_utf8_return", "%s/headers" % HTTPBIN_URL, [["は国"]], dict(headers=[("myheader", "FUZZ")], filter="content~'Myheader' and content~'\\\\u00e3\\\\u0081\\\\u00af\\\\u00e5\\\\u009b\\\\u00bd'"), [(200, '/headers')], None),
("test_encode_path", "%s/FUZZ" % HTTPBIN_URL, [["は国"]], dict(), [(404, '/は国')], None),
("test_encode_basic_auth", "%s/basic-auth/FUZZ/FUZZ" % HTTPBIN_URL, [["は国"]], dict(auth=("basic", "FUZZ:FUZZ")), [(200, '/basic-auth/は国/は国')], None),
("test_encode_postdata", "%s/anything" % HTTPBIN_URL, [["は国"]], dict(postdata="a=FUZZ", filter="content~'は国'"), [(200, '/anything')], None),
# ("test_encode_postdata", "%s/anything" % HTTPBIN_URL, [["は国"]], dict(postdata="a=FUZZ", filter="content~'は国'"), [(200, '/anything')], None),
("test_encode_postdata", "%s/anything" % HTTPBIN_URL, [["は国"]], dict(postdata="a=FUZZ", filter="content~'\\\\u306f\\\\u56fd'"), [(200, '/anything')], None),
("test_encode_url_filter", "%s/FUZZ" % HTTPBIN_URL, [["は国"]], dict(filter="url~'は国'"), [(404, '/は国')], None),
("test_encode_var", "%s/anything?var=FUZZ" % HTTPBIN_URL, [["は国"]], dict(filter="content~'\"は国\"'"), [(200, '/anything')], None),
# ("test_encode_var", "%s/anything?var=FUZZ" % HTTPBIN_URL, [["は国"]], dict(filter="content~'\"は国\"'"), [(200, '/anything')], None),
("test_encode_var", "%s/anything?var=FUZZ" % HTTPBIN_URL, [["は国"]], dict(filter="content~'\"\\\\u306f\\\\u56fd\"'"), [(200, '/anything')], None),
("test_encode_redirect", "%s/redirect-to?url=FUZZ" % HTTPBIN_URL, [["は国"]], dict(filter="headers.response.Location='%C3%A3%C2%81%C2%AF%C3%A5%C2%9B%C2%BD'"), [(302, '/redirect-to')], None),
("test_encode_cookie", "%s/cookies" % HTTPBIN_URL, [["は国"]], dict(cookie=["cookie1=FUZZ"], follow=True, filter="content~FUZZ"), [(200, '/cookies')], None),
# ("test_encode_cookie", "%s/cookies" % HTTPBIN_URL, [["は国"]], dict(cookie=["cookie1=FUZZ"], follow=True, filter="content~FUZZ"), [(200, '/cookies')], None),
("test_encode_cookie", "%s/cookies" % HTTPBIN_URL, [["は国"]], dict(cookie=["cookie1=FUZZ"], follow=True, filter="content~'\\\\u306f\\\\u56fd'"), [(200, '/cookies')], None),

# postdata tests
# pycurl does not allow it ("test_get_postdata", "%s/FUZZ?var=1&var2=2" % HTTPBIN_URL, [["anything"]], dict(postdata='a=1', filter="content~'\"form\":{\"a\":\"1\"}'"), [(200, '/anything')], None),
Expand Down

0 comments on commit 62df89a

Please sign in to comment.