From 62df89aef648b74ef605b28309d7f9b849c6e754 Mon Sep 17 00:00:00 2001 From: javi Date: Tue, 30 Oct 2018 22:53:29 +0100 Subject: [PATCH] trying to get encoding from response headers --- src/wfuzz/externals/reqresp/Response.py | 55 +++++++++++++++++++++++-- tests/test_acceptance.py | 14 ++++--- 2 files changed, 60 insertions(+), 9 deletions(-) diff --git a/src/wfuzz/externals/reqresp/Response.py b/src/wfuzz/externals/reqresp/Response.py index c4de2b73..f1392442 100644 --- a/src/wfuzz/externals/reqresp/Response.py +++ b/src/wfuzz/externals/reqresp/Response.py @@ -1,3 +1,6 @@ +import re +import cgi + import string from io import BytesIO import gzip @@ -8,6 +11,47 @@ from wfuzz.utils import python2_3_convert_from_unicode +def get_encoding_from_headers(headers): + """Returns encodings from given HTTP Header Dict. + + :param headers: dictionary to extract encoding from. + :rtype: str + """ + + content_type = headers.get('Content-Type') + + if not content_type: + return None + + content_type, params = cgi.parse_header(content_type) + + if 'charset' in params: + return params['charset'].strip("'\"") + + if 'text' in content_type: + return 'ISO-8859-1' + + if 'image' in content_type: + return 'utf-8' + + if 'application/json' in content_type: + return 'utf-8' + + +def get_encodings_from_content(content): + """Returns encodings from given content string. + + :param content: bytestring to extract encodings from. 
+ """ + charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I) + pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I) + xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]') + + return (charset_re.findall(content) + + pragma_re.findall(content) + + xml_re.findall(content)) + + class Response: def __init__(self, protocol="", code="", message=""): self.protocol = protocol # HTTP/1.1 @@ -172,6 +216,11 @@ def parseResponse(self, rawheader, rawbody=None, type="curl"): rawbody = deflated_data self.delHeader("Content-Encoding") - # TODO: Try to get encoding from content - self.__content = python2_3_convert_from_unicode(rawbody.decode("unicode_escape", errors='replace')) - # self.__content = python2_3_convert_from_unicode(rawbody.decode("utf-8", errors='replace')) + # Try to get charset encoding from headers + content_encoding = get_encoding_from_headers(dict(self.getHeaders())) + + # fallback to default encoding + if content_encoding is None: + content_encoding = "utf-8" + + self.__content = python2_3_convert_from_unicode(rawbody.decode(content_encoding, errors='replace')) diff --git a/tests/test_acceptance.py b/tests/test_acceptance.py index fee89156..0d20a04c 100644 --- a/tests/test_acceptance.py +++ b/tests/test_acceptance.py @@ -39,20 +39,22 @@ # script args testing_tests = [ - # not working due to content being decode as unicode not utf-8 - # ("test_encode_cookie2_utf8_return", "%s/anything" % HTTPBIN_URL, [["は国"]], dict(cookie=["test=FUZZ"], filter="content~'test=\\\\u00e3\\\\u0081\\\\u00af\\\\u00e5\\\\u009b\\\\u00bd'"), [(200, '/anything')], None), - # ("test_encode_header_utf8_return", "%s/headers" % HTTPBIN_URL, [["は国"]], dict(headers=[("myheader", "FUZZ")], filter="content~'Myheader' and content~'\\\\u00e3\\\\u0081\\\\u00af\\\\u00e5\\\\u009b\\\\u00bd'"), [(200, '/headers')], None), ] basic_tests = [ # encoding tests + ("test_encode_cookie2_utf8_return", "%s/anything" % HTTPBIN_URL, [["は国"]], dict(cookie=["test=FUZZ"], 
filter="content~'test=\\\\u00e3\\\\u0081\\\\u00af\\\\u00e5\\\\u009b\\\\u00bd'"), [(200, '/anything')], None), + ("test_encode_header_utf8_return", "%s/headers" % HTTPBIN_URL, [["は国"]], dict(headers=[("myheader", "FUZZ")], filter="content~'Myheader' and content~'\\\\u00e3\\\\u0081\\\\u00af\\\\u00e5\\\\u009b\\\\u00bd'"), [(200, '/headers')], None), ("test_encode_path", "%s/FUZZ" % HTTPBIN_URL, [["は国"]], dict(), [(404, '/は国')], None), ("test_encode_basic_auth", "%s/basic-auth/FUZZ/FUZZ" % HTTPBIN_URL, [["は国"]], dict(auth=("basic", "FUZZ:FUZZ")), [(200, '/basic-auth/は国/は国')], None), - ("test_encode_postdata", "%s/anything" % HTTPBIN_URL, [["は国"]], dict(postdata="a=FUZZ", filter="content~'は国'"), [(200, '/anything')], None), + # ("test_encode_postdata", "%s/anything" % HTTPBIN_URL, [["は国"]], dict(postdata="a=FUZZ", filter="content~'は国'"), [(200, '/anything')], None), + ("test_encode_postdata", "%s/anything" % HTTPBIN_URL, [["は国"]], dict(postdata="a=FUZZ", filter="content~'\\\\u306f\\\\u56fd'"), [(200, '/anything')], None), ("test_encode_url_filter", "%s/FUZZ" % HTTPBIN_URL, [["は国"]], dict(filter="url~'は国'"), [(404, '/は国')], None), - ("test_encode_var", "%s/anything?var=FUZZ" % HTTPBIN_URL, [["は国"]], dict(filter="content~'\"は国\"'"), [(200, '/anything')], None), + # ("test_encode_var", "%s/anything?var=FUZZ" % HTTPBIN_URL, [["は国"]], dict(filter="content~'\"は国\"'"), [(200, '/anything')], None), + ("test_encode_var", "%s/anything?var=FUZZ" % HTTPBIN_URL, [["は国"]], dict(filter="content~'\"\\\\u306f\\\\u56fd\"'"), [(200, '/anything')], None), ("test_encode_redirect", "%s/redirect-to?url=FUZZ" % HTTPBIN_URL, [["は国"]], dict(filter="headers.response.Location='%C3%A3%C2%81%C2%AF%C3%A5%C2%9B%C2%BD'"), [(302, '/redirect-to')], None), - ("test_encode_cookie", "%s/cookies" % HTTPBIN_URL, [["は国"]], dict(cookie=["cookie1=FUZZ"], follow=True, filter="content~FUZZ"), [(200, '/cookies')], None), + # ("test_encode_cookie", "%s/cookies" % HTTPBIN_URL, [["は国"]], dict(cookie=["cookie1=FUZZ"], 
follow=True, filter="content~FUZZ"), [(200, '/cookies')], None), + ("test_encode_cookie", "%s/cookies" % HTTPBIN_URL, [["は国"]], dict(cookie=["cookie1=FUZZ"], follow=True, filter="content~'\\\\u306f\\\\u56fd'"), [(200, '/cookies')], None), # postdata tests # pycurl does not allow it ("test_get_postdata", "%s/FUZZ?var=1&var2=2" % HTTPBIN_URL, [["anything"]], dict(postdata='a=1', filter="content~'\"form\":{\"a\":\"1\"}'"), [(200, '/anything')], None),