diff --git a/CHANGES.txt b/CHANGES.txt index acc85104..779bd048 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,17 @@ +1.4.1 (2019-12-??) +------------------ + +Security Fixes +~~~~~~~~~~~~~~ + +- Waitress did not properly validate that the HTTP headers it received were + properly formed, thereby potentially allowing a front-end server to treat a + request different from Waitress. This could lead to HTTP request + smuggling/splitting. + + Please see the security advisory for more information: + https://github.com/Pylons/waitress/security/advisories/GHSA-m5ff-3wj3-8ph4 + 1.4.0 (2019-12-20) ------------------ @@ -36,6 +50,11 @@ Security Fixes For more information I can highly recommend the blog post by ZeddYu Lu https://blog.zeddyu.info/2019/12/08/HTTP-Smuggling-en/ + Please see the security advisory for more information: + https://github.com/Pylons/waitress/security/advisories/GHSA-pg36-wpm5-g57p + + CVE-ID: CVE-2019-16785 + - Waitress used to treat LF the same as CRLF in ``Transfer-Encoding: chunked`` requests, while the maintainer doesn't believe this could lead to a security issue, this is no longer supported and all chunks are now validated to be @@ -61,6 +80,11 @@ Security Fixes ``Transfer-Encoding: chunked`` instead of ``Transfer-Encoding: identity, chunked``. + PLease see the security advisory for more information: + https://github.com/Pylons/waitress/security/advisories/GHSA-g2xc-35jw-c63p + + CVE-ID: CVE-2019-16786 + - While validating the ``Transfer-Encoding`` header, Waitress now properly handles line-folded ``Transfer-Encoding`` headers or those that contain multiple comma seperated values. This closes a potential issue where a @@ -75,3 +99,6 @@ Security Fixes for a potential request to be split and treated as two requests by HTTP pipelining support in Waitress. If Waitress is now unable to parse the Content-Length header, a 400 Bad Request is sent back to the client. + + Please see the security advisory for more information: + https://github.com/Pylons/waitress/security/advisories/GHSA-4ppp-gpcr-7qf6 diff --git a/setup.py b/setup.py index 96846d13..15e11d5b 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ setup( name="waitress", - version="1.4.0", + version="1.4.1", author="Zope Foundation and Contributors", author_email="zope-dev@zope.org", maintainer="Pylons Project", diff --git a/waitress/parser.py b/waitress/parser.py index dd591f2d..8b07dd6a 100644 --- a/waitress/parser.py +++ b/waitress/parser.py @@ -29,6 +29,7 @@ ServerNotImplemented, find_double_newline, ) +from .rfc7230 import HEADER_FIELD class ParsingError(Exception): @@ -38,7 +39,6 @@ class ParsingError(Exception): class TransferEncodingNotImplemented(Exception): pass - class HTTPRequestParser(object): """A structure that collects the HTTP request. @@ -208,26 +208,27 @@ def parse_header(self, header_plus): headers = self.headers for line in lines: - index = line.find(b":") - if index > 0: - key = line[:index] - - if key != key.strip(): - raise ParsingError("Invalid whitespace after field-name") - - if b"_" in key: - continue - value = line[index + 1 :].strip() - key1 = tostr(key.upper().replace(b"-", b"_")) - # If a header already exists, we append subsequent values - # seperated by a comma. Applications already need to handle - # the comma seperated values, as HTTP front ends might do - # the concatenation for you (behavior specified in RFC2616). - try: - headers[key1] += tostr(b", " + value) - except KeyError: - headers[key1] = tostr(value) - # else there's garbage in the headers? + header = HEADER_FIELD.match(line) + + if not header: + raise ParsingError("Invalid header") + + key, value = header.group('name', 'value') + + if b"_" in key: + # TODO(xistence): Should we drop this request instead? + continue + + value = value.strip() + key1 = tostr(key.upper().replace(b"-", b"_")) + # If a header already exists, we append subsequent values + # seperated by a comma. Applications already need to handle + # the comma seperated values, as HTTP front ends might do + # the concatenation for you (behavior specified in RFC2616). + try: + headers[key1] += tostr(b", " + value) + except KeyError: + headers[key1] = tostr(value) # command, uri, version will be bytes command, uri, version = crack_first_line(first_line) @@ -352,6 +353,9 @@ def get_header_lines(header): r = [] lines = header.split(b"\r\n") for line in lines: + if not line: + continue + if b"\r" in line or b"\n" in line: raise ParsingError('Bare CR or LF found in header line "%s"' % tostr(line)) diff --git a/waitress/rfc7230.py b/waitress/rfc7230.py new file mode 100644 index 00000000..a9f047c8 --- /dev/null +++ b/waitress/rfc7230.py @@ -0,0 +1,44 @@ +""" +This contains a bunch of RFC7230 definitions and regular expressions that are +needed to properly parse HTTP messages. +""" + +import re + +from .compat import tobytes + +WS = "[ \t]" +OWS = WS + "{0,}?" +RWS = WS + "{1,}?" +BWS = OWS + +# RFC 7230 Section 3.2.6 "Field Value Components": +# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" +# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" +# / DIGIT / ALPHA +# obs-text = %x80-FF +TCHAR = r"[!#$%&'*+\-.^_`|~0-9A-Za-z]" +OBS_TEXT = r"\x80-\xff" + +TOKEN = TCHAR + "{1,}" + +# RFC 5234 Appendix B.1 "Core Rules": +# VCHAR = %x21-7E +# ; visible (printing) characters +VCHAR = r"\x21-\x7e" + +# header-field = field-name ":" OWS field-value OWS +# field-name = token +# field-value = *( field-content / obs-fold ) +# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] +# field-vchar = VCHAR / obs-text + +FIELD_VCHAR = "[" + VCHAR + OBS_TEXT + "]" +FIELD_CONTENT = FIELD_VCHAR + "(" + RWS + FIELD_VCHAR + "){0,}" +FIELD_VALUE = "(" + FIELD_CONTENT + "){0,}" + +HEADER_FIELD = re.compile( + tobytes( + "^(?P" + TOKEN + "):" + OWS + "(?P" + FIELD_VALUE + ")" + OWS + "$" + ) +) diff --git a/waitress/tests/test_parser.py b/waitress/tests/test_parser.py index 1a95e23e..8d42600b 100644 --- a/waitress/tests/test_parser.py +++ b/waitress/tests/test_parser.py @@ -308,10 +308,93 @@ def test_parse_header_invalid_whitespace(self): try: self.parser.parse_header(data) except ParsingError as e: - self.assertIn("Invalid whitespace after field-name", e.args[0]) + self.assertIn("Invalid header", e.args[0]) else: # pragma: nocover self.assertTrue(False) + def test_parse_header_invalid_whitespace_vtab(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo:\x0bbar\r\n" + try: + self.parser.parse_header(data) + except ParsingError as e: + self.assertIn("Invalid header", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + + def test_parse_header_invalid_no_colon(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\nnotvalid\r\n" + try: + self.parser.parse_header(data) + except ParsingError as e: + self.assertIn("Invalid header", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + + def test_parse_header_invalid_folding_spacing(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\n\t\x0bbaz\r\n" + try: + self.parser.parse_header(data) + except ParsingError as e: + self.assertIn("Invalid header", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + + def test_parse_header_invalid_chars(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\n\foo: \x0bbaz\r\n" + try: + self.parser.parse_header(data) + except ParsingError as e: + self.assertIn("Invalid header", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + + def test_parse_header_empty(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\nempty:\r\n" + self.parser.parse_header(data) + + self.assertIn("EMPTY", self.parser.headers) + self.assertIn("FOO", self.parser.headers) + self.assertEqual(self.parser.headers["EMPTY"], "") + self.assertEqual(self.parser.headers["FOO"], "bar") + + def test_parse_header_multiple_values(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever, more, please, yes\r\n" + self.parser.parse_header(data) + + self.assertIn("FOO", self.parser.headers) + self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes") + + def test_parse_header_multiple_values_header_folded(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever,\r\n more, please, yes\r\n" + self.parser.parse_header(data) + + self.assertIn("FOO", self.parser.headers) + self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes") + + def test_parse_header_multiple_values_header_folded_multiple(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever,\r\n more\r\nfoo: please, yes\r\n" + self.parser.parse_header(data) + + self.assertIn("FOO", self.parser.headers) + self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes") + + class Test_split_uri(unittest.TestCase): def _callFUT(self, uri): diff --git a/waitress/utilities.py b/waitress/utilities.py index 03368974..556bed20 100644 --- a/waitress/utilities.py +++ b/waitress/utilities.py @@ -22,6 +22,8 @@ import stat import time +from .rfc7230 import OBS_TEXT, VCHAR + logger = logging.getLogger("waitress") queue_logger = logging.getLogger("waitress.queue") @@ -63,6 +65,7 @@ def group(s): long_day_reg = group(join(long_days, "|")) daymap = {} + for i in range(7): daymap[short_days[i]] = i daymap[long_days[i]] = i @@ -85,6 +88,7 @@ def group(s): ] monmap = {} + for i in range(12): monmap[months[i]] = i + 1 @@ -113,6 +117,7 @@ def group(s): def unpack_rfc822(m): g = m.group + return ( int(g(4)), # year monmap[g(3)], # month @@ -142,8 +147,10 @@ def unpack_rfc822(m): def unpack_rfc850(m): g = m.group yr = g(4) + if len(yr) == 2: yr = "19" + yr + return ( int(yr), # year monmap[g(3)], # month @@ -180,6 +187,7 @@ def unpack_rfc850(m): def build_http_date(when): year, month, day, hh, mm, ss, wd, y, z = time.gmtime(when) + return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( weekdayname[wd], day, @@ -194,28 +202,31 @@ def build_http_date(when): def parse_http_date(d): d = d.lower() m = rfc850_reg.match(d) + if m and m.end() == len(d): retval = int(calendar.timegm(unpack_rfc850(m))) else: m = rfc822_reg.match(d) + if m and m.end() == len(d): retval = int(calendar.timegm(unpack_rfc822(m))) else: return 0 + return retval # RFC 5234 Appendix B.1 "Core Rules": # VCHAR = %x21-7E # ; visible (printing) characters -vchar_re = "\x21-\x7e" +vchar_re = VCHAR # RFC 7230 Section 3.2.6 "Field Value Components": # quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE # qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text # obs-text = %x80-FF # quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) -obs_text_re = "\x80-\xff" +obs_text_re = OBS_TEXT # The '\\' between \x5b and \x5d is needed to escape \x5d (']') qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]"