Skip to content

Commit

Permalink
Merge pull request from GHSA-m5ff-3wj3-8ph4
Browse files Browse the repository at this point in the history
HTTP header-field stricter validation
  • Loading branch information
digitalresistor authored Dec 24, 2019
2 parents f11093a + a046a76 commit 11d9e13
Show file tree
Hide file tree
Showing 6 changed files with 194 additions and 25 deletions.
27 changes: 27 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
1.4.1 (2019-12-??)
------------------

Security Fixes
~~~~~~~~~~~~~~

- Waitress did not properly validate that the HTTP headers it received were
properly formed, thereby potentially allowing a front-end server to treat a
request different from Waitress. This could lead to HTTP request
smuggling/splitting.

Please see the security advisory for more information:
https://github.com/Pylons/waitress/security/advisories/GHSA-m5ff-3wj3-8ph4

1.4.0 (2019-12-20)
------------------

Expand Down Expand Up @@ -36,6 +50,11 @@ Security Fixes
For more information I can highly recommend the blog post by ZeddYu Lu
https://blog.zeddyu.info/2019/12/08/HTTP-Smuggling-en/

Please see the security advisory for more information:
https://github.com/Pylons/waitress/security/advisories/GHSA-pg36-wpm5-g57p

CVE-ID: CVE-2019-16785

- Waitress used to treat LF the same as CRLF in ``Transfer-Encoding: chunked``
requests, while the maintainer doesn't believe this could lead to a security
issue, this is no longer supported and all chunks are now validated to be
Expand All @@ -61,6 +80,11 @@ Security Fixes
``Transfer-Encoding: chunked`` instead of ``Transfer-Encoding: identity,
chunked``.

PLease see the security advisory for more information:
https://github.com/Pylons/waitress/security/advisories/GHSA-g2xc-35jw-c63p

CVE-ID: CVE-2019-16786

- While validating the ``Transfer-Encoding`` header, Waitress now properly
handles line-folded ``Transfer-Encoding`` headers or those that contain
multiple comma seperated values. This closes a potential issue where a
Expand All @@ -75,3 +99,6 @@ Security Fixes
for a potential request to be split and treated as two requests by HTTP
pipelining support in Waitress. If Waitress is now unable to parse the
Content-Length header, a 400 Bad Request is sent back to the client.

Please see the security advisory for more information:
https://github.com/Pylons/waitress/security/advisories/GHSA-4ppp-gpcr-7qf6
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

setup(
name="waitress",
version="1.4.0",
version="1.4.1",
author="Zope Foundation and Contributors",
author_email="[email protected]",
maintainer="Pylons Project",
Expand Down
46 changes: 25 additions & 21 deletions waitress/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
ServerNotImplemented,
find_double_newline,
)
from .rfc7230 import HEADER_FIELD


class ParsingError(Exception):
Expand All @@ -38,7 +39,6 @@ class ParsingError(Exception):
class TransferEncodingNotImplemented(Exception):
pass


class HTTPRequestParser(object):
"""A structure that collects the HTTP request.
Expand Down Expand Up @@ -208,26 +208,27 @@ def parse_header(self, header_plus):

headers = self.headers
for line in lines:
index = line.find(b":")
if index > 0:
key = line[:index]

if key != key.strip():
raise ParsingError("Invalid whitespace after field-name")

if b"_" in key:
continue
value = line[index + 1 :].strip()
key1 = tostr(key.upper().replace(b"-", b"_"))
# If a header already exists, we append subsequent values
# seperated by a comma. Applications already need to handle
# the comma seperated values, as HTTP front ends might do
# the concatenation for you (behavior specified in RFC2616).
try:
headers[key1] += tostr(b", " + value)
except KeyError:
headers[key1] = tostr(value)
# else there's garbage in the headers?
header = HEADER_FIELD.match(line)

if not header:
raise ParsingError("Invalid header")

key, value = header.group('name', 'value')

if b"_" in key:
# TODO(xistence): Should we drop this request instead?
continue

value = value.strip()
key1 = tostr(key.upper().replace(b"-", b"_"))
# If a header already exists, we append subsequent values
# seperated by a comma. Applications already need to handle
# the comma seperated values, as HTTP front ends might do
# the concatenation for you (behavior specified in RFC2616).
try:
headers[key1] += tostr(b", " + value)
except KeyError:
headers[key1] = tostr(value)

# command, uri, version will be bytes
command, uri, version = crack_first_line(first_line)
Expand Down Expand Up @@ -352,6 +353,9 @@ def get_header_lines(header):
r = []
lines = header.split(b"\r\n")
for line in lines:
if not line:
continue

if b"\r" in line or b"\n" in line:
raise ParsingError('Bare CR or LF found in header line "%s"' % tostr(line))

Expand Down
44 changes: 44 additions & 0 deletions waitress/rfc7230.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
This contains a bunch of RFC7230 definitions and regular expressions that are
needed to properly parse HTTP messages.
"""

import re

from .compat import tobytes

WS = "[ \t]"
OWS = WS + "{0,}?"
RWS = WS + "{1,}?"
BWS = OWS

# RFC 7230 Section 3.2.6 "Field Value Components":
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
# / DIGIT / ALPHA
# obs-text = %x80-FF
TCHAR = r"[!#$%&'*+\-.^_`|~0-9A-Za-z]"
OBS_TEXT = r"\x80-\xff"

TOKEN = TCHAR + "{1,}"

# RFC 5234 Appendix B.1 "Core Rules":
# VCHAR = %x21-7E
# ; visible (printing) characters
VCHAR = r"\x21-\x7e"

# header-field = field-name ":" OWS field-value OWS
# field-name = token
# field-value = *( field-content / obs-fold )
# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar = VCHAR / obs-text

FIELD_VCHAR = "[" + VCHAR + OBS_TEXT + "]"
FIELD_CONTENT = FIELD_VCHAR + "(" + RWS + FIELD_VCHAR + "){0,}"
FIELD_VALUE = "(" + FIELD_CONTENT + "){0,}"

HEADER_FIELD = re.compile(
tobytes(
"^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
)
)
85 changes: 84 additions & 1 deletion waitress/tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,10 +308,93 @@ def test_parse_header_invalid_whitespace(self):
try:
self.parser.parse_header(data)
except ParsingError as e:
self.assertIn("Invalid whitespace after field-name", e.args[0])
self.assertIn("Invalid header", e.args[0])
else: # pragma: nocover
self.assertTrue(False)

def test_parse_header_invalid_whitespace_vtab(self):
from waitress.parser import ParsingError

data = b"GET /foobar HTTP/1.1\r\nfoo:\x0bbar\r\n"
try:
self.parser.parse_header(data)
except ParsingError as e:
self.assertIn("Invalid header", e.args[0])
else: # pragma: nocover
self.assertTrue(False)

def test_parse_header_invalid_no_colon(self):
from waitress.parser import ParsingError

data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\nnotvalid\r\n"
try:
self.parser.parse_header(data)
except ParsingError as e:
self.assertIn("Invalid header", e.args[0])
else: # pragma: nocover
self.assertTrue(False)

def test_parse_header_invalid_folding_spacing(self):
from waitress.parser import ParsingError

data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\n\t\x0bbaz\r\n"
try:
self.parser.parse_header(data)
except ParsingError as e:
self.assertIn("Invalid header", e.args[0])
else: # pragma: nocover
self.assertTrue(False)

def test_parse_header_invalid_chars(self):
from waitress.parser import ParsingError

data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\n\foo: \x0bbaz\r\n"
try:
self.parser.parse_header(data)
except ParsingError as e:
self.assertIn("Invalid header", e.args[0])
else: # pragma: nocover
self.assertTrue(False)

def test_parse_header_empty(self):
from waitress.parser import ParsingError

data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\nempty:\r\n"
self.parser.parse_header(data)

self.assertIn("EMPTY", self.parser.headers)
self.assertIn("FOO", self.parser.headers)
self.assertEqual(self.parser.headers["EMPTY"], "")
self.assertEqual(self.parser.headers["FOO"], "bar")

def test_parse_header_multiple_values(self):
from waitress.parser import ParsingError

data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever, more, please, yes\r\n"
self.parser.parse_header(data)

self.assertIn("FOO", self.parser.headers)
self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes")

def test_parse_header_multiple_values_header_folded(self):
from waitress.parser import ParsingError

data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever,\r\n more, please, yes\r\n"
self.parser.parse_header(data)

self.assertIn("FOO", self.parser.headers)
self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes")

def test_parse_header_multiple_values_header_folded_multiple(self):
from waitress.parser import ParsingError

data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever,\r\n more\r\nfoo: please, yes\r\n"
self.parser.parse_header(data)

self.assertIn("FOO", self.parser.headers)
self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes")



class Test_split_uri(unittest.TestCase):
def _callFUT(self, uri):
Expand Down
15 changes: 13 additions & 2 deletions waitress/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import stat
import time

from .rfc7230 import OBS_TEXT, VCHAR

logger = logging.getLogger("waitress")
queue_logger = logging.getLogger("waitress.queue")

Expand Down Expand Up @@ -63,6 +65,7 @@ def group(s):
long_day_reg = group(join(long_days, "|"))

daymap = {}

for i in range(7):
daymap[short_days[i]] = i
daymap[long_days[i]] = i
Expand All @@ -85,6 +88,7 @@ def group(s):
]

monmap = {}

for i in range(12):
monmap[months[i]] = i + 1

Expand Down Expand Up @@ -113,6 +117,7 @@ def group(s):

def unpack_rfc822(m):
g = m.group

return (
int(g(4)), # year
monmap[g(3)], # month
Expand Down Expand Up @@ -142,8 +147,10 @@ def unpack_rfc822(m):
def unpack_rfc850(m):
g = m.group
yr = g(4)

if len(yr) == 2:
yr = "19" + yr

return (
int(yr), # year
monmap[g(3)], # month
Expand Down Expand Up @@ -180,6 +187,7 @@ def unpack_rfc850(m):

def build_http_date(when):
year, month, day, hh, mm, ss, wd, y, z = time.gmtime(when)

return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
weekdayname[wd],
day,
Expand All @@ -194,28 +202,31 @@ def build_http_date(when):
def parse_http_date(d):
d = d.lower()
m = rfc850_reg.match(d)

if m and m.end() == len(d):
retval = int(calendar.timegm(unpack_rfc850(m)))
else:
m = rfc822_reg.match(d)

if m and m.end() == len(d):
retval = int(calendar.timegm(unpack_rfc822(m)))
else:
return 0

return retval


# RFC 5234 Appendix B.1 "Core Rules":
# VCHAR = %x21-7E
# ; visible (printing) characters
vchar_re = "\x21-\x7e"
vchar_re = VCHAR

# RFC 7230 Section 3.2.6 "Field Value Components":
# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
# qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
# obs-text = %x80-FF
# quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
obs_text_re = "\x80-\xff"
obs_text_re = OBS_TEXT

# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]"
Expand Down

0 comments on commit 11d9e13

Please sign in to comment.