From: Ben Darnell Date: Mon, 31 May 2010 05:16:11 +0000 (-0700) Subject: Improve parsing of multipart/form-data headers. X-Git-Tag: v1.0.0~33 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2b0684028a9ca38318ce5038fa611e7274576e22;p=thirdparty%2Ftornado.git Improve parsing of multipart/form-data headers. This change was motivated by google app engine's xmpp support, which uses different spacing in the header than other common clients and quotes its boundary string. Based on changes by jehiah: http://github.com/jehiah/tornado/commit/18cb45ca73859fa81883bd10c9cd8e051865096a http://github.com/jehiah/tornado/commit/9d67963466878550368b932746bb4a244a593905 --- diff --git a/tornado/httpserver.py b/tornado/httpserver.py index 70e23c20a..6f448aded 100644 --- a/tornado/httpserver.py +++ b/tornado/httpserver.py @@ -305,11 +305,21 @@ class HTTPConnection(object): self._request.arguments.setdefault(name, []).extend( values) elif content_type.startswith("multipart/form-data"): - boundary = content_type[30:] - if boundary: self._parse_mime_body(boundary, data) + if 'boundary=' in content_type: + boundary = content_type.split('boundary=',1)[1] + if boundary: self._parse_mime_body(boundary, data) + else: + logging.warning("Invalid multipart/form-data") self.request_callback(self._request) def _parse_mime_body(self, boundary, data): + # The standard allows for the boundary to be quoted in the header, + # although it's rare (it happens at least for google app engine + # xmpp). I think we're also supposed to handle backslash-escapes + # here but I'll save that until we see a client that uses them + # in the wild. + if boundary.startswith('"') and boundary.endswith('"'): + boundary = boundary[1:-1] if data.endswith("\r\n"): footer_length = len(boundary) + 6 else: diff --git a/tornado/wsgi.py b/tornado/wsgi.py index c14bab9ca..181429cb3 100644 --- a/tornado/wsgi.py +++ b/tornado/wsgi.py @@ -126,8 +126,11 @@ class HTTPRequest(object): for name, values in cgi.parse_qs(self.body).iteritems(): self.arguments.setdefault(name, []).extend(values) elif content_type.startswith("multipart/form-data"): - boundary = content_type[30:] - if boundary: self._parse_mime_body(boundary) + if 'boundary=' in content_type: + boundary = content_type.split('boundary=',1)[1] + if boundary: self._parse_mime_body(boundary) + else: + logging.warning("Invalid multipart/form-data") self._start_time = time.time() self._finish_time = None @@ -148,6 +151,8 @@ class HTTPRequest(object): return self._finish_time - self._start_time def _parse_mime_body(self, boundary): + if boundary.startswith('"') and boundary.endswith('"'): + boundary = boundary[1:-1] if self.body.endswith("\r\n"): footer_length = len(boundary) + 6 else: