From: Ben Darnell Date: Sun, 5 Jun 2011 20:10:07 +0000 (-0700) Subject: Refactor redundant code out of httpserver.py and wsgi.py to httputil.py X-Git-Tag: v2.0.0~31 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4686042faf43665f11a1838dc30ecd8d9e2aa1f6;p=thirdparty%2Ftornado.git Refactor redundant code out of httpserver.py and wsgi.py to httputil.py --- diff --git a/tornado/httpserver.py b/tornado/httpserver.py index 5c575c8be..04a7157b8 100644 --- a/tornado/httpserver.py +++ b/tornado/httpserver.py @@ -405,54 +405,15 @@ class HTTPConnection(object): for field in fields: k, sep, v = field.strip().partition("=") if k == "boundary" and v: - self._parse_mime_body(utf8(v), data) + httputil.parse_multipart_form_data( + utf8(v), data, + self._request.arguments, + self._request.files) break else: logging.warning("Invalid multipart/form-data") self.request_callback(self._request) - def _parse_mime_body(self, boundary, data): - # The standard allows for the boundary to be quoted in the header, - # although it's rare (it happens at least for google app engine - # xmpp). I think we're also supposed to handle backslash-escapes - # here but I'll save that until we see a client that uses them - # in the wild. - if boundary.startswith(b('"')) and boundary.endswith(b('"')): - boundary = boundary[1:-1] - if data.endswith(b("\r\n")): - footer_length = len(boundary) + 6 - else: - footer_length = len(boundary) + 4 - parts = data[:-footer_length].split(b("--") + boundary + b("\r\n")) - for part in parts: - if not part: continue - eoh = part.find(b("\r\n\r\n")) - if eoh == -1: - logging.warning("multipart/form-data missing headers") - continue - headers = httputil.HTTPHeaders.parse(part[:eoh].decode("utf-8")) - name_header = headers.get("Content-Disposition", "") - if not name_header.startswith("form-data;") or \ - not part.endswith(b("\r\n")): - logging.warning("Invalid multipart/form-data") - continue - value = part[eoh + 4:-2] - name_values = {} - for name_part in name_header[10:].split(";"): - name, name_value = name_part.strip().split("=", 1) - name_values[name] = name_value.strip('"') - if not name_values.get("name"): - logging.warning("multipart/form-data value missing name") - continue - name = name_values["name"] - if name_values.get("filename"): - ctype = headers.get("Content-Type", "application/unknown") - self._request.files.setdefault(name, []).append(dict( - filename=name_values["filename"], body=value, - content_type=ctype)) - else: - self._request.arguments.setdefault(name, []).append(value) - class HTTPRequest(object): """A single HTTP request. diff --git a/tornado/httputil.py b/tornado/httputil.py index 04a21fbc0..97d796b83 100644 --- a/tornado/httputil.py +++ b/tornado/httputil.py @@ -19,6 +19,8 @@ import urllib import re +from tornado.util import b + class HTTPHeaders(dict): """A dictionary that maintains Http-Header-Case for all keys. @@ -161,6 +163,54 @@ def url_concat(url, args): url += '&' if ('?' in url) else '?' return url + urllib.urlencode(args) +def parse_multipart_form_data(boundary, data, arguments, files): + """Parses a multipart/form-data body. + + The boundary and data parameters are both byte strings. + The dictionaries given in the arguments and files parameters + will be updated with the contents of the body. + """ + # The standard allows for the boundary to be quoted in the header, + # although it's rare (it happens at least for google app engine + # xmpp). I think we're also supposed to handle backslash-escapes + # here but I'll save that until we see a client that uses them + # in the wild. + if boundary.startswith(b('"')) and boundary.endswith(b('"')): + boundary = boundary[1:-1] + if data.endswith(b("\r\n")): + footer_length = len(boundary) + 6 + else: + footer_length = len(boundary) + 4 + parts = data[:-footer_length].split(b("--") + boundary + b("\r\n")) + for part in parts: + if not part: continue + eoh = part.find(b("\r\n\r\n")) + if eoh == -1: + logging.warning("multipart/form-data missing headers") + continue + headers = HTTPHeaders.parse(part[:eoh].decode("utf-8")) + name_header = headers.get("Content-Disposition", "") + if not name_header.startswith("form-data;") or \ + not part.endswith(b("\r\n")): + logging.warning("Invalid multipart/form-data") + continue + value = part[eoh + 4:-2] + name_values = {} + for name_part in name_header[10:].split(";"): + name, name_value = name_part.strip().split("=", 1) + name_values[name] = name_value.strip('"') + if not name_values.get("name"): + logging.warning("multipart/form-data value missing name") + continue + name = name_values["name"] + if name_values.get("filename"): + ctype = headers.get("Content-Type", "application/unknown") + files.setdefault(name, []).append(dict( + filename=name_values["filename"], body=value, + content_type=ctype)) + else: + arguments.setdefault(name, []).append(value) + def doctests(): import doctest diff --git a/tornado/wsgi.py b/tornado/wsgi.py index 20ec8975c..e500dfea9 100644 --- a/tornado/wsgi.py +++ b/tornado/wsgi.py @@ -139,7 +139,9 @@ class HTTPRequest(object): elif content_type.startswith("multipart/form-data"): if 'boundary=' in content_type: boundary = content_type.split('boundary=',1)[1] - if boundary: self._parse_mime_body(utf8(boundary)) + if boundary: + httputil.parse_multipart_form_data( + utf8(boundary), self.body, self.arguments, self.files) else: logging.warning("Invalid multipart/form-data") @@ -161,43 +163,6 @@ class HTTPRequest(object): else: return self._finish_time - self._start_time - def _parse_mime_body(self, boundary): - if boundary.startswith(b('"')) and boundary.endswith(b('"')): - boundary = boundary[1:-1] - if self.body.endswith(b("\r\n")): - footer_length = len(boundary) + 6 - else: - footer_length = len(boundary) + 4 - parts = self.body[:-footer_length].split(b("--") + boundary + b("\r\n")) - for part in parts: - if not part: continue - eoh = part.find(b("\r\n\r\n")) - if eoh == -1: - logging.warning("multipart/form-data missing headers") - continue - headers = httputil.HTTPHeaders.parse(part[:eoh].decode("utf-8")) - name_header = headers.get("Content-Disposition", "") - if not name_header.startswith("form-data;") or \ - not part.endswith(b("\r\n")): - logging.warning("Invalid multipart/form-data") - continue - value = part[eoh + 4:-2] - name_values = {} - for name_part in name_header[10:].split(";"): - name, name_value = name_part.strip().split("=", 1) - name_values[name] = name_value.strip('"') - if not name_values.get("name"): - logging.warning("multipart/form-data value missing name") - continue - name = name_values["name"] - if name_values.get("filename"): - ctype = headers.get("Content-Type", "application/unknown") - self.files.setdefault(name, []).append(dict( - filename=name_values["filename"], body=value, - content_type=ctype)) - else: - self.arguments.setdefault(name, []).append(value) - class WSGIContainer(object): """Makes a WSGI-compatible function runnable on Tornado's HTTP server.