From: Ben Darnell <ben@bendarnell.com>
Date: Sun, 5 Jun 2011 20:10:07 +0000 (-0700)
Subject: Refactor redundant code out of httpserver.py and wsgi.py to httputil.py
X-Git-Tag: v2.0.0~31
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4686042faf43665f11a1838dc30ecd8d9e2aa1f6;p=thirdparty%2Ftornado.git

Refactor redundant code out of httpserver.py and wsgi.py to httputil.py
---

diff --git a/tornado/httpserver.py b/tornado/httpserver.py
index 5c575c8be..04a7157b8 100644
--- a/tornado/httpserver.py
+++ b/tornado/httpserver.py
@@ -405,54 +405,15 @@ class HTTPConnection(object):
                 for field in fields:
                     k, sep, v = field.strip().partition("=")
                     if k == "boundary" and v:
-                        self._parse_mime_body(utf8(v), data)
+                        httputil.parse_multipart_form_data(
+                            utf8(v), data,
+                            self._request.arguments,
+                            self._request.files)
                         break
                 else:
                     logging.warning("Invalid multipart/form-data")
         self.request_callback(self._request)
 
-    def _parse_mime_body(self, boundary, data):
-        # The standard allows for the boundary to be quoted in the header,
-        # although it's rare (it happens at least for google app engine
-        # xmpp).  I think we're also supposed to handle backslash-escapes
-        # here but I'll save that until we see a client that uses them
-        # in the wild.
-        if boundary.startswith(b('"')) and boundary.endswith(b('"')):
-            boundary = boundary[1:-1]
-        if data.endswith(b("\r\n")):
-            footer_length = len(boundary) + 6
-        else:
-            footer_length = len(boundary) + 4
-        parts = data[:-footer_length].split(b("--") + boundary + b("\r\n"))
-        for part in parts:
-            if not part: continue
-            eoh = part.find(b("\r\n\r\n"))
-            if eoh == -1:
-                logging.warning("multipart/form-data missing headers")
-                continue
-            headers = httputil.HTTPHeaders.parse(part[:eoh].decode("utf-8"))
-            name_header = headers.get("Content-Disposition", "")
-            if not name_header.startswith("form-data;") or \
-               not part.endswith(b("\r\n")):
-                logging.warning("Invalid multipart/form-data")
-                continue
-            value = part[eoh + 4:-2]
-            name_values = {}
-            for name_part in name_header[10:].split(";"):
-                name, name_value = name_part.strip().split("=", 1)
-                name_values[name] = name_value.strip('"')
-            if not name_values.get("name"):
-                logging.warning("multipart/form-data value missing name")
-                continue
-            name = name_values["name"]
-            if name_values.get("filename"):
-                ctype = headers.get("Content-Type", "application/unknown")
-                self._request.files.setdefault(name, []).append(dict(
-                    filename=name_values["filename"], body=value,
-                    content_type=ctype))
-            else:
-                self._request.arguments.setdefault(name, []).append(value)
-
 
 class HTTPRequest(object):
     """A single HTTP request.
diff --git a/tornado/httputil.py b/tornado/httputil.py
index 04a21fbc0..97d796b83 100644
--- a/tornado/httputil.py
+++ b/tornado/httputil.py
@@ -19,6 +19,8 @@
 import urllib
 import re
 
+from tornado.util import b
+
 class HTTPHeaders(dict):
     """A dictionary that maintains Http-Header-Case for all keys.
 
@@ -161,6 +163,54 @@ def url_concat(url, args):
         url += '&' if ('?' in url) else '?'
     return url + urllib.urlencode(args)
 
+def parse_multipart_form_data(boundary, data, arguments, files):
+    """Parses a multipart/form-data body.
+
+    boundary and data are byte strings.  Parts that carry a filename are
+    appended to files[name] as dict(filename, body, content_type); other
+    parts append their body to arguments[name].  Bad parts are skipped.
+    """
+    # The standard allows the boundary to be quoted in the header (rare,
+    # but it happens at least for google app engine xmpp).  We are probably
+    # also supposed to handle backslash-escapes here; that is deferred
+    # until we see a client that uses them in the wild.
+    if boundary.startswith(b('"')) and boundary.endswith(b('"')):
+        boundary = boundary[1:-1]
+    if data.endswith(b("\r\n")):
+        footer_length = len(boundary) + 6  # "--" + boundary + "--\r\n"
+    else:
+        footer_length = len(boundary) + 4  # "--" + boundary + "--"
+    parts = data[:-footer_length].split(b("--") + boundary + b("\r\n"))
+    for part in parts:
+        if not part: continue
+        eoh = part.find(b("\r\n\r\n"))
+        if eoh == -1:
+            logging.warning("multipart/form-data missing headers")
+            continue
+        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
+        name_header = headers.get("Content-Disposition", "")
+        if not name_header.startswith("form-data;") or \
+           not part.endswith(b("\r\n")):
+            logging.warning("Invalid multipart/form-data")
+            continue
+        value = part[eoh + 4:-2]  # between "\r\n\r\n" and trailing "\r\n"
+        name_values = {}
+        for name_part in name_header[10:].split(";"):  # skip "form-data;"
+            # Unlike split(), partition() tolerates a parameter without "=".
+            name, sep, name_value = name_part.strip().partition("=")
+            name_values[name] = name_value.strip('"')
+        if not name_values.get("name"):
+            logging.warning("multipart/form-data value missing name")
+            continue
+        name = name_values["name"]
+        if name_values.get("filename"):
+            ctype = headers.get("Content-Type", "application/unknown")
+            files.setdefault(name, []).append(dict(
+                filename=name_values["filename"], body=value,
+                content_type=ctype))
+        else:
+            arguments.setdefault(name, []).append(value)
+
 
 def doctests():
     import doctest
diff --git a/tornado/wsgi.py b/tornado/wsgi.py
index 20ec8975c..e500dfea9 100644
--- a/tornado/wsgi.py
+++ b/tornado/wsgi.py
@@ -139,7 +139,9 @@ class HTTPRequest(object):
         elif content_type.startswith("multipart/form-data"):
             if 'boundary=' in content_type:
                 boundary = content_type.split('boundary=',1)[1]
-                if boundary: self._parse_mime_body(utf8(boundary))
+                if boundary:
+                    httputil.parse_multipart_form_data(
+                        utf8(boundary), self.body, self.arguments, self.files)
             else:
                 logging.warning("Invalid multipart/form-data")
 
@@ -161,43 +163,6 @@ class HTTPRequest(object):
         else:
             return self._finish_time - self._start_time
 
-    def _parse_mime_body(self, boundary):
-        if boundary.startswith(b('"')) and boundary.endswith(b('"')):
-            boundary = boundary[1:-1]
-        if self.body.endswith(b("\r\n")):
-            footer_length = len(boundary) + 6
-        else:
-            footer_length = len(boundary) + 4
-        parts = self.body[:-footer_length].split(b("--") + boundary + b("\r\n"))
-        for part in parts:
-            if not part: continue
-            eoh = part.find(b("\r\n\r\n"))
-            if eoh == -1:
-                logging.warning("multipart/form-data missing headers")
-                continue
-            headers = httputil.HTTPHeaders.parse(part[:eoh].decode("utf-8"))
-            name_header = headers.get("Content-Disposition", "")
-            if not name_header.startswith("form-data;") or \
-               not part.endswith(b("\r\n")):
-                logging.warning("Invalid multipart/form-data")
-                continue
-            value = part[eoh + 4:-2]
-            name_values = {}
-            for name_part in name_header[10:].split(";"):
-                name, name_value = name_part.strip().split("=", 1)
-                name_values[name] = name_value.strip('"')
-            if not name_values.get("name"):
-                logging.warning("multipart/form-data value missing name")
-                continue
-            name = name_values["name"]
-            if name_values.get("filename"):
-                ctype = headers.get("Content-Type", "application/unknown")
-                self.files.setdefault(name, []).append(dict(
-                    filename=name_values["filename"], body=value,
-                    content_type=ctype))
-            else:
-                self.arguments.setdefault(name, []).append(value)
-
 
 class WSGIContainer(object):
     """Makes a WSGI-compatible function runnable on Tornado's HTTP server.