Refactor redundant code out of httpserver.py and wsgi.py to httputil.py

author Ben Darnell <ben@bendarnell.com>

Sun, 5 Jun 2011 20:10:07 +0000 (13:10 -0700)

committer Ben Darnell <ben@bendarnell.com>

Sun, 5 Jun 2011 20:10:07 +0000 (13:10 -0700)
author Ben Darnell <ben@bendarnell.com>
Sun, 5 Jun 2011 20:10:07 +0000 (13:10 -0700)
committer Ben Darnell <ben@bendarnell.com>
Sun, 5 Jun 2011 20:10:07 +0000 (13:10 -0700)
diff --git a/tornado/httpserver.py b/tornado/httpserver.py

index 5c575c8beb6849c310b959eea3ccfc9829694818..04a7157b8553d2e8a873d08d16f89c5120d92df7 100644 (file)
--- a/tornado/httpserver.py
+++ b/tornado/httpserver.py
@@ -405,54 +405,15 @@ class HTTPConnection(object):
                  for field in fields:
                      k, sep, v = field.strip().partition("=")
                      if k == "boundary" and v:
-                        self._parse_mime_body(utf8(v), data)
+                        httputil.parse_multipart_form_data(
+                            utf8(v), data,
+                            self._request.arguments,
+                            self._request.files)
                          break
                  else:
                      logging.warning("Invalid multipart/form-data")
          self.request_callback(self._request)
  
-    def _parse_mime_body(self, boundary, data):
-        # The standard allows for the boundary to be quoted in the header,
-        # although it's rare (it happens at least for google app engine
-        # xmpp).  I think we're also supposed to handle backslash-escapes
-        # here but I'll save that until we see a client that uses them
-        # in the wild.
-        if boundary.startswith(b('"')) and boundary.endswith(b('"')):
-            boundary = boundary[1:-1]
-        if data.endswith(b("\r\n")):
-            footer_length = len(boundary) + 6
-        else:
-            footer_length = len(boundary) + 4
-        parts = data[:-footer_length].split(b("--") + boundary + b("\r\n"))
-        for part in parts:
-            if not part: continue
-            eoh = part.find(b("\r\n\r\n"))
-            if eoh == -1:
-                logging.warning("multipart/form-data missing headers")
-                continue
-            headers = httputil.HTTPHeaders.parse(part[:eoh].decode("utf-8"))
-            name_header = headers.get("Content-Disposition", "")
-            if not name_header.startswith("form-data;") or \
-               not part.endswith(b("\r\n")):
-                logging.warning("Invalid multipart/form-data")
-                continue
-            value = part[eoh + 4:-2]
-            name_values = {}
-            for name_part in name_header[10:].split(";"):
-                name, name_value = name_part.strip().split("=", 1)
-                name_values[name] = name_value.strip('"')
-            if not name_values.get("name"):
-                logging.warning("multipart/form-data value missing name")
-                continue
-            name = name_values["name"]
-            if name_values.get("filename"):
-                ctype = headers.get("Content-Type", "application/unknown")
-                self._request.files.setdefault(name, []).append(dict(
-                    filename=name_values["filename"], body=value,
-                    content_type=ctype))
-            else:
-                self._request.arguments.setdefault(name, []).append(value)
-
  
  class HTTPRequest(object):
      """A single HTTP request.
diff --git a/tornado/httputil.py b/tornado/httputil.py

index 04a21fbc0276a6700b4cba5afd38b576d7cbf8e0..97d796b83ab1aa2b4fc14b438f3905e89507fc54 100644 (file)
--- a/tornado/httputil.py
+++ b/tornado/httputil.py
@@ -19,6 +19,8 @@
  import urllib
  import re
  
+from tornado.util import b
+
  class HTTPHeaders(dict):
      """A dictionary that maintains Http-Header-Case for all keys.
  
@@ -161,6 +163,54 @@ def url_concat(url, args):
          url += '&' if ('?' in url) else '?'
      return url + urllib.urlencode(args)
  
+def parse_multipart_form_data(boundary, data, arguments, files):
+    """Parses a multipart/form-data body.
+
+    The boundary and data parameters are both byte strings.
+    The dictionaries given in the arguments and files parameters
+    will be updated with the contents of the body.
+    """
+    # The standard allows for the boundary to be quoted in the header,
+    # although it's rare (it happens at least for google app engine
+    # xmpp).  I think we're also supposed to handle backslash-escapes
+    # here but I'll save that until we see a client that uses them
+    # in the wild.
+    if boundary.startswith(b('"')) and boundary.endswith(b('"')):
+        boundary = boundary[1:-1]
+    if data.endswith(b("\r\n")):
+        footer_length = len(boundary) + 6
+    else:
+        footer_length = len(boundary) + 4
+    parts = data[:-footer_length].split(b("--") + boundary + b("\r\n"))
+    for part in parts:
+        if not part: continue
+        eoh = part.find(b("\r\n\r\n"))
+        if eoh == -1:
+            logging.warning("multipart/form-data missing headers")
+            continue
+        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
+        name_header = headers.get("Content-Disposition", "")
+        if not name_header.startswith("form-data;") or \
+           not part.endswith(b("\r\n")):
+            logging.warning("Invalid multipart/form-data")
+            continue
+        value = part[eoh + 4:-2]
+        name_values = {}
+        for name_part in name_header[10:].split(";"):
+            name, name_value = name_part.strip().split("=", 1)
+            name_values[name] = name_value.strip('"')
+        if not name_values.get("name"):
+            logging.warning("multipart/form-data value missing name")
+            continue
+        name = name_values["name"]
+        if name_values.get("filename"):
+            ctype = headers.get("Content-Type", "application/unknown")
+            files.setdefault(name, []).append(dict(
+                filename=name_values["filename"], body=value,
+                content_type=ctype))
+        else:
+            arguments.setdefault(name, []).append(value)
+
  
  def doctests():
      import doctest
diff --git a/tornado/wsgi.py b/tornado/wsgi.py

index 20ec8975c1d9c8c570cb24610aa8591993924cf2..e500dfea9d526cbe1463ec77d28cdfc6b2cf284f 100644 (file)
--- a/tornado/wsgi.py
+++ b/tornado/wsgi.py
@@ -139,7 +139,9 @@ class HTTPRequest(object):
          elif content_type.startswith("multipart/form-data"):
              if 'boundary=' in content_type:
                  boundary = content_type.split('boundary=',1)[1]
-                if boundary: self._parse_mime_body(utf8(boundary))
+                if boundary:
+                    httputil.parse_multipart_form_data(
+                        utf8(boundary), self.body, self.arguments, self.files)
              else:
                  logging.warning("Invalid multipart/form-data")
  
@@ -161,43 +163,6 @@ class HTTPRequest(object):
          else:
              return self._finish_time - self._start_time
  
-    def _parse_mime_body(self, boundary):
-        if boundary.startswith(b('"')) and boundary.endswith(b('"')):
-            boundary = boundary[1:-1]
-        if self.body.endswith(b("\r\n")):
-            footer_length = len(boundary) + 6
-        else:
-            footer_length = len(boundary) + 4
-        parts = self.body[:-footer_length].split(b("--") + boundary + b("\r\n"))
-        for part in parts:
-            if not part: continue
-            eoh = part.find(b("\r\n\r\n"))
-            if eoh == -1:
-                logging.warning("multipart/form-data missing headers")
-                continue
-            headers = httputil.HTTPHeaders.parse(part[:eoh].decode("utf-8"))
-            name_header = headers.get("Content-Disposition", "")
-            if not name_header.startswith("form-data;") or \
-               not part.endswith(b("\r\n")):
-                logging.warning("Invalid multipart/form-data")
-                continue
-            value = part[eoh + 4:-2]
-            name_values = {}
-            for name_part in name_header[10:].split(";"):
-                name, name_value = name_part.strip().split("=", 1)
-                name_values[name] = name_value.strip('"')
-            if not name_values.get("name"):
-                logging.warning("multipart/form-data value missing name")
-                continue
-            name = name_values["name"]
-            if name_values.get("filename"):
-                ctype = headers.get("Content-Type", "application/unknown")
-                self.files.setdefault(name, []).append(dict(
-                    filename=name_values["filename"], body=value,
-                    content_type=ctype))
-            else:
-                self.arguments.setdefault(name, []).append(value)
-
  
  class WSGIContainer(object):
      """Makes a WSGI-compatible function runnable on Tornado's HTTP server.
author	Ben Darnell <ben@bendarnell.com>
	Sun, 5 Jun 2011 20:10:07 +0000 (13:10 -0700)
committer	Ben Darnell <ben@bendarnell.com>
	Sun, 5 Jun 2011 20:10:07 +0000 (13:10 -0700)
tornado/httpserver.py		patch | blob | blame | history
tornado/httputil.py		patch | blob | blame | history
tornado/wsgi.py		patch | blob | blame | history