Improve parsing of multipart/form-data headers.

author Ben Darnell <bdarnell@beaker.local>
Mon, 31 May 2010 05:16:11 +0000 (22:16 -0700)
committer Ben Darnell <bdarnell@beaker.local>
Mon, 31 May 2010 06:38:50 +0000 (23:38 -0700)

diff --git a/tornado/httpserver.py b/tornado/httpserver.py

index 70e23c20a2aea91ba56cf06a29d62abba8366bfd..6f448aded6f835da004a879c468f15eeb7455cd4 100644 (file)
--- a/tornado/httpserver.py
+++ b/tornado/httpserver.py
@@ -305,11 +305,21 @@ class HTTPConnection(object):
                          self._request.arguments.setdefault(name, []).extend(
                              values)
              elif content_type.startswith("multipart/form-data"):
-                boundary = content_type[30:]
-                if boundary: self._parse_mime_body(boundary, data)
+                if 'boundary=' in content_type:
+                    boundary = content_type.split('boundary=',1)[1]
+                    if boundary: self._parse_mime_body(boundary, data)
+                else:
+                    logging.warning("Invalid multipart/form-data")
          self.request_callback(self._request)
  
      def _parse_mime_body(self, boundary, data):
+        # The standard allows for the boundary to be quoted in the header,
+        # although it's rare (it happens at least for google app engine
+        # xmpp).  I think we're also supposed to handle backslash-escapes
+        # here but I'll save that until we see a client that uses them
+        # in the wild.
+        if boundary.startswith('"') and boundary.endswith('"'):
+            boundary = boundary[1:-1]
          if data.endswith("\r\n"):
              footer_length = len(boundary) + 6
          else:
diff --git a/tornado/wsgi.py b/tornado/wsgi.py

index c14bab9ca49c792d0f5d85ae3babc51ad96f704f..181429cb32310c900bd05e2a8396dde34aa146e7 100644 (file)
--- a/tornado/wsgi.py
+++ b/tornado/wsgi.py
@@ -126,8 +126,11 @@ class HTTPRequest(object):
              for name, values in cgi.parse_qs(self.body).iteritems():
                  self.arguments.setdefault(name, []).extend(values)
          elif content_type.startswith("multipart/form-data"):
-            boundary = content_type[30:]
-            if boundary: self._parse_mime_body(boundary)
+            if 'boundary=' in content_type:
+                boundary = content_type.split('boundary=',1)[1]
+                if boundary: self._parse_mime_body(boundary)
+            else:
+                logging.warning("Invalid multipart/form-data")
  
          self._start_time = time.time()
          self._finish_time = None
@@ -148,6 +151,8 @@ class HTTPRequest(object):
              return self._finish_time - self._start_time
  
      def _parse_mime_body(self, boundary):
+        if boundary.startswith('"') and boundary.endswith('"'):
+            boundary = boundary[1:-1]
          if self.body.endswith("\r\n"):
              footer_length = len(boundary) + 6
          else:
author	Ben Darnell <bdarnell@beaker.local>
	Mon, 31 May 2010 05:16:11 +0000 (22:16 -0700)
committer	Ben Darnell <bdarnell@beaker.local>
	Mon, 31 May 2010 06:38:50 +0000 (23:38 -0700)
tornado/httpserver.py	patch | blob | blame | history
tornado/wsgi.py	patch | blob | blame | history