git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Issue #17214: Percent-encode non-ASCII bytes in redirect targets
author Martin Panter <vadmium+py@gmail.com>
Mon, 16 May 2016 01:14:20 +0000 (01:14 +0000)
committer Martin Panter <vadmium+py@gmail.com>
Mon, 16 May 2016 01:14:20 +0000 (01:14 +0000)
Some servers send Location header fields with non-ASCII bytes, but
"http.client" requires the request target to be ASCII-encodable, otherwise a
UnicodeEncodeError is raised. Based on patch by Christian Heimes.

Python 2 does not suffer any problem because it allows non-ASCII bytes in the
HTTP request target.

Lib/test/test_urllib2.py
Lib/urllib/request.py
Misc/NEWS

index 58c30712877ff65b543538eaf3f584284808672f..eda7cccc6035a6b5d3742de67a521fb70895f4dc 100644 (file)
@@ -1224,6 +1224,41 @@ class HandlerTests(unittest.TestCase):
         fp = urllib.request.urlopen("http://python.org/path")
         self.assertEqual(fp.geturl(), "http://python.org/path?query")
 
+    def test_redirect_encoding(self):
+        # Some characters in the redirect target may need special handling,
+        # but most ASCII characters should be treated as already encoded
+        class Handler(urllib.request.HTTPHandler):
+            def http_open(self, req):
+                result = self.do_open(self.connection, req)
+                self.last_buf = self.connection.buf
+                # Set up a normal response for the next request
+                self.connection = test_urllib.fakehttp(
+                    b'HTTP/1.1 200 OK\r\n'
+                    b'Content-Length: 3\r\n'
+                    b'\r\n'
+                    b'123'
+                )
+                return result
+        handler = Handler()
+        opener = urllib.request.build_opener(handler)
+        tests = (
+            (b'/p\xC3\xA5-dansk/', b'/p%C3%A5-dansk/'),
+            (b'/spaced%20path/', b'/spaced%20path/'),
+            (b'/spaced path/', b'/spaced%20path/'),
+            (b'/?p\xC3\xA5-dansk', b'/?p%C3%A5-dansk'),
+        )
+        for [location, result] in tests:
+            with self.subTest(repr(location)):
+                handler.connection = test_urllib.fakehttp(
+                    b'HTTP/1.1 302 Redirect\r\n'
+                    b'Location: ' + location + b'\r\n'
+                    b'\r\n'
+                )
+                response = opener.open('http://example.com/')
+                expected = b'GET ' + result + b' '
+                request = handler.last_buf
+                self.assertTrue(request.startswith(expected), repr(request))
+
     def test_proxy(self):
         o = OpenerDirector()
         ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
index bbd2bdf685cb305518b596e758c6621baae6332f..1731fe3df10bc112f72c0cbd29908deaa64f73d7 100644 (file)
@@ -91,6 +91,7 @@ import os
 import posixpath
 import re
 import socket
+import string
 import sys
 import time
 import collections
@@ -616,8 +617,12 @@ class HTTPRedirectHandler(BaseHandler):
         # from the user (of urllib.request, in this case).  In practice,
         # essentially all clients do redirect in this case, so we do
         # the same.
-        # be conciliant with URIs containing a space
+
+        # Be conciliant with URIs containing a space.  This is mainly
+        # redundant with the more complete encoding done in http_error_302(),
+        # but it is kept for compatibility with other callers.
         newurl = newurl.replace(' ', '%20')
+
         CONTENT_HEADERS = ("content-length", "content-type")
         newheaders = dict((k, v) for k, v in req.headers.items()
                           if k.lower() not in CONTENT_HEADERS)
@@ -657,6 +662,11 @@ class HTTPRedirectHandler(BaseHandler):
             urlparts[2] = "/"
         newurl = urlunparse(urlparts)
 
+        # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
+        # original bytes and percent-encode non-ASCII bytes, and any special
+        # characters such as the space.
+        newurl = quote(
+            newurl, encoding="iso-8859-1", safe=string.punctuation)
         newurl = urljoin(req.full_url, newurl)
 
         # XXX Probably want to forget about the state of the current
index 9a147c407c53d8638cc813f56a0914c8a05033ee..0b9f6f0755cfe4175225eece4a15f6ca915e51c3 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -121,6 +121,12 @@ Library
 - Issue #14132: Fix urllib.request redirect handling when the target only has
   a query string.  Original fix by Ján Janech.
 
+- Issue #17214: The "urllib.request" module now percent-encodes non-ASCII
+  bytes found in redirect target URLs.  Some servers send Location header
+  fields with non-ASCII bytes, but "http.client" requires the request target
+  to be ASCII-encodable, otherwise a UnicodeEncodeError is raised.  Based on
+  patch by Christian Heimes.
+
 - Issue #26892: Honor debuglevel flag in urllib.request.HTTPHandler. Patch
   contributed by Chi Hsuan Yen.