]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Fix Issue11703 - urllib2.geturl() does not return correct url when the original url...
author Senthil Kumaran <orsenthil@gmail.com>
Tue, 12 Apr 2011 23:01:19 +0000 (07:01 +0800)
committer Senthil Kumaran <orsenthil@gmail.com>
Tue, 12 Apr 2011 23:01:19 +0000 (07:01 +0800)
Lib/test/test_urllib.py
Lib/test/test_urllib2.py
Lib/test/test_urllib2net.py
Lib/urllib/request.py

index 2b8852127bc7571edca5b18cb6cd5995622ba95f..462a2b03f83428dfc0b4321fa331470911e23e26 100644 (file)
@@ -171,6 +171,16 @@ class urlopen_HttpTests(unittest.TestCase):
         finally:
             self.unfakehttp()
 
+    def test_url_fragment(self):
+        # Issue #11703: geturl() omits fragments in the original URL.
+        url = 'http://docs.python.org/library/urllib.html#OK'
+        self.fakehttp(b'Hello!')
+        try:
+            fp = urllib.request.urlopen(url)
+            self.assertEqual(fp.geturl(), url)
+        finally:
+            self.unfakehttp()
+
     def test_read_bogus(self):
         # urlopen() should raise IOError for many error codes.
         self.fakehttp(b'''HTTP/1.1 401 Authentication Required
index 3fd7baa8ddc072a76561cfe6b556c57488061bef..62226b834ce0190f0100fb096cad5522bf592163 100644 (file)
@@ -1024,6 +1024,15 @@ class HandlerTests(unittest.TestCase):
         o.open("http://www.example.com/")
         self.assertFalse(hh.req.has_header("Cookie"))
 
+    def test_redirect_fragment(self):
+        redirected_url = 'http://www.example.com/index.html#OK\r\n\r\n'
+        hh = MockHTTPHandler(302, 'Location: ' + redirected_url)
+        hdeh = urllib.request.HTTPDefaultErrorHandler()
+        hrh = urllib.request.HTTPRedirectHandler()
+        o = build_test_opener(hh, hdeh, hrh)
+        fp = o.open('http://www.example.com')
+        self.assertEqual(fp.geturl(), redirected_url.strip())
+
     def test_proxy(self):
         o = OpenerDirector()
         ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
@@ -1339,12 +1348,16 @@ class RequestTests(unittest.TestCase):
         req = Request("<URL:http://www.python.org>")
         self.assertEqual("www.python.org", req.get_host())
 
-    def test_urlwith_fragment(self):
+    def test_url_fragment(self):
         req = Request("http://www.python.org/?qs=query#fragment=true")
         self.assertEqual("/?qs=query", req.get_selector())
         req = Request("http://www.python.org/#fun=true")
         self.assertEqual("/", req.get_selector())
 
+        # Issue 11703: geturl() omits fragment in the original URL.
+        url = 'http://docs.python.org/library/urllib2.html#OK'
+        req = Request(url)
+        self.assertEqual(req.get_full_url(), url)
 
 def test_main(verbose=None):
     from test import test_urllib2
index 63e25b4b6946213b21e0b7c57d9bd6f99b99c96a..e6c4ec17273bdec2145c7889cbd13bf5099eb377 100644 (file)
@@ -158,7 +158,7 @@ class OtherNetworkTests(unittest.TestCase):
             req = urllib.request.Request(urlwith_frag)
             res = urllib.request.urlopen(req)
             self.assertEqual(res.geturl(),
-                    "http://docs.python.org/glossary.html")
+                    "http://docs.python.org/glossary.html#glossary")
 
     def test_custom_headers(self):
         url = "http://www.example.com"
index 220dfe4b19bd17ec4386fe48640124cff3e06a46..6b299018647f3e065b026d3b215167eab9c3c9fa 100644 (file)
@@ -163,7 +163,7 @@ class Request:
                  origin_req_host=None, unverifiable=False):
         # unwrap('<URL:type://host/path>') --> 'type://host/path'
         self.full_url = unwrap(url)
-        self.full_url, fragment = splittag(self.full_url)
+        self.full_url, self.fragment = splittag(self.full_url)
         self.data = data
         self.headers = {}
         self._tunnel_host = None
@@ -202,7 +202,10 @@ class Request:
         return self.data
 
     def get_full_url(self):
-        return self.full_url
+        if self.fragment:
+            return '%s#%s' % (self.full_url, self.fragment)
+        else:
+            return self.full_url
 
     def get_type(self):
         return self.type
@@ -1106,7 +1109,7 @@ class AbstractHTTPHandler(BaseHandler):
         except socket.error as err:
             raise URLError(err)
 
-        r.url = req.full_url
+        r.url = req.get_full_url()
         # This line replaces the .msg attribute of the HTTPResponse
         # with .headers, because urllib clients expect the response to
         # have the reason in .msg.  It would be good to mark this