git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Merged revisions 83209 via svnmerge from
author Senthil Kumaran <orsenthil@gmail.com>
Wed, 28 Jul 2010 16:35:35 +0000 (16:35 +0000)
committer Senthil Kumaran <orsenthil@gmail.com>
Wed, 28 Jul 2010 16:35:35 +0000 (16:35 +0000)
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r83209 | senthil.kumaran | 2010-07-28 21:57:56 +0530 (Wed, 28 Jul 2010) | 3 lines

  Fix Issue6325 - robotparser to honor URLs with query strings.
........

Lib/robotparser.py
Lib/test/test_robotparser.py

index 447563fe654d86db1fe8e26c83bfd088d24bf378..730426f6ae75e551d1e5aa49834b829621308f70 100644 (file)
@@ -131,7 +131,12 @@ class RobotFileParser:
             return True
         # search for given user agent matches
         # the first match counts
-        url = urllib.quote(urlparse.urlparse(urllib.unquote(url))[2]) or "/"
+        parsed_url = urlparse.urlparse(urllib.unquote(url))
+        url = urlparse.urlunparse(('', '', parsed_url.path,
+            parsed_url.params, parsed_url.query, parsed_url.fragment))
+        url = urllib.quote(url)
+        if not url:
+            url = "/"
         for entry in self.entries:
             if entry.applies_to(useragent):
                 return entry.allowance(url)
index 405d517d2e04867e4f43cc918f00454aa5f6005f..04158841077fc53dcbc30e773becb3e4d7aa6638 100644 (file)
@@ -202,6 +202,17 @@ bad = ['/folder1/anotherfile.html']
 RobotTest(13, doc, good, bad, agent="googlebot")
 
 
+# 14. For issue #6325 (query string support)
+doc = """
+User-agent: *
+Disallow: /some/path?name=value
+"""
+
+good = ['/some/path']
+bad = ['/some/path?name=value']
+
+RobotTest(14, doc, good, bad)
+
 
 class NetworkTestCase(unittest.TestCase):