git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Merged revisions 83449 via svnmerge from
author: Georg Brandl <georg@python.org>
Sun, 1 Aug 2010 22:00:39 +0000 (22:00 +0000)
committer: Georg Brandl <georg@python.org>
Sun, 1 Aug 2010 22:00:39 +0000 (22:00 +0000)
svn+ssh://pythondev@svn.python.org/python/branches/release27-maint

................
  r83449 | georg.brandl | 2010-08-01 22:59:03 +0200 (So, 01 Aug 2010) | 9 lines

  Merged revisions 83238 via svnmerge from
  svn+ssh://pythondev@svn.python.org/python/branches/py3k

  ........
    r83238 | georg.brandl | 2010-07-29 19:55:01 +0200 (Do, 29 Jul 2010) | 1 line

    #4108: the first default entry (User-agent: *) wins.
  ........
................

Lib/robotparser.py
Lib/test/test_robotparser.py
Misc/NEWS

index 447563fe654d86db1fe8e26c83bfd088d24bf378..726854b49f538b89c39c3e62a9998cdd92444330 100644 (file)
@@ -68,7 +68,9 @@ class RobotFileParser:
     def _add_entry(self, entry):
         if "*" in entry.useragents:
             # the default entry is considered last
-            self.default_entry = entry
+            if self.default_entry is None:
+                # the first default entry wins
+                self.default_entry = entry
         else:
             self.entries.append(entry)
 
@@ -120,7 +122,7 @@ class RobotFileParser:
                         entry.rulelines.append(RuleLine(line[1], True))
                         state = 2
         if state == 2:
-            self.entries.append(entry)
+            self._add_entry(entry)
 
 
     def can_fetch(self, useragent, url):
index 431b8ffbd98e9e7ac02a1a4dd6fc70aa295a4daf..650b603fdb5badafd621c93f5df2058d874205eb 100644 (file)
@@ -202,6 +202,20 @@ bad = ['/folder1/anotherfile.html']
 RobotTest(13, doc, good, bad, agent="googlebot")
 
 
+# 14. For issue #4108 (obey first * entry)
+doc = """
+User-agent: *
+Disallow: /some/path
+
+User-agent: *
+Disallow: /another/path
+"""
+
+good = ['/another/path']
+bad = ['/some/path']
+
+RobotTest(14, doc, good, bad)
+
 
 class TestCase(unittest.TestCase):
     def runTest(self):
index b0e66a39b268396076149f249be864f4669b8693..37b143b51b1c4abc773e83f71c9dda39c69d4653 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -35,6 +35,9 @@ Core and Builtins
   when turned into an exception: in this case the exception simply
   gets ignored.
 
+- Issue #4108: In robotparser, if there are multiple 'User-agent: *' entries,
+  only the first one is obeyed.
+
 - Issue #9354: Provide getsockopt() in asyncore's file_wrapper.
 
 - In the unicode/str.format(), raise a ValueError when indexes to arguments are