def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    Split the string `s` on commas into a list of elements, where an
    element may contain double-quoted sections:

    - A comma inside a double-quoted section does not split.
    - A quoted section may begin in the middle of an element; the
      surrounding quote characters are kept in the result.
    - Inside a quoted section a backslash escapes the next character:
      the backslash is dropped and the following character is taken
      literally (so an escaped quote does not close the section).
      Outside quotes a backslash is an ordinary character.
    - Only double-quotes count, not single-quotes.

    Each element is returned with leading/trailing whitespace stripped.
    Empty elements between commas are kept (as ''), but nothing is
    appended for a trailing element that is completely empty.  An
    unterminated quoted section is not an error; it simply runs to the
    end of the string.
    """
    res = []
    # Characters of the element currently being built.  Collected in a
    # list and joined once per element instead of growing a string one
    # character at a time.
    chars = []

    escape = quote = False
    for cur in s:
        if escape:
            # Previous character was a backslash inside quotes: take
            # this one literally, whatever it is.
            chars.append(cur)
            escape = False
        elif quote:
            if cur == '\\':
                # The escaping backslash itself is not emitted.
                escape = True
            else:
                if cur == '"':
                    quote = False
                chars.append(cur)
        elif cur == ',':
            # Unquoted comma: element boundary (empty elements kept).
            res.append(''.join(chars))
            chars = []
        else:
            if cur == '"':
                quote = True
            chars.append(cur)

    # Append the last element, unless nothing at all followed the
    # final comma (or the input was empty).
    if chars:
        res.append(''.join(chars))

    return [part.strip() for part in res]