From: Raymond Hettinger Date: Sat, 15 Oct 2005 16:44:57 +0000 (+0000) Subject: Teach unquote() to handle unicode inputs X-Git-Tag: v2.4.3c1~239 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ad2fa76ffa25cac1e7ddea05f23d9a8c4b222de4;p=thirdparty%2FPython%2Fcpython.git Teach unquote() to handle unicode inputs --- diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 36214767bdad..e8a6c6ac17be 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -409,6 +409,10 @@ class UnquotingTests(unittest.TestCase): self.assertEqual(expect, result, "using unquote_plus(): %s != %s" % (expect, result)) + def test_unquote_with_unicode(self): + r = urllib.unquote(u'br%C3%BCckner_sapporo_20050930.doc') + self.assertEqual(r, u'br\xc3\xbcckner_sapporo_20050930.doc') + class urlencode_Tests(unittest.TestCase): """Tests for urlencode()""" diff --git a/Lib/urllib.py b/Lib/urllib.py index bc16be0162aa..f00d02ff9eee 100644 --- a/Lib/urllib.py +++ b/Lib/urllib.py @@ -1061,6 +1061,8 @@ def unquote(s): res[i] = _hextochr[item[:2]] + item[2:] except KeyError: res[i] = '%' + item + except UnicodeDecodeError: + res[i] = unichr(int(item[:2], 16)) + item[2:] return "".join(res) def unquote_plus(s): diff --git a/Misc/NEWS b/Misc/NEWS index e0ddfe879278..84ee619a0971 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -30,6 +30,9 @@ Extension Modules Library ------- +- urllib.unquote() now handles Unicode strings correctly. Formerly, it would + either ignore the substitution or raise UnicodeDecodeError. + - SF #1313496: the bisect module now accepts named arguments. - Bug #729103: pydoc.py: Fix docother() method to accept additional