git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Teach unquote() to handle unicode inputs
author Raymond Hettinger <python@rcn.com>
Sat, 15 Oct 2005 16:44:57 +0000 (16:44 +0000)
committer Raymond Hettinger <python@rcn.com>
Sat, 15 Oct 2005 16:44:57 +0000 (16:44 +0000)
Lib/test/test_urllib.py
Lib/urllib.py
Misc/NEWS

index 36214767bdad3775ea76976b0aa8001d9781b922..e8a6c6ac17be5c644636fca996b9e3c4ac1de409 100644 (file)
@@ -409,6 +409,10 @@ class UnquotingTests(unittest.TestCase):
         self.assertEqual(expect, result,
                          "using unquote_plus(): %s != %s" % (expect, result))
 
+    def test_unquote_with_unicode(self):
+        r = urllib.unquote(u'br%C3%BCckner_sapporo_20050930.doc')
+        self.assertEqual(r, u'br\xc3\xbcckner_sapporo_20050930.doc')
+
 class urlencode_Tests(unittest.TestCase):
     """Tests for urlencode()"""
 
index bc16be0162aa922737f07d381f830f51c9bd6ced..f00d02ff9eeefa5b5d003f17fcce983a7c87c505 100644 (file)
@@ -1061,6 +1061,8 @@ def unquote(s):
             res[i] = _hextochr[item[:2]] + item[2:]
         except KeyError:
             res[i] = '%' + item
+        except UnicodeDecodeError:
+            res[i] = unichr(int(item[:2], 16)) + item[2:]
     return "".join(res)
 
 def unquote_plus(s):
index e0ddfe879278c15c3ed4c74914ac1aa03ed263f9..84ee619a0971311f8beca8ac2aca8bc48736bbb8 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -30,6 +30,9 @@ Extension Modules
 Library
 -------
 
+- urllib.unquote() now handles Unicode strings correctly.  Formerly, it would
+  either ignore the substitution or raise UnicodeDecodeError.
+
 - SF #1313496:  the bisect module now accepts named arguments.
 
 - Bug #729103: pydoc.py: Fix docother() method to accept additional