]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.12] gh-125660: Reject invalid unicode escapes for Python implementation of JSON...
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Mon, 21 Oct 2024 13:08:10 +0000 (15:08 +0200)
committer: GitHub <noreply@github.com>
Mon, 21 Oct 2024 13:08:10 +0000 (16:08 +0300)
(cherry picked from commit df751363e386d1f77c5ba9515a5539902457d386)

Co-authored-by: Nice Zombies <nineteendo19d0@gmail.com>
Lib/json/decoder.py
Lib/test/test_json/test_scanstring.py
Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst [new file with mode: 0644]

index c5d9ae2d0d5d040708f097fbf6450b86eef334dd..5e5effeac02551c7732b88c55a870216c526dc52 100644 (file)
@@ -50,17 +50,18 @@ _CONSTANTS = {
 }
 
 
+HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS)
 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
 BACKSLASH = {
     '"': '"', '\\': '\\', '/': '/',
     'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
 }
 
-def _decode_uXXXX(s, pos):
-    esc = s[pos + 1:pos + 5]
-    if len(esc) == 4 and esc[1] not in 'xX':
+def _decode_uXXXX(s, pos, _m=HEXDIGITS.match):
+    esc = _m(s, pos + 1)
+    if esc is not None:
         try:
-            return int(esc, 16)
+            return int(esc.group(), 16)
         except ValueError:
             pass
     msg = "Invalid \\uXXXX escape"
index 2d3ee8a8bf0f92cba8d70461de6fc2d9735252ed..cca556a3b95bab71834857da3cfa25a5b0f708fd 100644 (file)
@@ -116,6 +116,11 @@ class TestScanstring:
             '"\\u012z"',
             '"\\u0x12"',
             '"\\u0X12"',
+            '"\\u{0}"'.format("\uff10" * 4),
+            '"\\u 123"',
+            '"\\u-123"',
+            '"\\u+123"',
+            '"\\u1_23"',
             '"\\ud834\\"',
             '"\\ud834\\u"',
             '"\\ud834\\ud"',
@@ -127,6 +132,11 @@ class TestScanstring:
             '"\\ud834\\udd2z"',
             '"\\ud834\\u0x20"',
             '"\\ud834\\u0X20"',
+            '"\\ud834\\u{0}"'.format("\uff10" * 4),
+            '"\\ud834\\u 123"',
+            '"\\ud834\\u-123"',
+            '"\\ud834\\u+123"',
+            '"\\ud834\\u1_23"',
         ]
         for s in bad_escapes:
             with self.assertRaises(self.JSONDecodeError, msg=s):
diff --git a/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst b/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst
new file mode 100644 (file)
index 0000000..74d76c7
--- /dev/null
@@ -0,0 +1 @@
+Reject invalid unicode escapes for Python implementation of :func:`json.loads`.