git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-139757: Fix unintended bytecode specialization for non-ascii string (gh-143352)
author Donghee Na <donghee.na@python.org>
Fri, 2 Jan 2026 14:05:08 +0000 (23:05 +0900)
committer GitHub <noreply@github.com>
Fri, 2 Jan 2026 14:05:08 +0000 (14:05 +0000)
Lib/test/test_opcache.py
Misc/NEWS.d/next/Core_and_Builtins/2026-01-02-22-35-12.gh-issue-139757.v5LRew.rst [new file with mode: 0644]
Python/specialize.c

diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py
index 4113b79ef5c80b0ec292e086866aaebba7a6a1e0..4df88d0f16885ffb0ff1590a3e0414a34502d976 100644 (file)
@@ -1785,6 +1785,15 @@ class TestSpecializer(TestBase):
         self.assert_specialized(binary_subscr_str_int, "BINARY_OP_SUBSCR_STR_INT")
         self.assert_no_opcode(binary_subscr_str_int, "BINARY_OP")
 
+        def binary_subscr_str_int_non_compact():
+            for _ in range(_testinternalcapi.SPECIALIZATION_THRESHOLD):
+                a = "바이트코드_특수화"
+                for idx, expected in enumerate(a):
+                    self.assertEqual(a[idx], expected)
+
+        binary_subscr_str_int_non_compact()
+        self.assert_no_opcode(binary_subscr_str_int_non_compact, "BINARY_OP_SUBSCR_STR_INT")
+
         def binary_subscr_getitems():
             class C:
                 def __init__(self, val):
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-01-02-22-35-12.gh-issue-139757.v5LRew.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-02-22-35-12.gh-issue-139757.v5LRew.rst
new file mode 100644 (file)
index 0000000..ef1a4e4
--- /dev/null
@@ -0,0 +1,2 @@
+Fix unintended bytecode specialization for non-ascii string.
+Patch by Donghee Na, Ken Jin and Chris Eibl.
diff --git a/Python/specialize.c b/Python/specialize.c
index e67078afdd9df34213553ec6650669f97aa376f3..e9302157e7782ad513a9836049fb852b2948a179 100644 (file)
@@ -2240,7 +2240,7 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in
                     specialize(instr, BINARY_OP_SUBSCR_TUPLE_INT);
                     return;
                 }
-                if (PyUnicode_CheckExact(lhs)) {
+                if (PyUnicode_CheckExact(lhs) && PyUnicode_IS_COMPACT_ASCII(lhs)) {
                     specialize(instr, BINARY_OP_SUBSCR_STR_INT);
                     return;
                 }