]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.13] gh-139246: zero-width word paste can be wrong in default repl (GH-139254)...
authorStan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Fri, 7 Nov 2025 12:51:03 +0000 (12:51 +0000)
committerGitHub <noreply@github.com>
Fri, 7 Nov 2025 12:51:03 +0000 (13:51 +0100)
(cherry picked from commit 4e6dba0ef74523a52f66547c16b9972664b18fd4)

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Co-authored-by: yihong <zouzou0208@gmail.com>
Co-authored-by: grayjk <grayjk@gmail.com>
Lib/_pyrepl/utils.py
Lib/test/test_pyrepl/test_utils.py
Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst [new file with mode: 0644]

index 7437fbe1ab937111ae3b0fffa9e9c3d99d706348..a30fbdee3a414f39d870b6b4a74c33e7e1466773 100644 (file)
@@ -14,6 +14,12 @@ ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
 def str_width(c: str) -> int:
     if ord(c) < 128:
         return 1
+    # gh-139246: combining and format (Cf) chars, e.g. ZWJ, are zero-width
+    if unicodedata.combining(c):
+        return 0
+    category = unicodedata.category(c)
+    if category == "Cf" and c != "\u00ad":
+        return 0
     w = unicodedata.east_asian_width(c)
     if w in ("N", "Na", "H", "A"):
         return 1
index 0d59968206a613149446cb729d1e3223bd96df49..70e2484ab5f3a1f0d8de28e8ea6cf62b44fab678 100644 (file)
@@ -5,10 +5,29 @@ from _pyrepl.utils import str_width, wlen
 
 class TestUtils(TestCase):
     def test_str_width(self):
-        characters = ['a', '1', '_', '!', '\x1a', '\u263A', '\uffb9']
+        characters = [
+            'a',
+            '1',
+            '_',
+            '!',
+            '\x1a',
+            '\u263A',
+            '\uffb9',
+            '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
+            '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ
+            '\u00ad',
+        ]
         for c in characters:
             self.assertEqual(str_width(c), 1)
 
+        zero_width_characters = [
+            '\N{COMBINING ACUTE ACCENT}',
+            '\N{ZERO WIDTH JOINER}',
+        ]
+        for c in zero_width_characters:
+            with self.subTest(character=c):
+                self.assertEqual(str_width(c), 0)
+
         characters = [chr(99989), chr(99999)]
         for c in characters:
             self.assertEqual(str_width(c), 2)
@@ -25,3 +44,5 @@ class TestUtils(TestCase):
 
         self.assertEqual(wlen('hello'), 5)
         self.assertEqual(wlen('hello' + '\x1a'), 7)
+        self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
+        self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2)
diff --git a/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst b/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst
new file mode 100644 (file)
index 0000000..a816bda
--- /dev/null
@@ -0,0 +1 @@
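+Fix the default REPL miscalculating the display width of pasted text containing zero-width characters, such as combining marks and the zero-width joiner.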