git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-139246: zero-width word paste can be wrong in default repl (GH-139254)
author: yihong <zouzou0208@gmail.com>
Thu, 30 Oct 2025 15:14:06 +0000 (23:14 +0800)
committer: GitHub <noreply@github.com>
Thu, 30 Oct 2025 15:14:06 +0000 (16:14 +0100)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Co-authored-by: grayjk <grayjk@gmail.com>
Lib/_pyrepl/utils.py
Lib/test/test_pyrepl/test_utils.py
Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst [new file with mode: 0644]

index 64708e843b685bd813287012e05648c59bcacbf8..06cddef851bb4042549f3d26a3d6fe50e70db77c 100644 (file)
@@ -63,6 +63,12 @@ class ColorSpan(NamedTuple):
 def str_width(c: str) -> int:
     if ord(c) < 128:
         return 1
+    # gh-139246 for zero-width joiner and combining characters
+    if unicodedata.combining(c):
+        return 0
+    category = unicodedata.category(c)
+    if category == "Cf" and c != "\u00ad":
+        return 0
     w = unicodedata.east_asian_width(c)
     if w in ("N", "Na", "H", "A"):
         return 1
index 05a4f3290598350a78771a4871f138795031868c..656a1e441e0e473874cd9fd34eac4a4768b25324 100644 (file)
@@ -5,10 +5,29 @@ from _pyrepl.utils import str_width, wlen, prev_next_window, gen_colors
 
 class TestUtils(TestCase):
     def test_str_width(self):
-        characters = ['a', '1', '_', '!', '\x1a', '\u263A', '\uffb9']
+        characters = [
+            'a',
+            '1',
+            '_',
+            '!',
+            '\x1a',
+            '\u263A',
+            '\uffb9',
+            '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
+            '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ
+            '\u00ad',
+        ]
         for c in characters:
             self.assertEqual(str_width(c), 1)
 
+        zero_width_characters = [
+            '\N{COMBINING ACUTE ACCENT}',
+            '\N{ZERO WIDTH JOINER}',
+        ]
+        for c in zero_width_characters:
+            with self.subTest(character=c):
+                self.assertEqual(str_width(c), 0)
+
         characters = [chr(99989), chr(99999)]
         for c in characters:
             self.assertEqual(str_width(c), 2)
@@ -25,6 +44,8 @@ class TestUtils(TestCase):
 
         self.assertEqual(wlen('hello'), 5)
         self.assertEqual(wlen('hello' + '\x1a'), 7)
+        self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
+        self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2)
 
     def test_prev_next_window(self):
         def gen_normal():
diff --git a/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst b/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst
new file mode 100644 (file)
index 0000000..a816bda
--- /dev/null
@@ -0,0 +1 @@
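+Fix the display width computed for zero-width characters (combining marks and format characters such as the zero-width joiner) when they are pasted into the default REPL.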