gh-139246: zero-width word paste can be wrong in default repl (GH-139254)

author yihong <zouzou0208@gmail.com>

Thu, 30 Oct 2025 15:14:06 +0000 (23:14 +0800)

committer GitHub <noreply@github.com>

Thu, 30 Oct 2025 15:14:06 +0000 (16:14 +0100)
author yihong <zouzou0208@gmail.com>
Thu, 30 Oct 2025 15:14:06 +0000 (23:14 +0800)
committer GitHub <noreply@github.com>
Thu, 30 Oct 2025 15:14:06 +0000 (16:14 +0100)
diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py

index 64708e843b685bd813287012e05648c59bcacbf8..06cddef851bb4042549f3d26a3d6fe50e70db77c 100644 (file)
--- a/Lib/_pyrepl/utils.py
+++ b/Lib/_pyrepl/utils.py
@@ -63,6 +63,12 @@ class ColorSpan(NamedTuple):
  def str_width(c: str) -> int:
      if ord(c) < 128:
          return 1
+    # gh-139246 for zero-width joiner and combining characters
+    if unicodedata.combining(c):
+        return 0
+    category = unicodedata.category(c)
+    if category == "Cf" and c != "\u00ad":
+        return 0
      w = unicodedata.east_asian_width(c)
      if w in ("N", "Na", "H", "A"):
          return 1
diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py

index 05a4f3290598350a78771a4871f138795031868c..656a1e441e0e473874cd9fd34eac4a4768b25324 100644 (file)
--- a/Lib/test/test_pyrepl/test_utils.py
+++ b/Lib/test/test_pyrepl/test_utils.py
@@ -5,10 +5,29 @@ from _pyrepl.utils import str_width, wlen, prev_next_window, gen_colors
  
  class TestUtils(TestCase):
      def test_str_width(self):
-        characters = ['a', '1', '_', '!', '\x1a', '\u263A', '\uffb9']
+        characters = [
+            'a',
+            '1',
+            '_',
+            '!',
+            '\x1a',
+            '\u263A',
+            '\uffb9',
+            '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
+            '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ
+            '\u00ad',
+        ]
          for c in characters:
              self.assertEqual(str_width(c), 1)
  
+        zero_width_characters = [
+            '\N{COMBINING ACUTE ACCENT}',
+            '\N{ZERO WIDTH JOINER}',
+        ]
+        for c in zero_width_characters:
+            with self.subTest(character=c):
+                self.assertEqual(str_width(c), 0)
+
          characters = [chr(99989), chr(99999)]
          for c in characters:
              self.assertEqual(str_width(c), 2)
@@ -25,6 +44,8 @@ class TestUtils(TestCase):
  
          self.assertEqual(wlen('hello'), 5)
          self.assertEqual(wlen('hello' + '\x1a'), 7)
+        self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
+        self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2)
  
      def test_prev_next_window(self):
          def gen_normal():
diff --git a/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst b/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst

new file mode 100644 (file)

index 0000000..a816bda
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst
@@ -0,0 +1 @@
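+Fix the default REPL computing the wrong display width for text (for example, pasted input) that contains zero-width characters such as combining marks and the zero-width joiner.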
author	yihong <zouzou0208@gmail.com>
	Thu, 30 Oct 2025 15:14:06 +0000 (23:14 +0800)
committer	GitHub <noreply@github.com>
	Thu, 30 Oct 2025 15:14:06 +0000 (16:14 +0100)
Lib/_pyrepl/utils.py		patch \| blob \| blame \| history
Lib/test/test_pyrepl/test_utils.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst	[new file with mode: 0644]	patch \| blob