From: yihong Date: Thu, 30 Oct 2025 15:14:06 +0000 (+0800) Subject: gh-139246: zero-width word paste can be wrong in default repl (GH-139254) X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4e6dba0ef74523a52f66547c16b9972664b18fd4;p=thirdparty%2FPython%2Fcpython.git gh-139246: zero-width word paste can be wrong in default repl (GH-139254) Signed-off-by: yihong0618 Co-authored-by: grayjk --- diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 64708e843b68..06cddef851bb 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -63,6 +63,12 @@ class ColorSpan(NamedTuple): def str_width(c: str) -> int: if ord(c) < 128: return 1 + # gh-139246 for zero-width joiner and combining characters + if unicodedata.combining(c): + return 0 + category = unicodedata.category(c) + if category == "Cf" and c != "\u00ad": + return 0 w = unicodedata.east_asian_width(c) if w in ("N", "Na", "H", "A"): return 1 diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py index 05a4f3290598..656a1e441e0e 100644 --- a/Lib/test/test_pyrepl/test_utils.py +++ b/Lib/test/test_pyrepl/test_utils.py @@ -5,10 +5,29 @@ from _pyrepl.utils import str_width, wlen, prev_next_window, gen_colors class TestUtils(TestCase): def test_str_width(self): - characters = ['a', '1', '_', '!', '\x1a', '\u263A', '\uffb9'] + characters = [ + 'a', + '1', + '_', + '!', + '\x1a', + '\u263A', + '\uffb9', + '\N{LATIN SMALL LETTER E WITH ACUTE}', # é + '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ + '\u00ad', + ] for c in characters: self.assertEqual(str_width(c), 1) + zero_width_characters = [ + '\N{COMBINING ACUTE ACCENT}', + '\N{ZERO WIDTH JOINER}', + ] + for c in zero_width_characters: + with self.subTest(character=c): + self.assertEqual(str_width(c), 0) + characters = [chr(99989), chr(99999)] for c in characters: self.assertEqual(str_width(c), 2) @@ -25,6 +44,8 @@ class TestUtils(TestCase): self.assertEqual(wlen('hello'), 5) self.assertEqual(wlen('hello' + '\x1a'), 7) + self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1) + self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2) def test_prev_next_window(self): def gen_normal(): diff --git a/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst b/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst new file mode 100644 index 000000000000..a816bda5cfe8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst @@ -0,0 +1 @@ +fix: paste zero-width in default repl width is wrong.