From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Fri, 7 Nov 2025 12:51:03 +0000 (+0000) Subject: [3.13] gh-139246: zero-width word paste can be wrong in default repl (GH-139254)... X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=0923704b49e4b390eda4085e97cae590405660d4;p=thirdparty%2FPython%2Fcpython.git [3.13] gh-139246: zero-width word paste can be wrong in default repl (GH-139254) (GH-141166) (cherry picked from commit 4e6dba0ef74523a52f66547c16b9972664b18fd4) Signed-off-by: yihong0618 Co-authored-by: yihong Co-authored-by: grayjk --- diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 7437fbe1ab93..a30fbdee3a41 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -14,6 +14,12 @@ ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) def str_width(c: str) -> int: if ord(c) < 128: return 1 + # gh-139246 for zero-width joiner and combining characters + if unicodedata.combining(c): + return 0 + category = unicodedata.category(c) + if category == "Cf" and c != "\u00ad": + return 0 w = unicodedata.east_asian_width(c) if w in ("N", "Na", "H", "A"): return 1 diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py index 0d59968206a6..70e2484ab5f3 100644 --- a/Lib/test/test_pyrepl/test_utils.py +++ b/Lib/test/test_pyrepl/test_utils.py @@ -5,10 +5,29 @@ from _pyrepl.utils import str_width, wlen class TestUtils(TestCase): def test_str_width(self): - characters = ['a', '1', '_', '!', '\x1a', '\u263A', '\uffb9'] + characters = [ + 'a', + '1', + '_', + '!', + '\x1a', + '\u263A', + '\uffb9', + '\N{LATIN SMALL LETTER E WITH ACUTE}', # é + '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ + '\u00ad', + ] for c in characters: self.assertEqual(str_width(c), 1) + zero_width_characters = [ + '\N{COMBINING ACUTE ACCENT}', + '\N{ZERO WIDTH JOINER}', + ] + for c in zero_width_characters: + with self.subTest(character=c): + self.assertEqual(str_width(c), 0) + characters = [chr(99989), chr(99999)] for c in characters: self.assertEqual(str_width(c), 2) @@ -25,3 +44,5 @@ class TestUtils(TestCase): self.assertEqual(wlen('hello'), 5) self.assertEqual(wlen('hello' + '\x1a'), 7) + self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1) + self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2) diff --git a/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst b/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst new file mode 100644 index 000000000000..a816bda5cfe8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst @@ -0,0 +1 @@ +fix: paste zero-width in default repl width is wrong.