def str_width(c: str) -> int:
    """Return the number of terminal columns used to display character *c*.

    *c* must be a single character (it is passed to ``ord``).

    ASCII characters take one column.  Combining characters and format
    (``Cf``) characters such as ZERO WIDTH JOINER take none (gh-139246);
    SOFT HYPHEN (U+00AD) is the one ``Cf`` character that is still counted
    as one column.  Every other character is sized by its East Asian Width
    property: neutral, narrow, halfwidth and ambiguous characters take one
    column, the remaining (wide and fullwidth) characters take two.
    """
    if ord(c) < 128:
        return 1
    # gh-139246 for zero-width joiner and combining characters
    if unicodedata.combining(c):
        return 0
    category = unicodedata.category(c)
    # U+00AD SOFT HYPHEN is category Cf but is deliberately kept at width 1.
    if category == "Cf" and c != "\u00ad":
        return 0
    w = unicodedata.east_asian_width(c)
    if w in ("N", "Na", "H", "A"):
        return 1
    # Only "W" (wide) and "F" (fullwidth) remain; without this return the
    # function would fall through and yield None for those characters.
    return 2
class TestUtils(TestCase):
def test_str_width(self):
    # Characters that must occupy exactly one column, including
    # precomposed accented letters and SOFT HYPHEN (U+00AD).
    single_width_characters = [
        'a',
        '1',
        '_',
        '!',
        '\x1a',
        '\u263A',
        '\uffb9',
        '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
        '\N{LATIN SMALL LETTER E WITH CEDILLA}',  # ȩ
        '\u00ad',
    ]
    # subTest keeps the loop running after a failure and reports which
    # character failed.
    for c in single_width_characters:
        with self.subTest(character=c):
            self.assertEqual(str_width(c), 1)

    # gh-139246: combining marks and the zero-width joiner take no column.
    zero_width_characters = [
        '\N{COMBINING ACUTE ACCENT}',
        '\N{ZERO WIDTH JOINER}',
    ]
    for c in zero_width_characters:
        with self.subTest(character=c):
            self.assertEqual(str_width(c), 0)

    # Characters expected to occupy two columns.
    double_width_characters = [chr(99989), chr(99999)]
    for c in double_width_characters:
        with self.subTest(character=c):
            self.assertEqual(str_width(c), 2)

    self.assertEqual(wlen('hello'), 5)
    # NOTE(review): wlen counts '\x1a' as 2 columns although str_width
    # reports 1 for it -- presumably because it is rendered in caret
    # notation; confirm against wlen's implementation.
    self.assertEqual(wlen('hello' + '\x1a'), 7)
    # Zero-width characters must not add to the total width of a string.
    self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
    self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2)
def test_prev_next_window(self):
def gen_normal():