gh-133031: Support the full Unicode range in curses.textpad.Textbox (GH-152482)

author Serhiy Storchaka <storchaka@gmail.com>

Sun, 28 Jun 2026 12:49:28 +0000 (15:49 +0300)

committer GitHub <noreply@github.com>

Sun, 28 Jun 2026 12:49:28 +0000 (15:49 +0300)
author Serhiy Storchaka <storchaka@gmail.com>
Sun, 28 Jun 2026 12:49:28 +0000 (15:49 +0300)
committer GitHub <noreply@github.com>
Sun, 28 Jun 2026 12:49:28 +0000 (15:49 +0300)
diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst

index 8987e82ee5d026ea132175e3f3b2a5b4157357a0..8d069ed8b7d1c4ace75038b1fa428fdb4eb8a6bf 100644 (file)
--- a/Doc/library/curses.rst
+++ b/Doc/library/curses.rst
@@ -2645,6 +2645,11 @@ You can instantiate a :class:`Textbox` object as follows:
     upper-left corner of the containing window, with coordinates ``(0, 0)``.
     The instance's :attr:`stripspaces` flag is initially on.
  
+   .. versionchanged:: next
+      Entering and reading back the full Unicode range, including combining
+      characters, is now supported when curses is built with wide-character
+      support.
+
     :class:`Textbox` objects have the following methods:
  
  
@@ -2659,6 +2664,10 @@ You can instantiate a :class:`Textbox` object as follows:
        string; whether blanks in the window are included is affected by the
        :attr:`stripspaces` attribute.
  
+      .. versionchanged:: next
+         *validate* is now called with a non-ASCII character as a string;
+         other keystrokes are still passed as an integer.
+
  
     .. method:: do_command(ch)
  
diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst

index cbe0df1c8a65ff054256c7a9bdd97307a35f7fe1..23b88c5d279c2dd7e9c5b16e0fa1f6969e415065 100644 (file)
--- a/Doc/whatsnew/3.16.rst
+++ b/Doc/whatsnew/3.16.rst
@@ -192,6 +192,11 @@ curses
    against an ncurses with ``NCURSES_EXT_FUNCS``.
    (Contributed by Serhiy Storchaka in :gh:`152334`.)
  
+* :class:`curses.textpad.Textbox` now supports entering and reading back the
+  full Unicode range, including combining characters, when curses is built with
+  wide-character support.
+  (Contributed by Serhiy Storchaka in :gh:`133031`.)
+
  gzip
  ----
  
diff --git a/Lib/curses/textpad.py b/Lib/curses/textpad.py

index c58a7174d194cf3dfbe3de8491b0786a75ccfb5e..70fa2c25f64632852a6f058fa8647d76f65a4010 100644 (file)
--- a/Lib/curses/textpad.py
+++ b/Lib/curses/textpad.py
@@ -57,32 +57,13 @@ class Textbox:
          self.maxx = maxx - 1
  
      def _decode(self, ch):
-        # The text of a chtype cell or input byte, decoded with the window's
-        # encoding.  A_CHARTEXT keeps the character byte, dropping the attributes.
+        # Decode an integer keystroke or byte to text with the window's encoding.
+        # A_CHARTEXT drops any attribute bits.
          return bytes([ch & curses.A_CHARTEXT]).decode(self.win.encoding, 'replace')
  
-    def _char_at(self, *yx):
-        # The text of the cell at the given position (default: the cursor).
-        # instr() re-encodes it to the window's encoding; inch() cannot
-        # represent a non-ASCII 8-bit-locale character on a wide build.
-        return self.win.instr(*yx, 1).decode(self.win.encoding, 'replace')
-
-    def _cell_at(self, *yx):
-        # The cell at the given position (default: the cursor) as a chtype
-        # addch() can write back with its rendition.  inch() mangles a non-ASCII
-        # character on a wide build, so take the byte from instr() and the
-        # attributes from inch().
-        return self.win.instr(*yx, 1)[0] | self.win.inch(*yx) & curses.A_ATTRIBUTES
-
-    def _isprint(self, cell):
-        # Whether a chtype cell holds a printable character; _decode() drops the
-        # attribute bits.
-        return self._decode(cell).isprintable()
-
      def _printable_key(self, ch):
-        # Whether the integer keystroke is a printable character, not a key
-        # code.  0..255 are character bytes (decoded with the window's encoding);
-        # larger values are function and navigation keys.
+        # Whether the integer keystroke is a printable character, not a key code:
+        # 0..255 are character bytes, larger values are function keys.
          return ch <= 0xff and self._decode(ch).isprintable()
  
      def _end_of_line(self, y):
@@ -91,7 +72,8 @@ class Textbox:
          self._update_max_yx()
          last = self.maxx
          while True:
-            if self._char_at(y, last) != ' ':
+            # The text of the cell at (y, last).
+            if str(self.win.in_wch(y, last)) != ' ':
                  last = min(self.maxx, last+1)
                  break
              elif last == 0:
@@ -105,16 +87,22 @@ class Textbox:
          backyx = None
          while True:
              if self.insert_mode:
-                oldch = self._cell_at()
+                # The displaced cell, as a complexchar so addch() can rewrite it
+                # with its rendition.
+                oldch = self.win.in_wch()
              if y >= self.maxy and x >= self.maxx:
-                # Use insch() in the lower-right cell: addch() there would move
-                # the cursor out of the window, raising an error and scrolling
-                # a scrollable window.  Pass it as text: insch() does not decode
-                # an int byte through the locale on a wide build.
-                self.win.insch(self._decode(ch), ch & curses.A_ATTRIBUTES)
+                # Use insch() in the lower-right cell; addch() there would push
+                # the cursor out of the window (an error, and it scrolls a
+                # scrollable window).  insch() does not decode an int byte
+                # through the locale on a wide build, so pass it as text.
+                if isinstance(ch, int):
+                    self.win.insch(self._decode(ch), ch & curses.A_ATTRIBUTES)
+                else:
+                    self.win.insch(ch)
                  break
              self.win.addch(ch)
-            if not self.insert_mode or not self._isprint(oldch):
+            # In insert mode keep shifting cells right while the displaced one is printable.
+            if not self.insert_mode or not str(oldch).isprintable():
                  break
              ch = oldch
              (y, x) = self.win.getyx()
@@ -130,9 +118,17 @@ class Textbox:
          self._update_max_yx()
          (y, x) = self.win.getyx()
          self.lastcmd = ch
-        if self._printable_key(ch):
+        if isinstance(ch, str):
+            # A character from get_wch(); a control character is dispatched
+            # below by its code point.
+            if ch.isprintable():
+                self._insert_printable_char(ch)
+                return 1
+            ch = ord(ch)
+        elif self._printable_key(ch):
              self._insert_printable_char(ch)
-        elif ch == curses.ascii.SOH:                           # ^a
+            return 1
+        if ch == curses.ascii.SOH:                             # ^a
              self.win.move(y, 0)
          elif ch in (curses.ascii.STX,curses.KEY_LEFT,
                      curses.ascii.BS,
@@ -204,7 +200,7 @@ class Textbox:
              for x in range(self.maxx+1):
                  if self.stripspaces and x > stop:
                      break
-                result = result + self._char_at(y, x)
+                result = result + str(self.win.in_wch(y, x))
              if self.maxy > 0:
                  result = result + "\n"
          return result
@@ -212,7 +208,12 @@ class Textbox:
      def edit(self, validate=None):
          "Edit in the widget window and collect the results."
          while 1:
-            ch = self.win.getch()
+            ch = self.win.get_wch()
+            # Represent an ASCII keystroke by its code point, the way getch()
+            # always has, so that existing validators and the command dispatch
+            # keep working; only non-ASCII characters are passed as strings.
+            if isinstance(ch, str) and ch.isascii():
+                ch = ord(ch)
              if validate:
                  ch = validate(ch)
              if not ch:
diff --git a/Lib/test/test_curses.py b/Lib/test/test_curses.py

index 079e69a52c1504207299cbd56fadd9864b5d021f..20b1441d98584c530ae413f4766c71e13cea5f97 100644 (file)
--- a/Lib/test/test_curses.py
+++ b/Lib/test/test_curses.py
@@ -2247,9 +2247,9 @@ class TestCurses(unittest.TestCase):
          self.assertEqual(box.gather(), 'abc\ndef\n')
  
      def test_textbox_8bit(self):
-        # A character of an 8-bit locale encoding is entered and read back
-        # through the byte API.  The byte path also runs on a wide build, so the
-        # test is not skipped there.  Run the suite under an 8-bit locale
+        # An 8-bit-locale character is entered as integer bytes -- the way
+        # do_command() receives getch() input -- and read back.  This runs on
+        # both narrow and wide builds.  Run the suite under an 8-bit locale
          # (ISO-8859-1, ISO-8859-15 or KOI8-U) to reach the non-ASCII cases; each
          # string is used only if the encoding maps it to single bytes.  'abc' is
          # ASCII, 'café' is common to the Latin encodings, and the rest are
@@ -2270,9 +2270,8 @@ class TestCurses(unittest.TestCase):
  
      def test_textbox_8bit_insert(self):
          # Insert mode shifts the rest of the line right by reading each cell back
-        # and rewriting it; a non-ASCII 8-bit-locale character must survive the
-        # shift, even on a wide build where inch() mangles it.  See
-        # test_textbox_8bit for the character choices.
+        # and rewriting it; an 8-bit-locale character entered as bytes must
+        # survive the shift.  See test_textbox_8bit for the character choices.
          encoding = self.stdscr.encoding
          for ch in ['é', '¤', '€', 'є']:
              try:
@@ -2290,8 +2289,8 @@ class TestCurses(unittest.TestCase):
                  self.assertEqual(box.gather(), 'ab' + ch + 'c ')
  
      def test_textbox_8bit_fill_last_cell(self):
-        # A non-ASCII 8-bit-locale character must survive being written to the
-        # lower-right cell, which uses insch() rather than addch().  See
+        # An 8-bit-locale character entered as bytes must survive being written
+        # to the lower-right cell, which uses insch() rather than addch().  See
          # test_textbox_8bit for the character choices.
          encoding = self.stdscr.encoding
          for ch in ['é', '¤', '€', 'є']:
@@ -2308,6 +2307,53 @@ class TestCurses(unittest.TestCase):
                      box.do_command(byte)
                  self.assertEqual(box.gather(), text)
  
+    def test_textbox_unicode(self):
+        # Like test_textbox_8bit, but characters are entered as strings -- the
+        # way do_command() receives get_wch() input -- rather than integer
+        # bytes.  Each string is used only if encodable in the current locale.
+        for text in ['abc', 'héšλ', 'café', 'naïve ¤', 'soupçon €Š', 'дякую єі']:
+            if self._encodable(text):
+                with self.subTest(text=text):
+                    box, win = self._make_textbox(1, 12)
+                    for ch in text:
+                        box.do_command(ch)
+                    self.assertEqual(box.gather(), text + ' ')
+
+    def test_textbox_unicode_insert_mode(self):
+        # Like test_textbox_8bit_insert, but the character is entered as a string
+        # (get_wch() input).  Each string is used only if encodable.
+        for text in ['abcd', 'aβλc', 'aéàc', 'a¤½c', 'a€Šc', 'aдві']:
+            if self._encodable(text):
+                with self.subTest(text=text):
+                    box, win = self._make_textbox(1, 10, insert_mode=True)
+                    for ch in text[0] + text[2:]:    # all but the 2nd character
+                        box.do_command(ch)
+                    win.move(0, 1)
+                    box.do_command(text[1])          # insert it at position 1
+                    self.assertEqual(box.gather(), text + ' ')
+
+    @requires_wide_build
+    def test_textbox_combining(self):
+        # A spacing character plus a combining mark is a single cell, which
+        # needs the wide build (a narrow build stores one byte per cell).
+        text = 'e\u0301'            # 'e' + COMBINING ACUTE ACCENT
+        if self._encodable(text):
+            box, win = self._make_textbox(1, 10)
+            for ch in text:
+                box.do_command(ch)
+            self.assertEqual(box.gather(), text + ' ')
+
+    def test_textbox_edit_wide(self):
+        # edit() reads characters through get_wch().  Each is used only if
+        # encodable in the current locale.
+        for ch in ['A', 'é', '¤', '€', 'д']:
+            if self._encodable(ch):
+                with self.subTest(ch=ch):
+                    box, win = self._make_textbox(1, 10)
+                    for c in reversed(['a', ch, chr(curses.ascii.BEL)]):
+                        curses.unget_wch(c)
+                    self.assertEqual(box.edit(), 'a' + ch + ' ')
+
      def test_textbox_movement(self):
          box, win = self._make_textbox(3, 10)
          self._type(box, 'abc')
diff --git a/Misc/NEWS.d/next/Library/2026-06-27-12-45-00.gh-issue-133031.Wide00.rst b/Misc/NEWS.d/next/Library/2026-06-27-12-45-00.gh-issue-133031.Wide00.rst

new file mode 100644 (file)

index 0000000..c333659
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-27-12-45-00.gh-issue-133031.Wide00.rst
@@ -0,0 +1,3 @@
+:class:`curses.textpad.Textbox` now supports entering and reading back the full
+Unicode range, including combining characters, when curses is built with
+wide-character support.
author	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 28 Jun 2026 12:49:28 +0000 (15:49 +0300)
committer	GitHub <noreply@github.com>
	Sun, 28 Jun 2026 12:49:28 +0000 (15:49 +0300)
Doc/library/curses.rst		patch \| blob \| blame \| history
Doc/whatsnew/3.16.rst		patch \| blob \| blame \| history
Lib/curses/textpad.py		patch \| blob \| blame \| history
Lib/test/test_curses.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2026-06-27-12-45-00.gh-issue-133031.Wide00.rst	[new file with mode: 0644]	patch \| blob