git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-131878: Fix input of unicode characters with two or more code points in new pyrepl...
author: Sergey Miryanov <sergey.miryanov@gmail.com>
Mon, 5 May 2025 16:25:00 +0000 (09:25 -0700)
committer: GitHub <noreply@github.com>
Mon, 5 May 2025 16:25:00 +0000 (18:25 +0200)
Co-authored-by: Tomas R. <tomas.roun8@gmail.com>
Co-authored-by: Chris Eibl <138194463+chris-eibl@users.noreply.github.com>
Lib/_pyrepl/base_eventqueue.py
Lib/_pyrepl/windows_console.py
Lib/test/test_pyrepl/test_eventqueue.py
Misc/NEWS.d/next/Core_and_Builtins/2025-03-30-19-49-00.gh-issue-131878.J8_cHB.rst [new file with mode: 0644]

index e018c4fc18308e31eea009788e5144c73a96c62b..842599bd1877fb4ec9d09e18917f419d0d1d77ef 100644 (file)
@@ -69,18 +69,14 @@ class BaseEventQueue:
         trace('added event {event}', event=event)
         self.events.append(event)
 
-    def push(self, char: int | bytes | str) -> None:
+    def push(self, char: int | bytes) -> None:
         """
         Processes a character by updating the buffer and handling special key mappings.
         """
+        assert isinstance(char, (int, bytes))
         ord_char = char if isinstance(char, int) else ord(char)
-        if ord_char > 255:
-            assert isinstance(char, str)
-            char = bytes(char.encode(self.encoding, "replace"))
-            self.buf.extend(char)
-        else:
-            char = bytes(bytearray((ord_char,)))
-            self.buf.append(ord_char)
+        char = ord_char.to_bytes()
+        self.buf.append(ord_char)
 
         if char in self.keymap:
             if self.keymap is self.compiled_keymap:
index 77985e59a932498f3dfb00d673107ecd39355d7c..587c6137f885c03d9f894924af9038c734658d56 100644 (file)
@@ -485,7 +485,8 @@ class WindowsConsole(Console):
                 return None
             elif self.__vt_support:
                 # If virtual terminal is enabled, scanning VT sequences
-                self.event_queue.push(rec.Event.KeyEvent.uChar.UnicodeChar)
+                for char in raw_key.encode(self.event_queue.encoding, "replace"):
+                    self.event_queue.push(char)
                 continue
 
             if key_event.dwControlKeyState & ALT_ACTIVE:
index afb557103424a6e688c1ead5c4463413533bd0d5..edfe6ac4748f33736c072a28b16818546c45ef0e 100644 (file)
@@ -53,7 +53,7 @@ class EventQueueTestBase:
         mock_keymap.compile_keymap.return_value = {"a": "b"}
         eq = self.make_eventqueue()
         eq.keymap = {b"a": "b"}
-        eq.push("a")
+        eq.push(b"a")
         mock_keymap.compile_keymap.assert_called()
         self.assertEqual(eq.events[0].evt, "key")
         self.assertEqual(eq.events[0].data, "b")
@@ -63,7 +63,7 @@ class EventQueueTestBase:
         mock_keymap.compile_keymap.return_value = {"a": "b"}
         eq = self.make_eventqueue()
         eq.keymap = {b"c": "d"}
-        eq.push("a")
+        eq.push(b"a")
         mock_keymap.compile_keymap.assert_called()
         self.assertEqual(eq.events[0].evt, "key")
         self.assertEqual(eq.events[0].data, "a")
@@ -73,13 +73,13 @@ class EventQueueTestBase:
         mock_keymap.compile_keymap.return_value = {"a": "b"}
         eq = self.make_eventqueue()
         eq.keymap = {b"a": {b"b": "c"}}
-        eq.push("a")
+        eq.push(b"a")
         mock_keymap.compile_keymap.assert_called()
         self.assertTrue(eq.empty())
-        eq.push("b")
+        eq.push(b"b")
         self.assertEqual(eq.events[0].evt, "key")
         self.assertEqual(eq.events[0].data, "c")
-        eq.push("d")
+        eq.push(b"d")
         self.assertEqual(eq.events[1].evt, "key")
         self.assertEqual(eq.events[1].data, "d")
 
@@ -88,32 +88,32 @@ class EventQueueTestBase:
         mock_keymap.compile_keymap.return_value = {"a": "b"}
         eq = self.make_eventqueue()
         eq.keymap = {b"a": {b"b": "c"}}
-        eq.push("a")
+        eq.push(b"a")
         mock_keymap.compile_keymap.assert_called()
         self.assertTrue(eq.empty())
         eq.flush_buf()
-        eq.push("\033")
+        eq.push(b"\033")
         self.assertEqual(eq.events[0].evt, "key")
         self.assertEqual(eq.events[0].data, "\033")
-        eq.push("b")
+        eq.push(b"b")
         self.assertEqual(eq.events[1].evt, "key")
         self.assertEqual(eq.events[1].data, "b")
 
     def test_push_special_key(self):
         eq = self.make_eventqueue()
         eq.keymap = {}
-        eq.push("\x1b")
-        eq.push("[")
-        eq.push("A")
+        eq.push(b"\x1b")
+        eq.push(b"[")
+        eq.push(b"A")
         self.assertEqual(eq.events[0].evt, "key")
         self.assertEqual(eq.events[0].data, "\x1b")
 
     def test_push_unrecognized_escape_sequence(self):
         eq = self.make_eventqueue()
         eq.keymap = {}
-        eq.push("\x1b")
-        eq.push("[")
-        eq.push("Z")
+        eq.push(b"\x1b")
+        eq.push(b"[")
+        eq.push(b"Z")
         self.assertEqual(len(eq.events), 3)
         self.assertEqual(eq.events[0].evt, "key")
         self.assertEqual(eq.events[0].data, "\x1b")
@@ -122,12 +122,54 @@ class EventQueueTestBase:
         self.assertEqual(eq.events[2].evt, "key")
         self.assertEqual(eq.events[2].data, "Z")
 
-    def test_push_unicode_character(self):
+    def test_push_unicode_character_as_str(self):
         eq = self.make_eventqueue()
         eq.keymap = {}
-        eq.push("ч")
-        self.assertEqual(eq.events[0].evt, "key")
-        self.assertEqual(eq.events[0].data, "ч")
+        with self.assertRaises(AssertionError):
+            eq.push("ч")
+        with self.assertRaises(AssertionError):
+            eq.push("ñ")
+
+    def test_push_unicode_character_two_bytes(self):
+        eq = self.make_eventqueue()
+        eq.keymap = {}
+
+        encoded = "ч".encode(eq.encoding, "replace")
+        self.assertEqual(len(encoded), 2)
+
+        eq.push(encoded[0])
+        e = eq.get()
+        self.assertIsNone(e)
+
+        eq.push(encoded[1])
+        e = eq.get()
+        self.assertEqual(e.evt, "key")
+        self.assertEqual(e.data, "ч")
+
+    def test_push_single_chars_and_unicode_character_as_str(self):
+        eq = self.make_eventqueue()
+        eq.keymap = {}
+
+        def _event(evt, data, raw=None):
+            r = raw if raw is not None else data.encode(eq.encoding)
+            e = Event(evt, data, r)
+            return e
+
+        def _push(keys):
+            for k in keys:
+                eq.push(k)
+
+        self.assertIsInstance("ñ", str)
+
+        # If an exception happens during push, the existing events must be
+        # preserved and we can continue to push.
+        _push(b"b")
+        with self.assertRaises(AssertionError):
+            _push("ñ")
+        _push(b"a")
+
+        self.assertEqual(eq.get(), _event("key", "b"))
+        self.assertEqual(eq.get(), _event("key", "a"))
 
 
 @unittest.skipIf(support.MS_WINDOWS, "No Unix event queue on Windows")
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-30-19-49-00.gh-issue-131878.J8_cHB.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-30-19-49-00.gh-issue-131878.J8_cHB.rst
new file mode 100644 (file)
index 0000000..b1223da
--- /dev/null
@@ -0,0 +1,2 @@
+Fix support for Unicode characters with two or more code points on Windows
+in the new REPL.