git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.13] gh-131878: Fix input of unicode characters with two or more code points in...
author Łukasz Langa <lukasz@langa.pl>
Mon, 5 May 2025 21:08:09 +0000 (23:08 +0200)
committer GitHub <noreply@github.com>
Mon, 5 May 2025 21:08:09 +0000 (23:08 +0200)
(cherry picked from commit 0c5151bc81ec8e8588bef4389df12a9ab50e9fa0)

Co-authored-by: Sergey Miryanov <sergey.miryanov@gmail.com>
Co-authored-by: Tomas R. <tomas.roun8@gmail.com>
Co-authored-by: Chris Eibl <138194463+chris-eibl@users.noreply.github.com>
Lib/_pyrepl/base_eventqueue.py
Lib/_pyrepl/windows_console.py
Lib/test/test_pyrepl/test_eventqueue.py
Misc/NEWS.d/next/Core and Builtins/2025-03-30-19-49-00.gh-issue-131878.J8_cHB.rst [new file with mode: 0644]

diff --git a/Lib/_pyrepl/base_eventqueue.py b/Lib/_pyrepl/base_eventqueue.py
index e018c4fc18308e31eea009788e5144c73a96c62b..842599bd1877fb4ec9d09e18917f419d0d1d77ef 100644 (file)
@@ -69,18 +69,14 @@ class BaseEventQueue:
         trace('added event {event}', event=event)
         self.events.append(event)
 
-    def push(self, char: int | bytes | str) -> None:
+    def push(self, char: int | bytes) -> None:
         """
         Processes a character by updating the buffer and handling special key mappings.
         """
+        assert isinstance(char, (int, bytes))
         ord_char = char if isinstance(char, int) else ord(char)
-        if ord_char > 255:
-            assert isinstance(char, str)
-            char = bytes(char.encode(self.encoding, "replace"))
-            self.buf.extend(char)
-        else:
-            char = bytes(bytearray((ord_char,)))
-            self.buf.append(ord_char)
+        char = ord_char.to_bytes()
+        self.buf.append(ord_char)
 
         if char in self.keymap:
             if self.keymap is self.compiled_keymap:
diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py
index da1452e59897d6dc7a0d74755b1f29b2f31e5a49..05df4f6a19e43dc1f88f8da46bc7c77376858d37 100644 (file)
@@ -469,7 +469,8 @@ class WindowsConsole(Console):
                 return None
             elif self.__vt_support:
                 # If virtual terminal is enabled, scanning VT sequences
-                self.event_queue.push(rec.Event.KeyEvent.uChar.UnicodeChar)
+                for char in raw_key.encode(self.event_queue.encoding, "replace"):
+                    self.event_queue.push(char)
                 continue
 
             if key_event.dwControlKeyState & ALT_ACTIVE:
diff --git a/Lib/test/test_pyrepl/test_eventqueue.py b/Lib/test/test_pyrepl/test_eventqueue.py
index b25bdb956b0d144b317d3abd145415354dfbad9e..6ba2440426d8c029ccf6ec5e45c44f4ce1a29425 100644 (file)
@@ -54,7 +54,7 @@ class EventQueueTestBase:
         mock_keymap.compile_keymap.return_value = {"a": "b"}
         eq = self.make_eventqueue()
         eq.keymap = {b"a": "b"}
-        eq.push("a")
+        eq.push(b"a")
         mock_keymap.compile_keymap.assert_called()
         self.assertEqual(eq.events[0].evt, "key")
         self.assertEqual(eq.events[0].data, "b")
@@ -64,7 +64,7 @@ class EventQueueTestBase:
         mock_keymap.compile_keymap.return_value = {"a": "b"}
         eq = self.make_eventqueue()
         eq.keymap = {b"c": "d"}
-        eq.push("a")
+        eq.push(b"a")
         mock_keymap.compile_keymap.assert_called()
         self.assertEqual(eq.events[0].evt, "key")
         self.assertEqual(eq.events[0].data, "a")
@@ -74,13 +74,13 @@ class EventQueueTestBase:
         mock_keymap.compile_keymap.return_value = {"a": "b"}
         eq = self.make_eventqueue()
         eq.keymap = {b"a": {b"b": "c"}}
-        eq.push("a")
+        eq.push(b"a")
         mock_keymap.compile_keymap.assert_called()
         self.assertTrue(eq.empty())
-        eq.push("b")
+        eq.push(b"b")
         self.assertEqual(eq.events[0].evt, "key")
         self.assertEqual(eq.events[0].data, "c")
-        eq.push("d")
+        eq.push(b"d")
         self.assertEqual(eq.events[1].evt, "key")
         self.assertEqual(eq.events[1].data, "d")
 
@@ -89,32 +89,32 @@ class EventQueueTestBase:
         mock_keymap.compile_keymap.return_value = {"a": "b"}
         eq = self.make_eventqueue()
         eq.keymap = {b"a": {b"b": "c"}}
-        eq.push("a")
+        eq.push(b"a")
         mock_keymap.compile_keymap.assert_called()
         self.assertTrue(eq.empty())
         eq.flush_buf()
-        eq.push("\033")
+        eq.push(b"\033")
         self.assertEqual(eq.events[0].evt, "key")
         self.assertEqual(eq.events[0].data, "\033")
-        eq.push("b")
+        eq.push(b"b")
         self.assertEqual(eq.events[1].evt, "key")
         self.assertEqual(eq.events[1].data, "b")
 
     def test_push_special_key(self):
         eq = self.make_eventqueue()
         eq.keymap = {}
-        eq.push("\x1b")
-        eq.push("[")
-        eq.push("A")
+        eq.push(b"\x1b")
+        eq.push(b"[")
+        eq.push(b"A")
         self.assertEqual(eq.events[0].evt, "key")
         self.assertEqual(eq.events[0].data, "\x1b")
 
     def test_push_unrecognized_escape_sequence(self):
         eq = self.make_eventqueue()
         eq.keymap = {}
-        eq.push("\x1b")
-        eq.push("[")
-        eq.push("Z")
+        eq.push(b"\x1b")
+        eq.push(b"[")
+        eq.push(b"Z")
         self.assertEqual(len(eq.events), 3)
         self.assertEqual(eq.events[0].evt, "key")
         self.assertEqual(eq.events[0].data, "\x1b")
@@ -123,12 +123,54 @@ class EventQueueTestBase:
         self.assertEqual(eq.events[2].evt, "key")
         self.assertEqual(eq.events[2].data, "Z")
 
-    def test_push_unicode_character(self):
+    def test_push_unicode_character_as_str(self):
         eq = self.make_eventqueue()
         eq.keymap = {}
-        eq.push("ч")
-        self.assertEqual(eq.events[0].evt, "key")
-        self.assertEqual(eq.events[0].data, "ч")
+        with self.assertRaises(AssertionError):
+            eq.push("ч")
+        with self.assertRaises(AssertionError):
+            eq.push("ñ")
+
+    def test_push_unicode_character_two_bytes(self):
+        eq = self.make_eventqueue()
+        eq.keymap = {}
+
+        encoded = "ч".encode(eq.encoding, "replace")
+        self.assertEqual(len(encoded), 2)
+
+        eq.push(encoded[0])
+        e = eq.get()
+        self.assertIsNone(e)
+
+        eq.push(encoded[1])
+        e = eq.get()
+        self.assertEqual(e.evt, "key")
+        self.assertEqual(e.data, "ч")
+
+    def test_push_single_chars_and_unicode_character_as_str(self):
+        eq = self.make_eventqueue()
+        eq.keymap = {}
+
+        def _event(evt, data, raw=None):
+            r = raw if raw is not None else data.encode(eq.encoding)
+            e = Event(evt, data, r)
+            return e
+
+        def _push(keys):
+            for k in keys:
+                eq.push(k)
+
+        self.assertIsInstance("ñ", str)
+
+        # If an exception happens during push, the existing events must be
+        # preserved and we can continue to push.
+        _push(b"b")
+        with self.assertRaises(AssertionError):
+            _push("ñ")
+        _push(b"a")
+
+        self.assertEqual(eq.get(), _event("key", "b"))
+        self.assertEqual(eq.get(), _event("key", "a"))
 
 
 @unittest.skipIf(support.MS_WINDOWS, "No Unix event queue on Windows")
diff --git a/Misc/NEWS.d/next/Core and Builtins/2025-03-30-19-49-00.gh-issue-131878.J8_cHB.rst b/Misc/NEWS.d/next/Core and Builtins/2025-03-30-19-49-00.gh-issue-131878.J8_cHB.rst
new file mode 100644 (file)
index 0000000..b1223da
--- /dev/null
@@ -0,0 +1,2 @@
+Fix support of unicode characters with two or more codepoints on Windows in
+the new REPL.