[3.13] gh-119614: Fix truncation of strings with embedded null characters in Tkinter...

author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>

Mon, 24 Jun 2024 09:45:45 +0000 (11:45 +0200)

committer GitHub <noreply@github.com>

Mon, 24 Jun 2024 09:45:45 +0000 (09:45 +0000)
author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Mon, 24 Jun 2024 09:45:45 +0000 (11:45 +0200)
committer GitHub <noreply@github.com>
Mon, 24 Jun 2024 09:45:45 +0000 (09:45 +0000)
diff --git a/Lib/test/test_tcl.py b/Lib/test/test_tcl.py

index 443787d721d5fbf1b10942cdf2a3889b862c904e..d479f7d7515d9b00e44efabe3a393aebd56e1362 100644 (file)
--- a/Lib/test/test_tcl.py
+++ b/Lib/test/test_tcl.py
@@ -73,6 +73,18 @@ class TclTest(unittest.TestCase):
          tcl.call('set','a','1')
          self.assertEqual(tcl.call('set','a'),'1')
  
+    def test_call_passing_null(self):
+        tcl = self.interp
+        tcl.call('set', 'a', 'a\0b')  # ASCII-only
+        self.assertEqual(tcl.getvar('a'), 'a\x00b')
+        self.assertEqual(tcl.call('set', 'a'), 'a\x00b')
+        self.assertEqual(tcl.eval('set a'), 'a\x00b')
+
+        tcl.call('set', 'a', '\u20ac\0')  # non-ASCII
+        self.assertEqual(tcl.getvar('a'), '\u20ac\x00')
+        self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00')
+        self.assertEqual(tcl.eval('set a'), '\u20ac\x00')
+
      def testCallException(self):
          tcl = self.interp
          self.assertRaises(TclError,tcl.call,'set','a')
@@ -98,6 +110,18 @@ class TclTest(unittest.TestCase):
          tcl.setvar('a','1')
          self.assertEqual(tcl.eval('set a'),'1')
  
+    def test_setvar_passing_null(self):
+        tcl = self.interp
+        tcl.setvar('a', 'a\0b')  # ASCII-only
+        self.assertEqual(tcl.getvar('a'), 'a\x00b')
+        self.assertEqual(tcl.call('set', 'a'), 'a\x00b')
+        self.assertEqual(tcl.eval('set a'), 'a\x00b')
+
+        tcl.setvar('a', '\u20ac\0')  # non-ASCII
+        self.assertEqual(tcl.getvar('a'), '\u20ac\x00')
+        self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00')
+        self.assertEqual(tcl.eval('set a'), '\u20ac\x00')
+
      def testSetVarArray(self):
          tcl = self.interp
          tcl.setvar('a(1)','1')
diff --git a/Lib/test/test_tkinter/test_misc.py b/Lib/test/test_tkinter/test_misc.py

index d9ea642881a1790c6d72b18677e238ca10b9bf15..b0b9ed600404439324fdf46d28a719288945adb8 100644 (file)
--- a/Lib/test/test_tkinter/test_misc.py
+++ b/Lib/test/test_tkinter/test_misc.py
@@ -476,6 +476,15 @@ class MiscTest(AbstractTkTest, unittest.TestCase):
              self.assertEqual(vi.micro, 0)
          self.assertTrue(str(vi).startswith(f'{vi.major}.{vi.minor}'))
  
+    def test_embedded_null(self):
+        widget = tkinter.Entry(self.root)
+        widget.insert(0, 'abc\0def')  # ASCII-only
+        widget.selection_range(0, 'end')
+        self.assertEqual(widget.selection_get(), 'abc\x00def')
+        widget.insert(0, '\u20ac\0')  # non-ASCII
+        widget.selection_range(0, 'end')
+        self.assertEqual(widget.selection_get(), '\u20ac\0abc\x00def')
+
  
  class WmTest(AbstractTkTest, unittest.TestCase):
  
diff --git a/Misc/NEWS.d/next/Library/2024-06-23-17-50-40.gh-issue-119614.vwPGLB.rst b/Misc/NEWS.d/next/Library/2024-06-23-17-50-40.gh-issue-119614.vwPGLB.rst

new file mode 100644 (file)

index 0000000..d518265
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-06-23-17-50-40.gh-issue-119614.vwPGLB.rst
@@ -0,0 +1,2 @@
+Fix truncation of strings with embedded null characters in some internal
+operations in :mod:`tkinter`.
diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c

index 8fe2c5ba295148445af1125a446c360f5102e436..cd3722f54c24ce629eaaa1c3c5daa44418de335e 100644 (file)
--- a/Modules/_tkinter.c
+++ b/Modules/_tkinter.c
@@ -512,7 +512,7 @@ unicodeFromTclObj(TkappObject *tkapp, Tcl_Obj *value)
          else
              Py_UNREACHABLE();
      }
-#endif
+#endif /* USE_TCL_UNICODE */
      const char *s = Tcl_GetStringFromObj(value, &len);
      return unicodeFromTclStringAndSize(s, len);
  }
@@ -1018,7 +1018,9 @@ AsObj(PyObject *value)
              PyErr_SetString(PyExc_OverflowError, "string is too long");
              return NULL;
          }
-        if (PyUnicode_IS_ASCII(value)) {
+        if (PyUnicode_IS_ASCII(value) &&
+            strlen(PyUnicode_DATA(value)) == (size_t)PyUnicode_GET_LENGTH(value))
+        {
              return Tcl_NewStringObj((const char *)PyUnicode_DATA(value),
                                      (int)size);
          }
@@ -1033,9 +1035,6 @@ AsObj(PyObject *value)
                      "surrogatepass", NATIVE_BYTEORDER);
          else
              Py_UNREACHABLE();
-#else
-        encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
-#endif
          if (!encoded) {
              return NULL;
          }
@@ -1045,12 +1044,39 @@ AsObj(PyObject *value)
              PyErr_SetString(PyExc_OverflowError, "string is too long");
              return NULL;
          }
-#if USE_TCL_UNICODE
          result = Tcl_NewUnicodeObj((const Tcl_UniChar *)PyBytes_AS_STRING(encoded),
                                     (int)(size / sizeof(Tcl_UniChar)));
  #else
+        encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
+        if (!encoded) {
+            return NULL;
+        }
+        size = PyBytes_GET_SIZE(encoded);
+        if (strlen(PyBytes_AS_STRING(encoded)) != (size_t)size) {
+            /* The string contains embedded null characters.
+             * Tcl needs a null character to be represented as \xc0\x80 in
+             * the Modified UTF-8 encoding.  Otherwise the string can be
+             * truncated in some internal operations.
+             *
+             * NOTE: stringlib_replace() could be used here, but optimizing
+             * this obscure case isn't worth it unless stringlib_replace()
+             * was already exposed in the C API for other reasons. */
+            Py_SETREF(encoded,
+                      PyObject_CallMethod(encoded, "replace", "y#y#",
+                                          "\0", (Py_ssize_t)1,
+                                          "\xc0\x80", (Py_ssize_t)2));
+            if (!encoded) {
+                return NULL;
+            }
+            size = PyBytes_GET_SIZE(encoded);
+        }
+        if (size > INT_MAX) {
+            Py_DECREF(encoded);
+            PyErr_SetString(PyExc_OverflowError, "string is too long");
+            return NULL;
+        }
          result = Tcl_NewStringObj(PyBytes_AS_STRING(encoded), (int)size);
-#endif
+#endif /* USE_TCL_UNICODE */
          Py_DECREF(encoded);
          return result;
      }
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
	Mon, 24 Jun 2024 09:45:45 +0000 (11:45 +0200)
committer	GitHub <noreply@github.com>
	Mon, 24 Jun 2024 09:45:45 +0000 (09:45 +0000)
Lib/test/test_tcl.py		patch | blob | blame | history
Lib/test/test_tkinter/test_misc.py		patch | blob | blame | history
Misc/NEWS.d/next/Library/2024-06-23-17-50-40.gh-issue-119614.vwPGLB.rst	[new file with mode: 0644]	patch | blob
Modules/_tkinter.c		patch | blob | blame | history