git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.11] gh-96670: Raise SyntaxError when parsing NULL bytes (GH-97594) (#104195)
author: Lysandros Nikolaou <lisandrosnik@gmail.com>
Sun, 7 May 2023 10:12:04 +0000 (12:12 +0200)
committer: GitHub <noreply@github.com>
Sun, 7 May 2023 10:12:04 +0000 (11:12 +0100)
Include/cpython/fileobject.h
Lib/test/test_ast.py
Lib/test/test_builtin.py
Lib/test/test_cmd_line_script.py
Lib/test/test_compile.py
Misc/NEWS.d/next/Core and Builtins/2022-09-27-11-59-13.gh-issue-96670.XrBBit.rst [new file with mode: 0644]
Objects/fileobject.c
Parser/tokenizer.c
Python/pythonrun.c

index cff2243d625e767381b9a4a2aa39d7076f273f08..b70ec318986d821580c9c76a6de0ec42fab25256 100644 (file)
@@ -3,6 +3,7 @@
 #endif
 
 PyAPI_FUNC(char *) Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *);
+PyAPI_FUNC(char *) _Py_UniversalNewlineFgetsWithSize(char *, int, FILE*, PyObject *, size_t*);
 
 /* The std printer acts as a preliminary sys.stderr until the new io
    infrastructure is in place. */
index b4ec1fef5ade054eceb04acbf0282a61ec8253fa..7d9d0c431a66ff50a13a1d84bdac6ca70e4856b0 100644 (file)
@@ -857,6 +857,10 @@ class AST_Tests(unittest.TestCase):
         check_limit("a", "[0]")
         check_limit("a", "*a")
 
+    def test_null_bytes(self):
+        # assertRaises() only uses ``msg`` as the failure message and never
+        # compares it with the exception text, so match the message with
+        # assertRaisesRegex() instead.
+        with self.assertRaisesRegex(SyntaxError,
+            "source code string cannot contain null bytes"):
+            ast.parse("a\0b")
 
 class ASTHelpers_Test(unittest.TestCase):
     maxDiff = None
index 0f6d2db0ecb6d4ec3e8b41b9a35041bb4d97fe28..9078c409cc44b22649e3f68145e933427604bfc5 100644 (file)
@@ -334,11 +334,10 @@ class BuiltinTest(unittest.TestCase):
         self.assertRaises(TypeError, compile)
         self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'badmode')
         self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'single', 0xff)
-        self.assertRaises(ValueError, compile, chr(0), 'f', 'exec')
         self.assertRaises(TypeError, compile, 'pass', '?', 'exec',
                           mode='eval', source='0', filename='tmp')
         compile('print("\xe5")\n', '', 'exec')
-        self.assertRaises(ValueError, compile, chr(0), 'f', 'exec')
+        self.assertRaises(SyntaxError, compile, chr(0), 'f', 'exec')
         self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad')
 
         # test the optimize argument
index 4dadbc0b64bdb791f86d8b5aca3d621b310fe1c0..d10012759c370bb0a90cf1863f968c6ebe231c8c 100644 (file)
@@ -657,6 +657,31 @@ class CmdLineTest(unittest.TestCase):
                 ],
             )
 
+    def test_syntaxerror_null_bytes(self):
+        # Running a script file that contains a NUL byte must fail, and the
+        # last two stderr lines must be the displayed source line (which ends
+        # just before the NUL) followed by the SyntaxError message.
+        script = "x = '\0' nothing to see here\n';import os;os.system('echo pwnd')\n"
+        with os_helper.temp_dir() as script_dir:
+            script_name = _make_test_script(script_dir, 'script', script)
+            exitcode, stdout, stderr = assert_python_failure(script_name)
+            self.assertEqual(
+                stderr.splitlines()[-2:],
+                [   b"    x = '",
+                    b'SyntaxError: source code cannot contain null bytes'
+                ],
+            )
+
+    def test_syntaxerror_null_bytes_in_multiline_string(self):
+        # Same check as for a single-line script, but with the NUL byte placed
+        # inside a triple-quoted string; each script is written to the same
+        # temporary file name and run in turn.
+        scripts = ["\n'''\nmultilinestring\0\n'''", "\nf'''\nmultilinestring\0\n'''"] # Both normal and f-strings
+        with os_helper.temp_dir() as script_dir:
+            for script in scripts:
+                script_name = _make_test_script(script_dir, 'script', script)
+                _, _, stderr = assert_python_failure(script_name)
+                # Expect the offending line as displayed, then the error message.
+                self.assertEqual(
+                    stderr.splitlines()[-2:],
+                    [   b"    multilinestring",
+                        b'SyntaxError: source code cannot contain null bytes'
+                    ]
+                )
+
     def test_consistent_sys_path_for_direct_execution(self):
         # This test case ensures that the following all give the same
         # sys.path configuration:
index 54e90663ab51000dc38609eb2eb860e3c838adf1..c96ae4375df88337aaec41c31f23703ad0e0644c 100644 (file)
@@ -542,7 +542,7 @@ if 1:
             with open(fn, "wb") as fp:
                 fp.write(src)
             res = script_helper.run_python_until_end(fn)[0]
-        self.assertIn(b"Non-UTF-8", res.err)
+        self.assertIn(b"source code cannot contain null bytes", res.err)
 
     def test_yet_more_evil_still_undecodable(self):
         # Issue #25388
@@ -552,7 +552,7 @@ if 1:
             with open(fn, "wb") as fp:
                 fp.write(src)
             res = script_helper.run_python_until_end(fn)[0]
-        self.assertIn(b"Non-UTF-8", res.err)
+        self.assertIn(b"source code cannot contain null bytes", res.err)
 
     @support.cpython_only
     def test_compiler_recursion_limit(self):
@@ -588,9 +588,9 @@ if 1:
     def test_null_terminated(self):
         # The source code is null-terminated internally, but bytes-like
         # objects are accepted, which could be not terminated.
-        with self.assertRaisesRegex(ValueError, "cannot contain null"):
+        with self.assertRaisesRegex(SyntaxError, "cannot contain null"):
             compile("123\x00", "<dummy>", "eval")
-        with self.assertRaisesRegex(ValueError, "cannot contain null"):
+        with self.assertRaisesRegex(SyntaxError, "cannot contain null"):
             compile(memoryview(b"123\x00"), "<dummy>", "eval")
         code = compile(memoryview(b"123\x00")[1:-1], "<dummy>", "eval")
         self.assertEqual(eval(code), 23)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-09-27-11-59-13.gh-issue-96670.XrBBit.rst b/Misc/NEWS.d/next/Core and Builtins/2022-09-27-11-59-13.gh-issue-96670.XrBBit.rst
new file mode 100644 (file)
index 0000000..eea90e7
--- /dev/null
@@ -0,0 +1,2 @@
+The parser now raises :exc:`SyntaxError` when parsing source code containing
+null bytes. Backported from ``aab01e3``. Patch by Pablo Galindo.
index 8dba5b9aea6db18f9c66b578414cec610929c5ba..ffe55eb7c3730e9731f1bf49aa7cd5b43605513c 100644 (file)
@@ -230,16 +230,8 @@ _PyLong_FileDescriptor_Converter(PyObject *o, void *ptr)
     return 1;
 }
 
-/*
-** Py_UniversalNewlineFgets is an fgets variation that understands
-** all of \r, \n and \r\n conventions.
-** The stream should be opened in binary mode.
-** The fobj parameter exists solely for legacy reasons and must be NULL.
-** Note that we need no error handling: fgets() treats error and eof
-** identically.
-*/
 char *
-Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
+_Py_UniversalNewlineFgetsWithSize(char *buf, int n, FILE *stream, PyObject *fobj, size_t* size)
 {
     char *p = buf;
     int c;
@@ -265,11 +257,28 @@ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
     }
     FUNLOCKFILE(stream);
     *p = '\0';
-    if (p == buf)
+    if (p == buf) {
         return NULL;
+    }
+    *size = p - buf;
     return buf;
 }
 
+/*
+** Py_UniversalNewlineFgets is an fgets variation that understands
+** all of \r, \n and \r\n conventions.
+** The stream should be opened in binary mode.
+** The fobj parameter exists solely for legacy reasons and must be NULL.
+** Note that we need no error handling: fgets() treats error and eof
+** identically.
+**
+** This is a thin wrapper around _Py_UniversalNewlineFgetsWithSize() that
+** discards the reported line length.
+*/
+
+char *
+Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) {
+    /* Receives the line length from the helper; the value is not used here. */
+    size_t size;
+    return _Py_UniversalNewlineFgetsWithSize(buf, n, stream, fobj, &size);
+}
+
 /* **************************** std printer ****************************
  * The stdprinter is used during the boot strapping phase as a preliminary
  * file like object for sys.stderr.
index ca11c7bebb4eb1944badd54b11048bcd69fcf458..b552b4171888f5a7d3896f30a0d256bd669420a6 100644 (file)
@@ -376,6 +376,11 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
     return 1;
 }
 
+/* Return 1 if a NUL byte occurs within the first `size` bytes of `str`,
+   otherwise 0. */
+static inline int
+contains_null_bytes(const char* str, size_t size) {
+    return memchr(str, 0, size) != NULL;
+}
+
 static int
 tok_readline_recode(struct tok_state *tok) {
     PyObject *line;
@@ -831,9 +836,9 @@ tok_readline_raw(struct tok_state *tok)
         if (!tok_reserve_buf(tok, BUFSIZ)) {
             return 0;
         }
-        char *line = Py_UniversalNewlineFgets(tok->inp,
-                                              (int)(tok->end - tok->inp),
-                                              tok->fp, NULL);
+        int n_chars = (int)(tok->end - tok->inp);
+        size_t line_size = 0;
+        char *line = _Py_UniversalNewlineFgetsWithSize(tok->inp, n_chars, tok->fp, NULL, &line_size);
         if (line == NULL) {
             return 1;
         }
@@ -841,7 +846,7 @@ tok_readline_raw(struct tok_state *tok)
             tok_concatenate_interactive_new_line(tok, line) == -1) {
             return 0;
         }
-        tok->inp = strchr(tok->inp, '\0');
+        tok->inp += line_size;
         if (tok->inp == tok->buf) {
             return 0;
         }
@@ -1078,6 +1083,12 @@ tok_nextc(struct tok_state *tok)
             return EOF;
         }
         tok->line_start = tok->cur;
+
+        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
+            syntaxerror(tok, "source code cannot contain null bytes");
+            tok->cur = tok->inp;
+            return EOF;
+        }
     }
     Py_UNREACHABLE();
 }
@@ -1987,8 +1998,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
         /* Get rest of string */
         while (end_quote_size != quote_size) {
             c = tok_nextc(tok);
-            if (tok->done == E_DECODE)
+            if (tok->done == E_ERROR) {
+                return ERRORTOKEN;
+            }
+            if (tok->done == E_DECODE) {
                 break;
+            }
             if (c == EOF || (quote_size == 1 && c == '\n')) {
                 assert(tok->multi_line_start != NULL);
                 // shift the tok_state's location into
index f12b9f6e9539a29245dd44d7f4f16d18c3b7bdb8..efa22b07256c82c87c9f84ae85fad702d8cc37f8 100644 (file)
@@ -1859,7 +1859,7 @@ _Py_SourceAsString(PyObject *cmd, const char *funcname, const char *what, PyComp
     }
 
     if (strlen(str) != (size_t)size) {
-        PyErr_SetString(PyExc_ValueError,
+        PyErr_SetString(PyExc_SyntaxError,
             "source code string cannot contain null bytes");
         Py_CLEAR(*cmd_copy);
         return NULL;