git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Add a fuzzer for `Py_CompileStringExFlags` (#111721)
author: Brad Larsen <brad@bradfordlarsen.com>
Sun, 10 Dec 2023 17:16:15 +0000 (12:16 -0500)
committer: GitHub <noreply@github.com>
Sun, 10 Dec 2023 17:16:15 +0000 (12:16 -0500)
Modules/_xxtestfuzz/dictionaries/fuzz_pycompile.dict [new file with mode: 0644]
Modules/_xxtestfuzz/fuzz_pycompile_corpus/input1.py [new file with mode: 0644]
Modules/_xxtestfuzz/fuzz_pycompile_corpus/input2.py [new file with mode: 0644]
Modules/_xxtestfuzz/fuzz_pycompile_corpus/input3.py [new file with mode: 0644]
Modules/_xxtestfuzz/fuzz_pycompile_corpus/input4.py [new file with mode: 0644]
Modules/_xxtestfuzz/fuzz_pycompile_corpus/input5.py [new file with mode: 0644]
Modules/_xxtestfuzz/fuzz_pycompile_corpus/input6.py [new file with mode: 0644]
Modules/_xxtestfuzz/fuzz_tests.txt
Modules/_xxtestfuzz/fuzzer.c
Tools/c-analyzer/cpython/ignored.tsv

diff --git a/Modules/_xxtestfuzz/dictionaries/fuzz_pycompile.dict b/Modules/_xxtestfuzz/dictionaries/fuzz_pycompile.dict
new file mode 100644 (file)
index 0000000..c6a44d9
--- /dev/null
@@ -0,0 +1,165 @@
+# bits of syntax
+"( "
+") "
+"[ "
+"] "
+": "
+", "
+"; "
+"{ "
+"} "
+
+# operators
+"+ "
+"- "
+"* "
+"** "
+"/ "
+"// "
+"| "
+"& "
+"< "
+"> "
+"= "
+". "
+"% "
+"` "
+"^ "
+"~ "
+"@ "
+"== "
+"!= "
+"<> "
+"<< "
+"<= "
+">= "
+">> "
+"+= "
+"-= "
+"*= "
+"** "
+"/= "
+"//= "
+"|= "
+"%= "
+"&= "
+"^= "
+"<<= "
+">>= "
+"**= "
+":= "
+"@= "
+
+# whitespace
+"  "
+":\\n  "
+
+# type signatures and functions
+"-> "
+": List[int]"
+": Dict[int, str]"
+
+"# type:"
+"# type: List[int]"
+"# type: Dict[int, str]"
+
+", *"
+", /"
+", *args"
+", **kwargs"
+", x=42"
+
+
+# literals
+"0x0a"
+"0b0000"
+"42"
+"0o70"
+"42j"
+"42.01"
+"-5"
+"+42e-3"
+"0_0_0"
+"1e1_0"
+".1_4"
+
+"{}"
+
+# variable names
+"x"
+"y"
+
+# strings
+"r'x'"
+
+"b'x'"
+
+"rb\"x\""
+
+"br\"x\""
+
+"f'{x + 5}'"
+"f\"{x + 5}\""
+
+"'''"
+"\"\"\""
+
+"\\u"
+"\\x"
+
+# keywords
+"def "
+"del "
+"pass "
+"break "
+"continue "
+"return "
+"raise "
+"from "
+"import "
+".. "
+"... "
+"__future__ "
+"as "
+"global "
+"nonlocal "
+"assert "
+"print "
+"if "
+"elif "
+"else: "
+"while "
+"try: "
+"except "
+"finally: "
+"with "
+"lambda "
+"or "
+"and "
+"not "
+"None "
+"__peg_parser__"
+"True "
+"False "
+"yield "
+"async "
+"await "
+"for "
+"in "
+"is "
+"class "
+
+# shebangs and encodings
+"#!"
+"# coding:"
+"# coding="
+"# coding: latin-1"
+"# coding=latin-1"
+"# coding: utf-8"
+"# coding=utf-8"
+"# coding: ascii"
+"# coding=ascii"
+"# coding: cp860"
+"# coding=cp860"
+"# coding: gbk"
+"# coding=gbk"
diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input1.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input1.py
new file mode 100644 (file)
index 0000000..c43994d
--- /dev/null
@@ -0,0 +1,7 @@
+from __future__ import annotations
+
+def test() -> None:
+    x: list[int] = []
+    x: dict[int, str] = {}
+    x: set[bytes] = {}
+    print(5 + 42 * 3, x)
diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input2.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input2.py
new file mode 100644 (file)
index 0000000..7be326e
--- /dev/null
@@ -0,0 +1,5 @@
+class Foo(metaclass=42):
+    __slots__ = ['x']
+    pass
+
+foo = Foo()
diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input3.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input3.py
new file mode 100644 (file)
index 0000000..9bc3a45
--- /dev/null
@@ -0,0 +1,6 @@
+def evens():
+    i = 0
+    while True:
+        i += 1
+        if i % 2 == 0:
+            yield i
diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input4.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input4.py
new file mode 100644 (file)
index 0000000..490de90
--- /dev/null
@@ -0,0 +1,3 @@
+async def hello(name: str):
+    await name
+    print(name)
diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input5.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input5.py
new file mode 100644 (file)
index 0000000..4cfcfe5
--- /dev/null
@@ -0,0 +1,7 @@
+try:
+    eval('importer exporter... really long matches')
+except SyntaxError:
+    print("nothing to see here")
+finally:
+    print("all done here")
+    raise
diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input6.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input6.py
new file mode 100644 (file)
index 0000000..d8e59ad
--- /dev/null
@@ -0,0 +1,8 @@
+"""Some module docstring"""
+import sys
+
+def main():
+    print("Hello world!", file=sys.stderr)
+
+if __name__ == '__main__':
+    main()
diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt
index 40aa22110e7d2728d6b7cd66b47a0c4424c84381..ea6f982eefc9da1f33034b86d4d4008132137eb3 100644 (file)
@@ -8,3 +8,4 @@ fuzz_csv_reader
 fuzz_struct_unpack
 fuzz_ast_literal_eval
 fuzz_elementtree_parsewhole
+fuzz_pycompile
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index 77d29ce773a04b4d6f1b823900d1747d4421cdda..e133b4d3c4448096237d6692aa47618dab192eff 100644 (file)
@@ -501,6 +501,63 @@ static int fuzz_elementtree_parsewhole(const char* data, size_t size) {
     return 0;
 }
 
+#define MAX_PYCOMPILE_TEST_SIZE 16384  /* capacity of pycompile_scratch; also bounds the accepted input size */
+static char pycompile_scratch[MAX_PYCOMPILE_TEST_SIZE];  /* holds the NUL-terminated source handed to the compiler */
+
+static const int start_vals[] = {Py_eval_input, Py_single_input, Py_file_input};  /* candidate `start` arguments */
+const size_t NUM_START_VALS = sizeof(start_vals) / sizeof(start_vals[0]);  /* element count of start_vals */
+
+static const int optimize_vals[] = {-1, 0, 1, 2};  /* candidate `optimize` arguments */
+const size_t NUM_OPTIMIZE_VALS = sizeof(optimize_vals) / sizeof(optimize_vals[0]);  /* element count of optimize_vals */
+
+/* Fuzz `Py_CompileStringExFlags` (essentially what is behind the `compile` builtin): data[0] selects `start`,
+ * data[1] selects `optimize`, and the remaining bytes are the source text to compile. Always returns 0. */
+static int fuzz_pycompile(const char* data, size_t size) {
+    // Skip inputs too large for pycompile_scratch; the -1 reserves room for a NUL terminator
+    if (size > MAX_PYCOMPILE_TEST_SIZE - 1) {
+        return 0;
+    }
+
+    // The first 2 bytes are consumed for parameter selection, so skip shorter inputs
+    if (size < 2) {
+        return 0;
+    }
+
+    // First byte picks an element of `start_vals` (index wrapped with modulo)
+    unsigned char start_idx = (unsigned char) data[0];
+    int start = start_vals[start_idx % NUM_START_VALS];
+
+    // Second byte picks an element of `optimize_vals` (index wrapped with modulo)
+    unsigned char optimize_idx = (unsigned char) data[1];
+    int optimize = optimize_vals[optimize_idx % NUM_OPTIMIZE_VALS];
+
+    // Copy everything after the 2 selector bytes into the scratch buffer
+    memcpy(pycompile_scratch, data + 2, size - 2);
+    // NUL-terminate right after the copied data; the size check above keeps this index in bounds.
+    pycompile_scratch[size - 2] = '\0';
+
+    // XXX: `flags` is always NULL here, but there are several flags that
+    // conditionally change parser behavior when passed to
+    // `Py_CompileStringExFlags`, for example:
+    //
+    //     #define PyCF_TYPE_COMMENTS 0x1000
+    //     #define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000
+    //     #define PyCF_ONLY_AST 0x0400
+    //
+    // It would be good to test various combinations of these, too.
+    PyCompilerFlags *flags = NULL;
+
+    PyObject *result = Py_CompileStringExFlags(pycompile_scratch, "<fuzz input>", start, flags, optimize);
+    if (result == NULL) {
+        /* Compilation failed, most likely from a syntax error in the fuzz input; discard the error. */
+        PyErr_Clear();
+    } else {
+        Py_DECREF(result);  // drop the reference to the compile result
+    }
+
+    return 0;
+}
+
 /* Run fuzzer and abort on failure. */
 static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
     int rv = fuzzer((const char*) data, size);
@@ -642,6 +699,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
     }
 
     rv |= _run_fuzz(data, size, fuzz_elementtree_parsewhole);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_pycompile)
+    rv |= _run_fuzz(data, size, fuzz_pycompile);
 #endif
   return rv;
 }
diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv
index d59e0ddcdfde4ed4ab599c583db69f060d993897..ff6e1ef4f993bae89805a4ab16e1e12675eb03a3 100644 (file)
@@ -599,6 +599,9 @@ Modules/_xxtestfuzz/fuzzer.c        -       re_error_exception      -
 Modules/_xxtestfuzz/fuzzer.c   -       struct_error    -
 Modules/_xxtestfuzz/fuzzer.c   -       struct_unpack_method    -
 Modules/_xxtestfuzz/fuzzer.c   -       xmlparser_type  -
+Modules/_xxtestfuzz/fuzzer.c   -       pycompile_scratch       -
+Modules/_xxtestfuzz/fuzzer.c   -       start_vals      -
+Modules/_xxtestfuzz/fuzzer.c   -       optimize_vals   -
 Modules/_xxtestfuzz/fuzzer.c   LLVMFuzzerTestOneInput  CSV_READER_INITIALIZED  -
 Modules/_xxtestfuzz/fuzzer.c   LLVMFuzzerTestOneInput  JSON_LOADS_INITIALIZED  -
 Modules/_xxtestfuzz/fuzzer.c   LLVMFuzzerTestOneInput  SRE_COMPILE_INITIALIZED -