--- /dev/null
+Fuzz Tests for CPython
+======================
+
+These fuzz tests are designed to be included in Google's `oss-fuzz`_ project.
+
+oss-fuzz works against a library exposing a function of the form
+``int LLVMFuzzerTestOneInput(const uint8_t* data, size_t length)``. We provide
+that library (``fuzzer.c``), and include a ``_fuzz`` module for testing with
+some toy values -- no fuzzing occurs in Python's test suite.
+
+oss-fuzz will regularly pull from CPython, discover all the tests in
+``fuzz_tests.txt``, and run them -- so adding a new test here means it will
+automatically be run in oss-fuzz, while also being smoke-tested as part of
+CPython's test suite.
+
+Adding a new fuzz test
+----------------------
+
+Add the test name on a new line in ``fuzz_tests.txt``.
+
+In ``fuzzer.c``, add a function to be run::
+
+ int $test_name (const char* data, size_t size) {
+ ...
+ return 0;
+ }
+
+
+And invoke it from ``LLVMFuzzerTestOneInput``::
+
+ #if _Py_FUZZ_YES(fuzz_builtin_float)
+ rv |= _run_fuzz(data, size, fuzz_builtin_float);
+ #endif
+
+``LLVMFuzzerTestOneInput`` will run in oss-fuzz, with each test in
+``fuzz_tests.txt`` run separately.
+
+What makes a good fuzz test
+---------------------------
+
+Libraries written in C that might handle untrusted data are worthwhile. The
+more complex the logic (e.g. parsing), the more likely this is to be a useful
+fuzz test. See the existing examples for reference, and refer to the
+`oss-fuzz`_ docs.
+
+.. _oss-fuzz: https://github.com/google/oss-fuzz
--- /dev/null
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
+
+static PyObject* _fuzz_run(PyObject* self, PyObject* args) {
+ const char* buf;
+ Py_ssize_t size;
+ if (!PyArg_ParseTuple(args, "s#", &buf, &size)) {
+ return NULL;
+ }
+ int rv = LLVMFuzzerTestOneInput((const uint8_t*)buf, size);
+ if (PyErr_Occurred()) {
+ return NULL;
+ }
+ if (rv != 0) {
+ // Nonzero return codes are reserved for future use.
+ PyErr_Format(
+ PyExc_RuntimeError, "Nonzero return code from fuzzer: %d", rv);
+ return NULL;
+ }
+ Py_RETURN_NONE;
+}
+
+static PyMethodDef module_methods[] = {
+ {"run", (PyCFunction)_fuzz_run, METH_VARARGS, ""},
+ {NULL},
+};
+
+static struct PyModuleDef _fuzzmodule = {
+ PyModuleDef_HEAD_INIT,
+ "_fuzz",
+ NULL,
+ 0,
+ module_methods,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+};
+
+PyMODINIT_FUNC
+PyInit__xxtestfuzz(void)
+{
+ PyObject *m = NULL;
+
+ if ((m = PyModule_Create(&_fuzzmodule)) == NULL) {
+ return NULL;
+ }
+ return m;
+}
--- /dev/null
+/* A fuzz test for CPython.
+
+ The only exposed function is LLVMFuzzerTestOneInput, which is called by
+ fuzzers and by the _fuzz module for smoke tests.
+
+ To build exactly one fuzz test, as when running in oss-fuzz etc.,
+ build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
+ LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
+ -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.
+
+ See the source code for LLVMFuzzerTestOneInput for details. */
+
+#include <Python.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+/* Fuzz PyFloat_FromString as a proxy for float(str). */
+static int fuzz_builtin_float(const char* data, size_t size) {
+ PyObject* s = PyBytes_FromStringAndSize(data, size);
+ if (s == NULL) return 0;
+ PyObject* f = PyFloat_FromString(s);
+ if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) {
+ PyErr_Clear();
+ }
+
+ Py_XDECREF(f);
+ Py_DECREF(s);
+ return 0;
+}
+
+/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
+static int fuzz_builtin_int(const char* data, size_t size) {
+ /* Pick a random valid base. (When the fuzzed function takes extra
+ parameters, it's somewhat normal to hash the input to generate those
+ parameters. We want to exercise all code paths, so we do so here.) */
+ int base = _Py_HashBytes(data, size) % 37;
+ if (base == 1) {
+ // 1 is the only number between 0 and 36 that is not a valid base.
+ base = 0;
+ }
+ if (base == -1) {
+ return 0; // An error occurred, bail early.
+ }
+ if (base < 0) {
+ base = -base;
+ }
+
+ PyObject* s = PyUnicode_FromStringAndSize(data, size);
+ if (s == NULL) {
+ if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+ PyErr_Clear();
+ }
+ return 0;
+ }
+ PyObject* l = PyLong_FromUnicodeObject(s, base);
+ if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
+ PyErr_Clear();
+ }
+ PyErr_Clear();
+ Py_XDECREF(l);
+ Py_DECREF(s);
+ return 0;
+}
+
+/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */
+static int fuzz_builtin_unicode(const char* data, size_t size) {
+ PyObject* s = PyUnicode_FromStringAndSize(data, size);
+ if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+ PyErr_Clear();
+ }
+ Py_XDECREF(s);
+ return 0;
+}
+
+/* Run fuzzer and abort on failure. */
+static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
+ int rv = fuzzer((const char*) data, size);
+ if (PyErr_Occurred()) {
+ /* Fuzz tests should handle expected errors for themselves.
+ This is last-ditch check in case they didn't. */
+ PyErr_Print();
+ abort();
+ }
+ /* Someday the return value might mean something, propagate it. */
+ return rv;
+}
+
+/* CPython generates a lot of leak warnings for whatever reason. */
+int __lsan_is_turned_off(void) { return 1; }
+
+/* Fuzz test interface.
+ This returns the bitwise or of all fuzz test's return values.
+
+ All fuzz tests must return 0, as all nonzero return codes are reserved for
+ future use -- we propagate the return values for that future case.
+ (And we bitwise or when running multiple tests to verify that normally we
+ only return 0.) */
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ if (!Py_IsInitialized()) {
+ /* LLVMFuzzerTestOneInput is called repeatedly from the same process,
+ with no separate initialization phase, sadly, so we need to
+ initialize CPython ourselves on the first run. */
+ Py_InitializeEx(0);
+ }
+
+ int rv = 0;
+
+#define _Py_FUZZ_YES(test_name) (defined(_Py_FUZZ_##test_name) || !defined(_Py_FUZZ_ONE))
+#if _Py_FUZZ_YES(fuzz_builtin_float)
+ rv |= _run_fuzz(data, size, fuzz_builtin_float);
+#endif
+#if _Py_FUZZ_YES(fuzz_builtin_int)
+ rv |= _run_fuzz(data, size, fuzz_builtin_int);
+#endif
+#if _Py_FUZZ_YES(fuzz_builtin_unicode)
+ rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
+#endif
+#undef _Py_FUZZ_YES
+ return rv;
+}