From: Raymond Hettinger Date: Thu, 29 Aug 2019 16:02:58 +0000 (-0700) Subject: bpo-8425: Fast path for set inplace difference when the second set is large (GH-15590) X-Git-Tag: v3.9.0a1~733 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=88ea166dadb8aeb34541a0a464662dea222629e5;p=thirdparty%2FPython%2Fcpython.git bpo-8425: Fast path for set inplace difference when the second set is large (GH-15590) --- diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst b/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst new file mode 100644 index 000000000000..8e5ec0bfe874 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst @@ -0,0 +1,3 @@ +Optimize set difference_update for the case when the other set is much +larger than the base set. (Suggested by Evgeny Kapun with code contributed +by Michele Orrù). diff --git a/Objects/setobject.c b/Objects/setobject.c index 56858dbccfe1..fafc2fa9e46d 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1463,9 +1463,25 @@ set_difference_update_internal(PySetObject *so, PyObject *other) setentry *entry; Py_ssize_t pos = 0; + /* Optimization: When the other set is more than 8 times + larger than the base set, replace the other set with + interesection of the two sets. + */ + if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) { + other = set_intersection(so, other); + if (other == NULL) + return -1; + } else { + Py_INCREF(other); + } + while (set_next((PySetObject *)other, &pos, &entry)) - if (set_discard_entry(so, entry->key, entry->hash) < 0) + if (set_discard_entry(so, entry->key, entry->hash) < 0) { + Py_DECREF(other); return -1; + } + + Py_DECREF(other); } else { PyObject *key, *it; it = PyObject_GetIter(other);