From: Gregory P. Smith Date: Tue, 11 Dec 2012 02:15:46 +0000 (-0800) Subject: Fix the internals of our hash functions to used unsigned values during hash X-Git-Tag: v3.2.4rc1~274 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=27cbcd6241d787b5e99c6ed05ec8377051f397aa;p=thirdparty%2FPython%2Fcpython.git Fix the internals of our hash functions to used unsigned values during hash computation as the overflow behavior of signed integers is undefined. In practice we require compiling everything with -fwrapv which forces overflow to be defined as twos compliment but this keeps the code cleaner for checkers or in the case where someone has compiled it without -fwrapv or their compiler's equivalent. Found by Clang trunk's Undefined Behavior Sanitizer (UBSan). Cleanup only - no functionality or hash values change. --- diff --git a/Include/pyport.h b/Include/pyport.h index 9666cf28f972..c74ff9c7a4b4 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -145,7 +145,7 @@ Used in: PY_LONG_LONG #endif /* Prime multiplier used in string and various other hashes. */ -#define _PyHASH_MULTIPLIER 1000003 /* 0xf4243 */ +#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */ /* Parameters used for the numeric hash implementation. See notes for _PyHash_Double in Objects/object.c. Numeric hashes are based on diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 796e400a8220..ef3a5a12a236 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -873,7 +873,7 @@ bytes_hash(PyBytesObject *a) { register Py_ssize_t len; register unsigned char *p; - register Py_hash_t x; + register Py_uhash_t x; /* Unsigned for defined overflow behavior. */ #ifdef Py_DEBUG assert(_Py_HashSecret_Initialized); diff --git a/Objects/setobject.c b/Objects/setobject.c index 3abeefb97d94..d8401f4ae746 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -77,7 +77,7 @@ NULL if the rich comparison returns an error. static setentry * set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash) { - register Py_ssize_t i; + register size_t i; /* Unsigned for defined overflow behavior. */ register size_t perturb; register setentry *freeslot; register size_t mask = so->mask; @@ -159,7 +159,7 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash) static setentry * set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash) { - register Py_ssize_t i; + register size_t i; /* Unsigned for defined overflow behavior. */ register size_t perturb; register setentry *freeslot; register size_t mask = so->mask; @@ -768,7 +768,7 @@ static Py_hash_t frozenset_hash(PyObject *self) { PySetObject *so = (PySetObject *)self; - Py_hash_t h, hash = 1927868237L; + Py_uhash_t h, hash = 1927868237UL; setentry *entry; Py_ssize_t pos = 0; @@ -783,11 +783,11 @@ frozenset_hash(PyObject *self) hashes so that many distinct combinations collapse to only a handful of distinct hash values. */ h = entry->hash; - hash ^= (h ^ (h << 16) ^ 89869747L) * 3644798167u; + hash ^= (h ^ (h << 16) ^ 89869747UL) * 3644798167UL; } - hash = hash * 69069L + 907133923L; + hash = hash * 69069UL + 907133923UL; if (hash == -1) - hash = 590923713L; + hash = 590923713UL; so->hash = hash; return hash; } diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index b3454600c9ba..c725227979bc 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -315,11 +315,11 @@ error: static Py_hash_t tuplehash(PyTupleObject *v) { - register Py_hash_t x, y; + register Py_uhash_t x, y; /* Unsigned for defined overflow behavior. */ register Py_ssize_t len = Py_SIZE(v); register PyObject **p; - Py_hash_t mult = _PyHASH_MULTIPLIER; - x = 0x345678L; + Py_uhash_t mult = _PyHASH_MULTIPLIER; + x = 0x345678UL; p = v->ob_item; while (--len >= 0) { y = PyObject_Hash(*p++); @@ -327,9 +327,9 @@ tuplehash(PyTupleObject *v) return -1; x = (x ^ y) * mult; /* the cast might truncate len; that doesn't change hash stability */ - mult += (Py_hash_t)(82520L + len + len); + mult += (Py_uhash_t)(82520UL + len + len); } - x += 97531L; + x += 97531UL; if (x == -1) x = -2; return x; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 565d29827081..bb45b20b90cd 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7686,7 +7686,7 @@ unicode_hash(PyUnicodeObject *self) { Py_ssize_t len; Py_UNICODE *p; - Py_hash_t x; + Py_uhash_t x; /* Unsigned for defined overflow behavior. */ #ifdef Py_DEBUG assert(_Py_HashSecret_Initialized);