From: Gregory P. Smith Date: Tue, 11 Dec 2012 02:32:53 +0000 (-0800) Subject: Fix the internals of our hash functions to use unsigned values during hash X-Git-Tag: v3.3.1rc1~539 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c2176e46d797c8aea60cdcd2ddf44f6bb88394b4;p=thirdparty%2FPython%2Fcpython.git Fix the internals of our hash functions to use unsigned values during hash computation as the overflow behavior of signed integers is undefined. NOTE: This change is smaller compared to 3.2 as much of this cleanup had already been done. I added the comment that my change in 3.2 added so that the code would match up. Otherwise this just adds or synchronizes appropriate UL designations on some constants to be pedantic. In practice we require compiling everything with -fwrapv which forces overflow to be defined as two's complement but this keeps the code cleaner for checkers or in the case where someone has compiled it without -fwrapv or their compiler's equivalent. Found by Clang trunk's Undefined Behavior Sanitizer (UBSan). Cleanup only - no functionality or hash values change. --- c2176e46d797c8aea60cdcd2ddf44f6bb88394b4 diff --cc Include/pyport.h index 41d9d42e639a,c74ff9c7a4b4..e4e3601d7bd0 --- a/Include/pyport.h +++ b/Include/pyport.h @@@ -145,10 -145,10 +145,10 @@@ Used in: PY_LONG_LON #endif /* Prime multiplier used in string and various other hashes. */ - #define _PyHASH_MULTIPLIER 1000003 /* 0xf4243 */ + #define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */ /* Parameters used for the numeric hash implementation. See notes for - _PyHash_Double in Objects/object.c. Numeric hashes are based on + _Py_HashDouble in Objects/object.c. Numeric hashes are based on reduction modulo the prime 2**_PyHASH_BITS - 1.
*/ #if SIZEOF_VOID_P >= 8 diff --cc Objects/setobject.c index 723679a7e6ef,d8401f4ae746..c484dce41389 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@@ -775,11 -783,11 +775,11 @@@ frozenset_hash(PyObject *self hashes so that many distinct combinations collapse to only a handful of distinct hash values. */ h = entry->hash; - hash ^= (h ^ (h << 16) ^ 89869747U) * 3644798167U; + hash ^= (h ^ (h << 16) ^ 89869747UL) * 3644798167UL; } - hash = hash * 69069U + 907133923U; - hash = hash * 69069UL + 907133923UL; ++ hash = hash * 69069U + 907133923UL; if (hash == -1) - hash = 590923713U; + hash = 590923713UL; so->hash = hash; return hash; } diff --cc Objects/tupleobject.c index 9c843fa9814a,c725227979bc..ec3f91b2c65f --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@@ -327,8 -315,7 +327,8 @@@ error static Py_hash_t tuplehash(PyTupleObject *v) { - register Py_uhash_t x; - register Py_uhash_t x, y; /* Unsigned for defined overflow behavior. */ ++ register Py_uhash_t x; /* Unsigned for defined overflow behavior. */ + register Py_hash_t y; register Py_ssize_t len = Py_SIZE(v); register PyObject **p; Py_uhash_t mult = _PyHASH_MULTIPLIER; @@@ -340,10 -327,10 +340,10 @@@ return -1; x = (x ^ y) * mult; /* the cast might truncate len; that doesn't change hash stability */ - mult += (Py_hash_t)(82520L + len + len); - mult += (Py_uhash_t)(82520UL + len + len); ++ mult += (Py_hash_t)(82520UL + len + len); } - x += 97531L; + x += 97531UL; - if (x == -1) + if (x == (Py_uhash_t)-1) x = -2; return x; } diff --cc Objects/unicodeobject.c index 60a74d620087,bb45b20b90cd..0b9d65291f70 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@@ -11003,12 -7680,13 +11003,12 @@@ unicode_getitem(PyObject *self, Py_ssiz } /* Believe it or not, this produces the same value for ASCII strings - as string_hash(). */ + as bytes_hash(). 
*/ static Py_hash_t -unicode_hash(PyUnicodeObject *self) +unicode_hash(PyObject *self) { Py_ssize_t len; - Py_uhash_t x; - Py_UNICODE *p; + Py_uhash_t x; /* Unsigned for defined overflow behavior. */ #ifdef Py_DEBUG assert(_Py_HashSecret_Initialized);