]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-40602: Optimize _Py_hashtable for pointer keys (GH-20051)
authorVictor Stinner <vstinner@python.org>
Tue, 12 May 2020 11:31:59 +0000 (13:31 +0200)
committerGitHub <noreply@github.com>
Tue, 12 May 2020 11:31:59 +0000 (13:31 +0200)
Optimize _Py_hashtable_get() and _Py_hashtable_get_entry() for
pointer keys:

* key_size == sizeof(void*)
* hash_func == _Py_hashtable_hash_ptr
* compare_func == _Py_hashtable_compare_direct

Changes:

* Add get_func and get_entry_func members to _Py_hashtable_t
* Convert _Py_hashtable_get() and _Py_hashtable_get_entry() functions
  to static nline functions.
* Add specialized get and get entry for pointer keys.

Include/internal/pycore_hashtable.h
Python/hashtable.c

index 585f76b51d7112de872e6bf085f2c8ed2feebacc..6e094e94376ad50b1762b7aeb36c0d94e413b7e6 100644 (file)
@@ -76,12 +76,17 @@ typedef struct {
 
 /* Forward declaration */
 struct _Py_hashtable_t;
+typedef struct _Py_hashtable_t _Py_hashtable_t;
 
-typedef Py_uhash_t (*_Py_hashtable_hash_func) (struct _Py_hashtable_t *ht,
+typedef Py_uhash_t (*_Py_hashtable_hash_func) (_Py_hashtable_t *ht,
                                                const void *pkey);
-typedef int (*_Py_hashtable_compare_func) (struct _Py_hashtable_t *ht,
+typedef int (*_Py_hashtable_compare_func) (_Py_hashtable_t *ht,
                                            const void *pkey,
                                            const _Py_hashtable_entry_t *he);
+typedef _Py_hashtable_entry_t* (*_Py_hashtable_get_entry_func)(_Py_hashtable_t *ht,
+                                                               const void *pkey);
+typedef int (*_Py_hashtable_get_func) (_Py_hashtable_t *ht,
+                                       const void *pkey, void *data);
 
 typedef struct {
     /* allocate a memory block */
@@ -93,18 +98,19 @@ typedef struct {
 
 
 /* _Py_hashtable: table */
-
-typedef struct _Py_hashtable_t {
+struct _Py_hashtable_t {
     size_t num_buckets;
     size_t entries; /* Total number of entries in the table. */
     _Py_slist_t *buckets;
     size_t key_size;
     size_t data_size;
 
+    _Py_hashtable_get_func get_func;
+    _Py_hashtable_get_entry_func get_entry_func;
     _Py_hashtable_hash_func hash_func;
     _Py_hashtable_compare_func compare_func;
     _Py_hashtable_allocator_t alloc;
-} _Py_hashtable_t;
+};
 
 /* hash a pointer (void*) */
 PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(
@@ -176,10 +182,12 @@ PyAPI_FUNC(int) _Py_hashtable_set(
 
    Don't call directly this function, but use _Py_HASHTABLE_GET_ENTRY()
    macro */
-PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
-    _Py_hashtable_t *ht,
-    size_t key_size,
-    const void *pkey);
+static inline _Py_hashtable_entry_t *
+_Py_hashtable_get_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
+{
+    assert(key_size == ht->key_size);
+    return ht->get_entry_func(ht, pkey);
+}
 
 #define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \
     _Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY))
@@ -189,12 +197,14 @@ PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
    exists, return 0 if the entry does not exist.
 
    Don't call directly this function, but use _Py_HASHTABLE_GET() macro */
-PyAPI_FUNC(int) _Py_hashtable_get(
-    _Py_hashtable_t *ht,
-    size_t key_size,
-    const void *pkey,
-    size_t data_size,
-    void *data);
+static inline int
+_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
+                  size_t data_size, void *data)
+{
+    assert(key_size == ht->key_size);
+    assert(data_size == ht->data_size);
+    return ht->get_func(ht, pkey, data);
+}
 
 #define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
     _Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
index e9f02d8650e4f8f1f374d1a8aaff1b46ea11cdae..1548c2e4618c3ec65f11e0b15fe96b41a0fee278 100644 (file)
@@ -108,7 +108,6 @@ Py_uhash_t
 _Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey)
 {
     void *key;
-
     _Py_HASHTABLE_READ_KEY(ht, pkey, key);
     return (Py_uhash_t)_Py_HashPointer(key);
 }
@@ -137,61 +136,6 @@ round_size(size_t s)
 }
 
 
-_Py_hashtable_t *
-_Py_hashtable_new_full(size_t key_size, size_t data_size,
-                       size_t init_size,
-                       _Py_hashtable_hash_func hash_func,
-                       _Py_hashtable_compare_func compare_func,
-                       _Py_hashtable_allocator_t *allocator)
-{
-    _Py_hashtable_t *ht;
-    size_t buckets_size;
-    _Py_hashtable_allocator_t alloc;
-
-    if (allocator == NULL) {
-        alloc.malloc = PyMem_Malloc;
-        alloc.free = PyMem_Free;
-    }
-    else {
-        alloc = *allocator;
-    }
-
-    ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
-    if (ht == NULL)
-        return ht;
-
-    ht->num_buckets = round_size(init_size);
-    ht->entries = 0;
-    ht->key_size = key_size;
-    ht->data_size = data_size;
-
-    buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
-    ht->buckets = alloc.malloc(buckets_size);
-    if (ht->buckets == NULL) {
-        alloc.free(ht);
-        return NULL;
-    }
-    memset(ht->buckets, 0, buckets_size);
-
-    ht->hash_func = hash_func;
-    ht->compare_func = compare_func;
-    ht->alloc = alloc;
-    return ht;
-}
-
-
-_Py_hashtable_t *
-_Py_hashtable_new(size_t key_size, size_t data_size,
-                  _Py_hashtable_hash_func hash_func,
-                  _Py_hashtable_compare_func compare_func)
-{
-    return _Py_hashtable_new_full(key_size, data_size,
-                                  HASHTABLE_MIN_SIZE,
-                                  hash_func, compare_func,
-                                  NULL);
-}
-
-
 size_t
 _Py_hashtable_size(_Py_hashtable_t *ht)
 {
@@ -251,23 +195,20 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht)
 
 
 _Py_hashtable_entry_t *
-_Py_hashtable_get_entry(_Py_hashtable_t *ht,
-                        size_t key_size, const void *pkey)
+_Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *pkey)
 {
-    Py_uhash_t key_hash;
-    size_t index;
-    _Py_hashtable_entry_t *entry;
-
-    assert(key_size == ht->key_size);
-
-    key_hash = ht->hash_func(ht, pkey);
-    index = key_hash & (ht->num_buckets - 1);
-
-    for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
-        if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
+    Py_uhash_t key_hash = ht->hash_func(ht, pkey);
+    size_t index = key_hash & (ht->num_buckets - 1);
+    _Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
+    while (1) {
+        if (entry == NULL) {
+            return NULL;
+        }
+        if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) {
             break;
+        }
+        entry = ENTRY_NEXT(entry);
     }
-
     return entry;
 }
 
@@ -324,7 +265,7 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
     /* Don't write the assertion on a single line because it is interesting
        to know the duplicated entry if the assertion failed. The entry can
        be read using a debugger. */
-    entry = _Py_hashtable_get_entry(ht, key_size, pkey);
+    entry = ht->get_entry_func(ht, pkey);
     assert(entry == NULL);
 #endif
 
@@ -352,18 +293,62 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
 
 
 int
-_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
-                  size_t data_size, void *data)
+_Py_hashtable_get_generic(_Py_hashtable_t *ht, const void *pkey, void *data)
 {
-    _Py_hashtable_entry_t *entry;
-
     assert(data != NULL);
+    _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, pkey);
+    if (entry != NULL) {
+        ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
+        return 1;
+    }
+    else {
+        return 0;
+    }
+}
 
-    entry = _Py_hashtable_get_entry(ht, key_size, pkey);
-    if (entry == NULL)
+
+// Specialized for:
+// key_size == sizeof(void*)
+// hash_func == _Py_hashtable_hash_ptr
+// compare_func == _Py_hashtable_compare_direct
+_Py_hashtable_entry_t *
+_Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *pkey)
+{
+    Py_uhash_t key_hash = _Py_hashtable_hash_ptr(ht, pkey);
+    size_t index = key_hash & (ht->num_buckets - 1);
+    _Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
+    while (1) {
+        if (entry == NULL) {
+            return NULL;
+        }
+        if (entry->key_hash == key_hash) {
+            const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry);
+            if (memcmp(pkey, pkey2, sizeof(void*)) == 0) {
+                break;
+            }
+        }
+        entry = ENTRY_NEXT(entry);
+    }
+    return entry;
+}
+
+
+// Specialized for:
+// key_size == sizeof(void*)
+// hash_func == _Py_hashtable_hash_ptr
+// compare_func == _Py_hashtable_compare_direct
+int
+_Py_hashtable_get_ptr(_Py_hashtable_t *ht, const void *pkey, void *data)
+{
+    assert(data != NULL);
+    _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry_ptr(ht, pkey);
+    if (entry != NULL) {
+        ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
+        return 1;
+    }
+    else {
         return 0;
-    ENTRY_READ_PDATA(ht, entry, data_size, data);
-    return 1;
+    }
 }
 
 
@@ -454,6 +439,70 @@ hashtable_rehash(_Py_hashtable_t *ht)
 }
 
 
+_Py_hashtable_t *
+_Py_hashtable_new_full(size_t key_size, size_t data_size,
+                       size_t init_size,
+                       _Py_hashtable_hash_func hash_func,
+                       _Py_hashtable_compare_func compare_func,
+                       _Py_hashtable_allocator_t *allocator)
+{
+    _Py_hashtable_t *ht;
+    size_t buckets_size;
+    _Py_hashtable_allocator_t alloc;
+
+    if (allocator == NULL) {
+        alloc.malloc = PyMem_Malloc;
+        alloc.free = PyMem_Free;
+    }
+    else {
+        alloc = *allocator;
+    }
+
+    ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
+    if (ht == NULL)
+        return ht;
+
+    ht->num_buckets = round_size(init_size);
+    ht->entries = 0;
+    ht->key_size = key_size;
+    ht->data_size = data_size;
+
+    buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
+    ht->buckets = alloc.malloc(buckets_size);
+    if (ht->buckets == NULL) {
+        alloc.free(ht);
+        return NULL;
+    }
+    memset(ht->buckets, 0, buckets_size);
+
+    ht->get_func = _Py_hashtable_get_generic;
+    ht->get_entry_func = _Py_hashtable_get_entry_generic;
+    ht->hash_func = hash_func;
+    ht->compare_func = compare_func;
+    ht->alloc = alloc;
+    if (ht->key_size == sizeof(void*)
+        && ht->hash_func == _Py_hashtable_hash_ptr
+        && ht->compare_func == _Py_hashtable_compare_direct)
+    {
+        ht->get_func = _Py_hashtable_get_ptr;
+        ht->get_entry_func = _Py_hashtable_get_entry_ptr;
+    }
+    return ht;
+}
+
+
+_Py_hashtable_t *
+_Py_hashtable_new(size_t key_size, size_t data_size,
+                  _Py_hashtable_hash_func hash_func,
+                  _Py_hashtable_compare_func compare_func)
+{
+    return _Py_hashtable_new_full(key_size, data_size,
+                                  HASHTABLE_MIN_SIZE,
+                                  hash_func, compare_func,
+                                  NULL);
+}
+
+
 void
 _Py_hashtable_clear(_Py_hashtable_t *ht)
 {