git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Allocate all parts of shmem hash table from a single contiguous area
author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Fri, 3 Apr 2026 23:40:25 +0000 (02:40 +0300)
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Fri, 3 Apr 2026 23:40:25 +0000 (02:40 +0300)
Previously, the shared header (HASHHDR) and the directory were
allocated by the caller, and passed to hash_create(), while the actual
elements were allocated separately with ShmemAlloc(). After this
commit, all the memory needed by the header, the directory, and all
the elements is allocated using a single ShmemInitStruct() call, and
the different parts are carved out of that allocation. This way the
ShmemIndex entries (and thus pg_shmem_allocations) reflect the size of
the whole hash table, rather than just the directories.

Commit f5930f9a98 attempted this earlier, but it had to be reverted.
The new strategy is to let dynahash.c perform all the allocations with
the alloc function, but have the alloc function carve out the parts
from the one larger allocation. The shared header and the directory
are now also allocated with alloc calls, instead of passing the area
for those directly from the caller.

Reviewed-by: Tomas Vondra <tomas@vondra.me>
Discussion: https://www.postgresql.org/message-id/01ab1d41-3eda-4705-8bbd-af898f5007f1@iki.fi

src/backend/storage/ipc/shmem.c
src/backend/utils/hash/dynahash.c
src/include/utils/hsearch.h
src/tools/pgindent/typedefs.list

index 8e002f5c7a616eab5d78c05c201aa0d437bdfa3f..bf1b3f1e8f14acec8faa2aea42344a4a5975b479 100644 (file)
@@ -90,11 +90,14 @@ typedef struct ShmemAllocatorData
        slock_t         shmem_lock;
 
        HASHHDR    *index;                      /* location of ShmemIndex */
+       size_t          index_size;             /* size of shmem region holding ShmemIndex */
        LWLock          index_lock;             /* protects ShmemIndex */
 } ShmemAllocatorData;
 
 #define ShmemIndexLock (&ShmemAllocator->index_lock)
 
+static HTAB *shmem_hash_create(void *location, size_t size, bool found,
+                                                          const char *name, int64 nelems, HASHCTL *infoP, int hash_flags);
 static void *ShmemHashAlloc(Size size, void *alloc_arg);
 static void *ShmemAllocRaw(Size size, Size *allocated_size);
 
@@ -112,6 +115,16 @@ static bool firstNumaTouch = true;
 
 Datum          pg_numa_available(PG_FUNCTION_ARGS);
 
+/*
+ * A very simple allocator used to carve out different parts of a hash table
+ * from a previously allocated contiguous shared memory area.
+ */
+typedef struct shmem_hash_allocator
+{
+       char       *next;                       /* start of free space in the area */
+       char       *end;                        /* end of the shmem area */
+} shmem_hash_allocator;
+
 /*
  *     InitShmemAllocator() --- set up basic pointers to shared memory.
  *
@@ -126,7 +139,6 @@ InitShmemAllocator(PGShmemHeader *seghdr)
        Size            offset;
        HASHCTL         info;
        int                     hash_flags;
-       size_t          size = 0;
 
 #ifndef EXEC_BACKEND
        Assert(!IsUnderPostmaster);
@@ -179,19 +191,18 @@ InitShmemAllocator(PGShmemHeader *seghdr)
         */
        info.keysize = SHMEM_INDEX_KEYSIZE;
        info.entrysize = sizeof(ShmemIndexEnt);
-       info.dsize = info.max_dsize = hash_select_dirsize(SHMEM_INDEX_SIZE);
-       info.alloc = ShmemHashAlloc;
-       info.alloc_arg = NULL;
-       hash_flags = HASH_ELEM | HASH_STRINGS | HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE | HASH_FIXED_SIZE;
+       hash_flags = HASH_ELEM | HASH_STRINGS | HASH_FIXED_SIZE;
+
        if (!IsUnderPostmaster)
        {
-               size = hash_get_shared_size(&info, hash_flags);
-               ShmemAllocator->index = (HASHHDR *) ShmemAlloc(size);
+               ShmemAllocator->index_size = hash_estimate_size(SHMEM_INDEX_SIZE, info.entrysize);
+               ShmemAllocator->index = (HASHHDR *) ShmemAlloc(ShmemAllocator->index_size);
        }
-       else
-               hash_flags |= HASH_ATTACH;
-       info.hctl = ShmemAllocator->index;
-       ShmemIndex = hash_create("ShmemIndex", SHMEM_INDEX_SIZE, &info, hash_flags);
+       ShmemIndex = shmem_hash_create(ShmemAllocator->index,
+                                                                  ShmemAllocator->index_size,
+                                                                  IsUnderPostmaster,
+                                                                  "ShmemIndex", SHMEM_INDEX_SIZE,
+                                                                  &info, hash_flags);
        Assert(ShmemIndex != NULL);
 
        /*
@@ -205,8 +216,8 @@ InitShmemAllocator(PGShmemHeader *seghdr)
                        hash_search(ShmemIndex, "ShmemIndex", HASH_ENTER, &found);
 
                Assert(!found);
-               result->size = size;
-               result->allocated_size = size;
+               result->size = ShmemAllocator->index_size;
+               result->allocated_size = ShmemAllocator->index_size;
                result->location = ShmemAllocator->index;
        }
 }
@@ -246,13 +257,27 @@ ShmemAllocNoError(Size size)
        return ShmemAllocRaw(size, &allocated_size);
 }
 
-/* Alloc callback for shared memory hash tables */
+/*
+ * ShmemHashAlloc -- alloc callback for shared memory hash tables
+ *
+ * Carve out the allocation from a pre-allocated region.  All shared memory
+ * hash tables are initialized with HASH_FIXED_SIZE, so all the allocations
+ * happen upfront during initialization and no locking is required.
+ */
 static void *
 ShmemHashAlloc(Size size, void *alloc_arg)
 {
-       Size            allocated_size;
+       shmem_hash_allocator *allocator = (shmem_hash_allocator *) alloc_arg;
+       void       *result;
 
-       return ShmemAllocRaw(size, &allocated_size);
+       size = MAXALIGN(size);
+
+       if (allocator->end - allocator->next < size)
+               return NULL;
+       result = allocator->next;
+       allocator->next += size;
+
+       return result;
 }
 
 /*
@@ -343,13 +368,34 @@ ShmemInitHash(const char *name,           /* table string name for shmem index */
                          int hash_flags)       /* info about infoP */
 {
        bool            found;
+       size_t          size;
        void       *location;
 
+       size = hash_estimate_size(nelems, infoP->entrysize);
+
+       /* look it up in the shmem index or allocate */
+       location = ShmemInitStruct(name, size, &found);
+
+       return shmem_hash_create(location, size, found,
+                                                        name, nelems, infoP, hash_flags);
+}
+
+/*
+ * Initialize or attach to a shared hash table in the given shmem region.
+ *
+ * This is extracted from ShmemInitHash() to allow InitShmemAllocator() to
+ * share the logic for bootstrapping the ShmemIndex hash table.
+ */
+static HTAB *
+shmem_hash_create(void *location, size_t size, bool found,
+                                 const char *name, int64 nelems, HASHCTL *infoP, int hash_flags)
+{
+       shmem_hash_allocator allocator;
+
        /*
-        * Hash tables allocated in shared memory have a fixed directory; it can't
-        * grow or other backends wouldn't be able to find it. So, make sure we
-        * make it big enough to start with.  We also allocate all the buckets
-        * upfront.
+        * Hash tables allocated in shared memory have a fixed directory and have
+        * all elements allocated upfront.  We don't support growing because we'd
+        * need to grow the underlying shmem region with it.
         *
         * The shared memory allocator must be specified too.
         */
@@ -358,20 +404,22 @@ ShmemInitHash(const char *name,           /* table string name for shmem index */
        infoP->alloc_arg = NULL;
        hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE | HASH_FIXED_SIZE;
 
-       /* look it up in the shmem index */
-       location = ShmemInitStruct(name,
-                                                          hash_get_shared_size(infoP, hash_flags),
-                                                          &found);
-
        /*
         * if it already exists, attach to it rather than allocate and initialize
         * new space
         */
-       if (found)
+       if (!found)
+       {
+               allocator.next = (char *) location;
+               allocator.end = (char *) location + size;
+               infoP->alloc_arg = &allocator;
+       }
+       else
+       {
+               /* Pass location of hashtable header to hash_create */
+               infoP->hctl = (HASHHDR *) location;
                hash_flags |= HASH_ATTACH;
-
-       /* Pass location of hashtable header to hash_create */
-       infoP->hctl = (HASHHDR *) location;
+       }
 
        return hash_create(name, nelems, infoP, hash_flags);
 }
index f8317add68fa83a88256d011ff025ac73a6cab97..447b638b7c9249573e0c91c7b01e05521e0d686a 100644 (file)
@@ -195,6 +195,9 @@ struct HASHHDR
        int                     nelem_alloc;    /* number of entries to allocate at once */
        bool            isfixed;                /* if true, don't enlarge */
 
+       /* Current directory.  In shared tables, this doesn't change */
+       HASHSEGMENT *dir;
+
 #ifdef HASH_STATISTICS
 
        /*
@@ -374,6 +377,8 @@ hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
         * hash_destroy very simple.  The memory context is made a child of either
         * a context specified by the caller, or TopMemoryContext if nothing is
         * specified.
+        *
+        * Note that HASH_DIRSIZE and HASH_ALLOC had better be set as well.
         */
        if (flags & HASH_SHARED_MEM)
        {
@@ -485,22 +490,19 @@ hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
 
        if (flags & HASH_SHARED_MEM)
        {
-               /*
-                * ctl structure and directory are preallocated for shared memory
-                * tables.  Note that HASH_DIRSIZE and HASH_ALLOC had better be set as
-                * well.
-                */
-               hashp->hctl = info->hctl;
-               hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR));
                hashp->hcxt = NULL;
                hashp->isshared = true;
 
                /* hash table already exists, we're just attaching to it */
                if (flags & HASH_ATTACH)
                {
+                       /* Caller must pass the pointer to the shared header */
+                       Assert(info->hctl);
+                       hashp->hctl = info->hctl;
+
                        /* make local copies of some heavily-used values */
-                       hctl = hashp->hctl;
-                       hashp->keysize = hctl->keysize;
+                       hashp->dir = info->hctl->dir;
+                       hashp->keysize = info->hctl->keysize;
 
                        return hashp;
                }
@@ -514,14 +516,20 @@ hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
                hashp->isshared = false;
        }
 
+       /*
+        * Allocate the header structure.
+        *
+        * XXX: In case of a shared memory hash table, other processes need the
+        * pointer to the header to re-find the hash table.  There is currently no
+        * explicit way to pass it back from here, the caller relies on the fact
+        * that this is the first allocation made with the alloc function.  That's
+        * a little ugly, but works for now.
+        */
+       hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR), hashp->alloc_arg);
        if (!hashp->hctl)
-       {
-               hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR), hashp->alloc_arg);
-               if (!hashp->hctl)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_OUT_OF_MEMORY),
-                                        errmsg("out of memory")));
-       }
+               ereport(ERROR,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory")));
 
        hashp->frozen = false;
 
@@ -724,25 +732,17 @@ init_htab(HTAB *hashp, int64 nelem)
        nsegs = next_pow2_int(nsegs);
 
        /*
-        * Make sure directory is big enough. If pre-allocated directory is too
-        * small, choke (caller screwed up).
+        * Make sure directory is big enough.
         */
        if (nsegs > hctl->dsize)
-       {
-               if (!(hashp->dir))
-                       hctl->dsize = nsegs;
-               else
-                       return false;
-       }
+               hctl->dsize = nsegs;
 
        /* Allocate a directory */
-       if (!(hashp->dir))
-       {
-               hashp->dir = (HASHSEGMENT *)
-                       hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT), hashp->alloc_arg);
-               if (!hashp->dir)
-                       return false;
-       }
+       hctl->dir = (HASHSEGMENT *)
+               hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT), hashp->alloc_arg);
+       if (!hctl->dir)
+               return false;
+       hashp->dir = hctl->dir;
 
        /* Allocate initial segments */
        for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
@@ -831,19 +831,6 @@ hash_select_dirsize(int64 num_entries)
        return nDirEntries;
 }
 
-/*
- * Compute the required initial memory allocation for a shared-memory
- * hashtable with the given parameters.  We need space for the HASHHDR
- * and for the (non expansible) directory.
- */
-Size
-hash_get_shared_size(HASHCTL *info, int flags)
-{
-       Assert(flags & HASH_DIRSIZE);
-       Assert(info->dsize == info->max_dsize);
-       return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
-}
-
 
 /********************** DESTROY ROUTINES ************************/
 
@@ -1647,6 +1634,7 @@ dir_realloc(HTAB *hashp)
        {
                memcpy(p, old_p, old_dirsize);
                MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);
+               hashp->hctl->dir = p;
                hashp->dir = p;
                hashp->hctl->dsize = new_dsize;
 
index 337b2e446259b5aa89a63573e1054647dc96996f..6a1931b0d21fcf5139c616f0c82a9d0d06358ac8 100644 (file)
@@ -82,7 +82,7 @@ typedef struct HASHCTL
        void       *alloc_arg;          /* opaque argument passed to allocator */
        /* Used if HASH_CONTEXT flag is set: */
        MemoryContext hcxt;                     /* memory context to use for allocations */
-       /* Used if HASH_SHARED_MEM flag is set: */
+       /* Used if HASH_ATTACH flag is set: */
        HASHHDR    *hctl;                       /* location of header in shared mem */
 } HASHCTL;
 
@@ -149,7 +149,6 @@ extern void hash_seq_term(HASH_SEQ_STATUS *status);
 extern void hash_freeze(HTAB *hashp);
 extern Size hash_estimate_size(int64 num_entries, Size entrysize);
 extern int64 hash_select_dirsize(int64 num_entries);
-extern Size hash_get_shared_size(HASHCTL *info, int flags);
 extern void AtEOXact_HashTables(bool isCommit);
 extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth);
 
index ad999aa48dd0488b5701d89d221f793344bb6082..c72f6c595730a931991f46cddb306ac4da9b8bdc 100644 (file)
@@ -4225,6 +4225,7 @@ shm_mq_result
 shm_toc
 shm_toc_entry
 shm_toc_estimator
+shmem_hash_allocator
 shmem_request_hook_type
 shmem_startup_hook_type
 sig_atomic_t