From: Heikki Linnakangas
Date: Fri, 3 Apr 2026 23:40:25 +0000 (+0300)
Subject: Allocate all parts of shmem hash table from a single contiguous area
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=9fe9ecd516b09434df59d6e7396d508f9c4477c8;p=thirdparty%2Fpostgresql.git

Allocate all parts of shmem hash table from a single contiguous area

Previously, the shared header (HASHHDR) and the directory were allocated by the caller, and passed to hash_create(), while the actual elements were allocated separately with ShmemAlloc().

After this commit, all the memory needed by the header, the directory, and all the elements is allocated using a single ShmemInitStruct() call, and the different parts are carved out of that allocation. This way the ShmemIndex entries (and thus pg_shmem_allocations) reflect the size of the whole hash table, rather than just the header and the directory.

Commit f5930f9a98 attempted this earlier, but it had to be reverted. The new strategy is to let dynahash.c perform all the allocations with the alloc function, but have the alloc function carve out the parts from the one larger allocation. The shared header and the directory are now also allocated with alloc calls, instead of passing the area for those directly from the caller.

Reviewed-by: Tomas Vondra Discussion: https://www.postgresql.org/message-id/01ab1d41-3eda-4705-8bbd-af898f5007f1@iki.fi --- diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index 8e002f5c7a6..bf1b3f1e8f1 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -90,11 +90,14 @@ typedef struct ShmemAllocatorData slock_t shmem_lock; HASHHDR *index; /* location of ShmemIndex */ + size_t index_size; /* size of shmem region holding ShmemIndex */ LWLock index_lock; /* protects ShmemIndex */ } ShmemAllocatorData; #define ShmemIndexLock (&ShmemAllocator->index_lock) +static HTAB *shmem_hash_create(void *location, size_t size, bool found, + const char *name, int64 nelems, HASHCTL *infoP, int hash_flags); static void *ShmemHashAlloc(Size size, void *alloc_arg); static void *ShmemAllocRaw(Size size, Size *allocated_size); @@ -112,6 +115,16 @@ static bool firstNumaTouch = true; Datum pg_numa_available(PG_FUNCTION_ARGS); +/* + * A very simple allocator used to carve out different parts of a hash table + * from a previously allocated contiguous shared memory area. + */ +typedef struct shmem_hash_allocator +{ + char *next; /* start of free space in the area */ + char *end; /* end of the shmem area */ +} shmem_hash_allocator; + /* * InitShmemAllocator() --- set up basic pointers to shared memory. 
* @@ -126,7 +139,6 @@ InitShmemAllocator(PGShmemHeader *seghdr) Size offset; HASHCTL info; int hash_flags; - size_t size = 0; #ifndef EXEC_BACKEND Assert(!IsUnderPostmaster); @@ -179,19 +191,18 @@ InitShmemAllocator(PGShmemHeader *seghdr) */ info.keysize = SHMEM_INDEX_KEYSIZE; info.entrysize = sizeof(ShmemIndexEnt); - info.dsize = info.max_dsize = hash_select_dirsize(SHMEM_INDEX_SIZE); - info.alloc = ShmemHashAlloc; - info.alloc_arg = NULL; - hash_flags = HASH_ELEM | HASH_STRINGS | HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE | HASH_FIXED_SIZE; + hash_flags = HASH_ELEM | HASH_STRINGS | HASH_FIXED_SIZE; + if (!IsUnderPostmaster) { - size = hash_get_shared_size(&info, hash_flags); - ShmemAllocator->index = (HASHHDR *) ShmemAlloc(size); + ShmemAllocator->index_size = hash_estimate_size(SHMEM_INDEX_SIZE, info.entrysize); + ShmemAllocator->index = (HASHHDR *) ShmemAlloc(ShmemAllocator->index_size); } - else - hash_flags |= HASH_ATTACH; - info.hctl = ShmemAllocator->index; - ShmemIndex = hash_create("ShmemIndex", SHMEM_INDEX_SIZE, &info, hash_flags); + ShmemIndex = shmem_hash_create(ShmemAllocator->index, + ShmemAllocator->index_size, + IsUnderPostmaster, + "ShmemIndex", SHMEM_INDEX_SIZE, + &info, hash_flags); Assert(ShmemIndex != NULL); /* @@ -205,8 +216,8 @@ InitShmemAllocator(PGShmemHeader *seghdr) hash_search(ShmemIndex, "ShmemIndex", HASH_ENTER, &found); Assert(!found); - result->size = size; - result->allocated_size = size; + result->size = ShmemAllocator->index_size; + result->allocated_size = ShmemAllocator->index_size; result->location = ShmemAllocator->index; } } @@ -246,13 +257,27 @@ ShmemAllocNoError(Size size) return ShmemAllocRaw(size, &allocated_size); } -/* Alloc callback for shared memory hash tables */ +/* + * ShmemHashAlloc -- alloc callback for shared memory hash tables + * + * Carve out the allocation from a pre-allocated region. 
All shared memory + * hash tables are initialized with HASH_FIXED_SIZE, so all the allocations + * happen upfront during initialization and no locking is required. + */ static void * ShmemHashAlloc(Size size, void *alloc_arg) { - Size allocated_size; + shmem_hash_allocator *allocator = (shmem_hash_allocator *) alloc_arg; + void *result; - return ShmemAllocRaw(size, &allocated_size); + size = MAXALIGN(size); + + if (allocator->end - allocator->next < size) + return NULL; + result = allocator->next; + allocator->next += size; + + return result; } /* @@ -343,13 +368,34 @@ ShmemInitHash(const char *name, /* table string name for shmem index */ int hash_flags) /* info about infoP */ { bool found; + size_t size; void *location; + size = hash_estimate_size(nelems, infoP->entrysize); + + /* look it up in the shmem index or allocate */ + location = ShmemInitStruct(name, size, &found); + + return shmem_hash_create(location, size, found, + name, nelems, infoP, hash_flags); +} + +/* + * Initialize or attach to a shared hash table in the given shmem region. + * + * This is extracted from ShmemInitHash() to allow InitShmemAllocator() to + * share the logic for bootstrapping the ShmemIndex hash table. + */ +static HTAB * +shmem_hash_create(void *location, size_t size, bool found, + const char *name, int64 nelems, HASHCTL *infoP, int hash_flags) +{ + shmem_hash_allocator allocator; + /* - * Hash tables allocated in shared memory have a fixed directory; it can't - * grow or other backends wouldn't be able to find it. So, make sure we - * make it big enough to start with. We also allocate all the buckets - * upfront. + * Hash tables allocated in shared memory have a fixed directory and have + * all elements allocated upfront. We don't support growing because we'd + * need to grow the underlying shmem region with it. * * The shared memory allocator must be specified too. 
*/ @@ -358,20 +404,22 @@ ShmemInitHash(const char *name, /* table string name for shmem index */ infoP->alloc_arg = NULL; hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE | HASH_FIXED_SIZE; - /* look it up in the shmem index */ - location = ShmemInitStruct(name, - hash_get_shared_size(infoP, hash_flags), - &found); - /* * if it already exists, attach to it rather than allocate and initialize * new space */ - if (found) + if (!found) + { + allocator.next = (char *) location; + allocator.end = (char *) location + size; + infoP->alloc_arg = &allocator; + } + else + { + /* Pass location of hashtable header to hash_create */ + infoP->hctl = (HASHHDR *) location; hash_flags |= HASH_ATTACH; - - /* Pass location of hashtable header to hash_create */ - infoP->hctl = (HASHHDR *) location; + } return hash_create(name, nelems, infoP, hash_flags); } diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index f8317add68f..447b638b7c9 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -195,6 +195,9 @@ struct HASHHDR int nelem_alloc; /* number of entries to allocate at once */ bool isfixed; /* if true, don't enlarge */ + /* Current directory. In shared tables, this doesn't change */ + HASHSEGMENT *dir; + #ifdef HASH_STATISTICS /* @@ -374,6 +377,8 @@ hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags) * hash_destroy very simple. The memory context is made a child of either * a context specified by the caller, or TopMemoryContext if nothing is * specified. + * + * Note that HASH_DIRSIZE and HASH_ALLOC had better be set as well. */ if (flags & HASH_SHARED_MEM) { @@ -485,22 +490,19 @@ hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags) if (flags & HASH_SHARED_MEM) { - /* - * ctl structure and directory are preallocated for shared memory - * tables. Note that HASH_DIRSIZE and HASH_ALLOC had better be set as - * well. 
- */ - hashp->hctl = info->hctl; - hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR)); hashp->hcxt = NULL; hashp->isshared = true; /* hash table already exists, we're just attaching to it */ if (flags & HASH_ATTACH) { + /* Caller must pass the pointer to the shared header */ + Assert(info->hctl); + hashp->hctl = info->hctl; + /* make local copies of some heavily-used values */ - hctl = hashp->hctl; - hashp->keysize = hctl->keysize; + hashp->dir = info->hctl->dir; + hashp->keysize = info->hctl->keysize; return hashp; } @@ -514,14 +516,20 @@ hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags) hashp->isshared = false; } + /* + * Allocate the header structure. + * + * XXX: In case of a shared memory hash table, other processes need the + * pointer to the header to re-find the hash table. There is currently no + * explicit way to pass it back from here, the caller relies on the fact + * that this is the first allocation made with the alloc function. That's + * a little ugly, but works for now. + */ + hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR), hashp->alloc_arg); if (!hashp->hctl) - { - hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR), hashp->alloc_arg); - if (!hashp->hctl) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); hashp->frozen = false; @@ -724,25 +732,17 @@ init_htab(HTAB *hashp, int64 nelem) nsegs = next_pow2_int(nsegs); /* - * Make sure directory is big enough. If pre-allocated directory is too - * small, choke (caller screwed up). + * Make sure directory is big enough. 
*/ if (nsegs > hctl->dsize) - { - if (!(hashp->dir)) - hctl->dsize = nsegs; - else - return false; - } + hctl->dsize = nsegs; /* Allocate a directory */ - if (!(hashp->dir)) - { - hashp->dir = (HASHSEGMENT *) - hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT), hashp->alloc_arg); - if (!hashp->dir) - return false; - } + hctl->dir = (HASHSEGMENT *) + hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT), hashp->alloc_arg); + if (!hctl->dir) + return false; + hashp->dir = hctl->dir; /* Allocate initial segments */ for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++) @@ -831,19 +831,6 @@ hash_select_dirsize(int64 num_entries) return nDirEntries; } -/* - * Compute the required initial memory allocation for a shared-memory - * hashtable with the given parameters. We need space for the HASHHDR - * and for the (non expansible) directory. - */ -Size -hash_get_shared_size(HASHCTL *info, int flags) -{ - Assert(flags & HASH_DIRSIZE); - Assert(info->dsize == info->max_dsize); - return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT); -} - /********************** DESTROY ROUTINES ************************/ @@ -1647,6 +1634,7 @@ dir_realloc(HTAB *hashp) { memcpy(p, old_p, old_dirsize); MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize); + hashp->hctl->dir = p; hashp->dir = p; hashp->hctl->dsize = new_dsize; diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h index 337b2e44625..6a1931b0d21 100644 --- a/src/include/utils/hsearch.h +++ b/src/include/utils/hsearch.h @@ -82,7 +82,7 @@ typedef struct HASHCTL void *alloc_arg; /* opaque argument passed to allocator */ /* Used if HASH_CONTEXT flag is set: */ MemoryContext hcxt; /* memory context to use for allocations */ - /* Used if HASH_SHARED_MEM flag is set: */ + /* Used if HASH_ATTACH flag is set: */ HASHHDR *hctl; /* location of header in shared mem */ } HASHCTL; @@ -149,7 +149,6 @@ extern void hash_seq_term(HASH_SEQ_STATUS *status); extern void hash_freeze(HTAB *hashp); extern Size 
hash_estimate_size(int64 num_entries, Size entrysize); extern int64 hash_select_dirsize(int64 num_entries); -extern Size hash_get_shared_size(HASHCTL *info, int flags); extern void AtEOXact_HashTables(bool isCommit); extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index ad999aa48dd..c72f6c59573 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -4225,6 +4225,7 @@ shm_mq_result shm_toc shm_toc_entry shm_toc_estimator +shmem_hash_allocator shmem_request_hook_type shmem_startup_hook_type sig_atomic_t