From: Wilco Dijkstra
Date: Wed, 17 Dec 2025 16:16:23 +0000 (+0000)
Subject: malloc: Remove unused tcache code from unsorted bin scan
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ea4c36c36bc1fcdc4683127edd2312cbdf540a45;p=thirdparty%2Fglibc.git

malloc: Remove unused tcache code from unsorted bin scan

Now that fastbins have been removed, there is no need to add chunks to the
tcache during the unsorted bin scan.  Small blocks can only be added to the
unsorted bin as a result of a remainder chunk split off a larger block, so
there is no point in checking for additional chunks to place in the tcache.
The last remainder is checked first and will be used if it is large enough
or an exact fit.  The unsorted bin scan becomes simpler as a result.
Remove the tcache_unsorted_limit tunable and its manual entries.

Reviewed-by: Adhemerval Zanella
---
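As a rough illustration of the simplified scan described above, the sketch
below models an unsorted list in which a chunk is taken only when it is an
exact fit, or when it is the last remainder of a split and large enough for a
small request; anything else is simply moved to a bin, with no tcache prefill
pass and no scan counter.  All identifiers here (toy_chunk, scan_unsorted,
SMALL_LIMIT, and so on) are invented for the sketch, and the size handling
ignores malloc's real alignment and header bookkeeping; this is not the glibc
code itself.

  /* Toy model of the simplified unsorted-bin scan; not glibc code.  */
  #include <stddef.h>
  #include <stdio.h>
  #include <stdlib.h>

  #define MIN_CHUNK   32      /* smallest remainder worth keeping */
  #define SMALL_LIMIT 512     /* toy stand-in for a "small" request */

  struct toy_chunk
  {
    size_t size;
    struct toy_chunk *fd, *bk;        /* circular doubly-linked list */
  };

  /* Sentinel heads for the unsorted list and a single stand-in bin.  */
  static struct toy_chunk unsorted = { 0, &unsorted, &unsorted };
  static struct toy_chunk bin = { 0, &bin, &bin };
  static struct toy_chunk *last_remainder;

  static void
  list_insert (struct toy_chunk *head, struct toy_chunk *c)
  {
    c->fd = head->fd;
    c->bk = head;
    head->fd->bk = c;
    head->fd = c;
  }

  static void
  list_remove (struct toy_chunk *c)
  {
    c->fd->bk = c->bk;
    c->bk->fd = c->fd;
  }

  /* Scan the unsorted list once for a request of NB bytes.  */
  static struct toy_chunk *
  scan_unsorted (size_t nb)
  {
    struct toy_chunk *victim;

    while ((victim = unsorted.bk) != &unsorted)
      {
        list_remove (victim);

        /* Small request: reuse the last remainder if it is big enough,
           splitting off a new remainder.  (A real allocator carves the
           remainder out of the same memory block; here we just allocate
           a fresh node for it.)  */
        if (nb <= SMALL_LIMIT && victim == last_remainder
            && victim->size >= nb + MIN_CHUNK)
          {
            struct toy_chunk *rem = malloc (sizeof *rem);
            rem->size = victim->size - nb;
            victim->size = nb;
            list_insert (&unsorted, rem);
            last_remainder = rem;
            return victim;
          }

        /* Exact fit: return it directly.  */
        if (victim->size == nb)
          return victim;

        /* Otherwise place it in a regular bin and keep scanning.  */
        list_insert (&bin, victim);
      }
    return NULL;
  }

  int
  main (void)
  {
    struct toy_chunk big = { 2048, NULL, NULL };  /* remainder of an earlier split */
    struct toy_chunk odd = { 200, NULL, NULL };   /* freed chunk of another size */

    list_insert (&unsorted, &odd);
    list_insert (&unsorted, &big);
    last_remainder = &big;

    struct toy_chunk *c = scan_unsorted (128);
    printf ("got %zu bytes, new last remainder holds %zu bytes\n",
            c->size, last_remainder->size);
    return 0;
  }
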
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index 5bc5f03792..040a544c0e 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -71,9 +71,6 @@ glibc {
     tcache_count {
       type: SIZE_T
     }
-    tcache_unsorted_limit {
-      type: SIZE_T
-    }
     mxfast {
       type: SIZE_T
       minval: 0
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
index 8df6f5906e..9590021f3a 100644
--- a/elf/tst-rtld-list-tunables.exp
+++ b/elf/tst-rtld-list-tunables.exp
@@ -8,7 +8,6 @@ glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.perturb: 0 (min: 0, max: 255)
 glibc.malloc.tcache_count: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.tcache_max: 0x0 (min: 0x0, max: 0x[f]+)
-glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.top_pad: 0x20000 (min: 0x0, max: 0x[f]+)
 glibc.malloc.trim_threshold: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.rtld.dynamic_sort: 2 (min: 1, max: 2)
diff --git a/malloc/arena.c b/malloc/arena.c
index 5bfcd7f972..cabeb0d8ce 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -238,7 +238,6 @@ TUNABLE_CALLBACK_FNDECL (set_arena_test, size_t)
 #if USE_TCACHE
 TUNABLE_CALLBACK_FNDECL (set_tcache_max, size_t)
 TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t)
-TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
 #endif
 TUNABLE_CALLBACK_FNDECL (set_hugetlb, size_t)

@@ -290,8 +289,6 @@ __ptmalloc_init (void)
 # if USE_TCACHE
   TUNABLE_GET (tcache_max, size_t, TUNABLE_CALLBACK (set_tcache_max));
   TUNABLE_GET (tcache_count, size_t, TUNABLE_CALLBACK (set_tcache_count));
-  TUNABLE_GET (tcache_unsorted_limit, size_t,
-               TUNABLE_CALLBACK (set_tcache_unsorted_limit));
 # endif
   TUNABLE_GET (hugetlb, size_t, TUNABLE_CALLBACK (set_hugetlb));

diff --git a/malloc/malloc.c b/malloc/malloc.c
index a49e211925..0ff016e549 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1797,9 +1797,6 @@ struct malloc_par
   size_t tcache_max_bytes;
   /* Maximum number of chunks in each bucket.  */
   size_t tcache_count;
-  /* Maximum number of chunks to remove from the unsorted list, which
-     aren't used to prefill the cache.  */
-  size_t tcache_unsorted_limit;
 #endif
 };

@@ -1832,7 +1829,6 @@ static struct malloc_par mp_ =
   .tcache_count = TCACHE_FILL_COUNT,
   .tcache_small_bins = TCACHE_SMALL_BINS,
   .tcache_max_bytes = MAX_TCACHE_SMALL_SIZE + 1,
-  .tcache_unsorted_limit = 0 /* No limit.  */
 #endif
 };

@@ -3824,10 +3820,6 @@ _int_malloc (mstate av, size_t bytes)
   mchunkptr fwd;                    /* misc temp for linking */
   mchunkptr bck;                    /* misc temp for linking */

-#if USE_TCACHE
-  size_t tcache_unsorted_count;     /* count of unsorted chunks processed */
-#endif
-
   /*
      Convert request size to internal form by adding SIZE_SZ bytes
      overhead plus possibly more to obtain necessary alignment and/or
@@ -3925,24 +3917,8 @@ _int_malloc (mstate av, size_t bytes)
      the most recent non-exact fit.  Place other traversed chunks in
      bins.  Note that this step is the only place in any routine where
      chunks are placed in bins.
-
-     The outer loop here is needed because we might not realize until
-     near the end of malloc that we should have consolidated, so must
-     do so and retry.  This happens at most once, and only when we would
-     otherwise need to expand memory to service a "small" request.
    */
-#if USE_TCACHE
-  INTERNAL_SIZE_T tcache_nb = 0;
-  size_t tc_idx = csize2tidx (nb);
-  if (tc_idx < mp_.tcache_small_bins)
-    tcache_nb = nb;
-  int return_cached = 0;
-
-  tcache_unsorted_count = 0;
-#endif
-
-
   for (;; )
     {
       int iters = 0;
       while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
@@ -4012,28 +3988,10 @@
               set_inuse_bit_at_offset (victim, size);
               if (av != &main_arena)
                 set_non_main_arena (victim);
-#if USE_TCACHE
-              if (__glibc_unlikely (tcache_inactive ()))
-                tcache_init (av);
-              /* Fill cache first, return to user only if cache fills.
-                 We may return one of these chunks later.  */
-              if (tcache_nb > 0
-                  && tcache->num_slots[tc_idx] != 0)
-                {
-                  tcache_put (victim, tc_idx);
-                  return_cached = 1;
-                  continue;
-                }
-              else
-                {
-#endif
               check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
-#if USE_TCACHE
-                }
-#endif
             }

           /* Place chunk in bin.  Only splitting can put
@@ -4107,31 +4065,11 @@
           fwd->bk = victim;
           bck->fd = victim;

-#if USE_TCACHE
-          /* If we've processed as many chunks as we're allowed while
-             filling the cache, return one of the cached ones.  */
-          ++tcache_unsorted_count;
-          if (return_cached
-              && mp_.tcache_unsorted_limit > 0
-              && tcache_unsorted_count > mp_.tcache_unsorted_limit)
-            {
-              return tcache_get (tc_idx);
-            }
-#endif
-
 #define MAX_ITERS 10000
           if (++iters >= MAX_ITERS)
             break;
         }

-#if USE_TCACHE
-      /* If all the small chunks we found ended up cached, return one now.  */
-      if (return_cached)
-        {
-          return tcache_get (tc_idx);
-        }
-#endif
-
       /*
          If a large request, scan through the chunks of current bin in
          sorted order to find smallest that fits.  Use the skip list for this.
@@ -5114,13 +5052,6 @@ do_set_tcache_count (size_t value)
   return 0;
 }

-static __always_inline int
-do_set_tcache_unsorted_limit (size_t value)
-{
-  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
-  mp_.tcache_unsorted_limit = value;
-  return 1;
-}
 #endif

 static __always_inline int
diff --git a/manual/probes.texi b/manual/probes.texi
index ee019e6517..23340e8e07 100644
--- a/manual/probes.texi
+++ b/manual/probes.texi
@@ -235,13 +235,6 @@ tunable is set.  Argument @var{$arg1} is the requested value, and
 @var{$arg2} is the previous value of this tunable.
 @end deftp

-@deftp Probe memory_tunable_tcache_unsorted_limit (int @var{$arg1}, int @var{$arg2})
-This probe is triggered when the
-@code{glibc.malloc.tcache_unsorted_limit} tunable is set.  Argument
-@var{$arg1} is the requested value, and @var{$arg2} is the previous
-value of this tunable.
-@end deftp
-
 @deftp Probe memory_tcache_double_free (void *@var{$arg1}, int @var{$arg2})
 This probe is triggered when @code{free} determines that the memory
 being freed has probably already been freed, and resides in the
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 7956df919b..cacc0ea652 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -53,7 +53,6 @@ glibc.cpu.x86_shstk:
 glibc.pthread.stack_cache_size: 0x2800000 (min: 0x0, max: 0xffffffffffffffff)
 glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
 glibc.cpu.plt_rewrite: 0 (min: 0, max: 2)
-glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0xffffffffffffffff)
 glibc.cpu.x86_ibt:
 glibc.cpu.hwcaps:
 glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0xffffffffffffffff)
@@ -243,21 +242,6 @@ per-thread cache is approximately 236 KB on 64-bit systems and 118 KB
 on 32-bit systems.
 @end deftp

-@deftp Tunable glibc.malloc.tcache_unsorted_limit
-When the user requests memory and the request cannot be met via the
-per-thread cache, the arenas are used to meet the request.  At this
-time, additional chunks will be moved from existing arena lists to
-pre-fill the corresponding cache.  While copies from the fastbins,
-smallbins, and regular bins are bounded and predictable due to the bin
-sizes, copies from the unsorted bin are not bounded, and incur
-additional time penalties as they need to be sorted as they're
-scanned.  To make scanning the unsorted list more predictable and
-bounded, the user may set this tunable to limit the number of chunks
-that are scanned from the unsorted list while searching for chunks to
-pre-fill the per-thread cache with.  The default, or when set to zero,
-is no limit.
-@end deftp
-
 @deftp Tunable glibc.malloc.mxfast
 One of the optimizations @code{malloc} uses is to maintain a series of
 ``fast bins'' that hold chunks up to a specific size.  The default and