From: Julian Seward
Date: Sat, 10 Sep 2011 10:17:35 +0000 (+0000)
Subject: Avoid excessive fragmentation in m_mallocfree by munmapping unused
X-Git-Tag: svn/VALGRIND_3_7_0~215
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c0bb2b82c6434092486c783ba4bc61fb29501f0d;p=thirdparty%2Fvalgrind.git

Avoid excessive fragmentation in m_mallocfree by munmapping unused
superblocks in some circumstances (second attempt).  Bug 250101
comment 15.  (Philippe Waroquiers, philippe.waroquiers@skynet.be).

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@12022
---

diff --git a/coregrind/m_mallocfree.c b/coregrind/m_mallocfree.c
index 765da4ae77..4d82882720 100644
--- a/coregrind/m_mallocfree.c
+++ b/coregrind/m_mallocfree.c
@@ -151,6 +151,19 @@ typedef
 // 8-bytes on 32-bit machines with an 8-byte VG_MIN_MALLOC_SZB -- because
 // it's too hard to make a constant expression that works perfectly in all
 // cases.
+// 'reclaimable' is set to NULL if the superblock cannot be reclaimed, otherwise
+// it is set to the address of the superblock. A reclaimable superblock
+// will contain only one allocated block. The superblock segment will
+// be unmapped when the (only) allocated block is freed.
+// The free space at the end of a reclaimable superblock is not used to
+// make a free block. Note that this means that a reclaimable superblock can
+// have up to slightly less than 1 page of unused bytes at the end of the
+// superblock.
+// 'reclaimable' is used to avoid quadratic memory usage for linear reallocation
+// of big structures (see http://bugs.kde.org/show_bug.cgi?id=250101).
+// ??? 'reclaimable' replaces 'void *padding2'. Chose this
+// ??? to avoid changing the alignment logic. Maybe something cleaner
+// ??? can be done.
 // payload_bytes[] is made a single big Block when the Superblock is
 // created, and then can be split and the splittings remerged, but Blocks
 // always cover its entire length -- there's never any unused bytes at the
@@ -158,7 +171,7 @@ typedef
 typedef
    struct _Superblock {
       SizeT n_payload_bytes;
-      void* padding2;
+      struct _Superblock* reclaimable;
       UByte padding[ VG_MIN_MALLOC_SZB -
                         ((sizeof(struct _Superblock*) + sizeof(SizeT)) %
                          VG_MIN_MALLOC_SZB) ];
@@ -174,6 +187,14 @@ typedef
       Bool         clientmem;        // Allocates in the client address space?
       SizeT        rz_szB;           // Red zone size in bytes
       SizeT        min_sblock_szB;   // Minimum superblock size in bytes
+      SizeT        min_reclaimable_sblock_szB;
+      // Minimum reclaimable superblock size in bytes. To be marked as
+      // reclaimable, a superblock must have a size >= min_reclaimable_sblock_szB
+      // and cannot be split. So, to avoid big overhead, superblocks used to
+      // provide aligned blocks on big alignments are not marked as reclaimable.
+      // Reclaimable superblocks will be reclaimed when their (only)
+      // allocated block is freed.
+      // Smaller size superblocks are not reclaimable.
       Block*       freelist[N_MALLOC_LISTS];
       // A dynamically expanding, ordered array of (pointers to)
       // superblocks in the arena.  If this array is expanded, which
@@ -196,6 +217,7 @@ typedef
       // If profiling, when should the next profile happen at
       // (in terms of stats__bytes_on_loan_max) ?
       SizeT        next_profile_at;
+      SizeT        stats__bytes_mmaped_max;
    }
    Arena;
 
@@ -458,7 +480,8 @@ static Arena* arenaId_to_ArenaP ( ArenaId arena )
 // Initialise an arena.  rz_szB is the minimum redzone size; it might be
 // made bigger to ensure that VG_MIN_MALLOC_SZB is observed.
 static
-void arena_init ( ArenaId aid, Char* name, SizeT rz_szB, SizeT min_sblock_szB )
+void arena_init ( ArenaId aid, Char* name, SizeT rz_szB,
+                  SizeT min_sblock_szB, SizeT min_reclaimable_sblock_szB )
 {
    SizeT  i;
    Arena* a = arenaId_to_ArenaP(aid);
@@ -481,6 +504,7 @@ void arena_init ( ArenaId aid, Char* name, SizeT rz_szB, SizeT min_sblock_szB )
    vg_assert(overhead_szB_lo(a) - hp_overhead_szB() == overhead_szB_hi(a));
 
    a->min_sblock_szB = min_sblock_szB;
+   a->min_reclaimable_sblock_szB = min_reclaimable_sblock_szB;
    for (i = 0; i < N_MALLOC_LISTS; i++) a->freelist[i] = NULL;
 
    a->sblocks                  = & a->sblocks_initial[0];
@@ -489,6 +513,7 @@ void arena_init ( ArenaId aid, Char* name, SizeT rz_szB, SizeT min_sblock_szB )
    a->stats__bytes_on_loan     = 0;
    a->stats__bytes_mmaped      = 0;
    a->stats__bytes_on_loan_max = 0;
+   a->stats__bytes_mmaped_max  = 0;
    a->stats__tot_blocks        = 0;
    a->stats__tot_bytes         = 0;
    a->stats__nsearches         = 0;
@@ -504,10 +529,11 @@ void VG_(print_all_arena_stats) ( void )
    for (i = 0; i < VG_N_ARENAS; i++) {
       Arena* a = arenaId_to_ArenaP(i);
       VG_(message)(Vg_DebugMsg,
-                   "%8s: %8ld mmap'd, %8ld/%8ld max/curr, "
+                   "%8s: %8ld/%8ld max/curr mmap'd, %8ld/%8ld max/curr, "
                    "%10llu/%10llu totalloc-blocks/bytes,"
                    " %10llu searches\n",
-                   a->name, a->stats__bytes_mmaped,
+                   a->name,
+                   a->stats__bytes_mmaped_max, a->stats__bytes_mmaped,
                    a->stats__bytes_on_loan_max,
                    a->stats__bytes_on_loan,
                    a->stats__tot_blocks, a->stats__tot_bytes,
@@ -582,7 +608,9 @@ void ensure_mm_init ( ArenaId aid )
       // increasing the superblock size reduces the number of superblocks
       // in the client arena, which makes findSb cheaper.
       ar_client_sbszB = 4194304;
-      arena_init ( VG_AR_CLIENT, "client", client_rz_szB, ar_client_sbszB );
+      // superblocks with a size > ar_client_sbszB will be reclaimed.
+      arena_init ( VG_AR_CLIENT, "client", client_rz_szB,
+                   ar_client_sbszB, ar_client_sbszB+1);
       client_inited = True;
    } else {
@@ -590,13 +618,19 @@ void ensure_mm_init ( ArenaId aid )
          return;
    }
 
    // Initialise the non-client arenas
-   arena_init ( VG_AR_CORE,      "core",     4,   1048576 );
-   arena_init ( VG_AR_TOOL,      "tool",     4,   4194304 );
-   arena_init ( VG_AR_DINFO,     "dinfo",    4,   1048576 );
-   arena_init ( VG_AR_DEMANGLE,  "demangle", 4,     65536 );
-   arena_init ( VG_AR_EXECTXT,   "exectxt",  4,   1048576 );
-   arena_init ( VG_AR_ERRORS,    "errors",   4,     65536 );
-   arena_init ( VG_AR_TTAUX,     "ttaux",    4,     65536 );
+   // superblocks of non-client arenas are not reclaimed.
+   // If big transient allocations are done in an arena,
+   // then activating reclaim for these arenas might be useful.
+   // A test done with memcheck on a big executable with reclaim
+   // set to min_sblock_szB+1 for all arenas has shown that
+   // some superblocks in dinfo and tool would be reclaimed.
+   arena_init ( VG_AR_CORE,      "core",     4,   1048576, MAX_PSZB );
+   arena_init ( VG_AR_TOOL,      "tool",     4,   4194304, MAX_PSZB );
+   arena_init ( VG_AR_DINFO,     "dinfo",    4,   1048576, MAX_PSZB );
+   arena_init ( VG_AR_DEMANGLE,  "demangle", 4,     65536, MAX_PSZB );
+   arena_init ( VG_AR_EXECTXT,   "exectxt",  4,   1048576, MAX_PSZB );
+   arena_init ( VG_AR_ERRORS,    "errors",   4,     65536, MAX_PSZB );
+   arena_init ( VG_AR_TTAUX,     "ttaux",    4,     65536, MAX_PSZB );
    nonclient_inited = True;
 }
 
@@ -667,6 +701,7 @@ Superblock* newSuperblock ( Arena* a, SizeT cszB )
 {
    Superblock* sb;
    SysRes      sres;
+   Bool        reclaimable;
 
    // Take into account admin bytes in the Superblock.
    cszB += sizeof(Superblock);
 
@@ -674,10 +709,20 @@ Superblock* newSuperblock ( Arena* a, SizeT cszB )
    if (cszB < a->min_sblock_szB) cszB = a->min_sblock_szB;
    cszB = VG_PGROUNDUP(cszB);
 
+   if (cszB >= a->min_reclaimable_sblock_szB)
+      reclaimable = True;
+   else
+      reclaimable = False;
+
+
    if (a->clientmem) {
       // client allocation -- return 0 to client if it fails
-      sres = VG_(am_sbrk_anon_float_client)
-                ( cszB, VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC );
+      if (reclaimable)
+         sres = VG_(am_mmap_anon_float_client)
+                   ( cszB, VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC );
+      else
+         sres = VG_(am_sbrk_anon_float_client)
+                   ( cszB, VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC );
       if (sr_isError(sres))
          return 0;
       sb = (Superblock*)(AddrH)sr_Res(sres);
@@ -689,7 +734,10 @@ Superblock* newSuperblock ( Arena* a, SizeT cszB )
       );
    } else {
       // non-client allocation -- abort if it fails
-      sres = VG_(am_sbrk_anon_float_valgrind)( cszB );
+      if (reclaimable)
+         sres = VG_(am_mmap_anon_float_valgrind)( cszB );
+      else
+         sres = VG_(am_sbrk_anon_float_valgrind)( cszB );
       if (sr_isError(sres)) {
          VG_(out_of_memory_NORETURN)("newSuperblock", cszB);
          /* NOTREACHED */
@@ -702,14 +750,73 @@ Superblock* newSuperblock ( Arena* a, SizeT cszB )
    //zzVALGRIND_MAKE_MEM_UNDEFINED(sb, cszB);
    vg_assert(0 == (Addr)sb % VG_MIN_MALLOC_SZB);
    sb->n_payload_bytes = cszB - sizeof(Superblock);
+   sb->reclaimable = (reclaimable ? sb : NULL);
    a->stats__bytes_mmaped += cszB;
+   if (a->stats__bytes_mmaped > a->stats__bytes_mmaped_max)
+      a->stats__bytes_mmaped_max = a->stats__bytes_mmaped;
    VG_(debugLog)(1, "mallocfree",
-                 "newSuperblock at %p (pszB %7ld) owner %s/%s\n",
-                 sb, sb->n_payload_bytes,
+                 "newSuperblock at %p (pszB %7ld) %s owner %s/%s\n",
+                 sb, sb->n_payload_bytes, (reclaimable ? "reclaimable" : ""),
                  a->clientmem ? "CLIENT" : "VALGRIND", a->name );
    return sb;
 }
 
+// Reclaims the given (reclaimable) superblock:
+//  * removes sb from the arena sblocks list.
+//  * munmaps the superblock segment.
+static
+void reclaimSuperblock ( Arena* a, Superblock* sb)
+{
+   SysRes sres;
+   SizeT  cszB;
+   UInt   i, j;
+
+   VG_(debugLog)(1, "mallocfree",
+                 "reclaimSuperblock at %p (pszB %7ld) owner %s/%s\n",
+                 sb, sb->n_payload_bytes,
+                 a->clientmem ? "CLIENT" : "VALGRIND", a->name );
+
+   vg_assert (sb->reclaimable);
+   vg_assert (sb->reclaimable == sb);
+
+   // Take into account admin bytes in the Superblock.
+   cszB = sizeof(Superblock) + sb->n_payload_bytes;
+   vg_assert (cszB >= a->min_reclaimable_sblock_szB);
+
+   // Remove sb from the superblock list.
+   for (i = 0; i < a->sblocks_used; i++) {
+      if (a->sblocks[i] == sb)
+         break;
+   }
+   vg_assert(i >= 0 && i < a->sblocks_used);
+   for (j = i; j < a->sblocks_used; j++)
+      a->sblocks[j] = a->sblocks[j+1];
+   a->sblocks_used--;
+   a->sblocks[a->sblocks_used] = NULL;
+   // paranoia: NULLify ptr to reclaimed sb or NULLify copy of ptr to last sb.
+
+   // Now that the sb is removed from the list, munmap its space.
+   if (a->clientmem) {
+      // reclaimable client allocation
+      Bool need_discard = False;
+      sres = VG_(am_munmap_client)(&need_discard, (Addr) sb, cszB);
+      /* If need_discard is ever True (iow, if the following assertion
+         ever fails), we'll need to tell m_transtab to discard the
+         range.  This unfortunately gives it and this a circular
+         dependency.  It would be better to let this routine return
+         and have its caller do the discard, than do it here (no
+         effect on deadlocking etc but it might be easier to read.)
+      */
+      vg_assert (!need_discard);
+      vg_assert2(! sr_isError(sres), "superblock client munmap failure\n");
+   } else {
+      // reclaimable non-client allocation
+      sres = VG_(am_munmap_valgrind)((Addr) sb, cszB);
+      vg_assert2(! sr_isError(sres), "superblock valgrind munmap failure\n");
+   }
+   a->stats__bytes_mmaped -= cszB;
+}
+
 // Find the superblock containing the given chunk.
 static
 Superblock* findSb ( Arena* a, Block* b )
@@ -923,8 +1030,9 @@ void ppSuperblocks ( Arena* a )
       Superblock * sb = a->sblocks[j];
 
       VG_(printf)( "\n" );
-      VG_(printf)( "superblock %d at %p, sb->n_pl_bs = %lu\n",
-                   blockno++, sb, sb->n_payload_bytes);
+      VG_(printf)( "superblock %d at %p %s, sb->n_pl_bs = %lu\n",
+                   blockno++, sb, (sb->reclaimable ? "reclaimable" : ""),
+                   sb->n_payload_bytes);
       for (i = 0; i < sb->n_payload_bytes; i += b_bszB) {
          Block* b = (Block*)&sb->payload_bytes[i];
          b_bszB = get_bszB(b);
@@ -1002,8 +1110,8 @@ static void sanity_check_malloc_arena ( ArenaId aid )
 
    if (arena_bytes_on_loan != a->stats__bytes_on_loan) {
 #     ifdef VERBOSE_MALLOC
-      VG_(printf)( "sanity_check_malloc_arena: a->bytes_on_loan %ld, "
-                   "arena_bytes_on_loan %ld: "
+      VG_(printf)( "sanity_check_malloc_arena: a->bytes_on_loan %lu, "
+                   "arena_bytes_on_loan %lu: "
                    "MISMATCH\n", a->bytes_on_loan, arena_bytes_on_loan);
 #     endif
       ppSuperblocks(a);
@@ -1098,8 +1206,9 @@ static void cc_analyse_alloc_arena ( ArenaId aid )
    sanity_check_malloc_arena(aid);
 
    VG_(printf)(
-      "-------- Arena \"%s\": %ld mmap'd, %ld/%ld max/curr --------\n",
-      a->name, a->stats__bytes_mmaped,
+      "-------- Arena \"%s\": %lu/%lu max/curr mmap'd, "
+      "%lu/%lu max/curr on_loan --------\n",
+      a->name, a->stats__bytes_mmaped_max, a->stats__bytes_mmaped,
       a->stats__bytes_on_loan_max, a->stats__bytes_on_loan
    );
 
@@ -1273,7 +1382,7 @@ void* VG_(arena_malloc) ( ArenaId aid, HChar* cc, SizeT req_pszB )
 {
    SizeT       req_bszB, frag_bszB, b_bszB;
    UInt        lno, i;
-   Superblock* new_sb;
+   Superblock* new_sb = NULL;
    Block*      b = NULL;
    Arena*      a;
    void*       v;
@@ -1398,8 +1507,10 @@ void* VG_(arena_malloc) ( ArenaId aid, HChar* cc, SizeT req_pszB )
    vg_assert(b_bszB >= req_bszB);
 
    // Could we split this block and still get a useful fragment?
+   // A block in a reclaimable superblock can never be split.
    frag_bszB = b_bszB - req_bszB;
-   if (frag_bszB >= min_useful_bszB(a)) {
+   if (frag_bszB >= min_useful_bszB(a)
+       && (NULL == new_sb || ! new_sb->reclaimable)) {
       // Yes, split block in two, put the fragment on the appropriate free
       // list, and update b_bszB accordingly.
       // printf( "split %dB into %dB and %dB\n", b_bszB, req_bszB, frag_bszB );
@@ -1502,60 +1613,73 @@ void VG_(arena_free) ( ArenaId aid, void* ptr )
 
    if (aid != VG_AR_CLIENT)
       VG_(memset)(ptr, 0xDD, (SizeT)b_pszB);
 
-   // Put this chunk back on a list somewhere.
-   b_listno = pszB_to_listNo(b_pszB);
-   mkFreeBlock( a, b, b_bszB, b_listno );
-   if (VG_(clo_profile_heap))
-      set_cc(b, "admin.free-1");
-
-   // See if this block can be merged with its successor.
-   // First test if we're far enough before the superblock's end to possibly
-   // have a successor.
-   other_b = b + b_bszB;
-   if (other_b+min_useful_bszB(a)-1 <= (Block*)sb_end) {
-      // Ok, we have a successor, merge if it's not in use.
-      other_bszB = get_bszB(other_b);
-      if (!is_inuse_block(other_b)) {
-         // VG_(printf)( "merge-successor\n");
-#        ifdef DEBUG_MALLOC
-         vg_assert(blockSane(a, other_b));
-#        endif
-         unlinkBlock( a, b, b_listno );
-         unlinkBlock( a, other_b, pszB_to_listNo(bszB_to_pszB(a,other_bszB)) );
-         b_bszB += other_bszB;
-         b_listno = pszB_to_listNo(bszB_to_pszB(a, b_bszB));
-         mkFreeBlock( a, b, b_bszB, b_listno );
-         if (VG_(clo_profile_heap))
-            set_cc(b, "admin.free-2");
+   if (! sb->reclaimable) {
+      // Put this chunk back on a list somewhere.
+      b_listno = pszB_to_listNo(b_pszB);
+      mkFreeBlock( a, b, b_bszB, b_listno );
+      if (VG_(clo_profile_heap))
+         set_cc(b, "admin.free-1");
+
+      // See if this block can be merged with its successor.
+      // First test if we're far enough before the superblock's end to possibly
+      // have a successor.
+      other_b = b + b_bszB;
+      if (other_b+min_useful_bszB(a)-1 <= (Block*)sb_end) {
+         // Ok, we have a successor, merge if it's not in use.
+         other_bszB = get_bszB(other_b);
+         if (!is_inuse_block(other_b)) {
+            // VG_(printf)( "merge-successor\n");
+#           ifdef DEBUG_MALLOC
+            vg_assert(blockSane(a, other_b));
+#           endif
+            unlinkBlock( a, b, b_listno );
+            unlinkBlock( a, other_b,
+                         pszB_to_listNo(bszB_to_pszB(a,other_bszB)) );
+            b_bszB += other_bszB;
+            b_listno = pszB_to_listNo(bszB_to_pszB(a, b_bszB));
+            mkFreeBlock( a, b, b_bszB, b_listno );
+            if (VG_(clo_profile_heap))
+               set_cc(b, "admin.free-2");
+         }
+      } else {
+         // Not enough space for successor: check that b is the last block
+         // ie. there are no unused bytes at the end of the Superblock.
+         vg_assert(other_b-1 == (Block*)sb_end);
       }
-   } else {
-      // Not enough space for successor: check that b is the last block
-      // ie. there are no unused bytes at the end of the Superblock.
-      vg_assert(other_b-1 == (Block*)sb_end);
-   }
-   // Then see if this block can be merged with its predecessor.
-   // First test if we're far enough after the superblock's start to possibly
-   // have a predecessor.
-   if (b >= (Block*)sb_start + min_useful_bszB(a)) {
-      // Ok, we have a predecessor, merge if it's not in use.
-      other_b = get_predecessor_block( b );
-      other_bszB = get_bszB(other_b);
-      if (!is_inuse_block(other_b)) {
-         // VG_(printf)( "merge-predecessor\n");
-         unlinkBlock( a, b, b_listno );
-         unlinkBlock( a, other_b, pszB_to_listNo(bszB_to_pszB(a, other_bszB)) );
-         b = other_b;
-         b_bszB += other_bszB;
-         b_listno = pszB_to_listNo(bszB_to_pszB(a, b_bszB));
-         mkFreeBlock( a, b, b_bszB, b_listno );
-         if (VG_(clo_profile_heap))
-            set_cc(b, "admin.free-3");
+      // Then see if this block can be merged with its predecessor.
+      // First test if we're far enough after the superblock's start to possibly
+      // have a predecessor.
+      if (b >= (Block*)sb_start + min_useful_bszB(a)) {
+         // Ok, we have a predecessor, merge if it's not in use.
+         other_b = get_predecessor_block( b );
+         other_bszB = get_bszB(other_b);
+         if (!is_inuse_block(other_b)) {
+            // VG_(printf)( "merge-predecessor\n");
+            unlinkBlock( a, b, b_listno );
+            unlinkBlock( a, other_b,
+                         pszB_to_listNo(bszB_to_pszB(a, other_bszB)) );
+            b = other_b;
+            b_bszB += other_bszB;
+            b_listno = pszB_to_listNo(bszB_to_pszB(a, b_bszB));
+            mkFreeBlock( a, b, b_bszB, b_listno );
+            if (VG_(clo_profile_heap))
+               set_cc(b, "admin.free-3");
+         }
+      } else {
+         // Not enough space for predecessor: check that b is the first block,
+         // ie. there are no unused bytes at the start of the Superblock.
+         vg_assert((Block*)sb_start == b);
+      }
    } else {
-      // Not enough space for predecessor: check that b is the first block,
-      // ie. there are no unused bytes at the start of the Superblock.
+      // b must be first block (i.e. no unused bytes at the beginning)
       vg_assert((Block*)sb_start == b);
+
+      // b must be last block (i.e. no unused bytes at the end)
+      other_b = b + b_bszB;
+      vg_assert(other_b-1 == (Block*)sb_end);
+
+      reclaimSuperblock (a, sb);
    }
 
 #  ifdef DEBUG_MALLOC
@@ -1640,7 +1764,16 @@ void* VG_(arena_memalign) ( ArenaId aid, HChar* cc,
    /* Payload ptr for the block we are going to split.  Note this changes
       a->bytes_on_loan; we save and restore it ourselves. */
    saved_bytes_on_loan = a->stats__bytes_on_loan;
-   base_p = VG_(arena_malloc) ( aid, cc, base_pszB_req );
+   {
+      /* As we will split the block given back by VG_(arena_malloc),
+         we have to (temporarily) disable reclaimable for this arena,
+         as reclaimable superblocks cannot be split. */
+      const SizeT save_min_reclaimable_sblock_szB
+         = a->min_reclaimable_sblock_szB;
+      a->min_reclaimable_sblock_szB = MAX_PSZB;
+      base_p = VG_(arena_malloc) ( aid, cc, base_pszB_req );
+      a->min_reclaimable_sblock_szB = save_min_reclaimable_sblock_szB;
+   }
    a->stats__bytes_on_loan = saved_bytes_on_loan;
 
    /* Give up if we couldn't allocate enough space */
diff --git a/memcheck/tests/memalign2.c b/memcheck/tests/memalign2.c
index 99f322f051..12aa047db1 100644
--- a/memcheck/tests/memalign2.c
+++ b/memcheck/tests/memalign2.c
@@ -26,9 +26,37 @@ int main ( void )
 #  else
    // Nb: assuming VG_MIN_MALLOC_SZB is 8 or more...
    int* p;
+   int* piece;
    int  res;
    assert(sizeof(long int) == sizeof(void*));
 
+   // Check behaviour of memalign/free for big alignment.
+   // In particular, the code below aims at checking that a
+   // superblock with a big size is not marked as reclaimable
+   // if the superblock is used to provide a big aligned block
+   // (see bug 250101, comment #14).
+   // Valgrind m_mallocfree.c will allocate a big superblock for the memalign
+   // call and will split it in two. This split superblock was
+   // wrongly marked as reclaimable, which was then causing
+   // assert failures (as reclaimable blocks cannot be split).
+   p = memalign(1024 * 1024, 4 * 1024 * 1024 + 1);   assert(0 == (long)p % (1024 * 1024));
+   // We allocate (and then free) a piece of memory smaller than
+   // the hole created in the big superblock.
+   // If the superblock is marked as reclaimable, the frees below will cause
+   // an assert. Note that the test has to be run with a --freelist-vol
+   // value smaller than the released blocks' size to ensure the frees are
+   // executed directly (otherwise memcheck does not really release the
+   // memory and so the bug is not properly tested).
+   piece = malloc(1024 * 1000); assert (piece);
+   free (piece);
+   free (p);
+
+   // Same as above but do the frees in the reverse order.
+   p = memalign(1024 * 1024, 4 * 1024 * 1024 + 1);   assert(0 == (long)p % (1024 * 1024));
+   piece = malloc(1024 * 100); assert (piece);
+   free (p);
+   free (piece);
+
    p = memalign(0, 100);      assert(0 == (long)p % 8);
    p = memalign(1, 100);      assert(0 == (long)p % 8);
    p = memalign(2, 100);      assert(0 == (long)p % 8);
@@ -48,6 +76,7 @@ int main ( void )
    p = memalign(4096, 100);   assert(0 == (long)p % 4096);
    p = memalign(4097, 100);   assert(0 == (long)p % 8192);
 
+
 #  define PM(a,b,c) posix_memalign((void**)a, b, c)
 
    res = PM(&p, -1,100);      assert(EINVAL == res);
diff --git a/memcheck/tests/memalign2.vgtest b/memcheck/tests/memalign2.vgtest
index f15cb63206..d4e6ab8fa9 100644
--- a/memcheck/tests/memalign2.vgtest
+++ b/memcheck/tests/memalign2.vgtest
@@ -1,2 +1,2 @@
 prog: memalign2
-vgopts: -q
+vgopts: -q --freelist-vol=100000
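
Editor's note: the mechanism the patch implements inside m_mallocfree.c can be seen in
isolation with the standalone sketch below.  This is not Valgrind code; the threshold,
the BigHeader struct and the big_alloc/big_free names are invented for illustration,
and it assumes a Linux-style mmap/munmap.  The point it demonstrates is the same one
the commit message makes: if a large request gets its own anonymous mapping and the
whole mapping is returned to the kernel as soon as its single block is freed, a linear
grow-and-free pattern (the quadratic-fragmentation case of bug 250101) only ever keeps
the live buffer mapped instead of accumulating dead superblocks.

/* Illustrative sketch only -- not Valgrind's allocator. */
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define RECLAIM_THRESHOLD (4 * 1024 * 1024 + 1)  /* echoes ar_client_sbszB+1 */

typedef struct {
   size_t mapped_len;   /* length of the whole mapping, header included */
} BigHeader;

/* Round up to a whole number of pages, like VG_PGROUNDUP does. */
static size_t page_round_up(size_t n)
{
   size_t pg = (size_t)sysconf(_SC_PAGESIZE);
   return (n + pg - 1) & ~(pg - 1);
}

/* Big requests: one private mapping per block, reclaimed on free. */
static void* big_alloc(size_t payload)
{
   size_t len = page_round_up(sizeof(BigHeader) + payload);
   void* p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   if (p == MAP_FAILED) return NULL;
   ((BigHeader*)p)->mapped_len = len;
   return (char*)p + sizeof(BigHeader);
}

static void big_free(void* payload)
{
   BigHeader* h = (BigHeader*)((char*)payload - sizeof(BigHeader));
   munmap(h, h->mapped_len);   /* the whole "superblock" goes back to the kernel */
}

int main(void)
{
   /* A linear growth pattern: each step allocates a bigger buffer, copies,
      and frees the old one.  With reclaim, only the live buffer stays mapped. */
   size_t sz = RECLAIM_THRESHOLD;
   void* buf = big_alloc(sz);
   assert(buf);
   for (int i = 0; i < 8; i++) {
      size_t nsz = sz + 1024 * 1024;
      void* nbuf = big_alloc(nsz);
      assert(nbuf);
      memcpy(nbuf, buf, sz);
      big_free(buf);            /* old superblock is munmapped right away */
      buf = nbuf;
      sz = nsz;
   }
   big_free(buf);
   return 0;
}

Because such a mapping holds exactly one block, it can never be split -- which is why
the patch above makes VG_(arena_memalign) temporarily raise min_reclaimable_sblock_szB
before calling VG_(arena_malloc), and why the memalign2 test checks that case.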