From: Willy Tarreau Date: Mon, 10 Feb 2025 10:15:44 +0000 (+0100) Subject: DEBUG: thread: reduce the struct lock_stat to store only 30 buckets X-Git-Tag: v3.2-dev6~45 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=eced1d6d8a1cca7748ad71e308ceb8faf5d55880;p=thirdparty%2Fhaproxy.git DEBUG: thread: reduce the struct lock_stat to store only 30 buckets Storing only 30 buckets means we only keep 256 bytes per label. This further simplifies address calculation and reduces the memory used without complicating the locking code. It means we won't measure wait times larger than a second but we're not supposed to face this as it would trigger the watchdog anyway. It may become a little bit tight if measuring using rdtsc() instead of now_mono_time() though (typically the limit would be around 350ms for a 3 GHz CPU). --- diff --git a/include/haproxy/thread-t.h b/include/haproxy/thread-t.h index cb91e07c5..631eeba8b 100644 --- a/include/haproxy/thread-t.h +++ b/include/haproxy/thread-t.h @@ -110,13 +110,13 @@ /* Debugging information that is only used when thread debugging is enabled */ -/* This is aligned as it's already 512B per lock label, so better simplify the +/* This is aligned as it's already 256B per lock label, so better simplify the * address calculations in the fast path than save a few bytes in BSS. 
*/ struct lock_stat { uint64_t nsec_wait; uint64_t num_unlocked; - uint64_t buckets[32]; // operations per time buckets (1-2ns to 2.1-4.3s) + uint64_t buckets[30]; // operations per time buckets (1-2ns to 0.5-1s) } ALIGNED(256); struct ha_spinlock_state { diff --git a/src/thread.c b/src/thread.c index d0b29c2e4..fa47cd67d 100644 --- a/src/thread.c +++ b/src/thread.c @@ -483,7 +483,7 @@ static uint64_t get_lock_stat_num_read(int lbl) uint64_t ret = 0; uint bucket; - for (bucket = 0; bucket < 32; bucket++) + for (bucket = 0; bucket < 30; bucket++) ret += _HA_ATOMIC_LOAD(&lock_stats_rd[lbl].buckets[bucket]); return ret; } @@ -493,7 +493,7 @@ static uint64_t get_lock_stat_num_seek(int lbl) uint64_t ret = 0; uint bucket; - for (bucket = 0; bucket < 32; bucket++) + for (bucket = 0; bucket < 30; bucket++) ret += _HA_ATOMIC_LOAD(&lock_stats_sk[lbl].buckets[bucket]); return ret; } @@ -503,7 +503,7 @@ static uint64_t get_lock_stat_num_write(int lbl) uint64_t ret = 0; uint bucket; - for (bucket = 0; bucket < 32; bucket++) + for (bucket = 0; bucket < 30; bucket++) ret += _HA_ATOMIC_LOAD(&lock_stats_wr[lbl].buckets[bucket]); return ret; } @@ -542,7 +542,7 @@ void show_lock_stats() (double)lock_stats_wr[lbl].nsec_wait / 1000000.0, num_write_locked ? ((double)lock_stats_wr[lbl].nsec_wait / (double)num_write_locked) : 0); - for (bucket = 0; bucket < 32; bucket++) + for (bucket = 0; bucket < 30; bucket++) if (lock_stats_wr[lbl].buckets[bucket]) fprintf(stderr, " %u:%llu", bucket, (ullong)lock_stats_wr[lbl].buckets[bucket]); fprintf(stderr, "\n"); @@ -561,7 +561,7 @@ void show_lock_stats() (double)lock_stats_sk[lbl].nsec_wait / 1000000.0, num_seek_locked ? 
((double)lock_stats_sk[lbl].nsec_wait / (double)num_seek_locked) : 0); - for (bucket = 0; bucket < 32; bucket++) + for (bucket = 0; bucket < 30; bucket++) if (lock_stats_sk[lbl].buckets[bucket]) fprintf(stderr, " %u:%llu", bucket, (ullong)lock_stats_sk[lbl].buckets[bucket]); fprintf(stderr, "\n"); @@ -580,7 +580,7 @@ void show_lock_stats() (double)lock_stats_rd[lbl].nsec_wait / 1000000.0, num_read_locked ? ((double)lock_stats_rd[lbl].nsec_wait / (double)num_read_locked) : 0); - for (bucket = 0; bucket < 32; bucket++) + for (bucket = 0; bucket < 30; bucket++) if (lock_stats_rd[lbl].buckets[bucket]) fprintf(stderr, " %u:%llu", bucket, (ullong)lock_stats_rd[lbl].buckets[bucket]); fprintf(stderr, "\n"); @@ -619,6 +619,7 @@ void __ha_rwlock_wrlock(enum lock_label lbl, struct ha_rwlock *l, start_time += now_mono_time(); HA_ATOMIC_ADD(&lock_stats_wr[lbl].nsec_wait, start_time); + start_time &= 0x3fffffff; // keep values below 1 billion only bucket = flsnz((uint32_t)start_time + 1) - 1; HA_ATOMIC_INC(&lock_stats_wr[lbl].buckets[bucket]); @@ -654,6 +655,7 @@ int __ha_rwlock_trywrlock(enum lock_label lbl, struct ha_rwlock *l, } HA_ATOMIC_ADD(&lock_stats_wr[lbl].nsec_wait, start_time); + start_time &= 0x3fffffff; // keep values below 1 billion only bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1; HA_ATOMIC_INC(&lock_stats_wr[lbl].buckets[bucket]); @@ -705,6 +707,7 @@ void __ha_rwlock_rdlock(enum lock_label lbl,struct ha_rwlock *l) start_time += now_mono_time(); HA_ATOMIC_ADD(&lock_stats_rd[lbl].nsec_wait, start_time); + start_time &= 0x3fffffff; // keep values below 1 billion only bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1; HA_ATOMIC_INC(&lock_stats_rd[lbl].buckets[bucket]); @@ -734,6 +737,7 @@ int __ha_rwlock_tryrdlock(enum lock_label lbl,struct ha_rwlock *l) HA_ATOMIC_ADD(&lock_stats_rd[lbl].nsec_wait, start_time); + start_time &= 0x3fffffff; // keep values below 1 billion only bucket = flsnz((uint32_t)start_time ? 
(uint32_t)start_time : 1) - 1; HA_ATOMIC_INC(&lock_stats_rd[lbl].buckets[bucket]); @@ -780,6 +784,7 @@ void __ha_rwlock_wrtord(enum lock_label lbl, struct ha_rwlock *l, start_time += now_mono_time(); HA_ATOMIC_ADD(&lock_stats_rd[lbl].nsec_wait, start_time); + start_time &= 0x3fffffff; // keep values below 1 billion only bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1; HA_ATOMIC_INC(&lock_stats_rd[lbl].buckets[bucket]); @@ -813,6 +818,7 @@ void __ha_rwlock_wrtosk(enum lock_label lbl, struct ha_rwlock *l, start_time += now_mono_time(); HA_ATOMIC_ADD(&lock_stats_sk[lbl].nsec_wait, start_time); + start_time &= 0x3fffffff; // keep values below 1 billion only bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1; HA_ATOMIC_INC(&lock_stats_sk[lbl].buckets[bucket]); @@ -843,6 +849,7 @@ void __ha_rwlock_sklock(enum lock_label lbl, struct ha_rwlock *l, start_time += now_mono_time(); HA_ATOMIC_ADD(&lock_stats_sk[lbl].nsec_wait, start_time); + start_time &= 0x3fffffff; // keep values below 1 billion only bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1; HA_ATOMIC_INC(&lock_stats_sk[lbl].buckets[bucket]); @@ -875,6 +882,7 @@ void __ha_rwlock_sktowr(enum lock_label lbl, struct ha_rwlock *l, start_time += now_mono_time(); HA_ATOMIC_ADD(&lock_stats_wr[lbl].nsec_wait, start_time); + start_time &= 0x3fffffff; // keep values below 1 billion only bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1; HA_ATOMIC_INC(&lock_stats_wr[lbl].buckets[bucket]); @@ -908,6 +916,7 @@ void __ha_rwlock_sktord(enum lock_label lbl, struct ha_rwlock *l, start_time += now_mono_time(); HA_ATOMIC_ADD(&lock_stats_rd[lbl].nsec_wait, start_time); + start_time &= 0x3fffffff; // keep values below 1 billion only bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1; HA_ATOMIC_INC(&lock_stats_rd[lbl].buckets[bucket]); @@ -960,6 +969,7 @@ int __ha_rwlock_trysklock(enum lock_label lbl, struct ha_rwlock *l, /* got the lock ! 
*/ HA_ATOMIC_ADD(&lock_stats_sk[lbl].nsec_wait, start_time); + start_time &= 0x3fffffff; // keep values below 1 billion only bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1; HA_ATOMIC_INC(&lock_stats_sk[lbl].buckets[bucket]); HA_ATOMIC_OR(&st->cur_seeker, tbit); @@ -997,6 +1007,7 @@ int __ha_rwlock_tryrdtosk(enum lock_label lbl, struct ha_rwlock *l, /* got the lock ! */ HA_ATOMIC_ADD(&lock_stats_sk[lbl].nsec_wait, start_time); + start_time &= 0x3fffffff; // keep values below 1 billion only bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1; HA_ATOMIC_INC(&lock_stats_sk[lbl].buckets[bucket]); HA_ATOMIC_OR(&st->cur_seeker, tbit); @@ -1042,6 +1053,7 @@ void __spin_lock(enum lock_label lbl, struct ha_spinlock *l, start_time += now_mono_time(); HA_ATOMIC_ADD(&lock_stats_sk[lbl].nsec_wait, start_time); + start_time &= 0x3fffffff; // keep values below 1 billion only bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1; HA_ATOMIC_INC(&lock_stats_sk[lbl].buckets[bucket]); @@ -1078,6 +1090,7 @@ int __spin_trylock(enum lock_label lbl, struct ha_spinlock *l, HA_ATOMIC_ADD(&lock_stats_sk[lbl].nsec_wait, start_time); + start_time &= 0x3fffffff; // keep values below 1 billion only bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1; HA_ATOMIC_INC(&lock_stats_sk[lbl].buckets[bucket]);