From: Neil Horman Date: Tue, 24 Jun 2025 17:50:09 +0000 (-0400) Subject: Add lock contention checking to our pthreads implementation X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;p=thirdparty%2Fopenssl.git Add lock contention checking to our pthreads implementation Something we're missing in our ability to do performance monitoring and diagnosis in openssl is the ability to check for lock contention. While some tools exist for this (valgrinds drd tool for example), they really only measure the time spent in critical sections, not the instances in which they are contended. For that we need something more specific. This patch introduces the REPORT_RWLOCK_CONTENTION macro. When openssl is built with: ./Configure -rdynamic -fno-omit-frame-pointer -DREPORT_RWLOCK_CONTENTION We can now get output sent to a log file that looks like the following: =============== lock blocked on WRITE for 1001 usec /lib64/libasan.so.8(+0x525e7) [0x7fc5ef4525e7] ../../test/quic_radix_test(CRYPTO_THREAD_write_lock+0x151) [0x79d976] ../../test/quic_radix_test() [0x61e6fb] ../../test/quic_radix_test(test_vprintf_stderr+0x15) [0x61e78d] ../../test/quic_radix_test(test_printf_stderr+0x10f) [0x623ba4] ../../test/quic_radix_test(test_fail_message_prefix+0xf7) [0x62436b] ../../test/quic_radix_test() [0x6243b9] ../../test/quic_radix_test(test_info+0x133) [0x624842] ../../test/quic_radix_test() [0x415aad] ../../test/quic_radix_test() [0x417bad] ../../test/quic_radix_test() [0x41b1a9] ../../test/quic_radix_test() [0x41b2eb] ../../test/quic_radix_test() [0x8071fc] /lib64/libasan.so.8(+0x28ee6) [0x7fc5ef428ee6] /lib64/libc.so.6(+0x711d4) [0x7fc5ef27f1d4] /lib64/libc.so.6(+0xf3cec) [0x7fc5ef301cec] ============== Which tells us when a thread blocked because someone else was already holding the lock, how long it was blocked for, and where the blocking call originated from via its backtrace. I think this should enable us to better determine where our contended locking paths are for a given application, and give us some insight on how to fix them. Currently its linux only (as the backtrace functionality only exists there, and there are few warts (like the need to use a file pointer rather than a bio to record the log, see comments), but I think its enough to give us a useful diagnostic tool to help drive some performance improvements. Fixes openssl/project#1237 Reviewed-by: Saša Nedvědický Reviewed-by: Tomas Mraz Reviewed-by: Matt Caswell (Merged from https://github.com/openssl/openssl/pull/27884) --- diff --git a/crypto/threads_pthread.c b/crypto/threads_pthread.c index fd87ccfa03..e3b262e7db 100644 --- a/crypto/threads_pthread.c +++ b/crypto/threads_pthread.c @@ -10,12 +10,36 @@ /* We need to use the OPENSSL_fork_*() deprecated APIs */ #define OPENSSL_SUPPRESS_DEPRECATED +#if !defined(__GNUC__) || !defined(__ATOMIC_ACQ_REL) || \ + defined(BROKEN_CLANG_ATOMICS) || defined(OPENSSL_NO_STDIO) +/* + * we only enable REPORT_RWLOCK_CONTENTION on clang/gcc when we have + * atomics available. We do this because we need to use an atomic to track + * when we can close the log file. We could use the CRYPTO_atomic_ api + * but that requires lock creation which gets us into a bad recursive loop + * when we try to initialize the file pointer + */ +# ifdef REPORT_RWLOCK_CONTENTION +# warning "RWLOCK CONTENTION REPORTING NOT SUPPORTED, Disabling" +# undef REPORT_RWLOCK_CONTENTION +# endif +#endif + +#ifdef REPORT_RWLOCK_CONTENTION +# define _GNU_SOURCE +# include +# include +#endif + #include #include #include #include "internal/cryptlib.h" #include "internal/threads_common.h" #include "internal/rcu.h" +#ifdef REPORT_RWLOCK_CONTENTION +# include "internal/time.h" +#endif #include "rcu_internal.h" #if defined(__clang__) && defined(__has_feature) @@ -575,11 +599,81 @@ void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock) OPENSSL_free(rlock); } +# ifdef REPORT_RWLOCK_CONTENTION +/* + * Normally we would use a BIO here to do this, but we create locks during + * library initialization, and creating a bio too early, creates a recursive set + * of stack calls that leads us to call CRYPTO_thread_run_once while currently + * executing the init routine for various run_once functions, which leads to + * deadlock. Avoid that by just using a FILE pointer. Also note that we + * directly use a pthread_mutex_t to protect access from multiple threads + * to the contention log file. We do this because we want to avoid use + * of the CRYPTO_THREAD api so as to prevent recursive blocking reports. + */ +static FILE *contention_fp = NULL; +static CRYPTO_ONCE init_contention_fp = CRYPTO_ONCE_STATIC_INIT; +static int rwlock_count = 0; +pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER; +CRYPTO_THREAD_LOCAL thread_contention_data; + +static void destroy_contention_data(void *data) +{ + OPENSSL_free(data); +} + +struct stack_info { + unsigned int nptrs; + int write; + OSSL_TIME start; + OSSL_TIME duration; + char **strings; +}; + +# define STACKS_COUNT 32 +struct stack_traces { + int lock_depth; + size_t idx; + struct stack_info stacks[STACKS_COUNT]; +}; + +static void init_contention_fp_once(void) +{ +# ifdef FIPS_MODULE + contention_fp = fopen("lock-contention-log-fips.txt", "w"); +# else + contention_fp = fopen("lock-contention-log.txt", "w"); +# endif + if (contention_fp == NULL) + fprintf(stderr, "Contention log file could not be opened, log will not be recorded\n"); + + /* + * Create a thread local key here to store our list of stack traces + * to be printed when we unlock the lock we are holding + */ + CRYPTO_THREAD_init_local(&thread_contention_data, destroy_contention_data); + return; +} +# endif + CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void) { # ifdef USE_RWLOCK CRYPTO_RWLOCK *lock; +# ifdef REPORT_RWLOCK_CONTENTION + CRYPTO_THREAD_run_once(&init_contention_fp, init_contention_fp_once); + __atomic_add_fetch(&rwlock_count, 1, __ATOMIC_ACQ_REL); + { + struct stack_info *thread_stack_info; + + thread_stack_info = CRYPTO_THREAD_get_local(&thread_contention_data); + if (thread_stack_info == NULL) { + thread_stack_info = OPENSSL_zalloc(sizeof(struct stack_traces)); + CRYPTO_THREAD_set_local(&thread_contention_data, thread_stack_info); + } + } +# endif + if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL) /* Don't set error, to avoid recursion blowup. */ return NULL; @@ -620,11 +714,75 @@ CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void) return lock; } +# ifdef REPORT_RWLOCK_CONTENTION +static void print_stack_traces(struct stack_traces *traces, FILE *fptr) +{ + unsigned int j; + + pthread_mutex_lock(&log_lock); + while (traces != NULL && traces->idx >= 1) { + traces->idx--; + fprintf(fptr, "lock blocked on %s for %zu usec at time %zu tid %d\n", + traces->stacks[traces->idx].write == 1 ? "WRITE" : "READ", + ossl_time2us(traces->stacks[traces->idx].duration), + ossl_time2us(traces->stacks[traces->idx].start), + gettid()); + if (traces->stacks[traces->idx].strings != NULL) { + for (j = 0; j < traces->stacks[traces->idx].nptrs; j++) + fprintf(fptr, "%s\n", traces->stacks[traces->idx].strings[j]); + free(traces->stacks[traces->idx].strings); + } else { + fprintf(fptr, "No stack trace available\n"); + } + fprintf(contention_fp, "\n"); + } + pthread_mutex_unlock(&log_lock); +} +# endif + +# define BT_BUF_SIZE 1024 + __owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock) { # ifdef USE_RWLOCK +# ifdef REPORT_RWLOCK_CONTENTION + struct stack_traces *traces = CRYPTO_THREAD_get_local(&thread_contention_data); + + if (ossl_unlikely(traces == NULL)) { + traces = OPENSSL_zalloc(sizeof(struct stack_traces)); + CRYPTO_THREAD_set_local(&thread_contention_data, traces); + if (ossl_unlikely(traces == NULL)) + return 0; + } + + traces->lock_depth++; + if (pthread_rwlock_tryrdlock(lock)) { + void *buffer[BT_BUF_SIZE]; + OSSL_TIME start, end; + + start = ossl_time_now(); + if (!ossl_assert(pthread_rwlock_rdlock(lock) == 0)) { + traces->lock_depth--; + return 0; + } + end = ossl_time_now(); + traces->stacks[traces->idx].duration = ossl_time_subtract(end, start); + traces->stacks[traces->idx].nptrs = backtrace(buffer, BT_BUF_SIZE); + traces->stacks[traces->idx].strings = backtrace_symbols(buffer, + traces->stacks[traces->idx].nptrs); + traces->stacks[traces->idx].duration = ossl_time_subtract(end, start); + traces->stacks[traces->idx].start = start; + traces->stacks[traces->idx].write = 0; + traces->idx++; + if (traces->idx >= STACKS_COUNT) { + fprintf(stderr, "STACK RECORD OVERFLOW!\n"); + print_stack_traces(traces, contention_fp); + } + } +# else if (!ossl_assert(pthread_rwlock_rdlock(lock) == 0)) return 0; +# endif # else if (pthread_mutex_lock(lock) != 0) { assert(errno != EDEADLK && errno != EBUSY); @@ -638,8 +796,43 @@ __owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock) __owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock) { # ifdef USE_RWLOCK +# ifdef REPORT_RWLOCK_CONTENTION + struct stack_traces *traces = CRYPTO_THREAD_get_local(&thread_contention_data); + + if (ossl_unlikely(traces == NULL)) { + traces = OPENSSL_zalloc(sizeof(struct stack_traces)); + CRYPTO_THREAD_set_local(&thread_contention_data, traces); + if (ossl_unlikely(traces == NULL)) + return 0; + } + + traces->lock_depth++; + if (pthread_rwlock_trywrlock(lock)) { + void *buffer[BT_BUF_SIZE]; + OSSL_TIME start, end; + + start = ossl_time_now(); + if (!ossl_assert(pthread_rwlock_wrlock(lock) == 0)) { + traces->lock_depth--; + return 0; + } + end = ossl_time_now(); + traces->stacks[traces->idx].nptrs = backtrace(buffer, BT_BUF_SIZE); + traces->stacks[traces->idx].strings = backtrace_symbols(buffer, + traces->stacks[traces->idx].nptrs); + traces->stacks[traces->idx].duration = ossl_time_subtract(end, start); + traces->stacks[traces->idx].start = start; + traces->stacks[traces->idx].write = 1; + traces->idx++; + if (traces->idx >= STACKS_COUNT) { + fprintf(stderr, "STACK RECORD OVERFLOW!\n"); + print_stack_traces(traces, contention_fp); + } + } +# else if (!ossl_assert(pthread_rwlock_wrlock(lock) == 0)) return 0; +# endif # else if (pthread_mutex_lock(lock) != 0) { assert(errno != EDEADLK && errno != EBUSY); @@ -655,6 +848,18 @@ int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock) # ifdef USE_RWLOCK if (pthread_rwlock_unlock(lock) != 0) return 0; +# ifdef REPORT_RWLOCK_CONTENTION + { + struct stack_traces *traces = CRYPTO_THREAD_get_local(&thread_contention_data); + + if (contention_fp != NULL && traces != NULL) { + traces->lock_depth--; + assert(traces->lock_depth >= 0); + if (traces->lock_depth == 0) + print_stack_traces(traces, contention_fp); + } + } +# endif # else if (pthread_mutex_unlock(lock) != 0) { assert(errno != EPERM); @@ -669,6 +874,20 @@ void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock) { if (lock == NULL) return; +# ifdef REPORT_RWLOCK_CONTENTION + + /* + * Note: It's possible here that OpenSSL may allocate a lock and immediately + * free it, in which case we would erroneously close the contention log + * prior to the library going on to do more real work. In practice + * that never happens though, and since this is a debug facility + * we don't worry about that here. + */ + if (__atomic_add_fetch(&rwlock_count, -1, __ATOMIC_ACQ_REL) == 0) { + fclose(contention_fp); + contention_fp = NULL; + } +# endif # ifdef USE_RWLOCK pthread_rwlock_destroy(lock);