]> git.ipfire.org Git - thirdparty/openssl.git/commitdiff
Add lock contention checking to our pthreads implementation master
authorNeil Horman <nhorman@openssl.org>
Tue, 24 Jun 2025 17:50:09 +0000 (13:50 -0400)
committerNeil Horman <nhorman@openssl.org>
Fri, 11 Jul 2025 16:38:27 +0000 (12:38 -0400)
Something we're missing in our ability to do performance monitoring
and diagnosis in openssl is the ability to check for lock contention.
While some tools exist for this (valgrinds drd tool for example), they
really only measure the time spent in critical sections, not the
instances in which they are contended.  For that we need something more
specific.

This patch introduces the REPORT_RWLOCK_CONTENTION macro.  When openssl
is built with:
./Configure -rdynamic -fno-omit-frame-pointer -DREPORT_RWLOCK_CONTENTION

We can now get output sent to a log file that looks like the following:

===============
lock blocked on WRITE for 1001 usec
/lib64/libasan.so.8(+0x525e7) [0x7fc5ef4525e7]
../../test/quic_radix_test(CRYPTO_THREAD_write_lock+0x151) [0x79d976]
../../test/quic_radix_test() [0x61e6fb]
../../test/quic_radix_test(test_vprintf_stderr+0x15) [0x61e78d]
../../test/quic_radix_test(test_printf_stderr+0x10f) [0x623ba4]
../../test/quic_radix_test(test_fail_message_prefix+0xf7) [0x62436b]
../../test/quic_radix_test() [0x6243b9]
../../test/quic_radix_test(test_info+0x133) [0x624842]
../../test/quic_radix_test() [0x415aad]
../../test/quic_radix_test() [0x417bad]
../../test/quic_radix_test() [0x41b1a9]
../../test/quic_radix_test() [0x41b2eb]
../../test/quic_radix_test() [0x8071fc]
/lib64/libasan.so.8(+0x28ee6) [0x7fc5ef428ee6]
/lib64/libc.so.6(+0x711d4) [0x7fc5ef27f1d4]
/lib64/libc.so.6(+0xf3cec) [0x7fc5ef301cec]
==============

Which tells us when a thread blocked because someone else was already
holding the lock, how long it was blocked for, and where the blocking
call originated from via its backtrace.

I think this should enable us to better determine where our contended
locking paths are for a given application, and give us some insight on
how to fix them.

Currently its linux only (as the backtrace functionality only exists
there, and there are few warts (like the need to use a file pointer
rather than a bio to record the log, see comments), but I think its
enough to give us a useful diagnostic tool to help drive some
performance improvements.

Fixes openssl/project#1237

Reviewed-by: Saša Nedvědický <sashan@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Matt Caswell <matt@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/27884)

crypto/threads_pthread.c

index fd87ccfa033def8486be98c519fe3ddbad728a29..e3b262e7db71de0230d42c90eaacc89e0c3d917c 100644 (file)
 /* We need to use the OPENSSL_fork_*() deprecated APIs */
 #define OPENSSL_SUPPRESS_DEPRECATED
 
+#if !defined(__GNUC__) || !defined(__ATOMIC_ACQ_REL) || \
+    defined(BROKEN_CLANG_ATOMICS) || defined(OPENSSL_NO_STDIO)
+/*
+ * we only enable REPORT_RWLOCK_CONTENTION on clang/gcc when we have
+ * atomics available.  We do this because we need to use an atomic to track
+ * when we can close the log file.  We could use the CRYPTO_atomic_ api
+ * but that requires lock creation which gets us into a bad recursive loop
+ * when we try to initialize the file pointer
+ */
+# ifdef REPORT_RWLOCK_CONTENTION
+#  warning "RWLOCK CONTENTION REPORTING NOT SUPPORTED, Disabling"
+#  undef REPORT_RWLOCK_CONTENTION
+# endif
+#endif
+
+#ifdef REPORT_RWLOCK_CONTENTION
+# define _GNU_SOURCE
+# include <execinfo.h>
+# include <unistd.h>
+#endif
+
 #include <openssl/crypto.h>
 #include <crypto/cryptlib.h>
 #include <crypto/sparse_array.h>
 #include "internal/cryptlib.h"
 #include "internal/threads_common.h"
 #include "internal/rcu.h"
+#ifdef REPORT_RWLOCK_CONTENTION
+# include "internal/time.h"
+#endif
 #include "rcu_internal.h"
 
 #if defined(__clang__) && defined(__has_feature)
@@ -575,11 +599,81 @@ void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
     OPENSSL_free(rlock);
 }
 
+# ifdef REPORT_RWLOCK_CONTENTION
+/*
+ * Normally we would use a BIO here to do this, but we create locks during
+ * library initialization, and creating a bio too early, creates a recursive set
+ * of stack calls that leads us to call CRYPTO_thread_run_once while currently
+ * executing the init routine for various run_once functions, which leads to
+ * deadlock.  Avoid that by just using a FILE pointer.  Also note that we
+ * directly use a pthread_mutex_t to protect access from multiple threads
+ * to the contention log file.  We do this because we want to avoid use
+ * of the CRYPTO_THREAD api so as to prevent recursive blocking reports.
+ */
+static FILE *contention_fp = NULL;
+static CRYPTO_ONCE init_contention_fp = CRYPTO_ONCE_STATIC_INIT;
+static int rwlock_count = 0;
+pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;
+CRYPTO_THREAD_LOCAL thread_contention_data;
+
+static void destroy_contention_data(void *data)
+{
+    OPENSSL_free(data);
+}
+
+struct stack_info {
+    unsigned int nptrs;
+    int write;
+    OSSL_TIME start;
+    OSSL_TIME duration;
+    char **strings;
+};
+
+#  define STACKS_COUNT 32
+struct stack_traces {
+    int lock_depth;
+    size_t idx;
+    struct stack_info stacks[STACKS_COUNT];
+};
+
+static void init_contention_fp_once(void)
+{
+#  ifdef FIPS_MODULE
+    contention_fp = fopen("lock-contention-log-fips.txt", "w");
+#  else
+    contention_fp = fopen("lock-contention-log.txt", "w");
+#  endif
+    if (contention_fp == NULL)
+        fprintf(stderr, "Contention log file could not be opened, log will not be recorded\n");
+
+    /*
+     * Create a thread local key here to store our list of stack traces
+     * to be printed when we unlock the lock we are holding
+     */
+    CRYPTO_THREAD_init_local(&thread_contention_data, destroy_contention_data);
+    return;
+}
+# endif
+
 CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
 {
 # ifdef USE_RWLOCK
     CRYPTO_RWLOCK *lock;
 
+#  ifdef REPORT_RWLOCK_CONTENTION
+    CRYPTO_THREAD_run_once(&init_contention_fp, init_contention_fp_once);
+    __atomic_add_fetch(&rwlock_count, 1, __ATOMIC_ACQ_REL);
+    {
+        struct stack_info *thread_stack_info;
+
+        thread_stack_info = CRYPTO_THREAD_get_local(&thread_contention_data);
+        if (thread_stack_info == NULL) {
+            thread_stack_info = OPENSSL_zalloc(sizeof(struct stack_traces));
+            CRYPTO_THREAD_set_local(&thread_contention_data, thread_stack_info);
+        }
+    }
+#  endif
+
     if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
         /* Don't set error, to avoid recursion blowup. */
         return NULL;
@@ -620,11 +714,75 @@ CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
     return lock;
 }
 
+# ifdef REPORT_RWLOCK_CONTENTION
+static void print_stack_traces(struct stack_traces *traces, FILE *fptr)
+{
+    unsigned int j;
+
+    pthread_mutex_lock(&log_lock);
+    while (traces != NULL && traces->idx >= 1) {
+        traces->idx--;
+        fprintf(fptr, "lock blocked on %s for %zu usec at time %zu tid %d\n",
+                traces->stacks[traces->idx].write == 1 ? "WRITE" : "READ",
+                ossl_time2us(traces->stacks[traces->idx].duration),
+                ossl_time2us(traces->stacks[traces->idx].start),
+                gettid());
+        if (traces->stacks[traces->idx].strings != NULL) {
+            for (j = 0; j < traces->stacks[traces->idx].nptrs; j++)
+                fprintf(fptr, "%s\n", traces->stacks[traces->idx].strings[j]);
+            free(traces->stacks[traces->idx].strings);
+        } else {
+            fprintf(fptr, "No stack trace available\n");
+        }
+        fprintf(contention_fp, "\n");
+    }
+    pthread_mutex_unlock(&log_lock);
+}
+# endif
+
+# define BT_BUF_SIZE 1024
+
 __owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
 {
 # ifdef USE_RWLOCK
+#  ifdef REPORT_RWLOCK_CONTENTION
+    struct stack_traces *traces = CRYPTO_THREAD_get_local(&thread_contention_data);
+
+    if (ossl_unlikely(traces == NULL)) {
+        traces = OPENSSL_zalloc(sizeof(struct stack_traces));
+        CRYPTO_THREAD_set_local(&thread_contention_data, traces);
+        if (ossl_unlikely(traces == NULL))
+            return 0;
+    }
+
+    traces->lock_depth++;
+    if (pthread_rwlock_tryrdlock(lock)) {
+        void *buffer[BT_BUF_SIZE];
+        OSSL_TIME start, end;
+
+        start = ossl_time_now();
+        if (!ossl_assert(pthread_rwlock_rdlock(lock) == 0)) {
+            traces->lock_depth--;
+            return 0;
+        }
+        end = ossl_time_now();
+        traces->stacks[traces->idx].duration = ossl_time_subtract(end, start);
+        traces->stacks[traces->idx].nptrs = backtrace(buffer, BT_BUF_SIZE);
+        traces->stacks[traces->idx].strings = backtrace_symbols(buffer,
+                                                                traces->stacks[traces->idx].nptrs);
+        traces->stacks[traces->idx].duration = ossl_time_subtract(end, start);
+        traces->stacks[traces->idx].start = start;
+        traces->stacks[traces->idx].write = 0;
+        traces->idx++;
+        if (traces->idx >= STACKS_COUNT) {
+            fprintf(stderr, "STACK RECORD OVERFLOW!\n");
+            print_stack_traces(traces, contention_fp);
+        }
+    }
+#  else
     if (!ossl_assert(pthread_rwlock_rdlock(lock) == 0))
         return 0;
+#  endif
 # else
     if (pthread_mutex_lock(lock) != 0) {
         assert(errno != EDEADLK && errno != EBUSY);
@@ -638,8 +796,43 @@ __owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
 __owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
 {
 # ifdef USE_RWLOCK
+#  ifdef REPORT_RWLOCK_CONTENTION
+    struct stack_traces *traces = CRYPTO_THREAD_get_local(&thread_contention_data);
+
+    if (ossl_unlikely(traces == NULL)) {
+        traces = OPENSSL_zalloc(sizeof(struct stack_traces));
+        CRYPTO_THREAD_set_local(&thread_contention_data, traces);
+        if (ossl_unlikely(traces == NULL))
+            return 0;
+    }
+
+    traces->lock_depth++;
+    if (pthread_rwlock_trywrlock(lock)) {
+        void *buffer[BT_BUF_SIZE];
+        OSSL_TIME start, end;
+
+        start = ossl_time_now();
+        if (!ossl_assert(pthread_rwlock_wrlock(lock) == 0)) {
+            traces->lock_depth--;
+            return 0;
+        }
+        end = ossl_time_now();
+        traces->stacks[traces->idx].nptrs = backtrace(buffer, BT_BUF_SIZE);
+        traces->stacks[traces->idx].strings = backtrace_symbols(buffer,
+                                                                traces->stacks[traces->idx].nptrs);
+        traces->stacks[traces->idx].duration = ossl_time_subtract(end, start);
+        traces->stacks[traces->idx].start = start;
+        traces->stacks[traces->idx].write = 1;
+        traces->idx++;
+        if (traces->idx >= STACKS_COUNT) {
+            fprintf(stderr, "STACK RECORD OVERFLOW!\n");
+            print_stack_traces(traces, contention_fp);
+        }
+    }
+#  else
     if (!ossl_assert(pthread_rwlock_wrlock(lock) == 0))
         return 0;
+#  endif
 # else
     if (pthread_mutex_lock(lock) != 0) {
         assert(errno != EDEADLK && errno != EBUSY);
@@ -655,6 +848,18 @@ int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
 # ifdef USE_RWLOCK
     if (pthread_rwlock_unlock(lock) != 0)
         return 0;
+#  ifdef REPORT_RWLOCK_CONTENTION
+    {
+        struct stack_traces *traces = CRYPTO_THREAD_get_local(&thread_contention_data);
+
+        if (contention_fp != NULL && traces != NULL) {
+            traces->lock_depth--;
+            assert(traces->lock_depth >= 0);
+            if (traces->lock_depth == 0)
+                print_stack_traces(traces, contention_fp);
+        }
+    }
+#  endif
 # else
     if (pthread_mutex_unlock(lock) != 0) {
         assert(errno != EPERM);
@@ -669,6 +874,20 @@ void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
 {
     if (lock == NULL)
         return;
+# ifdef REPORT_RWLOCK_CONTENTION
+
+    /*
+     * Note: It's possible here that OpenSSL may allocate a lock and immediately
+     * free it, in which case we would erroneously close the contention log
+     * prior to the library going on to do more real work.  In practice
+     * that never happens though, and since this is a debug facility
+     * we don't worry about that here.
+     */
+    if (__atomic_add_fetch(&rwlock_count, -1, __ATOMIC_ACQ_REL) == 0) {
+        fclose(contention_fp);
+        contention_fp = NULL;
+    }
+# endif
 
 # ifdef USE_RWLOCK
     pthread_rwlock_destroy(lock);