]> git.ipfire.org Git - thirdparty/openssl.git/commitdiff
crypto/threads_pthread: rewrite contention data storage to per-tid
authorEugene Syromiatnikov <esyr@openssl.org>
Mon, 7 Jul 2025 14:03:09 +0000 (16:03 +0200)
committerNeil Horman <nhorman@openssl.org>
Thu, 7 Aug 2025 15:02:51 +0000 (11:02 -0400)
This eliminates locking during writing out of the lock contation report
data, which claws back some of the lost performance degradation imposed
by the lock contention reporting instrumentation:

    [Without -DREPORT_RWLOCK_CONTENTION]
    ~/dev/perftools/source$ ./evp_fetch 100
    Average time per fetch call: 4.502162us
    ~/dev/perftools/source$ ./evp_fetch 200
    Average time per fetch call: 8.224920us

    [Before]
    ~/dev/perftools/source$ ./evp_fetch 100
    Average time per fetch call: 13.079795us
    ~/dev/perftools/source$ ./evp_fetch 200
    Average time per fetch call: 23.420235us

    [After]
    ~/dev/perftools/source$ ./evp_fetch 100
    Average time per fetch call: 6.557428us
    ~/dev/perftools/source$ ./evp_fetch 200
    Average time per fetch call: 13.415148us

The downside is that it produces a file for each TID, which floods
the working directory with debug files, but that mich be an acceptable
trade-off.

Reviewed-by: Neil Horman <nhorman@openssl.org>
Reviewed-by: Saša Nedvědický <sashan@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/27983)

crypto/threads_pthread.c
util/analyze-contention-log.sh

index afdab335793a6f4ec7881b1598d614347e218fb1..e4d4f72dd8952efeb3b9379c9b79fef9742654e2 100644 (file)
 #include "internal/threads_common.h"
 #include "internal/rcu.h"
 #ifdef REPORT_RWLOCK_CONTENTION
+# include <fcntl.h>
 # include <stdbool.h>
+# include <sys/syscall.h>
+# include <sys/uio.h>
 # include "internal/time.h"
 #endif
 #include "rcu_internal.h"
@@ -611,17 +614,10 @@ void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
  * to the contention log file.  We do this because we want to avoid use
  * of the CRYPTO_THREAD api so as to prevent recursive blocking reports.
  */
-static FILE *contention_fp = NULL;
-static CRYPTO_ONCE init_contention_fp = CRYPTO_ONCE_STATIC_INIT;
-static int rwlock_count = 0;
+static CRYPTO_ONCE init_contention_data_flag = CRYPTO_ONCE_STATIC_INIT;
 pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;
 CRYPTO_THREAD_LOCAL thread_contention_data;
 
-static void destroy_contention_data(void *data)
-{
-    OPENSSL_free(data);
-}
-
 struct stack_info {
     unsigned int nptrs;
     int write;
@@ -633,22 +629,48 @@ struct stack_info {
 #  define STACKS_COUNT 32
 #  define BT_BUF_SIZE 1024
 struct stack_traces {
+    int fd;
     int lock_depth;
     size_t idx;
     struct stack_info stacks[STACKS_COUNT];
 };
 
+/* The glibc gettid() definition presents only since 2.30. */
+static ossl_inline pid_t get_tid(void)
+{
+    return syscall(SYS_gettid);
+}
+
 #  ifdef FIPS_MODULE
 #   define FIPS_SFX "-fips"
 #  else
 #   define FIPS_SFX ""
 #  endif
-static void init_contention_fp_once(void)
+static void *init_contention_data(void)
+{
+    struct stack_traces *traces;
+    char fname_fmt[] = "lock-contention-log" FIPS_SFX ".%d.txt";
+    char fname[sizeof(fname_fmt) + sizeof(int) * 3];
+
+    traces = OPENSSL_zalloc(sizeof(struct stack_traces));
+
+    snprintf(fname, sizeof(fname), fname_fmt, get_tid());
+
+    traces->fd = open(fname, O_WRONLY | O_APPEND | O_CLOEXEC | O_CREAT, 0600);
+
+    return traces;
+}
+
+static void destroy_contention_data(void *data)
 {
-    contention_fp = fopen("lock-contention-log" FIPS_SFX ".txt", "w");
-    if (contention_fp == NULL)
-        fprintf(stderr, "Contention log file could not be opened, log will not be recorded\n");
+    struct stack_traces *st = data;
+
+    close(st->fd);
+    OPENSSL_free(data);
+}
 
+static void init_contention_data_once(void)
+{
     /*
      * Create a thread local key here to store our list of stack traces
      * to be printed when we unlock the lock we are holding
@@ -662,56 +684,56 @@ static struct stack_traces *get_stack_traces(bool init)
     struct stack_traces *traces = CRYPTO_THREAD_get_local(&thread_contention_data);
 
     if (!traces && init) {
-        traces = OPENSSL_zalloc(sizeof(*traces));
+        traces = init_contention_data();
         CRYPTO_THREAD_set_local(&thread_contention_data, traces);
     }
 
     return traces;
 }
 
-static void print_stack_traces(struct stack_traces *traces, FILE *fptr)
+static void print_stack_traces(struct stack_traces *traces)
 {
     unsigned int j;
+    struct iovec *iov;
+    int iovcnt;
 
-    pthread_mutex_lock(&log_lock);
     while (traces != NULL && traces->idx >= 1) {
         traces->idx--;
-        fprintf(fptr, "lock blocked on %s for %zu usec at time %zu tid %d\n",
+        dprintf(traces->fd,
+                "lock blocked on %s for %zu usec at time %zu tid %d\n",
                 traces->stacks[traces->idx].write == 1 ? "WRITE" : "READ",
                 ossl_time2us(traces->stacks[traces->idx].duration),
                 ossl_time2us(traces->stacks[traces->idx].start),
-                gettid());
+                get_tid());
         if (traces->stacks[traces->idx].strings != NULL) {
-            for (j = 0; j < traces->stacks[traces->idx].nptrs; j++)
-                fprintf(fptr, "%s\n", traces->stacks[traces->idx].strings[j]);
-            free(traces->stacks[traces->idx].strings);
+            static const char lf = '\n';
+
+            iovcnt = traces->stacks[traces->idx].nptrs * 2 + 1;
+            iov = alloca(iovcnt * sizeof(*iov));
+            for (j = 0; j < traces->stacks[traces->idx].nptrs; j++) {
+                iov[2 * j].iov_base = traces->stacks[traces->idx].strings[j];
+                iov[2 * j].iov_len = strlen(traces->stacks[traces->idx].strings[j]);
+                iov[2 * j + 1].iov_base = (char *) &lf;
+                iov[2 * j + 1].iov_len = 1;
+            }
+            iov[traces->stacks[traces->idx].nptrs * 2].iov_base = (char *) &lf;
+            iov[traces->stacks[traces->idx].nptrs * 2].iov_len = 1;
         } else {
-            fprintf(fptr, "No stack trace available\n");
+            static const char no_bt[] = "No stack trace available\n\n";
+
+            iovcnt = 1;
+            iov = alloca(iovcnt * sizeof(*iov));
+            iov[0].iov_base = (char *) no_bt;
+            iov[0].iov_len = sizeof(no_bt) - 1;
         }
-        fprintf(contention_fp, "\n");
+        writev(traces->fd, iov, iovcnt);
+        free(traces->stacks[traces->idx].strings);
     }
-    pthread_mutex_unlock(&log_lock);
 }
 
 static ossl_inline void ossl_init_rwlock_contention_data(void)
 {
-    CRYPTO_THREAD_run_once(&init_contention_fp, init_contention_fp_once);
-    __atomic_add_fetch(&rwlock_count, 1, __ATOMIC_ACQ_REL);
-}
-
-static ossl_inline void ossl_free_rwlock_contention_data(void)
-{
-    /*
-     * Note: It's possible here that OpenSSL may allocate a lock and immediately
-     * free it, in which case we would erroneously close the contention log
-     * prior to the library going on to do more real work.  In practice
-     * that never happens though, and since this is a debug facility
-     * we don't worry about that here.
-     */
-    if (__atomic_add_fetch(&rwlock_count, -1, __ATOMIC_ACQ_REL) == 0) {
-        fclose(contention_fp);
-        contention_fp = NULL;
-    }
+    CRYPTO_THREAD_run_once(&init_contention_data_flag, init_contention_data_once);
 }
 
 static int record_lock_contention(pthread_rwlock_t *lock,
@@ -735,7 +757,7 @@ static int record_lock_contention(pthread_rwlock_t *lock,
     traces->idx++;
     if (traces->idx >= STACKS_COUNT) {
         fprintf(stderr, "STACK RECORD OVERFLOW!\n");
-        print_stack_traces(traces, contention_fp);
+        print_stack_traces(traces);
     }
 
     return 0;
@@ -792,11 +814,11 @@ static ossl_inline int ossl_rwlock_unlock(pthread_rwlock_t *lock)
     {
         struct stack_traces *traces = get_stack_traces(false);
 
-        if (contention_fp != NULL && traces != NULL) {
+        if (traces != NULL) {
             traces->lock_depth--;
             assert(traces->lock_depth >= 0);
             if (traces->lock_depth == 0)
-                print_stack_traces(traces, contention_fp);
+                print_stack_traces(traces);
         }
     }
 
@@ -809,10 +831,6 @@ static ossl_inline void ossl_init_rwlock_contention_data(void)
 {
 }
 
-static ossl_inline void ossl_free_rwlock_contention_data(void)
-{
-}
-
 static ossl_inline int ossl_rwlock_rdlock(pthread_rwlock_t *rwlock)
 {
     return pthread_rwlock_rdlock(rwlock);
@@ -927,7 +945,6 @@ void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
         return;
 
 # ifdef USE_RWLOCK
-    ossl_free_rwlock_contention_data();
     pthread_rwlock_destroy(lock);
 # else
     pthread_mutex_destroy(lock);
index b8cd6d0f77437c9e003bf5eafaed45c3a65ab9f2..a107ed04b2162a310a18cbaf1f54f8790dec60b5 100755 (executable)
 #
 TEMPDIR=$(mktemp -d /tmp/contention.XXXXXX)
 
-LOGFILE=$1
 trap "rm -rf $TEMPDIR" EXIT
 
-if [ ! -f $LOGFILE ]
-then
-    echo "No log file found"
-    exit 1
-fi
-LOGFILEBASE=$(basename $LOGFILE)
-
 echo "Splitting files"
 
 #
 #start by splitting the log into separate stack traces
 #
-mkdir $TEMPDIR/individual_files
-cp $LOGFILE $TEMPDIR/individual_files/$LOGFILEBASE
+mkdir "$TEMPDIR/individual_files"
+cat "$@" > "$TEMPDIR/individual_files/log"
 pushd $TEMPDIR/individual_files/ > /dev/null
 awk '
     BEGIN {RS = ""; FS = "\n"}
-    {file_num++; print > ("stacktrace" file_num ".txt")}' ./$LOGFILEBASE
+    {file_num++; print > ("stacktrace" file_num ".txt")}' ./log
 popd > /dev/null
-rm -f $TEMPDIR/individual_files/$LOGFILEBASE
+rm -f $TEMPDIR/individual_files/log
 
 #
 # Make some associative arrays to track our stats