From: Eugene Syromiatnikov Date: Mon, 7 Jul 2025 14:03:09 +0000 (+0200) Subject: crypto/threads_pthread: rewrite contention data storage to per-tid X-Git-Tag: openssl-3.6.0-alpha1~253 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=99d0d23e05b0011c25b31b2b0e8254f544163a92;p=thirdparty%2Fopenssl.git crypto/threads_pthread: rewrite contention data storage to per-tid This eliminates locking during writing out of the lock contation report data, which claws back some of the lost performance degradation imposed by the lock contention reporting instrumentation: [Without -DREPORT_RWLOCK_CONTENTION] ~/dev/perftools/source$ ./evp_fetch 100 Average time per fetch call: 4.502162us ~/dev/perftools/source$ ./evp_fetch 200 Average time per fetch call: 8.224920us [Before] ~/dev/perftools/source$ ./evp_fetch 100 Average time per fetch call: 13.079795us ~/dev/perftools/source$ ./evp_fetch 200 Average time per fetch call: 23.420235us [After] ~/dev/perftools/source$ ./evp_fetch 100 Average time per fetch call: 6.557428us ~/dev/perftools/source$ ./evp_fetch 200 Average time per fetch call: 13.415148us The downside is that it produces a file for each TID, which floods the working directory with debug files, but that mich be an acceptable trade-off. Reviewed-by: Neil Horman Reviewed-by: Saša Nedvědický (Merged from https://github.com/openssl/openssl/pull/27983) --- diff --git a/crypto/threads_pthread.c b/crypto/threads_pthread.c index afdab335793..e4d4f72dd89 100644 --- a/crypto/threads_pthread.c +++ b/crypto/threads_pthread.c @@ -38,7 +38,10 @@ #include "internal/threads_common.h" #include "internal/rcu.h" #ifdef REPORT_RWLOCK_CONTENTION +# include # include +# include +# include # include "internal/time.h" #endif #include "rcu_internal.h" @@ -611,17 +614,10 @@ void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock) * to the contention log file. We do this because we want to avoid use * of the CRYPTO_THREAD api so as to prevent recursive blocking reports. */ -static FILE *contention_fp = NULL; -static CRYPTO_ONCE init_contention_fp = CRYPTO_ONCE_STATIC_INIT; -static int rwlock_count = 0; +static CRYPTO_ONCE init_contention_data_flag = CRYPTO_ONCE_STATIC_INIT; pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER; CRYPTO_THREAD_LOCAL thread_contention_data; -static void destroy_contention_data(void *data) -{ - OPENSSL_free(data); -} - struct stack_info { unsigned int nptrs; int write; @@ -633,22 +629,48 @@ struct stack_info { # define STACKS_COUNT 32 # define BT_BUF_SIZE 1024 struct stack_traces { + int fd; int lock_depth; size_t idx; struct stack_info stacks[STACKS_COUNT]; }; +/* The glibc gettid() definition presents only since 2.30. */ +static ossl_inline pid_t get_tid(void) +{ + return syscall(SYS_gettid); +} + # ifdef FIPS_MODULE # define FIPS_SFX "-fips" # else # define FIPS_SFX "" # endif -static void init_contention_fp_once(void) +static void *init_contention_data(void) +{ + struct stack_traces *traces; + char fname_fmt[] = "lock-contention-log" FIPS_SFX ".%d.txt"; + char fname[sizeof(fname_fmt) + sizeof(int) * 3]; + + traces = OPENSSL_zalloc(sizeof(struct stack_traces)); + + snprintf(fname, sizeof(fname), fname_fmt, get_tid()); + + traces->fd = open(fname, O_WRONLY | O_APPEND | O_CLOEXEC | O_CREAT, 0600); + + return traces; +} + +static void destroy_contention_data(void *data) { - contention_fp = fopen("lock-contention-log" FIPS_SFX ".txt", "w"); - if (contention_fp == NULL) - fprintf(stderr, "Contention log file could not be opened, log will not be recorded\n"); + struct stack_traces *st = data; + + close(st->fd); + OPENSSL_free(data); +} +static void init_contention_data_once(void) +{ /* * Create a thread local key here to store our list of stack traces * to be printed when we unlock the lock we are holding @@ -662,56 +684,56 @@ static struct stack_traces *get_stack_traces(bool init) struct stack_traces *traces = CRYPTO_THREAD_get_local(&thread_contention_data); if (!traces && init) { - traces = OPENSSL_zalloc(sizeof(*traces)); + traces = init_contention_data(); CRYPTO_THREAD_set_local(&thread_contention_data, traces); } return traces; } -static void print_stack_traces(struct stack_traces *traces, FILE *fptr) +static void print_stack_traces(struct stack_traces *traces) { unsigned int j; + struct iovec *iov; + int iovcnt; - pthread_mutex_lock(&log_lock); while (traces != NULL && traces->idx >= 1) { traces->idx--; - fprintf(fptr, "lock blocked on %s for %zu usec at time %zu tid %d\n", + dprintf(traces->fd, + "lock blocked on %s for %zu usec at time %zu tid %d\n", traces->stacks[traces->idx].write == 1 ? "WRITE" : "READ", ossl_time2us(traces->stacks[traces->idx].duration), ossl_time2us(traces->stacks[traces->idx].start), - gettid()); + get_tid()); if (traces->stacks[traces->idx].strings != NULL) { - for (j = 0; j < traces->stacks[traces->idx].nptrs; j++) - fprintf(fptr, "%s\n", traces->stacks[traces->idx].strings[j]); - free(traces->stacks[traces->idx].strings); + static const char lf = '\n'; + + iovcnt = traces->stacks[traces->idx].nptrs * 2 + 1; + iov = alloca(iovcnt * sizeof(*iov)); + for (j = 0; j < traces->stacks[traces->idx].nptrs; j++) { + iov[2 * j].iov_base = traces->stacks[traces->idx].strings[j]; + iov[2 * j].iov_len = strlen(traces->stacks[traces->idx].strings[j]); + iov[2 * j + 1].iov_base = (char *) &lf; + iov[2 * j + 1].iov_len = 1; + } + iov[traces->stacks[traces->idx].nptrs * 2].iov_base = (char *) &lf; + iov[traces->stacks[traces->idx].nptrs * 2].iov_len = 1; } else { - fprintf(fptr, "No stack trace available\n"); + static const char no_bt[] = "No stack trace available\n\n"; + + iovcnt = 1; + iov = alloca(iovcnt * sizeof(*iov)); + iov[0].iov_base = (char *) no_bt; + iov[0].iov_len = sizeof(no_bt) - 1; } - fprintf(contention_fp, "\n"); + writev(traces->fd, iov, iovcnt); + free(traces->stacks[traces->idx].strings); } - pthread_mutex_unlock(&log_lock); } static ossl_inline void ossl_init_rwlock_contention_data(void) { - CRYPTO_THREAD_run_once(&init_contention_fp, init_contention_fp_once); - __atomic_add_fetch(&rwlock_count, 1, __ATOMIC_ACQ_REL); -} - -static ossl_inline void ossl_free_rwlock_contention_data(void) -{ - /* - * Note: It's possible here that OpenSSL may allocate a lock and immediately - * free it, in which case we would erroneously close the contention log - * prior to the library going on to do more real work. In practice - * that never happens though, and since this is a debug facility - * we don't worry about that here. - */ - if (__atomic_add_fetch(&rwlock_count, -1, __ATOMIC_ACQ_REL) == 0) { - fclose(contention_fp); - contention_fp = NULL; - } + CRYPTO_THREAD_run_once(&init_contention_data_flag, init_contention_data_once); } static int record_lock_contention(pthread_rwlock_t *lock, @@ -735,7 +757,7 @@ static int record_lock_contention(pthread_rwlock_t *lock, traces->idx++; if (traces->idx >= STACKS_COUNT) { fprintf(stderr, "STACK RECORD OVERFLOW!\n"); - print_stack_traces(traces, contention_fp); + print_stack_traces(traces); } return 0; @@ -792,11 +814,11 @@ static ossl_inline int ossl_rwlock_unlock(pthread_rwlock_t *lock) { struct stack_traces *traces = get_stack_traces(false); - if (contention_fp != NULL && traces != NULL) { + if (traces != NULL) { traces->lock_depth--; assert(traces->lock_depth >= 0); if (traces->lock_depth == 0) - print_stack_traces(traces, contention_fp); + print_stack_traces(traces); } } @@ -809,10 +831,6 @@ static ossl_inline void ossl_init_rwlock_contention_data(void) { } -static ossl_inline void ossl_free_rwlock_contention_data(void) -{ -} - static ossl_inline int ossl_rwlock_rdlock(pthread_rwlock_t *rwlock) { return pthread_rwlock_rdlock(rwlock); @@ -927,7 +945,6 @@ void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock) return; # ifdef USE_RWLOCK - ossl_free_rwlock_contention_data(); pthread_rwlock_destroy(lock); # else pthread_mutex_destroy(lock); diff --git a/util/analyze-contention-log.sh b/util/analyze-contention-log.sh index b8cd6d0f774..a107ed04b21 100755 --- a/util/analyze-contention-log.sh +++ b/util/analyze-contention-log.sh @@ -15,29 +15,21 @@ # TEMPDIR=$(mktemp -d /tmp/contention.XXXXXX) -LOGFILE=$1 trap "rm -rf $TEMPDIR" EXIT -if [ ! -f $LOGFILE ] -then - echo "No log file found" - exit 1 -fi -LOGFILEBASE=$(basename $LOGFILE) - echo "Splitting files" # #start by splitting the log into separate stack traces # -mkdir $TEMPDIR/individual_files -cp $LOGFILE $TEMPDIR/individual_files/$LOGFILEBASE +mkdir "$TEMPDIR/individual_files" +cat "$@" > "$TEMPDIR/individual_files/log" pushd $TEMPDIR/individual_files/ > /dev/null awk ' BEGIN {RS = ""; FS = "\n"} - {file_num++; print > ("stacktrace" file_num ".txt")}' ./$LOGFILEBASE + {file_num++; print > ("stacktrace" file_num ".txt")}' ./log popd > /dev/null -rm -f $TEMPDIR/individual_files/$LOGFILEBASE +rm -f $TEMPDIR/individual_files/log # # Make some associative arrays to track our stats