From 16beec98d26644b96d57bd8da477166d0bc7d05c Mon Sep 17 00:00:00 2001
From: Georgi Valkov
Date: Fri, 28 Jun 2024 08:16:10 +0300
Subject: [PATCH] threads_win: fix build error with VS2010 x86

InterlockedAnd64 and InterlockedAdd64 are not available on VS2010 x86.
We have already implemented replacements for other functions, such as
InterlockedOr64. Apply the same approach to fix the errors.

A CRYPTO_RWLOCK rw_lock is added to rcu_lock_st. Replace InterlockedOr64
and InterlockedOr with CRYPTO_atomic_load and CRYPTO_atomic_load_int,
using the existing design pattern. Add documentation and tests for the
new atomic functions CRYPTO_atomic_add64 and CRYPTO_atomic_and.

Fixes:
libcrypto.lib(libcrypto-lib-threads_win.obj) : error LNK2019: unresolved external symbol _InterlockedAdd64 referenced in function _get_hold_current_qp
libcrypto.lib(libcrypto-lib-threads_win.obj) : error LNK2019: unresolved external symbol _InterlockedOr referenced in function _get_hold_current_qp
libcrypto.lib(libcrypto-lib-threads_win.obj) : error LNK2019: unresolved external symbol _InterlockedAnd64 referenced in function _update_qp
libcrypto.lib(libcrypto-lib-threads_win.obj) : error LNK2019: unresolved external symbol _InterlockedOr64 referenced in function _ossl_synchronize_rcu

Signed-off-by: Georgi Valkov
Reviewed-by: Neil Horman
Reviewed-by: Tomas Mraz
(Merged from https://github.com/openssl/openssl/pull/24405)
---
 crypto/threads_none.c               | 18 ++++++++
 crypto/threads_pthread.c            | 52 +++++++++++++++++++++
 crypto/threads_win.c                | 72 +++++++++++++++++++++++++----
 doc/man3/CRYPTO_THREAD_run_once.pod | 27 ++++++++++-
 include/openssl/crypto.h.in         |  4 ++
 test/threadstest.c                  | 46 ++++++++++++++++++
 util/libcrypto.num                  |  2 +
 7 files changed, 210 insertions(+), 11 deletions(-)

diff --git a/crypto/threads_none.c b/crypto/threads_none.c
index c2e6173bcac..75069792609 100644
--- a/crypto/threads_none.c
+++ b/crypto/threads_none.c
@@ -211,6 +211,24 @@ int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
     return 1;
 }
 
+int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
+                        CRYPTO_RWLOCK *lock)
+{
+    *val += op;
+    *ret = *val;
+
+    return 1;
+}
+
+int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
+                      CRYPTO_RWLOCK *lock)
+{
+    *val &= op;
+    *ret = *val;
+
+    return 1;
+}
+
 int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                      CRYPTO_RWLOCK *lock)
 {
diff --git a/crypto/threads_pthread.c b/crypto/threads_pthread.c
index 0628db542f6..7b545999478 100644
--- a/crypto/threads_pthread.c
+++ b/crypto/threads_pthread.c
@@ -872,6 +872,58 @@ int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
     return 1;
 }
 
+int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
+                        CRYPTO_RWLOCK *lock)
+{
+# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
+    if (__atomic_is_lock_free(sizeof(*val), val)) {
+        *ret = __atomic_add_fetch(val, op, __ATOMIC_ACQ_REL);
+        return 1;
+    }
+# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
+    /* This will work for all future Solaris versions. */
+    if (ret != NULL) {
+        *ret = atomic_add_64_nv(val, op);
+        return 1;
+    }
+# endif
+    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
+        return 0;
+    *val += op;
+    *ret = *val;
+
+    if (!CRYPTO_THREAD_unlock(lock))
+        return 0;
+
+    return 1;
+}
+
+int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
+                      CRYPTO_RWLOCK *lock)
+{
+# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
+    if (__atomic_is_lock_free(sizeof(*val), val)) {
+        *ret = __atomic_and_fetch(val, op, __ATOMIC_ACQ_REL);
+        return 1;
+    }
+# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
+    /* This will work for all future Solaris versions. */
+    if (ret != NULL) {
+        *ret = atomic_and_64_nv(val, op);
+        return 1;
+    }
+# endif
+    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
+        return 0;
+    *val &= op;
+    *ret = *val;
+
+    if (!CRYPTO_THREAD_unlock(lock))
+        return 0;
+
+    return 1;
+}
+
 int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                      CRYPTO_RWLOCK *lock)
 {
diff --git a/crypto/threads_win.c b/crypto/threads_win.c
index 5ee0751b041..c96583f2a8e 100644
--- a/crypto/threads_win.c
+++ b/crypto/threads_win.c
@@ -103,6 +103,7 @@ struct rcu_lock_st {
     CRYPTO_CONDVAR *alloc_signal;
     CRYPTO_MUTEX *prior_lock;
     CRYPTO_CONDVAR *prior_signal;
+    CRYPTO_RWLOCK *rw_lock;
 };
 
 static struct rcu_qp *allocate_new_qp_group(struct rcu_lock_st *lock,
@@ -132,6 +133,7 @@ CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
         return NULL;
 
     new->ctx = ctx;
+    new->rw_lock = CRYPTO_THREAD_lock_new();
     new->write_lock = ossl_crypto_mutex_new();
     new->alloc_signal = ossl_crypto_condvar_new();
     new->prior_signal = ossl_crypto_condvar_new();
@@ -143,7 +145,9 @@ CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
         || new->prior_signal == NULL
         || new->write_lock == NULL
         || new->alloc_lock == NULL
-        || new->prior_lock == NULL) {
+        || new->prior_lock == NULL
+        || new->rw_lock == NULL) {
+        CRYPTO_THREAD_lock_free(new->rw_lock);
         OPENSSL_free(new->qp_group);
         ossl_crypto_condvar_free(&new->alloc_signal);
         ossl_crypto_condvar_free(&new->prior_signal);
@@ -159,6 +163,7 @@ CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
 
 void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
 {
+    CRYPTO_THREAD_lock_free(lock->rw_lock);
     OPENSSL_free(lock->qp_group);
     ossl_crypto_condvar_free(&lock->alloc_signal);
     ossl_crypto_condvar_free(&lock->prior_signal);
@@ -171,14 +176,20 @@ void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
 static ossl_inline struct rcu_qp *get_hold_current_qp(CRYPTO_RCU_LOCK *lock)
 {
     uint32_t qp_idx;
+    uint32_t tmp;
+    uint64_t tmp64;
 
     /* get the current qp index */
     for (;;) {
-        qp_idx = InterlockedOr(&lock->reader_idx, 0);
-        InterlockedAdd64(&lock->qp_group[qp_idx].users, VAL_READER);
-        if (qp_idx == InterlockedOr(&lock->reader_idx, 0))
+        CRYPTO_atomic_load_int(&lock->reader_idx, (int *)&qp_idx,
+                               lock->rw_lock);
+        CRYPTO_atomic_add64(&lock->qp_group[qp_idx].users, VAL_READER, &tmp64,
+                            lock->rw_lock);
+        CRYPTO_atomic_load_int(&lock->reader_idx, (int *)&tmp, lock->rw_lock);
+        if (qp_idx == tmp)
             break;
-        InterlockedAdd64(&lock->qp_group[qp_idx].users, -VAL_READER);
+        CRYPTO_atomic_add64(&lock->qp_group[qp_idx].users, -VAL_READER, &tmp64,
+                            lock->rw_lock);
     }
 
     return &lock->qp_group[qp_idx];
@@ -254,7 +265,9 @@ void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
         if (data->thread_qps[i].lock == lock) {
             data->thread_qps[i].depth--;
             if (data->thread_qps[i].depth == 0) {
-                ret = InterlockedAdd64(&data->thread_qps[i].qp->users, -VAL_READER);
+                CRYPTO_atomic_add64(&data->thread_qps[i].qp->users,
+                                    -VAL_READER, (uint64_t *)&ret,
+                                    lock->rw_lock);
                 OPENSSL_assert(ret >= 0);
                 data->thread_qps[i].qp = NULL;
                 data->thread_qps[i].lock = NULL;
@@ -269,6 +282,7 @@ static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
     uint64_t new_id;
     uint32_t current_idx;
     uint32_t tmp;
+    uint64_t tmp64;
 
     ossl_crypto_mutex_lock(lock->alloc_lock);
     /*
@@ -292,8 +306,10 @@ static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
     lock->id_ctr++;
 
     new_id = VAL_ID(new_id);
-    InterlockedAnd64(&lock->qp_group[current_idx].users, ID_MASK);
-    InterlockedAdd64(&lock->qp_group[current_idx].users, new_id);
+    CRYPTO_atomic_and(&lock->qp_group[current_idx].users, ID_MASK, &tmp64,
+                      lock->rw_lock);
+    CRYPTO_atomic_add64(&lock->qp_group[current_idx].users, new_id, &tmp64,
+                        lock->rw_lock);
 
     /* update the reader index to be the prior qp */
     tmp = lock->current_alloc_idx;
@@ -328,7 +344,7 @@ void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
 
     /* wait for the reader count to reach zero */
     do {
-        count = InterlockedOr64(&qp->users, 0);
+        CRYPTO_atomic_load(&qp->users, &count, lock->rw_lock);
     } while (READER_COUNT(count) != 0);
 
     /* retire in order */
@@ -559,6 +575,44 @@ int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
     return 1;
 }
 
+int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
+                        CRYPTO_RWLOCK *lock)
+{
+#if (defined(NO_INTERLOCKEDOR64))
+    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
+        return 0;
+    *val += op;
+    *ret = *val;
+
+    if (!CRYPTO_THREAD_unlock(lock))
+        return 0;
+
+    return 1;
+#else
+    *ret = (uint64_t)InterlockedAdd64((LONG64 volatile *)val, (LONG64)op);
+    return 1;
+#endif
+}
+
+int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
+                      CRYPTO_RWLOCK *lock)
+{
+#if (defined(NO_INTERLOCKEDOR64))
+    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
+        return 0;
+    *val &= op;
+    *ret = *val;
+
+    if (!CRYPTO_THREAD_unlock(lock))
+        return 0;
+
+    return 1;
+#else
+    *ret = (uint64_t)InterlockedAnd64((LONG64 volatile *)val, (LONG64)op) & op;
+    return 1;
+#endif
+}
+
 int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                      CRYPTO_RWLOCK *lock)
 {
diff --git a/doc/man3/CRYPTO_THREAD_run_once.pod b/doc/man3/CRYPTO_THREAD_run_once.pod
index 2b0d0675ab0..e26f406f121 100644
--- a/doc/man3/CRYPTO_THREAD_run_once.pod
+++ b/doc/man3/CRYPTO_THREAD_run_once.pod
@@ -5,8 +5,8 @@
 CRYPTO_THREAD_run_once,
 CRYPTO_THREAD_lock_new, CRYPTO_THREAD_read_lock, CRYPTO_THREAD_write_lock,
 CRYPTO_THREAD_unlock, CRYPTO_THREAD_lock_free,
-CRYPTO_atomic_add, CRYPTO_atomic_or, CRYPTO_atomic_load, CRYPTO_atomic_store,
-CRYPTO_atomic_load_int,
+CRYPTO_atomic_add, CRYPTO_atomic_add64, CRYPTO_atomic_and, CRYPTO_atomic_or,
+CRYPTO_atomic_load, CRYPTO_atomic_store, CRYPTO_atomic_load_int,
 OSSL_set_max_threads, OSSL_get_max_threads,
 OSSL_get_thread_support_flags, OSSL_THREAD_SUPPORT_FLAG_THREAD_POOL,
 OSSL_THREAD_SUPPORT_FLAG_DEFAULT_SPAWN - OpenSSL thread support
@@ -25,6 +25,10 @@ OSSL_THREAD_SUPPORT_FLAG_DEFAULT_SPAWN - OpenSSL thread support
  void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock);
 
  int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock);
+ int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
+                         CRYPTO_RWLOCK *lock);
+ int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
+                       CRYPTO_RWLOCK *lock);
  int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                       CRYPTO_RWLOCK *lock);
  int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock);
@@ -95,6 +99,25 @@ supported and I<lock> is NULL, then the function will fail.
 
 =item *
 
+CRYPTO_atomic_add64() atomically adds I<op> to I<*val> and returns the
+result of the operation in I<*ret>. I<lock> will be locked, unless atomic
+operations are supported on the specific platform. Because of this, if a
+variable is modified by CRYPTO_atomic_add64() then CRYPTO_atomic_add64() must
+be the only way that the variable is modified. If atomic operations are not
+supported and I<lock> is NULL, then the function will fail.
+
+=item *
+
+CRYPTO_atomic_and() performs an atomic bitwise and of I<op> and I<*val> and stores
+the result back in I<*val>. It also returns the result of the operation in
+I<*ret>. I<lock> will be locked, unless atomic operations are supported on the
+specific platform. Because of this, if a variable is modified by
+CRYPTO_atomic_and() or read by CRYPTO_atomic_load() then CRYPTO_atomic_and() must
+be the only way that the variable is modified. If atomic operations are not
+supported and I<lock> is NULL, then the function will fail.
+
+=item *
+
 CRYPTO_atomic_or() performs an atomic bitwise or of I<op> and I<*val> and stores
 the result back in I<*val>. It also returns the result of the operation in
 I<*ret>. I<lock> will be locked, unless atomic operations are supported on the
diff --git a/include/openssl/crypto.h.in b/include/openssl/crypto.h.in
index ef87f9754d9..1bc8ae4cce0 100644
--- a/include/openssl/crypto.h.in
+++ b/include/openssl/crypto.h.in
@@ -86,6 +86,10 @@ int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock);
 void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock);
 
 int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock);
+int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
+                        CRYPTO_RWLOCK *lock);
+int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
+                      CRYPTO_RWLOCK *lock);
 int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                      CRYPTO_RWLOCK *lock);
 int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock);
diff --git a/test/threadstest.c b/test/threadstest.c
index b2b1ec2555a..85d5e114950 100644
--- a/test/threadstest.c
+++ b/test/threadstest.c
@@ -654,6 +654,52 @@ static int test_atomic(void)
             || !TEST_uint_eq((unsigned int)val64, (unsigned int)ret64))
         goto err;
 
+    ret64 = 0;
+
+    if (CRYPTO_atomic_and(&val64, 5, &ret64, NULL)) {
+        /* This succeeds therefore we're on a platform with lockless atomics */
+        if (!TEST_uint_eq((unsigned int)val64, 1)
+                || !TEST_uint_eq((unsigned int)val64, (unsigned int)ret64))
+            goto err;
+    } else {
+        /* This failed therefore we're on a platform without lockless atomics */
+        if (!TEST_uint_eq((unsigned int)val64, 3)
+                || !TEST_int_eq((unsigned int)ret64, 0))
+            goto err;
+    }
+    val64 = 3;
+    ret64 = 0;
+
+    if (!TEST_true(CRYPTO_atomic_and(&val64, 5, &ret64, lock)))
+        goto err;
+
+    if (!TEST_uint_eq((unsigned int)val64, 1)
+            || !TEST_uint_eq((unsigned int)val64, (unsigned int)ret64))
+        goto err;
+
+    ret64 = 0;
+
+    if (CRYPTO_atomic_add64(&val64, 2, &ret64, NULL)) {
+        /* This succeeds therefore we're on a platform with lockless atomics */
+        if (!TEST_uint_eq((unsigned int)val64, 3)
+                || !TEST_uint_eq((unsigned int)val64, (unsigned int)ret64))
+            goto err;
+    } else {
+        /* This failed therefore we're on a platform without lockless atomics */
+        if (!TEST_uint_eq((unsigned int)val64, 1)
+                || !TEST_int_eq((unsigned int)ret64, 0))
+            goto err;
+    }
+    val64 = 1;
+    ret64 = 0;
+
+    if (!TEST_true(CRYPTO_atomic_add64(&val64, 2, &ret64, lock)))
+        goto err;
+
+    if (!TEST_uint_eq((unsigned int)val64, 3)
+            || !TEST_uint_eq((unsigned int)val64, (unsigned int)ret64))
+        goto err;
+
     testresult = 1;
err: CRYPTO_THREAD_lock_free(lock); diff --git a/util/libcrypto.num b/util/libcrypto.num index 2c9292d4abe..8dc578670b0 100644 --- a/util/libcrypto.num +++ b/util/libcrypto.num @@ -5688,3 +5688,5 @@ i2d_ATTRIBUTES_SYNTAX ? 3_4_0 EXIST::FUNCTION: ATTRIBUTES_SYNTAX_free ? 3_4_0 EXIST::FUNCTION: ATTRIBUTES_SYNTAX_new ? 3_4_0 EXIST::FUNCTION: ATTRIBUTES_SYNTAX_it ? 3_4_0 EXIST::FUNCTION: +CRYPTO_atomic_add64 ? 3_4_0 EXIST::FUNCTION: +CRYPTO_atomic_and ? 3_4_0 EXIST::FUNCTION: -- 2.47.2
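
Usage illustration (not part of the patch): the sketch below shows how the two
new public functions added by this change are intended to be called. It assumes
OpenSSL 3.4 or later headers and linking against libcrypto; the caller-supplied
CRYPTO_RWLOCK is only taken on platforms without lockless 64-bit atomics, which
is exactly the fallback path this patch relies on for VS2010 x86.

    /*
     * Minimal sketch of CRYPTO_atomic_add64()/CRYPTO_atomic_and() usage,
     * assuming OpenSSL >= 3.4 (where both functions first appear).
     * On platforms with lockless 64-bit atomics the lock is unused; on
     * platforms without them, passing a NULL lock makes the calls fail,
     * so a real fallback lock is created here.
     */
    #include <stdio.h>
    #include <stdint.h>
    #include <openssl/crypto.h>

    int main(void)
    {
        uint64_t users = 0, ret = 0;
        CRYPTO_RWLOCK *lock = CRYPTO_THREAD_lock_new(); /* fallback lock */

        if (lock == NULL)
            return 1;

        /* users += 3; ret receives the post-add value (3) */
        if (!CRYPTO_atomic_add64(&users, 3, &ret, lock))
            goto err;
        printf("after add: %llu\n", (unsigned long long)ret);

        /* users &= 1; ret receives the post-and value (1) */
        if (!CRYPTO_atomic_and(&users, 1, &ret, lock))
            goto err;
        printf("after and: %llu\n", (unsigned long long)ret);

        CRYPTO_THREAD_lock_free(lock);
        return 0;
     err:
        CRYPTO_THREAD_lock_free(lock);
        return 1;
    }

Passing the same lock to every operation on a given variable mirrors the
documented constraint above: once a variable is managed through these
functions, they must be the only way it is read or modified.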