/*
 * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/* We need to use the OPENSSL_fork_*() deprecated APIs */
#define OPENSSL_SUPPRESS_DEPRECATED

#include <openssl/crypto.h>
#include <crypto/cryptlib.h>
#include "internal/cryptlib.h"
#include "internal/rcu.h"
#include "rcu_internal.h"

#if defined(__clang__) && defined(__has_feature)
# if __has_feature(thread_sanitizer)
#  define __SANITIZE_THREAD__
# endif
#endif

#if defined(__SANITIZE_THREAD__)
# include <sanitizer/tsan_interface.h>
# define TSAN_FAKE_UNLOCK(x)   __tsan_mutex_pre_unlock((x), 0); \
                               __tsan_mutex_post_unlock((x), 0)

# define TSAN_FAKE_LOCK(x)  __tsan_mutex_pre_lock((x), 0); \
                            __tsan_mutex_post_lock((x), 0, 0)
#else
# define TSAN_FAKE_UNLOCK(x)
# define TSAN_FAKE_LOCK(x)
#endif

#if defined(__sun)
# include <atomic.h>
#endif

#if defined(__apple_build_version__) && __apple_build_version__ < 6000000
/*
 * OS X 10.7 and 10.8 had a weird version of clang which has __ATOMIC_ACQUIRE
 * and __ATOMIC_ACQ_REL, but which expects only one parameter for
 * __atomic_is_lock_free() rather than two, i.e. its signature is
 * __atomic_is_lock_free(sizeof(_Atomic(T))).  All of this makes it
 * impossible to use __atomic_is_lock_free here.
 *
 * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760
 */
# define BROKEN_CLANG_ATOMICS
#endif

#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS)

# if defined(OPENSSL_SYS_UNIX)
#  include <sys/types.h>
#  include <unistd.h>
# endif

# include <assert.h>

# ifdef PTHREAD_RWLOCK_INITIALIZER
#  define USE_RWLOCK
# endif

/*
 * For all GNU/clang atomic builtins, we also need fallbacks, to cover all
 * other compilers.
 *
 * Unfortunately, we can't do that with some "generic type", because there's no
 * guarantee that the chosen generic type is large enough to cover all cases.
 * Therefore, we implement fallbacks for each applicable type, with composed
 * names that include the type they handle.
 *
 * (an anecdote: we previously tried to use |void *| as the generic type, with
 * the thought that the pointer itself is the largest type.  However, this is
 * not true on 32-bit pointer platforms, as a |uint64_t| is twice as large)
 *
 * All applicable ATOMIC_ macros take the intended type as first parameter, so
 * they can map to the correct fallback function.  In the GNU/clang case, that
 * parameter is simply ignored.
 */

/*
 * Internal types used with the ATOMIC_ macros, to make it possible to compose
 * fallback function names.
 */
typedef void *pvoid;
typedef struct rcu_cb_item *prcu_cb_item;
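
/*
 * For example, with the mutex-based fallbacks below in effect,
 * ATOMIC_LOAD_N(pvoid, p, o) expands to fallback_atomic_load_n_pvoid(p),
 * while the GNU/clang variant expands to __atomic_load_n(p, o) and ignores
 * the type parameter entirely.
 */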

# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) \
    && !defined(USE_ATOMIC_FALLBACKS)
#  if defined(__APPLE__) && defined(__clang__) && defined(__aarch64__)
/*
 * For pointers, the Apple M1 virtualized cpu seems to have some problem using
 * the ldapr instruction (see https://github.com/openssl/openssl/pull/23974).
 * When using the native apple clang compiler, this instruction is emitted for
 * atomic loads, which is bad.  So, if
 * 1) We are building on a target that defines __APPLE__ AND
 * 2) We are building on a target using clang (__clang__) AND
 * 3) We are building for an M1 processor (__aarch64__)
 * then we should not use __atomic_load_n and should instead implement our own
 * function that issues the ldar instruction, which produces the proper
 * sequencing guarantees.
 */
static inline void *apple_atomic_load_n_pvoid(void **p,
                                              ossl_unused int memorder)
{
    void *ret;

    __asm volatile("ldar %0, [%1]" : "=r" (ret): "r" (p):);

    return ret;
}

/* For uint64_t, we should be fine, though */
#   define apple_atomic_load_n_uint64_t(p, o) __atomic_load_n(p, o)

#   define ATOMIC_LOAD_N(t, p, o) apple_atomic_load_n_##t(p, o)
#  else
#   define ATOMIC_LOAD_N(t, p, o) __atomic_load_n(p, o)
#  endif
#  define ATOMIC_STORE_N(t, p, v, o) __atomic_store_n(p, v, o)
#  define ATOMIC_STORE(t, p, v, o) __atomic_store(p, v, o)
#  define ATOMIC_EXCHANGE_N(t, p, v, o) __atomic_exchange_n(p, v, o)
#  define ATOMIC_ADD_FETCH(p, v, o) __atomic_add_fetch(p, v, o)
#  define ATOMIC_FETCH_ADD(p, v, o) __atomic_fetch_add(p, v, o)
#  define ATOMIC_SUB_FETCH(p, v, o) __atomic_sub_fetch(p, v, o)
#  define ATOMIC_AND_FETCH(p, m, o) __atomic_and_fetch(p, m, o)
#  define ATOMIC_OR_FETCH(p, m, o) __atomic_or_fetch(p, m, o)
# else
static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER;

#  define IMPL_fallback_atomic_load_n(t)                \
    static inline t fallback_atomic_load_n_##t(t *p)    \
    {                                                   \
        t ret;                                          \
                                                        \
        pthread_mutex_lock(&atomic_sim_lock);           \
        ret = *p;                                       \
        pthread_mutex_unlock(&atomic_sim_lock);         \
        return ret;                                     \
    }
IMPL_fallback_atomic_load_n(uint64_t)
IMPL_fallback_atomic_load_n(pvoid)

#  define ATOMIC_LOAD_N(t, p, o) fallback_atomic_load_n_##t(p)

#  define IMPL_fallback_atomic_store_n(t)                  \
    static inline t fallback_atomic_store_n_##t(t *p, t v) \
    {                                                      \
        t ret;                                             \
                                                           \
        pthread_mutex_lock(&atomic_sim_lock);              \
        ret = *p;                                          \
        *p = v;                                            \
        pthread_mutex_unlock(&atomic_sim_lock);            \
        return ret;                                        \
    }
IMPL_fallback_atomic_store_n(uint64_t)

#  define ATOMIC_STORE_N(t, p, v, o) fallback_atomic_store_n_##t(p, v)

#  define IMPL_fallback_atomic_store(t)                       \
    static inline void fallback_atomic_store_##t(t *p, t *v)  \
    {                                                         \
        pthread_mutex_lock(&atomic_sim_lock);                 \
        *p = *v;                                              \
        pthread_mutex_unlock(&atomic_sim_lock);               \
    }
IMPL_fallback_atomic_store(uint64_t)
IMPL_fallback_atomic_store(pvoid)

#  define ATOMIC_STORE(t, p, v, o) fallback_atomic_store_##t(p, v)

#  define IMPL_fallback_atomic_exchange_n(t)                  \
    static inline t fallback_atomic_exchange_n_##t(t *p, t v) \
    {                                                         \
        t ret;                                                \
                                                              \
        pthread_mutex_lock(&atomic_sim_lock);                 \
        ret = *p;                                             \
        *p = v;                                               \
        pthread_mutex_unlock(&atomic_sim_lock);               \
        return ret;                                           \
    }
IMPL_fallback_atomic_exchange_n(uint64_t)
IMPL_fallback_atomic_exchange_n(prcu_cb_item)

#  define ATOMIC_EXCHANGE_N(t, p, v, o) fallback_atomic_exchange_n_##t(p, v)

/*
 * The fallbacks that follow don't need any per type implementation, as
 * they are designed for uint64_t only.  If there comes a time when multiple
 * types need to be covered, it's relatively easy to refactor them the same
 * way as the fallbacks above.
 */

static inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p += v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v)

static inline uint64_t fallback_atomic_fetch_add(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    ret = *p;
    *p += v;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_FETCH_ADD(p, v, o) fallback_atomic_fetch_add(p, v)

static inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p -= v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v)

static inline uint64_t fallback_atomic_and_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p &= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_AND_FETCH(p, v, o) fallback_atomic_and_fetch(p, v)

static inline uint64_t fallback_atomic_or_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p |= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_OR_FETCH(p, v, o) fallback_atomic_or_fetch(p, v)
# endif

/*
 * users is broken up into 2 parts:
 * bits 0-15:  current reader count
 * bits 32-63: ID
 */
# define READER_SHIFT 0
# define ID_SHIFT 32
# define READER_SIZE 16
# define ID_SIZE 32

# define READER_MASK     (((uint64_t)1 << READER_SIZE) - 1)
# define ID_MASK         (((uint64_t)1 << ID_SIZE) - 1)
# define READER_COUNT(x) (((uint64_t)(x) >> READER_SHIFT) & READER_MASK)
# define ID_VAL(x)       (((uint64_t)(x) >> ID_SHIFT) & ID_MASK)
# define VAL_READER      ((uint64_t)1 << READER_SHIFT)
# define VAL_ID(x)       ((uint64_t)(x) << ID_SHIFT)
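
/*
 * Worked example (hypothetical value): if users == VAL_ID(3) | (2 * VAL_READER),
 * i.e. 0x0000000300000002, then READER_COUNT(users) == 2 (two readers hold the
 * qp) and ID_VAL(users) == 3 (this qp retires in generation 3).
 */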

/*
 * This is the core of an rcu lock.  It tracks the readers and writers for the
 * current quiescence point for a given lock.  users is the 64-bit value that
 * stores the READERS/ID as defined above.
 */
struct rcu_qp {
    uint64_t users;
};

struct thread_qp {
    struct rcu_qp *qp;
    unsigned int depth;
    CRYPTO_RCU_LOCK *lock;
};

# define MAX_QPS 10
/*
 * This is the per-thread tracking data that is assigned to each thread
 * participating in an rcu qp.
 *
 * qp points to the qp that the thread last acquired.
 */
struct rcu_thr_data {
    struct thread_qp thread_qps[MAX_QPS];
};

/*
 * This is the internal version of a CRYPTO_RCU_LOCK;
 * it is cast from CRYPTO_RCU_LOCK.
 */
struct rcu_lock_st {
    /* Callbacks to call for the next ossl_synchronize_rcu */
    struct rcu_cb_item *cb_items;

    /* The context we are being created against */
    OSSL_LIB_CTX *ctx;

    /* rcu generation counter for in-order retirement */
    uint32_t id_ctr;

    /* Array of quiescent points for synchronization */
    struct rcu_qp *qp_group;

    /* Number of elements in qp_group array */
    size_t group_count;

    /* Index of the current qp in the qp_group array */
    uint64_t reader_idx;

    /* value of the next id_ctr value to be retired */
    uint32_t next_to_retire;

    /* index of the next free rcu_qp in the qp_group */
    uint64_t current_alloc_idx;

    /* number of qp's in qp_group array currently being retired */
    uint32_t writers_alloced;

    /* lock protecting write side operations */
    pthread_mutex_t write_lock;

    /* lock protecting updates to writers_alloced/current_alloc_idx */
    pthread_mutex_t alloc_lock;

    /* signal to wake threads waiting on alloc_lock */
    pthread_cond_t alloc_signal;

    /* lock to enforce in-order retirement */
    pthread_mutex_t prior_lock;

    /* signal to wake threads waiting on prior_lock */
    pthread_cond_t prior_signal;
};

/* Read side acquisition of the current qp */
static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
{
    uint64_t qp_idx;

    /* get the current qp index */
    for (;;) {
        /*
         * Notes on use of __ATOMIC_ACQUIRE
         * We need to ensure the following:
         * 1) That subsequent operations aren't optimized by hoisting them
         *    above this operation.  Specifically, we don't want the below
         *    re-load of qp_idx to get optimized away.
         * 2) We want to ensure that any updating of reader_idx on the write
         *    side of the lock is flushed from a local cpu cache so that we
         *    see any updates prior to the load.  This is a non-issue on cache
         *    coherent systems like x86, but is relevant on other arches.
         * Note: This applies to the reload below as well.
         */
        qp_idx = ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE);

        /*
         * Notes on use of __ATOMIC_RELEASE
         * This counter is only read by the write side of the lock, and so we
         * specify __ATOMIC_RELEASE here to ensure that the write side of the
         * lock sees this during the spin loop read of users, as it waits for
         * the reader count to approach zero.
         */
        ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);

        /* if the idx hasn't changed, we're good, else try again */
        if (qp_idx == ATOMIC_LOAD_N(uint64_t, &lock->reader_idx,
                                    __ATOMIC_ACQUIRE))
            break;

        /*
         * Notes on use of __ATOMIC_RELEASE
         * As with the add above, we want to ensure that this decrement is
         * seen by the write side of the lock as soon as it happens to prevent
         * undue spinning waiting for write side completion.
         */
        ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);
    }

    return &lock->qp_group[qp_idx];
}

static void ossl_rcu_free_local_data(void *arg)
{
    OSSL_LIB_CTX *ctx = arg;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);

    OPENSSL_free(data);
}

void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_thr_data *data;
    int i, available_qp = -1;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);

    /*
     * we're going to access current_qp here so ask the
     * processor to fetch it
     */
    data = CRYPTO_THREAD_get_local(lkey);

    if (data == NULL) {
        data = OPENSSL_zalloc(sizeof(*data));
        OPENSSL_assert(data != NULL);
        CRYPTO_THREAD_set_local(lkey, data);
        ossl_init_thread_start(NULL, lock->ctx, ossl_rcu_free_local_data);
    }

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].qp == NULL && available_qp == -1)
            available_qp = i;
        /* If we have a hold on this lock already, we're good */
        if (data->thread_qps[i].lock == lock) {
            data->thread_qps[i].depth++;
            return;
        }
    }

    /*
     * if we get here, then we don't have a hold on this lock yet
     */
    assert(available_qp != -1);

    data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
    data->thread_qps[available_qp].depth = 1;
    data->thread_qps[available_qp].lock = lock;
}

void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
{
    int i;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);
    uint64_t ret;

    assert(data != NULL);

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].lock == lock) {
            /*
             * As with read side acquisition, we use __ATOMIC_RELEASE here
             * to ensure that the decrement is published immediately
             * to any write side waiters
             */
            data->thread_qps[i].depth--;
            if (data->thread_qps[i].depth == 0) {
                ret = ATOMIC_SUB_FETCH(&data->thread_qps[i].qp->users,
                                       VAL_READER, __ATOMIC_RELEASE);
                OPENSSL_assert(ret != UINT64_MAX);
                data->thread_qps[i].qp = NULL;
                data->thread_qps[i].lock = NULL;
            }
            return;
        }
    }
    /*
     * If we get here, we're trying to unlock a lock that we never acquired -
     * that's fatal.
     */
    assert(0);
}

/*
 * Write side allocation routine to get the current qp
 * and replace it with a new one
 */
static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
{
    uint64_t new_id;
    uint64_t current_idx;

    pthread_mutex_lock(&lock->alloc_lock);

    /*
     * we need at least one qp to be available with one
     * left over, so that readers can start working on
     * one that isn't yet being waited on
     */
    while (lock->group_count - lock->writers_alloced < 2)
        /* we have to wait for one to be free */
        pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock);

    current_idx = lock->current_alloc_idx;

    /* Allocate the qp */
    lock->writers_alloced++;

    /* increment the allocation index */
    lock->current_alloc_idx =
        (lock->current_alloc_idx + 1) % lock->group_count;

    /* get and insert a new id */
    new_id = lock->id_ctr;
    lock->id_ctr++;

    new_id = VAL_ID(new_id);
    /*
     * Even though we are under a write side lock here,
     * we need to use atomic instructions to ensure that the results
     * of this update are published to the read side prior to updating the
     * reader idx below
     */
    ATOMIC_AND_FETCH(&lock->qp_group[current_idx].users, ID_MASK,
                     __ATOMIC_RELEASE);
    ATOMIC_OR_FETCH(&lock->qp_group[current_idx].users, new_id,
                    __ATOMIC_RELEASE);

    /*
     * Update the reader index to point at the newly allocated qp.
     * Note the use of __ATOMIC_RELEASE here is based on the corresponding use
     * of __ATOMIC_ACQUIRE in get_hold_current_qp, as we want any publication
     * of this value to be seen on the read side immediately after it happens
     */
    ATOMIC_STORE_N(uint64_t, &lock->reader_idx, lock->current_alloc_idx,
                   __ATOMIC_RELEASE);

    /* wake up any waiters */
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
    return &lock->qp_group[current_idx];
}

static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp)
{
    pthread_mutex_lock(&lock->alloc_lock);
    lock->writers_alloced--;
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
}

static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock,
                                            int count)
{
    struct rcu_qp *new =
        OPENSSL_zalloc(sizeof(*new) * count);

    lock->group_count = count;
    return new;
}

void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_lock(&lock->write_lock);
    TSAN_FAKE_UNLOCK(&lock->write_lock);
}

void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
{
    TSAN_FAKE_LOCK(&lock->write_lock);
    pthread_mutex_unlock(&lock->write_lock);
}

void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_qp *qp;
    uint64_t count;
    struct rcu_cb_item *cb_items, *tmpcb;

    pthread_mutex_lock(&lock->write_lock);
    cb_items = lock->cb_items;
    lock->cb_items = NULL;
    pthread_mutex_unlock(&lock->write_lock);

    qp = update_qp(lock);

    /*
     * wait for the reader count to reach zero
     * Note the use of __ATOMIC_ACQUIRE here to ensure that any
     * prior __ATOMIC_RELEASE write operation in get_hold_current_qp
     * is visible prior to our read
     */
    do {
        count = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE);
    } while (READER_COUNT(count) != 0);

    /* retire in order */
    pthread_mutex_lock(&lock->prior_lock);
    while (lock->next_to_retire != ID_VAL(count))
        pthread_cond_wait(&lock->prior_signal, &lock->prior_lock);
    lock->next_to_retire++;
    pthread_cond_broadcast(&lock->prior_signal);
    pthread_mutex_unlock(&lock->prior_lock);

    retire_qp(lock, qp);

    /* handle any callbacks that we have */
    while (cb_items != NULL) {
        tmpcb = cb_items;
        cb_items = cb_items->next;
        tmpcb->fn(tmpcb->data);
        OPENSSL_free(tmpcb);
    }
}

int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
{
    struct rcu_cb_item *new =
        OPENSSL_zalloc(sizeof(*new));

    if (new == NULL)
        return 0;

    new->data = data;
    new->fn = cb;
    /*
     * Use __ATOMIC_ACQ_REL here to indicate that any prior writes to this
     * list are visible to us prior to reading, and publish the new value
     * immediately
     */
    new->next = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, new,
                                  __ATOMIC_ACQ_REL);

    return 1;
}

void *ossl_rcu_uptr_deref(void **p)
{
    return ATOMIC_LOAD_N(pvoid, p, __ATOMIC_ACQUIRE);
}

void ossl_rcu_assign_uptr(void **p, void **v)
{
    ATOMIC_STORE(pvoid, p, v, __ATOMIC_RELEASE);
}
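
/*
 * Illustrative usage sketch (the struct and variable names here are
 * hypothetical, not part of this file).  A writer publishes a new object and
 * reclaims the old one only after every reader that could see it has left:
 *
 *     new_conf = OPENSSL_memdup(old_conf, sizeof(*old_conf));
 *     ossl_rcu_write_lock(lock);
 *     ossl_rcu_assign_uptr((void **)&shared_conf, (void **)&new_conf);
 *     ossl_rcu_write_unlock(lock);
 *     ossl_synchronize_rcu(lock);      <- wait out all current readers
 *     OPENSSL_free(old_conf);
 *
 * (alternatively, ossl_rcu_call(lock, free_cb, old_conf) defers the free to
 * the next ossl_synchronize_rcu).  Readers dereference under the read lock:
 *
 *     ossl_rcu_read_lock(lock);
 *     conf = ossl_rcu_uptr_deref((void **)&shared_conf);
 *     ... read-only use of conf ...
 *     ossl_rcu_read_unlock(lock);
 */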

CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
{
    struct rcu_lock_st *new;

    if (num_writers < 1)
        num_writers = 1;

    ctx = ossl_lib_ctx_get_concrete(ctx);
    if (ctx == NULL)
        return NULL;

    new = OPENSSL_zalloc(sizeof(*new));
    if (new == NULL)
        return NULL;

    new->ctx = ctx;
    pthread_mutex_init(&new->write_lock, NULL);
    pthread_mutex_init(&new->prior_lock, NULL);
    pthread_mutex_init(&new->alloc_lock, NULL);
    pthread_cond_init(&new->prior_signal, NULL);
    pthread_cond_init(&new->alloc_signal, NULL);
    new->qp_group = allocate_new_qp_group(new, num_writers + 1);
    if (new->qp_group == NULL) {
        OPENSSL_free(new);
        new = NULL;
    }
    return new;
}

void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock;

    if (lock == NULL)
        return;

    /* make sure we're synchronized */
    ossl_synchronize_rcu(rlock);

    OPENSSL_free(rlock->qp_group);
    /* There should only be a single qp left now */
    OPENSSL_free(rlock);
}

CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
{
# ifdef USE_RWLOCK
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    if (pthread_rwlock_init(lock, NULL) != 0) {
        OPENSSL_free(lock);
        return NULL;
    }
# else
    pthread_mutexattr_t attr;
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    /*
     * We don't use recursive mutexes, but try to catch errors if we do.
     */
    pthread_mutexattr_init(&attr);
#  if !defined(__TANDEM) && !defined(_SPT_MODEL_)
#   if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK)
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
#   endif
#  else
    /* The SPT Thread Library does not define MUTEX attributes. */
#  endif

    if (pthread_mutex_init(lock, &attr) != 0) {
        pthread_mutexattr_destroy(&attr);
        OPENSSL_free(lock);
        return NULL;
    }

    pthread_mutexattr_destroy(&attr);
# endif

    return lock;
}

__owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_rdlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

__owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_wrlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_unlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_unlock(lock) != 0) {
        assert(errno != EPERM);
        return 0;
    }
# endif

    return 1;
}

void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
{
    if (lock == NULL)
        return;

# ifdef USE_RWLOCK
    pthread_rwlock_destroy(lock);
# else
    pthread_mutex_destroy(lock);
# endif
    OPENSSL_free(lock);

    return;
}

int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
{
    if (pthread_once(once, init) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
{
    if (pthread_key_create(key, cleanup) != 0)
        return 0;

    return 1;
}

void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
{
    return pthread_getspecific(*key);
}

int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
{
    if (pthread_setspecific(*key, val) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
{
    if (pthread_key_delete(*key) != 0)
        return 0;

    return 1;
}

CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
{
    return pthread_self();
}

int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
{
    return pthread_equal(a, b);
}

int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_int_nv((volatile unsigned int *)val, amount);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val += amount;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}
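
/*
 * Illustrative use (hypothetical names): a reference count can be bumped
 * atomically where the platform allows it, transparently falling back to the
 * supplied lock otherwise:
 *
 *     int newcount;
 *
 *     if (!CRYPTO_atomic_add(&obj->refcount, 1, &newcount, obj->lock))
 *         return 0;
 */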

int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                     CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, op);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *val |= op;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;
    *ret = *val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_store(uint64_t *dst, uint64_t val, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*dst), dst)) {
        __atomic_store(dst, &val, __ATOMIC_RELEASE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (dst != NULL) {
        atomic_swap_64(dst, val);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *dst = val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = (int)atomic_or_uint_nv((unsigned int *)val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;
    *ret = *val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

# ifndef FIPS_MODULE
int openssl_init_fork_handlers(void)
{
    return 1;
}
# endif /* FIPS_MODULE */

int openssl_get_fork_id(void)
{
    return getpid();
}
#endif