/*
 * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/* We need to use the OPENSSL_fork_*() deprecated APIs */
#define OPENSSL_SUPPRESS_DEPRECATED

#include <openssl/crypto.h>
#include <crypto/cryptlib.h>
#include "internal/cryptlib.h"
#include "internal/rcu.h"
#include "rcu_internal.h"

#if defined(__sun)
# include <atomic.h>
#endif

23 | #if defined(__apple_build_version__) && __apple_build_version__ < 6000000 |
24 | /* | |
25 | * OS/X 10.7 and 10.8 had a weird version of clang which has __ATOMIC_ACQUIRE and | |
26 | * __ATOMIC_ACQ_REL but which expects only one parameter for __atomic_is_lock_free() | |
27 | * rather than two which has signature __atomic_is_lock_free(sizeof(_Atomic(T))). | |
28 | * All of this makes impossible to use __atomic_is_lock_free here. | |
29 | * | |
30 | * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760 | |
31 | */ | |
81f39349 | 32 | # define BROKEN_CLANG_ATOMICS |
d39de479 KK |
33 | #endif |
#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS)

# if defined(OPENSSL_SYS_UNIX)
#  include <sys/types.h>
#  include <unistd.h>
# endif

# include <assert.h>

# ifdef PTHREAD_RWLOCK_INITIALIZER
#  define USE_RWLOCK
# endif

/*
 * For all GNU/clang atomic builtins, we also need fallbacks, to cover all
 * other compilers.
 *
 * Unfortunately, we can't do that with some "generic type", because there's no
 * guarantee that the chosen generic type is large enough to cover all cases.
 * Therefore, we implement fallbacks for each applicable type, with composed
 * names that include the type they handle.
 *
 * (an anecdote: we previously tried to use |void *| as the generic type, with
 * the thought that the pointer itself is the largest type.  However, this is
 * not true on 32-bit pointer platforms, as a |uint64_t| is twice as large)
 *
 * All applicable ATOMIC_ macros take the intended type as first parameter, so
 * they can map to the correct fallback function.  In the GNU/clang case, that
 * parameter is simply ignored.
 */

/*
 * Internal types used with the ATOMIC_ macros, to make it possible to compose
 * fallback function names.
 */
typedef void *pvoid;
typedef struct rcu_cb_item *prcu_cb_item;
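
/*
 * Illustrative example of the name composition (not an exhaustive list):
 * ATOMIC_LOAD_N(pvoid, &ptr, __ATOMIC_ACQUIRE) expands to
 * __atomic_load_n(&ptr, __ATOMIC_ACQUIRE) when the GNU/clang builtins are
 * usable (or to the apple_* wrapper on Apple aarch64), and to
 * fallback_atomic_load_n_pvoid(&ptr) otherwise.
 */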

# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) \
     && !defined(USE_ATOMIC_FALLBACKS)
#  if defined(__APPLE__) && defined(__clang__) && defined(__aarch64__)
/*
 * For pointers, the Apple M1 virtualized cpu seems to have some problem using
 * the ldapr instruction (see https://github.com/openssl/openssl/pull/23974).
 * When using the native apple clang compiler, this instruction is emitted for
 * atomic loads, which is bad. So, if
 * 1) We are building on a target that defines __APPLE__ AND
 * 2) We are building on a target using clang (__clang__) AND
 * 3) We are building for an M1 processor (__aarch64__)
 * then we should not use __atomic_load_n and should instead implement our own
 * function that issues the ldar instruction, which provides the proper
 * sequencing guarantees.
 */
static inline void *apple_atomic_load_n_pvoid(void **p,
                                              ossl_unused int memorder)
{
    void *ret;

    __asm volatile("ldar %0, [%1]" : "=r" (ret): "r" (p):);

    return ret;
}

/* For uint64_t, we should be fine, though */
#   define apple_atomic_load_n_uint64_t(p, o) __atomic_load_n(p, o)

#   define ATOMIC_LOAD_N(t, p, o) apple_atomic_load_n_##t(p, o)
#  else
#   define ATOMIC_LOAD_N(t, p, o) __atomic_load_n(p, o)
#  endif
#  define ATOMIC_STORE_N(t, p, v, o) __atomic_store_n(p, v, o)
#  define ATOMIC_STORE(t, p, v, o) __atomic_store(p, v, o)
#  define ATOMIC_EXCHANGE_N(t, p, v, o) __atomic_exchange_n(p, v, o)
#  define ATOMIC_ADD_FETCH(p, v, o) __atomic_add_fetch(p, v, o)
#  define ATOMIC_FETCH_ADD(p, v, o) __atomic_fetch_add(p, v, o)
#  define ATOMIC_SUB_FETCH(p, v, o) __atomic_sub_fetch(p, v, o)
#  define ATOMIC_AND_FETCH(p, m, o) __atomic_and_fetch(p, m, o)
#  define ATOMIC_OR_FETCH(p, m, o) __atomic_or_fetch(p, m, o)
# else
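/*
 * Fallback path: no usable compiler atomics, so every "atomic" operation
 * below is simulated by serializing on this single process-wide mutex.
 */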
static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER;

#  define IMPL_fallback_atomic_load_n(t)                        \
    static inline t fallback_atomic_load_n_##t(t *p)            \
    {                                                           \
        t ret;                                                  \
                                                                \
        pthread_mutex_lock(&atomic_sim_lock);                   \
        ret = *p;                                               \
        pthread_mutex_unlock(&atomic_sim_lock);                 \
        return ret;                                             \
    }
IMPL_fallback_atomic_load_n(uint64_t)
IMPL_fallback_atomic_load_n(pvoid)

#  define ATOMIC_LOAD_N(t, p, o) fallback_atomic_load_n_##t(p)

#  define IMPL_fallback_atomic_store_n(t)                       \
    static inline t fallback_atomic_store_n_##t(t *p, t v)      \
    {                                                           \
        t ret;                                                  \
                                                                \
        pthread_mutex_lock(&atomic_sim_lock);                   \
        ret = *p;                                               \
        *p = v;                                                 \
        pthread_mutex_unlock(&atomic_sim_lock);                 \
        return ret;                                             \
    }
IMPL_fallback_atomic_store_n(uint64_t)

#  define ATOMIC_STORE_N(t, p, v, o) fallback_atomic_store_n_##t(p, v)

#  define IMPL_fallback_atomic_store(t)                         \
    static inline void fallback_atomic_store_##t(t *p, t *v)    \
    {                                                           \
        pthread_mutex_lock(&atomic_sim_lock);                   \
        *p = *v;                                                \
        pthread_mutex_unlock(&atomic_sim_lock);                 \
    }
IMPL_fallback_atomic_store(uint64_t)
IMPL_fallback_atomic_store(pvoid)

#  define ATOMIC_STORE(t, p, v, o) fallback_atomic_store_##t(p, v)

#  define IMPL_fallback_atomic_exchange_n(t)                    \
    static inline t fallback_atomic_exchange_n_##t(t *p, t v)   \
    {                                                           \
        t ret;                                                  \
                                                                \
        pthread_mutex_lock(&atomic_sim_lock);                   \
        ret = *p;                                               \
        *p = v;                                                 \
        pthread_mutex_unlock(&atomic_sim_lock);                 \
        return ret;                                             \
    }
IMPL_fallback_atomic_exchange_n(uint64_t)
IMPL_fallback_atomic_exchange_n(prcu_cb_item)

#  define ATOMIC_EXCHANGE_N(t, p, v, o) fallback_atomic_exchange_n_##t(p, v)

174 | /* |
175 | * The fallbacks that follow don't need any per type implementation, as | |
176 | * they are designed for uint64_t only. If there comes a time when multiple | |
177 | * types need to be covered, it's relatively easy to refactor them the same | |
178 | * way as the fallbacks above. | |
179 | */ | |
d0e1a0ae NH |
180 | |
181 | static inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v) | |
182 | { | |
183 | uint64_t ret; | |
184 | ||
185 | pthread_mutex_lock(&atomic_sim_lock); | |
186 | *p += v; | |
187 | ret = *p; | |
188 | pthread_mutex_unlock(&atomic_sim_lock); | |
189 | return ret; | |
190 | } | |
191 | ||
81f39349 | 192 | # define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v) |
d0e1a0ae NH |
193 | |
194 | static inline uint64_t fallback_atomic_fetch_add(uint64_t *p, uint64_t v) | |
195 | { | |
196 | uint64_t ret; | |
197 | ||
198 | pthread_mutex_lock(&atomic_sim_lock); | |
199 | ret = *p; | |
200 | *p += v; | |
201 | pthread_mutex_unlock(&atomic_sim_lock); | |
202 | return ret; | |
203 | } | |
204 | ||
81f39349 | 205 | # define ATOMIC_FETCH_ADD(p, v, o) fallback_atomic_fetch_add(p, v) |
d0e1a0ae NH |
206 | |
207 | static inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v) | |
208 | { | |
209 | uint64_t ret; | |
210 | ||
211 | pthread_mutex_lock(&atomic_sim_lock); | |
212 | *p -= v; | |
213 | ret = *p; | |
214 | pthread_mutex_unlock(&atomic_sim_lock); | |
215 | return ret; | |
216 | } | |
217 | ||
81f39349 | 218 | # define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v) |
d0e1a0ae NH |
219 | |
220 | static inline uint64_t fallback_atomic_and_fetch(uint64_t *p, uint64_t m) | |
221 | { | |
222 | uint64_t ret; | |
223 | ||
224 | pthread_mutex_lock(&atomic_sim_lock); | |
225 | *p &= m; | |
226 | ret = *p; | |
227 | pthread_mutex_unlock(&atomic_sim_lock); | |
228 | return ret; | |
229 | } | |
230 | ||
81f39349 | 231 | # define ATOMIC_AND_FETCH(p, v, o) fallback_atomic_and_fetch(p, v) |
d0e1a0ae NH |
232 | |
233 | static inline uint64_t fallback_atomic_or_fetch(uint64_t *p, uint64_t m) | |
234 | { | |
235 | uint64_t ret; | |
236 | ||
237 | pthread_mutex_lock(&atomic_sim_lock); | |
238 | *p |= m; | |
239 | ret = *p; | |
240 | pthread_mutex_unlock(&atomic_sim_lock); | |
241 | return ret; | |
242 | } | |
243 | ||
81f39349 RL |
244 | # define ATOMIC_OR_FETCH(p, v, o) fallback_atomic_or_fetch(p, v) |
245 | # endif | |
d0e1a0ae | 246 | |
d0e1a0ae NH |
/*
 * users is broken up into 2 parts
 * bits 0-15  - current readers
 * bits 32-63 - ID
 */
# define READER_SHIFT 0
# define ID_SHIFT 32
# define READER_SIZE 16
# define ID_SIZE 32

# define READER_MASK     (((uint64_t)1 << READER_SIZE) - 1)
# define ID_MASK         (((uint64_t)1 << ID_SIZE) - 1)
# define READER_COUNT(x) (((uint64_t)(x) >> READER_SHIFT) & READER_MASK)
# define ID_VAL(x)       (((uint64_t)(x) >> ID_SHIFT) & ID_MASK)
# define VAL_READER      ((uint64_t)1 << READER_SHIFT)
# define VAL_ID(x)       ((uint64_t)x << ID_SHIFT)
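
/*
 * Worked example (illustrative values only):
 * users == VAL_ID(3) | (2 * VAL_READER) encodes generation ID 3 in bits 32-63
 * and 2 active readers in bits 0-15, so ID_VAL(users) == 3 and
 * READER_COUNT(users) == 2.
 */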

/*
 * This is the core of an rcu lock. It tracks the readers and writers for the
 * current quiescence point for a given lock. Users is the 64 bit value that
 * stores the READERS/ID as defined above
 */
struct rcu_qp {
    uint64_t users;
};

struct thread_qp {
    struct rcu_qp *qp;
    unsigned int depth;
    CRYPTO_RCU_LOCK *lock;
};

# define MAX_QPS 10
/*
 * This is the per thread tracking data
 * that is assigned to each thread participating
 * in an rcu qp
 *
 * qp points to the qp that it last acquired
 */
struct rcu_thr_data {
    struct thread_qp thread_qps[MAX_QPS];
};

/*
 * This is the internal version of a CRYPTO_RCU_LOCK
 * it is cast from CRYPTO_RCU_LOCK
 */
struct rcu_lock_st {
    /* Callbacks to call for next ossl_synchronize_rcu */
    struct rcu_cb_item *cb_items;

    /* The context we are being created against */
    OSSL_LIB_CTX *ctx;

    /* rcu generation counter for in-order retirement */
    uint32_t id_ctr;

    /* Array of quiescent points for synchronization */
    struct rcu_qp *qp_group;

    /* Number of elements in qp_group array */
    size_t group_count;

    /* Index of the current qp in the qp_group array */
    uint64_t reader_idx;

    /* value of the next id_ctr value to be retired */
    uint32_t next_to_retire;

    /* index of the next free rcu_qp in the qp_group */
    uint64_t current_alloc_idx;

    /* number of qp's in qp_group array currently being retired */
    uint32_t writers_alloced;

    /* lock protecting write side operations */
    pthread_mutex_t write_lock;

    /* lock protecting updates to writers_alloced/current_alloc_idx */
    pthread_mutex_t alloc_lock;

    /* signal to wake threads waiting on alloc_lock */
    pthread_cond_t alloc_signal;

    /* lock to enforce in-order retirement */
    pthread_mutex_t prior_lock;

    /* signal to wake threads waiting on prior_lock */
    pthread_cond_t prior_signal;
};

/* Read side acquisition of the current qp */
static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
{
    uint64_t qp_idx;

    /* get the current qp index */
    for (;;) {
        /*
         * Notes on use of __ATOMIC_ACQUIRE
         * We need to ensure the following:
         * 1) That subsequent operations aren't optimized by hoisting them above
         *    this operation.  Specifically, we don't want the below re-load of
         *    qp_idx to get optimized away
         * 2) We want to ensure that any updating of reader_idx on the write
         *    side of the lock is flushed from a local cpu cache so that we see
         *    any updates prior to the load.  This is a non-issue on cache
         *    coherent systems like x86, but is relevant on other arches
         * Note: This applies to the reload below as well
         */
        qp_idx = ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE);

        /*
         * Notes on use of __ATOMIC_RELEASE
         * This counter is only read by the write side of the lock, and so we
         * specify __ATOMIC_RELEASE here to ensure that the write side of the
         * lock sees this during the spin loop read of users, as it waits for
         * the reader count to approach zero
         */
        ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);

        /* if the idx hasn't changed, we're good, else try again */
        if (qp_idx == ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE))
            break;

        /*
         * Notes on use of __ATOMIC_RELEASE
         * As with the add above, we want to ensure that this decrement is
         * seen by the write side of the lock as soon as it happens to prevent
         * undue spinning waiting for write side completion
         */
        ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);
    }

    return &lock->qp_group[qp_idx];
}

static void ossl_rcu_free_local_data(void *arg)
{
    OSSL_LIB_CTX *ctx = arg;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);

    OPENSSL_free(data);
}

void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_thr_data *data;
    int i, available_qp = -1;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);

    /*
     * we're going to access current_qp here so ask the
     * processor to fetch it
     */
    data = CRYPTO_THREAD_get_local(lkey);

    if (data == NULL) {
        data = OPENSSL_zalloc(sizeof(*data));
        OPENSSL_assert(data != NULL);
        CRYPTO_THREAD_set_local(lkey, data);
        ossl_init_thread_start(NULL, lock->ctx, ossl_rcu_free_local_data);
    }

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].qp == NULL && available_qp == -1)
            available_qp = i;
        /* If we have a hold on this lock already, we're good */
        if (data->thread_qps[i].lock == lock) {
            data->thread_qps[i].depth++;
            return;
        }
    }

    /*
     * if we get here, then we don't have a hold on this lock yet
     */
    assert(available_qp != -1);

    data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
    data->thread_qps[available_qp].depth = 1;
    data->thread_qps[available_qp].lock = lock;
}

void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
{
    int i;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);
    uint64_t ret;

    assert(data != NULL);

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].lock == lock) {
            /*
             * As with read side acquisition, we use __ATOMIC_RELEASE here
             * to ensure that the decrement is published immediately
             * to any write side waiters
             */
            data->thread_qps[i].depth--;
            if (data->thread_qps[i].depth == 0) {
                ret = ATOMIC_SUB_FETCH(&data->thread_qps[i].qp->users, VAL_READER,
                                       __ATOMIC_RELEASE);
                OPENSSL_assert(ret != UINT64_MAX);
                data->thread_qps[i].qp = NULL;
                data->thread_qps[i].lock = NULL;
            }
            return;
        }
    }
    /*
     * If we get here, we're trying to unlock a lock that we never acquired -
     * that's fatal.
     */
    assert(0);
}

/*
 * Write side allocation routine to get the current qp
 * and replace it with a new one
 */
static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
{
    uint64_t new_id;
    uint64_t current_idx;

    pthread_mutex_lock(&lock->alloc_lock);

    /*
     * we need at least one qp to be available with one
     * left over, so that readers can start working on
     * one that isn't yet being waited on
     */
    while (lock->group_count - lock->writers_alloced < 2)
        /* we have to wait for one to be free */
        pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock);

    current_idx = lock->current_alloc_idx;

    /* Allocate the qp */
    lock->writers_alloced++;

    /* increment the allocation index */
    lock->current_alloc_idx =
        (lock->current_alloc_idx + 1) % lock->group_count;

    /* get and insert a new id */
    new_id = lock->id_ctr;
    lock->id_ctr++;

    new_id = VAL_ID(new_id);
    /*
     * Even though we are under a write side lock here,
     * we need to use atomic instructions to ensure that the results
     * of this update are published to the read side prior to updating the
     * reader idx below
     */
    ATOMIC_AND_FETCH(&lock->qp_group[current_idx].users, ID_MASK,
                     __ATOMIC_RELEASE);
    ATOMIC_OR_FETCH(&lock->qp_group[current_idx].users, new_id,
                    __ATOMIC_RELEASE);

    /*
     * Update the reader index to be the prior qp.
     * Note the use of __ATOMIC_RELEASE here is based on the corresponding use
     * of __ATOMIC_ACQUIRE in get_hold_current_qp, as we want any publication
     * of this value to be seen on the read side immediately after it happens
     */
    ATOMIC_STORE_N(uint64_t, &lock->reader_idx, lock->current_alloc_idx,
                   __ATOMIC_RELEASE);

    /* wake up any waiters */
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
    return &lock->qp_group[current_idx];
}

static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp)
{
    pthread_mutex_lock(&lock->alloc_lock);
    lock->writers_alloced--;
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
}

static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock,
                                            int count)
{
    struct rcu_qp *new =
        OPENSSL_zalloc(sizeof(*new) * count);

    lock->group_count = count;
    return new;
}

void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_lock(&lock->write_lock);
}

void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_unlock(&lock->write_lock);
}

void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_qp *qp;
    uint64_t count;
    struct rcu_cb_item *cb_items, *tmpcb;

    /*
     * __ATOMIC_ACQ_REL is used here to ensure that we get any prior published
     * writes before we read, and publish our write immediately
     */
    cb_items = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, NULL,
                                 __ATOMIC_ACQ_REL);

    qp = update_qp(lock);

    /*
     * wait for the reader count to reach zero
     * Note the use of __ATOMIC_ACQUIRE here to ensure that any
     * prior __ATOMIC_RELEASE write operation in get_hold_current_qp
     * is visible prior to our read
     */
    do {
        count = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE);
    } while (READER_COUNT(count) != 0);

    /* retire in order */
    pthread_mutex_lock(&lock->prior_lock);
    while (lock->next_to_retire != ID_VAL(count))
        pthread_cond_wait(&lock->prior_signal, &lock->prior_lock);
    lock->next_to_retire++;
    pthread_cond_broadcast(&lock->prior_signal);
    pthread_mutex_unlock(&lock->prior_lock);

    retire_qp(lock, qp);

    /* handle any callbacks that we have */
    while (cb_items != NULL) {
        tmpcb = cb_items;
        cb_items = cb_items->next;
        tmpcb->fn(tmpcb->data);
        OPENSSL_free(tmpcb);
    }
}

int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
{
    struct rcu_cb_item *new =
        OPENSSL_zalloc(sizeof(*new));

    if (new == NULL)
        return 0;

    new->data = data;
    new->fn = cb;
    /*
     * Use __ATOMIC_ACQ_REL here to indicate that any prior writes to this
     * list are visible to us prior to reading, and publish the new value
     * immediately
     */
    new->next = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, new,
                                  __ATOMIC_ACQ_REL);

    return 1;
}

void *ossl_rcu_uptr_deref(void **p)
{
    return ATOMIC_LOAD_N(pvoid, p, __ATOMIC_ACQUIRE);
}

void ossl_rcu_assign_uptr(void **p, void **v)
{
    ATOMIC_STORE(pvoid, p, v, __ATOMIC_RELEASE);
}

CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
{
    struct rcu_lock_st *new;

    if (num_writers < 1)
        num_writers = 1;

    ctx = ossl_lib_ctx_get_concrete(ctx);
    if (ctx == NULL)
        return NULL;

    new = OPENSSL_zalloc(sizeof(*new));
    if (new == NULL)
        return NULL;

    new->ctx = ctx;
    pthread_mutex_init(&new->write_lock, NULL);
    pthread_mutex_init(&new->prior_lock, NULL);
    pthread_mutex_init(&new->alloc_lock, NULL);
    pthread_cond_init(&new->prior_signal, NULL);
    pthread_cond_init(&new->alloc_signal, NULL);
    new->qp_group = allocate_new_qp_group(new, num_writers + 1);
    if (new->qp_group == NULL) {
        OPENSSL_free(new);
        new = NULL;
    }
    return new;
}

void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock;

    if (lock == NULL)
        return;

    /* make sure we're synchronized */
    ossl_synchronize_rcu(rlock);

    OPENSSL_free(rlock->qp_group);
    /* There should only be a single qp left now */
    OPENSSL_free(rlock);
}
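
/*
 * Illustrative usage sketch (not part of this file); assumes a struct foo
 * instance published through a shared pointer and the ossl_rcu_deref()/
 * ossl_rcu_assign_ptr() convenience macros from internal/rcu.h:
 *
 *   Reader:
 *     ossl_rcu_read_lock(lock);
 *     f = ossl_rcu_deref(&shared_foo);
 *     ... read from f ...
 *     ossl_rcu_read_unlock(lock);
 *
 *   Writer:
 *     ossl_rcu_write_lock(lock);
 *     old = shared_foo;
 *     ossl_rcu_assign_ptr(&shared_foo, &new_foo);
 *     ossl_rcu_write_unlock(lock);
 *     ossl_synchronize_rcu(lock);   ( or defer: ossl_rcu_call(lock, free_cb, old) )
 *     OPENSSL_free(old);
 */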

CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
{
# ifdef USE_RWLOCK
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    if (pthread_rwlock_init(lock, NULL) != 0) {
        OPENSSL_free(lock);
        return NULL;
    }
# else
    pthread_mutexattr_t attr;
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    /*
     * We don't use recursive mutexes, but try to catch errors if we do.
     */
    pthread_mutexattr_init(&attr);
#  if !defined (__TANDEM) && !defined (_SPT_MODEL_)
#   if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK)
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
#   endif
#  else
    /* The SPT Thread Library does not define MUTEX attributes. */
#  endif

    if (pthread_mutex_init(lock, &attr) != 0) {
        pthread_mutexattr_destroy(&attr);
        OPENSSL_free(lock);
        return NULL;
    }

    pthread_mutexattr_destroy(&attr);
# endif

    return lock;
}

__owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_rdlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

__owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_wrlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_unlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_unlock(lock) != 0) {
        assert(errno != EPERM);
        return 0;
    }
# endif

    return 1;
}

void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
{
    if (lock == NULL)
        return;

# ifdef USE_RWLOCK
    pthread_rwlock_destroy(lock);
# else
    pthread_mutex_destroy(lock);
# endif
    OPENSSL_free(lock);

    return;
}
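
/*
 * Illustrative usage of the lock API above (not part of this file):
 *
 *   CRYPTO_RWLOCK *lk = CRYPTO_THREAD_lock_new();
 *
 *   if (lk != NULL && CRYPTO_THREAD_write_lock(lk)) {
 *       ... modify shared state ...
 *       CRYPTO_THREAD_unlock(lk);
 *   }
 *   CRYPTO_THREAD_lock_free(lk);
 *
 * Readers use CRYPTO_THREAD_read_lock() instead; when USE_RWLOCK is not
 * defined, both paths fall back to the same mutex.
 */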

int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
{
    if (pthread_once(once, init) != 0)
        return 0;

    return 1;
}
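
/*
 * Typical usage sketch (illustrative; assumes the CRYPTO_ONCE_STATIC_INIT
 * initializer from <openssl/crypto.h>):
 *
 *   static CRYPTO_ONCE once = CRYPTO_ONCE_STATIC_INIT;
 *   static void do_init(void) { ... one-time setup ... }
 *   ...
 *   if (!CRYPTO_THREAD_run_once(&once, do_init))
 *       return 0;
 */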

int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
{
    if (pthread_key_create(key, cleanup) != 0)
        return 0;

    return 1;
}

void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
{
    return pthread_getspecific(*key);
}

int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
{
    if (pthread_setspecific(*key, val) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
{
    if (pthread_key_delete(*key) != 0)
        return 0;

    return 1;
}

CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
{
    return pthread_self();
}

int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
{
    return pthread_equal(a, b);
}

int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_int_nv((volatile unsigned int *)val, amount);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val += amount;
    *ret  = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}
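
/*
 * Illustrative call (not part of this file): given an int refcount and an
 * int out, CRYPTO_atomic_add(&refcount, 1, &out, lk) increments refcount and
 * stores the new value in out; lk is only taken on the fallback path when no
 * native atomics are usable.
 */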

int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                     CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, op);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *val |= op;
    *ret  = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;
    *ret = *val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = (int)atomic_or_uint_nv((unsigned int *)val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;
    *ret = *val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

# ifndef FIPS_MODULE
int openssl_init_fork_handlers(void)
{
    return 1;
}
# endif /* FIPS_MODULE */

int openssl_get_fork_id(void)
{
    return getpid();
}
#endif