/*
 * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/* We need to use the OPENSSL_fork_*() deprecated APIs */
#define OPENSSL_SUPPRESS_DEPRECATED

#include <openssl/crypto.h>
#include <crypto/cryptlib.h>
#include "internal/cryptlib.h"
#include "internal/rcu.h"
#include "rcu_internal.h"

#if defined(__sun)
# include <atomic.h>
#endif

#if defined(__apple_build_version__) && __apple_build_version__ < 6000000
/*
 * OS/X 10.7 and 10.8 had a weird version of clang which has __ATOMIC_ACQUIRE and
 * __ATOMIC_ACQ_REL but which expects only one parameter for __atomic_is_lock_free()
 * rather than two, which has signature __atomic_is_lock_free(sizeof(_Atomic(T))).
 * All of this makes it impossible to use __atomic_is_lock_free here.
 *
 * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760
 */
# define BROKEN_CLANG_ATOMICS
#endif

#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS)

# if defined(OPENSSL_SYS_UNIX)
#  include <sys/types.h>
#  include <unistd.h>
# endif

# include <assert.h>

# ifdef PTHREAD_RWLOCK_INITIALIZER
#  define USE_RWLOCK
# endif

/*
 * For all GNU/clang atomic builtins, we also need fallbacks, to cover all
 * other compilers.
 *
 * Unfortunately, we can't do that with some "generic type", because there's no
 * guarantee that the chosen generic type is large enough to cover all cases.
 * Therefore, we implement fallbacks for each applicable type, with composed
 * names that include the type they handle.
 *
 * (an anecdote: we previously tried to use |void *| as the generic type, with
 * the thought that the pointer itself is the largest type. However, this is
 * not true on 32-bit pointer platforms, as a |uint64_t| is twice as large)
 *
 * All applicable ATOMIC_ macros take the intended type as first parameter, so
 * they can map to the correct fallback function. In the GNU/clang case, that
 * parameter is simply ignored.
 */

/*
 * Internal types used with the ATOMIC_ macros, to make it possible to compose
 * fallback function names.
 */
typedef void *pvoid;
typedef struct rcu_cb_item *prcu_cb_item;

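/*
 * Illustrative note (editorial addition, not part of the original design
 * comment): with this scheme, a call such as
 *
 *     ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE)
 *
 * expands to __atomic_load_n(&lock->reader_idx, __ATOMIC_ACQUIRE) when the
 * GNU/clang builtins below are usable, and to
 * fallback_atomic_load_n_uint64_t(&lock->reader_idx) otherwise, in which
 * case the memory order argument is simply dropped.
 */
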
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) \
    && !defined(USE_ATOMIC_FALLBACKS)
#  if defined(__APPLE__) && defined(__clang__) && defined(__aarch64__)
/*
 * For pointers, Apple M1 virtualized cpu seems to have some problem using the
 * ldapr instruction (see https://github.com/openssl/openssl/pull/23974)
 * When using the native apple clang compiler, this instruction is emitted for
 * atomic loads, which is bad. So, if
 * 1) We are building on a target that defines __APPLE__ AND
 * 2) We are building on a target using clang (__clang__) AND
 * 3) We are building for an M1 processor (__aarch64__)
 * Then we should not use __atomic_load_n and instead implement our own
 * function to issue the ldar instruction instead, which produces the proper
 * sequencing guarantees
 */
static inline void *apple_atomic_load_n_pvoid(void **p,
                                              ossl_unused int memorder)
{
    void *ret;

    __asm volatile("ldar %0, [%1]" : "=r" (ret): "r" (p):);

    return ret;
}

/* For uint64_t, we should be fine, though */
#   define apple_atomic_load_n_uint64_t(p, o) __atomic_load_n(p, o)

#   define ATOMIC_LOAD_N(t, p, o) apple_atomic_load_n_##t(p, o)
#  else
#   define ATOMIC_LOAD_N(t, p, o) __atomic_load_n(p, o)
#  endif
#  define ATOMIC_STORE_N(t, p, v, o) __atomic_store_n(p, v, o)
#  define ATOMIC_STORE(t, p, v, o) __atomic_store(p, v, o)
#  define ATOMIC_EXCHANGE_N(t, p, v, o) __atomic_exchange_n(p, v, o)
#  define ATOMIC_ADD_FETCH(p, v, o) __atomic_add_fetch(p, v, o)
#  define ATOMIC_FETCH_ADD(p, v, o) __atomic_fetch_add(p, v, o)
#  define ATOMIC_SUB_FETCH(p, v, o) __atomic_sub_fetch(p, v, o)
#  define ATOMIC_AND_FETCH(p, m, o) __atomic_and_fetch(p, m, o)
#  define ATOMIC_OR_FETCH(p, m, o) __atomic_or_fetch(p, m, o)
# else
static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER;

#  define IMPL_fallback_atomic_load_n(t)                        \
    static inline t fallback_atomic_load_n_##t(t *p)            \
    {                                                           \
        t ret;                                                  \
                                                                \
        pthread_mutex_lock(&atomic_sim_lock);                   \
        ret = *p;                                               \
        pthread_mutex_unlock(&atomic_sim_lock);                 \
        return ret;                                             \
    }
IMPL_fallback_atomic_load_n(uint64_t)
IMPL_fallback_atomic_load_n(pvoid)

#  define ATOMIC_LOAD_N(t, p, o) fallback_atomic_load_n_##t(p)

#  define IMPL_fallback_atomic_store_n(t)                       \
    static inline t fallback_atomic_store_n_##t(t *p, t v)      \
    {                                                           \
        t ret;                                                  \
                                                                \
        pthread_mutex_lock(&atomic_sim_lock);                   \
        ret = *p;                                               \
        *p = v;                                                 \
        pthread_mutex_unlock(&atomic_sim_lock);                 \
        return ret;                                             \
    }
IMPL_fallback_atomic_store_n(uint64_t)

#  define ATOMIC_STORE_N(t, p, v, o) fallback_atomic_store_n_##t(p, v)

#  define IMPL_fallback_atomic_store(t)                         \
    static inline void fallback_atomic_store_##t(t *p, t *v)    \
    {                                                           \
        pthread_mutex_lock(&atomic_sim_lock);                   \
        *p = *v;                                                \
        pthread_mutex_unlock(&atomic_sim_lock);                 \
    }
IMPL_fallback_atomic_store(uint64_t)
IMPL_fallback_atomic_store(pvoid)

#  define ATOMIC_STORE(t, p, v, o) fallback_atomic_store_##t(p, v)

#  define IMPL_fallback_atomic_exchange_n(t)                        \
    static inline t fallback_atomic_exchange_n_##t(t *p, t v)       \
    {                                                               \
        t ret;                                                      \
                                                                    \
        pthread_mutex_lock(&atomic_sim_lock);                       \
        ret = *p;                                                   \
        *p = v;                                                     \
        pthread_mutex_unlock(&atomic_sim_lock);                     \
        return ret;                                                 \
    }
IMPL_fallback_atomic_exchange_n(uint64_t)
IMPL_fallback_atomic_exchange_n(prcu_cb_item)

#  define ATOMIC_EXCHANGE_N(t, p, v, o) fallback_atomic_exchange_n_##t(p, v)

/*
 * The fallbacks that follow don't need any per type implementation, as
 * they are designed for uint64_t only. If there comes a time when multiple
 * types need to be covered, it's relatively easy to refactor them the same
 * way as the fallbacks above.
 */

static inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p += v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v)

static inline uint64_t fallback_atomic_fetch_add(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    ret = *p;
    *p += v;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_FETCH_ADD(p, v, o) fallback_atomic_fetch_add(p, v)

static inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p -= v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v)

static inline uint64_t fallback_atomic_and_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p &= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_AND_FETCH(p, v, o) fallback_atomic_and_fetch(p, v)

static inline uint64_t fallback_atomic_or_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p |= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_OR_FETCH(p, v, o) fallback_atomic_or_fetch(p, v)
# endif

static CRYPTO_THREAD_LOCAL rcu_thr_key;

/*
 * users is broken up into 2 parts
 * bits 0-15 - current readers
 * bits 32-63 - ID
 */
# define READER_SHIFT 0
# define ID_SHIFT 32
# define READER_SIZE 16
# define ID_SIZE 32

# define READER_MASK (((uint64_t)1 << READER_SIZE) - 1)
# define ID_MASK (((uint64_t)1 << ID_SIZE) - 1)
# define READER_COUNT(x) (((uint64_t)(x) >> READER_SHIFT) & READER_MASK)
# define ID_VAL(x) (((uint64_t)(x) >> ID_SHIFT) & ID_MASK)
# define VAL_READER ((uint64_t)1 << READER_SHIFT)
# define VAL_ID(x) ((uint64_t)x << ID_SHIFT)

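/*
 * Worked example (editorial addition): with the layout above, a users value
 * of 0x0000000500000002 encodes ID_VAL(users) == 5 (bits 32-63) and
 * READER_COUNT(users) == 2 (bits 0-15). A reader entering the qp adds
 * VAL_READER (1), giving 0x0000000500000003. A writer installing generation 6
 * first ANDs users with ID_MASK (0x00000000FFFFFFFF), clearing the old ID
 * while preserving the reader count, then ORs in VAL_ID(6) ==
 * 0x0000000600000000, as done in update_qp() below.
 */
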
/*
 * This is the core of an rcu lock. It tracks the readers and writers for the
 * current quiescence point for a given lock. Users is the 64 bit value that
 * stores the READERS/ID as defined above
 *
 */
struct rcu_qp {
    uint64_t users;
};

struct thread_qp {
    struct rcu_qp *qp;
    unsigned int depth;
    CRYPTO_RCU_LOCK *lock;
};

# define MAX_QPS 10
/*
 * This is the per thread tracking data
 * that is assigned to each thread participating
 * in an rcu qp
 *
 * qp points to the qp that it last acquired
 *
 */
struct rcu_thr_data {
    struct thread_qp thread_qps[MAX_QPS];
};

/*
 * This is the internal version of a CRYPTO_RCU_LOCK
 * it is cast from CRYPTO_RCU_LOCK
 */
struct rcu_lock_st {
    /* Callbacks to call for next ossl_synchronize_rcu */
    struct rcu_cb_item *cb_items;

    /* rcu generation counter for in-order retirement */
    uint32_t id_ctr;

    /* Array of quiescent points for synchronization */
    struct rcu_qp *qp_group;

    /* Number of elements in qp_group array */
    size_t group_count;

    /* Index of the current qp in the qp_group array */
    uint64_t reader_idx;

    /* value of the next id_ctr value to be retired */
    uint32_t next_to_retire;

    /* index of the next free rcu_qp in the qp_group */
    uint64_t current_alloc_idx;

    /* number of qp's in qp_group array currently being retired */
    uint32_t writers_alloced;

    /* lock protecting write side operations */
    pthread_mutex_t write_lock;

    /* lock protecting updates to writers_alloced/current_alloc_idx */
    pthread_mutex_t alloc_lock;

    /* signal to wake threads waiting on alloc_lock */
    pthread_cond_t alloc_signal;

    /* lock to enforce in-order retirement */
    pthread_mutex_t prior_lock;

    /* signal to wake threads waiting on prior_lock */
    pthread_cond_t prior_signal;
};

/*
 * Called on thread exit to free the per-thread rcu data
 * associated with this thread's pthread key, if any
 */
static void free_rcu_thr_data(void *ptr)
{
    struct rcu_thr_data *data =
            (struct rcu_thr_data *)CRYPTO_THREAD_get_local(&rcu_thr_key);

    OPENSSL_free(data);
    CRYPTO_THREAD_set_local(&rcu_thr_key, NULL);
}

static void ossl_rcu_init(void)
{
    CRYPTO_THREAD_init_local(&rcu_thr_key, NULL);
}

/* Read side acquisition of the current qp */
static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
{
    uint64_t qp_idx;

    /* get the current qp index */
    for (;;) {
        /*
         * Notes on use of __ATOMIC_ACQUIRE
         * We need to ensure the following:
         * 1) That subsequent operations aren't optimized by hoisting them above
         * this operation. Specifically, we don't want the below re-load of
         * qp_idx to get optimized away
         * 2) We want to ensure that any updating of reader_idx on the write side
         * of the lock is flushed from a local cpu cache so that we see any
         * updates prior to the load. This is a non-issue on cache coherent
         * systems like x86, but is relevant on other arches
         * Note: This applies to the reload below as well
         */
        qp_idx = ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE);

        /*
         * Notes on use of __ATOMIC_RELEASE
         * This counter is only read by the write side of the lock, and so we
         * specify __ATOMIC_RELEASE here to ensure that the write side of the
         * lock sees this during the spin loop read of users, as it waits for
         * the reader count to approach zero
         */
        ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);

        /* if the idx hasn't changed, we're good, else try again */
        if (qp_idx == ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE))
            break;

        /*
         * Notes on use of __ATOMIC_RELEASE
         * As with the add above, we want to ensure that this decrement is
         * seen by the write side of the lock as soon as it happens to prevent
         * undue spinning waiting for write side completion
         */
        ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);
    }

    return &lock->qp_group[qp_idx];
}

void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_thr_data *data;
    int i, available_qp = -1;

    /*
     * we're going to access current_qp here so ask the
     * processor to fetch it
     */
    data = CRYPTO_THREAD_get_local(&rcu_thr_key);

    if (data == NULL) {
        data = OPENSSL_zalloc(sizeof(*data));
        OPENSSL_assert(data != NULL);
        CRYPTO_THREAD_set_local(&rcu_thr_key, data);
        ossl_init_thread_start(NULL, NULL, free_rcu_thr_data);
    }

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].qp == NULL && available_qp == -1)
            available_qp = i;
        /* If we have a hold on this lock already, we're good */
        if (data->thread_qps[i].lock == lock) {
            data->thread_qps[i].depth++;
            return;
        }
    }

    /*
     * if we get here, then we don't have a hold on this lock yet
     */
    assert(available_qp != -1);

    data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
    data->thread_qps[available_qp].depth = 1;
    data->thread_qps[available_qp].lock = lock;
}

void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
{
    int i;
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(&rcu_thr_key);
    uint64_t ret;

    assert(data != NULL);

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].lock == lock) {
            /*
             * As with read side acquisition, we use __ATOMIC_RELEASE here
             * to ensure that the decrement is published immediately
             * to any write side waiters
             */
            data->thread_qps[i].depth--;
            if (data->thread_qps[i].depth == 0) {
                ret = ATOMIC_SUB_FETCH(&data->thread_qps[i].qp->users, VAL_READER,
                                       __ATOMIC_RELEASE);
                OPENSSL_assert(ret != UINT64_MAX);
                data->thread_qps[i].qp = NULL;
                data->thread_qps[i].lock = NULL;
            }
            return;
        }
    }
    /*
     * If we get here, we're trying to unlock a lock that we never acquired -
     * that's fatal.
     */
    assert(0);
}

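/*
 * Usage sketch (editorial addition, not part of the original source): a
 * typical read-side critical section with this API, assuming a lock created
 * with ossl_rcu_lock_new() and a shared pointer |shared_obj| published via
 * ossl_rcu_assign_uptr(), might look like:
 *
 *     struct foo *obj;
 *
 *     ossl_rcu_read_lock(lock);
 *     obj = ossl_rcu_uptr_deref((void **)&shared_obj);
 *     // ... read-only use of obj ...
 *     ossl_rcu_read_unlock(lock);
 *
 * |struct foo| and |shared_obj| are hypothetical names used only for
 * illustration.
 */
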
/*
 * Write side allocation routine to get the current qp
 * and replace it with a new one
 */
static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
{
    uint64_t new_id;
    uint64_t current_idx;

    pthread_mutex_lock(&lock->alloc_lock);

    /*
     * we need at least one qp to be available with one
     * left over, so that readers can start working on
     * one that isn't yet being waited on
     */
    while (lock->group_count - lock->writers_alloced < 2)
        /* we have to wait for one to be free */
        pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock);

    current_idx = lock->current_alloc_idx;

    /* Allocate the qp */
    lock->writers_alloced++;

    /* increment the allocation index */
    lock->current_alloc_idx =
        (lock->current_alloc_idx + 1) % lock->group_count;

    /* get and insert a new id */
    new_id = lock->id_ctr;
    lock->id_ctr++;

    new_id = VAL_ID(new_id);
    /*
     * Even though we are under a write side lock here
     * We need to use atomic instructions to ensure that the results
     * of this update are published to the read side prior to updating the
     * reader idx below
     */
    ATOMIC_AND_FETCH(&lock->qp_group[current_idx].users, ID_MASK,
                     __ATOMIC_RELEASE);
    ATOMIC_OR_FETCH(&lock->qp_group[current_idx].users, new_id,
                    __ATOMIC_RELEASE);

    /*
     * Update the reader index to be the prior qp.
     * Note the use of __ATOMIC_RELEASE here is based on the corresponding use
     * of __ATOMIC_ACQUIRE in get_hold_current_qp, as we want any publication
     * of this value to be seen on the read side immediately after it happens
     */
    ATOMIC_STORE_N(uint64_t, &lock->reader_idx, lock->current_alloc_idx,
                   __ATOMIC_RELEASE);

    /* wake up any waiters */
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
    return &lock->qp_group[current_idx];
}

static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp)
{
    pthread_mutex_lock(&lock->alloc_lock);
    lock->writers_alloced--;
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
}

static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock,
                                            int count)
{
    struct rcu_qp *new =
        OPENSSL_zalloc(sizeof(*new) * count);

    lock->group_count = count;
    return new;
}

void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_lock(&lock->write_lock);
}

void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_unlock(&lock->write_lock);
}

void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_qp *qp;
    uint64_t count;
    struct rcu_cb_item *cb_items, *tmpcb;

    /*
     * __ATOMIC_ACQ_REL is used here to ensure that we get any prior published
     * writes before we read, and publish our write immediately
     */
    cb_items = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, NULL,
                                 __ATOMIC_ACQ_REL);

    qp = update_qp(lock);

    /*
     * wait for the reader count to reach zero
     * Note the use of __ATOMIC_ACQUIRE here to ensure that any
     * prior __ATOMIC_RELEASE write operation in get_hold_current_qp
     * is visible prior to our read
     */
    do {
        count = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE);
    } while (READER_COUNT(count) != 0);

    /* retire in order */
    pthread_mutex_lock(&lock->prior_lock);
    while (lock->next_to_retire != ID_VAL(count))
        pthread_cond_wait(&lock->prior_signal, &lock->prior_lock);
    lock->next_to_retire++;
    pthread_cond_broadcast(&lock->prior_signal);
    pthread_mutex_unlock(&lock->prior_lock);

    retire_qp(lock, qp);

    /* handle any callbacks that we have */
    while (cb_items != NULL) {
        tmpcb = cb_items;
        cb_items = cb_items->next;
        tmpcb->fn(tmpcb->data);
        OPENSSL_free(tmpcb);
    }
}

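/*
 * Usage sketch (editorial addition, not part of the original source): a
 * typical write-side update, assuming the hypothetical shared pointer
 * |shared_obj| read by the readers sketched above, might look like:
 *
 *     struct foo *new_obj = build_replacement();   // hypothetical helper
 *     struct foo *old_obj;
 *
 *     ossl_rcu_write_lock(lock);
 *     old_obj = shared_obj;
 *     ossl_rcu_assign_uptr((void **)&shared_obj, (void **)&new_obj);
 *     ossl_rcu_write_unlock(lock);
 *
 *     ossl_synchronize_rcu(lock);   // wait out pre-existing readers
 *     free_foo(old_obj);            // hypothetical; now safe to free
 */
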
int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
{
    struct rcu_cb_item *new =
        OPENSSL_zalloc(sizeof(*new));

    if (new == NULL)
        return 0;

    new->data = data;
    new->fn = cb;
    /*
     * Use __ATOMIC_ACQ_REL here to indicate that any prior writes to this
     * list are visible to us prior to reading, and publish the new value
     * immediately
     */
    new->next = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, new,
                                  __ATOMIC_ACQ_REL);

    return 1;
}

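/*
 * Usage sketch (editorial addition): instead of calling ossl_synchronize_rcu()
 * directly, a writer can defer reclamation to the next synchronization, e.g.
 *
 *     ossl_rcu_call(lock, free_foo_cb, old_obj);
 *
 * where |free_foo_cb| is a hypothetical rcu_cb_fn that frees its argument;
 * the callback is invoked from ossl_synchronize_rcu() once the old value is
 * no longer reachable by readers.
 */
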
void *ossl_rcu_uptr_deref(void **p)
{
    return ATOMIC_LOAD_N(pvoid, p, __ATOMIC_ACQUIRE);
}

void ossl_rcu_assign_uptr(void **p, void **v)
{
    ATOMIC_STORE(pvoid, p, v, __ATOMIC_RELEASE);
}

static CRYPTO_ONCE rcu_init_once = CRYPTO_ONCE_STATIC_INIT;

CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers)
{
    struct rcu_lock_st *new;

    if (!CRYPTO_THREAD_run_once(&rcu_init_once, ossl_rcu_init))
        return NULL;

    if (num_writers < 1)
        num_writers = 1;

    new = OPENSSL_zalloc(sizeof(*new));
    if (new == NULL)
        return NULL;

    pthread_mutex_init(&new->write_lock, NULL);
    pthread_mutex_init(&new->prior_lock, NULL);
    pthread_mutex_init(&new->alloc_lock, NULL);
    pthread_cond_init(&new->prior_signal, NULL);
    pthread_cond_init(&new->alloc_signal, NULL);
    new->qp_group = allocate_new_qp_group(new, num_writers + 1);
    if (new->qp_group == NULL) {
        OPENSSL_free(new);
        new = NULL;
    }
    return new;
}

void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock;

    if (lock == NULL)
        return;

    /* make sure we're synchronized */
    ossl_synchronize_rcu(rlock);

    OPENSSL_free(rlock->qp_group);
    /* There should only be a single qp left now */
    OPENSSL_free(rlock);
}

CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
{
# ifdef USE_RWLOCK
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    if (pthread_rwlock_init(lock, NULL) != 0) {
        OPENSSL_free(lock);
        return NULL;
    }
# else
    pthread_mutexattr_t attr;
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    /*
     * We don't use recursive mutexes, but try to catch errors if we do.
     */
    pthread_mutexattr_init(&attr);
#  if !defined (__TANDEM) && !defined (_SPT_MODEL_)
#   if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK)
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
#   endif
#  else
    /* The SPT Thread Library does not define MUTEX attributes. */
#  endif

    if (pthread_mutex_init(lock, &attr) != 0) {
        pthread_mutexattr_destroy(&attr);
        OPENSSL_free(lock);
        return NULL;
    }

    pthread_mutexattr_destroy(&attr);
# endif

    return lock;
}

__owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_rdlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

__owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_wrlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_unlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_unlock(lock) != 0) {
        assert(errno != EPERM);
        return 0;
    }
# endif

    return 1;
}

void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
{
    if (lock == NULL)
        return;

# ifdef USE_RWLOCK
    pthread_rwlock_destroy(lock);
# else
    pthread_mutex_destroy(lock);
# endif
    OPENSSL_free(lock);

    return;
}

int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
{
    if (pthread_once(once, init) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
{
    if (pthread_key_create(key, cleanup) != 0)
        return 0;

    return 1;
}

void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
{
    return pthread_getspecific(*key);
}

int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
{
    if (pthread_setspecific(*key, val) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
{
    if (pthread_key_delete(*key) != 0)
        return 0;

    return 1;
}

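/*
 * Usage sketch (editorial addition): the wrappers above are used together in
 * this file for the rcu per-thread data; the general pattern is:
 *
 *     static CRYPTO_ONCE once = CRYPTO_ONCE_STATIC_INIT;
 *     static CRYPTO_THREAD_LOCAL key;
 *
 *     static void do_init(void)
 *     {
 *         CRYPTO_THREAD_init_local(&key, NULL);
 *     }
 *
 *     // in each thread, after CRYPTO_THREAD_run_once(&once, do_init):
 *     void *v = CRYPTO_THREAD_get_local(&key);
 *     CRYPTO_THREAD_set_local(&key, some_value);
 *
 * |once|, |key|, |do_init| and |some_value| are illustrative names only.
 */
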
CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
{
    return pthread_self();
}

int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
{
    return pthread_equal(a, b);
}

int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_int_nv((volatile unsigned int *)val, amount);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val += amount;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

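/*
 * Usage sketch (editorial addition): callers typically supply a fallback lock
 * for the case where lock-free atomics are unavailable, e.g.
 *
 *     int refcount = 1, result = 0;
 *
 *     if (!CRYPTO_atomic_add(&refcount, 1, &result, fallback_lock))
 *         // handle the error
 *
 * where |fallback_lock| is a hypothetical CRYPTO_RWLOCK created with
 * CRYPTO_THREAD_lock_new(); it is only taken when the lock-free paths above
 * are not usable.
 */
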
int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                     CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, op);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *val |= op;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;
    *ret = *val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = (int)atomic_or_uint_nv((unsigned int *)val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;
    *ret = *val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

# ifndef FIPS_MODULE
int openssl_init_fork_handlers(void)
{
    return 1;
}
# endif /* FIPS_MODULE */

int openssl_get_fork_id(void)
{
    return getpid();
}
#endif