/* sem_waitcommon -- wait on a semaphore, shared code.
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

c2f5813a | 19 | #include <kernel-features.h> |
042e1521 CD |
20 | #include <errno.h> |
21 | #include <sysdep.h> | |
a2f0363f | 22 | #include <futex-internal.h> |
042e1521 CD |
23 | #include <internaltypes.h> |
24 | #include <semaphore.h> | |
25 | #include <sys/time.h> | |
26 | ||
27 | #include <pthreadP.h> | |
28 | #include <shlib-compat.h> | |
29 | #include <atomic.h> | |
30 | ||

/* The semaphore provides two main operations: sem_post adds a token to the
   semaphore; sem_wait grabs a token from the semaphore, potentially waiting
   until there is a token available.  A sem_wait needs to synchronize with
   the sem_post that provided the token, so that whatever led to the sem_post
   happens before the code after sem_wait.

   Conceptually, available tokens can simply be counted; let's call that the
   value of the semaphore.  However, we also want to know whether there might
   be a sem_wait that is blocked on the value because it was zero (using a
   futex with the value being the futex variable); if there is no blocked
   sem_wait, sem_post does not need to execute a futex_wake call.  Therefore,
   we also need to count the number of potentially blocked sem_wait calls
   (which we call nwaiters).

   What makes this tricky is that POSIX requires that a semaphore can be
   destroyed as soon as the last remaining sem_wait has returned, and no
   other sem_wait or sem_post calls are executing concurrently.  However, the
   sem_post call whose token was consumed by the last sem_wait is considered
   to have finished once it provided the token to the sem_wait.
   Thus, sem_post must not access the semaphore struct anymore after it has
   made a token available; IOW, it needs to be able to atomically provide
   a token and check whether any blocked sem_wait calls might exist.

   This is straightforward to do if the architecture provides 64b atomics
   because we can just put both the value and nwaiters into one variable that
   we access atomically: This is the data field, the value is in the
   least-significant 32 bits, and nwaiters in the other bits.  When sem_post
   makes a value available, it can atomically check nwaiters.

   If we have only 32b atomics available, we cannot put both nwaiters and
   value into one 32b value because then we might have too few bits for both
   of those counters.  Therefore, we need to use two distinct fields.

   To allow sem_post to atomically make a token available and check for
   blocked sem_wait calls, we use one bit in value to indicate whether
   nwaiters is nonzero.  That allows sem_post to use basically the same
   algorithm as with 64b atomics, but requires sem_wait to update the bit; it
   can't do this atomically with another access to nwaiters, but it can compute
   a conservative value for the bit because it's benign if the bit is set
   even if nwaiters is zero (all we get is an unnecessary futex wake call by
   sem_post).
   Specifically, sem_wait will unset the bit speculatively if it believes that
   there is no other concurrently executing sem_wait.  If it misspeculated,
   it will have to clean up by waking any other sem_wait call (i.e., what
   sem_post would do otherwise).  This does not conflict with the destruction
   requirement because the semaphore must not be destructed while any sem_wait
   is still executing.  */

042e1521 CD |
80 | #if !__HAVE_64B_ATOMICS |
81 | static void | |
82 | __sem_wait_32_finish (struct new_sem *sem); | |
83 | #endif | |
84 | ||
85 | static void | |
86 | __sem_wait_cleanup (void *arg) | |
87 | { | |
88 | struct new_sem *sem = (struct new_sem *) arg; | |
89 | ||
90 | #if __HAVE_64B_ATOMICS | |
91 | /* Stop being registered as a waiter. See below for MO. */ | |
22971c35 | 92 | atomic_fetch_add_relaxed (&sem->data, -((uint64_t) 1 << SEM_NWAITERS_SHIFT)); |
042e1521 CD |
93 | #else |
94 | __sem_wait_32_finish (sem); | |
95 | #endif | |
96 | } | |
97 | ||
98 | /* Wait until at least one token is available, possibly with a timeout. | |
99 | This is in a separate function in order to make sure gcc | |
100 | puts the call site into an exception region, and thus the | |
101 | cleanups get properly run. TODO still necessary? Other futex_wait | |
102 | users don't seem to need it. */ | |
103 | static int | |
104 | __attribute__ ((noinline)) | |
6615f779 | 105 | do_futex_wait (struct new_sem *sem, clockid_t clockid, |
b8d3e8fb | 106 | const struct __timespec64 *abstime) |
042e1521 CD |
107 | { |
108 | int err; | |
109 | ||
110 | #if __HAVE_64B_ATOMICS | |
b8d3e8fb | 111 | err = __futex_abstimed_wait_cancelable64 ( |
99d01ffc | 112 | (unsigned int *) &sem->data + SEM_VALUE_OFFSET, 0, |
6615f779 | 113 | clockid, abstime, |
a2f0363f | 114 | sem->private); |
042e1521 | 115 | #else |
b8d3e8fb LM |
116 | err = __futex_abstimed_wait_cancelable64 (&sem->value, SEM_NWAITERS_MASK, |
117 | clockid, abstime, sem->private); | |
042e1521 CD |
118 | #endif |
119 | ||
120 | return err; | |
121 | } | |
122 | ||
123 | /* Fast path: Try to grab a token without blocking. */ | |
124 | static int | |
125 | __new_sem_wait_fast (struct new_sem *sem, int definitive_result) | |
126 | { | |
127 | /* We need acquire MO if we actually grab a token, so that this | |
128 | synchronizes with all token providers (i.e., the RMW operation we read | |
129 | from or all those before it in modification order; also see sem_post). | |
130 | We do not need to guarantee any ordering if we observed that there is | |
131 | no token (POSIX leaves it unspecified whether functions that fail | |
132 | synchronize memory); thus, relaxed MO is sufficient for the initial load | |
133 | and the failure path of the CAS. If the weak CAS fails and we need a | |
134 | definitive result, retry. */ | |
135 | #if __HAVE_64B_ATOMICS | |
e0da28a1 | 136 | uint64_t d = atomic_load_relaxed (&sem->data); |
042e1521 CD |
137 | do |
138 | { | |
139 | if ((d & SEM_VALUE_MASK) == 0) | |
140 | break; | |
141 | if (atomic_compare_exchange_weak_acquire (&sem->data, &d, d - 1)) | |
142 | return 0; | |
143 | } | |
144 | while (definitive_result); | |
145 | return -1; | |
146 | #else | |
147 | unsigned int v = atomic_load_relaxed (&sem->value); | |
148 | do | |
149 | { | |
150 | if ((v >> SEM_VALUE_SHIFT) == 0) | |
151 | break; | |
152 | if (atomic_compare_exchange_weak_acquire (&sem->value, | |
153 | &v, v - (1 << SEM_VALUE_SHIFT))) | |
154 | return 0; | |
155 | } | |
156 | while (definitive_result); | |
157 | return -1; | |
158 | #endif | |
159 | } | |
160 | ||
161 | /* Slow path that blocks. */ | |
162 | static int | |
163 | __attribute__ ((noinline)) | |
b8d3e8fb LM |
164 | __new_sem_wait_slow64 (struct new_sem *sem, clockid_t clockid, |
165 | const struct __timespec64 *abstime) | |
042e1521 CD |
166 | { |
167 | int err = 0; | |
168 | ||
169 | #if __HAVE_64B_ATOMICS | |
170 | /* Add a waiter. Relaxed MO is sufficient because we can rely on the | |
171 | ordering provided by the RMW operations we use. */ | |
22971c35 L |
172 | uint64_t d = atomic_fetch_add_relaxed (&sem->data, |
173 | (uint64_t) 1 << SEM_NWAITERS_SHIFT); | |
042e1521 CD |
174 | |
175 | pthread_cleanup_push (__sem_wait_cleanup, sem); | |
176 | ||
177 | /* Wait for a token to be available. Retry until we can grab one. */ | |
178 | for (;;) | |
179 | { | |
180 | /* If there is no token available, sleep until there is. */ | |
181 | if ((d & SEM_VALUE_MASK) == 0) | |
182 | { | |
6615f779 | 183 | err = do_futex_wait (sem, clockid, abstime); |
042e1521 CD |
184 | /* A futex return value of 0 or EAGAIN is due to a real or spurious |
185 | wake-up, or due to a change in the number of tokens. We retry in | |
186 | these cases. | |
187 | If we timed out, forward this to the caller. | |
1aa8d144 TR |
188 | EINTR is returned if we are interrupted by a signal; we |
189 | forward this to the caller. (See futex_wait and related | |
190 | documentation. Before Linux 2.6.22, EINTR was also returned on | |
191 | spurious wake-ups; we only support more recent Linux versions, | |
192 | so do not need to consider this here.) */ | |
aa69f19a | 193 | if (err == ETIMEDOUT || err == EINTR || err == EOVERFLOW) |
042e1521 CD |
194 | { |
195 | __set_errno (err); | |
196 | err = -1; | |
197 | /* Stop being registered as a waiter. */ | |
198 | atomic_fetch_add_relaxed (&sem->data, | |
22971c35 | 199 | -((uint64_t) 1 << SEM_NWAITERS_SHIFT)); |
042e1521 CD |
200 | break; |
201 | } | |
202 | /* Relaxed MO is sufficient; see below. */ | |
203 | d = atomic_load_relaxed (&sem->data); | |
204 | } | |
205 | else | |
206 | { | |
207 | /* Try to grab both a token and stop being a waiter. We need | |
208 | acquire MO so this synchronizes with all token providers (i.e., | |
209 | the RMW operation we read from or all those before it in | |
210 | modification order; also see sem_post). On the failure path, | |
211 | relaxed MO is sufficient because we only eventually need the | |
212 | up-to-date value; the futex_wait or the CAS perform the real | |
213 | work. */ | |
214 | if (atomic_compare_exchange_weak_acquire (&sem->data, | |
22971c35 | 215 | &d, d - 1 - ((uint64_t) 1 << SEM_NWAITERS_SHIFT))) |
042e1521 CD |
216 | { |
217 | err = 0; | |
218 | break; | |
219 | } | |
220 | } | |
221 | } | |
222 | ||
223 | pthread_cleanup_pop (0); | |
224 | #else | |
225 | /* The main difference to the 64b-atomics implementation is that we need to | |
226 | access value and nwaiters in separate steps, and that the nwaiters bit | |
227 | in the value can temporarily not be set even if nwaiters is nonzero. | |
228 | We work around incorrectly unsetting the nwaiters bit by letting sem_wait | |
229 | set the bit again and waking the number of waiters that could grab a | |
230 | token. There are two additional properties we need to ensure: | |
231 | (1) We make sure that whenever unsetting the bit, we see the increment of | |
232 | nwaiters by the other thread that set the bit. IOW, we will notice if | |
233 | we make a mistake. | |
234 | (2) When setting the nwaiters bit, we make sure that we see the unsetting | |
235 | of the bit by another waiter that happened before us. This avoids having | |
236 | to blindly set the bit whenever we need to block on it. We set/unset | |
237 | the bit while having incremented nwaiters (i.e., are a registered | |
238 | waiter), and the problematic case only happens when one waiter indeed | |
239 | followed another (i.e., nwaiters was never larger than 1); thus, this | |
240 | works similarly as with a critical section using nwaiters (see the MOs | |
241 | and related comments below). | |
242 | ||
243 | An alternative approach would be to unset the bit after decrementing | |
244 | nwaiters; however, that would result in needing Dekker-like | |
245 | synchronization and thus full memory barriers. We also would not be able | |
246 | to prevent misspeculation, so this alternative scheme does not seem | |
247 | beneficial. */ | |
248 | unsigned int v; | |
249 | ||
250 | /* Add a waiter. We need acquire MO so this synchronizes with the release | |
251 | MO we use when decrementing nwaiters below; it ensures that if another | |
252 | waiter unset the bit before us, we see that and set it again. Also see | |
253 | property (2) above. */ | |
254 | atomic_fetch_add_acquire (&sem->nwaiters, 1); | |
255 | ||
256 | pthread_cleanup_push (__sem_wait_cleanup, sem); | |
257 | ||
258 | /* Wait for a token to be available. Retry until we can grab one. */ | |
259 | /* We do not need any ordering wrt. to this load's reads-from, so relaxed | |
260 | MO is sufficient. The acquire MO above ensures that in the problematic | |
261 | case, we do see the unsetting of the bit by another waiter. */ | |
262 | v = atomic_load_relaxed (&sem->value); | |
263 | do | |
264 | { | |
265 | do | |
266 | { | |
267 | /* We are about to block, so make sure that the nwaiters bit is | |
268 | set. We need release MO on the CAS to ensure that when another | |
269 | waiter unsets the nwaiters bit, it will also observe that we | |
270 | incremented nwaiters in the meantime (also see the unsetting of | |
271 | the bit below). Relaxed MO on CAS failure is sufficient (see | |
272 | above). */ | |
273 | do | |
274 | { | |
275 | if ((v & SEM_NWAITERS_MASK) != 0) | |
276 | break; | |
277 | } | |
278 | while (!atomic_compare_exchange_weak_release (&sem->value, | |
279 | &v, v | SEM_NWAITERS_MASK)); | |
280 | /* If there is no token, wait. */ | |
281 | if ((v >> SEM_VALUE_SHIFT) == 0) | |
282 | { | |
283 | /* See __HAVE_64B_ATOMICS variant. */ | |
6615f779 | 284 | err = do_futex_wait (sem, clockid, abstime); |
1aa8d144 | 285 | if (err == ETIMEDOUT || err == EINTR) |
042e1521 CD |
286 | { |
287 | __set_errno (err); | |
288 | err = -1; | |
289 | goto error; | |
290 | } | |
291 | err = 0; | |
292 | /* We blocked, so there might be a token now. Relaxed MO is | |
293 | sufficient (see above). */ | |
294 | v = atomic_load_relaxed (&sem->value); | |
295 | } | |
296 | } | |
297 | /* If there is no token, we must not try to grab one. */ | |
298 | while ((v >> SEM_VALUE_SHIFT) == 0); | |
299 | } | |
300 | /* Try to grab a token. We need acquire MO so this synchronizes with | |
301 | all token providers (i.e., the RMW operation we read from or all those | |
302 | before it in modification order; also see sem_post). */ | |
303 | while (!atomic_compare_exchange_weak_acquire (&sem->value, | |
304 | &v, v - (1 << SEM_VALUE_SHIFT))); | |
305 | ||
306 | error: | |
307 | pthread_cleanup_pop (0); | |
308 | ||
309 | __sem_wait_32_finish (sem); | |
310 | #endif | |
311 | ||
312 | return err; | |
313 | } | |
314 | ||
315 | /* Stop being a registered waiter (non-64b-atomics code only). */ | |
316 | #if !__HAVE_64B_ATOMICS | |
317 | static void | |
318 | __sem_wait_32_finish (struct new_sem *sem) | |
319 | { | |
320 | /* The nwaiters bit is still set, try to unset it now if this seems | |
321 | necessary. We do this before decrementing nwaiters so that the unsetting | |
322 | is visible to other waiters entering after us. Relaxed MO is sufficient | |
323 | because we are just speculating here; a stronger MO would not prevent | |
324 | misspeculation. */ | |
325 | unsigned int wguess = atomic_load_relaxed (&sem->nwaiters); | |
326 | if (wguess == 1) | |
327 | /* We might be the last waiter, so unset. This needs acquire MO so that | |
7f0d9e61 | 328 | it synchronizes with the release MO when setting the bit above; if we |
042e1521 CD |
329 | overwrite someone else that set the bit, we'll read in the following |
330 | decrement of nwaiters at least from that release sequence, so we'll | |
331 | see if the other waiter is still active or if another writer entered | |
332 | in the meantime (i.e., using the check below). */ | |
333 | atomic_fetch_and_acquire (&sem->value, ~SEM_NWAITERS_MASK); | |
334 | ||
335 | /* Now stop being a waiter, and see whether our guess was correct. | |
336 | This needs release MO so that it synchronizes with the acquire MO when | |
337 | a waiter increments nwaiters; this makes sure that newer writers see that | |
338 | we reset the waiters_present bit. */ | |
339 | unsigned int wfinal = atomic_fetch_add_release (&sem->nwaiters, -1); | |
340 | if (wfinal > 1 && wguess == 1) | |
341 | { | |
342 | /* We guessed wrong, and so need to clean up after the mistake and | |
343 | unblock any waiters that could have not been woken. There is no | |
344 | additional ordering that we need to set up, so relaxed MO is | |
345 | sufficient. */ | |
346 | unsigned int v = atomic_fetch_or_relaxed (&sem->value, | |
347 | SEM_NWAITERS_MASK); | |
348 | /* If there are available tokens, then wake as many waiters. If there | |
349 | aren't any, then there is no need to wake anyone because there is | |
350 | none to grab for another waiter. If tokens become available | |
351 | subsequently, then the respective sem_post calls will do the wake-up | |
352 | due to us having set the nwaiters bit again. */ | |
353 | v >>= SEM_VALUE_SHIFT; | |
354 | if (v > 0) | |
355 | futex_wake (&sem->value, v, sem->private); | |
356 | } | |
357 | } | |
358 | #endif |