]>
Commit | Line | Data |
---|---|---|
cc25c8b4 | 1 | /* POSIX reader--writer lock: core parts. |
6d7e8eda | 2 | Copyright (C) 2016-2023 Free Software Foundation, Inc. |
cc25c8b4 TR |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, see | |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
cc25c8b4 TR |
18 | |
19 | #include <errno.h> | |
20 | #include <sysdep.h> | |
21 | #include <pthread.h> | |
22 | #include <pthreadP.h> | |
23 | #include <sys/time.h> | |
24 | #include <stap-probe.h> | |
25 | #include <atomic.h> | |
26 | #include <futex-internal.h> | |
48123656 | 27 | #include <time.h> |
cc25c8b4 TR |
28 | |
29 | ||
30 | /* A reader--writer lock that fulfills the POSIX requirements (but operations | |
31 | on this lock are not necessarily full barriers, as one may interpret the | |
32 | POSIX requirement about "synchronizing memory"). All critical sections are | |
33 | in a total order, writers synchronize with prior writers and readers, and | |
34 | readers synchronize with prior writers. | |
35 | ||
36 | A thread is allowed to acquire a read lock recursively (i.e., have rdlock | |
37 | critical sections that overlap in sequenced-before) unless the kind of the | |
3d265911 | 38 | rwlock is set to PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP. |
cc25c8b4 TR |
39 | |
40 | This lock is built so that workloads of mostly readers can be executed with | |
41 | low runtime overheads. This matches that the default kind of the lock is | |
42 | PTHREAD_RWLOCK_PREFER_READER_NP. Acquiring a read lock requires a single | |
43 | atomic addition if the lock is or was previously acquired by other | |
44 | readers; releasing the lock is a single CAS if there are no concurrent | |
45 | writers. | |
46 | Workloads consisting of mostly writers are of secondary importance. | |
47 | An uncontended write lock acquisition is as fast as for a normal | |
48 | exclusive mutex but writer contention is somewhat more costly due to | |
49 | keeping track of the exact number of writers. If the rwlock kind requests | |
3d265911 | 50 | writers to be preferred (i.e., PTHREAD_RWLOCK_PREFER_WRITER_NP or the |
cc25c8b4 TR |
51 | no-recursive-readers variant of it), then writer--to--writer lock ownership |
52 | hand-over is fairly fast and bypasses lock acquisition attempts by readers. | |
53 | The costs of lock ownership transfer between readers and writers vary. If | |
54 | the program asserts that there are no recursive readers and writers are | |
55 | preferred, then write lock acquisition attempts will block subsequent read | |
56 | lock acquisition attempts, so that new incoming readers do not prolong a | |
57 | phase in which readers have acquired the lock. | |
58 | ||
cc25c8b4 TR |
59 | The main components of the rwlock are a writer-only lock that allows only |
60 | one of the concurrent writers to be the primary writer, and a | |
61 | single-writer-multiple-readers lock that decides between read phases, in | |
62 | which readers have acquired the rwlock, and write phases in which a primary | |
63 | writer or a sequence of different primary writers have acquired the rwlock. | |
64 | ||
65 | The single-writer-multiple-readers lock is the central piece of state | |
66 | describing the rwlock and is encoded in the __readers field (see below for | |
67 | a detailed explanation): | |
68 | ||
69 | State WP WL R RW Notes | |
70 | --------------------------- | |
71 | #1 0 0 0 0 Lock is idle (and in a read phase). | |
72 | #2 0 0 >0 0 Readers have acquired the lock. | |
faf8c066 CD |
73 | #3 0 1 0 0 Lock is not acquired; a writer will try to start a |
74 | write phase. | |
cc25c8b4 TR |
75 | #4 0 1 >0 0 Readers have acquired the lock; a writer is waiting |
76 | and explicit hand-over to the writer is required. | |
77 | #4a 0 1 >0 1 Same as #4 except that there are further readers | |
78 | waiting because the writer is to be preferred. | |
79 | #5 1 0 0 0 Lock is idle (and in a write phase). | |
faf8c066 CD |
80 | #6 1 0 >0 0 Write phase; readers will try to start a read phase |
81 | (requires explicit hand-over to all readers that | |
82 | do not start the read phase). | |
cc25c8b4 TR |
83 | #7 1 1 0 0 Lock is acquired by a writer. |
84 | #8 1 1 >0 0 Lock acquired by a writer and readers are waiting; | |
85 | explicit hand-over to the readers is required. | |
86 | ||
87 | WP (PTHREAD_RWLOCK_WRPHASE) is true if the lock is in a write phase, so | |
88 | potentially acquired by a primary writer. | |
89 | WL (PTHREAD_RWLOCK_WRLOCKED) is true if there is a primary writer (i.e., | |
90 | the thread that was able to set this bit from false to true). | |
91 | R (all bits in __readers except the number of least-significant bits | |
92 | denoted in PTHREAD_RWLOCK_READER_SHIFT) is the number of readers that have | |
93 | or are trying to acquired the lock. There may be more readers waiting if | |
94 | writers are preferred and there will be no recursive readers, in which | |
95 | case RW (PTHREAD_RWLOCK_RWAITING) is true in state #4a. | |
96 | ||
97 | We want to block using futexes but using __readers as a futex word directly | |
98 | is not a good solution. First, we want to wait on different conditions | |
99 | such as waiting for a phase change vs. waiting for the primary writer to | |
100 | release the writer-only lock. Second, the number of readers could change | |
101 | frequently, which would make it likely that a writer's futex_wait fails | |
102 | frequently too because the expected value does not match the value of | |
103 | __readers anymore. | |
104 | Therefore, we split out the futex words into the __wrphase_futex and | |
105 | __writers_futex fields. The former tracks the value of the WP bit and is | |
106 | changed after changing WP by the thread that changes WP. However, because | |
107 | of the POSIX requirements regarding mutex/rwlock destruction (i.e., that | |
108 | destroying a rwlock is allowed as soon as no thread has acquired or will | |
109 | acquire the lock), we have to be careful and hand over lock ownership (via | |
110 | a phase change) carefully to those threads waiting. Specifically, we must | |
111 | prevent a situation in which we are not quite sure whether we still have | |
112 | to unblock another thread through a change to memory (executing a | |
113 | futex_wake on a former futex word that is now used for something else is | |
114 | fine). | |
115 | The scheme we use for __wrphase_futex is that waiting threads that may | |
116 | use the futex word to block now all have to use the futex word to block; it | |
117 | is not allowed to take the short-cut and spin-wait on __readers because | |
118 | then the waking thread cannot just make one final change to memory to | |
119 | unblock all potentially waiting threads. If, for example, a reader | |
120 | increments R in states #7 or #8, it has to then block until __wrphase_futex | |
121 | is 0 and it can confirm that the value of 0 was stored by the primary | |
122 | writer; in turn, the primary writer has to change to a read phase too when | |
123 | releasing WL (i.e., to state #2), and it must change __wrphase_futex to 0 | |
124 | as the next step. This ensures that the waiting reader will not be able to | |
125 | acquire, release, and then destroy the lock concurrently with the pending | |
126 | futex unblock operations by the former primary writer. This scheme is | |
127 | called explicit hand-over in what follows. | |
128 | Note that waiting threads can cancel waiting only if explicit hand-over has | |
129 | not yet started (e.g., if __readers is still in states #7 or #8 in the | |
130 | example above). | |
131 | ||
132 | Writers determine the primary writer through WL. Blocking using futexes | |
133 | is performed using __writers_futex as a futex word; primary writers will | |
134 | enable waiting on this futex by setting it to 1 after they acquired the WL | |
135 | bit and will disable waiting by setting it to 0 before they release WL. | |
136 | This leaves small windows where blocking using futexes is not possible | |
137 | although a primary writer exists, but in turn decreases complexity of the | |
138 | writer--writer synchronization and does not affect correctness. | |
139 | If writers are preferred, writers can hand over WL directly to other | |
140 | waiting writers that registered by incrementing __writers: If the primary | |
141 | writer can CAS __writers from a non-zero value to the same value with the | |
142 | PTHREAD_RWLOCK_WRHANDOVER bit set, it effectively transfers WL ownership | |
143 | to one of the registered waiting writers and does not reset WL; in turn, | |
144 | a registered writer that can clear PTHREAD_RWLOCK_WRHANDOVER using a CAS | |
145 | then takes over WL. Note that registered waiting writers can cancel | |
146 | waiting by decrementing __writers, but the last writer to unregister must | |
147 | become the primary writer if PTHREAD_RWLOCK_WRHANDOVER is set. | |
148 | Also note that adding another state/bit to signal potential writer--writer | |
149 | contention (e.g., as done in the normal mutex algorithm) would not be | |
150 | helpful because we would have to conservatively assume that there is in | |
151 | fact no other writer, and wake up readers too. | |
152 | ||
153 | To avoid having to call futex_wake when no thread uses __wrphase_futex or | |
154 | __writers_futex, threads will set the PTHREAD_RWLOCK_FUTEX_USED bit in the | |
155 | respective futex words before waiting on it (using a CAS so it will only be | |
156 | set if in a state in which waiting would be possible). In the case of | |
157 | __writers_futex, we wake only one thread but several threads may share | |
158 | PTHREAD_RWLOCK_FUTEX_USED, so we must assume that there are still others. | |
159 | This is similar to what we do in pthread_mutex_lock. We do not need to | |
160 | do this for __wrphase_futex because there, we always wake all waiting | |
161 | threads. | |
162 | ||
163 | Blocking in the state #4a simply uses __readers as futex word. This | |
164 | simplifies the algorithm but suffers from some of the drawbacks discussed | |
165 | before, though not to the same extent because R can only decrease in this | |
166 | state, so the number of potentially failing futex_wait attempts will be | |
167 | bounded. All threads moving from state #4a to another state must wake | |
168 | up threads blocked on the __readers futex. | |
169 | ||
170 | The ordering invariants that we have to take care of in the implementation | |
171 | are primarily those necessary for a reader--writer lock; this is rather | |
172 | straightforward and happens during write/read phase switching (potentially | |
173 | through explicit hand-over), and between writers through synchronization | |
174 | involving the PTHREAD_RWLOCK_WRLOCKED or PTHREAD_RWLOCK_WRHANDOVER bits. | |
175 | Additionally, we need to take care that modifications of __writers_futex | |
176 | and __wrphase_futex (e.g., by otherwise unordered readers) take place in | |
177 | the writer critical sections or read/write phases, respectively, and that | |
178 | explicit hand-over observes stores from the previous phase. How this is | |
179 | done is explained in more detail in comments in the code. | |
180 | ||
181 | Many of the accesses to the futex words just need relaxed MO. This is | |
182 | possible because we essentially drive both the core rwlock synchronization | |
183 | and the futex synchronization in parallel. For example, an unlock will | |
184 | unlock the rwlock and take part in the futex synchronization (using | |
185 | PTHREAD_RWLOCK_FUTEX_USED, see above); even if they are not tightly | |
186 | ordered in some way, the futex synchronization ensures that there are no | |
187 | lost wake-ups, and woken threads will then eventually see the most recent | |
188 | state of the rwlock. IOW, waiting threads will always be woken up, while | |
189 | not being able to wait using futexes (which can happen) is harmless; in | |
190 | turn, this means that waiting threads don't need special ordering wrt. | |
191 | waking threads. | |
192 | ||
193 | The futex synchronization consists of the three-state futex word: | |
194 | (1) cannot block on it, (2) can block on it, and (3) there might be a | |
195 | thread blocked on it (i.e., with PTHREAD_RWLOCK_FUTEX_USED set). | |
196 | Relaxed-MO atomic read-modify-write operations are sufficient to maintain | |
197 | this (e.g., using a CAS to go from (2) to (3) but not from (1) to (3)), | |
198 | but we need ordering of the futex word modifications by the waking threads | |
199 | so that they collectively make correct state changes between (1)-(3). | |
200 | The futex-internal synchronization (i.e., the conceptual critical sections | |
201 | around futex operations in the kernel) then ensures that even an | |
202 | unconstrained load (i.e., relaxed MO) inside of futex_wait will not lead to | |
203 | lost wake-ups because either the waiting thread will see the change from | |
204 | (3) to (1) when a futex_wake came first, or this futex_wake will wake this | |
205 | waiting thread because the waiting thread came first. | |
206 | ||
207 | ||
208 | POSIX allows but does not require rwlock acquisitions to be a cancellation | |
209 | point. We do not support cancellation. | |
210 | ||
211 | TODO We do not try to elide any read or write lock acquisitions currently. | |
212 | While this would be possible, it is unclear whether HTM performance is | |
213 | currently predictable enough and our runtime tuning is good enough at | |
214 | deciding when to use elision so that enabling it would lead to consistently | |
215 | better performance. */ | |
216 | ||
217 | ||
218 | static int | |
219 | __pthread_rwlock_get_private (pthread_rwlock_t *rwlock) | |
220 | { | |
221 | return rwlock->__data.__shared != 0 ? FUTEX_SHARED : FUTEX_PRIVATE; | |
222 | } | |
223 | ||
/* Release a read lock: decrement the reader count in __readers and, if we
   were the last reader while a primary writer holds WRLOCKED, start a write
   phase and perform explicit hand-over through __wrphase_futex.  Also wakes
   readers blocked in state #4a if the RWAITING flag was cleared.  See the
   overview comment at the top of this file for the state machine.  */
static __always_inline void
__pthread_rwlock_rdunlock (pthread_rwlock_t *rwlock)
{
  int private = __pthread_rwlock_get_private (rwlock);
  /* We decrease the number of readers, and if we are the last reader and
     there is a primary writer, we start a write phase.  We use a CAS to
     make this atomic so that it is clear whether we must hand over ownership
     explicitly.  */
  unsigned int r = atomic_load_relaxed (&rwlock->__data.__readers);
  unsigned int rnew;
  for (;;)
    {
      rnew = r - (1 << PTHREAD_RWLOCK_READER_SHIFT);
      /* If we are the last reader, we also need to unblock any readers
	 that are waiting for a writer to go first (PTHREAD_RWLOCK_RWAITING)
	 so that they can register while the writer is active.  */
      if ((rnew >> PTHREAD_RWLOCK_READER_SHIFT) == 0)
	{
	  if ((rnew & PTHREAD_RWLOCK_WRLOCKED) != 0)
	    rnew |= PTHREAD_RWLOCK_WRPHASE;
	  rnew &= ~(unsigned int) PTHREAD_RWLOCK_RWAITING;
	}
      /* We need release MO here for three reasons.  First, so that we
	 synchronize with subsequent writers.  Second, we might have been the
	 first reader and set __wrphase_futex to 0, so we need to synchronize
	 with the last reader that will set it to 1 (note that we will always
	 change __readers before the last reader, or we are the last reader).
	 Third, a writer that takes part in explicit hand-over needs to see
	 the first reader's store to __wrphase_futex (or a later value) if
	 the writer observes that a write phase has been started.  */
      if (atomic_compare_exchange_weak_release (&rwlock->__data.__readers,
						&r, rnew))
	break;
      /* TODO Back-off.  */
    }
  if ((rnew & PTHREAD_RWLOCK_WRPHASE) != 0)
    {
      /* We need to do explicit hand-over.  We need the acquire MO fence so
	 that our modification of _wrphase_futex happens after a store by
	 another reader that started a read phase.  Relaxed MO is sufficient
	 for the modification of __wrphase_futex because it is just used
	 to delay acquisition by a writer until all threads are unblocked
	 irrespective of whether they are looking at __readers or
	 __wrphase_futex; any other synchronizes-with relations that are
	 necessary are established through __readers.  */
      atomic_thread_fence_acquire ();
      if ((atomic_exchange_relaxed (&rwlock->__data.__wrphase_futex, 1)
	   & PTHREAD_RWLOCK_FUTEX_USED) != 0)
	futex_wake (&rwlock->__data.__wrphase_futex, INT_MAX, private);
    }
  /* Also wake up waiting readers if we did reset the RWAITING flag.  */
  if ((r & PTHREAD_RWLOCK_RWAITING) != (rnew & PTHREAD_RWLOCK_RWAITING))
    futex_wake (&rwlock->__data.__readers, INT_MAX, private);
}
278 | ||
279 | ||
/* Acquire a read lock on RWLOCK.  If ABSTIME is non-NULL, wait at most
   until that absolute time, measured against CLOCKID.  Returns 0 on
   success; EINVAL for an invalid clockid or nanoseconds value; EDEADLK
   if the calling thread already holds the write lock; EAGAIN on reader
   count overflow; or the error from the futex wait (ETIMEDOUT,
   EOVERFLOW) on timeout.  */
static __always_inline int
__pthread_rwlock_rdlock_full64 (pthread_rwlock_t *rwlock, clockid_t clockid,
				const struct __timespec64 *abstime)
{
  unsigned int r;

  /* Make sure any passed in clockid and timeout value are valid.  Note that
     the previous implementation assumed that this check *must* not be
     performed if there would in fact be no blocking; however, POSIX only
     requires that "the validity of the abstime parameter need not be checked
     if the lock can be immediately acquired" (i.e., we need not but may check
     it).  */
  if (abstime && __glibc_unlikely (!futex_abstimed_supported_clockid (clockid)
				   || ! valid_nanoseconds (abstime->tv_nsec)))
    return EINVAL;

  /* Make sure we are not holding the rwlock as a writer.  This is a deadlock
     situation we recognize and report.  */
  if (__glibc_unlikely (atomic_load_relaxed (&rwlock->__data.__cur_writer)
			== THREAD_GETMEM (THREAD_SELF, tid)))
    return EDEADLK;

  /* If we prefer writers, recursive rdlock is disallowed, we are in a read
     phase, and there are other readers present, we try to wait without
     extending the read phase.  We will be unblocked by either one of the
     other active readers, or if the writer gives up WRLOCKED (e.g., on
     timeout).
     If there are no other readers, we simply race with any existing primary
     writer; it would have been a race anyway, and changing the odds slightly
     will likely not make a big difference.  */
  if (rwlock->__data.__flags == PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP)
    {
      r = atomic_load_relaxed (&rwlock->__data.__readers);
      while ((r & PTHREAD_RWLOCK_WRPHASE) == 0
	     && (r & PTHREAD_RWLOCK_WRLOCKED) != 0
	     && (r >> PTHREAD_RWLOCK_READER_SHIFT) > 0)
	{
	  /* TODO Spin first.  */
	  /* Try setting the flag signaling that we are waiting without having
	     incremented the number of readers.  Relaxed MO is fine because
	     this is just about waiting for a state change in __readers.  */
	  if (atomic_compare_exchange_weak_relaxed
	      (&rwlock->__data.__readers, &r, r | PTHREAD_RWLOCK_RWAITING))
	    {
	      /* Wait for as long as the flag is set.  An ABA situation is
		 harmless because the flag is just about the state of
		 __readers, and all threads set the flag under the same
		 conditions.  */
	      while (((r = atomic_load_relaxed (&rwlock->__data.__readers))
		      & PTHREAD_RWLOCK_RWAITING) != 0)
		{
		  int private = __pthread_rwlock_get_private (rwlock);
		  int err = __futex_abstimed_wait64 (&rwlock->__data.__readers,
						     r, clockid, abstime,
						     private);
		  /* We ignore EAGAIN and EINTR.  On time-outs, we can just
		     return because we don't need to clean up anything.  */
		  if (err == ETIMEDOUT || err == EOVERFLOW)
		    return err;
		}
	      /* It makes sense to not break out of the outer loop here
		 because we might be in the same situation again.  */
	    }
	  else
	    {
	      /* TODO Back-off.  */
	    }
	}
    }
  /* Register as a reader, using an add-and-fetch so that R can be used as
     expected value for future operations.  Acquire MO so we synchronize with
     prior writers as well as the last reader of the previous read phase (see
     below).  */
  r = (atomic_fetch_add_acquire (&rwlock->__data.__readers,
				 (1 << PTHREAD_RWLOCK_READER_SHIFT))
       + (1 << PTHREAD_RWLOCK_READER_SHIFT));

  /* Check whether there is an overflow in the number of readers.  We assume
     that the total number of threads is less than half the maximum number
     of readers that we have bits for in __readers (i.e., with 32-bit int and
     PTHREAD_RWLOCK_READER_SHIFT of 3, we assume there are less than
     1 << (32-3-1) concurrent threads).
     If there is an overflow, we use a CAS to try to decrement the number of
     readers if there still is an overflow situation.  If so, we return
     EAGAIN; if not, we are not a thread causing an overflow situation, and so
     we just continue.  Using a fetch-add instead of the CAS isn't possible
     because other readers might release the lock concurrently, which could
     make us the last reader and thus responsible for handing ownership over
     to writers (which requires a CAS too to make the decrement and ownership
     transfer indivisible).  */
  while (__glibc_unlikely (r >= PTHREAD_RWLOCK_READER_OVERFLOW))
    {
      /* Relaxed MO is okay because we just want to undo our registration and
	 cannot have changed the rwlock state substantially if the CAS
	 succeeds.  */
      if (atomic_compare_exchange_weak_relaxed
	  (&rwlock->__data.__readers,
	   &r, r - (1 << PTHREAD_RWLOCK_READER_SHIFT)))
	return EAGAIN;
    }

  /* We have registered as a reader, so if we are in a read phase, we have
     acquired a read lock.  This is also the reader--reader fast-path.
     Even if there is a primary writer, we just return.  If writers are to
     be preferred and we are the only active reader, we could try to enter a
     write phase to let the writer proceed.  This would be okay because we
     cannot have acquired the lock previously as a reader (which could result
     in deadlock if we would wait for the primary writer to run).  However,
     this seems to be a corner case and handling it specially not be worth the
     complexity.  */
  if (__glibc_likely ((r & PTHREAD_RWLOCK_WRPHASE) == 0))
    return 0;
  /* Otherwise, if we were in a write phase (states #6 or #8), we must wait
     for explicit hand-over of the read phase; the only exception is if we
     can start a read phase if there is no primary writer currently.  */
  while ((r & PTHREAD_RWLOCK_WRPHASE) != 0
	 && (r & PTHREAD_RWLOCK_WRLOCKED) == 0)
    {
      /* Try to enter a read phase: If the CAS below succeeds, we have
	 ownership; if it fails, we will simply retry and reassess the
	 situation.
	 Acquire MO so we synchronize with prior writers.  */
      if (atomic_compare_exchange_weak_acquire (&rwlock->__data.__readers, &r,
						r ^ PTHREAD_RWLOCK_WRPHASE))
	{
	  /* We started the read phase, so we are also responsible for
	     updating the write-phase futex.  Relaxed MO is sufficient.
	     We have to do the same steps as a writer would when handing
	     over the read phase to us because other readers cannot
	     distinguish between us and the writer; this includes
	     explicit hand-over and potentially having to wake other readers
	     (but we can pretend to do the setting and unsetting of WRLOCKED
	     atomically, and thus can skip this step).  */
	  if ((atomic_exchange_relaxed (&rwlock->__data.__wrphase_futex, 0)
	       & PTHREAD_RWLOCK_FUTEX_USED) != 0)
	    {
	      int private = __pthread_rwlock_get_private (rwlock);
	      futex_wake (&rwlock->__data.__wrphase_futex, INT_MAX, private);
	    }
	  return 0;
	}
      else
	{
	  /* TODO Back off before retrying.  Also see above.  */
	}
    }

  /* We were in a write phase but did not install the read phase.  We cannot
     distinguish between a writer and another reader starting the read phase,
     so we must wait for explicit hand-over via __wrphase_futex.
     However, __wrphase_futex might not have been set to 1 yet (either
     because explicit hand-over to the writer is still ongoing, or because
     the writer has started the write phase but has not yet updated
     __wrphase_futex).  The least recent value of __wrphase_futex we can
     read from here is the modification of the last read phase (because
     we synchronize with the last reader in this read phase through
     __readers; see the use of acquire MO on the fetch_add above).
     Therefore, if we observe a value of 0 for __wrphase_futex, we need
     to subsequently check that __readers now indicates a read phase; we
     need to use acquire MO for this so that if we observe a read phase,
     we will also see the modification of __wrphase_futex by the previous
     writer.  We then need to load __wrphase_futex again and continue to
     wait if it is not 0, so that we do not skip explicit hand-over.
     Relaxed MO is sufficient for the load from __wrphase_futex because
     we just use it as an indicator for when we can proceed; we use
     __readers and the acquire MO accesses to it to eventually read from
     the proper stores to __wrphase_futex.  */
  unsigned int wpf;
  bool ready = false;
  for (;;)
    {
      /* Loop while the futex word indicates a write phase (value 1,
	 possibly with FUTEX_USED set); the OR below normalizes away the
	 FUTEX_USED bit for the comparison.  */
      while (((wpf = atomic_load_relaxed (&rwlock->__data.__wrphase_futex))
	      | PTHREAD_RWLOCK_FUTEX_USED) == (1 | PTHREAD_RWLOCK_FUTEX_USED))
	{
	  int private = __pthread_rwlock_get_private (rwlock);
	  /* Mark the futex word as used before blocking on it; retry the
	     outer check if the CAS fails (the word may have changed).  */
	  if (((wpf & PTHREAD_RWLOCK_FUTEX_USED) == 0)
	      && (!atomic_compare_exchange_weak_relaxed
		  (&rwlock->__data.__wrphase_futex,
		   &wpf, wpf | PTHREAD_RWLOCK_FUTEX_USED)))
	    continue;
	  int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
					     1 | PTHREAD_RWLOCK_FUTEX_USED,
					     clockid, abstime, private);
	  if (err == ETIMEDOUT || err == EOVERFLOW)
	    {
	      /* If we timed out, we need to unregister.  If no read phase
		 has been installed while we waited, we can just decrement
		 the number of readers.  Otherwise, we just acquire the
		 lock, which is allowed because we give no precise timing
		 guarantees, and because the timeout is only required to
		 be in effect if we would have had to wait for other
		 threads (e.g., if futex_wait would time-out immediately
		 because the given absolute time is in the past).  */
	      r = atomic_load_relaxed (&rwlock->__data.__readers);
	      while ((r & PTHREAD_RWLOCK_WRPHASE) != 0)
		{
		  /* We don't need to make anything else visible to
		     others besides unregistering, so relaxed MO is
		     sufficient.  */
		  if (atomic_compare_exchange_weak_relaxed
		      (&rwlock->__data.__readers, &r,
		       r - (1 << PTHREAD_RWLOCK_READER_SHIFT)))
		    return err;
		  /* TODO Back-off.  */
		}
	      /* Use the acquire MO fence to mirror the steps taken in the
		 non-timeout case.  Note that the read can happen both
		 in the atomic_load above as well as in the failure case
		 of the CAS operation.  */
	      atomic_thread_fence_acquire ();
	      /* We still need to wait for explicit hand-over, but we must
		 not use futex_wait anymore because we would just time out
		 in this case and thus make the spin-waiting we need
		 unnecessarily expensive.  */
	      while ((atomic_load_relaxed (&rwlock->__data.__wrphase_futex)
		      | PTHREAD_RWLOCK_FUTEX_USED)
		     == (1 | PTHREAD_RWLOCK_FUTEX_USED))
		{
		  /* TODO Back-off?  */
		}
	      ready = true;
	      break;
	    }
	  /* If we got interrupted (EINTR) or the futex word does not have the
	     expected value (EAGAIN), retry.  */
	}
      if (ready)
	/* See below.  */
	break;
      /* We need acquire MO here so that we synchronize with the lock
	 release of the writer, and so that we observe a recent value of
	 __wrphase_futex (see below).  */
      if ((atomic_load_acquire (&rwlock->__data.__readers)
	   & PTHREAD_RWLOCK_WRPHASE) == 0)
	/* We are in a read phase now, so the least recent modification of
	   __wrphase_futex we can read from is the store by the writer
	   with value 1.  Thus, only now we can assume that if we observe
	   a value of 0, explicit hand-over is finished.  Retry the loop
	   above one more time.  */
	ready = true;
    }

  return 0;
}
524 | ||
525 | ||
/* Release the write lock: clear the current-writer TID, disable blocking
   on __writers_futex, and then either hand WRLOCKED over directly to
   another registered writer (if the lock kind prefers writers), or give
   up WRLOCKED and start a read phase (with explicit hand-over through
   __wrphase_futex) if readers are waiting.  Finally wake one blocked
   writer if any thread marked __writers_futex as used.  */
static __always_inline void
__pthread_rwlock_wrunlock (pthread_rwlock_t *rwlock)
{
  int private = __pthread_rwlock_get_private (rwlock);

  atomic_store_relaxed (&rwlock->__data.__cur_writer, 0);
  /* Disable waiting by writers.  We will wake up after we decided how to
     proceed.  */
  bool wake_writers
    = ((atomic_exchange_relaxed (&rwlock->__data.__writers_futex, 0)
	& PTHREAD_RWLOCK_FUTEX_USED) != 0);

  if (rwlock->__data.__flags != PTHREAD_RWLOCK_PREFER_READER_NP)
    {
      /* First, try to hand over to another writer.  */
      unsigned int w = atomic_load_relaxed (&rwlock->__data.__writers);
      while (w != 0)
	{
	  /* Release MO so that another writer that gets WRLOCKED from us will
	     synchronize with us and thus can take over our view of
	     __readers (including, for example, whether we are in a write
	     phase or not).  */
	  if (atomic_compare_exchange_weak_release
	      (&rwlock->__data.__writers, &w, w | PTHREAD_RWLOCK_WRHANDOVER))
	    /* Another writer will take over.  */
	    goto done;
	  /* TODO Back-off.  */
	}
    }

  /* We have done everything we needed to do to prefer writers, so now we
     either hand over explicitly to readers if there are any, or we simply
     stay in a write phase.  See pthread_rwlock_rdunlock for more details.  */
  unsigned int r = atomic_load_relaxed (&rwlock->__data.__readers);
  /* Release MO so that subsequent readers or writers synchronize with us.
     Clear WRLOCKED unconditionally; additionally clear WRPHASE (start a
     read phase) only if there are readers registered.  */
  while (!atomic_compare_exchange_weak_release
	 (&rwlock->__data.__readers, &r,
	  ((r ^ PTHREAD_RWLOCK_WRLOCKED)
	   ^ ((r >> PTHREAD_RWLOCK_READER_SHIFT) == 0 ? 0
	      : PTHREAD_RWLOCK_WRPHASE))))
    {
      /* TODO Back-off.  */
    }
  if ((r >> PTHREAD_RWLOCK_READER_SHIFT) != 0)
    {
      /* We must hand over explicitly through __wrphase_futex.  Relaxed MO is
	 sufficient because it is just used to delay acquisition by a writer;
	 any other synchronizes-with relations that are necessary are
	 established through __readers.  */
      if ((atomic_exchange_relaxed (&rwlock->__data.__wrphase_futex, 0)
	   & PTHREAD_RWLOCK_FUTEX_USED) != 0)
	futex_wake (&rwlock->__data.__wrphase_futex, INT_MAX, private);
    }

 done:
  /* We released WRLOCKED in some way, so wake a writer.  */
  if (wake_writers)
    futex_wake (&rwlock->__data.__writers_futex, 1, private);
}
585 | ||
586 | ||
/* Acquire a write lock on RWLOCK, blocking until ABSTIME (measured against
   CLOCKID) if necessary.  ABSTIME may be NULL for an unbounded wait.

   Returns 0 on success; EINVAL for an unsupported clock or invalid
   nanoseconds; EDEADLK if the caller already holds the write lock; and on a
   bounded wait, ETIMEDOUT or EOVERFLOW propagated from the futex wait.

   Acquisition proceeds in three stages:
   1. Become the primary writer by setting WRLOCKED in __readers (or, under
      writer preference, by grabbing a WRHANDOVER token from the previous
      primary writer), blocking on __writers_futex while another primary
      writer exists.
   2. If not already in a write phase, try to start one once no readers are
      present.
   3. Otherwise wait for explicit phase hand-over from the last reader via
      __wrphase_futex; on timeout, either hand WRLOCKED to another waiting
      writer or give it up entirely and wake the threads waiting on us.  */
static __always_inline int
__pthread_rwlock_wrlock_full64 (pthread_rwlock_t *rwlock, clockid_t clockid,
                                const struct __timespec64 *abstime)
{
  /* Make sure any passed in clockid and timeout value are valid.  Note that
     the previous implementation assumed that this check *must* not be
     performed if there would in fact be no blocking; however, POSIX only
     requires that "the validity of the abstime parameter need not be checked
     if the lock can be immediately acquired" (i.e., we need not but may check
     it).  */
  if (abstime && __glibc_unlikely (!futex_abstimed_supported_clockid (clockid)
                                   || ! valid_nanoseconds (abstime->tv_nsec)))
    return EINVAL;

  /* Make sure we are not holding the rwlock as a writer.  This is a deadlock
     situation we recognize and report.  */
  if (__glibc_unlikely (atomic_load_relaxed (&rwlock->__data.__cur_writer)
                        == THREAD_GETMEM (THREAD_SELF, tid)))
    return EDEADLK;

  /* First we try to acquire the role of primary writer by setting WRLOCKED;
     if it was set before, there already is a primary writer.  Acquire MO so
     that we synchronize with previous primary writers.

     We do not try to change to a write phase right away using a fetch_or
     because we would have to reset it again and wake readers if there are
     readers present (some readers could try to acquire the lock more than
     once, so setting a write phase in the middle of this could cause
     deadlock).  Changing to a write phase eagerly would only speed up the
     transition from a read phase to a write phase in the uncontended case,
     but it would slow down the contended case if readers are preferred (which
     is the default).
     We could try to CAS from a state with no readers to a write phase, but
     this could be less scalable if readers arrive and leave frequently.  */
  bool may_share_futex_used_flag = false;
  unsigned int r = atomic_fetch_or_acquire (&rwlock->__data.__readers,
                                            PTHREAD_RWLOCK_WRLOCKED);
  if (__glibc_unlikely ((r & PTHREAD_RWLOCK_WRLOCKED) != 0))
    {
      /* There is another primary writer.  */
      bool prefer_writer
        = (rwlock->__data.__flags != PTHREAD_RWLOCK_PREFER_READER_NP);
      if (prefer_writer)
        {
          /* We register as a waiting writer, so that we can make use of
             writer--writer hand-over.  Relaxed MO is fine because we just
             want to register.  We assume that the maximum number of threads
             is less than the capacity in __writers.  */
          atomic_fetch_add_relaxed (&rwlock->__data.__writers, 1);
        }
      for (;;)
        {
          /* TODO Spin until WRLOCKED is 0 before trying the CAS below.
             But pay attention to not delay trying writer--writer hand-over
             for too long (which we must try eventually anyway).  */
          if ((r & PTHREAD_RWLOCK_WRLOCKED) == 0)
            {
              /* Try to become the primary writer or retry.  Acquire MO as in
                 the fetch_or above.  */
              if (atomic_compare_exchange_weak_acquire
                  (&rwlock->__data.__readers, &r, r | PTHREAD_RWLOCK_WRLOCKED))
                {
                  if (prefer_writer)
                    {
                      /* Unregister as a waiting writer.  Note that because we
                         acquired WRLOCKED, WRHANDOVER will not be set.
                         Acquire MO on the CAS above ensures that
                         unregistering happens after the previous writer;
                         this sorts the accesses to __writers by all
                         primary writers in a useful way (e.g., any other
                         primary writer acquiring after us or getting it from
                         us through WRHANDOVER will see both our changes to
                         __writers).
                         ??? Perhaps this is not strictly necessary for
                         reasons we do not yet know of.  */
                      atomic_fetch_add_relaxed (&rwlock->__data.__writers, -1);
                    }
                  break;
                }
              /* Retry if the CAS fails (r will have been updated).  */
              continue;
            }
          /* If writer--writer hand-over is available, try to become the
             primary writer this way by grabbing the WRHANDOVER token.  If we
             succeed, we own WRLOCKED.  */
          if (prefer_writer)
            {
              unsigned int w = atomic_load_relaxed (&rwlock->__data.__writers);
              if ((w & PTHREAD_RWLOCK_WRHANDOVER) != 0)
                {
                  /* Acquire MO is required here so that we synchronize with
                     the writer that handed over WRLOCKED.  We also need this
                     for the reload of __readers below because our view of
                     __readers must be at least as recent as the view of the
                     writer that handed over WRLOCKED; we must avoid an ABA
                     through WRHANDOVER, which could, for example, lead to us
                     assuming we are still in a write phase when in fact we
                     are not.  */
                  if (atomic_compare_exchange_weak_acquire
                      (&rwlock->__data.__writers,
                       &w, (w - PTHREAD_RWLOCK_WRHANDOVER - 1)))
                    {
                      /* Reload so our view is consistent with the view of
                         the previous owner of WRLOCKED.  See above.  */
                      r = atomic_load_relaxed (&rwlock->__data.__readers);
                      break;
                    }
                  /* We do not need to reload __readers here.  We should try
                     to perform writer--writer hand-over if possible; if it
                     is not possible anymore, we will reload __readers
                     elsewhere in this loop.  */
                  continue;
                }
            }
          /* We did not acquire WRLOCKED nor were able to use writer--writer
             hand-over, so we block on __writers_futex.  */
          int private = __pthread_rwlock_get_private (rwlock);
          unsigned int wf
            = atomic_load_relaxed (&rwlock->__data.__writers_futex);
          if (((wf & ~(unsigned int) PTHREAD_RWLOCK_FUTEX_USED) != 1)
              || ((wf != (1 | PTHREAD_RWLOCK_FUTEX_USED))
                  && (!atomic_compare_exchange_weak_relaxed
                      (&rwlock->__data.__writers_futex, &wf,
                       1 | PTHREAD_RWLOCK_FUTEX_USED))))
            {
              /* If we cannot block on __writers_futex because there is no
                 primary writer, or we cannot set PTHREAD_RWLOCK_FUTEX_USED,
                 we retry.  We must reload __readers here in case we cannot
                 block on __writers_futex so that we can become the primary
                 writer and are not stuck in a loop that just continuously
                 fails to block on __writers_futex.  */
              r = atomic_load_relaxed (&rwlock->__data.__readers);
              continue;
            }
          /* We set the flag that signals that the futex is used, or we could
             have set it if we had been faster than other waiters.  As a
             result, we may share the flag with an unknown number of other
             writers.  Therefore, we must keep this flag set when we acquire
             the lock.  We do not need to do this when we do not reach this
             point here because then we are not part of the group that may
             share the flag, and another writer will wake one of the writers
             in this group.  */
          may_share_futex_used_flag = true;
          int err = __futex_abstimed_wait64 (&rwlock->__data.__writers_futex,
                                             1 | PTHREAD_RWLOCK_FUTEX_USED,
                                             clockid, abstime, private);
          if (err == ETIMEDOUT || err == EOVERFLOW)
            {
              if (prefer_writer)
                {
                  /* We need to unregister as a waiting writer.  If we are the
                     last writer and writer--writer hand-over is available,
                     we must make use of it because nobody else will reset
                     WRLOCKED otherwise.  (If we use it, we simply pretend
                     that this happened before the timeout; see
                     pthread_rwlock_rdlock_full for the full reasoning.)
                     Also see the similar code above.  */
                  unsigned int w
                    = atomic_load_relaxed (&rwlock->__data.__writers);
                  while (!atomic_compare_exchange_weak_acquire
                         (&rwlock->__data.__writers, &w,
                          (w == PTHREAD_RWLOCK_WRHANDOVER + 1 ? 0 : w - 1)))
                    {
                      /* TODO Back-off.  */
                    }
                  if (w == PTHREAD_RWLOCK_WRHANDOVER + 1)
                    {
                      /* We must continue as primary writer.  See above.  */
                      r = atomic_load_relaxed (&rwlock->__data.__readers);
                      break;
                    }
                }
              /* We cleaned up and cannot have stolen another waiting writer's
                 futex wake-up, so just return.  */
              return err;
            }
          /* If we got interrupted (EINTR) or the futex word does not have the
             expected value (EAGAIN), retry after reloading __readers.  */
          r = atomic_load_relaxed (&rwlock->__data.__readers);
        }
      /* Our snapshot of __readers is up-to-date at this point because we
         either set WRLOCKED using a CAS (and update r accordingly below,
         which was used as expected value for the CAS) or got WRLOCKED from
         another writer whose snapshot of __readers we inherit.  */
      r |= PTHREAD_RWLOCK_WRLOCKED;
    }

  /* We are the primary writer; enable blocking on __writers_futex.  Relaxed
     MO is sufficient for futex words; acquire MO on the previous
     modifications of __readers ensures that this store happens after the
     store of value 0 by the previous primary writer.  */
  atomic_store_relaxed (&rwlock->__data.__writers_futex,
                        1 | (may_share_futex_used_flag
                             ? PTHREAD_RWLOCK_FUTEX_USED : 0));

  /* If we are in a write phase, we have acquired the lock.  */
  if ((r & PTHREAD_RWLOCK_WRPHASE) != 0)
    goto done;

  /* If we are in a read phase and there are no readers, try to start a write
     phase.  */
  while ((r & PTHREAD_RWLOCK_WRPHASE) == 0
         && (r >> PTHREAD_RWLOCK_READER_SHIFT) == 0)
    {
      /* Acquire MO so that we synchronize with prior writers and do
         not interfere with their updates to __writers_futex, as well
         as regarding prior readers and their updates to __wrphase_futex,
         respectively.  */
      if (atomic_compare_exchange_weak_acquire (&rwlock->__data.__readers,
                                                &r, r | PTHREAD_RWLOCK_WRPHASE))
        {
          /* We have started a write phase, so need to enable readers to wait.
             See the similar case in __pthread_rwlock_rdlock_full.  Unlike in
             that similar case, we are the (only) primary writer and so do
             not need to wake another writer.  */
          atomic_store_relaxed (&rwlock->__data.__wrphase_futex, 1);

          goto done;
        }
      /* TODO Back-off.  */
    }

  /* We became the primary writer in a read phase and there were readers when
     we did (because of the previous loop).  Thus, we have to wait for
     explicit hand-over from one of these readers.
     We basically do the same steps as for the similar case in
     __pthread_rwlock_rdlock_full, except that we additionally might try
     to directly hand over to another writer and need to wake up
     other writers or waiting readers (i.e., PTHREAD_RWLOCK_RWAITING).  */
  unsigned int wpf;
  bool ready = false;
  for (;;)
    {
      while (((wpf = atomic_load_relaxed (&rwlock->__data.__wrphase_futex))
              | PTHREAD_RWLOCK_FUTEX_USED) == PTHREAD_RWLOCK_FUTEX_USED)
        {
          int private = __pthread_rwlock_get_private (rwlock);
          if ((wpf & PTHREAD_RWLOCK_FUTEX_USED) == 0
              && (!atomic_compare_exchange_weak_relaxed
                  (&rwlock->__data.__wrphase_futex, &wpf,
                   PTHREAD_RWLOCK_FUTEX_USED)))
            continue;
          int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
                                             PTHREAD_RWLOCK_FUTEX_USED,
                                             clockid, abstime, private);
          if (err == ETIMEDOUT || err == EOVERFLOW)
            {
              if (rwlock->__data.__flags != PTHREAD_RWLOCK_PREFER_READER_NP)
                {
                  /* We try writer--writer hand-over.  */
                  unsigned int w
                    = atomic_load_relaxed (&rwlock->__data.__writers);
                  if (w != 0)
                    {
                      /* We are about to hand over WRLOCKED, so we must
                         release __writers_futex too; otherwise, we'd have
                         a pending store, which could at least prevent
                         other threads from waiting using the futex
                         because it could interleave with the stores
                         by subsequent writers.  In turn, this means that
                         we have to clean up when we do not hand over
                         WRLOCKED.
                         Release MO so that another writer that gets
                         WRLOCKED from us can take over our view of
                         __readers.  */
                      unsigned int wf
                        = atomic_exchange_relaxed (&rwlock->__data.__writers_futex, 0);
                      while (w != 0)
                        {
                          if (atomic_compare_exchange_weak_release
                              (&rwlock->__data.__writers, &w,
                               w | PTHREAD_RWLOCK_WRHANDOVER))
                            {
                              /* Wake other writers.  */
                              if ((wf & PTHREAD_RWLOCK_FUTEX_USED) != 0)
                                futex_wake (&rwlock->__data.__writers_futex,
                                            1, private);
                              return err;
                            }
                          /* TODO Back-off.  */
                        }
                      /* We still own WRLOCKED and someone else might set
                         a write phase concurrently, so enable waiting
                         again.  Make sure we don't lose the flag that
                         signals whether there are threads waiting on
                         this futex.  */
                      atomic_store_relaxed (&rwlock->__data.__writers_futex, wf);
                    }
                }
              /* If we timed out and we are not in a write phase, we can
                 just stop being a primary writer.  Otherwise, we just
                 acquire the lock.  */
              r = atomic_load_relaxed (&rwlock->__data.__readers);
              if ((r & PTHREAD_RWLOCK_WRPHASE) == 0)
                {
                  /* We are about to release WRLOCKED, so we must release
                     __writers_futex too; see the handling of
                     writer--writer hand-over above.  */
                  unsigned int wf
                    = atomic_exchange_relaxed (&rwlock->__data.__writers_futex, 0);
                  while ((r & PTHREAD_RWLOCK_WRPHASE) == 0)
                    {
                      /* While we don't need to make anything from a
                         caller's critical section visible to other
                         threads, we need to ensure that our changes to
                         __writers_futex are properly ordered.
                         Therefore, use release MO to synchronize with
                         subsequent primary writers.  Also wake up any
                         waiting readers as they are waiting because of
                         us.  */
                      if (atomic_compare_exchange_weak_release
                          (&rwlock->__data.__readers, &r,
                           (r ^ PTHREAD_RWLOCK_WRLOCKED)
                           & ~(unsigned int) PTHREAD_RWLOCK_RWAITING))
                        {
                          /* Wake other writers.  */
                          if ((wf & PTHREAD_RWLOCK_FUTEX_USED) != 0)
                            futex_wake (&rwlock->__data.__writers_futex,
                                        1, private);
                          /* Wake waiting readers.  */
                          if ((r & PTHREAD_RWLOCK_RWAITING) != 0)
                            futex_wake (&rwlock->__data.__readers,
                                        INT_MAX, private);
                          return ETIMEDOUT;
                        }
                    }
                  /* We still own WRLOCKED and someone else might set a
                     write phase concurrently, so enable waiting again.
                     Make sure we don't lose the flag that signals
                     whether there are threads waiting on this futex.  */
                  atomic_store_relaxed (&rwlock->__data.__writers_futex, wf);
                }
              /* Use the acquire MO fence to mirror the steps taken in the
                 non-timeout case.  Note that the read can happen both
                 in the atomic_load above as well as in the failure case
                 of the CAS operation.  */
              atomic_thread_fence_acquire ();
              /* We still need to wait for explicit hand-over, but we must
                 not use futex_wait anymore.  */
              while ((atomic_load_relaxed (&rwlock->__data.__wrphase_futex)
                      | PTHREAD_RWLOCK_FUTEX_USED)
                     == PTHREAD_RWLOCK_FUTEX_USED)
                {
                  /* TODO Back-off.  */
                }
              ready = true;
              break;
            }
          /* If we got interrupted (EINTR) or the futex word does not have
             the expected value (EAGAIN), retry.  */
        }
      /* See pthread_rwlock_rdlock_full.  */
      if (ready)
        break;
      if ((atomic_load_acquire (&rwlock->__data.__readers)
           & PTHREAD_RWLOCK_WRPHASE) != 0)
        ready = true;
    }

 done:
  atomic_store_relaxed (&rwlock->__data.__cur_writer,
                        THREAD_GETMEM (THREAD_SELF, tid));
  return 0;
}