]>
Commit | Line | Data |
---|---|---|
04277e02 | 1 | /* Copyright (C) 2003-2019 Free Software Foundation, Inc. |
a88c9263 UD |
2 | This file is part of the GNU C Library. |
3 | Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003. | |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 PE |
16 | License along with the GNU C Library; if not, see |
17 | <http://www.gnu.org/licenses/>. */ | |
a88c9263 UD |
18 | |
19 | #include <errno.h> | |
20 | #include <sysdep.h> | |
a2f0363f | 21 | #include <futex-internal.h> |
a88c9263 UD |
22 | #include <pthreadP.h> |
23 | ||
24 | ||
b02840ba TR |
25 | /* Wait on the barrier. |
26 | ||
27 | In each round, we wait for a fixed number of threads to enter the barrier | |
28 | (COUNT). Once that has happened, exactly these threads are allowed to | |
29 | leave the barrier. Note that POSIX does not require that only COUNT | |
30 | threads can attempt to block using the barrier concurrently. | |
31 | ||
32 | We count the number of threads that have entered (IN). Each thread | |
33 | increments IN when entering, thus getting a position in the sequence of | |
34 | threads that are or have been waiting (starting with 1, so the position | |
35 | is the number of threads that have entered so far including the current | |
36 | thread). | |
37 | CURRENT_ROUND designates the most recent thread whose round has been | |
38 | detected as complete. When a thread detects that enough threads have | |
39 | entered to make a round complete, it finishes this round by effectively | |
40 | adding COUNT to CURRENT_ROUND atomically. Threads that believe that their | |
41 | round is not complete yet wait until CURRENT_ROUND is not smaller than | |
42 | their position anymore. | |
43 | ||
44 | A barrier can be destroyed as soon as no threads are blocked on the | |
45 | barrier. This is already the case if just one thread from the last round | |
46 | has stopped waiting and returned to the caller; the assumption is that | |
47 | all threads from the round are unblocked atomically, even though they may | |
48 | return at different times from the respective calls to | |
49 | pthread_barrier_wait). Thus, a valid call to pthread_barrier_destroy can | |
50 | be concurrent with other threads still figuring out that their round has | |
51 | been completed. Therefore, threads need to confirm that they have left | |
52 | the barrier by incrementing OUT, and pthread_barrier_destroy needs to wait | |
53 | until OUT equals IN. | |
54 | ||
55 | To avoid an ABA issue for futex_wait on CURRENT_ROUND and for archs with | |
56 | 32b-only atomics, we additionally reset the barrier when IN reaches | |
57 | a threshold to avoid overflow. We assume that the total number of threads | |
58 | is less than UINT_MAX/2, and set the threshold accordingly so that we can | |
59 | use a simple atomic_fetch_add on IN instead of a CAS when entering. The | |
60 | threshold is always set to the end of a round, so all threads that have | |
61 | entered are either pre-reset threads or post-reset threads (i.e., have a | |
62 | position larger than the threshold). | |
63 | Pre-reset threads just run the algorithm explained above. Post-reset | |
64 | threads wait until IN is reset to a pre-threshold value. | |
65 | When the last pre-reset thread leaves the barrier (i.e., OUT equals the | |
66 | threshold), it resets the barrier to its initial state. Other (post-reset) | |
67 | threads wait for the reset to have finished by waiting until IN is less | |
68 | than the threshold and then restart by trying to enter the barrier again. | |
69 | ||
70 | We reuse the reset mechanism in pthread_barrier_destroy to get notified | |
71 | when all threads have left the barrier: We trigger an artificial reset and | |
72 | wait for the last pre-reset thread to finish reset, thus notifying the | |
73 | thread that is about to destroy the barrier. | |
74 | ||
75 | Blocking using futexes is straightforward: pre-reset threads wait for | |
76 | completion of their round using CURRENT_ROUND as futex word, and post-reset | |
77 | threads and pthread_barrier_destroy use IN as futex word. | |
78 | ||
79 | Further notes: | |
80 | * It is not simple to let some of the post-reset threads help with the | |
81 | reset because of the ABA issues that arise; therefore, we simply make | |
82 | the last thread to leave responsible for the reset. | |
83 | * POSIX leaves it unspecified whether a signal handler running in a thread | |
84 | that has been unblocked (because its round is complete) can stall all | |
85 | other threads and prevent them from returning from the barrier. In this | |
86 | implementation, other threads will return. However, | |
87 | pthread_barrier_destroy will of course wait for the signal handler thread | |
88 | to confirm that it left the barrier. | |
89 | ||
90 | TODO We should add spinning with back-off. Once we do that, we could also | |
91 | try to avoid the futex_wake syscall when a round is detected as finished. | |
92 | If we do not spin, it is quite likely that at least some other threads will | |
93 | have called futex_wait already. */ | |
a88c9263 | 94 | int |
9d46370c | 95 | __pthread_barrier_wait (pthread_barrier_t *barrier) |
a88c9263 | 96 | { |
b02840ba TR |
97 | struct pthread_barrier *bar = (struct pthread_barrier *) barrier; |
98 | ||
99 | /* How many threads entered so far, including ourself. */ | |
100 | unsigned int i; | |
101 | ||
102 | reset_restart: | |
103 | /* Try to enter the barrier. We need acquire MO to (1) ensure that if we | |
104 | observe that our round can be completed (see below for our attempt to do | |
105 | so), all pre-barrier-entry effects of all threads in our round happen | |
106 | before us completing the round, and (2) to make our use of the barrier | |
107 | happen after a potential reset. We need release MO to make sure that our | |
108 | pre-barrier-entry effects happen before threads in this round leaving the | |
109 | barrier. */ | |
110 | i = atomic_fetch_add_acq_rel (&bar->in, 1) + 1; | |
111 | /* These loads are after the fetch_add so that we're less likely to first | |
112 | pull in the cache line as shared. */ | |
113 | unsigned int count = bar->count; | |
114 | /* This is the number of threads that can enter before we need to reset. | |
115 | Always at the end of a round. */ | |
116 | unsigned int max_in_before_reset = BARRIER_IN_THRESHOLD | |
117 | - BARRIER_IN_THRESHOLD % count; | |
118 | ||
119 | if (i > max_in_before_reset) | |
a88c9263 | 120 | { |
b02840ba TR |
121 | /* We're in a reset round. Just wait for a reset to finish; do not |
122 | help finishing previous rounds because this could happen | |
123 | concurrently with a reset. */ | |
124 | while (i > max_in_before_reset) | |
125 | { | |
126 | futex_wait_simple (&bar->in, i, bar->shared); | |
127 | /* Relaxed MO is fine here because we just need an indication for | |
128 | when we should retry to enter (which will use acquire MO, see | |
129 | above). */ | |
130 | i = atomic_load_relaxed (&bar->in); | |
131 | } | |
132 | goto reset_restart; | |
a88c9263 | 133 | } |
37c054c7 | 134 | |
b02840ba TR |
135 | /* Look at the current round. At this point, we are just interested in |
136 | whether we can complete rounds, based on the information we obtained | |
137 | through our acquire-MO load of IN. Nonetheless, if we notice that | |
138 | our round has been completed using this load, we use the acquire-MO | |
139 | fence below to make sure that all pre-barrier-entry effects of all | |
140 | threads in our round happen before us leaving the barrier. Therefore, | |
141 | relaxed MO is sufficient. */ | |
142 | unsigned cr = atomic_load_relaxed (&bar->current_round); | |
143 | ||
144 | /* Try to finish previous rounds and/or the current round. We simply | |
145 | consider just our position here and do not try to do the work of threads | |
146 | that entered more recently. */ | |
147 | while (cr + count <= i) | |
148 | { | |
149 | /* Calculate the new current round based on how many threads entered. | |
150 | NEWCR must be larger than CR because CR+COUNT ends a round. */ | |
151 | unsigned int newcr = i - i % count; | |
152 | /* Try to complete previous and/or the current round. We need release | |
153 | MO to propagate the happens-before that we observed through reading | |
154 | with acquire MO from IN to other threads. If the CAS fails, it | |
155 | is like the relaxed-MO load of CURRENT_ROUND above. */ | |
156 | if (atomic_compare_exchange_weak_release (&bar->current_round, &cr, | |
157 | newcr)) | |
158 | { | |
159 | /* Update CR with the modification we just did. */ | |
160 | cr = newcr; | |
161 | /* Wake threads belonging to the rounds we just finished. We may | |
162 | wake more threads than necessary if more than COUNT threads try | |
163 | to block concurrently on the barrier, but this is not a typical | |
164 | use of barriers. | |
165 | Note that we can still access SHARED because we haven't yet | |
166 | confirmed to have left the barrier. */ | |
167 | futex_wake (&bar->current_round, INT_MAX, bar->shared); | |
168 | /* We did as much as we could based on our position. If we advanced | |
169 | the current round to a round sufficient for us, do not wait for | |
170 | that to happen and skip the acquire fence (we already | |
171 | synchronize-with all other threads in our round through the | |
172 | initial acquire MO fetch_add of IN. */ | |
173 | if (i <= cr) | |
174 | goto ready_to_leave; | |
175 | else | |
176 | break; | |
177 | } | |
178 | } | |
37c054c7 | 179 | |
b02840ba TR |
180 | /* Wait until the current round is more recent than the round we are in. */ |
181 | while (i > cr) | |
182 | { | |
183 | /* Wait for the current round to finish. */ | |
184 | futex_wait_simple (&bar->current_round, cr, bar->shared); | |
185 | /* See the fence below. */ | |
186 | cr = atomic_load_relaxed (&bar->current_round); | |
a88c9263 | 187 | } |
a88c9263 | 188 | |
b02840ba TR |
189 | /* Our round finished. Use the acquire MO fence to synchronize-with the |
190 | thread that finished the round, either through the initial load of | |
191 | CURRENT_ROUND above or a failed CAS in the loop above. */ | |
192 | atomic_thread_fence_acquire (); | |
193 | ||
194 | /* Now signal that we left. */ | |
195 | unsigned int o; | |
196 | ready_to_leave: | |
197 | /* We need release MO here so that our use of the barrier happens before | |
198 | reset or memory reuse after pthread_barrier_destroy. */ | |
199 | o = atomic_fetch_add_release (&bar->out, 1) + 1; | |
200 | if (o == max_in_before_reset) | |
201 | { | |
202 | /* Perform a reset if we are the last pre-reset thread leaving. All | |
203 | other threads accessing the barrier are post-reset threads and are | |
204 | incrementing or spinning on IN. Thus, resetting IN as the last step | |
205 | of reset ensures that the reset is not concurrent with actual use of | |
206 | the barrier. We need the acquire MO fence so that the reset happens | |
207 | after use of the barrier by all earlier pre-reset threads. */ | |
208 | atomic_thread_fence_acquire (); | |
209 | atomic_store_relaxed (&bar->current_round, 0); | |
210 | atomic_store_relaxed (&bar->out, 0); | |
211 | /* When destroying the barrier, we wait for a reset to happen. Thus, | |
212 | we must load SHARED now so that this happens before the barrier is | |
213 | destroyed. */ | |
214 | int shared = bar->shared; | |
215 | atomic_store_release (&bar->in, 0); | |
216 | futex_wake (&bar->in, INT_MAX, shared); | |
37c054c7 | 217 | |
b02840ba | 218 | } |
a88c9263 | 219 | |
b02840ba TR |
220 | /* Return a special value for exactly one thread per round. */ |
221 | return i % count == 0 ? PTHREAD_BARRIER_SERIAL_THREAD : 0; | |
a88c9263 | 222 | } |
90dd5913 | 223 | weak_alias (__pthread_barrier_wait, pthread_barrier_wait) |