/* Copyright (C) 2005-2024 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */

#include "libgomp.h"
#include <stdlib.h>


/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   to perform; nonzero if not.  Return less than 0 if this thread had
   received the absolutely last iteration.  */
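/* Illustrative caller sketch (hypothetical; the real callers live elsewhere
   in libgomp, e.g. loop.c), shown for a loop with a positive increment,
   where incr and body stand in for the loop's increment and body:

       long s, e;
       while (gomp_iter_static_next (&s, &e) == 0)
         for (long x = s; x < e; x += incr)
           body (x);  */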
int
gomp_iter_static_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next;
      *pend = ws->end;
      thr->ts.static_trip = -1;
      return ws->next == ws->end;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size == 0)
    {
      unsigned long n, q, i, t;
      unsigned long s0, e0;
      long s, e;

      if (thr->ts.static_trip > 0)
        return 1;

      /* Compute the total number of iterations.  */
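      /* The bias s below turns the division into a round-up: n becomes
         (end - next) / incr rounded away from zero, valid for either sign
         of the increment.  For example, next = 0, end = 10, incr = 3 gives
         n = (10 + 2) / 3 = 4, matching the iterations 0, 3, 6, 9.  */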
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
         if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      t = n % nthreads;
      if (i < t)
        {
          t = 0;
          q++;
        }
      s0 = q * i + t;
      e0 = s0 + q;
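      /* For example, n = 10 iterations over nthreads = 4 threads yields
         q = 2, t = 2; threads 0 and 1 each take three iterations ([0,3)
         and [3,6)), threads 2 and 3 two each ([6,8) and [8,10)).  */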
      /* Notice when no iterations are allocated for this thread.  */
      if (s0 >= e0)
        {
          thr->ts.static_trip = 1;
          return 1;
        }

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      unsigned long n, s0, e0, i, c;
      long s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
         (if available) each time through the loop.  */

      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;
      c = ws->chunk_size;

      /* Initial guess is a C sized chunk positioned static_trip * nthreads
         chunks in, offset by our thread number.  */
      s0 = (thr->ts.static_trip * nthreads + i) * c;
      e0 = s0 + c;
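      /* Illustrative example: with nthreads == 3 and chunk_size == 4,
         thread 1 sees zero-based chunks [4,8) on trip 0, [16,20) on
         trip 1, [28,32) on trip 2, and so on.  */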
      /* Detect overflow.  */
      if (s0 >= n)
        return 1;
      if (e0 > n)
        e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;

      if (e0 == n)
        thr->ts.static_trip = -1;
      else
        thr->ts.static_trip++;
      return 0;
    }
}


/* This function implements the DYNAMIC scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with ws->lock
   held.  */
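/* Each call simply advances ws->next by one chunk of iterations under the
   work-share lock and hands that range back to the caller; threads that
   finish early come back for more until ws->next reaches ws->end.  */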
bool
gomp_iter_dynamic_next_locked (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, chunk, left;

  start = ws->next;
  if (start == ws->end)
    return false;

  chunk = ws->chunk_size;
  left = ws->end - start;
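  /* Clamp the chunk to the iterations that remain.  Note that for dynamic
     scheduling, ws->chunk_size is assumed here to have been pre-scaled by
     the increment when the work share was initialized (in loop.c), so with
     a negative increment both chunk and left are negative and the
     comparison direction below is reversed.  */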
  if (ws->incr < 0)
    {
      if (chunk < left)
        chunk = left;
    }
  else
    {
      if (chunk > left)
        chunk = left;
    }
  end = start + chunk;

  ws->next = end;
  *pstart = start;
  *pend = end;
  return true;
}


#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */

bool
gomp_iter_dynamic_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, nend, chunk, incr;

  end = ws->end;
  incr = ws->incr;
  chunk = ws->chunk_size;

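  /* Fast path: when ws->mode is nonzero, the work-share setup has
     (presumably) determined that ws->next can be pushed past ws->end
     without overflow, so an unconditional fetch-and-add suffices; a
     thread that fetches a start at or past the end simply reports no
     work.  */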
  if (__builtin_expect (ws->mode, 1))
    {
      long tmp = __sync_fetch_and_add (&ws->next, chunk);
      if (incr > 0)
        {
          if (tmp >= end)
            return false;
          nend = tmp + chunk;
          if (nend > end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
      else
        {
          if (tmp <= end)
            return false;
          nend = tmp + chunk;
          if (nend < end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
    }

  start = __atomic_load_n (&ws->next, MEMMODEL_RELAXED);
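  /* Slow path: claim the next chunk with a compare-and-swap loop.  If the
     CAS fails, another thread took iterations first; retry from the value
     it installed (returned in tmp) and recompute the chunk.  */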
  while (1)
    {
      long left = end - start;
      long tmp;

      if (start == end)
        return false;

      if (incr < 0)
        {
          if (chunk < left)
            chunk = left;
        }
      else
        {
          if (chunk > left)
            chunk = left;
        }
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
        break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */


/* This function implements the GUIDED scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with the
   work share lock held.  */
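/* Each call hands out roughly (remaining iterations) / nthreads iterations,
   so chunks shrink as the loop drains but never drop below chunk_size.
   Illustrative example: with 100 iterations left, 4 threads and chunk_size
   1, successive calls take about 25, 19, 14, 11, ... iterations.  */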
bool
gomp_iter_guided_next_locked (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  unsigned long n, q;
  long start, end;

  if (ws->next == ws->end)
    return false;

  start = ws->next;
  n = (ws->end - start) / ws->incr;
  q = (n + nthreads - 1) / nthreads;

  if (q < ws->chunk_size)
    q = ws->chunk_size;
  if (q <= n)
    end = start + q * ws->incr;
  else
    end = ws->end;

  ws->next = end;
  *pstart = start;
  *pend = end;
  return true;
}

#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */

bool
gomp_iter_guided_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  long start, end, nend, incr;
  unsigned long chunk_size;

  start = __atomic_load_n (&ws->next, MEMMODEL_RELAXED);
  end = ws->end;
  incr = ws->incr;
  chunk_size = ws->chunk_size;

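  /* Lock-free variant: recompute the chunk from the freshly observed
     ws->next on every retry, since a failed compare-and-swap below means
     another thread already claimed part of the remaining range.  */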
  while (1)
    {
      unsigned long n, q;
      long tmp;

      if (start == end)
        return false;

      n = (end - start) / incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
        q = chunk_size;
      if (__builtin_expect (q <= n, 1))
        nend = start + q * incr;
      else
        nend = end;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
        break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */