/* Copyright (C) 2005-2024 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */

#include "libgomp.h"
#include <stdlib.h>

typedef unsigned long long gomp_ull;

/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   to perform; nonzero if not.  Return a value less than zero if this thread
   received the very last iteration.  */

int
gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next_ull;
      *pend = ws->end_ull;
      thr->ts.static_trip = -1;
      return ws->next_ull == ws->end_ull;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size_ull == 0)
    {
      gomp_ull n, q, i, t, s0, e0, s, e;

      if (thr->ts.static_trip > 0)
        return 1;

      /* Compute the total number of iterations.  */
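      /* Note: a nonzero ws->mode here indicates a loop with a negative
         increment, where incr_ull holds the step's unsigned (wrapped)
         representation, so the trip count is computed from -incr_ull.  */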
      if (__builtin_expect (ws->mode, 0) == 0)
        n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
        n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
         if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      t = n % nthreads;
      if (i < t)
        {
          t = 0;
          q++;
        }
      s0 = q * i + t;
      e0 = s0 + q;
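      /* For example, n = 10 iterations split over nthreads = 4 gives
         q = 2 and t = 2, so threads 0-3 receive the zero-based ranges
         [0,3), [3,6), [6,8) and [8,10) respectively.  */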

      /* Notice when no iterations are allocated for this thread.  */
      if (s0 >= e0)
        {
          thr->ts.static_trip = 1;
          return 1;
        }

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      gomp_ull n, s0, e0, i, c, s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
         (if available) each time through the loop.  */

      if (__builtin_expect (ws->mode, 0) == 0)
        n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
        n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;
      c = ws->chunk_size_ull;

      /* Initial guess is a C sized chunk positioned nthreads iterations
         in, offset by our thread number.  */
      s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
      e0 = s0 + c;
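      /* For example, with c = 4 and nthreads = 3, thread 1 computes the
         zero-based chunk [4,8) on its first trip (static_trip == 0) and
         [16,20) on its second.  */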

      /* Detect overflow.  */
      if (s0 >= n)
        return 1;
      if (e0 > n)
        e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;

      if (e0 == n)
        thr->ts.static_trip = -1;
      else
        thr->ts.static_trip++;
      return 0;
    }
}


/* This function implements the DYNAMIC scheduling method.  Arguments are
   as for gomp_iter_ull_static_next.  This function must be called with
   ws->lock held.  */

bool
gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  gomp_ull start, end, chunk, left;

  start = ws->next_ull;
  if (start == ws->end_ull)
    return false;

  chunk = ws->chunk_size_ull;
  left = ws->end_ull - start;
  if (__builtin_expect (ws->mode & 2, 0))
    {
      if (chunk < left)
        chunk = left;
    }
  else
    {
      if (chunk > left)
        chunk = left;
    }
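  /* For an upward unit-stride loop, e.g. next_ull = 0, end_ull = 100 and
     chunk_size_ull = 16, successive calls return [0,16), [16,32), ... and
     finally the short tail [96,100).  The ws->mode & 2 branch above handles
     downward loops, where chunk and left are negative values stored in
     unsigned form.  */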
  end = start + chunk;

  ws->next_ull = end;
  *pstart = start;
  *pend = end;
  return true;
}


#if defined HAVE_SYNC_BUILTINS && defined __LP64__
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next_ull.  */

bool
gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  gomp_ull start, end, nend, chunk;

  end = ws->end_ull;
  chunk = ws->chunk_size_ull;

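  /* Fast path: when bit 0 of ws->mode is set, next_ull is advanced with a
     single fetch-and-add instead of the compare-and-swap retry loop below;
     the work-share initialization only enables this when the unconditional
     addition is known not to wrap around.  */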
  if (__builtin_expect (ws->mode & 1, 1))
    {
      gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
      if (__builtin_expect (ws->mode & 2, 0) == 0)
        {
          if (tmp >= end)
            return false;
          nend = tmp + chunk;
          if (nend > end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
      else
        {
          if (tmp <= end)
            return false;
          nend = tmp + chunk;
          if (nend < end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
    }

  start = __atomic_load_n (&ws->next_ull, MEMMODEL_RELAXED);
  while (1)
    {
      gomp_ull left = end - start;
      gomp_ull tmp;

      if (start == end)
        return false;

      if (__builtin_expect (ws->mode & 2, 0))
        {
          if (chunk < left)
            chunk = left;
        }
      else
        {
          if (chunk > left)
            chunk = left;
        }
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
        break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */


/* This function implements the GUIDED scheduling method.  Arguments are
   as for gomp_iter_ull_static_next.  This function must be called with the
   work share lock held.  */

bool
gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  gomp_ull nthreads = team ? team->nthreads : 1;
  gomp_ull n, q;
  gomp_ull start, end;

  if (ws->next_ull == ws->end_ull)
    return false;

  start = ws->next_ull;
  if (__builtin_expect (ws->mode, 0) == 0)
    n = (ws->end_ull - start) / ws->incr_ull;
  else
    n = (start - ws->end_ull) / -ws->incr_ull;
  q = (n + nthreads - 1) / nthreads;

  if (q < ws->chunk_size_ull)
    q = ws->chunk_size_ull;
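  /* For example, a loop of 100 iterations with incr_ull = 1, nthreads = 4
     and chunk_size_ull = 1 hands out 25, then 19, then 14, ... iterations,
     shrinking towards chunk_size_ull as the remaining work decreases.  */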
  if (q <= n)
    end = start + q * ws->incr_ull;
  else
    end = ws->end_ull;

  ws->next_ull = end;
  *pstart = start;
  *pend = end;
  return true;
}

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next_ull.  */

bool
gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  gomp_ull nthreads = team ? team->nthreads : 1;
  gomp_ull start, end, nend, incr;
  gomp_ull chunk_size;

  start = __atomic_load_n (&ws->next_ull, MEMMODEL_RELAXED);
  end = ws->end_ull;
  incr = ws->incr_ull;
  chunk_size = ws->chunk_size_ull;

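  /* end, incr and chunk_size do not change while the work share is active,
     so they are read once up front; only next_ull is contended, and the
     compare-and-swap below re-reads it on each retry via the returned
     value.  */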
  while (1)
    {
      gomp_ull n, q;
      gomp_ull tmp;

      if (start == end)
        return false;

      if (__builtin_expect (ws->mode, 0) == 0)
        n = (end - start) / incr;
      else
        n = (start - end) / -incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
        q = chunk_size;
      if (__builtin_expect (q <= n, 1))
        nend = start + q * incr;
      else
        nend = end;

      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
        break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */