]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgomp/iter.c
Update copyright years.
[thirdparty/gcc.git] / libgomp / iter.c
CommitLineData
f1717362 1/* Copyright (C) 2005-2016 Free Software Foundation, Inc.
1e8e9920 2 Contributed by Richard Henderson <rth@redhat.com>.
3
c35c9a62 4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
1e8e9920 6
7 Libgomp is free software; you can redistribute it and/or modify it
6bc9506f 8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
1e8e9920 11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
6bc9506f 14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
1e8e9920 15 more details.
16
6bc9506f 17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
1e8e9920 25
26/* This file contains routines for managing work-share iteration, both
27 for loops and sections. */
28
29#include "libgomp.h"
30#include <stdlib.h>
31
32
33/* This function implements the STATIC scheduling method. The caller should
34 iterate *pstart <= x < *pend. Return zero if there are more iterations
35 to perform; nonzero if not. Return less than 0 if this thread had
36 received the absolutely last iteration. */
37
int
gomp_iter_static_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  /* static_trip == -1 records that this thread already received the
     final block of iterations; nothing more to hand out.  */
  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  A lone
     thread takes the whole remaining range in one trip.  */
  if (nthreads == 1)
    {
      *pstart = ws->next;
      *pend = ws->end;
      thr->ts.static_trip = -1;
      /* Nonzero (no work) only when the range is already empty.  */
      return ws->next == ws->end;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size == 0)
    {
      unsigned long n, q, i, t;
      unsigned long s0, e0;
      long s, e;

      /* Unspecified chunking yields exactly one trip per thread.  */
      if (thr->ts.static_trip > 0)
	return 1;

      /* Compute the total number of iterations.  Adding incr-1 (or
	 incr+1 for a negative stride) before dividing rounds the
	 trip count up, covering a final partial stride.  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
	 if the loop began at zero and incremented by one.  Each thread
	 gets floor(n/nthreads) iterations; the first n%nthreads
	 threads get one extra.  */
      q = n / nthreads;
      t = n % nthreads;
      if (i < t)
	{
	  t = 0;
	  q++;
	}
      s0 = q * i + t;
      e0 = s0 + q;

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
	{
	  thr->ts.static_trip = 1;
	  return 1;
	}

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;
      /* -1 (absolutely last iterations) only for the thread whose
	 block reaches the end of the range.  */
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      unsigned long n, s0, e0, i, c;
      long s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
	 (if available) each time through the loop.  */

      /* Round the trip count up, as above.  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;
      c = ws->chunk_size;

      /* Initial guess is a C sized chunk positioned nthreads iterations
	 in, offset by our thread number.  static_trip counts how many
	 rounds this thread has completed.  */
      s0 = (thr->ts.static_trip * nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  Past the end of the range: this thread is
	 done; clip a final partial chunk to the range end.  */
      if (s0 >= n)
	return 1;
      if (e0 > n)
	e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;

      if (e0 == n)
	thr->ts.static_trip = -1;
      else
	thr->ts.static_trip++;
      return 0;
    }
}
141
142
143/* This function implements the DYNAMIC scheduling method. Arguments are
144 as for gomp_iter_static_next. This function must be called with ws->lock
145 held. */
146
147bool
148gomp_iter_dynamic_next_locked (long *pstart, long *pend)
149{
150 struct gomp_thread *thr = gomp_thread ();
151 struct gomp_work_share *ws = thr->ts.work_share;
152 long start, end, chunk, left;
153
154 start = ws->next;
155 if (start == ws->end)
156 return false;
157
fd6481cf 158 chunk = ws->chunk_size;
1e8e9920 159 left = ws->end - start;
160 if (ws->incr < 0)
161 {
162 if (chunk < left)
163 chunk = left;
164 }
165 else
166 {
167 if (chunk > left)
168 chunk = left;
169 }
170 end = start + chunk;
171
172 ws->next = end;
173 *pstart = start;
174 *pend = end;
175 return true;
176}
177
178
179#ifdef HAVE_SYNC_BUILTINS
180/* Similar, but doesn't require the lock held, and uses compare-and-swap
181 instead. Note that the only memory value that changes is ws->next. */
182
bool
gomp_iter_dynamic_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, nend, chunk, incr;

  end = ws->end;
  incr = ws->incr;
  chunk = ws->chunk_size;

  /* Fast path: unconditional fetch-and-add.  ws->mode nonzero
     presumably guarantees start + chunk cannot overflow past end by
     more than one chunk — set at work-share init; TODO confirm against
     gomp_loop_init.  */
  if (__builtin_expect (ws->mode, 1))
    {
      long tmp = __sync_fetch_and_add (&ws->next, chunk);
      if (incr > 0)
	{
	  /* tmp is the old ws->next; at or beyond end means another
	     thread already consumed the rest of the range.  */
	  if (tmp >= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend > end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
      else
	{
	  /* Negative stride: chunk is negative and the range counts
	     down, so all comparisons are mirrored.  */
	  if (tmp <= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend < end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
    }

  /* Slow path: compare-and-swap retry loop, clamping the chunk to the
     remaining range on each attempt.  */
  start = __atomic_load_n (&ws->next, MEMMODEL_RELAXED);
  while (1)
    {
      long left = end - start;
      long tmp;

      if (start == end)
	return false;

      /* Clamp the chunk to what is left; the comparison direction
	 flips for a negative increment (chunk and left both < 0).  */
      if (incr < 0)
	{
	  if (chunk < left)
	    chunk = left;
	}
      else
	{
	  if (chunk > left)
	    chunk = left;
	}
      nend = start + chunk;

      /* Success when no other thread moved ws->next since we read
	 start; otherwise retry from the value we lost to.  */
      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
253#endif /* HAVE_SYNC_BUILTINS */
254
255
256/* This function implements the GUIDED scheduling method. Arguments are
257 as for gomp_iter_static_next. This function must be called with the
258 work share lock held. */
259
260bool
261gomp_iter_guided_next_locked (long *pstart, long *pend)
262{
263 struct gomp_thread *thr = gomp_thread ();
264 struct gomp_work_share *ws = thr->ts.work_share;
265 struct gomp_team *team = thr->ts.team;
266 unsigned long nthreads = team ? team->nthreads : 1;
267 unsigned long n, q;
268 long start, end;
269
270 if (ws->next == ws->end)
271 return false;
272
5205ccb0 273 start = ws->next;
274 n = (ws->end - start) / ws->incr;
1e8e9920 275 q = (n + nthreads - 1) / nthreads;
276
277 if (q < ws->chunk_size)
278 q = ws->chunk_size;
5205ccb0 279 if (q <= n)
280 end = start + q * ws->incr;
281 else
282 end = ws->end;
1e8e9920 283
284 ws->next = end;
285 *pstart = start;
286 *pend = end;
287 return true;
288}
289
290#ifdef HAVE_SYNC_BUILTINS
291/* Similar, but doesn't require the lock held, and uses compare-and-swap
292 instead. Note that the only memory value that changes is ws->next. */
293
bool
gomp_iter_guided_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  long start, end, nend, incr;
  unsigned long chunk_size;

  /* Snapshot the moving cursor; end/incr/chunk_size are fixed for the
     lifetime of the work share, so plain reads suffice.  */
  start = __atomic_load_n (&ws->next, MEMMODEL_RELAXED);
  end = ws->end;
  incr = ws->incr;
  chunk_size = ws->chunk_size;

  /* CAS retry loop: recompute the guided share from the current
     cursor on every attempt.  */
  while (1)
    {
      unsigned long n, q;
      long tmp;

      if (start == end)
	return false;

      /* Remaining trip count, and an even per-thread share of it
	 rounded up — the guided schedule hands out a shrinking
	 fraction of what is left.  */
      n = (end - start) / incr;
      q = (n + nthreads - 1) / nthreads;

      /* Never hand out fewer than chunk_size iterations, but clip a
	 final oversized share to the end of the range.  */
      if (q < chunk_size)
	q = chunk_size;
      if (__builtin_expect (q <= n, 1))
	nend = start + q * incr;
      else
	nend = end;

      /* Success when no other thread moved ws->next since we read
	 start; otherwise retry from the value we lost to.  */
      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
338#endif /* HAVE_SYNC_BUILTINS */