]>
Commit | Line | Data |
---|---|---|
83ffe9cd | 1 | /* Copyright (C) 2005-2023 Free Software Foundation, Inc. |
953ff289 DN |
2 | Contributed by Richard Henderson <rth@redhat.com>. |
3 | ||
f1f3453e TS |
4 | This file is part of the GNU Offloading and Multi Processing Library |
5 | (libgomp). | |
953ff289 DN |
6 | |
7 | Libgomp is free software; you can redistribute it and/or modify it | |
748086b7 JJ |
8 | under the terms of the GNU General Public License as published by |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
953ff289 DN |
11 | |
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
748086b7 | 14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
953ff289 DN |
15 | more details. |
16 | ||
748086b7 JJ |
17 | Under Section 7 of GPL version 3, you are granted additional |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
953ff289 DN |
25 | |
26 | /* This file handles the (bare) PARALLEL construct. */ | |
27 | ||
28 | #include "libgomp.h" | |
a68ab351 | 29 | #include <limits.h> |
953ff289 DN |
30 | |
31 | ||
32 | /* Determine the number of threads to be launched for a PARALLEL construct. | |
a68ab351 | 33 | This algorithm is explicitly described in OpenMP 3.0 section 2.4.1. |
953ff289 DN |
34 | SPECIFIED is a combination of the NUM_THREADS clause and the IF clause. |
35 | If the IF clause is false, SPECIFIED is forced to 1. When NUM_THREADS | |
36 | is not present, SPECIFIED is 0. */ | |
37 | ||
/* Determine the number of threads to be launched for a PARALLEL construct.
   This algorithm is explicitly described in OpenMP 3.0 section 2.4.1.
   SPECIFIED is a combination of the NUM_THREADS clause and the IF clause.
   If the IF clause is false, SPECIFIED is forced to 1.  When NUM_THREADS
   is not present, SPECIFIED is 0.  COUNT, when non-zero, additionally
   caps the team size under dynamic thread adjustment (used for parallel
   sections, where more threads than sections would be wasted).  */

unsigned
gomp_resolve_num_threads (unsigned specified, unsigned count)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task_icv *icv;
  unsigned threads_requested, max_num_threads, num_threads;
  unsigned long busy;
  struct gomp_thread_pool *pool;

  icv = gomp_icv (false);

  /* A false IF clause (SPECIFIED forced to 1) always means a team of one.  */
  if (specified == 1)
    return 1;

  if (thr->ts.active_level >= 1
  /* Accelerators with fixed thread counts require this to return 1 for
     nested parallel regions.  */
#if !defined(__AMDGCN__) && !defined(__nvptx__)
      && icv->max_active_levels_var <= 1
#endif
      )
    return 1;
  else if (thr->ts.active_level >= icv->max_active_levels_var)
    return 1;

  /* If NUM_THREADS not specified, use nthreads_var.  */
  if (specified == 0)
    threads_requested = icv->nthreads_var;
  else
    threads_requested = specified;

  max_num_threads = threads_requested;

  /* If dynamic threads are enabled, bound the number of threads
     that we launch.  */
  if (icv->dyn_var)
    {
      unsigned dyn = gomp_dynamic_max_threads ();
      if (dyn < max_num_threads)
	max_num_threads = dyn;

      /* Optimization for parallel sections.  */
      if (count && count < max_num_threads)
	max_num_threads = count;
    }

  /* UINT_MAX stands for infinity.  */
  if (__builtin_expect (icv->thread_limit_var == UINT_MAX, 1)
      || max_num_threads == 1)
    return max_num_threads;

  /* The threads_busy counter lives in thread_pool, if there
     isn't a thread_pool yet, there must be just one thread
     in the contention group.  If thr->team is NULL, this isn't
     nested parallel, so there is just one thread in the
     contention group as well, no need to handle it atomically.  */
  pool = thr->thread_pool;
  if (thr->ts.team == NULL || pool == NULL)
    {
      num_threads = max_num_threads;
      if (num_threads > icv->thread_limit_var)
	num_threads = icv->thread_limit_var;
      if (pool)
	pool->threads_busy = num_threads;
      return num_threads;
    }

#ifdef HAVE_SYNC_BUILTINS
  /* Reserve threads against the contention-group limit with a CAS loop:
     retry until threads_busy is updated without interference from a
     concurrent team start.  The "+ 1" accounts for the current thread,
     which is already counted in threads_busy (GOMP_parallel_end likewise
     subtracts nthreads - 1 rather than nthreads).  */
  do
    {
      busy = pool->threads_busy;
      num_threads = max_num_threads;
      if (icv->thread_limit_var - busy + 1 < num_threads)
	num_threads = icv->thread_limit_var - busy + 1;
    }
  while (__sync_val_compare_and_swap (&pool->threads_busy,
				      busy, busy + num_threads - 1)
	 != busy);
#else
  /* No atomics available: serialize the reservation with the global
     managed-threads lock instead.  */
  gomp_mutex_lock (&gomp_managed_threads_lock);
  num_threads = max_num_threads;
  busy = pool->threads_busy;
  if (icv->thread_limit_var - busy + 1 < num_threads)
    num_threads = icv->thread_limit_var - busy + 1;
  pool->threads_busy += num_threads - 1;
  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif

  return num_threads;
}
128 | ||
129 | void | |
130 | GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads) | |
131 | { | |
a68ab351 | 132 | num_threads = gomp_resolve_num_threads (num_threads, 0); |
28567c40 JJ |
133 | gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads), |
134 | NULL); | |
953ff289 DN |
135 | } |
136 | ||
/* End the current PARALLEL region.  When a finite thread limit is in
   effect, also return this team's threads to the contention group's
   threads_busy counter (all but the current thread, which remains
   counted as busy).  */

void
GOMP_parallel_end (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  /* thread_limit_var == UINT_MAX (no limit) is the expected common case;
     then no busy-count bookkeeping is needed at all.  */
  if (__builtin_expect (icv->thread_limit_var != UINT_MAX, 0))
    {
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;
      unsigned int nthreads = team ? team->nthreads : 1;
      gomp_team_end ();
      if (nthreads > 1)
	{
	  /* If not nested, there is just one thread in the
	     contention group left, no need for atomicity.  */
	  if (thr->ts.team == NULL)
	    thr->thread_pool->threads_busy = 1;
	  else
	    {
#ifdef HAVE_SYNC_BUILTINS
	      /* 1UL - nthreads wraps to a negative delta, giving back
		 the nthreads - 1 additional threads this team held.  */
	      __sync_fetch_and_add (&thr->thread_pool->threads_busy,
				    1UL - nthreads);
#else
	      gomp_mutex_lock (&gomp_managed_threads_lock);
	      thr->thread_pool->threads_busy -= nthreads - 1;
	      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	    }
	}
    }
  else
    gomp_team_end ();
}
ialias (GOMP_parallel_end)
170 | ||
171 | void | |
28567c40 JJ |
172 | GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads, |
173 | unsigned int flags) | |
acf0174b JJ |
174 | { |
175 | num_threads = gomp_resolve_num_threads (num_threads, 0); | |
28567c40 JJ |
176 | gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads), |
177 | NULL); | |
acf0174b JJ |
178 | fn (data); |
179 | ialias_call (GOMP_parallel_end) (); | |
180 | } | |
181 | ||
28567c40 JJ |
/* Like GOMP_parallel, but for a parallel region with task reductions.
   Registers the reductions before starting the team, runs the region,
   and releases the taskgroup afterwards.  Returns the number of threads
   actually used (the caller needs it to walk per-thread reduction
   storage).  */
unsigned
GOMP_parallel_reductions (void (*fn) (void *), void *data,
			  unsigned num_threads, unsigned int flags)
{
  struct gomp_taskgroup *taskgroup;
  num_threads = gomp_resolve_num_threads (num_threads, 0);
  /* NOTE(review): the first pointer-sized slot of *DATA appears to hold
     a pointer to the compiler-emitted reduction descriptor array that
     gomp_parallel_reduction_register consumes — confirm against the
     code generated for task_reduction clauses.  */
  uintptr_t *rdata = *(uintptr_t **)data;
  taskgroup = gomp_parallel_reduction_register (rdata, num_threads);
  gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
		   taskgroup);
  fn (data);
  ialias_call (GOMP_parallel_end) ();
  /* The taskgroup was allocated by gomp_parallel_reduction_register;
     this function owns it and must destroy/free it after the region.  */
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
  return num_threads;
}
198 | ||
acf0174b JJ |
/* Implement #pragma omp cancellation point.  WHICH is a mask of
   GOMP_CANCEL_* bits selecting the construct kind.  Returns true iff
   cancellation has been observed for that construct, in which case the
   caller branches out of the region.  Always false when cancellation
   is disabled (OMP_CANCELLATION unset).  */
bool
GOMP_cancellation_point (int which)
{
  if (!gomp_cancel_var)
    return false;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
    {
      /* Orphaned worksharing region (no team): nothing to cancel.  */
      if (team == NULL)
	return false;
      return team->work_share_cancelled != 0;
    }
  else if (which & GOMP_CANCEL_TASKGROUP)
    {
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return true;
	  /* A workshare-internal taskgroup defers to the user-visible
	     taskgroup it was nested in (its prev).  */
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return true;
	}
      /* FALLTHRU into the GOMP_CANCEL_PARALLEL case,
	 as #pragma omp cancel parallel also cancels all explicit
	 tasks.  */
    }
  if (team)
    return gomp_team_barrier_cancelled (&team->barrier);
  return false;
}
ialias (GOMP_cancellation_point)
233 | ||
/* Implement #pragma omp cancel.  WHICH selects the construct kind;
   DO_CANCEL is the evaluated IF clause.  When DO_CANCEL is false this
   degenerates to a cancellation point.  Returns true iff the current
   thread should branch out of the region.  */
bool
GOMP_cancel (int which, bool do_cancel)
{
  if (!gomp_cancel_var)
    return false;

  if (!do_cancel)
    return ialias_call (GOMP_cancellation_point) (which);

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
    {
      /* In orphaned worksharing region, all we want to cancel
	 is current thread.  */
      if (team != NULL)
	team->work_share_cancelled = 1;
      return true;
    }
  else if (which & GOMP_CANCEL_TASKGROUP)
    {
      if (thr->task->taskgroup)
	{
	  struct gomp_taskgroup *taskgroup = thr->task->taskgroup;
	  /* Cancel the user-visible enclosing taskgroup, not a
	     workshare-internal one.  */
	  if (taskgroup->workshare && taskgroup->prev)
	    taskgroup = taskgroup->prev;
	  if (!taskgroup->cancelled)
	    {
	      /* The cancelled flag is read under task_lock elsewhere,
		 so take it for the write as well.  */
	      gomp_mutex_lock (&team->task_lock);
	      taskgroup->cancelled = true;
	      gomp_mutex_unlock (&team->task_lock);
	    }
	}
      return true;
    }
  /* GOMP_CANCEL_PARALLEL: cancel the whole team via its barrier.  */
  team->team_cancelled = 1;
  gomp_team_barrier_cancel (team);
  return true;
}
953ff289 DN |
273 | \f |
274 | /* The public OpenMP API for thread and team related inquiries. */ | |
275 | ||
276 | int | |
277 | omp_get_num_threads (void) | |
278 | { | |
279 | struct gomp_team *team = gomp_thread ()->ts.team; | |
280 | return team ? team->nthreads : 1; | |
281 | } | |
282 | ||
953ff289 | 283 | int |
a68ab351 | 284 | omp_get_thread_num (void) |
953ff289 | 285 | { |
a68ab351 | 286 | return gomp_thread ()->ts.team_id; |
953ff289 DN |
287 | } |
288 | ||
a68ab351 JJ |
289 | /* This wasn't right for OpenMP 2.5. Active region used to be non-zero |
290 | when the IF clause doesn't evaluate to false, starting with OpenMP 3.0 | |
291 | it is non-zero with more than one thread in the team. */ | |
292 | ||
953ff289 | 293 | int |
a68ab351 | 294 | omp_in_parallel (void) |
953ff289 | 295 | { |
a68ab351 | 296 | return gomp_thread ()->ts.active_level > 0; |
953ff289 DN |
297 | } |
298 | ||
a68ab351 JJ |
299 | int |
300 | omp_get_level (void) | |
301 | { | |
302 | return gomp_thread ()->ts.level; | |
303 | } | |
953ff289 | 304 | |
a68ab351 JJ |
305 | int |
306 | omp_get_ancestor_thread_num (int level) | |
953ff289 | 307 | { |
a68ab351 JJ |
308 | struct gomp_team_state *ts = &gomp_thread ()->ts; |
309 | if (level < 0 || level > ts->level) | |
310 | return -1; | |
311 | for (level = ts->level - level; level > 0; --level) | |
312 | ts = &ts->team->prev_ts; | |
313 | return ts->team_id; | |
314 | } | |
953ff289 | 315 | |
a68ab351 JJ |
316 | int |
317 | omp_get_team_size (int level) | |
318 | { | |
319 | struct gomp_team_state *ts = &gomp_thread ()->ts; | |
320 | if (level < 0 || level > ts->level) | |
321 | return -1; | |
322 | for (level = ts->level - level; level > 0; --level) | |
323 | ts = &ts->team->prev_ts; | |
324 | if (ts->team == NULL) | |
325 | return 1; | |
326 | else | |
327 | return ts->team->nthreads; | |
328 | } | |
953ff289 | 329 | |
a68ab351 JJ |
330 | int |
331 | omp_get_active_level (void) | |
332 | { | |
333 | return gomp_thread ()->ts.active_level; | |
953ff289 DN |
334 | } |
335 | ||
/* Internal aliases for the public API entry points above, so that calls
   from within libgomp bind locally (see the ialias definition in
   libgomp.h).  */
ialias (omp_get_num_threads)
ialias (omp_get_thread_num)
ialias (omp_in_parallel)
ialias (omp_get_level)
ialias (omp_get_ancestor_thread_num)
ialias (omp_get_team_size)
ialias (omp_get_active_level)