s = C_OMP_CLAUSE_SPLIT_TARGET;
break;
case OMP_CLAUSE_NUM_TEAMS:
- case OMP_CLAUSE_THREAD_LIMIT:
s = C_OMP_CLAUSE_SPLIT_TEAMS;
break;
case OMP_CLAUSE_DIST_SCHEDULE:
else
s = C_OMP_CLAUSE_SPLIT_FOR;
break;
+ /* thread_limit is allowed on target and teams. Distribute it
+ to all. */
+ case OMP_CLAUSE_THREAD_LIMIT:
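+ /* A map clause in the allowed mask means target is among
+ the constituent constructs. */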
+ if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_MAP))
+ != 0)
+ {
+ if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NUM_TEAMS))
+ != 0)
+ {
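+ /* Teams is present as well: duplicate the clause onto the
+ target list, then fall through so the original lands on
+ teams. */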
+ c = build_omp_clause (OMP_CLAUSE_LOCATION (clauses),
+ OMP_CLAUSE_THREAD_LIMIT);
+ OMP_CLAUSE_THREAD_LIMIT_EXPR (c)
+ = OMP_CLAUSE_THREAD_LIMIT_EXPR (clauses);
+ OMP_CLAUSE_CHAIN (c) = cclauses[C_OMP_CLAUSE_SPLIT_TARGET];
+ cclauses[C_OMP_CLAUSE_SPLIT_TARGET] = c;
+ }
+ else
+ {
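+ /* Combined with target but not teams, so target alone
+ gets the clause. */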
+ s = C_OMP_CLAUSE_SPLIT_TARGET;
+ break;
+ }
+ }
+ s = C_OMP_CLAUSE_SPLIT_TEAMS;
+ break;
/* Allocate clause is allowed on target, teams, distribute, parallel,
for, sections and taskloop. Distribute it to all. */
case OMP_CLAUSE_ALLOCATE:
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEFAULTMAP) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IN_REDUCTION) \
+ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_THREAD_LIMIT) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR))
static bool
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEFAULTMAP) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IN_REDUCTION) \
+ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_THREAD_LIMIT) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR))
static bool
if (!DECL_P (expr) && TREE_CODE (expr) != TARGET_EXPR)
OMP_CLAUSE_OPERAND (c, 0) = *p;
}
- c = build_omp_clause (thread_limit_loc, OMP_CLAUSE_THREAD_LIMIT);
- OMP_CLAUSE_THREAD_LIMIT_EXPR (c) = thread_limit;
- OMP_CLAUSE_CHAIN (c) = OMP_TARGET_CLAUSES (target);
- OMP_TARGET_CLAUSES (target) = c;
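+ /* Only add an implicit thread_limit clause if the target
+ construct does not already carry an explicit one, e.g. one
+ duplicated when splitting a combined target teams. */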
+ if (!omp_find_clause (OMP_TARGET_CLAUSES (target), OMP_CLAUSE_THREAD_LIMIT))
+ {
+ c = build_omp_clause (thread_limit_loc, OMP_CLAUSE_THREAD_LIMIT);
+ OMP_CLAUSE_THREAD_LIMIT_EXPR (c) = thread_limit;
+ OMP_CLAUSE_CHAIN (c) = OMP_TARGET_CLAUSES (target);
+ OMP_TARGET_CLAUSES (target) = c;
+ }
c = build_omp_clause (num_teams_loc, OMP_CLAUSE_NUM_TEAMS);
OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c) = num_teams_upper;
OMP_CLAUSE_NUM_TEAMS_LOWER_EXPR (c) = num_teams_lower;
if (tid == 0)
{
gomp_global_icv.nthreads_var = ntids;
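+ /* The thread limit can never exceed the number of threads
+ that were started. */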
+ gomp_global_icv.thread_limit_var = ntids;
/* Starting additional threads is not supported. */
gomp_global_icv.dyn_var = true;
static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
- struct gomp_device_descr *devicep)
+ struct gomp_device_descr *devicep, void **args)
{
struct gomp_thread old_thr, *thr = gomp_thread ();
thr->place = old_thr.place;
thr->ts.place_partition_len = gomp_places_list_len;
}
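+ /* Scan the argument array passed down from GOMP_target_ext
+ for a thread_limit that applies to all devices and install
+ it in the current task's ICVs for the fallback region. */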
+ if (args)
+ while (*args)
+ {
+ intptr_t id = (intptr_t) *args++, val;
+ if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
+ val = (intptr_t) *args++;
+ else
+ val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
+ if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
+ continue;
+ id &= GOMP_TARGET_ARG_ID_MASK;
+ if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
+ continue;
+ val = val > INT_MAX ? INT_MAX : val;
+ if (val)
+ gomp_icv (true)->thread_limit_var = val;
+ break;
+ }
+
fn (hostaddrs);
gomp_free_thread (thr);
*thr = old_thr;
/* All shared memory devices should use the GOMP_target_ext function. */
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
|| !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
- return gomp_target_fallback (fn, hostaddrs, devicep);
+ return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
htab_t refcount_set = htab_create (mapnum);
struct target_mem_desc *tgt_vars
tgt_align, tgt_size);
}
}
- gomp_target_fallback (fn, hostaddrs, devicep);
+ gomp_target_fallback (fn, hostaddrs, devicep, args);
return;
}
|| (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
{
ttask->state = GOMP_TARGET_TASK_FALLBACK;
- gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep);
+ gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
+ ttask->args);
return false;
}
size_t depend_size = 0;
uintptr_t depend_cnt = 0;
size_t tgt_align = 0, tgt_size = 0;
+ uintptr_t args_cnt = 0;
if (depend != NULL)
{
tgt_size += tgt_align - 1;
else
tgt_size = 0;
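+ /* Count the args entries, including the terminating NULL,
+ so the array can be copied into the task allocation. */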
+ if (args)
+ {
+ void **cargs = args;
+ while (*cargs)
+ {
+ intptr_t id = (intptr_t) *cargs++;
+ if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
+ cargs++;
+ }
+ args_cnt = cargs + 1 - args;
+ }
}
task = gomp_malloc (sizeof (*task) + depend_size
+ sizeof (*ttask)
+ + args_cnt * sizeof (void *)
+ mapnum * (sizeof (void *) + sizeof (size_t)
+ sizeof (unsigned short))
+ tgt_size);
ttask->devicep = devicep;
ttask->fn = fn;
ttask->mapnum = mapnum;
- ttask->args = args;
memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
- ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
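+ /* args may live on the caller's stack; for a deferred
+ target task copy it so it stays valid until the task
+ actually runs. */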
+ if (args_cnt)
+ {
+ ttask->args = (void **) &ttask->hostaddrs[mapnum];
+ memcpy (ttask->args, args, args_cnt * sizeof (void *));
+ ttask->sizes = (size_t *) &ttask->args[args_cnt];
+ }
+ else
+ {
+ ttask->args = args;
+ ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
+ }
memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
--- /dev/null
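+/* Check that the thread_limit clause on target caps the value
+ reported by omp_get_thread_limit inside the region. */
+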
+#include <omp.h>
+#include <stdlib.h>
+
+void
+foo ()
+{
+ {
+ #pragma omp target parallel nowait thread_limit (4) num_threads (1)
+ if (omp_get_thread_limit () > 4)
+ abort ();
+ }
+ #pragma omp taskwait
+}
+
+int
+main ()
+{
+ #pragma omp target thread_limit (6)
+ if (omp_get_thread_limit () > 6)
+ abort ();
+ foo ();
+ return 0;
+}