+2016-02-23 Thomas Schwinge <thomas@codesourcery.com>
+
+ * tree-parloops.c (create_parallel_loop, gen_parallel_loop)
+ (parallelize_loops): In OpenACC kernels mode, set n_threads to
+ zero.
+ (pass_parallelize_loops::gate): In OpenACC kernels mode, gate on
+ flag_openacc.
+ * tree-ssa-loop.c (gate_oacc_kernels): Likewise.
+
2016-02-23 Richard Biener <rguenther@suse.de>
* mem-stats.h (struct mem_usage): Use PRIu64 for printing size_t.
+2016-02-23 Thomas Schwinge <thomas@codesourcery.com>
+
+ * c-c++-common/goacc/kernels-counter-vars-function-scope.c: Adjust
+ to -ftree-parallelize-loops/-fopenacc changes.
+ * c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
+ * c-c++-common/goacc/kernels-double-reduction.c: Likewise.
+ * c-c++-common/goacc/kernels-loop-2.c: Likewise.
+ * c-c++-common/goacc/kernels-loop-3.c: Likewise.
+ * c-c++-common/goacc/kernels-loop-g.c: Likewise.
+ * c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
+ * c-c++-common/goacc/kernels-loop-n.c: Likewise.
+ * c-c++-common/goacc/kernels-loop-nest.c: Likewise.
+ * c-c++-common/goacc/kernels-loop.c: Likewise.
+ * c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
+ * c-c++-common/goacc/kernels-reduction.c: Likewise.
+ * gfortran.dg/goacc/kernels-loop-inner.f95: Likewise.
+ * gfortran.dg/goacc/kernels-loops-adjacent.f95: Likewise.
+
2016-02-23 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* gcc.target/i386/chkp-hidden-def.c: Require alias support.
/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
-
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-g" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
-
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
-
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
-
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
-
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
/* { dg-additional-options "-fdump-tree-optimized" } */
/* Check that the loop has been split off into a function. */
/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
-
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
! { dg-additional-options "-O2" }
-! { dg-additional-options "-ftree-parallelize-loops=32" }
program main
implicit none
! { dg-additional-options "-O2" }
-! { dg-additional-options "-ftree-parallelize-loops=10" }
program main
implicit none
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
NEW_DATA is the variable that should be initialized from the argument
- of LOOP_FN. N_THREADS is the requested number of threads. */
+ of LOOP_FN. N_THREADS is the requested number of threads, which can be 0 if
+ that number is to be determined later. */
static void
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
basic_block paral_bb = single_pred (bb);
gsi = gsi_last_bb (paral_bb);
+ gcc_checking_assert (n_threads != 0);
t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
OMP_CLAUSE_NUM_THREADS_EXPR (t)
= build_int_cst (integer_type_node, n_threads);
}
/* Generates code to execute the iterations of LOOP in N_THREADS
- threads in parallel.
+ threads in parallel, which can be 0 if that number is to be determined
+ later.
NITER describes number of iterations of LOOP.
REDUCTION_LIST describes the reductions existent in the LOOP. */
else
m_p_thread=MIN_PER_THREAD;
+ gcc_checking_assert (n_threads != 0);
many_iterations_cond =
fold_build2 (GE_EXPR, boolean_type_node,
nit, build_int_cst (type, m_p_thread * n_threads));
static bool
parallelize_loops (bool oacc_kernels_p)
{
- unsigned n_threads = flag_tree_parallelize_loops;
+ unsigned n_threads;
bool changed = false;
struct loop *loop;
struct loop *skip_loop = NULL;
if (cfun->has_nonlocal_label)
return false;
+ /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
+ the argument to -ftree-parallelize-loops. */
+ if (oacc_kernels_p)
+ n_threads = 0;
+ else
+ n_threads = flag_tree_parallelize_loops;
+
gcc_obstack_init (&parloop_obstack);
reduction_info_table_type reduction_list (10);
{}
/* opt_pass methods: */
- virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
+ virtual bool gate (function *)
+ {
+ if (oacc_kernels_p)
+ return flag_openacc;
+ else
+ return flag_tree_parallelize_loops > 1;
+ }
virtual unsigned int execute (function *);
opt_pass * clone () { return new pass_parallelize_loops (m_ctxt); }
void set_pass_param (unsigned int n, bool param)
static bool
gate_oacc_kernels (function *fn)
{
- if (flag_tree_parallelize_loops <= 1)
+ if (!flag_openacc)
return false;
tree oacc_function_attr = get_oacc_fn_attrib (fn->decl);
virtual bool gate (function *)
{
return (optimize
- /* Don't bother doing anything if the program has errors. */
- && !seen_error ()
&& flag_openacc
- && flag_tree_parallelize_loops > 1);
+ /* Don't bother doing anything if the program has errors. */
+ && !seen_error ());
}
}; // class pass_ipa_oacc
+2016-02-23 Thomas Schwinge <thomas@codesourcery.com>
+
+ * oacc-parallel.c (GOACC_parallel_keyed): Initialize dims.
+ * plugin/plugin-nvptx.c (nvptx_exec): Provide default values for
+ dims.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: Adjust to
+ -ftree-parallelize-loops/-fopenacc changes.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c:
+ Likewise.
+
2016-02-22 Cesar Philippidis <cesar@codesourcery.com>
* testsuite/libgomp.oacc-c-c++-common/vprop.c: New test.
return;
}
+ /* Default: let the runtime choose. */
+ for (i = 0; i != GOMP_DIM_MAX; i++)
+ dims[i] = 0;
+
va_start (ap, kinds);
/* TODO: This will need amending when device_type is implemented. */
while ((tag = va_arg (ap, unsigned)) != 0)
/* Initialize the launch dimensions. Typically this is constant,
provided by the device compiler, but we must permit runtime
values. */
- for (i = 0; i != 3; i++)
- if (targ_fn->launch->dim[i])
- dims[i] = targ_fn->launch->dim[i];
+ int seen_zero = 0;
+ for (i = 0; i != GOMP_DIM_MAX; i++)
+ {
+ if (targ_fn->launch->dim[i])
+ dims[i] = targ_fn->launch->dim[i];
+ if (!dims[i])
+ seen_zero = 1;
+ }
+
+ if (seen_zero)
+ {
+ for (i = 0; i != GOMP_DIM_MAX; i++)
+ if (!dims[i])
+ dims[i] = /* TODO */ 32;
+ }
/* This reserves a chunk of a pre-allocated page of memory mapped on both
the host and the device. HP is a host pointer to the new chunk, and DP is
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define N (1024 * 512)
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define N (1024 * 512)
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define N 32
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define N 32
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define N 32
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define N 32
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define N 32
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define N 32
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define N 100
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-g" } */
#include "kernels-loop.c"
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define N ((1024 * 512) + 1)
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define N ((1024 * 512) + 1)
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define N 1000
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define N (1024 * 512)
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
#include <stdlib.h>
#define n 10000