Add the parsing of loop transformations on inner loops of a loop-nest.
gcc/c/ChangeLog:
* c-parser.cc (c_parser_omp_nested_loop_transform_clauses):
Add argument for the level of loop-nest at which the clauses
appear, ...
(c_parser_omp_tile): ... adjust use here,
(c_parser_omp_unroll): ... and here,
(c_parser_omp_for_loop): ... and here. Stop treating loop
transformations like intervening code, parse them, and adjust
the loop-nest depth if necessary for tiling.
gcc/cp/ChangeLog:
* parser.cc (cp_parser_is_pragma): New function.
(cp_parser_omp_nested_loop_transform_clauses):
Add argument for the level of loop-nest at which the clauses
appear, ...
(cp_parser_omp_tile): ... adjust use here,
(cp_parser_omp_unroll): ... and here,
(cp_parser_omp_for_loop): ... and here. Stop treating loop
gcc/testsuite/ChangeLog:
* c-c++-common/gomp/loop-transforms/unroll-inner-1.c: New test.
* c-c++-common/gomp/loop-transforms/unroll-inner-2.c: New test.
libgomp/ChangeLog
* testsuite/libgomp.c++/loop-transforms/tile-1.C: Deleted, replaced by
matrix-* tests.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-1.h:
New header file for new tests.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-constant-iter.h:
Likewise.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-helper.h:
Likewise.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-no-directive-1.c:
New test.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-no-directive-unroll-full-1.c:
New test.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-omp-distribute-parallel-for-1.c:
New test.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-omp-for-1.c:
New test.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-omp-parallel-for-1.c:
New test.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-omp-parallel-masked-taskloop-1.c:
New test.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-omp-parallel-masked-taskloop-simd-1.c:
New test.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-omp-target-parallel-for-1.c:
New test.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-omp-target-teams-distribute-parallel-for-1.c:
New test.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-omp-taskloop-1.c:
New test.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-omp-teams-distribute-parallel-for-1.c:
New test.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-simd-1.c:
New test.
* testsuite/libgomp.c-c++-common/loop-transforms/matrix-transform-variants-1.h:
New test.
* testsuite/libgomp.c-c++-common/loop-transforms/unroll-non-rect-1.c:
New test.
}
static int c_parser_omp_nested_loop_transform_clauses (c_parser *, tree &, int,
- const char *);
+ int, const char *);
/* Parse the restricted form of loop statements allowed by OpenACC and OpenMP.
The real trick here is to determine the loop control variable early
ordered = collapse;
}
- c_parser_omp_nested_loop_transform_clauses (parser, clauses, collapse,
+ c_parser_omp_nested_loop_transform_clauses (parser, clauses, 0, collapse,
"loop collapse");
/* Find the depth of the loop nest affected by "omp tile"
else if (bracecount
&& c_parser_next_token_is (parser, CPP_SEMICOLON))
c_parser_consume_token (parser);
+ else if (c_parser_peek_token (parser)->pragma_kind
+ == PRAGMA_OMP_UNROLL
+ || c_parser_peek_token (parser)->pragma_kind
+ == PRAGMA_OMP_TILE)
+ {
+ int depth = c_parser_omp_nested_loop_transform_clauses (
+ parser, clauses, i + 1, count - i - 1, "loop collapse");
+ if (i + 1 + depth > count)
+ {
+ count = i + 1 + depth;
+ declv = grow_tree_vec (declv, count);
+ initv = grow_tree_vec (initv, count);
+ condv = grow_tree_vec (condv, count);
+ incrv = grow_tree_vec (incrv, count);
+ }
+ }
else
{
c_parser_error (parser, "not enough perfectly nested loops");
fail = true;
count = 0;
break;
- }
+ }
}
while (1);
}
/* Parse zero or more OpenMP loop transformation directives that
- follow another directive that requires a canonical loop nest and
- append all to CLAUSES. Return the nesting depth
- of the transformed loop nest.
+ follow another directive that requires a canonical loop nest,
+ append all to CLAUSES and record the LEVEL at which the clauses
+ appear in the loop nest in each clause.
REQUIRED_DEPTH is the nesting depth of the loop nest required by
the preceding directive. OUTER_DESCR is a description of the
static int
c_parser_omp_nested_loop_transform_clauses (c_parser *parser, tree &clauses,
- int required_depth,
+ int level, int required_depth,
const char *outer_descr)
{
tree c = NULL_TREE;
if (!transformed_depth)
transformed_depth = last_depth;
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, level);
if (!clauses)
clauses = c;
else if (last_c)
return error_mark_node;
int required_depth = list_length (OMP_CLAUSE_TILE_SIZES (clauses));
- c_parser_omp_nested_loop_transform_clauses (parser, clauses, required_depth,
+ c_parser_omp_nested_loop_transform_clauses (parser, clauses, 0, required_depth,
"outer transformation");
block = c_begin_compound_stmt (true);
tree clauses = c_parser_omp_all_clauses (parser, mask, p_name, false);
int required_depth = 1;
- c_parser_omp_nested_loop_transform_clauses (parser, clauses, required_depth,
+ c_parser_omp_nested_loop_transform_clauses (parser, clauses, 0, required_depth,
"outer transformation");
if (!clauses)
}
static int cp_parser_omp_nested_loop_transform_clauses (cp_parser *, tree &,
- int, const char *);
+ int, int,
+ const char *);
/* Parse the restricted form of the for statement allowed by OpenMP. */
gcc_assert (oacc_tiling || (collapse >= 1 && ordered >= 0));
count = ordered ? ordered : collapse;
- cp_parser_omp_nested_loop_transform_clauses (parser, clauses, count,
+ cp_parser_omp_nested_loop_transform_clauses (parser, clauses, 0, count,
"loop collapse");
/* Find the depth of the loop nest affected by "omp tile"
cp_parser_parse_tentatively (parser);
for (;;)
{
- if (cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR))
+ cp_token *tok = cp_lexer_peek_token (parser->lexer);
+ if (cp_parser_is_keyword (tok, RID_FOR))
break;
- else if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
+ else if (tok->type == CPP_OPEN_BRACE)
{
cp_lexer_consume_token (parser->lexer);
bracecount++;
}
- else if (bracecount
- && cp_lexer_next_token_is (parser->lexer, CPP_SEMICOLON))
+ else if (bracecount && tok->type == CPP_SEMICOLON)
cp_lexer_consume_token (parser->lexer);
+ else if (cp_parser_pragma_kind (tok) == PRAGMA_OMP_UNROLL
+ || cp_parser_pragma_kind (tok) == PRAGMA_OMP_TILE)
+ {
+ int depth = cp_parser_omp_nested_loop_transform_clauses (
+ parser, clauses, i + 1, count - i - 1, "loop collapse");
+
+ /* Adjust the loop nest depth to the requirements of the
+ loop transformations. The collapse will be reduced
+ to value requested by the "collapse" and "ordered"
+ clauses after the execution of the loop transformations
+ in the middle end. */
+ if (i + 1 + depth > count)
+ {
+ count = i + 1 + depth;
+ if (declv)
+ declv = grow_tree_vec (declv, count);
+ initv = grow_tree_vec (initv, count);
+ condv = grow_tree_vec (condv, count);
+ incrv = grow_tree_vec (incrv, count);
+ if (orig_declv)
+ declv = grow_tree_vec (orig_declv, count);
+ }
+ }
else
{
- loc = cp_lexer_peek_token (parser->lexer)->location;
+ loc = tok->location;
error_at (loc, "not enough for loops to collapse");
collapse_err = true;
cp_parser_abort_tentative_parse (parser);
}
else if (cp_lexer_next_token_is (parser->lexer, CPP_SEMICOLON))
cp_lexer_consume_token (parser->lexer);
+ else if (cp_parser_pragma_kind (cp_lexer_peek_token (parser->lexer))
+ == PRAGMA_OMP_UNROLL
+ || cp_parser_pragma_kind (cp_lexer_peek_token (parser->lexer))
+ == PRAGMA_OMP_TILE)
+ {
+ int depth =
+ cp_parser_omp_nested_loop_transform_clauses (parser, clauses,
+ i + 1, count - i -1,
+ "loop collapse");
+ if (i + 1 + depth > count)
+ {
+ count = i + 1 + depth;
+ if (declv)
+ declv = grow_tree_vec (declv, count);
+ initv = grow_tree_vec (initv, count);
+ condv = grow_tree_vec (condv, count);
+ incrv = grow_tree_vec (incrv, count);
+ if (orig_declv)
+ declv = grow_tree_vec (orig_declv, count);
+ }
+ }
else
{
if (!collapse_err)
return true;
}
-
/* OpenMP 5.1: Parse sizes list for "omp tile sizes"
sizes ( size-expr-list ) */
static tree
gcc_assert (sizes);
tree c = build_omp_clause (loc, OMP_CLAUSE_TILE);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
OMP_CLAUSE_TILE_SIZES (c) = sizes;
OMP_CLAUSE_TRANSFORM_LEVEL (c)
= build_int_cst (unsigned_type_node, 0);
return error_mark_node;
int required_depth = list_length (OMP_CLAUSE_TILE_SIZES (clauses));
- cp_parser_omp_nested_loop_transform_clauses (
- parser, clauses, required_depth, "outer transformation");
+ cp_parser_omp_nested_loop_transform_clauses (parser, clauses, 0,
+ required_depth,
+ "outer transformation");
block = begin_omp_structured_block ();
clauses = finish_omp_clauses (clauses, C_ORT_OMP);
}
/* Parse zero or more OpenMP loop transformation directives that
- follow another directive that requires a canonical loop nest and
- append all to CLAUSES. Return the nesting depth
+ follow another directive that requires a canonical loop nest,
+ append all to CLAUSES, and require the level at which the clause
+ appears in the loop nest in each clause. Return the nesting depth
of the transformed loop nest.
REQUIRED_DEPTH is the nesting depth of the loop nest required by
static int
cp_parser_omp_nested_loop_transform_clauses (cp_parser *parser, tree &clauses,
- int required_depth,
+ int level, int required_depth,
const char *outer_descr)
{
tree c = NULL_TREE;
default:
gcc_unreachable ();
}
- OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c)
+ = build_int_cst (unsigned_type_node, level);
if (depth < last_depth)
{
}
int required_depth = 1;
- cp_parser_omp_nested_loop_transform_clauses (
- parser, clauses, required_depth, "outer transformation");
+ cp_parser_omp_nested_loop_transform_clauses (parser, clauses, 0,
+ required_depth,
+ "outer transformation");
block = begin_omp_structured_block ();
ret = cp_parser_omp_for_loop (parser, OMP_LOOP_TRANS, clauses, NULL, if_p);
--- /dev/null
+void test ()
+{
+#pragma omp tile sizes (2,4,6)
+ for (unsigned i = 0; i < 10; i++)
+ for (unsigned j = 0; j < 10; j++)
+ {
+ float intervening_decl = 0; /* { dg-bogus "not enough for loops to collapse" "TODO C/C++ imperfect loop nest handling" { xfail c++ } } */
+ /* { dg-bogus "not enough perfectly nested loops" "TODO C/C++ imperfect loop nest handling" { xfail c } .-1 } */
+#pragma omp unroll partial(2)
+ for (unsigned k = 0; k < 10; k++);
+ }
+}
--- /dev/null
+/* { dg-additional-options "-std=c++11" { target c++} } */
+
+extern void dummy (int);
+
+void
+test ()
+{
+
+#pragma omp target parallel for collapse(2)
+ for (int i = -300; i != 100; ++i)
+ #pragma omp unroll partial
+ for (int j = 0; j != 100; ++j)
+ dummy (i);
+}
+
--- /dev/null
+/* { dg-additional-options "-std=c++11" { target c++} } */
+
+extern void dummy (int);
+
+void
+test ()
+{
+
+#pragma omp target parallel for collapse(2)
+ for (int i = -300; i != 100; ++i)
+#pragma omp tile sizes(2)
+ for (int j = 0; j != 100; ++j)
+ dummy (i);
+
+#pragma omp target parallel for collapse(2)
+ for (int i = -300; i != 100; ++i)
+#pragma omp tile sizes(2, 3)
+ for (int j = 0; j != 100; ++j)
+ dummy (i); /* { dg-error {not enough for loops to collapse} "" { target c++ } } */
+/* { dg-error {'i' was not declared in this scope} "" { target c++ } .-1 } */
+/* { dg-error {not enough perfectly nested loops before 'dummy'} "" { target c } .-2 } */
+
+#pragma omp target parallel for collapse(2)
+ for (int i = -300; i != 100; ++i)
+#pragma omp tile sizes(2, 3)
+ for (int j = 0; j != 100; ++j)
+ for (int k = 0; k != 100; ++k)
+ dummy (i);
+}
+
+
--- /dev/null
+extern void dummy (int);
+
+void
+test1 ()
+{
+#pragma omp target parallel for collapse(2)
+ for (int i = -300; i != 100; ++i)
+#pragma omp unroll partial(2)
+ for (int j = i * 2; j <= i * 4 + 1; ++j)
+ dummy (i);
+
+#pragma omp target parallel for collapse(3)
+ for (int i = -300; i != 100; ++i)
+ for (int j = i; j != i * 2; ++j)
+ #pragma omp unroll partial
+ for (int k = 2; k != 100; ++k)
+ dummy (i);
+
+#pragma omp unroll full
+ for (int i = -300; i != 100; ++i)
+ for (int j = i; j != i * 2; ++j)
+ for (int k = 2; k != 100; ++k)
+ dummy (i);
+
+ for (int i = -300; i != 100; ++i)
+#pragma omp unroll full
+ for (int j = i; j != i + 10; ++j)
+ for (int k = 2; k != 100; ++k)
+ dummy (i);
+
+ for (int i = -300; i != 100; ++i)
+#pragma omp unroll full
+ for (int j = i; j != i + 10; ++j)
+ for (int k = j; k != 100; ++k)
+ dummy (i);
+}
+
--- /dev/null
+extern void dummy (int);
+
+void
+test1 ()
+{
+#pragma omp target parallel for collapse(2) /* { dg-error {invalid OpenMP non-rectangular loop step; \'\(1 - 0\) \* 1\' is not a multiple of loop 2 step \'5\'} "" { target c } } */
+ for (int i = -300; i != 100; ++i) /* { dg-error {invalid OpenMP non-rectangular loop step; \'\(1 - 0\) \* 1\' is not a multiple of loop 2 step \'5\'} "" { target c++ } } */
+#pragma omp unroll partial
+ for (int j = 2; j != i; ++j)
+ dummy (i);
+}
+
+void
+test2 ()
+{
+ int i,j;
+#pragma omp target parallel for collapse(2)
+ for (i = -300; i != 100; ++i)
+ #pragma omp unroll partial
+ for (j = 2; j != i; ++j)
+ dummy (i);
+}
+++ /dev/null
-#include <string.h>
-#include <stdio.h>
-#include <math.h>
-
-void
-mult (float *matrix1, float *matrix2, float *result, unsigned dim0,
- unsigned dim1)
-{
- memset (result, 0, sizeof (float) * dim0 * dim1);
-#pragma omp target parallel for collapse(3)
-#pragma omp tile sizes(8, 16, 4)
- for (unsigned i = 0; i < dim0; i++)
- for (unsigned j = 0; j < dim1; j++)
- for (unsigned k = 0; k < dim1; k++)
- result[i * dim1 + j] += matrix1[i * dim1 + k] * matrix2[k * dim0 + j];
-}
-
-int
-main ()
-{
- unsigned dim0 = 20;
- unsigned dim1 = 20;
-
- float *result = (float *)malloc (sizeof (float) * dim0 * dim1);
- float *matrix1 = (float *)malloc (sizeof (float) * dim0 * dim1);
- float *matrix2 = (float *)malloc (sizeof (float) * dim0 * dim1);
-
- for (unsigned i = 0; i < dim0; i++)
- for (unsigned j = 0; j < dim1; j++)
- matrix1[i * dim1 + j] = j;
-
- for (unsigned i = 0; i < dim1; i++)
- for (unsigned j = 0; j < dim0; j++)
- if (i == j)
- matrix2[i * dim0 + j] = 1;
- else
- matrix2[i * dim0 + j] = 0;
-
- mult (matrix1, matrix2, result, dim0, dim1);
-
- for (unsigned i = 0; i < dim0; i++)
- for (unsigned j = 0; j < dim1; j++)
- {
- if (matrix1[i * dim1 + j] != result[i * dim1 + j])
- {
- printf ("ERROR at %d, %d\n", i, j);
- __builtin_abort ();
- }
- }
-
- return 0;
-}
--- /dev/null
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifndef FUN_NAME_SUFFIX
+#define FUN_NAME_SUFFIX
+#endif
+
+#ifdef MULT
+#undef MULT
+#endif
+#define MULT CAT(mult, FUN_NAME_SUFFIX)
+
+#ifdef MAIN
+#undef MAIN
+#endif
+#define MAIN CAT(main, FUN_NAME_SUFFIX)
+
+void MULT (float *matrix1, float *matrix2, float *result,
+ unsigned dim0, unsigned dim1)
+{
+ unsigned i;
+
+ memset (result, 0, sizeof (float) * dim0 * dim1);
+ DIRECTIVE
+ TRANSFORMATION1
+ for (i = 0; i < dim0; i++)
+ TRANSFORMATION2
+ for (unsigned j = 0; j < dim1; j++)
+ TRANSFORMATION3
+ for (unsigned k = 0; k < dim1; k++)
+ result[i * dim1 + j] += matrix1[i * dim1 + k] * matrix2[k * dim0 + j];
+}
+
+int MAIN ()
+{
+ unsigned dim0 = 20;
+ unsigned dim1 = 20;
+
+ float *result = (float *)malloc (sizeof (float) * dim0 * dim1);
+ float *matrix1 = (float *)malloc (sizeof (float) * dim0 * dim1);
+ float *matrix2 = (float *)malloc (sizeof (float) * dim0 * dim1);
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ matrix1[i * dim1 + j] = j;
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ if (i == j)
+ matrix2[i * dim1 + j] = 1;
+ else
+ matrix2[i * dim1 + j] = 0;
+
+ MULT (matrix1, matrix2, result, dim0, dim1);
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++) {
+ if (matrix1[i * dim1 + j] != result[i * dim1 + j]) {
+ print_matrix (matrix1, dim0, dim1);
+ print_matrix (matrix2, dim0, dim1);
+ print_matrix (result, dim0, dim1);
+ fprintf(stderr, "%s: ERROR at %d, %d\n", __FUNCTION__, i, j);
+ abort();
+ }
+ }
+
+ return 0;
+}
--- /dev/null
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifndef FUN_NAME_SUFFIX
+#define FUN_NAME_SUFFIX
+#endif
+
+#ifdef MULT
+#undef MULT
+#endif
+#define MULT CAT(mult, FUN_NAME_SUFFIX)
+
+#ifdef MAIN
+#undef MAIN
+#endif
+#define MAIN CAT(main, FUN_NAME_SUFFIX)
+
+void MULT (float *matrix1, float *matrix2, float *result)
+{
+ const unsigned dim0 = 20;
+ const unsigned dim1 = 20;
+
+ memset (result, 0, sizeof (float) * dim0 * dim1);
+ DIRECTIVE
+ TRANSFORMATION1
+ for (unsigned i = 0; i < dim0; i++)
+ TRANSFORMATION2
+ for (unsigned j = 0; j < dim1; j++)
+ TRANSFORMATION3
+ for (unsigned k = 0; k < dim1; k++)
+ result[i * dim1 + j] += matrix1[i * dim1 + k] * matrix2[k * dim0 + j];
+}
+
+int MAIN ()
+{
+ const unsigned dim0 = 20;
+ const unsigned dim1 = 20;
+
+ float *result = (float *)malloc (sizeof (float) * dim0 * dim1);
+ float *matrix1 = (float *)malloc (sizeof (float) * dim0 * dim1);
+ float *matrix2 = (float *)malloc (sizeof (float) * dim0 * dim1);
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ matrix1[i * dim1 + j] = j;
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ if (i == j)
+ matrix2[i * dim1 + j] = 1;
+ else
+ matrix2[i * dim1 + j] = 0;
+
+ MULT (matrix1, matrix2, result);
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++) {
+ if (matrix1[i * dim1 + j] != result[i * dim1 + j]) {
+ __builtin_printf("%s: error at %d, %d\n", __FUNCTION__, i, j);
+ print_matrix (matrix1, dim0, dim1);
+ print_matrix (matrix2, dim0, dim1);
+ print_matrix (result, dim0, dim1);
+ __builtin_printf("\n");
+ __builtin_abort();
+ }
+ }
+
+ return 0;
+}
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+
+#define CAT(x,y) XCAT(x,y)
+#define XCAT(x,y) x ## y
+#define DO_PRAGMA(x) XDO_PRAGMA(x)
+#define XDO_PRAGMA(x) _Pragma (#x)
+
+
+void print_matrix (float *matrix, unsigned dim0, unsigned dim1)
+{
+ for (unsigned i = 0; i < dim0; i++)
+ {
+ for (unsigned j = 0; j < dim1; j++)
+ fprintf (stderr, "%f ", matrix[i * dim1 + j]);
+ fprintf (stderr, "\n");
+ }
+ fprintf (stderr, "\n");
+}
--- /dev/null
+/* { dg-additional-options {-fdump-tree-original} } */
+
+#define COMMON_DIRECTIVE
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3 collapse(3)
+
+#include "matrix-transform-variants-1.h"
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "unroll_partial" 12 "original" } } */
--- /dev/null
+/* { dg-additional-options {-fdump-tree-original} } */
+
+#define COMMON_DIRECTIVE
+#define COMMON_TOP_TRANSFORM omp unroll full
+#define COLLAPSE_1
+#define COLLAPSE_2
+#define COLLAPSE_3
+#define IMPLEMENTATION_FILE "matrix-constant-iter.h"
+
+#include "matrix-transform-variants-1.h"
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "unroll_full" 13 "original" } } */
--- /dev/null
+#define COMMON_DIRECTIVE "omp teams distribute parallel for"
+#define COLLAPSE_1 "collapse(1)"
+#define COLLAPSE_2 "collapse(2)"
+#define COLLAPSE_3 "collapse(3)"
+
+#include "matrix-transform-variants-1.h"
--- /dev/null
+/* { dg-additional-options {-fdump-tree-original} } */
+
+#define COMMON_DIRECTIVE omp for
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3 collapse(3)
+
+#include "matrix-transform-variants-1.h"
+
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "omp for" 13 "original" } } */
+/* { dg-final { scan-tree-dump-times "collapse" 12 "original" } } */
--- /dev/null
+/* { dg-additional-options {-fdump-tree-original} } */
+
+#define COMMON_DIRECTIVE omp parallel for
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
+
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "omp parallel" 13 "original" } } */
+/* { dg-final { scan-tree-dump-times "collapse" 9 "original" } } */
--- /dev/null
+#define COMMON_DIRECTIVE omp parallel masked taskloop
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
--- /dev/null
+#define COMMON_DIRECTIVE omp parallel masked taskloop simd
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
--- /dev/null
+/* { dg-additional-options {-fdump-tree-original} } */
+
+#define COMMON_DIRECTIVE omp target parallel for map(tofrom:result[0:dim0*dim1]) map(to:matrix1[0:dim0*dim1], matrix2[0:dim0*dim1])
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "omp target" 13 "original" } } */
+/* { dg-final { scan-tree-dump-times "collapse" 9 "original" } } */
+/* { dg-final { scan-tree-dump-times "unroll_partial" 12 "original" } } */
--- /dev/null
+#define COMMON_DIRECTIVE omp target teams distribute parallel for map(tofrom:result[:dim0*dim1]) map(to:matrix1[0:dim0*dim1], matrix2[0:dim0*dim1])
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
--- /dev/null
+#define COMMON_DIRECTIVE omp taskloop
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3 collapse(3)
+
+#include "matrix-transform-variants-1.h"
--- /dev/null
+#define COMMON_DIRECTIVE omp teams distribute parallel for
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
--- /dev/null
+#define COMMON_DIRECTIVE omp simd
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3 collapse(3)
+
+#include "matrix-transform-variants-1.h"
--- /dev/null
+#include "matrix-helper.h"
+
+#ifndef COMMON_TOP_TRANSFORM
+#define COMMON_TOP_TRANSFORM
+#endif
+
+#ifndef IMPLEMENTATION_FILE
+#define IMPLEMENTATION_FILE "matrix-1.h"
+#endif
+
+#define FUN_NAME_SUFFIX 1
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp unroll partial(2)") _Pragma("omp tile sizes(10)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 2
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_3)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(8,16,4)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 3
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(8, 8)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 4
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_1)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(8, 8)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 5
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_1)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(8, 8, 8)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 6
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_1)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(10)") _Pragma("omp unroll partial(2)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 7
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(7, 11)")
+#define TRANSFORMATION2 _Pragma("omp unroll partial(7)")
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 8
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(7, 11)")
+#define TRANSFORMATION2 _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(7)")
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 9
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(7, 11)")
+#define TRANSFORMATION2 _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 10
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_1)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp unroll partial(5)") _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 11
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM)
+#define TRANSFORMATION2 _Pragma("omp unroll partial(5)") _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 12
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_3)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM)
+#define TRANSFORMATION2
+#define TRANSFORMATION3 _Pragma("omp unroll partial(5)") _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 13
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_3)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM)
+#define TRANSFORMATION2 _Pragma("omp tile sizes(7,8)")
+#define TRANSFORMATION3 _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#include IMPLEMENTATION_FILE
+
+int main ()
+{
+ main1 ();
+ main2 ();
+ main3 ();
+ main4 ();
+ main5 ();
+ main6 ();
+ main7 ();
+ main8 ();
+ main9 ();
+ main10 ();
+ main11 ();
+ main12 ();
+ main13 ();
+
+ return 0;
+}
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+
+void test1 ()
+{
+ int sum = 0;
+ for (int i = -3; i != 1; ++i)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+void test2 ()
+{
+ int sum = 0;
+ #pragma omp unroll partial
+ for (int i = -3; i != 1; ++i)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+void test3 ()
+{
+ int sum = 0;
+ #pragma omp unroll partial
+ for (int i = -3; i != 1; ++i)
+ #pragma omp unroll partial
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+void test4 ()
+{
+ int sum = 0;
+#pragma omp for
+#pragma omp unroll partial(5)
+ for (int i = -3; i != 1; ++i)
+#pragma omp unroll partial(2)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+void test5 ()
+{
+ int sum = 0;
+#pragma omp parallel for reduction(+:sum)
+#pragma omp unroll partial(2)
+ for (int i = -3; i != 1; ++i)
+#pragma omp unroll partial(2)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+void test6 ()
+{
+ int sum = 0;
+#pragma omp target parallel for reduction(+:sum)
+#pragma omp unroll partial(7)
+ for (int i = -3; i != 1; ++i)
+#pragma omp unroll partial(2)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+void test7 ()
+{
+ int sum = 0;
+#pragma omp target teams distribute parallel for reduction(+:sum)
+#pragma omp unroll partial(7)
+ for (int i = -3; i != 1; ++i)
+#pragma omp unroll partial(2)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+int
+main ()
+{
+ test1 ();
+ test2 ();
+ test3 ();
+ test4 ();
+ test5 ();
+ test6 ();
+ test7 ();
+
+ return 0;
+}