git.ipfire.org Git - thirdparty/gcc.git/commitdiff
re PR middle-end/91272 ([SVE] Use fully-masked loops for CLASTB reductions)
author Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
Mon, 28 Oct 2019 14:50:58 +0000 (14:50 +0000)
committer Prathamesh Kulkarni <prathamesh3492@gcc.gnu.org>
Mon, 28 Oct 2019 14:50:58 +0000 (14:50 +0000)
2019-10-28  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

PR middle-end/91272
* tree-vect-stmts.c (vectorizable_condition): Support
EXTRACT_LAST_REDUCTION with fully-masked loops.

testsuite/
* gcc.target/aarch64/sve/clastb_1.c: Add dg-scan.
* gcc.target/aarch64/sve/clastb_2.c: Likewise.
* gcc.target/aarch64/sve/clastb_3.c: Likewise.
* gcc.target/aarch64/sve/clastb_4.c: Likewise.
* gcc.target/aarch64/sve/clastb_5.c: Likewise.
* gcc.target/aarch64/sve/clastb_6.c: Likewise.
* gcc.target/aarch64/sve/clastb_7.c: Likewise.
* gcc.target/aarch64/sve/clastb_8.c: Likewise.

From-SVN: r277524

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c
gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
gcc/tree-vect-stmts.c

index c9daad995d7c028c64c58bf35eeec22d6106433f..f3410ebf730d67f1e72aa4eeb89efe024c34cdbf 100644 (file)
@@ -1,3 +1,9 @@
+2019-10-28  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+
+       PR middle-end/91272
+       * tree-vect-stmts.c (vectorizable_condition): Support
+       EXTRACT_LAST_REDUCTION with fully-masked loops.
+
 2019-10-28  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/92252
index 5d1ddbe4146519072b67aa6922b35b85b01f9939..f46cc932a198b27c4d03502f33ffd2a4a98d4573 100644 (file)
@@ -1,3 +1,15 @@
+2019-10-28  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+
+       PR middle-end/91272
+       * gcc.target/aarch64/sve/clastb_1.c: Add dg-scan.
+       * gcc.target/aarch64/sve/clastb_2.c: Likewise.
+       * gcc.target/aarch64/sve/clastb_3.c: Likewise.
+       * gcc.target/aarch64/sve/clastb_4.c: Likewise.
+       * gcc.target/aarch64/sve/clastb_5.c: Likewise.
+       * gcc.target/aarch64/sve/clastb_6.c: Likewise.
+       * gcc.target/aarch64/sve/clastb_7.c: Likewise.
+       * gcc.target/aarch64/sve/clastb_8.c: Likewise.
+
 2019-10-28  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/92252
index d4f9b0b6a94a4c054fc7ce3dac3236307459a2ee..d3ea52dea475edf67d3a9fd709a9278873f56102 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define N 32
 
@@ -17,4 +17,5 @@ condition_reduction (int *a, int min_v)
   return last;
 }
 
-/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.s} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
index 2c49bd3b0f0ec0f99fe2c65ca98af88212b9d9b2..c222b70791258042d79ca9b25fe463775dff5f18 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #include <stdint.h>
 
@@ -23,4 +23,5 @@ condition_reduction (TYPE *a, TYPE min_v)
   return last;
 }
 
-/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.s} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
index 35344f446c666f6e44b8ccd1524cac244dfa6f0a..5aaa71f948dfe657bb2bbefce0d48639beae2bcf 100644 (file)
@@ -1,8 +1,9 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE uint8_t
 
 #include "clastb_2.c"
 
-/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.b} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\tb[0-9]+, p[0-7], b[0-9]+, z[0-9]+\.b} } } */
index ce58abd6161c133082692f677e4fe8dafb6e64c0..b4db170ea06b6f6e45547c618d263c9b19f31213 100644 (file)
@@ -1,8 +1,9 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE int16_t
 
 #include "clastb_2.c"
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.h} } } */
index 2b9783d6627acb90824d4e2337c6c9ed6fd8425e..28d40a01f93ebeedf86a0ef895d2da20ce1dcc6c 100644 (file)
@@ -1,8 +1,9 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE uint64_t
 
 #include "clastb_2.c"
 
-/* { dg-final { scan-assembler {\tclastb\tx[0-9]+, p[0-7], x[0-9]+, z[0-9]+\.d} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
index c47d303f7301f8a9af574e82e157129e080308cd..38632a21be14531e49fb9c43e21746fa0b1958d0 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define N 32
 
@@ -21,4 +21,5 @@ condition_reduction (TYPE *a, TYPE min_v)
   return last;
 }
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
index 3345f874a39eb81358c9a1761ac4c333ecf3c192..e5307d2edc848f87633a57d2a7f728673fd76841 100644 (file)
@@ -1,7 +1,8 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE double
 #include "clastb_6.c"
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
index d86a428a7fa2575f080a6ea7232a55098738eb66..583fc8d8d6ddbff3dfe7c575a9297fb0c979ab13 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -msve-vector-bits=256 --save-temps" } */
 
 #include <stdint.h>
 
@@ -19,6 +19,7 @@ TEST_TYPE (uint16_t);
 TEST_TYPE (uint32_t);
 TEST_TYPE (uint64_t);
 
+/* { dg-final { scan-tree-dump-times "using a fully-masked loop." 4 "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\t(b[0-9]+), p[0-7], \1, z[0-9]+\.b\n} } } */
 /* { dg-final { scan-assembler {\tclastb\t(h[0-9]+), p[0-7], \1, z[0-9]+\.h\n} } } */
 /* { dg-final { scan-assembler {\tclastb\t(s[0-9]+), p[0-7], \1, z[0-9]+\.s\n} } } */
index 19ac82fe4e3c34c3776bd7ef3df40a06660abdb5..36821cec2ee8ee77ab98d93bdf7c042cc6f9d09f 100644 (file)
@@ -10050,16 +10050,6 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                return false;
            }
        }
-      if (loop_vinfo
-         && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
-         && reduction_type == EXTRACT_LAST_REDUCTION)
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't yet use a fully-masked loop for"
-                            " EXTRACT_LAST_REDUCTION.\n");
-         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
-       }
       if (expand_vec_cond_expr_p (vectype, comp_vectype,
                                     cond_code))
        {
@@ -10089,31 +10079,31 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   /* Handle cond expr.  */
   for (j = 0; j < ncopies; j++)
     {
-      tree loop_mask = NULL_TREE;
       bool swap_cond_operands = false;
 
       /* See whether another part of the vectorized code applies a loop
         mask to the condition, or to its inverse.  */
 
+      vec_loop_masks *masks = NULL;
       if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
        {
-         scalar_cond_masked_key cond (cond_expr, ncopies);
-         if (loop_vinfo->scalar_cond_masked_set.contains (cond))
-           {
-             vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
-             loop_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
-           }
+         if (reduction_type == EXTRACT_LAST_REDUCTION)
+           masks = &LOOP_VINFO_MASKS (loop_vinfo);
          else
            {
-             bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
-             cond.code = invert_tree_comparison (cond.code, honor_nans);
+             scalar_cond_masked_key cond (cond_expr, ncopies);
              if (loop_vinfo->scalar_cond_masked_set.contains (cond))
+               masks = &LOOP_VINFO_MASKS (loop_vinfo);
+             else
                {
-                 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
-                 loop_mask = vect_get_loop_mask (gsi, masks, ncopies,
-                                                 vectype, j);
-                 cond_code = cond.code;
-                 swap_cond_operands = true;
+                 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
+                 cond.code = invert_tree_comparison (cond.code, honor_nans);
+                 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
+                   {
+                     masks = &LOOP_VINFO_MASKS (loop_vinfo);
+                     cond_code = cond.code;
+                     swap_cond_operands = true;
+                   }
                }
            }
        }
@@ -10248,28 +10238,10 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
             vec != { 0, ... } (masked in the MASK_LOAD,
             unmasked in the VEC_COND_EXPR).  */
 
-         if (loop_mask)
-           {
-             if (COMPARISON_CLASS_P (vec_compare))
-               {
-                 tree tmp = make_ssa_name (vec_cmp_type);
-                 tree op0 = TREE_OPERAND (vec_compare, 0);
-                 tree op1 = TREE_OPERAND (vec_compare, 1);
-                 gassign *g = gimple_build_assign (tmp,
-                                                   TREE_CODE (vec_compare),
-                                                   op0, op1);
-                 vect_finish_stmt_generation (stmt_info, g, gsi);
-                 vec_compare = tmp;
-               }
-
-             tree tmp2 = make_ssa_name (vec_cmp_type);
-             gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR,
-                                               vec_compare, loop_mask);
-             vect_finish_stmt_generation (stmt_info, g, gsi);
-             vec_compare = tmp2;
-           }
+         /* Force vec_compare to be an SSA_NAME rather than a comparison,
+            in cases where that's necessary.  */
 
-         if (reduction_type == EXTRACT_LAST_REDUCTION)
+         if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
            {
              if (!is_gimple_val (vec_compare))
                {
@@ -10279,6 +10251,7 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
                  vec_compare = vec_compare_name;
                }
+
              if (must_invert_cmp_result)
                {
                  tree vec_compare_name = make_ssa_name (vec_cmp_type);
@@ -10288,6 +10261,24 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
                  vec_compare = vec_compare_name;
                }
+
+             if (masks)
+               {
+                 unsigned vec_num = vec_oprnds0.length ();
+                 tree loop_mask
+                   = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+                                         vectype, vec_num * j + i);
+                 tree tmp2 = make_ssa_name (vec_cmp_type);
+                 gassign *g
+                   = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
+                                          loop_mask);
+                 vect_finish_stmt_generation (stmt_info, g, gsi);
+                 vec_compare = tmp2;
+               }
+           }
+
+         if (reduction_type == EXTRACT_LAST_REDUCTION)
+           {
              gcall *new_stmt = gimple_build_call_internal
                (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
                 vec_then_clause);