git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/114375 - disallow SLP discovery of permuted mask loads
author Richard Biener <rguenther@suse.de>
Mon, 18 Mar 2024 11:39:03 +0000 (12:39 +0100)
committer Richard Biener <rguenther@suse.de>
Wed, 8 May 2024 12:53:19 +0000 (14:53 +0200)
We cannot currently handle permutations of masked loads in code generation
or permute optimization.  Instead of rejecting them we simply drop any such
permutation on the floor, which produces wrong code.  The following rejects
the SLP build instead.  I've also made sure to reject them in
vectorizable_load for completeness.

PR tree-optimization/114375
* tree-vect-slp.cc (vect_build_slp_tree_2): Compute the
load permutation for masked loads but reject it when any
such is necessary.
* tree-vect-stmts.cc (vectorizable_load): Reject masked
VMAT_ELEMENTWISE and VMAT_STRIDED_SLP as those are not
supported.

* gcc.dg/vect/vect-pr114375.c: New testcase.

(cherry picked from commit 94c3508c5a14d1948fe3bffa9e16c6f3d9c2836a)

gcc/testsuite/gcc.dg/vect/vect-pr114375.c [new file with mode: 0644]
gcc/tree-vect-slp.cc
gcc/tree-vect-stmts.cc

diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr114375.c b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
new file mode 100644
index 0000000..1e1cb01
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+#include "tree-vect.h"
+
+int a[512];
+int b[512];
+int c[512];
+
+void __attribute__((noipa))
+foo(int * __restrict p)
+{
+  for (int i = 0; i < 64; ++i)
+    {
+      int tem = 2, tem2 = 2;
+      if (a[4*i + 1])
+        tem = p[4*i];
+      if (a[4*i])
+        tem2 = p[4*i + 2];
+      b[2*i] = tem2;
+      b[2*i+1] = tem;
+      if (a[4*i + 2])
+        tem = p[4*i + 1];
+      if (a[4*i + 3])
+        tem2 = p[4*i + 3];
+      c[2*i] = tem2;
+      c[2*i+1] = tem;
+    }
+}
+int main()
+{
+  check_vect ();
+
+  for (int i = 0; i < 512; ++i)
+    a[i] = (i >> 1) & 1;
+
+  foo (a);
+
+  if (c[0] != 1 || c[1] != 0 || c[2] != 1 || c[3] != 0
+      || b[0] != 2 || b[1] != 2 || b[2] != 2 || b[3] != 2)
+    abort ();
+
+  return 0;
+}
+
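diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index bbc05fac65ece87c4f0d445029a35facfe81883c..c01dc02afff6dfea8e4c0f0e8a9890c2f7b4c56b 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc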
@@ -1780,10 +1780,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
       && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
     {
-      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
-       gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
-                   || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
-                   || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+       gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
       else
        {
          *max_nunits = this_max_nunits;
@@ -1799,15 +1797,37 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
          load_permutation.create (group_size);
          stmt_vec_info first_stmt_info
            = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
+         bool any_permute = false;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            {
              int load_place = vect_get_place_in_interleaving_chain
                  (load_info, first_stmt_info);
              gcc_assert (load_place != -1);
-             load_permutation.safe_push (load_place);
+             any_permute |= load_place != j;
+             load_permutation.quick_push (load_place);
+           }
+
+         if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
+           {
+             gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
+                         || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
+                         || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
+             load_permutation.release ();
+             /* We cannot handle permuted masked loads, see PR114375.  */
+             if (any_permute
+                 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+                     && DR_GROUP_SIZE (first_stmt_info) != group_size)
+                 || STMT_VINFO_STRIDED_P (stmt_info))
+               {
+                 matches[0] = false;
+                 return NULL;
+               }
+           }
+         else
+           {
+             SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
+             return node;
            }
-         SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
-         return node;
        }
     }
   else if (gimple_assign_single_p (stmt_info->stmt)
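diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 6b7dbfd4a231baec24e740ffe0ce0b0bf7a1de6b..e3dea33e04a750961f13ba6bbccacbd3b4038f42 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc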
@@ -9121,6 +9121,14 @@ vectorizable_load (vec_info *vinfo,
                             "unsupported masked emulated gather.\n");
          return false;
        }
+      else if (memory_access_type == VMAT_ELEMENTWISE
+              || memory_access_type == VMAT_STRIDED_SLP)
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "unsupported masked strided access.\n");
+         return false;
+       }
     }
 
   if (!vec_stmt) /* transformation not required.  */