tree-optimization/114375 - disallow SLP discovery of permuted mask loads
author    Richard Biener <rguenther@suse.de>
          Mon, 18 Mar 2024 11:39:03 +0000 (12:39 +0100)
committer Richard Biener <rguenther@suse.de>
          Tue, 19 Mar 2024 08:02:17 +0000 (09:02 +0100)
We cannot currently handle permutations of mask loads in code generation
or permute optimization.  But SLP discovery simply drops any such
permutation on the floor, so the following instead rejects the SLP build
rather than producing wrong code.  I've also made sure to reject these
cases in vectorizable_load for completeness.
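
As a concrete illustration (a hypothetical reduced example, not from the
commit itself), the rejected shape is a group of conditional loads whose
values are consumed in a different lane order than the one they have in
the interleaving chain, so the SLP load node would need a load
permutation on an IFN_MASK_LOAD:

    /* Hypothetical sketch: both guarded loads are stored swapped, so the
       SLP load node would need lane order {1, 0}.  Before the fix this
       permutation was silently dropped; now the SLP build is rejected.  */
    void
    f (int * __restrict out, int * __restrict p, int *cond, int n)
    {
      for (int i = 0; i < n; ++i)
        {
          int x = 0, y = 0;
          if (cond[2*i])
            x = p[2*i];
          if (cond[2*i + 1])
            y = p[2*i + 1];
          out[2*i] = y;       /* lane 0 takes load element 1 */
          out[2*i + 1] = x;   /* lane 1 takes load element 0 */
        }
    }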

PR tree-optimization/114375
	* tree-vect-slp.cc (vect_build_slp_tree_2): Compute the
	load permutation for masked loads but reject the SLP build
	when any permutation is necessary.
* tree-vect-stmts.cc (vectorizable_load): Reject masked
VMAT_ELEMENTWISE and VMAT_STRIDED_SLP as those are not
supported.

* gcc.dg/vect/vect-pr114375.c: New testcase.

gcc/testsuite/gcc.dg/vect/vect-pr114375.c [new file with mode: 0644]
gcc/tree-vect-slp.cc
gcc/tree-vect-stmts.cc

diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr114375.c b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
new file mode 100644 (file)
index 0000000..1e1cb01
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
@@ -0,0 +1,44 @@
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+#include "tree-vect.h"
+
+int a[512];
+int b[512];
+int c[512];
+
+void __attribute__((noipa))
+foo(int * __restrict p)
+{
+  for (int i = 0; i < 64; ++i)
+    {
+      int tem = 2, tem2 = 2;
+      if (a[4*i + 1])
+        tem = p[4*i];
+      if (a[4*i])
+        tem2 = p[4*i + 2];
+      b[2*i] = tem2;
+      b[2*i+1] = tem;
+      if (a[4*i + 2])
+        tem = p[4*i + 1];
+      if (a[4*i + 3])
+        tem2 = p[4*i + 3];
+      c[2*i] = tem2;
+      c[2*i+1] = tem;
+    }
+}
+int main()
+{
+  check_vect ();
+
+  for (int i = 0; i < 512; ++i)
+    a[i] = (i >> 1) & 1;
+
+  foo (a);
+
+  if (c[0] != 1 || c[1] != 0 || c[2] != 1 || c[3] != 0
+      || b[0] != 2 || b[1] != 2 || b[2] != 2 || b[3] != 2)
+    abort ();
+
+  return 0;
+}
+
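
The expected values checked in main can be worked out by hand.  Below is a
minimal worked check of the first iteration (hypothetical verification
code, not part of the testcase), using the facts that foo is called with
p == a and that a[i] = (i >> 1) & 1 gives a[0..3] = 0, 0, 1, 1:

    #include <assert.h>

    int
    main (void)
    {
      int a4[4] = { 0, 0, 1, 1 };       /* a[0..3] for i == 0 */
      int tem = 2, tem2 = 2;
      if (a4[1]) tem = a4[0];           /* a[1] == 0: tem stays 2 */
      if (a4[0]) tem2 = a4[2];          /* a[0] == 0: tem2 stays 2 */
      assert (tem2 == 2 && tem == 2);   /* so b[0] == 2 and b[1] == 2 */
      if (a4[2]) tem = a4[1];           /* a[2] == 1: tem = a[1] = 0 */
      if (a4[3]) tem2 = a4[3];          /* a[3] == 1: tem2 = a[3] = 1 */
      assert (tem2 == 1 && tem == 0);   /* so c[0] == 1 and c[1] == 0 */
      return 0;
    }

Iteration i == 1 sees the same pattern (a[4..7] = 0, 0, 1, 1) and yields
b[2] = b[3] = 2, c[2] = 1, c[3] = 0, matching the abort condition above.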
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 527b06c9f9c86c16992de8d5e7d93a78c4af356d..23f9593191ada302a80f77d6deb448f3893aaf56 100644 (file)
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1921,12 +1921,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
   if (STMT_VINFO_DATA_REF (stmt_info)
       && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
     {
-      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
-       gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
-                   || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
-                   || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)
-                   || gimple_call_internal_p (stmt, IFN_MASK_LEN_GATHER_LOAD));
-      else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
        gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
       else
        {
@@ -1943,19 +1938,43 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
          load_permutation.create (group_size);
          stmt_vec_info first_stmt_info
            = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
+         bool any_permute = false;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            {
              int load_place;
              if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
                load_place = vect_get_place_in_interleaving_chain
-                               (load_info, first_stmt_info);
+                   (load_info, first_stmt_info);
              else
                load_place = 0;
              gcc_assert (load_place != -1);
-             load_permutation.safe_push (load_place);
+             any_permute |= load_place != j;
+             load_permutation.quick_push (load_place);
+           }
+
+         if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
+           {
+             gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
+                         || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
+                         || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)
+                         || gimple_call_internal_p (stmt,
+                                                    IFN_MASK_LEN_GATHER_LOAD));
+             load_permutation.release ();
+             /* We cannot handle permuted masked loads, see PR114375.  */
+             if (any_permute
+                 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+                     && DR_GROUP_SIZE (first_stmt_info) != group_size)
+                 || STMT_VINFO_STRIDED_P (stmt_info))
+               {
+                 matches[0] = false;
+                 return NULL;
+               }
+           }
+         else
+           {
+             SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
+             return node;
            }
-         SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
-         return node;
        }
     }
   else if (gimple_assign_single_p (stmt_info->stmt)
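
For contrast (again a hypothetical example, not from the commit), the
unpermuted analogue still passes the new checks: every lane's load sits at
its natural place in the interleaving chain, so load_place == j for each j,
any_permute stays false, and DR_GROUP_SIZE matches the group size:

    /* Hypothetical sketch: guarded loads consumed in natural lane order;
       SLP discovery keeps this case, only required permutations, group
       gaps and strided masked groups are now rejected.  */
    void
    g (int * __restrict out, int * __restrict p, int *cond, int n)
    {
      for (int i = 0; i < n; ++i)
        {
          int x = 0, y = 0;
          if (cond[2*i])
            x = p[2*i];
          if (cond[2*i + 1])
            y = p[2*i + 1];
          out[2*i] = x;       /* lane 0 takes load element 0 */
          out[2*i + 1] = y;   /* lane 1 takes load element 1 */
        }
    }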
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index e8617439a480485021178c3ea1531cb3e8cd5542..5a4eb136c6d9bf4557b57a766f6b90e85ceab49b 100644 (file)
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -10080,6 +10080,14 @@ vectorizable_load (vec_info *vinfo,
                             "unsupported masked emulated gather.\n");
          return false;
        }
+      else if (memory_access_type == VMAT_ELEMENTWISE
+              || memory_access_type == VMAT_STRIDED_SLP)
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "unsupported masked strided access.\n");
+         return false;
+       }
     }
 
   bool costing_p = !vec_stmt;
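
A masked access classified as VMAT_ELEMENTWISE or VMAT_STRIDED_SLP would,
for example, arise from a guarded load with a non-unit stride (a
hypothetical sketch; whether a given loop actually takes this path depends
on the target and the cost model):

    /* Hypothetical sketch: a guarded load at stride 3.  With the patch,
       vectorizable_load refuses the masked VMAT_ELEMENTWISE /
       VMAT_STRIDED_SLP path instead of generating unsupported code.  */
    void
    h (int * __restrict out, int * __restrict p, int *cond, int n)
    {
      for (int i = 0; i < n; ++i)
        {
          int x = 0;
          if (cond[i])
            x = p[3*i];
          out[i] = x;
        }
    }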