]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
middle-end: lower COND_EXPR into gimple form in vect_recog_bool_pattern
authorTamar Christina <tamar.christina@arm.com>
Sun, 22 Sep 2024 12:38:49 +0000 (13:38 +0100)
committerTamar Christina <tamar.christina@arm.com>
Sun, 22 Sep 2024 12:38:49 +0000 (13:38 +0100)
Currently the vectorizer cheats when lowering COND_EXPR during bool recog.
In the cases where the conditonal is loop invariant or non-boolean it instead
converts the operation back into GENERIC and hides much of the operation from
the analysis part of the vectorizer.

i.e.

  a ? b : c

is transformed into:

  a != 0 ? b : c

however by doing so we can't perform any optimization on the mask as they aren't
explicit until quite late during codegen.

To fix this this patch lowers booleans earlier and so ensures that we are always
in GIMPLE.

For when the value is a loop invariant boolean we have to generate an additional
conversion from bool to the integer mask form.

This is done by creating a loop invariant a ? -1 : 0 with the target mask
precision and then doing a normal != 0 comparison on that.

To support this the patch also adds the ability to during pattern matching
create a loop invariant pattern that won't be seen by the vectorizer and will
instead me materialized inside the loop preheader in the case of loops, or in
the case of BB vectorization it materializes it in the first BB in the region.

gcc/ChangeLog:

* tree-vect-patterns.cc (append_inv_pattern_def_seq): New.
(vect_recog_bool_pattern): Lower COND_EXPRs.
* tree-vect-slp.cc (vect_slp_region): Materialize loop invariant
statements.
* tree-vect-loop.cc (vect_transform_loop): Likewise.
* tree-vect-stmts.cc (vectorizable_comparison_1): Remove
VECT_SCALAR_BOOLEAN_TYPE_P handling for vectype.
* tree-vectorizer.cc (vec_info::vec_info): Initialize
inv_pattern_def_seq.
* tree-vectorizer.h (LOOP_VINFO_INV_PATTERN_DEF_SEQ): New.
(class vec_info): Add inv_pattern_def_seq.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/bb-slp-conditional_store_1.c: New test.
* gcc.dg/vect/vect-conditional_store_5.c: New test.
* gcc.dg/vect/vect-conditional_store_6.c: New test.

gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c [new file with mode: 0644]
gcc/tree-vect-loop.cc
gcc/tree-vect-patterns.cc
gcc/tree-vect-slp.cc
gcc/tree-vect-stmts.cc
gcc/tree-vectorizer.cc
gcc/tree-vectorizer.h

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c
new file mode 100644 (file)
index 0000000..650a3bf
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_float } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo3 (float *restrict a, int *restrict c)
+{
+#pragma GCC unroll 8
+  for (int i = 0; i < 8; i++)
+    c[i] = a[i] > 1.0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized using SLP" "slp1" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c
new file mode 100644 (file)
index 0000000..37d60fa
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+#include <stdbool.h>
+
+void foo3 (float *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  bool ai = a[0];
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (ai)
+        t = res;
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c
new file mode 100644 (file)
index 0000000..5e1aedf
--- /dev/null
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo3 (unsigned long long *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i])
+        t = res;
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */
index 0f54ebe7dc48d173a620c91b61304db23c271a35..5cd4bdb32e040d83d4ceadaaf94187823f6cf070 100644 (file)
@@ -12356,6 +12356,18 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
       vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
     }
 
+  /* Generate the loop invariant statements.  */
+  if (!gimple_seq_empty_p (LOOP_VINFO_INV_PATTERN_DEF_SEQ (loop_vinfo)))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location,
+                        "------>generating loop invariant statements\n");
+      gimple_stmt_iterator gsi;
+      gsi = gsi_after_labels (loop_preheader_edge (loop)->src);
+      gsi_insert_seq_before (&gsi, LOOP_VINFO_INV_PATTERN_DEF_SEQ (loop_vinfo),
+                            GSI_CONTINUE_LINKING);
+    }
+
   /* FORNOW: the vectorizer supports only loops which body consist
      of one basic block (header + empty latch). When the vectorizer will
      support more involved loop forms, the order by which the BBs are
index a2bf90a575ae84c2ab25dc5a066f0cdb660d7e65..e7e877dd2adb55262822f1660f8d92b42d44e6d0 100644 (file)
@@ -183,6 +183,17 @@ append_pattern_def_seq (vec_info *vinfo,
                                      new_stmt);
 }
 
+
+/* Add NEW_STMT to VINFO's invariant pattern definition statements.  These
+   statements are not vectorized but are materialized as scalar in the loop
+   preheader.  */
+
+static inline void
+append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt)
+{
+  gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt);
+}
+
 /* The caller wants to perform new operations on vect_external variable
    VAR, so that the result of the operations would also be vect_external.
    Return the edge on which the operations can be performed, if one exists.
@@ -6055,12 +6066,34 @@ vect_recog_bool_pattern (vec_info *vinfo,
        var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
       else if (integer_type_for_mask (var, vinfo))
        return NULL;
+      else if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
+              && !vect_get_internal_def (vinfo, var))
+       {
+         /* If the condition is already a boolean then manually convert it to a
+            mask of the given integer type but don't set a vectype.  */
+         tree lhs_ivar = vect_recog_temp_ssa_var (type, NULL);
+         pattern_stmt = gimple_build_assign (lhs_ivar, COND_EXPR, var,
+                                             build_all_ones_cst (type),
+                                             build_zero_cst (type));
+         append_inv_pattern_def_seq (vinfo, pattern_stmt);
+         var = lhs_ivar;
+       }
+
+      tree lhs_var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
+      pattern_stmt = gimple_build_assign (lhs_var, NE_EXPR, var,
+                                         build_zero_cst (TREE_TYPE (var)));
+
+      tree new_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (var));
+      if (!new_vectype)
+       return NULL;
+
+      new_vectype = truth_type_for (new_vectype);
+      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype,
+                             TREE_TYPE (var));
 
       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
       pattern_stmt 
-       = gimple_build_assign (lhs, COND_EXPR,
-                              build2 (NE_EXPR, boolean_type_node,
-                                      var, build_int_cst (TREE_TYPE (var), 0)),
+       = gimple_build_assign (lhs, COND_EXPR, lhs_var,
                               gimple_assign_rhs2 (last_stmt),
                               gimple_assign_rhs3 (last_stmt));
       *type_out = vectype;
index 9c817de18bd14e16653882c07c3cdb200206b026..600987dd6e5d506aa5fbb02350f9dab77793d382 100644 (file)
@@ -9159,6 +9159,20 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
 
              vect_location = saved_vect_location;
            }
+
+
+         /* Generate the invariant statements.  */
+         if (!gimple_seq_empty_p (bb_vinfo->inv_pattern_def_seq))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_NOTE, vect_location,
+                        "------>generating invariant statements\n");
+
+             gimple_stmt_iterator gsi;
+             gsi = gsi_after_labels (bb_vinfo->bbs[0]);
+             gsi_insert_seq_after (&gsi, bb_vinfo->inv_pattern_def_seq,
+                                   GSI_CONTINUE_LINKING);
+           }
        }
       else
        {
index 45003f762ddfd74e82c068ef5a77ce6e3963b196..b72b54d666879d8485f8d972b4e8d9dc64bc86b3 100644 (file)
@@ -12736,11 +12736,7 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
   /* Invariant comparison.  */
   if (!vectype)
     {
-      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
-       vectype = mask_type;
-      else
-       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
-                                              slp_node);
+      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), slp_node);
       if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
        return false;
     }
index 4279b6db4cfccda759c3dc9c630f999b7e697fc7..d4ab47349a3a80181710698274acfcf80963231f 100644 (file)
@@ -466,7 +466,8 @@ vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
     shared (shared_),
     stmt_vec_info_ro (false),
     bbs (NULL),
-    nbbs (0)
+    nbbs (0),
+    inv_pattern_def_seq (NULL)
 {
   stmt_vec_infos.create (50);
 }
index 53105f9292f78c922925813bca9c5af5263fb4e0..490061aea2f6d465d9589eb97bbd34a920d76b1c 100644 (file)
@@ -512,6 +512,12 @@ public:
   /* The count of the basic blocks in the vectorization region.  */
   unsigned int nbbs;
 
+  /* Used to keep a sequence of def stmts of a pattern stmt that are loop
+    invariant if they exists.
+    The sequence is emitted in the loop preheader should the loop be vectorized
+    and are reset when undoing patterns.  */
+  gimple_seq inv_pattern_def_seq;
+
 private:
   stmt_vec_info new_stmt_vec_info (gimple *stmt);
   void set_vinfo_for_stmt (gimple *, stmt_vec_info, bool = true);
@@ -1042,6 +1048,7 @@ public:
 #define LOOP_VINFO_ORIG_LOOP_INFO(L)       (L)->orig_loop_info
 #define LOOP_VINFO_SIMD_IF_COND(L)         (L)->simd_if_cond
 #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
+#define LOOP_VINFO_INV_PATTERN_DEF_SEQ(L)  (L)->inv_pattern_def_seq
 
 #define LOOP_VINFO_FULLY_MASKED_P(L)           \
   (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L)      \