Implement bool reduction vectorization

author Richard Biener <rguenther@suse.de>

Thu, 9 Oct 2025 12:03:29 +0000 (14:03 +0200)

committer Richard Biener <rguenth@gcc.gnu.org>

Mon, 20 Oct 2025 08:35:10 +0000 (10:35 +0200)
author Richard Biener <rguenther@suse.de>
Thu, 9 Oct 2025 12:03:29 +0000 (14:03 +0200)
committer Richard Biener <rguenth@gcc.gnu.org>
Mon, 20 Oct 2025 08:35:10 +0000 (10:35 +0200)
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-1.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-1.c

new file mode 100644 (file)

index 0000000..38aead8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-1.c
@@ -0,0 +1,52 @@
+#include "tree-vect.h"
+
+char p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  check_vect ();
+
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-2.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-2.c

new file mode 100644 (file)

index 0000000..2949b83
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-2.c
@@ -0,0 +1,52 @@
+#include "tree-vect.h"
+
+short p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  check_vect ();
+
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-3.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-3.c

new file mode 100644 (file)

index 0000000..893aa4b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-3.c
@@ -0,0 +1,52 @@
+#include "tree-vect.h"
+
+int p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  check_vect ();
+
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-4.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-4.c

new file mode 100644 (file)

index 0000000..dc37e06
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-4.c
@@ -0,0 +1,52 @@
+#include "tree-vect.h"
+
+long long p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  check_vect ();
+
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-5.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-5.c

new file mode 100644 (file)

index 0000000..9bafc09
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-5.c
@@ -0,0 +1,50 @@
+#include "tree-vect.h"
+
+char p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  check_vect ();
+
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-6.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-6.c

new file mode 100644 (file)

index 0000000..ee1b964
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-6.c
@@ -0,0 +1,50 @@
+#include "tree-vect.h"
+
+short p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  check_vect ();
+
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-7.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-7.c

new file mode 100644 (file)

index 0000000..ab5f3ae
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-7.c
@@ -0,0 +1,50 @@
+#include "tree-vect.h"
+
+int p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  check_vect ();
+
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-8.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-8.c

new file mode 100644 (file)

index 0000000..6b0a656
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-8.c
@@ -0,0 +1,50 @@
+#include "tree-vect.h"
+
+long long p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  check_vect ();
+
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc

index 568353ae5bf5df57a45e029db5fd76462698dbe2..de335b1ac6fb8d3b22d054d18870b7772364cfa4 100644 (file)
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3297,6 +3297,28 @@ reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn)
        }
  }
  
+/* Set *SBOOL_FN to the corresponding function working on vector masks
+   for REDUC_FN.  Return true if that exists, false otherwise.  */
+
+static bool
+sbool_reduction_fn_for_fn (internal_fn reduc_fn, internal_fn *sbool_fn)
+{
+  switch (reduc_fn)
+    {
+    case IFN_REDUC_AND:
+      *sbool_fn = IFN_REDUC_SBOOL_AND;
+      return true;
+    case IFN_REDUC_IOR:
+      *sbool_fn = IFN_REDUC_SBOOL_IOR;
+      return true;
+    case IFN_REDUC_XOR:
+      *sbool_fn = IFN_REDUC_SBOOL_XOR;
+      return true;
+    default:
+      return false;
+    }
+}
+
  /* If there is a neutral value X such that a reduction would not be affected
     by the introduction of additional X elements, return that X, otherwise
     return null.  CODE is the code of the reduction and SCALAR_TYPE is type
@@ -4902,17 +4924,16 @@ get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
    if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
      nunits = group_size;
  
+  tree vector_elt_type = TREE_TYPE (vector_type);
    number_of_places_left_in_vector = nunits;
    bool constant_p = true;
    tree_vector_builder elts (vector_type, nunits, 1);
    elts.quick_grow (nunits);
    gimple_seq ctor_seq = NULL;
    if (neutral_op
-      && !useless_type_conversion_p (TREE_TYPE (vector_type),
+      && !useless_type_conversion_p (vector_elt_type,
                                      TREE_TYPE (neutral_op)))
-    neutral_op = gimple_convert (&ctor_seq,
-                                TREE_TYPE (vector_type),
-                                neutral_op);
+    neutral_op = gimple_convert (&ctor_seq, vector_elt_type, neutral_op);
    for (j = 0; j < nunits * number_of_vectors; ++j)
      {
        tree op;
@@ -4924,11 +4945,22 @@ get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
         op = neutral_op;
        else
         {
-         if (!useless_type_conversion_p (TREE_TYPE (vector_type),
+         if (!useless_type_conversion_p (vector_elt_type,
                                           TREE_TYPE (initial_values[i])))
-           initial_values[i] = gimple_convert (&ctor_seq,
-                                               TREE_TYPE (vector_type),
-                                               initial_values[i]);
+           {
+             if (VECTOR_BOOLEAN_TYPE_P (vector_type))
+               initial_values[i] = gimple_build (&ctor_seq, COND_EXPR,
+                                                 vector_elt_type,
+                                                 initial_values[i],
+                                                 build_all_ones_cst
+                                                   (vector_elt_type),
+                                                 build_zero_cst
+                                                   (vector_elt_type));
+             else
+               initial_values[i] = gimple_convert (&ctor_seq,
+                                                   vector_elt_type,
+                                                   initial_values[i]);
+           }
           op = initial_values[i];
         }
  
@@ -5549,6 +5581,22 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
    /* Shouldn't be used beyond this point.  */
    exit_bb = nullptr;
  
+  /* If we are operating on a mask vector and do not support direct mask
+     reduction, work on a bool data vector instead of a mask vector.  */
+  if (VECTOR_BOOLEAN_TYPE_P (vectype)
+      && VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info)
+      && vectype != VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info))
+    {
+      gcc_assert (reduc_inputs.length () == 1);
+      vectype = VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info);
+      gimple_seq stmts = NULL;
+      reduc_inputs[0] = gimple_build (&stmts, VEC_COND_EXPR, vectype,
+                                     reduc_inputs[0],
+                                     build_one_cst (vectype),
+                                     build_zero_cst (vectype));
+      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
+    }
+
    if (VECT_REDUC_INFO_TYPE (reduc_info) == COND_REDUCTION
        && reduc_fn != IFN_LAST)
      {
@@ -5943,8 +5991,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
  
           new_temp = gimple_build (&stmts, BIT_FIELD_REF, TREE_TYPE (vectype1),
                                    new_temp, bitsize, bitsize_zero_node);
-         new_temp = gimple_build (&stmts, VIEW_CONVERT_EXPR,
-                                  scalar_type, new_temp);
+         new_temp = gimple_convert (&stmts, scalar_type, new_temp);
           gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
           scalar_results.safe_push (new_temp);
          }
@@ -7017,15 +7064,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
    tree vectype_out = SLP_TREE_VECTYPE (slp_for_stmt_info);
    VECT_REDUC_INFO_VECTYPE (reduc_info) = vectype_out;
  
-  /* We do not handle mask reductions correctly in the epilogue.  */
-  if (VECTOR_BOOLEAN_TYPE_P (vectype_out))
-    {
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "mask reduction not supported.\n");
-      return false;
-    }
-
    gimple_match_op op;
    if (!gimple_extract_op (stmt_info->stmt, &op))
      gcc_unreachable ();
@@ -7343,6 +7381,23 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
        return false;
      }
  
+  /* See if we can convert a mask vector to a corresponding bool data vector
+     to perform the epilogue reduction.  */
+  tree alt_vectype_out = NULL_TREE;
+  if (VECTOR_BOOLEAN_TYPE_P (vectype_out))
+    {
+      alt_vectype_out
+       = get_related_vectype_for_scalar_type (loop_vinfo->vector_mode,
+                                              TREE_TYPE (vectype_out),
+                                              TYPE_VECTOR_SUBPARTS
+                                                (vectype_out));
+      if (!alt_vectype_out
+         || maybe_ne (TYPE_VECTOR_SUBPARTS (alt_vectype_out),
+                      TYPE_VECTOR_SUBPARTS (vectype_out))
+         || !expand_vec_cond_expr_p (alt_vectype_out, vectype_out))
+       alt_vectype_out = NULL_TREE;
+    }
+
    internal_fn reduc_fn = IFN_LAST;
    if (reduction_type == TREE_CODE_REDUCTION
        || reduction_type == FOLD_LEFT_REDUCTION
@@ -7353,9 +7408,26 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
           ? fold_left_reduction_fn (orig_code, &reduc_fn)
           : reduction_fn_for_scalar_code (orig_code, &reduc_fn))
         {
-         if (reduc_fn != IFN_LAST
-             && !direct_internal_fn_supported_p (reduc_fn, vectype_out,
-                                                 OPTIMIZE_FOR_SPEED))
+         internal_fn sbool_fn = IFN_LAST;
+         if (reduc_fn == IFN_LAST)
+           ;
+         else if ((!VECTOR_BOOLEAN_TYPE_P (vectype_out)
+                   || (GET_MODE_CLASS (TYPE_MODE (vectype_out))
+                       == MODE_VECTOR_BOOL))
+                  && direct_internal_fn_supported_p (reduc_fn, vectype_out,
+                                                     OPTIMIZE_FOR_SPEED))
+           ;
+         else if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
+                  && sbool_reduction_fn_for_fn (reduc_fn, &sbool_fn)
+                  && direct_internal_fn_supported_p (sbool_fn, vectype_out,
+                                                     OPTIMIZE_FOR_SPEED))
+           reduc_fn = sbool_fn;
+         else if (reduction_type != FOLD_LEFT_REDUCTION
+                  && alt_vectype_out
+                  && direct_internal_fn_supported_p (reduc_fn, alt_vectype_out,
+                                                     OPTIMIZE_FOR_SPEED))
+           VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info) = alt_vectype_out;
+         else
             {
               if (dump_enabled_p ())
                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7372,6 +7444,19 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
  
           return false;
         }
+      if (reduc_fn == IFN_LAST
+         && VECTOR_BOOLEAN_TYPE_P (vectype_out))
+       {
+         if (!alt_vectype_out)
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "cannot turn mask into bool data vector for "
+                                "reduction epilogue.\n");
+             return false;
+           }
+         VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info) = alt_vectype_out;
+       }
      }
    else if (reduction_type == COND_REDUCTION)
      {
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc

index c92fbcd143dc731c87ca6103f13f7d9f71600c9d..878a045c4364c4b379b9d223631605c4aac32ca4 100644 (file)
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -7157,13 +7157,14 @@ possible_vector_mask_operation_p (stmt_vec_info stmt_info)
  
  /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
     a vector mask type instead of a normal vector type.  Record the
-   result in STMT_INFO->mask_precision.  */
+   result in STMT_INFO->mask_precision.  Returns true when the
+   precision changed.  */
  
-static void
+static bool
  vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
  {
    if (!possible_vector_mask_operation_p (stmt_info))
-    return;
+    return false;
  
    /* If at least one boolean input uses a vector mask type,
       pick the mask type with the narrowest elements.
@@ -7245,8 +7246,11 @@ vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
           scalar_mode mode;
           tree vectype, mask_type;
           if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
-             && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
-             && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
+             /* Do not allow this to set vinfo->vector_mode, since that might
+                disrupt the result for the next iteration.  */
+             && (vectype = get_related_vectype_for_scalar_type
+                                               (vinfo->vector_mode, op0_type))
+             && (mask_type = truth_type_for (vectype))
               && expand_vec_cmp_expr_p (vectype, mask_type, code))
             precision = GET_MODE_BITSIZE (mode);
         }
@@ -7272,19 +7276,30 @@ vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
         }
      }
  
-  if (dump_enabled_p ())
+  if (stmt_info->mask_precision != precision)
      {
-      if (precision == ~0U)
-       dump_printf_loc (MSG_NOTE, vect_location,
-                        "using normal nonmask vectors for %G",
-                        stmt_info->stmt);
-      else
-       dump_printf_loc (MSG_NOTE, vect_location,
-                        "using boolean precision %d for %G",
-                        precision, stmt_info->stmt);
-    }
+      if (dump_enabled_p ())
+       {
+         if (precision == ~0U)
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "using normal nonmask vectors for %G",
+                            stmt_info->stmt);
+         else
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "using boolean precision %d for %G",
+                            precision, stmt_info->stmt);
+       }
  
-  stmt_info->mask_precision = precision;
+      /* ???  We'd like to assert stmt_info->mask_precision == 0
+        || stmt_info->mask_precision > precision, i.e. that we only
+        decrease mask precisions throughout iteration, but the
+        tcc_comparison handling above means for comparisons of bools
+        we start with 8 but might increase in case the bools get mask
+        precision on their own.  */
+      stmt_info->mask_precision = precision;
+      return true;
+    }
+  return false;
  }
  
  /* Handle vect_determine_precisions for STMT_INFO, given that we
@@ -7317,22 +7332,33 @@ vect_determine_precisions (vec_info *vinfo)
  
    DUMP_VECT_SCOPE ("vect_determine_precisions");
  
-  for (unsigned int i = 0; i < nbbs; i++)
+  /* For mask precisions we have to iterate since otherwise we do not
+     get the precision of reduction PHIs correct.  For now do this only
+     for loop vectorization.  */
+  bool changed;
+  do
      {
-      basic_block bb = bbs[i];
-      for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-       {
-         stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
-         if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
-           vect_determine_mask_precision (vinfo, stmt_info);
-       }
-      for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+      changed = false;
+      for (unsigned int i = 0; i < nbbs; i++)
         {
-         stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
-         if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
-           vect_determine_mask_precision (vinfo, stmt_info);
+         basic_block bb = bbs[i];
+         for (auto gsi = gsi_start_phis (bb);
+              !gsi_end_p (gsi); gsi_next (&gsi))
+           {
+             stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
+             if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
+               changed |= vect_determine_mask_precision (vinfo, stmt_info);
+           }
+         for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+           {
+             stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
+             if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
+               changed |= vect_determine_mask_precision (vinfo, stmt_info);
+           }
         }
      }
+  while (changed && is_a <loop_vec_info> (vinfo));
+
    for (unsigned int i = 0; i < nbbs; i++)
      {
        basic_block bb = bbs[nbbs - i - 1];
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h

index 4785cbdd61df7d1d8db50dc8cd1f4e2f712a211e..905a29142d3eb8077ab9fb29b3cceb04834848fe 100644 (file)
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -855,6 +855,10 @@ public:
    /* The vector type for performing the actual reduction operation.  */
    tree reduc_vectype;
  
+  /* The vector type we should use for the final reduction in the epilogue
+     when we reduce a mask.  */
+  tree reduc_vectype_for_mask;
+
    /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used.  */
    tree induc_cond_initial_val;
  
@@ -888,6 +892,7 @@ typedef class vect_reduc_info_s *vect_reduc_info;
  #define VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL(I) ((I)->induc_cond_initial_val)
  #define VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT(I) ((I)->reduc_epilogue_adjustment)
  #define VECT_REDUC_INFO_VECTYPE(I) ((I)->reduc_vectype)
+#define VECT_REDUC_INFO_VECTYPE_FOR_MASK(I) ((I)->reduc_vectype_for_mask)
  #define VECT_REDUC_INFO_FORCE_SINGLE_CYCLE(I) ((I)->force_single_cycle)
  #define VECT_REDUC_INFO_RESULT_POS(I) ((I)->reduc_result_pos)
author	Richard Biener <rguenther@suse.de>
	Thu, 9 Oct 2025 12:03:29 +0000 (14:03 +0200)
committer	Richard Biener <rguenth@gcc.gnu.org>
	Mon, 20 Oct 2025 08:35:10 +0000 (10:35 +0200)
gcc/testsuite/gcc.dg/vect/vect-reduc-bool-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/vect-reduc-bool-2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/vect-reduc-bool-3.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/vect-reduc-bool-4.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/vect-reduc-bool-5.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/vect-reduc-bool-6.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/vect-reduc-bool-7.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/vect-reduc-bool-8.c	[new file with mode: 0644]	patch \| blob
gcc/tree-vect-loop.cc		patch \| blob \| blame \| history
gcc/tree-vect-patterns.cc		patch \| blob \| blame \| history
gcc/tree-vectorizer.h		patch \| blob \| blame \| history