git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/122365 - deal with bool SLP reductions
author: Richard Biener <rguenther@suse.de>
Tue, 21 Oct 2025 13:40:41 +0000 (15:40 +0200)
committer: Richard Biener <rguenth@gcc.gnu.org>
Wed, 22 Oct 2025 06:54:53 +0000 (08:54 +0200)
I hadn't thought of these but at least added an assert which now
tripped.  Fixed thus.  There's also a latent issue with AVX512
mask types.  The by-pieces reduction code used the wrong element
sizes.

PR tree-optimization/122365
* tree-vect-loop.cc (vect_create_epilog_for_reduction):
Convert all inputs.  Use the proper vector element sizes
for the elementwise reduction.

* gcc.dg/vect/vect-reduc-bool-9.c: New testcase.

gcc/testsuite/gcc.dg/vect/vect-reduc-bool-9.c [new file with mode: 0644]
gcc/tree-vect-loop.cc

diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-9.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-9.c
new file mode 100644 (file)
index 0000000..4ec141c
--- /dev/null
@@ -0,0 +1,27 @@
+/* PR122365 */
+/* { dg-do compile } */
+
+struct TDTI {
+  float V[4];
+};
+struct TDTI4D {
+  struct TDTI S[];
+};
+void bar();
+struct TDTI4D nii_readParRec_dti4D;
+int nii_readParRec_d_0_0;
+void nii_readParRec() {
+  for (int i;;) {
+    bool v1varies = false, v2varies = false, v3varies = false;
+    for (; i < nii_readParRec_d_0_0; i++) {
+      if (nii_readParRec_dti4D.S[i].V[1])
+        v1varies = true;
+      if (nii_readParRec_dti4D.S[i].V[2])
+        v2varies = true;
+      if (nii_readParRec_dti4D.S[i].V[3])
+        v3varies = true;
+    }
+    if (v1varies || v2varies || v3varies)
+      bar();
+  }
+}
index be684a529db24b5f3cbee93a8d87535077efe743..15cb22023fc1e70efd7a0dd4697f47087e3b70a7 100644 (file)
@@ -5616,13 +5616,13 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
       && VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info)
       && vectype != VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info))
     {
-      gcc_assert (reduc_inputs.length () == 1);
       vectype = VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info);
       gimple_seq stmts = NULL;
-      reduc_inputs[0] = gimple_build (&stmts, VEC_COND_EXPR, vectype,
-                                     reduc_inputs[0],
-                                     build_one_cst (vectype),
-                                     build_zero_cst (vectype));
+      for (unsigned i = 0; i < reduc_inputs.length (); ++i)
+       reduc_inputs[i] = gimple_build (&stmts, VEC_COND_EXPR, vectype,
+                                       reduc_inputs[i],
+                                       build_one_cst (vectype),
+                                       build_zero_cst (vectype));
       gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
     }
 
@@ -5963,25 +5963,29 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
              gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits);
            }
        }
-      if (!slp_reduc
-         && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
+      else if (!slp_reduc
+              && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
        nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
 
-      tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
-                                                          stype, nunits1);
+      tree vectype1 = vectype;
+      if (mode1 != mode)
+       {
+         vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
+                                                         stype, nunits1);
+         /* First reduce the vector to the desired vector size we should
+            do shift reduction on by combining upper and lower halves.  */
+         gimple_seq stmts = NULL;
+         new_temp = vect_create_partial_epilog (reduc_inputs[0], vectype1,
+                                                code, &stmts);
+         gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
+         reduc_inputs[0] = new_temp;
+       }
+
       reduce_with_shift = have_whole_vector_shift (mode1);
       if (!VECTOR_MODE_P (mode1)
          || !directly_supported_p (code, vectype1))
        reduce_with_shift = false;
 
-      /* First reduce the vector to the desired vector size we should
-        do shift reduction on by combining upper and lower halves.  */
-      gimple_seq stmts = NULL;
-      new_temp = vect_create_partial_epilog (reduc_inputs[0], vectype1,
-                                            code, &stmts);
-      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
-      reduc_inputs[0] = new_temp;
-
       if (reduce_with_shift && (!slp_reduc || group_size == 1))
        {
          tree bitsize = TYPE_SIZE (TREE_TYPE (vectype1));
@@ -6009,7 +6013,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
                             "Reduce using vector shifts\n");
 
          gimple_seq stmts = NULL;
-         new_temp = gimple_convert (&stmts, vectype1, new_temp);
+         new_temp = gimple_convert (&stmts, vectype1, reduc_inputs[0]);
           for (elt_offset = nelements / 2;
                elt_offset >= 1;
                elt_offset /= 2)
@@ -6053,13 +6057,13 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
                             "Reduce using scalar code.\n");
 
          tree compute_type = TREE_TYPE (vectype1);
-         tree bitsize = TYPE_SIZE (compute_type);
-         int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
-         int element_bitsize = tree_to_uhwi (bitsize);
+         unsigned vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
+         unsigned element_bitsize = vector_element_bits (vectype1);
+         tree bitsize = bitsize_int (element_bitsize);
          gimple_seq stmts = NULL;
          FOR_EACH_VEC_ELT (reduc_inputs, i, vec_temp)
             {
-              int bit_offset;
+             unsigned bit_offset;
              new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type,
                                       vec_temp, bitsize, bitsize_zero_node);