From: Richard Biener
Date: Tue, 21 Oct 2025 13:40:41 +0000 (+0200)
Subject: tree-optimization/122365 - deal with bool SLP reductions
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=aadc94947665a4b9d9cd35a55b1d60f5d0482415;p=thirdparty%2Fgcc.git

tree-optimization/122365 - deal with bool SLP reductions

I hadn't thought of these but at least added an assert which now
tripped.  Fixed thus.  There's also a latent issue with AVX512
mask types.  The by-pieces reduction code used the wrong element
sizes.

	PR tree-optimization/122365
	* tree-vect-loop.cc (vect_create_epilog_for_reduction): Convert
	all inputs.  Use the proper vector element sizes for the
	elementwise reduction.

	* gcc.dg/vect/vect-reduc-bool-9.c: New testcase.
---

diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-9.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-9.c
new file mode 100644
index 00000000000..4ec141c5e69
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-9.c
@@ -0,0 +1,27 @@
+/* PR122365 */
+/* { dg-do compile } */
+
+struct TDTI {
+  float V[4];
+};
+struct TDTI4D {
+  struct TDTI S[];
+};
+void bar();
+struct TDTI4D nii_readParRec_dti4D;
+int nii_readParRec_d_0_0;
+void nii_readParRec() {
+  for (int i;;) {
+    bool v1varies = false, v2varies = false, v3varies = false;
+    for (; i < nii_readParRec_d_0_0; i++) {
+      if (nii_readParRec_dti4D.S[i].V[1])
+        v1varies = true;
+      if (nii_readParRec_dti4D.S[i].V[2])
+        v2varies = true;
+      if (nii_readParRec_dti4D.S[i].V[3])
+        v3varies = true;
+    }
+    if (v1varies || v2varies || v3varies)
+      bar();
+  }
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index be684a529db..15cb22023fc 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -5616,13 +5616,13 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
       && VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info)
       && vectype != VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info))
     {
-      gcc_assert (reduc_inputs.length () == 1);
       vectype = VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info);
       gimple_seq stmts = NULL;
-      reduc_inputs[0] = gimple_build (&stmts, VEC_COND_EXPR, vectype,
-				      reduc_inputs[0],
-				      build_one_cst (vectype),
-				      build_zero_cst (vectype));
+      for (unsigned i = 0; i < reduc_inputs.length (); ++i)
+	reduc_inputs[i] = gimple_build (&stmts, VEC_COND_EXPR, vectype,
+					reduc_inputs[i],
+					build_one_cst (vectype),
+					build_zero_cst (vectype));
       gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
     }
 
@@ -5963,25 +5963,29 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
 	      gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits);
 	    }
 	}
-      if (!slp_reduc
-	  && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
+      else if (!slp_reduc
+	       && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
 	nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
 
-      tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
-							   stype, nunits1);
+      tree vectype1 = vectype;
+      if (mode1 != mode)
+	{
+	  vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
+							  stype, nunits1);
+	  /* First reduce the vector to the desired vector size we should
+	     do shift reduction on by combining upper and lower halves.  */
+	  gimple_seq stmts = NULL;
+	  new_temp = vect_create_partial_epilog (reduc_inputs[0], vectype1,
+						 code, &stmts);
+	  gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
+	  reduc_inputs[0] = new_temp;
+	}
+
       reduce_with_shift = have_whole_vector_shift (mode1);
       if (!VECTOR_MODE_P (mode1)
 	  || !directly_supported_p (code, vectype1))
 	reduce_with_shift = false;
 
-      /* First reduce the vector to the desired vector size we should
-	 do shift reduction on by combining upper and lower halves.  */
-      gimple_seq stmts = NULL;
-      new_temp = vect_create_partial_epilog (reduc_inputs[0], vectype1,
-					     code, &stmts);
-      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
-      reduc_inputs[0] = new_temp;
-
       if (reduce_with_shift && (!slp_reduc || group_size == 1))
 	{
 	  tree bitsize = TYPE_SIZE (TREE_TYPE (vectype1));
@@ -6009,7 +6013,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
 			     "Reduce using vector shifts\n");
 
 	  gimple_seq stmts = NULL;
-	  new_temp = gimple_convert (&stmts, vectype1, new_temp);
+	  new_temp = gimple_convert (&stmts, vectype1, reduc_inputs[0]);
 	  for (elt_offset = nelements / 2;
 	       elt_offset >= 1;
 	       elt_offset /= 2)
@@ -6053,13 +6057,13 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
 			     "Reduce using scalar code.\n");
 
 	  tree compute_type = TREE_TYPE (vectype1);
-	  tree bitsize = TYPE_SIZE (compute_type);
-	  int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
-	  int element_bitsize = tree_to_uhwi (bitsize);
+	  unsigned vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
+	  unsigned element_bitsize = vector_element_bits (vectype1);
+	  tree bitsize = bitsize_int (element_bitsize);
 	  gimple_seq stmts = NULL;
 	  FOR_EACH_VEC_ELT (reduc_inputs, i, vec_temp)
 	    {
-	      int bit_offset;
+	      unsigned bit_offset;
 	      new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type,
 				       vec_temp, bitsize, bitsize_zero_node);
 