From: Juergen Christ Date: Thu, 5 Feb 2026 10:42:45 +0000 (+0100) Subject: tree-optimization/122297 - fix load/store bias handling X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2291c5c4471040acd3da83b5e2cbe56d7b4720e9;p=thirdparty%2Fgcc.git tree-optimization/122297 - fix load/store bias handling When load/store with length is used and only QImode versions are available, vectorizable_live_operation produces wrong results for VEC_EXTRACT. Provide a flag to vect_get_loop_len to specify if bias-adjusted length should be used or not. gcc/ChangeLog: PR tree-optimization/122297 * tree-vect-loop.cc (vectorize_fold_left_reduction): Adjust. (vectorizable_induction): Adjust. (vectorizable_live_operation_1): Adjust. (vect_get_loop_len): Provide parameter to select bias-adjusted length. (vect_gen_loop_len_mask): Adjust. (vect_update_ivs_after_vectorizer_for_early_breaks): Adjust. * tree-vect-stmts.cc (vect_get_strided_load_store_ops): Adjust. (vectorizable_call): Adjust. (vectorizable_operation): Adjust. (vectorizable_store): Adjust. (vectorizable_load): Adjust. (vectorizable_condition): Adjust. * tree-vectorizer.h (vect_get_loop_len): Add parameter. gcc/testsuite/ChangeLog: PR tree-optimization/122297 * gcc.dg/vect/nodump-extractlast-1.c: Fix typo. * gcc.dg/vect/nodump-extractlast-2.c: New test. Signed-off-by: Juergen Christ --- diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c index 980ac3e4218..83d8a38f13e 100644 --- a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c +++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c @@ -1,4 +1,4 @@ -/* Check for a bung in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when +/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when using VEC_EXTRACT. 
*/ /* { dg-require-effective-target vect_int } */ diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c new file mode 100644 index 00000000000..9697687c108 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c @@ -0,0 +1,23 @@ +/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when + using VEC_EXTRACT. This variant uses .LEN_LOAD which might use QImode + vectors during load, but SImode vectors for the extraction. */ +int __attribute__ ((noinline, noclone)) +test_int (int *x, int n, int value) +{ + int last; + for (int j = 0; j < n; ++j) + { + last = x[j]; + x[j] = last * value; + } + return last; +} + +int +main () +{ + int arr[] = {1,2,3,4,5,1}; + if (test_int (arr, sizeof (arr) / sizeof (arr[0]), 42) != 1) + __builtin_abort(); + return 0; +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 0947962fcf2..a7daeb72a5c 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -6544,7 +6544,7 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) { len = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num, vectype_in, - i, 1); + i, 1, false); signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); bias = build_int_cst (intQI_type_node, biasval); if (!is_cond_op) @@ -9938,7 +9938,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, _21 = vect_vec_iv_.6_22 + vect_cst__22; */ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, - vectype, 0, 0); + vectype, 0, 0, false); if (SCALAR_FLOAT_TYPE_P (stept)) expr = gimple_build (&stmts, FLOAT_EXPR, stept, len); else @@ -10085,7 +10085,7 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo, basic_block exit_bb, { /* Emit: - SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)> + SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1> where VEC_LHS is the vectorized live-out result, LEN is the length of the vector, BIAS is the load-store
bias. The bias should not be used @@ -10096,21 +10096,14 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo, basic_block exit_bb, gimple_stmt_iterator gsi = gsi_last (tem); tree len = vect_get_loop_len (loop_vinfo, &gsi, &LOOP_VINFO_LENS (loop_vinfo), - 1, vectype, 0, 1); + 1, vectype, 0, 1, false); gimple_seq_add_seq (&stmts, tem); - /* BIAS + 1. */ - signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); - tree bias_plus_one - = int_const_binop (PLUS_EXPR, - build_int_cst (TREE_TYPE (len), biasval), - build_one_cst (TREE_TYPE (len))); - - /* LAST_INDEX = LEN - (BIAS + 1). */ + /* LAST_INDEX = LEN - 1. */ tree last_index = gimple_build (&stmts, MINUS_EXPR, TREE_TYPE (len), - len, bias_plus_one); + len, build_one_cst (TREE_TYPE (len))); - /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>. */ + /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>. */ tree scalar_res = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype), vec_lhs_phi, last_index); @@ -10731,7 +10724,7 @@ vect_record_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens, tree vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, vec_loop_lens *lens, unsigned int nvectors, tree vectype, - unsigned int index, unsigned int factor) + unsigned int index, unsigned int factor, bool adjusted) { rgroup_controls *rgl = &(*lens)[nvectors - 1]; bool use_bias_adjusted_len = @@ -10764,7 +10757,7 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, } } - if (use_bias_adjusted_len) + if (use_bias_adjusted_len && adjusted) return rgl->bias_adjusted_ctrl; tree loop_len = rgl->controls[index]; @@ -10787,6 +10780,36 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); } } + else if (factor && rgl->factor != factor) + { + /* The number of scalars per iteration, scalar occupied bytes and + the number of vectors are both compile-time constants.
*/ + unsigned int nscalars_per_iter + = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype), + LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant (); + unsigned int rglvecsize = rgl->factor * rgl->max_nscalars_per_iter; + unsigned int vecsize = nscalars_per_iter * factor; + if (rglvecsize > vecsize) + { + unsigned int fac = rglvecsize / vecsize; + tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); + gimple_seq seq = NULL; + loop_len = gimple_build (&seq, EXACT_DIV_EXPR, iv_type, loop_len, + build_int_cst (iv_type, fac)); + if (seq) + gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); + } + else if (rglvecsize < vecsize) + { + unsigned int fac = vecsize / rglvecsize; + tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); + gimple_seq seq = NULL; + loop_len = gimple_build (&seq, MULT_EXPR, iv_type, loop_len, + build_int_cst (iv_type, fac)); + if (seq) + gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); + } + } return loop_len; } @@ -10804,7 +10827,7 @@ vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, tree all_one_mask = build_all_ones_cst (vectype); tree all_zero_mask = build_zero_cst (vectype); tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index, - factor); + factor, true); tree bias = build_int_cst (intQI_type_node, LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo)); tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask"); @@ -11075,7 +11098,7 @@ vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info loop_vinfo) { vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); tree_vf = vect_get_loop_len (loop_vinfo, NULL, lens, 1, - NULL_TREE, 0, 0); + NULL_TREE, 0, 0, true); } tree iter_var; diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 77f6d7a639a..22285250aa8 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -3181,7 +3181,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node, .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... 
); vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */ tree loop_len - = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0); + = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0, true); tree tmp = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, unshare_expr (DR_STEP (dr))), @@ -3252,7 +3252,7 @@ vect_get_loop_variant_data_ptr_increment ( addr = addr + .SELECT_VL (ARG..) * step; */ tree loop_len - = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0); + = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0, true); tree len_type = TREE_TYPE (loop_len); /* Since the outcome of .SELECT_VL is element size, we should adjust it into bytesize so that it can be used in address pointer variable @@ -3888,7 +3888,7 @@ vectorizable_call (vec_info *vinfo, { unsigned int vec_num = vec_oprnds0.length (); tree len = vect_get_loop_len (loop_vinfo, gsi, lens, - vec_num, vectype_out, i, 1); + vec_num, vectype_out, i, 1, true); signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); tree bias = build_int_cst (intQI_type_node, biasval); @@ -7098,7 +7098,7 @@ vectorizable_operation (vec_info *vinfo, if (len_loop_p) { tree len = vect_get_loop_len (loop_vinfo, gsi, lens, - vec_num, vectype, i, 1); + vec_num, vectype, i, 1, true); signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); tree bias = build_int_cst (intQI_type_node, biasval); @@ -8814,7 +8814,7 @@ vectorizable_store (vec_info *vinfo, { if (loop_lens) final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, - ncopies, vectype, j, 1); + ncopies, vectype, j, 1, true); else final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); signed char biasval @@ -9012,7 +9012,7 @@ vectorizable_store (vec_info *vinfo, if (loop_lens) final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, num_stmts, - vectype, j, 1); + vectype, j, 1, true); else final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); @@ -9396,7 +9396,7 @@ vectorizable_store (vec_info 
*vinfo, unsigned factor = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode); final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, - vec_num, vectype, i, factor); + vec_num, vectype, i, factor, true); } else if (final_mask) { @@ -10759,7 +10759,7 @@ vectorizable_load (vec_info *vinfo, { if (loop_lens) final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, - ncopies, vectype, j, 1); + ncopies, vectype, j, 1, true); else final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); signed char biasval @@ -10967,7 +10967,7 @@ vectorizable_load (vec_info *vinfo, { if (loop_lens) final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, - vec_num, vectype, i, 1); + vec_num, vectype, i, 1, true); else final_len = build_int_cst (sizetype, TYPE_VECTOR_SUBPARTS (vectype)); @@ -11419,7 +11419,7 @@ vectorizable_load (vec_info *vinfo, unsigned factor = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode); final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, - vec_num, vectype, i, factor); + vec_num, vectype, i, factor, true); } else if (final_mask) { @@ -12498,8 +12498,10 @@ vectorizable_condition (vec_info *vinfo, { if (lens) { + /* ??? Do we really want the adjusted LEN here? Isn't this + based on number of elements? 
*/ len = vect_get_loop_len (loop_vinfo, gsi, lens, - vec_num, vectype, i, 1); + vec_num, vectype, i, 1, true); signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); bias = build_int_cst (intQI_type_node, biasval); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 4849830204d..5c700535ed2 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2678,7 +2678,7 @@ extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int, tree, unsigned int); extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *, vec_loop_lens *, unsigned int, tree, - unsigned int, unsigned int); + unsigned int, unsigned int, bool); extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *, gimple_stmt_iterator *, vec_loop_lens *, unsigned int, tree, tree, unsigned int,