git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/122297 - fix load/store bias handling
author Juergen Christ <jchrist@linux.ibm.com>
Thu, 5 Feb 2026 10:42:45 +0000 (11:42 +0100)
committer Juergen Christ <jchrist@linux.ibm.com>
Tue, 10 Feb 2026 12:18:35 +0000 (13:18 +0100)
When load/store with length is used and only QImode versions are
available, vectorizable_live_operation produces wrong results for
VEC_EXTRACT.  Provide a flag to vect_get_loop_len to specify if
bias-adjusted length should be used or not.

gcc/ChangeLog:

PR tree-optimization/122297
* tree-vect-loop.cc (vectorize_fold_left_reduction): Adjust.
(vectorizable_induction): Adjust.
(vectorizable_live_operation_1): Adjust.
(vect_get_loop_len): Provide parameter to select bias-adjusted
length.
(vect_gen_loop_len_mask): Adjust.
(vect_update_ivs_after_vectorizer_for_early_breaks): Adjust.
* tree-vect-stmts.cc (vect_get_strided_load_store_ops): Adjust.
(vectorizable_call): Adjust.
(vectorizable_operation): Adjust.
(vectorizable_store): Adjust.
(vectorizable_load): Adjust.
(vectorizable_condition): Adjust.
* tree-vectorizer.h (vect_get_loop_len): Add parameter.

gcc/testsuite/ChangeLog:

PR tree-optimization/122297
* gcc.dg/vect/nodump-extractlast-1.c: Fix typo.
* gcc.dg/vect/nodump-extractlast-2.c: New test.

Signed-off-by: Juergen Christ <jchrist@linux.ibm.com>
gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c [new file with mode: 0644]
gcc/tree-vect-loop.cc
gcc/tree-vect-stmts.cc
gcc/tree-vectorizer.h

index 980ac3e421881cd449b52a95eb31c1108cedb59f..83d8a38f13e3262f531a57c7b0c71147a9f1b32f 100644 (file)
@@ -1,4 +1,4 @@
-/* Check for a bung in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
+/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
    using VEC_EXTRACT.  */
 /* { dg-require-effective-target vect_int } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
new file mode 100644 (file)
index 0000000..9697687
--- /dev/null
@@ -0,0 +1,23 @@
+/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
+   using VEC_EXTRACT.  This variant uses .LEN_LOAD which might use QImode
+   vectors during load, but SImode vectors for the extraction.  */
+int __attribute__ ((noinline, noclone))
+test_int (int *x, int n, int value)
+{
+  int last;
+  for (int j = 0; j < n; ++j)
+    {
+      last = x[j];
+      x[j] = last * value;
+    }
+  return last;
+}
+
+int
+main ()
+{
+  int arr[] = {1,2,3,4,5,1};
+  if (test_int (arr, sizeof (arr) / sizeof (arr[0]), 42) != 1)
+    __builtin_abort();
+  return 0;
+}
index 0947962fcf2b0925c38074c21d31ae2b0f4165ef..a7daeb72a5c737c7497063d4bc90bf78bfed80b6 100644 (file)
@@ -6544,7 +6544,7 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
       if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
        {
          len = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num, vectype_in,
-                                  i, 1);
+                                  i, 1, false);
          signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
          bias = build_int_cst (intQI_type_node, biasval);
          if (!is_cond_op)
@@ -9938,7 +9938,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
                   _21 = vect_vec_iv_.6_22 + vect_cst__22;  */
              vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
              tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
-                                           vectype, 0, 0);
+                                           vectype, 0, 0, false);
              if (SCALAR_FLOAT_TYPE_P (stept))
                expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
              else
@@ -10085,7 +10085,7 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo, basic_block exit_bb,
     {
       /* Emit:
 
-        SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>
+        SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>
 
         where VEC_LHS is the vectorized live-out result, LEN is the length of
         the vector, BIAS is the load-store bias.  The bias should not be used
@@ -10096,21 +10096,14 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo, basic_block exit_bb,
       gimple_stmt_iterator gsi = gsi_last (tem);
       tree len = vect_get_loop_len (loop_vinfo, &gsi,
                                    &LOOP_VINFO_LENS (loop_vinfo),
-                                   1, vectype, 0, 1);
+                                   1, vectype, 0, 1, false);
       gimple_seq_add_seq (&stmts, tem);
 
-      /* BIAS + 1.  */
-      signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
-      tree bias_plus_one
-       = int_const_binop (PLUS_EXPR,
-                          build_int_cst (TREE_TYPE (len), biasval),
-                          build_one_cst (TREE_TYPE (len)));
-
-      /* LAST_INDEX = LEN - (BIAS + 1).  */
+      /* LAST_INDEX = LEN - 1.  */
       tree last_index = gimple_build (&stmts, MINUS_EXPR, TREE_TYPE (len),
-                                    len, bias_plus_one);
+                                    len, build_one_cst (TREE_TYPE (len)));
 
-      /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>.  */
+      /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>.  */
       tree scalar_res
        = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
                        vec_lhs_phi, last_index);
@@ -10731,7 +10724,7 @@ vect_record_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens,
 tree
 vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
                   vec_loop_lens *lens, unsigned int nvectors, tree vectype,
-                  unsigned int index, unsigned int factor)
+                  unsigned int index, unsigned int factor, bool adjusted)
 {
   rgroup_controls *rgl = &(*lens)[nvectors - 1];
   bool use_bias_adjusted_len =
@@ -10764,7 +10757,7 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
        }
     }
 
-  if (use_bias_adjusted_len)
+  if (use_bias_adjusted_len && adjusted)
     return rgl->bias_adjusted_ctrl;
 
   tree loop_len = rgl->controls[index];
@@ -10787,6 +10780,36 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
            gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
        }
     }
+  else if (factor && rgl->factor != factor)
+    {
+      /* The number of scalars per iteration, scalar occupied bytes and
+        the number of vectors are all compile-time constants.  */
+      unsigned int nscalars_per_iter
+       = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
+                    LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
+      unsigned int rglvecsize = rgl->factor * rgl->max_nscalars_per_iter;
+      unsigned int vecsize = nscalars_per_iter * factor;
+      if (rglvecsize > vecsize)
+       {
+         unsigned int fac = rglvecsize / vecsize;
+         tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+         gimple_seq seq = NULL;
+         loop_len = gimple_build (&seq, EXACT_DIV_EXPR, iv_type, loop_len,
+                                  build_int_cst (iv_type, fac));
+         if (seq)
+           gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+       }
+      else if (rglvecsize < vecsize)
+       {
+         unsigned int fac = vecsize / rglvecsize;
+         tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+         gimple_seq seq = NULL;
+         loop_len = gimple_build (&seq, MULT_EXPR, iv_type, loop_len,
+                                  build_int_cst (iv_type, fac));
+         if (seq)
+           gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+       }
+    }
   return loop_len;
 }
 
@@ -10804,7 +10827,7 @@ vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
   tree all_one_mask = build_all_ones_cst (vectype);
   tree all_zero_mask = build_zero_cst (vectype);
   tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index,
-                               factor);
+                               factor, true);
   tree bias = build_int_cst (intQI_type_node,
                             LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo));
   tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask");
@@ -11075,7 +11098,7 @@ vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info loop_vinfo)
     {
       vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
       tree_vf = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
-                                  NULL_TREE, 0, 0);
+                                  NULL_TREE, 0, 0, true);
     }
 
   tree iter_var;
index 77f6d7a639aeea7d0b12e9769fc3b7425ba9c391..22285250aa8d4f600721c647a83d1b2bafe7ef2a 100644 (file)
@@ -3181,7 +3181,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node,
         .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
         vectp_a.9_26 = vectp_a.9_7 + ivtmp_8;  */
       tree loop_len
-       = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
+       = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0, true);
       tree tmp
        = fold_build2 (MULT_EXPR, sizetype,
                       fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
@@ -3252,7 +3252,7 @@ vect_get_loop_variant_data_ptr_increment (
      addr = addr + .SELECT_VL (ARG..) * step;
   */
   tree loop_len
-    = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
+    = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0, true);
   tree len_type = TREE_TYPE (loop_len);
   /* Since the outcome of .SELECT_VL is element size, we should adjust
      it into bytesize so that it can be used in address pointer variable
@@ -3888,7 +3888,7 @@ vectorizable_call (vec_info *vinfo,
                    {
                      unsigned int vec_num = vec_oprnds0.length ();
                      tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
-                                                   vec_num, vectype_out, i, 1);
+                                                   vec_num, vectype_out, i, 1, true);
                      signed char biasval
                        = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
                      tree bias = build_int_cst (intQI_type_node, biasval);
@@ -7098,7 +7098,7 @@ vectorizable_operation (vec_info *vinfo,
          if (len_loop_p)
            {
              tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
-                                           vec_num, vectype, i, 1);
+                                           vec_num, vectype, i, 1, true);
              signed char biasval
                = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
              tree bias = build_int_cst (intQI_type_node, biasval);
@@ -8814,7 +8814,7 @@ vectorizable_store (vec_info *vinfo,
            {
              if (loop_lens)
                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                              ncopies, vectype, j, 1);
+                                              ncopies, vectype, j, 1, true);
              else
                final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
              signed char biasval
@@ -9012,7 +9012,7 @@ vectorizable_store (vec_info *vinfo,
                  if (loop_lens)
                    final_len = vect_get_loop_len (loop_vinfo, gsi,
                                                   loop_lens, num_stmts,
-                                                  vectype, j, 1);
+                                                  vectype, j, 1, true);
                  else
                    final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
 
@@ -9396,7 +9396,7 @@ vectorizable_store (vec_info *vinfo,
          unsigned factor
            = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
          final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                        vec_num, vectype, i, factor);
+                                        vec_num, vectype, i, factor, true);
        }
       else if (final_mask)
        {
@@ -10759,7 +10759,7 @@ vectorizable_load (vec_info *vinfo,
            {
              if (loop_lens)
                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                              ncopies, vectype, j, 1);
+                                              ncopies, vectype, j, 1, true);
              else
                final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
              signed char biasval
@@ -10967,7 +10967,7 @@ vectorizable_load (vec_info *vinfo,
                {
                  if (loop_lens)
                    final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                                  vec_num, vectype, i, 1);
+                                                  vec_num, vectype, i, 1, true);
                  else
                    final_len = build_int_cst (sizetype,
                                               TYPE_VECTOR_SUBPARTS (vectype));
@@ -11419,7 +11419,7 @@ vectorizable_load (vec_info *vinfo,
                unsigned factor
                  = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                              vec_num, vectype, i, factor);
+                                              vec_num, vectype, i, factor, true);
              }
            else if (final_mask)
              {
@@ -12498,8 +12498,10 @@ vectorizable_condition (vec_info *vinfo,
            {
              if (lens)
                {
+                 /* ??? Do we really want the adjusted LEN here?  Isn't this
+                    based on number of elements?  */
                  len = vect_get_loop_len (loop_vinfo, gsi, lens,
-                                          vec_num, vectype, i, 1);
+                                          vec_num, vectype, i, 1, true);
                  signed char biasval
                    = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
                  bias = build_int_cst (intQI_type_node, biasval);
index 4849830204d7e3755d430e9b883d58ad8f528231..5c700535ed26b2309a27e545117b7ad92646c5f5 100644 (file)
@@ -2678,7 +2678,7 @@ extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
                                  tree, unsigned int);
 extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
                               vec_loop_lens *, unsigned int, tree,
-                              unsigned int, unsigned int);
+                              unsigned int, unsigned int, bool);
 extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
                                    gimple_stmt_iterator *, vec_loop_lens *,
                                    unsigned int, tree, tree, unsigned int,