git.ipfire.org Git - thirdparty/gcc.git/commitdiff
vect: Relax gather/scatter scale handling.
author: Robin Dapp <rdapp@ventanamicro.com>
Wed, 29 Oct 2025 15:02:51 +0000 (16:02 +0100)
committer: Robin Dapp <rdapp@ventanamicro.com>
Fri, 7 Nov 2025 20:09:45 +0000 (21:09 +0100)
Similar to the signed/unsigned patch before this one relaxes the
gather/scatter restrictions on scale factors.  The basic idea is that a
natively unsupported scale factor can still be reached by emitting a
multiplication before the actual gather operation.  As before, we need
to make sure that there is no overflow when multiplying.

gcc/ChangeLog:

* tree-vect-data-refs.cc (struct gather_scatter_config):
Add scale.
(vect_gather_scatter_get_configs): Try various scales.
(vect_gather_scatter_fn_p): Add scale handling.
(vect_check_gather_scatter): Add scale parameter.
* tree-vect-stmts.cc (check_load_store_for_partial_vectors):
Ditto.
(vect_truncate_gather_scatter_offset): Ditto.
(vect_use_grouped_gather): Ditto.
(get_load_store_type): Ditto.
(vectorizable_store): Scale offset if necessary.
(vectorizable_load): Ditto.
* tree-vectorizer.h (struct vect_load_store_data): Add
supported_scale.
(vect_gather_scatter_fn_p): Add argument.

gcc/tree-vect-data-refs.cc
gcc/tree-vect-stmts.cc
gcc/tree-vectorizer.h

index fb2450a30c45389ef94c4316ade14c99806874fb..e8cfb884c1d0888fea86124e085b0bcc252280d3 100644 (file)
@@ -4431,6 +4431,7 @@ struct gather_scatter_config
 {
   internal_fn ifn;
   tree offset_vectype;
+  int scale;
   vec<int> elsvals;
 };
 
@@ -4523,38 +4524,62 @@ vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p,
       if (!offset_vectype)
        continue;
 
-      vec<int> elsvals = vNULL;
+      /* Try multiple scale values.  Start with exact match, then try
+        smaller common scales that a target might support.  */
+      int scales_to_try[] = {scale, 1, 2, 4, 8};
 
-      /* If we haven't determined which IFN is supported yet, try all three
-        to find which one the target supports.  */
-      if (ifn == IFN_LAST)
+      for (unsigned int j = 0;
+          j < sizeof (scales_to_try) / sizeof (*scales_to_try);
+          j++)
        {
-         ifn = vect_gather_scatter_which_ifn (read_p, masked_p,
-                                              vectype, memory_type,
-                                              offset_vectype, scale, &elsvals);
-         if (ifn != IFN_LAST)
+         int try_scale = scales_to_try[j];
+
+         /* Skip scales >= requested scale (except for exact match).  */
+         if (j > 0 && try_scale >= scale)
+           continue;
+
+         /* Skip if requested scale is not a multiple of this scale.  */
+         if (j > 0 && scale % try_scale != 0)
+           continue;
+
+         vec<int> elsvals = vNULL;
+
+         /* If we haven't determined which IFN is supported yet, try all three
+            to find which one the target supports.  */
+         if (ifn == IFN_LAST)
            {
-             /* Found which IFN is supported.  Save this configuration.  */
-             gather_scatter_config config;
-             config.ifn = ifn;
-             config.offset_vectype = offset_vectype;
-             config.elsvals = elsvals;
-             configs.safe_push (config);
+             ifn = vect_gather_scatter_which_ifn (read_p, masked_p,
+                                                  vectype, memory_type,
+                                                  offset_vectype, try_scale,
+                                                  &elsvals);
+             if (ifn != IFN_LAST)
+               {
+                 /* Found which IFN is supported.  Save this configuration.  */
+                 gather_scatter_config config;
+                 config.ifn = ifn;
+                 config.offset_vectype = offset_vectype;
+                 config.scale = try_scale;
+                 config.elsvals = elsvals;
+                 configs.safe_push (config);
+               }
            }
-       }
-      else
-       {
-         /* We already know which IFN is supported, just check if this
-            offset type works with it.  */
-         if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
-                                                     offset_vectype, scale,
-                                                     &elsvals))
+         else
            {
-             gather_scatter_config config;
-             config.ifn = ifn;
-             config.offset_vectype = offset_vectype;
-             config.elsvals = elsvals;
-             configs.safe_push (config);
+             /* We already know which IFN is supported, just check if this
+                offset type and scale work with it.  */
+             if (internal_gather_scatter_fn_supported_p (ifn, vectype,
+                                                         memory_type,
+                                                         offset_vectype,
+                                                         try_scale,
+                                                         &elsvals))
+               {
+                 gather_scatter_config config;
+                 config.ifn = ifn;
+                 config.offset_vectype = offset_vectype;
+                 config.scale = try_scale;
+                 config.elsvals = elsvals;
+                 configs.safe_push (config);
+               }
            }
        }
     }
@@ -4570,6 +4595,10 @@ vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p,
    base address.  If OFFSET_TYPE is scalar the function chooses an
    appropriate vector type for it.  SCALE is the amount by which the
    offset should be multiplied *after* it has been converted to address width.
+   If the target does not support the requested SCALE, SUPPORTED_SCALE
+   will contain the scale that is actually supported
+   (which may be smaller, requiring additional multiplication).
+   Otherwise SUPPORTED_SCALE is 0.
 
    Return true if the function is supported, storing the function id in
    *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
@@ -4582,12 +4611,14 @@ vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p,
 bool
 vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
                          tree vectype, tree memory_type, tree offset_type,
-                         int scale, internal_fn *ifn_out,
+                         int scale, int *supported_scale,
+                         internal_fn *ifn_out,
                          tree *offset_vectype_out,
                          tree *supported_offset_vectype,
                          vec<int> *elsvals)
 {
   *supported_offset_vectype = NULL_TREE;
+  *supported_scale = 0;
   unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
   unsigned int element_bits = vector_element_bits (vectype);
   if (element_bits != memory_bits)
@@ -4609,11 +4640,19 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
   if (configs.is_empty ())
     return false;
 
-  /* First, try to find a configuration that matches our offset type
-     (no conversion needed).  */
+  /* Selection priority:
+     1 - Exact scale match + offset type match
+     2 - Exact scale match + sign-swapped offset
+     3 - Smaller scale + offset type match
+     4 - Smaller scale + sign-swapped offset
+     Within each category, prefer smaller offset types.  */
+
+  /* First pass: exact scale match with no conversion.  */
   for (unsigned int i = 0; i < configs.length (); i++)
     {
-      if (TYPE_SIGN (configs[i].offset_vectype) == TYPE_SIGN (offset_vectype))
+      if (configs[i].scale == scale
+         && TYPE_SIGN (configs[i].offset_vectype)
+            == TYPE_SIGN (offset_vectype))
        {
          *ifn_out = configs[i].ifn;
          *offset_vectype_out = configs[i].offset_vectype;
@@ -4623,19 +4662,77 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
        }
     }
 
-  /* No direct match.  This means we try to find a sign-swapped offset
-     vectype.  */
+  /* No direct match.  This means we try to find either
+      - a sign-swapped offset vectype or
+      - a different scale and 2x larger offset type
+      - a different scale and larger sign-swapped offset vectype.  */
   unsigned int offset_precision = TYPE_PRECISION (TREE_TYPE (offset_vectype));
   unsigned int needed_precision
     = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
   needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
 
+  /* Second pass: No direct match.  This means we try to find a sign-swapped
+     offset vectype.  */
   enum tree_code tmp;
   for (unsigned int i = 0; i < configs.length (); i++)
     {
       unsigned int precision
        = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
-      if (precision >= needed_precision
+      if (configs[i].scale == scale
+         && precision >= needed_precision
+         && (supportable_convert_operation (CONVERT_EXPR,
+                                            configs[i].offset_vectype,
+                                            offset_vectype, &tmp)
+             || (needed_precision == offset_precision
+                 && tree_nop_conversion_p (configs[i].offset_vectype,
+                                           offset_vectype))))
+       {
+         *ifn_out = configs[i].ifn;
+         *offset_vectype_out = offset_vectype;
+         *supported_offset_vectype = configs[i].offset_vectype;
+         if (elsvals)
+           *elsvals = configs[i].elsvals;
+         return true;
+       }
+    }
+
+  /* Third pass: Try a smaller scale with the same signedness.  */
+  needed_precision = offset_precision * 2;
+  needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
+
+  for (unsigned int i = 0; i < configs.length (); i++)
+    {
+      unsigned int precision
+       = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
+      if (configs[i].scale < scale
+         && precision >= needed_precision
+         && (supportable_convert_operation (CONVERT_EXPR,
+                                           configs[i].offset_vectype,
+                                           offset_vectype, &tmp)
+             || (needed_precision == offset_precision
+                 && tree_nop_conversion_p (configs[i].offset_vectype,
+                                           offset_vectype))))
+       {
+         *ifn_out = configs[i].ifn;
+         *offset_vectype_out = configs[i].offset_vectype;
+         *supported_scale = configs[i].scale;
+         if (elsvals)
+           *elsvals = configs[i].elsvals;
+         return true;
+       }
+    }
+
+  /* Fourth pass: Try a smaller scale and sign-swapped offset vectype.  */
+  needed_precision
+    = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
+  needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
+
+  for (unsigned int i = 0; i < configs.length (); i++)
+    {
+      unsigned int precision
+       = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
+      if (configs[i].scale < scale
+         && precision >= needed_precision
          && (supportable_convert_operation (CONVERT_EXPR,
                                             configs[i].offset_vectype,
                                             offset_vectype, &tmp)
@@ -4646,6 +4743,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
          *ifn_out = configs[i].ifn;
          *offset_vectype_out = offset_vectype;
          *supported_offset_vectype = configs[i].offset_vectype;
+         *supported_scale = configs[i].scale;
          if (elsvals)
            *elsvals = configs[i].elsvals;
          return true;
@@ -4805,6 +4903,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
 
   base = fold_convert (sizetype, base);
   base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
+  int tmp_scale;
   tree tmp_offset_vectype;
 
   /* OFF at this point may be either a SSA_NAME or some tree expression
@@ -4878,14 +4977,16 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
                  && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
                                                masked_p, vectype, memory_type,
                                                signed_char_type_node,
-                                               new_scale, &ifn,
+                                               new_scale, &tmp_scale,
+                                               &ifn,
                                                &offset_vectype,
                                                &tmp_offset_vectype,
                                                elsvals)
                  && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
                                                masked_p, vectype, memory_type,
                                                unsigned_char_type_node,
-                                               new_scale, &ifn,
+                                               new_scale, &tmp_scale,
+                                               &ifn,
                                                &offset_vectype,
                                                &tmp_offset_vectype,
                                                elsvals))
@@ -4910,7 +5011,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
              && !POINTER_TYPE_P (TREE_TYPE (off))
              && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
                                           masked_p, vectype, memory_type,
-                                          TREE_TYPE (off), scale, &ifn,
+                                          TREE_TYPE (off),
+                                          scale, &tmp_scale,
+                                          &ifn,
                                           &offset_vectype,
                                           &tmp_offset_vectype,
                                           elsvals))
@@ -4966,7 +5069,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
   if (use_ifn_p)
     {
       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
-                                    vectype, memory_type, offtype, scale,
+                                    vectype, memory_type, offtype,
+                                    scale, &tmp_scale,
                                     &ifn, &offset_vectype,
                                     &tmp_offset_vectype,
                                     elsvals))
index da093d5021bcdb67042455fbb48c2a4901416832..2054f2afa6e0b7ad4f0a4574d85ec03ad820328f 100644 (file)
@@ -1512,6 +1512,9 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
         we chose a different one use this instead.  */
       if (ls->supported_offset_vectype)
        off_vectype = ls->supported_offset_vectype;
+      /* Same for scale.  */
+      if (ls->supported_scale)
+       scale = ls->supported_scale;
 
       if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
                                                  memory_type,
@@ -1706,8 +1709,10 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
         no narrower than OFFSET_TYPE.  */
       tree memory_type = TREE_TYPE (DR_REF (dr));
       tree tmp_offset_vectype;
+      int tmp_scale;
       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
-                                    vectype, memory_type, offset_type, scale,
+                                    vectype, memory_type, offset_type,
+                                    scale, &tmp_scale,
                                     &gs_info->ifn, &gs_info->offset_vectype,
                                     &tmp_offset_vectype, elsvals)
          || gs_info->ifn == IFN_LAST)
@@ -1789,9 +1794,10 @@ vect_use_grouped_gather (dr_vec_info *dr_info, tree vectype,
      not available we still have a strided load/store.  */
   bool ok = false;
   tree tmp_vectype;
+  int tmp_scale;
   if (vect_gather_scatter_fn_p
       (loop_vinfo, DR_IS_READ (dr), masked_p, *pun_vectype,
-       TREE_TYPE (*pun_vectype), *pun_vectype, 1, &ifn,
+       TREE_TYPE (*pun_vectype), *pun_vectype, 1, &tmp_scale, &ifn,
        &offset_vectype, &tmp_vectype, elsvals))
     ok = true;
   else if (internal_strided_fn_supported_p (strided_ifn, *pun_vectype,
@@ -2091,6 +2097,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
   bool *slp_perm = &ls->slp_perm;
   unsigned *n_perms = &ls->n_perms;
   tree *supported_offset_vectype = &ls->supported_offset_vectype;
+  int *supported_scale = &ls->supported_scale;
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
@@ -2164,7 +2171,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
       tree tem;
       if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
                                    masked_p, vectype, memory_type,
-                                   offset_vectype, scale,
+                                   offset_vectype, scale, supported_scale,
                                    &ls->gs.ifn, &tem,
                                    supported_offset_vectype, elsvals))
        {
@@ -2179,6 +2186,10 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
                dump_printf_loc (MSG_NOTE, vect_location,
                                 " target supports offset type %T.\n",
                                 *supported_offset_vectype);
+             if (*supported_scale)
+               dump_printf_loc (MSG_NOTE, vect_location,
+                                " target supports offset scale %d.\n",
+                                *supported_scale);
            }
          *memory_access_type = VMAT_GATHER_SCATTER_IFN;
        }
@@ -2455,7 +2466,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
          gcc_assert (vect_gather_scatter_fn_p
                      (loop_vinfo, vls_type == VLS_LOAD, masked_p, vectype,
                       gs_info.memory_type, TREE_TYPE (gs_info.offset),
-                      gs_info.scale, &gs_info.ifn,
+                      gs_info.scale, supported_scale, &gs_info.ifn,
                       &tmp, supported_offset_vectype, elsvals));
 
          SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
@@ -8850,6 +8861,10 @@ vectorizable_store (vec_info *vinfo,
                    inside_cost
                      += record_stmt_cost (cost_vec, 1, vector_stmt,
                                           slp_node, 0, vect_body);
+                 if (ls.supported_scale)
+                   inside_cost
+                     += record_stmt_cost (cost_vec, 1, vector_stmt,
+                                          slp_node, 0, vect_body);
 
                  unsigned int cnunits = vect_nunits_for_cost (vectype);
                  inside_cost
@@ -8864,12 +8879,26 @@ vectorizable_store (vec_info *vinfo,
              tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
              bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
 
-             /* Perform the offset conversion if necessary.  */
-             if (!strided && ls.supported_offset_vectype)
+             /* Perform the offset conversion and scaling if necessary.  */
+             if (!strided
+                 && (ls.supported_offset_vectype || ls.supported_scale))
                {
                  gimple_seq stmts = NULL;
-                 vec_offset = gimple_convert
-                   (&stmts, ls.supported_offset_vectype, vec_offset);
+                 if (ls.supported_offset_vectype)
+                   vec_offset = gimple_convert
+                     (&stmts, ls.supported_offset_vectype, vec_offset);
+                 if (ls.supported_scale)
+                   {
+                     tree mult_cst = build_int_cst
+                       (TREE_TYPE (TREE_TYPE (vec_offset)),
+                        SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
+                     tree mult = build_vector_from_val
+                       (TREE_TYPE (vec_offset), mult_cst);
+                     vec_offset = gimple_build
+                       (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
+                        vec_offset, mult);
+                     scale = size_int (ls.supported_scale);
+                   }
                  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
                }
 
@@ -10691,6 +10720,10 @@ vectorizable_load (vec_info *vinfo,
                    inside_cost
                      += record_stmt_cost (cost_vec, 1, vector_stmt,
                                           slp_node, 0, vect_body);
+                 if (ls.supported_scale)
+                   inside_cost
+                     += record_stmt_cost (cost_vec, 1, vector_stmt,
+                                          slp_node, 0, vect_body);
 
                  unsigned int cnunits = vect_nunits_for_cost (vectype);
                  inside_cost
@@ -10704,12 +10737,26 @@ vectorizable_load (vec_info *vinfo,
              tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
              bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
 
-             /* Perform the offset conversion if necessary.  */
-             if (!strided && ls.supported_offset_vectype)
+             /* Perform the offset conversion and scaling if necessary.  */
+             if (!strided
+                 && (ls.supported_offset_vectype || ls.supported_scale))
                {
                  gimple_seq stmts = NULL;
-                 vec_offset = gimple_convert
-                   (&stmts, ls.supported_offset_vectype, vec_offset);
+                 if (ls.supported_offset_vectype)
+                   vec_offset = gimple_convert
+                     (&stmts, ls.supported_offset_vectype, vec_offset);
+                 if (ls.supported_scale)
+                   {
+                     tree mult_cst = build_int_cst
+                       (TREE_TYPE (TREE_TYPE (vec_offset)),
+                        SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
+                     tree mult = build_vector_from_val
+                       (TREE_TYPE (vec_offset), mult_cst);
+                     vec_offset = gimple_build
+                       (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
+                        vec_offset, mult);
+                     scale = size_int (ls.supported_scale);
+                   }
                  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
                }
 
index b940a763a3c7dd4048392e421f66ab38b1690081..b7f3297a16b982eb15c0c196255e4fc36696c0e0 100644 (file)
@@ -290,9 +290,14 @@ struct vect_load_store_data : vect_data {
   tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
   tree ls_type; // VMAT_GATHER_SCATTER_IFN
   /* This is set to a supported offset vector type if we don't support the
-     originally requested offset type.  In that case there will be an
-     additional offset conversion before the gather/scatter.  */
+     originally requested offset type, otherwise NULL.
+     If non-NULL there will be an additional offset conversion before
+     the gather/scatter.  */
   tree supported_offset_vectype; // VMAT_GATHER_SCATTER_IFN
+  /* Similar for scale.  Only nonzero if we don't support the requested
+     scale.  Then we need to multiply the offset vector before the
+     gather/scatter.  */
+  int supported_scale; // VMAT_GATHER_SCATTER_IFN
   auto_vec<int> elsvals;
   /* True if the load requires a load permutation.  */
   bool slp_perm;    // SLP_TREE_LOAD_PERMUTATION
@@ -2592,7 +2597,7 @@ extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
 extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
 extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
 extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
-                                     tree, int, internal_fn *, tree *,
+                                     tree, int, int *, internal_fn *, tree *,
                                      tree *, vec<int> * = nullptr);
 extern bool vect_check_gather_scatter (stmt_vec_info, tree,
                                       loop_vec_info, gather_scatter_info *,