+2021-01-13 Julian Brown <julian@codesourcery.com>
+
+ * doc/tm.texi.in (TARGET_VECTORIZE_PREFER_GATHER_SCATTER): Add
+ documentation hook.
+ * doc/tm.texi: Regenerate.
+ * target.def (prefer_gather_scatter): New POD hook in the vectorize
+ group.
+ * tree-vect-stmts.cc (get_group_load_store_type): Optionally prefer
+ gather/scatter instructions to scalar/elementwise fallback.
+ * config/gcn/gcn.cc (TARGET_VECTORIZE_PREFER_GATHER_SCATTER): Define
+ hook.
+
2021-01-13 Julian Brown <julian@codesourcery.com>
* omp-offload.cc (oacc_thread_numbers): Add VF_BY_VECTORIZER parameter.
gcn_vector_alignment_reachable
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P gcn_vector_mode_supported_p
+#undef TARGET_VECTORIZE_PREFER_GATHER_SCATTER
+#define TARGET_VECTORIZE_PREFER_GATHER_SCATTER true
struct gcc_target targetm = TARGET_INITIALIZER;
stores.
@end deftypefn
+@deftypevr {Target Hook} bool TARGET_VECTORIZE_PREFER_GATHER_SCATTER
+This hook should be set to @code{true} if gather loads or scatter stores
+are cheaper on this target than a sequence of elementwise loads or stores.
+@end deftypevr
+
@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int})
This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}
fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also
@hook TARGET_VECTORIZE_BUILTIN_SCATTER
+@hook TARGET_VECTORIZE_PREFER_GATHER_SCATTER
+
@hook TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
@hook TARGET_SIMD_CLONE_ADJUST
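
As a usage note (not part of the patch): on the target side the opt-in is just
the macro override shown in the config/gcn/gcn.cc hunk above, placed with the
backend's other TARGET_VECTORIZE_* overrides before targetm is initialized.
The sketch below restates that pattern; the backend file is hypothetical and
only the macro name comes from this patch.  Since the DEFHOOKPOD default is
false, targets that do not define the macro keep the existing elementwise
behaviour.

  /* Hypothetical backend file, sketched after config/gcn/gcn.cc above.  */
  #undef TARGET_VECTORIZE_PREFER_GATHER_SCATTER
  #define TARGET_VECTORIZE_PREFER_GATHER_SCATTER true

  struct gcc_target targetm = TARGET_INITIALIZER;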
(unsigned ifn),
default_empty_mask_is_expensive)
+/* Prefer gather/scatter loads/stores to e.g. elementwise accesses if
+   we cannot use a contiguous access.  */
+DEFHOOKPOD
+(prefer_gather_scatter,
+ "This hook is set to TRUE if gather loads or scatter stores are cheaper on\n\
+this target than a sequence of elementwise loads or stores.",
+ bool, false)
+
/* Target builtin that implements vector gather operation. */
DEFHOOK
(builtin_gather,
it probably isn't a win to use separate strided accesses based
on nearby locations. Or, even if it's a win over scalar code,
it might not be a win over vectorizing at a lower VF, if that
- allows us to use contiguous accesses. */
+ allows us to use contiguous accesses.
+
+ On some targets (e.g. AMD GCN), gather/scatter accesses are the only
+ types of vector loads/stores available, and the elementwise fallback
+ is very inefficient, so such targets set prefer_gather_scatter to use
+ gather/scatter accesses here whenever possible.  */
if (*memory_access_type == VMAT_ELEMENTWISE
- && single_element_p
+ && (targetm.vectorize.prefer_gather_scatter || single_element_p)
&& loop_vinfo
&& vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
masked_p, gs_info))
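
To make the new case concrete, here is a hedged C sketch (an illustration, not
a test case from this patch; the struct and function names are made up).  The
x/y accesses form a two-element interleaved group with a gap, so they are not
contiguous across lanes.  Where a target without suitable load/store-lanes or
permute support would classify this as VMAT_ELEMENTWISE, setting
prefer_gather_scatter lets the vectorizer try
vect_use_strided_gather_scatters_p for it instead of emitting one scalar
access per element.

  /* Illustrative only: x and y are updated, z and w are skipped, so the
     grouped accesses have a gap and cannot be loaded/stored contiguously.  */
  struct pt { float x, y, z, w; };

  void
  scale_xy (struct pt *p, float f, int n)
  {
    for (int i = 0; i < n; i++)
      {
        p[i].x *= f;
        p[i].y *= f;
      }
  }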