(AGU and load ports). Try to account for this by scaling the
construction cost by the number of elements involved. */
if ((kind == vec_construct || kind == vec_to_scalar)
- && stmt_info
- && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
- || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
- && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
- && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
- != INTEGER_CST))
- || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER))
+ && ((stmt_info
+ && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
+ || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
+ && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
+ && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
+ != INTEGER_CST))
+ || (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)
+ == VMAT_GATHER_SCATTER)))
+ || (node
+ && ((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
+ && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
+ (SLP_TREE_REPRESENTATIVE (node))))
+ != INTEGER_CST))
+ || (SLP_TREE_MEMORY_ACCESS_TYPE (node)
+ == VMAT_GATHER_SCATTER)))))
{
stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
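(Purely illustrative sketch, not part of the patch: the kind of access this
costing adjustment targets is a load or store whose stride is only known at
run time, so it is classified as VMAT_ELEMENTWISE or VMAT_GATHER_SCATTER and
the vector has to be assembled or decomposed element by element. The
hypothetical function below shows such a loop.)

/* Illustrative only: 'stride' is not a compile-time constant, so
   DR_STEP is not an INTEGER_CST; each of the TYPE_VECTOR_SUBPARTS
   elements is loaded individually and then packed with a
   vec_construct, whose cost the hunk above scales accordingly.  */
void
f (float *restrict out, const float *restrict in, long stride, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = in[i * stride];
}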
if (costing_p) /* transformation not required. */
{
STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
+ if (slp_node)
+ SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
if (loop_vinfo
&& LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
&& first_stmt_info != stmt_info)
return true;
}
- gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
+ if (slp_node)
+   gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
+ else
+ gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
/* Transform. */
if (!slp)
STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
+ else
+ SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
if (loop_vinfo
&& LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
if (!slp)
gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
+ else
+ gcc_assert (memory_access_type
+ == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
if (dump_enabled_p () && !costing_p)
dump_printf_loc (MSG_NOTE, vect_location,
offset add is consumed by the load). */
inside_cost = record_stmt_cost (cost_vec, const_nunits,
vec_to_scalar, stmt_info,
- 0, vect_body);
+ slp_node, 0, vect_body);
/* N scalar loads plus gathering them into a
vector. */
inside_cost
stmt_info, 0, vect_body);
inside_cost
= record_stmt_cost (cost_vec, 1, vec_construct,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0, vect_body);
continue;
}
unsigned HOST_WIDE_INT const_offset_nunits
static bool equal (const value_type &, const compare_type &);
};
+/* Describes how we're going to vectorize an individual load or store,
+ or a group of loads or stores. */
+enum vect_memory_access_type {
+ /* An access to an invariant address. This is used only for loads. */
+ VMAT_INVARIANT,
+
+ /* A simple contiguous access. */
+ VMAT_CONTIGUOUS,
+
+ /* A contiguous access that goes down in memory rather than up,
+ with no additional permutation. This is used only for stores
+ of invariants. */
+ VMAT_CONTIGUOUS_DOWN,
+
+ /* A simple contiguous access in which the elements need to be permuted
+ after loading or before storing. Only used for loop vectorization;
+ SLP uses separate permutes. */
+ VMAT_CONTIGUOUS_PERMUTE,
+
+ /* A simple contiguous access in which the elements need to be reversed
+ after loading or before storing. */
+ VMAT_CONTIGUOUS_REVERSE,
+
+ /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES. */
+ VMAT_LOAD_STORE_LANES,
+
+ /* An access in which each scalar element is loaded or stored
+ individually. */
+ VMAT_ELEMENTWISE,
+
+ /* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped
+ SLP accesses. Each unrolled iteration uses a contiguous load
+ or store for the whole group, but the groups from separate iterations
+ are combined in the same way as for VMAT_ELEMENTWISE. */
+ VMAT_STRIDED_SLP,
+
+ /* The access uses gather loads or scatter stores. */
+ VMAT_GATHER_SCATTER
+};
+
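(A rough, hedged illustration of how these classifications tend to map onto
source-level accesses; the actual choice also depends on the target's
capabilities and on the cost model.)

/* Illustrative only, assuming the loop over i is vectorized.  */
void
examples (float *restrict a, const float *restrict b, const float *q,
	  const int *restrict idx, long stride, int n)
{
  for (int i = 0; i < n; i++)
    {
      a[i] = b[i];		/* contiguous: VMAT_CONTIGUOUS.  */
      a[i] += *q;		/* invariant address: VMAT_INVARIANT.  */
      a[i] += b[n - 1 - i];	/* reversed: VMAT_CONTIGUOUS_REVERSE.  */
      a[i] += b[i * stride];	/* non-constant step: VMAT_ELEMENTWISE,
				   or a gather where available.  */
      a[i] += b[idx[i]];	/* indexed: VMAT_GATHER_SCATTER.  */
    }
}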
/************************************************************************
SLP
************************************************************************/
int vertex;
+ /* Classifies how the load or store is going to be implemented
+ for loop vectorization. */
+ vect_memory_access_type memory_access_type;
+
/* If not NULL this is a cached failed SLP discovery attempt with
the lanes that failed during SLP discovery as 'false'. This is
a copy of the matches array. */
#define SLP_TREE_REPRESENTATIVE(S) (S)->representative
#define SLP_TREE_LANES(S) (S)->lanes
#define SLP_TREE_CODE(S) (S)->code
+#define SLP_TREE_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type
enum vect_partial_vector_style {
vect_partial_vectors_none,
VLS_STORE_INVARIANT
};
-/* Describes how we're going to vectorize an individual load or store,
- or a group of loads or stores. */
-enum vect_memory_access_type {
- /* An access to an invariant address. This is used only for loads. */
- VMAT_INVARIANT,
-
- /* A simple contiguous access. */
- VMAT_CONTIGUOUS,
-
- /* A contiguous access that goes down in memory rather than up,
- with no additional permutation. This is used only for stores
- of invariants. */
- VMAT_CONTIGUOUS_DOWN,
-
- /* A simple contiguous access in which the elements need to be permuted
- after loading or before storing. Only used for loop vectorization;
- SLP uses separate permutes. */
- VMAT_CONTIGUOUS_PERMUTE,
-
- /* A simple contiguous access in which the elements need to be reversed
- after loading or before storing. */
- VMAT_CONTIGUOUS_REVERSE,
-
- /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES. */
- VMAT_LOAD_STORE_LANES,
-
- /* An access in which each scalar element is loaded or stored
- individually. */
- VMAT_ELEMENTWISE,
-
- /* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped
- SLP accesses. Each unrolled iteration uses a contiguous load
- or store for the whole group, but the groups from separate iterations
- are combined in the same way as for VMAT_ELEMENTWISE. */
- VMAT_STRIDED_SLP,
-
- /* The access uses gather loads or scatter stores. */
- VMAT_GATHER_SCATTER
-};
-
class dr_vec_info {
public:
/* The data reference itself. */
STMT_VINFO_VECTYPE (stmt_info), misalign, where);
}
+/* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO and
+ SLP node specified. */
+
+inline unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+ enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
+ slp_tree node,
+ int misalign, enum vect_cost_model_location where)
+{
+ if (node)
+ return record_stmt_cost (body_cost_vec, count, kind, node,
+ STMT_VINFO_VECTYPE (stmt_info), misalign, where);
+ else
+ return record_stmt_cost (body_cost_vec, count, kind, stmt_info,
+ STMT_VINFO_VECTYPE (stmt_info), misalign, where);
+}
+
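(Call sites can then pass the SLP node alongside the stmt_info, as the
costing hunks above do; when no node is available the overload falls back to
recording the cost against the stmt_info. A sketch of a typical call,
modelled on the vec_construct costing above:)

  /* slp_node may be NULL, in which case the cost is attributed to
     stmt_info as before.  */
  inside_cost = record_stmt_cost (cost_vec, 1, vec_construct,
				  stmt_info, slp_node, 0, vect_body);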
extern void vect_finish_replace_stmt (vec_info *, stmt_vec_info, gimple *);
extern void vect_finish_stmt_generation (vec_info *, stmt_vec_info, gimple *,
gimple_stmt_iterator *);