private:
void record_potential_advsimd_unrolling (loop_vec_info);
void analyze_loop_vinfo (loop_vec_info);
- void count_ops (unsigned int, vect_cost_for_stmt, stmt_vec_info,
+ void count_ops (unsigned int, vect_cost_for_stmt, stmt_vec_info, slp_tree,
aarch64_vec_op_count *);
fractional_cost adjust_body_cost_sve (const aarch64_vec_op_count *,
fractional_cost, unsigned int,
}
}
-/* Return true if an access of kind KIND for STMT_INFO represents one
- vector of an LD[234] or ST[234] operation. Return the total number of
- vectors (2, 3 or 4) if so, otherwise return a value outside that range. */
+/* Check whether an access of kind KIND for STMT_INFO (or for NODE, if SLP)
+ represents one vector of an LD[234] or ST[234] operation. Return the total
+ number of vectors (2, 3 or 4) if so, otherwise return a value outside that
+ range. */
static int
-aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info)
+aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
+ slp_tree node)
{
if ((kind == vector_load
|| kind == unaligned_load
{
stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
if (stmt_info
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES)
+ && vect_mem_access_type (stmt_info, node) == VMAT_LOAD_STORE_LANES)
return DR_GROUP_SIZE (stmt_info);
}
return 0;
}
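/* For reference: the vect_mem_access_type calls introduced in this patch
   come from tree-vectorizer.h and prefer the access type cached on the SLP
   node when one is available. A sketch of that helper, shown for context
   rather than as part of this patch, and paraphrased rather than verbatim:

   inline vect_memory_access_type
   vect_mem_access_type (stmt_vec_info stmt_info, slp_tree node)
   {
     if (node)
       return SLP_TREE_MEMORY_ACCESS_TYPE (node);
     return STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info);
   }  */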
/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost
- for the vectorized form of STMT_INFO, which has cost kind KIND and which
- when vectorized would operate on vector type VECTYPE. Try to subdivide
- the target-independent categorization provided by KIND to get a more
- accurate cost. WHERE specifies where the cost associated with KIND
- occurs. */
+ for the vectorized form of STMT_INFO (possibly using SLP node NODE), which
+ has cost kind KIND and which, when vectorized, would operate on vector type
+ VECTYPE. Try to subdivide the target-independent categorization provided by
+ KIND to get a more accurate cost. WHERE specifies where the cost associated
+ with KIND occurs. */
static fractional_cost
aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
- stmt_vec_info stmt_info, tree vectype,
+ stmt_vec_info stmt_info, slp_tree node,
+ tree vectype,
enum vect_cost_model_location where,
fractional_cost stmt_cost)
{
cost by the number of elements in the vector. */
if (kind == scalar_load
&& sve_costs
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
{
unsigned int nunits = vect_nunits_for_cost (vectype);
/* Test for VNx2 modes, which have 64-bit containers. */
in a scatter operation. */
if (kind == scalar_store
&& sve_costs
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
return sve_costs->scatter_store_elt_cost;
/* Detect cases in which vec_to_scalar represents an in-loop reduction. */
cost of any embedded operations. */
static fractional_cost
aarch64_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind,
- stmt_vec_info stmt_info, tree vectype,
+ stmt_vec_info stmt_info, slp_tree node, tree vectype,
unsigned vec_flags, fractional_cost stmt_cost)
{
if (vectype)
/* Detect cases in which a vector load or store represents an
LD[234] or ST[234] instruction. */
- switch (aarch64_ld234_st234_vectors (kind, stmt_info))
+ switch (aarch64_ld234_st234_vectors (kind, stmt_info, node))
{
case 2:
stmt_cost += simd_costs->ld2_st2_permute_cost;
information relating to the vector operation in OPS. */
void
aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
- stmt_vec_info stmt_info,
+ stmt_vec_info stmt_info, slp_tree node,
aarch64_vec_op_count *ops)
{
const aarch64_base_vec_issue_info *base_issue = ops->base_issue_info ();
/* Add any extra overhead associated with LD[234] and ST[234] operations. */
if (simd_issue)
- switch (aarch64_ld234_st234_vectors (kind, stmt_info))
+ switch (aarch64_ld234_st234_vectors (kind, stmt_info, node))
{
case 2:
ops->general_ops += simd_issue->ld2_st2_general_ops * count;
/* Add any overhead associated with gather loads and scatter stores. */
if (sve_issue
&& (kind == scalar_load || kind == scalar_store)
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
{
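/* Note: the issue costs used below are defined per pair of gathered or
   scattered elements, hence COUNT is rounded up to a whole number of
   pairs. */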
unsigned int pairs = CEIL (count, 2);
ops->pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs;
unsigned
aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
- stmt_vec_info stmt_info, slp_tree,
+ stmt_vec_info stmt_info, slp_tree node,
tree vectype, int misalign,
vect_cost_model_location where)
{
if (vectype && m_vec_flags)
stmt_cost = aarch64_detect_vector_stmt_subtype (m_vinfo, kind,
- stmt_info, vectype,
- where, stmt_cost);
+ stmt_info, node,
+ vectype, where,
+ stmt_cost);
/* Check whether we've seen an SVE gather/scatter operation and, if so, its size. */
if (kind == scalar_load
&& aarch64_sve_mode_p (TYPE_MODE (vectype))
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
{
const sve_vec_cost *sve_costs = aarch64_tune_params.vec_costs->sve;
if (sve_costs)
{
/* Account for any extra "embedded" costs that apply additively
to the base cost calculated above. */
- stmt_cost = aarch64_adjust_stmt_cost (m_vinfo, kind, stmt_info,
+ stmt_cost = aarch64_adjust_stmt_cost (m_vinfo, kind, stmt_info, node,
vectype, m_vec_flags, stmt_cost);
/* If we're recording a nonzero vector loop body cost for the
&& (!LOOP_VINFO_LOOP (loop_vinfo)->inner || in_inner_loop_p)
&& stmt_cost != 0)
for (auto &ops : m_ops)
- count_ops (count, kind, stmt_info, &ops);
+ count_ops (count, kind, stmt_info, node, &ops);
/* If we're applying the SVE vs. Advanced SIMD unrolling heuristic,
estimate the number of statements in the unrolled Advanced SIMD