]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
LoongArch:Implement the new vector cost model framework.
authorJiahao Xu <xujiahao@loongson.cn>
Wed, 18 Oct 2023 09:43:39 +0000 (17:43 +0800)
committerLulu Cheng <chenglulu@loongson.cn>
Thu, 19 Oct 2023 06:15:38 +0000 (14:15 +0800)
This patch make loongarch use the new vector hooks and implements the costing
function determine_suggested_unroll_factor, to make it be able to suggest the
unroll factor for a given loop being vectorized base vec_ops analysis during
vector costing and the available issue information. Referring to aarch64 and
rs6000 port.

The patch also reduces the cost of unaligned stores, making it equal to the
cost of aligned ones in order to avoid odd alignment peeling.

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_vector_costs): Inherit from
vector_costs.  Add a constructor.
(loongarch_vector_costs::add_stmt_cost): Use adjust_cost_for_freq to
adjust the cost for inner loops.
(loongarch_vector_costs::count_operations): New function.
(loongarch_vector_costs::determine_suggested_unroll_factor): Ditto.
(loongarch_vector_costs::finish_cost): Ditto.
(loongarch_builtin_vectorization_cost): Adjust.
* config/loongarch/loongarch.opt (loongarch-vect-unroll-limit): New parameter.
(loongarcg-vect-issue-info): Ditto.
(mmemvec-cost): Delete.
* config/loongarch/genopts/loongarch.opt.in
(loongarch-vect-unroll-limit): Ditto.
(loongarcg-vect-issue-info): Ditto.
(mmemvec-cost): Delete.
* doc/invoke.texi (loongarcg-vect-unroll-limit): Document new option.

gcc/config/loongarch/genopts/loongarch.opt.in
gcc/config/loongarch/loongarch.cc
gcc/config/loongarch/loongarch.opt
gcc/doc/invoke.texi

index 9f98f2d845a61e0bb3632901bebaa3d2cb1a0258..4a2d7438f1b5600ca563055471152a298ec831de 100644 (file)
@@ -146,10 +146,6 @@ mbranch-cost=
 Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
 -mbranch-cost=COST     Set the cost of branches to roughly COST instructions.
 
-mmemvec-cost=
-Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5)
-mmemvec-cost=COST      Set the cost of vector memory access instructions.
-
 mcheck-zero-division
 Target Mask(CHECK_ZERO_DIV)
 Trap on integer divide by zero.
@@ -213,3 +209,14 @@ mrelax
 Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
 Take advantage of linker relaxations to reduce the number of instructions
 required to materialize symbol addresses.
+
+-param=loongarch-vect-unroll-limit=
+Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
+Used to limit unroll factor which indicates how much the autovectorizer may
+unroll a loop.  The default value is 6.
+
+-param=loongarch-vect-issue-info=
+Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param
+Indicate how many non memory access vector instructions can be issued per
+cycle, it's used in unroll factor determination for autovectorizer.  The
+default value is 4.
index da132d01edb5d505dea9ee5f53818d0690d327c3..73f0c160e5f0157ebf8ce538f56b5ae05bc8da85 100644 (file)
@@ -65,6 +65,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "rtl-iter.h"
 #include "opts.h"
 #include "function-abi.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -3845,8 +3847,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
     }
 }
 
-/* Vectorizer cost model implementation.  */
-
 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
 
 static int
@@ -3865,36 +3865,182 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
       case vector_load:
       case vec_to_scalar:
       case scalar_to_vec:
-      case cond_branch_not_taken:
-      case vec_promote_demote:
       case scalar_store:
       case vector_store:
        return 1;
 
+      case vec_promote_demote:
       case vec_perm:
        return LASX_SUPPORTED_MODE_P (mode)
          && !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1;
 
       case unaligned_load:
-      case vector_gather_load:
-       return 2;
-
       case unaligned_store:
-      case vector_scatter_store:
-       return 10;
+       return 2;
 
       case cond_branch_taken:
-       return 3;
+       return 4;
+
+      case cond_branch_not_taken:
+       return 2;
 
       case vec_construct:
        elements = TYPE_VECTOR_SUBPARTS (vectype);
-       return elements / 2 + 1;
+       if (ISA_HAS_LASX)
+         return elements + 1;
+       else
+         return elements;
 
       default:
        gcc_unreachable ();
     }
 }
 
+class loongarch_vector_costs : public vector_costs
+{
+public:
+  using vector_costs::vector_costs;
+
+  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
+                             stmt_vec_info stmt_info, slp_tree, tree vectype,
+                             int misalign,
+                             vect_cost_model_location where) override;
+  void finish_cost (const vector_costs *) override;
+
+protected:
+  void count_operations (vect_cost_for_stmt, stmt_vec_info,
+                        vect_cost_model_location, unsigned int);
+  unsigned int determine_suggested_unroll_factor (loop_vec_info);
+  /* The number of vectorized stmts in loop.  */
+  unsigned m_stmts = 0;
+  /* The number of load and store operations in loop.  */
+  unsigned m_loads = 0;
+  unsigned m_stores = 0;
+  /* Reduction factor for suggesting unroll factor.  */
+  unsigned m_reduc_factor = 0;
+  /* True if the loop contains an average operation. */
+  bool m_has_avg =false;
+};
+
+/* Implement TARGET_VECTORIZE_CREATE_COSTS.  */
+static vector_costs *
+loongarch_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
+{
+  return new loongarch_vector_costs (vinfo, costing_for_scalar);
+}
+
+void
+loongarch_vector_costs::count_operations (vect_cost_for_stmt kind,
+                                         stmt_vec_info stmt_info,
+                                         vect_cost_model_location where,
+                                         unsigned int count)
+{
+  if (!m_costing_for_scalar
+      && is_a<loop_vec_info> (m_vinfo)
+      && where == vect_body)
+    {
+      m_stmts += count;
+
+      if (kind == scalar_load
+         || kind == vector_load
+         || kind == unaligned_load)
+       m_loads += count;
+      else if (kind == scalar_store
+              || kind == vector_store
+              || kind == unaligned_store)
+       m_stores += count;
+      else if ((kind == scalar_stmt
+               || kind == vector_stmt
+               || kind == vec_to_scalar)
+              && stmt_info && vect_is_reduction (stmt_info))
+       {
+         tree lhs = gimple_get_lhs (stmt_info->stmt);
+         unsigned int base = FLOAT_TYPE_P (TREE_TYPE (lhs)) ? 2 : 1;
+         m_reduc_factor = MAX (base * count, m_reduc_factor);
+       }
+    }
+}
+
+unsigned int
+loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
+{
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+
+  if (m_has_avg)
+    return 1;
+
+  /* Don't unroll if it's specified explicitly not to be unrolled.  */
+  if (loop->unroll == 1
+      || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
+      || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
+    return 1;
+
+  unsigned int nstmts_nonldst = m_stmts - m_loads - m_stores;
+  /* Don't unroll if no vector instructions excepting for memory access.  */
+  if (nstmts_nonldst == 0)
+    return 1;
+
+  /* Use this simple hardware resource model that how many non vld/vst
+     vector instructions can be issued per cycle.  */
+  unsigned int issue_info = loongarch_vect_issue_info;
+  unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;
+  unsigned int uf = CEIL (reduc_factor * issue_info, nstmts_nonldst);
+  uf = MIN ((unsigned int) loongarch_vect_unroll_limit, uf);
+
+  return 1 << ceil_log2 (uf);
+}
+
+unsigned
+loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
+                                      stmt_vec_info stmt_info, slp_tree,
+                                      tree vectype, int misalign,
+                                      vect_cost_model_location where)
+{
+  unsigned retval = 0;
+
+  if (flag_vect_cost_model)
+    {
+      int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype,
+                                                           misalign);
+      retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
+      m_costs[where] += retval;
+
+      count_operations (kind, stmt_info, where, count);
+    }
+
+  if (stmt_info)
+    {
+      /* Detect the use of an averaging operation.  */
+      gimple *stmt = stmt_info->stmt;
+      if (is_gimple_call (stmt)
+         && gimple_call_internal_p (stmt))
+       {
+         switch (gimple_call_internal_fn (stmt))
+           {
+           case IFN_AVG_FLOOR:
+           case IFN_AVG_CEIL:
+             m_has_avg = true;
+           default:
+             break;
+           }
+       }
+    }
+
+  return retval;
+}
+
+void
+loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
+{
+  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
+  if (loop_vinfo)
+    {
+      m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo);
+    }
+
+  vector_costs::finish_cost (scalar_costs);
+}
+
 /* Implement TARGET_ADDRESS_COST.  */
 
 static int
@@ -7265,9 +7411,6 @@ loongarch_option_override_internal (struct gcc_options *opts,
   if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
     error ("%qs cannot be used for compiling a shared library",
           "-mdirect-extern-access");
-  if (loongarch_vector_access_cost == 0)
-    loongarch_vector_access_cost = 5;
-
 
   switch (la_target.cmodel)
     {
@@ -11281,6 +11424,8 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode,
 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
   loongarch_builtin_vectorization_cost
+#undef TARGET_VECTORIZE_CREATE_COSTS
+#define TARGET_VECTORIZE_CREATE_COSTS loongarch_vectorize_create_costs
 
 
 #undef TARGET_IN_SMALL_DATA_P
index e1b085ae87cfe85a723a3277f383229a3e022492..6215abcac043c9553948c7bd21acb8826d3ea30f 100644 (file)
@@ -153,10 +153,6 @@ mbranch-cost=
 Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
 -mbranch-cost=COST     Set the cost of branches to roughly COST instructions.
 
-mmemvec-cost=
-Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5)
-mmemvec-cost=COST      Set the cost of vector memory access instructions.
-
 mcheck-zero-division
 Target Mask(CHECK_ZERO_DIV)
 Trap on integer divide by zero.
@@ -220,3 +216,14 @@ mrelax
 Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
 Take advantage of linker relaxations to reduce the number of instructions
 required to materialize symbol addresses.
+
+-param=loongarch-vect-unroll-limit=
+Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
+Used to limit unroll factor which indicates how much the autovectorizer may
+unroll a loop.  The default value is 6.
+
+-param=loongarch-vect-issue-info=
+Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param
+Indicate how many non memory access vector instructions can be issued per
+cycle, it's used in unroll factor determination for autovectorizer.  The
+default value is 4.
index bfaadd4a51e73354afe0ff0aa72f565e6bb2331c..9b5ff4570277e3040ee3b436320563bd56cf1165 100644 (file)
@@ -26225,6 +26225,13 @@ environments where no dynamic link is performed, like firmwares, OS
 kernels, executables linked with @option{-static} or @option{-static-pie}.
 @option{-mdirect-extern-access} is not compatible with @option{-fPIC} or
 @option{-fpic}.
+
+@item loongarch-vect-unroll-limit
+The vectorizer will use available tuning information to determine whether it
+would be beneficial to unroll the main vectorized loop and by how much.  This
+parameter set's the upper bound of how much the vectorizer will unroll the main
+loop.  The default value is six.
+
 @end table