RISC-V: Fix unexpected big LMUL choosing in dynamic LMUL model for non-adjacent load/store

author Juzhe-Zhong <juzhe.zhong@rivai.ai>

Mon, 16 Oct 2023 23:20:38 +0000 (07:20 +0800)

committer Pan Li <pan2.li@intel.com>

Tue, 17 Oct 2023 01:06:11 +0000 (09:06 +0800)
author Juzhe-Zhong <juzhe.zhong@rivai.ai>
Mon, 16 Oct 2023 23:20:38 +0000 (07:20 +0800)
committer Pan Li <pan2.li@intel.com>
Tue, 17 Oct 2023 01:06:11 +0000 (09:06 +0800)
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc

index 0b89039653544691fc7a2e95eaaf0228203e19bc..33061efb1d0e7e943d51105c65a06eb7369d68e8 100644 (file)
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "bitmap.h"
  #include "ssa.h"
  #include "backend.h"
+#include "tree-data-ref.h"
  
  /* This file should be included last.  */
  #include "riscv-vector-costs.h"
@@ -135,8 +136,9 @@ compute_local_program_points (
                     || is_gimple_call (gsi_stmt (si))))
                 continue;
               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
-             if (STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info))
-                 != undef_vec_info_type)
+             enum stmt_vec_info_type type
+               = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
+             if (type != undef_vec_info_type)
                 {
                   stmt_point info = {point, gsi_stmt (si)};
                   program_points.safe_push (info);
@@ -289,9 +291,7 @@ max_number_of_live_regs (const basic_block bb,
    unsigned int i;
    unsigned int live_point = 0;
    auto_vec<unsigned int> live_vars_vec;
-  live_vars_vec.safe_grow (max_point + 1, true);
-  for (i = 0; i < live_vars_vec.length (); ++i)
-    live_vars_vec[i] = 0;
+  live_vars_vec.safe_grow_cleared (max_point + 1, true);
    for (hash_map<tree, pair>::iterator iter = live_ranges.begin ();
         iter != live_ranges.end (); ++iter)
      {
@@ -360,6 +360,31 @@ get_current_lmul (class loop *loop)
    return loop_autovec_infos.get (loop)->current_lmul;
  }
  
+/* Return the value STMT stores: arg 3 of IFN_MASK_STORE, otherwise rhs1.  */
+static tree
+get_store_value (gimple *stmt)
+{
+  if (is_gimple_call (stmt) && gimple_call_internal_p (stmt))
+    {
+      if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
+       return gimple_call_arg (stmt, 3);
+      else
+       gcc_unreachable ();
+    }
+  else
+    return gimple_assign_rhs1 (stmt);
+}
+
+/* Return true if STMT_INFO is a load/store that fails adjacent_dr_p.  */
+static bool
+non_contiguous_memory_access_p (stmt_vec_info stmt_info)
+{
+  enum stmt_vec_info_type type
+    = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
+  return ((type == load_vec_info_type || type == store_vec_info_type)
+         && !adjacent_dr_p (STMT_VINFO_DATA_REF (stmt_info)));
+}
+
  /* Update the live ranges according PHI.
  
     Loop:
@@ -395,13 +420,15 @@ update_local_live_ranges (
    unsigned int nbbs = loop->num_nodes;
    unsigned int i, j;
    gphi_iterator psi;
+  gimple_stmt_iterator si;
    for (i = 0; i < nbbs; i++)
      {
        basic_block bb = bbs[i];
        if (dump_enabled_p ())
         dump_printf_loc (MSG_NOTE, vect_location,
-                        "Update local program points for bb %d:\n", bb->index);
-      for (psi = gsi_start_phis (bbs[i]); !gsi_end_p (psi); gsi_next (&psi))
+                        "Update local program points for bb %d:\n",
+                        bbs[i]->index);
+      for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
         {
           gphi *phi = psi.phi ();
           stmt_vec_info stmt_info = vinfo->lookup_stmt (phi);
@@ -413,12 +440,23 @@ update_local_live_ranges (
             {
               edge e = gimple_phi_arg_edge (phi, j);
               tree def = gimple_phi_arg_def (phi, j);
-             auto *live_ranges = live_ranges_per_bb.get (e->src);
+             auto *live_ranges = live_ranges_per_bb.get (bb);
+             auto *live_range = live_ranges->get (def);
+             if (live_range && flow_bb_inside_loop_p (loop, e->src))
+               {
+                 unsigned int start = (*live_range).first;
+                 (*live_range).first = 0;
+                 if (dump_enabled_p ())
+                   dump_printf_loc (MSG_NOTE, vect_location,
+                                    "Update %T start point from %d to %d:\n",
+                                    def, start, (*live_range).first);
+               }
+             live_ranges = live_ranges_per_bb.get (e->src);
               if (!program_points_per_bb.get (e->src))
                 continue;
               unsigned int max_point
                 = (*program_points_per_bb.get (e->src)).length () - 1;
-             auto *live_range = live_ranges->get (def);
+             live_range = live_ranges->get (def);
               if (!live_range)
                 continue;
  
@@ -430,6 +468,43 @@ update_local_live_ranges (
                                  end, (*live_range).second);
             }
         }
+      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+       {
+         if (!(is_gimple_assign (gsi_stmt (si))
+               || is_gimple_call (gsi_stmt (si))))
+           continue;
+         stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
+         enum stmt_vec_info_type type
+           = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
+         if (non_contiguous_memory_access_p (stmt_info))
+           {
+             /* For non-adjacent load/store STMT, we will potentially
+                convert it into:
+
+                  1. MASK_LEN_GATHER_LOAD (..., perm indice).
+                  2. Contiguous load/store + VEC_PERM (..., perm indice)
+
+               We will be likely using one more vector variable.  */
+             unsigned int max_point
+               = (*program_points_per_bb.get (bb)).length () - 1;
+             auto *live_ranges = live_ranges_per_bb.get (bb);
+             bool existed_p = false;
+             tree var = type == load_vec_info_type
+                          ? gimple_get_lhs (gsi_stmt (si))
+                          : get_store_value (gsi_stmt (si));
+             tree sel_type = build_nonstandard_integer_type (
+               TYPE_PRECISION (TREE_TYPE (var)), 1);
+             tree sel = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+                                    get_identifier ("vect_perm"), sel_type);
+             pair &live_range = live_ranges->get_or_insert (sel, &existed_p);
+             gcc_assert (!existed_p);
+             live_range = pair (0, max_point);
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_NOTE, vect_location,
+                                "Add perm indice %T, start = 0, end = %d\n",
+                                sel, max_point);
+           }
+       }
      }
  }
  
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c

new file mode 100644 (file)

index 0000000..3dfc6f1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -fdump-tree-vect-details" } */
+
+int
+bar (int *x, int a, int b, int n)
+{
+  x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__);
+  int sum1 = 0;
+  int sum2 = 0;
+  for (int i = 0; i < n; ++i) /* Each iteration reads x[2*i] and x[2*i+1].  */
+    {
+      sum1 += x[2*i] - a;
+      sum1 += x[2*i+1] * b;
+      sum2 += x[2*i] - b;
+      sum2 += x[2*i+1] * a;
+    }
+  return sum1 + sum2;
+}
+
+/* { dg-final { scan-assembler {e32,m2} } } */
+/* { dg-final { scan-assembler-times {csrr} 1 } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
author	Juzhe-Zhong <juzhe.zhong@rivai.ai>
	Mon, 16 Oct 2023 23:20:38 +0000 (07:20 +0800)
committer	Pan Li <pan2.li@intel.com>
	Tue, 17 Oct 2023 01:06:11 +0000 (09:06 +0800)
gcc/config/riscv/riscv-vector-costs.cc		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c	[new file with mode: 0644]	patch \| blob