RISC-V: Support RVV VLA SLP auto-vectorization

author Juzhe-Zhong <juzhe.zhong@rivai.ai>

Wed, 7 Jun 2023 03:19:15 +0000 (11:19 +0800)

committer Pan Li <pan2.li@intel.com>

Wed, 7 Jun 2023 06:02:56 +0000 (14:02 +0800)
author Juzhe-Zhong <juzhe.zhong@rivai.ai>
Wed, 7 Jun 2023 03:19:15 +0000 (11:19 +0800)
committer Pan Li <pan2.li@intel.com>
Wed, 7 Jun 2023 06:02:56 +0000 (14:02 +0800)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h

index ebbaac255f9f5e9358dce3d9417ed94c0e02fbb2..9782f1794fb9c45b048e6b5e30a9b2398d1e3af3 100644 (file)
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -169,6 +169,8 @@ void init_builtins (void);
  const char *mangle_builtin_type (const_tree);
  #ifdef GCC_TARGET_H
  bool verify_type_context (location_t, type_context_kind, const_tree, bool);
+bool expand_vec_perm_const (machine_mode, machine_mode, rtx, rtx, rtx,
+                           const vec_perm_indices &);
  #endif
  void handle_pragma_vector (void);
  tree builtin_decl (unsigned, bool);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc

index 49752cd889976b52611e2dd4ae8ad218e1084c23..477a22cd2b0c841ed1973b5719c2b2aee59c02b6 100644 (file)
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -251,9 +251,12 @@ public:
      m_inner_mode = GET_MODE_INNER (mode);
      m_inner_bits_size = GET_MODE_BITSIZE (m_inner_mode);
      m_inner_bytes_size = GET_MODE_SIZE (m_inner_mode);
+    m_mask_mode = get_mask_mode (mode).require ();
  
      gcc_assert (
        int_mode_for_size (inner_bits_size (), 0).exists (&m_inner_int_mode));
+    m_int_mode
+      = get_vector_mode (m_inner_int_mode, GET_MODE_NUNITS (mode)).require ();
    }
  
    bool can_duplicate_repeating_sequence_p ();
@@ -262,9 +265,14 @@ public:
    bool repeating_sequence_use_merge_profitable_p ();
    rtx get_merge_scalar_mask (unsigned int) const;
  
+  bool single_step_npatterns_p () const;
+  bool npatterns_all_equal_p () const;
+
    machine_mode new_mode () const { return m_new_mode; }
    scalar_mode inner_mode () const { return m_inner_mode; }
    scalar_int_mode inner_int_mode () const { return m_inner_int_mode; }
+  machine_mode mask_mode () const { return m_mask_mode; }
+  machine_mode int_mode () const { return m_int_mode; }
    unsigned int inner_bits_size () const { return m_inner_bits_size; }
    unsigned int inner_bytes_size () const { return m_inner_bytes_size; }
  
@@ -273,6 +281,8 @@ private:
    scalar_int_mode m_inner_int_mode;
    machine_mode m_new_mode;
    scalar_int_mode m_new_inner_mode;
+  machine_mode m_mask_mode;
+  machine_mode m_int_mode;
    unsigned int m_inner_bits_size;
    unsigned int m_inner_bytes_size;
  };
@@ -290,7 +300,9 @@ rvv_builder::can_duplicate_repeating_sequence_p ()
        || GET_MODE_SIZE (m_new_inner_mode) > UNITS_PER_WORD
        || !get_vector_mode (m_new_inner_mode, new_size).exists (&m_new_mode))
      return false;
-  return repeating_sequence_p (0, full_nelts ().to_constant (), npatterns ());
+  if (full_nelts ().is_constant ())
+    return repeating_sequence_p (0, full_nelts ().to_constant (), npatterns ());
+  return nelts_per_pattern () == 1;
  }
  
  /* Return true if it is a repeating sequence that using
@@ -398,6 +410,67 @@ rvv_builder::get_merge_scalar_mask (unsigned int index_in_pattern) const
    return gen_int_mode (mask, inner_int_mode ());
  }
  
+/* Return true if the variable-length vector is single step.
+   Single step means the steps of all patterns in NPATTERNS are equal.
+   Consider the following cases:
+
+     CASE 1: NPATTERNS = 2, NELTS_PER_PATTERN = 3.
+       { 0, 2, 2, 4, 4, 6, ... }
+     First pattern: step1 = 2 - 0 = 2
+                   step2 = 4 - 2 = 2
+     Second pattern: step1 = 4 - 2 = 2
+                    step2 = 6 - 4 = 2
+     Since all steps of NPATTERNS are equal step = 2.
+     Return true in this case.
+
+     CASE 2: NPATTERNS = 2, NELTS_PER_PATTERN = 3.
+       { 0, 1, 2, 4, 4, 7, ... }
+     First pattern: step1 = 2 - 0 = 2
+                   step2 = 4 - 2 = 2
+     Second pattern: step1 = 4 - 1 = 3
+                    step2 = 7 - 4 = 3
+     Since not all steps are equal, return false.  */
+bool
+rvv_builder::single_step_npatterns_p () const
+{
+  if (nelts_per_pattern () != 3)
+    return false;
+
+  poly_int64 step
+    = rtx_to_poly_int64 (elt (npatterns ())) - rtx_to_poly_int64 (elt (0));
+  for (unsigned int i = 0; i < npatterns (); i++)
+    {
+      poly_int64 ele0 = rtx_to_poly_int64 (elt (i));
+      poly_int64 ele1 = rtx_to_poly_int64 (elt (npatterns () + i));
+      poly_int64 ele2 = rtx_to_poly_int64 (elt (npatterns () * 2 + i));
+      poly_int64 diff1 = ele1 - ele0;
+      poly_int64 diff2 = ele2 - ele1;
+      if (maybe_ne (step, diff1) || maybe_ne (step, diff2))
+       return false;
+    }
+  return true;
+}
+
+/* Return true if the first NPATTERNS elements are all equal.
+
+   E.g. NPATTERNS = 4:
+     { 2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, ... }
+   E.g. NPATTERNS = 8:
+     { 2, 2, 2, 2, 2, 2, 2, 2, 8, 8, 8, 8, 8, 8, 8, 8, ... }
+*/
+bool
+rvv_builder::npatterns_all_equal_p () const
+{
+  poly_int64 ele0 = rtx_to_poly_int64 (elt (0));
+  for (unsigned int i = 1; i < npatterns (); i++)
+    {
+      poly_int64 ele = rtx_to_poly_int64 (elt (i));
+      if (!known_eq (ele, ele0))
+       return false;
+    }
+  return true;
+}
+
  static unsigned
  get_sew (machine_mode mode)
  {
@@ -425,7 +498,7 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval,
     future.  */
  
  static bool
-const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, HOST_WIDE_INT maxval)
+const_vec_all_in_range_p (rtx vec, poly_int64 minval, poly_int64 maxval)
  {
    if (!CONST_VECTOR_P (vec)
        || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
@@ -440,8 +513,10 @@ const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, HOST_WIDE_INT maxval)
    for (int i = 0; i < nunits; i++)
      {
        rtx vec_elem = CONST_VECTOR_ELT (vec, i);
-      if (!CONST_INT_P (vec_elem)
-         || !IN_RANGE (INTVAL (vec_elem), minval, maxval))
+      poly_int64 value;
+      if (!poly_int_rtx_p (vec_elem, &value)
+         || maybe_lt (value, minval)
+         || maybe_gt (value, maxval))
         return false;
      }
    return true;
@@ -453,7 +528,7 @@ const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, HOST_WIDE_INT maxval)
     future.  */
  
  static rtx
-gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
+gen_const_vector_dup (machine_mode mode, poly_int64 val)
  {
    rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
    return gen_const_vec_duplicate (mode, c);
@@ -727,7 +802,10 @@ emit_vlmax_gather_insn (rtx target, rtx op, rtx sel)
    rtx elt;
    insn_code icode;
    machine_mode data_mode = GET_MODE (target);
-  if (const_vec_duplicate_p (sel, &elt))
+  machine_mode sel_mode = GET_MODE (sel);
+  if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode)))
+    icode = code_for_pred_gatherei16 (data_mode);
+  else if (const_vec_duplicate_p (sel, &elt))
      {
        icode = code_for_pred_gather_scalar (data_mode);
        sel = elt;
@@ -744,7 +822,10 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, rtx sel, rtx mask)
    rtx elt;
    insn_code icode;
    machine_mode data_mode = GET_MODE (target);
-  if (const_vec_duplicate_p (sel, &elt))
+  machine_mode sel_mode = GET_MODE (sel);
+  if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode)))
+    icode = code_for_pred_gatherei16 (data_mode);
+  else if (const_vec_duplicate_p (sel, &elt))
      {
        icode = code_for_pred_gather_scalar (data_mode);
        sel = elt;
@@ -895,11 +976,154 @@ expand_const_vector (rtx target, rtx src)
        return;
      }
  
-  /* TODO: We only support const duplicate vector for now. More cases
-     will be supported when we support auto-vectorization:
+  /* Handle variable-length vector.  */
+  unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
+  unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
+  rvv_builder builder (mode, npatterns, nelts_per_pattern);
+  for (unsigned int i = 0; i < nelts_per_pattern; i++)
+    {
+      for (unsigned int j = 0; j < npatterns; j++)
+       builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j));
+    }
+  builder.finalize ();
+  
+  if (CONST_VECTOR_DUPLICATE_P (src))
+    {
+      /* Handle the case with repeating sequence that NELTS_PER_PATTERN = 1
+        E.g. NPATTERNS = 4, v = { 0, 2, 6, 7, ... }
+             NPATTERNS = 8, v = { 0, 2, 6, 7, 19, 20, 8, 7 ... }
+       The elements within NPATTERNS are not necessarily regular.  */
+      if (builder.can_duplicate_repeating_sequence_p ())
+       {
+         /* We handle the case that we can find a vector container to hold
+            element bitsize = NPATTERNS * ele_bitsize.
+
+              NPATTERNS = 8, element width = 8
+                v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
+              In this case, we can combine NPATTERNS elements into a larger
+              element. Use element width = 64 and broadcast a vector with
+              all element equal to 0x0706050403020100.  */
+         rtx ele = builder.get_merged_repeating_sequence ();
+         rtx dup = expand_vector_broadcast (builder.new_mode (), ele);
+         emit_move_insn (target, gen_lowpart (mode, dup));
+       }
+      else
+       {
+         /* We handle the case that we can't find a vector container to hold
+            element bitsize = NPATTERNS * ele_bitsize.
+
+              NPATTERNS = 8, element width = 16
+                v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
+              Since NPATTERNS * element width = 128, we can't find a container
+              to hold it.
+
+              In this case, we use NPATTERNS merge operations to generate such
+              vector.  */
+         unsigned int nbits = npatterns - 1;
+
+         /* Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }.  */
+         rtx vid = gen_reg_rtx (builder.int_mode ());
+         rtx op[] = {vid};
+         emit_vlmax_insn (code_for_pred_series (builder.int_mode ()),
+                          RVV_MISC_OP, op);
+
+         /* Generate vid_repeat = { 0, 1, ... nbits, ... }  */
+         rtx vid_repeat = gen_reg_rtx (builder.int_mode ());
+         rtx and_ops[] = {vid_repeat, vid,
+                          gen_int_mode (nbits, builder.inner_int_mode ())};
+         emit_vlmax_insn (code_for_pred_scalar (AND, builder.int_mode ()),
+                          RVV_BINOP, and_ops);
+
+         rtx tmp = gen_reg_rtx (builder.mode ());
+         rtx dup_ops[] = {tmp, builder.elt (0)};
+         emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()), RVV_UNOP,
+                          dup_ops);
+         for (unsigned int i = 1; i < builder.npatterns (); i++)
+           {
+             /* Generate mask according to i.  */
+             rtx mask = gen_reg_rtx (builder.mask_mode ());
+             rtx const_vec = gen_const_vector_dup (builder.int_mode (), i);
+             expand_vec_cmp (mask, EQ, vid_repeat, const_vec);
+
+             /* Merge scalar to each i.  */
+             rtx tmp2 = gen_reg_rtx (builder.mode ());
+             rtx merge_ops[] = {tmp2, tmp, builder.elt (i), mask};
+             insn_code icode = code_for_pred_merge_scalar (builder.mode ());
+             emit_vlmax_merge_insn (icode, RVV_MERGE_OP, merge_ops);
+             tmp = tmp2;
+           }
+         emit_move_insn (target, tmp);
+       }
+      return;
+    }
+  else if (CONST_VECTOR_STEPPED_P (src))
+    {
+      gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
+      if (builder.single_step_npatterns_p ())
+       {
+         /* Describe the case by choosing NPATTERNS = 4 as an example.  */
+         rtx base, step;
+         if (builder.npatterns_all_equal_p ())
+           {
+             /* Generate the variable-length vector following this rule:
+                { a, a, a, a, a + step, a + step, a + step, a + step, ... }
+                  E.g. { 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 16, ... } */
+             /* Step 1: Generate base = { 0, 0, 0, 0, 0, 0, 0, ... }.  */
+             base = expand_vector_broadcast (builder.mode (), builder.elt (0));
+           }
+         else
+           {
+             /* Generate the variable-length vector following this rule:
+                { a, b, a, b, a + step, b + step, a + step, b + step, ... }
+                  E.g. { 0, 6, 0, 6, 8, 14, 8, 14, 16, 22, 16, 22, ... } */
+             /* Step 1: Generate base = { 0, 6, 0, 6, ... }.  */
+             rvv_builder new_builder (builder.mode (), builder.npatterns (),
+                                      1);
+             for (unsigned int i = 0; i < builder.npatterns (); ++i)
+               new_builder.quick_push (builder.elt (i));
+             rtx new_vec = new_builder.build ();
+             base = gen_reg_rtx (builder.mode ());
+             emit_move_insn (base, new_vec);
+           }
  
-       1. multiple elts duplicate vector.
-       2. multiple patterns with multiple elts.  */
+         /* Step 2: Generate step = gen_int_mode (diff, mode).  */
+         poly_int64 value1 = rtx_to_poly_int64 (builder.elt (0));
+         poly_int64 value2
+           = rtx_to_poly_int64 (builder.elt (builder.npatterns ()));
+         poly_int64 diff = value2 - value1;
+         step = gen_int_mode (diff, builder.inner_mode ());
+
+         /* Step 3: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }.  */
+         rtx vid = gen_reg_rtx (builder.mode ());
+         rtx op[] = {vid};
+         emit_vlmax_insn (code_for_pred_series (builder.mode ()), RVV_MISC_OP,
+                          op);
+
+         /* Step 4: Generate factor = { 0, 0, 0, 0, 1, 1, 1, 1, ... }.  */
+         rtx factor = gen_reg_rtx (builder.mode ());
+         rtx shift_ops[]
+           = {factor, vid,
+              gen_int_mode (exact_log2 (builder.npatterns ()), Pmode)};
+         emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, builder.mode ()),
+                          RVV_BINOP, shift_ops);
+
+         /* Step 5: Generate adjusted step = { 0, 0, 0, 0, diff, diff, ... } */
+         rtx adjusted_step = gen_reg_rtx (builder.mode ());
+         rtx mul_ops[] = {adjusted_step, factor, step};
+         emit_vlmax_insn (code_for_pred_scalar (MULT, builder.mode ()),
+                          RVV_BINOP, mul_ops);
+
+         /* Step 6: Generate the final result.  */
+         rtx add_ops[] = {target, base, adjusted_step};
+         emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()), RVV_BINOP,
+                          add_ops);
+       }
+      else
+       /* TODO: We will enable more variable-length vector in the future.  */
+       gcc_unreachable ();
+    }
+  else
+    gcc_unreachable ();
  }
  
  /* Expand a pre-RA RVV data move from SRC to DEST.
@@ -2029,14 +2253,13 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
  {
    machine_mode data_mode = GET_MODE (target);
    machine_mode sel_mode = GET_MODE (sel);
-
-  /* Enforced by the pattern condition.  */
-  int nunits = GET_MODE_NUNITS (sel_mode).to_constant ();
+  poly_uint64 nunits = GET_MODE_NUNITS (sel_mode);
  
    /* Check if the sel only references the first values vector. If each select
       index is in range of [0, nunits - 1]. A single vrgather instructions is
-     enough.  */
-  if (const_vec_all_in_range_p (sel, 0, nunits - 1))
+     enough. Since we will use vrgatherei16.vv for variable-length vector,
+     it is never out of range and we don't need to modulo the index.  */
+  if (!nunits.is_constant () || const_vec_all_in_range_p (sel, 0, nunits - 1))
      {
        emit_vlmax_gather_insn (target, op0, sel);
        return;
@@ -2057,14 +2280,20 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
        return;
      }
  
-  /* Note: vec_perm indices are supposed to wrap when they go beyond the
-     size of the two value vectors, i.e. the upper bits of the indices
-     are effectively ignored.  RVV vrgather instead produces 0 for any
-     out-of-range indices, so we need to modulo all the vec_perm indices
-     to ensure they are all in range of [0, 2 * nunits - 1].  */
+  rtx sel_mod = sel;
    rtx max_sel = gen_const_vector_dup (sel_mode, 2 * nunits - 1);
-  rtx sel_mod
-    = expand_simple_binop (sel_mode, AND, sel, max_sel, NULL, 0, OPTAB_DIRECT);
+  /* We don't need to modulo indices for VLA vectors, since we should
+     have guaranteed beforehand that they aren't out of range.  */
+  if (nunits.is_constant ())
+    {
+      /* Note: vec_perm indices are supposed to wrap when they go beyond the
+        size of the two value vectors, i.e. the upper bits of the indices
+        are effectively ignored.  RVV vrgather instead produces 0 for any
+        out-of-range indices, so we need to modulo all the vec_perm indices
+        to ensure they are all in range of [0, 2 * nunits - 1].  */
+      sel_mod = expand_simple_binop (sel_mode, AND, sel, max_sel, NULL, 0,
+                                    OPTAB_DIRECT);
+    }
  
    /* This following sequence is handling the case that:
       __builtin_shufflevector (vec1, vec2, index...), the index can be any
@@ -2094,4 +2323,128 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
    emit_vlmax_masked_gather_mu_insn (target, op1, tmp, mask);
  }
  
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST for RVV.  */
+
+/* vec_perm support.  */
+
+struct expand_vec_perm_d
+{
+  rtx target, op0, op1;
+  vec_perm_indices perm;
+  machine_mode vmode;
+  machine_mode op_mode;
+  bool one_vector_p;
+  bool testing_p;
+};
+
+/* Recognize the pattern that can be shuffled by generic approach.  */
+
+static bool
+shuffle_generic_patterns (struct expand_vec_perm_d *d)
+{
+  machine_mode sel_mode = related_int_vector_mode (d->vmode).require ();
+  poly_uint64 nunits = GET_MODE_NUNITS (d->vmode);
+
+  /* We don't enable SLP for non-power of 2 NPATTERNS.  */
+  if (!pow2p_hwi (d->perm.encoding().npatterns ()))
+    return false;
+
+  /* For constant size indices, we don't need to handle them here.
+     Just leave it to vec_perm<mode>.  */
+  if (d->perm.length ().is_constant ())
+    return false;
+
+  /* Permuting two SEW8 variable-length vectors needs vrgatherei16.vv.
+     Otherwise, it could overflow the index range.  */
+  if (GET_MODE_INNER (d->vmode) == QImode
+      && !get_vector_mode (HImode, nunits).exists (&sel_mode))
+    return false;
+
+  /* Success! */
+  if (d->testing_p)
+    return true;
+
+  rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
+  expand_vec_perm (d->target, d->op0, d->op1, force_reg (sel_mode, sel));
+  return true;
+}
+
+static bool
+expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
+{
+  gcc_assert (d->op_mode != E_VOIDmode);
+
+  /* The pattern matching functions above are written to look for a small
+     number to begin the sequence (0, 1, N/2).  If we begin with an index
+     from the second operand, we can swap the operands.  */
+  poly_int64 nelt = d->perm.length ();
+  if (known_ge (d->perm[0], nelt))
+    {
+      d->perm.rotate_inputs (1);
+      std::swap (d->op0, d->op1);
+    }
+
+  if (known_gt (nelt, 1))
+    {
+      if (d->vmode == d->op_mode)
+       {
+         if (shuffle_generic_patterns (d))
+           return true;
+         return false;
+       }
+      else
+       return false;
+    }
+  return false;
+}
+
+bool
+expand_vec_perm_const (machine_mode vmode, machine_mode op_mode, rtx target,
+                      rtx op0, rtx op1, const vec_perm_indices &sel)
+{
+  /* RVV doesn't have Mask type pack/unpack instructions and we don't use
+     mask to do the iteration loop control. Just disable it directly.  */
+  if (GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL)
+    return false;
+
+  struct expand_vec_perm_d d;
+
+  /* Check whether the mask can be applied to a single vector.  */
+  if (sel.ninputs () == 1 || (op0 && rtx_equal_p (op0, op1)))
+    d.one_vector_p = true;
+  else if (sel.all_from_input_p (0))
+    {
+      d.one_vector_p = true;
+      op1 = op0;
+    }
+  else if (sel.all_from_input_p (1))
+    {
+      d.one_vector_p = true;
+      op0 = op1;
+    }
+  else
+    d.one_vector_p = false;
+
+  d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
+                    sel.nelts_per_input ());
+  d.vmode = vmode;
+  d.op_mode = op_mode;
+  d.target = target;
+  d.op0 = op0;
+  if (op0 == op1)
+    d.op1 = d.op0;
+  else
+    d.op1 = op1;
+  d.testing_p = !target;
+
+  if (!d.testing_p)
+    return expand_vec_perm_const_1 (&d);
+
+  rtx_insn *last = get_last_insn ();
+  bool ret = expand_vec_perm_const_1 (&d);
+  gcc_assert (last == get_last_insn ());
+
+  return ret;
+}
+
  } // namespace riscv_vector
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc

index 3954c89a039a6f9d2567e47bc6cf06820214e28c..21e7d3b3caaadd6f9a78e8a75ea77b2562a5133b 100644 (file)
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7631,6 +7631,19 @@ riscv_vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode
    return default_vectorize_related_mode (vector_mode, element_mode, nunits);
  }
  
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
+
+static bool
+riscv_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
+                               rtx target, rtx op0, rtx op1,
+                               const vec_perm_indices &sel)
+{
+  if (TARGET_VECTOR && riscv_v_ext_vector_mode_p (vmode))
+    return riscv_vector::expand_vec_perm_const (vmode, op_mode, target, op0,
+                                               op1, sel);
+
+  return false;
+}
  
  /* Initialize the GCC target structure.  */
  #undef TARGET_ASM_ALIGNED_HI_OP
@@ -7930,6 +7943,9 @@ riscv_vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode
  #undef TARGET_VECTORIZE_RELATED_MODE
  #define TARGET_VECTORIZE_RELATED_MODE riscv_vectorize_related_mode
  
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST riscv_vectorize_vec_perm_const
+
  struct gcc_target targetm = TARGET_INITIALIZER;
  
  #include "gt-riscv.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-1.c

new file mode 100644 (file)

index 0000000..befb518
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+void __attribute__ ((noipa))
+f (int8_t *restrict a, int8_t *restrict b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      a[i * 8 + 0] = b[i * 8 + 37] + 1;
+      a[i * 8 + 1] = b[i * 8 + 37] + 2;
+      a[i * 8 + 2] = b[i * 8 + 37] + 8;
+      a[i * 8 + 3] = b[i * 8 + 37] + 4;
+      a[i * 8 + 4] = b[i * 8 + 37] + 5;
+      a[i * 8 + 5] = b[i * 8 + 37] + 6;
+      a[i * 8 + 6] = b[i * 8 + 37] + 7;
+      a[i * 8 + 7] = b[i * 8 + 37] + 3;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-2.c

new file mode 100644 (file)

index 0000000..ac81745
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+void __attribute__ ((noipa))
+f (int16_t *restrict a, int16_t *restrict b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      a[i * 8 + 0] = b[i * 8 + 37] + 1;
+      a[i * 8 + 1] = b[i * 8 + 37] + 2;
+      a[i * 8 + 2] = b[i * 8 + 37] + 8;
+      a[i * 8 + 3] = b[i * 8 + 37] + 4;
+      a[i * 8 + 4] = b[i * 8 + 37] + 5;
+      a[i * 8 + 5] = b[i * 8 + 37] + 6;
+      a[i * 8 + 6] = b[i * 8 + 37] + 7;
+      a[i * 8 + 7] = b[i * 8 + 37] + 3;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-3.c

new file mode 100644 (file)

index 0000000..7396205
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-3.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+void __attribute__ ((noipa))
+f (int8_t *restrict a, int8_t *restrict b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      a[i * 8] = b[i * 8 + 1] + 1;
+      a[i * 8 + 1] = b[i * 8 + 7] + 2;
+      a[i * 8 + 2] = b[i * 8 + 1] + 3;
+      a[i * 8 + 3] = b[i * 8 + 7] + 4;
+      a[i * 8 + 4] = b[i * 8 + 1] + 5;
+      a[i * 8 + 5] = b[i * 8 + 7] + 6;
+      a[i * 8 + 6] = b[i * 8 + 1] + 7;
+      a[i * 8 + 7] = b[i * 8 + 7] + 8;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-4.c

new file mode 100644 (file)

index 0000000..fa216fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+void __attribute__ ((noipa))
+f (int16_t *restrict a, int16_t *restrict b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      a[i * 8 + 0] = b[i * 8 + 1] + 1;
+      a[i * 8 + 1] = b[i * 8 + 7] + 2;
+      a[i * 8 + 2] = b[i * 8 + 1] + 3;
+      a[i * 8 + 3] = b[i * 8 + 7] + 4;
+      a[i * 8 + 4] = b[i * 8 + 1] + 5;
+      a[i * 8 + 5] = b[i * 8 + 7] + 6;
+      a[i * 8 + 6] = b[i * 8 + 1] + 7;
+      a[i * 8 + 7] = b[i * 8 + 7] + 8;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-5.c

new file mode 100644 (file)

index 0000000..899ed9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-5.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+void __attribute__ ((noipa))
+f (int8_t *restrict a, int8_t *restrict b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      a[i * 8 + 0] = b[i * 8 + 1] + 1;
+      a[i * 8 + 1] = b[i * 8 + 7] + 2;
+      a[i * 8 + 2] = b[i * 8 + 4] + 3;
+      a[i * 8 + 3] = b[i * 8 + 8] + 4;
+      a[i * 8 + 4] = b[i * 8 + 1] + 5;
+      a[i * 8 + 5] = b[i * 8 + 7] + 6;
+      a[i * 8 + 6] = b[i * 8 + 4] + 7;
+      a[i * 8 + 7] = b[i * 8 + 8] + 8;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-6.c

new file mode 100644 (file)

index 0000000..fb87cc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-6.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+void __attribute__ ((noipa))
+f (uint8_t *restrict a, uint8_t *restrict b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      a[i * 8 + 0] = b[i * 8 + 1] + 1;
+      a[i * 8 + 1] = b[i * 8 + 2] + 2;
+      a[i * 8 + 2] = b[i * 8 + 6] + 8;
+      a[i * 8 + 3] = b[i * 8 + 7] + 4;
+      a[i * 8 + 4] = b[i * 8 + 3] + 5;
+      a[i * 8 + 5] = b[i * 8 + 4] + 6;
+      a[i * 8 + 6] = b[i * 8 + 5] + 7;
+      a[i * 8 + 7] = b[i * 8 + 0] + 3;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-7.c

new file mode 100644 (file)

index 0000000..3dd744b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-7.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+void __attribute__ ((noipa))
+f (float *__restrict f, double *__restrict d, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      f[i * 2 + 0] = 1;
+      f[i * 2 + 1] = 2;
+      d[i] = 3;
+    }
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-1.c

new file mode 100644 (file)

index 0000000..16f078a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-1.c
@@ -0,0 +1,66 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable" } */
+
+#include "slp-1.c"
+
+#define LIMIT 128
+void __attribute__ ((optimize (0)))
+f_golden (int8_t *restrict a, int8_t *restrict b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      a[i * 8 + 0] = b[i * 8 + 37] + 1;
+      a[i * 8 + 1] = b[i * 8 + 37] + 2;
+      a[i * 8 + 2] = b[i * 8 + 37] + 8;
+      a[i * 8 + 3] = b[i * 8 + 37] + 4;
+      a[i * 8 + 4] = b[i * 8 + 37] + 5;
+      a[i * 8 + 5] = b[i * 8 + 37] + 6;
+      a[i * 8 + 6] = b[i * 8 + 37] + 7;
+      a[i * 8 + 7] = b[i * 8 + 37] + 3;
+    }
+}
+
+int
+main (void)
+{
+#define RUN(NUM)                                                               \
+  int8_t a_##NUM[NUM * 8 + 8] = {0};                                           \
+  int8_t a_golden_##NUM[NUM * 8 + 8] = {0};                                    \
+  int8_t b_##NUM[NUM * 8 + 37] = {0};                                          \
+  for (int i = 0; i < NUM * 8 + 37; i++)                                       \
+    {                                                                          \
+      if (i % NUM == 0)                                                        \
+       b_##NUM[i] = (i + NUM) % LIMIT;                                        \
+      else                                                                     \
+       b_##NUM[i] = (i - NUM) % (-LIMIT);                                     \
+    }                                                                          \
+  f (a_##NUM, b_##NUM, NUM);                                                   \
+  f_golden (a_golden_##NUM, b_##NUM, NUM);                                     \
+  for (int i = 0; i < NUM * 8 + 8; i++)                                        \
+    {                                                                          \
+      if (a_##NUM[i] != a_golden_##NUM[i])                                     \
+       __builtin_abort ();                                                    \
+    }
+
+  RUN (3);
+  RUN (5);
+  RUN (15);
+  RUN (16);
+  RUN (17);
+  RUN (31);
+  RUN (32);
+  RUN (33);
+  RUN (63);
+  RUN (64);
+  RUN (65);
+  RUN (127);
+  RUN (128);
+  RUN (129);
+  RUN (239);
+  RUN (359);
+  RUN (498);
+  RUN (799);
+  RUN (977);
+  RUN (5789);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-2.c

new file mode 100644 (file)

index 0000000..41f688f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-2.c
@@ -0,0 +1,67 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable" } */
+
+#include "slp-2.c"
+
+#define LIMIT 32767
+/* Reference for f at -O0: 8 lanes all read b[i*8+37], each adds a constant.  */
+void __attribute__ ((optimize (0)))
+f_golden (int16_t *restrict a, int16_t *restrict b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      a[i * 8 + 0] = b[i * 8 + 37] + 1;
+      a[i * 8 + 1] = b[i * 8 + 37] + 2;
+      a[i * 8 + 2] = b[i * 8 + 37] + 8;
+      a[i * 8 + 3] = b[i * 8 + 37] + 4;
+      a[i * 8 + 4] = b[i * 8 + 37] + 5;
+      a[i * 8 + 5] = b[i * 8 + 37] + 6;
+      a[i * 8 + 6] = b[i * 8 + 37] + 7;
+      a[i * 8 + 7] = b[i * 8 + 37] + 3;
+    }
+}
+/* For each NUM: seed b, run f and f_golden on it, abort if outputs differ.  */
+int
+main (void)
+{
+#define RUN(NUM)                                                               \
+  int16_t a_##NUM[NUM * 8 + 8] = {0};                                          \
+  int16_t a_golden_##NUM[NUM * 8 + 8] = {0};                                   \
+  int16_t b_##NUM[NUM * 8 + 37] = {0};                                         \
+  for (int i = 0; i < NUM * 8 + 37; i++)                                       \
+    {                                                                          \
+      if (i % NUM == 0)                                                        \
+       b_##NUM[i] = (i + NUM) % LIMIT;                                        \
+      else                                                                     \
+       b_##NUM[i] = (i - NUM) % (-LIMIT);                                     \
+    }                                                                          \
+  f (a_##NUM, b_##NUM, NUM);                                                   \
+  f_golden (a_golden_##NUM, b_##NUM, NUM);                                     \
+  for (int i = 0; i < NUM * 8 + 8; i++)                                        \
+    {                                                                          \
+      if (a_##NUM[i] != a_golden_##NUM[i])                                     \
+       __builtin_abort ();                                                    \
+    }
+  /* Trip counts at and around powers of two, plus assorted other sizes.  */
+  RUN (3);
+  RUN (5);
+  RUN (15);
+  RUN (16);
+  RUN (17);
+  RUN (31);
+  RUN (32);
+  RUN (33);
+  RUN (63);
+  RUN (64);
+  RUN (65);
+  RUN (127);
+  RUN (128);
+  RUN (129);
+  RUN (239);
+  RUN (359);
+  RUN (498);
+  RUN (799);
+  RUN (977);
+  RUN (5789);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-3.c

new file mode 100644 (file)

index 0000000..30996cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-3.c
@@ -0,0 +1,67 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable" } */
+
+#include "slp-3.c"
+
+#define LIMIT 128
+/* Reference for f at -O0: even lanes read b[i*8+1], odd b[i*8+7]; add 1..8.  */
+void __attribute__ ((optimize (0)))
+f_golden (int8_t *restrict a, int8_t *restrict b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      a[i * 8] = b[i * 8 + 1] + 1;
+      a[i * 8 + 1] = b[i * 8 + 7] + 2;
+      a[i * 8 + 2] = b[i * 8 + 1] + 3;
+      a[i * 8 + 3] = b[i * 8 + 7] + 4;
+      a[i * 8 + 4] = b[i * 8 + 1] + 5;
+      a[i * 8 + 5] = b[i * 8 + 7] + 6;
+      a[i * 8 + 6] = b[i * 8 + 1] + 7;
+      a[i * 8 + 7] = b[i * 8 + 7] + 8;
+    }
+}
+/* For each NUM: seed b, run f and f_golden on it, abort if outputs differ.  */
+int
+main (void)
+{
+#define RUN(NUM)                                                               \
+  int8_t a_##NUM[NUM * 8 + 8] = {0};                                           \
+  int8_t a_golden_##NUM[NUM * 8 + 8] = {0};                                    \
+  int8_t b_##NUM[NUM * 8 + 8] = {0};                                           \
+  for (int i = 0; i < NUM * 8 + 8; i++)                                        \
+    {                                                                          \
+      if (i % NUM == 0)                                                        \
+       b_##NUM[i] = (i + NUM) % LIMIT;                                        \
+      else                                                                     \
+       b_##NUM[i] = (i - NUM) % (-LIMIT);                                     \
+    }                                                                          \
+  f (a_##NUM, b_##NUM, NUM);                                                   \
+  f_golden (a_golden_##NUM, b_##NUM, NUM);                                     \
+  for (int i = 0; i < NUM * 8 + 8; i++)                                        \
+    {                                                                          \
+      if (a_##NUM[i] != a_golden_##NUM[i])                                     \
+       __builtin_abort ();                                                    \
+    }
+  /* Trip counts at and around powers of two, plus assorted other sizes.  */
+  RUN (3);
+  RUN (5);
+  RUN (15);
+  RUN (16);
+  RUN (17);
+  RUN (31);
+  RUN (32);
+  RUN (33);
+  RUN (63);
+  RUN (64);
+  RUN (65);
+  RUN (127);
+  RUN (128);
+  RUN (129);
+  RUN (239);
+  RUN (359);
+  RUN (498);
+  RUN (799);
+  RUN (977);
+  RUN (5789);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-4.c

new file mode 100644 (file)

index 0000000..3d43ef0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-4.c
@@ -0,0 +1,67 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable" } */
+
+#include "slp-4.c"
+
+#define LIMIT 32767
+/* Reference for f at -O0: even lanes read b[i*8+1], odd b[i*8+7]; add 1..8.  */
+void __attribute__ ((optimize (0)))
+f_golden (int16_t *restrict a, int16_t *restrict b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      a[i * 8] = b[i * 8 + 1] + 1;
+      a[i * 8 + 1] = b[i * 8 + 7] + 2;
+      a[i * 8 + 2] = b[i * 8 + 1] + 3;
+      a[i * 8 + 3] = b[i * 8 + 7] + 4;
+      a[i * 8 + 4] = b[i * 8 + 1] + 5;
+      a[i * 8 + 5] = b[i * 8 + 7] + 6;
+      a[i * 8 + 6] = b[i * 8 + 1] + 7;
+      a[i * 8 + 7] = b[i * 8 + 7] + 8;
+    }
+}
+/* For each NUM: seed b, run f and f_golden on it, abort if outputs differ.  */
+int
+main (void)
+{
+#define RUN(NUM)                                                               \
+  int16_t a_##NUM[NUM * 8 + 8] = {0};                                          \
+  int16_t a_golden_##NUM[NUM * 8 + 8] = {0};                                   \
+  int16_t b_##NUM[NUM * 8 + 8] = {0};                                          \
+  for (int i = 0; i < NUM * 8 + 8; i++)                                        \
+    {                                                                          \
+      if (i % NUM == 0)                                                        \
+       b_##NUM[i] = (i + NUM) % LIMIT;                                        \
+      else                                                                     \
+       b_##NUM[i] = (i - NUM) % (-LIMIT);                                     \
+    }                                                                          \
+  f (a_##NUM, b_##NUM, NUM);                                                   \
+  f_golden (a_golden_##NUM, b_##NUM, NUM);                                     \
+  for (int i = 0; i < NUM * 8 + 8; i++)                                        \
+    {                                                                          \
+      if (a_##NUM[i] != a_golden_##NUM[i])                                     \
+       __builtin_abort ();                                                    \
+    }
+  /* Trip counts at and around powers of two, plus assorted other sizes.  */
+  RUN (3);
+  RUN (5);
+  RUN (15);
+  RUN (16);
+  RUN (17);
+  RUN (31);
+  RUN (32);
+  RUN (33);
+  RUN (63);
+  RUN (64);
+  RUN (65);
+  RUN (127);
+  RUN (128);
+  RUN (129);
+  RUN (239);
+  RUN (359);
+  RUN (498);
+  RUN (799);
+  RUN (977);
+  RUN (5789);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-5.c

new file mode 100644 (file)

index 0000000..814308b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-5.c
@@ -0,0 +1,67 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable" } */
+
+#include "slp-5.c"
+
+#define LIMIT 128
+/* Reference for f at -O0: lanes read b[i*8+{1,7,4,8}] twice; addends 1..8.  */
+void __attribute__ ((optimize (0)))
+f_golden (int8_t *restrict a, int8_t *restrict b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      a[i * 8 + 0] = b[i * 8 + 1] + 1;
+      a[i * 8 + 1] = b[i * 8 + 7] + 2;
+      a[i * 8 + 2] = b[i * 8 + 4] + 3;
+      a[i * 8 + 3] = b[i * 8 + 8] + 4;
+      a[i * 8 + 4] = b[i * 8 + 1] + 5;
+      a[i * 8 + 5] = b[i * 8 + 7] + 6;
+      a[i * 8 + 6] = b[i * 8 + 4] + 7;
+      a[i * 8 + 7] = b[i * 8 + 8] + 8;
+    }
+}
+/* For each NUM: seed b, run f and f_golden on it, abort if outputs differ.  */
+int
+main (void)
+{
+#define RUN(NUM)                                                               \
+  int8_t a_##NUM[NUM * 8 + 8] = {0};                                           \
+  int8_t a_golden_##NUM[NUM * 8 + 8] = {0};                                    \
+  int8_t b_##NUM[NUM * 8 + 9] = {0};                                           \
+  for (int i = 0; i < NUM * 8 + 9; i++)                                        \
+    {                                                                          \
+      if (i % NUM == 0)                                                        \
+       b_##NUM[i] = (i + NUM) % LIMIT;                                        \
+      else                                                                     \
+       b_##NUM[i] = (i - NUM) % (-LIMIT);                                     \
+    }                                                                          \
+  f (a_##NUM, b_##NUM, NUM);                                                   \
+  f_golden (a_golden_##NUM, b_##NUM, NUM);                                     \
+  for (int i = 0; i < NUM * 8 + 8; i++)                                        \
+    {                                                                          \
+      if (a_##NUM[i] != a_golden_##NUM[i])                                     \
+       __builtin_abort ();                                                    \
+    }
+  /* Trip counts at and around powers of two, plus assorted other sizes.  */
+  RUN (3);
+  RUN (5);
+  RUN (15);
+  RUN (16);
+  RUN (17);
+  RUN (31);
+  RUN (32);
+  RUN (33);
+  RUN (63);
+  RUN (64);
+  RUN (65);
+  RUN (127);
+  RUN (128);
+  RUN (129);
+  RUN (239);
+  RUN (359);
+  RUN (498);
+  RUN (799);
+  RUN (977);
+  RUN (5789);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-6.c

new file mode 100644 (file)

index 0000000..e317eea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-6.c
@@ -0,0 +1,67 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable" } */
+
+#include "slp-6.c"
+
+#define LIMIT 128
+/* Reference for f at -O0: a[i*8+k] = b[i*8+P[k]] + C[k]; P permutes 0..7.  */
+void __attribute__ ((optimize (0)))
+f_golden (int8_t *restrict a, int8_t *restrict b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      a[i * 8 + 0] = b[i * 8 + 1] + 1;
+      a[i * 8 + 1] = b[i * 8 + 2] + 2;
+      a[i * 8 + 2] = b[i * 8 + 6] + 8;
+      a[i * 8 + 3] = b[i * 8 + 7] + 4;
+      a[i * 8 + 4] = b[i * 8 + 3] + 5;
+      a[i * 8 + 5] = b[i * 8 + 4] + 6;
+      a[i * 8 + 6] = b[i * 8 + 5] + 7;
+      a[i * 8 + 7] = b[i * 8 + 0] + 3;
+    }
+}
+/* For each NUM: seed b, run f and f_golden on it, abort if outputs differ.  */
+int
+main (void)
+{
+#define RUN(NUM)                                                               \
+  int8_t a_##NUM[NUM * 8 + 8] = {0};                                           \
+  int8_t a_golden_##NUM[NUM * 8 + 8] = {0};                                    \
+  int8_t b_##NUM[NUM * 8 + 9] = {0};                                           \
+  for (int i = 0; i < NUM * 8 + 9; i++)                                        \
+    {                                                                          \
+      if (i % NUM == 0)                                                        \
+       b_##NUM[i] = (i + NUM) % LIMIT;                                        \
+      else                                                                     \
+       b_##NUM[i] = (i - NUM) % (-LIMIT);                                     \
+    }                                                                          \
+  f (a_##NUM, b_##NUM, NUM);                                                   \
+  f_golden (a_golden_##NUM, b_##NUM, NUM);                                     \
+  for (int i = 0; i < NUM * 8 + 8; i++)                                        \
+    {                                                                          \
+      if (a_##NUM[i] != a_golden_##NUM[i])                                     \
+       __builtin_abort ();                                                    \
+    }
+  /* Trip counts at and around powers of two, plus assorted other sizes.  */
+  RUN (3);
+  RUN (5);
+  RUN (15);
+  RUN (16);
+  RUN (17);
+  RUN (31);
+  RUN (32);
+  RUN (33);
+  RUN (63);
+  RUN (64);
+  RUN (65);
+  RUN (127);
+  RUN (128);
+  RUN (129);
+  RUN (239);
+  RUN (359);
+  RUN (498);
+  RUN (799);
+  RUN (977);
+  RUN (5789);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-7.c

new file mode 100644 (file)

index 0000000..a8e4781
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-7.c
@@ -0,0 +1,58 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable" } */
+
+#include "slp-7.c"
+/* Reference for f, kept at -O0: f[2i] = 1, f[2i+1] = 2, d[i] = 3 for i < n.  */
+void __attribute__ ((optimize (0)))
+f_golden (float *__restrict f, double *__restrict d, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      f[i * 2 + 0] = 1;
+      f[i * 2 + 1] = 2;
+      d[i] = 3;
+    }
+}
+/* For each NUM: run f and f_golden on zeroed buffers, abort on any mismatch.  */
+int
+main (void)
+{
+#define RUN(NUM)                                                               \
+  float a_##NUM[NUM * 2 + 2] = {0};                                            \
+  float a_golden_##NUM[NUM * 2 + 2] = {0};                                     \
+  double b_##NUM[NUM] = {0};                                                   \
+  double b_golden_##NUM[NUM] = {0};                                            \
+  f (a_##NUM, b_##NUM, NUM);                                                   \
+  f_golden (a_golden_##NUM, b_golden_##NUM, NUM);                              \
+  for (int i = 0; i < NUM; i++)                                                \
+    {                                                                          \
+      if (a_##NUM[i * 2 + 0] != a_golden_##NUM[i * 2 + 0])                     \
+       __builtin_abort ();                                                    \
+      if (a_##NUM[i * 2 + 1] != a_golden_##NUM[i * 2 + 1])                     \
+       __builtin_abort ();                                                    \
+      if (b_##NUM[i] != b_golden_##NUM[i])                                     \
+       __builtin_abort ();                                                    \
+    }
+  /* Trip counts at and around powers of two, plus assorted other sizes.  */
+  RUN (3);
+  RUN (5);
+  RUN (15);
+  RUN (16);
+  RUN (17);
+  RUN (31);
+  RUN (32);
+  RUN (33);
+  RUN (63);
+  RUN (64);
+  RUN (65);
+  RUN (127);
+  RUN (128);
+  RUN (129);
+  RUN (239);
+  RUN (359);
+  RUN (498);
+  RUN (799);
+  RUN (977);
+  RUN (5789);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/scalable-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/scalable-1.c

index 500b0adce668704677f80387c51f9c0af951bcad..3c03a87377d6bf481c4bfff28b6c6e7dcfaf58bc 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/scalable-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/scalable-1.c
@@ -14,4 +14,4 @@ f (int32_t *__restrict f, int32_t *__restrict d, int n)
      }
  }
  
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c

index 383c82a3b7ce7d6ceb2933fffa0773647729f173..e68d05f5f48018a2b2875820cfc65503115a4d77 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
@@ -3,9 +3,4 @@
  
  #include "template-1.h"
  
-/* Currently, we don't support SLP auto-vectorization for VLA. But it's
-   necessary that we add this testcase here to make sure such unsupported SLP
-   auto-vectorization will not cause an ICE. We will enable "vect" checking when
-   we support SLP auto-vectorization for VLA in the future.  */
-
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c

index 23cc1c8651f868d588a3364d0b4b298d21709f5f..ecfda79e19aec276906a66de8e772bed28b2a374 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
@@ -3,4 +3,4 @@
  
  #include "template-1.h"
  
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c

index 4f130f02f67d166af794a6eb7df3ab23ff2f2540..1394f08f2b9bf5ccacd8cabb4ccf025c00753d96 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c
@@ -3,4 +3,4 @@
  
  #include "template-1.h"
  
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-1.c

index 823d51a03cb4fcaa89f99e5c85de90d2805a6476..c5e89996fa4d00c87cd8e4e2aa7f0cf13c4ec085 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-1.c
@@ -3,4 +3,4 @@
  
  #include "template-1.h"
  
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c

index 5ead22746d3fdc4710ddff7a48a2359c05f710b6..6b320ca6f38aefef1fac040f8b58fa9cd248e289 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
@@ -3,4 +3,4 @@
  
  #include "template-1.h"
  
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-1.c

index e03d1b44ca677bc762aacd89bde505a80707e9c5..6c2a002de9cc9b7730663ed1e1c8b52ef1596f8e 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-1.c
@@ -3,4 +3,4 @@
  
  #include "template-1.h"
  
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c

index 5bb2d9d96fa1215fa9d6bbb4b0439388ccd9832e..ae3f066477c7ea0c5d50fcd0e9c1ac3e122fad51 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
@@ -3,4 +3,4 @@
  
  #include "template-1.h"
  
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c

index 71820ece4b26f71f5fd586847cb66367c9a8a29e..fc676a3865e6b06d7ad8641ade0f0e94928520b7 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c
@@ -3,4 +3,4 @@
  
  #include "template-1.h"
  
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
author	Juzhe-Zhong <juzhe.zhong@rivai.ai>
	Wed, 7 Jun 2023 03:19:15 +0000 (11:19 +0800)
committer	Pan Li <pan2.li@intel.com>
	Wed, 7 Jun 2023 06:02:56 +0000 (14:02 +0800)
gcc/config/riscv/riscv-protos.h		patch \| blob \| blame \| history
gcc/config/riscv/riscv-v.cc		patch \| blob \| blame \| history
gcc/config/riscv/riscv.cc		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-3.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-4.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-5.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-6.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-7.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-3.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-4.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-5.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-6.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-7.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/scalable-1.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-1.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-1.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c		patch \| blob \| blame \| history