#define IN_TARGET_CODE 1
+/* RVV instruction patterns have a maximum of 11 operands according to
+   vector.md.  */
+#define RVV_INSN_OPERANDS_MAX 11
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
: rtx_vector_builder (mode, npatterns, nelts_per_pattern)
{
m_inner_mode = GET_MODE_INNER (mode);
- m_inner_size = GET_MODE_BITSIZE (m_inner_mode).to_constant ();
+ m_inner_bits_size = GET_MODE_BITSIZE (m_inner_mode);
+ m_inner_bytes_size = GET_MODE_SIZE (m_inner_mode);
+
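+  /* The merge masks are materialized in an integer mode of the same
+     width as the inner mode, so such a mode must exist.  */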
+ gcc_assert (
+ int_mode_for_size (inner_bits_size (), 0).exists (&m_inner_int_mode));
}
bool can_duplicate_repeating_sequence_p ();
rtx get_merged_repeating_sequence ();
+ bool repeating_sequence_use_merge_profitable_p ();
+ rtx get_merge_scalar_mask (unsigned int) const;
+
machine_mode new_mode () const { return m_new_mode; }
+ scalar_mode inner_mode () const { return m_inner_mode; }
+ scalar_int_mode inner_int_mode () const { return m_inner_int_mode; }
+ unsigned int inner_bits_size () const { return m_inner_bits_size; }
+ unsigned int inner_bytes_size () const { return m_inner_bytes_size; }
private:
- machine_mode m_inner_mode;
+ scalar_mode m_inner_mode;
+ scalar_int_mode m_inner_int_mode;
machine_mode m_new_mode;
scalar_int_mode m_new_inner_mode;
- unsigned int m_inner_size;
+ unsigned int m_inner_bits_size;
+ unsigned int m_inner_bytes_size;
};
/* Return true if the vector duplicated by a super element which is the fusion
rvv_builder::can_duplicate_repeating_sequence_p ()
{
poly_uint64 new_size = exact_div (full_nelts (), npatterns ());
- unsigned int new_inner_size = m_inner_size * npatterns ();
+ unsigned int new_inner_size = m_inner_bits_size * npatterns ();
if (!int_mode_for_size (new_inner_size, 0).exists (&m_new_inner_mode)
|| GET_MODE_SIZE (m_new_inner_mode) > UNITS_PER_WORD
|| !get_vector_mode (m_new_inner_mode, new_size).exists (&m_new_mode))
    return false;
  return repeating_sequence_p (0, full_nelts ().to_constant (), npatterns ());
}
+/* Return true if it is a repeating sequence for which the merge
+   approach generates better code than the default approach
+   (slide1down).
+
+ Sequence A:
+ {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b}
+
+ nelts = 16
+ npatterns = 2
+
+ for merging a we need mask 101010....
+ for merging b we need mask 010101....
+
+   For each element in the pattern, we need to build a mask in a scalar
+   register.  Most often we need 3 instructions (aka COST = 3), which
+   consist of 2 scalar instructions and 1 scalar move to the v0 register.
+   Finally we need a vector merge to merge them.
+
+ lui a5, #imm
+ add a5, #imm
+ vmov.s.x v0, a5
+ vmerge.vxm v9, v9, a1, v0
+
+   So the overall (rough) COST of Sequence A = (3 + 1) * npatterns = 8.
+ If we use slide1down, the COST = nelts = 16 > 8 (COST of merge).
+ So return true in this case as it is profitable.
+
+ Sequence B:
+ {a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h}
+
+ nelts = 16
+ npatterns = 8
+
+ COST of merge approach = (3 + 1) * npatterns = 24
+ COST of slide1down approach = nelts = 16
+   Return false in this case as the merge approach is NOT profitable.  */
+bool
+rvv_builder::repeating_sequence_use_merge_profitable_p ()
+{
+ if (inner_bytes_size () > UNITS_PER_WORD)
+ return false;
+
+ unsigned int nelts = full_nelts ().to_constant ();
+
+ if (!repeating_sequence_p (0, nelts, npatterns ()))
+ return false;
+
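+  /* Costs follow the analysis in the function comment: building each
+     merge mask takes roughly 3 instructions plus 1 vmerge, while
+     slide1down needs 1 instruction per element.  */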
+ unsigned int merge_cost = 1;
+ unsigned int build_merge_mask_cost = 3;
+ unsigned int slide1down_cost = nelts;
+
+ return (build_merge_mask_cost + merge_cost) * npatterns () < slide1down_cost;
+}
+
/* Merge the repeating sequence into a single element and return the RTX. */
rtx
rvv_builder::get_merged_repeating_sequence ()
scalar_int_mode mode = Pmode;
rtx target = gen_reg_rtx (mode);
emit_move_insn (target, const0_rtx);
- rtx imm = gen_int_mode ((1ULL << m_inner_size) - 1, mode);
+ rtx imm = gen_int_mode ((1ULL << m_inner_bits_size) - 1, mode);
/* { a, b, a, b }: Generate duplicate element = b << bits | a. */
for (unsigned int i = 0; i < npatterns (); i++)
{
- unsigned int loc = m_inner_size * i;
+ unsigned int loc = m_inner_bits_size * i;
rtx shift = gen_int_mode (loc, mode);
rtx ele = gen_lowpart (mode, elt (i));
rtx tmp = expand_simple_binop (mode, AND, ele, imm, NULL_RTX, false,
return target;
}
+/* Get the scalar mask for the merge approach.
+
+   Consider the following case:
+   {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b}
+   To merge "a", the mask should be 1010....
+   To merge "b", the mask should be 0101....  */
+rtx
+rvv_builder::get_merge_scalar_mask (unsigned int index_in_pattern) const
+{
+ unsigned HOST_WIDE_INT mask = 0;
+ unsigned HOST_WIDE_INT base_mask = (1ULL << index_in_pattern);
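+  /* E.g. npatterns = 2, index_in_pattern = 1, BITS_PER_WORD = 64:
+     base_mask = 0b10 is OR'ed in at bit offsets 0, 2, 4, ..., giving
+     mask = 0b...101010, which selects every "b" position.  */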
+
+ gcc_assert (BITS_PER_WORD % npatterns () == 0);
+
+ int limit = BITS_PER_WORD / npatterns ();
+
+ for (int i = 0; i < limit; i++)
+ mask |= base_mask << (i * npatterns ());
+
+ return gen_int_mode (mask, inner_int_mode ());
+}
+
/* Subroutine of riscv_vector_expand_vector_init.
Works as follows:
(a) Initialize TARGET by broadcasting element NELTS_REQD - 1 of BUILDER.
}
}
+/* Emit vmv.s.x instruction. */
+
+static void
+emit_scalar_move_insn (unsigned icode, rtx *ops)
+{
+ machine_mode data_mode = GET_MODE (ops[0]);
+ machine_mode mask_mode = get_mask_mode (data_mode).require ();
+ insn_expander<RVV_INSN_OPERANDS_MAX> e (riscv_vector::RVV_SCALAR_MOV_OP,
+ /* HAS_DEST_P */ true,
+ /* FULLY_UNMASKED_P */ false,
+ /* USE_REAL_MERGE_P */ true,
+ /* HAS_AVL_P */ true,
+ /* VLMAX_P */ false,
+ data_mode, mask_mode);
+ e.set_policy (TAIL_ANY);
+ e.set_policy (MASK_ANY);
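+  /* The scalar move writes only element 0, so a VL of 1 is enough.  */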
+ e.set_vl (CONST1_RTX (Pmode));
+ e.emit_insn ((enum insn_code) icode, ops);
+}
+
+/* Emit vmv.v.x instruction with vlmax. */
+
+static void
+emit_vlmax_integer_move_insn (unsigned icode, rtx *ops, rtx vl)
+{
+ emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ops, vl);
+}
+
+/* Emit vmv.v.x instruction with nonvlmax. */
+
+static void
+emit_nonvlmax_integer_move_insn (unsigned icode, rtx *ops, rtx avl)
+{
+ emit_nonvlmax_insn (icode, riscv_vector::RVV_UNOP, ops, avl);
+}
+
+/* Return the vector mode used to broadcast the merge mask.  The element
+   count is capped so that the mode fits in a single vector register.  */
+
+static machine_mode
+get_repeating_sequence_dup_machine_mode (const rvv_builder &builder)
+{
+ poly_uint64 dup_nunits = GET_MODE_NUNITS (builder.mode ());
+
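+  /* If the builder's mode spans more than one vector register, cap the
+     broadcast at a single register's worth of elements.  */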
+ if (known_ge (GET_MODE_SIZE (builder.mode ()), BYTES_PER_RISCV_VECTOR))
+ {
+ dup_nunits = exact_div (BYTES_PER_RISCV_VECTOR,
+ builder.inner_bytes_size ());
+ }
+
+ return get_vector_mode (builder.inner_int_mode (), dup_nunits).require ();
+}
+
+/* Use the merge approach to initialize a vector with a repeating sequence.
+   v = {a, b, a, b, a, b, a, b}.
+
+   v = broadcast (a).
+   mask = 0b01010101....
+   v = merge (v, b, mask)  */
+static void
+expand_vector_init_merge_repeating_sequence (rtx target,
+ const rvv_builder &builder)
+{
+ machine_mode dup_mode = get_repeating_sequence_dup_machine_mode (builder);
+ machine_mode dup_mask_mode = get_mask_mode (dup_mode).require ();
+ machine_mode mask_mode = get_mask_mode (builder.mode ()).require ();
+ uint64_t full_nelts = builder.full_nelts ().to_constant ();
+
+  /* Step 1: Broadcast the first pattern element.  */
+ rtx ops[] = {target, force_reg (GET_MODE_INNER (dup_mode), builder.elt (0))};
+ emit_vlmax_integer_move_insn (code_for_pred_broadcast (builder.mode ()),
+ ops, NULL_RTX);
+
+  /* Step 2: Merge in the remaining patterns, one at a time.  */
+ for (unsigned int i = 1; i < builder.npatterns (); i++)
+ {
+      /* Step 2-1: Generate the mask (goes into v0) for this merge.  */
+ rtx merge_mask = builder.get_merge_scalar_mask (i);
+ rtx mask = gen_reg_rtx (mask_mode);
+ rtx dup = gen_reg_rtx (dup_mode);
+
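+      /* If the whole mask fits in one machine word, a single vmv.s.x
+         sets all of its bits at once; otherwise broadcast the repeating
+         mask pattern with vmv.v.x using a small VL instead of VLMAX.  */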
+ if (full_nelts <= BITS_PER_WORD) /* vmv.s.x. */
+ {
+ rtx ops[] = {dup, gen_scalar_move_mask (dup_mask_mode),
+ RVV_VUNDEF (dup_mode), merge_mask};
+ emit_scalar_move_insn (code_for_pred_broadcast (GET_MODE (dup)),
+ ops);
+ }
+ else /* vmv.v.x. */
+ {
+ rtx ops[] = {dup, force_reg (GET_MODE_INNER (dup_mode), merge_mask)};
+ rtx vl = gen_int_mode (CEIL (full_nelts, BITS_PER_WORD), Pmode);
+ emit_nonvlmax_integer_move_insn (code_for_pred_broadcast (dup_mode),
+ ops, vl);
+ }
+
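+      /* Reinterpret the broadcast integer vector as a mask vector.  */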
+ emit_move_insn (mask, gen_lowpart (mask_mode, dup));
+
+      /* Step 2-2: Merge this pattern element according to the mask.  */
+ rtx ops[] = {target, target, builder.elt (i), mask};
+ emit_vlmax_merge_insn (code_for_pred_merge_scalar (GET_MODE (target)),
+ riscv_vector::RVV_MERGE_OP, ops);
+ }
+}
+
/* Initialize register TARGET from the elements in PARALLEL rtx VALS. */
void
emit_move_insn (target, gen_lowpart (mode, dup));
return;
}
+
+  /* Case 2: Optimize repeating sequences that Case 1 cannot handle,
+     when doing so is profitable.  For example:
+     ELEMENT BITSIZE = 64.
+     v = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b}.
+     We can't find a vector mode for "ab", whose elements would have to
+     be combined into a 128-bit element to duplicate.  */
+ if (v.repeating_sequence_use_merge_profitable_p ())
+ {
+ expand_vector_init_merge_repeating_sequence (target, v);
+ return;
+ }
+
/* TODO: We will support more Initialization of vector in the future. */
}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv_zvl1024b -mabi=lp64d" } */
+
+#include <stdint-gcc.h>
+
+typedef int64_t vnx16di __attribute__ ((vector_size (1024)));
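+/* 1024 bytes / 8 = 128 int64_t elements, i.e. {a, b} repeated 64 times.  */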
+
+__attribute__ ((noipa)) void
+f_vnx16di (int64_t a, int64_t b, int64_t *out)
+{
+ vnx16di v = {
+ a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
+ a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
+ a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
+ a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
+ };
+ *(vnx16di *) out = v;
+}
+
+/* { dg-final { scan-assembler-times {vmv\.v\.x\s+v[0-9]+,\s*[a-x0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vmerge\.vxm\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0} 1 } } */