RISC-V: Lower vmv.v.x (avl = 1) into vmv.s.x

author Juzhe-Zhong <juzhe.zhong@rivai.ai>

Mon, 22 Jan 2024 06:46:46 +0000 (14:46 +0800)

committer Pan Li <pan2.li@intel.com>

Mon, 22 Jan 2024 13:48:21 +0000 (21:48 +0800)
author Juzhe-Zhong <juzhe.zhong@rivai.ai>
Mon, 22 Jan 2024 06:46:46 +0000 (14:46 +0800)
committer Pan Li <pan2.li@intel.com>
Mon, 22 Jan 2024 13:48:21 +0000 (21:48 +0800)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h

index 7fe26fcd9391661b3cbdfa8fccfa73f866c85bcf..b3f0bdb9924d380b984836f0ef157570526cb318 100644 (file)
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -708,6 +708,7 @@ bool can_be_broadcasted_p (rtx);
  bool gather_scatter_valid_offset_p (machine_mode);
  HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
  bool whole_reg_to_reg_move_p (rtx *, machine_mode, int);
+bool splat_to_scalar_move_p (rtx *);
  }
  
  /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc

index 93a1238a5abbd67a97f40d63ccffecd1dcd1e043..4bacb7fea45ffa1881ec6dc108109387dc60e5c1 100644 (file)
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -5151,4 +5151,16 @@ whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
    return false;
  }
  
+/* Return true if we can transform vmv.v.x/vfmv.v.f to vmv.s.x/vfmv.s.f.  */
+bool
+splat_to_scalar_move_p (rtx *ops)
+{
+  return satisfies_constraint_Wc1 (ops[1])
+        && satisfies_constraint_vu (ops[2])
+        && !MEM_P (ops[3])
+        && satisfies_constraint_c01 (ops[4])
+        && INTVAL (ops[7]) == NONVLMAX
+        && known_ge (GET_MODE_SIZE (Pmode), GET_MODE_SIZE (GET_MODE (ops[3])));
+}
+
  } // namespace riscv_vector
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md

index 307d9a8c952e64016795c8d747b8744799a6a4fb..ab6e099852dc978342fa89ca09734cc07d2d8162 100644 (file)
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1977,8 +1977,15 @@
           (match_operand:V_VLS 2 "vector_merge_operand")))]
    "TARGET_VECTOR"
  {
+  /* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f
+     has better chances to do vsetvl fusion in vsetvl pass.  */
+  if (riscv_vector::splat_to_scalar_move_p (operands))
+    {
+      operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
+      operands[3] = force_reg (<VEL>mode, operands[3]);
+    }
    /* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar.  */
-  if (satisfies_constraint_Wdm (operands[3]))
+  else if (satisfies_constraint_Wdm (operands[3]))
      {
        if (satisfies_constraint_Wb1 (operands[1]))
         {
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-2.c

new file mode 100644 (file)

index 0000000..b3fec26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (uint32_t *outputMat, uint32_t *inputMat)
+{
+  vuint32m1_t matRegIn0 = __riscv_vle32_v_u32m1 (inputMat, 4);
+  vuint32m1_t matRegIn1 = __riscv_vle32_v_u32m1 (inputMat + 4, 4);
+  vuint32m1_t matRegIn2 = __riscv_vle32_v_u32m1 (inputMat + 8, 4);
+  vuint32m1_t matRegIn3 = __riscv_vle32_v_u32m1 (inputMat + 12, 4);
+
+  vbool32_t oddMask
+    = __riscv_vreinterpret_v_u32m1_b32 (__riscv_vmv_v_x_u32m1 (0xaaaa, 1));
+
+  vuint32m1_t smallTransposeMat0
+    = __riscv_vslideup_vx_u32m1_tumu (oddMask, matRegIn0, matRegIn1, 1, 4);
+  vuint32m1_t smallTransposeMat2
+    = __riscv_vslideup_vx_u32m1_tumu (oddMask, matRegIn2, matRegIn3, 1, 4);
+
+  vuint32m1_t outMat0 = __riscv_vslideup_vx_u32m1_tu (smallTransposeMat0,
+                                                     smallTransposeMat2, 2, 4);
+
+  __riscv_vse32_v_u32m1 (outputMat, outMat0, 4);
+}
+
+void
+foo2 (void *outputMat, void *inputMat)
+{
+  vfloat32m1_t v = __riscv_vfmv_v_f_f32m1 (0xaaaa, 1);
+  __riscv_vse32_v_f32m1 (outputMat, v, 4);
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-3.c

new file mode 100644 (file)

index 0000000..643f6a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-3.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void matrix_transpose_in_register(uint32_t* outputMat, uint32_t* inputMat) {
+    vuint32m1_t matRegIn0 = __riscv_vle32_v_u32m1(inputMat, 4);
+    vuint32m1_t matRegIn1 = __riscv_vle32_v_u32m1(inputMat + 4, 4);
+    vuint32m1_t matRegIn2 = __riscv_vle32_v_u32m1(inputMat + 8, 4);
+    vuint32m1_t matRegIn3 = __riscv_vle32_v_u32m1(inputMat + 12, 4);
+
+    vbool32_t oddMask = __riscv_vreinterpret_v_u32m1_b32(__riscv_vmv_v_x_u32m1(0xaaaa, 1));
+
+    vuint32m1_t smallTransposeMat0 = __riscv_vslideup_vx_u32m1_tumu(oddMask, matRegIn0, matRegIn1, 1, 4);
+    vuint32m1_t smallTransposeMat2 = __riscv_vslideup_vx_u32m1_tumu(oddMask, matRegIn2, matRegIn3, 1, 4);
+
+    vbool32_t evenMask = __riscv_vreinterpret_v_u32m1_b32(__riscv_vmv_v_x_u32m1(0x5555, 1));
+
+    vuint32m1_t smallTransposeMat1 = __riscv_vslidedown_vx_u32m1_tumu(evenMask, matRegIn1, matRegIn0, 1, 4);
+    vuint32m1_t smallTransposeMat3 = __riscv_vslidedown_vx_u32m1_tumu(evenMask, matRegIn3, matRegIn2, 1, 4);
+
+    vuint32m1_t outMat0 = __riscv_vslideup_vx_u32m1_tu(smallTransposeMat0, smallTransposeMat2, 2, 4);
+    vuint32m1_t outMat1 = __riscv_vslideup_vx_u32m1_tu(smallTransposeMat1, smallTransposeMat3, 2, 4);
+
+    vuint32m1_t outMat2 = __riscv_vslidedown_vx_u32m1_tu(smallTransposeMat2, smallTransposeMat0, 2, 2);
+    vuint32m1_t outMat3 = __riscv_vslidedown_vx_u32m1_tu(smallTransposeMat3, smallTransposeMat1, 2, 2);
+    __riscv_vse32_v_u32m1(outputMat, outMat0, 4);
+    __riscv_vse32_v_u32m1(outputMat + 4, outMat1, 4);
+    __riscv_vse32_v_u32m1(outputMat + 8, outMat2, 4);
+    __riscv_vse32_v_u32m1(outputMat + 12, outMat3, 4);
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*2,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 3 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
author	Juzhe-Zhong <juzhe.zhong@rivai.ai>
	Mon, 22 Jan 2024 06:46:46 +0000 (14:46 +0800)
committer	Pan Li <pan2.li@intel.com>
	Mon, 22 Jan 2024 13:48:21 +0000 (21:48 +0800)
gcc/config/riscv/riscv-protos.h		patch \| blob \| blame \| history
gcc/config/riscv/riscv-v.cc		patch \| blob \| blame \| history
gcc/config/riscv/vector.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-3.c	[new file with mode: 0644]	patch \| blob