From: Lehua Ding <lehua.ding@rivai.ai>
Date: Tue, 7 Nov 2023 07:33:20 +0000 (+0800)
Subject: RISC-V: Fixed failed rvv combine testcases
X-Git-Tag: basepoints/gcc-15~4956
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=71b1efd910d47362558defc31b92fc4cc80c0ec6;p=thirdparty%2Fgcc.git

RISC-V: Fixed failed rvv combine testcases

This patch fixed the fellowing failed testcases on the trunk:
FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c scan-assembler-times \\tvfwredusum\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 2
...
FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c scan-assembler-times \\tvwredsumu\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 3
...

The reason for these failed testcases is the introduce of .VCOND_MASK_LEN
in midend for other bugfix and further leads to a new vcond_mask_len rtl
pattern after expand. So we need add new combine patterns handle this case.

Consider this code:

int16_t foo (int8_t *restrict a, int8_t *restrict pred)
{
  int16_t sum = 0;
  for (int i = 0; i < 16; i += 1)
    if (pred[i])
      sum += a[i];
  return sum;
}

Before this patch:
foo:
        vsetivli        zero,16,e8,m1,ta,ma
        vle8.v  v0,0(a1)
        vsetvli a5,zero,e8,m1,ta,ma
        vmsne.vi        v0,v0,0
        vsetvli zero,zero,e16,m2,ta,ma
        li      a3,0
        vmv.v.i v2,0
        vsetivli        zero,16,e16,m2,ta,ma
        vle8.v  v6,0(a0),v0.t
        vmv.s.x v1,a3
        vsetvli a5,zero,e16,m2,ta,ma
        vsext.vf2       v4,v6
        vsetivli        zero,16,e16,m2,tu,ma
        vmerge.vvm      v2,v2,v4,v0
        vsetvli a5,zero,e16,m2,ta,ma
        vredsum.vs      v2,v2,v1
        vmv.x.s a0,v2
        slliw   a0,a0,16
        sraiw   a0,a0,16
        ret

After this patch:
foo:
	vsetivli	zero,16,e16,m2,ta,ma
	li	a5,0
	vle8.v	v0,0(a1)
	vmv.s.x	v1,a5
	vsetvli	zero,zero,e8,m1,ta,ma
	vmsne.vi	v0,v0,0
	vle8.v	v2,0(a0),v0.t
	vwredsum.vs	v1,v2,v1,v0.t
	vsetvli	zero,zero,e16,m1,ta,ma
	vmv.x.s	a0,v1
	slliw	a0,a0,16
	sraiw	a0,a0,16
	ret

Combine the vsext.vf2, vmerge.vvm, and vredsum.vs instructions while
reducing the corresponding vsetvl instructions.

gcc/ChangeLog:

	* config/riscv/autovec-opt.md (*cond_len_<optab><v_double_trunc><mode>):
	New combine pattern.
	(*cond_len_<optab><v_quad_trunc><mode>): Ditto.
	(*cond_len_<optab><v_oct_trunc><mode>): Ditto.
	(*cond_len_extend<v_double_trunc><mode>): Ditto.
	(*cond_len_widen_reduc_plus_scal_<mode>): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c:
	* gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c:
---

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index d0f8b3cde4ec..3c87e66ea491 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -194,6 +194,84 @@
 }
 [(set_attr "type" "vector")])
 
+;; Combine sign_extend/zero_extend(vf2) and vcond_mask_len
+(define_insn_and_split "*cond_len_<optab><v_double_trunc><mode>"
+  [(set (match_operand:VWEXTI 0 "register_operand")
+    (if_then_else:VWEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VWEXTI
+        (any_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+        (match_operand:VWEXTI 1 "vector_merge_operand")
+	(match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf2 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine sign_extend/zero_extend(vf4) and vcond_mask_len
+(define_insn_and_split "*cond_len_<optab><v_quad_trunc><mode>"
+  [(set (match_operand:VQEXTI 0 "register_operand")
+    (if_then_else:VQEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VQEXTI
+        (any_extend:VQEXTI (match_operand:<V_QUAD_TRUNC> 2 "register_operand"))
+        (match_operand:VQEXTI 1 "vector_merge_operand")
+	(match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf4 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine sign_extend/zero_extend(vf8) and vcond_mask_len
+(define_insn_and_split "*cond_len_<optab><v_oct_trunc><mode>"
+  [(set (match_operand:VOEXTI 0 "register_operand")
+    (if_then_else:VOEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VOEXTI
+        (any_extend:VOEXTI (match_operand:<V_OCT_TRUNC> 2 "register_operand"))
+        (match_operand:VOEXTI 1 "vector_merge_operand")
+	(match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf8 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; Combine trunc(vf2) + vcond_mask
 (define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
@@ -235,6 +313,32 @@
 }
 [(set_attr "type" "vector")])
 
+;; Combine FP extend(vf2) and vcond_mask_len
+(define_insn_and_split "*cond_len_extend<v_double_trunc><mode>"
+  [(set (match_operand:VWEXTF_ZVFHMIN 0 "register_operand")
+    (if_then_else:VWEXTF_ZVFHMIN
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VWEXTF_ZVFHMIN
+        (float_extend:VWEXTF_ZVFHMIN (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+        (match_operand:VWEXTF_ZVFHMIN 1 "vector_merge_operand")
+	(match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_extend<mode> (operands[0], operands[3], operands[1], operands[2],
+                                    operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; Combine FP trunc(vf2) + vcond_mask
 (define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
@@ -1151,6 +1255,61 @@
 }
 [(set_attr "type" "vector")])
 
+;; Combine mask_len_extend + vredsum to mask_vwredsum[u]
+;; where the mrege of mask_len_extend is vector const 0
+(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (unspec:<VM> [
+              (match_operand 2 "vector_length_operand")
+              (const_int 0)
+              (const_int 0)
+              (reg:SI VL_REGNUM)
+              (reg:SI VTYPE_REGNUM)
+            ] UNSPEC_VPREDICATE)
+            (vec_merge:<V_DOUBLE_EXTEND>
+              (any_extend:<V_DOUBLE_EXTEND>
+                (match_operand:VI_QHS_NO_M8 3 "register_operand"))
+              (if_then_else:<V_DOUBLE_EXTEND>
+                (unspec:<VM> [
+                  (match_operand:<VM> 4 "vector_all_trues_mask_operand")
+                  (match_operand 5 "vector_length_operand")
+                  (match_operand 6 "const_int_operand")
+                  (match_operand 7 "const_int_operand")
+                  (match_operand 8 "const_1_or_2_operand")
+                  (reg:SI VL_REGNUM)
+                  (reg:SI VTYPE_REGNUM)
+                ] UNSPEC_VPREDICATE)
+                (match_operand:<V_DOUBLE_EXTEND> 9 "vector_const_0_operand")
+                (match_operand:<V_DOUBLE_EXTEND> 10 "vector_merge_operand"))
+              (match_operand:<VM> 1 "register_operand"))
+            (if_then_else:<V_DOUBLE_EXTEND>
+              (unspec:<VM> [
+                (match_dup 4)
+                (match_dup 5)
+                (match_dup 6)
+                (match_dup 7)
+                (match_dup 8)
+                (reg:SI VL_REGNUM)
+                (reg:SI VTYPE_REGNUM)
+              ] UNSPEC_VPREDICATE)
+              (match_dup 9)
+              (match_dup 10)))
+        ] UNSPEC_REDUC_SUM))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
+  riscv_vector::expand_reduction (<WREDUC_UNSPEC>,
+                                  riscv_vector::REDUCE_OP_M,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; Combine mask_extend + vfredsum to mask_vfwredusum
 ;; where the mrege of mask_extend is vector const 0
 (define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
@@ -1187,6 +1346,61 @@
 }
 [(set_attr "type" "vector")])
 
+;; Combine mask_len_extend + vredsum to mask_vwredsum[u]
+;; where the mrege of mask_len_extend is vector const 0
+(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (unspec:<VM> [
+              (match_operand 2 "vector_length_operand")
+              (const_int 0)
+              (const_int 0)
+              (reg:SI VL_REGNUM)
+              (reg:SI VTYPE_REGNUM)
+            ] UNSPEC_VPREDICATE)
+            (vec_merge:<V_DOUBLE_EXTEND>
+              (float_extend:<V_DOUBLE_EXTEND>
+                (match_operand:VF_HS_NO_M8 3 "register_operand"))
+              (if_then_else:<V_DOUBLE_EXTEND>
+                (unspec:<VM> [
+                  (match_operand:<VM> 4 "vector_all_trues_mask_operand")
+                  (match_operand 5 "vector_length_operand")
+                  (match_operand 6 "const_int_operand")
+                  (match_operand 7 "const_int_operand")
+                  (match_operand 8 "const_1_or_2_operand")
+                  (reg:SI VL_REGNUM)
+                  (reg:SI VTYPE_REGNUM)
+                ] UNSPEC_VPREDICATE)
+                (match_operand:<V_DOUBLE_EXTEND> 9 "vector_const_0_operand")
+                (match_operand:<V_DOUBLE_EXTEND> 10 "vector_merge_operand"))
+              (match_operand:<VM> 1 "register_operand"))
+            (if_then_else:<V_DOUBLE_EXTEND>
+              (unspec:<VM> [
+                (match_dup 4)
+                (match_dup 5)
+                (match_dup 6)
+                (match_dup 7)
+                (match_dup 8)
+                (reg:SI VL_REGNUM)
+                (reg:SI VTYPE_REGNUM)
+              ] UNSPEC_VPREDICATE)
+              (match_dup 9)
+              (match_dup 10)))
+        ] UNSPEC_REDUC_SUM_UNORDERED))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
+  riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
+                                  riscv_vector::REDUCE_OP_M_FRM_DYN,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; =============================================================================
 ;; Misc combine patterns
 ;; =============================================================================
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
index 22a71048684e..47889f3a1cd8 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
@@ -15,16 +15,27 @@
 
 #define TEST_ALL(TEST)                                                         \
   TEST (int16_t, int8_t, 16)                                                   \
+  TEST (int32_t, int8_t, 8)                                                    \
   TEST (int32_t, int16_t, 8)                                                   \
+  TEST (int64_t, int8_t, 4)                                                    \
+  TEST (int64_t, int16_t, 4)                                                   \
   TEST (int64_t, int32_t, 4)                                                   \
   TEST (uint16_t, uint8_t, 16)                                                 \
+  TEST (uint32_t, uint8_t, 8)                                                  \
   TEST (uint32_t, uint16_t, 8)                                                 \
+  TEST (uint64_t, uint8_t, 4)                                                  \
+  TEST (uint64_t, uint16_t, 4)                                                 \
   TEST (uint64_t, uint32_t, 4)                                                 \
   TEST (float, _Float16, 8)                                                    \
+  TEST (double, _Float16, 4)                                                   \
   TEST (double, float, 4)
 
 TEST_ALL (TEST_TYPE)
 
-/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
index 7c8fedd072b2..662d13512157 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
@@ -1,30 +1,12 @@
 /* { dg-do compile } */
 /* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
-#include <stdint-gcc.h>
 
-#define TEST_TYPE(TYPE1, TYPE2, N)                                             \
-  __attribute__ ((noipa))                                                      \
-  TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict pred)      \
-  {                                                                            \
-    TYPE1 sum = 0;                                                             \
-    for (int i = 0; i < N; i += 1)                                             \
-      if (pred[i])                                                             \
-	sum += a[i];                                                           \
-    return sum;                                                                \
-  }
+#include "cond_widen_reduc-1.c"
 
-#define TEST_ALL(TEST)                                                         \
-  TEST (int16_t, int8_t, 16)                                                   \
-  TEST (int32_t, int16_t, 8)                                                   \
-  TEST (int64_t, int32_t, 4)                                                   \
-  TEST (uint16_t, uint8_t, 16)                                                 \
-  TEST (uint32_t, uint16_t, 8)                                                 \
-  TEST (uint64_t, uint32_t, 4)                                                 \
-  TEST (float, _Float16, 8)                                                    \
-  TEST (double, float, 4)
-
-TEST_ALL (TEST_TYPE)
-
-/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */