RISC-V: Support highest-number regno overlap for widen ternary

author    Juzhe-Zhong <juzhe.zhong@rivai.ai>  Mon, 4 Dec 2023 13:32:06 +0000 (21:32 +0800)
committer Pan Li <pan2.li@intel.com>  Mon, 4 Dec 2023 13:36:00 +0000 (21:36 +0800)
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md

index d98661fc1643920701edc31d9b94874926dc75a6..cec1edc8190eaab7e47a8c20c237958dcb77c740 100644 (file)
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -5866,29 +5866,30 @@
     (set_attr "mode" "<V_DOUBLE_TRUNC>")])
  
  (define_insn "@pred_widen_mul_plus<su><mode>_scalar"
-  [(set (match_operand:VWEXTI 0 "register_operand"                    "=&vr")
+  [(set (match_operand:VWEXTI 0 "register_operand"                   "=vd, vr, vd, vr, vd, vr, ?&vr")
         (if_then_else:VWEXTI
           (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"             "vmWc1")
-            (match_operand 5 "vector_length_operand"                "   rK")
-            (match_operand 6 "const_int_operand"                    "    i")
-            (match_operand 7 "const_int_operand"                    "    i")
-            (match_operand 8 "const_int_operand"                    "    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"             " vm,Wc1, vm,Wc1, vm,Wc1,vmWc1")
+            (match_operand 5 "vector_length_operand"                " rK, rK, rK, rK, rK, rK,   rK")
+            (match_operand 6 "const_int_operand"                    "  i,  i,  i,  i,  i,  i,    i")
+            (match_operand 7 "const_int_operand"                    "  i,  i,  i,  i,  i,  i,    i")
+            (match_operand 8 "const_int_operand"                    "  i,  i,  i,  i,  i,  i,    i")
              (reg:SI VL_REGNUM)
              (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
           (plus:VWEXTI
             (mult:VWEXTI
               (any_extend:VWEXTI
                 (vec_duplicate:<V_DOUBLE_TRUNC>
-                 (match_operand:<VSUBEL> 3 "register_operand"       "    r")))
+                 (match_operand:<VSUBEL> 3 "reg_or_0_operand"       " rJ, rJ, rJ, rJ, rJ, rJ,   rJ")))
               (any_extend:VWEXTI
-               (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "   vr")))
-           (match_operand:VWEXTI 2 "register_operand"               "    0"))
+               (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "W21,W21,W42,W42,W84,W84,   vr")))
+           (match_operand:VWEXTI 2 "register_operand"               "  0,  0,  0,  0,  0,  0,    0"))
           (match_dup 2)))]
    "TARGET_VECTOR"
-  "vwmacc<u>.vx\t%0,%3,%4%p1"
+  "vwmacc<u>.vx\t%0,%z3,%4%p1"
    [(set_attr "type" "viwmuladd")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none")])
  
  (define_insn "@pred_widen_mul_plussu<mode>"
    [(set (match_operand:VWEXTI 0 "register_operand"                    "=&vr")
@@ -5915,54 +5916,56 @@
     (set_attr "mode" "<V_DOUBLE_TRUNC>")])
  
  (define_insn "@pred_widen_mul_plussu<mode>_scalar"
-  [(set (match_operand:VWEXTI 0 "register_operand"                    "=&vr")
+  [(set (match_operand:VWEXTI 0 "register_operand"                    "=vd, vr, vd, vr, vd, vr, ?&vr")
         (if_then_else:VWEXTI
           (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"             "vmWc1")
-            (match_operand 5 "vector_length_operand"                "   rK")
-            (match_operand 6 "const_int_operand"                    "    i")
-            (match_operand 7 "const_int_operand"                    "    i")
-            (match_operand 8 "const_int_operand"                    "    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"              " vm,Wc1, vm,Wc1, vm,Wc1,vmWc1")
+            (match_operand 5 "vector_length_operand"                 " rK, rK, rK, rK, rK, rK,   rK")
+            (match_operand 6 "const_int_operand"                     "  i,  i,  i,  i,  i,  i,    i")
+            (match_operand 7 "const_int_operand"                     "  i,  i,  i,  i,  i,  i,    i")
+            (match_operand 8 "const_int_operand"                     "  i,  i,  i,  i,  i,  i,    i")
              (reg:SI VL_REGNUM)
              (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
           (plus:VWEXTI
             (mult:VWEXTI
               (sign_extend:VWEXTI
                 (vec_duplicate:<V_DOUBLE_TRUNC>
-                 (match_operand:<VSUBEL> 3 "register_operand"       "    r")))
+                 (match_operand:<VSUBEL> 3 "reg_or_0_operand"        " rJ, rJ, rJ, rJ, rJ, rJ,   rJ")))
               (zero_extend:VWEXTI
-               (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "   vr")))
-           (match_operand:VWEXTI 2 "register_operand"               "    0"))
+               (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"  "W21,W21,W42,W42,W84,W84,   vr")))
+           (match_operand:VWEXTI 2 "register_operand"                "  0,  0,  0,  0,  0,  0,    0"))
           (match_dup 2)))]
    "TARGET_VECTOR"
-  "vwmaccsu.vx\t%0,%3,%4%p1"
+  "vwmaccsu.vx\t%0,%z3,%4%p1"
    [(set_attr "type" "viwmuladd")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none")])
  
  (define_insn "@pred_widen_mul_plusus<mode>_scalar"
-  [(set (match_operand:VWEXTI 0 "register_operand"                    "=&vr")
+  [(set (match_operand:VWEXTI 0 "register_operand"                    "=vd, vr, vd, vr, vd, vr, ?&vr")
         (if_then_else:VWEXTI
           (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"             "vmWc1")
-            (match_operand 5 "vector_length_operand"                "   rK")
-            (match_operand 6 "const_int_operand"                    "    i")
-            (match_operand 7 "const_int_operand"                    "    i")
-            (match_operand 8 "const_int_operand"                    "    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"              " vm,Wc1, vm,Wc1, vm,Wc1,vmWc1")
+            (match_operand 5 "vector_length_operand"                 " rK, rK, rK, rK, rK, rK,   rK")
+            (match_operand 6 "const_int_operand"                     "  i,  i,  i,  i,  i,  i,    i")
+            (match_operand 7 "const_int_operand"                     "  i,  i,  i,  i,  i,  i,    i")
+            (match_operand 8 "const_int_operand"                     "  i,  i,  i,  i,  i,  i,    i")
              (reg:SI VL_REGNUM)
              (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
           (plus:VWEXTI
             (mult:VWEXTI
               (zero_extend:VWEXTI
                 (vec_duplicate:<V_DOUBLE_TRUNC>
-                 (match_operand:<VSUBEL> 3 "register_operand"       "    r")))
+                 (match_operand:<VSUBEL> 3 "reg_or_0_operand"       " rJ, rJ, rJ, rJ, rJ, rJ,   rJ")))
               (sign_extend:VWEXTI
-               (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "   vr")))
-           (match_operand:VWEXTI 2 "register_operand"               "    0"))
+               (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "W21,W21,W42,W42,W84,W84,   vr")))
+           (match_operand:VWEXTI 2 "register_operand"               "  0,  0,  0,  0,  0,  0,    0"))
           (match_dup 2)))]
    "TARGET_VECTOR"
-  "vwmaccus.vx\t%0,%3,%4%p1"
+  "vwmaccus.vx\t%0,%z3,%4%p1"
    [(set_attr "type" "viwmuladd")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none")])
  
  ;; -------------------------------------------------------------------------------
  ;; ---- Predicated BOOL mask operations
@@ -7181,15 +7184,15 @@
         (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
  
  (define_insn "@pred_widen_mul_<optab><mode>_scalar"
-  [(set (match_operand:VWEXTF 0 "register_operand"                    "=&vr")
+  [(set (match_operand:VWEXTF 0 "register_operand"                    "=vd, vr, vd, vr, vd, vr, ?&vr")
         (if_then_else:VWEXTF
           (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"             "vmWc1")
-            (match_operand 5 "vector_length_operand"                "   rK")
-            (match_operand 6 "const_int_operand"                    "    i")
-            (match_operand 7 "const_int_operand"                    "    i")
-            (match_operand 8 "const_int_operand"                    "    i")
-            (match_operand 9 "const_int_operand"                    "    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"             " vm,Wc1, vm,Wc1, vm,Wc1,vmWc1")
+            (match_operand 5 "vector_length_operand"                " rK, rK, rK, rK, rK, rK,   rK")
+            (match_operand 6 "const_int_operand"                    "  i,  i,  i,  i,  i,  i,    i")
+            (match_operand 7 "const_int_operand"                    "  i,  i,  i,  i,  i,  i,    i")
+            (match_operand 8 "const_int_operand"                    "  i,  i,  i,  i,  i,  i,    i")
+            (match_operand 9 "const_int_operand"                    "  i,  i,  i,  i,  i,  i,    i")
              (reg:SI VL_REGNUM)
              (reg:SI VTYPE_REGNUM)
              (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
@@ -7197,17 +7200,18 @@
             (mult:VWEXTF
               (float_extend:VWEXTF
                 (vec_duplicate:<V_DOUBLE_TRUNC>
-                 (match_operand:<VSUBEL> 3 "register_operand"       "    f")))
+                 (match_operand:<VSUBEL> 3 "register_operand"       " f, f, f, f, f, f, f")))
               (float_extend:VWEXTF
-               (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "   vr")))
-           (match_operand:VWEXTF 2 "register_operand"               "    0"))
+               (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "W21,W21,W42,W42,W84,W84,   vr")))
+           (match_operand:VWEXTF 2 "register_operand"               "  0,  0,  0,  0,  0,  0,    0"))
           (match_dup 2)))]
    "TARGET_VECTOR"
    "vfw<macc_msac>.vf\t%0,%3,%4%p1"
    [(set_attr "type" "vfwmuladd")
     (set_attr "mode" "<V_DOUBLE_TRUNC>")
     (set (attr "frm_mode")
-       (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
+       (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))
+   (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none")])
  
  (define_insn "@pred_widen_mul_neg_<optab><mode>"
    [(set (match_operand:VWEXTF 0 "register_operand"                      "=&vr")
@@ -7239,15 +7243,15 @@
         (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
  
  (define_insn "@pred_widen_mul_neg_<optab><mode>_scalar"
-  [(set (match_operand:VWEXTF 0 "register_operand"                      "=&vr")
+  [(set (match_operand:VWEXTF 0 "register_operand"                      "=vd, vr, vd, vr, vd, vr, ?&vr")
         (if_then_else:VWEXTF
           (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"               "vmWc1")
-            (match_operand 5 "vector_length_operand"                  "   rK")
-            (match_operand 6 "const_int_operand"                      "    i")
-            (match_operand 7 "const_int_operand"                      "    i")
-            (match_operand 8 "const_int_operand"                      "    i")
-            (match_operand 9 "const_int_operand"                      "    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"               " vm,Wc1, vm,Wc1, vm,Wc1,vmWc1")
+            (match_operand 5 "vector_length_operand"                  " rK, rK, rK, rK, rK, rK,   rK")
+            (match_operand 6 "const_int_operand"                      "  i,  i,  i,  i,  i,  i,    i")
+            (match_operand 7 "const_int_operand"                      "  i,  i,  i,  i,  i,  i,    i")
+            (match_operand 8 "const_int_operand"                      "  i,  i,  i,  i,  i,  i,    i")
+            (match_operand 9 "const_int_operand"                      "  i,  i,  i,  i,  i,  i,    i")
              (reg:SI VL_REGNUM)
              (reg:SI VTYPE_REGNUM)
              (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
@@ -7256,17 +7260,18 @@
               (mult:VWEXTF
                 (float_extend:VWEXTF
                   (vec_duplicate:<V_DOUBLE_TRUNC>
-                   (match_operand:<VSUBEL> 3 "register_operand"       "    f")))
+                   (match_operand:<VSUBEL> 3 "register_operand"       " f, f, f, f, f, f, f")))
                 (float_extend:VWEXTF
-                 (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "   vr"))))
-           (match_operand:VWEXTF 2 "register_operand"                 "    0"))
+                 (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "W21,W21,W42,W42,W84,W84,   vr"))))
+           (match_operand:VWEXTF 2 "register_operand"                 "  0,  0,  0,  0,  0,  0,    0"))
           (match_dup 2)))]
    "TARGET_VECTOR"
    "vfw<nmsac_nmacc>.vf\t%0,%3,%4%p1"
    [(set_attr "type" "vfwmuladd")
     (set_attr "mode" "<V_DOUBLE_TRUNC>")
     (set (attr "frm_mode")
-       (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
+       (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))
+   (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none")])
  
  ;; -------------------------------------------------------------------------------
  ;; ---- Predicated floating-point comparison operations
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c

new file mode 100644 (file)

index 0000000..6337ff8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
@@ -0,0 +1,103 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vint16m2_t result = __riscv_vwmacc_vx_i16m2 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo2 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vint16m4_t result = __riscv_vwmacc_vx_i16m4 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo3 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vint16m8_t result = __riscv_vwmacc_vx_i16m8 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+void
+foo4 (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vint16m2_t result = __riscv_vwmaccus_vx_i16m2 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo5 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vint16m4_t result = __riscv_vwmaccus_vx_i16m4 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo6 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vint16m8_t result = __riscv_vwmaccus_vx_i16m8 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+void
+foo7 (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vuint8m1_t high_ueew8 = __riscv_vreinterpret_v_i8m1_u8m1 (high_eew8);
+  vint16m2_t result = __riscv_vwmaccsu_vx_i16m2 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo8 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vuint8m2_t high_ueew8 = __riscv_vreinterpret_v_i8m2_u8m2 (high_eew8);
+  vint16m4_t result = __riscv_vwmaccsu_vx_i16m4 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo9 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vuint8m4_t high_ueew8 = __riscv_vreinterpret_v_i8m4_u8m4 (high_eew8);
+  vint16m8_t result = __riscv_vwmaccsu_vx_i16m8 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-38.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-38.c

new file mode 100644 (file)

index 0000000..7b7d6cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-38.c
@@ -0,0 +1,82 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *in, void *out)
+{
+  vfloat64m2_t accum = __riscv_vle64_v_f64m2 (in, 4);
+  vfloat64m1_t high_eew64 = __riscv_vget_v_f64m2_f64m1 (accum, 1);
+  vint64m1_t high_eew64_i = __riscv_vreinterpret_v_f64m1_i64m1 (high_eew64);
+  vint32m1_t high_eew32_i = __riscv_vreinterpret_v_i64m1_i32m1 (high_eew64_i);
+  vfloat32m1_t high_eew32 = __riscv_vreinterpret_v_i32m1_f32m1 (high_eew32_i);
+  vfloat64m2_t result = __riscv_vfwmacc_vf_f64m2 (accum, 64, high_eew32, 4);
+  __riscv_vse64_v_f64m2 (out, result, 4);
+}
+
+void
+foo2 (void *in, void *out)
+{
+  vfloat64m4_t accum = __riscv_vle64_v_f64m4 (in, 4);
+  vfloat64m2_t high_eew64 = __riscv_vget_v_f64m4_f64m2 (accum, 1);
+  vint64m2_t high_eew64_i = __riscv_vreinterpret_v_f64m2_i64m2 (high_eew64);
+  vint32m2_t high_eew32_i = __riscv_vreinterpret_v_i64m2_i32m2 (high_eew64_i);
+  vfloat32m2_t high_eew32 = __riscv_vreinterpret_v_i32m2_f32m2 (high_eew32_i);
+  vfloat64m4_t result = __riscv_vfwmacc_vf_f64m4 (accum, 64, high_eew32, 4);
+  __riscv_vse64_v_f64m4 (out, result, 4);
+}
+
+void
+foo3 (void *in, void *out)
+{
+  vfloat64m8_t accum = __riscv_vle64_v_f64m8 (in, 4);
+  vfloat64m4_t high_eew64 = __riscv_vget_v_f64m8_f64m4 (accum, 1);
+  vint64m4_t high_eew64_i = __riscv_vreinterpret_v_f64m4_i64m4 (high_eew64);
+  vint32m4_t high_eew32_i = __riscv_vreinterpret_v_i64m4_i32m4 (high_eew64_i);
+  vfloat32m4_t high_eew32 = __riscv_vreinterpret_v_i32m4_f32m4 (high_eew32_i);
+  vfloat64m8_t result = __riscv_vfwmacc_vf_f64m8 (accum, 64, high_eew32, 4);
+  __riscv_vse64_v_f64m8 (out, result, 4);
+}
+
+void
+foo4 (void *in, void *out)
+{
+  vfloat64m2_t accum = __riscv_vle64_v_f64m2 (in, 4);
+  vfloat64m1_t high_eew64 = __riscv_vget_v_f64m2_f64m1 (accum, 1);
+  vint64m1_t high_eew64_i = __riscv_vreinterpret_v_f64m1_i64m1 (high_eew64);
+  vint32m1_t high_eew32_i = __riscv_vreinterpret_v_i64m1_i32m1 (high_eew64_i);
+  vfloat32m1_t high_eew32 = __riscv_vreinterpret_v_i32m1_f32m1 (high_eew32_i);
+  vfloat64m2_t result = __riscv_vfwnmsac_vf_f64m2 (accum, 64, high_eew32, 4);
+  __riscv_vse64_v_f64m2 (out, result, 4);
+}
+
+void
+foo5 (void *in, void *out)
+{
+  vfloat64m4_t accum = __riscv_vle64_v_f64m4 (in, 4);
+  vfloat64m2_t high_eew64 = __riscv_vget_v_f64m4_f64m2 (accum, 1);
+  vint64m2_t high_eew64_i = __riscv_vreinterpret_v_f64m2_i64m2 (high_eew64);
+  vint32m2_t high_eew32_i = __riscv_vreinterpret_v_i64m2_i32m2 (high_eew64_i);
+  vfloat32m2_t high_eew32 = __riscv_vreinterpret_v_i32m2_f32m2 (high_eew32_i);
+  vfloat64m4_t result = __riscv_vfwnmsac_vf_f64m4 (accum, 64, high_eew32, 4);
+  __riscv_vse64_v_f64m4 (out, result, 4);
+}
+
+void
+foo6 (void *in, void *out)
+{
+  vfloat64m8_t accum = __riscv_vle64_v_f64m8 (in, 4);
+  vfloat64m4_t high_eew64 = __riscv_vget_v_f64m8_f64m4 (accum, 1);
+  vint64m4_t high_eew64_i = __riscv_vreinterpret_v_f64m4_i64m4 (high_eew64);
+  vint32m4_t high_eew32_i = __riscv_vreinterpret_v_i64m4_i32m4 (high_eew64_i);
+  vfloat32m4_t high_eew32 = __riscv_vreinterpret_v_i32m4_f32m4 (high_eew32_i);
+  vfloat64m8_t result = __riscv_vfwnmsac_vf_f64m8 (accum, 64, high_eew32, 4);
+  __riscv_vse64_v_f64m8 (out, result, 4);
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
author	Juzhe-Zhong <juzhe.zhong@rivai.ai>
	Mon, 4 Dec 2023 13:32:06 +0000 (21:32 +0800)
committer	Pan Li <pan2.li@intel.com>
	Mon, 4 Dec 2023 13:36:00 +0000 (21:36 +0800)
gcc/config/riscv/vector.md		patch | blob | blame | history
gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-38.c	[new file with mode: 0644]	patch | blob