}
[(set_attr "type" "vnshift")])
+;; Combine vmulh.vv/vmulhu.vv + vcond_mask
+;; Matches an if_then_else in which operand 1 is the condition: the "then"
+;; arm is mulh (operand 2, operand 3) and the "else" arm is operand 4.
+;; The insn condition requires can_create_pseudo_p (), and the split
+;; condition "&& 1" adds nothing further, so the "#" template is always
+;; split.  The split passes operands 0-4 plus a length operand built with
+;; gen_int_mode from GET_MODE_NUNITS (<MODE>mode) in Pmode to
+;; riscv_vector::expand_cond_len_binop, using the pred_mulh insn code
+;; selected by <MULH_UNSPEC>.
+(define_insn_and_split "*cond_<mulh_table><mode>3_highpart"
+ [(set (match_operand:VFULLI 0 "register_operand")
+ (if_then_else:VFULLI
+ (match_operand:<VM> 1 "register_operand")
+ (mulh:VFULLI
+ (match_operand:VFULLI 2 "register_operand")
+ (match_operand:VFULLI 3 "register_operand"))
+ (match_operand:VFULLI 4 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ insn_code icode = code_for_pred_mulh (<MULH_UNSPEC>, <MODE>mode);
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[4],
+ gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
+ riscv_vector::expand_cond_len_binop (icode, ops);
+ DONE;
+}
+[(set_attr "type" "vector")])
+
;; =============================================================================
;; Combine extend + binop to widen_binop
;; =============================================================================
}
[(set_attr "type" "vfwmul")])
-
;; =============================================================================
;; Misc combine patterns
;; =============================================================================
;; - vmulhu.vv
;; -------------------------------------------------------------------------
-(define_insn_and_split "smul<mode>3_highpart"
+(define_insn_and_split "<mulh_table><mode>3_highpart"
[(set (match_operand:VFULLI 0 "register_operand")
- (smul_highpart:VFULLI
+ (mulh:VFULLI
(match_operand:VFULLI 1 "register_operand")
(match_operand:VFULLI 2 "register_operand")))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"&& 1"
[(const_int 0)]
{
- insn_code icode = code_for_pred_mulh (UNSPEC_VMULHS, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
- DONE;
-}
-[(set_attr "type" "vimul")])
-
-(define_insn_and_split "umul<mode>3_highpart"
- [(set (match_operand:VFULLI 0 "register_operand")
- (umul_highpart:VFULLI
- (match_operand:VFULLI 1 "register_operand")
- (match_operand:VFULLI 2 "register_operand")))]
- "TARGET_VECTOR && can_create_pseudo_p ()"
- "#"
- "&& 1"
- [(const_int 0)]
-{
- insn_code icode = code_for_pred_mulh (UNSPEC_VMULHU, <MODE>mode);
+ insn_code icode = code_for_pred_mulh (<MULH_UNSPEC>, <MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
DONE;
}
(define_code_iterator sat_int_plus_binop [ss_plus us_plus])
(define_code_iterator sat_int_minus_binop [ss_minus us_minus])
+;; Code iterator over the signed and unsigned high-part multiply RTL codes.
+;; mulh_table maps each code to its name prefix ("smul"/"umul") and
+;; MULH_UNSPEC maps it to the matching UNSPEC_VMULHS/UNSPEC_VMULHU name.
+(define_code_iterator mulh [smul_highpart umul_highpart])
+(define_code_attr mulh_table [(smul_highpart "smul") (umul_highpart "umul")])
+(define_code_attr MULH_UNSPEC [(smul_highpart "UNSPEC_VMULHS") (umul_highpart "UNSPEC_VMULHU")])
+
(define_code_iterator any_widen_binop [plus minus mult])
(define_code_iterator plus_minus [plus minus])
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Define test_<TYPE>_<WTYPE>.  For each i < n: when pred[i] is nonzero,
+   c[i] receives the WTYPE product a[i] * b[i] shifted right by
+   sizeof (TYPE) * 8 bits and converted to TYPE; otherwise c[i] is
+   assigned its own current value.  The function is marked noipa.  */
+#define DEF_LOOP(TYPE, WTYPE) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##WTYPE (TYPE *__restrict a, TYPE *__restrict b, \
+ TYPE *__restrict c, TYPE *__restrict pred, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ c[i] = pred[i] \
+ ? (TYPE) (((WTYPE) a[i] * (WTYPE) b[i]) >> sizeof (TYPE) * 8) \
+ : c[i]; \
+ }
+
+/* Invoke T once per (element type, wider type) pair: three signed pairs
+   followed by three unsigned pairs, each wider type being the next
+   fixed-width integer type up (8->16, 16->32, 32->64).  */
+#define TEST_ALL(T) \
+ T (int8_t, int16_t) \
+ T (int16_t, int32_t) \
+ T (int32_t, int64_t) \
+ T (uint8_t, uint16_t) \
+ T (uint16_t, uint32_t) \
+ T (uint32_t, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tvmulh\.vv\tv[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvmulhu\.vv\tv[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {\tvmerge\.vvm\t} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Define test_<TYPE1>_<TYPE2>, the mixed-type variant: a[] has element
+   type TYPE1 and is widened to WTYPE1, b[] has element type TYPE2 and is
+   widened to WTYPE2, and c[] and pred[] have element type TYPE3.  For
+   each i < n: when pred[i] is nonzero, c[i] receives the widened product
+   shifted right by sizeof (TYPE1) * 8 bits and converted to TYPE3;
+   otherwise c[i] is assigned its own current value.  */
+#define DEF_LOOP(TYPE1, WTYPE1, TYPE2, WTYPE2, TYPE3) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE1##_##TYPE2 (TYPE1 *__restrict a, TYPE2 *__restrict b, \
+ TYPE3 *__restrict c, TYPE3 *__restrict pred, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ c[i] \
+ = pred[i] \
+ ? (TYPE3) (((WTYPE1) a[i] * (WTYPE2) b[i]) >> sizeof (TYPE1) * 8) \
+ : c[i]; \
+ }
+
+/* Invoke T once per (TYPE1, WTYPE1, TYPE2, WTYPE2, TYPE3) tuple.  Each
+   tuple pairs a signed and an unsigned operand type of the same width,
+   in both orders, and the result type TYPE3 is always the signed type
+   of that width.  */
+#define TEST_ALL(T) \
+ T (int8_t, int16_t, uint8_t, uint16_t, int8_t) \
+ T (uint8_t, uint16_t, int8_t, int16_t, int8_t) \
+ T (int16_t, int32_t, uint16_t, uint32_t, int16_t) \
+ T (uint16_t, uint32_t, int16_t, int32_t, int16_t) \
+ T (int32_t, int64_t, uint32_t, uint64_t, int32_t) \
+ T (uint32_t, uint64_t, int32_t, int64_t, int32_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* FIXME: need midend support: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111381 */
+/* { dg-final { scan-assembler-times {\tvmulhsu\.vv\tv[0-9]+,v[0-9]+,v[0-9]+,v0.t} 6 {xfail riscv*-*-*} } } */
+/* { dg-final { scan-assembler-not {\tvmerge\.vvm\t} } } */
--- /dev/null
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "cond_mulh-1.c"
+
+#define N 99
+
+/* Fill a[], b[] and pred[], seed c[] with a known per-index value, call
+   test_<TYPE>_<WTYPE>, then check every element.  Where pred[i] is
+   nonzero, c[i] must equal the widened product shifted right by
+   sizeof (TYPE) * 8 bits; elsewhere c[i] must still hold its seed.
+   Without the seed c[] would be read uninitialised and the check for
+   unselected elements would compare c[i] with itself.  */
+#define TEST_LOOP(TYPE, WTYPE) \
+  { \
+    TYPE a[N], b[N], c[N], pred[N]; \
+    for (int i = 0; i < N; ++i) \
+      { \
+	a[i] = i * i; \
+	b[i] = ((i + 2) % 3) * (i + 1); \
+	/* N - i lies in [1, N], so it fits every element type used.  */ \
+	c[i] = N - i; \
+	pred[i] = i % 3; \
+	asm volatile ("" ::: "memory"); \
+      } \
+    test_##TYPE##_##WTYPE (a, b, c, pred, N); \
+    for (int i = 0; i < N; ++i) \
+      if (c[i] \
+	  != (pred[i] \
+		? (TYPE) (((WTYPE) a[i] * (WTYPE) b[i]) >> sizeof (TYPE) * 8) \
+		: (TYPE) (N - i))) \
+	__builtin_abort (); \
+  }
+
+/* Expand TEST_LOOP once for every type tuple that TEST_ALL supplies
+   (TEST_ALL is presumably provided by the included cond_mulh-1.c) and
+   return 0 when none of the checks called __builtin_abort.  */
+int
+main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
--- /dev/null
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "cond_mulh-2.c"
+
+#define N 99
+
+/* Fill a[], b[] and pred[], seed c[] with a known per-index value, call
+   test_<TYPE1>_<TYPE2>, then check every element.  Where pred[i] is
+   nonzero, c[i] must equal the widened product shifted right by
+   sizeof (TYPE1) * 8 bits; elsewhere c[i] must still hold its seed.
+   Without the seed c[] would be read uninitialised and the check for
+   unselected elements would compare c[i] with itself.  */
+#define TEST_LOOP(TYPE1, WTYPE1, TYPE2, WTYPE2, TYPE3) \
+  { \
+    TYPE1 a[N]; \
+    TYPE2 b[N]; \
+    TYPE3 c[N], pred[N]; \
+    for (int i = 0; i < N; ++i) \
+      { \
+	a[i] = i * i; \
+	b[i] = ((i + 2) % 3) * (i + 1); \
+	/* N - i lies in [1, N], so it fits every element type used.  */ \
+	c[i] = N - i; \
+	pred[i] = i % 3; \
+	asm volatile ("" ::: "memory"); \
+      } \
+    test_##TYPE1##_##TYPE2 (a, b, c, pred, N); \
+    for (int i = 0; i < N; ++i) \
+      if (c[i] \
+	  != (pred[i] ? (TYPE3) (((WTYPE1) a[i] * (WTYPE2) b[i]) \
+				 >> sizeof (TYPE1) * 8) \
+		      : (TYPE3) (N - i))) \
+	__builtin_abort (); \
+  }
+
+/* Expand TEST_LOOP once for every type tuple that TEST_ALL supplies
+   (TEST_ALL is presumably provided by the included cond_mulh-2.c) and
+   return 0 when none of the checks called __builtin_abort.  */
+int
+main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}