From 84116a684bcb78e45876b853ecebf1ea97042e58 Mon Sep 17 00:00:00 2001 From: Pan Li Date: Mon, 24 Nov 2025 09:05:02 +0800 Subject: [PATCH] RISC-V: Combine vec_duplicate + vmsltu.vv to vmsltu.vx on GR2VR cost This patch combines vec_duplicate + vmsltu.vv into vmsltu.vx, as shown in the example below. Whether the related pattern is used depends on the cost of vec_duplicate from GR2VR: late-combine will perform the combination if the GR2VR cost is zero, and reject it if the GR2VR cost is greater than zero. Assume we have asm code like below, with a GR2VR cost of 0. Before this patch: 11 beq a3,zero,.L8 12 vsetvli a5,zero,e32,m1,ta,ma 13 vmv.v.x v2,a2 ... 16 .L3: 17 vsetvli a5,a3,e32,m1,ta,ma ... 22 vmsltu.vv v1,v2,v3 ... 25 bne a3,zero,.L3 After this patch: 11 beq a3,zero,.L8 ... 14 .L3: 15 vsetvli a5,a3,e32,m1,ta,ma ... 20 vmsltu.vx v1,a2,v3 ... 23 bne a3,zero,.L3 gcc/ChangeLog: * config/riscv/autovec-opt.md (*pred_cmp_swapped_scalar): Add new pattern to match vec_dup > vec for vmsltu. * config/riscv/predicates.md (comparison_swappable_operator): Add new predicate for the above pattern. * config/riscv/riscv-protos.h (expand_vx_cmp_vec_dup_vec): Add new func decl. * config/riscv/riscv-v.cc (get_swapped_cmp_rtx_code): Add new func to convert cmp code to swapped, like gtu to ltu. (expand_vx_cmp_vec_dup_vec): Add new func to emit vmsltu.vx. 
Signed-off-by: Pan Li --- gcc/config/riscv/autovec-opt.md | 30 ++++++++++++++++++++++++++++++ gcc/config/riscv/predicates.md | 3 +++ gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv-v.cc | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 52ab79c555a..40627fac91c 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1976,6 +1976,36 @@ } [(set_attr "type" "viwmuladd")]) +(define_insn_and_split "*pred_cmp_swapped_scalar" + [(set (match_operand: 0 "register_operand") + (if_then_else: + (unspec: + [(match_operand: 1 "vector_mask_operand") + (match_operand 6 "vector_length_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operator: 3 "comparison_swappable_operator" + [(vec_duplicate:V_VLSI + (match_operand: 4 "register_operand")) + (match_operand:V_VLSI 5 "register_operand")]) + (unspec: + [(match_operand:DI 2 "register_operand")] UNSPEC_VUNDEF)))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::expand_vx_cmp_vec_dup_vec (operands[0], operands[4], + operands[5], + GET_CODE (operands[3]), + mode); + + DONE; + } + [(set_attr "type" "vicmp")]) + ;; ============================================================================= ;; Combine vec_duplicate + op.vv to op.vf ;; Include diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index f811a4e40ca..3cc954e10cc 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -611,6 +611,9 @@ (define_predicate "comparison_except_ge_operator" (match_code "eq,ne,le,leu,gt,gtu,lt,ltu")) +(define_predicate "comparison_swappable_operator" + (match_code "gtu")) + (define_predicate "ge_operator" (match_code "ge,geu")) diff --git a/gcc/config/riscv/riscv-protos.h 
b/gcc/config/riscv/riscv-protos.h index a372779cf9f..261c25c4c6b 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -691,6 +691,7 @@ void expand_vx_binary_vec_dup_vec (rtx, rtx, rtx, rtx_code, machine_mode); void expand_vx_binary_vec_vec_dup (rtx, rtx, rtx, rtx_code, machine_mode); void expand_vx_binary_vxrm_vec_vec_dup (rtx, rtx, rtx, int, int, machine_mode); void expand_vx_binary_vxrm_vec_dup_vec (rtx, rtx, rtx, int, int, machine_mode); +void expand_vx_cmp_vec_dup_vec (rtx, rtx, rtx, rtx_code, machine_mode); #endif bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode, bool, void (*)(rtx *, rtx), enum avl_type); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 5e30b77b4eb..c0f0b99fe24 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -5862,6 +5862,38 @@ expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx op_2, emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops); } +static rtx_code +get_swapped_cmp_rtx_code (rtx_code code) +{ + switch (code) + { + case GTU: + return LTU; + default: + gcc_unreachable (); + } +} + +/* Expand the vx compare combine of the form v2 = vec_dup(x) > v1, i.e. the + first operand comes from the vec_duplicate and the second operand is the + vector register. The RVV vms* instructions only provide the form + v2 = v1 < vec_dup(x), so swap op_1 and op_2 and emit the insn with the + swapped comparison code (GTU becomes LTU) instead. */ + +void +expand_vx_cmp_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2, rtx_code code, + machine_mode mode) +{ + machine_mode mask_mode = get_mask_mode (mode); + rtx_code swapped_code = get_swapped_cmp_rtx_code (code); + + insn_code icode = code_for_pred_cmp_scalar (mode); + rtx cmp = gen_rtx_fmt_ee (swapped_code, mask_mode, op_2, op_1); + rtx ops[] = {op_0, cmp, op_2, op_1}; + + emit_vlmax_insn (icode, COMPARE_OP, ops); +} + /* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as well. */ void -- 2.47.3