From: Andrew Stubbs Date: Wed, 1 Mar 2023 15:32:50 +0000 (+0000) Subject: amdgcn: vec_extract no-op insns X-Git-Tag: basepoints/gcc-14~379 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=db80ccd34365c14e529111c94b93d3fb201b6eef;p=thirdparty%2Fgcc.git amdgcn: vec_extract no-op insns Just using move insn for no-op conversions triggers special move handling in IRA which declares that subreg of vectors aren't valid and routes everything through memory. These patterns make the vec_select explicit and all is well. gcc/ChangeLog: * config/gcn/gcn-protos.h (gcn_stepped_zero_int_parallel_p): New. * config/gcn/gcn-valu.md (V_1REG_ALT): New. (V_2REG_ALT): New. (vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop): New. (vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop): New. (vec_extract<V_ALL:mode><V_ALL_ALT:mode>): Use new patterns. * config/gcn/gcn.cc (gcn_stepped_zero_int_parallel_p): New. * config/gcn/predicates.md (ascending_zero_int_parallel): New. --- diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h index d7862b21a2a4..287ce17d4225 100644 --- a/gcc/config/gcn/gcn-protos.h +++ b/gcc/config/gcn/gcn-protos.h @@ -75,6 +75,7 @@ extern reg_class gcn_regno_reg_class (int regno); extern bool gcn_scalar_flat_address_p (rtx); extern bool gcn_scalar_flat_mem_p (rtx); extern bool gcn_sgpr_move_p (rtx, rtx); +extern bool gcn_stepped_zero_int_parallel_p (rtx op, int step); extern bool gcn_valid_move_p (machine_mode, rtx, rtx); extern rtx gcn_vec_constant (machine_mode, int); extern rtx gcn_vec_constant (machine_mode, rtx); diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 787d7709d0d5..334b6b0b51cf 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -49,6 +49,13 @@ V16QI V16HI V16SI V16HF V16SF V32QI V32HI V32SI V32HF V32SF V64QI V64HI V64SI V64HF V64SF]) +(define_mode_iterator V_1REG_ALT + [V2QI V2HI V2SI V2HF V2SF + V4QI V4HI V4SI V4HF V4SF + V8QI V8HI V8SI V8HF V8SF + V16QI V16HI V16SI V16HF V16SF + V32QI V32HI V32SI V32HF V32SF + V64QI V64HI V64SI V64HF V64SF]) (define_mode_iterator V_INT_1REG 
[V2QI V2HI V2SI @@ -80,6 +87,13 @@ V16DI V16DF V32DI V32DF V64DI V64DF]) +(define_mode_iterator V_2REG_ALT + [V2DI V2DF + V4DI V4DF + V8DI V8DF + V16DI V16DF + V32DI V32DF + V64DI V64DF]) ; Vector modes with native support (define_mode_iterator V_noQI @@ -788,11 +802,36 @@ (set_attr "exec" "none") (set_attr "laneselect" "yes")]) +(define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop" + [(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v") + (vec_select:V_1REG_ALT + (match_operand:V_1REG 1 "register_operand" " 0,v") + (match_operand 2 "ascending_zero_int_parallel" "")))] + "MODE_VF (<V_1REG_ALT:MODE>mode) < MODE_VF (<V_1REG:MODE>mode) + && <V_1REG_ALT:SCALAR_MODE>mode == <V_1REG:SCALAR_MODE>mode" + "@ + ; in-place extract %0 + v_mov_b32\t%L0, %L1" + [(set_attr "type" "vmult") + (set_attr "length" "0,8")]) + +(define_insn "vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop" + [(set (match_operand:V_2REG_ALT 0 "register_operand" "=v,v") + (vec_select:V_2REG_ALT + (match_operand:V_2REG 1 "register_operand" " 0,v") + (match_operand 2 "ascending_zero_int_parallel" "")))] + "MODE_VF (<V_2REG_ALT:MODE>mode) < MODE_VF (<V_2REG:MODE>mode) + && <V_2REG_ALT:SCALAR_MODE>mode == <V_2REG:SCALAR_MODE>mode" + "@ + ; in-place extract %0 + v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1" + [(set_attr "type" "vmult") + (set_attr "length" "0,8")]) + (define_expand "vec_extract<V_ALL:mode><V_ALL_ALT:mode>" - [(set (match_operand:V_ALL_ALT 0 "register_operand") - (vec_select:V_ALL_ALT - (match_operand:V_ALL 1 "register_operand") - (parallel [(match_operand 2 "immediate_operand")])))] + [(match_operand:V_ALL_ALT 0 "register_operand") + (match_operand:V_ALL 1 "register_operand") + (match_operand 2 "immediate_operand")] "MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode) && <V_ALL_ALT:SCALAR_MODE>mode == <V_ALL:SCALAR_MODE>mode" { @@ -802,8 +841,12 @@ if (firstlane == 0) { - /* A plain move will do. */ - tmp = operands[1]; + rtx parallel = gen_rtx_PARALLEL (<V_ALL:MODE>mode, + rtvec_alloc (numlanes)); + for (int i = 0; i < numlanes; i++) + XVECEXP (parallel, 0, i) = GEN_INT (i); + emit_insn (gen_vec_extract<V_ALL:mode><V_ALL_ALT:mode>_nop + (operands[0], operands[1], parallel)); } else { /* FIXME: optimize this by using DPP where available. 
*/ @@ -815,10 +858,10 @@ tmp = gen_reg_rtx (<V_ALL:MODE>mode); emit_insn (gen_ds_bpermute<V_ALL:mode> (tmp, permutation, operands[1], get_exec (<V_ALL:MODE>mode))); - } - emit_move_insn (operands[0], - gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0)); + emit_move_insn (operands[0], + gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0)); + } DONE; }) diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index aca17a19d84e..5bf88e980838 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -1422,6 +1422,24 @@ CODE_FOR_OP (reload_out) #undef CODE_FOR_OP #undef CODE_FOR +/* Return true if OP is a PARALLEL of CONST_INTs that form a linear + series with step STEP. */ + +bool +gcn_stepped_zero_int_parallel_p (rtx op, int step) +{ + if (GET_CODE (op) != PARALLEL || !CONST_INT_P (XVECEXP (op, 0, 0))) + return false; + + unsigned HOST_WIDE_INT base = 0; + for (int i = 0; i < XVECLEN (op, 0); ++i) + if (!CONST_INT_P (XVECEXP (op, 0, i)) + || UINTVAL (XVECEXP (op, 0, i)) != base + i * step) + return false; + + return true; +} + /* }}} */ /* {{{ Addresses, pointers and moves. */ diff --git a/gcc/config/gcn/predicates.md b/gcc/config/gcn/predicates.md index a20acf7e9ef5..5554a06b63b5 100644 --- a/gcc/config/gcn/predicates.md +++ b/gcc/config/gcn/predicates.md @@ -197,3 +197,10 @@ (ior (match_operand 0 "gcn_ds_memory_operand") (and (match_code "unspec") (match_test "XINT (op, 1) == UNSPEC_VECTOR"))))) + +(define_predicate "ascending_zero_int_parallel" + (match_code "parallel") +{ + return gcn_stepped_zero_int_parallel_p (op, 1); +}) +