From: Julian Brown
Date: Tue, 29 Jun 2021 10:57:31 +0000 (-0700)
Subject: amdgcn: Add [us]mulsidi3/muldi3 patterns
X-Git-Tag: basepoints/gcc-13~6405
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8f332122589f97b9c974b168ca5b0b186296f0e4;p=thirdparty%2Fgcc.git

amdgcn: Add [us]mulsidi3/muldi3 patterns

This patch improves 64-bit multiplication for AMD GCN: patterns for
unsigned and signed 32x32->64 bit multiplication have been added, and
also 64x64->64 bit multiplication is now open-coded rather than calling
a library function (which may be a win for code size as well as speed:
the function calling sequence isn't particularly concise for GCN).

This version of the patch uses define_insn_and_split in order to keep
multiply operations together during RTL optimisations up to register
allocation: this appears to produce more compact code via inspection on
small test cases than the previous approach using an expander.

The DImode multiply implementation is lost from libgcc if we build it
for DImode/TImode rather than SImode/DImode, a change we make in a later
patch in this series.

2021-06-29  Julian Brown

gcc/
	* config/gcn/gcn.md (mulsidi3, mulsidi3_reg, mulsidi3_imm, muldi3): Add patterns.
---

diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index d1d49981ebbf..82f7a468bcec 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -1457,6 +1457,117 @@
    (set_attr "length" "4,8,8")
    (set_attr "gcn_version" "gcn5,gcn5,*")])
 
+;; 32x32->64 bit widening multiply; any_extend yields the signed and
+;; unsigned variants.  While pseudos can still be created, a non-GCN5
+;; target forces an operand 2 that is not an inline immediate into a
+;; register; the _reg or _imm pattern is then picked by REG_P.
+;; NOTE(review): <su> is assumed to be the s/u code attribute for
+;; any_extend defined elsewhere in gcn.md -- confirm.
+(define_expand "<su>mulsidi3"
+  [(set (match_operand:DI 0 "register_operand" "")
+        (mult:DI (any_extend:DI
+                   (match_operand:SI 1 "register_operand" ""))
+                 (any_extend:DI
+                   (match_operand:SI 2 "nonmemory_operand" ""))))]
+  ""
+{
+  if (can_create_pseudo_p ()
+      && !TARGET_GCN5
+      && !gcn_inline_immediate_operand (operands[2], SImode))
+    operands[2] = force_reg (SImode, operands[2]);
+
+  if (REG_P (operands[2]))
+    emit_insn (gen_<su>mulsidi3_reg (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_<su>mulsidi3_imm (operands[0], operands[1], operands[2]));
+
+  DONE;
+})
+
+;; Register-operand form.  Kept as a single insn until after reload, then
+;; split into mulsi3 for the low word and the matching highpart multiply
+;; for the high word.
+(define_insn_and_split "<su>mulsidi3_reg"
+  [(set (match_operand:DI 0 "register_operand" "=&Sg, &v")
+        (mult:DI (any_extend:DI
+                   (match_operand:SI 1 "register_operand" "%Sg, v"))
+                 (any_extend:DI
+                   (match_operand:SI 2 "register_operand" "Sg,vSv"))))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+  {
+    rtx dstlo = gen_lowpart (SImode, operands[0]);
+    rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]);
+    emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2]));
+    emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2]));
+    DONE;
+  }
+  [(set_attr "gcn_version" "gcn5,*")])
+
+;; Immediate-operand form; only valid on GCN5 or for inline immediates.
+;; Split after reload in the same way as the register form.
+(define_insn_and_split "<su>mulsidi3_imm"
+  [(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg,&v")
+        (mult:DI (any_extend:DI
+                   (match_operand:SI 1 "register_operand" "Sg, Sg, v"))
+                 (match_operand:DI 2 "gcn_32bit_immediate_operand"
+                                     "A, B, A")))]
+  "TARGET_GCN5 || gcn_inline_immediate_operand (operands[2], SImode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    rtx dstlo = gen_lowpart (SImode, operands[0]);
+    rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]);
+    emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2]));
+    emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2]));
+    DONE;
+  }
+  [(set_attr "gcn_version" "gcn5,gcn5,*")])
+
+;; 64x64->64 bit multiply, open-coded after reload as
+;;   dst     = op1lo * op2lo   (unsigned 32x32->64)
+;;   dsthi  += op1lo * op2hi
+;;   dsthi  += op1hi * op2lo
+;; Operand 3 is the SImode temporary for the cross products; operands 4
+;; and 5 are the scratch clobbers attached to each add.
+(define_insn_and_split "muldi3"
+  [(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg, &v,&v")
+        (mult:DI (match_operand:DI 1 "register_operand" "%Sg, Sg, v, v")
+                 (match_operand:DI 2 "nonmemory_operand" "Sg, i,vSv, A")))
+   (clobber (match_scratch:SI 3 "=&Sg,&Sg,&v,&v"))
+   (clobber (match_scratch:BI 4 "=cs, cs, X, X"))
+   (clobber (match_scratch:DI 5 "=X, X,cV,cV"))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+  {
+    rtx tmp = operands[3];
+    rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]);
+    rtx op1lo = gcn_operand_part (DImode, operands[1], 0);
+    rtx op1hi = gcn_operand_part (DImode, operands[1], 1);
+    rtx op2lo = gcn_operand_part (DImode, operands[2], 0);
+    rtx op2hi = gcn_operand_part (DImode, operands[2], 1);
+    emit_insn (gen_umulsidi3 (operands[0], op1lo, op2lo));
+    emit_insn (gen_mulsi3 (tmp, op1lo, op2hi));
+    rtx add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp));
+    rtx clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]);
+    rtx clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]);
+    add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2));
+    emit_insn (add);
+    emit_insn (gen_mulsi3 (tmp, op1hi, op2lo));
+    add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp));
+    clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]);
+    clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]);
+    add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2));
+    emit_insn (add);
+    DONE;
+  }
+  [(set_attr "gcn_version" "gcn5,gcn5,*,*")])
+
 (define_insn "mulhisi3"
   [(set (match_operand:SI 0 "register_operand" "=v")
         (mult:SI