From: Xi Ruoyao Date: Sat, 22 Feb 2025 07:34:54 +0000 (+0800) Subject: LoongArch: Avoid unnecessary zero-initialization using LSX for scalar popcount X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d7434f3babc5a954fa68dd0c8ce6d4e917a017c1;p=thirdparty%2Fgcc.git LoongArch: Avoid unnecessary zero-initialization using LSX for scalar popcount Now for __builtin_popcountl we are getting things like vrepli.b $vr0,0 vinsgr2vr.d $vr0,$r4,0 vpcnt.d $vr0,$vr0 vpickve2gr.du $r4,$vr0,0 slli.w $r4,$r4,0 jr $r1 The "vrepli.b" instruction is introduced by the init-regs pass (see PR61810 and all the issues it references). To work it around, we can use post-reload instead of define_expand: the "f" constraint will make the compiler automatically move the scalar between GPR and FPR, and reload is much later than init-regs so init-regs won't get in our way. Now the code looks like: movgr2fr.d $f0,$r4 vpcnt.d $vr0,$vr0 movfr2gr.d $r4,$f0 jr $r1 gcc/ChangeLog: * config/loongarch/loongarch.md (cntmap): Change to uppercase. (popcount<mode>2): Modify to a post reload split. --- diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 2f4817d885c..ba668880ba5 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -1773,21 +1773,23 @@ ;; This attribute used for get connection of scalar mode and corresponding ;; vector mode. -(define_mode_attr cntmap [(SI "v4si") (DI "v2di")]) +(define_mode_attr cntmap [(SI "V4SI") (DI "V2DI")]) -(define_expand "popcount<mode>2" - [(set (match_operand:GPR 0 "register_operand") - (popcount:GPR (match_operand:GPR 1 "register_operand")))] +(define_insn_and_split "popcount<mode>2" + [(set (match_operand:GPR 0 "register_operand" "=f") + (popcount:GPR (match_operand:GPR 1 "register_operand" "f")))] "ISA_HAS_LSX" + "#" + ;; Do the split very lately to work around init-regs unneeded zero- + ;; initialization from init-regs. See PR61810 and all the referenced + ;; issues. 
+ "&& reload_completed" + [(set (match_operand:<cntmap> 0 "register_operand" "=f") + (popcount:<cntmap> + (match_operand:<cntmap> 1 "register_operand" "f")))] { - rtx in = operands[1]; - rtx out = operands[0]; - rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) : - gen_reg_rtx (V2DImode); - emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1))); - emit_insn (gen_popcount<cntmap>2 (vreg, vreg)); - emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0))); - DONE; + operands[0] = gen_rtx_REG (<cntmap>mode, REGNO (operands[0])); + operands[1] = gen_rtx_REG (<cntmap>mode, REGNO (operands[1])); }) ;;