From: Roger Sayle Date: Sun, 1 Jan 2023 17:00:28 +0000 (+0000) Subject: Add post-reload splitter for extendditi2 on x86_64. X-Git-Tag: basepoints/gcc-14~2262 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4f1314f547f69d3a2b1f16ce301267e3bfb4e427;p=thirdparty%2Fgcc.git Add post-reload splitter for extendditi2 on x86_64. This is another step towards a possible solution for PR 105137. This patch introduces a define_insn for extendditi2 that allows DImode to TImode sign-extension to be represented in the early RTL optimizers, before being split post-reload into the exact same idiom as currently produced by RTL expansion. Typically this produces the identical code, so the first new test case: __int128 foo(long long x) { return (__int128)x; } continues to generate: foo: movq %rdi, %rax cqto ret The "magic" is that this representation allows combine and the other RTL optimizers to do a better job. Hence, the second test case: __int128 foo(__int128 a, long long b) { a += ((__int128)b) << 70; return a; } which mainline with -O2 currently generates as: foo: movq %rsi, %rax movq %rdx, %rcx movq %rdi, %rsi salq $6, %rcx movq %rax, %rdi xorl %eax, %eax movq %rcx, %rdx addq %rsi, %rax adcq %rdi, %rdx ret with this patch now becomes: foo: movl $0, %eax salq $6, %rdx addq %rdi, %rax adcq %rsi, %rdx ret i.e. the same code for the signed and unsigned extension variants. 2023-01-01 Roger Sayle Uroš Bizjak gcc/ChangeLog * config/i386/i386.md (extendditi2): New define_insn. (define_split): Use DWIH mode iterator to treat new extendditi2 identically to existing extendsidi2_1. (define_peephole2): Likewise. (define_peephole2): Likewise. (define_split): Likewise. gcc/testsuite/ChangeLog * gcc.target/i386/extendditi2-1.c: New test case. * gcc.target/i386/extendditi2-2.c: Likewise.
--- diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ca40c4fee17a..890c4c839dc6 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4548,17 +4548,27 @@ "!TARGET_64BIT" "#") +(define_insn "extendditi2" + [(set (match_operand:TI 0 "nonimmediate_operand" "=*A,r,?r,?*o") + (sign_extend:TI (match_operand:DI 1 "register_operand" "0,0,r,r"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:DI 2 "=X,X,X,&r"))] + "TARGET_64BIT" + "#") + ;; Split the memory case. If the source register doesn't die, it will stay ;; this way, if it does die, following peephole2s take care of it. (define_split - [(set (match_operand:DI 0 "memory_operand") - (sign_extend:DI (match_operand:SI 1 "register_operand"))) + [(set (match_operand:<DWI> 0 "memory_operand") + (sign_extend:<DWI> (match_operand:DWIH 1 "register_operand"))) (clobber (reg:CC FLAGS_REG)) - (clobber (match_operand:SI 2 "register_operand"))] + (clobber (match_operand:DWIH 2 "register_operand"))] "reload_completed" [(const_int 0)] { - split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]); + rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1); + + split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]); emit_move_insn (operands[3], operands[1]); @@ -4567,12 +4577,12 @@ && REGNO (operands[1]) == AX_REG && REGNO (operands[2]) == DX_REG) { - emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31))); + emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[1], bits)); } else { emit_move_insn (operands[2], operands[1]); - emit_insn (gen_ashrsi3_cvt (operands[2], operands[2], GEN_INT (31))); + emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[2], bits)); } emit_move_insn (operands[4], operands[2]); DONE; @@ -4581,69 +4591,79 @@ ;; Peepholes for the case where the source register does die, after ;; being split with the above splitter.
(define_peephole2 - [(set (match_operand:SI 0 "memory_operand") - (match_operand:SI 1 "general_reg_operand")) - (set (match_operand:SI 2 "general_reg_operand") (match_dup 1)) + [(set (match_operand:DWIH 0 "memory_operand") + (match_operand:DWIH 1 "general_reg_operand")) + (set (match_operand:DWIH 2 "general_reg_operand") (match_dup 1)) (parallel [(set (match_dup 2) - (ashiftrt:SI (match_dup 2) (const_int 31))) + (ashiftrt:DWIH (match_dup 2) + (match_operand 4 "const_int_operand"))) (clobber (reg:CC FLAGS_REG))]) - (set (match_operand:SI 3 "memory_operand") (match_dup 2))] + (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))] "REGNO (operands[1]) != REGNO (operands[2]) + && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1) && peep2_reg_dead_p (2, operands[1]) && peep2_reg_dead_p (4, operands[2]) && !reg_mentioned_p (operands[2], operands[3])" [(set (match_dup 0) (match_dup 1)) - (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31))) + (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4))) (clobber (reg:CC FLAGS_REG))]) (set (match_dup 3) (match_dup 1))]) (define_peephole2 - [(set (match_operand:SI 0 "memory_operand") - (match_operand:SI 1 "general_reg_operand")) - (parallel [(set (match_operand:SI 2 "general_reg_operand") - (ashiftrt:SI (match_dup 1) (const_int 31))) + [(set (match_operand:DWIH 0 "memory_operand") + (match_operand:DWIH 1 "general_reg_operand")) + (parallel [(set (match_operand:DWIH 2 "general_reg_operand") + (ashiftrt:DWIH (match_dup 1) + (match_operand 4 "const_int_operand"))) (clobber (reg:CC FLAGS_REG))]) - (set (match_operand:SI 3 "memory_operand") (match_dup 2))] + (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))] "/* cltd is shorter than sarl $31, %eax */ !optimize_function_for_size_p (cfun) && REGNO (operands[1]) == AX_REG && REGNO (operands[2]) == DX_REG + && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1) && peep2_reg_dead_p (2, operands[1]) && peep2_reg_dead_p (3, operands[2]) &&
!reg_mentioned_p (operands[2], operands[3])" [(set (match_dup 0) (match_dup 1)) - (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31))) + (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4))) (clobber (reg:CC FLAGS_REG))]) (set (match_dup 3) (match_dup 1))]) ;; Extend to register case. Optimize case where source and destination ;; registers match and cases where we can use cltd. (define_split - [(set (match_operand:DI 0 "register_operand") - (sign_extend:DI (match_operand:SI 1 "register_operand"))) + [(set (match_operand:<DWI> 0 "register_operand") + (sign_extend:<DWI> (match_operand:DWIH 1 "register_operand"))) (clobber (reg:CC FLAGS_REG)) - (clobber (match_scratch:SI 2))] + (clobber (match_scratch:DWIH 2))] "reload_completed" [(const_int 0)] { - split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]); + rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1); + + split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]); if (REGNO (operands[3]) != REGNO (operands[1])) emit_move_insn (operands[3], operands[1]); + rtx src = operands[1]; + if (REGNO (operands[3]) == AX_REG) + src = operands[3]; + /* Generate a cltd if possible and doing so it profitable.
*/ if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) - && REGNO (operands[3]) == AX_REG + && REGNO (src) == AX_REG && REGNO (operands[4]) == DX_REG) { - emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31))); + emit_insn (gen_ashr<mode>3_cvt (operands[4], src, bits)); DONE; } if (REGNO (operands[4]) != REGNO (operands[1])) emit_move_insn (operands[4], operands[1]); - emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31))); + emit_insn (gen_ashr<mode>3_cvt (operands[4], operands[4], bits)); DONE; }) diff --git a/gcc/testsuite/gcc.target/i386/extendditi2-1.c b/gcc/testsuite/gcc.target/i386/extendditi2-1.c new file mode 100644 index 000000000000..efbad0e92b3d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/extendditi2-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2" } */ + +__int128 foo(long long x) +{ + return (__int128)x; +} +/* { dg-final { scan-assembler "cqt?o" } } */ diff --git a/gcc/testsuite/gcc.target/i386/extendditi2-2.c b/gcc/testsuite/gcc.target/i386/extendditi2-2.c new file mode 100644 index 000000000000..dbfa6fb90b6e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/extendditi2-2.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2" } */ + +__int128 foo(__int128 a, long long b) { + a += ((__int128)b) << 70; + return a; +} + +__int128 bar(__int128 a, unsigned long long b) { + a += ((__int128)b) << 70; + return a; +} +/* { dg-final { scan-assembler-not "movq" } } */