From: Jan Beulich Date: Fri, 9 Jan 2026 07:44:30 +0000 (+0100) Subject: x86: optimize MOVSX between accumulator regs X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b689d0ea0363ffdd172a489e3d91ac1d1f1790d5;p=thirdparty%2Fbinutils-gdb.git x86: optimize MOVSX between accumulator regs Except on the K6, CBW/CWDE/CDQE perform equally well, but are shorter to encode. --- diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 28b1f4192d8..c7b47eb7e24 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -5174,6 +5174,36 @@ optimize_encoding (void) i.seg[0] = NULL; } + if (((i.tm.opcode_space == SPACE_0F + && (i.tm.base_opcode | 1) == 0xbf + && (i.types[0].bitfield.byte + ? i.types[1].bitfield.word + : i.types[1].bitfield.dword)) + || (i.tm.opcode_space == SPACE_BASE + && i.tm.base_opcode == 0x63 + && i.types[1].bitfield.qword)) + && i.reg_operands == 2 + && i.op[0].regs->reg_type.bitfield.instance == Accum + && i.op[1].regs->reg_type.bitfield.instance == Accum + && (cpu_arch_tune != PROCESSOR_K6 || optimize_for_space)) + { + /* Optimize: -O: + movsb %al, %ax -> cbw + movsw %ax, %eax -> cwde + movsl %eax, %rax -> cdqe + */ + i.tm.opcode_space = SPACE_BASE; + i.tm.base_opcode = 0x98; + i.tm.opcode_modifier.modrm = 0; + /* Leave the destination register in place for process_suffix() to take + care of operand sizing. This will end up as short_form encoding, + with the register number being 0 (i.e. not altering the opcode). 
*/ + i.reg_operands = 1; + i.op[0].regs = i.op[1].regs; + i.tm.operand_types[1].bitfield.class = ClassNone; + return; + } + if (optimize_for_space && (i.tm.mnem_off == MN_test || (i.tm.base_opcode == 0xf6 diff --git a/gas/testsuite/gas/i386/optimize-2.d b/gas/testsuite/gas/i386/optimize-2.d index 89551d19978..b9a7b0b0f4a 100644 --- a/gas/testsuite/gas/i386/optimize-2.d +++ b/gas/testsuite/gas/i386/optimize-2.d @@ -24,6 +24,12 @@ Disassembly of section .text: +[a-f0-9]+: 09 f6 or %esi,%esi +[a-f0-9]+: 87 0a xchg %ecx,\(%edx\) +[a-f0-9]+: 87 11 xchg %edx,\(%ecx\) + +[a-f0-9]+: 66 98 cbtw + +[a-f0-9]+: 66 98 cbtw + +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: 66 98 cbtw + +[a-f0-9]+: 98 cwtl +[a-f0-9]+: d0 e2 shl \$1,%dl +[a-f0-9]+: d0 e2 shl \$1,%dl +[a-f0-9]+: 66 d1 e2 shl \$1,%dx diff --git a/gas/testsuite/gas/i386/optimize-2.s b/gas/testsuite/gas/i386/optimize-2.s index 1ddaab05540..375a95a467f 100644 --- a/gas/testsuite/gas/i386/optimize-2.s +++ b/gas/testsuite/gas/i386/optimize-2.s @@ -22,6 +22,17 @@ _start: lock xchg %ecx, (%edx) lock xchg (%ecx), %edx + movsb %al, %ax + movsbw %al, %ax + + movsw %ax, %eax + movswl %ax, %eax + + .intel_syntax noprefix + movsx ax, al + movsx eax, ax + .att_syntax prefix + shl $1, %dl shl %dl diff --git a/gas/testsuite/gas/i386/optimize-2b.d b/gas/testsuite/gas/i386/optimize-2b.d index 3fe8945142b..9f5c4889e3f 100644 --- a/gas/testsuite/gas/i386/optimize-2b.d +++ b/gas/testsuite/gas/i386/optimize-2b.d @@ -25,6 +25,12 @@ Disassembly of section .text: +[a-f0-9]+: 85 f6 test %esi,%esi +[a-f0-9]+: 87 0a xchg %ecx,\(%edx\) +[a-f0-9]+: 87 11 xchg %edx,\(%ecx\) + +[a-f0-9]+: 66 98 cbtw + +[a-f0-9]+: 66 98 cbtw + +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: 66 98 cbtw + +[a-f0-9]+: 98 cwtl +[a-f0-9]+: 00 d2 add %dl,%dl +[a-f0-9]+: 00 d2 add %dl,%dl +[a-f0-9]+: 66 01 d2 add %dx,%dx diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.d b/gas/testsuite/gas/i386/x86-64-optimize-3.d index 51513c828e4..c91503aa8f9 
100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-3.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-3.d @@ -80,6 +80,16 @@ Disassembly of section .text: +[a-f0-9]+: 66 09 f6 or %si,%si +[a-f0-9]+: 09 ff or %edi,%edi +[a-f0-9]+: 4d 09 c0 or %r8,%r8 + +[a-f0-9]+: 66 98 cbtw + +[a-f0-9]+: 66 98 cbtw + +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: 48 98 cltq + +[a-f0-9]+: 48 98 cltq + +[a-f0-9]+: 48 98 cltq + +[a-f0-9]+: 66 98 cbtw + +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: 48 98 cltq +[a-f0-9]+: c5 f1 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: c5 f9 6f d1 vmovdqa %xmm1,%xmm2 +[a-f0-9]+: c5 f9 6f d1 vmovdqa %xmm1,%xmm2 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.s b/gas/testsuite/gas/i386/x86-64-optimize-3.s index b2cf16d05ff..4fd29aeec98 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-3.s +++ b/gas/testsuite/gas/i386/x86-64-optimize-3.s @@ -39,6 +39,22 @@ _start: or %edi, %edi or %r8, %r8 + movsb %al, %ax + movsbw %al, %ax + + movsw %ax, %eax + movswl %ax, %eax + + movsl %eax, %rax + movslq %eax, %rax + movsxd %eax, %rax + + .intel_syntax noprefix + movsx ax, al + movsx eax, ax + movsx rax, eax + .att_syntax prefix + vandnpd %zmm1, %zmm1, %zmm5 vmovdqa32 %xmm1, %xmm2 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3b.d b/gas/testsuite/gas/i386/x86-64-optimize-3b.d index abfc8f91781..2a8fd3a0f41 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-3b.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-3b.d @@ -81,6 +81,16 @@ Disassembly of section .text: +[a-f0-9]+: 66 85 f6 test %si,%si +[a-f0-9]+: 09 ff or %edi,%edi +[a-f0-9]+: 4d 85 c0 test %r8,%r8 + +[a-f0-9]+: 66 98 cbtw + +[a-f0-9]+: 66 98 cbtw + +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: 48 98 cltq + +[a-f0-9]+: 48 98 cltq + +[a-f0-9]+: 48 98 cltq + +[a-f0-9]+: 66 98 cbtw + +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: 48 98 cltq +[a-f0-9]+: c5 f1 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: c5 f9 6f d1 vmovdqa %xmm1,%xmm2 +[a-f0-9]+: c5 f9 6f d1 vmovdqa %xmm1,%xmm2 diff --git 
a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index 4d247b5c7f4..63f4f31c301 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -220,12 +220,12 @@ movbe, 0x60, Movbe&APX_F, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4, { R bswap, 0xfc8, i486, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 } // Move with sign extend. -movsb, 0xfbe, i386, Modrm|No_bSuf|No_sSuf, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } -movsw, 0xfbf, i386, Modrm|No_bSuf|No_wSuf|No_sSuf, { Reg16|Unspecified|BaseIndex, Reg32|Reg64 } -movsl, 0x63, x64, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Reg32|Unspecified|BaseIndex, Reg64 } -movsx, 0xfbe, i386, W|Modrm|No_lSuf|No_sSuf|No_qSuf, { Reg8|Reg16|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } -movsx, 0x63, x64, Modrm|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Reg32|Unspecified|BaseIndex, Reg32|Reg64 } -movsxd, 0x63, x64, Modrm|NoSuf, { Reg32|Unspecified|BaseIndex, Reg32|Reg64 } +movsb, 0xfbe, i386, Modrm|No_bSuf|No_sSuf|Optimize, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } +movsw, 0xfbf, i386, Modrm|No_bSuf|No_wSuf|No_sSuf|Optimize, { Reg16|Unspecified|BaseIndex, Reg32|Reg64 } +movsl, 0x63, x64, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|Optimize, { Reg32|Unspecified|BaseIndex, Reg64 } +movsx, 0xfbe, i386, W|Modrm|No_lSuf|No_sSuf|No_qSuf|Optimize, { Reg8|Reg16|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } +movsx, 0x63, x64, Modrm|No_bSuf|No_wSuf|No_sSuf|No_qSuf|Optimize, { Reg32|Unspecified|BaseIndex, Reg32|Reg64 } +movsxd, 0x63, x64, Modrm|NoSuf|Optimize, { Reg32|Unspecified|BaseIndex, Reg32|Reg64 } movsxd, 0x63, x64, Amd64|Modrm|NoSuf, { Reg32|Unspecified|BaseIndex, Reg16 } movsxd, 0x63, x64, Intel64|Modrm|NoSuf, { Reg16|Unspecified|BaseIndex, Reg16 } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index 33dee8b02d0..a97dd6487aa 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -214,7 +214,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_movsb, 0xbe, 2, SPACE_0F, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 
0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, { { 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -224,7 +224,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_movsw, 0xbf, 2, SPACE_0F, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, { { 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -234,7 +234,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_movsl, 0x63, 2, SPACE_BASE, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -244,7 +244,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_movsx, 0xbe, 2, SPACE_0F, None, { 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, { { 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -254,7 +254,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_movsx, 0x63, 2, SPACE_BASE, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -264,7 +264,7 @@ static const 
insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_movsxd, 0x63, 2, SPACE_BASE, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },