From: Jan Beulich Date: Fri, 9 Jan 2026 07:44:40 +0000 (+0100) Subject: x86: optimize MOVZX in a few cases X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=141a0400236ce22bd8d1839d4f92d3a3a3309fb3;p=thirdparty%2Fbinutils-gdb.git x86: optimize MOVZX in a few cases There are shorter encoding options available, so space optimization is possible. --- diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index c7b47eb7e24..b317536cc60 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -5204,6 +5204,69 @@ optimize_encoding (void) return; } + if (optimize_for_space + && i.tm.opcode_space == SPACE_0F + && (i.tm.base_opcode | 1) == 0xb7 + && i.reg_operands == 2 + && !i.op[0].regs->reg_flags + && !i.op[1].regs->reg_flags + && (i.types[0].bitfield.byte + ? i.types[1].bitfield.word + && i.op[0].regs->reg_num < 4 + && i.op[1].regs->reg_num == i.op[0].regs->reg_num + && (!i.suffix || i.suffix == WORD_MNEM_SUFFIX) + : i.types[1].bitfield.dword + && flag_code == CODE_16BIT + && i.op[0].regs->reg_type.bitfield.baseindex + && i.op[0].regs->reg_num != EBP_REG_NUM)) + { + /* Optimize: -Os: + movzb %r8, %r16 -> mov $0, %r8h + + %r8 being one of %al, %cl, %dl, or %bl, with %r16 being the + matching 16-bit reg. + */ + + i.tm.opcode_space = SPACE_BASE; + i.tm.opcode_modifier.w = 0; + i.reg_operands = 1; + if (i.types[0].bitfield.byte) + { + i.tm.base_opcode = 0xb0; + i.tm.opcode_modifier.modrm = 0; + copy_operand (1, 0); + i.op[1].regs += 4; + + im_expressions[0].X_op = O_constant; + im_expressions[0].X_add_number = 0; + i.op[0].imms = &im_expressions[0]; + operand_type_set (&i.types[0], 0); + i.types[0].bitfield.imm8 = 1; + i.tm.operand_types[0] = i.types[0]; + i.tm.operand_types[0].bitfield.class = ClassNone; + i.imm_operands = 1; + + i.suffix = 0; + return; + } + + /* In 16-bit mode, optimize: -Os: + movzw %r16, %r32 -> lea (%r16), %r32 + + %r16 being one of %bx, %si, or %di. + */ + i.tm.base_opcode = 0x8d; + + i.base_reg = i.op[0].regs; + operand_type_set (&i.types[0], 0); + i.types[0].bitfield.baseindex = 1; + i.tm.operand_types[0] = i.types[0]; + i.op[0].disps = NULL; + i.flags[0] = Operand_Mem; + i.mem_operands = 1; + return; + } + if (optimize_for_space && (i.tm.mnem_off == MN_test || (i.tm.base_opcode == 0xf6 diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp index ac9e0b31923..b52d86d6d4f 100644 --- a/gas/testsuite/gas/i386/i386.exp +++ b/gas/testsuite/gas/i386/i386.exp @@ -651,6 +651,8 @@ if [gas_32_check] then { run_dump_test "optimize-6b" run_list_test "optimize-7" "-I${srcdir}/$subdir -march=+noavx2 -al" run_list_test "optimize-8" "-Os" + run_dump_test "optimize16-O2" + run_dump_test "optimize16-Os" run_dump_test "noopt" run_dump_test "lea-optimize" run_dump_test "lea16-optimize" diff --git a/gas/testsuite/gas/i386/optimize-2.d b/gas/testsuite/gas/i386/optimize-2.d index b9a7b0b0f4a..2738b84b80d 100644 --- a/gas/testsuite/gas/i386/optimize-2.d +++ b/gas/testsuite/gas/i386/optimize-2.d @@ -28,8 +28,11 @@ Disassembly of section .text: +[a-f0-9]+: 66 98 cbtw +[a-f0-9]+: 98 cwtl +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: b4 00 mov \$(0x)?0,%ah + +[a-f0-9]+: b5 00 mov \$(0x)?0,%ch +[a-f0-9]+: 66 98 cbtw +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: b6 00 mov \$(0x)?0,%dh +[a-f0-9]+: d0 e2 shl \$1,%dl +[a-f0-9]+: d0 e2 shl \$1,%dl +[a-f0-9]+: 66 d1 e2 shl \$1,%dx diff --git a/gas/testsuite/gas/i386/optimize-2.s b/gas/testsuite/gas/i386/optimize-2.s index 375a95a467f..b2b1cc112df 100644 --- a/gas/testsuite/gas/i386/optimize-2.s +++ b/gas/testsuite/gas/i386/optimize-2.s @@ -28,9 +28,13 @@ _start: movsw %ax, %eax movswl %ax, %eax + movzb %al, %ax + movzbw %cl, %cx + .intel_syntax noprefix movsx ax, al movsx eax, ax + movzx dx, dl .att_syntax prefix shl $1, %dl diff --git a/gas/testsuite/gas/i386/optimize-2b.d b/gas/testsuite/gas/i386/optimize-2b.d index 9f5c4889e3f..3e92acfd869 100644 --- a/gas/testsuite/gas/i386/optimize-2b.d +++ b/gas/testsuite/gas/i386/optimize-2b.d @@ -29,8 +29,11 @@ Disassembly of section .text: +[a-f0-9]+: 66 98 cbtw +[a-f0-9]+: 98 cwtl +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: 66 0f b6 c0 movzbw %al,%ax + +[a-f0-9]+: 66 0f b6 c9 movzbw %cl,%cx +[a-f0-9]+: 66 98 cbtw +[a-f0-9]+: 98 cwtl + +[a-f0-9]+: 66 0f b6 d2 movzbw %dl,%dx +[a-f0-9]+: 00 d2 add %dl,%dl +[a-f0-9]+: 00 d2 add %dl,%dl +[a-f0-9]+: 66 01 d2 add %dx,%dx diff --git a/gas/testsuite/gas/i386/optimize16-O2.d b/gas/testsuite/gas/i386/optimize16-O2.d new file mode 100644 index 00000000000..0e59fc16ef2 --- /dev/null +++ b/gas/testsuite/gas/i386/optimize16-O2.d @@ -0,0 +1,15 @@ +#name: 16-bit optimized encoding with -O2 +#source: optimize16.s +#as: -O2 +#objdump: -drwMi8086 + +.*: +file format .* + + +Disassembly of section .text: + +0+ <_start>: + +[a-f0-9]+: 66 0f b7 c3 movzwl %bx,%eax + +[a-f0-9]+: 66 0f b7 ce movzwl %si,%ecx + +[a-f0-9]+: 66 0f b7 d7 movzwl %di,%edx +#pass diff --git a/gas/testsuite/gas/i386/optimize16-Os.d b/gas/testsuite/gas/i386/optimize16-Os.d new file mode 100644 index 00000000000..df4c97c734a --- /dev/null +++ b/gas/testsuite/gas/i386/optimize16-Os.d @@ -0,0 +1,15 @@ +#name: 16-bit optimized encoding with -Os +#source: optimize16.s +#as: -Os +#objdump: -drwMi8086 + +.*: +file format .* + + +Disassembly of section .text: + +0+ <_start>: + +[a-f0-9]+: 66 8d 07 lea \(%bx\),%eax + +[a-f0-9]+: 66 8d 0c lea \(%si\),%ecx + +[a-f0-9]+: 66 8d 15 lea \(%di\),%edx +#pass diff --git a/gas/testsuite/gas/i386/optimize16.s b/gas/testsuite/gas/i386/optimize16.s new file mode 100644 index 00000000000..517f887779e --- /dev/null +++ b/gas/testsuite/gas/i386/optimize16.s @@ -0,0 +1,10 @@ +# Check 16-bit instructions with optimized encoding + + .code16 + .text +_start: + movzw %bx, %eax + movzwl %si, %ecx + .intel_syntax noprefix + movzx edx, di + .att_syntax prefix diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index 63f4f31c301..5797a634de3 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -230,11 +230,11 @@ movsxd, 0x63, x64, Amd64|Modrm|NoSuf, { Reg32|Unspecified|BaseIndex, Reg16 } movsxd, 0x63, x64, Intel64|Modrm|NoSuf, { Reg16|Unspecified|BaseIndex, Reg16 } // Move with zero extend. -movzb, 0xfb6, i386, Modrm|No_bSuf|No_sSuf, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } -movzw, 0xfb7, i386, Modrm|No_bSuf|No_wSuf|No_sSuf, { Reg16|Unspecified|BaseIndex, Reg32|Reg64 } +movzb, 0xfb6, i386, Modrm|No_bSuf|No_sSuf|Optimize, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } +movzw, 0xfb7, i386, Modrm|No_bSuf|No_wSuf|No_sSuf|Optimize, { Reg16|Unspecified|BaseIndex, Reg32|Reg64 } // The 64-bit variant is not particularly useful since the zero extend // 32->64 is implicit, but we can encode them. -movzx, 0xfb6, i386, W|Modrm|No_lSuf|No_sSuf|No_qSuf, { Reg8|Reg16|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } +movzx, 0xfb6, i386, W|Modrm|No_lSuf|No_sSuf|No_qSuf|Optimize, { Reg8|Reg16|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } // Push instructions. push, 0x50, No64, ImplicitStackOp|No_bSuf|No_sSuf|No_qSuf, { Reg16|Reg32 } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index a97dd6487aa..da96efc6fda 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -294,7 +294,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_movzb, 0xb6, 2, SPACE_0F, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, { { 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -304,7 +304,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_movzw, 0xb7, 2, SPACE_0F, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, { { 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -314,7 +314,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_movzx, 0xb6, 2, SPACE_0F, None, { 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, { { 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },