From: Jan Beulich Date: Fri, 27 Sep 2024 09:21:51 +0000 (+0200) Subject: x86: optimize {,V}EXTRACTPS with immediate 0 X-Git-Tag: gdb-16-branchpoint~788 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=afd5b33bc7a4a0e58a4981c92727d9f85c421f5d;p=thirdparty%2Fbinutils-gdb.git x86: optimize {,V}EXTRACTPS with immediate 0 They are equivalent to simple moves, which are up to 2 bytes shorter to encode (and maybe also cheaper to execute). --- diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 23d75d7644b..611b63fc74a 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -5548,6 +5548,42 @@ optimize_encoding (void) i.reloc[1] = i.reloc[2]; i.tm.operand_types[1] = i.tm.operand_types[2]; + i.operands = 2; + i.imm_operands = 0; + } + else if (i.tm.base_opcode == 0x17 + && i.tm.opcode_space == SPACE_0F3A + && i.op[0].imms->X_op == O_constant + && i.op[0].imms->X_add_number == 0) + { + /* Optimize: -O: + extractps $0, %xmmN, %rM -> movd %xmmN, %rM + extractps $0, %xmmN, mem -> movss %xmmN, mem + vextractps $0, %xmmN, %rM -> vmovd %xmmN, %rM + vextractps $0, %xmmN, mem -> vmovss %xmmN, mem + */ + i.tm.opcode_space = SPACE_0F; + i.tm.opcode_modifier.vexw = VEXW0; + + if (!i.mem_operands) + i.tm.base_opcode = 0x7e; + else + { + i.tm.base_opcode = 0x11; + i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3; + } + + i.op[0].regs = i.op[1].regs; + i.types[0] = i.types[1]; + i.flags[0] = i.flags[1]; + i.tm.operand_types[0] = i.tm.operand_types[1]; + + i.op[1].regs = i.op[2].regs; + i.types[1] = i.types[2]; + i.flags[1] = i.flags[2]; + i.reloc[1] = i.reloc[2]; + i.tm.operand_types[1] = i.tm.operand_types[2]; + i.operands = 2; i.imm_operands = 0; } diff --git a/gas/testsuite/gas/i386/optimize-1.d b/gas/testsuite/gas/i386/optimize-1.d index f496846afe1..c2802b894b9 100644 --- a/gas/testsuite/gas/i386/optimize-1.d +++ b/gas/testsuite/gas/i386/optimize-1.d @@ -166,6 +166,10 @@ Disassembly of section .text: +[a-f0-9]+: 66 .* movd %xmm1,\(%edx\) +[a-f0-9]+: c5 .* vmovd 
%xmm1,%edx +[a-f0-9]+: c5 .* vmovd %xmm1,\(%edx\) + +[a-f0-9]+: 66 .* movd %xmm1,%edx + +[a-f0-9]+: f3 .* movss %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovd %xmm1,%edx + +[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\) +[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax +[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax +[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax diff --git a/gas/testsuite/gas/i386/optimize-1.s b/gas/testsuite/gas/i386/optimize-1.s index ce537582b7c..40546e8c157 100644 --- a/gas/testsuite/gas/i386/optimize-1.s +++ b/gas/testsuite/gas/i386/optimize-1.s @@ -194,6 +194,11 @@ _start: vpextrd $0, %xmm1, %edx vpextrd $0, %xmm1, (%edx) + extractps $0, %xmm1, %edx + extractps $0, %xmm1, (%edx) + vextractps $0, %xmm1, %edx + vextractps $0, %xmm1, (%edx) + bt $15, %ax bt $16, %ax btc $15, %ax diff --git a/gas/testsuite/gas/i386/optimize-1a.d b/gas/testsuite/gas/i386/optimize-1a.d index b039a0de09e..563567f3f30 100644 --- a/gas/testsuite/gas/i386/optimize-1a.d +++ b/gas/testsuite/gas/i386/optimize-1a.d @@ -167,6 +167,10 @@ Disassembly of section .text: +[a-f0-9]+: 66 .* movd %xmm1,\(%edx\) +[a-f0-9]+: c5 .* vmovd %xmm1,%edx +[a-f0-9]+: c5 .* vmovd %xmm1,\(%edx\) + +[a-f0-9]+: 66 .* movd %xmm1,%edx + +[a-f0-9]+: f3 .* movss %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovd %xmm1,%edx + +[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\) +[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax +[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax +[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax diff --git a/gas/testsuite/gas/i386/optimize-4.d b/gas/testsuite/gas/i386/optimize-4.d index ea2c7612b79..2449bc55ad9 100644 --- a/gas/testsuite/gas/i386/optimize-4.d +++ b/gas/testsuite/gas/i386/optimize-4.d @@ -166,6 +166,10 @@ Disassembly of section .text: +[a-f0-9]+: 66 .* movd %xmm1,\(%edx\) +[a-f0-9]+: c5 .* vmovd %xmm1,%edx +[a-f0-9]+: c5 .* vmovd %xmm1,\(%edx\) + +[a-f0-9]+: 66 .* movd %xmm1,%edx + +[a-f0-9]+: f3 .* movss %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovd %xmm1,%edx + +[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\) +[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax +[a-f0-9]+: 66 
0f ba e0 10 bt \$0x10,%ax +[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax diff --git a/gas/testsuite/gas/i386/optimize-5.d b/gas/testsuite/gas/i386/optimize-5.d index d53d2cc28e2..d60d8421cbf 100644 --- a/gas/testsuite/gas/i386/optimize-5.d +++ b/gas/testsuite/gas/i386/optimize-5.d @@ -166,6 +166,10 @@ Disassembly of section .text: +[a-f0-9]+: 66 .* movd %xmm1,\(%edx\) +[a-f0-9]+: c5 .* vmovd %xmm1,%edx +[a-f0-9]+: c5 .* vmovd %xmm1,\(%edx\) + +[a-f0-9]+: 66 .* movd %xmm1,%edx + +[a-f0-9]+: f3 .* movss %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovd %xmm1,%edx + +[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\) +[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax +[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax +[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax diff --git a/gas/testsuite/gas/i386/x86-64-optimize-extractps.d b/gas/testsuite/gas/i386/x86-64-optimize-extractps.d new file mode 100644 index 00000000000..706cd000331 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-optimize-extractps.d @@ -0,0 +1,20 @@ +#as: -O -msse2avx +#objdump: -drw +#name: x86-64 EXTRACTPS optimized encoding with -msse2avx + +.*: +file format .* + + +Disassembly of section .text: + +0+ <extractps>: + +[a-f0-9]+: c5 f9 7e ca vmovd %xmm1,%edx + +[a-f0-9]+: c5 fa 11 0a vmovss %xmm1,\(%rdx\) + +[a-f0-9]+: 62 f9 7d 08 7e ca vmovd %xmm1,%r18d + +[a-f0-9]+: 62 f9 7e 08 11 0a vmovss %xmm1,\(%r18\) + +[a-f0-9]+: c5 f9 7e ca vmovd %xmm1,%edx + +[a-f0-9]+: c5 fa 11 0a vmovss %xmm1,\(%rdx\) + +[a-f0-9]+: 62 e1 7d 08 7e ca vmovd %xmm17,%edx + +[a-f0-9]+: 62 f9 7d 08 7e ca vmovd %xmm1,%r18d + +[a-f0-9]+: 62 f9 7e 08 11 0a vmovss %xmm1,\(%r18\) +#pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-extractps.l b/gas/testsuite/gas/i386/x86-64-optimize-extractps.l new file mode 100644 index 00000000000..d52794e4049 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-optimize-extractps.l @@ -0,0 +1,21 @@ +.*: Assembler messages: +.*:6: Error: .* +.*:7: Error: .* +[ ]*[0-9a-f]+[ ]+\.text +[ ]*[0-9a-f]+[ ]+extractps: +[ ]*[0-9a-f]+[ ]+\?\?\?\? 
660F7ECA[ ]+extractps \$0, %xmm1, %edx +[ ]*[0-9a-f]+[ ]+\?\?\?\? F30F110A[ ]+extractps \$0, %xmm1, \(%rdx\) +[ ]*[0-9a-f]+[ ]+ +[ ]*[0-9a-f]+[ ]+extractps \$0, %xmm1, %r18d +[ ]*[0-9a-f]+[ ]+extractps \$0, %xmm1, \(%r18\) +[ ]*[0-9a-f]+[ ]+ +[ ]*[0-9a-f]+[ ]+\?\?\?\? C5F97ECA[ ]+vextractps \$0, %xmm1, %edx +[ ]*[0-9a-f]+[ ]+\?\?\?\? C5FA110A[ ]+vextractps \$0, %xmm1, \(%rdx\) +[ ]*[0-9a-f]+[ ]+ +[ ]*[0-9a-f]+[ ]+\?\?\?\? 62E17D08[ ]+vextractps \$0, %xmm17, %edx +[ ]*[0-9a-f]+[ ]+7ECA +[ ]*[0-9a-f]+[ ]+\?\?\?\? 62F97D08[ ]+vextractps \$0, %xmm1, %r18d +[ ]*[0-9a-f]+[ ]+7ECA +[ ]*[0-9a-f]+[ ]+\?\?\?\? 62F97E08[ ]+vextractps \$0, %xmm1, \(%r18\) +[ ]*[0-9a-f]+[ ]+110A +#pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-extractps.s b/gas/testsuite/gas/i386/x86-64-optimize-extractps.s new file mode 100644 index 00000000000..1a1c77b1a45 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-optimize-extractps.s @@ -0,0 +1,14 @@ + .text +extractps: + extractps $0, %xmm1, %edx + extractps $0, %xmm1, (%rdx) + + extractps $0, %xmm1, %r18d + extractps $0, %xmm1, (%r18) + + vextractps $0, %xmm1, %edx + vextractps $0, %xmm1, (%rdx) + + vextractps $0, %xmm17, %edx + vextractps $0, %xmm1, %r18d + vextractps $0, %xmm1, (%r18) diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp index 740f5268de3..61fafd07a8c 100644 --- a/gas/testsuite/gas/i386/x86-64.exp +++ b/gas/testsuite/gas/i386/x86-64.exp @@ -594,6 +594,8 @@ run_dump_test "x86-64-optimize-7b" run_list_test "x86-64-optimize-8" "-I${srcdir}/$subdir -march=+noavx2 -al" run_list_test "x86-64-optimize-pextr" "-O -aln" run_dump_test "x86-64-optimize-pextr" +run_list_test "x86-64-optimize-extractps" "-O -aln" +run_dump_test "x86-64-optimize-extractps" run_dump_test "x86-64-apx-ndd-optimize" run_dump_test "x86-64-align-branch-1a" run_dump_test "x86-64-align-branch-1b" diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index 4c42d4ab8e6..bc48611cae0 100644 --- a/opcodes/i386-opc.tbl +++ 
b/opcodes/i386-opc.tbl @@ -1491,10 +1491,10 @@ blendvp, 0x664a | , AVX, Modrm|Vex128|Space0F3A|Src1VVVV|VexW0|NoSuf blendvp, 0x660f3814 | , SSE4_1, Modrm|NoSuf, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM } blendvp, 0x660f3814 | , SSE4_1, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM } dpp, 0x660f3a40 | , , Modrm|||NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM } -extractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexW0|Disp8MemShift=2|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex } -extractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexW1|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg64 } -extractps, 0x660f3a17, SSE4_1, Modrm|IgnoreSize|NoSuf, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex } -extractps, 0x660f3a17, SSE4_1&x64, RegMem|NoSuf|NoRex64, { Imm8, RegXMM, Reg64 } +extractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexW0|Disp8MemShift=2|NoSuf|SSE2AVX|Optimize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex } +extractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexW1|NoSuf|SSE2AVX|Optimize, { Imm8, RegXMM, Reg64 } +extractps, 0x660f3a17, SSE4_1, Modrm|IgnoreSize|NoSuf|Optimize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex } +extractps, 0x660f3a17, SSE4_1&x64, RegMem|NoSuf|Optimize|NoRex64, { Imm8, RegXMM, Reg64 } insertps, 0x660f3a21, , Modrm|||Disp8MemShift|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM } movntdqa, 0x660f382a, , Modrm||NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM } mpsadbw, 0x660f3a42, , Modrm|||NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM } @@ -1665,8 +1665,8 @@ vcvtts2si, 0x2c, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf, vdppd, 0x6641, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM } vdpps, 0x6640, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } vextractf128, 0x6619, 
AVX, Modrm|Vex256|Space0F3A|VexW0|NoSuf, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM } -vextractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexWIG|Disp8MemShift=2|NoSuf, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex } -vextractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, Reg64 } +vextractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexWIG|Disp8MemShift=2|NoSuf|Optimize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex } +vextractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexWIG|NoSuf|Optimize, { Imm8, RegXMM, Reg64 } vhaddpd, 0x667c, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } vhaddps, 0xf27c, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } vhsubpd, 0x667d, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index a2d42258c29..9ab2b544273 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -18014,7 +18014,7 @@ static const insn_template i386_optab[] = 1, 0, 0, 0, 0, 0 } } } }, { MN_extractps, 0x17, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 1, 1, 0, 1, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 1, 1, 0, 1, 2, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0 } }, @@ -18026,7 +18026,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 1, 0 } } } }, { MN_extractps, 0x17, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, - 0, 0, 0, 1, 0, 2, 1, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 2, 1, 0, 1, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 } }, { 
{ 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0 } }, @@ -18038,7 +18038,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_extractps, 0x17, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0 }, { { 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -18050,7 +18050,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 1, 0 } } } }, { MN_extractps, 0x17, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, - 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0 }, { { 28, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -25964,7 +25964,7 @@ static const insn_template i386_optab[] = 1, 0, 0, 0, 1, 0 } } } }, { MN_vextractps, 0x17, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 3, 1, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 3, 1, 0, 0, 2, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0 } }, @@ -25976,7 +25976,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 1, 0 } } } }, { MN_vextractps, 0x17, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, - 0, 0, 0, 1, 0, 3, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 3, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0 } },