From: Jan Beulich Date: Fri, 27 Sep 2024 09:22:34 +0000 (+0200) Subject: x86: optimize {,V}EXTRACT{F,I}{128,32x{4,8},64x{2,4}} with immediate 0 X-Git-Tag: gdb-16-branchpoint~787 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f079b0c4b2923393c2ce58e72dfd5a1f2c7a339a;p=thirdparty%2Fbinutils-gdb.git x86: optimize {,V}EXTRACT{F,I}{128,32x{4,8},64x{2,4}} with immediate 0 They, too, are equivalent to simple moves, which are up to 3 bytes shorter to encode (and maybe also cheaper to execute). --- diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 611b63fc74a..620dc9c80f2 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -5584,6 +5584,80 @@ optimize_encoding (void) i.reloc[1] = i.reloc[2]; i.tm.operand_types[1] = i.tm.operand_types[2]; + i.operands = 2; + i.imm_operands = 0; + } + else if ((i.tm.base_opcode | 0x22) == 0x3b + && i.tm.opcode_space == SPACE_0F3A + && i.op[0].imms->X_op == O_constant + && i.op[0].imms->X_add_number == 0) + { + /* Optimize: -O: + vextractf128 $0, %ymmN, %xmmM -> vmovaps %xmmN, %xmmM + vextractf128 $0, %ymmN, mem -> vmovups %xmmN, mem + vextractf32x4 $0, %[yz]mmN, %xmmM -> vmovaps %xmmN, %xmmM + vextractf32x4 $0, %[yz]mmN, mem -> vmovups %xmmN, mem + vextractf64x2 $0, %[yz]mmN, %xmmM -> vmovapd %xmmN, %xmmM + vextractf64x2 $0, %[yz]mmN, mem -> vmovupd %xmmN, mem + vextractf32x8 $0, %zmmN, %ymmM -> vmovaps %ymmN, %ymmM + vextractf32x8 $0, %zmmN, mem -> vmovups %ymmN, mem + vextractf64x4 $0, %zmmN, %ymmM -> vmovapd %ymmN, %ymmM + vextractf64x4 $0, %zmmN, mem -> vmovupd %ymmN, mem + vextracti128 $0, %ymmN, %xmmM -> vmovdqa %xmmN, %xmmM + vextracti128 $0, %ymmN, mem -> vmovdqu %xmmN, mem + vextracti32x4 $0, %[yz]mmN, %xmmM -> vmovdqa{,32} %xmmN, %xmmM + vextracti32x4 $0, %[yz]mmN, mem -> vmovdqu{,32} %xmmN, mem + vextracti64x2 $0, %[yz]mmN, %xmmM -> vmovdqa{,64} %xmmN, %xmmM + vextracti64x2 $0, %[yz]mmN, mem -> vmovdqu{,64} %xmmN, mem + vextracti32x8 $0, %zmmN, %ymmM -> vmovdqa{,32} %ymmN, %ymmM + vextracti32x8 $0, 
%zmmN, mem -> vmovdqu{,32} %ymmN, mem + vextracti64x4 $0, %zmmN, %ymmM -> vmovdqa{,64} %ymmN, %ymmM + vextracti64x4 $0, %zmmN, mem -> vmovdqu{,64} %ymmN, mem + */ + i.tm.opcode_space = SPACE_0F; + + if (!i.mask.reg + && (pp.encoding <= encoding_vex3 + || (pp.encoding == encoding_evex512 + && (!i.base_reg || !(i.base_reg->reg_flags & RegRex2)) + && (!i.index_reg || !(i.index_reg->reg_flags & RegRex2))))) + { + i.tm.opcode_modifier.vex = i.tm.base_opcode & 2 ? VEX256 : VEX128; + i.tm.opcode_modifier.evex = 0; + } + else + i.tm.opcode_modifier.evex = i.tm.base_opcode & 2 ? EVEX256 : EVEX128; + + if (i.tm.base_opcode & 0x20) + { + i.tm.base_opcode = 0x7f; + if (i.reg_operands != 2) + i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3; + } + else + { + if (i.reg_operands == 2) + i.tm.base_opcode = 0x29; + else + i.tm.base_opcode = 0x11; + if (i.tm.opcode_modifier.vexw != VEXW1) + i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE; + } + + if (i.tm.opcode_modifier.vex) + i.tm.opcode_modifier.vexw = VEXWIG; + + i.op[0].regs = i.op[1].regs; + i.types[0] = i.types[1]; + i.flags[0] = i.flags[1]; + i.tm.operand_types[0] = i.tm.operand_types[1]; + + i.op[1].regs = i.op[2].regs; + i.types[1] = i.types[2]; + i.flags[1] = i.flags[2]; + i.reloc[1] = i.reloc[2]; + i.tm.operand_types[1] = i.tm.operand_types[2]; + i.operands = 2; i.imm_operands = 0; } diff --git a/gas/testsuite/gas/i386/optimize-1.d b/gas/testsuite/gas/i386/optimize-1.d index c2802b894b9..fffa655d330 100644 --- a/gas/testsuite/gas/i386/optimize-1.d +++ b/gas/testsuite/gas/i386/optimize-1.d @@ -170,6 +170,26 @@ Disassembly of section .text: +[a-f0-9]+: f3 .* movss %xmm1,\(%edx\) +[a-f0-9]+: c5 .* vmovd %xmm1,%edx +[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa 
%xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovapd %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovupd %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovaps %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovups %ymm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovapd %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\) +[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax +[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax +[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax diff --git a/gas/testsuite/gas/i386/optimize-1.s b/gas/testsuite/gas/i386/optimize-1.s index 40546e8c157..afdeed4c423 100644 --- a/gas/testsuite/gas/i386/optimize-1.s +++ b/gas/testsuite/gas/i386/optimize-1.s @@ -199,6 +199,31 @@ _start: vextractps $0, %xmm1, %edx vextractps $0, %xmm1, (%edx) + vextractf128 $0, %ymm1, %xmm2 + vextractf128 $0, %ymm1, (%edx) + vextracti128 $0, %ymm1, %xmm2 + vextracti128 $0, %ymm1, (%edx) + + vextractf32x4 $0, %ymm1, %xmm2 + vextractf32x4 $0, %ymm1, (%edx) + vextracti32x4 $0, %ymm1, %xmm2 + vextracti32x4 $0, %ymm1, (%edx) + + vextractf64x2 $0, %ymm1, %xmm2 + vextractf64x2 $0, %ymm1, (%edx) + vextracti64x2 $0, %ymm1, %xmm2 + vextracti64x2 $0, %ymm1, (%edx) + + vextractf32x8 $0, %zmm1, %ymm2 + vextractf32x8 $0, %zmm1, (%edx) + vextracti32x8 $0, %zmm1, %ymm2 + vextracti32x8 $0, %zmm1, (%edx) + + vextractf64x4 $0, %zmm1, %ymm2 + vextractf64x4 $0, %zmm1, (%edx) + vextracti64x4 $0, %zmm1, %ymm2 + vextracti64x4 $0, %zmm1, (%edx) + bt $15, %ax bt $16, %ax btc $15, %ax diff --git a/gas/testsuite/gas/i386/optimize-1a.d b/gas/testsuite/gas/i386/optimize-1a.d index 563567f3f30..aff37681a00 100644 --- a/gas/testsuite/gas/i386/optimize-1a.d +++ b/gas/testsuite/gas/i386/optimize-1a.d @@ -171,6 +171,26 @@ Disassembly of section .text: +[a-f0-9]+: f3 .* movss %xmm1,\(%edx\) +[a-f0-9]+: c5 .* 
vmovd %xmm1,%edx +[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovapd %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovupd %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovaps %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovups %ymm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovapd %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\) +[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax +[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax +[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax diff --git a/gas/testsuite/gas/i386/optimize-4.d b/gas/testsuite/gas/i386/optimize-4.d index 2449bc55ad9..fbc142a424c 100644 --- a/gas/testsuite/gas/i386/optimize-4.d +++ b/gas/testsuite/gas/i386/optimize-4.d @@ -170,6 +170,26 @@ Disassembly of section .text: +[a-f0-9]+: f3 .* movss %xmm1,\(%edx\) +[a-f0-9]+: c5 .* vmovd %xmm1,%edx +[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovapd %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovupd %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovaps %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovups %ymm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2 
+ +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovapd %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\) +[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax +[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax +[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax diff --git a/gas/testsuite/gas/i386/optimize-5.d b/gas/testsuite/gas/i386/optimize-5.d index d60d8421cbf..a40022a3420 100644 --- a/gas/testsuite/gas/i386/optimize-5.d +++ b/gas/testsuite/gas/i386/optimize-5.d @@ -170,6 +170,26 @@ Disassembly of section .text: +[a-f0-9]+: f3 .* movss %xmm1,\(%edx\) +[a-f0-9]+: c5 .* vmovd %xmm1,%edx +[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovapd %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovupd %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovaps %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovups %ymm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovapd %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\) + +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2 + +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\) +[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax +[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax +[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax diff --git a/gas/testsuite/gas/i386/x86-64-optimize-vextractNN.d b/gas/testsuite/gas/i386/x86-64-optimize-vextractNN.d new file mode 100644 index 00000000000..739a2fbb54a --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-optimize-vextractNN.d @@ -0,0 +1,59 @@ +#as: -O +#objdump: -drw +#name: x86-64 VEXTRACT{F,I} optimized encoding with -msse2avx + 
+.*: +file format .* + + +Disassembly of section .text: + +0+ <vextract_128>: + +[a-f0-9]+: c5 f8 29 ca vmovaps %xmm1,%xmm2 + +[a-f0-9]+: c5 f8 11 0a vmovups %xmm1,\(%rdx\) + +[a-f0-9]+: c5 f9 7f ca vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 fa 7f 0a vmovdqu %xmm1,\(%rdx\) + +0+[a-f0-9]+ <vextract_NNxM_XMM>: + +[a-f0-9]+: c5 f8 29 ca vmovaps %xmm1,%xmm2 + +[a-f0-9]+: c5 f8 11 0a vmovups %xmm1,\(%rdx\) + +[a-f0-9]+: 62 e1 7c 08 29 ca vmovaps %xmm17,%xmm2 + +[a-f0-9]+: 62 e1 7c 08 11 0a vmovups %xmm17,\(%rdx\) + +[a-f0-9]+: 62 f9 7c 08 11 0a vmovups %xmm1,\(%r18\) + +[a-f0-9]+: c5 f9 29 ca vmovapd %xmm1,%xmm2 + +[a-f0-9]+: c5 f9 11 0a vmovupd %xmm1,\(%rdx\) + +[a-f0-9]+: 62 e1 fd 08 29 ca vmovapd %xmm17,%xmm2 + +[a-f0-9]+: 62 e1 fd 08 11 0a vmovupd %xmm17,\(%rdx\) + +[a-f0-9]+: 62 f9 fd 08 11 0a vmovupd %xmm1,\(%r18\) + +[a-f0-9]+: c5 f9 7f ca vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 fa 7f 0a vmovdqu %xmm1,\(%rdx\) + +[a-f0-9]+: 62 e1 7d 08 7f ca vmovdqa32 %xmm17,%xmm2 + +[a-f0-9]+: 62 e1 7e 08 7f 0a vmovdqu32 %xmm17,\(%rdx\) + +[a-f0-9]+: 62 f9 7e 08 7f 0a vmovdqu32 %xmm1,\(%r18\) + +[a-f0-9]+: c5 f9 7f ca vmovdqa %xmm1,%xmm2 + +[a-f0-9]+: c5 fa 7f 0a vmovdqu %xmm1,\(%rdx\) + +[a-f0-9]+: 62 e1 fd 08 7f ca vmovdqa64 %xmm17,%xmm2 + +[a-f0-9]+: 62 e1 fe 08 7f 0a vmovdqu64 %xmm17,\(%rdx\) + +[a-f0-9]+: 62 f9 fe 08 7f 0a vmovdqu64 %xmm1,\(%r18\) + +0+[a-f0-9]+ <vextract_NNxM_YMM>: + +[a-f0-9]+: c5 fc 29 ca vmovaps %ymm1,%ymm2 + +[a-f0-9]+: c5 fc 11 0a vmovups %ymm1,\(%rdx\) + +[a-f0-9]+: 62 e1 7c 28 29 ca vmovaps %ymm17,%ymm2 + +[a-f0-9]+: 62 e1 7c 28 11 0a vmovups %ymm17,\(%rdx\) + +[a-f0-9]+: 62 f9 7c 28 11 0a vmovups %ymm1,\(%r18\) + +[a-f0-9]+: c5 fd 29 ca vmovapd %ymm1,%ymm2 + +[a-f0-9]+: c5 fd 11 0a vmovupd %ymm1,\(%rdx\) + +[a-f0-9]+: 62 e1 fd 28 29 ca vmovapd %ymm17,%ymm2 + +[a-f0-9]+: 62 e1 fd 28 11 0a vmovupd %ymm17,\(%rdx\) + +[a-f0-9]+: 62 f9 fd 28 11 0a vmovupd %ymm1,\(%r18\) + +[a-f0-9]+: c5 fd 7f ca vmovdqa %ymm1,%ymm2 + +[a-f0-9]+: c5 fe 7f 0a vmovdqu %ymm1,\(%rdx\) + +[a-f0-9]+: 62 e1 7d 28 7f ca vmovdqa32 
%ymm17,%ymm2 + +[a-f0-9]+: 62 e1 7e 28 7f 0a vmovdqu32 %ymm17,\(%rdx\) + +[a-f0-9]+: 62 f9 7e 28 7f 0a vmovdqu32 %ymm1,\(%r18\) + +[a-f0-9]+: c5 fd 7f ca vmovdqa %ymm1,%ymm2 + +[a-f0-9]+: c5 fe 7f 0a vmovdqu %ymm1,\(%rdx\) + +[a-f0-9]+: 62 e1 fd 28 7f ca vmovdqa64 %ymm17,%ymm2 + +[a-f0-9]+: 62 e1 fe 28 7f 0a vmovdqu64 %ymm17,\(%rdx\) + +[a-f0-9]+: 62 f9 fe 28 7f 0a vmovdqu64 %ymm1,\(%r18\) +#pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-vextractNN.s b/gas/testsuite/gas/i386/x86-64-optimize-vextractNN.s new file mode 100644 index 00000000000..5da8265d345 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-optimize-vextractNN.s @@ -0,0 +1,57 @@ + .text +vextract_128: + vextractf128 $0, %ymm1, %xmm2 + vextractf128 $0, %ymm1, (%rdx) + + vextracti128 $0, %ymm1, %xmm2 + vextracti128 $0, %ymm1, (%rdx) + +vextract_NNxM_XMM: + vextractf32x4 $0, %ymm1, %xmm2 + vextractf32x4 $0, %ymm1, (%rdx) + vextractf32x4 $0, %ymm17, %xmm2 + vextractf32x4 $0, %ymm17, (%rdx) + vextractf32x4 $0, %ymm1, (%r18) + + vextractf64x2 $0, %ymm1, %xmm2 + vextractf64x2 $0, %ymm1, (%rdx) + vextractf64x2 $0, %ymm17, %xmm2 + vextractf64x2 $0, %ymm17, (%rdx) + vextractf64x2 $0, %ymm1, (%r18) + + vextracti32x4 $0, %ymm1, %xmm2 + vextracti32x4 $0, %ymm1, (%rdx) + vextracti32x4 $0, %ymm17, %xmm2 + vextracti32x4 $0, %ymm17, (%rdx) + vextracti32x4 $0, %ymm1, (%r18) + + vextracti64x2 $0, %ymm1, %xmm2 + vextracti64x2 $0, %ymm1, (%rdx) + vextracti64x2 $0, %ymm17, %xmm2 + vextracti64x2 $0, %ymm17, (%rdx) + vextracti64x2 $0, %ymm1, (%r18) + +vextract_NNxM_YMM: + vextractf32x8 $0, %zmm1, %ymm2 + vextractf32x8 $0, %zmm1, (%rdx) + vextractf32x8 $0, %zmm17, %ymm2 + vextractf32x8 $0, %zmm17, (%rdx) + vextractf32x8 $0, %zmm1, (%r18) + + vextractf64x4 $0, %zmm1, %ymm2 + vextractf64x4 $0, %zmm1, (%rdx) + vextractf64x4 $0, %zmm17, %ymm2 + vextractf64x4 $0, %zmm17, (%rdx) + vextractf64x4 $0, %zmm1, (%r18) + + vextracti32x8 $0, %zmm1, %ymm2 + vextracti32x8 $0, %zmm1, (%rdx) + vextracti32x8 $0, %zmm17, %ymm2 + 
vextracti32x8 $0, %zmm17, (%rdx) + vextracti32x8 $0, %zmm1, (%r18) + + vextracti64x4 $0, %zmm1, %ymm2 + vextracti64x4 $0, %zmm1, (%rdx) + vextracti64x4 $0, %zmm17, %ymm2 + vextracti64x4 $0, %zmm17, (%rdx) + vextracti64x4 $0, %zmm1, (%r18) diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp index 61fafd07a8c..ad713dc59ae 100644 --- a/gas/testsuite/gas/i386/x86-64.exp +++ b/gas/testsuite/gas/i386/x86-64.exp @@ -596,6 +596,7 @@ run_list_test "x86-64-optimize-pextr" "-O -aln" run_dump_test "x86-64-optimize-pextr" run_list_test "x86-64-optimize-extractps" "-O -aln" run_dump_test "x86-64-optimize-extractps" +run_dump_test "x86-64-optimize-vextractNN" run_dump_test "x86-64-apx-ndd-optimize" run_dump_test "x86-64-align-branch-1a" run_dump_test "x86-64-align-branch-1b" diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index bc48611cae0..29289ca467f 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -1664,7 +1664,7 @@ vcvttps2dq, 0xf35b, AVX, Modrm|Vex|Space0F|VexWIG|NoSuf, { Unspecified|BaseIndex vcvtts2si, 0x2c, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf, { |Unspecified|BaseIndex|RegXMM, Reg32|Reg64 } vdppd, 0x6641, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM } vdpps, 0x6640, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } -vextractf128, 0x6619, AVX, Modrm|Vex256|Space0F3A|VexW0|NoSuf, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM } +vextractf128, 0x6619, AVX, Modrm|Vex256|Space0F3A|VexW0|NoSuf|Optimize, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM } vextractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexWIG|Disp8MemShift=2|NoSuf|Optimize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex } vextractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexWIG|NoSuf|Optimize, { Imm8, RegXMM, Reg64 } vhaddpd, 0x667c, AVX, 
Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } @@ -1864,7 +1864,7 @@ vpermd, 0x6636, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F38|Src1VVVV|Ve vpermpd, 0x6601, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F3A|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM } vpermps, 0x6616, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM } vpermq, 0x6600, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F3A|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM } -vextracti128, 0x6639, AVX2, Modrm|Vex256|Space0F3A|VexW0|NoSuf, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM } +vextracti128, 0x6639, AVX2, Modrm|Vex256|Space0F3A|VexW0|NoSuf|Optimize, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM } vinserti128, 0x6638, AVX2, Modrm|Vex256|Space0F3A|Src1VVVV|VexW0|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegYMM, RegYMM } vpmaskmov, 0x668e, AVX2, Modrm|Vex|Space0F38|Src1VVVV||CheckOperandSize|NoSuf, { RegXMM|RegYMM, RegXMM|RegYMM, Xmmword|Ymmword|Unspecified|BaseIndex } vpmaskmov, 0x668c, AVX2, Modrm|Vex|Space0F38|Src1VVVV||CheckOperandSize|NoSuf, { Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM } @@ -2356,11 +2356,11 @@ vpexpandq, 0x6689, AVX512F, Modrm|Masking|Space0F38|VexW=2|Disp8MemShift=3|Check vexpandps, 0x6688, AVX512F, Modrm|Masking|Space0F38|VexW=1|Disp8MemShift=2|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM } vpexpandd, 0x6689, AVX512F, Modrm|Masking|Space0F38|VexW=1|Disp8MemShift=2|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM } -vextractf32x4, 0x6619, AVX512F, 
Modrm|Masking|Space0F3A|VexW=1|Disp8MemShift=4|NoSuf, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex } -vextracti32x4, 0x6639, AVX512F, Modrm|Masking|Space0F3A|VexW=1|Disp8MemShift=4|NoSuf, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex } +vextractf32x4, 0x6619, AVX512F, Modrm|Masking|Space0F3A|VexW=1|Disp8MemShift=4|NoSuf|Optimize, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex } +vextracti32x4, 0x6639, AVX512F, Modrm|Masking|Space0F3A|VexW=1|Disp8MemShift=4|NoSuf|Optimize, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex } -vextractf64x4, 0x661B, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexW=2|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex } -vextracti64x4, 0x663B, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexW=2|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex } +vextractf64x4, 0x661B, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexW=2|Disp8MemShift=5|NoSuf|Optimize, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex } +vextracti64x4, 0x663B, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexW=2|Disp8MemShift=5|NoSuf|Optimize, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex } vfixupimmp, 0x6654, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV||Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM||Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vfixupimms, 0x6655, AVX512F, Modrm|EVexLIG|Masking|Space0F3A|Src1VVVV||Disp8MemShift|NoSuf|SAE, { Imm8|Imm8S, RegXMM||Unspecified|BaseIndex, RegXMM, RegXMM } @@ -2814,16 +2814,16 @@ vcvttps2uqq, 0x6678, AVX512DQ&AVX512VL, Modrm|EVex256|Masking|Space0F|VexW0|Broa vcvtuqq2ps, 0xf27a, AVX512DQ&, Modrm||Masking|Space0F|VexW1|Broadcast|NoSuf|, { |Qword, } -vextractf32x8, 0x661B, AVX512DQ, Modrm|EVex=1|Masking|Space0F3A|VexW=1|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex } -vextracti32x8, 0x663B, AVX512DQ, Modrm|EVex=1|Masking|Space0F3A|VexW=1|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex } 
+vextractf32x8, 0x661B, AVX512DQ, Modrm|EVex=1|Masking|Space0F3A|VexW=1|Disp8MemShift=5|NoSuf|Optimize, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex } +vextracti32x8, 0x663B, AVX512DQ, Modrm|EVex=1|Masking|Space0F3A|VexW=1|Disp8MemShift=5|NoSuf|Optimize, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex } vinsertf32x8, 0x661A, AVX512DQ, Modrm|EVex512|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM } vinserti32x8, 0x663A, AVX512DQ, Modrm|EVex512|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM } vpextr, 0x6616, AVX512DQ&, Modrm|EVex128|Space0F3A||Disp8MemShift|NoSuf|Optimize, { Imm8, RegXMM, |Unspecified|BaseIndex } vpinsr, 0x6622, AVX512DQ&, Modrm|EVex128|Space0F3A|Src1VVVV||Disp8MemShift|NoSuf, { Imm8, |Unspecified|BaseIndex, RegXMM, RegXMM } -vextractf64x2, 0x6619, AVX512DQ, Modrm|Masking|Space0F3A|VexW=2|Disp8MemShift=4|NoSuf, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex } -vextracti64x2, 0x6639, AVX512DQ, Modrm|Masking|Space0F3A|VexW=2|Disp8MemShift=4|NoSuf, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex } +vextractf64x2, 0x6619, AVX512DQ, Modrm|Masking|Space0F3A|VexW=2|Disp8MemShift=4|NoSuf|Optimize, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex } +vextracti64x2, 0x6639, AVX512DQ, Modrm|Masking|Space0F3A|VexW=2|Disp8MemShift=4|NoSuf|Optimize, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex } vinsertf64x2, 0x6618, AVX512DQ, Modrm|Masking|Space0F3A|Src1VVVV|VexW1|Disp8MemShift=4|CheckOperandSize|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM } vinserti64x2, 0x6638, AVX512DQ, Modrm|Masking|Space0F3A|Src1VVVV|VexW1|Disp8MemShift=4|CheckOperandSize|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index 9ab2b544273..5e04c5dee69 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -25952,7 +25952,7 @@ static const 
insn_template i386_optab[] = 1, 1, 0, 0, 0, 0 } } } }, { MN_vextractf128, 0x19, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -30722,7 +30722,7 @@ static const insn_template i386_optab[] = 0, 1, 1, 0, 0, 0 } } } }, { MN_vextracti128, 0x39, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0 }, { { 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -37370,7 +37370,7 @@ static const insn_template i386_optab[] = 1, 1, 1, 0, 0, 0 } } } }, { MN_vextractf32x4, 0x19, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 1, 0, 0, 5, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 0, 0, 5, 1, 0, 0, 0, 4, 1, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -37382,7 +37382,7 @@ static const insn_template i386_optab[] = 1, 0, 0, 0, 1, 0 } } } }, { MN_vextracti32x4, 0x39, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 1, 0, 0, 5, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 0, 0, 5, 1, 0, 0, 0, 4, 1, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -37394,7 +37394,7 @@ static const insn_template i386_optab[] = 1, 0, 0, 0, 1, 0 } } } }, { MN_vextractf64x4, 0x1b, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 1, 0, 0, 0, 5, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 1, 0, 0, 0, 5, 1, 0, 0, 0, 0, 0, 0 
}, { { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -37406,7 +37406,7 @@ static const insn_template i386_optab[] = 0, 1, 0, 0, 1, 0 } } } }, { MN_vextracti64x4, 0x3b, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 1, 0, 0, 0, 5, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 1, 0, 0, 0, 5, 1, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -40794,7 +40794,7 @@ static const insn_template i386_optab[] = 1, 0, 0, 0, 0, 0 } } } }, { MN_vextractf32x8, 0x1b, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 5, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 5, 1, 0, 0, 0, 0, 0, 0 }, { { 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -40806,7 +40806,7 @@ static const insn_template i386_optab[] = 0, 1, 0, 0, 1, 0 } } } }, { MN_vextracti32x8, 0x3b, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 5, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 5, 1, 0, 0, 0, 0, 0, 0 }, { { 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -40846,7 +40846,7 @@ static const insn_template i386_optab[] = 0, 0, 1, 0, 0, 0 } } } }, { MN_vextractf64x2, 0x19, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 1, 0, 0, 5, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 1, 0, 0, 5, 1, 0, 0, 0, 4, 1, 0, 0, 0, 0, 0, 0 }, { { 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -40858,7 +40858,7 @@ static const insn_template i386_optab[] = 1, 0, 0, 0, 1, 0 } } } }, { MN_vextracti64x2, 0x39, 3, SPACE_0F3A, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 
0, 0, 0, - 0, 0, 0, 0, 0, 2, 1, 0, 0, 5, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 1, 0, 0, 5, 1, 0, 0, 0, 4, 1, 0, 0, 0, 0, 0, 0 }, { { 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },