git.ipfire.org Git - thirdparty/binutils-gdb.git/commitdiff
x86: Check XMM destination when optimizing 128-bit VPBROADCASTQ
author H.J. Lu <hjl.tools@gmail.com>
Wed, 27 May 2026 00:39:16 +0000 (08:39 +0800)
committer H.J. Lu <hjl.tools@gmail.com>
Wed, 27 May 2026 23:09:29 +0000 (07:09 +0800)
commit eb4031cb20aa710834be891f8638e04dbba81edc
Author: Jan Beulich <jbeulich@suse.com>
Date:   Tue Jul 4 17:07:26 2023 +0200

    x86: optimize 128-bit VPBROADCASTQ to VPUNPCKLQDQ

was supposed to optimize

vpbroadcastq %xmmN, %xmmM  -> vpunpcklqdq %xmmN, %xmmN, %xmmM (N < 8)

But it didn't check whether the destination operand is XMM.  As a result, it
turned:

vpbroadcastq %xmmN, %ymmM

into

vpunpcklqdq %xmmN, %xmmN, %xmmM

Fix it by checking for an XMM destination.

PR gas/34171
* config/tc-i386.c (optimize_encoding): Check XMM destination
when optimizing 128-bit VPBROADCASTQ.
* testsuite/gas/i386/optimize-2.d: Updated.
* testsuite/gas/i386/optimize-2.s: Add 256-bit vpbroadcastq.

Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
gas/config/tc-i386.c
gas/testsuite/gas/i386/optimize-2.d
gas/testsuite/gas/i386/optimize-2.s

index 988b024f0b883399697f163f08c5951ae8b9788d..365c2ee95f5c58cba7c8116d80fad7ebb9ddeccc 100644 (file)
@@ -5802,6 +5802,7 @@ optimize_encoding (void)
           && i.tm.opcode_modifier.vex
           && !(i.op[0].regs->reg_flags & RegRex)
           && i.op[0].regs->reg_type.bitfield.xmmword
+          && i.op[1].regs->reg_type.bitfield.xmmword
           && pp.encoding != encoding_vex3)
     {
       /* Optimize: -Os:
index 2738b84b80d34cb4d537e17b6a396d524f7f9987..3c90cc9b178899ffeabce41a7567095198a22d25 100644 (file)
@@ -198,4 +198,5 @@ Disassembly of section .text:
  +[a-f0-9]+:   c5 .*   vpaddq %xmm2,%xmm2,%xmm3
  +[a-f0-9]+:   62 .*   vpaddq %zmm2,%zmm2,%zmm3
  +[a-f0-9]+:   c5 .*   vpunpcklqdq %xmm2,%xmm2,%xmm0
+ +[a-f0-9]+:   c4 .*   vpbroadcastq %xmm2,%ymm0
 #pass
index b2b1cc112dffa972fc797b3009b86649638dd578..80a46eab4858f9ff22dcad2d3dd4c46907110aae 100644 (file)
@@ -233,3 +233,4 @@ _start:
        vpsllq  $1, %zmm2, %zmm3
 
        vpbroadcastq    %xmm2, %xmm0
+       vpbroadcastq    %xmm2, %ymm0