git.ipfire.org Git - thirdparty/binutils-gdb.git/commitdiff
x86: also prefer VEX encoding over EVEX one for VCVTNEPS2BF16 when possible
author: Jan Beulich <jbeulich@suse.com>
Fri, 24 Nov 2023 08:55:29 +0000 (09:55 +0100)
committer: Jan Beulich <jbeulich@suse.com>
Fri, 24 Nov 2023 08:55:29 +0000 (09:55 +0100)
Deal with what 58bceb182740 ("x86: prefer VEX encodings over EVEX ones
when possible") left out, for being slightly less straightforward.

gas/config/tc-i386.c
gas/testsuite/gas/i386/avx-vex.l
gas/testsuite/gas/i386/avx-vex.s
opcodes/i386-opc.tbl

index 2651cd2d25a0b8903d8d5c3718f3e714696eb558..71e0c4d06351fa8d04cc78eaead88b1eed01da40 100644 (file)
@@ -7475,20 +7475,27 @@ match_template (char mnem_suffix)
        }
 
       /* Check whether to use the shorter VEX encoding for certain insns where
-        the EVEX enconding comes first in the table.  This requires the respective
-        AVX-* feature to be explicitly enabled.  */
-      if (t == current_templates->start
+        the EVEX encoding comes first in the table.  This requires the respective
+        AVX-* feature to be explicitly enabled.
+
+        Most of the respective insns have just a single EVEX and a single VEX
+        template.  The one that's presently different is generated using the
+        Vxy / Exy constructs: There are 3 suffix-less EVEX forms, the latter
+        two of which may fall back to their two corresponding VEX forms.  */
+      j = t->mnem_off != MN_vcvtneps2bf16 ? 1 : 2;
+      if ((t == current_templates->start || j > 1)
          && t->opcode_modifier.disp8memshift
          && !t->opcode_modifier.vex
          && !need_evex_encoding ()
-         && t + 1 < current_templates->end
-         && t[1].opcode_modifier.vex)
+         && t + j < current_templates->end
+         && t[j].opcode_modifier.vex)
        {
          i386_cpu_flags cpu;
          unsigned int memshift = i.memshift;
 
          i.memshift = 0;
-         cpu = cpu_flags_and (cpu_flags_from_attr (t[1].cpu), cpu_arch_isa_flags);
+         cpu = cpu_flags_and (cpu_flags_from_attr (t[j].cpu),
+                              cpu_arch_isa_flags);
          if (!cpu_flags_all_zero (&cpu)
              && (!i.types[0].bitfield.disp8
                  || !operand_type_check (i.types[0], disp)
@@ -7496,6 +7503,7 @@ match_template (char mnem_suffix)
                  || fits_in_disp8 (i.op[0].disps->X_add_number)))
            {
              specific_error = progress (internal_error);
+             t += j - 1;
              continue;
            }
          i.memshift = memshift;
index e409be39bae3f27d5c0c65656cef2543a00a94a1..d93768e2d624962350ed453aaa41ffeb06d856e4 100644 (file)
@@ -27,7 +27,8 @@
 [      ]*[0-9]+[       ]+\?\?\?\? 62F2F538     >  vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
 [      ]*[0-9]+[       ]+B410
 [      ]*[0-9]+[       ]+> *
-[      ]*[0-9]+[       ]+>.*
+[      ]*[0-9]+[       ]+\?\?\?\? 62F27E28     >  vcvtneps2bf16 %ymm0,%xmm1
+[      ]*[0-9]+[       ]+72C8
 [      ]*[0-9]+[       ]+\?\?\?\? 62F27E28     >  vcvtneps2bf16y %ymm0,%xmm1
 [      ]*[0-9]+[       ]+72C8
 [      ]*[0-9]+[       ]+\?\?\?\? 62F27E28     >  vcvtneps2bf16y 0x20\(%eax\),%xmm1
 [      ]*[0-9]+[       ]+724808
 [      ]*[0-9]+[       ]+\?\?\?\? 62F27E38     >  vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
 [      ]*[0-9]+[       ]+7208
+[      ]*[0-9]+[       ]+> *
+[      ]*[0-9]+[       ]+>  \.intel_syntax noprefix
+[      ]*[0-9]+[       ]+\?\?\?\? 62F27E08     >  vcvtneps2bf16 xmm0,xmmword ptr \[ecx\]
+[      ]*[0-9]+[       ]+7201
+[      ]*[0-9]+[       ]+\?\?\?\? 62F27E28     >  vcvtneps2bf16 xmm0,ymmword ptr \[ecx\]
+[      ]*[0-9]+[       ]+7201
 #...
 [      ]*[0-9]+[       ]+>  \.arch \.noavx512vl
 [      ]*[0-9]+[       ]+> *
@@ -56,7 +63,8 @@
 [      ]*[0-9]+[       ]+00
 [      ]*[0-9]+[       ]+>  vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
 [      ]*[0-9]+[       ]+> *
-[      ]*[0-9]+[       ]+>.*
+[      ]*[0-9]+[       ]+\?\?\?\? C4E27E72     >  vcvtneps2bf16 %ymm0,%xmm1
+[      ]*[0-9]+[       ]+C8
 [      ]*[0-9]+[       ]+\?\?\?\? C4E27E72     >  vcvtneps2bf16y %ymm0,%xmm1
 [      ]*[0-9]+[       ]+C8
 [      ]*[0-9]+[       ]+\?\?\?\? C4E27E72     >  vcvtneps2bf16y 0x20\(%eax\),%xmm1
 [      ]*[0-9]+[       ]+88000100 *
 [      ]*[0-9]+[       ]+00
 [      ]*[0-9]+[       ]+>  vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
+[      ]*[0-9]+[       ]+> *
+[      ]*[0-9]+[       ]+>  \.intel_syntax noprefix
+[      ]*[0-9]+[       ]+\?\?\?\? C4E27A72     >  vcvtneps2bf16 xmm0,xmmword ptr \[ecx\]
+[      ]*[0-9]+[       ]+01
+[      ]*[0-9]+[       ]+\?\?\?\? C4E27E72     >  vcvtneps2bf16 xmm0,ymmword ptr \[ecx\]
+[      ]*[0-9]+[       ]+01
 #...
 [      ]*[0-9]+[       ]+>  \.arch \.noavx512f
 [      ]*[0-9]+[       ]+> *
@@ -85,7 +99,8 @@
 [      ]*[0-9]+[       ]+00
 [      ]*[0-9]+[       ]+>  vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
 [      ]*[0-9]+[       ]+> *
-[      ]*[0-9]+[       ]+>.*
+[      ]*[0-9]+[       ]+\?\?\?\? C4E27E72     >  vcvtneps2bf16 %ymm0,%xmm1
+[      ]*[0-9]+[       ]+C8
 [      ]*[0-9]+[       ]+\?\?\?\? C4E27E72     >  vcvtneps2bf16y %ymm0,%xmm1
 [      ]*[0-9]+[       ]+C8
 [      ]*[0-9]+[       ]+\?\?\?\? C4E27E72     >  vcvtneps2bf16y 0x20\(%eax\),%xmm1
 [      ]*[0-9]+[       ]+\?\?\?\? 62F2F538     >  vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
 [      ]*[0-9]+[       ]+B410
 [      ]*[0-9]+[       ]+> *
-[      ]*[0-9]+[       ]+>.*
+[      ]*[0-9]+[       ]+\?\?\?\? 62F27E28     >  vcvtneps2bf16 %ymm0,%xmm1
+[      ]*[0-9]+[       ]+72C8
 [      ]*[0-9]+[       ]+\?\?\?\? 62F27E28     >  vcvtneps2bf16y %ymm0,%xmm1
 [      ]*[0-9]+[       ]+72C8
 [      ]*[0-9]+[       ]+\?\?\?\? 62F27E28     >  vcvtneps2bf16y 0x20\(%eax\),%xmm1
 [      ]*[0-9]+[       ]+724808
 [      ]*[0-9]+[       ]+\?\?\?\? 62F27E38     >  vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
 [      ]*[0-9]+[       ]+7208
+[      ]*[0-9]+[       ]+> *
+[      ]*[0-9]+[       ]+>  \.intel_syntax noprefix
+[      ]*[0-9]+[       ]+\?\?\?\? 62F27E08     >  vcvtneps2bf16 xmm0,xmmword ptr \[ecx\]
+[      ]*[0-9]+[       ]+7201
+[      ]*[0-9]+[       ]+\?\?\?\? 62F27E28     >  vcvtneps2bf16 xmm0,ymmword ptr \[ecx\]
+[      ]*[0-9]+[       ]+7201
 #...
 [      ]*[0-9]+[       ]+>  \.arch \.avx_ifma
 [      ]*[0-9]+[       ]+> *
 [      ]*[0-9]+[       ]+\?\?\?\? 62F2F538     >  vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
 [      ]*[0-9]+[       ]+B410
 [      ]*[0-9]+[       ]+> *
-[      ]*[0-9]+[       ]+>.*
+[      ]*[0-9]+[       ]+\?\?\?\? 62F27E28     >  vcvtneps2bf16 %ymm0,%xmm1
+[      ]*[0-9]+[       ]+72C8
 [      ]*[0-9]+[       ]+\?\?\?\? 62F27E28     >  vcvtneps2bf16y %ymm0,%xmm1
 [      ]*[0-9]+[       ]+72C8
 [      ]*[0-9]+[       ]+\?\?\?\? 62F27E28     >  vcvtneps2bf16y 0x20\(%eax\),%xmm1
 [      ]*[0-9]+[       ]+724808
 [      ]*[0-9]+[       ]+\?\?\?\? 62F27E38     >  vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
 [      ]*[0-9]+[       ]+7208
+[      ]*[0-9]+[       ]+> *
+[      ]*[0-9]+[       ]+>  \.intel_syntax noprefix
+[      ]*[0-9]+[       ]+\?\?\?\? 62F27E08     >  vcvtneps2bf16 xmm0,xmmword ptr \[ecx\]
+[      ]*[0-9]+[       ]+7201
+[      ]*[0-9]+[       ]+\?\?\?\? 62F27E28     >  vcvtneps2bf16 xmm0,ymmword ptr \[ecx\]
+[      ]*[0-9]+[       ]+7201
 #...
 [      ]*[0-9]+[       ]+>  \.arch \.avx_ne_convert
 [      ]*[0-9]+[       ]+> *
 [      ]*[0-9]+[       ]+\?\?\?\? 62F2F538     >  vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2
 [      ]*[0-9]+[       ]+B410
 [      ]*[0-9]+[       ]+> *
-[      ]*[0-9]+[       ]+>.*
+[      ]*[0-9]+[       ]+\?\?\?\? C4E27E72     >  vcvtneps2bf16 %ymm0,%xmm1
+[      ]*[0-9]+[       ]+C8
 [      ]*[0-9]+[       ]+\?\?\?\? C4E27E72     >  vcvtneps2bf16y %ymm0,%xmm1
 [      ]*[0-9]+[       ]+C8
 [      ]*[0-9]+[       ]+\?\?\?\? C4E27E72     >  vcvtneps2bf16y 0x20\(%eax\),%xmm1
 [      ]*[0-9]+[       ]+724808
 [      ]*[0-9]+[       ]+\?\?\?\? 62F27E38     >  vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1
 [      ]*[0-9]+[       ]+7208
+[      ]*[0-9]+[       ]+> *
+[      ]*[0-9]+[       ]+>  \.intel_syntax noprefix
+[      ]*[0-9]+[       ]+\?\?\?\? C4E27A72     >  vcvtneps2bf16 xmm0,xmmword ptr \[ecx\]
+[      ]*[0-9]+[       ]+01
+[      ]*[0-9]+[       ]+\?\?\?\? C4E27E72     >  vcvtneps2bf16 xmm0,ymmword ptr \[ecx\]
+[      ]*[0-9]+[       ]+01
 #pass
index 1d87738c98f1658e7535f30b629c09b462087287..0490853ed1706023554bf86b6198a0c9a6ea0732 100644 (file)
        vpmadd52luq     0x100(%eax), %ymm1, %ymm2
        vpmadd52luq     (%eax){1to4}, %ymm1, %ymm2
 
-#      vcvtneps2bf16   %ymm0, %xmm1
+       vcvtneps2bf16   %ymm0, %xmm1
        vcvtneps2bf16y  %ymm0, %xmm1
        vcvtneps2bf16y  0x20(%eax), %xmm1
        vcvtneps2bf16y  0x100(%eax), %xmm1
        vcvtneps2bf16y  (%eax){1to8}, %xmm1
 
+       .intel_syntax noprefix
+       vcvtneps2bf16   xmm0, xmmword ptr [ecx]
+       vcvtneps2bf16   xmm0, ymmword ptr [ecx]
+       .att_syntax prefix
+
        .endr
index 167c0a0249f7e95f4ab46ef963a44883ece46381..b170d70d69a07e0ce9a71b4991ddfe282810ec5b 100644 (file)
@@ -1481,6 +1481,9 @@ gf2p8mulb<gfni>, 0x660f38cf, <gfni:cpu>GFNI, Modrm|<gfni:w0>|NoSuf, { RegXMM|Uns
     true_us:1f:C>
 
 // <Vxy> is used for VEX instructions with x/y suffixes.
+// NOTE: The order of the "unnamed" ($-prefixed) entries here needs to remain
+//       in sync with <Exy>, for match_template()'s EVEX-to-VEX lowering to
+//       continue to work.
 <Vxy:vex:syntax:src, +
     $i:Vex:IntelSyntax:RegXMM|RegYMM|Unspecified|BaseIndex, +
     $a:Vex:ATTSyntax:RegXMM|RegYMM, +
@@ -2097,6 +2100,9 @@ vpclmulhqhqdq, 0x6644/0x11, VPCLMULQDQ&(AVX|AVX512F), Modrm|Space0F3A|Vex|EVexDY
 // AVX512F instructions.
 
 // <Exy> is used for EVEX instructions with x/y suffixes.
+// NOTE: The order of the "unnamed" ($-prefixed) entries here needs to remain
+//       in sync with <Vxy>, for match_template()'s EVEX-to-VEX lowering to
+//       continue to work.
 <Exy:vl:attr:sr:sae:src:dst, +
     $z::EVex512|Disp8MemShift=6:StaticRounding|SAE:SAE:RegZMM|Unspecified|BaseIndex:RegYMM, +
     $i:AVX512VL:Disp8ShiftVL|IntelSyntax:::RegXMM|RegYMM|Unspecified|BaseIndex:RegXMM, +