From: Jan Beulich Date: Fri, 24 Nov 2023 08:55:29 +0000 (+0100) Subject: x86: also prefer VEX encoding over EVEX one for VCVTNEPS2BF16 when possible X-Git-Tag: binutils-2_42~835 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=39bb3ade816faf42ec34cc7ca962ad350cb4d16a;p=thirdparty%2Fbinutils-gdb.git x86: also prefer VEX encoding over EVEX one for VCVTNEPS2BF16 when possible Deal with what 58bceb182740 ("x86: prefer VEX encodings over EVEX ones when possible") left out, for being slightly less straightforward. --- diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 2651cd2d25a..71e0c4d0635 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -7475,20 +7475,27 @@ match_template (char mnem_suffix) } /* Check whether to use the shorter VEX encoding for certain insns where - the EVEX enconding comes first in the table. This requires the respective - AVX-* feature to be explicitly enabled. */ - if (t == current_templates->start + the EVEX encoding comes first in the table. This requires the respective + AVX-* feature to be explicitly enabled. + + Most of the respective insns have just a single EVEX and a single VEX + template. The one that's presently different is generated using the + Vxy / Exy constructs: There are 3 suffix-less EVEX forms, the latter + two of which may fall back to their two corresponding VEX forms. */ + j = t->mnem_off != MN_vcvtneps2bf16 ? 
1 : 2; + if ((t == current_templates->start || j > 1) && t->opcode_modifier.disp8memshift && !t->opcode_modifier.vex && !need_evex_encoding () - && t + 1 < current_templates->end - && t[1].opcode_modifier.vex) + && t + j < current_templates->end + && t[j].opcode_modifier.vex) { i386_cpu_flags cpu; unsigned int memshift = i.memshift; i.memshift = 0; - cpu = cpu_flags_and (cpu_flags_from_attr (t[1].cpu), cpu_arch_isa_flags); + cpu = cpu_flags_and (cpu_flags_from_attr (t[j].cpu), + cpu_arch_isa_flags); if (!cpu_flags_all_zero (&cpu) && (!i.types[0].bitfield.disp8 || !operand_type_check (i.types[0], disp) @@ -7496,6 +7503,7 @@ match_template (char mnem_suffix) || fits_in_disp8 (i.op[0].disps->X_add_number))) { specific_error = progress (internal_error); + t += j - 1; continue; } i.memshift = memshift; diff --git a/gas/testsuite/gas/i386/avx-vex.l b/gas/testsuite/gas/i386/avx-vex.l index e409be39bae..d93768e2d62 100644 --- a/gas/testsuite/gas/i386/avx-vex.l +++ b/gas/testsuite/gas/i386/avx-vex.l @@ -27,7 +27,8 @@ [ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+B410 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -36,6 +37,12 @@ [ ]*[0-9]+[ ]+724808 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 [ ]*[0-9]+[ ]+7208 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E08 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 #... [ ]*[0-9]+[ ]+> \.arch \.noavx512vl [ ]*[0-9]+[ ]+> * @@ -56,7 +63,8 @@ [ ]*[0-9]+[ ]+00 [ ]*[0-9]+[ ]+> vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? 
C4E27E72 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -65,6 +73,12 @@ [ ]*[0-9]+[ ]+88000100 * [ ]*[0-9]+[ ]+00 [ ]*[0-9]+[ ]+> vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? C4E27A72 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+01 +[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+01 #... [ ]*[0-9]+[ ]+> \.arch \.noavx512f [ ]*[0-9]+[ ]+> * @@ -85,7 +99,8 @@ [ ]*[0-9]+[ ]+00 [ ]*[0-9]+[ ]+> vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -113,7 +128,8 @@ [ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+B410 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -122,6 +138,12 @@ [ ]*[0-9]+[ ]+724808 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 [ ]*[0-9]+[ ]+7208 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E08 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 #... [ ]*[0-9]+[ ]+> \.arch \.avx_ifma [ ]*[0-9]+[ ]+> * @@ -141,7 +163,8 @@ [ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+B410 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? 
62F27E28 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+72C8 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -150,6 +173,12 @@ [ ]*[0-9]+[ ]+724808 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 [ ]*[0-9]+[ ]+7208 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E08 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 +[ ]*[0-9]+[ ]+\?\?\?\? 62F27E28 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+7201 #... [ ]*[0-9]+[ ]+> \.arch \.avx_ne_convert [ ]*[0-9]+[ ]+> * @@ -169,7 +198,8 @@ [ ]*[0-9]+[ ]+\?\?\?\? 62F2F538 > vpmadd52luq \(%eax\)\{1to4\},%ymm1,%ymm2 [ ]*[0-9]+[ ]+B410 [ ]*[0-9]+[ ]+> * -[ ]*[0-9]+[ ]+>.* +[ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16 %ymm0,%xmm1 +[ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y %ymm0,%xmm1 [ ]*[0-9]+[ ]+C8 [ ]*[0-9]+[ ]+\?\?\?\? C4E27E72 > vcvtneps2bf16y 0x20\(%eax\),%xmm1 @@ -178,4 +208,10 @@ [ ]*[0-9]+[ ]+724808 [ ]*[0-9]+[ ]+\?\?\?\? 62F27E38 > vcvtneps2bf16y \(%eax\)\{1to8\},%xmm1 [ ]*[0-9]+[ ]+7208 +[ ]*[0-9]+[ ]+> * +[ ]*[0-9]+[ ]+> \.intel_syntax noprefix +[ ]*[0-9]+[ ]+\?\?\?\? C4E27A72 > vcvtneps2bf16 xmm0,xmmword ptr \[ecx\] +[ ]*[0-9]+[ ]+01 +[ ]*[0-9]+[ ]+\?\?\?\? 
C4E27E72 > vcvtneps2bf16 xmm0,ymmword ptr \[ecx\] +[ ]*[0-9]+[ ]+01 #pass diff --git a/gas/testsuite/gas/i386/avx-vex.s b/gas/testsuite/gas/i386/avx-vex.s index 1d87738c98f..0490853ed17 100644 --- a/gas/testsuite/gas/i386/avx-vex.s +++ b/gas/testsuite/gas/i386/avx-vex.s @@ -14,10 +14,15 @@ vpmadd52luq 0x100(%eax), %ymm1, %ymm2 vpmadd52luq (%eax){1to4}, %ymm1, %ymm2 -# vcvtneps2bf16 %ymm0, %xmm1 + vcvtneps2bf16 %ymm0, %xmm1 vcvtneps2bf16y %ymm0, %xmm1 vcvtneps2bf16y 0x20(%eax), %xmm1 vcvtneps2bf16y 0x100(%eax), %xmm1 vcvtneps2bf16y (%eax){1to8}, %xmm1 + .intel_syntax noprefix + vcvtneps2bf16 xmm0, xmmword ptr [ecx] + vcvtneps2bf16 xmm0, ymmword ptr [ecx] + .att_syntax prefix + .endr diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index 167c0a0249f..b170d70d69a 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -1481,6 +1481,9 @@ gf2p8mulb, 0x660f38cf, GFNI, Modrm||NoSuf, { RegXMM|Uns true_us:1f:C> // <Vxy> is used for VEX instructions with x/y suffixes. +// NOTE: The order of the "unnamed" ($-prefixed) entries here needs to remain +// in sync with <Exy>, for match_template()'s EVEX-to-VEX lowering to +// continue to work. // <Exy> is used for EVEX instructions with x/y suffixes. +// NOTE: The order of the "unnamed" ($-prefixed) entries here needs to remain +// in sync with <Vxy>, for match_template()'s EVEX-to-VEX lowering to +// continue to work.