From: Jakub Jelinek Date: Thu, 1 Aug 2024 08:32:54 +0000 (+0200) Subject: i386: Fix up *<extract_type>_vinsert<shuffletype><extract_suf>_0 [PR115981] X-Git-Tag: basepoints/gcc-16~7080 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=df2b444a233e93b987adec76655ab89589b3fa10;p=thirdparty%2Fgcc.git i386: Fix up *<extract_type>_vinsert<shuffletype><extract_suf>_0 [PR115981] The r14-537 change started canonicalizing VEC_MERGE operands based on swap_commutative_operands_p or, if they have the same precedence, on the least significant bit of the third operand. The *<extract_type>_vinsert<shuffletype><extract_suf>_0 pattern was added for combine matching and no longer triggers after that change, as it used the reg_or_0_operand as the first operand and VEC_DUPLICATE as the second. Now, reg_or_0_operand could be a REG, SUBREG of object or CONST_VECTOR. REG has commutative_operand_precedence -1 or -2, SUBREG of object -3, CONST_VECTOR -4, while VEC_DUPLICATE has 0, so VEC_DUPLICATE will always go first and REG, SUBREG or CONST_VECTOR second. This patch swaps the operands so that it matches again. 2024-08-01 Jakub Jelinek PR target/115981 * config/i386/sse.md (*<extract_type>_vinsert<shuffletype><extract_suf>_0): Swap the first two VEC_MERGE operands, renumber match_operands and test for 0xF or 0x3 rather than 0xFFF0 or 0xFC immediate. * gcc.target/i386/avx512dq-pr90991-1.c: Add tests for no separate zero extension instructions. * gcc.target/i386/avx512dq-pr90991-2.c: Likewise. 
--- diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index f54e966bdbb..baaec689749 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -19692,47 +19692,47 @@ (define_insn "*_vinsert_0" [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv") (vec_merge:AVX512_VEC - (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C") (vec_duplicate:AVX512_VEC - (match_operand: 2 "nonimmediate_operand" "vm,xm,vm")) + (match_operand: 1 "nonimmediate_operand" "vm,xm,vm")) + (match_operand:AVX512_VEC 2 "reg_or_0_operand" "v,C,C") (match_operand:SI 3 "const_int_operand")))] "TARGET_AVX512F && (INTVAL (operands[3]) - == (GET_MODE_UNIT_SIZE (mode) == 4 ? 0xFFF0 : 0xFC))" + == (GET_MODE_UNIT_SIZE (mode) == 4 ? 0xF : 0x3))" { if (which_alternative == 0) - return "vinsert\t{$0, %2, %1, %0|%0, %1, %2, 0}"; + return "vinsert\t{$0, %1, %2, %0|%0, %2, %1, 0}"; bool egpr_used = (TARGET_APX_EGPR - && x86_extended_rex2reg_mentioned_p (operands[2])); - const char *align_templ = egpr_used ? "vmovaps\t{%2, %x0|%x0, %2}" - : "vmovdqa\t{%2, %x0|%x0, %2}"; - const char *unalign_templ = egpr_used ? "vmovups\t{%2, %x0|%x0, %2}" - : "vmovdqu\t{%2, %x0|%x0, %2}"; + && x86_extended_rex2reg_mentioned_p (operands[1])); + const char *align_templ = egpr_used ? "vmovaps\t{%1, %x0|%x0, %1}" + : "vmovdqa\t{%1, %x0|%x0, %1}"; + const char *unalign_templ = egpr_used ? 
"vmovups\t{%1, %x0|%x0, %1}" + : "vmovdqu\t{%1, %x0|%x0, %1}"; switch (mode) { case E_V8DFmode: - if (misaligned_operand (operands[2], mode)) - return "vmovupd\t{%2, %x0|%x0, %2}"; + if (misaligned_operand (operands[1], mode)) + return "vmovupd\t{%1, %x0|%x0, %1}"; else - return "vmovapd\t{%2, %x0|%x0, %2}"; + return "vmovapd\t{%1, %x0|%x0, %1}"; case E_V16SFmode: - if (misaligned_operand (operands[2], mode)) - return "vmovups\t{%2, %x0|%x0, %2}"; + if (misaligned_operand (operands[1], mode)) + return "vmovups\t{%1, %x0|%x0, %1}"; else - return "vmovaps\t{%2, %x0|%x0, %2}"; + return "vmovaps\t{%1, %x0|%x0, %1}"; case E_V8DImode: - if (misaligned_operand (operands[2], mode)) - return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}" + if (misaligned_operand (operands[1], mode)) + return which_alternative == 2 ? "vmovdqu64\t{%1, %x0|%x0, %1}" : unalign_templ; else - return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}" + return which_alternative == 2 ? "vmovdqa64\t{%1, %x0|%x0, %1}" : align_templ; case E_V16SImode: - if (misaligned_operand (operands[2], mode)) - return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}" + if (misaligned_operand (operands[1], mode)) + return which_alternative == 2 ? "vmovdqu32\t{%1, %x0|%x0, %1}" : unalign_templ; else - return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}" + return which_alternative == 2 ? 
"vmovdqa32\t{%1, %x0|%x0, %1}" : align_templ; default: gcc_unreachable (); diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-1.c index 6c968126b7d..5d19b07bdee 100644 --- a/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-1.c @@ -7,6 +7,9 @@ /* { dg-final { scan-assembler-times "vmovups\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */ /* { dg-final { scan-assembler-times "vmovupd\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vmovaps\[ \t]\+%xmm0, %xmm0" } } */ +/* { dg-final { scan-assembler-not "vmovapd\[ \t]\+%xmm0, %xmm0" } } */ +/* { dg-final { scan-assembler-not "vmovdqa\[ \t]\+%xmm0, %xmm0" } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-2.c index 7699c3149ae..68f53189d3e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-2.c @@ -7,6 +7,9 @@ /* { dg-final { scan-assembler-times "vmovups\[ \t]\+\\(\[^\n\r]*\\), %ymm0" 1 } } */ /* { dg-final { scan-assembler-times "vmovupd\[ \t]\+\\(\[^\n\r]*\\), %ymm0" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu\[ \t]\+\\(\[^\n\r]*\\), %ymm0" 1 } } */ +/* { dg-final { scan-assembler-not "vmovaps\[ \t]\+%ymm0, %ymm0" } } */ +/* { dg-final { scan-assembler-not "vmovapd\[ \t]\+%ymm0, %ymm0" } } */ +/* { dg-final { scan-assembler-not "vmovdqa\[ \t]\+%ymm0, %ymm0" } } */ #include