From: Jan Beulich Date: Wed, 5 Jul 2023 07:41:09 +0000 (+0200) Subject: x86: use VPTERNLOG also for certain andnot forms X-Git-Tag: basepoints/gcc-15~7829 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2d11c99dfca3cc603dbbfafb3afc41689a68e40f;p=thirdparty%2Fgcc.git x86: use VPTERNLOG also for certain andnot forms When it's the memory operand which is to be inverted, using VPANDN* requires a further load instruction. The same can be achieved by a single VPTERNLOG*. Add two new alternatives (for plain memory and embedded broadcast), adjusting the predicate for the first operand accordingly. Two pre-existing testcases actually end up being affected (improved) by the change, which is reflected in updated expectations there. gcc/ PR target/93768 * config/i386/sse.md (*andnot3): Add new alternatives for memory form operand 1. gcc/testsuite/ PR target/93768 * gcc.target/i386/avx512f-andn-di-zmm-2.c: New test. * gcc.target/i386/avx512f-andn-si-zmm-2.c: Adjust expecations towards generated code. * gcc.target/i386/pr100711-3.c: Adjust expectations for 32-bit code. --- diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 73a873825196..2e6994e9eb65 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17210,11 +17210,13 @@ "TARGET_AVX512F") (define_insn "*andnot3" - [(set (match_operand:VI 0 "register_operand" "=x,x,v") + [(set (match_operand:VI 0 "register_operand" "=x,x,v,v,v") (and:VI - (not:VI (match_operand:VI 1 "vector_operand" "0,x,v")) - (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))] - "TARGET_SSE" + (not:VI (match_operand:VI 1 "bcst_vector_operand" "0,x,v,m,Br")) + (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr,v,v")))] + "TARGET_SSE + && (register_operand (operands[1], mode) + || register_operand (operands[2], mode))" { char buf[64]; const char *ops; @@ -17281,6 +17283,15 @@ case 2: ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; + case 3: + case 4: + tmp = "pternlog"; + ssesuffix = ""; + if (which_alternative != 4 || TARGET_AVX512VL) + ops = "v%s%s\t{$0x44, %%1, %%2, %%0|%%0, %%2, %%1, $0x44}"; + else + ops = "v%s%s\t{$0x44, %%g1, %%g2, %%g0|%%g0, %%g2, %%g1, $0x44}"; + break; default: gcc_unreachable (); } @@ -17289,7 +17300,7 @@ output_asm_insn (buf, operands); return ""; } - [(set_attr "isa" "noavx,avx,avx") + [(set_attr "isa" "noavx,avx,avx,*,*") (set_attr "type" "sselog") (set (attr "prefix_data16") (if_then_else @@ -17297,9 +17308,12 @@ (eq_attr "mode" "TI")) (const_string "1") (const_string "*"))) - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix" "orig,vex,evex,evex,evex") (set (attr "mode") - (cond [(match_test "TARGET_AVX2") + (cond [(and (eq_attr "alternative" "3,4") + (match_test " < 64 && !TARGET_AVX512VL")) + (const_string "XI") + (match_test "TARGET_AVX2") (const_string "") (match_test "TARGET_AVX") (if_then_else @@ -17310,7 +17324,15 @@ (match_test "optimize_function_for_size_p (cfun)")) (const_string "V4SF") ] - (const_string "")))]) + (const_string ""))) + (set (attr "enabled") + (cond [(eq_attr "alternative" "3") + (symbol_ref " == 64 || TARGET_AVX512VL") + (eq_attr "alternative" "4") + (symbol_ref " == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)") + ] + (const_string "*")))]) ;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn (define_split diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c new file mode 100644 index 000000000000..4ebb30fa213f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */ +/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vpbroadcast" } } */ + +#define type __m512i +#define vec 512 +#define op andnot +#define suffix epi64 +#define SCALAR long long + +#include "avx512-binop-2.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c index a9608ca095d4..86e7ebe7d72e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 } } */ -/* { dg-final { scan-assembler-times "vpandnd\[^\n\]*%zmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vpbroadcast" } } */ #define type __m512i #define vec 512 diff --git a/gcc/testsuite/gcc.target/i386/pr100711-3.c b/gcc/testsuite/gcc.target/i386/pr100711-3.c index e90f2a48d8d7..98cc1c35beb5 100644 --- a/gcc/testsuite/gcc.target/i386/pr100711-3.c +++ b/gcc/testsuite/gcc.target/i386/pr100711-3.c @@ -37,4 +37,6 @@ v8di foo_v8di (long long a, v8di b) return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) & b; } -/* { dg-final { scan-assembler-times "vpandn" 4 } } */ +/* { dg-final { scan-assembler-times "vpandn" 4 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpandn" 2 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x44" 2 { target { ia32 } } } } */