From: Jan Beulich Date: Wed, 5 Jul 2023 07:48:47 +0000 (+0200) Subject: x86: further PR target/100711-like splitting X-Git-Tag: basepoints/gcc-15~7826 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fa58c2871a1235cb5ba475303a2bd11ae90416d5;p=thirdparty%2Fgcc.git x86: further PR target/100711-like splitting With respective two-operand bitwise operations now expressable by a single VPTERNLOG, add splitters to also deal with ior and xor counterparts of the original and-only case. Note that the splitters need to be separate, as the placement of "not" differs in the final insns (*iornot<mode>3, *xnor<mode>3) which are intended to pick up one half of the result. gcc/ PR target/100711 * config/i386/sse.md: New splitters to simplify not;vec_duplicate;{ior,xor} as vec_duplicate;{iornot,xnor}. gcc/testsuite/ PR target/100711 * gcc.target/i386/pr100711-4.c: New test. * gcc.target/i386/pr100711-5.c: New test. --- diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ab9fd675d7e3..d828143c189f 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17366,6 +17366,36 @@ (match_dup 2)))] "operands[3] = gen_reg_rtx (<MODE>mode);") +(define_split + [(set (match_operand:VI 0 "register_operand") + (ior:VI + (vec_duplicate:VI + (not:<ssescalarmode> + (match_operand:<ssescalarmode> 1 "nonimmediate_operand"))) + (match_operand:VI 2 "vector_operand")))] + "<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)" + [(set (match_dup 3) + (vec_duplicate:VI (match_dup 1))) + (set (match_dup 0) + (ior:VI (not:VI (match_dup 3)) (match_dup 2)))] + "operands[3] = gen_reg_rtx (<MODE>mode);") + +(define_split + [(set (match_operand:VI 0 "register_operand") + (xor:VI + (vec_duplicate:VI + (not:<ssescalarmode> + (match_operand:<ssescalarmode> 1 "nonimmediate_operand"))) + (match_operand:VI 2 "vector_operand")))] + "<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)" + [(set (match_dup 3) + (vec_duplicate:VI (match_dup 1))) + (set (match_dup 0) + (not:VI (xor:VI (match_dup 3) (match_dup 2))))] + "operands[3] = 
gen_reg_rtx (<MODE>mode);") + (define_insn "*andnot<mode>3_mask" [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") (vec_merge:VI48_AVX512VL diff --git a/gcc/testsuite/gcc.target/i386/pr100711-4.c b/gcc/testsuite/gcc.target/i386/pr100711-4.c new file mode 100644 index 000000000000..3ca524f8a8a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr100711-4.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mno-avx512vl -mprefer-vector-width=512 -O2" } */ + +typedef char v64qi __attribute__ ((vector_size (64))); +typedef short v32hi __attribute__ ((vector_size (64))); +typedef int v16si __attribute__ ((vector_size (64))); +typedef long long v8di __attribute__((vector_size (64))); + +v64qi foo_v64qi (char a, v64qi b) +{ + return (__extension__ (v64qi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b; +} + +v32hi foo_v32hi (short a, v32hi b) +{ + return (__extension__ (v32hi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b; +} + +v16si foo_v16si (int a, v16si b) +{ + return (__extension__ (v16si) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b; +} + +v8di foo_v8di (long long a, v8di b) +{ + return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b; +} + +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xbb" 4 { target { ! 
ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xbb" 2 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xdd" 2 { target { ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100711-5.c b/gcc/testsuite/gcc.target/i386/pr100711-5.c new file mode 100644 index 000000000000..161fbfcc2562 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr100711-5.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mno-avx512vl -mprefer-vector-width=512 -O2" } */ + +typedef char v64qi __attribute__ ((vector_size (64))); +typedef short v32hi __attribute__ ((vector_size (64))); +typedef int v16si __attribute__ ((vector_size (64))); +typedef long long v8di __attribute__((vector_size (64))); + +v64qi foo_v64qi (char a, v64qi b) +{ + return (__extension__ (v64qi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b; +} + +v32hi foo_v32hi (short a, v32hi b) +{ + return (__extension__ (v32hi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b; +} + +v16si foo_v16si (int a, v16si b) +{ + return (__extension__ (v16si) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b; +} + +v8di foo_v8di (long long a, v8di b) +{ + return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b; +} + +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x99" 4 } } */