From: Andre Vieira Date: Fri, 3 Nov 2023 19:09:07 +0000 (+0000) Subject: vect: allow using inbranch simdclones for masked loops X-Git-Tag: basepoints/gcc-15~5026 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=aed00696a01ac065e9ed327434ec29d1cf50179e;p=thirdparty%2Fgcc.git vect: allow using inbranch simdclones for masked loops In a previous patch I did most of the work for this, but forgot to change the check for number of arguments matching between call and simdclone. This check should accept calls without a mask to be matched against simdclones with mask arguments. I also added tests to verify this feature actually works. gcc/ChangeLog: * tree-vect-stmts.cc (vectorizable_simd_clone_call): Allow unmasked calls to use masked simdclones. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-simd-clone-20.c: New file. * gfortran.dg/simd-builtins-1.h: Adapt. * gfortran.dg/simd-builtins-6.f90: Adapt. --- diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-20.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-20.c new file mode 100644 index 000000000000..9f51a68f3a0c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-20.c @@ -0,0 +1,87 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +/* Test that simd inbranch clones work correctly. */ + +#ifndef TYPE +#define TYPE int +#endif + +/* A simple function that will be cloned. */ +#pragma omp declare simd inbranch +TYPE __attribute__((noinline)) +foo (TYPE a) +{ + return a + 1; +} + +/* Check that "inbranch" clones are called correctly. */ + +void __attribute__((noipa)) +masked (TYPE * __restrict a, TYPE * __restrict b, int size) +{ + #pragma omp simd + for (int i = 0; i < size; i++) + b[i] = foo(a[i]); +} + +/* Check that "inbranch" works when there might be unrolling. 
*/ + +void __attribute__((noipa)) +masked_fixed (TYPE * __restrict a, TYPE * __restrict b) +{ + #pragma omp simd + for (int i = 0; i < 128; i++) + b[i] = foo(a[i]); +} + +/* Validate the outputs. */ + +void +check_masked (TYPE *b, int size) +{ + for (int i = 0; i < size; i++) + if (b[i] != (TYPE)(i + 1)) + { + __builtin_printf ("error at %d\n", i); + __builtin_exit (1); + } +} + +int +main () +{ + TYPE a[1024]; + TYPE b[1024]; + + for (int i = 0; i < 1024; i++) + a[i] = i; + + masked_fixed (a, b); + check_masked (b, 128); + + /* Test various sizes to cover machines with different vectorization + factors. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size); + check_masked (b, size); + } + + /* Test sizes that might exercise the partial vector code-path. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size-4); + check_masked (b, size-4); + } + + return 0; +} + +/* Ensure the in-branch simd clones are used on targets that support them. */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { x86_64*-*-* } } } } */ + +/* The LTO test produces two dump files and we scan the wrong one. 
*/ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gfortran.dg/simd-builtins-1.h b/gcc/testsuite/gfortran.dg/simd-builtins-1.h index 88d555cf41ad..08b73514a67d 100644 --- a/gcc/testsuite/gfortran.dg/simd-builtins-1.h +++ b/gcc/testsuite/gfortran.dg/simd-builtins-1.h @@ -1,4 +1,3 @@ -!GCC$ builtin (sin) attributes simd (inbranch) !GCC$ builtin (sinf) attributes simd (notinbranch) !GCC$ builtin (cosf) attributes simd !GCC$ builtin (cosf) attributes simd (notinbranch) diff --git a/gcc/testsuite/gfortran.dg/simd-builtins-6.f90 b/gcc/testsuite/gfortran.dg/simd-builtins-6.f90 index 60bcac78f3e0..2c68f9f1818a 100644 --- a/gcc/testsuite/gfortran.dg/simd-builtins-6.f90 +++ b/gcc/testsuite/gfortran.dg/simd-builtins-6.f90 @@ -2,7 +2,6 @@ ! { dg-additional-options "-nostdinc -Ofast -fdump-tree-optimized" } ! { dg-additional-options "-msse2 -mno-avx" { target i?86-*-linux* x86_64-*-linux* } } -!GCC$ builtin (sin) attributes simd (inbranch) !GCC$ builtin (sinf) attributes simd (notinbranch) !GCC$ builtin (cosf) attributes simd !GCC$ builtin (cosf) attributes simd (notinbranch) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index d374907095b5..f895aaf3083a 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -4149,10 +4149,19 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, { unsigned int this_badness = 0; unsigned int num_calls; + /* The number of arguments in the call and the number of parameters in + the simdclone should match. However, when the simdclone is + 'inbranch', it could have one more parameter than nargs when using + an inbranch simdclone to call a non-inbranch call, either in a + non-masked loop using an all-true constant mask, or inside a masked + loop using its mask. 
*/ + size_t simd_nargs = n->simdclone->nargs; + if (!masked_call_offset && n->simdclone->inbranch) + simd_nargs--; if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen, &num_calls) || (!n->simdclone->inbranch && (masked_call_offset > 0)) - || nargs != n->simdclone->nargs) + || (nargs != simd_nargs)) continue; if (num_calls != 1) this_badness += exact_log2 (num_calls) * 4096;