From: Christophe Lyon Date: Wed, 9 Jun 2021 16:07:43 +0000 (+0000) Subject: arm: Auto-vectorization for MVE: vclz X-Git-Tag: basepoints/gcc-13~6978 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7969d9c83d061e57ea80795768469cffb1a859f8;p=thirdparty%2Fgcc.git arm: Auto-vectorization for MVE: vclz This patch adds support for auto-vectorization of clz for MVE. It does so by removing the unspec from mve_vclzq_<supf><mode> and uses 'clz' instead. It moves the neon_vclz<mode> expander from neon.md to vec-common.md and renames it into the standard name clz<mode>2. 2021-06-09 Christophe Lyon gcc/ * config/arm/iterators.md (<supf>): Remove VCLZQ_U, VCLZQ_S. (VCLZQ): Remove. * config/arm/mve.md (mve_vclzq_<supf><mode>): Add '@' prefix, remove <supf> iterator. (mve_vclzq_u<mode>): New. * config/arm/neon.md (clz<mode>2): Rename to neon_vclz<mode>. (neon_vclz<mode>): [remainder of the ChangeLog, the iterators.md diff and the mve.md hunk header were lost in extraction] -(define_insn "mve_vclzq_<supf><mode>" +(define_insn "@mve_vclzq_s<mode>" [ (set (match_operand:MVE_2 0 "s_register_operand" "=w") - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")] - VCLZQ)) + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE" "vclz.i%#<V_sz_elem> %q0, %q1" [(set_attr "type" "mve_move") ]) +(define_expand "mve_vclzq_u<mode>" + [ + (set (match_operand:MVE_2 0 "s_register_operand") + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand"))) + ] + "TARGET_HAVE_MVE" +) ;; ;; [vclsq_s]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 18571d819eb0..0fdffaf4ec48 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -3018,7 +3018,7 @@ [(set_attr "type" "neon_cls")] ) -(define_insn "clz<mode>2" +(define_insn "neon_vclz<mode>" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] "TARGET_NEON" @@ -3026,15 +3026,6 @@ [(set_attr "type" "neon_cnt")] ) -(define_expand "neon_vclz<mode>" - [(match_operand:VDQIW 0 "s_register_operand") - (match_operand:VDQIW 1 "s_register_operand")] - "TARGET_NEON" -{ - emit_insn (gen_clz<mode>2 (operands[0], operands[1])); - DONE; -}) - (define_insn "popcount<mode>2" [(set 
(match_operand:VE 0 "s_register_operand" "=w") (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index ed1bc293b784..ad1c6edd0055 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -556,8 +556,6 @@ VQABSQ_S VDUPQ_N_U VDUPQ_N_S - VCLZQ_U - VCLZQ_S VCLSQ_S VADDVQ_S VADDVQ_U diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 2779c1a8aaa2..430a92ce966d 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -625,3 +625,10 @@ operands[0], operands[1], operands[2])); DONE; }) + +(define_expand "clz<mode>2" + [(set (match_operand:VDQIW 0 "s_register_operand") + (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand")))] + "ARM_HAVE_<MODE>_ARITH + && !TARGET_REALLY_IWMMXT" +) diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c new file mode 100644 index 000000000000..7068736bc283 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O3" } */ + +#include <stdint.h> + +#define FUNC(SIGN, TYPE, BITS, NAME) \ + void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \ + TYPE##BITS##_t *a) { \ + int i; \ + for (i=0; i < (128 / BITS); i++) { \ + dest[i] = (TYPE##BITS##_t)__builtin_clz(a[i]); \ + } \ +} + +FUNC(s, int, 32, clz) +FUNC(u, uint, 32, clz) +FUNC(s, int, 16, clz) +FUNC(u, uint, 16, clz) +FUNC(s, int, 8, clz) +FUNC(u, uint, 8, clz) + +/* 16 and 8-bit versions are not vectorized because they need pack/unpack + patterns since __builtin_clz uses 32-bit parameter and return value. 
*/ +/* { dg-final { scan-assembler-times {vclz\.i32 q[0-9]+, q[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vclz\.i16 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {vclz\.i8 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */