From: Kyrylo Tkachov Date: Wed, 31 May 2023 16:46:19 +0000 (+0100) Subject: aarch64: PR target/99195 Annotate dot-product patterns for vec-concat-zero X-Git-Tag: basepoints/gcc-15~8691 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d0c064c3eabc75cf83df296ebcd1db19b4a68851;p=thirdparty%2Fgcc.git aarch64: PR target/99195 Annotate dot-product patterns for vec-concat-zero This straightforward patch annotates the dotproduct instructions, including the i8mm ones. Tests included. Nothing unexpected here. Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf. gcc/ChangeLog: PR target/99195 * config/aarch64/aarch64-simd.md (<sur>dot_prod<vsi2qi>): Rename to... (<sur>dot_prod<vsi2qi><vczle><vczbe>): ... This. (usdot_prod<vsi2qi>): Rename to... (usdot_prod<vsi2qi><vczle><vczbe>): ... This. (aarch64_<sur>dot_lane<vsi2qi>): Rename to... (aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>): ... This. (aarch64_<sur>dot_laneq<vsi2qi>): Rename to... (aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>): ... This. (aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>): Rename to... (aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>): ... This. gcc/testsuite/ChangeLog: PR target/99195 * gcc.target/aarch64/simd/pr99195_11.c: New test. --- diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 1efae8d5e683..4904a50658bd 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -665,7 +665,7 @@ ;; ... ;; ;; and so the vectorizer provides r, in which the result has to be accumulated. -(define_insn "<sur>dot_prod<vsi2qi>" +(define_insn "<sur>dot_prod<vsi2qi><vczle><vczbe>" [(set (match_operand:VS 0 "register_operand" "=w") (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w") @@ -679,7 +679,7 @@ ;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot ;; (vector) Dot Product operation and the vectorized optab. -(define_insn "usdot_prod<vsi2qi>" +(define_insn "usdot_prod<vsi2qi><vczle><vczbe>" [(set (match_operand:VS 0 "register_operand" "=w") (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w") @@ -693,7 +693,7 @@ ;; These instructions map to the __builtins for the Dot Product ;; indexed operations. 
-(define_insn "aarch64_<sur>dot_lane<vsi2qi>" +(define_insn "aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>" [(set (match_operand:VS 0 "register_operand" "=w") (plus:VS (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w") @@ -709,7 +709,7 @@ [(set_attr "type" "neon_dot")] ) -(define_insn "aarch64_<sur>dot_laneq<vsi2qi>" +(define_insn "aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>" [(set (match_operand:VS 0 "register_operand" "=w") (plus:VS (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w") @@ -727,7 +727,7 @@ ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot ;; (by element) Dot Product operations. -(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>" +(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>" [(set (match_operand:VS 0 "register_operand" "=w") (plus:VS (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w") diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_11.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_11.c new file mode 100644 index 000000000000..1ca8c6abe5dd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_11.c @@ -0,0 +1,38 @@ +/* PR target/99195. */ +/* Check that we take advantage of 64-bit Advanced SIMD operations clearing + the top half of the vector register and no explicit zeroing instructions + are emitted. 
 */ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8.2-a+dotprod" } */ + +#include <arm_neon.h> + +#define DOTPROD(OT,AT,IT1,IT2,OP,S) \ +OT \ +foo_##OP##_##S (AT a, IT1 b, IT2 c) \ +{ \ + AT zeros = vcreate_##S (0); \ + return vcombine_##S (v##OP##_##S (a, b, c), zeros); \ +} + +#define DOTPROD_IDX(OT,AT,IT1,IT2,OP,S) \ +OT \ +foo_##OP##_##S (AT a, IT1 b, IT2 c) \ +{ \ + AT zeros = vcreate_##S (0); \ + return vcombine_##S (v##OP##_##S (a, b, c, 1), zeros); \ +} + +DOTPROD (int32x4_t, int32x2_t, int8x8_t, int8x8_t, dot, s32) +DOTPROD (uint32x4_t, uint32x2_t, uint8x8_t, uint8x8_t, dot, u32) +DOTPROD_IDX (int32x4_t, int32x2_t, int8x8_t, int8x8_t, dot_lane, s32) +DOTPROD_IDX (uint32x4_t, uint32x2_t, uint8x8_t, uint8x8_t, dot_lane, u32) + +#pragma GCC target ("+i8mm") +DOTPROD (int32x4_t, int32x2_t, uint8x8_t, int8x8_t, usdot, s32) +DOTPROD_IDX (int32x4_t, int32x2_t, uint8x8_t, int8x8_t, usdot_lane, s32) +DOTPROD_IDX (int32x4_t, int32x2_t, int8x8_t, uint8x8_t, sudot_lane, s32) + +/* { dg-final { scan-assembler-not {\tfmov\t} } } */ +/* { dg-final { scan-assembler-not {\tmov\t} } } */ +