From c8977cf5f2daa9fecfc5d67a737506d0d31c578a Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Wed, 10 May 2023 11:50:01 +0100 Subject: [PATCH] aarch64: PR target/99195 annotate simple saturating add/sub patterns for vec-concat-zero Moving onto the saturating instructions, this one goes through the simple add/sub ones. Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf. gcc/ChangeLog: PR target/99195 * config/aarch64/aarch64-simd.md (aarch64_q): Rename to... (aarch64_q): ... This. (aarch64_qadd): Rename to... (aarch64_qadd): ... This. gcc/testsuite/ChangeLog: PR target/99195 * gcc.target/aarch64/simd/pr99195_1.c: Add testing for qadd, qsub. * gcc.target/aarch64/simd/pr99195_6.c: New test. --- gcc/config/aarch64/aarch64-simd.md | 4 +-- .../gcc.target/aarch64/simd/pr99195_1.c | 18 +++++------ .../gcc.target/aarch64/simd/pr99195_6.c | 30 +++++++++++++++++++ 3 files changed, 41 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index c1d51e366a3d..dc6efa0fe815 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -5236,7 +5236,7 @@ ) ;; q -(define_insn "aarch64_q" +(define_insn "aarch64_q" [(set (match_operand:VSDQ_I 0 "register_operand" "=w") (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w") (match_operand:VSDQ_I 2 "register_operand" "w")))] @@ -5247,7 +5247,7 @@ ;; suqadd and usqadd -(define_insn "aarch64_qadd" +(define_insn "aarch64_qadd" [(set (match_operand:VSDQ_I 0 "register_operand" "=w") (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0") (match_operand:VSDQ_I 2 "register_operand" "w")] diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c index 5801598d4293..4e6b3412749c 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c @@ -57,17 +57,17 @@ OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11) OPSEVEN (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ OPSEVEN (T, IS, OS, S, OP8, OP9, OP10, OP11, OP12, OP13, OP14) -#define OPSEVENTEEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11,OP12,OP13,OP14,OP15,OP16,OP17) \ -OPSEVEN (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ -OPTEN (T, IS, OS, S, OP8, OP9, OP10, OP11, OP12, OP13, OP14, OP15, OP16, OP17) +#define OPNINETEEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11,OP12,OP13,OP14,OP15,OP16,OP17,OP18,OP19) \ +OPEIGHT (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7, OP8) \ +OPELEVEN (T, IS, OS, S, OP9, OP10, OP11, OP12, OP13, OP14, OP15, OP16, OP17, OP18, OP19) -OPSEVENTEEN (int8, 8, 16, s8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) -OPSEVENTEEN (int16, 4, 8, s16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) -OPSEVENTEEN (int32, 2, 4, s32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) +OPNINETEEN (int8, 8, 16, s8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) +OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) +OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) -OPSEVENTEEN (uint8, 8, 16, u8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) -OPSEVENTEEN (uint16, 4, 8, u16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) -OPSEVENTEEN (uint32, 2, 4, u32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) +OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) +OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) +OPNINETEEN (uint32, 2, 4, u32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) OPFOURTEEN (float32, 2, 4, f32, add, padd, sub, mul, div, max, maxnm, min, minnm, abd, pmax, pmin, pmaxnm, pminnm) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c new file mode 100644 index 000000000000..52ad2709400a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c @@ -0,0 +1,30 @@ +/* PR target/99195. */ +/* Check that we take advantage of 64-bit Advanced SIMD operations clearing + the top half of the vector register and no explicit zeroing instructions + are emitted. */ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +#include + +#define MYOP(OT,IT1,IT2,OP,OS) \ +OT \ +foo_##OP##_##OS (IT1 a, IT2 b) \ +{ \ + IT1 zeros = vcreate_##OS (0); \ + return vcombine_##OS (v##OP##_##OS (a, b), zeros); \ +} + +MYOP (int8x16_t, int8x8_t, uint8x8_t, uqadd, s8) +MYOP (int16x8_t, int16x4_t, uint16x4_t, uqadd, s16) +MYOP (int32x4_t, int32x2_t, uint32x2_t, uqadd, s32) +MYOP (int64x2_t, int64x1_t, uint64x1_t, uqadd, s64) + +MYOP (uint8x16_t, uint8x8_t, int8x8_t, sqadd, u8) +MYOP (uint16x8_t, uint16x4_t, int16x4_t, sqadd, u16) +MYOP (uint32x4_t, uint32x2_t, int32x2_t, sqadd, u32) +MYOP (uint64x2_t, uint64x1_t, int64x1_t, sqadd, u64) + +/* { dg-final { scan-assembler-not {\tfmov\t} } } */ +/* { dg-final { scan-assembler-not {\tmov\t} } } */ + -- 2.47.2