From: Kyrylo Tkachov
Date: Wed, 10 May 2023 09:40:06 +0000 (+0100)
Subject: aarch64: PR target/99195 annotate simple narrowing patterns for vec-concat-zero
X-Git-Tag: basepoints/gcc-15~9488
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d1e7f9993084b87e6676a5ccef3c8b7f807a6013;p=thirdparty%2Fgcc.git

aarch64: PR target/99195 annotate simple narrowing patterns for vec-concat-zero

This patch cleans up some almost-duplicate patterns for the XTN, SQXTN,
UQXTN instructions.  Using the <vczle><vczbe> attributes we can remove the
BYTES_BIG_ENDIAN and !BYTES_BIG_ENDIAN cases, as well as the intrinsic
expanders that select between the two.  Tests are also added.
Thankfully the diffstat comes out negative \O/.
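
For illustration only (not part of the committed patch), this is the shape of
source the vec-concat-zero annotation is meant to catch, modelled on the MYOP
macro added to pr99195_4.c below; the function name is made up and the
expected codegen assumes the annotation applies as described:

  #include <arm_neon.h>

  /* Narrow and concatenate with zeros: with the annotated patterns this
     should collapse to a single sqxtn, with no extra fmov/mov to clear
     the upper half of the result register.  */
  int16x8_t
  foo_qmovn_s16 (int32x4_t a)
  {
    int16x4_t zeros = vcreate_s16 (0);
    return vcombine_s16 (vqmovn_s32 (a), zeros);
  }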

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

        PR target/99195
        * config/aarch64/aarch64-simd.md (aarch64_xtn<mode>_insn_le): Delete.
        (aarch64_xtn<mode>_insn_be): Likewise.
        (trunc<mode><Vnarrowq>2): Rename to...
        (trunc<mode><Vnarrowq>2<vczle><vczbe>): ... This.
        (aarch64_xtn<mode>): Move under the above.  Just emit the truncate RTL.
        (aarch64_<su>qmovn<mode>): Likewise.
        (aarch64_<su>qmovn<mode><vczle><vczbe>): New define_insn.
        (aarch64_<su>qmovn<mode>_insn_le): Delete.
        (aarch64_<su>qmovn<mode>_insn_be): Likewise.

gcc/testsuite/ChangeLog:

        PR target/99195
        * gcc.target/aarch64/simd/pr99195_4.c: Add tests for vmovn, vqmovn.
---
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 46038889573a..9ad0489f79a7 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1743,47 +1743,6 @@
 
 ;; Narrowing operations.
 
-(define_insn "aarch64_xtn<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
-          (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "xtn\\t%0.<Vntype>, %1.<Vtype>"
-  [(set_attr "type" "neon_move_narrow_q")]
-)
-
-(define_insn "aarch64_xtn<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
-          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "xtn\\t%0.<Vntype>, %1.<Vtype>"
-  [(set_attr "type" "neon_move_narrow_q")]
-)
-
-(define_expand "aarch64_xtn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand")
-        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
-  "TARGET_SIMD"
-  {
-    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
-                                CONST0_RTX (<VNARROWQ>mode)));
-    else
-      emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
-                                CONST0_RTX (<VNARROWQ>mode)));
-
-    /* The intrinsic expects a narrow result, so emit a subreg that will get
-       optimized away as appropriate.  */
-    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-                                                 <VNARROWQ2>mode));
-    DONE;
-  }
-)
-
 (define_insn "aarch64_xtn2<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
         (vec_concat:<VNARROWQ2>
@@ -5300,7 +5259,7 @@
 
 ;; sqmovn and uqmovn
 
-(define_insn "aarch64_<su>qmovn<mode>"
+(define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
         (SAT_TRUNC:<VNARROWQ>
           (match_operand:SD_HSDI 1 "register_operand" "w")))]
@@ -5309,48 +5268,13 @@
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_<su>qmovn<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (SAT_TRUNC:<VNARROWQ>
-            (match_operand:VQN 1 "register_operand" "w"))
-          (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_<su>qmovn<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
-          (SAT_TRUNC:<VNARROWQ>
-            (match_operand:VQN 1 "register_operand" "w"))))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
-)
-
-(define_expand "aarch64_<su>qmovn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+(define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
         (SAT_TRUNC:<VNARROWQ>
-          (match_operand:VQN 1 "register_operand")))]
+          (match_operand:VQN 1 "register_operand" "w")))]
   "TARGET_SIMD"
-  {
-    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1],
-                                CONST0_RTX (<VNARROWQ>mode)));
-    else
-      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1],
-                                CONST0_RTX (<VNARROWQ>mode)));
-
-    /* The intrinsic expects a narrow result, so emit a subreg that will get
-       optimized away as appropriate.  */
-    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-                                                 <VNARROWQ2>mode));
-    DONE;
-  }
+  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
+  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
 (define_insn "aarch64_<su>qxtn2<mode>_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
         (vec_concat:<VNARROWQ2>
@@ -9281,7 +9205,7 @@
 )
 
 ;; Truncate a 128-bit integer vector to a 64-bit vector.
-(define_insn "trunc<mode><Vnarrowq>2"
+(define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
   "TARGET_SIMD"
@@ -9289,6 +9213,15 @@
   [(set_attr "type" "neon_move_narrow_q")]
 )
 
+;; Expander for the intrinsics that only takes one mode unlike the two-mode
+;; trunc optab.
+(define_expand "aarch64_xtn<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
+  "TARGET_SIMD"
+  {}
+)
+
 (define_insn "aarch64_bfdot<mode>"
   [(set (match_operand:VDQSF 0 "register_operand" "=w")
         (plus:VDQSF
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
index b6ef15b6a972..6127cb26781b 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
@@ -15,7 +15,6 @@ foo_##OP##_##OS (IT a, IT b) \
   return vcombine_##OS (v##OP##_##IS (a, b), zeros); \
 }
 
-
 #define FUNC(OT,IT,IMT,IS,OS) \
 MYOP (OT, IT, IMT, addhn, IS, OS) \
 MYOP (OT, IT, IMT, subhn, IS, OS) \
@@ -30,6 +29,27 @@ FUNC (uint8x16_t, uint16x8_t, uint8x8_t, u16, u8)
 FUNC (uint16x8_t, uint32x4_t, uint16x4_t, u32, u16)
 FUNC (uint32x4_t, uint64x2_t, uint32x2_t, u64, u32)
 
+#undef MYOP
+#define MYOP(OT,IT,IMT,OP,IS,OS) \
+OT \
+foo_##OP##_##OS (IT a) \
+{ \
+  IMT zeros = vcreate_##OS (0); \
+  return vcombine_##OS (v##OP##_##IS (a), zeros); \
+}
+
+#undef FUNC
+#define FUNC(OP) \
+MYOP (int8x16_t, int16x8_t, int8x8_t, OP, s16, s8) \
+MYOP (int16x8_t, int32x4_t, int16x4_t, OP, s32, s16) \
+MYOP (int32x4_t, int64x2_t, int32x2_t, OP, s64, s32) \
+MYOP (uint8x16_t, uint16x8_t, uint8x8_t, OP, u16, u8) \
+MYOP (uint16x8_t, uint32x4_t, uint16x4_t, OP, u32, u16) \
+MYOP (uint32x4_t, uint64x2_t, uint32x2_t, OP, u64, u32) \
+
+FUNC (movn)
+FUNC (qmovn)
+
 /* { dg-final { scan-assembler-not {\tfmov\t} } } */
 /* { dg-final { scan-assembler-not {\tmov\t} } } */