From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Wed, 10 May 2023 09:44:30 +0000 (+0100)
Subject: aarch64: Simplify QSHRN expanders and patterns
X-Git-Tag: basepoints/gcc-15~9487
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e7fe650692d532551ea066a378af25b3ca207bb1;p=thirdparty%2Fgcc.git

aarch64: Simplify QSHRN expanders and patterns

This patch deletes the explicit BYTES_BIG_ENDIAN and !BYTES_BIG_ENDIAN
patterns for the QSHRN instructions in favour of annotating a single one
with <vczle><vczbe>.  This allows simplification of the expander too.
Tests are added to ensure that we still optimise away the
concat-with-zero use case.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md
	(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le): Delete.
	(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be): Delete.
	(aarch64_<sur>q<r>shr<u>n_n<mode>_insn<vczle><vczbe>): New define_insn.
	(aarch64_<sur>q<r>shr<u>n_n<mode>): Simplify expander.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/simd/pr99195_5.c: New test.
---

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9ad0489f79a7..c1d51e366a3d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6569,28 +6569,13 @@ [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-	(vec_concat:<VNARROWQ2>
-	  (unspec:<VNARROWQ>
-		[(match_operand:VQN 1 "register_operand" "w")
-		 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
-		VQSHRN_N)
-	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-	(vec_concat:<VNARROWQ2>
-	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
-	  (unspec:<VNARROWQ>
-		[(match_operand:VQN 1 "register_operand" "w")
-		 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
-		VQSHRN_N)))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn<vczle><vczbe>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+	(unspec:<VNARROWQ>
+	  [(match_operand:VQN 1 "register_operand" "w")
+	   (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+	  VQSHRN_N))]
+  "TARGET_SIMD"
   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
@@ -6605,18 +6590,9 @@
 {
   operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 						   INTVAL (operands[2]));
-  rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-  if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
-		operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
-  else
-    emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
-		operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
-
-  /* The intrinsic expects a narrow result, so emit a subreg that will get
-     optimized away as appropriate.  */
-  emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-					       <VNARROWQ2>mode));
+  emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn (operands[0],
+							operands[1],
+							operands[2]));
   DONE;
 }
 )
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c
new file mode 100644
index 000000000000..a07f82179cc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c
@@ -0,0 +1,40 @@
+/* PR target/99195.  */
+/* Check that we take advantage of 64-bit Advanced SIMD operations clearing
+   the top half of the vector register and no explicit zeroing instructions
+   are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define MYOP(OT,IT,IMT,OP,IS,OS) \
+OT \
+foo_##OP##_##OS (IT a) \
+{ \
+  IMT zeros = vcreate_##OS (0); \
+  return vcombine_##OS (v##OP##_##IS (a, 3), zeros); \
+}
+
+#define FUNC(OT,IT,IMT,IS,OS) \
+MYOP (OT, IT, IMT, qshrn_n, IS, OS) \
+MYOP (OT, IT, IMT, qrshrn_n, IS, OS)
+
+#define FUNCUN(OT,IT,IMT,IS,OS) \
+MYOP (OT, IT, IMT, qshrun_n, IS, OS) \
+MYOP (OT, IT, IMT, qrshrun_n, IS, OS)
+
+FUNC (int8x16_t, int16x8_t, int8x8_t, s16, s8)
+FUNC (int16x8_t, int32x4_t, int16x4_t, s32, s16)
+FUNC (int32x4_t, int64x2_t, int32x2_t, s64, s32)
+FUNCUN (uint8x16_t, int16x8_t, uint8x8_t, s16, u8)
+FUNCUN (uint16x8_t, int32x4_t, uint16x4_t, s32, u16)
+FUNCUN (uint32x4_t, int64x2_t, uint32x2_t, s64, u32)
+
+FUNC (uint8x16_t, uint16x8_t, uint8x8_t, u16, u8)
+FUNC (uint16x8_t, uint32x4_t, uint16x4_t, u32, u16)
+FUNC (uint32x4_t, uint64x2_t, uint32x2_t, u64, u32)
+
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+
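For reference, the concat-with-zero idiom the new test guards against regressing
boils down to the following (a minimal standalone sketch distilled from one
pr99195_5.c instantiation; the function name foo is arbitrary and not part of
the patch):

    #include <arm_neon.h>

    /* Saturating narrowing shift of the 128-bit input, concatenated with a
       zero 64-bit half.  With the <vczle><vczbe>-annotated pattern this
       should compile at -O to a single "sqshrn v0.8b, v0.8h, #3", with no
       extra fmov/mov to clear the upper half of the destination.  */
    int8x16_t
    foo (int16x8_t a)
    {
      int8x8_t lo = vqshrn_n_s16 (a, 3);
      return vcombine_s8 (lo, vcreate_s8 (0));
    }

On big-endian the zero half sits at the other end of the register, which is
what the <vczbe> variant of the annotated pattern covers.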