From: Jonathan Wright Date: Mon, 14 Jun 2021 14:09:18 +0000 (+0100) Subject: aarch64: Model zero-high-half semantics of [SU]QXTN instructions X-Git-Tag: basepoints/gcc-13~6780 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d0889b5d37ff40149b44e3c7d82f693d430cd891;p=thirdparty%2Fgcc.git aarch64: Model zero-high-half semantics of [SU]QXTN instructions Split the aarch64_<su>qmovn<mode> pattern into separate scalar and vector variants. Further split the vector RTL pattern into big/little endian variants that model the zero-high-half semantics of the underlying instruction. Modeling these semantics allows for better RTL combinations while also removing some register allocation issues as the compiler now knows that the operation is totally destructive. Add new tests to narrow_zero_high_half.c to verify the benefit of this change. gcc/ChangeLog: 2021-06-14 Jonathan Wright * config/aarch64/aarch64-simd-builtins.def: Split generator for aarch64_<su>qmovn builtins into scalar and vector variants. * config/aarch64/aarch64-simd.md (aarch64_<su>qmovn<mode>_insn_le): Define. (aarch64_<su>qmovn<mode>_insn_be): Define. (aarch64_<su>qmovn<mode>): Split into scalar and vector variants. Change vector variant to an expander that emits the correct instruction depending on endianness. gcc/testsuite/ChangeLog: * gcc.target/aarch64/narrow_zero_high_half.c: Add new tests. --- diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 2adb4b127527..ac5d4fc7ff1e 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -271,8 +271,10 @@ BUILTIN_VQN (BINOP_UUS, sqxtun2, 0, NONE) /* Implemented by aarch64_<su>qmovn<mode>. */ - BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0, NONE) - BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0, NONE) + BUILTIN_VQN (UNOP, sqmovn, 0, NONE) + BUILTIN_SD_HSDI (UNOP, sqmovn, 0, NONE) + BUILTIN_VQN (UNOP, uqmovn, 0, NONE) + BUILTIN_SD_HSDI (UNOP, uqmovn, 0, NONE) /* Implemented by aarch64_<su>qxtn2<mode>. 
*/ BUILTIN_VQN (BINOP, sqxtn2, 0, NONE) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 59779b851fbe..2b75e57eb77a 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4875,10 +4875,54 @@ (define_insn "aarch64_<su>qmovn<mode>" [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") (SAT_TRUNC:<VNARROWQ> - (match_operand:VSQN_HSDI 1 "register_operand" "w")))] + (match_operand:SD_HSDI 1 "register_operand" "w")))] "TARGET_SIMD" "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" - [(set_attr "type" "neon_sat_shift_imm_narrow_q")] + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_insn "aarch64_<su>qmovn<mode>_insn_le" + [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") + (vec_concat:<VNARROWQ2> + (SAT_TRUNC:<VNARROWQ> + (match_operand:VQN 1 "register_operand" "w")) + (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" + "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_insn "aarch64_<su>qmovn<mode>_insn_be" + [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") + (vec_concat:<VNARROWQ2> + (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero") + (SAT_TRUNC:<VNARROWQ> + (match_operand:VQN 1 "register_operand" "w"))))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_expand "aarch64_<su>qmovn<mode>" + [(set (match_operand:<VNARROWQ> 0 "register_operand") + (SAT_TRUNC:<VNARROWQ> + (match_operand:VQN 1 "register_operand")))] + "TARGET_SIMD" + { + rtx tmp = gen_reg_rtx (<VNARROWQ2>mode); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1], + CONST0_RTX (<VNARROWQ>mode))); + else + emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1], + CONST0_RTX (<VNARROWQ>mode))); + + /* The intrinsic expects a narrow result, so emit a subreg that will get + optimized away as appropriate. 
*/ + emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp, + <VNARROWQ2>mode)); + DONE; + } ) (define_insn "aarch64_<su>qxtn2<mode>_le" diff --git a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c index 53e03d3594d4..aa6c7ef389dd 100644 --- a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c +++ b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c @@ -67,6 +67,13 @@ TEST_UNARY (vqmovun, uint8x16_t, int16x8_t, s16, u8) TEST_UNARY (vqmovun, uint16x8_t, int32x4_t, s32, u16) TEST_UNARY (vqmovun, uint32x4_t, int64x2_t, s64, u32) +TEST_UNARY (vqmovn, int8x16_t, int16x8_t, s16, s8) +TEST_UNARY (vqmovn, int16x8_t, int32x4_t, s32, s16) +TEST_UNARY (vqmovn, int32x4_t, int64x2_t, s64, s32) +TEST_UNARY (vqmovn, uint8x16_t, uint16x8_t, u16, u8) +TEST_UNARY (vqmovn, uint16x8_t, uint32x4_t, u32, u16) +TEST_UNARY (vqmovn, uint32x4_t, uint64x2_t, u64, u32) + /* { dg-final { scan-assembler-not "dup\\t" } } */ /* { dg-final { scan-assembler-times "\\tshrn\\tv" 6} } */ @@ -79,3 +86,5 @@ TEST_UNARY (vqmovun, uint32x4_t, int64x2_t, s64, u32) /* { dg-final { scan-assembler-times "\\tsqrshrun\\tv" 3} } */ /* { dg-final { scan-assembler-times "\\txtn\\tv" 6} } */ /* { dg-final { scan-assembler-times "\\tsqxtun\\tv" 3} } */ +/* { dg-final { scan-assembler-times "\\tuqxtn\\tv" 3} } */ +/* { dg-final { scan-assembler-times "\\tsqxtn\\tv" 3} } */