From: Remi Machet
Date: Tue, 1 Jul 2025 12:45:04 +0000 (+0100)
Subject: AArch64 SIMD: convert mvn+shrn into mvni+subhn
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1cbb3122cb2779198b0dcfb8afc28df711e64138;p=thirdparty%2Fgcc.git

AArch64 SIMD: convert mvn+shrn into mvni+subhn

Add an optimization to the aarch64 SIMD backend that converts mvn+shrn
into mvni+subhn when possible.  Replacing the 'not' with a constant is
beneficial when the code sits inside a loop, since the constant can be
hoisted out of it.

The conversion is based on the fact that for an unsigned integer:
	-x = ~x + 1  =>  ~x = -1 - x
thus '(u8)(~x >> imm)' is equivalent to '(u8)(((u16)-1 - x) >> imm)'.

For the following function:

uint8x8_t neg_narrow_v8hi(uint16x8_t a) {
  uint16x8_t b = vmvnq_u16(a);
  return vshrn_n_u16(b, 8);
}

Without this patch the assembly looks like:

	not	v0.16b, v0.16b
	shrn	v0.8b, v0.8h, 8

After the patch it becomes:

	mvni	v31.4s, 0
	subhn	v0.8b, v31.8h, v0.8h

Bootstrapped and regtested on aarch64-linux-gnu.

Signed-off-by: Remi Machet

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (*shrn_to_subhn_<mode>): Add
	pattern converting mvn+shrn into mvni+subhn.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/simd/shrn2subhn.c: New test.

---

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e771defc73f..af574d5bb0a 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5046,6 +5046,36 @@
   DONE;
 })
 
+;; Convert (truncate)(~x >> imm) into (truncate)(((u16)-1 - x) >> imm)
+;; because it will result in the 'not' being replaced with a constant load,
+;; which allows for better loop optimization.
+;; We limit this to truncations that take the upper half and shift it to the
+;; lower half, as we use subhn (patterns that would have generated a shrn
+;; otherwise).
+;; On some implementations the use of subhn also results in better throughput.
+(define_insn_and_split "*shrn_to_subhn_<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=&w")
+	(truncate:<VNARROWQ>
+	  (lshiftrt:VQN
+	    (not:VQN (match_operand:VQN 1 "register_operand" "w"))
+	    (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top"))))]
+  "TARGET_SIMD"
+  "#"
+  "&& true"
+  [(const_int 0)]
+{
+  rtx tmp;
+  if (can_create_pseudo_p ())
+    tmp = gen_reg_rtx (<MODE>mode);
+  else
+    tmp = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
+  emit_move_insn (tmp, CONSTM1_RTX (<MODE>mode));
+  emit_insn (gen_aarch64_subhn<mode>_insn (operands[0], tmp,
+					   operands[1], operands[2]));
+  DONE;
+})
+
+
 ;; pmul.
 
 (define_insn "aarch64_pmul<mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/shrn2subhn.c b/gcc/testsuite/gcc.target/aarch64/simd/shrn2subhn.c
new file mode 100644
index 00000000000..f90ea134f09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/shrn2subhn.c
@@ -0,0 +1,36 @@
+/* This test case checks that replacing a not+shift by a sub -1 works.  */
+/* { dg-do compile } */
+/* { dg-additional-options "-O1" } */
+/* { dg-final { scan-assembler-times "\\tsubhn\\t" 6 } } */
+
+#include <arm_neon.h>
+
+uint8x8_t neg_narrow_v8hi(uint16x8_t a) {
+  uint16x8_t b = vmvnq_u16(a);
+  return vshrn_n_u16(b, 8);
+}
+
+uint8x8_t neg_narrow_vsubhn_v8hi(uint16x8_t a) {
+  uint16x8_t ones = vdupq_n_u16(0xffff);
+  return vsubhn_u16(ones, a);
+}
+
+uint16x4_t neg_narrow_v4si(uint32x4_t a) {
+  uint32x4_t b = vmvnq_u32(a);
+  return vshrn_n_u32(b, 16);
+}
+
+uint16x4_t neg_narrow_vsubhn_v4si(uint32x4_t a) {
+  uint32x4_t ones = vdupq_n_u32(0xffffffff);
+  return vsubhn_u32(ones, a);
+}
+
+uint32x2_t neg_narrow_v2di(uint64x2_t a) {
+  uint64x2_t b = ~a;
+  return vshrn_n_u64(b, 32);
+}
+
+uint32x2_t neg_narrow_vsubhn_v2di(uint64x2_t a) {
+  uint64x2_t ones = vdupq_n_u64(0xffffffffffffffff);
+  return vsubhn_u64(ones, a);
+}
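
---
Not part of the patch: a minimal standalone C sketch of the identity the
transformation relies on (for a u16 lane, the high byte of ~x equals the
high byte of (u16)-1 - x), checked exhaustively over all 16-bit inputs.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  for (uint32_t i = 0; i <= 0xffff; i++)
    {
      uint16_t x = (uint16_t) i;
      /* What mvn + shrn #8 computes per lane: the high byte of ~x.  */
      uint8_t via_not = (uint8_t) ((uint16_t) (~x) >> 8);
      /* What mvni + subhn computes per lane: the high byte of 0xffff - x.  */
      uint8_t via_sub = (uint8_t) ((uint16_t) (0xffff - x) >> 8);
      assert (via_not == via_sub);
    }
  printf ("high byte of ~x == high byte of (0xffff - x) for all 16-bit x\n");
  return 0;
}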