From: Kyrylo Tkachov
Date: Tue, 6 Jun 2023 09:51:34 +0000 (+0100)
Subject: aarch64: Reimplement URSHR,SRSHR patterns with standard RTL codes
X-Git-Tag: basepoints/gcc-15~8574
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9371640999eedb8bac3fb9d1429db8a1a905b853;p=thirdparty%2Fgcc.git

aarch64: Reimplement URSHR,SRSHR patterns with standard RTL codes

Having converted the patterns for the URSRA,SRSRA instructions to
standard RTL codes, we can also easily convert the non-accumulating
forms URSHR,SRSHR.  This patch does that, reusing the various helpers
and predicates from that patch in a straightforward way.  This allows
GCC to perform the optimisations in the testcase, matching what Clang
does.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (aarch64_<sur>shr_n<mode>):
	Delete.
	(aarch64_<sra_op>rshr_n<mode>_insn): New define_insn.
	(aarch64_<sra_op>rshr_n<mode>): New define_expand.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/simd/vrshr_1.c: New test.
---

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index f7cf39f930ca..dd1b084f8569 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6586,17 +6586,47 @@
 
 ;; vrshr_n
 
-(define_insn "aarch64_<sur>shr_n<mode>"
+(define_insn "aarch64_<sra_op>rshr_n<mode>_insn"
   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
-	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
-			   (match_operand:SI 2
-			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
-		      VRSHR_N))]
-  "TARGET_SIMD"
-  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
+	(truncate:VSDQ_I_DI
+	  (SHIFTRT:<V2XWIDE>
+	    (plus:<V2XWIDE>
+	      (<SHIFTEXTEND>:<V2XWIDE>
+		(match_operand:VSDQ_I_DI 1 "register_operand" "w"))
+	      (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+	    (match_operand:VSDQ_I_DI 2 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>"))))]
+  "TARGET_SIMD
+   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+  "<sra_op>rshr\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
   [(set_attr "type" "neon_sat_shift_imm")]
 )
 
+(define_expand "aarch64_<sra_op>rshr_n<mode>"
+  [(match_operand:VSDQ_I_DI 0 "register_operand")
+   (SHIFTRT:VSDQ_I_DI
+     (match_operand:VSDQ_I_DI 1 "register_operand")
+     (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
+  "TARGET_SIMD"
+  {
+    /* Use this expander to create the rounding constant vector, which is
+       1 << (shift - 1).  Use wide_int here to ensure that the right TImode
+       RTL is generated when handling the DImode expanders.  */
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+    rtx shft = gen_int_mode (INTVAL (operands[2]), DImode);
+    rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+    if (VECTOR_MODE_P (<MODE>mode))
+      {
+	shft = gen_const_vec_duplicate (<MODE>mode, shft);
+	rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
+      }
+
+    emit_insn (gen_aarch64_<sra_op>rshr_n<mode>_insn (operands[0], operands[1],
+						      shft, rnd));
+    DONE;
+  }
+)
+
 ;; v(r)sra_n
 
 (define_insn "aarch64_<sur>sra_ndi"
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vrshr_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vrshr_1.c
new file mode 100644
index 000000000000..a5e10ff99e93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vrshr_1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** foo1:
+**	srsra	v0\.16b, v1\.16b, 3
+**	ret
+*/
+
+int8x16_t
+foo1 (int8x16_t acc, int8x16_t a)
+{
+  return vaddq_s8 (acc, vrshrq_n_s8 (a, 3));
+}
+
+/*
+** foo2:
+**	srshr	v0\.16b, v1\.16b, 3
+**	ret
+*/
+
+int8x16_t
+foo2 (int8x16_t acc, int8x16_t a)
+{
+  int8x16_t z = vdupq_n_s8 (0);
+  return vrsraq_n_s8 (z, a, 3);
+}
+
+/*
+** foo3:
+**	ursra	v0\.16b, v1\.16b, 3
+**	ret
+*/
+
+uint8x16_t
+foo3 (uint8x16_t acc, uint8x16_t a)
+{
+  return vaddq_u8 (acc, vrshrq_n_u8 (a, 3));
+}
+
+/*
+** foo4:
+**	urshr	v0\.16b, v1\.16b, 3
+**	ret
+*/
+
+uint8x16_t
+foo4 (uint8x16_t acc, uint8x16_t a)
+{
+  uint8x16_t z = vdupq_n_u8 (0);
+  return vrsraq_n_u8 (z, a, 3);
+}
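
Editor's note: for readers unfamiliar with the shape of the new define_insn,
it encodes the standard rounding-shift arithmetic: widen the element, add
the rounding constant 1 << (shift - 1), shift right, then truncate back to
the element width.  Below is a minimal scalar model of the unsigned (URSHR)
case for a single uint8_t element.  It is purely illustrative and not part
of the patch; the name urshr_model is made up for this sketch.

#include <stdint.h>

/* Illustrative model of
   (truncate (lshiftrt (plus (zero_extend x) (1 << (shift - 1))) shift)).
   Doing the addition in the 2x-wide type means adding the rounding
   constant can never overflow the intermediate sum.  */
static uint8_t
urshr_model (uint8_t x, int shift)	/* 1 <= shift <= 8, per the predicate.  */
{
  uint16_t wide = (uint16_t) x;			/* <SHIFTEXTEND> into <V2XWIDE>.  */
  uint16_t rnd = (uint16_t) 1 << (shift - 1);	/* operands[3], one rounding element.  */
  return (uint8_t) ((wide + rnd) >> shift);	/* lshiftrt, then truncate.  */
}

The signed SRSHR case is the same except that the extension is a
sign_extend and the shift an ashiftrt, which is exactly what the SHIFTRT
code iterator and the <SHIFTEXTEND> attribute parameterise in the pattern.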