aarch64: Simplify QSHRN expanders and patterns
author     Kyrylo Tkachov <kyrylo.tkachov@arm.com>
           Wed, 10 May 2023 09:44:30 +0000 (10:44 +0100)
committer  Kyrylo Tkachov <kyrylo.tkachov@arm.com>
           Wed, 10 May 2023 09:44:30 +0000 (10:44 +0100)
This patch deletes the explicit BYTES_BIG_ENDIAN and !BYTES_BIG_ENDIAN
patterns for the QSHRN instructions in favour of a single pattern
annotated with <vczle><vczbe>. This also allows the expander to be
simplified. Tests are added to ensure that we still optimise away the
concat-with-zero use case; a minimal example of that pattern follows.
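
A sketch of the use case the new tests cover (the function name foo is
illustrative; at -O this should compile to a single sqshrn instruction
with no separate zeroing mov/fmov):

  #include <arm_neon.h>

  int8x16_t
  foo (int16x8_t a)
  {
    /* Narrow with saturation, then concatenate with zeros.  The 64-bit
       sqshrn result already has a zeroed top half, so no extra move
       should be emitted.  */
    int8x8_t zeros = vcreate_s8 (0);
    return vcombine_s8 (vqshrn_n_s16 (a, 3), zeros);
  }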

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md
(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le): Delete.
(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be): Delete.
(aarch64_<sur>q<r>shr<u>n_n<mode>_insn<vczle><vczbe>): New define_insn.
(aarch64_<sur>q<r>shr<u>n_n<mode>): Simplify expander.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/simd/pr99195_5.c: New test.

gcc/config/aarch64/aarch64-simd.md
gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c [new file with mode: 0644]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9ad0489f79a7ad6e68d920b1b345ee274e372b6c..c1d51e366a3d4c0f722c338e7ea6614c1145a5ff 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (unspec:<VNARROWQ>
-               [(match_operand:VQN 1 "register_operand" "w")
-                (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
-               VQSHRN_N)
-         (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
-         (unspec:<VNARROWQ>
-               [(match_operand:VQN 1 "register_operand" "w")
-                (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
-               VQSHRN_N)))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn<vczle><vczbe>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+       (unspec:<VNARROWQ>
+         [(match_operand:VQN 1 "register_operand" "w")
+          (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+          VQSHRN_N))]
+  "TARGET_SIMD"
   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
   {
     operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                 INTVAL (operands[2]));
-    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
-                   operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
-    else
-      emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
-                   operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
-
-    /* The intrinsic expects a narrow result, so emit a subreg that will get
-       optimized away as appropriate.  */
-    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-                                                <VNARROWQ2>mode));
+    emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn (operands[0],
+                                                         operands[1],
+                                                         operands[2]));
     DONE;
   }
 )
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c
new file mode 100644
index 0000000..a07f821
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c
@@ -0,0 +1,40 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and no explicit zeroing instructions
+    are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define MYOP(OT,IT,IMT,OP,IS,OS)                         \
+OT                                              \
+foo_##OP##_##OS (IT a)                     \
+{                                               \
+  IMT zeros = vcreate_##OS (0);                   \
+  return vcombine_##OS (v##OP##_##IS (a, 3), zeros);      \
+}
+
+#define FUNC(OT,IT,IMT,IS,OS)                  \
+MYOP (OT, IT, IMT, qshrn_n, IS, OS)            \
+MYOP (OT, IT, IMT, qrshrn_n, IS, OS)
+
+#define FUNCUN(OT,IT,IMT,IS,OS)                        \
+MYOP (OT, IT, IMT, qshrun_n, IS, OS)           \
+MYOP (OT, IT, IMT, qrshrun_n, IS, OS)
+
+FUNC (int8x16_t, int16x8_t, int8x8_t, s16, s8)
+FUNC (int16x8_t, int32x4_t, int16x4_t, s32, s16)
+FUNC (int32x4_t, int64x2_t, int32x2_t, s64, s32)
+FUNCUN (uint8x16_t, int16x8_t, uint8x8_t, s16, u8)
+FUNCUN (uint16x8_t, int32x4_t, uint16x4_t, s32, u16)
+FUNCUN (uint32x4_t, int64x2_t, uint32x2_t, s64, u32)
+
+FUNC (uint8x16_t, uint16x8_t, uint8x8_t, u16, u8)
+FUNC (uint16x8_t, uint32x4_t, uint16x4_t, u32, u16)
+FUNC (uint32x4_t, uint64x2_t, uint32x2_t, u64, u32)
+
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+