[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
- [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
- (vec_concat:<VNARROWQ2>
- (unspec:<VNARROWQ>
- [(match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
- VQSHRN_N)
- (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
- "TARGET_SIMD && !BYTES_BIG_ENDIAN"
- "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
- [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
- [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
- (vec_concat:<VNARROWQ2>
- (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
- (unspec:<VNARROWQ>
- [(match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
- VQSHRN_N)))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN"
+(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn<vczle><vczbe>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+ (unspec:<VNARROWQ>
+ [(match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+ VQSHRN_N))]
+ "TARGET_SIMD"
"<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
{
operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
INTVAL (operands[2]));
- rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
- if (BYTES_BIG_ENDIAN)
- emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
- operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
- else
- emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
- operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
-
- /* The intrinsic expects a narrow result, so emit a subreg that will get
- optimized away as appropriate. */
- emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
- <VNARROWQ2>mode));
+ emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn (operands[0],
+ operands[1],
+ operands[2]));
DONE;
}
)
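
For reference, a minimal C-level view of what this expander change affects, assuming (from the intrinsic naming convention) that vqshrn_n_s16 is routed through the aarch64_<sur>q<r>shr<u>n_n<mode> expander shown above; the function name below is made up for illustration. The old code built a 128-bit temporary whose high half was explicitly zero and then took a lowpart subreg of it; the new code emits the narrow-destination insn directly and leaves the "high half is zero" fact to the <vczle>/<vczbe> substitution variants.

#include <arm_neon.h>

/* Illustrative sketch only: vqshrn_n_s16 produces a 64-bit (int8x8_t)
   result.  With the change above, the expander emits the narrow-destination
   insn for it directly instead of going through a 128-bit temporary plus a
   lowpart subreg.  */
int8x8_t
narrow_example (int16x8_t a)
{
  return vqshrn_n_s16 (a, 3);
}
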
--- /dev/null
+/* PR target/99195. */
+/* Check that we take advantage of 64-bit Advanced SIMD operations clearing
+   the top half of the vector register and that no explicit zeroing
+   instructions are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define MYOP(OT,IT,IMT,OP,IS,OS) \
+OT \
+foo_##OP##_##OS (IT a) \
+{ \
+ IMT zeros = vcreate_##OS (0); \
+ return vcombine_##OS (v##OP##_##IS (a, 3), zeros); \
+}
+
+#define FUNC(OT,IT,IMT,IS,OS) \
+MYOP (OT, IT, IMT, qshrn_n, IS, OS) \
+MYOP (OT, IT, IMT, qrshrn_n, IS, OS)
+
+#define FUNCUN(OT,IT,IMT,IS,OS) \
+MYOP (OT, IT, IMT, qshrun_n, IS, OS) \
+MYOP (OT, IT, IMT, qrshrun_n, IS, OS)
+
+FUNC (int8x16_t, int16x8_t, int8x8_t, s16, s8)
+FUNC (int16x8_t, int32x4_t, int16x4_t, s32, s16)
+FUNC (int32x4_t, int64x2_t, int32x2_t, s64, s32)
+FUNCUN (uint8x16_t, int16x8_t, uint8x8_t, s16, u8)
+FUNCUN (uint16x8_t, int32x4_t, uint16x4_t, s32, u16)
+FUNCUN (uint32x4_t, int64x2_t, uint32x2_t, s64, u32)
+
+FUNC (uint8x16_t, uint16x8_t, uint8x8_t, u16, u8)
+FUNC (uint16x8_t, uint32x4_t, uint16x4_t, u32, u16)
+FUNC (uint32x4_t, uint64x2_t, uint32x2_t, u64, u32)
+
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+
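
To make the test's intent concrete, here is one instance of the MYOP macro expanded by hand (my expansion, not part of the patch). SQSHRN and the related narrowing shifts write a 64-bit result and clear the upper 64 bits of the destination register, so the vcombine with zeros should be absorbed into the narrowing shift itself, leaving no fmov/mov for the scan-assembler-not directives to find.

#include <arm_neon.h>

/* Hand-expanded from MYOP (int8x16_t, int16x8_t, int8x8_t, qshrn_n, s16, s8).
   Expected, per the test, to compile at -O to a single sqshrn plus ret, with
   no separate instruction zeroing the top half of the result register.  */
int8x16_t
foo_qshrn_n_s8 (int16x8_t a)
{
  int8x8_t zeros = vcreate_s8 (0);
  return vcombine_s8 (vqshrn_n_s16 (a, 3), zeros);
}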