;; to describe the permute that is also required, but even if that is done
;; the permute would have been created as a LOAD_LANES which means the values
;; in the registers are in the wrong order.
-(define_insn "aarch64_fcadd<rot><mode>"
+(define_insn "aarch64_fcadd<rot><mode><vczle><vczbe>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")]
"TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
)
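
The <vczle><vczbe> suffixes hook the pattern into the define_subst-based machinery
added for PR target/99195: they generate extra variants of the insn whose destination
is a vec_concat of the 64-bit result with zeroes, so combine can fold an explicit
vcombine-with-zero into the instruction itself.  A minimal sketch of the source idiom
this enables (illustrative only; the function name is hypothetical):

#include <arm_neon.h>

/* Writing the 64-bit FCADD result to a D register already clears the top
   half of the Q register, so the vcombine with zeros should cost nothing.  */
float32x4_t
fcadd_zero_top (float32x2_t a, float32x2_t b)
{
  return vcombine_f32 (vcadd_rot90_f32 (a, b), vcreate_f32 (0));
}
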
-(define_insn "aarch64_fcmla<rot><mode>"
+(define_insn "aarch64_fcmla<rot><mode><vczle><vczbe>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
- (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
- (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
+ (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
(match_operand:VHSDF 3 "register_operand" "w")]
- FCMLA)))]
+ FCMLA)
+ (match_operand:VHSDF 1 "register_operand" "0")))]
"TARGET_COMPLEX"
"fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
[(set_attr "type" "neon_fcmla")]
)
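
Note that besides the rename, this hunk swaps the two arms of the plus so the
(unspec ... FCMLA) comes first and the accumulator (operand 1, tied to the
destination) second.  This matches the canonical ordering of commutative RTL
operands that combine produces, which the new substituted patterns must match.
At the source level the accumulation looks like this (a hedged illustration with
a hypothetical function name; requires -march=armv8.3-a):

#include <arm_neon.h>

/* vcmla_f32 (acc, b, c) adds a partial complex product of b and c to acc;
   acc is operand 1 in the pattern above.  */
float32x2_t
cmla_acc (float32x2_t acc, float32x2_t b, float32x2_t c)
{
  return vcmla_f32 (acc, b, c);
}
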
-(define_insn "aarch64_fcmla_lane<rot><mode>"
+(define_insn "aarch64_fcmla_lane<rot><mode><vczle><vczbe>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
- (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
- (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
+ (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
(match_operand:VHSDF 3 "register_operand" "w")
(match_operand:SI 4 "const_int_operand" "n")]
- FCMLA)))]
+ FCMLA)
+ (match_operand:VHSDF 1 "register_operand" "0")))]
"TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)
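
The lane form multiplies by one complex pair selected by the immediate operand 4,
which aarch64_endian_lane_rtx remaps for big-endian lane numbering.  An illustrative
use (hypothetical function name):

#include <arm_neon.h>

float32x2_t
cmla_lane (float32x2_t acc, float32x2_t b, float32x2_t c)
{
  /* Lane 0 selects the only complex pair in a 64-bit float32 vector.  */
  return vcmla_lane_f32 (acc, b, c, 0);
}
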
-(define_insn "aarch64_fcmla_laneq<rot>v4hf"
+(define_insn "aarch64_fcmla_laneq<rot>v4hf<vczle><vczbe>"
[(set (match_operand:V4HF 0 "register_operand" "=w")
- (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
- (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
+ (plus:V4HF (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
(match_operand:V8HF 3 "register_operand" "w")
(match_operand:SI 4 "const_int_operand" "n")]
- FCMLA)))]
+ FCMLA)
+ (match_operand:V4HF 1 "register_operand" "0")))]
"TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)
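
The laneq variant selects the multiplier pair from a 128-bit vector, which is why
operand 3 is V8HF while the arithmetic is done in V4HF.  A sketch (hypothetical
function name; requires fp16 support):

#include <arm_neon.h>

float16x4_t
cmla_laneq (float16x4_t acc, float16x4_t b, float16x8_t c)
{
  /* The complex pair is taken from lane 0 of the 128-bit operand.  */
  return vcmla_laneq_f16 (acc, b, c, 0);
}
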
(define_insn "aarch64_fcmlaq_lane<rot><mode>"
[(set (match_operand:VQ_HSF 0 "register_operand" "=w")
- (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
- (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
+ (plus:VQ_HSF (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
(match_operand:<VHALF> 3 "register_operand" "w")
(match_operand:SI 4 "const_int_operand" "n")]
- FCMLA)))]
+ FCMLA)
+ (match_operand:VQ_HSF 1 "register_operand" "0")))]
"TARGET_COMPLEX"
{
  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
  operands[4]
    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)
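
aarch64_fcmlaq_lane keeps its old name: it writes a full 128-bit Q register, so
there is no zeroed top half for <vczle><vczbe> to describe.  For contrast
(hypothetical function name):

#include <arm_neon.h>

float32x4_t
cmlaq_lane (float32x4_t acc, float32x4_t b, float32x2_t c)
{
  /* Q-register destination: all 128 bits are written.  */
  return vcmlaq_lane_f32 (acc, b, c, 0);
}
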
--- /dev/null
+/* PR target/99195. */
+/* Check that we take advantage of 64-bit Advanced SIMD operations clearing
+ the top half of the vector register and no explicit zeroing instructions
+ are emitted. */
+/* { dg-do compile } */
+/* { dg-options "-O -march=armv8.3-a+fp16" } */
+
+#include <arm_neon.h>
+
+#define BINARY(OT,IT,OP,S) \
+OT \
+foo_##OP##_##S (IT a, IT b, IT c) \
+{ \
+ IT zeros = vcreate_##S (0); \
+ return vcombine_##S (v##OP##_##S (a, b), zeros); \
+}
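+/* For instance, BINARY (float32x4_t, float32x2_t, cadd_rot90, f32) defines
+   foo_cadd_rot90_f32, which returns
+   vcombine_f32 (vcadd_rot90_f32 (a, b), zeros).  The unused parameter c
+   keeps the signature uniform with the TERNARY variants below.  */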
+
+#define FUNC(T,IS,OS,OP,S) BINARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+#define OPTWO(T,IS,OS,S,OP1,OP2) \
+FUNC (T, IS, OS, OP1, S) \
+FUNC (T, IS, OS, OP2, S)
+
+#define OPTHREE(T, IS, OS, S, OP1, OP2, OP3) \
+FUNC (T, IS, OS, OP1, S) \
+OPTWO (T, IS, OS, S, OP2, OP3)
+
+#define OPFOUR(T,IS,OS,S,OP1,OP2,OP3,OP4) \
+FUNC (T, IS, OS, OP1, S) \
+OPTHREE (T, IS, OS, S, OP2, OP3, OP4)
+
+OPTWO (float16, 4, 8, f16, cadd_rot90, cadd_rot270)
+OPTWO (float32, 2, 4, f32, cadd_rot90, cadd_rot270)
+
+#define TERNARY(OT,IT,OP,S) \
+OT \
+foo_##OP##_##S (IT a, IT b, IT c) \
+{ \
+ IT zeros = vcreate_##S (0); \
+ return vcombine_##S (v##OP##_##S (a, b, c), zeros); \
+}
+
+#undef FUNC
+#define FUNC(T,IS,OS,OP,S) TERNARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+OPFOUR (float16, 4, 8, f16, cmla, cmla_rot90, cmla_rot180, cmla_rot270)
+OPFOUR (float32, 2, 4, f32, cmla, cmla_rot90, cmla_rot180, cmla_rot270)
+
+#define TERNARY_IDX(OT,IT,OP,S) \
+OT \
+foo_##OP##_##S (IT a, IT b, IT c) \
+{ \
+ IT zeros = vcreate_##S (0); \
+ return vcombine_##S (v##OP##_##S (a, b, c, 0), zeros); \
+}
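+/* Lane index 0 is used throughout because it is valid for every type
+   tested: a float32x2_t holds a single complex pair, so 0 is its only
+   valid lane.  */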
+
+#undef FUNC
+#define FUNC(T,IS,OS,OP,S) TERNARY_IDX (T##x##OS##_t, T##x##IS##_t, OP, S)
+OPFOUR (float16, 4, 8, f16, cmla_lane, cmla_rot90_lane, cmla_rot180_lane, cmla_rot270_lane)
+OPFOUR (float32, 2, 4, f32, cmla_lane, cmla_rot90_lane, cmla_rot180_lane, cmla_rot270_lane)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+