Using the define_substs in aarch64-simd.md, this is a straightforward annotation that removes
a redundant fmov insn.
So the codegen goes from:
foo_d:
fcvtn v0.2s, v0.2d
fmov d0, d0
ret
to the simple:
foo_d:
fcvtn v0.2s, v0.2d
ret
Bootstrapped and tested on aarch64-none-linux-gnu.
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (aarch64_float_truncate_lo_<mode>): Rename to...
(aarch64_float_truncate_lo_<mode><vczle><vczbe>): ... This.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/float_truncate_zero.c: New test.
}
)
-(define_insn "aarch64_float_truncate_lo_<mode>"
+(define_insn "aarch64_float_truncate_lo_<mode><vczle><vczbe>"
[(set (match_operand:VDF 0 "register_operand" "=w")
(float_truncate:VDF
(match_operand:<VWIDE> 1 "register_operand" "w")))]
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "--save-temps -O1" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** foo:
+** fcvtn v0.4h, v0.4s
+** ret
+*/
+
+float16x8_t
+foo (float32x4_t a) /* Codegen test: the expected body above is a single fcvtn followed by ret.  */
+{
+ float16x4_t b = vcvt_f16_f32 (a); /* Convert A to a 4-element half-precision vector.  */
+ return vcombine_f16 (b, vdup_n_f16 (0.0)); /* Pair B with an all-zero vector; no extra insn is expected for the zeros.  */
+}
+
+/*
+** foo_d:
+** fcvtn v0.2s, v0.2d
+** ret
+*/
+
+float32x4_t
+foo_d (float64x2_t a) /* Codegen test: the expected body above is a single fcvtn followed by ret (no fmov).  */
+{
+ float32x2_t b = vcvt_f32_f64 (a); /* Convert A to a 2-element single-precision vector.  */
+ return vcombine_f32 (b, vdup_n_f32 (0.0)); /* Pair B with an all-zero vector; no extra insn is expected for the zeros.  */
+}
+