]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Annotate fcvtn pattern for vec_concat with zeroes
author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Sun, 23 Apr 2023 13:44:13 +0000 (14:44 +0100)
committer: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Sun, 23 Apr 2023 13:44:13 +0000 (14:44 +0100)
Using the define_substs in aarch64-simd.md, this is a straightforward annotation that removes
a redundant fmov insn.

So the codegen goes from:
foo_d:
        fcvtn   v0.2s, v0.2d
        fmov    d0, d0
        ret

to the simple:
foo_d:
        fcvtn   v0.2s, v0.2d
        ret

Bootstrapped and tested on aarch64-none-linux-gnu.

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (aarch64_float_truncate_lo_<mode>): Rename to...
(aarch64_float_truncate_lo_<mode><vczle><vczbe>): ... This.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/float_truncate_zero.c: New test.

gcc/config/aarch64/aarch64-simd.md
gcc/testsuite/gcc.target/aarch64/float_truncate_zero.c [new file with mode: 0644]

index 4a1ec71995da427567cc4d0df104e870a4091e34..7bd4362318b638366453e7549dd6b7204276661e 100644 (file)
 }
 )
 
-(define_insn "aarch64_float_truncate_lo_<mode>"
+(define_insn "aarch64_float_truncate_lo_<mode><vczle><vczbe>"
   [(set (match_operand:VDF 0 "register_operand" "=w")
       (float_truncate:VDF
        (match_operand:<VWIDE> 1 "register_operand" "w")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/float_truncate_zero.c b/gcc/testsuite/gcc.target/aarch64/float_truncate_zero.c
new file mode 100644 (file)
index 0000000..41775d1
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-additional-options "--save-temps -O1" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** foo:
+**      fcvtn  v0.4h, v0.4s
+**      ret
+*/
+
+float16x8_t
+foo (float32x4_t a)
+{
+  float16x4_t b = vcvt_f16_f32 (a); /* Narrow f32 lanes to f16.  */
+  return vcombine_f16 (b, vdup_n_f16 (0.0)); /* Concatenate with zeroes.  */
+}
+
+/*
+** foo_d:
+**      fcvtn  v0.2s, v0.2d
+**      ret
+*/
+
+float32x4_t
+foo_d (float64x2_t a)
+{
+  float32x2_t b = vcvt_f32_f64 (a); /* Narrow f64 lanes to f32.  */
+  return vcombine_f32 (b, vdup_n_f32 (0.0)); /* Concatenate with zeroes.  */
+}
+