AArch64: Fix copysign patterns

author Wilco Dijkstra <wilco.dijkstra@arm.com>

Tue, 15 Oct 2024 16:22:23 +0000 (16:22 +0000)

committer Wilco Dijkstra <wilco.dijkstra@arm.com>

Wed, 23 Oct 2024 13:20:01 +0000 (13:20 +0000)
author Wilco Dijkstra <wilco.dijkstra@arm.com>
Tue, 15 Oct 2024 16:22:23 +0000 (16:22 +0000)
committer Wilco Dijkstra <wilco.dijkstra@arm.com>
Wed, 23 Oct 2024 13:20:01 +0000 (13:20 +0000)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md

index c54b29cd64b9e0dc6c6d12735049386ccedc5408..ec9c731498815d6efa2066fe6d6d5bd19591a3f5 100644 (file)
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7218,13 +7218,12 @@
  }
  )
  
-;; For copysign (x, y), we want to generate:
+;; For copysignf (x, y), we want to generate:
  ;;
-;;   LDR d2, #(1 << 63)
-;;   BSL v2.8b, [y], [x]
+;;     movi    v31.4s, 0x80, lsl 24
+;;     bit     v0.16b, v1.16b, v31.16b
  ;;
-;; or another, equivalent, sequence using one of BSL/BIT/BIF.  Because
-;; we expect these operations to nearly always operate on
+;; Because we expect these operations to nearly always operate on
  ;; floating-point values, we do not want the operation to be
  ;; simplified into a bit-field insert operation that operates on the
  ;; integer side, since typically that would involve three inter-bank
@@ -7239,32 +7238,25 @@
     (match_operand:GPF 2 "nonmemory_operand")]
    "TARGET_SIMD"
  {
-  rtx signbit_const = GEN_INT (HOST_WIDE_INT_M1U
-                              << (GET_MODE_BITSIZE (<MODE>mode) - 1));
-  /* copysign (x, -1) should instead be expanded as orr with the sign
-     bit.  */
+  rtx sign = GEN_INT (HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (<MODE>mode) - 1));
+  rtx v_bitmask = gen_const_vec_duplicate (<VQ_INT_EQUIV>mode, sign);
+  v_bitmask = force_reg (<VQ_INT_EQUIV>mode, v_bitmask);
+
+  /* copysign (x, -1) should instead be expanded as orr with the signbit.  */
    rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
+
    if (GET_CODE (op2_elt) == CONST_DOUBLE
        && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
      {
-      rtx v_bitmask
-       = force_reg (V2<V_INT_EQUIV>mode,
-                    gen_const_vec_duplicate (V2<V_INT_EQUIV>mode,
-                                             signbit_const));
-
-      emit_insn (gen_iorv2<v_int_equiv>3 (
-       lowpart_subreg (V2<V_INT_EQUIV>mode, operands[0], <MODE>mode),
-       lowpart_subreg (V2<V_INT_EQUIV>mode, operands[1], <MODE>mode),
+      emit_insn (gen_ior<vq_int_equiv>3 (
+       lowpart_subreg (<VQ_INT_EQUIV>mode, operands[0], <MODE>mode),
+       lowpart_subreg (<VQ_INT_EQUIV>mode, operands[1], <MODE>mode),
         v_bitmask));
        DONE;
      }
-
-  machine_mode int_mode = <V_INT_EQUIV>mode;
-  rtx bitmask = gen_reg_rtx (int_mode);
-  emit_move_insn (bitmask, signbit_const);
    operands[2] = force_reg (<MODE>mode, operands[2]);
    emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
-                                      bitmask));
+                                      v_bitmask));
    DONE;
  }
  )
@@ -7273,23 +7265,21 @@
    [(set (match_operand:GPF 0 "register_operand")
         (unspec:GPF [(match_operand:GPF 1 "register_operand")
                      (match_operand:GPF 2 "register_operand")
-                    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
+                    (match_operand:<VQ_INT_EQUIV> 3 "register_operand")]
          UNSPEC_COPYSIGN))]
    "TARGET_SIMD"
    {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type  ]
       [ w        , w , w , 0 ; neon_bsl<q>  ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
       [ w        , 0 , w , w ; neon_bsl<q>  ] bit\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
       [ w        , w , 0 , w ; neon_bsl<q>  ] bif\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype>
-     [ r        , r , 0 , X ; bfm          ] bfxil\t%<w1>0, %<w1>1, #0, <sizem1>
    }
  )
  
-
-;; For xorsign (x, y), we want to generate:
+;; For xorsignf (x, y), we want to generate:
  ;;
-;; LDR   d2, #1<<63
-;; AND   v3.8B, v1.8B, v2.8B
-;; EOR   v0.8B, v0.8B, v3.8B
+;;     movi    v31.4s, 0x80, lsl 24
+;;     and     v31.16b, v31.16b, v1.16b
+;;     eor     v0.16b, v31.16b, v0.16b
  ;;
  
  (define_expand "@xorsign<mode>3"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

index efba78375c26d6a22bb1554ddccd1cec171c099a..e13837504208d55a1cd28fc9469d09a3298799bf 100644 (file)
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1891,6 +1891,14 @@
                                (VNx8SF  "vnx8si") (VNx16SF "vnx16si")
  ])
  
+;; Mode with floating-point values replaced by 128-bit vector integers.
+(define_mode_attr VQ_INT_EQUIV [(DF   "V2DI")   (SF   "V4SI")
+])
+
+;; Lower case mode with floating-point values replaced by 128-bit vector integers.
+(define_mode_attr vq_int_equiv [(DF   "v2di")   (SF   "v4si")
+])
+
  ;; Floating-point equivalent of selected modes.
  (define_mode_attr V_FP_EQUIV [(VNx8HI "VNx8HF") (VNx8HF "VNx8HF")
                               (VNx8BF "VNx8HF")
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_3.c b/gcc/testsuite/gcc.target/aarch64/copysign_3.c

new file mode 100644 (file)

index 0000000..be48682
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign_3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+float f1 (float x, float y)
+{
+  return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+  return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 2 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_4.c b/gcc/testsuite/gcc.target/aarch64/copysign_4.c

new file mode 100644 (file)

index 0000000..f3cec2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign_4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a+sve" } */
+
+float f1 (float x, float y)
+{
+  return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+  return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tz" 1 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c

index 18d10ee834d5d9b4361d890447060e78f09d3a73..9fe8e9bde6965875816e2aa722c36028ac233198 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -9,7 +9,7 @@
  
  /*
  ** f1:
-**     orr     v[0-9]+.2s, #?128, lsl #?24
+**     orr     v[0-9]+.4s, #?128, lsl #?24
  **     ret
  */
  float32_t f1 (float32_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c

index fe08fe31fe87aab4a7ce8497d05488a42fe9ae21..cc97c95d1521be6693f3182b485bab2aa4b1daa0 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -7,7 +7,7 @@
  
  /*
  ** f1:
-**     orr     v0.2s, #?128, lsl #?24
+**     orr     v0.4s, #?128, lsl #?24
  **     ret
  */
  float32_t f1 (float32_t a)
author	Wilco Dijkstra <wilco.dijkstra@arm.com>
	Tue, 15 Oct 2024 16:22:23 +0000 (16:22 +0000)
committer	Wilco Dijkstra <wilco.dijkstra@arm.com>
	Wed, 23 Oct 2024 13:20:01 +0000 (13:20 +0000)
gcc/config/aarch64/aarch64.md		patch | blob | blame | history
gcc/config/aarch64/iterators.md		patch | blob | blame | history
gcc/testsuite/gcc.target/aarch64/copysign_3.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/aarch64/copysign_4.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c		patch | blob | blame | history
gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c		patch | blob | blame | history