The final patch in the series tackles the most complex of this family of patterns, UABAL2 and SABAL2.
These extract the high halves of the sources, perform an absdiff on them, widen the result and accumulate it.
The motivating testcase for this patch (series) is included. The required simplification doesn't actually
trigger with just the RTL pattern change because rtx_costs blocks it.
So this patch also extends the rtx costs to recognise the (minus (smax (x, y)) (smin (x, y))) expression we use
to describe absdiff in the backend and to avoid recursing into its arms.
This allows us to generate the single-instruction sequence expected here.
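To make the operation concrete, here is a scalar C model of what UABAL2 computes on unsigned
bytes; the function and its names are purely illustrative and not part of the patch. SABAL2 is
the signed analogue of the same operation.

/* Take the high halves of the two 16-byte sources, form the element-wise
   absolute difference as max (x, y) - min (x, y), widen it to 16 bits and
   accumulate into the destination.  */
void
uabal2_model (unsigned short acc[8], const unsigned char a[16],
	      const unsigned char b[16])
{
  for (int i = 0; i < 8; i++)
    {
      unsigned char x = a[i + 8], y = b[i + 8];
      unsigned char mx = x > y ? x : y;
      unsigned char mn = x > y ? y : x;
      acc[i] += (unsigned short) (mx - mn);
    }
}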
Bootstrapped and tested on aarch64-none-linux-gnu.
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (aarch64_<sur>abal2<mode>): Rename to...
(aarch64_<su>abal2<mode>_insn): ... This. Use RTL codes instead of unspec.
(aarch64_<su>abal2<mode>): New define_expand.
* config/aarch64/aarch64.cc (aarch64_abd_rtx_p): New function.
(aarch64_rtx_costs): Handle ABD rtxes.
* config/aarch64/aarch64.md (UNSPEC_SABAL2, UNSPEC_UABAL2): Delete.
* config/aarch64/iterators.md (ABAL2): Delete.
(sur): Remove handling of UNSPEC_UABAL2 and UNSPEC_SABAL2.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/simd/vabal_combine.c: New test.
[(set_attr "type" "neon_arith_acc<q>")]
)
-(define_insn "aarch64_<sur>abal2<mode>"
- [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (unspec:<VWIDE> [(match_operand:VQW 2 "register_operand" "w")
- (match_operand:VQW 3 "register_operand" "w")
- (match_operand:<VWIDE> 1 "register_operand" "0")]
- ABAL2))]
+(define_insn "aarch64_<su>abal2<mode>_insn"
+ [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
+ (plus:<VDBLW>
+ (zero_extend:<VDBLW>
+ (minus:<VHALF>
+ (USMAX:<VHALF>
+ (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 4 "vect_par_cnst_hi_half" ""))
+ (vec_select:<VHALF>
+ (match_operand:VQW 3 "register_operand" "w")
+ (match_dup 4)))
+ (<max_opp>:<VHALF>
+ (vec_select:<VHALF>
+ (match_dup 2)
+ (match_dup 4))
+ (vec_select:<VHALF>
+ (match_dup 3)
+ (match_dup 4)))))
+ (match_operand:<VDBLW> 1 "register_operand" "0")))]
"TARGET_SIMD"
- "<sur>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
+ "<su>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
[(set_attr "type" "neon_arith_acc<q>")]
)
+(define_expand "aarch64_<su>abal2<mode>"
+ [(match_operand:<VDBLW> 0 "register_operand")
+ (match_operand:<VDBLW> 1 "register_operand")
+ (USMAX:VQW
+ (match_operand:VQW 2 "register_operand")
+ (match_operand:VQW 3 "register_operand"))]
+ "TARGET_SIMD"
+ {
+ rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+ emit_insn (gen_aarch64_<su>abal2<mode>_insn (operands[0], operands[1],
+ operands[2], operands[3], hi));
+ DONE;
+ }
+)
+
(define_insn "aarch64_<sur>adalp<mode>"
[(set (match_operand:<VDBLW> 0 "register_operand" "=w")
(unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w")
return (t == (t & -t));
}
+/* Return true if X is an RTX representing an operation in the ABD family
+ of instructions. */
+
+static bool
+aarch64_abd_rtx_p (rtx x)
+{
+ if (GET_CODE (x) != MINUS)
+ return false;
+ rtx max_arm = XEXP (x, 0);
+ rtx min_arm = XEXP (x, 1);
+ if (GET_CODE (max_arm) != SMAX && GET_CODE (max_arm) != UMAX)
+ return false;
+ bool signed_p = GET_CODE (max_arm) == SMAX;
+ if (signed_p && GET_CODE (min_arm) != SMIN)
+ return false;
+ else if (!signed_p && GET_CODE (min_arm) != UMIN)
+ return false;
+
+ rtx maxop0 = XEXP (max_arm, 0);
+ rtx maxop1 = XEXP (max_arm, 1);
+ rtx minop0 = XEXP (min_arm, 0);
+ rtx minop1 = XEXP (min_arm, 1);
+ return rtx_equal_p (maxop0, minop0) && rtx_equal_p (maxop1, minop1);
+}
+
/* Calculate the cost of calculating X, storing it in *COST. Result
is true if the total cost of the operation has now been calculated. */
static bool
cost_minus:
if (VECTOR_MODE_P (mode))
{
- /* SUBL2 and SUBW2. */
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
{
- /* The select-operand-high-half versions of the sub instruction
+ /* Recognise the SABD and UABD operations here.
+ Recursion from the PLUS case will catch the accumulating
+ forms. */
+ if (aarch64_abd_rtx_p (x))
+ {
+ if (speed)
+ *cost += extra_cost->vect.alu;
+ return true;
+ }
+ /* SUBL2 and SUBW2.
+ The select-operand-high-half versions of the sub instruction
have the same cost as the regular three vector version -
don't add the costs of the select into the costs of the sub.
*/
UNSPEC_PRLG_STK
UNSPEC_REV
UNSPEC_RBIT
- UNSPEC_SABAL2
UNSPEC_SADALP
UNSPEC_SCVTF
UNSPEC_SETMEM
UNSPEC_TLSLE24
UNSPEC_TLSLE32
UNSPEC_TLSLE48
- UNSPEC_UABAL2
UNSPEC_UADALP
UNSPEC_UCVTF
UNSPEC_USHL_2S
;; Int Iterators.
;; -------------------------------------------------------------------
-;; The unspec codes for the SABAL2, UABAL2 AdvancedSIMD instructions.
-(define_int_iterator ABAL2 [UNSPEC_SABAL2 UNSPEC_UABAL2])
-
;; The unspec codes for the SADALP, UADALP AdvancedSIMD instructions.
(define_int_iterator ADALP [UNSPEC_SADALP UNSPEC_UADALP])
(UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
(UNSPEC_SHSUB "s") (UNSPEC_UHSUB "u")
(UNSPEC_ADDHN "") (UNSPEC_RADDHN "r")
- (UNSPEC_SABAL2 "s") (UNSPEC_UABAL2 "u")
(UNSPEC_SADALP "s") (UNSPEC_UADALP "u")
(UNSPEC_SUBHN "") (UNSPEC_RSUBHN "r")
(UNSPEC_USQADD "us") (UNSPEC_SUQADD "su")
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** test_vabal_s8:
+** sabal2 v0.8h, v2.16b, v1.16b
+** ret
+*/
+int16x8_t
+test_vabal_s8 (int16x8_t sadv, int8x16_t pv, int8x16_t sv)
+{
+ return vabal_s8 (sadv, vget_high_s8 (pv), vget_high_s8 (sv));
+}
+
+/*
+** test_vabal_u8:
+** uabal2 v0.8h, v2.16b, v1.16b
+** ret
+*/
+uint16x8_t
+test_vabal_u8 (uint16x8_t sadv, uint8x16_t pv, uint8x16_t sv)
+{
+ return vabal_u8 (sadv, vget_high_u8 (pv), vget_high_u8 (sv));
+}
+
+/*
+** test_vabal_s16:
+** sabal2 v0.4s, v2.8h, v1.8h
+** ret
+*/
+int32x4_t
+test_vabal_s16 (int32x4_t sadv, int16x8_t pv, int16x8_t sv)
+{
+ return vabal_s16 (sadv, vget_high_s16 (pv), vget_high_s16 (sv));
+}
+
+/*
+** test_vabal_u16:
+** uabal2 v0.4s, v2.8h, v1.8h
+** ret
+*/
+uint32x4_t
+test_vabal_u16 (uint32x4_t sadv, uint16x8_t pv, uint16x8_t sv)
+{
+ return vabal_u16 (sadv, vget_high_u16 (pv), vget_high_u16 (sv));
+}
+
+/*
+** test_vabal_s32:
+** sabal2 v0.2d, v2.4s, v1.4s
+** ret
+*/
+int64x2_t
+test_vabal_s32 (int64x2_t sadv, int32x4_t pv, int32x4_t sv)
+{
+ return vabal_s32 (sadv, vget_high_s32 (pv), vget_high_s32 (sv));
+}
+
+/*
+** test_vabal_u32:
+** uabal2 v0.2d, v2.4s, v1.4s
+** ret
+*/
+uint64x2_t
+test_vabal_u32 (uint64x2_t sadv, uint32x4_t pv, uint32x4_t sv)
+{
+ return vabal_u32 (sadv, vget_high_u32 (pv), vget_high_u32 (sv));
+}
+