aarch64: Use SVE ASRD instruction with Neon modes.
author    Soumya AR <soumyaa@nvidia.com>
          Wed, 11 Dec 2024 04:15:09 +0000 (09:45 +0530)
committer Soumya AR <soumyaa@nvidia.com>
          Wed, 11 Dec 2024 04:20:02 +0000 (09:50 +0530)
The SVE ASRD instruction performs an arithmetic shift right by an immediate
for divide, i.e. a signed division by a power of two that rounds towards zero.

This patch enables the use of ASRD with Neon modes.

For example:

int in[N], out[N];

void
foo (void)
{
  for (int i = 0; i < N; i++)
    out[i] = in[i] / 4;
}

compiles to:

ldr q31, [x1, x0]
cmlt v30.16b, v31.16b, #0
and z30.b, z30.b, 3
add v30.16b, v30.16b, v31.16b
sshr v30.16b, v30.16b, 2
str q30, [x0, x2]
add x0, x0, 16
cmp x0, 1024

but can just be:

ldp q30, q31, [x0], 32
asrd z31.b, p7/m, z31.b, #2
asrd z30.b, p7/m, z30.b, #2
stp q30, q31, [x1], 32
cmp x0, x2

This patch also adds the following overload:
aarch64_ptrue_reg (machine_mode pred_mode, machine_mode data_mode)
Depending on the data mode, the function returns a predicate with the
appropriate bits set: all bits for an SVE mode, and only the first N bits
for an N-byte Advanced SIMD or scalar mode.
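
For illustration, a sketch of intended usage (not code from the patch; it
assumes VNx16BImode is the governing predicate mode in these cases):

aarch64_ptrue_reg (VNx16BImode, V16QImode);   /* 128-bit Neon data: low 16 bits active, as "ptrue p<n>.b, vl16".  */
aarch64_ptrue_reg (VNx16BImode, V8QImode);    /* 64-bit Neon data: low 8 bits active, as "ptrue p<n>.b, vl8".  */
aarch64_ptrue_reg (VNx16BImode, VNx16QImode); /* SVE data: every bit active.  */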

The patch was bootstrapped and regtested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_ptrue_reg): New overload.
* config/aarch64/aarch64-protos.h (aarch64_ptrue_reg): Likewise.
* config/aarch64/aarch64-sve.md (sdiv_pow2<mode>3, *sdiv_pow2<mode>3):
Extend to support Neon modes.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/sve-asrd.c: New test.

Co-authored-by: Richard Sandiford <richard.sandiford@arm.com>
Signed-off-by: Soumya AR <soumyaa@nvidia.com>
gcc/config/aarch64/aarch64-protos.h
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/aarch64.cc
gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c [new file with mode: 0644]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index db2baca58665d6a0c9f3ebb99f5fe780f6882cd3..bd17486e9128a21bd205ef1fb3ec3e323408ec59 100644
@@ -1018,6 +1018,7 @@ void aarch64_expand_mov_immediate (rtx, rtx);
 rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type);
 rtx aarch64_ptrue_reg (machine_mode);
 rtx aarch64_ptrue_reg (machine_mode, unsigned int);
+rtx aarch64_ptrue_reg (machine_mode, machine_mode);
 rtx aarch64_pfalse_reg (machine_mode);
 bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
 void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 04326bca0e7415e736156eb8db44f6142138480e..a72ca2a500d394598268c6adfe717eed94a304b3 100644
 
 ;; Unpredicated ASRD.
 (define_expand "sdiv_pow2<mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand")
-       (unspec:SVE_I
+  [(set (match_operand:SVE_VDQ_I 0 "register_operand")
+       (unspec:SVE_VDQ_I
          [(match_dup 3)
-          (unspec:SVE_I
-            [(match_operand:SVE_I 1 "register_operand")
+          (unspec:SVE_VDQ_I
+            [(match_operand:SVE_VDQ_I 1 "register_operand")
              (match_operand 2 "aarch64_simd_rshift_imm")]
             UNSPEC_ASRD)]
         UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
-    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+    operands[3] = aarch64_ptrue_reg (<VPRED>mode, <MODE>mode);
   }
 )
 
 ;; Predicated ASRD.
 (define_insn "*sdiv_pow2<mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand")
-       (unspec:SVE_I
+  [(set (match_operand:SVE_VDQ_I 0 "register_operand")
+       (unspec:SVE_VDQ_I
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_I
-            [(match_operand:SVE_I 2 "register_operand")
-             (match_operand:SVE_I 3 "aarch64_simd_rshift_imm")]
+          (unspec:SVE_VDQ_I
+            [(match_operand:SVE_VDQ_I 2 "register_operand")
+             (match_operand:SVE_VDQ_I 3 "aarch64_simd_rshift_imm")]
             UNSPEC_ASRD)]
          UNSPEC_PRED_X))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
-     [ w        , Upl , 0 ; *              ] asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
-     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+     [ w        , Upl , 0 ; *              ] asrd\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, #%3
+     [ ?&w      , Upl , w ; yes            ] movprfx\t%Z0, %Z2\;asrd\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, #%3
   }
 )
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 3606dc174c2f5ff941bf119457af5816f2a90cce..4d1b3cca0c42e053764933391be7b0e21f79999e 100644
@@ -3778,6 +3778,22 @@ aarch64_ptrue_reg (machine_mode mode, unsigned int vl)
   return gen_lowpart (mode, reg);
 }
 
+/* Return a register of mode PRED_MODE for controlling data of mode DATA_MODE.
+
+   DATA_MODE can be a scalar, an Advanced SIMD vector, or an SVE vector.
+   If it's an N-byte scalar or an Advanced SIMD vector, the first N bits
+   of the predicate will be active and the rest will be inactive.
+   If DATA_MODE is an SVE mode, every bit of the predicate will be active.  */
+rtx
+aarch64_ptrue_reg (machine_mode pred_mode, machine_mode data_mode)
+{
+  if (aarch64_sve_mode_p (data_mode))
+    return aarch64_ptrue_reg (pred_mode);
+
+  auto size = GET_MODE_SIZE (data_mode).to_constant ();
+  return aarch64_ptrue_reg (pred_mode, size);
+}
+
 /* Return an all-false predicate register of mode MODE.  */
 
 rtx
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c b/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c
new file mode 100644
index 0000000..341baae
--- /dev/null
@@ -0,0 +1,86 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <stdint.h>
+
+#define FUNC(TYPE, I)                                                          \
+  TYPE M_##TYPE##_##I[I];                                                      \
+  void asrd_##TYPE##_##I ()                                                    \
+  {                                                                            \
+    for (int i = 0; i < I; i++)                                                \
+      {                                                                        \
+       M_##TYPE##_##I[i] /= 4;                                                \
+      }                                                                        \
+  }
+
+/*
+** asrd_int8_t_8:
+**     ...
+**     ptrue   (p[0-7]).b, vl8
+**     ...
+**     asrd    z[0-9]+\.b, \1/m, z[0-9]+\.b, #2
+**     ...
+*/
+FUNC(int8_t, 8);
+
+/*
+** asrd_int8_t_16:
+**     ...
+**     ptrue   (p[0-7]).b, vl16
+**     ...
+**     asrd    z[0-9]+\.b, \1/m, z[0-9]+\.b, #2
+**     ...
+*/
+FUNC(int8_t, 16);
+
+/*
+** asrd_int16_t_4:
+**     ...
+**     ptrue   (p[0-7]).b, vl8
+**     ...
+**     asrd    z[0-9]+\.h, \1/m, z[0-9]+\.h, #2
+**     ...
+*/
+FUNC(int16_t, 4);
+
+/*
+** asrd_int16_t_8:
+**     ...
+**     ptrue   (p[0-7]).b, vl16
+**     ...
+**     asrd    z[0-9]+\.h, \1/m, z[0-9]+\.h, #2
+**     ...
+*/
+FUNC(int16_t, 8);
+
+/*
+** asrd_int32_t_2:
+**     ...
+**     ptrue   (p[0-7]).b, vl8
+**     ...
+**     asrd    z[0-9]+\.s, \1/m, z[0-9]+\.s, #2
+**     ...
+*/
+FUNC(int32_t, 2);
+
+/*
+** asrd_int32_t_4:
+**     ...
+**     ptrue   (p[0-7]).b, vl16
+**     ...
+**     asrd    z[0-9]+\.s, \1/m, z[0-9]+\.s, #2
+**     ...
+*/
+FUNC(int32_t, 4);
+
+/*
+** asrd_int64_t_2:
+**     ...
+**     ptrue   (p[0-7]).b, vl16
+**     ...
+**     asrd    z[0-9]+\.d, \1/m, z[0-9]+\.d, #2
+**     ...
+*/
+FUNC(int64_t, 2);
+