SVE2 supports an unpredicated vector integer MUL form that we can emit from our SVE expanders
without using up a predicate register. This patch does so.
As the SVE MUL expansion is currently templated through a code iterator, I did not split it
off just for this case but instead special-cased it in the define_expand. That seemed less
invasive than the alternatives, but I can split it off more explicitly if others prefer.
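As an illustration (not part of the patch itself), for a simple multiply loop such as:

  #include <stdint.h>

  void
  f (int32_t *restrict a, int32_t *restrict b, int32_t *restrict c)
  {
    for (int i = 0; i < 1024; i++)
      a[i] = b[i] * c[i];
  }

built with -O2 -ftree-vectorize and SVE2 enabled, the vector loop body is expected to use the
unpredicated form, i.e. something like
  mul	z0.s, z0.s, z1.s
rather than setting up a ptrue and using the predicated
  mul	z0.s, p0/m, z0.s, z1.s
(exact register numbers will of course vary).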
The div-by-bitmask_1.c testcase is adjusted to expect this new MUL form.
Bootstrapped and tested on aarch64-none-linux-gnu.
gcc/ChangeLog:
PR target/109406
* config/aarch64/aarch64-sve.md (<optab><mode>3): Handle TARGET_SVE2 MUL
case.
* config/aarch64/aarch64-sve2.md (*aarch64_mul_unpredicated_<mode>): New
pattern.
gcc/testsuite/ChangeLog:
PR target/109406
* gcc.target/aarch64/sve2/div-by-bitmask_1.c: Adjust for unpredicated SVE2
MUL.
* gcc.target/aarch64/sve2/unpred_mul_1.c: New test.
UNSPEC_PRED_X))]
"TARGET_SVE"
{
+ /* SVE2 supports the MUL (vectors, unpredicated) form. Emit the simple
+ pattern for it here rather than splitting off the MULT expander
+ separately. */
+ if (TARGET_SVE2 && <CODE> == MULT)
+ {
+ emit_move_insn (operands[0], gen_rtx_MULT (<MODE>mode,
+ operands[1], operands[2]));
+ DONE;
+ }
operands[3] = aarch64_ptrue_reg (<VPRED>mode);
}
)
;; -------------------------------------------------------------------------
;; ---- [INT] Multiplication
;; -------------------------------------------------------------------------
-;; Includes the lane forms of:
+;; Includes the lane and unpredicated forms of:
;; - MUL
;; -------------------------------------------------------------------------
"mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)
+;; The 2nd and 3rd alternatives are valid for plain TARGET_SVE as well, but
+;; we include them here to allow matching simpler, unpredicated RTL.
+(define_insn "*aarch64_mul_unpredicated_<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w,w,?&w")
+ (mult:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "w,0,w")
+ (match_operand:SVE_I 2 "aarch64_sve_vsm_operand" "w,vsm,vsm")))]
+ "TARGET_SVE2"
+ "@
+ mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
+ mul\t%0.<Vetype>, %0.<Vetype>, #%2
+ movprfx\t%0, %1\;mul\t%0.<Vetype>, %0.<Vetype>, #%2"
+ [(set_attr "movprfx" "*,*,yes")]
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Scaled high-part multiplication
;; -------------------------------------------------------------------------
/*
** draw_bitmap1:
** ...
-** mul z[0-9]+.h, p[0-9]+/m, z[0-9]+.h, z[0-9]+.h
+** mul z[0-9]+.h, z[0-9]+.h, z[0-9]+.h
** addhnb z[0-9]+.b, z[0-9]+.h, z[0-9]+.h
** addhnb z[0-9]+.b, z[0-9]+.h, z[0-9]+.h
** ...
/*
** draw_bitmap3:
** ...
-** mul z[0-9]+.s, p[0-9]+/m, z[0-9]+.s, z[0-9]+.s
+** mul z[0-9]+.s, z[0-9]+.s, z[0-9]+.s
** addhnb z[0-9]+.h, z[0-9]+.s, z[0-9]+.s
** addhnb z[0-9]+.h, z[0-9]+.s, z[0-9]+.s
** ...
/*
** draw_bitmap4:
** ...
-** mul z[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d
+** mul z[0-9]+.d, z[0-9]+.d, z[0-9]+.d
** addhnb z[0-9]+.s, z[0-9]+.d, z[0-9]+.d
** addhnb z[0-9]+.s, z[0-9]+.d, z[0-9]+.d
** ...
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define N 1024
+
+#define TYPE(N) int##N##_t
+
+#define TEMPLATE(SIZE) \
+void __attribute__ ((noinline, noclone)) \
+f_##SIZE \
+ (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \
+ TYPE(SIZE) *restrict c) \
+{ \
+ for (int i = 0; i < N; i++) \
+ a[i] = b[i] * c[i]; \
+}
+
+TEMPLATE (8);
+TEMPLATE (16);
+TEMPLATE (32);
+TEMPLATE (64);
+
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */
+