]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Add support for unpacked SVE FP binary arithmetic
authorSpencer Abson <spencer.abson@arm.com>
Tue, 8 Jul 2025 10:51:33 +0000 (10:51 +0000)
committerSpencer Abson <spencer.abson@arm.com>
Thu, 24 Jul 2025 09:43:18 +0000 (09:43 +0000)
This patch extends the expanders for unpredicated smax, smin, add, sub,
mul, min, and max, so that they support partial SVE FP modes.

The relevant insn and splitting patterns are also updated.

gcc/ChangeLog:

* config/aarch64/aarch64-sve.md (<optab><mode>3): Extend from
SVE_FULL_F_B16B16 to SVE_F_B16B16, use aarch64_sve_fp_pred.
(*post_ra_<sve_fp_op><mode>3): Extend from SVE_FULL_F_B16B16 to
SVE_F_B16B16.
(@aarch64_pred_<optab><mode>): Extend from SVE_FULL_F to SVE_F,
use aarch64_predicate_operand (ADD/SUB/MUL/MAX/MIN).
(split for using unpredicated insns): Move SVE_RELAXED_GP into
the pattern, rather than testing for it in the condition.
* config/aarch64/aarch64-sve2.md (@aarch64_pred_<optab><mode>):
Extend from VNx8BF_ONLY to SVE_BF.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/sve/unpacked_binary_bf16_1.C: New test.
* g++.target/aarch64/sve/unpacked_binary_bf16_2.C: Likewise.
* gcc.target/aarch64/sve/unpacked_builtin_fmax_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_builtin_fmax_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_builtin_fmin_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_builtin_fmin_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_fadd_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_fadd_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_fmaxnm_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_fmaxnm_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_fminnm_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_fminnm_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_fmul_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_fmul_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_fsubr_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_fsubr_2.c: Likewise.

18 files changed:
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/aarch64-sve2.md
gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_1.C [new file with mode: 0644]
gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_2.C [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_2.c [new file with mode: 0644]

index 66dd5809bcda31d71039cedfb0507a742464155f..a52ef61098bbff825b6d8efc94e7fa58cc6514cb 100644 (file)
 ;; Split a predicated instruction whose predicate is unused into an
 ;; unpredicated instruction.
 (define_split
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
          [(match_operand:<VPRED> 1 "register_operand")
-          (match_operand:SI 4 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-          (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+          (const_int SVE_RELAXED_GP)
+          (match_operand:SVE_F_B16B16 2 "register_operand")
+          (match_operand:SVE_F_B16B16 3 "register_operand")]
          <SVE_COND_FP>))]
-  "TARGET_SVE
-   && reload_completed
-   && INTVAL (operands[4]) == SVE_RELAXED_GP"
+  "TARGET_SVE && reload_completed"
   [(set (match_dup 0)
-       (SVE_UNPRED_FP_BINARY:SVE_FULL_F_B16B16 (match_dup 2) (match_dup 3)))]
+       (SVE_UNPRED_FP_BINARY:SVE_F_B16B16 (match_dup 2) (match_dup 3)))]
 )
 
 ;; Unpredicated floating-point binary operations (post-RA only).
 ;; These are generated by the split above.
 (define_insn "*post_ra_<sve_fp_op><mode>3"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand" "=w")
-       (SVE_UNPRED_FP_BINARY:SVE_FULL_F_B16B16
-         (match_operand:SVE_FULL_F_B16B16 1 "register_operand" "w")
-         (match_operand:SVE_FULL_F_B16B16 2 "register_operand" "w")))]
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand" "=w")
+       (SVE_UNPRED_FP_BINARY:SVE_F_B16B16
+         (match_operand:SVE_F_B16B16 1 "register_operand" "w")
+         (match_operand:SVE_F_B16B16 2 "register_operand" "w")))]
   "TARGET_SVE && reload_completed"
   "<b><sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
 
 ;; Unpredicated floating-point binary operations that need to be predicated
 ;; for SVE.
 (define_expand "<optab><mode>3"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
          [(match_dup 3)
-          (const_int SVE_RELAXED_GP)
-          (match_operand:SVE_FULL_F_B16B16 1 "<sve_pred_fp_rhs1_operand>")
-          (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs2_operand>")]
+          (match_dup 4)
+          (match_operand:SVE_F_B16B16 1 "<sve_pred_fp_rhs1_operand>")
+          (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs2_operand>")]
          SVE_COND_FP_BINARY_OPTAB))]
   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
   {
-    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+    operands[3] = aarch64_sve_fp_pred (<MODE>mode, &operands[4]);
   }
 )
 
 
 ;; Predicated floating-point addition.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "register_operand")
-          (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand")]
+          (match_operand:SVE_F 2 "register_operand")
+          (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand")]
          SVE_COND_FP_ADD))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , %2 , 3   , 4   ; attrs: movprfx ]
 
 ;; Predicated floating-point subtraction.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand")
-          (match_operand:SVE_FULL_F 3 "register_operand")]
+          (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand")
+          (match_operand:SVE_F 3 "register_operand")]
          SVE_COND_FP_SUB))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , 2   , 3 , 4   ; attrs: movprfx ]
 
 ;; Predicated floating-point multiplication.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "register_operand")
-          (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand")]
+          (match_operand:SVE_F 2 "register_operand")
+          (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand")]
          SVE_COND_FP_MUL))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , %2 , 3   , 4   ; attrs: movprfx ]
 
 ;; Predicated floating-point maximum/minimum.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "register_operand")
-          (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
+          (match_operand:SVE_F 2 "register_operand")
+          (match_operand:SVE_F 3 "aarch64_sve_float_maxmin_operand")]
          SVE_COND_FP_MAXMIN))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , %2 , 3   ; attrs: movprfx ]
index 8c03e28cb0840a9390fd5afeb45bddf1846b625e..31bdd85ddb2a66403703dbc177f4ed63157734ab 100644 (file)
 
 ;; Predicated B16B16 binary operations.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
-       (unspec:VNx8BF_ONLY
-         [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_BF 0 "register_operand")
+       (unspec:SVE_BF
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
-          (match_operand:VNx8BF_ONLY 2 "register_operand")
-          (match_operand:VNx8BF_ONLY 3 "register_operand")]
+          (match_operand:SVE_BF 2 "register_operand")
+          (match_operand:SVE_BF 3 "register_operand")]
          SVE_COND_FP_BINARY_OPTAB))]
   "TARGET_SSVE_B16B16 && <supports_bf16>"
   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx , is_rev ]
diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_1.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_1.C
new file mode 100644 (file)
index 0000000..9d6342b
--- /dev/null
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only -msve-vector-bits=2048" } */
+
+#pragma GCC target "arch=armv9-a+sve-b16b16"
+
+#define ADD(a, b) a + b
+#define SUB(a, b) a - b
+#define MUL(a, b) a * b
+#define MAX(a, b) (a > b) ? a : b
+#define MIN(a, b) (a > b) ? b : a
+
+#define TEST_OP(TYPE, OP)                                        \
+  TYPE test_##TYPE##_##OP (TYPE a, TYPE b) { return OP (a, b); } \
+
+#define TEST_ALL(TYPE, SIZE)                                        \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE)));       \
+  TEST_OP (TYPE##SIZE, ADD)                                         \
+  TEST_OP (TYPE##SIZE, SUB)                                         \
+  TEST_OP (TYPE##SIZE, MUL)                                         \
+  TEST_OP (TYPE##SIZE, MIN)                                         \
+  TEST_OP (TYPE##SIZE, MAX)
+
+TEST_ALL (__bf16, 64)
+
+TEST_ALL (__bf16, 128)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 5 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 5 } } */
+
+/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_2.C
new file mode 100644 (file)
index 0000000..63de293
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile }*/
+/* { dg-options "-O2 -ffinite-math-only -fno-signed-zeros -fno-trapping-math -msve-vector-bits=2048 " } */
+
+#include "unpacked_binary_bf16_1.C"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 10 } } */
+
+/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_1.c
new file mode 100644 (file)
index 0000000..e6aa047
--- /dev/null
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS)                  \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out,   \
+                                     TYPE0 *__restrict a,      \
+                                     TYPE0 *__restrict b,      \
+                                      TYPE0 *__restrict c)     \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (FN (a[i], RHS) > c[i])                               \
+       out[i] = 3;                                             \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1)
+
+TEST_ALL (__builtin_fmaxf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_fmaxf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_fmaxf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_2.c
new file mode 100644 (file)
index 0000000..87125a6
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_builtin_fmax_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_1.c
new file mode 100644 (file)
index 0000000..b9fded0
--- /dev/null
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS)                  \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out,   \
+                                     TYPE0 *__restrict a,      \
+                                     TYPE0 *__restrict b,      \
+                                      TYPE0 *__restrict c)     \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (FN (a[i], RHS) > c[i])                               \
+       out[i] = 3;                                             \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1)
+
+TEST_ALL (__builtin_fminf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_fminf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_fminf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_2.c
new file mode 100644 (file)
index 0000000..5923b67
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_builtin_fmin_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_1.c
new file mode 100644 (file)
index 0000000..9675f56
--- /dev/null
@@ -0,0 +1,52 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define ADD(A, B)  A + B
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, NAME, RHS)            \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##NAME (TYPE1 *__restrict out,  \
+                                      TYPE0 *__restrict a,     \
+                                      TYPE0 *__restrict b,     \
+                                       TYPE0 *__restrict c)    \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (FN (a[i], (TYPE0)RHS) > c[i])                                \
+       out[i] = 3;                                             \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)      \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b[i]) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, p5, 0.5)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, np5, -0.5) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, one, 1)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, none, -1)
+
+TEST_ALL (ADD, _Float16, uint64_t, 32)
+
+TEST_ALL (ADD, _Float16, uint32_t, 64)
+
+TEST_ALL (ADD, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 5 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 10 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 11 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 11 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 11 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_2.c
new file mode 100644 (file)
index 0000000..7a74efd
--- /dev/null
@@ -0,0 +1,26 @@
+/* { dg-do compile }*/
+/* { dg-options "-O2 -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_fadd_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 15 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 11 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 11 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 11 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_1.c
new file mode 100644 (file)
index 0000000..5239e4b
--- /dev/null
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define b_i b[i]
+#define MAX(A, B) (A > B) ? A : B
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS)                  \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out,   \
+                                     TYPE0 *__restrict a,      \
+                                     TYPE0 *__restrict b,      \
+                                      TYPE0 *__restrict c)     \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (c[i] = FN (a[i], RHS))                               \
+       out[i] = 3;                                             \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1)
+
+TEST_ALL (MAX, _Float16, uint64_t, 32)
+
+TEST_ALL (MAX, _Float16, uint32_t, 64)
+
+TEST_ALL (MAX, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_2.c
new file mode 100644 (file)
index 0000000..11aa7c0
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only -fno-trapping-math -moverride=sve_width=2048" } */
+
+#include "unpacked_fmaxnm_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_1.c
new file mode 100644 (file)
index 0000000..02a5f46
--- /dev/null
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define b_i b[i]
+#define MIN(A, B) (A < B) ? A : B
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS)                  \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out,   \
+                                     TYPE0 *__restrict a,      \
+                                     TYPE0 *__restrict b,      \
+                                      TYPE0 *__restrict c)     \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (c[i] = FN (a[i], RHS) )                              \
+       out[i] = 3;                                             \
+  }
+
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1)
+
+TEST_ALL (MIN, _Float16, uint64_t, 32)
+
+TEST_ALL (MIN, _Float16, uint32_t, 64)
+
+TEST_ALL (MIN, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_2.c
new file mode 100644 (file)
index 0000000..81f583b
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only -fno-trapping-math -moverride=sve_width=2048" } */
+
+#include "unpacked_fminnm_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_1.c
new file mode 100644 (file)
index 0000000..a180a07
--- /dev/null
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define b_i b[i] /* RHS choice: the loop's per-element operand b[i].  */
+#define immp5 0.5 /* RHS choice: the constant 0.5.  */
+#define MUL(A, B) ((A) * (B)) /* Parenthesized so any operands are safe.  */
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS)                  \
+  void /* Defines f_<FN>_<TYPE0>_<TYPE1>_<RHS>.  */            \
+  f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out,   \
+                                     TYPE0 *__restrict a,      \
+                                     TYPE0 *__restrict b,      \
+                                      TYPE0 *__restrict c)     \
+  { /* Do not rename b or i: b_i expands to b[i].  */          \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (FN (a[i], (TYPE0)RHS) > c[i])                                \
+       out[i] = 3; /* Only where the compare holds.  */        \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) /* RHS is b[i].  */ \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, immp5) /* RHS is the constant 0.5.  */
+
+TEST_ALL (MUL, _Float16, uint64_t, 32) /* out[]: 32 x 64 bits = 2048, the sve_width above.  */
+
+TEST_ALL (MUL, _Float16, uint32_t, 64) /* out[]: 64 x 32 bits = 2048, the sve_width above.  */
+
+TEST_ALL (MUL, float, uint64_t, 32) /* out[]: 32 x 64 bits = 2048, the sve_width above.  */
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 5 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 5 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 5 } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_2.c
new file mode 100644 (file)
index 0000000..eb05600
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_fmul_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 5 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 5 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 5 } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_1.c
new file mode 100644 (file)
index 0000000..2cc8ec2
--- /dev/null
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define b_i b[i] /* RHS choice: the loop's per-element operand b[i].  */
+#define immp5 0.5 /* RHS choice: the constant 0.5.  */
+#define SUBR(A, B) ((B) - (A)) /* Reversed subtract; parenthesized so any operands are safe.  */
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS)                  \
+  void /* Defines f_<FN>_<TYPE0>_<TYPE1>_<RHS>.  */            \
+  f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out,   \
+                                     TYPE0 *__restrict a,      \
+                                     TYPE0 *__restrict b,      \
+                                      TYPE0 *__restrict c)     \
+  { /* Do not rename b or i: b_i expands to b[i].  */          \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (FN (a[i], (TYPE0)RHS) > c[i])                                \
+       out[i] = 3; /* Only where the compare holds.  */        \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) /* RHS is b[i].  */ \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, immp5) /* RHS is the constant 0.5.  */ \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1) /* RHS is the constant 1.  */
+
+TEST_ALL (SUBR, _Float16, uint64_t, 32) /* out[]: 32 x 64 bits = 2048, the sve_width above.  */
+
+TEST_ALL (SUBR, _Float16, uint32_t, 64) /* out[]: 64 x 32 bits = 2048, the sve_width above.  */
+
+TEST_ALL (SUBR, float, uint64_t, 32) /* out[]: 32 x 64 bits = 2048, the sve_width above.  */
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfsubr?\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsubr?\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_2.c
new file mode 100644 (file)
index 0000000..de9325c
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_fsubr_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */