]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Add support for unpacked SVE FP unary operations
authorSpencer Abson <spencer.abson@arm.com>
Mon, 7 Jul 2025 16:49:17 +0000 (16:49 +0000)
committerSpencer Abson <spencer.abson@arm.com>
Wed, 23 Jul 2025 20:37:58 +0000 (20:37 +0000)
This patch extends the expander for unpredicated round, nearbyint, floor,
ceil, rint, and trunc, so that it can handle partial SVE FP modes.

We move fabs and fneg to a separate expander, since they are not trapping
instructions.

gcc/ChangeLog:

* config/aarch64/aarch64-sve.md (<optab><mode>2): Replace use of
aarch64_ptrue_reg with aarch64_sve_fp_pred.
(@aarch64_pred_<optab><mode>): Extend from SVE_FULL_F to SVE_F,
and use aarch64_predicate_operand.
* config/aarch64/iterators.md: Split FABS/FNEG out of
SVE_COND_FP_UNARY (into new SVE_COND_FP_UNARY_BITWISE).

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/unpacked_fabs_1.c: New test.
* gcc.target/aarch64/sve/unpacked_fneg_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_frinta_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_frinta_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_frinti_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_frinti_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_frintm_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_frintm_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_frintp_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_frintp_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_frintx_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_frintx_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_frintz_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_frintz_2.c: Likewise.

16 files changed:
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/iterators.md
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c [new file with mode: 0644]

index d53d297e7e4ecc3186aa7febc09e6cfcdce89f24..9a8ff216999f6e3cca1628d06b7368feb4774023 100644 (file)
 
 ;; Unpredicated floating-point unary operations.
 (define_expand "<optab><mode>2"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
          [(match_dup 2)
-          (const_int SVE_RELAXED_GP)
-          (match_operand:SVE_FULL_F 1 "register_operand")]
+          (match_dup 3)
+          (match_operand:SVE_F 1 "register_operand")]
          SVE_COND_FP_UNARY_OPTAB))]
   "TARGET_SVE"
+  {
+    operands[2] = aarch64_sve_fp_pred (<MODE>mode, &operands[3]);
+  }
+)
+
+;; FABS and FNEG are non-trapping, so we can always expand with a <VPRED>
+;; predicate.  It doesn't matter whether the padding bits of a partial
+;; vector mode are active or inactive.
+(define_expand "<optab><mode>2"
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_dup 2)
+          (const_int SVE_RELAXED_GP)
+          (match_operand:SVE_F 1 "register_operand")]
+         SVE_COND_FP_UNARY_BITWISE))]
+  "TARGET_SVE"
   {
     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
   }
 
 ;; Predicated floating-point unary operations.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "register_operand")]
+          (match_operand:SVE_F 2 "register_operand")]
          SVE_COND_FP_UNARY))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
index 22d7aa9cf9d7e20d35b4b1aac999747790ca4f45..795c4ac7a57978b1f62b9f7c5f2f7624e62fa89b 100644 (file)
                                           UNSPEC_FMINQV
                                           UNSPEC_FMINNMQV])
 
-(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FABS
-                                       UNSPEC_COND_FNEG
-                                       UNSPEC_COND_FRECPX
+(define_int_iterator SVE_COND_FP_UNARY_BITWISE [UNSPEC_COND_FABS
+                                               UNSPEC_COND_FNEG])
+
+(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FRECPX
                                        UNSPEC_COND_FRINTA
                                        UNSPEC_COND_FRINTI
                                        UNSPEC_COND_FRINTM
                                        UNSPEC_COND_FRINTP
                                        UNSPEC_COND_FRINTX
                                        UNSPEC_COND_FRINTZ
-                                       UNSPEC_COND_FSQRT])
+                                       UNSPEC_COND_FSQRT
+                                       SVE_COND_FP_UNARY_BITWISE])
 
 ;; Same as SVE_COND_FP_UNARY, but without codes that have a dedicated
 ;; <optab><mode>2 expander.
-(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FABS
-                                             UNSPEC_COND_FNEG
-                                             UNSPEC_COND_FRECPX
+(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FRECPX
                                              UNSPEC_COND_FRINTA
                                              UNSPEC_COND_FRINTI
                                              UNSPEC_COND_FRINTM
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c
new file mode 100644 (file)
index 0000000..f09cfe8
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_fabsf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_fabsf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_fabsf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c
new file mode 100644 (file)
index 0000000..d489ecb
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define NEG(X) -X
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (NEG, _Float16, uint64_t, 32)
+
+TEST_FN (NEG, _Float16, uint32_t, 64)
+
+TEST_FN (NEG, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c
new file mode 100644 (file)
index 0000000..3cbdef3
--- /dev/null
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_roundf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_roundf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_roundf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c
new file mode 100644 (file)
index 0000000..4564686
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include "unpacked_frinta_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c
new file mode 100644 (file)
index 0000000..7645fed
--- /dev/null
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_nearbyintf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_nearbyintf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_nearbyintf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c
new file mode 100644 (file)
index 0000000..eadce07
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include "unpacked_frinti_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c
new file mode 100644 (file)
index 0000000..98f85fb
--- /dev/null
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_floorf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_floorf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_floorf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c
new file mode 100644 (file)
index 0000000..56988be
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include "unpacked_frintm_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c
new file mode 100644 (file)
index 0000000..f233697
--- /dev/null
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_ceilf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_ceilf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_ceilf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c
new file mode 100644 (file)
index 0000000..c24c632
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include "unpacked_frintp_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c
new file mode 100644 (file)
index 0000000..73403a5
--- /dev/null
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_rintf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_rintf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_rintf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c
new file mode 100644 (file)
index 0000000..e8b8924
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include "unpacked_frintx_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c
new file mode 100644 (file)
index 0000000..7377843
--- /dev/null
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_truncf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_truncf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_truncf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c
new file mode 100644 (file)
index 0000000..1779122
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include "unpacked_frintz_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */