From 556ed247adc9857ebd89a5bdbcdc8f929f73bd1e Mon Sep 17 00:00:00 2001 From: Spencer Abson Date: Mon, 7 Jul 2025 16:49:17 +0000 Subject: [PATCH] aarch64: Add support for unpacked SVE FP unary operations This patch extends the expander for unpredicated round, nearbyint, floor, ceil, rint, and trunc, so that it can handle partial SVE FP modes. We move fabs and fneg to a separate expander, since they are not trapping instructions. gcc/ChangeLog: * config/aarch64/aarch64-sve.md (<optab><mode>2): Replace use of aarch64_ptrue_reg with aarch64_sve_fp_pred. (@aarch64_pred_<optab><mode>): Extend from SVE_FULL_F to SVE_F, and use aarch64_predicate_operand. * config/aarch64/iterators.md: Split FABS/FNEG out of SVE_COND_FP_UNARY (into new SVE_COND_FP_UNARY_BITWISE). gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/unpacked_fabs_1.c: New test. * gcc.target/aarch64/sve/unpacked_fneg_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_frinta_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_frinta_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_frinti_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_frinti_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintm_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintm_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintp_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintp_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintx_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintx_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintz_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintz_2.c: Likewise. 
--- gcc/config/aarch64/aarch64-sve.md | 32 ++++++++++++++----- gcc/config/aarch64/iterators.md | 14 ++++---- .../gcc.target/aarch64/sve/unpacked_fabs_1.c | 28 ++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_fneg_1.c | 30 +++++++++++++++++ .../aarch64/sve/unpacked_frinta_1.c | 31 ++++++++++++++++++ .../aarch64/sve/unpacked_frinta_2.c | 15 +++++++++ .../aarch64/sve/unpacked_frinti_1.c | 31 ++++++++++++++++++ .../aarch64/sve/unpacked_frinti_2.c | 15 +++++++++ .../aarch64/sve/unpacked_frintm_1.c | 31 ++++++++++++++++++ .../aarch64/sve/unpacked_frintm_2.c | 15 +++++++++ .../aarch64/sve/unpacked_frintp_1.c | 31 ++++++++++++++++++ .../aarch64/sve/unpacked_frintp_2.c | 15 +++++++++ .../aarch64/sve/unpacked_frintx_1.c | 31 ++++++++++++++++++ .../aarch64/sve/unpacked_frintx_2.c | 15 +++++++++ .../aarch64/sve/unpacked_frintz_1.c | 31 ++++++++++++++++++ .../aarch64/sve/unpacked_frintz_2.c | 15 +++++++++ 16 files changed, 365 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index d53d297e7e4..9a8ff216999 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3762,13 +3762,29 @@ ;; Unpredicated floating-point unary operations. (define_expand "2" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F [(match_dup 2) - (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F 1 "register_operand")] + (match_dup 3) + (match_operand:SVE_F 1 "register_operand")] SVE_COND_FP_UNARY_OPTAB))] "TARGET_SVE" + { + operands[2] = aarch64_sve_fp_pred (mode, &operands[3]); + } +) + +;; FABS and FNEG are non-trapping, so we can always expand with a +;; predicate. It doesn't matter whether the padding bits of a partial +;; vector mode are active or inactive. +(define_expand "2" + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_dup 2) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_F 1 "register_operand")] + SVE_COND_FP_UNARY_BITWISE))] + "TARGET_SVE" { operands[2] = aarch64_ptrue_reg (mode); } @@ -3776,11 +3792,11 @@ ;; Predicated floating-point unary operations. 
(define_insn "@aarch64_pred_" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand: 1 "register_operand") + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand: 1 "aarch64_predicate_operand") (match_operand:SI 3 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F 2 "register_operand")] + (match_operand:SVE_F 2 "register_operand")] SVE_COND_FP_UNARY))] "TARGET_SVE" {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 22d7aa9cf9d..795c4ac7a57 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -3430,9 +3430,10 @@ UNSPEC_FMINQV UNSPEC_FMINNMQV]) -(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FABS - UNSPEC_COND_FNEG - UNSPEC_COND_FRECPX +(define_int_iterator SVE_COND_FP_UNARY_BITWISE [UNSPEC_COND_FABS + UNSPEC_COND_FNEG]) + +(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FRECPX UNSPEC_COND_FRINTA UNSPEC_COND_FRINTI UNSPEC_COND_FRINTM @@ -3440,13 +3441,12 @@ UNSPEC_COND_FRINTP UNSPEC_COND_FRINTX UNSPEC_COND_FRINTZ - UNSPEC_COND_FSQRT]) + UNSPEC_COND_FSQRT + SVE_COND_FP_UNARY_BITWISE]) ;; Same as SVE_COND_FP_UNARY, but without codes that have a dedicated ;; 2 expander. 
-(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FABS - UNSPEC_COND_FNEG - UNSPEC_COND_FRECPX +(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FRECPX UNSPEC_COND_FRINTA UNSPEC_COND_FRINTI UNSPEC_COND_FRINTM diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c new file mode 100644 index 00000000000..f09cfe84065 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_fabsf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_fabsf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_fabsf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c new file mode 100644 index 00000000000..d489ecb6761 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include + +#define NEG(X) -X + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int 
i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (NEG, _Float16, uint64_t, 32) + +TEST_FN (NEG, _Float16, uint32_t, 64) + +TEST_FN (NEG, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c new file mode 100644 index 00000000000..3cbdef3d99b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_roundf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_roundf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_roundf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c new file mode 100644 index 00000000000..4564686636b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frinta_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c new file mode 100644 index 00000000000..7645fed5136 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_nearbyintf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_nearbyintf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_nearbyintf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { 
scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c new file mode 100644 index 00000000000..eadce07cf1c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frinti_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c new file mode 100644 index 00000000000..98f85fb5bfe --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_floorf16, 
_Float16, uint64_t, 32) + +TEST_FN (__builtin_floorf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_floorf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c new file mode 100644 index 00000000000..56988be786d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frintm_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c new file mode 100644 index 00000000000..f2336979ad0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { 
dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_ceilf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_ceilf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_ceilf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c new file mode 100644 index 00000000000..c24c6326d1e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frintp_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times 
{\tfrintp\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c new file mode 100644 index 00000000000..73403a54ecb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_rintf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_rintf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_rintf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c new file mode 100644 index 00000000000..e8b8924537f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frintx_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times 
{\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c new file mode 100644 index 00000000000..73778431c2e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_truncf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_truncf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_truncf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c new file mode 100644 index 00000000000..17791229663 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frintz_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ -- 2.47.2