From: Spencer Abson Date: Mon, 7 Jul 2025 18:26:35 +0000 (+0000) Subject: aarch64: Relaxed SEL combiner patterns for unpacked SVE FP unary operations X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8d588dbde026b1b96a921b81e80b2de60fd9f8fd;p=thirdparty%2Fgcc.git aarch64: Relaxed SEL combiner patterns for unpacked SVE FP unary operations Extend the unary op/UNSPEC_SEL combiner patterns from SVE_FULL_F to SVE_F, where the strictness value is SVE_RELAXED_GP. gcc/ChangeLog: * config/aarch64/aarch64-sve.md (*cond__2_relaxed): Extend from SVE_FULL_F to SVE_F. (*cond__any_relaxed): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/unpacked_cond_fabs_1.c: New test. * gcc.target/aarch64/sve/unpacked_cond_fneg_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_frinta_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_frinta_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_frinti_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_frintm_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_frintp_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_frintx_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_frintz_1.c: Likewise. --- diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 9a8ff216999..66dd5809bcd 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3822,13 +3822,13 @@ ;; Predicated floating-point unary arithmetic, merging with the first input. (define_insn_and_rewrite "*cond__2_relaxed" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F [(match_operand: 1 "register_operand") - (unspec:SVE_FULL_F + (unspec:SVE_F [(match_operand 3) (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F 2 "register_operand")] + (match_operand:SVE_F 2 "register_operand")] SVE_COND_FP_UNARY) (match_dup 2)] UNSPEC_SEL))] @@ -3870,15 +3870,15 @@ ;; as earlyclobber helps to make the instruction more regular to the ;; register allocator. (define_insn_and_rewrite "*cond__any_relaxed" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F [(match_operand: 1 "register_operand") - (unspec:SVE_FULL_F + (unspec:SVE_F [(match_operand 4) (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F 2 "register_operand")] + (match_operand:SVE_F 2 "register_operand")] SVE_COND_FP_UNARY) - (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fabs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fabs_1.c new file mode 100644 index 00000000000..d959aa97252 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fabs_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include + +#define a_i a[i] +#define b_i b[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \ + TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN (a[i]) : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) + +TEST_ALL (__builtin_fabsf16, _Float16, uint64_t, 32) + +TEST_ALL (__builtin_fabsf16, _Float16, uint32_t, 64) + +TEST_ALL (__builtin_fabsf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fneg_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fneg_1.c new file mode 100644 index 00000000000..7280f4e9fa8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fneg_1.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include + +#define a_i a[i] +#define b_i b[i] + +#define NEG(X) -X + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \ + TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN (a[i]) : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) + +TEST_ALL (NEG, _Float16, uint64_t, 32) + +TEST_ALL (NEG, _Float16, uint32_t, 64) + +TEST_ALL (NEG, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_1.c new file mode 100644 index 00000000000..ed4efb69f43 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include + +#define a_i a[i] +#define b_i b[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \ + TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN (a[i]) : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) + +TEST_ALL (__builtin_roundf16, _Float16, uint64_t, 32) + +TEST_ALL (__builtin_roundf16, _Float16, uint32_t, 64) + +TEST_ALL (__builtin_roundf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_2.c new file mode 100644 index 00000000000..f20e2e6d445 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -mtune=generic -ftree-vectorize" } */ + +#include "unpacked_cond_frinta_1.c" + +/* Test that we don't drop SELs without -fno-trapping-math. */ + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tsel\t} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinti_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinti_1.c new file mode 100644 index 00000000000..d682d150a36 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinti_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include + +#define a_i a[i] +#define b_i b[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \ + TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN (a[i]) : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) + +TEST_ALL (__builtin_nearbyintf16, _Float16, uint64_t, 32) + +TEST_ALL (__builtin_nearbyintf16, _Float16, uint32_t, 64) + +TEST_ALL (__builtin_nearbyintf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintm_1.c new file mode 100644 index 00000000000..7d429b39116 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintm_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include + +#define a_i a[i] +#define b_i b[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \ + TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN (a[i]) : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) + +TEST_ALL (__builtin_floorf16, _Float16, uint64_t, 32) + +TEST_ALL (__builtin_floorf16, _Float16, uint32_t, 64) + +TEST_ALL (__builtin_floorf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintp_1.c new file mode 100644 index 00000000000..c6d0c8c98bb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintp_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include + +#define a_i a[i] +#define b_i b[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \ + TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN (a[i]) : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) + +TEST_ALL (__builtin_ceilf16, _Float16, uint64_t, 32) + +TEST_ALL (__builtin_ceilf16, _Float16, uint32_t, 64) + +TEST_ALL (__builtin_ceilf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintx_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintx_1.c new file mode 100644 index 00000000000..b8afef1c11f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintx_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include + +#define a_i a[i] +#define b_i b[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \ + TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN (a[i]) : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) + +TEST_ALL (__builtin_rintf16, _Float16, uint64_t, 32) + +TEST_ALL (__builtin_rintf16, _Float16, uint32_t, 64) + +TEST_ALL (__builtin_rintf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintz_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintz_1.c new file mode 100644 index 00000000000..d55279bbaea --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintz_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include + +#define a_i a[i] +#define b_i b[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \ + TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN (a[i]) : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) + +TEST_ALL (__builtin_truncf16, _Float16, uint64_t, 32) + +TEST_ALL (__builtin_truncf16, _Float16, uint32_t, 64) + +TEST_ALL (__builtin_truncf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */