From: Spencer Abson Date: Wed, 30 Jul 2025 08:58:50 +0000 (+0000) Subject: aarch64: Add support for unpacked SVE FP conditional ternary arithmetic X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=45fd943eabfe8e71aeecf001e9200f4d52748610;p=thirdparty%2Fgcc.git aarch64: Add support for unpacked SVE FP conditional ternary arithmetic This patch extends the expander for fma, fnma, fms, and fnms to support partial SVE FP modes. We add the missing BF16 tests, which we can now trigger, having implemented the conditional expander. We also add tests for the 'merging with multiplicand' case, which this expander canonicalizes (albeit under SVE_STRICT_GP). gcc/ChangeLog: * config/aarch64/aarch64-sve.md (@cond_): Extend to support partial FP modes. (*cond__2_strict): Extend from SVE_FULL_F to SVE_F, use aarch64_predicate_operand. (*cond__4_strict): Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16, use aarch64_predicate_operand. (*cond__any_strict): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/unpacked_cond_fmla_1.c: Add test cases for merging with multiplicand. * gcc.target/aarch64/sve/unpacked_cond_fmls_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fmla_2.c: New test. * gcc.target/aarch64/sve/unpacked_cond_fmls_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c: Likewise. * g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C: Likewise. * g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C: Likewise. --- diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 815dec97d87..80a32889f8c 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -7634,17 +7634,17 @@ ;; Predicated floating-point ternary operations with merging. 
(define_expand "@cond_" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand: 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && ( || !)" { @@ -7652,6 +7652,8 @@ second of the two. */ if (rtx_equal_p (operands[3], operands[5])) std::swap (operands[2], operands[3]); + + operands[1] = aarch64_sve_emit_masked_fp_pred (mode, operands[1]); }) ;; Predicated floating-point ternary operations, merging with the @@ -7681,15 +7683,15 @@ ) (define_insn "*cond__2_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand: 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand: 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "register_operand") - (match_operand:SVE_FULL_F 4 "register_operand")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 2)] UNSPEC_SEL))] @@ -7727,15 +7729,15 @@ ) (define_insn "*cond__4_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand: 1 
"register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand: 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 4)] UNSPEC_SEL))] @@ -7795,17 +7797,17 @@ ) (define_insn_and_rewrite "*cond__any_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand: 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand: 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && ( || !) 
diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C new file mode 100644 index 00000000000..95cd698f1a6 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C @@ -0,0 +1,35 @@ +/* { dg-do compile }*/ +/* { dg-options "-O2 -fno-trapping-math -msve-vector-bits=2048 " } */ + +#include +#pragma GCC target "arch=armv9-a+sve-b16b16" + +#define COND_BFMLA(TYPE, PRED_TYPE, MERGE) \ + TYPE test_bfmla_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \ + {return p ? a * b + c : MERGE; } + +#define COND_BFMLS(TYPE, PRED_TYPE, MERGE) \ + TYPE test_bfmls_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \ + {return p ? a * -b + c : MERGE; } + +#define TEST_OP(TYPE, PRED_TYPE, T) \ + T (TYPE, PRED_TYPE, c) \ + T (TYPE, PRED_TYPE, 0) + +#define TEST(TYPE, PTYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__ ((vector_size (SIZE))); \ + typedef PTYPE PTYPE##SIZE __attribute__ ((vector_size (SIZE))); \ + TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLA) \ + TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLS) + +TEST (__bf16, uint16_t, 128) + +TEST (__bf16, uint16_t, 64) + +/* { dg-final { scan-assembler-times {\tptrue} 8 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C new file mode 100644 index 00000000000..c0d7c50756e --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 
-msve-vector-bits=2048" } */ + +#include "unpacked_cond_ternary_bf16_1.C" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tand} 8 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c index 8181c421fbc..cae92422597 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c @@ -25,6 +25,8 @@ } #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) @@ -34,14 +36,16 @@ TEST_ALL (FMLA (f16), _Float16, uint32_t, 64) TEST_ALL (FMLA (f32), float, uint64_t, 32) -/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 6 } } */ -/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 6 } } */ -/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final 
{ scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c new file mode 100644 index 00000000000..72e04a4958e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fmla_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c index 3755fdf3c43..db0f81804b9 100644 
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c @@ -25,6 +25,8 @@ } #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) @@ -34,14 +36,16 @@ TEST_ALL (FMLS (f16), _Float16, uint32_t, 64) TEST_ALL (FMLS (f32), float, uint64_t, 32) -/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 6 } } */ -/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 6 } } */ -/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c new file mode 100644 index 00000000000..30120527deb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fmls_1.c" + +/* { dg-final { scan-assembler-times 
{\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c index b83265304d0..07bab63d150 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c @@ -25,6 +25,8 @@ } #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) @@ -34,14 +36,16 @@ TEST_ALL (FNMLA (f16), _Float16, uint32_t, 64) TEST_ALL (FNMLA (f32), float, uint64_t, 32) -/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 6 } } */ -/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 6 } } */ -/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times 
{\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c new file mode 100644 index 00000000000..daef4e49fa9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fnmla_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { 
dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c index a66af9e1334..5526378c521 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c @@ -25,6 +25,8 @@ } #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) @@ -34,14 +36,16 @@ TEST_ALL (FNMLS (f16), _Float16, uint32_t, 64) TEST_ALL (FNMLS (f32), float, uint64_t, 32) -/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 6 } } */ -/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 6 } } */ -/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c new file mode 100644 index 00000000000..8a8f34828fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fnmls_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */