git.ipfire.org Git - thirdparty/gcc.git/commitdiff
AArch64: enable new predicate tuning for Neoverse cores.
author Tamar Christina <tamar.christina@arm.com>
Wed, 5 Jun 2024 18:32:16 +0000 (19:32 +0100)
committer Tamar Christina <tamar.christina@arm.com>
Wed, 5 Jun 2024 18:32:16 +0000 (19:32 +0100)
This enables the new tuning flag for Neoverse V1, Neoverse V2 and Neoverse N2.
It is kept off for generic codegen.

Note that the tests specify +sve even though they are in aarch64-sve.exp.  The
reason is that if the testsuite is run with a forced SVE-off option, e.g.
-march=armv8-a+nosve, then the intrinsics end up being disabled, because the
-march is preferred over the -mcpu even though the -mcpu comes later.

This prevents the tests from failing in such runs.

gcc/ChangeLog:

* config/aarch64/tuning_models/neoversen2.h (neoversen2_tunings): Add
AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
* config/aarch64/tuning_models/neoversev1.h (neoversev1_tunings): Add
AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
* config/aarch64/tuning_models/neoversev2.h (neoversev2_tunings): Add
AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/pred_clobber_1.c: New test.
* gcc.target/aarch64/sve/pred_clobber_2.c: New test.
* gcc.target/aarch64/sve/pred_clobber_3.c: New test.
* gcc.target/aarch64/sve/pred_clobber_4.c: New test.

gcc/config/aarch64/tuning_models/neoversen2.h
gcc/config/aarch64/tuning_models/neoversev1.h
gcc/config/aarch64/tuning_models/neoversev2.h
gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c [new file with mode: 0644]

index 7e799bbe762fe862e31befed50e54040a7fd1f2f..be9a48ac3adc097f967c217fe09dcac194d7d14f 100644 (file)
@@ -236,7 +236,8 @@ static const struct tune_params neoversen2_tunings =
   (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
    | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
    | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
-   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT),    /* tune_flags.  */
+   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),       /* tune_flags.  */
   &generic_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALWAYS           /* stp_policy_model.  */
index 9363f2ad98a5279cc99f2f9b1509ba921d582e84..0fc41ce6a41b3135fa06d2bda1f517fdf4f8dbcf 100644 (file)
@@ -227,7 +227,8 @@ static const struct tune_params neoversev1_tunings =
   (AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
    | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
    | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
-   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND),   /* tune_flags.  */
+   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
+   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),       /* tune_flags.  */
   &generic_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALWAYS    /* stp_policy_model.  */
index bc01ed767c9b690504eb98456402df5d9d64eee3..f76e4ef358f7dfb9c7d7b470ea7240eaa2120f8e 100644 (file)
@@ -236,7 +236,8 @@ static const struct tune_params neoversev2_tunings =
   (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
    | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
    | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
-   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT),    /* tune_flags.  */
+   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),       /* tune_flags.  */
   &generic_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALWAYS           /* stp_policy_model.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
new file mode 100644 (file)
index 0000000..25129e8
--- /dev/null
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-n2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+**     ...
+**     ptrue   p([1-3]).b, all
+**     cmplo   p0.h, p\1/z, z0.h, z[0-9]+.h
+**     ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+    svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+    use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
new file mode 100644 (file)
index 0000000..58badb6
--- /dev/null
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+**     ...
+**     ptrue   p([1-9][0-9]?).b, all
+**     cmplo   p0.h, p\1/z, z0.h, z[0-9]+.h
+**     ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+    svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+    use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
new file mode 100644 (file)
index 0000000..c67c2bd
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v1" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+**     ...
+**     ptrue   p([1-9][0-9]?).b, all
+**     cmplo   p0.h, p\1/z, z0.h, z[0-9]+.h
+**     ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+    svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+    use (p0);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
new file mode 100644 (file)
index 0000000..c0120af
--- /dev/null
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+**     ...
+**     ptrue   p0.b, all
+**     cmplo   p0.h, p0/z, z0.h, z[0-9]+.h
+**     ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+    svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+    use (p0);
+}