git.ipfire.org Git - thirdparty/gcc.git/commitdiff
AArch64: Fuse CMP+CSEL and CMP+CSET for -mcpu=neoverse-v2
author: Jennifer Schmitz <jschmitz@nvidia.com>
Fri, 2 Aug 2024 14:58:32 +0000 (15:58 +0100)
committer: Richard Sandiford <richard.sandiford@arm.com>
Fri, 2 Aug 2024 14:58:32 +0000 (15:58 +0100)
According to the Neoverse V2 Software Optimization Guide (section 4.14), the
instruction pairs CMP+CSEL and CMP+CSET can be fused, which had not been
implemented so far. This patch implements and tests the two fusion pairs.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
There was also no impact beyond noise on the SPEC CPU2017 benchmarks.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>
gcc/

* config/aarch64/aarch64.cc (aarch_macro_fusion_pair_p): Implement
fusion logic.
* config/aarch64/aarch64-fusion-pairs.def (cmp+csel): New entry.
(cmp+cset): Likewise.
* config/aarch64/tuning_models/neoversev2.h: Enable logic in
field fusible_ops.

gcc/testsuite/

* gcc.target/aarch64/fuse_cmp_csel.c: New test.
* gcc.target/aarch64/fuse_cmp_cset.c: Likewise.

gcc/config/aarch64/aarch64-fusion-pairs.def
gcc/config/aarch64/aarch64.cc
gcc/config/aarch64/tuning_models/neoversev2.h
gcc/testsuite/gcc.target/aarch64/fuse_cmp_csel.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/fuse_cmp_cset.c [new file with mode: 0644]

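diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def
index 9a43b0c806573c47b28bd0c625f87deca7d4b5c9..bf5e85ba8fe128721521505bd6b73b38c25d9f65 100644 (file)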
@@ -37,5 +37,7 @@ AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC)
 AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH)
 AARCH64_FUSION_PAIR ("alu+cbz", ALU_CBZ)
 AARCH64_FUSION_PAIR ("addsub_2reg_const1", ADDSUB_2REG_CONST1)
+AARCH64_FUSION_PAIR ("cmp+csel", CMP_CSEL)
+AARCH64_FUSION_PAIR ("cmp+cset", CMP_CSET)
 
 #undef AARCH64_FUSION_PAIR
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 113ebb45cfda71da4b23df86f690888e4e7c89c5..9e12bd9711cdccfd9a503311257b125fb00ea32d 100644 (file)
@@ -27357,6 +27357,26 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
       && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr)))
     return true;
 
+  /* Fuse CMP and CSEL/CSET.  */
+  if (prev_set && curr_set
+      && GET_CODE (SET_SRC (prev_set)) == COMPARE
+      && SCALAR_INT_MODE_P (GET_MODE (XEXP (SET_SRC (prev_set), 0)))
+      && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr)))
+    {
+      enum attr_type prev_type = get_attr_type (prev);
+      if ((prev_type == TYPE_ALUS_SREG || prev_type == TYPE_ALUS_IMM)
+         && ((aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_CSEL)
+              && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
+              && aarch64_reg_or_zero (XEXP (SET_SRC (curr_set), 1), VOIDmode)
+              && aarch64_reg_or_zero (XEXP (SET_SRC (curr_set), 2), VOIDmode)
+              && SCALAR_INT_MODE_P (GET_MODE (XEXP (SET_SRC (curr_set), 1))))
+             || (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_CSET)
+                 && GET_RTX_CLASS (GET_CODE (SET_SRC (curr_set)))
+                    == RTX_COMPARE
+                 && REG_P (SET_DEST (curr_set)))))
+       return true;
+    }
+
   /* Fuse flag-setting ALU instructions and conditional branch.  */
   if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
       && any_condjump_p (curr))
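diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index c9c3019dd01a98bc20a76e8455fb59ff24a9ff6c..bd259a37e9c9562d354f04fde4f0c6c20f616414 100644 (file)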
@@ -221,7 +221,10 @@ static const struct tune_params neoversev2_tunings =
     2 /* store_pred.  */
   }, /* memmov_cost.  */
   5, /* issue_rate  */
-  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops  */
+  (AARCH64_FUSE_AES_AESMC
+   | AARCH64_FUSE_CMP_BRANCH
+   | AARCH64_FUSE_CMP_CSEL
+   | AARCH64_FUSE_CMP_CSET), /* fusible_ops  */
   "32:16",     /* function_align.  */
   "4",         /* jump_align.  */
   "32:16",     /* loop_align.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/fuse_cmp_csel.c b/gcc/testsuite/gcc.target/aarch64/fuse_cmp_csel.c
new file mode 100644 (file)
index 0000000..85f302b
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** f1:
+**     ...
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, le
+**     ret
+*/
+int f1 (int a, int b, int c)
+{
+  int cmp = a > b;
+  int add1 = c + 3;
+  int add2 = c + 8;
+  return cmp ? add1 : add2;
+}
+
+/*
+** f2:
+**     ...
+**     cmp     x[0-9]+, x[0-9]+
+**     csel    x[0-9]+, x[0-9]+, x[0-9]+, le
+**     ret
+*/
+long long f2 (long long a, long long b, long long c)
+{
+ long long cmp = a > b;
+  long long add1 = c + 3;
+  long long add2 = c + 8;
+  return cmp ? add1 : add2;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fuse_cmp_cset.c b/gcc/testsuite/gcc.target/aarch64/fuse_cmp_cset.c
new file mode 100644 (file)
index 0000000..04f1ce2
--- /dev/null
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** f1:
+**     cmp     w[0-9]+, w[0-9]+
+**     cset    w[0-9]+, gt
+**     ...
+*/
+int g;
+int f1 (int a, int b)
+{
+  int cmp = a > b;
+  g = cmp + 1;
+  return cmp;
+}
+
+/*
+** f2:
+**     cmp     x[0-9]+, x[0-9]+
+**     cset    x[0-9]+, gt
+**     ...
+*/
+long long h;
+long long f2 (long long a, long long b)
+{
+  long long cmp = a > b;
+  h = cmp + 1;
+  return cmp;
+}