AArch64: Add if-conversion target cost model [PR123017]

author Tamar Christina <tamar.christina@arm.com>

Mon, 5 Jan 2026 14:25:41 +0000 (14:25 +0000)

committer Tamar Christina <tamar.christina@arm.com>

Mon, 5 Jan 2026 14:25:41 +0000 (14:25 +0000)
author Tamar Christina <tamar.christina@arm.com>
Mon, 5 Jan 2026 14:25:41 +0000 (14:25 +0000)
committer Tamar Christina <tamar.christina@arm.com>
Mon, 5 Jan 2026 14:25:41 +0000 (14:25 +0000)
diff --git a/gcc/config/aarch64/aarch64-json-schema.h b/gcc/config/aarch64/aarch64-json-schema.h

index 0c1863fad18aaec9bb73f0df7310ff0554d1ffb4..32fb39300feb0c5a4aa68f6cf524c158de1bcfa1 100644 (file)
--- a/gcc/config/aarch64/aarch64-json-schema.h
+++ b/gcc/config/aarch64/aarch64-json-schema.h
@@ -222,7 +222,11 @@ static const char *schema_json = R"json(
         }
        }
      },
-    "branch_costs": { "predictable": "int", "unpredictable": "int" },
+    "branch_costs": {
+      "predictable": "int",
+      "unpredictable": "int",
+      "br_mispredict_factor": "int"
+    },
      "approx_modes": { "division": "int", "sqrt": "int", "recip_sqrt": "int" },
      "sve_width": "uint",
      "memmov_cost": {
diff --git a/gcc/config/aarch64/aarch64-json-tunings-parser-generated.inc b/gcc/config/aarch64/aarch64-json-tunings-parser-generated.inc

index cf31e539d3ba6b78a2c6bb069165855a30a20f35..882d8f799755a3d304cb3adf7d8b6b79f6d80443 100644 (file)
--- a/gcc/config/aarch64/aarch64-json-tunings-parser-generated.inc
+++ b/gcc/config/aarch64/aarch64-json-tunings-parser-generated.inc
@@ -237,6 +237,7 @@ parse_branch_costs (const json::object *jo, T &branch_costs)
  {
    PARSE_INTEGER_FIELD (jo, "predictable", branch_costs.predictable);
    PARSE_INTEGER_FIELD (jo, "unpredictable", branch_costs.unpredictable);
+  PARSE_INTEGER_FIELD (jo, "br_mispredict_factor", branch_costs.br_mispredict_factor);
  }
  
  template <typename T>
diff --git a/gcc/config/aarch64/aarch64-json-tunings-printer-generated.inc b/gcc/config/aarch64/aarch64-json-tunings-printer-generated.inc

index 6ffc4427a8bdb97a433f400dd65719adcf60156e..498be96ff33ef5e169882cd5a1cd0071411e6534 100644 (file)
--- a/gcc/config/aarch64/aarch64-json-tunings-printer-generated.inc
+++ b/gcc/config/aarch64/aarch64-json-tunings-printer-generated.inc
@@ -287,6 +287,7 @@ serialize_branch_costs (const T &branch_costs)
  
    SERIALIZE_INTEGER_FIELD (branch_costs_obj, "predictable", branch_costs.predictable);
    SERIALIZE_INTEGER_FIELD (branch_costs_obj, "unpredictable", branch_costs.unpredictable);
+  SERIALIZE_INTEGER_FIELD (branch_costs_obj, "br_mispredict_factor", branch_costs.br_mispredict_factor);
  
    return branch_costs_obj;
  }
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h

index a646689296a97999ee96848835951dce11ae0ba0..48d3a3de2356d385fc9612b5a0c546623bbc60fe 100644 (file)
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -481,6 +481,7 @@ struct cpu_branch_cost
  {
    int predictable;    /* Predictable branch or optimizing for size.  */
    int unpredictable;  /* Unpredictable branch or optimizing for speed.  */
+  int br_mispredict_factor;  /* Scale factor for cost of misprediction on branches.  */
  };
  
  /* Control approximate alternatives to certain FP operators.  */
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc

index b7f56c8d560fc3e3ea2a403ae8cba3735770fdc4..ab902b1283ac00bbf6e57acdacc184913ccc3d3b 100644 (file)
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -99,6 +99,7 @@
  #include "ipa-prop.h"
  #include "ipa-fnsummary.h"
  #include "hash-map.h"
+#include "ifcvt.h"
  #include "aarch64-sched-dispatch.h"
  #include "aarch64-json-tunings-printer.h"
  #include "aarch64-json-tunings-parser.h"
@@ -2269,6 +2270,115 @@ aarch64_instruction_selection (function * /* fun */, gimple_stmt_iterator *gsi)
    return true;
  }
  
+/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  If an explicit max was set then
+   honor it, otherwise apply a tuning specific scale to branch costs.  */
+
+static unsigned int
+aarch64_max_noce_ifcvt_seq_cost (edge e)
+{
+  bool predictable_p = predictable_edge_p (e);
+  if (predictable_p)
+    {
+      if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
+       return param_max_rtl_if_conversion_predictable_cost;
+    }
+  else
+    {
+      if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
+       return param_max_rtl_if_conversion_unpredictable_cost;
+    }
+
+  /* For modern machines with long speculative execution chains and modern
+     branch prediction the penalty of the branch misprediction needs to weighed
+     against the cost of executing the instructions unconditionally.  RISC cores
+     tend to not have that deep pipelines and so the cost of mispredictions can
+     be reasonably cheap.  */
+
+  bool speed_p = optimize_function_for_speed_p (cfun);
+  return BRANCH_COST (speed_p, predictable_p)
+        * aarch64_tune_params.branch_costs->br_mispredict_factor;
+}
+
+/* Return true if SEQ is a good candidate as a replacement for the
+   if-convertible sequence described in IF_INFO.  AArch64 has a range of
+   branchless statements and not all of them are potentially problematic.  For
+   instance a cset is usually beneficial whereas a csel is more complicated.  */
+
+static bool
+aarch64_noce_conversion_profitable_p (rtx_insn *seq,
+                                     struct noce_if_info *if_info)
+{
+  /* If not in a loop, just accept all if-conversion as the branch predictor
+     won't have anything to train on.  So assume sequences are essentially
+     unpredictable.  ce1 is in CFG mode still while ce2 is outside.  For ce2
+     accept limited if-conversion based on the shape of the instruction.  */
+  if (current_loops
+      && (!if_info->test_bb->loop_father
+         || !if_info->test_bb->loop_father->header))
+    return true;
+
+  /* For now we only care about csel speciifcally.  */
+  bool is_csel_p = true;
+
+  if (if_info->then_simple
+      && if_info->a != NULL_RTX
+      && !REG_P (if_info->a)
+      && !SUBREG_P (if_info->a))
+    is_csel_p = false;
+
+  if (if_info->else_simple
+      && if_info->b != NULL_RTX
+      && !REG_P (if_info->b)
+      && !SUBREG_P (if_info->b))
+    is_csel_p = false;
+
+  for (rtx_insn *insn = seq; is_csel_p && insn; insn = NEXT_INSN (insn))
+    {
+      rtx set = single_set (insn);
+      if (!set)
+       continue;
+
+      rtx src = SET_SRC (set);
+      rtx dst = SET_DEST (set);
+      machine_mode mode = GET_MODE (src);
+      if (GET_MODE_CLASS (mode) != MODE_INT)
+       continue;
+
+      switch (GET_CODE (src))
+       {
+       case PLUS:
+       case MINUS:
+         {
+           /* Likely a CINC.  */
+           if (REG_P (dst)
+               && (REG_P (XEXP (src, 0)) || SUBREG_P (XEXP (src, 0)))
+               && (XEXP (src, 1) == CONST1_RTX (mode)
+                   || XEXP (src, 1) == CONSTM1_RTX (mode)))
+             is_csel_p = false;
+           break;
+         }
+       default:
+         break;
+       }
+    }
+
+  /* For now accept every variant but csel unconditionally because CSEL usually
+     means you have to wait for two values to be computed.  */
+  if (!is_csel_p)
+    return true;
+
+  /* TODO: Add detecting of csel, cinc, cset etc and take profiling in
+          consideration.  For now this basic costing is enough to cover
+          most cases.  */
+  if (if_info->speed_p)
+    {
+      unsigned cost = seq_cost (seq, true);
+      return cost <= if_info->max_seq_cost;
+    }
+
+  return default_noce_conversion_profitable_p (seq, if_info);
+}
+
  /* Implement TARGET_HARD_REGNO_NREGS.  */
  
  static unsigned int
@@ -33453,6 +33563,13 @@ aarch64_libgcc_floating_mode_supported_p
  #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
    aarch64_atomic_assign_expand_fenv
  
+/* If-conversion costs.  */
+#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
+#define TARGET_MAX_NOCE_IFCVT_SEQ_COST aarch64_max_noce_ifcvt_seq_cost
+
+#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
+#define TARGET_NOCE_CONVERSION_PROFITABLE_P aarch64_noce_conversion_profitable_p
+
  /* Section anchor support.  */
  
  #undef TARGET_MIN_ANCHOR_OFFSET
diff --git a/gcc/config/aarch64/tuning_models/generic.h b/gcc/config/aarch64/tuning_models/generic.h

index b04f871cc3b5ceaead27122f1453bcadd6eb874c..76071c695e0bee89c94f047a366a77ec13fb3cbc 100644 (file)
--- a/gcc/config/aarch64/tuning_models/generic.h
+++ b/gcc/config/aarch64/tuning_models/generic.h
@@ -128,7 +128,8 @@ static const struct cpu_vector_cost generic_vector_cost =
  static const struct cpu_branch_cost generic_branch_cost =
  {
    1,  /* Predictable.  */
-  3   /* Unpredictable.  */
+  3,  /* Unpredictable.  */
+  6, /* br_mispredict_factor.  */
  };
  
  /* Generic approximation modes.  */
diff --git a/gcc/config/aarch64/tuning_models/generic_armv8_a.h b/gcc/config/aarch64/tuning_models/generic_armv8_a.h

index 1756c1d5bdefccef86de9f0ef7194cae7253cd27..29cf3a5a7c7afa92a27488e7106ed2b82db2bc59 100644 (file)
--- a/gcc/config/aarch64/tuning_models/generic_armv8_a.h
+++ b/gcc/config/aarch64/tuning_models/generic_armv8_a.h
@@ -125,13 +125,6 @@ static const struct cpu_vector_cost generic_armv8_a_vector_cost =
    nullptr /* issue_info  */
  };
  
-/* Generic costs for branch instructions.  */
-static const struct cpu_branch_cost generic_armv8_a_branch_cost =
-{
-  1,  /* Predictable.  */
-  3   /* Unpredictable.  */
-};
-
  /* Generic approximation modes.  */
  static const cpu_approx_modes generic_armv8_a_approx_modes =
  {
@@ -158,7 +151,7 @@ static const struct tune_params generic_armv8_a_tunings =
    &generic_armv8_a_addrcost_table,
    &generic_armv8_a_regmove_cost,
    &generic_armv8_a_vector_cost,
-  &generic_armv8_a_branch_cost,
+  &generic_branch_cost,
    &generic_armv8_a_approx_modes,
    SVE_NOT_IMPLEMENTED, /* sve_width  */
    { 4, /* load_int.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr123017_1.c b/gcc/testsuite/gcc.target/aarch64/pr123017_1.c

new file mode 100644 (file)

index 0000000..38e4334
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr123017_1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "csel\t" 2 } } */
+
+#include <stdint.h>
+#include <stddef.h>
+
+// Branchy: compiler can emit a short compare+branch.
+size_t branchy(size_t cur, size_t dist, size_t fallback, const uint8_t *src, uint8_t *dst, size_t len) {
+    if (dist < cur && dist < fallback) {
+        const uint8_t *p = src - dist;
+        for (size_t i = 0; i < len; ++i)
+         dst[i] = p[i];
+        return dist;
+    } else {
+        const uint8_t *p = src - fallback;
+        for (size_t i = 0; i < len; ++i)
+         dst[i] = p[i];
+        return fallback;
+    }
+}
+
+// CSEL-heavy: chain ternaries so both paths stay live; compilers emit cmps + csel.
+size_t selecty(size_t cur, size_t dist, size_t fallback, const uint8_t *src, uint8_t *dst, size_t len) {
+    // Compute both candidates unconditionally, then pick with ternaries.
+    size_t candA_off = dist;
+    size_t candB_off = fallback;
+    const uint8_t *candA = src - candA_off;
+    const uint8_t *candB = src - candB_off;
+
+    size_t useA = (dist < cur && dist < fallback);
+    const uint8_t *p = useA ? candA : candB;
+    size_t chosen = useA ? candA_off : candB_off;
+
+    for (size_t i = 0; i < len; ++i)
+      dst[i] = p[i];
+    return chosen;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/pr123017_2.c b/gcc/testsuite/gcc.target/aarch64/pr123017_2.c

new file mode 100644 (file)

index 0000000..72adb41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr123017_2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "csel\t" 0 } } */
+
+void f(const int *restrict in,
+   int *restrict out,
+   int n, int threshold)
+{
+  for (int i = 0; i < n; i+=4) {
+    out[i+0] = in[i+0] > threshold ? in[i+0] : in[i+0] + i;
+    out[i+1] = in[i+1] > threshold ? in[i+1] : in[i+1] + i;
+    out[i+2] = in[i+2] > threshold ? in[i+2] : in[i+2] + i;
+    out[i+3] = in[i+3] > threshold ? in[i+3] : in[i+3] + i;
+  }
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr123017_3.c b/gcc/testsuite/gcc.target/aarch64/pr123017_3.c

new file mode 100644 (file)

index 0000000..ec39d78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr123017_3.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "csel\t" 0 } } */
+
+void f(const int *restrict in,
+   int *restrict out,
+   int n, int threshold)
+{
+  for (int i = 0; i < n; ++i) {
+    int v = in[i];
+    if (v > threshold) {
+      int t = v * 3;
+      t += 7;
+      t ^= 0x55;
+      t *= 0x55;
+      t -= 0x5;
+      t &= 0xFE;
+      t ^= 0x55;
+      out[i] = t;
+    } else {
+      int t = v * 5;
+      t += 4;
+      t ^= 0x65;
+      t *= 0x35;
+      t -= 0x7;
+      t &= 0x0E;
+      t ^= 0x45;
+      out[i] = t;
+    }
+  }
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr123017_4.c b/gcc/testsuite/gcc.target/aarch64/pr123017_4.c

new file mode 100644 (file)

index 0000000..b350d49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr123017_4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "csel\t" 0 } } */
+
+void f(const int *restrict in,
+   int *restrict out,
+   int n, int threshold)
+{
+  for (int i = 0; i < n; ++i) {
+    int v = in[i];
+    if (v > threshold) {
+      int t = v * 3;
+      out[i] = t;
+    }
+  }
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr123017_5.c b/gcc/testsuite/gcc.target/aarch64/pr123017_5.c

new file mode 100644 (file)

index 0000000..6d7cd08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr123017_5.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "csel\t" 1 } } */
+
+void f(const int *restrict in,
+   int *restrict out,
+   int n, int threshold)
+{
+  for (int i = 0; i < n; ++i) {
+    int v = in[i];
+    if (v > threshold) {
+      int t = v * 3;
+      t += 7;
+      out[i] = t;
+    } else {
+      out[i] = v;
+    }
+  }
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr123017_6.c b/gcc/testsuite/gcc.target/aarch64/pr123017_6.c

new file mode 100644 (file)

index 0000000..0402c4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr123017_6.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "csel\t" 0 } } */
+
+void f(const int *restrict in,
+   int *restrict out,
+   int n, int threshold)
+{
+  for (int i = 0; i < n; ++i) {
+    int v = in[i];
+    if (v > threshold) {
+      int t = v * 3;
+      t += 7;
+      t ^= 0x55;
+      t *= 0x55;
+      out[i] = t;
+    } else {
+      out[i] = v;
+    }
+  }
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr123017_7.c b/gcc/testsuite/gcc.target/aarch64/pr123017_7.c

new file mode 100644 (file)

index 0000000..048a69e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr123017_7.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "csel\t" 0 } } */
+
+void f(const int *restrict in,
+   int *restrict out,
+   int n, int threshold)
+{
+  for (int i = 0; i < n; ++i) {
+    int v = in[i];
+    if (v > threshold) {
+      int t = v * 3;
+      t += 7;
+      t ^= 0x55;
+      t *= 0x55;
+      t -= 0x5;
+      t &= 0xFE;
+      t ^= 0x55;
+      out[i] = t;
+    } else {
+      out[i] = v;
+    }
+  }
+}
+
author	Tamar Christina <tamar.christina@arm.com>
	Mon, 5 Jan 2026 14:25:41 +0000 (14:25 +0000)
committer	Tamar Christina <tamar.christina@arm.com>
	Mon, 5 Jan 2026 14:25:41 +0000 (14:25 +0000)
gcc/config/aarch64/aarch64-json-schema.h		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-json-tunings-parser-generated.inc		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-json-tunings-printer-generated.inc		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-protos.h		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64.cc		patch \| blob \| blame \| history
gcc/config/aarch64/tuning_models/generic.h		patch \| blob \| blame \| history
gcc/config/aarch64/tuning_models/generic_armv8_a.h		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/pr123017_1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/pr123017_2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/pr123017_3.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/pr123017_4.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/pr123017_5.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/pr123017_6.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/pr123017_7.c	[new file with mode: 0644]	patch \| blob