GCC 15 added two new fusions: CMP+CSEL and CMP+CSET.
This patch enables them for cores that support them, based on their Software
Optimization Guides, and generically on Armv9-A. Even if a core does not
support them, there is no negative performance impact.
gcc/ChangeLog:
* config/aarch64/aarch64-fusion-pairs.def (AARCH64_FUSE_NEOVERSE_BASE):
New.
* config/aarch64/tuning_models/neoverse512tvb.h: Use it.
* config/aarch64/tuning_models/neoversen2.h: Use it.
* config/aarch64/tuning_models/neoversen3.h: Use it.
* config/aarch64/tuning_models/neoversev1.h: Use it.
* config/aarch64/tuning_models/neoversev2.h: Use it.
* config/aarch64/tuning_models/neoversev3.h: Use it.
* config/aarch64/tuning_models/neoversev3ae.h: Use it.
* config/aarch64/tuning_models/cortexx925.h: Add fusions.
* config/aarch64/tuning_models/generic_armv9_a.h: Add fusions.
/* Baseline fusion settings suitable for all cores. */
#define AARCH64_FUSE_BASE (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC)
+/* Baseline fusion settings suitable for all Neoverse cores. */
+#define AARCH64_FUSE_NEOVERSE_BASE (AARCH64_FUSE_BASE | AARCH64_FUSE_CMP_CSEL \
+ | AARCH64_FUSE_CMP_CSET)
+
#define AARCH64_FUSE_MOVK (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_MOVK_MOVK)
2 /* store_pred. */
}, /* memmov_cost. */
10, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ (AARCH64_FUSE_BASE
+ | AARCH64_FUSE_CMP_CSEL
+ | AARCH64_FUSE_CMP_CSET), /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
1 /* store_pred. */
}, /* memmov_cost. */
3, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ (AARCH64_FUSE_BASE
+ | AARCH64_FUSE_CMP_CSEL
+ | AARCH64_FUSE_CMP_CSET), /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
1 /* store_pred. */
}, /* memmov_cost. */
3, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
1 /* store_pred. */
}, /* memmov_cost. */
5, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
2 /* store_pred. */
}, /* memmov_cost. */
5, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
1 /* store_pred. */
}, /* memmov_cost. */
3, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
2 /* store_pred. */
}, /* memmov_cost. */
5, /* issue_rate */
- (AARCH64_FUSE_BASE | AARCH64_FUSE_CMP_CSEL | AARCH64_FUSE_CMP_CSET), /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
2 /* store_pred. */
}, /* memmov_cost. */
10, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
2 /* store_pred. */
}, /* memmov_cost. */
10, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */