From 1aa49fe0b18deb98e324cee18538d26b46829611 Mon Sep 17 00:00:00 2001 From: Dhruv Chawla Date: Wed, 19 Mar 2025 09:34:09 -0700 Subject: [PATCH] aarch64: Add support for -mcpu=olympus This adds support for the NVIDIA Olympus core to the AArch64 backend. The initial patch does not add any special tuning decisions, and those may come later. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ChangeLog: * config/aarch64/aarch64-cores.def (olympus): New entry. * config/aarch64/aarch64-tune.md: Regenerate. * doc/invoke.texi (AArch64 Options): Document the above. Signed-off-by: Dhruv Chawla --- gcc/config/aarch64/aarch64-cores.def | 3 +++ gcc/config/aarch64/aarch64-tune.md | 2 +- gcc/doc/invoke.texi | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 5ac81332b67..0e22d72976e 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -207,6 +207,9 @@ AARCH64_CORE("neoverse-v3ae", neoversev3ae, cortexa57, V9_2A, (SVE2_BITPERM, RNG AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) +/* NVIDIA ('N') cores. */ +AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8DOT2, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1) + /* Generic Architecture Processors. */ AARCH64_CORE("generic", generic, cortexa53, V8A, (), generic, 0x0, 0x0, -1) AARCH64_CORE("generic-armv8-a", generic_armv8_a, cortexa53, V8A, (), generic_armv8_a, 0x0, 0x0, -1) diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index 54c65cbf68d..56a914f12b9 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,generic,generic_armv8_a,generic_armv9_a" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,olympus,generic,generic_armv8_a,generic_armv9_a" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index a2d327d7c99..515d91ac2e3 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21745,7 +21745,7 @@ performance of the code. Permissible values for this option are: @samp{oryon-1}, @samp{neoverse-512tvb}, @samp{neoverse-e1}, @samp{neoverse-n1}, @samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{grace}, -@samp{neoverse-v3}, @samp{neoverse-v3ae}, @samp{neoverse-n3}, +@samp{neoverse-v3}, @samp{neoverse-v3ae}, @samp{neoverse-n3}, @samp{olympus}, @samp{cortex-a725}, @samp{cortex-x925}, @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan}, @samp{octeontx}, @samp{octeontx81}, @samp{octeontx83}, -- 2.47.2