]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Add support for NVIDIA GB10
authorKyrylo Tkachov <ktkachov@nvidia.com>
Mon, 2 Jun 2025 14:08:12 +0000 (07:08 -0700)
committerKyrylo Tkachov <ktkachov@nvidia.com>
Thu, 10 Jul 2025 08:24:54 +0000 (10:24 +0200)
This adds support for -mcpu=gb10.  This is a big.LITTLE configuration
involving Cortex-X925 and Cortex-A725 cores.  The appropriate MIDR numbers
are added to detect them in -mcpu=native.  We did not add an
-mcpu=cortex-x925.cortex-a725 option because GB10 does include the crypto
instructions which we want on by default, and the current convention is to not
enable such extensions for Arm Cortex cores in -mcpu where they are optional
in the IP.

Bootstrapped and tested on aarch64-none-linux-gnu.

Signed-off-by: Kyrylo Tkachov <ktkachov@nvidia.com>
gcc/

* config/aarch64/aarch64-cores.def (gb10): New entry.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi (AArch64 Options): Document the above.

(cherry picked from commit 9ff6ade24cae5a51d1ee9d9ad4b4a5c682e4a5ed)

gcc/config/aarch64/aarch64-cores.def
gcc/config/aarch64/aarch64-tune.md
gcc/doc/invoke.texi

index 24b7cd362aaf42f47b46f249af9394f1ac58c33b..8040409d283083df55dbb2d685ca56da103e2e3f 100644 (file)
@@ -226,6 +226,9 @@ AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG,
 /* NVIDIA ('N') cores. */
 AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1)
 
+/* Armv9-A big.LITTLE processors.  */
+AARCH64_CORE("gb10",  gb10, cortexa57, V9_2A,  (SVE2_BITPERM, SVE2_AES, SVE2_SHA3, SVE2_SM4, MEMTAG, PROFILE), cortexx925, 0x41, AARCH64_BIG_LITTLE (0xd85, 0xd87), -1)
+
 /* Generic Architecture Processors.  */
 AARCH64_CORE("generic",  generic, cortexa53, V8A,  (), generic, 0x0, 0x0, -1)
 AARCH64_CORE("generic-armv8-a",  generic_armv8_a, cortexa53, V8A, (), generic_armv8_a, 0x0, 0x0, -1)
index 982074c2c21e375e23e45a6b83d58413cd4236ce..40ff147d6f8366a1adc46ba325a68212cc2b4ac8 100644 (file)
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-       "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,applea12,applem1_0,applem1_1,applem1_2,applem1_3,applem2_0,applem2_1,applem2_2,applem2_3,applem3_0,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,olympus,generic,generic_armv8_a,generic_armv9_a"
+       "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,applea12,applem1_0,applem1_1,applem1_2,applem1_3,applem2_0,applem2_1,applem2_2,applem2_3,applem3_0,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,olympus,gb10,generic,generic_armv8_a,generic_armv9_a"
        (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
index 14750aed64db5ecdbe33485d66e49d8cb87bcc06..eca871b93d9769160319341c242deeceb6432c28 100644 (file)
@@ -22027,7 +22027,7 @@ performance of the code.  Permissible values for this option are:
 The values @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53},
 @samp{cortex-a73.cortex-a35}, @samp{cortex-a73.cortex-a53},
 @samp{cortex-a75.cortex-a55}, @samp{cortex-a76.cortex-a55},
-@samp{apple-m1}, @samp{apple-m2}, @samp{apple-m3} specify that GCC
+@samp{apple-m1}, @samp{apple-m2}, @samp{apple-m3}, @samp{gb10} specify that GCC
 should tune for a big.LITTLE system.
 
 The value @samp{neoverse-512tvb} specifies that GCC should tune