]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Add A64FX machine model
authorQian Jianhua <qianjh@cn.fujitsu.com>
Fri, 7 Aug 2020 09:39:40 +0000 (10:39 +0100)
committerRichard Sandiford <richard.sandiford@arm.com>
Fri, 7 Aug 2020 09:39:40 +0000 (10:39 +0100)
This patch add support for Fujitsu A64FX, as the first step of adding
A64FX machine model.

A64FX is used in FUJITSU Supercomputer PRIMEHPC FX1000,
PRIMEHPC FX700, and supercomputer Fugaku.
The official microarchitecture information of A64FX can be read at
https://github.com/fujitsu/A64FX.

2020-08-07  Qian jianhua  <qianjh@cn.fujitsu.com>

gcc/
* config/aarch64/aarch64-cores.def (a64fx): New core.
* config/aarch64/aarch64-tune.md: Regenerated.
* config/aarch64/aarch64.c (a64fx_prefetch_tune, a64fx_tunings): New.
* doc/invoke.texi: Add a64fx to the list.

(cherry picked from commit 02f21aea0679c5cac094a3f575e839d44cb57a39)

gcc/config/aarch64/aarch64-cores.def
gcc/config/aarch64/aarch64-tune.md
gcc/config/aarch64/aarch64.c
gcc/doc/invoke.texi

index c9be89e1a4c8a63fc372a64e265dbb46e1d0fdbb..9214686d9d18a7ce6c75e94a2b3860e69886f77a 100644 (file)
@@ -104,6 +104,9 @@ AARCH64_CORE("ares",  ares, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_F
 AARCH64_CORE("neoverse-n1",  neoversen1, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
 AARCH64_CORE("neoverse-e1",  neoversee1, cortexa53, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa53, 0x41, 0xd4a, -1)
 
+/* Fujitsu ('F') cores. */
+AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
+
 /* HiSilicon ('H') cores. */
 AARCH64_CORE("tsv110",  tsv110, tsv110, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,   0x48, 0xd01, -1)
 
index b12d935342e896ce07471616538b15a5dee919bf..a3bd30754ead28eb0554d5d274bf1e8fdc6b1eb0 100644 (file)
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-       "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,neoversen1,neoversee1,tsv110,zeus,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
+       "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,neoversen1,neoversee1,a64fx,tsv110,zeus,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
        (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
index e7af8e5e67157487666cbecea74b985b3378f29f..bcf2777313ecc17ead422273958b99a2441b046f 100644 (file)
@@ -681,6 +681,17 @@ static const cpu_prefetch_tune xgene1_prefetch_tune =
   -1                   /* default_opt_level  */
 };
 
+static const cpu_prefetch_tune a64fx_prefetch_tune =
+{
+  8,                   /* num_slots  */
+  64,                  /* l1_cache_size  */
+  256,                 /* l1_cache_line_size  */
+  32768,               /* l2_cache_size  */
+  true,                        /* prefetch_dynamic_strides */
+  -1,                  /* minimum_stride */
+  -1                   /* default_opt_level  */
+};
+
 static const struct tune_params generic_tunings =
 {
   &cortexa57_extra_costs,
@@ -1111,6 +1122,32 @@ static const struct tune_params neoversen1_tunings =
   &generic_prefetch_tune
 };
 
+static const struct tune_params a64fx_tunings =
+{
+  &generic_extra_costs,
+  &generic_addrcost_table,
+  &generic_regmove_cost,
+  &generic_vector_cost,
+  &generic_branch_cost,
+  &generic_approx_modes,
+  SVE_512, /* sve_width  */
+  4, /* memmov_cost  */
+  7, /* issue_rate  */
+  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops  */
+  "32",        /* function_align.  */
+  "16",        /* jump_align.  */
+  "32",        /* loop_align.  */
+  4,   /* int_reassoc_width.  */
+  2,   /* fp_reassoc_width.  */
+  2,   /* vec_reassoc_width.  */
+  2,   /* min_div_recip_mul_sf.  */
+  2,   /* min_div_recip_mul_df.  */
+  0,   /* max_case_values.  */
+  tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
+  (AARCH64_EXTRA_TUNE_NONE),   /* tune_flags.  */
+  &a64fx_prefetch_tune
+};
+
 /* Support for fine-grained override of the tuning structures.  */
 struct aarch64_tuning_override_function
 {
index 1612ee75ce34b288ecdeb35be41b79a8bc77f845..93436dec08f6e9e374414793a70f4790edc5322a 100644 (file)
@@ -15853,7 +15853,9 @@ performance of the code.  Permissible values for this option are:
 @samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
 @samp{neoverse-e1},@samp{neoverse-n1},@samp{qdf24xx}, @samp{saphira},
 @samp{phecda}, @samp{xgene1}, @samp{vulcan}, @samp{octeontx},
-@samp{octeontx81},  @samp{octeontx83}, @samp{thunderx}, @samp{thunderxt88},
+@samp{octeontx81},  @samp{octeontx83},
+@samp{a64fx},
+@samp{thunderx}, @samp{thunderxt88},
 @samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110},
 @samp{thunderxt83}, @samp{thunderx2t99}, @samp{zeus},
 @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53},