]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Add A64FX machine model
authorQian Jianhua <qianjh@cn.fujitsu.com>
Mon, 3 Aug 2020 13:01:40 +0000 (14:01 +0100)
committerRichard Sandiford <richard.sandiford@arm.com>
Mon, 3 Aug 2020 13:01:40 +0000 (14:01 +0100)
This patch add support for Fujitsu A64FX, as the first step of adding
A64FX machine model.

A64FX is used in FUJITSU Supercomputer PRIMEHPC FX1000,
PRIMEHPC FX700, and supercomputer Fugaku.
The official microarchitecture information of A64FX can be read at
https://github.com/fujitsu/A64FX.

2020-08-03  Qian jianhua  <qianjh@cn.fujitsu.com>

gcc/
* config/aarch64/aarch64-cores.def (a64fx): New core.
* config/aarch64/aarch64-tune.md: Regenerated.
* config/aarch64/aarch64.c (a64fx_prefetch_tune, a64fx_tunings): New.
* doc/invoke.texi: Add a64fx to the list.

gcc/config/aarch64/aarch64-cores.def
gcc/config/aarch64/aarch64-tune.md
gcc/config/aarch64/aarch64.c
gcc/doc/invoke.texi

index bc89a14cb9e15640aa2e8ebc065da6fbd580e6bd..a7dde38d7687049825aec4eb9446e76db84cd9c0 100644 (file)
@@ -119,6 +119,9 @@ AARCH64_CORE("octeontx2f95",   octeontx2f95,   cortexa57, 8_2A,  AARCH64_FL_FOR_
 AARCH64_CORE("octeontx2f95n",  octeontx2f95n,  cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
 AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
 
+/* Fujitsu ('F') cores. */
+AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
+
 /* HiSilicon ('H') cores. */
 AARCH64_CORE("tsv110",  tsv110, tsv110, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,   0x48, 0xd01, -1)
 
index 841af9da10e12d52d30ad77920986a8d2b55eefb..ebf97c38fbda447ac004bc98573f8353301d2004 100644 (file)
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-       "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,tsv110,thunderx3t110,zeus,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
+       "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
        (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
index a108119ba3e1160809c48a80897c0c0ac175b271..7c3ab3eeb1f44e4908a49f4b5aa60bfa9545254d 100644 (file)
@@ -868,6 +868,17 @@ static const cpu_prefetch_tune xgene1_prefetch_tune =
   -1                   /* default_opt_level  */
 };
 
+static const cpu_prefetch_tune a64fx_prefetch_tune =
+{
+  8,                   /* num_slots  */
+  64,                  /* l1_cache_size  */
+  256,                 /* l1_cache_line_size  */
+  32768,               /* l2_cache_size  */
+  true,                        /* prefetch_dynamic_strides */
+  -1,                  /* minimum_stride */
+  -1                   /* default_opt_level  */
+};
+
 static const struct tune_params generic_tunings =
 {
   &cortexa57_extra_costs,
@@ -1325,6 +1336,32 @@ static const struct tune_params neoversen1_tunings =
   &generic_prefetch_tune
 };
 
+static const struct tune_params a64fx_tunings =
+{
+  &generic_extra_costs,
+  &generic_addrcost_table,
+  &generic_regmove_cost,
+  &generic_vector_cost,
+  &generic_branch_cost,
+  &generic_approx_modes,
+  SVE_512, /* sve_width  */
+  4, /* memmov_cost  */
+  7, /* issue_rate  */
+  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops  */
+  "32",        /* function_align.  */
+  "16",        /* jump_align.  */
+  "32",        /* loop_align.  */
+  4,   /* int_reassoc_width.  */
+  2,   /* fp_reassoc_width.  */
+  2,   /* vec_reassoc_width.  */
+  2,   /* min_div_recip_mul_sf.  */
+  2,   /* min_div_recip_mul_df.  */
+  0,   /* max_case_values.  */
+  tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
+  (AARCH64_EXTRA_TUNE_NONE),   /* tune_flags.  */
+  &a64fx_prefetch_tune
+};
+
 /* Support for fine-grained override of the tuning structures.  */
 struct aarch64_tuning_override_function
 {
index 060d73ed0c506814a56974e0ce795e2fb63ce055..afe09a93b1d2cb30197100a881dce17b02190a0a 100644 (file)
@@ -17354,6 +17354,7 @@ performance of the code.  Permissible values for this option are:
 @samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96}
 @samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n},
 @samp{octeontx2f95mm}
+@samp{a64fx},
 @samp{thunderx}, @samp{thunderxt88},
 @samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110},
 @samp{thunderxt83}, @samp{thunderx2t99}, @samp{thunderx3t110}, @samp{zeus},