git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Tweak the pipeline model for Exynos M1
author Evandro Menezes <e.menezes@samsung.com>
Tue, 23 Feb 2016 21:31:00 +0000 (21:31 +0000)
committer Evandro Menezes <evandro@gcc.gnu.org>
Tue, 23 Feb 2016 21:31:00 +0000 (21:31 +0000)
gcc/
* config/aarch64/aarch64.c (exynosm1_tunings): Enable fusion of AES{D,E}
and AESMC pairs.
* config/arm/exynos-m1.md: Change cost of STP, fix bypass for stores
and add bypass for AES{D,E} and AESMC pairs.

From-SVN: r233647

gcc/ChangeLog
gcc/config/aarch64/aarch64.c
gcc/config/arm/exynos-m1.md

index 22dd022b6bab89ee39fac2659c9ac35627d49cdc..07b50b5d3a1d9ed669c12de77945ab87793cdc93 100644 (file)
@@ -1,3 +1,10 @@
+2016-02-23  Evandro Menezes  <e.menezes@samsung.com>
+
+       * config/arm/exynos-m1.md: Change cost of STP, fix bypass for stores
+       and add bypass for AES{D,E} and AESMC pairs.
+       * config/aarch64/aarch64.c (exynosm1_tunings): Enable fusion of AES{D,E}
+       and AESMC pairs.
+
 2016-02-23  Evandro Menezes  <e.menezes@samsung.com>
 
         * config/aarch64/aarch64.c (exynosm1_tunings): Enable the Newton
index dc3dfeac7ad25e0487296f3f8875806187f752c2..6dc8330200e9b353edc17483581b6daf2b97faaa 100644 (file)
@@ -526,7 +526,7 @@ static const struct tune_params exynosm1_tunings =
   &generic_branch_cost,
   4,   /* memmov_cost  */
   3,   /* issue_rate  */
-  (AARCH64_FUSE_NOTHING), /* fusible_ops  */
+  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
   4,   /* function_align.  */
   4,   /* jump_align.  */
   4,   /* loop_align.  */
index 2f52b22fb50bd3864f4ccd713eb46ea9ffaec64b..318b151d64697001d0082295e54486a2ffcaa6e5 100644 (file)
          (eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q")
            (const_string "neon_load4_all")
 
-         (eq_attr "type" "f_stores, f_stored,\
-                          neon_stp, neon_stp_q")
-           (const_string "neon_store")
-
          (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q")
            (const_string "neon_store1_1")
 
 (define_insn_reservation
   "exynos_m1_neon_store" 1
   (and (eq_attr "tune" "exynosm1")
-       (eq_attr "exynos_m1_neon_type" "neon_store"))
-  "(em1_fst, em1_st)")
+       (eq_attr "type" "f_stores, f_stored, neon_stp"))
+  "em1_sfst")
+
+(define_insn_reservation
+  "exynos_m1_neon_store_q" 3
+  (and (eq_attr "tune" "exynosm1")
+       (eq_attr "type" "neon_stp_q"))
+  "(em1_sfst * 2)")
 
 (define_insn_reservation
   "exynos_m1_neon_store1_1" 1
   "exynos_m1_neon_store1_one" 7
   (and (eq_attr "tune" "exynosm1")
        (eq_attr "exynos_m1_neon_type" "neon_store1_one"))
-  "(em1_fst, em1_st)")
+  "em1_sfst")
 
 (define_insn_reservation
   "exynos_m1_neon_store2" 7
 
 ;; Pre-decrement and post-increment addressing modes update the register quickly.
 ;; TODO: figure out how to tell the addressing mode register from the loaded one.
-(define_bypass 1 "exynos_m1_store*" "exynos_m1_store*")
+(define_bypass 1 "exynos_m1_store*, exynos_m1_neon_store*"
+                "exynos_m1_store*, exynos_m1_neon_store*,
+                 exynos_m1_load*, exynos_m1_neon_load*")
 
 ;; MLAs can feed other MLAs quickly.
 (define_bypass 1 "exynos_m1_mla*" "exynos_m1_mla*")
 (define_bypass 5 "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step"
                 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
                  exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
-
 (define_bypass 3 "exynos_m1_fp_add"
                 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
 (define_bypass 3 "exynos_m1_neon_fp_add"
                 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
                  exynos_m1_crypto_poly*")
 
+;; AES{D,E}/AESMC pairs can feed each other instantly.
+(define_bypass 0 "exynos_m1_crypto_simple"
+                "exynos_m1_crypto_simple"
+                "aarch_crypto_can_dual_issue")
+
 ;; Predicted branches take no time, but mispredicted ones take forever anyway.
 (define_bypass 1 "exynos_m1_*"
                 "exynos_m1_call, exynos_m1_branch")