From: Kyrylo Tkachov
Date: Fri, 28 Mar 2014 17:24:52 +0000 (+0000)
Subject: [ARM/AArch64][2/2] Crypto intrinsics tuning for Cortex-A53 - pipeline description
X-Git-Tag: releases/gcc-4.9.0~248
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ed6eb6dc9f67948da5f87b9c75d6d03bf696446e;p=thirdparty%2Fgcc.git

[ARM/AArch64][2/2] Crypto intrinsics tuning for Cortex-A53 - pipeline description

	* config/arm/aarch-common.c (aarch_crypto_can_dual_issue): New.
	* config/arm/aarch-common-protos.h (aarch_crypto_can_dual_issue):
	Declare extern.
	* config/arm/cortex-a53.md: Add reservations and bypass for crypto
	instructions as well as AdvancedSIMD loads.

From-SVN: r208910
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c023b902c05a..8434f0448ecb 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2014-03-28  Kyrylo Tkachov
+
+	* config/arm/aarch-common.c (aarch_crypto_can_dual_issue): New.
+	* config/arm/aarch-common-protos.h (aarch_crypto_can_dual_issue):
+	Declare extern.
+	* config/arm/cortex-a53.md: Add reservations and bypass for crypto
+	instructions as well as AdvancedSIMD loads.
+
 2014-03-28  Kyrylo Tkachov
 
 	* config/aarch64/aarch64-simd.md (aarch64_crypto_aesv16qi):
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index a5ff6b4f9cea..3e6e2429f10c 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -23,6 +23,7 @@
 #ifndef GCC_AARCH_COMMON_PROTOS_H
 #define GCC_AARCH_COMMON_PROTOS_H
 
+extern int aarch_crypto_can_dual_issue (rtx, rtx);
 extern int arm_early_load_addr_dep (rtx, rtx);
 extern int arm_early_store_addr_dep (rtx, rtx);
 extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index c11f7e9544c7..af8fc9996fab 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -31,6 +31,57 @@
 #include "c-family/c-common.h"
 #include "rtl.h"
 
+/* In ARMv8-A there's a general expectation that AESE/AESMC
+   and AESD/AESIMC sequences of the form:
+
+     AESE Vn, _
+     AESMC Vn, Vn
+
+   will issue both instructions in a single cycle on super-scalar
+   implementations.  This function identifies such pairs.
+
+   PRODUCER and CONSUMER are insns.  Return nonzero if both are
+   single sets whose sources are an UNSPEC_AESE/UNSPEC_AESMC or
+   UNSPEC_AESD/UNSPEC_AESIMC pair, and CONSUMER both reads (as its
+   first UNSPEC operand) and writes the register written by
+   PRODUCER; return 0 otherwise.  */
+
+int
+aarch_crypto_can_dual_issue (rtx producer, rtx consumer)
+{
+  rtx producer_src, consumer_src;
+
+  producer = single_set (producer);
+  consumer = single_set (consumer);
+
+  producer_src = producer ? SET_SRC (producer) : NULL;
+  consumer_src = consumer ? SET_SRC (consumer) : NULL;
+
+  if (producer_src && consumer_src
+      && GET_CODE (producer_src) == UNSPEC && GET_CODE (consumer_src) == UNSPEC
+      && ((XINT (producer_src, 1) == UNSPEC_AESE
+           && XINT (consumer_src, 1) == UNSPEC_AESMC)
+          || (XINT (producer_src, 1) == UNSPEC_AESD
+              && XINT (consumer_src, 1) == UNSPEC_AESIMC)))
+  {
+    rtx producer_dest = SET_DEST (producer);
+    rtx consumer_dest = SET_DEST (consumer);
+    rtx consumer_op = XVECEXP (consumer_src, 0, 0);
+
+    /* REGNO is only valid on a REG.  Nothing above guarantees that
+       these operands are REGs (before reload they could, for
+       instance, be SUBREGs), so report such insns as not pairable
+       rather than comparing meaningless register numbers.  */
+    if (!REG_P (producer_dest) || !REG_P (consumer_dest)
+        || !REG_P (consumer_op))
+      return 0;
+
+    return REGNO (consumer_dest) == REGNO (producer_dest)
+           && REGNO (consumer_op) == REGNO (producer_dest);
+  }
+
+  return 0;
+}
+
 typedef struct
 {
   rtx_code search_code;
diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md
index deae8eba522b..b131c814d075 100644
--- a/gcc/config/arm/cortex-a53.md
+++ b/gcc/config/arm/cortex-a53.md
@@ -61,6 +61,11 @@
 
 (define_cpu_unit "cortex_a53_fp_div_sqrt" "cortex_a53")
 
+;; The Advanced SIMD pipelines.
+
+(define_cpu_unit "cortex_a53_simd0" "cortex_a53")
+(define_cpu_unit "cortex_a53_simd1" "cortex_a53")
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; ALU instructions.
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -247,6 +252,39 @@
        (eq_attr "type" "fdivd, fsqrtd"))
   "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28")
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ARMv8-A Cryptographic extensions.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a53_crypto_aese" 2
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "crypto_aese"))
+  "cortex_a53_simd0")
+
+(define_insn_reservation "cortex_a53_crypto_aesmc" 2
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "crypto_aesmc"))
+  "cortex_a53_simd0 | cortex_a53_simd1")
+
+(define_insn_reservation "cortex_a53_crypto_sha1_fast" 2
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "crypto_sha1_fast, crypto_sha256_fast"))
+  "cortex_a53_simd0")
+
+(define_insn_reservation "cortex_a53_crypto_sha1_xor" 3
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "crypto_sha1_xor"))
+  "cortex_a53_simd0")
+
+(define_insn_reservation "cortex_a53_crypto_sha_slow" 5
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow"))
+  "cortex_a53_simd0")
+
+(define_bypass 0 "cortex_a53_crypto_aese"
+                 "cortex_a53_crypto_aesmc"
+                 "aarch_crypto_can_dual_issue")
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; VFP to/from core transfers.
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -284,6 +322,16 @@
        (eq_attr "type" "f_loadd"))
   "cortex_a53_slot0")
 
+(define_insn_reservation "cortex_a53_f_load_2reg" 5
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "neon_load2_2reg_q"))
+  "(cortex_a53_slot_any+cortex_a53_ls)*2")
+
+(define_insn_reservation "cortex_a53_f_loadq" 5
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "neon_load1_1reg_q"))
+  "cortex_a53_slot_any+cortex_a53_ls")
+
 (define_insn_reservation "cortex_a53_f_stores" 0
   (and (eq_attr "tune" "cortexa53")
        (eq_attr "type" "f_stores"))
@@ -307,3 +355,13 @@
                cortex_a53_fdivs, cortex_a53_fdivd,\
                cortex_a53_f2r")
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Crude Advanced SIMD approximation.
+;; This reservation matches on is_neon_type rather than on individual
+;; types, giving such insns a latency of 4 on cortex_a53_simd0.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a53_advsimd" 4
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "is_neon_type" "yes"))
+  "cortex_a53_simd0")