From b10baa9584d009f3725083c97c7d44f88749abe0 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Fri, 28 Mar 2014 17:22:47 +0000 Subject: [PATCH] [ARM/AArch64][1/2] Crypto intrinsics tuning for Cortex-A53 - "type" Attribute restructuring * config/aarch64/aarch64-simd.md (aarch64_crypto_aes<aes_op>v16qi): Use crypto_aese type. (aarch64_crypto_aes<aesmc_op>v16qi): Use crypto_aesmc type. * config/arm/arm.md (is_neon_type): Replace crypto_aes with crypto_aese, crypto_aesmc. Move to types.md. * config/arm/types.md (crypto_aes): Split into crypto_aese, crypto_aesmc. * config/arm/iterators.md (crypto_type): Likewise. From-SVN: r208908 --- gcc/ChangeLog | 11 +++ gcc/config/aarch64/aarch64-simd.md | 4 +- gcc/config/arm/arm.md | 99 --------------------------- gcc/config/arm/iterators.md | 4 +- gcc/config/arm/types.md | 105 ++++++++++++++++++++++++++++- 5 files changed, 118 insertions(+), 105 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1ca072e61429..c023b902c05a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2014-03-28 Kyrylo Tkachov + + * config/aarch64/aarch64-simd.md (aarch64_crypto_aes<aes_op>v16qi): + Use crypto_aese type. + (aarch64_crypto_aes<aesmc_op>v16qi): Use crypto_aesmc type. + * config/arm/arm.md (is_neon_type): Replace crypto_aes with + crypto_aese, crypto_aesmc. Move to types.md. + * config/arm/types.md (crypto_aes): Split into crypto_aese, + crypto_aesmc. + * config/arm/iterators.md (crypto_type): Likewise. + 2014-03-28 Jan Hubicka * cgraph.c: Include expr.h and tree-dfa.h. 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 6048d605c72e..73aee2c3df09 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4250,7 +4250,7 @@ CRYPTO_AES))] "TARGET_SIMD && TARGET_CRYPTO" "aes\\t%0.16b, %2.16b" - [(set_attr "type" "crypto_aes")] + [(set_attr "type" "crypto_aese")] ) (define_insn "aarch64_crypto_aesv16qi" @@ -4259,7 +4259,7 @@ CRYPTO_AESMC))] "TARGET_SIMD && TARGET_CRYPTO" "aes\\t%0.16b, %1.16b" - [(set_attr "type" "crypto_aes")] + [(set_attr "type" "crypto_aesmc")] ) ;; sha1 diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 2ddda020863a..4df24a236a24 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -262,105 +262,6 @@ ; initialized by arm_option_override() (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) -; YES if the "type" attribute assigned to the insn denotes an -; Advanced SIMD instruction, NO otherwise. -(define_attr "is_neon_type" "yes,no" - (if_then_else (eq_attr "type" - "neon_add, neon_add_q, neon_add_widen, neon_add_long,\ - neon_qadd, neon_qadd_q, neon_add_halve, neon_add_halve_q,\ - neon_add_halve_narrow_q,\ - neon_sub, neon_sub_q, neon_sub_widen, neon_sub_long, neon_qsub,\ - neon_qsub_q, neon_sub_halve, neon_sub_halve_q,\ - neon_sub_halve_narrow_q,\ - neon_abs, neon_abs_q, neon_neg, neon_neg_q, neon_qneg,\ - neon_qneg_q, neon_qabs, neon_qabs_q, neon_abd, neon_abd_q,\ - neon_abd_long, neon_minmax, neon_minmax_q, neon_compare,\ - neon_compare_q, neon_compare_zero, neon_compare_zero_q,\ - neon_arith_acc, neon_arith_acc_q, neon_reduc_add,\ - neon_reduc_add_q, neon_reduc_add_long, neon_reduc_add_acc,\ - neon_reduc_add_acc_q, neon_reduc_minmax, neon_reduc_minmax_q,\ - neon_logic, neon_logic_q, neon_tst, neon_tst_q,\ - neon_shift_imm, neon_shift_imm_q, neon_shift_imm_narrow_q,\ - neon_shift_imm_long, neon_shift_reg, neon_shift_reg_q,\ - neon_shift_acc, neon_shift_acc_q, neon_sat_shift_imm,\ - 
neon_sat_shift_imm_q, neon_sat_shift_imm_narrow_q,\ - neon_sat_shift_reg, neon_sat_shift_reg_q,\ - neon_ins, neon_ins_q, neon_move, neon_move_q, neon_move_narrow_q,\ - neon_permute, neon_permute_q, neon_zip, neon_zip_q, neon_tbl1,\ - neon_tbl1_q, neon_tbl2, neon_tbl2_q, neon_tbl3, neon_tbl3_q,\ - neon_tbl4, neon_tbl4_q, neon_bsl, neon_bsl_q, neon_cls,\ - neon_cls_q, neon_cnt, neon_cnt_q, neon_dup, neon_dup_q,\ - neon_ext, neon_ext_q, neon_rbit, neon_rbit_q,\ - neon_rev, neon_rev_q, neon_mul_b, neon_mul_b_q, neon_mul_h,\ - neon_mul_h_q, neon_mul_s, neon_mul_s_q, neon_mul_b_long,\ - neon_mul_h_long, neon_mul_s_long, neon_mul_d_long, neon_mul_h_scalar,\ - neon_mul_h_scalar_q, neon_mul_s_scalar, neon_mul_s_scalar_q,\ - neon_mul_h_scalar_long, neon_mul_s_scalar_long, neon_sat_mul_b,\ - neon_sat_mul_b_q, neon_sat_mul_h, neon_sat_mul_h_q,\ - neon_sat_mul_s, neon_sat_mul_s_q, neon_sat_mul_b_long,\ - neon_sat_mul_h_long, neon_sat_mul_s_long, neon_sat_mul_h_scalar,\ - neon_sat_mul_h_scalar_q, neon_sat_mul_s_scalar,\ - neon_sat_mul_s_scalar_q, neon_sat_mul_h_scalar_long,\ - neon_sat_mul_s_scalar_long, neon_mla_b, neon_mla_b_q, neon_mla_h,\ - neon_mla_h_q, neon_mla_s, neon_mla_s_q, neon_mla_b_long,\ - neon_mla_h_long, neon_mla_s_long, neon_mla_h_scalar,\ - neon_mla_h_scalar_q, neon_mla_s_scalar, neon_mla_s_scalar_q,\ - neon_mla_h_scalar_long, neon_mla_s_scalar_long,\ - neon_sat_mla_b_long, neon_sat_mla_h_long,\ - neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\ - neon_sat_mla_s_scalar_long,\ - neon_to_gp, neon_to_gp_q, neon_from_gp, neon_from_gp_q,\ - neon_ldr, neon_load1_1reg, neon_load1_1reg_q, neon_load1_2reg,\ - neon_load1_2reg_q, neon_load1_3reg, neon_load1_3reg_q,\ - neon_load1_4reg, neon_load1_4reg_q, neon_load1_all_lanes,\ - neon_load1_all_lanes_q, neon_load1_one_lane, neon_load1_one_lane_q,\ - neon_load2_2reg, neon_load2_2reg_q, neon_load2_4reg,\ - neon_load2_4reg_q, neon_load2_all_lanes, neon_load2_all_lanes_q,\ - neon_load2_one_lane, neon_load2_one_lane_q,\ - 
neon_load3_3reg, neon_load3_3reg_q, neon_load3_all_lanes,\ - neon_load3_all_lanes_q, neon_load3_one_lane, neon_load3_one_lane_q,\ - neon_load4_4reg, neon_load4_4reg_q, neon_load4_all_lanes,\ - neon_load4_all_lanes_q, neon_load4_one_lane, neon_load4_one_lane_q,\ - neon_str, neon_store1_1reg, neon_store1_1reg_q, neon_store1_2reg,\ - neon_store1_2reg_q, neon_store1_3reg, neon_store1_3reg_q,\ - neon_store1_4reg, neon_store1_4reg_q, neon_store1_one_lane,\ - neon_store1_one_lane_q, neon_store2_2reg, neon_store2_2reg_q,\ - neon_store2_4reg, neon_store2_4reg_q, neon_store2_one_lane,\ - neon_store2_one_lane_q, neon_store3_3reg, neon_store3_3reg_q,\ - neon_store3_one_lane, neon_store3_one_lane_q, neon_store4_4reg,\ - neon_store4_4reg_q, neon_store4_one_lane, neon_store4_one_lane_q,\ - neon_fp_abd_s, neon_fp_abd_s_q, neon_fp_abd_d, neon_fp_abd_d_q,\ - neon_fp_addsub_s, neon_fp_addsub_s_q, neon_fp_addsub_d,\ - neon_fp_addsub_d_q, neon_fp_compare_s, neon_fp_compare_s_q,\ - neon_fp_compare_d, neon_fp_compare_d_q, neon_fp_minmax_s,\ - neon_fp_minmax_s_q, neon_fp_minmax_d, neon_fp_minmax_d_q,\ - neon_fp_reduc_add_s, neon_fp_reduc_add_s_q, neon_fp_reduc_add_d,\ - neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s, - neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d,\ - neon_fp_reduc_minmax_d_q,\ - neon_fp_cvt_narrow_s_q, neon_fp_cvt_narrow_d_q,\ - neon_fp_cvt_widen_h, neon_fp_cvt_widen_s, neon_fp_to_int_s,\ - neon_fp_to_int_s_q, neon_int_to_fp_s, neon_int_to_fp_s_q,\ - neon_fp_round_s, neon_fp_round_s_q, neon_fp_recpe_s,\ - neon_fp_recpe_s_q,\ - neon_fp_recpe_d, neon_fp_recpe_d_q, neon_fp_recps_s,\ - neon_fp_recps_s_q, neon_fp_recps_d, neon_fp_recps_d_q,\ - neon_fp_recpx_s, neon_fp_recpx_s_q, neon_fp_recpx_d,\ - neon_fp_recpx_d_q, neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\ - neon_fp_rsqrte_d, neon_fp_rsqrte_d_q, neon_fp_rsqrts_s,\ - neon_fp_rsqrts_s_q, neon_fp_rsqrts_d, neon_fp_rsqrts_d_q,\ - neon_fp_mul_s, neon_fp_mul_s_q, neon_fp_mul_s_scalar,\ - neon_fp_mul_s_scalar_q, neon_fp_mul_d, 
neon_fp_mul_d_q,\ - neon_fp_mul_d_scalar_q, neon_fp_mla_s, neon_fp_mla_s_q,\ - neon_fp_mla_s_scalar, neon_fp_mla_s_scalar_q, neon_fp_mla_d,\ - neon_fp_mla_d_q, neon_fp_mla_d_scalar_q, neon_fp_sqrt_s,\ - neon_fp_sqrt_s_q, neon_fp_sqrt_d, neon_fp_sqrt_d_q,\ - neon_fp_div_s, neon_fp_div_s_q, neon_fp_div_d, neon_fp_div_d_q, crypto_aes,\ - crypto_sha1_xor, crypto_sha1_fast, crypto_sha1_slow, crypto_sha256_fast,\ - crypto_sha256_slow") - (const_string "yes") - (const_string "no"))) - ; condition codes: this one is used by final_prescan_insn to speed up ; conditionalizing instructions. It saves having to scan the rtl to see if ; it uses or alters the condition codes. diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 33e09e4ce243..aebab9340498 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -551,8 +551,8 @@ (UNSPEC_SHA256SU1 "sha256su1")]) (define_int_attr crypto_type - [(UNSPEC_AESE "crypto_aes") (UNSPEC_AESD "crypto_aes") - (UNSPEC_AESMC "crypto_aes") (UNSPEC_AESIMC "crypto_aes") + [(UNSPEC_AESE "crypto_aese") (UNSPEC_AESD "crypto_aese") + (UNSPEC_AESMC "crypto_aesmc") (UNSPEC_AESIMC "crypto_aesmc") (UNSPEC_SHA1C "crypto_sha1_slow") (UNSPEC_SHA1P "crypto_sha1_slow") (UNSPEC_SHA1M "crypto_sha1_slow") (UNSPEC_SHA1SU1 "crypto_sha1_fast") (UNSPEC_SHA1SU0 "crypto_sha1_xor") (UNSPEC_SHA256H "crypto_sha256_slow") diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md index cc39cd11f4a4..efbf7a753274 100644 --- a/gcc/config/arm/types.md +++ b/gcc/config/arm/types.md @@ -524,7 +524,8 @@ ; ; The classification below is for Crypto instructions. 
; -; crypto_aes +; crypto_aese +; crypto_aesmc ; crypto_sha1_xor ; crypto_sha1_fast ; crypto_sha1_slow @@ -1051,7 +1052,8 @@ neon_fp_div_d,\ neon_fp_div_d_q,\ \ - crypto_aes,\ + crypto_aese,\ + crypto_aesmc,\ crypto_sha1_xor,\ crypto_sha1_fast,\ crypto_sha1_slow,\ @@ -1075,3 +1077,102 @@ "smlalxy,umull,umulls,umaal,umlal,umlals,smull,smulls,smlal,smlals") (const_string "yes") (const_string "no"))) + +; YES if the "type" attribute assigned to the insn denotes an +; Advanced SIMD instruction, NO otherwise. +(define_attr "is_neon_type" "yes,no" + (if_then_else (eq_attr "type" + "neon_add, neon_add_q, neon_add_widen, neon_add_long,\ + neon_qadd, neon_qadd_q, neon_add_halve, neon_add_halve_q,\ + neon_add_halve_narrow_q,\ + neon_sub, neon_sub_q, neon_sub_widen, neon_sub_long, neon_qsub,\ + neon_qsub_q, neon_sub_halve, neon_sub_halve_q,\ + neon_sub_halve_narrow_q,\ + neon_abs, neon_abs_q, neon_neg, neon_neg_q, neon_qneg,\ + neon_qneg_q, neon_qabs, neon_qabs_q, neon_abd, neon_abd_q,\ + neon_abd_long, neon_minmax, neon_minmax_q, neon_compare,\ + neon_compare_q, neon_compare_zero, neon_compare_zero_q,\ + neon_arith_acc, neon_arith_acc_q, neon_reduc_add,\ + neon_reduc_add_q, neon_reduc_add_long, neon_reduc_add_acc,\ + neon_reduc_add_acc_q, neon_reduc_minmax, neon_reduc_minmax_q,\ + neon_logic, neon_logic_q, neon_tst, neon_tst_q,\ + neon_shift_imm, neon_shift_imm_q, neon_shift_imm_narrow_q,\ + neon_shift_imm_long, neon_shift_reg, neon_shift_reg_q,\ + neon_shift_acc, neon_shift_acc_q, neon_sat_shift_imm,\ + neon_sat_shift_imm_q, neon_sat_shift_imm_narrow_q,\ + neon_sat_shift_reg, neon_sat_shift_reg_q,\ + neon_ins, neon_ins_q, neon_move, neon_move_q, neon_move_narrow_q,\ + neon_permute, neon_permute_q, neon_zip, neon_zip_q, neon_tbl1,\ + neon_tbl1_q, neon_tbl2, neon_tbl2_q, neon_tbl3, neon_tbl3_q,\ + neon_tbl4, neon_tbl4_q, neon_bsl, neon_bsl_q, neon_cls,\ + neon_cls_q, neon_cnt, neon_cnt_q, neon_dup, neon_dup_q,\ + neon_ext, neon_ext_q, neon_rbit, neon_rbit_q,\ + neon_rev, 
neon_rev_q, neon_mul_b, neon_mul_b_q, neon_mul_h,\ + neon_mul_h_q, neon_mul_s, neon_mul_s_q, neon_mul_b_long,\ + neon_mul_h_long, neon_mul_s_long, neon_mul_d_long, neon_mul_h_scalar,\ + neon_mul_h_scalar_q, neon_mul_s_scalar, neon_mul_s_scalar_q,\ + neon_mul_h_scalar_long, neon_mul_s_scalar_long, neon_sat_mul_b,\ + neon_sat_mul_b_q, neon_sat_mul_h, neon_sat_mul_h_q,\ + neon_sat_mul_s, neon_sat_mul_s_q, neon_sat_mul_b_long,\ + neon_sat_mul_h_long, neon_sat_mul_s_long, neon_sat_mul_h_scalar,\ + neon_sat_mul_h_scalar_q, neon_sat_mul_s_scalar,\ + neon_sat_mul_s_scalar_q, neon_sat_mul_h_scalar_long,\ + neon_sat_mul_s_scalar_long, neon_mla_b, neon_mla_b_q, neon_mla_h,\ + neon_mla_h_q, neon_mla_s, neon_mla_s_q, neon_mla_b_long,\ + neon_mla_h_long, neon_mla_s_long, neon_mla_h_scalar,\ + neon_mla_h_scalar_q, neon_mla_s_scalar, neon_mla_s_scalar_q,\ + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\ + neon_sat_mla_b_long, neon_sat_mla_h_long,\ + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\ + neon_sat_mla_s_scalar_long,\ + neon_to_gp, neon_to_gp_q, neon_from_gp, neon_from_gp_q,\ + neon_ldr, neon_load1_1reg, neon_load1_1reg_q, neon_load1_2reg,\ + neon_load1_2reg_q, neon_load1_3reg, neon_load1_3reg_q,\ + neon_load1_4reg, neon_load1_4reg_q, neon_load1_all_lanes,\ + neon_load1_all_lanes_q, neon_load1_one_lane, neon_load1_one_lane_q,\ + neon_load2_2reg, neon_load2_2reg_q, neon_load2_4reg,\ + neon_load2_4reg_q, neon_load2_all_lanes, neon_load2_all_lanes_q,\ + neon_load2_one_lane, neon_load2_one_lane_q,\ + neon_load3_3reg, neon_load3_3reg_q, neon_load3_all_lanes,\ + neon_load3_all_lanes_q, neon_load3_one_lane, neon_load3_one_lane_q,\ + neon_load4_4reg, neon_load4_4reg_q, neon_load4_all_lanes,\ + neon_load4_all_lanes_q, neon_load4_one_lane, neon_load4_one_lane_q,\ + neon_str, neon_store1_1reg, neon_store1_1reg_q, neon_store1_2reg,\ + neon_store1_2reg_q, neon_store1_3reg, neon_store1_3reg_q,\ + neon_store1_4reg, neon_store1_4reg_q, neon_store1_one_lane,\ + neon_store1_one_lane_q, 
neon_store2_2reg, neon_store2_2reg_q,\ + neon_store2_4reg, neon_store2_4reg_q, neon_store2_one_lane,\ + neon_store2_one_lane_q, neon_store3_3reg, neon_store3_3reg_q,\ + neon_store3_one_lane, neon_store3_one_lane_q, neon_store4_4reg,\ + neon_store4_4reg_q, neon_store4_one_lane, neon_store4_one_lane_q,\ + neon_fp_abd_s, neon_fp_abd_s_q, neon_fp_abd_d, neon_fp_abd_d_q,\ + neon_fp_addsub_s, neon_fp_addsub_s_q, neon_fp_addsub_d,\ + neon_fp_addsub_d_q, neon_fp_compare_s, neon_fp_compare_s_q,\ + neon_fp_compare_d, neon_fp_compare_d_q, neon_fp_minmax_s,\ + neon_fp_minmax_s_q, neon_fp_minmax_d, neon_fp_minmax_d_q,\ + neon_fp_reduc_add_s, neon_fp_reduc_add_s_q, neon_fp_reduc_add_d,\ + neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s, + neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d,\ + neon_fp_reduc_minmax_d_q,\ + neon_fp_cvt_narrow_s_q, neon_fp_cvt_narrow_d_q,\ + neon_fp_cvt_widen_h, neon_fp_cvt_widen_s, neon_fp_to_int_s,\ + neon_fp_to_int_s_q, neon_int_to_fp_s, neon_int_to_fp_s_q,\ + neon_fp_round_s, neon_fp_round_s_q, neon_fp_recpe_s,\ + neon_fp_recpe_s_q,\ + neon_fp_recpe_d, neon_fp_recpe_d_q, neon_fp_recps_s,\ + neon_fp_recps_s_q, neon_fp_recps_d, neon_fp_recps_d_q,\ + neon_fp_recpx_s, neon_fp_recpx_s_q, neon_fp_recpx_d,\ + neon_fp_recpx_d_q, neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\ + neon_fp_rsqrte_d, neon_fp_rsqrte_d_q, neon_fp_rsqrts_s,\ + neon_fp_rsqrts_s_q, neon_fp_rsqrts_d, neon_fp_rsqrts_d_q,\ + neon_fp_mul_s, neon_fp_mul_s_q, neon_fp_mul_s_scalar,\ + neon_fp_mul_s_scalar_q, neon_fp_mul_d, neon_fp_mul_d_q,\ + neon_fp_mul_d_scalar_q, neon_fp_mla_s, neon_fp_mla_s_q,\ + neon_fp_mla_s_scalar, neon_fp_mla_s_scalar_q, neon_fp_mla_d,\ + neon_fp_mla_d_q, neon_fp_mla_d_scalar_q, neon_fp_sqrt_s,\ + neon_fp_sqrt_s_q, neon_fp_sqrt_d, neon_fp_sqrt_d_q,\ + neon_fp_div_s, neon_fp_div_s_q, neon_fp_div_d, neon_fp_div_d_q, crypto_aese,\ + crypto_aesmc, crypto_sha1_xor, crypto_sha1_fast, crypto_sha1_slow,\ + crypto_sha256_fast, crypto_sha256_slow") + (const_string "yes") + 
(const_string "no"))) -- 2.47.2