From: Bernd Schmidt Date: Mon, 7 Sep 2009 18:06:51 +0000 (+0000) Subject: bfin.md (UNSPEC_VOLATILE_STALL): New constant. X-Git-Tag: releases/gcc-4.5.0~3641 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c7cb1555e56257a2165fe1a15b636c7d0ff1fc0e;p=thirdparty%2Fgcc.git bfin.md (UNSPEC_VOLATILE_STALL): New constant. * config/bfin/bfin.md (UNSPEC_VOLATILE_STALL): New constant. (attr "addrtype"): New member "spreg". Use it if mem_spfp_address_operand is true for the address. (attr "type"): New entry "stall". (cpu_unit "load"): New. (insn_reservations "load32", "loadp", "loadi"): Add reservation of "load". (insn_reservation "loadsp"): New. (insn_reservation "load_stall1"): New. (insn_reservation "load_stall3"): New. (stall): New insn. * config/bfin/predicates.md (const1_operand, const3_operand): New. (mem_p_address_operand): Exclude stack and frame pointer based addresses. (mem_spfp_address_operand): New; match them here. * config/bfin/bfin.c (add_sched_insns_for_speculation): New function. (bfin_reorg): Call it if scheduling insns. (bfin_gen_bundles): Remove dummy insns created by add_sched_insns_for_speculation. From-SVN: r151486 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5329be5c5775..cbbe9622a96d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2009-09-07 Bernd Schmidt + + * config/bfin/bfin.md (UNSPEC_VOLATILE_STALL): New constant. + (attr "addrtype"): New member "spreg". + Use it if mem_spfp_address_operand is true for the address. + (attr "type"): New entry "stall". + (cpu_unit "load"): New. + (insn_reservations "load32", "loadp", "loadi"): Add reservation of + "load". + (insn_reservation "loadsp"): New. + (insn_reservation "load_stall1"): New. + (insn_reservation "load_stall3"): New. + (stall): New insn. + * config/bfin/predicates.md (const1_operand, const3_operand): New. + (mem_p_address_operand): Exclude stack and frame pointer based + addresses. + (mem_spfp_address_operand): New; match them here. 
+ * config/bfin/bfin.c (add_sched_insns_for_speculation): New function. + (bfin_reorg): Call it if scheduling insns. + (bfin_gen_bundles): Remove dummy insns created by + add_sched_insns_for_speculation. + 2009-09-07 Martin Jambor PR middle-end/41282 diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c index 29c00323f752..0cfb471d2971 100644 --- a/gcc/config/bfin/bfin.c +++ b/gcc/config/bfin/bfin.c @@ -4784,15 +4784,27 @@ bfin_gen_bundles (void) for (insn = BB_HEAD (bb);; insn = next) { int at_end; + rtx delete_this = NULL_RTX; + if (INSN_P (insn)) { - if (get_attr_type (insn) == TYPE_DSP32) - slot[0] = insn; - else if (slot[1] == NULL_RTX) - slot[1] = insn; + enum attr_type type = get_attr_type (insn); + + if (type == TYPE_STALL) + { + gcc_assert (n_filled == 0); + delete_this = insn; + } else - slot[2] = insn; - n_filled++; + { + if (type == TYPE_DSP32) + slot[0] = insn; + else if (slot[1] == NULL_RTX) + slot[1] = insn; + else + slot[2] = insn; + n_filled++; + } } next = NEXT_INSN (insn); @@ -4807,7 +4819,7 @@ bfin_gen_bundles (void) /* BB_END can change due to emitting extra NOPs, so check here. */ at_end = insn == BB_END (bb); - if (at_end || GET_MODE (next) == TImode) + if (delete_this == NULL_RTX && (at_end || GET_MODE (next) == TImode)) { if ((n_filled < 2 || !gen_one_bundle (slot)) @@ -4826,6 +4838,8 @@ bfin_gen_bundles (void) n_filled = 0; slot[0] = slot[1] = slot[2] = NULL_RTX; } + if (delete_this != NULL_RTX) + delete_insn (delete_this); if (at_end) break; } @@ -5226,6 +5240,65 @@ workaround_speculation (void) } } +/* Called just before the final scheduling pass. If we need to insert NOPs + later on to work around speculative loads, insert special placeholder + insns that cause loads to be delayed for as many cycles as necessary + (and possible). This reduces the number of NOPs we need to add. + The dummy insns we generate are later removed by bfin_gen_bundles. */ +static void +add_sched_insns_for_speculation (void) +{ + rtx insn; + + if (! 
ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS + && ! ENABLE_WA_INDIRECT_CALLS) + return; + + /* First pass: after each conditional branch that is predicted not taken, + insert a stall placeholder (operand 3) before the next real insn. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx pat; + + if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn)) + continue; + + pat = PATTERN (insn); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER + || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC + || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0) + continue; + + if (JUMP_P (insn)) + { + if (any_condjump_p (insn) + && !cbranch_predicted_taken_p (insn)) + { + rtx n = next_real_insn (insn); + emit_insn_before (gen_stall (GEN_INT (3)), n); + } + } + } + + /* Second pass: for conditional branches that are predicted taken, insert a + stall placeholder (operand 1) before the first real insn at the branch + destination, unless a stall placeholder is already there. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (JUMP_P (insn) + && any_condjump_p (insn) + && (cbranch_predicted_taken_p (insn))) + { + rtx target = JUMP_LABEL (insn); + rtx next = next_real_insn (target); + + if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE + && get_attr_type (next) == TYPE_STALL) + continue; + emit_insn_before (gen_stall (GEN_INT (1)), next); + } + } +} + /* We use the machine specific reorg pass for emitting CSYNC instructions after conditional branches as needed. 
@@ -5259,6 +5332,8 @@ bfin_reorg (void) split_all_insns (); splitting_for_sched = 0; + add_sched_insns_for_speculation (); + timevar_push (TV_SCHED2); schedule_insns (); timevar_pop (TV_SCHED2); diff --git a/gcc/config/bfin/bfin.md b/gcc/config/bfin/bfin.md index 01e40125cf6b..ba93214b3d44 100644 --- a/gcc/config/bfin/bfin.md +++ b/gcc/config/bfin/bfin.md @@ -146,7 +146,8 @@ (UNSPEC_VOLATILE_SSYNC 2) (UNSPEC_VOLATILE_LOAD_FUNCDESC 3) (UNSPEC_VOLATILE_STORE_EH_HANDLER 4) - (UNSPEC_VOLATILE_DUMMY 5)]) + (UNSPEC_VOLATILE_DUMMY 5) + (UNSPEC_VOLATILE_STALL 6)]) (define_constants [(MACFLAG_NONE 0) @@ -163,14 +164,18 @@ (MACFLAG_IH 11)]) (define_attr "type" - "move,movcc,mvi,mcld,mcst,dsp32,mult,alu0,shft,brcc,br,call,misc,sync,compare,dummy" + "move,movcc,mvi,mcld,mcst,dsp32,mult,alu0,shft,brcc,br,call,misc,sync,compare,dummy,stall" (const_string "misc")) -(define_attr "addrtype" "32bit,preg,ireg" +(define_attr "addrtype" "32bit,preg,spreg,ireg" (cond [(and (eq_attr "type" "mcld") (and (match_operand 0 "d_register_operand" "") (match_operand 1 "mem_p_address_operand" ""))) (const_string "preg") + (and (eq_attr "type" "mcld") + (and (match_operand 0 "d_register_operand" "") + (match_operand 1 "mem_spfp_address_operand" ""))) + (const_string "spreg") (and (eq_attr "type" "mcld") (and (match_operand 0 "d_register_operand" "") (match_operand 1 "mem_i_address_operand" ""))) @@ -179,6 +184,10 @@ (and (match_operand 1 "d_register_operand" "") (match_operand 0 "mem_p_address_operand" ""))) (const_string "preg") + (and (eq_attr "type" "mcst") + (and (match_operand 1 "d_register_operand" "") + (match_operand 0 "mem_spfp_address_operand" ""))) + (const_string "spreg") (and (eq_attr "type" "mcst") (and (match_operand 1 "d_register_operand" "") (match_operand 0 "mem_i_address_operand" ""))) @@ -199,6 +208,10 @@ (define_cpu_unit "store" "bfin") (define_cpu_unit "pregs" "bfin") +;; A dummy unit used to delay scheduling of loads after a conditional +;; branch. 
+(define_cpu_unit "load" "bfin") + (define_reservation "core" "slot0+slot1+slot2") (define_insn_reservation "alu" 1 @@ -216,17 +229,22 @@ (define_insn_reservation "load32" 1 (and (not (eq_attr "seq_insns" "multi")) (and (eq_attr "type" "mcld") (eq_attr "addrtype" "32bit"))) - "core") + "core+load") (define_insn_reservation "loadp" 1 (and (not (eq_attr "seq_insns" "multi")) (and (eq_attr "type" "mcld") (eq_attr "addrtype" "preg"))) + "(slot1|slot2)+pregs+load") + +(define_insn_reservation "loadsp" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcld") (eq_attr "addrtype" "spreg"))) "(slot1|slot2)+pregs") (define_insn_reservation "loadi" 1 (and (not (eq_attr "seq_insns" "multi")) (and (eq_attr "type" "mcld") (eq_attr "addrtype" "ireg"))) - "(slot1|slot2)") + "(slot1|slot2)+load") (define_insn_reservation "store32" 1 (and (not (eq_attr "seq_insns" "multi")) @@ -235,7 +253,8 @@ (define_insn_reservation "storep" 1 (and (not (eq_attr "seq_insns" "multi")) - (and (eq_attr "type" "mcst") (eq_attr "addrtype" "preg"))) + (and (eq_attr "type" "mcst") + (ior (eq_attr "addrtype" "preg") (eq_attr "addrtype" "spreg")))) "(slot1|slot2)+pregs+store") (define_insn_reservation "storei" 1 @@ -247,6 +266,16 @@ (eq_attr "seq_insns" "multi") "core") +(define_insn_reservation "load_stall1" 1 + (and (eq_attr "type" "stall") + (match_operand 0 "const1_operand" "")) + "core+load*2") + +(define_insn_reservation "load_stall3" 1 + (and (eq_attr "type" "stall") + (match_operand 0 "const3_operand" "")) + "core+load*4") + (absence_set "slot0" "slot1,slot2") (absence_set "slot1" "slot2") @@ -2667,6 +2696,9 @@ gcc_unreachable (); }) +;; When used at a location where CC contains 1, causes a speculative load +;; that is later cancelled. This is used for certain workarounds in +;; interrupt handler prologues. 
(define_insn "dummy_load" [(unspec_volatile [(match_operand 0 "register_operand" "a") (match_operand 1 "register_operand" "C")] @@ -2677,6 +2709,17 @@ (set_attr "length" "4") (set_attr "seq_insns" "multi")]) +;; A placeholder insn inserted before the final scheduling pass. It is used +;; to improve scheduling of loads when workarounds for speculative loads are +;; needed, by not placing them in the first few cycles after a conditional +;; branch. +(define_insn "stall" + [(unspec_volatile [(match_operand 0 "const_int_operand" "P1P3")] + UNSPEC_VOLATILE_STALL)] + "" + "" + [(set_attr "type" "stall")]) + (define_insn "csync" [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_CSYNC)] "" diff --git a/gcc/config/bfin/predicates.md b/gcc/config/bfin/predicates.md index bce725a70091..37d1372216d6 100644 --- a/gcc/config/bfin/predicates.md +++ b/gcc/config/bfin/predicates.md @@ -59,6 +59,14 @@ (and (match_code "const_int") (match_test "op == const0_rtx || op == const1_rtx"))) +(define_predicate "const1_operand" + (and (match_code "const_int") + (match_test "op == const1_rtx"))) + +(define_predicate "const3_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 3"))) + (define_predicate "vec_shift_operand" (ior (and (match_code "const_int") (match_test "INTVAL (op) >= -16 && INTVAL (op) < 15")) @@ -180,10 +188,14 @@ (define_predicate "bfin_direct_comparison_operator" (match_code "eq,lt,le,leu,ltu")) -;; The following two are used to compute the addrtype attribute. They return +;; The following three are used to compute the addrtype attribute. They return ;; true if passed a memory address usable for a 16-bit load or store using a ;; P or I register, respectively. If neither matches, we know we have a ;; 32-bit instruction. +;; We subdivide the P case into normal P registers, and SP/FP. We can assume +;; that speculative loads through SP and FP are no problem, so this has +;; an effect on the anomaly workaround code. 
+ (define_predicate "mem_p_address_operand" (match_code "mem") { @@ -193,7 +205,19 @@ if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC) op = XEXP (op, 0); gcc_assert (REG_P (op)); - return PREG_P (op); + return PREG_P (op) && op != stack_pointer_rtx && op != frame_pointer_rtx; +}) + +(define_predicate "mem_spfp_address_operand" + (match_code "mem") +{ + if (effective_address_32bit_p (op, mode)) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC) + op = XEXP (op, 0); + gcc_assert (REG_P (op)); + return op == stack_pointer_rtx || op == frame_pointer_rtx; }) (define_predicate "mem_i_address_operand"