* fwprop.c (should_replace_address): Add speed attribute.
(PR_OPTIMIZE_FOR_SPEED): New flag.
(propagate_rtx_1): Use it.
(propagate_rtx): Set it.
(try_fwprop_subst): Update call of rtx_costs.
(forward_propagate_and_simplify): Likewise.
* hooks.c (hook_int_rtx_bool_0): New.
(hook_bool_rtx_int_int_intp_false): Replace by ...
(hook_bool_rtx_int_int_intp_bool_false): ... this one.
* hooks.h (hook_int_rtx_bool_0): New.
(hook_bool_rtx_int_int_intp_false): Replace by ...
(hook_bool_rtx_int_int_intp_bool_false): ... this one.
* optabs.c (avoid_expensive_constant): Update call of rtx_cost.
(prepare_cmp_insn): Update call of rtx_cost.
* postreload.c (reload_cse_simplify_set): Update call of rtx_cost.
(reload_cse_simplify_operands): Update call of rtx_cost.
(reload_cse_move2add): Update call of rtx_cost.
* target.h (struct gcc_target): Update rtx_costs and address_costs.
* rtlanal.c (rtx_cost): Add speed argument.
(address_cost): Add speed argument.
(default_address_cost): Likewise.
(insn_rtx_cost): Likewise.
* cfgloopanal.c (seq_cost): Add speed argument.
(target_reg_cost, target_spill_cost): Turn to array.
(init_set_costs): Update for speed.
(estimate_reg_pressure_cost): Add speed argument.
* auto-inc-dec.c (attempt_change): Update call of rtx_cost.
* dojump.c (prefer_and_bit_test): Update call of rtx_cost.
* tree-ssa-loop-ivopts.c (struct ivopts_data): New field speed.
(seq_cost): Add speed argument.
(computation_cost): Add speed argument.
(add_cost, multiply_by_cost, get_address_cost): Add speed argument.
(force_expr_to_var_cost): Update for profile info.
(force_var_cost): Likewise.
(split_address_cost): Likewise.
(ptr_difference_cost): Likewise.
(difference_cost): Likewise.
(get_computation_cost_at): Likewise.
(determine_iv_cost): Likewise.
(ivopts_global_cost_for_size): Likewise.
(rewrite_use_address): Likewise.
(tree_ssa_iv_optimize_loop): Initialize speed field.
* cse.c (optimize_this_for_speed_p): New static var.
(notreg_cost): Update call of rtx_cost.
(cse_extended_basic_block): set optimize_this_for_speed_p.
* ifcvt.c (cheap_bb_rtx_cost_p): Update call of rtx_cost.
(noce_try_cmove_arith): Likewise.
(noce_try_sign_mask): Likewise.
* expr.c (compress_float_constant): Update rtx_cost calls.
* tree-ssa-address.c (most_expensive_mult_to_index): Add speed argument.
(addr_to_parts): Likewise.
(create_mem_ref): Likewise.
* dse.c (find_shift_sequence): Add speed argument.
(replace_read): Update call.
* calls.c (precompute_register_parameters): Update call of rtx_cost.
* expmed.c (sdiv_pow2_cheap, smod_pow2_cheap, zero_cost, add_cost,
neg_cost, shift_cost, shiftadd_cost,
shiftsub_cost, mul_cost, sdiv_cost, udiv_cost, mul_widen_cost,
mul_highpart_cost): Increase dimension.
(init_expmed): Initialize for both size and speed.
(expand_shift): Use profile.
(synth_mult): Use profile.
(choose_mult_variant): Use profile.
(expand_mult): Use profile.
(expand_mult_highpart_optab): Use profile.
(expand_mult_highpart): Use profile.
(expand_smod_pow2): Use profile.
(expand_divmod): Use profile.
* simplify-rtx.c (simplify_binary_operation_1): Update call of rtx_cost.
* loop-invariant.c (create_new_invariant): Use profile.
(gain_for_invariant): Add speed parameter.
(best_gain_for_invariant): Likewise.
(find_invariants_to_move): Likewise.
(move_single_loop_invariants): Set it.
* target-def.h (TARGET_RTX_COSTS): Use hook.
* rtl.h (rtx_cost, address_cost, insn_rtx_cost): Update prototype.
(optimize_insn_for_size_p, optimize_insn_for_speed_p): Declare.
* output.h (default_address_cost): Update prototype.
* combine.c (optimize_this_for_speed_p): New static var.
(combine_validate_cost): Update call of rtx_cost.
(combine_instructions): Set optimize_this_for_speed_p.
(expand_compound_operation): Update call of rtx_cost.
(make_extraction): Update call of rtx_cost.
(force_to_mode): Update call of rtx_cost.
(distribute_and_simplify_rtx): Update call of rtx_cost.
* cfgloop.h (target_reg_cost, target_spill_cost): Turn to array.
(estimate_reg_pressure_cost): Update prototype.
* tree-flow.h (multiply_by_cost, create_mem_ref): Update prototype.
* basic-block.h (optimize_insn_for_size_p, optimize_insn_for_speed_p):
Remove.
* config/alpha/alpha.c (alpha_rtx_costs): Update.
(alpha_rtx_costs): Update.
* config/frv/frv.c (frv_rtx_costs): Update.
* config/s390/s390.c (s390_rtx_costs): Update.
* config/m32c/m32c.c (m32c_memory_move_cost): Update.
(m32c_rtx_costs): Update.
* config/spu/spu.c (TARGET_ADDRESS_COST): Update.
(spu_rtx_costs): Update.
* config/sparc/sparc.c (sparc_rtx_costs): Update.
* config/m32r/m32r.c (m32r_rtx_costs): Update.
* config/i386/i386.c (ix86_address_cost): Update.
(ix86_rtx_costs): Update.
* config/sh/sh.c (sh_rtx_costs, sh_address_cost): Update.
* config/pdp11/pdp11.c (pdp11_rtx_costs): Update.
* config/avr/avr.c (avr_rtx_costs, avr_address_cost): Update.
* config/crx/crx.c (crx_address_cost): Update.
* config/xtensa/xtensa.c (xtensa_rtx_costs): Update.
* config/stormy16/stormy16.c
(xstormy16_address_cost, xstormy16_rtx_costs): Update.
* config/m68hc11/m68hc11.c
(m68hc11_address_cost, m68hc11_rtx_costs): Update.
* config/cris/cris.c (cris_rtx_costs, cris_address_cost): Update.
* config/iq2000/iq2000.c (iq2000_rtx_costs, iq2000_address_cost): Update.
* config/mn10300/mn10300.c (mn10300_address_cost, mn10300_rtx_costs): Update.
* config/ia64/ia64.c (ia64_rtx_costs): Update.
* config/m68k/m68k.c (m68k_rtx_costs): Update.
* config/rs6000/rs6000.c (rs6000_rtx_costs): Update.
* config/arc/arc.c (arc_rtx_costs, arc_address_cost): Update.
* config/mcore/mcore.c (TARGET_ADDRESS_COST): Update.
(mcore_rtx_costs): Update.
* config/score/score3.c (score3_rtx_costs): Update.
* config/score/score7.c (score7_rtx_costs): Update.
* config/score/score3.h (score3_rtx_costs): Update.
* config/score/score7.h (score7_rtx_costs): Update.
* config/score/score.c (score_rtx_costs): Update.
* config/arm/arm.c (arm_address_cost): Update.
(arm_rtx_costs_1): Update.
(arm_rtx_costs_1): Update.
(arm_size_rtx_costs): Update.
(arm_size_rtx_costs): Update.
(arm_size_rtx_costs): Update.
(arm_xscale_rtx_costs): Update.
(arm_thumb_address_cost): Update.
* config/pa/pa.c (hppa_address_cost): Update.
* config/mips/mips.c (mips_rtx_costs): Update.
* config/vax/vax.c (vax_address_cost): Update.
* config/h8300/h8300.c (h8300_shift_costs): Update.
(h8300_rtx_costs): Update.
* config/v850/v850.c (TARGET_ADDRESS_COST): Update.
(v850_rtx_costs): Update.
* config/mmix/mmix.c (mmix_rtx_costs, mmix_rtx_costs): Update.
* config/bfin/bfin.c
(bfin_address_cost): Update.
(bfin_rtx_costs): Update.
* stmt.c (lshift_cheap_p): Update.
From-SVN: r139821
+2008-08-30 Jan Hubicka <jh@suse.cz>
+
+ * fwprop.c (should_replace_address): Add speed attribute.
+ (PR_OPTIMIZE_FOR_SPEED): New flag.
+ (propagate_rtx_1): Use it.
+ (propagate_rtx): Set it.
+ (try_fwprop_subst): Update call of rtx_costs.
+ (forward_propagate_and_simplify): Likewise.
+ * hooks.c (hook_int_rtx_bool_0): New.
+ (hook_bool_rtx_int_int_intp_false): Replace by ...
+ (hook_bool_rtx_int_int_intp_bool_false): ... this one.
+ * hooks.h (hook_int_rtx_bool_0): New.
+ (hook_bool_rtx_int_int_intp_false): Replace by ...
+ (hook_bool_rtx_int_int_intp_bool_false): ... this one.
+ * optabs.c (avoid_expensive_constant): Update call of rtx_cost.
+ (prepare_cmp_insn): Update call of rtx_cost.
+ * postreload.c (reload_cse_simplify_set): Update call of rtx_cost.
+ (reload_cse_simplify_operands): Update call of rtx_cost.
+ (reload_cse_move2add): Update call of rtx_cost.
+ * target.h (struct gcc_target): Update rtx_costs and address_costs.
+ * rtlanal.c (rtx_cost): Add speed argument.
+ (address_cost): Add speed argument.
+ (default_address_cost): Likewise.
+ (insn_rtx_cost): Likewise.
+ * cfgloopanal.c (seq_cost): Add speed argument.
+ (target_reg_cost, target_spill_cost): Turn to array.
+ (init_set_costs): Update for speed.
+ (estimate_reg_pressure_cost): Add speed argument.
+ * auto-inc-dec.c (attempt_change): Update call of rtx_cost.
+ * dojump.c (prefer_and_bit_test): Update call of rtx_cost.
+ * tree-ssa-loop-ivopts.c (struct ivopts_data): New field speed.
+ (seq_cost): Add speed argument.
+ (computation_cost): Add speed argument.
+ (add_cost, multiply_by_cost, get_address_cost): Add speed argument.
+ (force_expr_to_var_cost): Update for profile info.
+ (force_var_cost): Likewise.
+ (split_address_cost): Likewise.
+ (ptr_difference_cost): Likewise.
+ (difference_cost): Likewise.
+ (get_computation_cost_at): Likewise.
+ (determine_iv_cost): Likewise.
+ (ivopts_global_cost_for_size): Likewise.
+ (rewrite_use_address): Likewise.
+ (tree_ssa_iv_optimize_loop): Initialize speed field.
+ * cse.c (optimize_this_for_speed_p): New static var.
+ (notreg_cost): Update call of rtx_cost.
+ (cse_extended_basic_block): set optimize_this_for_speed_p.
+ * ifcvt.c (cheap_bb_rtx_cost_p): Update call of rtx_cost.
+ (noce_try_cmove_arith): Likewise.
+ (noce_try_sign_mask): Likewise.
+ * expr.c (compress_float_constant): Update rtx_cost calls.
+ * tree-ssa-address.c (most_expensive_mult_to_index): Add speed argument.
+ (addr_to_parts): Likewise.
+ (create_mem_ref): Likewise.
+ * dse.c (find_shift_sequence): Add speed argument.
+ (replace_read): Update call.
+ * calls.c (precompute_register_parameters): Update call of rtx_cost.
+ * expmed.c (sdiv_pow2_cheap, smod_pow2_cheap, zero_cost, add_cost,
+ neg_cost, shift_cost, shiftadd_cost,
+ shiftsub_cost, mul_cost, sdiv_cost, udiv_cost, mul_widen_cost,
+ mul_highpart_cost): Increase dimension.
+ (init_expmed): Initialize for both size and speed.
+ (expand_shift): Use profile.
+ (synth_mult): Use profile.
+ (choose_mult_variant): Use profile.
+ (expand_mult): Use profile.
+ (expand_mult_highpart_optab): Use profile.
+ (expand_mult_highpart): Use profile.
+ (expand_smod_pow2): Use profile.
+ (expand_divmod): Use profile.
+ * simplify-rtx.c (simplify_binary_operation_1): Update call of rtx_cost.
+ * loop-invariant.c (create_new_invariant): Use profile.
+ (gain_for_invariant): Add speed parameter.
+ (best_gain_for_invariant): Likewise.
+ (find_invariants_to_move): Likewise.
+ (move_single_loop_invariants): Set it.
+ * target-def.h (TARGET_RTX_COSTS): Use hook.
+ * rtl.h (rtx_cost, address_cost, insn_rtx_cost): Update prototype.
+ (optimize_insn_for_size_p, optimize_insn_for_speed_p): Declare.
+ * output.h (default_address_cost): Update prototype.
+ * combine.c (optimize_this_for_speed_p): New static var.
+ (combine_validate_cost): Update call of rtx_cost.
+ (combine_instructions): Set optimize_this_for_speed_p.
+ (expand_compound_operation): Update call of rtx_cost.
+ (make_extraction): Update call of rtx_cost.
+ (force_to_mode): Update call of rtx_cost.
+ (distribute_and_simplify_rtx): Update call of rtx_cost.
+ * cfgloop.h (target_reg_cost, target_spill_cost): Turn to array.
+ (estimate_reg_pressure_cost): Update prototype.
+ * tree-flow.h (multiply_by_cost, create_mem_ref): Update prototype.
+ * basic-block.h (optimize_insn_for_size_p, optimize_insn_for_speed_p):
+ Remove.
+ * config/alpha/alpha.c (alpha_rtx_costs): Update.
+ (alpha_rtx_costs): Update.
+ * config/frv/frv.c (frv_rtx_costs): Update.
+ * config/s390/s390.c (s390_rtx_costs): Update.
+ * config/m32c/m32c.c (m32c_memory_move_cost): Update.
+ (m32c_rtx_costs): Update.
+ * config/spu/spu.c (TARGET_ADDRESS_COST): Update.
+ (spu_rtx_costs): Update.
+ * config/sparc/sparc.c (sparc_rtx_costs): Update.
+ * config/m32r/m32r.c (m32r_rtx_costs): Update.
+ * config/i386/i386.c (ix86_address_cost): Update.
+ (ix86_rtx_costs): Update.
+ * config/sh/sh.c (sh_rtx_costs, sh_address_cost): Update.
+ * config/pdp11/pdp11.c (pdp11_rtx_costs): Update.
+ * config/avr/avr.c (avr_rtx_costs, avr_address_cost): Update.
+ * config/crx/crx.c (crx_address_cost): Update.
+ * config/xtensa/xtensa.c (xtensa_rtx_costs): Update.
+ * config/stormy16/stormy16.c
+ (xstormy16_address_cost, xstormy16_rtx_costs): Update.
+ * config/m68hc11/m68hc11.c
+ (m68hc11_address_cost, m68hc11_rtx_costs): Update.
+ * config/cris/cris.c (cris_rtx_costs, cris_address_cost): Update.
+ * config/iq2000/iq2000.c (iq2000_rtx_costs, iq2000_address_cost): Update.
+ * config/mn10300/mn10300.c (mn10300_address_cost, mn10300_rtx_costs): Update.
+ * config/ia64/ia64.c (ia64_rtx_costs): Update.
+ * config/m68k/m68k.c (m68k_rtx_costs): Update.
+ * config/rs6000/rs6000.c (rs6000_rtx_costs): Update.
+ * config/arc/arc.c (arc_rtx_costs, arc_address_cost): Update.
+ * config/mcore/mcore.c (TARGET_ADDRESS_COST): Update.
+ (mcore_rtx_costs): Update.
+ * config/score/score3.c (score3_rtx_costs): Update.
+ * config/score/score7.c (score7_rtx_costs): Update.
+ * config/score/score3.h (score3_rtx_costs): Update.
+ * config/score/score7.h (score7_rtx_costs): Update.
+ * config/score/score.c (score_rtx_costs): Update.
+ * config/arm/arm.c (arm_address_cost): Update.
+ (arm_rtx_costs_1): Update.
+ (arm_rtx_costs_1): Update.
+ (arm_size_rtx_costs): Update.
+ (arm_size_rtx_costs): Update.
+ (arm_size_rtx_costs): Update.
+ (arm_xscale_rtx_costs): Update.
+ (arm_thumb_address_cost): Update.
+ * config/pa/pa.c (hppa_address_cost): Update.
+ * config/mips/mips.c (mips_rtx_costs): Update.
+ * config/vax/vax.c (vax_address_cost): Update.
+ * config/h8300/h8300.c (h8300_shift_costs): Update.
+ (h8300_rtx_costs): Update.
+ * config/v850/v850.c (TARGET_ADDRESS_COST): Update.
+ (v850_rtx_costs): Update.
+ * config/mmix/mmix.c (mmix_rtx_costs, mmix_rtx_costs): Update.
+ * config/bfin/bfin.c
+ (bfin_address_cost): Update.
+ (bfin_rtx_costs): Update.
+ * stmt.c (lshift_cheap_p): Update.
+
2008-08-30 Andrew Pinski <andrew_pinski@playstation.sony.com>
PR middle-end/36444
rtx new_mem;
int old_cost = 0;
int new_cost = 0;
+ bool speed = optimize_bb_for_speed_p (bb);
PUT_MODE (mem_tmp, mode);
XEXP (mem_tmp, 0) = new_addr;
- old_cost = rtx_cost (mem, 0)
- + rtx_cost (PATTERN (inc_insn.insn), 0);
- new_cost = rtx_cost (mem_tmp, 0);
+ old_cost = rtx_cost (mem, 0, speed)
+ + rtx_cost (PATTERN (inc_insn.insn), 0, speed);
+ new_cost = rtx_cost (mem_tmp, 0, speed);
/* The first item of business is to see if this is profitable. */
if (old_cost < new_cost)
extern bool optimize_bb_for_speed_p (const_basic_block);
extern bool optimize_edge_for_size_p (edge);
extern bool optimize_edge_for_speed_p (edge);
-extern bool optimize_insn_for_size_p (void);
-extern bool optimize_insn_for_speed_p (void);
extern bool optimize_function_for_size_p (struct function *);
extern bool optimize_function_for_speed_p (struct function *);
extern bool optimize_loop_for_size_p (struct loop *);
|| (GET_CODE (args[i].value) == SUBREG
&& REG_P (SUBREG_REG (args[i].value)))))
&& args[i].mode != BLKmode
- && rtx_cost (args[i].value, SET) > COSTS_N_INSNS (1)
+ && rtx_cost (args[i].value, SET, optimize_insn_for_speed_p ())
+ > COSTS_N_INSNS (1)
&& ((SMALL_REGISTER_CLASSES && *reg_parm_seen)
|| optimize))
args[i].value = copy_to_mode_reg (args[i].mode, args[i].value);
extern unsigned target_avail_regs;
extern unsigned target_res_regs;
-extern unsigned target_reg_cost;
-extern unsigned target_spill_cost;
+extern unsigned target_reg_cost [2];
+extern unsigned target_spill_cost [2];
/* Register pressure estimation for induction variable optimizations & loop
invariant motion. */
-extern unsigned estimate_reg_pressure_cost (unsigned, unsigned);
+extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool);
extern void init_set_costs (void);
/* Loop optimizer initialization. */
/* Returns estimate on cost of computing SEQ. */
static unsigned
-seq_cost (const_rtx seq)
+seq_cost (const_rtx seq, bool speed)
{
unsigned cost = 0;
rtx set;
{
set = single_set (seq);
if (set)
- cost += rtx_cost (set, SET);
+ cost += rtx_cost (set, SET, speed);
else
cost++;
}
unsigned target_avail_regs; /* Number of available registers. */
unsigned target_res_regs; /* Number of registers reserved for temporary
expressions. */
-unsigned target_reg_cost; /* The cost for register when there still
+unsigned target_reg_cost[2]; /* The cost for register when there still
is some reserve, but we are approaching
the number of available registers. */
-unsigned target_spill_cost; /* The cost for register when we need
+unsigned target_spill_cost[2]; /* The cost for register when we need
to spill. */
/* Initialize the constants for computing set costs. */
void
init_set_costs (void)
{
+ int speed;
rtx seq;
rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
target_res_regs = 3;
- /* Set up the costs for using extra registers:
-
- 1) If not many free registers remain, we should prefer having an
- additional move to decreasing the number of available registers.
- (TARGET_REG_COST).
- 2) If no registers are available, we need to spill, which may require
- storing the old value to memory and loading it back
- (TARGET_SPILL_COST). */
-
- start_sequence ();
- emit_move_insn (reg1, reg2);
- seq = get_insns ();
- end_sequence ();
- target_reg_cost = seq_cost (seq);
-
- start_sequence ();
- emit_move_insn (mem, reg1);
- emit_move_insn (reg2, mem);
- seq = get_insns ();
- end_sequence ();
- target_spill_cost = seq_cost (seq);
+ for (speed = 0; speed < 2; speed++)
+ {
+ crtl->maybe_hot_insn_p = speed;
+ /* Set up the costs for using extra registers:
+
+ 1) If not many free registers remain, we should prefer having an
+ additional move to decreasing the number of available registers.
+ (TARGET_REG_COST).
+ 2) If no registers are available, we need to spill, which may require
+ storing the old value to memory and loading it back
+ (TARGET_SPILL_COST). */
+
+ start_sequence ();
+ emit_move_insn (reg1, reg2);
+ seq = get_insns ();
+ end_sequence ();
+ target_reg_cost [speed] = seq_cost (seq, speed);
+
+ start_sequence ();
+ emit_move_insn (mem, reg1);
+ emit_move_insn (reg2, mem);
+ seq = get_insns ();
+ end_sequence ();
+ target_spill_cost [speed] = seq_cost (seq, speed);
+ }
+ default_rtl_profile ();
}
/* Estimates cost of increased register pressure caused by making N_NEW new
around the loop. */
unsigned
-estimate_reg_pressure_cost (unsigned n_new, unsigned n_old)
+estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed)
{
unsigned cost;
unsigned regs_needed = n_new + n_old;
if (regs_needed <= target_avail_regs)
/* If we are close to running out of registers, try to preserve
them. */
- cost = target_reg_cost * n_new;
+ cost = target_reg_cost [speed] * n_new;
else
/* If we run out of registers, it is very expensive to add another
one. */
- cost = target_spill_cost * n_new;
+ cost = target_spill_cost [speed] * n_new;
if (optimize && flag_ira && (flag_ira_algorithm == IRA_ALGORITHM_REGIONAL
|| flag_ira_algorithm == IRA_ALGORITHM_MIXED)
/* Basic block in which we are performing combines. */
static basic_block this_basic_block;
+static bool optimize_this_for_speed_p;
\f
/* Length of the currently allocated uid_insn_cost array. */
}
/* Calculate the replacement insn_rtx_costs. */
- new_i3_cost = insn_rtx_cost (newpat);
+ new_i3_cost = insn_rtx_cost (newpat, optimize_this_for_speed_p);
if (newi2pat)
{
- new_i2_cost = insn_rtx_cost (newi2pat);
+ new_i2_cost = insn_rtx_cost (newi2pat, optimize_this_for_speed_p);
new_cost = (new_i2_cost > 0 && new_i3_cost > 0)
? new_i2_cost + new_i3_cost : 0;
}
int old_other_cost, new_other_cost;
old_other_cost = INSN_COST (undobuf.other_insn);
- new_other_cost = insn_rtx_cost (newotherpat);
+ new_other_cost = insn_rtx_cost (newotherpat, optimize_this_for_speed_p);
if (old_other_cost > 0 && new_other_cost > 0)
{
old_cost += old_other_cost;
create_log_links ();
FOR_EACH_BB (this_basic_block)
{
+ optimize_this_for_speed_p = optimize_bb_for_speed_p (this_basic_block);
last_call_luid = 0;
mem_last_set = -1;
label_tick++;
/* Record the current insn_rtx_cost of this instruction. */
if (NONJUMP_INSN_P (insn))
- INSN_COST (insn) = insn_rtx_cost (PATTERN (insn));
+ INSN_COST (insn) = insn_rtx_cost (PATTERN (insn),
+ optimize_this_for_speed_p);
if (dump_file)
fprintf(dump_file, "insn_cost %d: %d\n",
INSN_UID (insn), INSN_COST (insn));
rtx temp2 = expand_compound_operation (temp);
/* Make sure this is a profitable operation. */
- if (rtx_cost (x, SET) > rtx_cost (temp2, SET))
+ if (rtx_cost (x, SET, optimize_this_for_speed_p)
+ > rtx_cost (temp2, SET, optimize_this_for_speed_p))
return temp2;
- else if (rtx_cost (x, SET) > rtx_cost (temp, SET))
+ else if (rtx_cost (x, SET, optimize_this_for_speed_p)
+ > rtx_cost (temp, SET, optimize_this_for_speed_p))
return temp;
else
return x;
/* Prefer ZERO_EXTENSION, since it gives more information to
backends. */
- if (rtx_cost (temp, SET) <= rtx_cost (temp1, SET))
+ if (rtx_cost (temp, SET, optimize_this_for_speed_p)
+ <= rtx_cost (temp1, SET, optimize_this_for_speed_p))
return temp;
return temp1;
}
/* Prefer ZERO_EXTENSION, since it gives more information to
backends. */
- if (rtx_cost (temp1, SET) < rtx_cost (temp, SET))
+ if (rtx_cost (temp1, SET, optimize_this_for_speed_p)
+ < rtx_cost (temp, SET, optimize_this_for_speed_p))
temp = temp1;
}
pos_rtx = temp;
y = simplify_gen_binary (AND, GET_MODE (x),
XEXP (x, 0), GEN_INT (cval));
- if (rtx_cost (y, SET) < rtx_cost (x, SET))
+ if (rtx_cost (y, SET, optimize_this_for_speed_p)
+ < rtx_cost (x, SET, optimize_this_for_speed_p))
x = y;
}
tmp = apply_distributive_law (simplify_gen_binary (inner_code, mode,
new_op0, new_op1));
if (GET_CODE (tmp) != outer_code
- && rtx_cost (tmp, SET) < rtx_cost (x, SET))
+ && rtx_cost (tmp, SET, optimize_this_for_speed_p)
+ < rtx_cost (x, SET, optimize_this_for_speed_p))
return tmp;
return NULL_RTX;
scanned. In either case, *TOTAL contains the cost result. */
static bool
-alpha_rtx_costs (rtx x, int code, int outer_code, int *total)
+alpha_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
{
enum machine_mode mode = GET_MODE (x);
bool float_mode_p = FLOAT_MODE_P (mode);
const struct alpha_rtx_cost_data *cost_data;
- if (optimize_size)
+ if (!speed)
cost_data = &alpha_rtx_cost_size;
else
cost_data = &alpha_rtx_cost_data[alpha_tune];
*total = COSTS_N_INSNS (15);
else
/* Otherwise we do a load from the GOT. */
- *total = COSTS_N_INSNS (optimize_size ? 1 : alpha_memory_latency);
+ *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
return true;
case HIGH:
else if (GET_CODE (XEXP (x, 0)) == MULT
&& const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
{
- *total = (rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
- + rtx_cost (XEXP (x, 1), outer_code) + COSTS_N_INSNS (1));
+ *total = (rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
+ + rtx_cost (XEXP (x, 1), outer_code, speed) + COSTS_N_INSNS (1));
return true;
}
return false;
return false;
case MEM:
- *total = COSTS_N_INSNS (optimize_size ? 1 : alpha_memory_latency);
+ *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
return true;
case NEG:
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS alpha_rtx_costs
#undef TARGET_ADDRESS_COST
-#define TARGET_ADDRESS_COST hook_int_rtx_0
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
static void arc_va_start (tree, rtx);
static void arc_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
tree, int *, int);
-static bool arc_rtx_costs (rtx, int, int, int *);
-static int arc_address_cost (rtx);
+static bool arc_rtx_costs (rtx, int, int, int *, bool);
+static int arc_address_cost (rtx, bool);
static void arc_external_libcall (rtx);
static bool arc_return_in_memory (const_tree, const_tree);
static bool arc_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
scanned. In either case, *TOTAL contains the cost result. */
static bool
-arc_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
+arc_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
If ADDR is not a valid address, its cost is irrelevant. */
static int
-arc_address_cost (rtx addr)
+arc_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
switch (GET_CODE (addr))
{
static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
static bool arm_xscale_rtx_costs (rtx, int, int, int *);
static bool arm_9e_rtx_costs (rtx, int, int, int *);
-static int arm_address_cost (rtx);
+static bool arm_rtx_costs (rtx, int, int, int *, bool);
+static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
-/* This will be overridden in arm_override_options. */
#undef TARGET_RTX_COSTS
-#define TARGET_RTX_COSTS arm_slowmul_rtx_costs
+#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost
gcc_assert (arm_tune != arm_none);
tune_flags = all_cores[(int)arm_tune].flags;
- if (optimize_size)
- targetm.rtx_costs = arm_size_rtx_costs;
- else
- targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
/* Make sure that the processor choice does not conflict with any of the
other command line choices. */
case MINUS:
if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
{
- extra_cost = rtx_cost (XEXP (x, 1), code);
+ extra_cost = rtx_cost (XEXP (x, 1), code, true);
if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
extra_cost += 4 * ARM_NUM_REGS (mode);
return extra_cost;
if (GET_CODE (XEXP (x, 0)) == MULT)
{
- extra_cost = rtx_cost (XEXP (x, 0), code);
+ extra_cost = rtx_cost (XEXP (x, 0), code, true);
if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
extra_cost += 4 * ARM_NUM_REGS (mode);
return extra_cost;
case ROTATE:
if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
{
- *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
return true;
}
/* Fall through */
case ASHIFTRT:
if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
{
- *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
+ *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
return true;
}
else if (mode == SImode)
{
- *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
/* Slightly disparage register shifts, but not by much. */
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
- *total += 1 + rtx_cost (XEXP (x, 1), code);
+ *total += 1 + rtx_cost (XEXP (x, 1), code, false);
return true;
}
}
}
+/* Implement TARGET_RTX_COSTS. Compute the cost of X into *TOTAL, using
+ the size-oriented cost function when SPEED is false and the cost
+ function of the core selected by arm_tune otherwise. Returns whatever
+ the chosen cost function returns. */
+static bool
+arm_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
+{
+ if (!speed)
+ return arm_size_rtx_costs (x, code, outer_code, total);
+ else
+ /* Call the per-core hook. Returning the bare function pointer would
+ always convert to true and leave *TOTAL unset. */
+ return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total);
+}
+
/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
supported on any "slowmul" cores, so it can be ignored. */
/* A COMPARE of a MULT is slow on XScale; the muls instruction
will stall until the multiplication is complete. */
if (GET_CODE (XEXP (x, 0)) == MULT)
- *total = 4 + rtx_cost (XEXP (x, 0), code);
+ *total = 4 + rtx_cost (XEXP (x, 0), code, true);
else
*total = arm_rtx_costs_1 (x, code, outer_code);
return true;
}
static int
-arm_address_cost (rtx x)
+arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
static void avr_asm_out_ctor (rtx, int);
static void avr_asm_out_dtor (rtx, int);
static int avr_operand_rtx_cost (rtx, enum machine_mode, enum rtx_code);
-static bool avr_rtx_costs (rtx, int, int, int *);
-static int avr_address_cost (rtx);
+static bool avr_rtx_costs (rtx, int, int, int *, bool);
+static int avr_address_cost (rtx, bool);
static bool avr_return_in_memory (const_tree, const_tree);
static struct machine_function * avr_init_machine_status (void);
static rtx avr_builtin_setjmp_frame_value (void);
fprintf (asm_out_file, "/*DEBUG: 0x%x\t\t%d\t%d */\n",
INSN_ADDRESSES (uid),
INSN_ADDRESSES (uid) - last_insn_address,
- rtx_cost (PATTERN (insn), INSN));
+ rtx_cost (PATTERN (insn), INSN, !optimize_size));
}
last_insn_address = INSN_ADDRESSES (uid);
}
operand's parent operator. */
static int
-avr_operand_rtx_cost (rtx x, enum machine_mode mode, enum rtx_code outer)
+avr_operand_rtx_cost (rtx x, enum machine_mode mode, enum rtx_code outer,
+ bool speed ATTRIBUTE_UNUSED)
{
enum rtx_code code = GET_CODE (x);
int total;
case, *TOTAL contains the cost result. */
static bool
-avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
+avr_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
+ bool speed)
{
enum machine_mode mode = GET_MODE (x);
HOST_WIDE_INT val;
{
case QImode:
if (AVR_HAVE_MUL)
- *total = COSTS_N_INSNS (optimize_size ? 3 : 4);
- else if (optimize_size)
+ *total = COSTS_N_INSNS (!speed ? 3 : 4);
+ else if (!speed)
*total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
else
return false;
case HImode:
if (AVR_HAVE_MUL)
- *total = COSTS_N_INSNS (optimize_size ? 7 : 10);
- else if (optimize_size)
+ *total = COSTS_N_INSNS (!speed ? 7 : 10);
+ else if (!speed)
*total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
else
return false;
case MOD:
case UDIV:
case UMOD:
- if (optimize_size)
+ if (!speed)
*total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
else
return false;
case QImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
- *total = COSTS_N_INSNS (optimize_size ? 4 : 17);
+ *total = COSTS_N_INSNS (!speed ? 4 : 17);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
case HImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
- *total = COSTS_N_INSNS (optimize_size ? 5 : 41);
+ *total = COSTS_N_INSNS (!speed ? 5 : 41);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
*total = COSTS_N_INSNS (5);
break;
case 4:
- *total = COSTS_N_INSNS (optimize_size ? 5 : 8);
+ *total = COSTS_N_INSNS (!speed ? 5 : 8);
break;
case 6:
- *total = COSTS_N_INSNS (optimize_size ? 5 : 9);
+ *total = COSTS_N_INSNS (!speed ? 5 : 9);
break;
case 5:
- *total = COSTS_N_INSNS (optimize_size ? 5 : 10);
+ *total = COSTS_N_INSNS (!speed ? 5 : 10);
break;
default:
- *total = COSTS_N_INSNS (optimize_size ? 5 : 41);
+ *total = COSTS_N_INSNS (!speed ? 5 : 41);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
break;
case SImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
- *total = COSTS_N_INSNS (optimize_size ? 7 : 113);
+ *total = COSTS_N_INSNS (!speed ? 7 : 113);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
*total = COSTS_N_INSNS (6);
break;
case 2:
- *total = COSTS_N_INSNS (optimize_size ? 7 : 8);
+ *total = COSTS_N_INSNS (!speed ? 7 : 8);
break;
default:
- *total = COSTS_N_INSNS (optimize_size ? 7 : 113);
+ *total = COSTS_N_INSNS (!speed ? 7 : 113);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
break;
case QImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
- *total = COSTS_N_INSNS (optimize_size ? 4 : 17);
+ *total = COSTS_N_INSNS (!speed ? 4 : 17);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
case HImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
- *total = COSTS_N_INSNS (optimize_size ? 5 : 41);
+ *total = COSTS_N_INSNS (!speed ? 5 : 41);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
*total = COSTS_N_INSNS (5);
break;
case 11:
- *total = COSTS_N_INSNS (optimize_size ? 5 : 6);
+ *total = COSTS_N_INSNS (!speed ? 5 : 6);
break;
case 12:
- *total = COSTS_N_INSNS (optimize_size ? 5 : 7);
+ *total = COSTS_N_INSNS (!speed ? 5 : 7);
break;
case 6:
case 13:
- *total = COSTS_N_INSNS (optimize_size ? 5 : 8);
+ *total = COSTS_N_INSNS (!speed ? 5 : 8);
break;
default:
- *total = COSTS_N_INSNS (optimize_size ? 5 : 41);
+ *total = COSTS_N_INSNS (!speed ? 5 : 41);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
break;
case SImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
- *total = COSTS_N_INSNS (optimize_size ? 7 : 113);
+ *total = COSTS_N_INSNS (!speed ? 7 : 113);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
*total = COSTS_N_INSNS (6);
break;
case 2:
- *total = COSTS_N_INSNS (optimize_size ? 7 : 8);
+ *total = COSTS_N_INSNS (!speed ? 7 : 8);
break;
case 31:
*total = COSTS_N_INSNS (AVR_HAVE_MOVW ? 4 : 5);
break;
default:
- *total = COSTS_N_INSNS (optimize_size ? 7 : 113);
+ *total = COSTS_N_INSNS (!speed ? 7 : 113);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
break;
case QImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
- *total = COSTS_N_INSNS (optimize_size ? 4 : 17);
+ *total = COSTS_N_INSNS (!speed ? 4 : 17);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
case HImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
- *total = COSTS_N_INSNS (optimize_size ? 5 : 41);
+ *total = COSTS_N_INSNS (!speed ? 5 : 41);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
case 12:
case 13:
case 14:
- *total = COSTS_N_INSNS (optimize_size ? 5 : 6);
+ *total = COSTS_N_INSNS (!speed ? 5 : 6);
break;
case 4:
- *total = COSTS_N_INSNS (optimize_size ? 5 : 7);
+ *total = COSTS_N_INSNS (!speed ? 5 : 7);
break;
case 5:
case 6:
- *total = COSTS_N_INSNS (optimize_size ? 5 : 9);
+ *total = COSTS_N_INSNS (!speed ? 5 : 9);
break;
default:
- *total = COSTS_N_INSNS (optimize_size ? 5 : 41);
+ *total = COSTS_N_INSNS (!speed ? 5 : 41);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
break;
case SImode:
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
- *total = COSTS_N_INSNS (optimize_size ? 7 : 113);
+ *total = COSTS_N_INSNS (!speed ? 7 : 113);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
else
*total = COSTS_N_INSNS (4);
break;
case 2:
- *total = COSTS_N_INSNS (optimize_size ? 7 : 8);
+ *total = COSTS_N_INSNS (!speed ? 7 : 8);
break;
case 8:
case 16:
*total = COSTS_N_INSNS (6);
break;
default:
- *total = COSTS_N_INSNS (optimize_size ? 7 : 113);
+ *total = COSTS_N_INSNS (!speed ? 7 : 113);
*total += avr_operand_rtx_cost (XEXP (x, 1), mode, code);
}
break;
/* Calculate the cost of a memory address. */
static int
-avr_address_cost (rtx x)
+avr_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
if (GET_CODE (x) == PLUS
&& GET_CODE (XEXP (x,1)) == CONST_INT
All addressing modes are equally cheap on the Blackfin. */
static int
-bfin_address_cost (rtx addr ATTRIBUTE_UNUSED)
+bfin_address_cost (rtx addr ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
return 1;
}
}
static bool
-bfin_rtx_costs (rtx x, int code, int outer_code, int *total)
+bfin_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
{
int cost2 = COSTS_N_INSNS (1);
rtx op0, op1;
if (val == 2 || val == 4)
{
*total = cost2;
- *total += rtx_cost (XEXP (op0, 0), outer_code);
- *total += rtx_cost (op1, outer_code);
+ *total += rtx_cost (XEXP (op0, 0), outer_code, speed);
+ *total += rtx_cost (op1, outer_code, speed);
return true;
}
}
*total = cost2;
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
- *total += rtx_cost (op0, SET);
+ *total += rtx_cost (op0, SET, speed);
#if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer
towards creating too many induction variables. */
if (!reg_or_7bit_operand (op1, SImode))
- *total += rtx_cost (op1, SET);
+ *total += rtx_cost (op1, SET, speed);
#endif
}
else if (GET_MODE (x) == DImode)
*total = 6 * cost2;
if (GET_CODE (op1) != CONST_INT
|| !satisfies_constraint_Ks7 (op1))
- *total += rtx_cost (op1, PLUS);
+ *total += rtx_cost (op1, PLUS, speed);
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
- *total += rtx_cost (op0, PLUS);
+ *total += rtx_cost (op0, PLUS, speed);
}
return true;
op1 = XEXP (x, 1);
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
- *total += rtx_cost (op0, code);
+ *total += rtx_cost (op0, code, speed);
return true;
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
- *total += rtx_cost (op0, code);
+ *total += rtx_cost (op0, code, speed);
if (GET_MODE (x) == DImode)
{
if (code == AND)
{
if (! rhs_andsi3_operand (XEXP (x, 1), SImode))
- *total += rtx_cost (XEXP (x, 1), code);
+ *total += rtx_cost (XEXP (x, 1), code, speed);
}
else
{
if (! regorlog2_operand (XEXP (x, 1), SImode))
- *total += rtx_cost (XEXP (x, 1), code);
+ *total += rtx_cost (XEXP (x, 1), code, speed);
}
return true;
op0 = XEXP (op0, 0);
op1 = XEXP (op1, 0);
}
- else if (optimize_size)
+ else if (!speed)
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (3);
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
- *total += rtx_cost (op0, MULT);
+ *total += rtx_cost (op0, MULT, speed);
if (GET_CODE (op1) != REG
&& (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
- *total += rtx_cost (op1, MULT);
+ *total += rtx_cost (op1, MULT, speed);
}
return true;
static void cris_file_start (void);
static void cris_init_libfuncs (void);
-static bool cris_rtx_costs (rtx, int, int, int *);
-static int cris_address_cost (rtx);
+static bool cris_rtx_costs (rtx, int, int, int *, bool);
+static int cris_address_cost (rtx, bool);
static bool cris_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
const_tree, bool);
static int cris_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
scanned. In either case, *TOTAL contains the cost result. */
static bool
-cris_rtx_costs (rtx x, int code, int outer_code, int *total)
+cris_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
{
switch (code)
{
return false;
case ZERO_EXTEND: case SIGN_EXTEND:
- *total = rtx_cost (XEXP (x, 0), outer_code);
+ *total = rtx_cost (XEXP (x, 0), outer_code, speed);
return true;
default:
/* The ADDRESS_COST worker. */
static int
-cris_address_cost (rtx x)
+cris_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
/* The metric to use for the cost-macros is unclear.
The metric used here is (the number of cycles needed) / 2,
static rtx crx_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
int incoming ATTRIBUTE_UNUSED);
static bool crx_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED);
-static int crx_address_cost (rtx);
+static int crx_address_cost (rtx, bool);
/*****************************************************************************/
/* STACK LAYOUT AND CALLING CONVENTIONS */
/* Return cost of the memory address x. */
static int
-crx_address_cost (rtx addr)
+crx_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
enum crx_addrtype addrtype;
struct crx_address address;
frv_rtx_costs (rtx x,
int code ATTRIBUTE_UNUSED,
int outer_code ATTRIBUTE_UNUSED,
- int *total)
+ int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
if (outer_code == MEM)
{
/* Worker function for TARGET_RTX_COSTS. */
static bool
-h8300_rtx_costs (rtx x, int code, int outer_code, int *total)
+h8300_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
{
if (TARGET_H8300SX && outer_code == MEM)
{
{
/* Constant operands need the same number of processor
states as register operands. Although we could try to
- use a size-based cost for optimize_size, the lack of
+ use a size-based cost for !speed, the lack
of a mode makes the results very unpredictable. */
*total = 0;
return true;
{
case QImode:
case HImode:
- *total = COSTS_N_INSNS (optimize_size ? 4 : 10);
+ *total = COSTS_N_INSNS (!speed ? 4 : 10);
return false;
case SImode:
- *total = COSTS_N_INSNS (optimize_size ? 4 : 18);
+ *total = COSTS_N_INSNS (!speed ? 4 : 18);
return false;
default:
requires to two regs - that would mean more pseudos with longer
lifetimes. */
static int
-ix86_address_cost (rtx x)
+ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
struct ix86_address parts;
int cost = 1;
scanned. In either case, *TOTAL contains the cost result. */
static bool
-ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
+ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
{
enum rtx_code outer_code = (enum rtx_code) outer_code_i;
enum machine_mode mode = GET_MODE (x);
+ const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
switch (code)
{
&& GET_MODE (XEXP (x, 0)) == SImode)
*total = 1;
else if (TARGET_ZERO_EXTEND_WITH_AND)
- *total = ix86_cost->add;
+ *total = cost->add;
else
- *total = ix86_cost->movzx;
+ *total = cost->movzx;
return false;
case SIGN_EXTEND:
- *total = ix86_cost->movsx;
+ *total = cost->movsx;
return false;
case ASHIFT:
HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
if (value == 1)
{
- *total = ix86_cost->add;
+ *total = cost->add;
return false;
}
if ((value == 2 || value == 3)
- && ix86_cost->lea <= ix86_cost->shift_const)
+ && cost->lea <= cost->shift_const)
{
- *total = ix86_cost->lea;
+ *total = cost->lea;
return false;
}
}
if (CONST_INT_P (XEXP (x, 1)))
{
if (INTVAL (XEXP (x, 1)) > 32)
- *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
+ *total = cost->shift_const + COSTS_N_INSNS (2);
else
- *total = ix86_cost->shift_const * 2;
+ *total = cost->shift_const * 2;
}
else
{
if (GET_CODE (XEXP (x, 1)) == AND)
- *total = ix86_cost->shift_var * 2;
+ *total = cost->shift_var * 2;
else
- *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
+ *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
}
}
else
{
if (CONST_INT_P (XEXP (x, 1)))
- *total = ix86_cost->shift_const;
+ *total = cost->shift_const;
else
- *total = ix86_cost->shift_var;
+ *total = cost->shift_var;
}
return false;
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
/* ??? SSE scalar cost should be used here. */
- *total = ix86_cost->fmul;
+ *total = cost->fmul;
return false;
}
else if (X87_FLOAT_MODE_P (mode))
{
- *total = ix86_cost->fmul;
+ *total = cost->fmul;
return false;
}
else if (FLOAT_MODE_P (mode))
{
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fmul;
+ *total = cost->fmul;
return false;
}
else
op0 = XEXP (op0, 0), mode = GET_MODE (op0);
}
- *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
- + nbits * ix86_cost->mult_bit
- + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
+ *total = (cost->mult_init[MODE_INDEX (mode)]
+ + nbits * cost->mult_bit
+ + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
return true;
}
case UMOD:
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fdiv;
+ *total = cost->fdiv;
else if (X87_FLOAT_MODE_P (mode))
- *total = ix86_cost->fdiv;
+ *total = cost->fdiv;
else if (FLOAT_MODE_P (mode))
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fdiv;
+ *total = cost->fdiv;
else
- *total = ix86_cost->divide[MODE_INDEX (mode)];
+ *total = cost->divide[MODE_INDEX (mode)];
return false;
case PLUS:
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
if (val == 2 || val == 4 || val == 8)
{
- *total = ix86_cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
*total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
- outer_code);
- *total += rtx_cost (XEXP (x, 1), outer_code);
+ outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
return true;
}
}
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
if (val == 2 || val == 4 || val == 8)
{
- *total = ix86_cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
- *total += rtx_cost (XEXP (x, 1), outer_code);
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
return true;
}
}
else if (GET_CODE (XEXP (x, 0)) == PLUS)
{
- *total = ix86_cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
- *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
- *total += rtx_cost (XEXP (x, 1), outer_code);
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
return true;
}
}
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fadd;
+ *total = cost->fadd;
return false;
}
else if (X87_FLOAT_MODE_P (mode))
{
- *total = ix86_cost->fadd;
+ *total = cost->fadd;
return false;
}
else if (FLOAT_MODE_P (mode))
{
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fadd;
+ *total = cost->fadd;
return false;
}
/* FALLTHRU */
case XOR:
if (!TARGET_64BIT && mode == DImode)
{
- *total = (ix86_cost->add * 2
- + (rtx_cost (XEXP (x, 0), outer_code)
+ *total = (cost->add * 2
+ + (rtx_cost (XEXP (x, 0), outer_code, speed)
<< (GET_MODE (XEXP (x, 0)) != DImode))
- + (rtx_cost (XEXP (x, 1), outer_code)
+ + (rtx_cost (XEXP (x, 1), outer_code, speed)
<< (GET_MODE (XEXP (x, 1)) != DImode)));
return true;
}
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fchs;
+ *total = cost->fchs;
return false;
}
else if (X87_FLOAT_MODE_P (mode))
{
- *total = ix86_cost->fchs;
+ *total = cost->fchs;
return false;
}
else if (FLOAT_MODE_P (mode))
{
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fchs;
+ *total = cost->fchs;
return false;
}
/* FALLTHRU */
case NOT:
if (!TARGET_64BIT && mode == DImode)
- *total = ix86_cost->add * 2;
+ *total = cost->add * 2;
else
- *total = ix86_cost->add;
+ *total = cost->add;
return false;
case COMPARE:
{
/* This kind of construct is implemented using test[bwl].
Treat it as if we had an AND. */
- *total = (ix86_cost->add
- + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
- + rtx_cost (const1_rtx, outer_code));
+ *total = (cost->add
+ + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
+ + rtx_cost (const1_rtx, outer_code, speed));
return true;
}
return false;
case ABS:
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fabs;
+ *total = cost->fabs;
else if (X87_FLOAT_MODE_P (mode))
- *total = ix86_cost->fabs;
+ *total = cost->fabs;
else if (FLOAT_MODE_P (mode))
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fabs;
+ *total = cost->fabs;
return false;
case SQRT:
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fsqrt;
+ *total = cost->fsqrt;
else if (X87_FLOAT_MODE_P (mode))
- *total = ix86_cost->fsqrt;
+ *total = cost->fsqrt;
else if (FLOAT_MODE_P (mode))
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fsqrt;
+ *total = cost->fsqrt;
return false;
case UNSPEC:
tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
-static bool ia64_rtx_costs (rtx, int, int, int *);
+static bool ia64_rtx_costs (rtx, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static bool ia64_handle_option (size_t, const char *, int);
/* ??? This is incomplete. */
static bool
-ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
+ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
static void iq2000_setup_incoming_varargs (CUMULATIVE_ARGS *,
enum machine_mode, tree, int *,
int);
-static bool iq2000_rtx_costs (rtx, int, int, int *);
-static int iq2000_address_cost (rtx);
+static bool iq2000_rtx_costs (rtx, int, int, int *, bool);
+static int iq2000_address_cost (rtx, bool);
static section *iq2000_select_section (tree, int, unsigned HOST_WIDE_INT);
static bool iq2000_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
const_tree, bool);
/* Provide the costs of an addressing mode that contains ADDR. */
static int
-iq2000_address_cost (rtx addr)
+iq2000_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
switch (GET_CODE (addr))
{
}
static bool
-iq2000_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int * total)
+iq2000_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int * total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS m32c_rtx_costs
static bool
-m32c_rtx_costs (rtx x, int code, int outer_code, int *total)
+m32c_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST m32c_address_cost
static int
-m32c_address_cost (rtx addr)
+m32c_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
int i;
/* fprintf(stderr, "\naddress_cost\n");
static void m32r_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
tree, int *, int);
static void init_idents (void);
-static bool m32r_rtx_costs (rtx, int, int, int *);
+static bool m32r_rtx_costs (rtx, int, int, int *, bool speed);
static bool m32r_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
const_tree, bool);
static int m32r_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS m32r_rtx_costs
#undef TARGET_ADDRESS_COST
-#define TARGET_ADDRESS_COST hook_int_rtx_0
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
/* Cost functions. */
static bool
-m32r_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
+m32r_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
static int go_if_legitimate_address_internal (rtx, enum machine_mode, int);
static rtx m68hc11_expand_compare (enum rtx_code, rtx, rtx);
static int must_parenthesize (rtx);
-static int m68hc11_address_cost (rtx);
+static int m68hc11_address_cost (rtx, bool);
static int m68hc11_shift_cost (enum machine_mode, rtx, int);
static int m68hc11_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
-static bool m68hc11_rtx_costs (rtx, int, int, int *);
+static bool m68hc11_rtx_costs (rtx, int, int, int *, bool);
static tree m68hc11_handle_fntype_attribute (tree *, tree, tree, int, bool *);
const struct attribute_spec m68hc11_attribute_table[];
If ADDR is not a valid address, its cost is irrelevant. */
static int
-m68hc11_address_cost (rtx addr)
+m68hc11_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
int cost = 4;
{
int total;
- total = rtx_cost (x, SET);
+ total = rtx_cost (x, SET, !optimize_size);
if (mode == QImode)
total += m68hc11_cost->shiftQI_const[shift % 8];
else if (mode == HImode)
return m68hc11_shift_cost (mode, XEXP (x, 0), INTVAL (XEXP (x, 1)));
}
- total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code);
+ total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size);
total += m68hc11_cost->shift_var;
return total;
case AND:
case XOR:
case IOR:
- total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code);
+ total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size);
total += m68hc11_cost->logical;
/* Logical instructions are byte instructions only. */
case MINUS:
case PLUS:
- total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code);
+ total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size);
total += m68hc11_cost->add;
if (GET_MODE_SIZE (mode) > 2)
{
case UDIV:
case DIV:
case MOD:
- total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code);
+ total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size);
switch (mode)
{
case QImode:
if (mode == HImode && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
&& GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
return m68hc11_cost->multQI
- + rtx_cost (XEXP (XEXP (x, 0), 0), code)
- + rtx_cost (XEXP (XEXP (x, 1), 0), code);
+ + rtx_cost (XEXP (XEXP (x, 0), 0), code, !optimize_size)
+ + rtx_cost (XEXP (XEXP (x, 1), 0), code, !optimize_size);
/* emul instruction produces 32-bit result for 68HC12. */
if (TARGET_M6812 && mode == SImode
&& GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
&& GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
return m68hc11_cost->multHI
- + rtx_cost (XEXP (XEXP (x, 0), 0), code)
- + rtx_cost (XEXP (XEXP (x, 1), 0), code);
+ + rtx_cost (XEXP (XEXP (x, 0), 0), code, !optimize_size)
+ + rtx_cost (XEXP (XEXP (x, 1), 0), code, !optimize_size);
- total = rtx_cost (XEXP (x, 0), code) + rtx_cost (XEXP (x, 1), code);
+ total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size);
switch (mode)
case COMPARE:
case ABS:
case ZERO_EXTEND:
- total = extra_cost + rtx_cost (XEXP (x, 0), code);
+ total = extra_cost + rtx_cost (XEXP (x, 0), code, !optimize_size);
if (mode == QImode)
{
return total + COSTS_N_INSNS (1);
}
static bool
-m68hc11_rtx_costs (rtx x, int code, int outer_code, int *total)
+m68hc11_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
static void m68k_compute_frame_layout (void);
static bool m68k_save_reg (unsigned int regno, bool interrupt_handler);
static bool m68k_ok_for_sibcall_p (tree, tree);
-static bool m68k_rtx_costs (rtx, int, int, int *);
+static bool m68k_rtx_costs (rtx, int, int, int *, bool);
#if M68K_HONOR_TARGET_STRICT_ALIGNMENT
static bool m68k_return_in_memory (const_tree, const_tree);
#endif
}
static bool
-m68k_rtx_costs (rtx x, int code, int outer_code, int *total)
+m68k_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS mcore_rtx_costs
#undef TARGET_ADDRESS_COST
-#define TARGET_ADDRESS_COST hook_int_rtx_0
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG mcore_reorg
}
static bool
-mcore_rtx_costs (rtx x, int code, int outer_code, int * total)
+mcore_rtx_costs (rtx x, int code, int outer_code, int * total,
+ bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
else
cost = single_cost;
return (cost
- + rtx_cost (XEXP (x, 0), 0)
- + rtx_cost (XEXP (x, 1), GET_CODE (x)));
+ + rtx_cost (XEXP (x, 0), 0, !optimize_size)
+ + rtx_cost (XEXP (x, 1), GET_CODE (x), !optimize_size));
}
/* Return the cost of floating-point multiplications of mode MODE. */
/* Implement TARGET_RTX_COSTS. */
static bool
-mips_rtx_costs (rtx x, int code, int outer_code, int *total)
+mips_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
{
enum machine_mode mode = GET_MODE (x);
bool float_mode_p = FLOAT_MODE_P (mode);
&& UINTVAL (XEXP (x, 1)) == 0xffffffff)
{
*total = (mips_zero_extend_cost (mode, XEXP (x, 0))
- + rtx_cost (XEXP (x, 0), 0));
+ + rtx_cost (XEXP (x, 0), 0, speed));
return true;
}
/* Fall through. */
case LO_SUM:
/* Low-part immediates need an extended MIPS16 instruction. */
*total = (COSTS_N_INSNS (TARGET_MIPS16 ? 2 : 1)
- + rtx_cost (XEXP (x, 0), 0));
+ + rtx_cost (XEXP (x, 0), 0, speed));
return true;
case LT:
if (GET_CODE (op0) == MULT && GET_CODE (XEXP (op0, 0)) == NEG)
{
*total = (mips_fp_mult_cost (mode)
- + rtx_cost (XEXP (XEXP (op0, 0), 0), 0)
- + rtx_cost (XEXP (op0, 1), 0)
- + rtx_cost (op1, 0));
+ + rtx_cost (XEXP (XEXP (op0, 0), 0), 0, speed)
+ + rtx_cost (XEXP (op0, 1), 0, speed)
+ + rtx_cost (op1, 0, speed));
return true;
}
if (GET_CODE (op1) == MULT)
{
*total = (mips_fp_mult_cost (mode)
- + rtx_cost (op0, 0)
- + rtx_cost (XEXP (op1, 0), 0)
- + rtx_cost (XEXP (op1, 1), 0));
+ + rtx_cost (op0, 0, speed)
+ + rtx_cost (XEXP (op1, 0), 0, speed)
+ + rtx_cost (XEXP (op1, 1), 0, speed));
return true;
}
}
&& GET_CODE (XEXP (op, 0)) == MULT)
{
*total = (mips_fp_mult_cost (mode)
- + rtx_cost (XEXP (XEXP (op, 0), 0), 0)
- + rtx_cost (XEXP (XEXP (op, 0), 1), 0)
- + rtx_cost (XEXP (op, 1), 0));
+ + rtx_cost (XEXP (XEXP (op, 0), 0), 0, speed)
+ + rtx_cost (XEXP (XEXP (op, 0), 1), 0, speed)
+ + rtx_cost (XEXP (op, 1), 0, speed));
return true;
}
}
if (outer_code == SQRT || GET_CODE (XEXP (x, 1)) == SQRT)
/* An rsqrt<mode>a or rsqrt<mode>b pattern. Count the
division as being free. */
- *total = rtx_cost (XEXP (x, 1), 0);
+ *total = rtx_cost (XEXP (x, 1), 0, speed);
else
- *total = mips_fp_div_cost (mode) + rtx_cost (XEXP (x, 1), 0);
+ *total = mips_fp_div_cost (mode) + rtx_cost (XEXP (x, 1), 0, speed);
return true;
}
/* Fall through. */
&& CONST_INT_P (XEXP (x, 1))
&& exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
{
- *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), 0);
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), 0, speed);
return true;
}
*total = COSTS_N_INSNS (mips_idiv_insns ());
/* Implement TARGET_ADDRESS_COST. */
static int
-mips_address_cost (rtx addr)
+mips_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
return mips_address_insns (addr, SImode, false);
}
(CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static void mmix_file_start (void);
static void mmix_file_end (void);
-static bool mmix_rtx_costs (rtx, int, int, int *);
+static bool mmix_rtx_costs (rtx, int, int, int *, bool);
static rtx mmix_struct_value_rtx (tree, int);
static bool mmix_pass_by_reference (CUMULATIVE_ARGS *,
enum machine_mode, const_tree, bool);
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS mmix_rtx_costs
#undef TARGET_ADDRESS_COST
-#define TARGET_ADDRESS_COST hook_int_rtx_0
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG mmix_reorg
mmix_rtx_costs (rtx x ATTRIBUTE_UNUSED,
int code ATTRIBUTE_UNUSED,
int outer_code ATTRIBUTE_UNUSED,
- int *total ATTRIBUTE_UNUSED)
+ int *total ATTRIBUTE_UNUSED,
+ bool speed ATTRIBUTE_UNUSED)
{
/* For the time being, this is just a stub and we'll accept the
generic calculations, until we can do measurements, at least.
static bool mn10300_handle_option (size_t, const char *, int);
static int mn10300_address_cost_1 (rtx, int *);
-static int mn10300_address_cost (rtx);
-static bool mn10300_rtx_costs (rtx, int, int, int *);
+static int mn10300_address_cost (rtx, bool);
+static bool mn10300_rtx_costs (rtx, int, int, int *, bool);
static void mn10300_file_start (void);
static bool mn10300_return_in_memory (const_tree, const_tree);
static rtx mn10300_builtin_saveregs (void);
}
static int
mn10300_address_cost_1 (rtx x, int *unsig)
{
switch (GET_CODE (x))
{
}
static int
-mn10300_address_cost (rtx x)
+mn10300_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
int s = 0;
return mn10300_address_cost_1 (x, &s);
}
static bool
-mn10300_rtx_costs (rtx x, int code, int outer_code, int *total)
+mn10300_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static bool pa_handle_option (size_t, const char *, int);
-static int hppa_address_cost (rtx);
-static bool hppa_rtx_costs (rtx, int, int, int *);
+static int hppa_address_cost (rtx, bool);
+static bool hppa_rtx_costs (rtx, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
as GO_IF_LEGITIMATE_ADDRESS. */
static int
-hppa_address_cost (rtx X)
+hppa_address_cost (rtx X,
+ bool speed ATTRIBUTE_UNUSED)
{
switch (GET_CODE (X))
{
scanned. In either case, *TOTAL contains the cost result. */
static bool
-hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
+hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
static bool pdp11_assemble_integer (rtx, unsigned int, int);
static void pdp11_output_function_prologue (FILE *, HOST_WIDE_INT);
static void pdp11_output_function_epilogue (FILE *, HOST_WIDE_INT);
-static bool pdp11_rtx_costs (rtx, int, int, int *);
+static bool pdp11_rtx_costs (rtx, int, int, int *, bool);
static bool pdp11_return_in_memory (const_tree, const_tree);
\f
/* Initialize the GCC target structure. */
}
static bool
-pdp11_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
+pdp11_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
static void rs6000_xcoff_file_end (void);
#endif
static int rs6000_variable_issue (FILE *, int, rtx, int);
-static bool rs6000_rtx_costs (rtx, int, int, int *);
+static bool rs6000_rtx_costs (rtx, int, int, int *, bool);
static int rs6000_adjust_cost (rtx, rtx, rtx, int);
static void rs6000_sched_init (FILE *, int, int);
static bool is_microcoded_insn (rtx);
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
-#define TARGET_ADDRESS_COST hook_int_rtx_0
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_VECTOR_OPAQUE_P
#define TARGET_VECTOR_OPAQUE_P rs6000_is_opaque_type
scanned. In either case, *TOTAL contains the cost result. */
static bool
-rs6000_rtx_costs (rtx x, int code, int outer_code, int *total)
+rs6000_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
{
enum machine_mode mode = GET_MODE (x);
/* When optimizing for size, MEM should be slightly more expensive
than generating address, e.g., (plus (reg) (const)).
L1 cache latency is about two instructions. */
- *total = optimize_size ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
+ *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
return true;
case LABEL_REF:
case CALL:
case IF_THEN_ELSE:
- if (optimize_size)
+ if (!speed)
{
*total = COSTS_N_INSNS (1);
return true;
of the superexpression of x. */
static bool
-s390_rtx_costs (rtx x, int code, int outer_code, int *total)
+s390_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
{
switch (code)
{
*total = s390_cost->madbr;
else
*total = s390_cost->maebr;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), MULT)
- + rtx_cost (XEXP (XEXP (x, 0), 1), MULT)
- + rtx_cost (XEXP (x, 1), code);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), MULT, speed)
+ + rtx_cost (XEXP (XEXP (x, 0), 1), MULT, speed)
+ + rtx_cost (XEXP (x, 1), code, speed);
return true; /* Do not do an additional recursive descent. */
}
*total = COSTS_N_INSNS (1);
/* Return the cost of an address rtx ADDR. */
static int
-s390_address_cost (rtx addr)
+s390_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
struct s390_address ad;
if (!s390_decompose_address (addr, &ad))
extern const char * score_output_casesi (rtx *operands);
extern const char * score_rpush (rtx *ops);
extern const char * score_rpop (rtx *ops);
-extern bool score_rtx_costs (rtx x, int code, int outer_code, int *total);
+extern bool score_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed);
#ifdef RTX_CODE
extern enum machine_mode score_select_cc_mode (enum rtx_code op, rtx x, rtx y);
/* Implement TARGET_RTX_COSTS macro. */
bool
-score_rtx_costs (rtx x, int code, int outer_code, int *total)
+score_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
{
if (TARGET_SCORE5 || TARGET_SCORE5U || TARGET_SCORE7 || TARGET_SCORE7D)
- return score7_rtx_costs (x, code, outer_code, total);
+ return score7_rtx_costs (x, code, outer_code, total, speed);
/* Implement TARGET_ADDRESS_COST macro. */
int
-score_address_cost (rtx addr)
+score_address_cost (rtx addr,
+ bool speed)
{
if (TARGET_SCORE5 || TARGET_SCORE5U || TARGET_SCORE7 || TARGET_SCORE7D)
- return score7_address_cost (addr);
+ return score7_address_cost (addr, speed);
/* Implement TARGET_RTX_COSTS macro. */
bool
-score3_rtx_costs (rtx x, int code, int outer_code, int *total)
+score3_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
enum machine_mode mode = GET_MODE (x);
/* Implement TARGET_ADDRESS_COST macro. */
int
-score3_address_cost (rtx addr)
+score3_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
{
return score3_address_insns (addr, SImode);
}
extern int score3_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
enum reg_class from,
enum reg_class to);
-extern bool score3_rtx_costs (rtx x, int code, int outer_code, int *total);
+extern bool score3_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed);
-extern int score3_address_cost (rtx addr);
+extern int score3_address_cost (rtx addr, bool speed);
extern int score3_output_external (FILE *file ATTRIBUTE_UNUSED,
tree decl,
/* Implement TARGET_RTX_COSTS macro. */
bool
-score7_rtx_costs (rtx x, int code, int outer_code, int *total)
+score7_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
enum machine_mode mode = GET_MODE (x);
/* Implement TARGET_ADDRESS_COST macro. */
int
-score7_address_cost (rtx addr)
+score7_address_cost (rtx addr,
+ bool speed ATTRIBUTE_UNUSED)
{
return score7_address_insns (addr, SImode);
}
extern int score7_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
enum reg_class from,
enum reg_class to);
-extern bool score7_rtx_costs (rtx x, int code, int outer_code, int *total);
+extern bool score7_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed);
-extern int score7_address_cost (rtx addr);
+extern int score7_address_cost (rtx addr, bool speed);
extern int score7_output_external (FILE *file ATTRIBUTE_UNUSED,
tree decl,
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
-static bool sh_rtx_costs (rtx, int, int, int *);
-static int sh_address_cost (rtx);
+static bool sh_rtx_costs (rtx, int, int, int *, bool);
+static int sh_address_cost (rtx, bool);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
|| satisfies_constraint_J16 (XEXP (x, 1)))
return 1;
else
- return 1 + rtx_cost (XEXP (x, 1), AND);
+ return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
}
/* These constants are single cycle extu.[bw] instructions. */
scanned. In either case, *TOTAL contains the cost result. */
static bool
-sh_rtx_costs (rtx x, int code, int outer_code, int *total)
+sh_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
since it increases pressure on r0. */
static int
-sh_address_cost (rtx X)
+sh_address_cost (rtx X,
+ bool speed ATTRIBUTE_UNUSED)
{
return (GET_CODE (X) == PLUS
&& ! CONSTANT_P (XEXP (X, 1))
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
-static bool sparc_rtx_costs (rtx, int, int, int *);
+static bool sparc_rtx_costs (rtx, int, int, int *, bool);
static bool sparc_promote_prototypes (const_tree);
static rtx sparc_struct_value_rtx (tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
-#define TARGET_ADDRESS_COST hook_int_rtx_0
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
/* This is only needed for TARGET_ARCH64, but since PROMOTE_FUNCTION_MODE is a
no-op for TARGET_ARCH32 this is ok. Otherwise we'd need to add a runtime
??? the latencies and then CSE will just use that. */
static bool
-sparc_rtx_costs (rtx x, int code, int outer_code, int *total)
+sparc_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
enum machine_mode mode = GET_MODE (x);
bool float_mode_p = FLOAT_MODE_P (mode);
#define TARGET_RTX_COSTS spu_rtx_costs
#undef TARGET_ADDRESS_COST
-#define TARGET_ADDRESS_COST hook_int_rtx_0
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
}
static bool
-spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
+spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
enum machine_mode mode = GET_MODE (x);
int cost = COSTS_N_INSNS (2);
static void xstormy16_init_builtins (void);
static rtx xstormy16_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
-static bool xstormy16_rtx_costs (rtx, int, int, int *);
-static int xstormy16_address_cost (rtx);
+static bool xstormy16_rtx_costs (rtx, int, int, int *, bool);
+static int xstormy16_address_cost (rtx, bool);
static bool xstormy16_return_in_memory (const_tree, const_tree);
/* Define the information needed to generate branch and scc insns. This is
static bool
xstormy16_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
- int *total)
+ int *total, bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
}
static int
-xstormy16_address_cost (rtx x)
+xstormy16_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
return (GET_CODE (x) == CONST_INT ? 2
: GET_CODE (x) == PLUS ? 7
#define TARGET_RTX_COSTS v850_rtx_costs
#undef TARGET_ADDRESS_COST
-#define TARGET_ADDRESS_COST hook_int_rtx_0
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG v850_reorg
v850_rtx_costs (rtx x,
int code,
int outer_code ATTRIBUTE_UNUSED,
- int * total)
+ int * total, bool speed)
{
switch (code)
{
case DIV:
case UMOD:
case UDIV:
- if (TARGET_V850E && optimize_size)
+ if (TARGET_V850E && !speed)
*total = 6;
else
*total = 60;
static void vax_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree);
static int vax_address_cost_1 (rtx);
-static int vax_address_cost (rtx);
-static bool vax_rtx_costs (rtx, int, int, int *);
+static int vax_address_cost (rtx, bool);
+static bool vax_rtx_costs (rtx, int, int, int *, bool);
static rtx vax_struct_value_rtx (tree, int);
\f
/* Initialize the GCC target structure. */
}
static int
-vax_address_cost (rtx x)
+vax_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
return (1 + (REG_P (x) ? 0 : vax_address_cost_1 (x)));
}
costs on a per cpu basis. */
static bool
-vax_rtx_costs (rtx x, int code, int outer_code, int *total)
+vax_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
enum machine_mode mode = GET_MODE (x);
int i = 0; /* may be modified in switch */
int) ATTRIBUTE_UNUSED;
static section *xtensa_select_rtx_section (enum machine_mode, rtx,
unsigned HOST_WIDE_INT);
-static bool xtensa_rtx_costs (rtx, int, int, int *);
+static bool xtensa_rtx_costs (rtx, int, int, int *, bool);
static tree xtensa_build_builtin_va_list (void);
static bool xtensa_return_in_memory (const_tree, const_tree);
static tree xtensa_gimplify_va_arg_expr (tree, tree, gimple_seq *,
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS xtensa_rtx_costs
#undef TARGET_ADDRESS_COST
-#define TARGET_ADDRESS_COST hook_int_rtx_0
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST xtensa_build_builtin_va_list
scanned. In either case, *TOTAL contains the cost result. */
static bool
-xtensa_rtx_costs (rtx x, int code, int outer_code, int *total)
+xtensa_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
{
switch (code)
{
/* Insn being scanned. */
static rtx this_insn;
+static bool optimize_this_for_speed_p;
/* Index by register number, gives the number of the next (or
previous) register in the chain of registers sharing the same
&& TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (x)),
GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x)))))
? 0
- : rtx_cost (x, outer) * 2);
+ : rtx_cost (x, outer, optimize_this_for_speed_p) * 2);
}
\f
FOR_BB_INSNS (bb, insn)
{
+ optimize_this_for_speed_p = optimize_bb_for_speed_p (bb);
/* If we have processed 1,000 insns, flush the hash table to
avoid extreme quadratic behavior. We must not include NOTEs
in the count since there may be more of them when generating
XEXP (and_test, 1) = GEN_INT ((unsigned HOST_WIDE_INT) 1 << bitnum);
XEXP (XEXP (shift_test, 0), 1) = GEN_INT (bitnum);
- return (rtx_cost (and_test, IF_THEN_ELSE)
- <= rtx_cost (shift_test, IF_THEN_ELSE));
+ return (rtx_cost (and_test, IF_THEN_ELSE, optimize_insn_for_speed_p ())
+ <= rtx_cost (shift_test, IF_THEN_ELSE, optimize_insn_for_speed_p ()));
}
/* Generate code to evaluate EXP and jump to IF_FALSE_LABEL if
find_shift_sequence (int access_size,
store_info_t store_info,
read_info_t read_info,
- int shift)
+ int shift,
+ bool speed)
{
enum machine_mode store_mode = GET_MODE (store_info->mem);
enum machine_mode read_mode = GET_MODE (read_info->mem);
cost = 0;
for (insn = shift_seq; insn != NULL_RTX; insn = NEXT_INSN (insn))
if (INSN_P (insn))
- cost += insn_rtx_cost (PATTERN (insn));
+ cost += insn_rtx_cost (PATTERN (insn), speed);
/* The computation up to here is essentially independent
of the arguments and could be precomputed. It may
GET_MODE_NAME (store_mode), INSN_UID (store_insn->insn));
start_sequence ();
if (shift)
- read_reg = find_shift_sequence (access_size, store_info, read_info, shift);
+ read_reg = find_shift_sequence (access_size, store_info, read_info, shift,
+ optimize_bb_for_speed_p (BLOCK_FOR_INSN (read_insn->insn)));
else
read_reg = extract_low_bits (read_mode, store_mode,
copy_rtx (store_info->rhs));
Usually, this will mean that the MD file will emit non-branch
sequences. */
-static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
-static bool smod_pow2_cheap[NUM_MACHINE_MODES];
+static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
+static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
/* Cost of various pieces of RTL. Note that some of these are indexed by
shift count and some by mode. */
-static int zero_cost;
-static int add_cost[NUM_MACHINE_MODES];
-static int neg_cost[NUM_MACHINE_MODES];
-static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
-static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
-static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
-static int mul_cost[NUM_MACHINE_MODES];
-static int sdiv_cost[NUM_MACHINE_MODES];
-static int udiv_cost[NUM_MACHINE_MODES];
-static int mul_widen_cost[NUM_MACHINE_MODES];
-static int mul_highpart_cost[NUM_MACHINE_MODES];
+static int zero_cost[2];
+static int add_cost[2][NUM_MACHINE_MODES];
+static int neg_cost[2][NUM_MACHINE_MODES];
+static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
+static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
+static int shiftsub_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
+static int mul_cost[2][NUM_MACHINE_MODES];
+static int sdiv_cost[2][NUM_MACHINE_MODES];
+static int udiv_cost[2][NUM_MACHINE_MODES];
+static int mul_widen_cost[2][NUM_MACHINE_MODES];
+static int mul_highpart_cost[2][NUM_MACHINE_MODES];
void
init_expmed (void)
rtx cint[MAX_BITS_PER_WORD];
int m, n;
enum machine_mode mode, wider_mode;
+ int speed;
- zero_cost = rtx_cost (const0_rtx, 0);
for (m = 1; m < MAX_BITS_PER_WORD; m++)
{
pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
cint[m] = GEN_INT (m);
}
-
memset (&all, 0, sizeof all);
PUT_CODE (&all.reg, REG);
XEXP (&all.shift_sub, 0) = &all.shift_mult;
XEXP (&all.shift_sub, 1) = &all.reg;
- for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
- mode != VOIDmode;
- mode = GET_MODE_WIDER_MODE (mode))
+ for (speed = 0; speed < 2; speed++)
{
- PUT_MODE (&all.reg, mode);
- PUT_MODE (&all.plus, mode);
- PUT_MODE (&all.neg, mode);
- PUT_MODE (&all.mult, mode);
- PUT_MODE (&all.sdiv, mode);
- PUT_MODE (&all.udiv, mode);
- PUT_MODE (&all.sdiv_32, mode);
- PUT_MODE (&all.smod_32, mode);
- PUT_MODE (&all.wide_trunc, mode);
- PUT_MODE (&all.shift, mode);
- PUT_MODE (&all.shift_mult, mode);
- PUT_MODE (&all.shift_add, mode);
- PUT_MODE (&all.shift_sub, mode);
-
- add_cost[mode] = rtx_cost (&all.plus, SET);
- neg_cost[mode] = rtx_cost (&all.neg, SET);
- mul_cost[mode] = rtx_cost (&all.mult, SET);
- sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
- udiv_cost[mode] = rtx_cost (&all.udiv, SET);
-
- sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
- <= 2 * add_cost[mode]);
- smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
- <= 4 * add_cost[mode]);
-
- wider_mode = GET_MODE_WIDER_MODE (mode);
- if (wider_mode != VOIDmode)
- {
- PUT_MODE (&all.zext, wider_mode);
- PUT_MODE (&all.wide_mult, wider_mode);
- PUT_MODE (&all.wide_lshr, wider_mode);
- XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
+ crtl->maybe_hot_insn_p = speed;
+ zero_cost[speed] = rtx_cost (const0_rtx, 0, speed);
- mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
- mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
- }
+ for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
+ mode != VOIDmode;
+ mode = GET_MODE_WIDER_MODE (mode))
+ {
+ PUT_MODE (&all.reg, mode);
+ PUT_MODE (&all.plus, mode);
+ PUT_MODE (&all.neg, mode);
+ PUT_MODE (&all.mult, mode);
+ PUT_MODE (&all.sdiv, mode);
+ PUT_MODE (&all.udiv, mode);
+ PUT_MODE (&all.sdiv_32, mode);
+ PUT_MODE (&all.smod_32, mode);
+ PUT_MODE (&all.wide_trunc, mode);
+ PUT_MODE (&all.shift, mode);
+ PUT_MODE (&all.shift_mult, mode);
+ PUT_MODE (&all.shift_add, mode);
+ PUT_MODE (&all.shift_sub, mode);
+
+ add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
+ neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
+ mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
+ sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
+ udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
+
+ sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
+ <= 2 * add_cost[speed][mode]);
+ smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
+ <= 4 * add_cost[speed][mode]);
+
+ wider_mode = GET_MODE_WIDER_MODE (mode);
+ if (wider_mode != VOIDmode)
+ {
+ PUT_MODE (&all.zext, wider_mode);
+ PUT_MODE (&all.wide_mult, wider_mode);
+ PUT_MODE (&all.wide_lshr, wider_mode);
+ XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
+
+ mul_widen_cost[speed][wider_mode]
+ = rtx_cost (&all.wide_mult, SET, speed);
+ mul_highpart_cost[speed][mode]
+ = rtx_cost (&all.wide_trunc, SET, speed);
+ }
- shift_cost[mode][0] = 0;
- shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
+ shift_cost[speed][mode][0] = 0;
+ shiftadd_cost[speed][mode][0] = shiftsub_cost[speed][mode][0]
+ = add_cost[speed][mode];
- n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
- for (m = 1; m < n; m++)
- {
- XEXP (&all.shift, 1) = cint[m];
- XEXP (&all.shift_mult, 1) = pow2[m];
+ n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
+ for (m = 1; m < n; m++)
+ {
+ XEXP (&all.shift, 1) = cint[m];
+ XEXP (&all.shift_mult, 1) = pow2[m];
- shift_cost[mode][m] = rtx_cost (&all.shift, SET);
- shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
- shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
+ shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
+ shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
+ shiftsub_cost[speed][mode][m] = rtx_cost (&all.shift_sub, SET, speed);
+ }
}
}
+ default_rtl_profile ();
}
/* Return an rtx representing minus the value of X.
optab rrotate_optab = rotr_optab;
enum machine_mode op1_mode;
int attempt;
+ bool speed = optimize_insn_for_speed_p ();
op1 = expand_normal (amount);
op1_mode = GET_MODE (op1);
&& INTVAL (op1) > 0
&& INTVAL (op1) < GET_MODE_BITSIZE (mode)
&& INTVAL (op1) < MAX_BITS_PER_WORD
- && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
- && shift_cost[mode][INTVAL (op1)] != MAX_COST)
+ && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
+ && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
{
int i;
for (i = 0; i < INTVAL (op1); i++)
Otherwise, the cost within which multiplication by T is
impossible. */
struct mult_cost cost;
+
+ /* Optimized for speed? */
+ bool speed;
};
/* The number of cache/hash entries. */
int hash_index;
bool cache_hit = false;
enum alg_code cache_alg = alg_zero;
+ bool speed = optimize_insn_for_speed_p ();
/* Indicate that no algorithm is yet found. If no algorithm
is found, this value will be returned and indicate failure. */
fail now. */
if (t == 0)
{
- if (MULT_COST_LESS (cost_limit, zero_cost))
+ if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
return;
else
{
alg_out->ops = 1;
- alg_out->cost.cost = zero_cost;
- alg_out->cost.latency = zero_cost;
+ alg_out->cost.cost = zero_cost[speed];
+ alg_out->cost.latency = zero_cost[speed];
alg_out->op[0] = alg_zero;
return;
}
best_cost = *cost_limit;
/* Compute the hash index. */
- hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
+ hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
/* See if we already know what to do for T. */
if (alg_hash[hash_index].t == t
&& alg_hash[hash_index].mode == mode
+ && alg_hash[hash_index].speed == speed
&& alg_hash[hash_index].alg != alg_unknown)
{
cache_alg = alg_hash[hash_index].alg;
q = t >> m;
/* The function expand_shift will choose between a shift and
a sequence of additions, so the observed cost is given as
- MIN (m * add_cost[mode], shift_cost[mode][m]). */
- op_cost = m * add_cost[mode];
- if (shift_cost[mode][m] < op_cost)
- op_cost = shift_cost[mode][m];
+ MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
+ op_cost = m * add_cost[speed][mode];
+ if (shift_cost[speed][mode][m] < op_cost)
+ op_cost = shift_cost[speed][mode][m];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, q, &new_limit, mode);
{
/* T ends with ...111. Multiply by (T + 1) and subtract 1. */
- op_cost = add_cost[mode];
+ op_cost = add_cost[speed][mode];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, t + 1, &new_limit, mode);
{
/* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
- op_cost = add_cost[mode];
+ op_cost = add_cost[speed][mode];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, t - 1, &new_limit, mode);
equal to its cost, otherwise assume that on superscalar
hardware the shift may be executed concurrently with the
earlier steps in the algorithm. */
- op_cost = add_cost[mode] + shift_cost[mode][m];
- if (shiftadd_cost[mode][m] < op_cost)
+ op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
+ if (shiftadd_cost[speed][mode][m] < op_cost)
{
- op_cost = shiftadd_cost[mode][m];
+ op_cost = shiftadd_cost[speed][mode][m];
op_latency = op_cost;
}
else
- op_latency = add_cost[mode];
+ op_latency = add_cost[speed][mode];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_latency;
equal to it's cost, otherwise assume that on superscalar
hardware the shift may be executed concurrently with the
earlier steps in the algorithm. */
- op_cost = add_cost[mode] + shift_cost[mode][m];
- if (shiftsub_cost[mode][m] < op_cost)
+ op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
+ if (shiftsub_cost[speed][mode][m] < op_cost)
{
- op_cost = shiftsub_cost[mode][m];
+ op_cost = shiftsub_cost[speed][mode][m];
op_latency = op_cost;
}
else
- op_latency = add_cost[mode];
+ op_latency = add_cost[speed][mode];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_latency;
m = exact_log2 (q);
if (m >= 0 && m < maxm)
{
- op_cost = shiftadd_cost[mode][m];
+ op_cost = shiftadd_cost[speed][mode][m];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
m = exact_log2 (q);
if (m >= 0 && m < maxm)
{
- op_cost = shiftsub_cost[mode][m];
+ op_cost = shiftsub_cost[speed][mode][m];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
caller. */
alg_hash[hash_index].t = t;
alg_hash[hash_index].mode = mode;
+ alg_hash[hash_index].speed = speed;
alg_hash[hash_index].alg = alg_impossible;
alg_hash[hash_index].cost = *cost_limit;
return;
{
alg_hash[hash_index].t = t;
alg_hash[hash_index].mode = mode;
+ alg_hash[hash_index].speed = speed;
alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
alg_hash[hash_index].cost.cost = best_cost.cost;
alg_hash[hash_index].cost.latency = best_cost.latency;
struct algorithm alg2;
struct mult_cost limit;
int op_cost;
+ bool speed = optimize_insn_for_speed_p ();
/* Fail quickly for impossible bounds. */
if (mult_cost < 0)
/* Ensure that mult_cost provides a reasonable upper bound.
Any constant multiplication can be performed with less
than 2 * bits additions. */
- op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
+ op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
if (mult_cost > op_cost)
mult_cost = op_cost;
`unsigned int' */
if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
{
- op_cost = neg_cost[mode];
+ op_cost = neg_cost[speed][mode];
if (MULT_COST_LESS (&alg->cost, mult_cost))
{
limit.cost = alg->cost.cost - op_cost;
}
/* This proves very useful for division-by-constant. */
- op_cost = add_cost[mode];
+ op_cost = add_cost[speed][mode];
if (MULT_COST_LESS (&alg->cost, mult_cost))
{
limit.cost = alg->cost.cost - op_cost;
enum mult_variant variant;
struct algorithm algorithm;
int max_cost;
+ bool speed = optimize_insn_for_speed_p ();
/* Handling const0_rtx here allows us to use zero as a rogue value for
coeff below. */
result is interpreted as an unsigned coefficient.
Exclude cost of op0 from max_cost to match the cost
calculation of the synth_mult. */
- max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
- - neg_cost[mode];
+ max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
+ - neg_cost[speed][mode];
if (max_cost > 0
&& choose_mult_variant (mode, -INTVAL (op1), &algorithm,
&variant, max_cost))
/* Exclude cost of op0 from max_cost to match the cost
calculation of the synth_mult. */
- max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
+ max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
if (choose_mult_variant (mode, coeff, &algorithm, &variant,
max_cost))
return expand_mult_const (mode, op0, coeff, target,
optab moptab;
rtx tem;
int size;
+ bool speed = optimize_insn_for_speed_p ();
gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
/* Firstly, try using a multiplication insn that only generates the needed
high part of the product, and in the sign flavor of unsignedp. */
- if (mul_highpart_cost[mode] < max_cost)
+ if (mul_highpart_cost[speed][mode] < max_cost)
{
moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
Need to adjust the result after the multiplication. */
if (size - 1 < BITS_PER_WORD
- && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
- + 4 * add_cost[mode] < max_cost))
+ && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
+ + 4 * add_cost[speed][mode] < max_cost))
{
moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
/* Try widening multiplication. */
moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
- && mul_widen_cost[wider_mode] < max_cost)
+ && mul_widen_cost[speed][wider_mode] < max_cost)
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
unsignedp, OPTAB_WIDEN);
/* Try widening the mode and perform a non-widening multiplication. */
if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
- && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
+ && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
{
rtx insns, wop0, wop1;
moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
- && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
- + 4 * add_cost[mode] < max_cost))
+ && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
+ + 4 * add_cost[speed][mode] < max_cost))
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
enum mult_variant variant;
struct algorithm alg;
rtx tem;
+ bool speed = optimize_insn_for_speed_p ();
gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
/* We can't support modes wider than HOST_BITS_PER_INT. */
return expand_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost);
- extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
+ extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
/* Check whether we try to multiply by a negative constant. */
if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
{
sign_adjust = true;
- extra_cost += add_cost[mode];
+ extra_cost += add_cost[speed][mode];
}
/* See whether shift/add multiplication is cheap enough. */
temp = gen_rtx_LSHIFTRT (mode, result, shift);
if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
- || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
+ || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
{
temp = expand_binop (mode, xor_optab, op0, signmask,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
temp = gen_reg_rtx (mode);
temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
- if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
+ if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
NULL_RTX, 0, OPTAB_LIB_WIDEN);
else
int max_cost, extra_cost;
static HOST_WIDE_INT last_div_const = 0;
static HOST_WIDE_INT ext_op1;
+ bool speed = optimize_insn_for_speed_p ();
op1_is_constant = GET_CODE (op1) == CONST_INT;
if (op1_is_constant)
/* Only deduct something for a REM if the last divide done was
for a different constant. Then set the constant of the last
divide. */
- max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
+ max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
if (rem_flag && ! (last_div_const != 0 && op1_is_constant
&& INTVAL (op1) == last_div_const))
- max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
+ max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
goto fail1;
extra_cost
- = (shift_cost[compute_mode][post_shift - 1]
- + shift_cost[compute_mode][1]
- + 2 * add_cost[compute_mode]);
+ = (shift_cost[speed][compute_mode][post_shift - 1]
+ + shift_cost[speed][compute_mode][1]
+ + 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 1,
max_cost - extra_cost);
build_int_cst (NULL_TREE, pre_shift),
NULL_RTX, 1);
extra_cost
- = (shift_cost[compute_mode][pre_shift]
- + shift_cost[compute_mode][post_shift]);
+ = (shift_cost[speed][compute_mode][pre_shift]
+ + shift_cost[speed][compute_mode][post_shift]);
t2 = expand_mult_highpart (compute_mode, t1, ml,
NULL_RTX, 1,
max_cost - extra_cost);
|| size - 1 >= BITS_PER_WORD)
goto fail1;
- extra_cost = (shift_cost[compute_mode][post_shift]
- + shift_cost[compute_mode][size - 1]
- + add_cost[compute_mode]);
+ extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ + shift_cost[speed][compute_mode][size - 1]
+ + add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, mlr,
NULL_RTX, 0,
max_cost - extra_cost);
ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
mlr = gen_int_mode (ml, compute_mode);
- extra_cost = (shift_cost[compute_mode][post_shift]
- + shift_cost[compute_mode][size - 1]
- + 2 * add_cost[compute_mode]);
+ extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ + shift_cost[speed][compute_mode][size - 1]
+ + 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, mlr,
NULL_RTX, 0,
max_cost - extra_cost);
NULL_RTX, 0);
t2 = expand_binop (compute_mode, xor_optab, op0, t1,
NULL_RTX, 0, OPTAB_WIDEN);
- extra_cost = (shift_cost[compute_mode][post_shift]
- + shift_cost[compute_mode][size - 1]
- + 2 * add_cost[compute_mode]);
+ extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ + shift_cost[speed][compute_mode][size - 1]
+ + 2 * add_cost[speed][compute_mode]);
t3 = expand_mult_highpart (compute_mode, t2, ml,
NULL_RTX, 1,
max_cost - extra_cost);
enum machine_mode srcmode;
REAL_VALUE_TYPE r;
int oldcost, newcost;
+ bool speed = optimize_insn_for_speed_p ();
REAL_VALUE_FROM_CONST_DOUBLE (r, y);
if (LEGITIMATE_CONSTANT_P (y))
- oldcost = rtx_cost (y, SET);
+ oldcost = rtx_cost (y, SET, speed);
else
- oldcost = rtx_cost (force_const_mem (dstmode, y), SET);
+ oldcost = rtx_cost (force_const_mem (dstmode, y), SET, speed);
for (srcmode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (orig_srcmode));
srcmode != orig_srcmode;
if (! (*insn_data[ic].operand[1].predicate) (trunc_y, srcmode))
continue;
/* This is valid, but may not be cheaper than the original. */
- newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET);
+ newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET, speed);
if (oldcost < newcost)
continue;
}
{
trunc_y = force_const_mem (srcmode, trunc_y);
/* This is valid, but may not be cheaper than the original. */
- newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET);
+ newcost = rtx_cost (gen_rtx_FLOAT_EXTEND (dstmode, trunc_y), SET, speed);
if (oldcost < newcost)
continue;
trunc_y = validize_mem (trunc_y);
for a memory access in the given MODE. */
static bool
-should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode)
+should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode,
+ bool speed)
{
int gain;
return true;
/* Prefer the new address if it is less expensive. */
- gain = address_cost (old_rtx, mode) - address_cost (new_rtx, mode);
+ gain = address_cost (old_rtx, mode, speed) - address_cost (new_rtx, mode, speed);
/* If the addresses have equivalent cost, prefer the new address
if it has the highest `rtx_cost'. That has the potential of
eliminating the most insns without additional costs, and it
is the same that cse.c used to do. */
if (gain == 0)
- gain = rtx_cost (new_rtx, SET) - rtx_cost (old_rtx, SET);
+ gain = rtx_cost (new_rtx, SET, speed) - rtx_cost (old_rtx, SET, speed);
return (gain > 0);
}
PR_HANDLE_MEM is set when the source of the propagation was not
another MEM. Then, it is safe not to treat non-read-only MEMs as
``opaque'' objects. */
- PR_HANDLE_MEM = 2
+ PR_HANDLE_MEM = 2,
+
+ /* Set when costs should be optimized for speed. */
+ PR_OPTIMIZE_FOR_SPEED = 4
};
/* Copy propagations are always ok. Otherwise check the costs. */
if (!(REG_P (old_rtx) && REG_P (new_rtx))
- && !should_replace_address (op0, new_op0, GET_MODE (x)))
+ && !should_replace_address (op0, new_op0, GET_MODE (x),
+ flags & PR_OPTIMIZE_FOR_SPEED))
return true;
tem = replace_equiv_address_nv (x, new_op0);
Otherwise, we accept simplifications that have a lower or equal cost. */
static rtx
-propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx)
+propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx,
+ bool speed)
{
rtx tem;
bool collapsed;
if (!for_each_rtx (&new_rtx, varying_mem_p, NULL))
flags |= PR_HANDLE_MEM;
+ if (speed)
+ flags |= PR_OPTIMIZE_FOR_SPEED;
+
tem = x;
collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
if (tem == x || !collapsed)
enum df_ref_type type = DF_REF_TYPE (use);
int flags = DF_REF_FLAGS (use);
rtx set = single_set (insn);
- int old_cost = rtx_cost (SET_SRC (set), SET);
+ bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
+ int old_cost = rtx_cost (SET_SRC (set), SET, speed);
bool ok;
if (dump_file)
}
else if (DF_REF_TYPE (use) == DF_REF_REG_USE
- && rtx_cost (SET_SRC (set), SET) > old_cost)
+ && rtx_cost (SET_SRC (set), SET, speed) > old_cost)
{
if (dump_file)
fprintf (dump_file, "Changes to insn %d not profitable\n",
else
mode = GET_MODE (*loc);
- new_rtx = propagate_rtx (*loc, mode, reg, src);
+ new_rtx = propagate_rtx (*loc, mode, reg, src,
+ optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
if (!new_rtx)
return false;
return 0;
}
+/* Generic hook that takes (rtx, bool), ignores both and returns 0. */
+int
+hook_int_rtx_bool_0 (rtx a ATTRIBUTE_UNUSED, bool b ATTRIBUTE_UNUSED)
+{
+ return 0;
+}
+
int
hook_int_size_t_constcharptr_int_0 (size_t a ATTRIBUTE_UNUSED,
const char *b ATTRIBUTE_UNUSED,
}
bool
-hook_bool_rtx_int_int_intp_false (rtx a ATTRIBUTE_UNUSED,
- int b ATTRIBUTE_UNUSED,
- int c ATTRIBUTE_UNUSED,
- int *d ATTRIBUTE_UNUSED)
+hook_bool_rtx_int_int_intp_bool_false (rtx a ATTRIBUTE_UNUSED,
+ int b ATTRIBUTE_UNUSED,
+ int c ATTRIBUTE_UNUSED,
+ int *d ATTRIBUTE_UNUSED,
+ bool speed_p ATTRIBUTE_UNUSED)
{
return false;
}
const_tree);
extern bool hook_bool_rtx_false (rtx);
extern bool hook_bool_uintp_uintp_false (unsigned int *, unsigned int *);
-extern bool hook_bool_rtx_int_int_intp_false (rtx, int, int, int *);
+extern bool hook_bool_rtx_int_int_intp_bool_false (rtx, int, int, int *, bool);
extern bool hook_bool_constcharptr_size_t_false (const char *, size_t);
extern bool hook_bool_size_t_constcharptr_int_true (size_t, const char *, int);
extern bool hook_bool_tree_tree_false (tree, tree);
extern int hook_int_const_tree_0 (const_tree);
extern int hook_int_const_tree_const_tree_1 (const_tree, const_tree);
extern int hook_int_rtx_0 (rtx);
+extern int hook_int_rtx_bool_0 (rtx, bool);
extern int hook_int_size_t_constcharptr_int_0 (size_t, const char *, int);
extern int hook_int_void_no_regs (void);
{
int count = 0;
rtx insn = BB_HEAD (bb);
+ bool speed = optimize_bb_for_speed_p (bb);
while (1)
{
if (NONJUMP_INSN_P (insn))
{
- int cost = insn_rtx_cost (PATTERN (insn));
+ int cost = insn_rtx_cost (PATTERN (insn), speed);
if (cost == 0)
return false;
if insn_rtx_cost can't be estimated. */
if (insn_a)
{
- insn_cost = insn_rtx_cost (PATTERN (insn_a));
+ insn_cost = insn_rtx_cost (PATTERN (insn_a),
+ optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn_a)));
if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost))
return FALSE;
}
if (insn_b)
{
- insn_cost += insn_rtx_cost (PATTERN (insn_b));
+ insn_cost += insn_rtx_cost (PATTERN (insn_b),
+ optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn_b)));
if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost))
return FALSE;
}
INSN_B which can happen for e.g. conditional stores to memory. */
b_unconditional = (if_info->insn_b == NULL_RTX
|| BLOCK_FOR_INSN (if_info->insn_b) == if_info->test_bb);
- if (rtx_cost (t, SET) >= COSTS_N_INSNS (2)
+ /* INSN_B may be NULL_RTX (see the test above), so do not look up its
+ block; take the speed/size decision from TEST_BB instead. */
+ if (rtx_cost (t, SET, optimize_bb_for_speed_p (if_info->test_bb))
+ >= COSTS_N_INSNS (2)
&& (!b_unconditional
|| t != if_info->b))
return FALSE;
max_cost
= COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
- if (rtx_cost (desc->niter_expr, SET) > max_cost)
+ if (rtx_cost (desc->niter_expr, SET, optimize_loop_for_speed_p (loop))
+ > max_cost)
{
if (dump_file)
fprintf (dump_file,
{
struct invariant *inv = XNEW (struct invariant);
rtx set = single_set (insn);
+ bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
inv->def = def;
inv->always_executed = always_executed;
/* If the set is simple, usually by moving it we move the whole store out of
the loop. Otherwise we save only cost of the computation. */
if (def)
- inv->cost = rtx_cost (set, SET);
+ inv->cost = rtx_cost (set, SET, speed);
else
- inv->cost = rtx_cost (SET_SRC (set), SET);
+ inv->cost = rtx_cost (SET_SRC (set), SET, speed);
inv->move = false;
inv->reg = NULL_RTX;
static int
gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
- unsigned new_regs, unsigned regs_used)
+ unsigned new_regs, unsigned regs_used, bool speed)
{
int comp_cost, size_cost;
get_inv_cost (inv, &comp_cost, regs_needed);
actual_stamp++;
- size_cost = (estimate_reg_pressure_cost (new_regs + *regs_needed, regs_used)
- - estimate_reg_pressure_cost (new_regs, regs_used));
+ size_cost = (estimate_reg_pressure_cost (new_regs + *regs_needed, regs_used, speed)
+ - estimate_reg_pressure_cost (new_regs, regs_used, speed));
return comp_cost - size_cost;
}
static int
best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
- unsigned new_regs, unsigned regs_used)
+ unsigned new_regs, unsigned regs_used, bool speed)
{
struct invariant *inv;
int gain = 0, again;
if (inv->eqto != inv->invno)
continue;
- again = gain_for_invariant (inv, &aregs_needed, new_regs, regs_used);
+ again = gain_for_invariant (inv, &aregs_needed, new_regs, regs_used,
+ speed);
if (again > gain)
{
gain = again;
/* Determines which invariants to move. */
static void
-find_invariants_to_move (void)
+find_invariants_to_move (bool speed)
{
unsigned i, regs_used, regs_needed = 0, new_regs;
struct invariant *inv = NULL;
}
new_regs = 0;
- while (best_gain_for_invariant (&inv, &regs_needed, new_regs, regs_used) > 0)
+ while (best_gain_for_invariant (&inv, &regs_needed, new_regs, regs_used, speed) > 0)
{
set_move_mark (inv->invno);
new_regs += regs_needed;
init_inv_motion_data ();
find_invariants (loop);
- find_invariants_to_move ();
+ find_invariants_to_move (optimize_loop_for_speed_p (loop));
move_invariants (loop);
free_inv_motion_data ();
if (mode != VOIDmode
&& optimize
&& CONSTANT_P (x)
- && rtx_cost (x, binoptab->code) > COSTS_N_INSNS (1))
+ && rtx_cost (x, binoptab->code, optimize_insn_for_speed_p ())
+ > COSTS_N_INSNS (1))
{
if (GET_CODE (x) == CONST_INT)
{
/* If we are inside an appropriately-short loop and we are optimizing,
force expensive constants into a register. */
if (CONSTANT_P (x) && optimize
- && rtx_cost (x, COMPARE) > COSTS_N_INSNS (1))
+ && (rtx_cost (x, COMPARE, optimize_insn_for_speed_p ())
+ > COSTS_N_INSNS (1)))
x = force_reg (mode, x);
if (CONSTANT_P (y) && optimize
- && rtx_cost (y, COMPARE) > COSTS_N_INSNS (1))
+ && (rtx_cost (y, COMPARE, optimize_insn_for_speed_p ())
+ > COSTS_N_INSNS (1)))
y = force_reg (mode, y);
#ifdef HAVE_cc0
const char *);
extern int maybe_assemble_visibility (tree);
-extern int default_address_cost (rtx);
+extern int default_address_cost (rtx, bool);
/* dbxout helper functions */
#if defined DBX_DEBUGGING_INFO || defined XCOFF_DEBUGGING_INFO
#ifdef LOAD_EXTEND_OP
enum rtx_code extend_op = UNKNOWN;
#endif
+ bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
dreg = true_regnum (SET_DEST (set));
if (dreg < 0)
old_cost = REGISTER_MOVE_COST (GET_MODE (src),
REGNO_REG_CLASS (REGNO (src)), dclass);
else
- old_cost = rtx_cost (src, SET);
+ old_cost = rtx_cost (src, SET, speed);
for (l = val->locs; l; l = l->next)
{
this_rtx = GEN_INT (this_val);
}
#endif
- this_cost = rtx_cost (this_rtx, SET);
+ this_cost = rtx_cost (this_rtx, SET, speed);
}
else if (REG_P (this_rtx))
{
if (extend_op != UNKNOWN)
{
this_rtx = gen_rtx_fmt_e (extend_op, word_mode, this_rtx);
- this_cost = rtx_cost (this_rtx, SET);
+ this_cost = rtx_cost (this_rtx, SET, speed);
}
else
#endif
if (op_alt_regno[i][j] == -1
&& reg_fits_class_p (testreg, rclass, 0, mode)
&& (GET_CODE (recog_data.operand[i]) != CONST_INT
- || (rtx_cost (recog_data.operand[i], SET)
- > rtx_cost (testreg, SET))))
+ || (rtx_cost (recog_data.operand[i], SET,
+ optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)))
+ > rtx_cost (testreg, SET,
+ optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn))))))
{
alternative_nregs[j]++;
op_alt_regno[i][j] = regno;
{
rtx new_src = gen_int_mode (INTVAL (src) - reg_offset[regno],
GET_MODE (reg));
+ bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
+
/* (set (reg) (plus (reg) (const_int 0))) is not canonical;
use (set (reg) (reg)) instead.
We don't delete this insn, nor do we convert it into a
if (INTVAL (src) == reg_offset [regno])
validate_change (insn, &SET_SRC (pat), reg, 0);
}
- else if (rtx_cost (new_src, PLUS) < rtx_cost (src, SET)
+ else if (rtx_cost (new_src, PLUS, speed) < rtx_cost (src, SET, speed)
&& have_add2_insn (reg, new_src))
{
rtx tem = gen_rtx_PLUS (GET_MODE (reg), reg, new_src);
+ base_offset
- regno_offset,
GET_MODE (reg));
- int success = 0;
+ bool success = false;
+ bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
if (new_src == const0_rtx)
/* See above why we create (set (reg) (reg)) here. */
success
= validate_change (next, &SET_SRC (set), reg, 0);
- else if ((rtx_cost (new_src, PLUS)
- < COSTS_N_INSNS (1) + rtx_cost (src3, SET))
+ else if ((rtx_cost (new_src, PLUS, speed)
+ < COSTS_N_INSNS (1) + rtx_cost (src3, SET, speed))
&& have_add2_insn (reg, new_src))
{
rtx newpat = gen_rtx_SET (VOIDmode,
#define MAX_COST INT_MAX
extern void init_rtlanal (void);
-extern int rtx_cost (rtx, enum rtx_code);
-extern int address_cost (rtx, enum machine_mode);
+extern int rtx_cost (rtx, enum rtx_code, bool);
+extern int address_cost (rtx, enum machine_mode, bool);
extern unsigned int subreg_lsb (const_rtx);
extern unsigned int subreg_lsb_1 (enum machine_mode, enum machine_mode,
unsigned int);
extern rtx find_first_parameter_load (rtx, rtx);
extern bool keep_with_call_p (const_rtx);
extern bool label_is_jump_target_p (const_rtx, const_rtx);
-extern int insn_rtx_cost (rtx);
+extern int insn_rtx_cost (rtx, bool);
/* Given an insn and condition, return a canonical description of
the test being made. */
extern void set_curr_insn_source_location (location_t);
extern void set_curr_insn_block (tree);
extern int curr_insn_locator (void);
+extern bool optimize_insn_for_size_p (void);
+extern bool optimize_insn_for_speed_p (void);
#endif /* ! GCC_RTL_H */
/* Return an estimate of the cost of computing rtx X.
One use is in cse, to decide which expression to keep in the hash table.
Another is in rtl generation, to pick the cheapest way to multiply.
- Other uses like the latter are expected in the future. */
+ Other uses like the latter are expected in the future.
+
+ The SPEED parameter specifies whether costs optimized for speed or for
+ size should be returned. */
int
-rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED)
+rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED, bool speed)
{
int i, j;
enum rtx_code code;
break;
default:
- if (targetm.rtx_costs (x, code, outer_code, &total))
+ if (targetm.rtx_costs (x, code, outer_code, &total, speed))
return total;
break;
}
fmt = GET_RTX_FORMAT (code);
for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
if (fmt[i] == 'e')
- total += rtx_cost (XEXP (x, i), code);
+ total += rtx_cost (XEXP (x, i), code, speed);
else if (fmt[i] == 'E')
for (j = 0; j < XVECLEN (x, i); j++)
- total += rtx_cost (XVECEXP (x, i, j), code);
+ total += rtx_cost (XVECEXP (x, i, j), code, speed);
return total;
}
\f
/* Return cost of address expression X.
- Expect that X is properly formed address reference. */
+ Expect that X is properly formed address reference.
+
+ The SPEED parameter specifies whether costs optimized for speed or for
+ size should be returned. */
int
-address_cost (rtx x, enum machine_mode mode)
+address_cost (rtx x, enum machine_mode mode, bool speed)
{
/* We may be asked for cost of various unusual addresses, such as operands
of push instruction. It is not worthwhile to complicate writing
if (!memory_address_p (mode, x))
return 1000;
- return targetm.address_cost (x);
+ return targetm.address_cost (x, speed);
}
/* If the target doesn't override, compute the cost as with arithmetic. */
int
-default_address_cost (rtx x)
+default_address_cost (rtx x, bool speed)
{
- return rtx_cost (x, MEM);
+ return rtx_cost (x, MEM, speed);
}
\f
zero indicates an instruction pattern without a known cost. */
int
-insn_rtx_cost (rtx pat)
+insn_rtx_cost (rtx pat, bool speed)
{
int i, cost;
rtx set;
else
return 0;
- cost = rtx_cost (SET_SRC (set), SET);
+ cost = rtx_cost (SET_SRC (set), SET, speed);
return cost > 0 ? cost : COSTS_N_INSNS (1);
}
rtx coeff;
unsigned HOST_WIDE_INT l;
HOST_WIDE_INT h;
+ bool speed = optimize_function_for_speed_p (cfun);
add_double (coeff0l, coeff0h, coeff1l, coeff1h, &l, &h);
coeff = immed_double_const (l, h, mode);
tem = simplify_gen_binary (MULT, mode, lhs, coeff);
- return rtx_cost (tem, SET) <= rtx_cost (orig, SET)
+ return rtx_cost (tem, SET, speed) <= rtx_cost (orig, SET, speed)
? tem : 0;
}
}
rtx coeff;
unsigned HOST_WIDE_INT l;
HOST_WIDE_INT h;
+ bool speed = optimize_function_for_speed_p (cfun);
add_double (coeff0l, coeff0h, negcoeff1l, negcoeff1h, &l, &h);
coeff = immed_double_const (l, h, mode);
tem = simplify_gen_binary (MULT, mode, lhs, coeff);
- return rtx_cost (tem, SET) <= rtx_cost (orig, SET)
+ return rtx_cost (tem, SET, speed) <= rtx_cost (orig, SET, speed)
? tem : 0;
}
}
if (!init)
{
rtx reg = gen_rtx_REG (word_mode, 10000);
- int cost = rtx_cost (gen_rtx_ASHIFT (word_mode, const1_rtx, reg), SET);
+ int cost = rtx_cost (gen_rtx_ASHIFT (word_mode, const1_rtx, reg), SET,
+ optimize_insn_for_speed_p ());
cheap = cost < COSTS_N_INSNS (3);
init = true;
}
#define TARGET_MS_BITFIELD_LAYOUT_P hook_bool_const_tree_false
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_false
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
-#define TARGET_RTX_COSTS hook_bool_rtx_int_int_intp_false
+#define TARGET_RTX_COSTS hook_bool_rtx_int_int_intp_bool_false
#define TARGET_MANGLE_TYPE hook_constcharptr_const_tree_null
#define TARGET_ALLOCATE_INITIAL_VALUE NULL
scanned. In either case, *TOTAL contains the cost result. */
/* Note that CODE and OUTER_CODE ought to be RTX_CODE, but that's
not necessarily defined at this point. */
- bool (* rtx_costs) (rtx x, int code, int outer_code, int *total);
+ bool (* rtx_costs) (rtx x, int code, int outer_code, int *total, bool speed);
/* Compute the cost of X, used as an address. Never called with
invalid addresses. */
- int (* address_cost) (rtx x);
+ int (* address_cost) (rtx x, bool speed);
/* Return where to allocate pseudo for a given hard register initial
value. */
bool expr_invariant_in_loop_p (struct loop *, tree);
bool stmt_invariant_in_loop_p (struct loop *, gimple);
bool multiplier_allowed_in_address_p (HOST_WIDE_INT, enum machine_mode);
-unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode);
+unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode, bool);
/* In tree-ssa-threadupdate.c. */
extern bool thread_through_all_blocks (bool);
struct affine_tree_combination;
tree create_mem_ref (gimple_stmt_iterator *, tree,
- struct affine_tree_combination *);
+ struct affine_tree_combination *, bool);
rtx addr_for_mem_ref (struct mem_address *, bool);
void get_address_description (tree, struct mem_address *);
tree maybe_fold_tmr (tree);
element(s) to PARTS. */
static void
-most_expensive_mult_to_index (struct mem_address *parts, aff_tree *addr)
+most_expensive_mult_to_index (struct mem_address *parts, aff_tree *addr,
+ bool speed)
{
HOST_WIDE_INT coef;
double_int best_mult, amult, amult_neg;
|| !multiplier_allowed_in_address_p (coef, Pmode))
continue;
- acost = multiply_by_cost (coef, Pmode);
+ acost = multiply_by_cost (coef, Pmode, speed);
if (acost > best_mult_cost)
{
addressing modes is useless. */
static void
-addr_to_parts (aff_tree *addr, struct mem_address *parts)
+addr_to_parts (aff_tree *addr, struct mem_address *parts, bool speed)
{
tree part;
unsigned i;
/* First move the most expensive feasible multiplication
to index. */
- most_expensive_mult_to_index (parts, addr);
+ most_expensive_mult_to_index (parts, addr, speed);
/* Try to find a base of the reference. Since at the moment
there is no reliable way how to distinguish between pointer and its
of created memory reference. */
tree
-create_mem_ref (gimple_stmt_iterator *gsi, tree type, aff_tree *addr)
+create_mem_ref (gimple_stmt_iterator *gsi, tree type, aff_tree *addr,
+ bool speed)
{
tree mem_ref, tmp;
tree atype;
struct mem_address parts;
- addr_to_parts (addr, &parts);
+ addr_to_parts (addr, &parts, speed);
gimplify_mem_ref_parts (gsi, &parts);
mem_ref = create_mem_ref_raw (type, &parts);
if (mem_ref)
/* The currently optimized loop. */
struct loop *current_loop;
+ /* Are we optimizing for speed? */
+ bool speed;
+
/* Number of registers used in it. */
unsigned regs_used;
/* Returns estimate on cost of computing SEQ. */
static unsigned
-seq_cost (rtx seq)
+seq_cost (rtx seq, bool speed)
{
unsigned cost = 0;
rtx set;
{
set = single_set (seq);
if (set)
- cost += rtx_cost (set, SET);
+ cost += rtx_cost (set, SET,speed);
else
cost++;
}
/* Determines cost of the computation of EXPR. */
static unsigned
-computation_cost (tree expr)
+computation_cost (tree expr, bool speed)
{
rtx seq, rslt;
tree type = TREE_TYPE (expr);
unsigned cost;
/* Avoid using hard regs in ways which may be unsupported. */
int regno = LAST_VIRTUAL_REGISTER + 1;
+ /* SPEED says whether the cost is wanted for speed (true) or for size
+ (false). While EXPR is expanded, the function frequency is forced to
+ FUNCTION_FREQUENCY_NORMAL and the insn hotness flag is set from SPEED;
+ afterwards default_rtl_profile () is called and the saved frequency is
+ put back. */
+ enum function_frequency real_frequency = cfun->function_frequency;
+ cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
+ crtl->maybe_hot_insn_p = speed;
walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
start_sequence ();
rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
seq = get_insns ();
end_sequence ();
+ default_rtl_profile ();
+ cfun->function_frequency = real_frequency;
- cost = seq_cost (seq);
+ cost = seq_cost (seq, speed);
if (MEM_P (rslt))
- cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
+ cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
return cost;
}
/* Returns cost of addition in MODE. */
static unsigned
-add_cost (enum machine_mode mode)
+add_cost (enum machine_mode mode, bool speed)
{
static unsigned costs[NUM_MACHINE_MODES];
rtx seq;
seq = get_insns ();
end_sequence ();
- cost = seq_cost (seq);
+ cost = seq_cost (seq, speed);
if (!cost)
cost = 1;
/* Returns cost of multiplication by constant CST in MODE. */
unsigned
-multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
+multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
static htab_t costs;
struct mbc_entry **cached, act;
seq = get_insns ();
end_sequence ();
- cost = seq_cost (seq);
+ cost = seq_cost (seq, speed);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
static comp_cost
get_address_cost (bool symbol_present, bool var_present,
unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
- enum machine_mode mem_mode)
+ enum machine_mode mem_mode,
+ bool speed)
{
static bool initialized[MAX_MACHINE_MODE];
static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
seq = get_insns ();
end_sequence ();
- acost = seq_cost (seq);
- acost += address_cost (addr, mem_mode);
+ acost = seq_cost (seq, speed);
+ acost += address_cost (addr, mem_mode, speed);
if (!acost)
acost = 1;
If VAR_PRESENT is true, try whether the mode with
SYMBOL_PRESENT = false is cheaper even with cost of addition, and
if this is the case, use it. */
- add_c = add_cost (Pmode);
+ add_c = add_cost (Pmode, speed);
for (i = 0; i < 8; i++)
{
var_p = i & 1;
&& multiplier_allowed_in_address_p (ratio, mem_mode));
if (ratio != 1 && !ratio_p)
- cost += multiply_by_cost (ratio, Pmode);
+ cost += multiply_by_cost (ratio, Pmode, speed);
if (s_offset && !offset_p && !symbol_present)
- cost += add_cost (Pmode);
+ cost += add_cost (Pmode, speed);
acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
/* Estimates cost of forcing expression EXPR into a variable. */
static comp_cost
-force_expr_to_var_cost (tree expr)
+force_expr_to_var_cost (tree expr, bool speed)
{
static bool costs_initialized = false;
- static unsigned integer_cost;
- static unsigned symbol_cost;
- static unsigned address_cost;
+ static unsigned integer_cost [2];
+ static unsigned symbol_cost [2];
+ static unsigned address_cost [2];
tree op0, op1;
comp_cost cost0, cost1, cost;
enum machine_mode mode;
tree type = build_pointer_type (integer_type_node);
tree var, addr;
rtx x;
+ int i;
var = create_tmp_var_raw (integer_type_node, "test_var");
TREE_STATIC (var) = 1;
x = produce_memory_decl_rtl (var, NULL);
SET_DECL_RTL (var, x);
- integer_cost = computation_cost (build_int_cst (integer_type_node,
- 2000));
-
addr = build1 (ADDR_EXPR, type, var);
- symbol_cost = computation_cost (addr) + 1;
- address_cost
- = computation_cost (build2 (POINTER_PLUS_EXPR, type,
- addr,
- build_int_cst (sizetype, 2000))) + 1;
- if (dump_file && (dump_flags & TDF_DETAILS))
+
+ for (i = 0; i < 2; i++)
{
- fprintf (dump_file, "force_expr_to_var_cost:\n");
- fprintf (dump_file, " integer %d\n", (int) integer_cost);
- fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
- fprintf (dump_file, " address %d\n", (int) address_cost);
- fprintf (dump_file, " other %d\n", (int) target_spill_cost);
- fprintf (dump_file, "\n");
+ integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
+ 2000), i);
+
+ symbol_cost[i] = computation_cost (addr, i) + 1;
+
+ address_cost[i]
+ = computation_cost (build2 (POINTER_PLUS_EXPR, type,
+ addr,
+ build_int_cst (sizetype, 2000)), i) + 1;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
+ fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
+ fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
+ fprintf (dump_file, " address %d\n", (int) address_cost[i]);
+ fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
+ fprintf (dump_file, "\n");
+ }
}
costs_initialized = true;
if (is_gimple_min_invariant (expr))
{
if (TREE_CODE (expr) == INTEGER_CST)
- return new_cost (integer_cost, 0);
+ return new_cost (integer_cost [speed], 0);
if (TREE_CODE (expr) == ADDR_EXPR)
{
if (TREE_CODE (obj) == VAR_DECL
|| TREE_CODE (obj) == PARM_DECL
|| TREE_CODE (obj) == RESULT_DECL)
- return new_cost (symbol_cost, 0);
+ return new_cost (symbol_cost [speed], 0);
}
- return new_cost (address_cost, 0);
+ return new_cost (address_cost [speed], 0);
}
switch (TREE_CODE (expr))
if (is_gimple_val (op0))
cost0 = zero_cost;
else
- cost0 = force_expr_to_var_cost (op0);
+ cost0 = force_expr_to_var_cost (op0, speed);
if (is_gimple_val (op1))
cost1 = zero_cost;
else
- cost1 = force_expr_to_var_cost (op1);
+ cost1 = force_expr_to_var_cost (op1, speed);
break;
default:
/* Just an arbitrary value, FIXME. */
- return new_cost (target_spill_cost, 0);
+ return new_cost (target_spill_cost[speed], 0);
}
mode = TYPE_MODE (TREE_TYPE (expr));
case POINTER_PLUS_EXPR:
case PLUS_EXPR:
case MINUS_EXPR:
- cost = new_cost (add_cost (mode), 0);
+ cost = new_cost (add_cost (mode, speed), 0);
break;
case MULT_EXPR:
if (cst_and_fits_in_hwi (op0))
- cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
- else if (cst_and_fits_in_hwi (op1))
- cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
+ cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
+ else if (cst_and_fits_in_hwi (op1))
+ cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
else
- return new_cost (target_spill_cost, 0);
+ return new_cost (target_spill_cost [speed], 0);
break;
default:
computations often are either loop invariant or at least can
be shared between several iv uses, so letting this grow without
limits would not give reasonable results. */
- if (cost.cost > target_spill_cost)
- cost.cost = target_spill_cost;
+ if (cost.cost > target_spill_cost [speed])
+ cost.cost = target_spill_cost [speed];
return cost;
}
walk_tree (&expr, find_depends, depends_on, NULL);
}
- return force_expr_to_var_cost (expr);
+ return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset. The
*var_present = true;
fd_ivopts_data = data;
walk_tree (&addr, find_depends, depends_on, NULL);
- return new_cost (target_spill_cost, 0);
+ return new_cost (target_spill_cost[data->speed], 0);
}
*offset += bitpos / BITS_PER_UNIT;
{
HOST_WIDE_INT diff = 0;
comp_cost cost;
gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
cost = force_var_cost (data, e1, depends_on);
cost = add_costs (cost, force_var_cost (data, e2, depends_on));
- cost.cost += add_cost (Pmode);
+ /* Use the per-loop speed/size decision cached in DATA, as the other
+ cost helpers taking DATA do. */
+ cost.cost += add_cost (Pmode, data->speed);
return cost;
}
if (integer_zerop (e1))
{
cost = force_var_cost (data, e2, depends_on);
- cost.cost += multiply_by_cost (-1, mode);
+ cost.cost += multiply_by_cost (-1, mode, data->speed);
return cost;
}
cost = force_var_cost (data, e1, depends_on);
cost = add_costs (cost, force_var_cost (data, e2, depends_on));
- cost.cost += add_cost (mode);
+ cost.cost += add_cost (mode, data->speed);
return cost;
}
comp_cost cost;
unsigned n_sums;
double_int rat;
+ bool speed = optimize_bb_for_speed_p (gimple_bb (at));
*depends_on = NULL;
else
{
cost = force_var_cost (data, cbase, depends_on);
- cost.cost += add_cost (TYPE_MODE (ctype));
+ cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
cost = add_costs (cost,
difference_cost (data,
ubase, build_int_cst (utype, 0),
if (address_p)
return add_costs (cost, get_address_cost (symbol_present, var_present,
offset, ratio,
- TYPE_MODE (TREE_TYPE (*use->op_p))));
+ TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
/* Otherwise estimate the costs for computing the expression. */
aratio = ratio > 0 ? ratio : -ratio;
if (!symbol_present && !var_present && !offset)
{
if (ratio != 1)
- cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
+ cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
return cost;
}
if (aratio != 1)
- cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
+ cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
n_sums = 1;
if (var_present
if (offset)
cost.complexity++;
- cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
+ cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
return cost;
fallback:
if (address_p)
comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
- return new_cost (computation_cost (comp), 0);
+ return new_cost (computation_cost (comp, speed), 0);
}
}
base = cand->iv->base;
cost_base = force_var_cost (data, base, NULL);
- cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
+ cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
{
/* We add size to the cost, so that we prefer eliminating ivs
if possible. */
- return size + estimate_reg_pressure_cost (size, data->regs_used);
+ return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
}
/* For each size of the induction variable set determine the penalty. */
{
fprintf (dump_file, "Global costs:\n");
fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
- fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost);
- fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
+ fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
+ fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
}
n = 0;
gcc_assert (ok);
unshare_aff_combination (&aff);
- ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
+ ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
copy_ref_info (ref, *use->op_p);
*use->op_p = ref;
}
gcc_assert (!data->niters);
data->current_loop = loop;
+ data->speed = optimize_loop_for_speed_p (loop);
if (dump_file && (dump_flags & TDF_DETAILS))
{