+2000-09-06 Bernd Schmidt <bernds@redhat.co.uk>
+
+ * Makefile.in (cse.o): Depend on $(BASIC_BLOCK_H).
+ * cse.c: Include "basic-block.h".
+ (struct table_elt): New field REGCOST.
+ (CHEAP_REG): Delete macro.
+ (COST): Return 0 for REGs.
+ (approx_reg_cost_1, approx_reg_cost, preferrable): New functions.
+ (notreg_cost): Return 0 for appropriate SUBREGs.
+ (COSTS_N_INSNS): Return N * 2.
+ (rtx_cost): Return 0 for REGs, and use cost of nested rtx for cheap
+ SUBREGs.
+ (CHEAPER): Use new function preferrable.
+ (insert): Initialize REGCOST member.
+ (find_best_addr): Use approx_reg_cost for estimation of register
+ usage.
+ (cse_insn): Likewise.
+ * loop.c (iv_add_mult_cost): New function.
+ (add_cost, shift_cost, mult_cost): Delete variables.
+ (init_loop): Don't initialize add_cost; reduce copy_cost by half.
+ (strength_reduce): Use iv_add_mult_cost instead of fixed add_cost.
+ Make code that detects autoinc opportunities slightly less optimistic.
+ (simplify_giv_expr): If expression contains other reg that is also a
+ giv, only increment benefit if this is the only use of that reg.
+ (consec_sets_giv): Take that change into account.
+ (combine_givs): Slightly more verbose output.
+
+ * i386.h (RTX_COSTS): For MULT, return true cost of multiplication,
+ not the cost of an equivalent shift.
+ * sh-protos.h (addsubcosts): Declare.
+ * sh.c (addsubcosts): New function.
+ * sh.h (CONST_COSTS): If CONST_OK_FOR_I, then return 0.
+ (RTX_COSTS): Tweak. Use addsubcosts.
+ (ADDRESS_COST): Return higher cost for reg+reg addressing.
+
2000-09-06 Geoff Keating <geoffk@cygnus.com>
* config/rs6000/rs6000.c (validate_condition_mode): New function.
hard-reg-set.h flags.h real.h insn-config.h $(RECOG_H) $(EXPR_H) toplev.h \
output.h function.h cselib.h ggc.h $(OBSTACK_H)
cse.o : cse.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) hard-reg-set.h flags.h \
- real.h insn-config.h $(RECOG_H) $(EXPR_H) toplev.h output.h function.h $(GGC_H)
+ real.h insn-config.h $(RECOG_H) $(EXPR_H) toplev.h output.h function.h \
+ $(BASIC_BLOCK_H) $(GGC_H)
gcse.o : gcse.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) hard-reg-set.h \
flags.h real.h insn-config.h $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) \
function.h output.h toplev.h
unsigned HOST_WIDE_INT value = INTVAL (XEXP (X, 1)); \
int nbits = 0; \
\
- if (value == 2) \
- TOPLEVEL_COSTS_N_INSNS (ix86_cost->add); \
- if (value == 4 || value == 8) \
- TOPLEVEL_COSTS_N_INSNS (ix86_cost->lea); \
- \
while (value != 0) \
{ \
nbits++; \
value >>= 1; \
} \
\
- if (nbits == 1) \
- TOPLEVEL_COSTS_N_INSNS (ix86_cost->shift_const); \
- else \
- TOPLEVEL_COSTS_N_INSNS (ix86_cost->mult_init \
- + nbits * ix86_cost->mult_bit); \
+ TOPLEVEL_COSTS_N_INSNS (ix86_cost->mult_init \
+ + nbits * ix86_cost->mult_bit); \
} \
else /* This is arbitrary */ \
TOPLEVEL_COSTS_N_INSNS (ix86_cost->mult_init \
extern void from_compare PARAMS ((rtx *, int));
extern int shift_insns_rtx PARAMS ((rtx));
extern int shiftcosts PARAMS ((rtx));
+extern int addsubcosts PARAMS ((rtx));
extern int andcosts PARAMS ((rtx));
extern int multcosts PARAMS ((rtx));
extern void gen_ashift PARAMS ((int, int, rtx));
return 3;
}
+/* Return the cost of an addition or a subtraction.
+ X is the arithmetic rtx; only its second operand, XEXP (X, 1), is
+ examined. The result is 1 when that operand is not a CONST_INT or
+ is a CONST_INT accepted by CONST_OK_FOR_I, and 3 otherwise. */
+
+int
+addsubcosts (x)
+ rtx x;
+{
+ /* Adding a register is a single cycle insn. Every operand that is
+ not a CONST_INT is costed this way. */
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ return 1;
+
+ /* Likewise for small constants, i.e. those that satisfy
+ CONST_OK_FOR_I. */
+ if (CONST_OK_FOR_I (INTVAL (XEXP (x, 1))))
+ return 1;
+
+ /* Any other constant requires a 2 cycle pc-relative load plus an
+ addition. */
+ return 3;
+}
+
/* Return the cost of a multiply. */
int
multcosts (x)
#define Pmode SImode
#define FUNCTION_MODE Pmode
-/* The relative costs of various types of constants. Note that cse.c defines
- REG = 1, SUBREG = 2, any node = (2 + sum of subnodes). */
+/* The relative costs of various types of constants. */
#define CONST_COSTS(RTX, CODE, OUTER_CODE) \
case CONST_INT: \
- if (INTVAL (RTX) == 0) \
+ if (CONST_OK_FOR_I (INTVAL (RTX))) \
return 0; \
- else if (CONST_OK_FOR_I (INTVAL (RTX))) \
- return 1; \
else if (((OUTER_CODE) == AND || (OUTER_CODE) == IOR || (OUTER_CODE) == XOR) \
&& CONST_OK_FOR_L (INTVAL (RTX))) \
return 1; \
#define RTX_COSTS(X, CODE, OUTER_CODE) \
case PLUS: \
- return (COSTS_N_INSNS (1) \
- + rtx_cost (XEXP ((X), 0), PLUS) \
- + (rtx_equal_p (XEXP ((X), 0), XEXP ((X), 1))\
- ? 0 : rtx_cost (XEXP ((X), 1), PLUS)));\
+ return COSTS_N_INSNS (addsubcosts (X)); \
case AND: \
return COSTS_N_INSNS (andcosts (X)); \
case MULT: \
case ASHIFT: \
case ASHIFTRT: \
case LSHIFTRT: \
- /* Add one extra unit for the matching constraint. \
- Otherwise loop strength reduction would think that\
- a shift with different sourc and destination is \
- as cheap as adding a constant to a register. */ \
- return (COSTS_N_INSNS (shiftcosts (X)) \
- + rtx_cost (XEXP ((X), 0), (CODE)) \
- + 1); \
+ return COSTS_N_INSNS (shiftcosts (X)); \
case DIV: \
case UDIV: \
case MOD: \
&& nonpic_symbol_mentioned_p (X))
\f
/* Compute the cost of an address. For the SH, all valid addresses are
- the same cost. */
-/* ??? Perhaps we should make reg+reg addresses have higher cost because
- they add to register pressure on r0. */
+ the same cost, except that reg + reg addressing is given a slightly
+ higher cost, since it increases pressure on r0. */
-#define ADDRESS_COST(RTX) 1
+#define ADDRESS_COST(X) (GET_CODE (X) == PLUS && ! CONSTANT_P (XEXP (X, 1)) \
+ ? 1 : 0)
/* Compute extra cost of moving data between one register class
and another. */
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
+#include "basic-block.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
chain is not useful.
The `cost' field stores the cost of this element's expression.
+ The `regcost' field stores the value returned by approx_reg_cost for
+ this element's expression.
The `is_const' flag is set if the element is a constant (including
a fixed address).
struct table_elt *first_same_value;
struct table_elt *related_value;
int cost;
+ int regcost;
enum machine_mode mode;
char in_memory;
char is_const;
? (((unsigned) REG << 7) + (unsigned) REG_QTY (REGNO (X))) \
: canon_hash (X, M)) & HASH_MASK)
-/* Determine whether register number N is considered a fixed register for CSE.
+/* Determine whether register number N is considered a fixed register for the
+ purpose of approximating register costs.
It is desirable to replace other regs with fixed regs, to reduce need for
non-fixed hard regs.
A reg wins if it is either the frame pointer or designated as fixed. */
|| ((N) < FIRST_PSEUDO_REGISTER \
&& FIXED_REGNO_P (N) && REGNO_REG_CLASS (N) != NO_REGS))
-/* A register is cheap if it is a user variable assigned to the register
- or if its register number always corresponds to a cheap register. */
-
-#define CHEAP_REG(N) \
- ((REG_USERVAR_P (N) && REGNO (N) < FIRST_PSEUDO_REGISTER) \
- || CHEAP_REGNO (REGNO (N)))
-
-#define COST(X) \
- (GET_CODE (X) == REG \
- ? (CHEAP_REG (X) ? 0 \
- : REGNO (X) >= FIRST_PSEUDO_REGISTER ? 1 \
- : 2) \
- : notreg_cost(X))
+#define COST(X) (GET_CODE (X) == REG ? 0 : notreg_cost (X))
/* Get the info associated with register N. */
|| GET_CODE (X) == ADDRESSOF)
static int notreg_cost PARAMS ((rtx));
+static int approx_reg_cost_1 PARAMS ((rtx *, void *));
+static int approx_reg_cost PARAMS ((rtx));
+static int preferrable PARAMS ((int, int, int, int));
static void new_basic_block PARAMS ((void));
static void make_new_qty PARAMS ((unsigned int, enum machine_mode));
static void make_regs_eqv PARAMS ((unsigned int, unsigned int));
}
}
+/* Subroutine of approx_reg_cost; called through for_each_rtx.
+ XP points at the sub-rtx being visited; DATA is the regset that
+ collects register numbers. When *XP is a non-null REG, its register
+ number is added to that set. Always returns 0 (presumably so that
+ for_each_rtx continues the walk -- confirm against for_each_rtx). */
+static int
+approx_reg_cost_1 (xp, data)
+ rtx *xp;
+ void *data;
+{
+ rtx x = *xp;
+ regset set = (regset) data;
+
+ if (x && GET_CODE (x) == REG)
+ SET_REGNO_REG_SET (set, REGNO (x));
+ return 0;
+}
+
+/* Return an estimate of the cost of the registers used in an rtx.
+ This is mostly the number of different REG expressions in the rtx;
+ however for some exceptions like fixed registers we use a cost of
+ 0.
+ The register numbers mentioned in X are first gathered into a
+ temporary regset by approx_reg_cost_1, so each register is counted
+ once; registers for which CHEAP_REGNO is true are not counted. */
+
+static int
+approx_reg_cost (x)
+ rtx x;
+{
+ regset_head set;
+ int i;
+ int cost = 0;
+
+ INIT_REG_SET (&set);
+ for_each_rtx (&x, approx_reg_cost_1, (void *)&set);
+
+ EXECUTE_IF_SET_IN_REG_SET
+ (&set, 0, i,
+ {
+ if (! CHEAP_REGNO (i))
+ cost++;
+ });
+
+ CLEAR_REG_SET (&set);
+ return cost;
+}
+
+/* Return a negative value if an rtx A, whose costs are given by COST_A
+ and REGCOST_A, is more desirable than an rtx B.
+ Return a positive value if A is less desirable, or 0 if the two are
+ equally good.
+ COST_A and COST_B are compared first; REGCOST_A and REGCOST_B only
+ break ties between equal costs. */
+static int
+preferrable (cost_a, regcost_a, cost_b, regcost_b)
+ int cost_a, regcost_a, cost_b, regcost_b;
+{
+ if (cost_a != cost_b)
+ return cost_a - cost_b;
+ if (regcost_a != regcost_b)
+ return regcost_a - regcost_b;
+ return 0;
+}
+
/* Internal function, to compute cost when X is not a register; called
from COST macro to keep it simple. */
&& subreg_lowpart_p (x)
&& TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (x)),
GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x)))))
- ? (CHEAP_REG (SUBREG_REG (x)) ? 0
- : (REGNO (SUBREG_REG (x)) >= FIRST_PSEUDO_REGISTER ? 1
- : 2))
+ ? 0
: rtx_cost (x, SET) * 2);
}
to make the cost of the corresponding register-to-register instruction
N times that of a fast register-to-register instruction. */
-#define COSTS_N_INSNS(N) ((N) * 4 - 2)
+#define COSTS_N_INSNS(N) ((N) * 2)
/* Return an estimate of the cost of computing rtx X.
One use is in cse, to decide which expression to keep in the hash table.
switch (code)
{
case REG:
- return ! CHEAP_REG (x);
+ return 0;
case SUBREG:
/* If we can't tie these modes, make this expensive. The larger
if (! MODES_TIEABLE_P (GET_MODE (x), GET_MODE (SUBREG_REG (x))))
return COSTS_N_INSNS (2
+ GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD);
- return 2;
+ break;
+
#ifdef RTX_COSTS
RTX_COSTS (x, code, outer_code);
#endif
return rtx_cost (x, MEM);
#endif
}
+
\f
static struct cse_reg_info *
get_cse_reg_info (regno)
If necessary, update table showing constant values of quantities. */
-#define CHEAPER(X,Y) ((X)->cost < (Y)->cost)
+#define CHEAPER(X, Y) \
+ (preferrable ((X)->cost, (X)->regcost, (Y)->cost, (Y)->regcost) < 0)
static struct table_elt *
insert (x, classp, hash, mode)
elt->exp = x;
elt->canon_exp = NULL_RTX;
elt->cost = COST (x);
+ elt->regcost = approx_reg_cost (x);
elt->next_same_value = 0;
elt->prev_same_value = 0;
elt->next_same_hash = table[hash];
int save_hash_arg_in_memory = hash_arg_in_memory;
int addr_volatile;
int regno;
- int folded_cost, addr_cost;
unsigned hash;
/* Do not try to replace constant addresses or addresses of local and
if (GET_CODE (addr) != REG)
{
rtx folded = fold_rtx (copy_rtx (addr), NULL_RTX);
-
- folded_cost = address_cost (folded, mode);
- addr_cost = address_cost (addr, mode);
-
- if ((folded_cost < addr_cost
- || (folded_cost == addr_cost
- && rtx_cost (folded, MEM) > rtx_cost (addr, MEM)))
- && rtx_cost (folded, MEM) < rtx_cost (addr, MEM)
+ int addr_folded_cost = address_cost (folded, mode);
+ int addr_cost = address_cost (addr, mode);
+
+ if ((addr_folded_cost < addr_cost
+ || (addr_folded_cost == addr_cost
+ /* ??? The rtx_cost comparison is left over from an older
+ version of this code. It is probably no longer helpful. */
+ && (rtx_cost (folded, MEM) > rtx_cost (addr, MEM)
+ || approx_reg_cost (folded) < approx_reg_cost (addr))))
&& validate_change (insn, loc, folded, 0))
addr = folded;
}
struct table_elt *src_const_elt = 0;
int src_cost = 10000, src_eqv_cost = 10000, src_folded_cost = 10000;
int src_related_cost = 10000, src_elt_cost = 10000;
+ /* Start every register cost at the same "not available" sentinel as
+ the costs above. The assignments below are skipped for a candidate
+ that is missing or equal to DEST, yet preferrable reads the register
+ costs whenever two costs tie (both 10000 or both -1); without an
+ initial value that comparison would use indeterminate data and the
+ documented tie-break order would be arbitrary. */
+ int src_regcost = 10000, src_eqv_regcost = 10000;
+ int src_folded_regcost = 10000, src_related_regcost = 10000;
+ int src_elt_regcost = 10000;
/* Set non-zero if we need to call force_const_mem on with the
contents of src_folded before using it. */
int src_folded_force_flag = 0;
if (rtx_equal_p (src, dest))
src_cost = -1;
else
- src_cost = COST (src);
+ {
+ src_cost = COST (src);
+ src_regcost = approx_reg_cost (src);
+ }
}
if (src_eqv_here)
if (rtx_equal_p (src_eqv_here, dest))
src_eqv_cost = -1;
else
- src_eqv_cost = COST (src_eqv_here);
+ {
+ src_eqv_cost = COST (src_eqv_here);
+ src_eqv_regcost = approx_reg_cost (src_eqv_here);
+ }
}
if (src_folded)
if (rtx_equal_p (src_folded, dest))
src_folded_cost = -1;
else
- src_folded_cost = COST (src_folded);
+ {
+ src_folded_cost = COST (src_folded);
+ src_folded_regcost = approx_reg_cost (src_folded);
+ }
}
if (src_related)
if (rtx_equal_p (src_related, dest))
src_related_cost = -1;
else
- src_related_cost = COST (src_related);
+ {
+ src_related_cost = COST (src_related);
+ src_related_regcost = approx_reg_cost (src_related);
+ }
}
/* If this was an indirect jump insn, a known label will really be
continue;
}
- if (elt)
- src_elt_cost = elt->cost;
+ if (elt)
+ {
+ src_elt_cost = elt->cost;
+ src_elt_regcost = elt->regcost;
+ }
- /* Find cheapest and skip it for the next time. For items
+ /* Find cheapest and skip it for the next time. For items
of equal cost, use this order:
src_folded, src, src_eqv, src_related and hash table entry. */
- if (src_folded_cost <= src_cost
- && src_folded_cost <= src_eqv_cost
- && src_folded_cost <= src_related_cost
- && src_folded_cost <= src_elt_cost)
+ if (preferrable (src_folded_cost, src_folded_regcost,
+ src_cost, src_regcost) <= 0
+ && preferrable (src_folded_cost, src_folded_regcost,
+ src_eqv_cost, src_eqv_regcost) <= 0
+ && preferrable (src_folded_cost, src_folded_regcost,
+ src_related_cost, src_related_regcost) <= 0
+ && preferrable (src_folded_cost, src_folded_regcost,
+ src_elt_cost, src_elt_regcost) <= 0)
{
trial = src_folded, src_folded_cost = 10000;
if (src_folded_force_flag)
trial = force_const_mem (mode, trial);
}
- else if (src_cost <= src_eqv_cost
- && src_cost <= src_related_cost
- && src_cost <= src_elt_cost)
+ else if (preferrable (src_cost, src_regcost,
+ src_eqv_cost, src_eqv_regcost) <= 0
+ && preferrable (src_cost, src_regcost,
+ src_related_cost, src_related_regcost) <= 0
+ && preferrable (src_cost, src_regcost,
+ src_elt_cost, src_elt_regcost) <= 0)
trial = src, src_cost = 10000;
- else if (src_eqv_cost <= src_related_cost
- && src_eqv_cost <= src_elt_cost)
+ else if (preferrable (src_eqv_cost, src_eqv_regcost,
+ src_related_cost, src_related_regcost) <= 0
+ && preferrable (src_eqv_cost, src_eqv_regcost,
+ src_elt_cost, src_elt_regcost) <= 0)
trial = copy_rtx (src_eqv_here), src_eqv_cost = 10000;
- else if (src_related_cost <= src_elt_cost)
- trial = copy_rtx (src_related), src_related_cost = 10000;
+ else if (preferrable (src_related_cost, src_related_regcost,
+ src_elt_cost, src_elt_regcost) <= 0)
+ trial = copy_rtx (src_related), src_related_cost = 10000;
else
{
trial = copy_rtx (elt->exp);
static int replace_label PARAMS ((rtx *, void *));
static rtx check_insn_for_givs PARAMS((struct loop *, rtx, int, int));
static rtx check_insn_for_bivs PARAMS((struct loop *, rtx, int, int));
+static int iv_add_mult_cost PARAMS ((rtx, rtx, rtx, rtx));
static void loop_dump_aux PARAMS ((const struct loop *, FILE *, int));
void debug_loop PARAMS ((const struct loop *));
static int biv_elimination_giv_has_0_offset PARAMS ((struct induction *,
struct induction *, rtx));
\f
-/* Relative gain of eliminating various kinds of operations. */
-static int add_cost;
-#if 0
-static int shift_cost;
-static int mult_cost;
-#endif
-
/* Benefit penalty, if a giv is not replaceable, i.e. must emit an insn to
copy the value of the strength reduced giv to its original register. */
static int copy_cost;
char *free_point = (char *) oballoc (1);
rtx reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
- add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
-
reg_address_cost = address_cost (reg, SImode);
- /* We multiply by 2 to reconcile the difference in scale between
- these two ways of computing costs. Otherwise the cost of a copy
- will be far less than the cost of an add. */
-
- copy_cost = 2 * 2;
+ copy_cost = 2;
/* Free the objects we just allocated. */
obfree (free_point);
rtx loop_start = loop->start;
rtx loop_end = loop->end;
rtx loop_scan_start = loop->scan_start;
+ rtx test_reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
VARRAY_INT_INIT (reg_iv_type, max_reg_before_loop, "reg_iv_type");
VARRAY_GENERIC_PTR_INIT (reg_iv_info, max_reg_before_loop, "reg_iv_info");
for (v = bl->giv; v; v = v->next_iv)
{
struct induction *tv;
+ int add_cost;
if (v->ignore || v->same)
continue;
benefit = v->benefit;
+ PUT_MODE (test_reg, v->mode);
+ add_cost = iv_add_mult_cost (bl->biv->add_val, v->mult_val,
+ test_reg, test_reg);
/* Reduce benefit if not replaceable, since we will insert
a move-insn to replace the insn that calculates this giv.
benefit -= copy_cost;
/* Decrease the benefit to count the add-insns that we will
- insert to increment the reduced reg for the giv. */
+ insert to increment the reduced reg for the giv.
+ ??? This can overestimate the run-time cost of the additional
+ insns, e.g. if there are multiple basic blocks that increment
+ the biv, but only one of these blocks is executed during each
+ iteration. There is no good way to detect cases like this with
+ the current structure of the loop optimizer.
+ This code is more accurate for determining code size than
+ run-time benefits. */
benefit -= add_cost * bl->biv_count;
/* Decide whether to strength-reduce this giv or to leave the code
new add insns; if so, increase BENEFIT (undo the subtraction of
add_cost that was done above). */
if (v->giv_type == DEST_ADDR
+ /* Increasing the benefit is risky, since this is only a guess.
+ Avoid increasing register pressure in cases where there would
+ be no other benefit from reducing this giv. */
+ && benefit > 0
&& GET_CODE (v->mult_val) == CONST_INT)
{
if (HAVE_POST_INCREMENT
/* Form expression from giv and add benefit. Ensure this giv
can derive another and subtract any needed adjustment if so. */
- *benefit += v->benefit;
+
+ /* Increasing the benefit here is risky. The only case in which it
+ is arguably correct is if this is the only use of V. In other
+ cases, this will artificially inflate the benefit of the current
+ giv, and lead to suboptimal code. Thus, the increase is applied
+ only when the register has a single recorded use, since
+ potentially not reducing an only marginally beneficial giv is
+ less harmful than reducing many givs that are not really
+ beneficial. */
+ {
+ rtx single_use = VARRAY_RTX (reg_single_usage, REGNO (x));
+ if (single_use && single_use != const0_rtx)
+ *benefit += v->benefit;
+ }
+
if (v->cant_derive)
return 0;
count--;
v->mult_val = *mult_val;
v->add_val = *add_val;
- v->benefit = benefit;
+ v->benefit += benefit;
}
else if (code != NOTE)
{
if (loop_dump_stream)
fprintf (loop_dump_stream,
- "giv at %d combined with giv at %d\n",
- INSN_UID (g2->insn), INSN_UID (g1->insn));
+ "giv at %d combined with giv at %d; new benefit %d + %d, lifetime %d\n",
+ INSN_UID (g2->insn), INSN_UID (g1->insn),
+ g1->benefit, g1_add_benefit, g1->lifetime);
}
}
&& GET_CODE (SET_DEST (seq)) == REG)
record_base_value (REGNO (SET_DEST (seq)), SET_SRC (seq), 0);
}
+
+/* Similar to emit_iv_add_mult, but compute cost rather than emitting
+ insns.
+ The insns are produced by expand_mult_add inside a temporary sequence
+ (start_sequence ... end_sequence), followed by a move into REG when
+ the result was placed elsewhere. The return value is the sum of
+ rtx_cost (SET_SRC, SET) over every insn of that sequence for which
+ single_set is non-null; other insns contribute nothing.
+ NOTE(review): the visible caller passes bl->biv->add_val as B and one
+ scratch register as both A and REG, so the per-parameter descriptions
+ below are only approximate for that use -- confirm. */
+static int
+iv_add_mult_cost (b, m, a, reg)
+ rtx b; /* initial value of basic induction variable */
+ rtx m; /* multiplicative constant */
+ rtx a; /* additive constant */
+ rtx reg; /* destination register */
+{
+ int cost = 0;
+ rtx last, result;
+
+ start_sequence ();
+ result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 0);
+ if (reg != result)
+ emit_move_insn (reg, result);
+ last = get_last_insn (); /* Walk the sequence backwards from its end. */
+ while (last)
+ {
+ rtx t = single_set (last);
+ if (t)
+ cost += rtx_cost (SET_SRC (t), SET);
+ last = PREV_INSN (last);
+ }
+ end_sequence ();
+ return cost;
+}
\f
/* Test whether A * B can be computed without
an actual multiply insn. Value is 1 if so. */