#include "msp430-devices.h"
#include "incpath.h"
#include "prefix.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "recog.h"
/* This file should be included last. */
#include "target-def.h"
static void msp430_compute_frame_info (void);
static bool use_32bit_hwmult (void);
+static bool use_helper_for_const_shift (machine_mode mode, HOST_WIDE_INT amt);
\f
2, 2, 3
};
+struct msp430_multlib_costs
+{
+ const int mulhi;
+ const int mulsi;
+ const int muldi;
+};
+
+/* There is no precise size cost when using libcalls; instead, libcalls are
+ simply disparaged relative to other instructions.
+ The cycle costs are from the CALL to the RET, inclusive.
+ FIXME: the muldi cost is not accurate. */
+static const struct msp430_multlib_costs cycle_cost_multlib_32bit =
+{
+ 27, 33, 66
+};
+
+/* A 32-bit multiply needs a few extra instructions when only the 16-bit
+ hardware multiplier is available. */
+static const struct msp430_multlib_costs cycle_cost_multlib_16bit =
+{
+ 27, 42, 66
+};
+
/* TARGET_REGISTER_MOVE_COST
There is only one class of general-purpose, non-fixed registers, and the
relative cost of moving data between them is always the same.
because there are no conditional move insns - when a condition is involved,
the only option is to use a cbranch. */
-#undef TARGET_RTX_COSTS
-#define TARGET_RTX_COSTS msp430_rtx_costs
+/* For X, which must be a MEM RTX, return TRUE if it is an indirect memory
+ reference, @Rn or @Rn+. */
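+/* An illustrative sketch (register numbers chosen arbitrarily): the @Rn form
+ (mem (reg R10)) and the @Rn+ form (mem (post_inc (reg R10))) count as
+ indirect, whereas the indexed form X(Rn), i.e.
+ (mem (plus (reg R10) (const_int 4))), does not. */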
+static bool
+msp430_is_mem_indirect (rtx x)
+{
+ gcc_assert (GET_CODE (x) == MEM);
+ rtx op0 = XEXP (x, 0);
+ return (GET_CODE (op0) == REG || GET_CODE (op0) == POST_INC);
+}
+
+/* Costs of MSP430 instructions are generally based on the addressing mode
+ combination of the source and destination operands.
+ Given source operand SRC and destination operand DST, return the cost of
+ this expression. For a single-operand instruction, the caller passes its
+ sole operand as both SRC and DST. */
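+/* Illustrative examples (not part of the cost tables themselves): for
+ MOV R5, 2(R6) the source is a register and the destination is an indexed
+ memory operand, so the r2m cost applies; for MOV @R5+, R6 the indirect
+ source and register destination select the ind2r cost. */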
+static int
+msp430_costs (rtx src, rtx dst, bool speed, rtx outer_rtx)
+{
+ enum rtx_code src_code = GET_CODE (src);
+ enum rtx_code dst_code = GET_CODE (dst);
+ enum rtx_code outer_code = GET_CODE (outer_rtx);
+ machine_mode outer_mode = GET_MODE (outer_rtx);
+ const struct double_op_cost *cost_p;
+ cost_p = (speed ? &cycle_cost_double_op : &size_cost_double_op);
+
+ if (outer_code == TRUNCATE
+ && (outer_mode == QImode
+ || outer_mode == HImode
+ || outer_mode == PSImode))
+ /* Truncation to these modes is normally free as a side effect of the
+ instructions themselves. */
+ return 0;
+
+ if (dst_code == SYMBOL_REF
+ || dst_code == LABEL_REF
+ || dst_code == CONST_INT)
+ /* Catch RTX like (minus (const_int 0) (reg)) but don't add any cost. */
+ return 0;
+
+ switch (src_code)
+ {
+ case REG:
+ return (dst_code == REG ? cost_p->r2r
+ : (dst_code == PC ? cost_p->r2pc : cost_p->r2m));
+
+ case CONST_INT:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ case CONST:
+ return (dst_code == REG ? cost_p->imm2r
+ : (dst_code == PC ? cost_p->imm2pc : cost_p->imm2m));
+
+ case MEM:
+ if (msp430_is_mem_indirect (src))
+ return (dst_code == REG ? cost_p->ind2r : (dst_code == PC
+ ? cost_p->ind2pc
+ : cost_p->ind2m));
+ else
+ return (dst_code == REG ? cost_p->mem2r : (dst_code == PC
+ ? cost_p->mem2pc
+ : cost_p->mem2m));
+ default:
+ return cost_p->mem2m;
+ }
+}
+
+/* Given source operand SRC and destination operand DST from the shift or
+ rotate RTX OUTER_RTX, return the cost of performing that shift, assuming
+ optimization for speed when SPEED is true. */
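+/* Illustrative example: on a non-430X device, when
+ use_helper_for_const_shift declines to use a helper, a constant shift is
+ emitted as one single-operand insn per bit position, so shifting a
+ register by 3 costs 3 * the single-op register cost. */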
+static int
+msp430_shift_costs (rtx src, rtx dst, bool speed, rtx outer_rtx)
+{
+ int amt;
+ enum rtx_code src_code = GET_CODE (src);
+ enum rtx_code dst_code = GET_CODE (dst);
+ const struct single_op_cost *cost_p;
+
+ cost_p = (speed ? &cycle_cost_single_op : &size_cost_single_op);
+
+ if (src_code != CONST_INT)
+ /* The size or speed cost when the shift amount is unknown cannot be
+ accurately calculated, so just disparage it slightly. */
+ return 2 * msp430_costs (src, dst, speed, outer_rtx);
-static bool msp430_rtx_costs (rtx x ATTRIBUTE_UNUSED,
- machine_mode mode,
- int outer_code ATTRIBUTE_UNUSED,
- int opno ATTRIBUTE_UNUSED,
- int * total,
- bool speed ATTRIBUTE_UNUSED)
+ amt = INTVAL (src);
+ if (use_helper_for_const_shift (GET_MODE (outer_rtx), amt))
+ {
+ /* GCC sometimes tries to perform shifts in some very inventive ways,
+ resulting in much larger code size usage than necessary, if
+ they are disparaged too much here. So in general, if
+ use_helper_for_const_shift thinks a helper should be used, obey
+ that and don't disparage the shift any more than a regular
+ instruction, even though the shift may actually cost more.
+ This ensures that the RTL generated at the initial expand pass has the
+ expected shift instructions, which can be mapped to the helper
+ functions. */
+ return msp430_costs (src, dst, speed, outer_rtx);
+ }
+
+ if (!msp430x)
+ {
+ /* Each shift by one place will be emitted individually. */
+ switch (dst_code)
+ {
+ case REG:
+ case CONST_INT:
+ return amt * cost_p->reg;
+ case MEM:
+ if (msp430_is_mem_indirect (dst))
+ return amt * cost_p->ind;
+ else
+ return amt * cost_p->mem;
+ default:
+ return amt * cost_p->mem;
+ }
+ }
+
+ /* RRAM, RRCM, RRUM and RLAM are used for shift counts <= 4; otherwise the
+ 'X' versions are used.
+ Instructions which shift a MEM operand will never actually be output: the
+ operand is always copied into a register first to allow efficient
+ shifting, so in that case the cost of the additional copy is added. */
+ return ((amt <= 4 ? (speed ? amt : 1) : (speed ? amt + 1 : 2))
+ + (dst_code == REG ? 0
+ : msp430_costs (dst, gen_rtx_REG (HImode, 10), speed, outer_rtx)));
+}
+
+/* Given source operand SRC and destination operand DST from the MULT/DIV/MOD
+ RTX OUTER_RTX, return the cost of performing that operation, assuming
+ optimization for speed when SPEED is true. */
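+/* Illustrative examples: with hardware multiply support, when optimizing for
+ speed, an HImode multiply costs the mulhi table entry plus three operand
+ copies; without hardware multiply support, a fixed factor (50 for MULT,
+ 70 for division), scaled by the mode factor, multiplies the basic operand
+ cost instead. */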
+static int
+msp430_muldiv_costs (rtx src, rtx dst, bool speed, rtx outer_rtx,
+ machine_mode outer_mode)
{
- int code = GET_CODE (x);
+ enum rtx_code outer_code = GET_CODE (outer_rtx);
+ const struct msp430_multlib_costs *cost_p;
+ bool hwmult_16bit = (msp430_has_hwmult () && !(msp430_use_f5_series_hwmult ()
+ || use_32bit_hwmult ()));
+ cost_p = (hwmult_16bit
+ ? &cycle_cost_multlib_16bit
+ : &cycle_cost_multlib_32bit);
+
+ int factor = 1;
+ /* Only used in some calculations. */
+ int mode_factor = 1;
+ if (outer_mode == SImode)
+ mode_factor = 2;
+ else if (outer_mode == PSImode)
+ /* PSImode multiplication is performed using SImode operands, so has extra
+ cost to factor in the conversions necessary before/after the
+ operation. */
+ mode_factor = 3;
+ else if (outer_mode == DImode)
+ mode_factor = 4;
+
+ if (!speed)
+ {
+ /* The codesize cost of using a helper function to perform the
+ multiplication or division cannot be accurately calculated, since the
+ cost depends on how many times the operation is performed in the
+ entire program. */
+ if (outer_code != MULT)
+ /* Division is always expensive. */
+ factor = 7;
+ else if (((hwmult_16bit && outer_mode != DImode)
+ || use_32bit_hwmult () || msp430_use_f5_series_hwmult ()))
+ /* When the hardware multiplier is available, only disparage
+ slightly. */
+ factor = 2;
+ else
+ factor = 5;
+ return factor * mode_factor * msp430_costs (src, dst, speed, outer_rtx);
+ }
+
+ /* When there is hardware multiply support, there is a relatively low, fixed
+ cycle cost to performing any multiplication, but when there is no hardware
+ multiply support it is very costly. That precise cycle cost has not been
+ calculated here.
+ Division is extra slow since it always uses a software library.
+ The 16-bit hardware multiply library cannot be used to produce 64-bit
+ results. */
+ if (outer_code != MULT || !msp430_has_hwmult ()
+ || (outer_mode == DImode && hwmult_16bit))
+ {
+ factor = (outer_code == MULT ? 50 : 70);
+ return factor * mode_factor * msp430_costs (src, dst, speed, outer_rtx);
+ }
+
+ switch (outer_mode)
+ {
+ case E_QImode:
+ case E_HImode:
+ /* Include the cost of copying the operands into and out of the hardware
+ multiply routine. */
+ return cost_p->mulhi + (3 * msp430_costs (src, dst, speed, outer_rtx));
+
+ case E_PSImode:
+ /* Extra factor for the conversions necessary to do PSI->SI before the
+ operation. */
+ factor = 2;
+ /* fallthru. */
+ case E_SImode:
+ return factor * (cost_p->mulsi
+ + (6 * msp430_costs (src, dst, speed, outer_rtx)));
+
+ case E_DImode:
+ default:
+ return cost_p->muldi + (12 * msp430_costs (src, dst, speed, outer_rtx));
+ }
+}
+
+/* Recurse within X to find the actual destination operand of the expression.
+ For example:
+ (plus (ashift (minus (ashift (reg)
+ (const_int) ......
+ should return the reg RTX. */
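+/* Two further illustrative cases (added for clarity, not exhaustive):
+ (mem (plus (reg) (const_int))) returns the MEM itself, while
+ (zero_extend (mem (reg))) returns the inner MEM. */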
+static rtx
+msp430_get_inner_dest_code (rtx x)
+{
+ enum rtx_code code = GET_CODE (x);
+ rtx op0 = XEXP (x, 0);
switch (code)
{
- case SIGN_EXTEND:
- if (mode == SImode && outer_code == SET)
+ case REG:
+ case SYMBOL_REF:
+ case CONST_INT:
+ case CONST:
+ case LABEL_REF:
+ return x;
+
+ case MEM:
+ /* Return the MEM expr itself, not the inner REG, for these cases. */
+ switch (GET_CODE (op0))
{
- *total = COSTS_N_INSNS (4);
- return true;
+ case REG:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ case CONST:
+ case POST_INC:
+ return x;
+
+ case PLUS:
+ /* Return the whole MEM for (mem (plus (reg) (const))). */
+ if (GET_CODE (XEXP (op0, 0)) == REG)
+ {
+ if (GET_CODE (XEXP (op0, 1)) == CONST_INT
+ || GET_CODE (XEXP (op0, 1)) == CONST
+ || GET_CODE (XEXP (op0, 1)) == LABEL_REF
+ || GET_CODE (XEXP (op0, 1)) == SYMBOL_REF)
+ return x;
+ else
+ return msp430_get_inner_dest_code (op0);
+ }
+ return msp430_get_inner_dest_code (op0);
+
+ default:
+ if (GET_RTX_FORMAT (code)[0] != 'e')
+ return x;
+ return msp430_get_inner_dest_code (op0);
}
break;
+
+ default:
+ if (op0 == NULL_RTX)
+ gcc_unreachable ();
+ else
+ {
+ if (GET_RTX_FORMAT (code)[0] != 'e'
+ && code != ENTRY_VALUE)
+ return x;
+ return msp430_get_inner_dest_code (op0);
+ }
+ }
+}
+
+/* Calculate the cost of an MSP430 single-operand instruction, for operand DST
+ within the RTX OUTER_RTX, optimizing for speed if SPEED is true. */
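+/* For instance (illustrative only): SXT with a register operand costs the
+ single-op register entry, while a constant operand is assumed to be copied
+ into a register first (imm2r plus the register cost). */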
+static int
+msp430_single_op_cost (rtx dst, bool speed, rtx outer_rtx)
+{
+ enum rtx_code dst_code = GET_CODE (dst);
+ const struct single_op_cost *cost_p;
+ const struct double_op_cost *double_op_cost_p;
+
+ cost_p = (speed ? &cycle_cost_single_op : &size_cost_single_op);
+ double_op_cost_p = (speed ? &cycle_cost_double_op : &size_cost_double_op);
+
+ switch (dst_code)
+ {
+ case REG:
+ return cost_p->reg;
+ case MEM:
+ if (msp430_is_mem_indirect (dst))
+ return cost_p->ind;
+ else
+ return cost_p->mem;
+
+ case CONST_INT:
+ case CONST_FIXED:
+ case CONST_DOUBLE:
+ case SYMBOL_REF:
+ case CONST:
+ /* A constant value would need to be copied into a register first. */
+ return double_op_cost_p->imm2r + cost_p->reg;
+
+ default:
+ return cost_p->mem;
+ }
+}
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS msp430_rtx_costs
+
+/* This target hook describes the relative costs of RTL expressions.
+ The function recurses down to just above the lowest level of the
+ expression, so that both operands of an expression can be examined at the
+ same time, because the cost of the expression depends on the specific
+ addressing mode combination of those operands.
+ The hook returns true when all subexpressions of X have been processed, and
+ false when rtx_cost should recurse. */
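+/* A sketch of the intent (not an exhaustive trace): when costing
+ (plus (reg) (mem (reg))), the PLUS case below finds the inner operands and
+ charges the indirect-source, register-destination (ind2r) cost once,
+ rather than costing the inner MEM in isolation. */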
+static bool
+msp430_rtx_costs (rtx x,
+ machine_mode mode,
+ int outer_code,
+ int opno ATTRIBUTE_UNUSED,
+ int * total,
+ bool speed)
+{
+ enum rtx_code code = GET_CODE (x);
+ rtx dst, src;
+ rtx dst_inner, src_inner;
+
+ *total = 0;
+ dst = XEXP (x, 0);
+ if (GET_RTX_LENGTH (code) == 1)
+ /* Some RTX that are single-op in GCC are double-op when translated to
+ MSP430 instructions, e.g. NOT, NEG, ZERO_EXTEND. */
+ src = dst;
+ else
+ src = XEXP (x, 1);
+
+ switch (code)
+ {
+ case SET:
+ /* Ignoring SET improves codesize. */
+ if (!speed)
+ return true;
+ /* fallthru. */
+ case PLUS:
+ if (outer_code == MEM)
+ /* Do not add any cost for the plus itself, but recurse in case there
+ are more complicated RTX inside. */
+ return false;
+ /* fallthru. */
+ case MINUS:
+ case AND:
+ case IOR:
+ case XOR:
+ case NOT:
+ case ZERO_EXTEND:
+ case TRUNCATE:
+ case NEG:
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ case IF_THEN_ELSE:
+ dst_inner = msp430_get_inner_dest_code (dst);
+ src_inner = msp430_get_inner_dest_code (src);
+ *total = COSTS_N_INSNS (msp430_costs (src_inner, dst_inner, speed, x));
+ if (mode == SImode)
+ *total *= 2;
+ if (mode == DImode)
+ *total *= 4;
+ return false;
+
+ case ROTATE:
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ dst_inner = msp430_get_inner_dest_code (dst);
+ src_inner = msp430_get_inner_dest_code (src);
+ *total = COSTS_N_INSNS (msp430_shift_costs (src_inner, dst_inner,
+ speed, x));
+ if (mode == SImode)
+ *total *= 2;
+ if (mode == DImode)
+ *total *= 4;
+ return false;
+
+ case MULT:
+ case DIV:
+ case MOD:
+ case UDIV:
+ case UMOD:
+ dst_inner = msp430_get_inner_dest_code (dst);
+ src_inner = msp430_get_inner_dest_code (src);
+ *total = COSTS_N_INSNS (msp430_muldiv_costs (src_inner, dst_inner, speed,
+ x, mode));
+ return false;
+
+ case CALL:
+ case SIGN_EXTEND:
+ dst_inner = msp430_get_inner_dest_code (dst);
+ *total = COSTS_N_INSNS (msp430_single_op_cost (dst_inner, speed, x));
+ if (mode == SImode)
+ *total *= 2;
+ if (mode == DImode)
+ *total *= 4;
+ return false;
+
+ case CONST_INT:
+ case CONST_FIXED:
+ case CONST_DOUBLE:
+ case SYMBOL_REF:
+ case CONST:
+ case LABEL_REF:
+ case REG:
+ case PC:
+ case POST_INC:
+ if (mode == SImode)
+ *total = COSTS_N_INSNS (2);
+ else if (mode == DImode)
+ *total = COSTS_N_INSNS (4);
+ return true;
+
+ case MEM:
+ /* PSImode operands are expensive when in memory. */
+ if (mode == PSImode)
+ *total = COSTS_N_INSNS (1);
+ else if (mode == SImode)
+ *total = COSTS_N_INSNS (2);
+ else if (mode == DImode)
+ *total = COSTS_N_INSNS (4);
+ /* Recurse into the MEM. */
+ return false;
+
+ case EQ:
+ case NE:
+ case GT:
+ case GTU:
+ case GE:
+ case GEU:
+ case LT:
+ case LTU:
+ case LE:
+ case LEU:
+ /* The comparison codes all cost roughly the same; changing their relative
+ costs has no effect. */
+ return false;
+
+ case ASM_OPERANDS:
+ case ASM_INPUT:
+ case CLOBBER:
+ case COMPARE:
+ case CONCAT:
+ case ENTRY_VALUE:
+ /* Other unhandled expressions. */
+ return false;
+
+ default:
+ return false;
}
- return false;
}
\f
/* Function Entry and Exit */
/* Return TRUE if the helper function should be used and FALSE if the shift
insns should be emitted inline. */
static bool
-use_helper_for_const_shift (enum rtx_code code, machine_mode mode,
- HOST_WIDE_INT amt)
+use_helper_for_const_shift (machine_mode mode, HOST_WIDE_INT amt)
{
const int default_inline_shift = 4;
/* We initialize the option to 65 so we know if the user set it or not. */
the heuristic accordingly. */
int max_inline_32 = max_inline / 2;
+ if (mode == E_DImode)
+ return true;
+
/* Don't use helpers for these modes on 430X, when optimizing for speed, or
when emitting a small number of insns. */
if ((mode == E_QImode || mode == E_HImode || mode == E_PSImode)
constant. */
if (!CONST_INT_P (operands[2])
|| mode == E_DImode
- || use_helper_for_const_shift (code, mode, INTVAL (operands[2])))
+ || use_helper_for_const_shift (mode, INTVAL (operands[2])))
{
const char *helper_name = NULL;
/* The const variants of mspabi shifts have significantly larger code