+2010-04-08 Christian Borntraeger <borntraeger@de.ibm.com>
+ Wolfgang Gellerich <gellerich@de.ibm.com>
+
+ Implement target hook for loop unrolling
+ * target.h (loop_unroll_adjust): Add a new target hook function.
+ * target-def.h (TARGET_LOOP_UNROLL_ADJUST): Likewise.
+ * doc/tm.texi (TARGET_LOOP_UNROLL_ADJUST): Document it.
+ * config/s390/s390.c (TARGET_LOOP_UNROLL_ADJUST): Define it.
+ (s390_loop_unroll_adjust): Implement the new target hook for s390.
+ * loop-unroll.c (decide_unroll_runtime_iterations): Call loop unroll target hook
+ (decide_unroll_stupid): Likewise.
+
2010-04-08 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
PR target/43643
#include "gimple.h"
#include "df.h"
#include "params.h"
+#include "cfgloop.h"
/* Define the specific costs for a given cpu. */
last_scheduled_insn = NULL_RTX;
}
+/* This function checks the whole of insn X for memory references. The
+ function always returns zero because the framework it is called
+ from would stop recursively analyzing the insn upon a return value
+ other than zero. The real result of this function is updating
+ counter variable MEM_COUNT. */
+static int
+check_dpu (rtx *x, unsigned *mem_count)
+{
+ if (*x != NULL_RTX && MEM_P (*x))
+ (*mem_count)++;
+ return 0;
+}
+
+/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
+ a new number struct loop *loop should be unrolled if tuned for the z10
+ cpu. The loop is analyzed for memory accesses by calling check_dpu for
+ each rtx of the loop. Depending on the loop_depth and the amount of
+ memory accesses a new number <=nunroll is returned to improve the
+ behaviour of the hardware prefetch unit. */
+static unsigned
+s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
+{
+ basic_block *bbs;
+ rtx insn;
+ unsigned i;
+ unsigned mem_count = 0;
+
+ /* Only z10 needs special handling. */
+ if (s390_tune != PROCESSOR_2097_Z10)
+ return nunroll;
+
+ /* Count the number of memory references within the loop body. */
+ bbs = get_loop_body (loop);
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
+ if (INSN_P (insn) && INSN_CODE (insn) != -1)
+ for_each_rtx (&insn, (rtx_function) check_dpu, &mem_count);
+ }
+ free (bbs);
+
+ /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
+ if (mem_count == 0)
+ return nunroll;
+
+ switch (loop_depth(loop))
+ {
+ case 1:
+ return MIN (nunroll, 28 / mem_count);
+ case 2:
+ return MIN (nunroll, 22 / mem_count);
+ default:
+ return MIN (nunroll, 16 / mem_count);
+ }
+}
+
/* Initialize GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
+
#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
modes and they have different conditional execution capability, such as ARM.
@end deftypefn
+@deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, struct loop *@var{loop})
+This target hook returns a new value for the number of times @var{loop}
+should be unrolled. The parameter @var{nunroll} is the number of times
+the loop is to be unrolled. The parameter @var{loop} is a pointer to
+the loop, which is going to be checked for unrolling. This target hook
+is required only when the target has special constraints like maximum
+number of memory accesses.
+@end deftypefn
+
@defmac POWI_MAX_MULTS
If defined, this macro is interpreted as a signed integer C expression
that specifies the maximum number of floating point multiplications
#include "expr.h"
#include "hashtab.h"
#include "recog.h"
+#include "target.h"
/* This pass performs loop unrolling and peeling. We only perform these
optimizations on innermost loops (with single exception) because
if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+ if (targetm.loop_unroll_adjust)
+ nunroll = targetm.loop_unroll_adjust (nunroll, loop);
+
/* Skip big loops. */
if (nunroll <= 1)
{
if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+ if (targetm.loop_unroll_adjust)
+ nunroll = targetm.loop_unroll_adjust (nunroll, loop);
+
/* Skip big loops. */
if (nunroll <= 1)
{
default_branch_target_register_class
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED hook_bool_bool_false
#define TARGET_HAVE_CONDITIONAL_EXECUTION default_have_conditional_execution
+#define TARGET_LOOP_UNROLL_ADJUST NULL
#define TARGET_CANNOT_FORCE_CONST_MEM hook_bool_rtx_false
#define TARGET_CANNOT_COPY_INSN_P NULL
#define TARGET_COMMUTATIVE_P hook_bool_const_rtx_commutative_p
TARGET_BRANCH_TARGET_REGISTER_CLASS, \
TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED, \
TARGET_HAVE_CONDITIONAL_EXECUTION, \
+ TARGET_LOOP_UNROLL_ADJUST, \
TARGET_CANNOT_FORCE_CONST_MEM, \
TARGET_CANNOT_COPY_INSN_P, \
TARGET_COMMUTATIVE_P, \
/* This is defined in ddg.h . */
struct ddg;
+/* This is defined in cfgloop.h . */
+struct loop;
+
/* Assembler instructions for creating various kinds of integer object. */
struct asm_int_op
/* Return true if the target supports conditional execution. */
bool (* have_conditional_execution) (void);
+ /* Return a new value for loop unroll size. */
+ unsigned (* loop_unroll_adjust) (unsigned nunroll, struct loop *loop);
+
/* True if the constant X cannot be placed in the constant pool. */
bool (* cannot_force_const_mem) (rtx);