+2006-09-13 Richard Sandiford <richard@codesourcery.com>
+
+ PR rtl-optimization/28982
+ * reload.c (find_reloads_address_1): Use RELOAD_OTHER for the
+ index of a PRE_MODIFY or POST_MODIFY address.
+ * reload1.c (inc_for_reload): Use find_replacement on the original
+ base and index registers.
+
2006-09-12 H.J. Lu <hongjiu.lu@intel.com>
* doc/invoke.texi (mpreferred-stack-boundary): Remove exception
/* Require index register (or constant). Let's just handle the
register case in the meantime... If the target allows
auto-modify by a constant then we could try replacing a pseudo
- register with its equivalent constant where applicable. */
+ register with its equivalent constant where applicable.
+
+ If we later decide to reload the whole PRE_MODIFY or
+ POST_MODIFY, inc_for_reload might clobber the reload register
+ before reading the index. The index register might therefore
+ need to live longer than a TYPE reload normally would, so be
+ conservative and class it as RELOAD_OTHER. */
if (REG_P (XEXP (op1, 1)))
if (!REGNO_OK_FOR_INDEX_P (REGNO (XEXP (op1, 1))))
find_reloads_address_1 (mode, XEXP (op1, 1), 1, code, SCRATCH,
- &XEXP (op1, 1), opnum, type, ind_levels,
- insn);
+ &XEXP (op1, 1), opnum, RELOAD_OTHER,
+ ind_levels, insn);
gcc_assert (REG_P (XEXP (op1, 0)));
inc_for_reload (rtx reloadreg, rtx in, rtx value, int inc_amount)
{
/* REG or MEM to be copied and incremented. */
- rtx incloc = XEXP (value, 0);
+ rtx incloc = find_replacement (&XEXP (value, 0));
/* Nonzero if increment after copying. */
int post = (GET_CODE (value) == POST_DEC || GET_CODE (value) == POST_INC
|| GET_CODE (value) == POST_MODIFY);
rtx add_insn;
int code;
rtx store;
- rtx real_in = in == value ? XEXP (in, 0) : in;
+ rtx real_in = in == value ? incloc : in;
/* No hard register is equivalent to this register after
inc/dec operation. If REG_LAST_RELOAD_REG were nonzero,
if (GET_CODE (value) == PRE_MODIFY || GET_CODE (value) == POST_MODIFY)
{
gcc_assert (GET_CODE (XEXP (value, 1)) == PLUS);
- inc = XEXP (XEXP (value, 1), 1);
+ inc = find_replacement (&XEXP (XEXP (value, 1), 1));
}
else
{
+2006-09-13 Richard Sandiford <richard@codesourcery.com>
+
+ PR rtl-optimization/28982
+ * gcc.c-torture/execute/pr28982a.c: New test.
+ * gcc.c-torture/execute/pr28982b.c: Likewise.
+
2006-09-12 Eric Christopher <echristo@apple.com>
* gcc.target/x86_64/abi/asm-support-darwin.s: New.
--- /dev/null
+/* PR rtl-optimization/28982. Function foo() does the equivalent of:
+
+ float tmp_results[NVARS];
+ for (int i = 0; i < NVARS; i++)
+ {
+ int inc = incs[i];
+ float *ptr = ptrs[i], result = 0;
+ for (int j = 0; j < n; j++)
+ result += *ptr, ptr += inc;
+ tmp_results[i] = result;
+ }
+ memcpy (results, tmp_results, sizeof (results));
+
+ but with the outermost loop unrolled by hand (see MULTI below), so
+ that all NVARS pointers and increments are live at once. The idea is
+ to create high register pressure and ensure that some INC and PTR
+ variables are spilled.
+
+ On ARM targets, sequences like "result += *ptr, ptr += inc" can
+ usually be implemented using (mem (post_modify ...)), and we do
+ indeed create such MEMs before reload for this testcase. However,
+ (post_modify ...) is not a valid address for coprocessor loads, so
+ for -mfloat-abi=softfp, reload reloads the POST_MODIFY into a base
+ register. GCC did not deal correctly with cases where the base and
+ index of the POST_MODIFY are themselves reloaded. */
+#define NITER 4 /* Iteration count that main passes to foo.  */
+#define NVARS 20 /* Size of ptrs/results/incs; MULTI lists the same 20 indices.  */
+#define MULTI(X) \
+ X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \
+ X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19)
+
+#define DECLAREI(INDEX) inc##INDEX = incs[INDEX] /* Declarator: incN initialized from incs[N].  */
+#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 0 /* Declarators: ptrN from ptrs[N], resultN = 0.  */
+#define LOOP(INDEX) result##INDEX += *ptr##INDEX, ptr##INDEX += inc##INDEX /* Accumulate *ptrN, then advance ptrN by incN.  */
+#define COPYOUT(INDEX) results[INDEX] = result##INDEX /* Publish resultN to results[N].  */
+
+float *ptrs[NVARS]; /* Start pointers, set up by main.  */
+float results[NVARS]; /* Sums written by foo, checked by main.  */
+int incs[NVARS]; /* Per-variable strides in elements, set up by main.  */
+
+void __attribute__((noinline)) /* Run N rounds of "result_i += *ptr_i, ptr_i += inc_i" over all NVARS variables.  */
+foo (int n)
+{
+ int MULTI (DECLAREI); /* inc0..inc19, loaded from incs[].  */
+ float MULTI (DECLAREF); /* ptr0..ptr19 loaded from ptrs[]; result0..result19 start at 0.  */
+ while (n--)
+ MULTI (LOOP); /* A single comma expression covering every index.  */
+ MULTI (COPYOUT); /* Store each result_i into results[i].  */
+}
+
+float input[NITER * NVARS];
+
+int /* Returns 0 if every sum computed by foo matches, 1 on the first mismatch.  */
+main (void)
+{
+ int i;
+
+ for (i = 0; i < NVARS; i++)
+ ptrs[i] = input + i, incs[i] = i; /* Variable i starts at input[i] and steps by i elements.  */
+ for (i = 0; i < NITER * NVARS; i++)
+ input[i] = i; /* Every element holds its own index.  */
+ foo (NITER);
+ for (i = 0; i < NVARS; i++)
+ if (results[i] != i * NITER * (NITER + 1) / 2) /* Expected sum: i + 2*i + ... + NITER*i.  */
+ return 1;
+ return 0;
+}
--- /dev/null
+/* Like pr28982a.c, but with the spill slots outside the range of
+ a single sp-based load on ARM. This test tests for cases where
+ the addresses in the base and index reloads require further reloads. */
+#if defined(STACK_SIZE) && STACK_SIZE <= 0x80100
+int main (void) { return 0; }
+#else
+#define NITER 4 /* Iteration count that main passes to foo.  */
+#define NVARS 20 /* Size of ptrs/results/incs; MULTI lists the same 20 indices.  */
+#define MULTI(X) \
+ X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \
+ X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19)
+
+#define DECLAREI(INDEX) inc##INDEX = incs[INDEX] /* Declarator: incN initialized from incs[N].  */
+#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 0 /* Declarators: ptrN from ptrs[N], resultN = 0.  */
+#define LOOP(INDEX) result##INDEX += *ptr##INDEX, ptr##INDEX += inc##INDEX /* Accumulate *ptrN, then advance ptrN by incN.  */
+#define COPYOUT(INDEX) results[INDEX] = result##INDEX /* Publish resultN to results[N].  */
+
+float *ptrs[NVARS]; /* Start pointers, set up by main.  */
+float results[NVARS]; /* Sums written by foo, checked by main.  */
+int incs[NVARS]; /* Per-variable strides in elements, set up by main.  */
+
+struct big { int i[0x10000]; }; /* 0x10000 ints, always handled by value in this test.  */
+void __attribute__((noinline)) /* Takes a struct big by value and adds its first element to incs[0].  */
+bar (struct big b)
+{
+ incs[0] += b.i[0];
+}
+
+void __attribute__((noinline)) /* Same loop as pr28982a.c's foo, plus a large local that is passed to bar.  */
+foo (int n)
+{
+ struct big b = {}; /* Zero-filled; the file's header comment says the aim is spill slots out of single-load range of sp.  */
+ int MULTI (DECLAREI); /* inc0..inc19, loaded from incs[].  */
+ float MULTI (DECLAREF); /* ptr0..ptr19 loaded from ptrs[]; result0..result19 start at 0.  */
+ while (n--)
+ MULTI (LOOP); /* A single comma expression covering every index.  */
+ MULTI (COPYOUT); /* Store each result_i into results[i].  */
+ bar (b); /* Runs after the results are stored; b.i[0] is 0, so incs[0] keeps its value.  */
+}
+
+float input[NITER * NVARS];
+
+int /* Returns 0 if every sum computed by foo matches, 1 on the first mismatch.  */
+main (void)
+{
+ int i;
+
+ for (i = 0; i < NVARS; i++)
+ ptrs[i] = input + i, incs[i] = i; /* Variable i starts at input[i] and steps by i elements.  */
+ for (i = 0; i < NITER * NVARS; i++)
+ input[i] = i; /* Every element holds its own index.  */
+ foo (NITER);
+ for (i = 0; i < NVARS; i++)
+ if (results[i] != i * NITER * (NITER + 1) / 2) /* Expected sum: i + 2*i + ... + NITER*i.  */
+ return 1;
+ return 0;
+}
+#endif