From: Bob Wilson Date: Thu, 20 Dec 2007 22:35:59 +0000 (+0000) Subject: xtensa.md (fix_return_addr): Remove. X-Git-Tag: releases/gcc-4.3.0~857 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7f0ee69424f02f4c46bc13e6a3c77248b4d04bbe;p=thirdparty%2Fgcc.git xtensa.md (fix_return_addr): Remove. * config/xtensa/xtensa.md (fix_return_addr): Remove. * config/xtensa/xtensa-protos.h (xtensa_initialize_trampoline): New. (xtensa_trampoline_template): New. * config/xtensa/xtensa.c (MIN_FRAME_SIZE): Moved here from xtensa.h. (xtensa_return_addr): Expand to standard Xtensa insns instead of fix_return_addr. Get high bits from a local label. (xtensa_trampoline_template): New function with code moved from TRAMPOLINE_TEMPLATE in xtensa.h. Use L32R instead of CALL0 except when using CONST16 or absolute-mode literals. (xtensa_initialize_trampoline): New function with code moved from INITIALIZE_TRAMPOLINE in xtensa.h. Use different offsets depending on which trampoline version is used. * config/xtensa/lib2funcs.S (TRAMPOLINE_SIZE): Add comment. * config/xtensa/xtensa.h (TARGET_ABSOLUTE_LITERALS): Define. (MIN_FRAME_SIZE): Moved to xtensa.c. (TRAMPOLINE_TEMPLATE): Use xtensa_trampoline_template. (TRAMPOLINE_SIZE): Two versions of the trampoline have different sizes. (INITIALIZE_TRAMPOLINE): Use xtensa_initialize_trampoline. * config/xtensa/ieee754-df.S (XCHAL_NO_MUL): Define. (__muldf3): Use CALL12 instead of CALL0 to invoke .Lmul_mulsi3 helper when not using the CALL0 ABI. Change .Lmul_mulsi3 to match. * config/xtensa/lib1funcs.asm (__umulsidi3): Likewise. * config/xtensa/ieee754-sf.S (__mulsf3): Likewise. From-SVN: r131108 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 33922e029067..9674a7c0877f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,29 @@ +2007-12-20 Bob Wilson + + * config/xtensa/xtensa.md (fix_return_addr): Remove. + * config/xtensa/xtensa-protos.h (xtensa_initialize_trampoline): New. + (xtensa_trampoline_template): New. + * config/xtensa/xtensa.c (MIN_FRAME_SIZE): Moved here from xtensa.h. + (xtensa_return_addr): Expand to standard Xtensa insns instead of + fix_return_addr. Get high bits from a local label. + (xtensa_trampoline_template): New function with code moved from + TRAMPOLINE_TEMPLATE in xtensa.h. Use L32R instead of CALL0 except + when using CONST16 or absolute-mode literals. + (xtensa_initialize_trampoline): New function with code moved from + INITIALIZE_TRAMPOLINE in xtensa.h. Use different offsets depending + on which trampoline version is used. + * config/xtensa/lib2funcs.S (TRAMPOLINE_SIZE): Add comment. + * config/xtensa/xtensa.h (TARGET_ABSOLUTE_LITERALS): Define. + (MIN_FRAME_SIZE): Moved to xtensa.c. + (TRAMPOLINE_TEMPLATE): Use xtensa_trampoline_template. + (TRAMPOLINE_SIZE): Two versions of the trampoline have different sizes. + (INITIALIZE_TRAMPOLINE): Use xtensa_initialize_trampoline. + * config/xtensa/ieee754-df.S (XCHAL_NO_MUL): Define. + (__muldf3): Use CALL12 instead of CALL0 to invoke .Lmul_mulsi3 + helper when not using the CALL0 ABI. Change .Lmul_mulsi3 to match. + * config/xtensa/lib1funcs.asm (__umulsidi3): Likewise. + * config/xtensa/ieee754-sf.S (__mulsf3): Likewise. + 2007-12-20 Jakub Jelinek PR c++/34459 diff --git a/gcc/config/xtensa/ieee754-df.S b/gcc/config/xtensa/ieee754-df.S index 711b10c9df31..381e6ce31f34 100644 --- a/gcc/config/xtensa/ieee754-df.S +++ b/gcc/config/xtensa/ieee754-df.S @@ -1,5 +1,5 @@ /* IEEE-754 double-precision functions for Xtensa - Copyright (C) 2006 Free Software Foundation, Inc. + Copyright (C) 2006, 2007 Free Software Foundation, Inc. Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. This file is part of GCC. @@ -607,6 +607,10 @@ __subdf3: #ifdef L_muldf3 /* Multiplication */ +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + __muldf3_aux: /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). @@ -728,13 +732,19 @@ __muldf3_aux: .global __muldf3 .type __muldf3, @function __muldf3: - leaf_entry sp, 32 #if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 addi sp, sp, -32 s32i a12, sp, 16 s32i a13, sp, 20 s32i a14, sp, 24 s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 64 +#else + leaf_entry sp, 32 #endif movi a6, 0x7ff00000 @@ -809,7 +819,7 @@ __muldf3: muluh xh, xh, yh add xh, xh, a9 -#else +#else /* ! XCHAL_HAVE_MUL32_HIGH */ /* Break the inputs into 16-bit chunks and compute 16 32-bit partial products. These partial products are: @@ -847,7 +857,7 @@ __muldf3: /* Save a7 since it is needed to hold a temporary value. */ s32i a7, sp, 4 -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL /* Calling a separate multiply function will clobber a0 and requires use of a8 as a temporary, so save those values now. (The function uses a custom ABI so nothing else needs to be saved.) */ @@ -915,12 +925,21 @@ __muldf3: #define set_arg_h(dst, src) \ srli dst, src, 16 +#if __XTENSA_CALL0_ABI__ #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ set_arg_ ## xhalf (a13, xreg); \ set_arg_ ## yhalf (a14, yreg); \ call0 .Lmul_mulsi3; \ mov dst, a12 -#endif +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ /* Add pp1 and pp2 into a10 with carry-out in a9. */ do_mul(a10, xl, l, yl, h) /* pp 1 */ @@ -1032,11 +1051,11 @@ __muldf3: /* Restore values saved on the stack during the multiplication. */ l32i a7, sp, 4 -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL l32i a0, sp, 0 l32i a8, sp, 8 #endif -#endif +#endif /* ! XCHAL_HAVE_MUL32_HIGH */ /* Shift left by 12 bits, unless there was a carry-out from the multiply, in which case, shift by 11 bits and increment the @@ -1157,38 +1176,47 @@ __muldf3: movi xl, 0 j .Lmul_done -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#if XCHAL_NO_MUL /* For Xtensa processors with no multiply hardware, this simplified version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. It uses a custom ABI: the inputs - are passed in a13 and a14, the result is returned in a12, and - a8 and a15 are clobbered. */ + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ .align 4 .Lmul_mulsi3: - movi a12, 0 -.Lmul_mult_loop: - add a15, a14, a12 - extui a8, a13, 0, 1 - movnez a12, a15, a8 - - do_addx2 a15, a14, a12, a15 - extui a8, a13, 1, 1 - movnez a12, a15, a8 - - do_addx4 a15, a14, a12, a15 - extui a8, a13, 2, 1 - movnez a12, a15, a8 - - do_addx8 a15, a14, a12, a15 - extui a8, a13, 3, 1 - movnez a12, a15, a8 - - srli a13, a13, 4 - slli a14, a14, 4 - bnez a13, .Lmul_mult_loop - ret -#endif /* !MUL16 && !MUL32 && !MAC16 */ + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ #endif /* L_muldf3 */ #ifdef L_divdf3 diff --git a/gcc/config/xtensa/ieee754-sf.S b/gcc/config/xtensa/ieee754-sf.S index a75e742898b7..abb641d9456b 100644 --- a/gcc/config/xtensa/ieee754-sf.S +++ b/gcc/config/xtensa/ieee754-sf.S @@ -1,5 +1,5 @@ /* IEEE-754 single-precision functions for Xtensa - Copyright (C) 2006 Free Software Foundation, Inc. + Copyright (C) 2006, 2007 Free Software Foundation, Inc. Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. This file is part of GCC. @@ -488,6 +488,10 @@ __subsf3: #ifdef L_mulsf3 /* Multiplication */ +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + __mulsf3_aux: /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). @@ -570,13 +574,19 @@ __mulsf3_aux: .global __mulsf3 .type __mulsf3, @function __mulsf3: - leaf_entry sp, 32 #if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 addi sp, sp, -32 s32i a12, sp, 16 s32i a13, sp, 20 s32i a14, sp, 24 s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 64 +#else + leaf_entry sp, 32 #endif movi a6, 0x7f800000 @@ -633,7 +643,7 @@ __mulsf3: chunks can be extracted when setting up the arguments to the separate multiply function. */ -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL /* Calling a separate multiply function will clobber a0 and requires use of a8 as a temporary, so save those values now. (The function uses a custom ABI so nothing else needs to be saved.) */ @@ -693,12 +703,21 @@ __mulsf3: #define set_arg_h(dst, src) \ srli dst, src, 16 +#if __XTENSA_CALL0_ABI__ #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ set_arg_ ## xhalf (a13, xreg); \ set_arg_ ## yhalf (a14, yreg); \ call0 .Lmul_mulsi3; \ mov dst, a12 -#endif +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ /* Add pp1 and pp2 into a6 with carry-out in a9. */ do_mul(a6, a2, l, a3, h) /* pp 1 */ @@ -724,12 +743,12 @@ __mulsf3: do_mul(a2, a2, h, a3, h) /* pp 3 */ add a2, a2, a9 -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL /* Restore values saved on the stack during the multiplication. */ l32i a0, sp, 0 l32i a8, sp, 4 #endif -#endif +#endif /* ! XCHAL_HAVE_MUL32_HIGH */ /* Shift left by 9 bits, unless there was a carry-out from the multiply, in which case, shift by 8 bits and increment the @@ -825,38 +844,47 @@ __mulsf3: slli a2, a2, 31 j .Lmul_done -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#if XCHAL_NO_MUL /* For Xtensa processors with no multiply hardware, this simplified version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. It uses a custom ABI: the inputs - are passed in a13 and a14, the result is returned in a12, and - a8 and a15 are clobbered. */ + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ .align 4 .Lmul_mulsi3: - movi a12, 0 -.Lmul_mult_loop: - add a15, a14, a12 - extui a8, a13, 0, 1 - movnez a12, a15, a8 - - do_addx2 a15, a14, a12, a15 - extui a8, a13, 1, 1 - movnez a12, a15, a8 - - do_addx4 a15, a14, a12, a15 - extui a8, a13, 2, 1 - movnez a12, a15, a8 - - do_addx8 a15, a14, a12, a15 - extui a8, a13, 3, 1 - movnez a12, a15, a8 - - srli a13, a13, 4 - slli a14, a14, 4 - bnez a13, .Lmul_mult_loop - ret -#endif /* !MUL16 && !MUL32 && !MAC16 */ + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ #endif /* L_mulsf3 */ #ifdef L_divsf3 diff --git a/gcc/config/xtensa/lib1funcs.asm b/gcc/config/xtensa/lib1funcs.asm index 27b67c43d6fb..69162f036e99 100644 --- a/gcc/config/xtensa/lib1funcs.asm +++ b/gcc/config/xtensa/lib1funcs.asm @@ -201,17 +201,28 @@ __mulsi3: #ifdef L_umulsidi3 + +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + .align 4 .global __umulsidi3 .type __umulsidi3, @function __umulsidi3: - leaf_entry sp, 32 #if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 addi sp, sp, -32 s32i a12, sp, 16 s32i a13, sp, 20 s32i a14, sp, 24 s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 48 +#else + leaf_entry sp, 16 #endif #ifdef __XTENSA_EB__ @@ -232,7 +243,7 @@ __umulsidi3: #else /* ! MUL32_HIGH */ -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL /* a0 and a8 will be clobbered by calling the multiply function but a8 is not used here and need not be saved. */ s32i a0, sp, 0 @@ -290,12 +301,21 @@ __umulsidi3: #define set_arg_h(dst, src) \ srli dst, src, 16 +#if __XTENSA_CALL0_ABI__ #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ set_arg_ ## xhalf (a13, xreg); \ set_arg_ ## yhalf (a14, yreg); \ call0 .Lmul_mulsi3; \ mov dst, a12 -#endif +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ /* Add pp1 and pp2 into a6 with carry-out in a9. */ do_mul(a6, a2, l, a3, h) /* pp 1 */ @@ -324,7 +344,7 @@ __umulsidi3: #endif /* !MUL32_HIGH */ -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL /* Restore the original return address. */ l32i a0, sp, 0 #endif @@ -337,38 +357,47 @@ __umulsidi3: #endif leaf_return -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#if XCHAL_NO_MUL /* For Xtensa processors with no multiply hardware, this simplified version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. It uses a custom ABI: the inputs - are passed in a13 and a14, the result is returned in a12, and - a8 and a15 are clobbered. */ + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ .align 4 .Lmul_mulsi3: - movi a12, 0 -.Lmul_mult_loop: - add a15, a14, a12 - extui a8, a13, 0, 1 - movnez a12, a15, a8 - - do_addx2 a15, a14, a12, a15 - extui a8, a13, 1, 1 - movnez a12, a15, a8 - - do_addx4 a15, a14, a12, a15 - extui a8, a13, 2, 1 - movnez a12, a15, a8 - - do_addx8 a15, a14, a12, a15 - extui a8, a13, 3, 1 - movnez a12, a15, a8 - - srli a13, a13, 4 - slli a14, a14, 4 - bnez a13, .Lmul_mult_loop - ret -#endif /* !MUL16 && !MUL32 && !MAC16 */ + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ .size __umulsidi3, . - __umulsidi3 diff --git a/gcc/config/xtensa/lib2funcs.S b/gcc/config/xtensa/lib2funcs.S index 7e01a6ea6e19..16d6734c2773 100644 --- a/gcc/config/xtensa/lib2funcs.S +++ b/gcc/config/xtensa/lib2funcs.S @@ -1,5 +1,5 @@ /* Assembly functions for libgcc2. - Copyright (C) 2001, 2006 Free Software Foundation, Inc. + Copyright (C) 2001, 2006, 2007 Free Software Foundation, Inc. Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. This file is part of GCC. @@ -151,6 +151,7 @@ __xtensa_nonlocal_goto: make sure that the modified instructions are loaded into the instruction fetch buffer. */ +/* Use the maximum trampoline size. Flushing a bit extra is OK. */ #define TRAMPOLINE_SIZE 60 .text diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h index 5ea777ca2328..82d7262922de 100644 --- a/gcc/config/xtensa/xtensa-protos.h +++ b/gcc/config/xtensa/xtensa-protos.h @@ -69,6 +69,7 @@ extern enum reg_class xtensa_preferred_reload_class (rtx, enum reg_class, int); extern enum reg_class xtensa_secondary_reload_class (enum reg_class, enum machine_mode, rtx, int); +extern void xtensa_initialize_trampoline (rtx, rtx, rtx); #endif /* RTX_CODE */ #ifdef TREE_CODE @@ -85,5 +86,6 @@ extern long compute_frame_size (int); extern int xtensa_frame_pointer_required (void); extern void xtensa_expand_prologue (void); extern void order_regs_for_local_alloc (void); +extern void xtensa_trampoline_template (FILE *); #endif /* !__XTENSA_PROTOS_H__ */ diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c index f98a66518692..62eadbb97982 100644 --- a/gcc/config/xtensa/xtensa.c +++ b/gcc/config/xtensa/xtensa.c @@ -2301,6 +2301,10 @@ xtensa_frame_pointer_required (void) } +/* minimum frame = reg save area (4 words) plus static chain (1 word) + and the total number of words must be a multiple of 128 bits. */ +#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD) + void xtensa_expand_prologue (void) { @@ -2379,7 +2383,7 @@ xtensa_function_epilogue (FILE *file ATTRIBUTE_UNUSED, rtx xtensa_return_addr (int count, rtx frame) { - rtx result, retaddr; + rtx result, retaddr, curaddr, label; if (count == -1) retaddr = gen_rtx_REG (Pmode, A0_REG); @@ -2393,10 +2397,25 @@ xtensa_return_addr (int count, rtx frame) /* The 2 most-significant bits of the return address on Xtensa hold the register window size. To get the real return address, these - bits must be replaced with the high bits from the current PC. */ - + bits must be replaced with the high bits from some address in the + code. */ + + /* Get the 2 high bits of a local label in the code. */ + curaddr = gen_reg_rtx (Pmode); + label = gen_label_rtx (); + emit_label (label); + LABEL_PRESERVE_P (label) = 1; + emit_move_insn (curaddr, gen_rtx_LABEL_REF (Pmode, label)); + emit_insn (gen_lshrsi3 (curaddr, curaddr, GEN_INT (30))); + emit_insn (gen_ashlsi3 (curaddr, curaddr, GEN_INT (30))); + + /* Clear the 2 high bits of the return address. */ result = gen_reg_rtx (Pmode); - emit_insn (gen_fix_return_addr (result, retaddr)); + emit_insn (gen_ashlsi3 (result, retaddr, GEN_INT (2))); + emit_insn (gen_lshrsi3 (result, result, GEN_INT (2))); + + /* Combine them to get the result. */ + emit_insn (gen_iorsi3 (result, result, curaddr)); return result; } @@ -3126,4 +3145,95 @@ xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) > 4 * UNITS_PER_WORD); } + +/* TRAMPOLINE_TEMPLATE: For Xtensa, the trampoline must perform an ENTRY + instruction with a minimal stack frame in order to get some free + registers. Once the actual call target is known, the proper stack frame + size is extracted from the ENTRY instruction at the target and the + current frame is adjusted to match. The trampoline then transfers + control to the instruction following the ENTRY at the target. Note: + this assumes that the target begins with an ENTRY instruction. */ + +void +xtensa_trampoline_template (FILE *stream) +{ + bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS); + + fprintf (stream, "\t.begin no-transform\n"); + fprintf (stream, "\tentry\tsp, %d\n", MIN_FRAME_SIZE); + + if (use_call0) + { + /* Save the return address. */ + fprintf (stream, "\tmov\ta10, a0\n"); + + /* Use a CALL0 instruction to skip past the constants and in the + process get the PC into A0. This allows PC-relative access to + the constants without relying on L32R. */ + fprintf (stream, "\tcall0\t.Lskipconsts\n"); + } + else + fprintf (stream, "\tj\t.Lskipconsts\n"); + + fprintf (stream, "\t.align\t4\n"); + fprintf (stream, ".Lchainval:%s0\n", integer_asm_op (4, TRUE)); + fprintf (stream, ".Lfnaddr:%s0\n", integer_asm_op (4, TRUE)); + fprintf (stream, ".Lskipconsts:\n"); + + /* Load the static chain and function address from the trampoline. */ + if (use_call0) + { + fprintf (stream, "\taddi\ta0, a0, 3\n"); + fprintf (stream, "\tl32i\ta9, a0, 0\n"); + fprintf (stream, "\tl32i\ta8, a0, 4\n"); + } + else + { + fprintf (stream, "\tl32r\ta9, .Lchainval\n"); + fprintf (stream, "\tl32r\ta8, .Lfnaddr\n"); + } + + /* Store the static chain. */ + fprintf (stream, "\ts32i\ta9, sp, %d\n", MIN_FRAME_SIZE - 20); + + /* Set the proper stack pointer value. */ + fprintf (stream, "\tl32i\ta9, a8, 0\n"); + fprintf (stream, "\textui\ta9, a9, %d, 12\n", + TARGET_BIG_ENDIAN ? 8 : 12); + fprintf (stream, "\tslli\ta9, a9, 3\n"); + fprintf (stream, "\taddi\ta9, a9, %d\n", -MIN_FRAME_SIZE); + fprintf (stream, "\tsub\ta9, sp, a9\n"); + fprintf (stream, "\tmovsp\tsp, a9\n"); + + if (use_call0) + /* Restore the return address. */ + fprintf (stream, "\tmov\ta0, a10\n"); + + /* Jump to the instruction following the ENTRY. */ + fprintf (stream, "\taddi\ta8, a8, 3\n"); + fprintf (stream, "\tjx\ta8\n"); + + /* Pad size to a multiple of TRAMPOLINE_ALIGNMENT. */ + if (use_call0) + fprintf (stream, "\t.byte\t0\n"); + else + fprintf (stream, "\tnop\n"); + + fprintf (stream, "\t.end no-transform\n"); +} + + +void +xtensa_initialize_trampoline (rtx addr, rtx func, rtx chain) +{ + bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS); + int chain_off = use_call0 ? 12 : 8; + int func_off = use_call0 ? 16 : 12; + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, chain_off)), chain); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, func_off)), func); + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_sync_caches"), + 0, VOIDmode, 1, addr, Pmode); +} + + #include "gt-xtensa.h" diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h index 79cd05f7fb25..7e0e9400aadb 100644 --- a/gcc/config/xtensa/xtensa.h +++ b/gcc/config/xtensa/xtensa.h @@ -72,6 +72,7 @@ extern unsigned xtensa_current_frame_size; #define TARGET_ADDX XCHAL_HAVE_ADDX #define TARGET_RELEASE_SYNC XCHAL_HAVE_RELEASE_SYNC #define TARGET_S32C1I XCHAL_HAVE_S32C1I +#define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS #define TARGET_DEFAULT ( \ (XCHAL_HAVE_L32R ? 0 : MASK_CONST16)) @@ -704,83 +705,19 @@ typedef struct xtensa_args /* Stack pointer value doesn't matter at exit. */ #define EXIT_IGNORE_STACK 1 -/* A C statement to output, on the stream FILE, assembler code for a - block of data that contains the constant parts of a trampoline. - This code should not include a label--the label is taken care of - automatically. - - For Xtensa, the trampoline must perform an entry instruction with a - minimal stack frame in order to get some free registers. Once the - actual call target is known, the proper stack frame size is extracted - from the entry instruction at the target and the current frame is - adjusted to match. The trampoline then transfers control to the - instruction following the entry at the target. Note: this assumes - that the target begins with an entry instruction. */ - -/* minimum frame = reg save area (4 words) plus static chain (1 word) - and the total number of words must be a multiple of 128 bits */ -#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD) - -#define TRAMPOLINE_TEMPLATE(STREAM) \ - do { \ - fprintf (STREAM, "\t.begin no-transform\n"); \ - fprintf (STREAM, "\tentry\tsp, %d\n", MIN_FRAME_SIZE); \ - \ - /* save the return address */ \ - fprintf (STREAM, "\tmov\ta10, a0\n"); \ - \ - /* Use a CALL0 instruction to skip past the constants and in the \ - process get the PC into A0. This allows PC-relative access to \ - the constants without relying on L32R, which may not always be \ - available. */ \ - \ - fprintf (STREAM, "\tcall0\t.Lskipconsts\n"); \ - fprintf (STREAM, "\t.align\t4\n"); \ - fprintf (STREAM, ".Lchainval:%s0\n", integer_asm_op (4, TRUE)); \ - fprintf (STREAM, ".Lfnaddr:%s0\n", integer_asm_op (4, TRUE)); \ - fprintf (STREAM, ".Lskipconsts:\n"); \ - \ - /* store the static chain */ \ - fprintf (STREAM, "\taddi\ta0, a0, 3\n"); \ - fprintf (STREAM, "\tl32i\ta8, a0, 0\n"); \ - fprintf (STREAM, "\ts32i\ta8, sp, %d\n", MIN_FRAME_SIZE - 20); \ - \ - /* set the proper stack pointer value */ \ - fprintf (STREAM, "\tl32i\ta8, a0, 4\n"); \ - fprintf (STREAM, "\tl32i\ta9, a8, 0\n"); \ - fprintf (STREAM, "\textui\ta9, a9, %d, 12\n", \ - TARGET_BIG_ENDIAN ? 8 : 12); \ - fprintf (STREAM, "\tslli\ta9, a9, 3\n"); \ - fprintf (STREAM, "\taddi\ta9, a9, %d\n", -MIN_FRAME_SIZE); \ - fprintf (STREAM, "\tsub\ta9, sp, a9\n"); \ - fprintf (STREAM, "\tmovsp\tsp, a9\n"); \ - \ - /* restore the return address */ \ - fprintf (STREAM, "\tmov\ta0, a10\n"); \ - \ - /* jump to the instruction following the entry */ \ - fprintf (STREAM, "\taddi\ta8, a8, 3\n"); \ - fprintf (STREAM, "\tjx\ta8\n"); \ - fprintf (STREAM, "\t.byte\t0\n"); \ - fprintf (STREAM, "\t.end no-transform\n"); \ - } while (0) +#define TRAMPOLINE_TEMPLATE(STREAM) xtensa_trampoline_template (STREAM) /* Size in bytes of the trampoline, as an integer. Make sure this is a multiple of TRAMPOLINE_ALIGNMENT to avoid -Wpadded warnings. */ -#define TRAMPOLINE_SIZE 60 +#define TRAMPOLINE_SIZE (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS ? 60 : 52) /* Alignment required for trampolines, in bits. */ -#define TRAMPOLINE_ALIGNMENT (32) +#define TRAMPOLINE_ALIGNMENT 32 /* A C statement to initialize the variable parts of a trampoline. */ #define INITIALIZE_TRAMPOLINE(ADDR, FUNC, CHAIN) \ - do { \ - rtx addr = ADDR; \ - emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, 12)), CHAIN); \ - emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, 16)), FUNC); \ - emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_sync_caches"), \ - 0, VOIDmode, 1, addr, Pmode); \ - } while (0) + xtensa_initialize_trampoline (ADDR, FUNC, CHAIN) + /* If defined, a C expression that produces the machine-specific code to setup the stack so that arbitrary frames can be accessed. diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md index 37e29e700392..3774a61131a8 100644 --- a/gcc/config/xtensa/xtensa.md +++ b/gcc/config/xtensa/xtensa.md @@ -1666,21 +1666,6 @@ (set_attr "mode" "none") (set_attr "length" "0")]) -;; The fix_return_addr pattern sets the high 2 bits of an address in a -;; register to match the high bits of the current PC. -(define_insn "fix_return_addr" - [(set (match_operand:SI 0 "register_operand" "=a") - (unspec:SI [(match_operand:SI 1 "register_operand" "r")] - UNSPEC_RET_ADDR)) - (clobber (match_scratch:SI 2 "=r")) - (clobber (match_scratch:SI 3 "=r"))] - "" - "mov\t%2, a0\;call0\t0f\;.align\t4\;0:\;mov\t%3, a0\;mov\ta0, %2\;\ -srli\t%3, %3, 30\;slli\t%0, %1, 2\;ssai\t2\;src\t%0, %3, %0" - [(set_attr "type" "multi") - (set_attr "mode" "SI") - (set_attr "length" "24")]) - ;; Instructions for the Xtensa "boolean" option.