From 0aa0495ec3765878aad8f4d585797dea4455c16e Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 28 Feb 2004 23:44:41 +0000 Subject: [PATCH] pa.h (TRAMPOLINE_TEMPLATE, [...]): Backport fixes from 3.5. * pa.h (TRAMPOLINE_TEMPLATE, TRAMPOLINE_SIZE, TRAMPOLINE_CODE_SIZE, MIN_CACHELINE_SIZE, INITIALIZE_TRAMPOLINE): Backport fixes from 3.5. * pa.md (anddi3, dcacheflush, icacheflush): Likewise. From-SVN: r78639 --- gcc/ChangeLog | 4 ++ gcc/config/pa/pa.h | 141 +++++++++++++++++++++++++++++--------------- gcc/config/pa/pa.md | 73 +++++++++++++++++------ 3 files changed, 153 insertions(+), 65 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 33d563b0220a..afe301a72807 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,9 @@ 2004-02-28 John David Anglin + * pa.h (TRAMPOLINE_TEMPLATE, TRAMPOLINE_SIZE, TRAMPOLINE_CODE_SIZE, + MIN_CACHELINE_SIZE, INITIALIZE_TRAMPOLINE): Backport fixes from 3.5. + * pa.md (anddi3, dcacheflush, icacheflush): Likewise. + * pa64-hpux.h (LIB_SPEC): Fix linking under HP-UX 11.00 with -p and -pg. 2004-02-27 Ian Lance Taylor diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h index a36b6b2572b6..0f7245fe95fb 100644 --- a/gcc/config/pa/pa.h +++ b/gcc/config/pa/pa.h @@ -991,7 +991,7 @@ extern int may_call_alloca; #define TRAMPOLINE_TEMPLATE(FILE) \ { \ - if (! TARGET_64BIT) \ + if (!TARGET_64BIT) \ { \ fputs ("\tldw 36(%r22),%r21\n", FILE); \ fputs ("\tbb,>=,n %r21,30,.+16\n", FILE); \ @@ -1001,10 +1001,20 @@ extern int may_call_alloca; fputs ("\tdepwi 0,31,2,%r21\n", FILE); \ fputs ("\tldw 4(%r21),%r19\n", FILE); \ fputs ("\tldw 0(%r21),%r21\n", FILE); \ - fputs ("\tldsid (%r21),%r1\n", FILE); \ - fputs ("\tmtsp %r1,%sr0\n", FILE); \ - fputs ("\tbe 0(%sr0,%r21)\n", FILE); \ - fputs ("\tldw 40(%r22),%r29\n", FILE); \ + if (TARGET_PA_20) \ + { \ + fputs ("\tbve (%r21)\n", FILE); \ + fputs ("\tldw 40(%r22),%r29\n", FILE); \ + fputs ("\t.word 0\n", FILE); \ + fputs ("\t.word 0\n", FILE); \ + } \ + else \ + { \ + fputs ("\tldsid (%r21),%r1\n", FILE); \ + fputs ("\tmtsp %r1,%sr0\n", FILE); \ + fputs ("\tbe 0(%sr0,%r21)\n", FILE); \ + fputs ("\tldw 40(%r22),%r29\n", FILE); \ + } \ fputs ("\t.word 0\n", FILE); \ fputs ("\t.word 0\n", FILE); \ fputs ("\t.word 0\n", FILE); \ @@ -1027,16 +1037,21 @@ extern int may_call_alloca; } \ } -/* Length in units of the trampoline for entering a nested function. +/* Length in units of the trampoline for entering a nested function. */ - Flush the cache entries corresponding to the first and last addresses - of the trampoline. This is necessary as the trampoline may cross two - cache lines. +#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 52) - If the code part of the trampoline ever grows to > 32 bytes, then it - will become necessary to hack on the cacheflush pattern in pa.md. */ +/* Length in units of the trampoline instruction code. */ -#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 52) +#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40)) + +/* Minimum length of a cache line. A length of 16 will work on all + PA-RISC processors. All PA 1.1 processors have a cache line of + 32 bytes. Most but not all PA 2.0 processors have a cache line + of 64 bytes. As cache flushes are expensive and we don't support + PA 1.0, we use a minimum length of 32. */ + +#define MIN_CACHELINE_SIZE 32 /* Emit RTL insns to initialize the variable parts of a trampoline. FNADDR is an RTX for the address of the function's pure code. @@ -1046,55 +1061,85 @@ extern int may_call_alloca; Move the static chain value to trampoline template at offset 40. Move the trampoline address to trampoline template at offset 44. Move r19 to trampoline template at offset 48. The latter two - words create a plabel for the indirect call to the trampoline. */ + words create a plabel for the indirect call to the trampoline. + + A similar sequence is used for the 64-bit port but the plabel is + at the beginning of the trampoline. + + Finally, the cache entries for the trampoline code are flushed. + This is necessary to ensure that the trampoline instruction sequence + is written to memory prior to any attempts at prefetching the code + sequence. */ #define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \ { \ - if (! TARGET_64BIT) \ + rtx start_addr = gen_reg_rtx (Pmode); \ + rtx end_addr = gen_reg_rtx (Pmode); \ + rtx line_length = gen_reg_rtx (Pmode); \ + rtx tmp; \ + \ + if (!TARGET_64BIT) \ { \ - rtx start_addr, end_addr; \ + tmp = memory_address (Pmode, plus_constant ((TRAMP), 36)); \ + emit_move_insn (gen_rtx_MEM (Pmode, tmp), (FNADDR)); \ + tmp = memory_address (Pmode, plus_constant ((TRAMP), 40)); \ + emit_move_insn (gen_rtx_MEM (Pmode, tmp), (CXT)); \ \ - start_addr = memory_address (Pmode, plus_constant ((TRAMP), 36)); \ - emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (FNADDR)); \ - start_addr = memory_address (Pmode, plus_constant ((TRAMP), 40)); \ - emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (CXT)); \ - start_addr = memory_address (Pmode, plus_constant ((TRAMP), 44)); \ - emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (TRAMP)); \ - start_addr = memory_address (Pmode, plus_constant ((TRAMP), 48)); \ - emit_move_insn (gen_rtx_MEM (Pmode, start_addr), \ + /* Create a fat pointer for the trampoline. */ \ + tmp = memory_address (Pmode, plus_constant ((TRAMP), 44)); \ + emit_move_insn (gen_rtx_MEM (Pmode, tmp), (TRAMP)); \ + tmp = memory_address (Pmode, plus_constant ((TRAMP), 48)); \ + emit_move_insn (gen_rtx_MEM (Pmode, tmp), \ gen_rtx_REG (Pmode, 19)); \ + \ /* fdc and fic only use registers for the address to flush, \ - they do not accept integer displacements. */ \ - start_addr = force_reg (Pmode, (TRAMP)); \ - end_addr = force_reg (Pmode, plus_constant ((TRAMP), 32)); \ - emit_insn (gen_dcacheflush (start_addr, end_addr)); \ - end_addr = force_reg (Pmode, plus_constant (start_addr, 32)); \ - emit_insn (gen_icacheflush (start_addr, end_addr, start_addr, \ - gen_reg_rtx (Pmode), gen_reg_rtx (Pmode)));\ + they do not accept integer displacements. We align the \ + start and end addresses to the beginning of their respective \ + cache lines to minimize the number of lines flushed. */ \ + tmp = force_reg (Pmode, (TRAMP)); \ + emit_insn (gen_andsi3 (start_addr, tmp, \ + GEN_INT (-MIN_CACHELINE_SIZE))); \ + tmp = force_reg (Pmode, \ + plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1)); \ + emit_insn (gen_andsi3 (end_addr, tmp, \ + GEN_INT (-MIN_CACHELINE_SIZE))); \ + emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); \ + emit_insn (gen_dcacheflush (start_addr, end_addr, line_length)); \ + emit_insn (gen_icacheflush (start_addr, end_addr, line_length, \ + gen_reg_rtx (Pmode), \ + gen_reg_rtx (Pmode))); \ } \ else \ { \ - rtx start_addr, end_addr; \ + tmp = memory_address (Pmode, plus_constant ((TRAMP), 56)); \ + emit_move_insn (gen_rtx_MEM (Pmode, tmp), (FNADDR)); \ + tmp = memory_address (Pmode, plus_constant ((TRAMP), 64)); \ + emit_move_insn (gen_rtx_MEM (Pmode, tmp), (CXT)); \ \ - start_addr = memory_address (Pmode, plus_constant ((TRAMP), 56)); \ - emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (FNADDR)); \ - start_addr = memory_address (Pmode, plus_constant ((TRAMP), 64)); \ - emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (CXT)); \ /* Create a fat pointer for the trampoline. */ \ - end_addr = force_reg (Pmode, plus_constant ((TRAMP), 32)); \ - start_addr = memory_address (Pmode, plus_constant ((TRAMP), 16)); \ - emit_move_insn (gen_rtx_MEM (Pmode, start_addr), end_addr); \ - end_addr = gen_rtx_REG (Pmode, 27); \ - start_addr = memory_address (Pmode, plus_constant ((TRAMP), 24)); \ - emit_move_insn (gen_rtx_MEM (Pmode, start_addr), end_addr); \ + tmp = memory_address (Pmode, plus_constant ((TRAMP), 16)); \ + emit_move_insn (gen_rtx_MEM (Pmode, tmp), \ + force_reg (Pmode, plus_constant ((TRAMP), 32))); \ + tmp = memory_address (Pmode, plus_constant ((TRAMP), 24)); \ + emit_move_insn (gen_rtx_MEM (Pmode, tmp), \ + gen_rtx_REG (Pmode, 27)); \ + \ /* fdc and fic only use registers for the address to flush, \ - they do not accept integer displacements. */ \ - start_addr = force_reg (Pmode, (TRAMP)); \ - end_addr = force_reg (Pmode, plus_constant ((TRAMP), 32)); \ - emit_insn (gen_dcacheflush (start_addr, end_addr)); \ - end_addr = force_reg (Pmode, plus_constant (start_addr, 32)); \ - emit_insn (gen_icacheflush (start_addr, end_addr, start_addr, \ - gen_reg_rtx (Pmode), gen_reg_rtx (Pmode)));\ + they do not accept integer displacements. We align the \ + start and end addresses to the beginning of their respective \ + cache lines to minimize the number of lines flushed. */ \ + tmp = force_reg (Pmode, plus_constant ((TRAMP), 32)); \ + emit_insn (gen_anddi3 (start_addr, tmp, \ + GEN_INT (-MIN_CACHELINE_SIZE))); \ + tmp = force_reg (Pmode, \ + plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1)); \ + emit_insn (gen_anddi3 (end_addr, tmp, \ + GEN_INT (-MIN_CACHELINE_SIZE))); \ + emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); \ + emit_insn (gen_dcacheflush (start_addr, end_addr, line_length)); \ + emit_insn (gen_icacheflush (start_addr, end_addr, line_length, \ + gen_reg_rtx (Pmode), \ + gen_reg_rtx (Pmode))); \ } \ } diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index 543a94053312..4886b3ca06fd 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -4390,10 +4390,20 @@ "" " { - if (! register_operand (operands[1], DImode) - || ! register_operand (operands[2], DImode)) - /* Let GCC break this into word-at-a-time operations. */ - FAIL; + if (TARGET_64BIT) + { + /* One operand must be a register operand. */ + if (!register_operand (operands[1], DImode) + && !register_operand (operands[2], DImode)) + FAIL; + } + else + { + /* Both operands must be register operands. */ + if (!register_operand (operands[1], DImode) + || !register_operand (operands[2], DImode)) + FAIL; + } }") (define_insn "" @@ -7769,29 +7779,58 @@ return \"\"; }") -;; Flush the I and D cache line found at the address in operand 0. +;; Flush the I and D cache lines from the start address (operand0) +;; to the end address (operand1). No lines are flushed if the end +;; address is less than the start address (unsigned). +;; +;; Because the range of memory flushed is variable and the size of +;; a MEM can only be a CONST_INT, the patterns specify that they +;; perform an unspecified volatile operation on all memory. +;; +;; The address range for an icache flush must lie within a single +;; space on targets with non-equivalent space registers. +;; ;; This is used by the trampoline code for nested functions. -;; So long as the trampoline itself is less than 32 bytes this -;; is sufficient. - +;; +;; Operand 0 contains the start address. +;; Operand 1 contains the end address. +;; Operand 2 contains the line length to use. +;; Operands 3 and 4 (icacheflush) are clobbered scratch registers. (define_insn "dcacheflush" - [(unspec_volatile [(const_int 1)] 0) - (use (mem:SI (match_operand 0 "pmode_register_operand" "r"))) - (use (mem:SI (match_operand 1 "pmode_register_operand" "r")))] + [(const_int 1) + (unspec_volatile [(mem:BLK (scratch))] 0) + (use (match_operand 0 "pmode_register_operand" "r")) + (use (match_operand 1 "pmode_register_operand" "r")) + (use (match_operand 2 "pmode_register_operand" "r")) + (clobber (match_scratch 3 "=&0"))] "" - "fdc 0(%0)\;fdc 0(%1)\;sync" + "* +{ + if (TARGET_64BIT) + return \"cmpb,*<<=,n %3,%1,.\;fdc,m %2(%3)\;sync\"; + else + return \"cmpb,<<=,n %3,%1,.\;fdc,m %2(%3)\;sync\"; +}" [(set_attr "type" "multi") (set_attr "length" "12")]) (define_insn "icacheflush" - [(unspec_volatile [(const_int 2)] 0) - (use (mem:SI (match_operand 0 "pmode_register_operand" "r"))) - (use (mem:SI (match_operand 1 "pmode_register_operand" "r"))) + [(const_int 2) + (unspec_volatile [(mem:BLK (scratch))] 0) + (use (match_operand 0 "pmode_register_operand" "r")) + (use (match_operand 1 "pmode_register_operand" "r")) (use (match_operand 2 "pmode_register_operand" "r")) (clobber (match_operand 3 "pmode_register_operand" "=&r")) - (clobber (match_operand 4 "pmode_register_operand" "=&r"))] + (clobber (match_operand 4 "pmode_register_operand" "=&r")) + (clobber (match_scratch 5 "=&0"))] "" - "mfsp %%sr0,%4\;ldsid (%2),%3\;mtsp %3,%%sr0\;fic 0(%%sr0,%0)\;fic 0(%%sr0,%1)\;sync\;mtsp %4,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop" + "* +{ + if (TARGET_64BIT) + return \"mfsp %%sr0,%4\;ldsid (%5),%3\;mtsp %3,%%sr0\;cmpb,*<<=,n %5,%1,.\;fic,m %2(%%sr0,%5)\;sync\;mtsp %4,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop\"; + else + return \"mfsp %%sr0,%4\;ldsid (%5),%3\;mtsp %3,%%sr0\;cmpb,<<=,n %5,%1,.\;fic,m %2(%%sr0,%5)\;sync\;mtsp %4,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop\"; +}" [(set_attr "type" "multi") (set_attr "length" "52")]) -- 2.47.2