1 /* Output routines for GCC for Hitachi / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002
3 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
6
7 This file is part of GNU CC.
8
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "real.h"
47 #include "langhooks.h"
48
49 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
50
51 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
52 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
53
54 /* These are some macros to abstract register modes. */
55 #define CONST_OK_FOR_ADD(size) \
56 (TARGET_SHMEDIA ? CONST_OK_FOR_P (size) : CONST_OK_FOR_I (size))
57 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
58 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
59 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
60
61 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
62 int current_function_interrupt;
63
64 /* ??? The pragma interrupt support will not work for SH3. */
65 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
66 output code for the next function appropriate for an interrupt handler. */
67 int pragma_interrupt;
68
69 /* This is set by the trap_exit attribute for functions. It specifies
70 a trap number to be used in a trapa instruction at function exit
71 (instead of an rte instruction). */
72 int trap_exit;
73
74 /* This is used by the sp_switch attribute for functions. It specifies
75 a variable holding the address of the stack the interrupt function
76 should switch to/from at entry/exit. */
77 rtx sp_switch;
78
79 /* This is set by #pragma trapa, and is similar to the above, except that
80 the compiler doesn't emit code to preserve all registers. */
81 static int pragma_trapa;
82
83 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
84 which has a separate set of low regs for User and Supervisor modes.
85 This should only be used for the lowest level of interrupts. Higher levels
86 of interrupts must save the registers in case they themselves are
87 interrupted. */
88 int pragma_nosave_low_regs;
89
90 /* This is used for communication between SETUP_INCOMING_VARARGS and
91 sh_expand_prologue. */
92 int current_function_anonymous_args;
93
94 /* Global variables for machine-dependent things. */
95
96 /* Which cpu are we scheduling for. */
97 enum processor_type sh_cpu;
98
99 /* Saved operands from the last compare to use when we generate an scc
100 or bcc insn. */
101
102 rtx sh_compare_op0;
103 rtx sh_compare_op1;
104
105 /* Provides the class number of the smallest class containing
106 reg number. */
107
108 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
109 {
110 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
111 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
127 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
143 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
144 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
145 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
146 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
147 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
148 GENERAL_REGS,
149 };
150
151 char sh_register_names[FIRST_PSEUDO_REGISTER] \
152 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
153
154 char sh_additional_register_names[ADDREGNAMES_SIZE] \
155 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
156 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
157
158 /* Provide reg_class from a letter such as appears in the machine
159 description. **: target-independently reserved letter.
160 reg_class_from_letter['e'] is set to NO_REGS for TARGET_FMOVD. */
161
162 enum reg_class reg_class_from_letter[] =
163 {
164 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
165 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
166 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
167 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
168 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
169 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
170 /* y */ FPUL_REGS, /* z */ R0_REGS
171 };
172
173 int assembler_dialect;
174
175 static void split_branches PARAMS ((rtx));
176 static int branch_dest PARAMS ((rtx));
177 static void force_into PARAMS ((rtx, rtx));
178 static void print_slot PARAMS ((rtx));
179 static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
180 static void dump_table PARAMS ((rtx));
181 static int hi_const PARAMS ((rtx));
182 static int broken_move PARAMS ((rtx));
183 static int mova_p PARAMS ((rtx));
184 static rtx find_barrier PARAMS ((int, rtx, rtx));
185 static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
186 static rtx gen_block_redirect PARAMS ((rtx, int, int));
187 static void output_stack_adjust PARAMS ((int, rtx, int, rtx (*) (rtx)));
188 static rtx frame_insn PARAMS ((rtx));
189 static rtx push PARAMS ((int));
190 static void pop PARAMS ((int));
191 static void push_regs PARAMS ((HOST_WIDE_INT *));
192 static void calc_live_regs PARAMS ((int *, HOST_WIDE_INT *));
193 static void mark_use PARAMS ((rtx, rtx *));
194 static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
195 static rtx mark_constant_pool_use PARAMS ((rtx));
196 const struct attribute_spec sh_attribute_table[];
197 static tree sh_handle_interrupt_handler_attribute PARAMS ((tree *, tree, tree, int, bool *));
198 static tree sh_handle_sp_switch_attribute PARAMS ((tree *, tree, tree, int, bool *));
199 static tree sh_handle_trap_exit_attribute PARAMS ((tree *, tree, tree, int, bool *));
200 static void sh_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
201 static void sh_insert_attributes PARAMS ((tree, tree *));
202 static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
203 static int sh_use_dfa_interface PARAMS ((void));
204 static int sh_issue_rate PARAMS ((void));
205 static bool sh_function_ok_for_sibcall PARAMS ((tree, tree));
206
207 static bool sh_cannot_modify_jumps_p PARAMS ((void));
208 static bool sh_ms_bitfield_layout_p PARAMS ((tree));
209
210 static void sh_encode_section_info PARAMS ((tree, int));
211 static const char *sh_strip_name_encoding PARAMS ((const char *));
212 static void sh_init_builtins PARAMS ((void));
213 static void sh_media_init_builtins PARAMS ((void));
214 static rtx sh_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
215 static int flow_dependent_p PARAMS ((rtx, rtx));
216 static void flow_dependent_p_1 PARAMS ((rtx, rtx, void *));
217
218 \f
219 /* Initialize the GCC target structure. */
220 #undef TARGET_ATTRIBUTE_TABLE
221 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
222
223 /* The next two are used for debug info when compiling with -gdwarf. */
224 #undef TARGET_ASM_UNALIGNED_HI_OP
225 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
226 #undef TARGET_ASM_UNALIGNED_SI_OP
227 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
228
229 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
230 #undef TARGET_ASM_UNALIGNED_DI_OP
231 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
232 #undef TARGET_ASM_ALIGNED_DI_OP
233 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
234
235 #undef TARGET_ASM_FUNCTION_EPILOGUE
236 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
237
238 #undef TARGET_INSERT_ATTRIBUTES
239 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
240
241 #undef TARGET_SCHED_ADJUST_COST
242 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
243
244 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
245 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
246 sh_use_dfa_interface
247 #undef TARGET_SCHED_ISSUE_RATE
248 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
249
250 #undef TARGET_CANNOT_MODIFY_JUMPS_P
251 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
252
253 #undef TARGET_MS_BITFIELD_LAYOUT_P
254 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
255
256 #undef TARGET_ENCODE_SECTION_INFO
257 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
258 #undef TARGET_STRIP_NAME_ENCODING
259 #define TARGET_STRIP_NAME_ENCODING sh_strip_name_encoding
260
261 #undef TARGET_INIT_BUILTINS
262 #define TARGET_INIT_BUILTINS sh_init_builtins
263 #undef TARGET_EXPAND_BUILTIN
264 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
265
266 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
267 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
268
269 struct gcc_target targetm = TARGET_INITIALIZER;
270 \f
271 /* Print the operand address in x to the stream. */
272
273 void
274 print_operand_address (stream, x)
275 FILE *stream;
276 rtx x;
277 {
278 switch (GET_CODE (x))
279 {
280 case REG:
281 case SUBREG:
282 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
283 break;
284
285 case PLUS:
286 {
287 rtx base = XEXP (x, 0);
288 rtx index = XEXP (x, 1);
289
290 switch (GET_CODE (index))
291 {
292 case CONST_INT:
293 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
294 reg_names[true_regnum (base)]);
295 break;
296
297 case REG:
298 case SUBREG:
299 {
300 int base_num = true_regnum (base);
301 int index_num = true_regnum (index);
302
303 fprintf (stream, "@(r0,%s)",
304 reg_names[MAX (base_num, index_num)]);
305 break;
306 }
307
308 default:
309 debug_rtx (x);
310 abort ();
311 }
312 }
313 break;
314
315 case PRE_DEC:
316 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
317 break;
318
319 case POST_INC:
320 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
321 break;
322
323 default:
324 x = mark_constant_pool_use (x);
325 output_addr_const (stream, x);
326 break;
327 }
328 }
329
330 /* Print operand x (an rtx) in assembler syntax to file stream
331 according to modifier code.
332
333 '.' print a .s if insn needs delay slot
334 ',' print LOCAL_LABEL_PREFIX
335 '@' print trap, rte or rts depending upon pragma interruptness
336 '#' output a nop if there is nothing to put in the delay slot
337 ''' print likelihood suffix (/u for unlikely).
338 'O' print a constant without the #
339 'R' print the LSW of a dp value - changes if in little endian
340 'S' print the MSW of a dp value - changes if in little endian
341 'T' print the next word of a dp value - same as 'R' in big endian mode.
342 'M' print an `x' if `m' will print `base,index'.
343 'N' print 'r63' if the operand is (const_int 0).
344 'm' print a pair `base,offset' or `base,index', for LD and ST.
345 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
346 'o' output an operator. */
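/* Illustrative note: for a DFmode value held in the register pair r2/r3,
   '%S' prints the register containing the most significant word and '%R'
   the least significant one.  With the MSW/LSW macros defined above, that
   is r2/r3 on a big-endian target and r3/r2 when TARGET_LITTLE_ENDIAN is
   set.  */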
347
348 void
349 print_operand (stream, x, code)
350 FILE *stream;
351 rtx x;
352 int code;
353 {
354 switch (code)
355 {
356 case '.':
357 if (final_sequence
358 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
359 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
360 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
361 break;
362 case ',':
363 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
364 break;
365 case '@':
366 if (trap_exit)
367 fprintf (stream, "trapa #%d", trap_exit);
368 else if (sh_cfun_interrupt_handler_p ())
369 fprintf (stream, "rte");
370 else
371 fprintf (stream, "rts");
372 break;
373 case '#':
374 /* Output a nop if there's nothing in the delay slot. */
375 if (dbr_sequence_length () == 0)
376 fprintf (stream, "\n\tnop");
377 break;
378 case '\'':
379 {
380 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
381
382 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
383 fputs ("/u", stream);
384 break;
385 }
386 case 'O':
387 x = mark_constant_pool_use (x);
388 output_addr_const (stream, x);
389 break;
390 case 'R':
391 fputs (reg_names[REGNO (x) + LSW], (stream));
392 break;
393 case 'S':
394 fputs (reg_names[REGNO (x) + MSW], (stream));
395 break;
396 case 'T':
397 /* Next word of a double. */
398 switch (GET_CODE (x))
399 {
400 case REG:
401 fputs (reg_names[REGNO (x) + 1], (stream));
402 break;
403 case MEM:
404 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
405 && GET_CODE (XEXP (x, 0)) != POST_INC)
406 x = adjust_address (x, SImode, 4);
407 print_operand_address (stream, XEXP (x, 0));
408 break;
409 default:
410 break;
411 }
412 break;
413 case 'o':
414 switch (GET_CODE (x))
415 {
416 case PLUS: fputs ("add", stream); break;
417 case MINUS: fputs ("sub", stream); break;
418 case MULT: fputs ("mul", stream); break;
419 case DIV: fputs ("div", stream); break;
420 case EQ: fputs ("eq", stream); break;
421 case NE: fputs ("ne", stream); break;
422 case GT: case LT: fputs ("gt", stream); break;
423 case GE: case LE: fputs ("ge", stream); break;
424 case GTU: case LTU: fputs ("gtu", stream); break;
425 case GEU: case LEU: fputs ("geu", stream); break;
426 default:
427 break;
428 }
429 break;
430 case 'M':
431 if (GET_CODE (x) == MEM
432 && GET_CODE (XEXP (x, 0)) == PLUS
433 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
434 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
435 fputc ('x', stream);
436 break;
437
438 case 'm':
439 if (GET_CODE (x) != MEM)
440 abort ();
441 x = XEXP (x, 0);
442 switch (GET_CODE (x))
443 {
444 case REG:
445 case SUBREG:
446 print_operand (stream, x, 0);
447 fputs (", 0", stream);
448 break;
449
450 case PLUS:
451 print_operand (stream, XEXP (x, 0), 0);
452 fputs (", ", stream);
453 print_operand (stream, XEXP (x, 1), 0);
454 break;
455
456 default:
457 abort ();
458 }
459 break;
460
461 case 'N':
462 if (x == CONST0_RTX (GET_MODE (x)))
463 {
464 fprintf ((stream), "r63");
465 break;
466 }
467 goto default_output;
468 case 'u':
469 if (GET_CODE (x) == CONST_INT)
470 {
471 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
472 break;
473 }
474 /* Fall through. */
475
476 default_output:
477 default:
478 switch (GET_CODE (x))
479 {
480 /* FIXME: We need this on SHmedia32 because reload generates
481 some sign-extended HI or QI loads into DImode registers
482 but, because Pmode is SImode, the address ends up with a
483 subreg:SI of the DImode register. Maybe reload should be
484 fixed so as to apply alter_subreg to such loads? */
485 case SUBREG:
486 if (SUBREG_BYTE (x) != 0
487 || GET_CODE (SUBREG_REG (x)) != REG)
488 abort ();
489
490 x = SUBREG_REG (x);
491 /* Fall through. */
492
493 case REG:
494 if (FP_REGISTER_P (REGNO (x))
495 && GET_MODE (x) == V16SFmode)
496 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
497 else if (FP_REGISTER_P (REGNO (x))
498 && GET_MODE (x) == V4SFmode)
499 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
500 else if (GET_CODE (x) == REG
501 && GET_MODE (x) == V2SFmode)
502 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
503 else if (FP_REGISTER_P (REGNO (x))
504 && GET_MODE_SIZE (GET_MODE (x)) > 4)
505 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
506 else
507 fputs (reg_names[REGNO (x)], (stream));
508 break;
509
510 case MEM:
511 output_address (XEXP (x, 0));
512 break;
513
514 case CONST:
515 if (TARGET_SHMEDIA
516 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
517 && GET_MODE (XEXP (x, 0)) == DImode
518 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
519 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
520 {
521 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
522
523 fputc ('(', stream);
524 if (GET_CODE (val) == ASHIFTRT)
525 {
526 fputc ('(', stream);
527 if (GET_CODE (XEXP (val, 0)) == CONST)
528 fputc ('(', stream);
529 output_addr_const (stream, XEXP (val, 0));
530 if (GET_CODE (XEXP (val, 0)) == CONST)
531 fputc (')', stream);
532 fputs (" >> ", stream);
533 output_addr_const (stream, XEXP (val, 1));
534 fputc (')', stream);
535 }
536 else
537 {
538 if (GET_CODE (val) == CONST)
539 fputc ('(', stream);
540 output_addr_const (stream, val);
541 if (GET_CODE (val) == CONST)
542 fputc (')', stream);
543 }
544 fputs (" & 65535)", stream);
545 break;
546 }
547
548 /* Fall through. */
549 default:
550 if (TARGET_SH1)
551 fputc ('#', stream);
552 output_addr_const (stream, x);
553 break;
554 }
555 break;
556 }
557 }
558 \f
559 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
560 static void
561 force_into (value, target)
562 rtx value, target;
563 {
564 value = force_operand (value, target);
565 if (! rtx_equal_p (value, target))
566 emit_insn (gen_move_insn (target, value));
567 }
568
569 /* Emit code to perform a block move. Choose the best method.
570
571 OPERANDS[0] is the destination.
572 OPERANDS[1] is the source.
573 OPERANDS[2] is the size.
574 OPERANDS[3] is the alignment safe to use. */
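/* For example: a 12-byte copy with 4-byte alignment on TARGET_HARD_SH4 is
   expanded below into loading the destination address into r4 and the
   source address into r5, then calling __movstrSI12_i4; larger aligned
   copies select one of the other __movstr* helpers, and anything this
   expander cannot handle makes it return 0 so that generic code is used
   instead.  */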
575
576 int
577 expand_block_move (operands)
578 rtx *operands;
579 {
580 int align = INTVAL (operands[3]);
581 int constp = (GET_CODE (operands[2]) == CONST_INT);
582 int bytes = (constp ? INTVAL (operands[2]) : 0);
583
584 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
585 alignment, or if it isn't a multiple of 4 bytes, then fail. */
586 if (! constp || align < 4 || (bytes % 4 != 0))
587 return 0;
588
589 if (TARGET_HARD_SH4)
590 {
591 if (bytes < 12)
592 return 0;
593 else if (bytes == 12)
594 {
595 tree entry_name;
596 rtx sym;
597 rtx func_addr_rtx;
598 rtx r4 = gen_rtx (REG, SImode, 4);
599 rtx r5 = gen_rtx (REG, SImode, 5);
600
601 entry_name = get_identifier ("__movstrSI12_i4");
602
603 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
604 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
605 force_into (XEXP (operands[0], 0), r4);
606 force_into (XEXP (operands[1], 0), r5);
607 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
608 return 1;
609 }
610 else if (! TARGET_SMALLCODE)
611 {
612 tree entry_name;
613 rtx sym;
614 rtx func_addr_rtx;
615 int dwords;
616 rtx r4 = gen_rtx (REG, SImode, 4);
617 rtx r5 = gen_rtx (REG, SImode, 5);
618 rtx r6 = gen_rtx (REG, SImode, 6);
619
620 entry_name = get_identifier (bytes & 4
621 ? "__movstr_i4_odd"
622 : "__movstr_i4_even");
623 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
624 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
625 force_into (XEXP (operands[0], 0), r4);
626 force_into (XEXP (operands[1], 0), r5);
627
628 dwords = bytes >> 3;
629 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
630 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
631 return 1;
632 }
633 else
634 return 0;
635 }
636 if (bytes < 64)
637 {
638 char entry[30];
639 tree entry_name;
640 rtx sym;
641 rtx func_addr_rtx;
642 rtx r4 = gen_rtx_REG (SImode, 4);
643 rtx r5 = gen_rtx_REG (SImode, 5);
644
645 sprintf (entry, "__movstrSI%d", bytes);
646 entry_name = get_identifier (entry);
647 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
648 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
649 force_into (XEXP (operands[0], 0), r4);
650 force_into (XEXP (operands[1], 0), r5);
651 emit_insn (gen_block_move_real (func_addr_rtx));
652 return 1;
653 }
654
655 /* This is the same number of bytes as a memcpy call, but to a different,
656 less common function name, so this will occasionally use more space. */
657 if (! TARGET_SMALLCODE)
658 {
659 tree entry_name;
660 rtx sym;
661 rtx func_addr_rtx;
662 int final_switch, while_loop;
663 rtx r4 = gen_rtx_REG (SImode, 4);
664 rtx r5 = gen_rtx_REG (SImode, 5);
665 rtx r6 = gen_rtx_REG (SImode, 6);
666
667 entry_name = get_identifier ("__movstr");
668 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
669 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
670 force_into (XEXP (operands[0], 0), r4);
671 force_into (XEXP (operands[1], 0), r5);
672
673 /* r6 controls the size of the move. 16 is decremented from it
674 for each 64 bytes moved. Then the negative bit left over is used
675 as an index into a list of move instructions. e.g., a 72 byte move
676 would be set up with size(r6) = 14, for one iteration through the
677 big while loop, and a switch of -2 for the last part. */
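  /* Checking the arithmetic for the 72 byte example: bytes / 4 = 18 words,
     so final_switch = 16 - (18 % 16) = 14 and while_loop
     = (18 / 16 - 1) * 16 = 0, giving r6 = 14.  After one pass of the
     64-byte loop, r6 becomes 14 - 16 = -2, the switch index used for the
     remaining 8 bytes.  */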
678
679 final_switch = 16 - ((bytes / 4) % 16);
680 while_loop = ((bytes / 4) / 16 - 1) * 16;
681 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
682 emit_insn (gen_block_lump_real (func_addr_rtx));
683 return 1;
684 }
685
686 return 0;
687 }
688
689 /* Prepare operands for a move define_expand; specifically, one of the
690 operands must be in a register. */
691
692 int
693 prepare_move_operands (operands, mode)
694 rtx operands[];
695 enum machine_mode mode;
696 {
697 if ((mode == SImode || mode == DImode) && flag_pic)
698 {
699 rtx temp;
700 if (SYMBOLIC_CONST_P (operands[1]))
701 {
702 if (GET_CODE (operands[0]) == MEM)
703 operands[1] = force_reg (Pmode, operands[1]);
704 else if (TARGET_SHMEDIA
705 && GET_CODE (operands[1]) == LABEL_REF
706 && target_reg_operand (operands[0], mode))
707 /* It's ok. */;
708 else
709 {
710 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
711 operands[1] = legitimize_pic_address (operands[1], mode, temp);
712 }
713 }
714 else if (GET_CODE (operands[1]) == CONST
715 && GET_CODE (XEXP (operands[1], 0)) == PLUS
716 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
717 {
718 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
719 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
720 mode, temp);
721 operands[1] = expand_binop (mode, add_optab, temp,
722 XEXP (XEXP (operands[1], 0), 1),
723 no_new_pseudos ? temp
724 : gen_reg_rtx (Pmode),
725 0, OPTAB_LIB_WIDEN);
726 }
727 }
728
729 if (! reload_in_progress && ! reload_completed)
730 {
731 /* Copy the source to a register if neither operand is a register. */
732 if (! register_operand (operands[0], mode)
733 && ! sh_register_operand (operands[1], mode))
734 operands[1] = copy_to_mode_reg (mode, operands[1]);
735
736 /* This case can happen while generating code to move the result
737 of a library call to the target. Reject `st r0,@(rX,rY)' because
738 reload will fail to find a spill register for rX, since r0 is already
739 being used for the source. */
740 else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
741 && GET_CODE (operands[0]) == MEM
742 && GET_CODE (XEXP (operands[0], 0)) == PLUS
743 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
744 operands[1] = copy_to_mode_reg (mode, operands[1]);
745 }
746
747 return 0;
748 }
749
750 /* Prepare the operands for an scc instruction; make sure that the
751 compare has been done. */
752 rtx
753 prepare_scc_operands (code)
754 enum rtx_code code;
755 {
756 rtx t_reg = gen_rtx_REG (SImode, T_REG);
757 enum rtx_code oldcode = code;
758 enum machine_mode mode;
759
760 /* First need a compare insn. */
761 switch (code)
762 {
763 case NE:
764 /* It isn't possible to handle this case. */
765 abort ();
766 case LT:
767 code = GT;
768 break;
769 case LE:
770 code = GE;
771 break;
772 case LTU:
773 code = GTU;
774 break;
775 case LEU:
776 code = GEU;
777 break;
778 default:
779 break;
780 }
781 if (code != oldcode)
782 {
783 rtx tmp = sh_compare_op0;
784 sh_compare_op0 = sh_compare_op1;
785 sh_compare_op1 = tmp;
786 }
787
788 mode = GET_MODE (sh_compare_op0);
789 if (mode == VOIDmode)
790 mode = GET_MODE (sh_compare_op1);
791
792 sh_compare_op0 = force_reg (mode, sh_compare_op0);
793 if ((code != EQ && code != NE
794 && (sh_compare_op1 != const0_rtx
795 || code == GTU || code == GEU || code == LTU || code == LEU))
796 || (mode == DImode && sh_compare_op1 != const0_rtx)
797 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
798 sh_compare_op1 = force_reg (mode, sh_compare_op1);
799
800 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
801 (mode == SFmode ? emit_sf_insn : emit_df_insn)
802 (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
803 gen_rtx (SET, VOIDmode, t_reg,
804 gen_rtx (code, SImode,
805 sh_compare_op0, sh_compare_op1)),
806 gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
807 else
808 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
809 gen_rtx (code, SImode, sh_compare_op0,
810 sh_compare_op1)));
811
812 return t_reg;
813 }
814
815 /* Called from the md file, set up the operands of a compare instruction. */
816
817 void
818 from_compare (operands, code)
819 rtx *operands;
820 int code;
821 {
822 enum machine_mode mode = GET_MODE (sh_compare_op0);
823 rtx insn;
824 if (mode == VOIDmode)
825 mode = GET_MODE (sh_compare_op1);
826 if (code != EQ
827 || mode == DImode
828 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
829 {
830 /* Force args into regs, since we can't use constants here. */
831 sh_compare_op0 = force_reg (mode, sh_compare_op0);
832 if (sh_compare_op1 != const0_rtx
833 || code == GTU || code == GEU
834 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
835 sh_compare_op1 = force_reg (mode, sh_compare_op1);
836 }
837 if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
838 {
839 from_compare (operands, GT);
840 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
841 }
842 else
843 insn = gen_rtx_SET (VOIDmode,
844 gen_rtx_REG (SImode, T_REG),
845 gen_rtx (code, SImode, sh_compare_op0,
846 sh_compare_op1));
847 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
848 {
849 insn = gen_rtx (PARALLEL, VOIDmode,
850 gen_rtvec (2, insn,
851 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
852 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
853 }
854 else
855 emit_insn (insn);
856 }
857 \f
858 /* Functions to output assembly code. */
859
860 /* Return a sequence of instructions to perform a DI or DF move.
861
862 Since the SH cannot move a DI or DF in one instruction, we have
863 to take care when we see overlapping source and dest registers. */
864
865 const char *
866 output_movedouble (insn, operands, mode)
867 rtx insn ATTRIBUTE_UNUSED;
868 rtx operands[];
869 enum machine_mode mode;
870 {
871 rtx dst = operands[0];
872 rtx src = operands[1];
873
874 if (GET_CODE (dst) == MEM
875 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
876 return "mov.l %T1,%0\n\tmov.l %1,%0";
877
878 if (register_operand (dst, mode)
879 && register_operand (src, mode))
880 {
881 if (REGNO (src) == MACH_REG)
882 return "sts mach,%S0\n\tsts macl,%R0";
883
884 /* When mov.d r1,r2 do r2->r3 then r1->r2;
885 when mov.d r1,r0 do r1->r0 then r2->r1. */
886
887 if (REGNO (src) + 1 == REGNO (dst))
888 return "mov %T1,%T0\n\tmov %1,%0";
889 else
890 return "mov %1,%0\n\tmov %T1,%T0";
891 }
892 else if (GET_CODE (src) == CONST_INT)
893 {
894 if (INTVAL (src) < 0)
895 output_asm_insn ("mov #-1,%S0", operands);
896 else
897 output_asm_insn ("mov #0,%S0", operands);
898
899 return "mov %1,%R0";
900 }
901 else if (GET_CODE (src) == MEM)
902 {
903 int ptrreg = -1;
904 int dreg = REGNO (dst);
905 rtx inside = XEXP (src, 0);
906
907 if (GET_CODE (inside) == REG)
908 ptrreg = REGNO (inside);
909 else if (GET_CODE (inside) == SUBREG)
910 ptrreg = subreg_regno (inside);
911 else if (GET_CODE (inside) == PLUS)
912 {
913 ptrreg = REGNO (XEXP (inside, 0));
914 /* ??? A r0+REG address shouldn't be possible here, because it isn't
915 an offsettable address. Unfortunately, offsettable addresses use
916 QImode to check the offset, and a QImode offsettable address
917 requires r0 for the other operand, which is not currently
918 supported, so we can't use the 'o' constraint.
919 Thus we must check for and handle r0+REG addresses here.
920 We punt for now, since this is likely very rare. */
921 if (GET_CODE (XEXP (inside, 1)) == REG)
922 abort ();
923 }
924 else if (GET_CODE (inside) == LABEL_REF)
925 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
926 else if (GET_CODE (inside) == POST_INC)
927 return "mov.l %1,%0\n\tmov.l %1,%T0";
928 else
929 abort ();
930
931 /* Work out the safe way to copy. Copy into the second half first. */
932 if (dreg == ptrreg)
933 return "mov.l %T1,%T0\n\tmov.l %1,%0";
934 }
935
936 return "mov.l %1,%0\n\tmov.l %T1,%T0";
937 }
938
939 /* Print an instruction which would have gone into a delay slot after
940 another instruction, but couldn't because the other instruction expanded
941 into a sequence where putting the slot insn at the end wouldn't work. */
942
943 static void
944 print_slot (insn)
945 rtx insn;
946 {
947 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
948
949 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
950 }
951
952 const char *
953 output_far_jump (insn, op)
954 rtx insn;
955 rtx op;
956 {
957 struct { rtx lab, reg, op; } this;
958 rtx braf_base_lab = NULL_RTX;
959 const char *jump;
960 int far;
961 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
962
963 this.lab = gen_label_rtx ();
964
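/* Note: the short TARGET_SH2 form below is "mov.w %O0,%1; braf %1", whose
   displacement is a sign-extended 16-bit word loaded from the table emitted
   at the end of this function (".word %O2-%O0"), so it only reaches roughly
   +/-32 KB; the 32764 / 32766 bounds appear to leave a few bytes of slack
   for the insns emitted around the braf.  */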
965 if (TARGET_SH2
966 && offset >= -32764
967 && offset - get_attr_length (insn) <= 32766)
968 {
969 far = 0;
970 jump = "mov.w %O0,%1; braf %1";
971 }
972 else
973 {
974 far = 1;
975 if (flag_pic)
976 {
977 if (TARGET_SH2)
978 jump = "mov.l %O0,%1; braf %1";
979 else
980 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
981 }
982 else
983 jump = "mov.l %O0,%1; jmp @%1";
984 }
985 /* If we have a scratch register available, use it. */
986 if (GET_CODE (PREV_INSN (insn)) == INSN
987 && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
988 {
989 this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
990 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
991 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
992 output_asm_insn (jump, &this.lab);
993 if (dbr_sequence_length ())
994 print_slot (final_sequence);
995 else
996 output_asm_insn ("nop", 0);
997 }
998 else
999 {
1000 /* Output the delay slot insn first if any. */
1001 if (dbr_sequence_length ())
1002 print_slot (final_sequence);
1003
1004 this.reg = gen_rtx_REG (SImode, 13);
1005 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1006 Fortunately, MACL is fixed and call-clobbered, and we never
1007 need its value across jumps, so save r13 in it instead of in
1008 the stack. */
1009 if (TARGET_SH5)
1010 output_asm_insn ("lds r13, macl", 0);
1011 else
1012 output_asm_insn ("mov.l r13,@-r15", 0);
1013 output_asm_insn (jump, &this.lab);
1014 if (TARGET_SH5)
1015 output_asm_insn ("sts macl, r13", 0);
1016 else
1017 output_asm_insn ("mov.l @r15+,r13", 0);
1018 }
1019 if (far && flag_pic && TARGET_SH2)
1020 {
1021 braf_base_lab = gen_label_rtx ();
1022 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1023 CODE_LABEL_NUMBER (braf_base_lab));
1024 }
1025 if (far)
1026 output_asm_insn (".align 2", 0);
1027 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1028 this.op = op;
1029 if (far && flag_pic)
1030 {
1031 if (TARGET_SH2)
1032 this.lab = braf_base_lab;
1033 output_asm_insn (".long %O2-%O0", &this.lab);
1034 }
1035 else
1036 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1037 return "";
1038 }
1039
1040 /* Local label counter, used for constants in the pool and inside
1041 pattern branches. */
1042
1043 static int lf = 100;
1044
1045 /* Output code for ordinary branches. */
1046
1047 const char *
1048 output_branch (logic, insn, operands)
1049 int logic;
1050 rtx insn;
1051 rtx *operands;
1052 {
1053 switch (get_attr_length (insn))
1054 {
1055 case 6:
1056 /* This can happen if filling the delay slot has caused a forward
1057 branch to exceed its range (we could reverse it, but only
1058 when we know we won't overextend other branches; this should
1059 best be handled by relaxation).
1060 It can also happen when other condbranches hoist delay slot insns
1061 from their destination, thus leading to code size increase.
1062 But the branch will still be in the range -4092..+4098 bytes. */
1063
1064 if (! TARGET_RELAX)
1065 {
1066 int label = lf++;
1067 /* The call to print_slot will clobber the operands. */
1068 rtx op0 = operands[0];
1069
1070 /* If the instruction in the delay slot is annulled (true), then
1071 there is no delay slot where we can put it now. The only safe
1072 place for it is after the label. final will do that by default. */
1073
1074 if (final_sequence
1075 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1076 {
1077 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1078 ASSEMBLER_DIALECT ? "/" : ".", label);
1079 print_slot (final_sequence);
1080 }
1081 else
1082 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1083
1084 output_asm_insn ("bra\t%l0", &op0);
1085 fprintf (asm_out_file, "\tnop\n");
1086 (*targetm.asm_out.internal_label)(asm_out_file, "LF", label);
1087
1088 return "";
1089 }
1090 /* When relaxing, handle this like a short branch. The linker
1091 will fix it up if it still doesn't fit after relaxation. */
1092 case 2:
1093 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1094 default:
1095 /* There should be no longer branches now - that would
1096 indicate that something has destroyed the branches set
1097 up in machine_dependent_reorg. */
1098 abort ();
1099 }
1100 }
1101
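/* Descriptive note: output TEMPLATE for INSN, with operands[9] set to a
   suitable branch target: a label placed just after the following
   conditional branch when that branch tests a condition other than CODE,
   the destination of the following jump when that destination is close
   enough, or otherwise a fresh label emitted immediately after INSN.  */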
1102 const char *
1103 output_branchy_insn (code, template, insn, operands)
1104 enum rtx_code code;
1105 const char *template;
1106 rtx insn;
1107 rtx *operands;
1108 {
1109 rtx next_insn = NEXT_INSN (insn);
1110
1111 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1112 {
1113 rtx src = SET_SRC (PATTERN (next_insn));
1114 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1115 {
1116 /* Following branch not taken */
1117 operands[9] = gen_label_rtx ();
1118 emit_label_after (operands[9], next_insn);
1119 INSN_ADDRESSES_NEW (operands[9],
1120 INSN_ADDRESSES (INSN_UID (next_insn))
1121 + get_attr_length (next_insn));
1122 return template;
1123 }
1124 else
1125 {
1126 int offset = (branch_dest (next_insn)
1127 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1128 if (offset >= -252 && offset <= 258)
1129 {
1130 if (GET_CODE (src) == IF_THEN_ELSE)
1131 /* branch_true */
1132 src = XEXP (src, 1);
1133 operands[9] = src;
1134 return template;
1135 }
1136 }
1137 }
1138 operands[9] = gen_label_rtx ();
1139 emit_label_after (operands[9], insn);
1140 INSN_ADDRESSES_NEW (operands[9],
1141 INSN_ADDRESSES (INSN_UID (insn))
1142 + get_attr_length (insn));
1143 return template;
1144 }
1145
1146 const char *
1147 output_ieee_ccmpeq (insn, operands)
1148 rtx insn, *operands;
1149 {
1150 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1151 }
1152 \f
1153 /* Output to FILE the start of the assembler file. */
1154
1155 void
1156 output_file_start (file)
1157 FILE *file;
1158 {
1159 output_file_directive (file, main_input_filename);
1160
1161 /* Switch to the data section so that the coffsem symbol
1162 isn't in the text section. */
1163 data_section ();
1164
1165 if (TARGET_LITTLE_ENDIAN)
1166 fprintf (file, "\t.little\n");
1167
1168 if (TARGET_SHCOMPACT)
1169 fprintf (file, "\t.mode\tSHcompact\n");
1170 else if (TARGET_SHMEDIA)
1171 fprintf (file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1172 TARGET_SHMEDIA64 ? 64 : 32);
1173 }
1174 \f
1175 /* Actual number of instructions used to make a shift by N. */
1176 static const char ashiftrt_insns[] =
1177 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1178
1179 /* Left shift and logical right shift are the same. */
1180 static const char shift_insns[] =
1181 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1182
1183 /* Individual shift amounts needed to get the above length sequences.
1184 One bit right shifts clobber the T bit, so when possible, put one bit
1185 shifts in the middle of the sequence, so the ends are eligible for
1186 branch delay slots. */
1187 static const short shift_amounts[32][5] = {
1188 {0}, {1}, {2}, {2, 1},
1189 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1190 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1191 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1192 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1193 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1194 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1195 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
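/* Example: a constant shift by 7 uses shift_amounts[7] = {2, 2, 1, 2},
   four single shift insns whose amounts sum to 7, matching
   shift_insns[7] == 4 above; the one-bit shift sits in the middle of the
   sequence so both ends remain eligible for branch delay slots, as the
   preceding comment explains.  */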
1196
1197 /* Likewise, but for shift amounts < 16, up to three highmost bits
1198 might be clobbered. This is typically used when combined with some
1199 kind of sign or zero extension. */
1200
1201 static const char ext_shift_insns[] =
1202 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1203
1204 static const short ext_shift_amounts[32][4] = {
1205 {0}, {1}, {2}, {2, 1},
1206 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1207 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1208 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1209 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1210 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1211 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1212 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1213
1214 /* Assuming we have a value that has been sign-extended by at least one bit,
1215 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1216 to shift it by N without data loss, and quicker than by other means? */
1217 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
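/* Note: ((n) | 8) == 15 holds exactly for n == 7 and n == 15, the two
   shift counts whose ext_shift_amounts sequences above end in -1, i.e. in
   a single one-bit right shift that can become an arithmetic shift when
   the value is already sign-extended.  */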
1218
1219 /* This is used in length attributes in sh.md to help compute the length
1220 of arbitrary constant shift instructions. */
1221
1222 int
1223 shift_insns_rtx (insn)
1224 rtx insn;
1225 {
1226 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1227 int shift_count = INTVAL (XEXP (set_src, 1));
1228 enum rtx_code shift_code = GET_CODE (set_src);
1229
1230 switch (shift_code)
1231 {
1232 case ASHIFTRT:
1233 return ashiftrt_insns[shift_count];
1234 case LSHIFTRT:
1235 case ASHIFT:
1236 return shift_insns[shift_count];
1237 default:
1238 abort();
1239 }
1240 }
1241
1242 /* Return the cost of a shift. */
1243
1244 int
1245 shiftcosts (x)
1246 rtx x;
1247 {
1248 int value;
1249
1250 if (TARGET_SHMEDIA)
1251 return 1;
1252
1253 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1254 {
1255 if (GET_MODE (x) == DImode
1256 && GET_CODE (XEXP (x, 1)) == CONST_INT
1257 && INTVAL (XEXP (x, 1)) == 1)
1258 return 2;
1259
1260 /* Everything else is invalid, because there is no pattern for it. */
1261 return 10000;
1262 }
1263 /* If shifting by a non-constant, then this will be expensive. */
1264 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1265 return SH_DYNAMIC_SHIFT_COST;
1266
1267 value = INTVAL (XEXP (x, 1));
1268
1269 /* Otherwise, return the true cost in instructions. */
1270 if (GET_CODE (x) == ASHIFTRT)
1271 {
1272 int cost = ashiftrt_insns[value];
1273 /* If SH3, then we put the constant in a reg and use shad. */
1274 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1275 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1276 return cost;
1277 }
1278 else
1279 return shift_insns[value];
1280 }
1281
1282 /* Return the cost of an AND operation. */
1283
1284 int
1285 andcosts (x)
1286 rtx x;
1287 {
1288 int i;
1289
1290 /* Anding with a register is a single cycle and instruction. */
1291 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1292 return 1;
1293
1294 i = INTVAL (XEXP (x, 1));
1295
1296 if (TARGET_SHMEDIA)
1297 {
1298 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1299 && CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
1300 || EXTRA_CONSTRAINT_S (XEXP (x, 1)))
1301 return 1;
1302 else
1303 return 2;
1304 }
1305
1306 /* These constants are single cycle extu.[bw] instructions. */
1307 if (i == 0xff || i == 0xffff)
1308 return 1;
1309 /* Constants that can be used in an and immediate instruction cost a single
1310 cycle, but they require r0, so make them a little more expensive. */
1311 if (CONST_OK_FOR_L (i))
1312 return 2;
1313 /* Constants that can be loaded with a mov immediate and an and.
1314 This case is probably unnecessary. */
1315 if (CONST_OK_FOR_I (i))
1316 return 2;
1317 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1318 This case is probably unnecessary. */
1319 return 3;
1320 }
1321
1322 /* Return the cost of an addition or a subtraction. */
1323
1324 int
1325 addsubcosts (x)
1326 rtx x;
1327 {
1328 /* Adding a register is a single cycle insn. */
1329 if (GET_CODE (XEXP (x, 1)) == REG
1330 || GET_CODE (XEXP (x, 1)) == SUBREG)
1331 return 1;
1332
1333 /* Likewise for small constants. */
1334 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1335 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1336 return 1;
1337
1338 if (TARGET_SHMEDIA)
1339 switch (GET_CODE (XEXP (x, 1)))
1340 {
1341 case CONST:
1342 case LABEL_REF:
1343 case SYMBOL_REF:
1344 return TARGET_SHMEDIA64 ? 5 : 3;
1345
1346 case CONST_INT:
1347 if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
1348 return 2;
1349 else if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1)) >> 16))
1350 return 3;
1351 else if (CONST_OK_FOR_J ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1352 return 4;
1353
1354 /* Fall through. */
1355 default:
1356 return 5;
1357 }
1358
1359 /* Any other constant requires a 2 cycle pc-relative load plus an
1360 addition. */
1361 return 3;
1362 }
1363
1364 /* Return the cost of a multiply. */
1365 int
1366 multcosts (x)
1367 rtx x ATTRIBUTE_UNUSED;
1368 {
1369 if (TARGET_SHMEDIA)
1370 return 3;
1371
1372 if (TARGET_SH2)
1373 {
1374 /* We have a mul insn, so we can never take more than the mul and the
1375 read of the mac reg, but count more because of the latency and extra
1376 reg usage. */
1377 if (TARGET_SMALLCODE)
1378 return 2;
1379 return 3;
1380 }
1381
1382 /* If we're aiming at small code, then just count the number of
1383 insns in a multiply call sequence. */
1384 if (TARGET_SMALLCODE)
1385 return 5;
1386
1387 /* Otherwise count all the insns in the routine we'd be calling too. */
1388 return 20;
1389 }
1390
1391 /* Code to expand a shift. */
1392
1393 void
1394 gen_ashift (type, n, reg)
1395 int type;
1396 int n;
1397 rtx reg;
1398 {
1399 /* Negative values here come from the shift_amounts array. */
1400 if (n < 0)
1401 {
1402 if (type == ASHIFT)
1403 type = LSHIFTRT;
1404 else
1405 type = ASHIFT;
1406 n = -n;
1407 }
1408
1409 switch (type)
1410 {
1411 case ASHIFTRT:
1412 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1413 break;
1414 case LSHIFTRT:
1415 if (n == 1)
1416 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1417 else
1418 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1419 break;
1420 case ASHIFT:
1421 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1422 break;
1423 }
1424 }
1425
1426 /* Same for HImode */
1427
1428 void
1429 gen_ashift_hi (type, n, reg)
1430 int type;
1431 int n;
1432 rtx reg;
1433 {
1434 /* Negative values here come from the shift_amounts array. */
1435 if (n < 0)
1436 {
1437 if (type == ASHIFT)
1438 type = LSHIFTRT;
1439 else
1440 type = ASHIFT;
1441 n = -n;
1442 }
1443
1444 switch (type)
1445 {
1446 case ASHIFTRT:
1447 case LSHIFTRT:
1448 /* We don't have HImode right shift operations because using the
1449 ordinary 32 bit shift instructions for that doesn't generate proper
1450 zero/sign extension.
1451 gen_ashift_hi is only called in contexts where we know that the
1452 sign extension works out correctly. */
1453 {
1454 int offset = 0;
1455 if (GET_CODE (reg) == SUBREG)
1456 {
1457 offset = SUBREG_BYTE (reg);
1458 reg = SUBREG_REG (reg);
1459 }
1460 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1461 break;
1462 }
1463 case ASHIFT:
1464 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1465 break;
1466 }
1467 }
1468
1469 /* Output RTL to split a constant shift into its component SH constant
1470 shift instructions. */
1471
1472 void
1473 gen_shifty_op (code, operands)
1474 int code;
1475 rtx *operands;
1476 {
1477 int value = INTVAL (operands[2]);
1478 int max, i;
1479
1480 /* Truncate the shift count in case it is out of bounds. */
1481 value = value & 0x1f;
1482
1483 if (value == 31)
1484 {
1485 if (code == LSHIFTRT)
1486 {
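      /* A logical right shift by 31 leaves only the original bit 31:
         rotlsi3_1 rotates that bit into T (and into bit 0), and movt then
         writes T to the destination, giving 0 or 1 in two insns.  */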
1487 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1488 emit_insn (gen_movt (operands[0]));
1489 return;
1490 }
1491 else if (code == ASHIFT)
1492 {
1493 /* There is a two instruction sequence for 31 bit left shifts,
1494 but it requires r0. */
1495 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1496 {
1497 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1498 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1499 return;
1500 }
1501 }
1502 }
1503 else if (value == 0)
1504 {
1505 /* This can happen when not optimizing. We must output something here
1506 to prevent the compiler from aborting in final.c after the try_split
1507 call. */
1508 emit_insn (gen_nop ());
1509 return;
1510 }
1511
1512 max = shift_insns[value];
1513 for (i = 0; i < max; i++)
1514 gen_ashift (code, shift_amounts[value][i], operands[0]);
1515 }
1516
1517 /* Same as above, but optimized for values where the topmost bits don't
1518 matter. */
1519
1520 void
1521 gen_shifty_hi_op (code, operands)
1522 int code;
1523 rtx *operands;
1524 {
1525 int value = INTVAL (operands[2]);
1526 int max, i;
1527 void (*gen_fun) PARAMS ((int, int, rtx));
1528
1529 /* This operation is used by and_shl for SImode values with a few
1530 high bits known to be cleared. */
1531 value &= 31;
1532 if (value == 0)
1533 {
1534 emit_insn (gen_nop ());
1535 return;
1536 }
1537
1538 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1539 if (code == ASHIFT)
1540 {
1541 max = ext_shift_insns[value];
1542 for (i = 0; i < max; i++)
1543 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1544 }
1545 else
1546 /* When shifting right, emit the shifts in reverse order, so that
1547 solitary negative values come first. */
1548 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1549 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1550 }
1551
1552 /* Output RTL for an arithmetic right shift. */
1553
1554 /* ??? Rewrite to use super-optimizer sequences. */
1555
1556 int
1557 expand_ashiftrt (operands)
1558 rtx *operands;
1559 {
1560 rtx sym;
1561 rtx wrk;
1562 char func[18];
1563 tree func_name;
1564 int value;
1565
1566 if (TARGET_SH3)
1567 {
1568 if (GET_CODE (operands[2]) != CONST_INT)
1569 {
1570 rtx count = copy_to_mode_reg (SImode, operands[2]);
1571 emit_insn (gen_negsi2 (count, count));
1572 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1573 return 1;
1574 }
1575 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1576 > 1 + SH_DYNAMIC_SHIFT_COST)
1577 {
1578 rtx count
1579 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1580 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1581 return 1;
1582 }
1583 }
1584 if (GET_CODE (operands[2]) != CONST_INT)
1585 return 0;
1586
1587 value = INTVAL (operands[2]) & 31;
1588
1589 if (value == 31)
1590 {
1591 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
1592 return 1;
1593 }
1594 else if (value >= 16 && value <= 19)
1595 {
1596 wrk = gen_reg_rtx (SImode);
1597 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1598 value -= 16;
1599 while (value--)
1600 gen_ashift (ASHIFTRT, 1, wrk);
1601 emit_move_insn (operands[0], wrk);
1602 return 1;
1603 }
1604 /* Expand a short sequence inline; for longer ones, call a magic routine. */
1605 else if (value <= 5)
1606 {
1607 wrk = gen_reg_rtx (SImode);
1608 emit_move_insn (wrk, operands[1]);
1609 while (value--)
1610 gen_ashift (ASHIFTRT, 1, wrk);
1611 emit_move_insn (operands[0], wrk);
1612 return 1;
1613 }
1614
1615 wrk = gen_reg_rtx (Pmode);
1616
1617 /* Load the value into an arg reg and call a helper. */
1618 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
1619 sprintf (func, "__ashiftrt_r4_%d", value);
1620 func_name = get_identifier (func);
1621 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (func_name));
1622 emit_move_insn (wrk, sym);
1623 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1624 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
1625 return 1;
1626 }
1627
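/* Return nonzero if a constant shift by COUNT is better done as a dynamic
   (register-count) shift, i.e. if the open-coded constant shift sequence
   would cost more than 1 + SH_DYNAMIC_SHIFT_COST insns.  */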
1628 int
1629 sh_dynamicalize_shift_p (count)
1630 rtx count;
1631 {
1632 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1633 }
1634
1635 /* Try to find a good way to implement the combiner pattern
1636 [(set (match_operand:SI 0 "register_operand" "r")
1637 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1638 (match_operand:SI 2 "const_int_operand" "n"))
1639 (match_operand:SI 3 "const_int_operand" "n"))) .
1640 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1641 return 0 for simple right / left or left/right shift combination.
1642 return 1 for a combination of shifts with zero_extend.
1643 return 2 for a combination of shifts with an AND that needs r0.
1644 return 3 for a combination of shifts with an AND that needs an extra
1645 scratch register, when the three highmost bits of the AND mask are clear.
1646 return 4 for a combination of shifts with an AND that needs an extra
1647 scratch register, when any of the three highmost bits of the AND mask
1648 is set.
1649 If ATTRP is set, store an initial right shift width in ATTRP[0],
1650 and the instruction length in ATTRP[1] . These values are not valid
1651 when returning 0.
1652 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1653 shift_amounts for the last shift value that is to be used before the
1654 sign extend. */
1655 int
1656 shl_and_kind (left_rtx, mask_rtx, attrp)
1657 rtx left_rtx, mask_rtx;
1658 int *attrp;
1659 {
1660 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1661 int left = INTVAL (left_rtx), right;
1662 int best = 0;
1663 int cost, best_cost = 10000;
1664 int best_right = 0, best_len = 0;
1665 int i;
1666 int can_ext;
1667
1668 if (left < 0 || left > 31)
1669 return 0;
1670 if (GET_CODE (mask_rtx) == CONST_INT)
1671 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1672 else
1673 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1674 /* Can this be expressed as a right shift / left shift pair ? */
1675 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
1676 right = exact_log2 (lsb);
1677 mask2 = ~(mask + lsb - 1);
1678 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
1679 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
1680 if (! mask2)
1681 best_cost = shift_insns[right] + shift_insns[right + left];
1682 /* mask has no trailing zeroes <==> ! right */
1683 else if (! right && mask2 == ~(lsb2 - 1))
1684 {
1685 int late_right = exact_log2 (lsb2);
1686 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
1687 }
1688 /* Try to use zero extend */
1689 if (mask2 == ~(lsb2 - 1))
1690 {
1691 int width, first;
1692
1693 for (width = 8; width <= 16; width += 8)
1694 {
1695 /* Can we zero-extend right away? */
1696 if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
1697 {
1698 cost
1699 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
1700 if (cost < best_cost)
1701 {
1702 best = 1;
1703 best_cost = cost;
1704 best_right = right;
1705 best_len = cost;
1706 if (attrp)
1707 attrp[2] = -1;
1708 }
1709 continue;
1710 }
1711 /* ??? Could try to put zero extend into initial right shift,
1712 or even shift a bit left before the right shift. */
1713 /* Determine value of first part of left shift, to get to the
1714 zero extend cut-off point. */
1715 first = width - exact_log2 (lsb2) + right;
1716 if (first >= 0 && right + left - first >= 0)
1717 {
1718 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
1719 + ext_shift_insns[right + left - first];
1720 if (cost < best_cost)
1721 {
1722 best = 1;
1723 best_cost = cost;
1724 best_right = right;
1725 best_len = cost;
1726 if (attrp)
1727 attrp[2] = first;
1728 }
1729 }
1730 }
1731 }
1732 /* Try to use r0 AND pattern */
1733 for (i = 0; i <= 2; i++)
1734 {
1735 if (i > right)
1736 break;
1737 if (! CONST_OK_FOR_L (mask >> i))
1738 continue;
1739 cost = (i != 0) + 2 + ext_shift_insns[left + i];
1740 if (cost < best_cost)
1741 {
1742 best = 2;
1743 best_cost = cost;
1744 best_right = i;
1745 best_len = cost - 1;
1746 }
1747 }
1748 /* Try to use a scratch register to hold the AND operand. */
1749 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
1750 for (i = 0; i <= 2; i++)
1751 {
1752 if (i > right)
1753 break;
1754 cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
1755 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
1756 if (cost < best_cost)
1757 {
1758 best = 4 - can_ext;
1759 best_cost = cost;
1760 best_right = i;
1761 best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
1762 }
1763 }
1764
1765 if (attrp)
1766 {
1767 attrp[0] = best_right;
1768 attrp[1] = best_len;
1769 }
1770 return best;
1771 }
1772
1773 /* This is used in length attributes of the unnamed instructions
1774 corresponding to shl_and_kind return values of 1 and 2. */
1775 int
1776 shl_and_length (insn)
1777 rtx insn;
1778 {
1779 rtx set_src, left_rtx, mask_rtx;
1780 int attributes[3];
1781
1782 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1783 left_rtx = XEXP (XEXP (set_src, 0), 1);
1784 mask_rtx = XEXP (set_src, 1);
1785 shl_and_kind (left_rtx, mask_rtx, attributes);
1786 return attributes[1];
1787 }
1788
1789 /* This is used in length attribute of the and_shl_scratch instruction. */
1790
1791 int
1792 shl_and_scr_length (insn)
1793 rtx insn;
1794 {
1795 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1796 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
1797 rtx op = XEXP (set_src, 0);
1798 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
1799 op = XEXP (XEXP (op, 0), 0);
1800 return len + shift_insns[INTVAL (XEXP (op, 1))];
1801 }
1802
1803 /* Generating rtl? */
1804 extern int rtx_equal_function_value_matters;
1805
1806 /* Generate rtl for instructions for which shl_and_kind advised a particular
1807 method of generating them, i.e. returned nonzero. */
1808
1809 int
1810 gen_shl_and (dest, left_rtx, mask_rtx, source)
1811 rtx dest, left_rtx, mask_rtx, source;
1812 {
1813 int attributes[3];
1814 unsigned HOST_WIDE_INT mask;
1815 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
1816 int right, total_shift;
1817 void (*shift_gen_fun) PARAMS ((int, rtx*)) = gen_shifty_hi_op;
1818
1819 right = attributes[0];
1820 total_shift = INTVAL (left_rtx) + right;
1821 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
1822 switch (kind)
1823 {
1824 default:
1825 return -1;
1826 case 1:
1827 {
1828 int first = attributes[2];
1829 rtx operands[3];
1830
1831 if (first < 0)
1832 {
1833 emit_insn ((mask << right) <= 0xff
1834 ? gen_zero_extendqisi2(dest,
1835 gen_lowpart (QImode, source))
1836 : gen_zero_extendhisi2(dest,
1837 gen_lowpart (HImode, source)));
1838 source = dest;
1839 }
1840 if (source != dest)
1841 emit_insn (gen_movsi (dest, source));
1842 operands[0] = dest;
1843 if (right)
1844 {
1845 operands[2] = GEN_INT (right);
1846 gen_shifty_hi_op (LSHIFTRT, operands);
1847 }
1848 if (first > 0)
1849 {
1850 operands[2] = GEN_INT (first);
1851 gen_shifty_hi_op (ASHIFT, operands);
1852 total_shift -= first;
1853 mask <<= first;
1854 }
1855 if (first >= 0)
1856 emit_insn (mask <= 0xff
1857 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
1858 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
1859 if (total_shift > 0)
1860 {
1861 operands[2] = GEN_INT (total_shift);
1862 gen_shifty_hi_op (ASHIFT, operands);
1863 }
1864 break;
1865 }
1866 case 4:
1867 shift_gen_fun = gen_shifty_op;
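/* Fall through.  */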
1868 case 3:
1869 /* If the topmost bit that matters is set, set the topmost bits
1870 that don't matter. This way, we might be able to get a shorter
1871 signed constant. */
1872 if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
1873 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
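/* Fall through.  */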
1874 case 2:
1875 /* Don't expand fine-grained when combining, because that will
1876 make the pattern fail. */
1877 if (rtx_equal_function_value_matters
1878 || reload_in_progress || reload_completed)
1879 {
1880 rtx operands[3];
1881
1882 /* Cases 3 and 4 should be handled by this split
1883 only while combining */
1884 if (kind > 2)
1885 abort ();
1886 if (right)
1887 {
1888 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
1889 source = dest;
1890 }
1891 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
1892 if (total_shift)
1893 {
1894 operands[0] = dest;
1895 operands[1] = dest;
1896 operands[2] = GEN_INT (total_shift);
1897 shift_gen_fun (ASHIFT, operands);
1898 }
1899 break;
1900 }
1901 else
1902 {
1903 int neg = 0;
1904 if (kind != 4 && total_shift < 16)
1905 {
1906 neg = -ext_shift_amounts[total_shift][1];
1907 if (neg > 0)
1908 neg -= ext_shift_amounts[total_shift][2];
1909 else
1910 neg = 0;
1911 }
1912 emit_insn (gen_and_shl_scratch (dest, source,
1913 GEN_INT (right),
1914 GEN_INT (mask),
1915 GEN_INT (total_shift + neg),
1916 GEN_INT (neg)));
1917 emit_insn (gen_movsi (dest, dest));
1918 break;
1919 }
1920 }
1921 return 0;
1922 }
1923
1924 /* Try to find a good way to implement the combiner pattern
1925 [(set (match_operand:SI 0 "register_operand" "=r")
1926 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1927 (match_operand:SI 2 "const_int_operand" "n")
1928 (match_operand:SI 3 "const_int_operand" "n")
1929 (const_int 0)))
1930 (clobber (reg:SI T_REG))]
1931 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
1932 return 0 for simple left / right shift combination.
1933 return 1 for left shift / 8 bit sign extend / left shift.
1934 return 2 for left shift / 16 bit sign extend / left shift.
1935 return 3 for left shift / 8 bit sign extend / shift / sign extend.
1936 return 4 for left shift / 16 bit sign extend / shift / sign extend.
1937 return 5 for left shift / 16 bit sign extend / right shift
1938 return 6 for < 8 bit sign extend / left shift.
1939 return 7 for < 8 bit sign extend / left shift / single right shift.
1940 If COSTP is nonzero, assign the calculated cost to *COSTP. */
1941
1942 int
1943 shl_sext_kind (left_rtx, size_rtx, costp)
1944 rtx left_rtx, size_rtx;
1945 int *costp;
1946 {
1947 int left, size, insize, ext;
1948 int cost, best_cost;
1949 int kind;
1950
1951 left = INTVAL (left_rtx);
1952 size = INTVAL (size_rtx);
1953 insize = size - left;
1954 if (insize <= 0)
1955 abort ();
1956 /* Default to left / right shift. */
1957 kind = 0;
1958 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
1959 if (size <= 16)
1960 {
1961 /* 16 bit shift / sign extend / 16 bit shift */
1962 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
1963 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
1964 below, by alternative 3 or something even better. */
1965 if (cost < best_cost)
1966 {
1967 kind = 5;
1968 best_cost = cost;
1969 }
1970 }
1971 /* Try a plain sign extend between two shifts. */
1972 for (ext = 16; ext >= insize; ext -= 8)
1973 {
1974 if (ext <= size)
1975 {
1976 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
1977 if (cost < best_cost)
1978 {
1979 kind = ext / (unsigned) 8;
1980 best_cost = cost;
1981 }
1982 }
1983 /* Check if we can do a sloppy shift with a final signed shift
1984 restoring the sign. */
1985 if (EXT_SHIFT_SIGNED (size - ext))
1986 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
1987 /* If not, maybe it's still cheaper to do the second shift sloppy,
1988 and do a final sign extend? */
1989 else if (size <= 16)
1990 cost = ext_shift_insns[ext - insize] + 1
1991 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
1992 else
1993 continue;
1994 if (cost < best_cost)
1995 {
1996 kind = ext / (unsigned) 8 + 2;
1997 best_cost = cost;
1998 }
1999 }
2000 /* Check if we can sign extend in r0 */
2001 if (insize < 8)
2002 {
2003 cost = 3 + shift_insns[left];
2004 if (cost < best_cost)
2005 {
2006 kind = 6;
2007 best_cost = cost;
2008 }
2009 /* Try the same with a final signed shift. */
2010 if (left < 31)
2011 {
2012 cost = 3 + ext_shift_insns[left + 1] + 1;
2013 if (cost < best_cost)
2014 {
2015 kind = 7;
2016 best_cost = cost;
2017 }
2018 }
2019 }
2020 if (TARGET_SH3)
2021 {
2022 /* Try to use a dynamic shift. */
2023 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2024 if (cost < best_cost)
2025 {
2026 kind = 0;
2027 best_cost = cost;
2028 }
2029 }
2030 if (costp)
2031 *costp = cost;
2032 return kind;
2033 }
2034
2035 /* Function to be used in the length attribute of the instructions
2036 implementing this pattern. */
2037
2038 int
2039 shl_sext_length (insn)
2040 rtx insn;
2041 {
2042 rtx set_src, left_rtx, size_rtx;
2043 int cost;
2044
2045 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2046 left_rtx = XEXP (XEXP (set_src, 0), 1);
2047 size_rtx = XEXP (set_src, 1);
2048 shl_sext_kind (left_rtx, size_rtx, &cost);
2049 return cost;
2050 }
2051
2052 /* Generate rtl for this pattern */
2053
2054 int
2055 gen_shl_sext (dest, left_rtx, size_rtx, source)
2056 rtx dest, left_rtx, size_rtx, source;
2057 {
2058 int kind;
2059 int left, size, insize, cost;
2060 rtx operands[3];
2061
2062 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2063 left = INTVAL (left_rtx);
2064 size = INTVAL (size_rtx);
2065 insize = size - left;
2066 switch (kind)
2067 {
2068 case 1:
2069 case 2:
2070 case 3:
2071 case 4:
2072 {
2073 int ext = kind & 1 ? 8 : 16;
2074 int shift2 = size - ext;
2075
2076 /* Don't expand fine-grained when combining, because that will
2077 make the pattern fail. */
2078 if (! rtx_equal_function_value_matters
2079 && ! reload_in_progress && ! reload_completed)
2080 {
2081 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2082 emit_insn (gen_movsi (dest, source));
2083 break;
2084 }
2085 if (dest != source)
2086 emit_insn (gen_movsi (dest, source));
2087 operands[0] = dest;
2088 if (ext - insize)
2089 {
2090 operands[2] = GEN_INT (ext - insize);
2091 gen_shifty_hi_op (ASHIFT, operands);
2092 }
2093 emit_insn (kind & 1
2094 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
2095 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
2096 if (kind <= 2)
2097 {
2098 if (shift2)
2099 {
2100 operands[2] = GEN_INT (shift2);
2101 gen_shifty_op (ASHIFT, operands);
2102 }
2103 }
2104 else
2105 {
2106 if (shift2 > 0)
2107 {
2108 if (EXT_SHIFT_SIGNED (shift2))
2109 {
2110 operands[2] = GEN_INT (shift2 + 1);
2111 gen_shifty_op (ASHIFT, operands);
2112 operands[2] = GEN_INT (1);
2113 gen_shifty_op (ASHIFTRT, operands);
2114 break;
2115 }
2116 operands[2] = GEN_INT (shift2);
2117 gen_shifty_hi_op (ASHIFT, operands);
2118 }
2119 else if (shift2)
2120 {
2121 operands[2] = GEN_INT (-shift2);
2122 gen_shifty_hi_op (LSHIFTRT, operands);
2123 }
2124 emit_insn (size <= 8
2125 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2126 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2127 }
2128 break;
2129 }
2130 case 5:
2131 {
2132 int i = 16 - size;
2133 if (! rtx_equal_function_value_matters
2134 && ! reload_in_progress && ! reload_completed)
2135 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2136 else
2137 {
2138 operands[0] = dest;
2139 operands[2] = GEN_INT (16 - insize);
2140 gen_shifty_hi_op (ASHIFT, operands);
2141 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2142 }
2143 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2144 while (--i >= 0)
2145 gen_ashift (ASHIFTRT, 1, dest);
2146 break;
2147 }
2148 case 6:
2149 case 7:
2150 /* Don't expand fine-grained when combining, because that will
2151 make the pattern fail. */
2152 if (! rtx_equal_function_value_matters
2153 && ! reload_in_progress && ! reload_completed)
2154 {
2155 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2156 emit_insn (gen_movsi (dest, source));
2157 break;
2158 }
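/* Sign extend the low INSIZE bits of SOURCE into DEST by computing
   ((x & ((1 << insize) - 1)) ^ s) - s with s = 1 << (insize - 1):
   mask out the interesting bits, flip the sign bit, then subtract
   its weight.  */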
2159 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2160 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2161 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2162 operands[0] = dest;
2163 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2164 gen_shifty_op (ASHIFT, operands);
2165 if (kind == 7)
2166 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
2167 break;
2168 default:
2169 return -1;
2170 }
2171 return 0;
2172 }
2173
2174 /* Prefix a symbol_ref name with "datalabel". */
2175
2176 rtx
2177 gen_datalabel_ref (sym)
2178 rtx sym;
2179 {
2180 if (GET_CODE (sym) == LABEL_REF)
2181 return gen_rtx_CONST (GET_MODE (sym),
2182 gen_rtx_UNSPEC (GET_MODE (sym),
2183 gen_rtvec (1, sym),
2184 UNSPEC_DATALABEL));
2185
2186 if (GET_CODE (sym) != SYMBOL_REF)
2187 abort ();
2188
2189 XSTR (sym, 0) = concat (SH_DATALABEL_ENCODING, XSTR (sym, 0), NULL);
2190
2191 return sym;
2192 }
2193
2194 \f
2195 /* The SH cannot load a large constant into a register; constants have to
2196 come from a pc relative load. The reference of a pc relative load
2197 instruction must be less than 1k in front of the instruction. This
2198 means that we often have to dump a constant inside a function, and
2199 generate code to branch around it.
2200
2201 It is important to minimize this, since the branches will slow things
2202 down and make things bigger.
2203
2204 Worst case code looks like:
2205
2206 mov.l L1,rn
2207 bra L2
2208 nop
2209 align
2210 L1: .long value
2211 L2:
2212 ..
2213
2214 mov.l L3,rn
2215 bra L4
2216 nop
2217 align
2218 L3: .long value
2219 L4:
2220 ..
2221
2222 We fix this by performing a scan before scheduling, which notices which
2223 instructions need to have their operands fetched from the constant table
2224 and builds the table.
2225
2226 The algorithm is:
2227
2228 scan, find an instruction which needs a pcrel move. Look forward, find the
2229 last barrier which is within MAX_COUNT bytes of the requirement.
2230 If there isn't one, make one. Process all the instructions between
2231 the find and the barrier.
2232
2233 In the above example, we can tell that L3 is within 1k of L1, so
2234 the first move can be shrunk from the 3 insn+constant sequence into
2235 just 1 insn, and the constant moved to L3 to make:
2236
2237 mov.l L1,rn
2238 ..
2239 mov.l L3,rn
2240 bra L4
2241 nop
2242 align
2243 L3:.long value
2244 L4:.long value
2245
2246 Then the second move becomes the target for the shortening process. */
2247
2248 typedef struct
2249 {
2250 rtx value; /* Value in table. */
2251 rtx label; /* Label of value. */
2252 rtx wend; /* End of window. */
2253 enum machine_mode mode; /* Mode of value. */
2254
2255 /* True if this constant is accessed as part of a post-increment
2256 sequence. Note that HImode constants are never accessed in this way. */
2257 bool part_of_sequence_p;
2258 } pool_node;
2259
2260 /* The maximum number of constants that can fit into one pool, since
2261 the pc relative range is 0...1020 bytes and constants are at least 4
2262 bytes long. */
2263
2264 #define MAX_POOL_SIZE (1020/4)
2265 static pool_node pool_vector[MAX_POOL_SIZE];
2266 static int pool_size;
2267 static rtx pool_window_label;
2268 static int pool_window_last;
2269
2270 /* ??? If we need a constant in HImode which is the truncated value of a
2271 constant we need in SImode, we could combine the two entries thus saving
2272 two bytes. Is this common enough to be worth the effort of implementing
2273 it? */
2274
2275 /* ??? This stuff should be done at the same time that we shorten branches.
2276 As it is now, we must assume that all branches are the maximum size, and
2277 this causes us to almost always output constant pools sooner than
2278 necessary. */
2279
2280 /* Add a constant to the pool and return its label. */
2281
2282 static rtx
2283 add_constant (x, mode, last_value)
2284 rtx x;
2285 enum machine_mode mode;
2286 rtx last_value;
2287 {
2288 int i;
2289 rtx lab, new, ref, newref;
2290
2291 /* First see if we've already got it. */
2292 for (i = 0; i < pool_size; i++)
2293 {
2294 if (x->code == pool_vector[i].value->code
2295 && mode == pool_vector[i].mode)
2296 {
2297 if (x->code == CODE_LABEL)
2298 {
2299 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2300 continue;
2301 }
2302 if (rtx_equal_p (x, pool_vector[i].value))
2303 {
2304 lab = new = 0;
2305 if (! last_value
2306 || ! i
2307 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2308 {
2309 new = gen_label_rtx ();
2310 LABEL_REFS (new) = pool_vector[i].label;
2311 pool_vector[i].label = lab = new;
2312 }
2313 if (lab && pool_window_label)
2314 {
2315 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2316 ref = pool_vector[pool_window_last].wend;
2317 LABEL_NEXTREF (newref) = ref;
2318 pool_vector[pool_window_last].wend = newref;
2319 }
2320 if (new)
2321 pool_window_label = new;
2322 pool_window_last = i;
2323 return lab;
2324 }
2325 }
2326 }
2327
2328 /* Need a new one. */
2329 pool_vector[pool_size].value = x;
2330 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2331 {
2332 lab = 0;
2333 pool_vector[pool_size - 1].part_of_sequence_p = true;
2334 }
2335 else
2336 lab = gen_label_rtx ();
2337 pool_vector[pool_size].mode = mode;
2338 pool_vector[pool_size].label = lab;
2339 pool_vector[pool_size].wend = NULL_RTX;
2340 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2341 if (lab && pool_window_label)
2342 {
2343 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2344 ref = pool_vector[pool_window_last].wend;
2345 LABEL_NEXTREF (newref) = ref;
2346 pool_vector[pool_window_last].wend = newref;
2347 }
2348 if (lab)
2349 pool_window_label = lab;
2350 pool_window_last = pool_size;
2351 pool_size++;
2352 return lab;
2353 }
2354
2355 /* Output the literal table. */
2356
2357 static void
2358 dump_table (scan)
2359 rtx scan;
2360 {
2361 int i;
2362 int need_align = 1;
2363 rtx lab, ref;
2364 int have_df = 0;
2365
2366 /* Do two passes; the first time, dump out the HI sized constants. */
2367
2368 for (i = 0; i < pool_size; i++)
2369 {
2370 pool_node *p = &pool_vector[i];
2371
2372 if (p->mode == HImode)
2373 {
2374 if (need_align)
2375 {
2376 scan = emit_insn_after (gen_align_2 (), scan);
2377 need_align = 0;
2378 }
2379 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2380 scan = emit_label_after (lab, scan);
2381 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2382 scan);
2383 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2384 {
2385 lab = XEXP (ref, 0);
2386 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2387 }
2388 }
2389 else if (p->mode == DFmode)
2390 have_df = 1;
2391 }
2392
2393 need_align = 1;
2394
2395 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2396 {
2397 rtx align_insn = NULL_RTX;
2398
2399 scan = emit_label_after (gen_label_rtx (), scan);
2400 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2401 need_align = 0;
2402
2403 for (i = 0; i < pool_size; i++)
2404 {
2405 pool_node *p = &pool_vector[i];
2406
2407 switch (p->mode)
2408 {
2409 case HImode:
2410 break;
2411 case SImode:
2412 case SFmode:
2413 if (align_insn && !p->part_of_sequence_p)
2414 {
2415 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2416 emit_label_before (lab, align_insn);
2417 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2418 align_insn);
2419 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2420 {
2421 lab = XEXP (ref, 0);
2422 emit_insn_before (gen_consttable_window_end (lab),
2423 align_insn);
2424 }
2425 delete_insn (align_insn);
2426 align_insn = NULL_RTX;
2427 continue;
2428 }
2429 else
2430 {
2431 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2432 scan = emit_label_after (lab, scan);
2433 scan = emit_insn_after (gen_consttable_4 (p->value,
2434 const0_rtx), scan);
2435 need_align = ! need_align;
2436 }
2437 break;
2438 case DFmode:
2439 if (need_align)
2440 {
2441 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2442 align_insn = scan;
2443 need_align = 0;
2444 }
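/* Fall through.  */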
2445 case DImode:
2446 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2447 scan = emit_label_after (lab, scan);
2448 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2449 scan);
2450 break;
2451 default:
2452 abort ();
2453 break;
2454 }
2455
2456 if (p->mode != HImode)
2457 {
2458 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2459 {
2460 lab = XEXP (ref, 0);
2461 scan = emit_insn_after (gen_consttable_window_end (lab),
2462 scan);
2463 }
2464 }
2465 }
2466
2467 pool_size = 0;
2468 }
2469
2470 for (i = 0; i < pool_size; i++)
2471 {
2472 pool_node *p = &pool_vector[i];
2473
2474 switch (p->mode)
2475 {
2476 case HImode:
2477 break;
2478 case SImode:
2479 case SFmode:
2480 if (need_align)
2481 {
2482 need_align = 0;
2483 scan = emit_label_after (gen_label_rtx (), scan);
2484 scan = emit_insn_after (gen_align_4 (), scan);
2485 }
2486 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2487 scan = emit_label_after (lab, scan);
2488 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2489 scan);
2490 break;
2491 case DFmode:
2492 case DImode:
2493 if (need_align)
2494 {
2495 need_align = 0;
2496 scan = emit_label_after (gen_label_rtx (), scan);
2497 scan = emit_insn_after (gen_align_4 (), scan);
2498 }
2499 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2500 scan = emit_label_after (lab, scan);
2501 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2502 scan);
2503 break;
2504 default:
2505 abort ();
2506 break;
2507 }
2508
2509 if (p->mode != HImode)
2510 {
2511 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2512 {
2513 lab = XEXP (ref, 0);
2514 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2515 }
2516 }
2517 }
2518
2519 scan = emit_insn_after (gen_consttable_end (), scan);
2520 scan = emit_barrier_after (scan);
2521 pool_size = 0;
2522 pool_window_label = NULL_RTX;
2523 pool_window_last = 0;
2524 }
2525
2526 /* Return nonzero if the constant would be an ok source for a
2527 mov.w instead of a mov.l. */
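/* For example, (const_int 1000) fits in the signed 16 bit range accepted
   here, while (const_int 40000) does not.  */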
2528
2529 static int
2530 hi_const (src)
2531 rtx src;
2532 {
2533 return (GET_CODE (src) == CONST_INT
2534 && INTVAL (src) >= -32768
2535 && INTVAL (src) <= 32767);
2536 }
2537
2538 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2539
2540 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
2541 CONST_DOUBLE input value is CONST_OK_FOR_I. For an SFmode move, we don't
2542 need to fix it if the input value is CONST_OK_FOR_I. */
2543
2544 static int
2545 broken_move (insn)
2546 rtx insn;
2547 {
2548 if (GET_CODE (insn) == INSN)
2549 {
2550 rtx pat = PATTERN (insn);
2551 if (GET_CODE (pat) == PARALLEL)
2552 pat = XVECEXP (pat, 0, 0);
2553 if (GET_CODE (pat) == SET
2554 /* We can load any 8 bit value if we don't care what the high
2555 order bits end up as. */
2556 && GET_MODE (SET_DEST (pat)) != QImode
2557 && (CONSTANT_P (SET_SRC (pat))
2558 /* Match mova_const. */
2559 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2560 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2561 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2562 && ! (TARGET_SH3E
2563 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2564 && (fp_zero_operand (SET_SRC (pat))
2565 || fp_one_operand (SET_SRC (pat)))
2566 /* ??? If this is a -m4 or -m4-single compilation, in general
2567 we don't know the current setting of fpscr, so disable fldi.
2568 There is an exception if this was a register-register move
2569 before reload - and hence it was ascertained that we have
2570 single precision setting - and in a post-reload optimization
2571 we changed this to do a constant load. In that case
2572 we don't have an r0 clobber, hence we must use fldi. */
2573 && (! TARGET_SH4 || TARGET_FMOVD
2574 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2575 == SCRATCH))
2576 && GET_CODE (SET_DEST (pat)) == REG
2577 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2578 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2579 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
2580 return 1;
2581 }
2582
2583 return 0;
2584 }
2585
2586 static int
2587 mova_p (insn)
2588 rtx insn;
2589 {
2590 return (GET_CODE (insn) == INSN
2591 && GET_CODE (PATTERN (insn)) == SET
2592 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2593 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2594 /* Don't match mova_const. */
2595 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2596 }
2597
2598 /* Find the last barrier from insn FROM which is close enough to hold the
2599 constant pool. If we can't find one, then create one near the end of
2600 the range. */
2601
2602 static rtx
2603 find_barrier (num_mova, mova, from)
2604 int num_mova;
2605 rtx mova, from;
2606 {
2607 int count_si = 0;
2608 int count_hi = 0;
2609 int found_hi = 0;
2610 int found_si = 0;
2611 int found_di = 0;
2612 int hi_align = 2;
2613 int si_align = 2;
2614 int leading_mova = num_mova;
2615 rtx barrier_before_mova, found_barrier = 0, good_barrier = 0;
2616 int si_limit;
2617 int hi_limit;
2618
2619 /* For HImode: range is 510, add 4 because pc counts from address of
2620 second instruction after this one, subtract 2 for the jump instruction
2621 that we may need to emit before the table, subtract 2 for the instruction
2622 that fills the jump delay slot (in very rare cases, reorg will take an
2623 instruction from after the constant pool or will leave the delay slot
2624 empty). This gives 510.
2625 For SImode: range is 1020, add 4 because pc counts from address of
2626 second instruction after this one, subtract 2 in case pc is 2 byte
2627 aligned, subtract 2 for the jump instruction that we may need to emit
2628 before the table, subtract 2 for the instruction that fills the jump
2629 delay slot. This gives 1018. */
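/* I.e. hi_limit = 510 + 4 - 2 - 2 = 510 and
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018, matching the initializations
   below.  */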
2630
2631 /* The branch will always be shortened now that the reference address for
2632 forward branches is the successor address, thus we need no longer make
2633 adjustments to the [sh]i_limit for -O0. */
2634
2635 si_limit = 1018;
2636 hi_limit = 510;
2637
2638 while (from && count_si < si_limit && count_hi < hi_limit)
2639 {
2640 int inc = get_attr_length (from);
2641 int new_align = 1;
2642
2643 if (GET_CODE (from) == CODE_LABEL)
2644 {
2645 if (optimize)
2646 new_align = 1 << label_to_alignment (from);
2647 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2648 new_align = 1 << barrier_align (from);
2649 else
2650 new_align = 1;
2651 inc = 0;
2652 }
2653
2654 if (GET_CODE (from) == BARRIER)
2655 {
2656
2657 found_barrier = from;
2658
2659 /* If we are at the end of the function, or in front of an alignment
2660 instruction, we need not insert an extra alignment. We prefer
2661 this kind of barrier. */
2662 if (barrier_align (from) > 2)
2663 good_barrier = from;
2664 }
2665
2666 if (broken_move (from))
2667 {
2668 rtx pat, src, dst;
2669 enum machine_mode mode;
2670
2671 pat = PATTERN (from);
2672 if (GET_CODE (pat) == PARALLEL)
2673 pat = XVECEXP (pat, 0, 0);
2674 src = SET_SRC (pat);
2675 dst = SET_DEST (pat);
2676 mode = GET_MODE (dst);
2677
2678 /* We must explicitly check the mode, because sometimes the
2679 front end will generate code to load unsigned constants into
2680 HImode targets without properly sign extending them. */
2681 if (mode == HImode
2682 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2683 {
2684 found_hi += 2;
2685 /* We put the short constants before the long constants, so
2686 we must count the length of short constants in the range
2687 for the long constants. */
2688 /* ??? This isn't optimal, but is easy to do. */
2689 si_limit -= 2;
2690 }
2691 else
2692 {
2693 /* We dump DF/DI constants before SF/SI ones, because
2694 the limit is the same, but the alignment requirements
2695 are higher. We may waste up to 4 additional bytes
2696 for alignment, and the DF/DI constant may have
2697 another SF/SI constant placed before it. */
2698 if (TARGET_SHCOMPACT
2699 && ! found_di
2700 && (mode == DFmode || mode == DImode))
2701 {
2702 found_di = 1;
2703 si_limit -= 8;
2704 }
2705 while (si_align > 2 && found_si + si_align - 2 > count_si)
2706 si_align >>= 1;
2707 if (found_si > count_si)
2708 count_si = found_si;
2709 found_si += GET_MODE_SIZE (mode);
2710 if (num_mova)
2711 si_limit -= GET_MODE_SIZE (mode);
2712 }
2713
2714 /* See the code in machine_dependent_reorg, which has a similar if
2715 statement that generates a new mova insn in many cases. */
2716 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
2717 inc += 2;
2718 }
2719
2720 if (mova_p (from))
2721 {
2722 if (! num_mova++)
2723 {
2724 leading_mova = 0;
2725 mova = from;
2726 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
2727 }
2728 if (found_si > count_si)
2729 count_si = found_si;
2730 }
2731 else if (GET_CODE (from) == JUMP_INSN
2732 && (GET_CODE (PATTERN (from)) == ADDR_VEC
2733 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
2734 {
2735 if (num_mova)
2736 num_mova--;
2737 if (barrier_align (next_real_insn (from)) == CACHE_LOG)
2738 {
2739 /* We have just passed the barrier in front of the
2740 ADDR_DIFF_VEC, which is stored in found_barrier. Since
2741 the ADDR_DIFF_VEC is accessed as data, just like our pool
2742 constants, this is a good opportunity to accommodate what
2743 we have gathered so far.
2744 If we waited any longer, we could end up at a barrier in
2745 front of code, which gives worse cache usage for separated
2746 instruction / data caches. */
2747 good_barrier = found_barrier;
2748 break;
2749 }
2750 else
2751 {
2752 rtx body = PATTERN (from);
2753 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
2754 }
2755 }
2756 /* For the SH1, we generate alignments even after jumps-around-jumps. */
2757 else if (GET_CODE (from) == JUMP_INSN
2758 && ! TARGET_SH2
2759 && ! TARGET_SMALLCODE)
2760 new_align = 4;
2761
2762 if (found_si)
2763 {
2764 count_si += inc;
2765 if (new_align > si_align)
2766 {
2767 si_limit -= (count_si - 1) & (new_align - si_align);
2768 si_align = new_align;
2769 }
2770 count_si = (count_si + new_align - 1) & -new_align;
2771 }
2772 if (found_hi)
2773 {
2774 count_hi += inc;
2775 if (new_align > hi_align)
2776 {
2777 hi_limit -= (count_hi - 1) & (new_align - hi_align);
2778 hi_align = new_align;
2779 }
2780 count_hi = (count_hi + new_align - 1) & -new_align;
2781 }
2782 from = NEXT_INSN (from);
2783 }
2784
2785 if (num_mova)
2786 {
2787 if (leading_mova)
2788 {
2789 /* Try as we might, the leading mova is out of range. Change
2790 it into a load (which will become a pcload) and retry. */
2791 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
2792 INSN_CODE (mova) = -1;
2793 return find_barrier (0, 0, mova);
2794 }
2795 else
2796 {
2797 /* Insert the constant pool table before the mova instruction,
2798 to prevent the mova label reference from going out of range. */
2799 from = mova;
2800 good_barrier = found_barrier = barrier_before_mova;
2801 }
2802 }
2803
2804 if (found_barrier)
2805 {
2806 if (good_barrier && next_real_insn (found_barrier))
2807 found_barrier = good_barrier;
2808 }
2809 else
2810 {
2811 /* We didn't find a barrier in time to dump our stuff,
2812 so we'll make one. */
2813 rtx label = gen_label_rtx ();
2814
2815 /* If we exceeded the range, then we must back up over the last
2816 instruction we looked at. Otherwise, we just need to undo the
2817 NEXT_INSN at the end of the loop. */
2818 if (count_hi > hi_limit || count_si > si_limit)
2819 from = PREV_INSN (PREV_INSN (from));
2820 else
2821 from = PREV_INSN (from);
2822
2823 /* Walk back to be just before any jump or label.
2824 Putting it before a label reduces the number of times the branch
2825 around the constant pool table will be hit. Putting it before
2826 a jump makes it more likely that the bra delay slot will be
2827 filled. */
2828 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
2829 || GET_CODE (from) == CODE_LABEL)
2830 from = PREV_INSN (from);
2831
2832 from = emit_jump_insn_after (gen_jump (label), from);
2833 JUMP_LABEL (from) = label;
2834 LABEL_NUSES (label) = 1;
2835 found_barrier = emit_barrier_after (from);
2836 emit_label_after (label, found_barrier);
2837 }
2838
2839 return found_barrier;
2840 }
2841
2842 /* If the instruction INSN is implemented by a special function, and we can
2843 positively find the register that is used to call the sfunc, and this
2844 register is not used anywhere else in this instruction - except as the
2845 destination of a set, return this register; else, return 0. */
2846 rtx
2847 sfunc_uses_reg (insn)
2848 rtx insn;
2849 {
2850 int i;
2851 rtx pattern, part, reg_part, reg;
2852
2853 if (GET_CODE (insn) != INSN)
2854 return 0;
2855 pattern = PATTERN (insn);
2856 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
2857 return 0;
2858
2859 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2860 {
2861 part = XVECEXP (pattern, 0, i);
2862 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
2863 reg_part = part;
2864 }
2865 if (! reg_part)
2866 return 0;
2867 reg = XEXP (reg_part, 0);
2868 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
2869 {
2870 part = XVECEXP (pattern, 0, i);
2871 if (part == reg_part || GET_CODE (part) == CLOBBER)
2872 continue;
2873 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
2874 && GET_CODE (SET_DEST (part)) == REG)
2875 ? SET_SRC (part) : part)))
2876 return 0;
2877 }
2878 return reg;
2879 }
2880
2881 /* See if the only way in which INSN uses REG is by calling it, or by
2882 setting it while calling it. Set *SET to a SET rtx if the register
2883 is set by INSN. */
2884
2885 static int
2886 noncall_uses_reg (reg, insn, set)
2887 rtx reg;
2888 rtx insn;
2889 rtx *set;
2890 {
2891 rtx pattern, reg2;
2892
2893 *set = NULL_RTX;
2894
2895 reg2 = sfunc_uses_reg (insn);
2896 if (reg2 && REGNO (reg2) == REGNO (reg))
2897 {
2898 pattern = single_set (insn);
2899 if (pattern
2900 && GET_CODE (SET_DEST (pattern)) == REG
2901 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2902 *set = pattern;
2903 return 0;
2904 }
2905 if (GET_CODE (insn) != CALL_INSN)
2906 {
2907 /* We don't use rtx_equal_p because we don't care if the mode is
2908 different. */
2909 pattern = single_set (insn);
2910 if (pattern
2911 && GET_CODE (SET_DEST (pattern)) == REG
2912 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2913 {
2914 rtx par, part;
2915 int i;
2916
2917 *set = pattern;
2918 par = PATTERN (insn);
2919 if (GET_CODE (par) == PARALLEL)
2920 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
2921 {
2922 part = XVECEXP (par, 0, i);
2923 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
2924 return 1;
2925 }
2926 return reg_mentioned_p (reg, SET_SRC (pattern));
2927 }
2928
2929 return 1;
2930 }
2931
2932 pattern = PATTERN (insn);
2933
2934 if (GET_CODE (pattern) == PARALLEL)
2935 {
2936 int i;
2937
2938 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2939 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
2940 return 1;
2941 pattern = XVECEXP (pattern, 0, 0);
2942 }
2943
2944 if (GET_CODE (pattern) == SET)
2945 {
2946 if (reg_mentioned_p (reg, SET_DEST (pattern)))
2947 {
2948 /* We don't use rtx_equal_p, because we don't care if the
2949 mode is different. */
2950 if (GET_CODE (SET_DEST (pattern)) != REG
2951 || REGNO (reg) != REGNO (SET_DEST (pattern)))
2952 return 1;
2953
2954 *set = pattern;
2955 }
2956
2957 pattern = SET_SRC (pattern);
2958 }
2959
2960 if (GET_CODE (pattern) != CALL
2961 || GET_CODE (XEXP (pattern, 0)) != MEM
2962 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
2963 return 1;
2964
2965 return 0;
2966 }
2967
2968 /* Given X, a pattern of an insn or a part of it, return a mask of used
2969 general registers. Bits 0..15 mean that the respective registers
2970 are used as inputs in the instruction. Bits 16..31 mean that the
2971 registers 0..15, respectively, are used as outputs, or are clobbered.
2972 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
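/* For example, for a SET of r1 from r2 in SImode (each value occupying a
   single hard register), the result has bit 2 set for the input use of r2
   and bit 17 set for the output use of r1, i.e. 0x20004.  */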
2973 int
2974 regs_used (x, is_dest)
2975 rtx x; int is_dest;
2976 {
2977 enum rtx_code code;
2978 const char *fmt;
2979 int i, used = 0;
2980
2981 if (! x)
2982 return used;
2983 code = GET_CODE (x);
2984 switch (code)
2985 {
2986 case REG:
2987 if (REGNO (x) < 16)
2988 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2989 << (REGNO (x) + is_dest));
2990 return 0;
2991 case SUBREG:
2992 {
2993 rtx y = SUBREG_REG (x);
2994
2995 if (GET_CODE (y) != REG)
2996 break;
2997 if (REGNO (y) < 16)
2998 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2999 << (REGNO (y) +
3000 subreg_regno_offset (REGNO (y),
3001 GET_MODE (y),
3002 SUBREG_BYTE (x),
3003 GET_MODE (x)) + is_dest));
3004 return 0;
3005 }
3006 case SET:
3007 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3008 case RETURN:
3009 /* If there was a return value, it must have been indicated with USE. */
3010 return 0x00ffff00;
3011 case CLOBBER:
3012 is_dest = 1;
3013 break;
3014 case MEM:
3015 is_dest = 0;
3016 break;
3017 case CALL:
3018 used |= 0x00ff00f0;
3019 break;
3020 default:
3021 break;
3022 }
3023
3024 fmt = GET_RTX_FORMAT (code);
3025
3026 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3027 {
3028 if (fmt[i] == 'E')
3029 {
3030 register int j;
3031 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3032 used |= regs_used (XVECEXP (x, i, j), is_dest);
3033 }
3034 else if (fmt[i] == 'e')
3035 used |= regs_used (XEXP (x, i), is_dest);
3036 }
3037 return used;
3038 }
3039
3040 /* Create an instruction that prevents redirection of a conditional branch
3041 to the destination of the JUMP with address ADDR.
3042 If the branch needs to be implemented as an indirect jump, try to find
3043 a scratch register for it.
3044 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3045 If any preceding insn that doesn't fit into a delay slot is good enough,
3046 pass 1. Pass 2 if a definite blocking insn is needed.
3047 -1 is used internally to avoid deep recursion.
3048 If a blocking instruction is made or recognized, return it. */
3049
3050 static rtx
3051 gen_block_redirect (jump, addr, need_block)
3052 rtx jump;
3053 int addr, need_block;
3054 {
3055 int dead = 0;
3056 rtx prev = prev_nonnote_insn (jump);
3057 rtx dest;
3058
3059 /* First, check if we already have an instruction that satisfies our need. */
3060 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3061 {
3062 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3063 return prev;
3064 if (GET_CODE (PATTERN (prev)) == USE
3065 || GET_CODE (PATTERN (prev)) == CLOBBER
3066 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3067 prev = jump;
3068 else if ((need_block &= ~1) < 0)
3069 return prev;
3070 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3071 need_block = 0;
3072 }
3073 /* We can't use JUMP_LABEL here because it might be undefined
3074 when not optimizing. */
3075 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3076 /* If the branch is out of range, try to find a scratch register for it. */
3077 if (optimize
3078 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3079 > 4092 + 4098))
3080 {
3081 rtx scan;
3082 /* Don't look for the stack pointer as a scratch register,
3083 it would cause trouble if an interrupt occurred. */
3084 unsigned try = 0x7fff, used;
3085 int jump_left = flag_expensive_optimizations + 1;
3086
3087 /* It is likely that the most recent eligible instruction is wanted for
3088 the delay slot. Therefore, find out which registers it uses, and
3089 try to avoid using them. */
3090
3091 for (scan = jump; (scan = PREV_INSN (scan)); )
3092 {
3093 enum rtx_code code;
3094
3095 if (INSN_DELETED_P (scan))
3096 continue;
3097 code = GET_CODE (scan);
3098 if (code == CODE_LABEL || code == JUMP_INSN)
3099 break;
3100 if (code == INSN
3101 && GET_CODE (PATTERN (scan)) != USE
3102 && GET_CODE (PATTERN (scan)) != CLOBBER
3103 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3104 {
3105 try &= ~regs_used (PATTERN (scan), 0);
3106 break;
3107 }
3108 }
3109 for (used = dead = 0, scan = JUMP_LABEL (jump);
3110 (scan = NEXT_INSN (scan)); )
3111 {
3112 enum rtx_code code;
3113
3114 if (INSN_DELETED_P (scan))
3115 continue;
3116 code = GET_CODE (scan);
3117 if (GET_RTX_CLASS (code) == 'i')
3118 {
3119 used |= regs_used (PATTERN (scan), 0);
3120 if (code == CALL_INSN)
3121 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3122 dead |= (used >> 16) & ~used;
3123 if (dead & try)
3124 {
3125 dead &= try;
3126 break;
3127 }
3128 if (code == JUMP_INSN)
3129 {
3130 if (jump_left-- && simplejump_p (scan))
3131 scan = JUMP_LABEL (scan);
3132 else
3133 break;
3134 }
3135 }
3136 }
3137 /* Mask out the stack pointer again, in case it was
3138 the only 'free' register we have found. */
3139 dead &= 0x7fff;
3140 }
3141 /* If the immediate destination is still in range, check for possible
3142 threading with a jump beyond the delay slot insn.
3143 Don't check if we are called recursively; the jump has been or will be
3144 checked in a different invocation. */
3145
3146 else if (optimize && need_block >= 0)
3147 {
3148 rtx next = next_active_insn (next_active_insn (dest));
3149 if (next && GET_CODE (next) == JUMP_INSN
3150 && GET_CODE (PATTERN (next)) == SET
3151 && recog_memoized (next) == CODE_FOR_jump)
3152 {
3153 dest = JUMP_LABEL (next);
3154 if (dest
3155 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3156 > 4092 + 4098))
3157 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3158 }
3159 }
3160
3161 if (dead)
3162 {
3163 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3164
3165 /* It would be nice if we could convert the jump into an indirect
3166 jump / far branch right now, thus exposing all constituent
3167 instructions to further optimization. However, reorg uses
3168 simplejump_p to determine if there is an unconditional jump where
3169 it should try to schedule instructions from the target of the
3170 branch; simplejump_p fails for indirect jumps even if they have
3171 a JUMP_LABEL. */
3172 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3173 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3174 , jump);
3175 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3176 return insn;
3177 }
3178 else if (need_block)
3179 /* We can't use JUMP_LABEL here because it might be undefined
3180 when not optimizing. */
3181 return emit_insn_before (gen_block_branch_redirect
3182 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3183 , jump);
3184 return prev;
3185 }
3186
3187 #define CONDJUMP_MIN -252
3188 #define CONDJUMP_MAX 262
3189 struct far_branch
3190 {
3191 /* A label (to be placed) in front of the jump
3192 that jumps to our ultimate destination. */
3193 rtx near_label;
3194 /* Where we are going to insert it if we cannot move the jump any farther,
3195 or the jump itself if we have picked up an existing jump. */
3196 rtx insert_place;
3197 /* The ultimate destination. */
3198 rtx far_label;
3199 struct far_branch *prev;
3200 /* If the branch has already been created, its address;
3201 else the address of its first prospective user. */
3202 int address;
3203 };
3204
3205 static void gen_far_branch PARAMS ((struct far_branch *));
3206 enum mdep_reorg_phase_e mdep_reorg_phase;
3207 static void
3208 gen_far_branch (bp)
3209 struct far_branch *bp;
3210 {
3211 rtx insn = bp->insert_place;
3212 rtx jump;
3213 rtx label = gen_label_rtx ();
3214
3215 emit_label_after (label, insn);
3216 if (bp->far_label)
3217 {
3218 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3219 LABEL_NUSES (bp->far_label)++;
3220 }
3221 else
3222 jump = emit_jump_insn_after (gen_return (), insn);
3223 /* Emit a barrier so that reorg knows that any following instructions
3224 are not reachable via a fall-through path.
3225 But don't do this when not optimizing, since we wouldn't suppress the
3226 alignment for the barrier then, and could end up with out-of-range
3227 pc-relative loads. */
3228 if (optimize)
3229 emit_barrier_after (jump);
3230 emit_label_after (bp->near_label, insn);
3231 JUMP_LABEL (jump) = bp->far_label;
3232 if (! invert_jump (insn, label, 1))
3233 abort ();
3234 (emit_insn_after
3235 (gen_stuff_delay_slot
3236 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3237 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3238 insn));
3239 /* Prevent reorg from undoing our splits. */
3240 gen_block_redirect (jump, bp->address += 2, 2);
3241 }
3242
3243 /* Fix up ADDR_DIFF_VECs. */
3244 void
3245 fixup_addr_diff_vecs (first)
3246 rtx first;
3247 {
3248 rtx insn;
3249
3250 for (insn = first; insn; insn = NEXT_INSN (insn))
3251 {
3252 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3253
3254 if (GET_CODE (insn) != JUMP_INSN
3255 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3256 continue;
3257 pat = PATTERN (insn);
3258 vec_lab = XEXP (XEXP (pat, 0), 0);
3259
3260 /* Search the matching casesi_jump_2. */
3261 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3262 {
3263 if (GET_CODE (prev) != JUMP_INSN)
3264 continue;
3265 prevpat = PATTERN (prev);
3266 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3267 continue;
3268 x = XVECEXP (prevpat, 0, 1);
3269 if (GET_CODE (x) != USE)
3270 continue;
3271 x = XEXP (x, 0);
3272 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3273 break;
3274 }
3275
3276 /* Emit the reference label of the braf where it belongs, right after
3277 the casesi_jump_2 (i.e. braf). */
3278 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3279 emit_label_after (braf_label, prev);
3280
3281 /* Fix up the ADDR_DIFF_VEC to be relative
3282 to the reference address of the braf. */
3283 XEXP (XEXP (pat, 0), 0) = braf_label;
3284 }
3285 }
3286
3287 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3288 a barrier. Return the base 2 logarithm of the desired alignment. */
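/* E.g. a return value of 2 requests 4 byte alignment, and a return value
   of 0 requests no extra alignment.  */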
3289 int
3290 barrier_align (barrier_or_label)
3291 rtx barrier_or_label;
3292 {
3293 rtx next = next_real_insn (barrier_or_label), pat, prev;
3294 int slot, credit, jump_to_next;
3295
3296 if (! next)
3297 return 0;
3298
3299 pat = PATTERN (next);
3300
3301 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3302 return 2;
3303
3304 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3305 /* This is a barrier in front of a constant table. */
3306 return 0;
3307
3308 prev = prev_real_insn (barrier_or_label);
3309 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3310 {
3311 pat = PATTERN (prev);
3312 /* If this is a very small table, we want to keep the alignment after
3313 the table to the minimum for proper code alignment. */
3314 return ((TARGET_SMALLCODE
3315 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3316 <= (unsigned)1 << (CACHE_LOG - 2)))
3317 ? 1 << TARGET_SHMEDIA : CACHE_LOG);
3318 }
3319
3320 if (TARGET_SMALLCODE)
3321 return 0;
3322
3323 if (! TARGET_SH2 || ! optimize)
3324 return CACHE_LOG;
3325
3326 /* When fixing up pcloads, a constant table might be inserted just before
3327 the basic block that ends with the barrier. Thus, we can't trust the
3328 instruction lengths before that. */
3329 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3330 {
3331 /* Check if there is an immediately preceding branch to the insn beyond
3332 the barrier. We must weigh the cost of discarding useful information
3333 from the current cache line when executing this branch and there is
3334 an alignment, against that of fetching unneeded insns in front of the
3335 branch target when there is no alignment. */
3336
3337 /* There are two delay_slot cases to consider. One is the simple case
3338 where the preceding branch is to the insn beyond the barrier (simple
3339 delay slot filling), and the other is where the preceding branch has
3340 a delay slot that is a duplicate of the insn after the barrier
3341 (fill_eager_delay_slots) and the branch is to the insn after the insn
3342 after the barrier. */
3343
3344 /* PREV is presumed to be the JUMP_INSN for the barrier under
3345 investigation. Skip to the insn before it. */
3346 prev = prev_real_insn (prev);
3347
3348 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3349 credit >= 0 && prev && GET_CODE (prev) == INSN;
3350 prev = prev_real_insn (prev))
3351 {
3352 jump_to_next = 0;
3353 if (GET_CODE (PATTERN (prev)) == USE
3354 || GET_CODE (PATTERN (prev)) == CLOBBER)
3355 continue;
3356 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3357 {
3358 prev = XVECEXP (PATTERN (prev), 0, 1);
3359 if (INSN_UID (prev) == INSN_UID (next))
3360 {
3361 /* Delay slot was filled with insn at jump target. */
3362 jump_to_next = 1;
3363 continue;
3364 }
3365 }
3366
3367 if (slot &&
3368 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3369 slot = 0;
3370 credit -= get_attr_length (prev);
3371 }
3372 if (prev
3373 && GET_CODE (prev) == JUMP_INSN
3374 && JUMP_LABEL (prev))
3375 {
3376 rtx x;
3377 if (jump_to_next
3378 || next_real_insn (JUMP_LABEL (prev)) == next
3379 /* If relax_delay_slots() decides NEXT was redundant
3380 with some previous instruction, it will have
3381 redirected PREV's jump to the following insn. */
3382 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3383 /* There is no upper bound on redundant instructions
3384 that might have been skipped, but we must not put an
3385 alignment where none had been before. */
3386 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3387 (INSN_P (x)
3388 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3389 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch))))
3390 {
3391 rtx pat = PATTERN (prev);
3392 if (GET_CODE (pat) == PARALLEL)
3393 pat = XVECEXP (pat, 0, 0);
3394 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3395 return 0;
3396 }
3397 }
3398 }
3399
3400 return CACHE_LOG;
3401 }
3402
3403 /* If we are inside a phony loop, almost any kind of label can turn up as the
3404 first one in the loop. Aligning a braf label causes incorrect switch
3405 destination addresses; we can detect braf labels because they are
3406 followed by a BARRIER.
3407 Applying loop alignment to small constant or switch tables is a waste
3408 of space, so we suppress this too. */
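/* As with barrier_align, the value returned is the base 2 logarithm of
   the desired alignment.  */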
3409 int
3410 sh_loop_align (label)
3411 rtx label;
3412 {
3413 rtx next = label;
3414
3415 do
3416 next = next_nonnote_insn (next);
3417 while (next && GET_CODE (next) == CODE_LABEL);
3418
3419 if (! next
3420 || ! INSN_P (next)
3421 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3422 || recog_memoized (next) == CODE_FOR_consttable_2)
3423 return 0;
3424
3425 if (TARGET_SH5)
3426 return 3;
3427
3428 return 2;
3429 }
3430
3431 /* Exported to toplev.c.
3432
3433 Do a final pass over the function, just before delayed branch
3434 scheduling. */
3435
3436 void
3437 machine_dependent_reorg (first)
3438 rtx first;
3439 {
3440 rtx insn, mova;
3441 int num_mova;
3442 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3443 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3444
3445 /* We must split call insns before introducing `mova's. If we're
3446 optimizing, they'll have already been split. Otherwise, make
3447 sure we don't split them too late. */
3448 if (! optimize)
3449 split_all_insns_noflow ();
3450
3451 if (TARGET_SHMEDIA)
3452 return;
3453
3454 /* If relaxing, generate pseudo-ops to associate function calls with
3455 the symbols they call. It does no harm to not generate these
3456 pseudo-ops. However, when we can generate them, it enables the
3457 linker to potentially relax the jsr to a bsr, and eliminate the
3458 register load and, possibly, the constant pool entry. */
3459
3460 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3461 if (TARGET_RELAX)
3462 {
3463 /* Remove all REG_LABEL notes. We want to use them for our own
3464 purposes. This works because none of the remaining passes
3465 need to look at them.
3466
3467 ??? But it may break in the future. We should use a machine
3468 dependent REG_NOTE, or some other approach entirely. */
3469 for (insn = first; insn; insn = NEXT_INSN (insn))
3470 {
3471 if (INSN_P (insn))
3472 {
3473 rtx note;
3474
3475 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3476 remove_note (insn, note);
3477 }
3478 }
3479
3480 for (insn = first; insn; insn = NEXT_INSN (insn))
3481 {
3482 rtx pattern, reg, link, set, scan, dies, label;
3483 int rescan = 0, foundinsn = 0;
3484
3485 if (GET_CODE (insn) == CALL_INSN)
3486 {
3487 pattern = PATTERN (insn);
3488
3489 if (GET_CODE (pattern) == PARALLEL)
3490 pattern = XVECEXP (pattern, 0, 0);
3491 if (GET_CODE (pattern) == SET)
3492 pattern = SET_SRC (pattern);
3493
3494 if (GET_CODE (pattern) != CALL
3495 || GET_CODE (XEXP (pattern, 0)) != MEM)
3496 continue;
3497
3498 reg = XEXP (XEXP (pattern, 0), 0);
3499 }
3500 else
3501 {
3502 reg = sfunc_uses_reg (insn);
3503 if (! reg)
3504 continue;
3505 }
3506
3507 if (GET_CODE (reg) != REG)
3508 continue;
3509
3510 /* This is a function call via REG. If the only uses of REG
3511 between the time that it is set and the time that it dies
3512 are in function calls, then we can associate all the
3513 function calls with the setting of REG. */
3514
3515 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3516 {
3517 if (REG_NOTE_KIND (link) != 0)
3518 continue;
3519 set = single_set (XEXP (link, 0));
3520 if (set && rtx_equal_p (reg, SET_DEST (set)))
3521 {
3522 link = XEXP (link, 0);
3523 break;
3524 }
3525 }
3526
3527 if (! link)
3528 {
3529 /* ??? Sometimes global register allocation will have
3530 deleted the insn pointed to by LOG_LINKS. Try
3531 scanning backward to find where the register is set. */
3532 for (scan = PREV_INSN (insn);
3533 scan && GET_CODE (scan) != CODE_LABEL;
3534 scan = PREV_INSN (scan))
3535 {
3536 if (! INSN_P (scan))
3537 continue;
3538
3539 if (! reg_mentioned_p (reg, scan))
3540 continue;
3541
3542 if (noncall_uses_reg (reg, scan, &set))
3543 break;
3544
3545 if (set)
3546 {
3547 link = scan;
3548 break;
3549 }
3550 }
3551 }
3552
3553 if (! link)
3554 continue;
3555
3556 /* The register is set at LINK. */
3557
3558 /* We can only optimize the function call if the register is
3559 being set to a symbol. In theory, we could sometimes
3560 optimize calls to a constant location, but the assembler
3561 and linker do not support that at present. */
3562 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3563 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3564 continue;
3565
3566 /* Scan forward from LINK to the place where REG dies, and
3567 make sure that the only insns which use REG are
3568 themselves function calls. */
3569
3570 /* ??? This doesn't work for call targets that were allocated
3571 by reload, since there may not be a REG_DEAD note for the
3572 register. */
3573
3574 dies = NULL_RTX;
3575 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3576 {
3577 rtx scanset;
3578
3579 /* Don't try to trace forward past a CODE_LABEL if we haven't
3580 seen INSN yet. Ordinarily, we will only find the setting insn
3581 in LOG_LINKS if it is in the same basic block. However,
3582 cross-jumping can insert code labels in between the load and
3583 the call, and can result in situations where a single call
3584 insn may have two targets depending on where we came from. */
3585
3586 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3587 break;
3588
3589 if (! INSN_P (scan))
3590 continue;
3591
3592 /* Don't try to trace forward past a JUMP. To optimize
3593 safely, we would have to check that all the
3594 instructions at the jump destination did not use REG. */
3595
3596 if (GET_CODE (scan) == JUMP_INSN)
3597 break;
3598
3599 if (! reg_mentioned_p (reg, scan))
3600 continue;
3601
3602 if (noncall_uses_reg (reg, scan, &scanset))
3603 break;
3604
3605 if (scan == insn)
3606 foundinsn = 1;
3607
3608 if (scan != insn
3609 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3610 {
3611 /* There is a function call to this register other
3612 than the one we are checking. If we optimize
3613 this call, we need to rescan again below. */
3614 rescan = 1;
3615 }
3616
3617 /* ??? We shouldn't have to worry about SCANSET here.
3618 We should just be able to check for a REG_DEAD note
3619 on a function call. However, the REG_DEAD notes are
3620 apparently not dependable around libcalls; c-torture
3621 execute/920501-2 is a test case. If SCANSET is set,
3622 then this insn sets the register, so it must have
3623 died earlier. Unfortunately, this will only handle
3624 the cases in which the register is, in fact, set in a
3625 later insn. */
3626
3627 /* ??? We shouldn't have to use FOUNDINSN here.
3628 However, the LOG_LINKS fields are apparently not
3629 entirely reliable around libcalls;
3630 newlib/libm/math/e_pow.c is a test case. Sometimes
3631 an insn will appear in LOG_LINKS even though it is
3632 not the most recent insn which sets the register. */
3633
3634 if (foundinsn
3635 && (scanset
3636 || find_reg_note (scan, REG_DEAD, reg)))
3637 {
3638 dies = scan;
3639 break;
3640 }
3641 }
3642
3643 if (! dies)
3644 {
3645 /* Either there was a branch, or some insn used REG
3646 other than as a function call address. */
3647 continue;
3648 }
3649
3650 /* Create a code label, and put it in a REG_LABEL note on
3651 the insn which sets the register, and on each call insn
3652 which uses the register. In final_prescan_insn we look
3653 for the REG_LABEL notes, and output the appropriate label
3654 or pseudo-op. */
3655
3656 label = gen_label_rtx ();
3657 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
3658 REG_NOTES (link));
3659 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
3660 REG_NOTES (insn));
3661 if (rescan)
3662 {
3663 scan = link;
3664 do
3665 {
3666 rtx reg2;
3667
3668 scan = NEXT_INSN (scan);
3669 if (scan != insn
3670 && ((GET_CODE (scan) == CALL_INSN
3671 && reg_mentioned_p (reg, scan))
3672 || ((reg2 = sfunc_uses_reg (scan))
3673 && REGNO (reg2) == REGNO (reg))))
3674 REG_NOTES (scan)
3675 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
3676 }
3677 while (scan != dies);
3678 }
3679 }
3680 }
3681
3682 if (TARGET_SH2)
3683 fixup_addr_diff_vecs (first);
3684
3685 if (optimize)
3686 {
3687 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3688 shorten_branches (first);
3689 }
3690 /* Scan the function looking for move instructions which have to be
3691 changed to pc-relative loads and insert the literal tables. */
3692
3693 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3694 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3695 {
3696 if (mova_p (insn))
3697 {
3698 if (! num_mova++)
3699 mova = insn;
3700 }
3701 else if (GET_CODE (insn) == JUMP_INSN
3702 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
3703 && num_mova)
3704 {
3705 rtx scan;
3706 int total;
3707
3708 num_mova--;
3709
3710 /* Some code might have been inserted between the mova and
3711 its ADDR_DIFF_VEC. Check if the mova is still in range. */
3712 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
3713 total += get_attr_length (scan);
3714
3715 /* range of mova is 1020, add 4 because pc counts from address of
3716 second instruction after this one, subtract 2 in case pc is 2
3717 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
3718 cancels out with alignment effects of the mova itself. */
3719 if (total > 1022)
3720 {
3721 /* Change the mova into a load, and restart scanning
3722 there. broken_move will then return true for mova. */
3723 SET_SRC (PATTERN (mova))
3724 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3725 INSN_CODE (mova) = -1;
3726 insn = mova;
3727 }
3728 }
3729 if (broken_move (insn))
3730 {
3731 rtx scan;
3732 /* Scan ahead looking for a barrier to stick the constant table
3733 behind. */
3734 rtx barrier = find_barrier (num_mova, mova, insn);
3735 rtx last_float_move, last_float = 0, *last_float_addr;
3736
3737 if (num_mova && ! mova_p (mova))
3738 {
3739 /* find_barrier had to change the first mova into a
3740 pcload; thus, we have to start with this new pcload. */
3741 insn = mova;
3742 num_mova = 0;
3743 }
3744 /* Now find all the moves between the points and modify them. */
3745 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
3746 {
3747 if (GET_CODE (scan) == CODE_LABEL)
3748 last_float = 0;
3749 if (broken_move (scan))
3750 {
3751 rtx *patp = &PATTERN (scan), pat = *patp;
3752 rtx src, dst;
3753 rtx lab;
3754 rtx newsrc;
3755 enum machine_mode mode;
3756
3757 if (GET_CODE (pat) == PARALLEL)
3758 patp = &XVECEXP (pat, 0, 0), pat = *patp;
3759 src = SET_SRC (pat);
3760 dst = SET_DEST (pat);
3761 mode = GET_MODE (dst);
3762
3763 if (mode == SImode && hi_const (src)
3764 && REGNO (dst) != FPUL_REG)
3765 {
3766 int offset = 0;
3767
3768 mode = HImode;
3769 while (GET_CODE (dst) == SUBREG)
3770 {
3771 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
3772 GET_MODE (SUBREG_REG (dst)),
3773 SUBREG_BYTE (dst),
3774 GET_MODE (dst));
3775 dst = SUBREG_REG (dst);
3776 }
3777 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
3778 }
3779
3780 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3781 {
3782 /* This must be an insn that clobbers r0. */
3783 rtx clobber = XVECEXP (PATTERN (scan), 0,
3784 XVECLEN (PATTERN (scan), 0) - 1);
3785
3786 if (GET_CODE (clobber) != CLOBBER
3787 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
3788 abort ();
3789
3790 if (last_float
3791 && reg_set_between_p (r0_rtx, last_float_move, scan))
3792 last_float = 0;
3793 if (last_float
3794 && TARGET_SHCOMPACT
3795 && GET_MODE_SIZE (mode) != 4
3796 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
3797 last_float = 0;
3798 lab = add_constant (src, mode, last_float);
3799 if (lab)
3800 emit_insn_before (gen_mova (lab), scan);
3801 else
3802 {
3803 /* There will be a REG_UNUSED note for r0 on
3804 LAST_FLOAT_MOVE; we have to change it to REG_INC,
3805 otherwise reorg's mark_target_live_regs will not
3806 consider r0 to be used, and we would end up with a
3807 delay slot insn in front of SCAN that clobbers r0. */
3808 rtx note
3809 = find_regno_note (last_float_move, REG_UNUSED, 0);
3810
3811 /* If we are not optimizing, then there may not be
3812 a note. */
3813 if (note)
3814 PUT_MODE (note, REG_INC);
3815
3816 *last_float_addr = r0_inc_rtx;
3817 }
3818 last_float_move = scan;
3819 last_float = src;
3820 newsrc = gen_rtx (MEM, mode,
3821 (((TARGET_SH4 && ! TARGET_FMOVD)
3822 || REGNO (dst) == FPUL_REG)
3823 ? r0_inc_rtx
3824 : r0_rtx));
3825 last_float_addr = &XEXP (newsrc, 0);
3826
3827 /* Remove the clobber of r0. */
3828 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
3829 RTX_UNCHANGING_P (newsrc) = 1;
3830 }
3831 /* This is a mova needing a label. Create it. */
3832 else if (GET_CODE (src) == UNSPEC
3833 && XINT (src, 1) == UNSPEC_MOVA
3834 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
3835 {
3836 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
3837 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
3838 newsrc = gen_rtx_UNSPEC (SImode,
3839 gen_rtvec (1, newsrc),
3840 UNSPEC_MOVA);
3841 }
3842 else
3843 {
3844 lab = add_constant (src, mode, 0);
3845 newsrc = gen_rtx_MEM (mode,
3846 gen_rtx_LABEL_REF (VOIDmode, lab));
3847 RTX_UNCHANGING_P (newsrc) = 1;
3848 }
3849 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
3850 INSN_CODE (scan) = -1;
3851 }
3852 }
3853 dump_table (barrier);
3854 insn = barrier;
3855 }
3856 }
3857
3858 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
3859 INSN_ADDRESSES_FREE ();
3860 split_branches (first);
3861
3862 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
3863 also has an effect on the register that holds the address of the sfunc.
3864 Insert an extra dummy insn in front of each sfunc that pretends to
3865 use this register. */
3866 if (flag_delayed_branch)
3867 {
3868 for (insn = first; insn; insn = NEXT_INSN (insn))
3869 {
3870 rtx reg = sfunc_uses_reg (insn);
3871
3872 if (! reg)
3873 continue;
3874 emit_insn_before (gen_use_sfunc_addr (reg), insn);
3875 }
3876 }
3877 #if 0
3878 /* fpscr is not actually a user variable, but we pretend it is for the
3879 sake of the previous optimization passes, since we want it handled like
3880 one. However, we don't have any debugging information for it, so turn
3881 it into a non-user variable now. */
3882 if (TARGET_SH4)
3883 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
3884 #endif
3885 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
3886 }
3887
3888 int
3889 get_dest_uid (label, max_uid)
3890 rtx label;
3891 int max_uid;
3892 {
3893 rtx dest = next_real_insn (label);
3894 int dest_uid;
3895 if (! dest)
3896 /* This can happen for an undefined label. */
3897 return 0;
3898 dest_uid = INSN_UID (dest);
3899 /* If this is a newly created branch redirection blocking instruction,
3900 we cannot index the branch_uid or insn_addresses arrays with its
3901 uid. But then, we won't need to, because the actual destination is
3902 the following branch. */
3903 while (dest_uid >= max_uid)
3904 {
3905 dest = NEXT_INSN (dest);
3906 dest_uid = INSN_UID (dest);
3907 }
3908 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
3909 return 0;
3910 return dest_uid;
3911 }
3912
3913 /* Split condbranches that are out of range. Also add clobbers for
3914 scratch registers that are needed in far jumps.
3915 We do this before delay slot scheduling, so that it can take our
3916 newly created instructions into account. It also allows us to
3917 find branches with common targets more easily. */
3918
3919 static void
3920 split_branches (first)
3921 rtx first;
3922 {
3923 rtx insn;
3924 struct far_branch **uid_branch, *far_branch_list = 0;
3925 int max_uid = get_max_uid ();
3926
3927 /* Find out which branches are out of range. */
3928 shorten_branches (first);
3929
3930 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
3931 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
3932
3933 for (insn = first; insn; insn = NEXT_INSN (insn))
3934 if (! INSN_P (insn))
3935 continue;
3936 else if (INSN_DELETED_P (insn))
3937 {
3938 /* Shorten_branches would split this instruction again,
3939 so transform it into a note. */
3940 PUT_CODE (insn, NOTE);
3941 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3942 NOTE_SOURCE_FILE (insn) = 0;
3943 }
3944 else if (GET_CODE (insn) == JUMP_INSN
3945 /* Don't mess with ADDR_DIFF_VEC */
3946 && (GET_CODE (PATTERN (insn)) == SET
3947 || GET_CODE (PATTERN (insn)) == RETURN))
3948 {
3949 enum attr_type type = get_attr_type (insn);
3950 if (type == TYPE_CBRANCH)
3951 {
3952 rtx next, beyond;
3953
3954 if (get_attr_length (insn) > 4)
3955 {
3956 rtx src = SET_SRC (PATTERN (insn));
3957 rtx olabel = XEXP (XEXP (src, 1), 0);
3958 int addr = INSN_ADDRESSES (INSN_UID (insn));
3959 rtx label = 0;
3960 int dest_uid = get_dest_uid (olabel, max_uid);
3961 struct far_branch *bp = uid_branch[dest_uid];
3962
3963 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
3964 the label if the LABEL_NUSES count drops to zero. There is
3965 always a jump_optimize pass that sets these values, but it
3966 proceeds to delete unreferenced code, and then if not
3967 optimizing, to un-delete the deleted instructions, thus
3968 leaving labels with too low uses counts. */
3969 if (! optimize)
3970 {
3971 JUMP_LABEL (insn) = olabel;
3972 LABEL_NUSES (olabel)++;
3973 }
3974 if (! bp)
3975 {
3976 bp = (struct far_branch *) alloca (sizeof *bp);
3977 uid_branch[dest_uid] = bp;
3978 bp->prev = far_branch_list;
3979 far_branch_list = bp;
3980 bp->far_label
3981 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
3982 LABEL_NUSES (bp->far_label)++;
3983 }
3984 else
3985 {
3986 label = bp->near_label;
3987 if (! label && bp->address - addr >= CONDJUMP_MIN)
3988 {
3989 rtx block = bp->insert_place;
3990
3991 if (GET_CODE (PATTERN (block)) == RETURN)
3992 block = PREV_INSN (block);
3993 else
3994 block = gen_block_redirect (block,
3995 bp->address, 2);
3996 label = emit_label_after (gen_label_rtx (),
3997 PREV_INSN (block));
3998 bp->near_label = label;
3999 }
4000 else if (label && ! NEXT_INSN (label))
4001 {
4002 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4003 bp->insert_place = insn;
4004 else
4005 gen_far_branch (bp);
4006 }
4007 }
4008 if (! label
4009 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4010 {
4011 bp->near_label = label = gen_label_rtx ();
4012 bp->insert_place = insn;
4013 bp->address = addr;
4014 }
4015 if (! redirect_jump (insn, label, 1))
4016 abort ();
4017 }
4018 else
4019 {
4020 /* get_attr_length (insn) == 2 */
4021 /* Check if we have a pattern where reorg wants to redirect
4022 the branch to a label from an unconditional branch that
4023 is too far away. */
4024 /* We can't use JUMP_LABEL here because it might be undefined
4025 when not optimizing. */
4026 /* A syntax error might cause beyond to be NULL_RTX. */
4027 beyond
4028 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4029 0));
4030
4031 if (beyond
4032 && (GET_CODE (beyond) == JUMP_INSN
4033 || ((beyond = next_active_insn (beyond))
4034 && GET_CODE (beyond) == JUMP_INSN))
4035 && GET_CODE (PATTERN (beyond)) == SET
4036 && recog_memoized (beyond) == CODE_FOR_jump
4037 && ((INSN_ADDRESSES
4038 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4039 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4040 > 252 + 258 + 2))
4041 gen_block_redirect (beyond,
4042 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4043 }
4044
4045 next = next_active_insn (insn);
4046
4047 if ((GET_CODE (next) == JUMP_INSN
4048 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4049 && GET_CODE (PATTERN (next)) == SET
4050 && recog_memoized (next) == CODE_FOR_jump
4051 && ((INSN_ADDRESSES
4052 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4053 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4054 > 252 + 258 + 2))
4055 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4056 }
4057 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4058 {
4059 int addr = INSN_ADDRESSES (INSN_UID (insn));
4060 rtx far_label = 0;
4061 int dest_uid = 0;
4062 struct far_branch *bp;
4063
4064 if (type == TYPE_JUMP)
4065 {
4066 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4067 dest_uid = get_dest_uid (far_label, max_uid);
4068 if (! dest_uid)
4069 {
4070 /* Parse errors can lead to labels outside
4071 the insn stream. */
4072 if (! NEXT_INSN (far_label))
4073 continue;
4074
4075 if (! optimize)
4076 {
4077 JUMP_LABEL (insn) = far_label;
4078 LABEL_NUSES (far_label)++;
4079 }
4080 redirect_jump (insn, NULL_RTX, 1);
4081 far_label = 0;
4082 }
4083 }
4084 bp = uid_branch[dest_uid];
4085 if (! bp)
4086 {
4087 bp = (struct far_branch *) alloca (sizeof *bp);
4088 uid_branch[dest_uid] = bp;
4089 bp->prev = far_branch_list;
4090 far_branch_list = bp;
4091 bp->near_label = 0;
4092 bp->far_label = far_label;
4093 if (far_label)
4094 LABEL_NUSES (far_label)++;
4095 }
4096 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4097 if (addr - bp->address <= CONDJUMP_MAX)
4098 emit_label_after (bp->near_label, PREV_INSN (insn));
4099 else
4100 {
4101 gen_far_branch (bp);
4102 bp->near_label = 0;
4103 }
4104 else
4105 bp->near_label = 0;
4106 bp->address = addr;
4107 bp->insert_place = insn;
4108 if (! far_label)
4109 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4110 else
4111 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4112 }
4113 }
4114 /* Generate all pending far branches,
4115 and free our references to the far labels. */
4116 while (far_branch_list)
4117 {
4118 if (far_branch_list->near_label
4119 && ! NEXT_INSN (far_branch_list->near_label))
4120 gen_far_branch (far_branch_list);
4121 if (optimize
4122 && far_branch_list->far_label
4123 && ! --LABEL_NUSES (far_branch_list->far_label))
4124 delete_insn (far_branch_list->far_label);
4125 far_branch_list = far_branch_list->prev;
4126 }
4127
4128 /* Instruction length information is no longer valid due to the new
4129 instructions that have been generated. */
4130 init_insn_lengths ();
4131 }
4132
4133 /* Dump out instruction addresses, which is useful for debugging the
4134 constant pool table stuff.
4135
4136 If relaxing, output the label and pseudo-ops used to link together
4137 calls and the instruction which set the registers. */
4138
4139 /* ??? This is unnecessary, and probably should be deleted. This makes
4140 the insn_addresses declaration above unnecessary. */
4141
4142 /* ??? The addresses printed by this routine for insns are nonsense for
4143 insns which are inside of a sequence where none of the inner insns have
4144 variable length. This is because the second pass of shorten_branches
4145 does not bother to update them. */
4146
4147 void
4148 final_prescan_insn (insn, opvec, noperands)
4149 rtx insn;
4150 rtx *opvec ATTRIBUTE_UNUSED;
4151 int noperands ATTRIBUTE_UNUSED;
4152 {
4153 if (TARGET_DUMPISIZE)
4154 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4155
4156 if (TARGET_RELAX)
4157 {
4158 rtx note;
4159
4160 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4161 if (note)
4162 {
4163 rtx pattern;
4164
4165 pattern = PATTERN (insn);
4166 if (GET_CODE (pattern) == PARALLEL)
4167 pattern = XVECEXP (pattern, 0, 0);
4168 if (GET_CODE (pattern) == CALL
4169 || (GET_CODE (pattern) == SET
4170 && (GET_CODE (SET_SRC (pattern)) == CALL
4171 || get_attr_type (insn) == TYPE_SFUNC)))
4172 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4173 CODE_LABEL_NUMBER (XEXP (note, 0)));
4174 else if (GET_CODE (pattern) == SET)
4175 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4176 CODE_LABEL_NUMBER (XEXP (note, 0)));
4177 else
4178 abort ();
4179 }
4180 }
4181 }
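/* Editor's illustration, with hypothetical label names, of the -mrelax
   annotations emitted above: the insn that sets the call-address register
   is preceded by an internal label, and each call through that register is
   preceded by a .uses pseudo-op naming it, roughly

   .L42:
	mov.l	.L100,r1	! insn that loads the function address
	...
	.uses	.L42
	jsr	@r1		! call through r1

   so the assembler and linker can pair the call with the insn that set up
   its address when relaxing.  */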
4182
4183 /* Dump out any constants accumulated in the final pass. These will
4184 only be labels. */
4185
4186 const char *
4187 output_jump_label_table ()
4188 {
4189 int i;
4190
4191 if (pool_size)
4192 {
4193 fprintf (asm_out_file, "\t.align 2\n");
4194 for (i = 0; i < pool_size; i++)
4195 {
4196 pool_node *p = &pool_vector[i];
4197
4198 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4199 CODE_LABEL_NUMBER (p->label));
4200 output_asm_insn (".long %O0", &p->value);
4201 }
4202 pool_size = 0;
4203 }
4204
4205 return "";
4206 }
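/* Editor's illustration (hypothetical label numbers) of what the table
   emitted above looks like in the assembly output:

	.align 2
   .L47:
	.long	.L51

   i.e. one .long per accumulated label, each preceded by the internal label
   under which the pool entry was recorded.  */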
4207 \f
4208 /* A full frame looks like:
4209
4210 arg-5
4211 arg-4
4212 [ if current_function_anonymous_args
4213 arg-3
4214 arg-2
4215 arg-1
4216 arg-0 ]
4217 saved-fp
4218 saved-r10
4219 saved-r11
4220 saved-r12
4221 saved-pr
4222 local-n
4223 ..
4224 local-1
4225 local-0 <- fp points here. */
4226
4227 /* Number of bytes pushed for anonymous args, used to pass information
4228 between expand_prologue and expand_epilogue. */
4229
4230 static int extra_push;
4231
4232 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
4233 to be adjusted, and TEMP, if nonnegative, holds the register number
4234 of a general register that we may clobber. */
4235
4236 static void
4237 output_stack_adjust (size, reg, temp, emit_fn)
4238 int size;
4239 rtx reg;
4240 int temp;
4241 rtx (*emit_fn) PARAMS ((rtx));
4242 {
4243 if (size)
4244 {
4245 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4246
4247 if (size % align)
4248 abort ();
4249
4250 if (CONST_OK_FOR_ADD (size))
4251 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4252 /* Try to do it with two partial adjustments; however, we must make
4253 sure that the stack is properly aligned at all times, in case
4254 an interrupt occurs between the two partial adjustments. */
4255 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4256 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4257 {
4258 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4259 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4260 }
4261 else
4262 {
4263 rtx const_reg;
4264 rtx insn;
4265
4266 /* If TEMP is invalid, we could temporarily save a general
4267 register to MACL. However, there is currently no need
4268 to handle this case, so just abort when we see it. */
4269 if (temp < 0)
4270 abort ();
4271 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4272
4273 /* If SIZE is negative, subtract the positive value.
4274 This sometimes allows a constant pool entry to be shared
4275 between prologue and epilogue code. */
4276 if (size < 0)
4277 {
4278 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4279 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4280 }
4281 else
4282 {
4283 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4284 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4285 }
4286 if (emit_fn == frame_insn)
4287 REG_NOTES (insn)
4288 = (gen_rtx_EXPR_LIST
4289 (REG_FRAME_RELATED_EXPR,
4290 gen_rtx_SET (VOIDmode, reg,
4291 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4292 REG_NOTES (insn)));
4293 }
4294 }
4295 }
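/* Worked example of the two-partial-adjustments path above (editor's note,
   assuming a 4-byte stack boundary and the usual signed 8-bit add-immediate
   range): for size == -192 a single add is out of range, but
   (size / 2 & -align) == -96 and the remainder -96 both fit, and because the
   first piece is a multiple of the alignment the stack stays properly
   aligned between the two adds even if an interrupt arrives.  */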
4296
4297 static rtx
4298 frame_insn (x)
4299 rtx x;
4300 {
4301 x = emit_insn (x);
4302 RTX_FRAME_RELATED_P (x) = 1;
4303 return x;
4304 }
4305
4306 /* Output RTL to push register RN onto the stack. */
4307
4308 static rtx
4309 push (rn)
4310 int rn;
4311 {
4312 rtx x;
4313 if (rn == FPUL_REG)
4314 x = gen_push_fpul ();
4315 else if (rn == FPSCR_REG)
4316 x = gen_push_fpscr ();
4317 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4318 && FP_OR_XD_REGISTER_P (rn))
4319 {
4320 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4321 return NULL_RTX;
4322 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4323 }
4324 else if (TARGET_SH3E && FP_REGISTER_P (rn))
4325 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4326 else
4327 x = gen_push (gen_rtx_REG (SImode, rn));
4328
4329 x = frame_insn (x);
4330 REG_NOTES (x)
4331 = gen_rtx_EXPR_LIST (REG_INC,
4332 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4333 return x;
4334 }
4335
4336 /* Output RTL to pop register RN from the stack. */
4337
4338 static void
4339 pop (rn)
4340 int rn;
4341 {
4342 rtx x;
4343 if (rn == FPUL_REG)
4344 x = gen_pop_fpul ();
4345 else if (rn == FPSCR_REG)
4346 x = gen_pop_fpscr ();
4347 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4348 && FP_OR_XD_REGISTER_P (rn))
4349 {
4350 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4351 return;
4352 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4353 }
4354 else if (TARGET_SH3E && FP_REGISTER_P (rn))
4355 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4356 else
4357 x = gen_pop (gen_rtx_REG (SImode, rn));
4358
4359 x = emit_insn (x);
4360 REG_NOTES (x)
4361 = gen_rtx_EXPR_LIST (REG_INC,
4362 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4363 }
4364
4365 /* Generate code to push the regs specified in the mask. */
4366
4367 static void
4368 push_regs (mask)
4369 HOST_WIDE_INT *mask;
4370 {
4371 int i;
4372
4373 /* Push PR last; this gives better latencies after the prologue, and
4374 provides candidates for the return delay slot when there are no
4375 general registers pushed. */
4376 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4377 if (i != PR_REG && mask[i / 32] & (1 << (i % 32)))
4378 push (i);
4379 if (mask[PR_REG / 32] & (1 << (PR_REG % 32)))
4380 push (PR_REG);
4381 }
4382
4383 /* Work out the registers which need to be saved, both as a mask and a
4384 count of saved words.
4385
4386 If doing a pragma interrupt function, then push all regs used by the
4387 function, and if we call another function (we can tell by looking at PR),
4388 make sure that all the regs it clobbers are safe too. */
4389
4390 static void
4391 calc_live_regs (count_ptr, live_regs_mask)
4392 int *count_ptr;
4393 HOST_WIDE_INT *live_regs_mask;
4394 {
4395 int reg;
4396 int count;
4397 int interrupt_handler;
4398 int pr_live;
4399
4400 interrupt_handler = sh_cfun_interrupt_handler_p ();
4401
4402 for (count = 0; 32 * count < FIRST_PSEUDO_REGISTER; count++)
4403 live_regs_mask[count] = 0;
4404 /* If switching to double mode would let us combine enough single-precision register saves into double-precision ones, do that. */
4405 if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4406 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4407 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4408 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
4409 && ++count > 2)
4410 {
4411 target_flags &= ~FPU_SINGLE_BIT;
4412 break;
4413 }
4414 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
4415 knows how to use it. That means the pseudo originally allocated for
4416 the initial value can become the PR_MEDIA_REG hard register, as seen for
4417 execute/20010122-1.c:test9. */
4418 if (TARGET_SHMEDIA)
4419 pr_live = regs_ever_live[PR_MEDIA_REG];
4420 else
4421 {
4422 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
4423 pr_live = (pr_initial
4424 ? REGNO (pr_initial) != (PR_REG) : regs_ever_live[PR_REG]);
4425 }
4426 /* Force PR to be live if the prologue has to call the SHmedia
4427 argument decoder or register saver. */
4428 if (TARGET_SHCOMPACT
4429 && ((current_function_args_info.call_cookie
4430 & ~ CALL_COOKIE_RET_TRAMP (1))
4431 || current_function_has_nonlocal_label))
4432 pr_live = 1;
4433 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4434 {
4435 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4436 ? pr_live
4437 : (interrupt_handler && ! pragma_trapa)
4438 ? (/* Need to save all the regs ever live. */
4439 (regs_ever_live[reg]
4440 || (call_used_regs[reg]
4441 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4442 && pr_live))
4443 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4444 && reg != RETURN_ADDRESS_POINTER_REGNUM
4445 && reg != T_REG && reg != GBR_REG
4446 /* Push fpscr only on targets which have an FPU. */
4447 && (reg != FPSCR_REG || TARGET_FPU_ANY))
4448 : (/* Only push those regs which are used and need to be saved. */
4449 (TARGET_SHCOMPACT
4450 && flag_pic
4451 && current_function_args_info.call_cookie
4452 && reg == PIC_OFFSET_TABLE_REGNUM)
4453 || (regs_ever_live[reg] && ! call_used_regs[reg])
4454 || (current_function_calls_eh_return
4455 && (reg == EH_RETURN_DATA_REGNO (0)
4456 || reg == EH_RETURN_DATA_REGNO (1)
4457 || reg == EH_RETURN_DATA_REGNO (2)
4458 || reg == EH_RETURN_DATA_REGNO (3)))))
4459 {
4460 live_regs_mask[reg / 32] |= 1 << (reg % 32);
4461 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4462
4463 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
4464 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
4465 {
4466 if (FP_REGISTER_P (reg))
4467 {
4468 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4469 {
4470 live_regs_mask[(reg ^ 1) / 32] |= 1 << ((reg ^ 1) % 32);
4471 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
4472 }
4473 }
4474 else if (XD_REGISTER_P (reg))
4475 {
4476 /* Must switch to double mode to access these registers. */
4477 target_flags &= ~FPU_SINGLE_BIT;
4478 }
4479 }
4480 }
4481 }
4482
4483 *count_ptr = count;
4484 }
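/* Editor's sketch (hypothetical helper, kept under #if 0 so it is not
   compiled): the mask built by calc_live_regs uses the low 32 bits of each
   element, indexed by hard register number, so register 37, for instance,
   is bit 5 of element 1.  A query would look like this.  */
#if 0
static int
example_reg_is_live (live_regs_mask, regno)
     HOST_WIDE_INT *live_regs_mask;
     int regno;
{
  return (live_regs_mask[regno / 32] & (1 << (regno % 32))) != 0;
}
#endif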
4485
4486 /* Code to generate prologue and epilogue sequences */
4487
4488 /* PUSHED is the number of bytes that are being pushed onto the
4489 stack for register saves. Return the frame size, padded
4490 appropriately so that the stack stays properly aligned. */
4491 static HOST_WIDE_INT
4492 rounded_frame_size (pushed)
4493 int pushed;
4494 {
4495 HOST_WIDE_INT size = get_frame_size ();
4496 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4497
4498 return ((size + pushed + align - 1) & -align) - pushed;
4499 }
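/* Editor's sketch (hypothetical function, kept under #if 0 so it is not
   compiled) of the rounding performed by rounded_frame_size above: the
   local frame plus the pushed save area is padded up to the next multiple
   of the stack boundary, and only the local-frame part of the padded total
   is returned.  */
#if 0
static int
rounded_frame_size_example ()
{
  int size = 18;	/* stand-in for get_frame_size ()  */
  int pushed = 4;	/* stand-in for the pushed register bytes  */
  int align = 8;	/* stand-in for STACK_BOUNDARY / BITS_PER_UNIT  */

  /* (18 + 4 + 7) & -8 == 24, so the frame grows from 18 to 20 bytes and
     pushed + frame == 24 remains a multiple of the alignment.  */
  return ((size + pushed + align - 1) & -align) - pushed;
}
#endif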
4500
4501 /* Choose a call-clobbered target-branch register that remains
4502 unchanged along the whole function. We set it up as the return
4503 value in the prologue. */
4504 int
4505 sh_media_register_for_return ()
4506 {
4507 int regno;
4508 int tr0_used;
4509
4510 if (! current_function_is_leaf)
4511 return -1;
4512
4513 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
4514
4515 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
4516 if (call_used_regs[regno] && ! regs_ever_live[regno])
4517 return regno;
4518
4519 return -1;
4520 }
4521
4522 void
4523 sh_expand_prologue ()
4524 {
4525 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
4526 int d, i;
4527 int d_rounding = 0;
4528 int save_flags = target_flags;
4529
4530 current_function_interrupt = sh_cfun_interrupt_handler_p ();
4531
4532 /* We have pretend args if we had an object sent partially in registers
4533 and partially on the stack, e.g. a large structure. */
4534 output_stack_adjust (-current_function_pretend_args_size
4535 - current_function_args_info.stack_regs * 8,
4536 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4537
4538 extra_push = 0;
4539
4540 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
4541 /* We're going to use the PIC register to load the address of the
4542 incoming-argument decoder and/or of the return trampoline from
4543 the GOT, so make sure the PIC register is preserved and
4544 initialized. */
4545 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
4546
4547 if (TARGET_SHCOMPACT
4548 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4549 {
4550 int reg;
4551
4552 /* First, make all registers with incoming arguments that will
4553 be pushed onto the stack live, so that register renaming
4554 doesn't overwrite them. */
4555 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
4556 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
4557 >= NPARM_REGS (SImode) - reg)
4558 for (; reg < NPARM_REGS (SImode); reg++)
4559 emit_insn (gen_shcompact_preserve_incoming_args
4560 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4561 else if (CALL_COOKIE_INT_REG_GET
4562 (current_function_args_info.call_cookie, reg) == 1)
4563 emit_insn (gen_shcompact_preserve_incoming_args
4564 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4565
4566 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
4567 stack_pointer_rtx);
4568 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
4569 GEN_INT (current_function_args_info.call_cookie));
4570 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
4571 gen_rtx_REG (SImode, R0_REG));
4572 }
4573 else if (TARGET_SHMEDIA)
4574 {
4575 int tr = sh_media_register_for_return ();
4576
4577 if (tr >= 0)
4578 {
4579 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
4580 gen_rtx_REG (DImode, PR_MEDIA_REG));
4581
4582 /* If this function only exits with sibcalls, this copy
4583 will be flagged as dead. */
4584 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4585 const0_rtx,
4586 REG_NOTES (insn));
4587 }
4588 }
4589
4590 /* Emit the code for SETUP_VARARGS. */
4591 if (current_function_stdarg)
4592 {
4593 /* This is not used by the SH3E calling convention */
4594 if (TARGET_SH1 && ! TARGET_SH3E && ! TARGET_SH5 && ! TARGET_HITACHI)
4595 {
4596 /* Push arg regs as if they'd been provided by caller in stack. */
4597 for (i = 0; i < NPARM_REGS(SImode); i++)
4598 {
4599 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
4600 rtx insn;
4601
4602 if (i >= (NPARM_REGS(SImode)
4603 - current_function_args_info.arg_count[(int) SH_ARG_INT]
4604 ))
4605 break;
4606 insn = push (rn);
4607 RTX_FRAME_RELATED_P (insn) = 0;
4608 extra_push += 4;
4609 }
4610 }
4611 }
4612
4613 /* If we're supposed to switch stacks at function entry, do so now. */
4614 if (sp_switch)
4615 emit_insn (gen_sp_switch_1 ());
4616
4617 calc_live_regs (&d, live_regs_mask);
4618 /* ??? Maybe we could save some switching if we can move a mode switch
4619 that already happens to be at the function start into the prologue. */
4620 if (target_flags != save_flags)
4621 emit_insn (gen_toggle_sz ());
4622
4623 if (TARGET_SH5)
4624 {
4625 int i;
4626 int offset;
4627 int align;
4628 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4629 int offset_in_r0 = -1;
4630 int sp_in_r0 = 0;
4631
4632 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
4633 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4634 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4635
4636 offset = d + d_rounding;
4637 output_stack_adjust (-offset, stack_pointer_rtx, 1, frame_insn);
4638
4639 /* We loop twice: first, we save the 8-byte aligned registers at the
4640 higher addresses, which are known to be aligned. Then, we
4641 proceed to saving the 32-bit registers that don't need 8-byte
4642 alignment. */
4643 /* Note that if you change this code in a way that affects where
4644 the return register is saved, you have to update not only
4645 sh_expand_epilogue, but also sh_set_return_address. */
4646 for (align = 1; align >= 0; align--)
4647 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
4648 if (live_regs_mask[i/32] & (1 << (i % 32)))
4649 {
4650 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4651 int reg = i;
4652 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
4653
4654 if (mode == SFmode && (i % 2) == 1
4655 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4656 && (live_regs_mask[(i ^ 1) / 32] & (1 << ((i ^ 1) % 32))))
4657 {
4658 mode = DFmode;
4659 i--;
4660 reg--;
4661 }
4662
4663 /* If we're doing the aligned pass and this is not aligned,
4664 or we're doing the unaligned pass and this is aligned,
4665 skip it. */
4666 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4667 == 0) != align)
4668 continue;
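/* For instance (editor's note, assuming an 8-byte stack boundary): a DFmode
   register (8 bytes) is saved in the align == 1 pass, while an SImode or
   SFmode register (4 bytes) is deferred to the align == 0 pass.  */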
4669
4670 offset -= GET_MODE_SIZE (mode);
4671
4672 reg_rtx = gen_rtx_REG (mode, reg);
4673
4674 mem_rtx = gen_rtx_MEM (mode,
4675 gen_rtx_PLUS (Pmode,
4676 stack_pointer_rtx,
4677 GEN_INT (offset)));
4678
4679 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
4680
4681 mem_rtx = NULL_RTX;
4682
4683 try_pre_dec:
4684 do
4685 if (HAVE_PRE_DECREMENT
4686 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
4687 || mem_rtx == NULL_RTX
4688 || i == PR_REG || SPECIAL_REGISTER_P (i)))
4689 {
4690 pre_dec = gen_rtx_MEM (mode,
4691 gen_rtx_PRE_DEC (Pmode, r0));
4692
4693 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
4694 pre_dec_ok);
4695
4696 pre_dec = NULL_RTX;
4697
4698 break;
4699
4700 pre_dec_ok:
4701 mem_rtx = NULL_RTX;
4702 offset += GET_MODE_SIZE (mode);
4703 }
4704 while (0);
4705
4706 if (mem_rtx != NULL_RTX)
4707 goto addr_ok;
4708
4709 if (offset_in_r0 == -1)
4710 {
4711 emit_move_insn (r0, GEN_INT (offset));
4712 offset_in_r0 = offset;
4713 }
4714 else if (offset != offset_in_r0)
4715 {
4716 emit_move_insn (r0,
4717 gen_rtx_PLUS
4718 (Pmode, r0,
4719 GEN_INT (offset - offset_in_r0)));
4720 offset_in_r0 += offset - offset_in_r0;
4721 }
4722
4723 if (pre_dec != NULL_RTX)
4724 {
4725 if (! sp_in_r0)
4726 {
4727 emit_move_insn (r0,
4728 gen_rtx_PLUS
4729 (Pmode, r0, stack_pointer_rtx));
4730 sp_in_r0 = 1;
4731 }
4732
4733 offset -= GET_MODE_SIZE (mode);
4734 offset_in_r0 -= GET_MODE_SIZE (mode);
4735
4736 mem_rtx = pre_dec;
4737 }
4738 else if (sp_in_r0)
4739 mem_rtx = gen_rtx_MEM (mode, r0);
4740 else
4741 mem_rtx = gen_rtx_MEM (mode,
4742 gen_rtx_PLUS (Pmode,
4743 stack_pointer_rtx,
4744 r0));
4745
4746 /* We must not use an r0-based address for target-branch
4747 registers or for special registers without pre-dec
4748 memory addresses, since we store their values in r0
4749 first. */
4750 if (TARGET_REGISTER_P (i)
4751 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
4752 && mem_rtx != pre_dec))
4753 abort ();
4754
4755 addr_ok:
4756 if (TARGET_REGISTER_P (i)
4757 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
4758 && mem_rtx != pre_dec))
4759 {
4760 rtx r0mode = gen_rtx_REG (GET_MODE (reg_rtx), R0_REG);
4761
4762 emit_move_insn (r0mode, reg_rtx);
4763
4764 offset_in_r0 = -1;
4765 sp_in_r0 = 0;
4766
4767 reg_rtx = r0mode;
4768 }
4769
4770 emit_move_insn (mem_rtx, reg_rtx);
4771 }
4772
4773 if (offset != d_rounding)
4774 abort ();
4775 }
4776 else
4777 push_regs (live_regs_mask);
4778
4779 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
4780 {
4781 rtx insn = get_last_insn ();
4782 rtx last = emit_insn (gen_GOTaddr2picreg ());
4783
4784 /* Mark these insns as possibly dead. Sometimes, flow2 may
4785 delete all uses of the PIC register. In this case, let it
4786 delete the initialization too. */
4787 do
4788 {
4789 insn = NEXT_INSN (insn);
4790
4791 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4792 const0_rtx,
4793 REG_NOTES (insn));
4794 }
4795 while (insn != last);
4796 }
4797
4798 if (SHMEDIA_REGS_STACK_ADJUST ())
4799 {
4800 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
4801 gen_rtx_SYMBOL_REF (Pmode,
4802 TARGET_FPU_ANY
4803 ? "__GCC_push_shmedia_regs"
4804 : "__GCC_push_shmedia_regs_nofpu"));
4805 /* This must NOT go through the PLT, otherwise mach and macl
4806 may be clobbered. */
4807 emit_insn (gen_shmedia_save_restore_regs_compact
4808 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
4809 }
4810
4811 if (target_flags != save_flags)
4812 {
4813 rtx insn = emit_insn (gen_toggle_sz ());
4814
4815 /* If we're lucky, a mode switch in the function body will
4816 overwrite fpscr, turning this insn dead. Tell flow this
4817 insn is ok to delete. */
4818 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4819 const0_rtx,
4820 REG_NOTES (insn));
4821 }
4822
4823 target_flags = save_flags;
4824
4825 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
4826 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4827
4828 if (frame_pointer_needed)
4829 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
4830
4831 if (TARGET_SHCOMPACT
4832 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4833 {
4834 /* This must NOT go through the PLT, otherwise mach and macl
4835 may be clobbered. */
4836 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
4837 gen_rtx_SYMBOL_REF (Pmode,
4838 "__GCC_shcompact_incoming_args"));
4839 emit_insn (gen_shcompact_incoming_args ());
4840 }
4841 }
4842
4843 void
4844 sh_expand_epilogue ()
4845 {
4846 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
4847 int d, i;
4848 int d_rounding = 0;
4849
4850 int save_flags = target_flags;
4851 int frame_size;
4852
4853 calc_live_regs (&d, live_regs_mask);
4854
4855 if (TARGET_SH5 && d % (STACK_BOUNDARY / BITS_PER_UNIT))
4856 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4857 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4858
4859 frame_size = rounded_frame_size (d) - d_rounding;
4860
4861 if (frame_pointer_needed)
4862 {
4863 output_stack_adjust (frame_size, frame_pointer_rtx, 7, emit_insn);
4864
4865 /* We must avoid moving the stack pointer adjustment past code
4866 which reads from the local frame, else an interrupt could
4867 occur after the SP adjustment and clobber data in the local
4868 frame. */
4869 emit_insn (gen_blockage ());
4870 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
4871 }
4872 else if (frame_size)
4873 {
4874 /* We must avoid moving the stack pointer adjustment past code
4875 which reads from the local frame, else an interrupt could
4876 occur after the SP adjustment and clobber data in the local
4877 frame. */
4878 emit_insn (gen_blockage ());
4879 output_stack_adjust (frame_size, stack_pointer_rtx, 7, emit_insn);
4880 }
4881
4882 if (SHMEDIA_REGS_STACK_ADJUST ())
4883 {
4884 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
4885 gen_rtx_SYMBOL_REF (Pmode,
4886 TARGET_FPU_ANY
4887 ? "__GCC_pop_shmedia_regs"
4888 : "__GCC_pop_shmedia_regs_nofpu"));
4889 /* This must NOT go through the PLT, otherwise mach and macl
4890 may be clobbered. */
4891 emit_insn (gen_shmedia_save_restore_regs_compact
4892 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
4893 }
4894
4895 /* Pop all the registers. */
4896
4897 if (target_flags != save_flags)
4898 emit_insn (gen_toggle_sz ());
4899 if (TARGET_SH5)
4900 {
4901 int offset = d_rounding;
4902 int offset_in_r0 = -1;
4903 int sp_in_r0 = 0;
4904 int align;
4905 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4906
4907 /* We loop twice, in the opposite order from the prologue: first we
4908 restore the 32-bit registers that don't need 8-byte alignment,
4909 then the 8-byte aligned registers that were saved at the higher
4910 addresses, which are known to be aligned. */
4911 for (align = 0; align <= 1; align++)
4912 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4913 if (live_regs_mask[i/32] & (1 << (i % 32)))
4914 {
4915 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4916 int reg = i;
4917 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
4918
4919 if (mode == SFmode && (i % 2) == 0
4920 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4921 && (live_regs_mask[(i ^ 1) / 32] & (1 << ((i ^ 1) % 32))))
4922 {
4923 mode = DFmode;
4924 i++;
4925 }
4926
4927 /* If we're doing the aligned pass and this is not aligned,
4928 or we're doing the unaligned pass and this is aligned,
4929 skip it. */
4930 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4931 == 0) != align)
4932 continue;
4933
4934 reg_rtx = gen_rtx_REG (mode, reg);
4935
4936 mem_rtx = gen_rtx_MEM (mode,
4937 gen_rtx_PLUS (Pmode,
4938 stack_pointer_rtx,
4939 GEN_INT (offset)));
4940
4941 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
4942
4943 mem_rtx = NULL_RTX;
4944
4945 try_post_inc:
4946 do
4947 if (HAVE_POST_INCREMENT
4948 && (offset == offset_in_r0
4949 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
4950 && mem_rtx == NULL_RTX)
4951 || i == PR_REG || SPECIAL_REGISTER_P (i)))
4952 {
4953 post_inc = gen_rtx_MEM (mode,
4954 gen_rtx_POST_INC (Pmode, r0));
4955
4956 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
4957 post_inc_ok);
4958
4959 post_inc = NULL_RTX;
4960
4961 break;
4962
4963 post_inc_ok:
4964 mem_rtx = NULL_RTX;
4965 }
4966 while (0);
4967
4968 if (mem_rtx != NULL_RTX)
4969 goto addr_ok;
4970
4971 if (offset_in_r0 == -1)
4972 {
4973 emit_move_insn (r0, GEN_INT (offset));
4974 offset_in_r0 = offset;
4975 }
4976 else if (offset != offset_in_r0)
4977 {
4978 emit_move_insn (r0,
4979 gen_rtx_PLUS
4980 (Pmode, r0,
4981 GEN_INT (offset - offset_in_r0)));
4982 offset_in_r0 += offset - offset_in_r0;
4983 }
4984
4985 if (post_inc != NULL_RTX)
4986 {
4987 if (! sp_in_r0)
4988 {
4989 emit_move_insn (r0,
4990 gen_rtx_PLUS
4991 (Pmode, r0, stack_pointer_rtx));
4992 sp_in_r0 = 1;
4993 }
4994
4995 mem_rtx = post_inc;
4996
4997 offset_in_r0 += GET_MODE_SIZE (mode);
4998 }
4999 else if (sp_in_r0)
5000 mem_rtx = gen_rtx_MEM (mode, r0);
5001 else
5002 mem_rtx = gen_rtx_MEM (mode,
5003 gen_rtx_PLUS (Pmode,
5004 stack_pointer_rtx,
5005 r0));
5006
5007 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5008 && mem_rtx != post_inc)
5009 abort ();
5010
5011 addr_ok:
5012 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5013 && mem_rtx != post_inc)
5014 {
5015 insn = emit_move_insn (r0, mem_rtx);
5016 mem_rtx = r0;
5017 }
5018 else if (TARGET_REGISTER_P (i))
5019 {
5020 rtx r1 = gen_rtx_REG (mode, R1_REG);
5021
5022 insn = emit_move_insn (r1, mem_rtx);
5023 mem_rtx = r1;
5024 }
5025
5026 insn = emit_move_insn (reg_rtx, mem_rtx);
5027
5028 offset += GET_MODE_SIZE (mode);
5029 }
5030
5031 if (offset != d + d_rounding)
5032 abort ();
5033
5034 goto finish;
5035 }
5036 else
5037 d = 0;
5038 if (live_regs_mask[PR_REG / 32] & (1 << (PR_REG % 32)))
5039 pop (PR_REG);
5040 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5041 {
5042 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5043
5044 if (j != PR_REG && live_regs_mask[j / 32] & (1 << (j % 32)))
5045 pop (j);
5046 }
5047 finish:
5048 if (target_flags != save_flags)
5049 emit_insn (gen_toggle_sz ());
5050 target_flags = save_flags;
5051
5052 output_stack_adjust (extra_push + current_function_pretend_args_size
5053 + d + d_rounding
5054 + current_function_args_info.stack_regs * 8,
5055 stack_pointer_rtx, 7, emit_insn);
5056
5057 if (current_function_calls_eh_return)
5058 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5059 EH_RETURN_STACKADJ_RTX));
5060
5061 /* Switch back to the normal stack if necessary. */
5062 if (sp_switch)
5063 emit_insn (gen_sp_switch_2 ());
5064
5065 /* Tell flow the insn that pops PR isn't dead. */
5066 /* PR_REG will never be live in SHmedia mode, and we don't need to
5067 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5068 by the return pattern. */
5069 if (live_regs_mask[PR_REG / 32] & (1 << (PR_REG % 32)))
5070 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5071 }
5072
5073 static int sh_need_epilogue_known = 0;
5074
5075 int
5076 sh_need_epilogue ()
5077 {
5078 if (! sh_need_epilogue_known)
5079 {
5080 rtx epilogue;
5081
5082 start_sequence ();
5083 sh_expand_epilogue ();
5084 epilogue = get_insns ();
5085 end_sequence ();
5086 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5087 }
5088 return sh_need_epilogue_known > 0;
5089 }
5090
5091 /* Emit code to change the current function's return address to RA.
5092 TEMP is available as a scratch register, if needed. */
5093
5094 void
5095 sh_set_return_address (ra, tmp)
5096 rtx ra, tmp;
5097 {
5098 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
5099 int d;
5100 int d_rounding = 0;
5101 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5102 int pr_offset;
5103
5104 calc_live_regs (&d, live_regs_mask);
5105
5106 /* If pr_reg isn't live, we can set it (or the register given in
5107 sh_media_register_for_return) directly. */
5108 if ((live_regs_mask[pr_reg / 32] & (1 << (pr_reg % 32))) == 0)
5109 {
5110 rtx rr;
5111
5112 if (TARGET_SHMEDIA)
5113 {
5114 int rr_regno = sh_media_register_for_return ();
5115
5116 if (rr_regno < 0)
5117 rr_regno = pr_reg;
5118
5119 rr = gen_rtx_REG (DImode, rr_regno);
5120 }
5121 else
5122 rr = gen_rtx_REG (SImode, pr_reg);
5123
5124 emit_insn (GEN_MOV (rr, ra));
5125 /* Tell flow the register for return isn't dead. */
5126 emit_insn (gen_rtx_USE (VOIDmode, rr));
5127 return;
5128 }
5129
5130 if (TARGET_SH5)
5131 {
5132 int i;
5133 int offset;
5134 int align;
5135
5136 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5137 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5138 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5139
5140 offset = 0;
5141
5142 /* We loop twice, in the same order the epilogue walks the save
5143 area: first over the 32-bit registers that don't need 8-byte
5144 alignment, then over the 8-byte aligned registers stored at the
5145 higher addresses, accumulating the offset until we reach PR. */
5146 for (align = 0; align <= 1; align++)
5147 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5148 if (live_regs_mask[i/32] & (1 << (i % 32)))
5149 {
5150 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5151
5152 if (mode == SFmode && (i % 2) == 0
5153 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5154 && (live_regs_mask[(i ^ 1) / 32] & (1 << ((i ^ 1) % 32))))
5155 {
5156 mode = DFmode;
5157 i++;
5158 }
5159
5160 /* If we're doing the aligned pass and this is not aligned,
5161 or we're doing the unaligned pass and this is aligned,
5162 skip it. */
5163 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5164 == 0) != align)
5165 continue;
5166
5167 if (i == pr_reg)
5168 goto found;
5169
5170 offset += GET_MODE_SIZE (mode);
5171 }
5172
5173 /* We couldn't find the PR register. */
5174 abort ();
5175
5176 found:
5177 pr_offset = (rounded_frame_size (d) - d_rounding + offset
5178 + SHMEDIA_REGS_STACK_ADJUST ());
5179 }
5180 else
5181 pr_offset = rounded_frame_size (d) - d_rounding;
5182
5183 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
5184 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
5185
5186 tmp = gen_rtx_MEM (Pmode, tmp);
5187 emit_insn (GEN_MOV (tmp, ra));
5188 }
5189
5190 /* Clear variables at function end. */
5191
5192 static void
5193 sh_output_function_epilogue (file, size)
5194 FILE *file ATTRIBUTE_UNUSED;
5195 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5196 {
5197 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5198 sh_need_epilogue_known = 0;
5199 sp_switch = NULL_RTX;
5200 }
5201
5202 rtx
5203 sh_builtin_saveregs ()
5204 {
5205 /* First unnamed integer register. */
5206 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5207 /* Number of integer registers we need to save. */
5208 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5209 /* First unnamed SFmode float reg */
5210 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5211 /* Number of SFmode float regs to save. */
5212 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5213 rtx regbuf, fpregs;
5214 int bufsize, regno;
5215 HOST_WIDE_INT alias_set;
5216
5217 if (TARGET_SH5)
5218 {
5219 if (n_intregs)
5220 {
5221 int pushregs = n_intregs;
5222
5223 while (pushregs < NPARM_REGS (SImode) - 1
5224 && (CALL_COOKIE_INT_REG_GET
5225 (current_function_args_info.call_cookie,
5226 NPARM_REGS (SImode) - pushregs)
5227 == 1))
5228 {
5229 current_function_args_info.call_cookie
5230 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5231 - pushregs, 1);
5232 pushregs++;
5233 }
5234
5235 if (pushregs == NPARM_REGS (SImode))
5236 current_function_args_info.call_cookie
5237 |= (CALL_COOKIE_INT_REG (0, 1)
5238 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5239 else
5240 current_function_args_info.call_cookie
5241 |= CALL_COOKIE_STACKSEQ (pushregs);
5242
5243 current_function_pretend_args_size += 8 * n_intregs;
5244 }
5245 if (TARGET_SHCOMPACT)
5246 return const0_rtx;
5247 }
5248
5249 if (! TARGET_SH3E && ! TARGET_SH4 && ! TARGET_SH5)
5250 {
5251 error ("__builtin_saveregs not supported by this subtarget");
5252 return const0_rtx;
5253 }
5254
5255 if (TARGET_SHMEDIA)
5256 n_floatregs = 0;
5257
5258 /* Allocate block of memory for the regs. */
5259 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5260 Or can assign_stack_local accept a 0 SIZE argument? */
5261 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5262
5263 if (TARGET_SHMEDIA)
5264 regbuf = gen_rtx_MEM (BLKmode,
5265 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5266 else if (n_floatregs & 1)
5267 {
5268 rtx addr;
5269
5270 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5271 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5272 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
5273 regbuf = change_address (regbuf, BLKmode, addr);
5274 }
5275 else
5276 regbuf = assign_stack_local (BLKmode, bufsize, 0);
5277 alias_set = get_varargs_alias_set ();
5278 set_mem_alias_set (regbuf, alias_set);
5279
5280 /* Save int args.
5281 This is optimized to only save the regs that are necessary. Explicitly
5282 named args need not be saved. */
5283 if (n_intregs > 0)
5284 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
5285 adjust_address (regbuf, BLKmode,
5286 n_floatregs * UNITS_PER_WORD),
5287 n_intregs, n_intregs * UNITS_PER_WORD);
5288
5289 if (TARGET_SHMEDIA)
5290 /* Return the address of the regbuf. */
5291 return XEXP (regbuf, 0);
5292
5293 /* Save float args.
5294 This is optimized to only save the regs that are necessary. Explicitly
5295 named args need not be saved.
5296 We explicitly build a pointer to the buffer because it halves the insn
5297 count when not optimizing (otherwise the pointer is built for each reg
5298 saved).
5299 We emit the moves in reverse order so that we can use predecrement. */
5300
5301 fpregs = gen_reg_rtx (Pmode);
5302 emit_move_insn (fpregs, XEXP (regbuf, 0));
5303 emit_insn (gen_addsi3 (fpregs, fpregs,
5304 GEN_INT (n_floatregs * UNITS_PER_WORD)));
5305 if (TARGET_SH4)
5306 {
5307 rtx mem;
5308 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
5309 {
5310 emit_insn (gen_addsi3 (fpregs, fpregs,
5311 GEN_INT (-2 * UNITS_PER_WORD)));
5312 mem = gen_rtx_MEM (DFmode, fpregs);
5313 set_mem_alias_set (mem, alias_set);
5314 emit_move_insn (mem,
5315 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
5316 }
5317 regno = first_floatreg;
5318 if (regno & 1)
5319 {
5320 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5321 mem = gen_rtx_MEM (SFmode, fpregs);
5322 set_mem_alias_set (mem, alias_set);
5323 emit_move_insn (mem,
5324 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
5325 - (TARGET_LITTLE_ENDIAN != 0)));
5326 }
5327 }
5328 else
5329 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
5330 {
5331 rtx mem;
5332
5333 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5334 mem = gen_rtx_MEM (SFmode, fpregs);
5335 set_mem_alias_set (mem, alias_set);
5336 emit_move_insn (mem,
5337 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
5338 }
5339
5340 /* Return the address of the regbuf. */
5341 return XEXP (regbuf, 0);
5342 }
5343
5344 /* Define the `__builtin_va_list' type for the ABI. */
5345
5346 tree
5347 sh_build_va_list ()
5348 {
5349 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5350 tree record;
5351
5352 if (TARGET_SH5 || (! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
5353 return ptr_type_node;
5354
5355 record = make_node (RECORD_TYPE);
5356
5357 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
5358 ptr_type_node);
5359 f_next_o_limit = build_decl (FIELD_DECL,
5360 get_identifier ("__va_next_o_limit"),
5361 ptr_type_node);
5362 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
5363 ptr_type_node);
5364 f_next_fp_limit = build_decl (FIELD_DECL,
5365 get_identifier ("__va_next_fp_limit"),
5366 ptr_type_node);
5367 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
5368 ptr_type_node);
5369
5370 DECL_FIELD_CONTEXT (f_next_o) = record;
5371 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
5372 DECL_FIELD_CONTEXT (f_next_fp) = record;
5373 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
5374 DECL_FIELD_CONTEXT (f_next_stack) = record;
5375
5376 TYPE_FIELDS (record) = f_next_o;
5377 TREE_CHAIN (f_next_o) = f_next_o_limit;
5378 TREE_CHAIN (f_next_o_limit) = f_next_fp;
5379 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
5380 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
5381
5382 layout_type (record);
5383
5384 return record;
5385 }
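/* Editor's sketch (hypothetical type name, kept under #if 0 so it is not
   compiled): the record built above is roughly equivalent to the following
   C structure; the field glosses are inferred from sh_va_start and
   sh_va_arg below.  */
#if 0
struct example_sh_va_list
{
  void *__va_next_o;		/* next unnamed integer arg in the save area  */
  void *__va_next_o_limit;	/* end of the integer part of the save area  */
  void *__va_next_fp;		/* next unnamed FP arg in the save area  */
  void *__va_next_fp_limit;	/* end of the FP part of the save area  */
  void *__va_next_stack;	/* next unnamed arg passed on the stack  */
};
#endif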
5386
5387 /* Implement `va_start' for varargs and stdarg. */
5388
5389 void
5390 sh_va_start (valist, nextarg)
5391 tree valist;
5392 rtx nextarg;
5393 {
5394 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5395 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5396 tree t, u;
5397 int nfp, nint;
5398
5399 if (TARGET_SH5)
5400 {
5401 expand_builtin_saveregs ();
5402 std_expand_builtin_va_start (valist, nextarg);
5403 return;
5404 }
5405
5406 if ((! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
5407 {
5408 std_expand_builtin_va_start (valist, nextarg);
5409 return;
5410 }
5411
5412 f_next_o = TYPE_FIELDS (va_list_type_node);
5413 f_next_o_limit = TREE_CHAIN (f_next_o);
5414 f_next_fp = TREE_CHAIN (f_next_o_limit);
5415 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5416 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5417
5418 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5419 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5420 valist, f_next_o_limit);
5421 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
5422 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5423 valist, f_next_fp_limit);
5424 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5425 valist, f_next_stack);
5426
5427 /* Call __builtin_saveregs. */
5428 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
5429 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
5430 TREE_SIDE_EFFECTS (t) = 1;
5431 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5432
5433 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
5434 if (nfp < 8)
5435 nfp = 8 - nfp;
5436 else
5437 nfp = 0;
5438 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5439 build_int_2 (UNITS_PER_WORD * nfp, 0)));
5440 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
5441 TREE_SIDE_EFFECTS (t) = 1;
5442 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5443
5444 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
5445 TREE_SIDE_EFFECTS (t) = 1;
5446 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5447
5448 nint = current_function_args_info.arg_count[SH_ARG_INT];
5449 if (nint < 4)
5450 nint = 4 - nint;
5451 else
5452 nint = 0;
5453 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5454 build_int_2 (UNITS_PER_WORD * nint, 0)));
5455 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
5456 TREE_SIDE_EFFECTS (t) = 1;
5457 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5458
5459 u = make_tree (ptr_type_node, nextarg);
5460 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
5461 TREE_SIDE_EFFECTS (t) = 1;
5462 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5463 }
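/* Worked example (editor's note, hypothetical counts): for a varargs
   function with one named integer argument and no named FP arguments,
   __builtin_saveregs leaves 8 unnamed FP words followed by 3 unnamed
   integer words in the buffer, so next_fp points at the buffer,
   next_fp_limit and next_o at buffer + 32, next_o_limit at buffer + 44,
   and next_stack at the first stack-passed argument.  */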
5464
5465 /* Implement `va_arg'. */
5466
5467 rtx
5468 sh_va_arg (valist, type)
5469 tree valist, type;
5470 {
5471 HOST_WIDE_INT size, rsize;
5472 tree tmp, pptr_type_node;
5473 rtx addr_rtx, r;
5474 rtx result;
5475 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
5476
5477 size = int_size_in_bytes (type);
5478 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5479 pptr_type_node = build_pointer_type (ptr_type_node);
5480
5481 if (pass_by_ref)
5482 type = build_pointer_type (type);
5483
5484 if (! TARGET_SH5 && (TARGET_SH3E || TARGET_SH4) && ! TARGET_HITACHI)
5485 {
5486 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5487 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5488 int pass_as_float;
5489 rtx lab_false, lab_over;
5490
5491 f_next_o = TYPE_FIELDS (va_list_type_node);
5492 f_next_o_limit = TREE_CHAIN (f_next_o);
5493 f_next_fp = TREE_CHAIN (f_next_o_limit);
5494 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5495 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5496
5497 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5498 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5499 valist, f_next_o_limit);
5500 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
5501 valist, f_next_fp);
5502 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5503 valist, f_next_fp_limit);
5504 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5505 valist, f_next_stack);
5506
5507 if (TARGET_SH4)
5508 {
5509 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
5510 || (TREE_CODE (type) == COMPLEX_TYPE
5511 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
5512 && size <= 16));
5513 }
5514 else
5515 {
5516 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
5517 }
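/* For example (editor's note): on SH4 a double (REAL_TYPE of size 8) or a
   complex float qualifies and is fetched from the FP save area, whereas on
   SH3E only a plain 4-byte float does.  */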
5518
5519 addr_rtx = gen_reg_rtx (Pmode);
5520 lab_false = gen_label_rtx ();
5521 lab_over = gen_label_rtx ();
5522
5523 if (pass_as_float)
5524 {
5525 int first_floatreg
5526 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5527 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5528
5529 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
5530 EXPAND_NORMAL),
5531 expand_expr (next_fp_limit, NULL_RTX,
5532 Pmode, EXPAND_NORMAL),
5533 GE, const1_rtx, Pmode, 1, lab_false);
5534
5535 if (TYPE_ALIGN (type) > BITS_PER_WORD
5536 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
5537 && (n_floatregs & 1)))
5538 {
5539 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
5540 build_int_2 (UNITS_PER_WORD, 0));
5541 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
5542 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
5543 TREE_SIDE_EFFECTS (tmp) = 1;
5544 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5545 }
5546
5547 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
5548 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5549 if (r != addr_rtx)
5550 emit_move_insn (addr_rtx, r);
5551
5552 emit_jump_insn (gen_jump (lab_over));
5553 emit_barrier ();
5554 emit_label (lab_false);
5555
5556 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5557 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5558 if (r != addr_rtx)
5559 emit_move_insn (addr_rtx, r);
5560 }
5561 else
5562 {
5563 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
5564 build_int_2 (rsize, 0));
5565
5566 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
5567 EXPAND_NORMAL),
5568 expand_expr (next_o_limit, NULL_RTX,
5569 Pmode, EXPAND_NORMAL),
5570 GT, const1_rtx, Pmode, 1, lab_false);
5571
5572 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
5573 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5574 if (r != addr_rtx)
5575 emit_move_insn (addr_rtx, r);
5576
5577 emit_jump_insn (gen_jump (lab_over));
5578 emit_barrier ();
5579 emit_label (lab_false);
5580
5581 if (size > 4 && ! TARGET_SH4)
5582 {
5583 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
5584 TREE_SIDE_EFFECTS (tmp) = 1;
5585 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5586 }
5587
5588 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5589 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5590 if (r != addr_rtx)
5591 emit_move_insn (addr_rtx, r);
5592 }
5593
5594 emit_label (lab_over);
5595
5596 tmp = make_tree (pptr_type_node, addr_rtx);
5597 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
5598 }
5599
5600 /* ??? In va-sh.h, there had been code to make values larger than
5601 size 8 indirect. This does not match the FUNCTION_ARG macros. */
5602
5603 result = std_expand_builtin_va_arg (valist, type);
5604 if (pass_by_ref)
5605 {
5606 #ifdef POINTERS_EXTEND_UNSIGNED
5607 if (GET_MODE (result) != Pmode)
5608 result = convert_memory_address (Pmode, result);
5609 #endif
5610 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
5611 set_mem_alias_set (result, get_varargs_alias_set ());
5612 }
5613 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
5614 argument to the varargs alias set. */
5615 return result;
5616 }
5617
5618 /* Define the offset between two registers, one to be eliminated, and
5619 the other its replacement, at the start of a routine. */
5620
5621 int
5622 initial_elimination_offset (from, to)
5623 int from;
5624 int to;
5625 {
5626 int regs_saved;
5627 int regs_saved_rounding = 0;
5628 int total_saved_regs_space;
5629 int total_auto_space;
5630 int save_flags = target_flags;
5631 int copy_flags;
5632
5633 HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
5634 calc_live_regs (&regs_saved, live_regs_mask);
5635 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
5636 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
5637 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5638 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
5639
5640 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
5641 copy_flags = target_flags;
5642 target_flags = save_flags;
5643
5644 total_saved_regs_space = regs_saved + regs_saved_rounding;
5645
5646 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
5647 return total_saved_regs_space + total_auto_space
5648 + current_function_args_info.byref_regs * 8;
5649
5650 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5651 return total_saved_regs_space + total_auto_space
5652 + current_function_args_info.byref_regs * 8;
5653
5654 /* Initial gap between fp and sp is 0. */
5655 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5656 return 0;
5657
5658 if (from == RETURN_ADDRESS_POINTER_REGNUM
5659 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
5660 {
5661 if (TARGET_SH5)
5662 {
5663 int i, n = total_saved_regs_space;
5664 int align;
5665 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5666
5667 n += total_auto_space;
5668
5669 /* If it wasn't saved, there's not much we can do. */
5670 if ((live_regs_mask[pr_reg / 32] & (1 << (pr_reg % 32))) == 0)
5671 return n;
5672
5673 target_flags = copy_flags;
5674
5675 /* We loop twice: first we check the 8-byte-aligned registers,
5676 which are stored at the higher addresses and are known to be
5677 aligned. Then we check the 32-bit registers that don't need
5678 8-byte alignment. */
5679 for (align = 1; align >= 0; align--)
5680 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5681 if (live_regs_mask[i/32] & (1 << (i % 32)))
5682 {
5683 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5684
5685 if (mode == SFmode && (i % 2) == 1
5686 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5687 && (live_regs_mask[(i ^ 1) / 32]
5688 & (1 << ((i ^ 1) % 32))))
5689 {
5690 mode = DFmode;
5691 i--;
5692 }
5693
5694 /* If we're doing the aligned pass and this is not aligned,
5695 or we're doing the unaligned pass and this is aligned,
5696 skip it. */
5697 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5698 == 0) != align)
5699 continue;
5700
5701 n -= GET_MODE_SIZE (mode);
5702
5703 if (i == pr_reg)
5704 {
5705 target_flags = save_flags;
5706 return n;
5707 }
5708 }
5709
5710 abort ();
5711 }
5712 else
5713 return total_auto_space;
5714 }
5715
5716 abort ();
5717 }
5718 \f
5719 /* Handle machine specific pragmas to be semi-compatible with Hitachi
5720 compiler. */
5721
5722 void
5723 sh_pr_interrupt (pfile)
5724 cpp_reader *pfile ATTRIBUTE_UNUSED;
5725 {
5726 pragma_interrupt = 1;
5727 }
5728
5729 void
5730 sh_pr_trapa (pfile)
5731 cpp_reader *pfile ATTRIBUTE_UNUSED;
5732 {
5733 pragma_interrupt = pragma_trapa = 1;
5734 }
5735
5736 void
5737 sh_pr_nosave_low_regs (pfile)
5738 cpp_reader *pfile ATTRIBUTE_UNUSED;
5739 {
5740 pragma_nosave_low_regs = 1;
5741 }
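/* Illustrative usage sketch (not from the original sources; it assumes the
   pragma spellings these handlers are registered under elsewhere in the
   port are "interrupt", "trapa" and "nosave_low_regs"):

     #pragma interrupt
     void isr (void);

     #pragma trapa
     void trap_isr (void);

     #pragma nosave_low_regs

   Function decls seen while pragma_interrupt is set receive the
   interrupt_handler attribute; see sh_insert_attributes below.  */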
5742
5743 /* Generate an 'interrupt_handler' attribute for decls. */
5744
5745 static void
5746 sh_insert_attributes (node, attributes)
5747 tree node;
5748 tree * attributes;
5749 {
5750 if (! pragma_interrupt
5751 || TREE_CODE (node) != FUNCTION_DECL)
5752 return;
5753
5754 /* We are only interested in declarations. */
5755 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
5756 return;
5757
5758 /* Add a 'handle_interrupt' attribute. */
5759 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
5760
5761 return;
5762 }
5763
5764 /* Supported attributes:
5765
5766 interrupt_handler -- specifies this function is an interrupt handler.
5767
5768 sp_switch -- specifies an alternate stack for an interrupt handler
5769 to run on.
5770
5771 trap_exit -- use a trapa to exit an interrupt function instead of
5772 an rte instruction. */
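/* Illustrative usage sketch (not from the original sources; the handler
   name, the stack variable and the trap number are invented):

     char *alt_stack;

     void handler (void)
       __attribute__ ((interrupt_handler,
                       sp_switch ("alt_stack"),
                       trap_exit (4)));

   Note that, as coded below, sp_switch and trap_exit are accepted only
   while pragma_interrupt is set.  */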
5773
5774 const struct attribute_spec sh_attribute_table[] =
5775 {
5776 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
5777 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
5778 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
5779 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
5780 { NULL, 0, 0, false, false, false, NULL }
5781 };
5782
5783 /* Handle an "interrupt_handler" attribute; arguments as in
5784 struct attribute_spec.handler. */
5785 static tree
5786 sh_handle_interrupt_handler_attribute (node, name, args, flags, no_add_attrs)
5787 tree *node;
5788 tree name;
5789 tree args ATTRIBUTE_UNUSED;
5790 int flags ATTRIBUTE_UNUSED;
5791 bool *no_add_attrs;
5792 {
5793 if (TREE_CODE (*node) != FUNCTION_DECL)
5794 {
5795 warning ("`%s' attribute only applies to functions",
5796 IDENTIFIER_POINTER (name));
5797 *no_add_attrs = true;
5798 }
5799 else if (TARGET_SHCOMPACT)
5800 {
5801 error ("attribute interrupt_handler is not compatible with -m5-compact");
5802 *no_add_attrs = true;
5803 }
5804
5805 return NULL_TREE;
5806 }
5807
5808 /* Handle an "sp_switch" attribute; arguments as in
5809 struct attribute_spec.handler. */
5810 static tree
5811 sh_handle_sp_switch_attribute (node, name, args, flags, no_add_attrs)
5812 tree *node;
5813 tree name;
5814 tree args;
5815 int flags ATTRIBUTE_UNUSED;
5816 bool *no_add_attrs;
5817 {
5818 if (TREE_CODE (*node) != FUNCTION_DECL)
5819 {
5820 warning ("`%s' attribute only applies to functions",
5821 IDENTIFIER_POINTER (name));
5822 *no_add_attrs = true;
5823 }
5824 else if (!pragma_interrupt)
5825 {
5826 /* The sp_switch attribute only has meaning for interrupt functions. */
5827 warning ("`%s' attribute only applies to interrupt functions",
5828 IDENTIFIER_POINTER (name));
5829 *no_add_attrs = true;
5830 }
5831 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
5832 {
5833 /* The argument must be a constant string. */
5834 warning ("`%s' attribute argument not a string constant",
5835 IDENTIFIER_POINTER (name));
5836 *no_add_attrs = true;
5837 }
5838 else
5839 {
5840 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
5841 TREE_STRING_POINTER (TREE_VALUE (args)));
5842 }
5843
5844 return NULL_TREE;
5845 }
5846
5847 /* Handle a "trap_exit" attribute; arguments as in
5848 struct attribute_spec.handler. */
5849 static tree
5850 sh_handle_trap_exit_attribute (node, name, args, flags, no_add_attrs)
5851 tree *node;
5852 tree name;
5853 tree args;
5854 int flags ATTRIBUTE_UNUSED;
5855 bool *no_add_attrs;
5856 {
5857 if (TREE_CODE (*node) != FUNCTION_DECL)
5858 {
5859 warning ("`%s' attribute only applies to functions",
5860 IDENTIFIER_POINTER (name));
5861 *no_add_attrs = true;
5862 }
5863 else if (!pragma_interrupt)
5864 {
5865 /* The trap_exit attribute only has meaning for interrupt functions. */
5866 warning ("`%s' attribute only applies to interrupt functions",
5867 IDENTIFIER_POINTER (name));
5868 *no_add_attrs = true;
5869 }
5870 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
5871 {
5872 /* The argument must be a constant integer. */
5873 warning ("`%s' attribute argument not an integer constant",
5874 IDENTIFIER_POINTER (name));
5875 *no_add_attrs = true;
5876 }
5877 else
5878 {
5879 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
5880 }
5881
5882 return NULL_TREE;
5883 }
5884
5885 int
5886 sh_cfun_interrupt_handler_p ()
5887 {
5888 return (lookup_attribute ("interrupt_handler",
5889 DECL_ATTRIBUTES (current_function_decl))
5890 != NULL_TREE);
5891 }
5892 \f
5893 /* Predicates used by the templates. */
5894
5895 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
5896 Used only in general_movsrc_operand. */
5897
5898 int
5899 system_reg_operand (op, mode)
5900 rtx op;
5901 enum machine_mode mode ATTRIBUTE_UNUSED;
5902 {
5903 switch (REGNO (op))
5904 {
5905 case PR_REG:
5906 case MACL_REG:
5907 case MACH_REG:
5908 return 1;
5909 }
5910 return 0;
5911 }
5912
5913 /* Returns 1 if OP can be the source of a simple move operation.
5914 Same as general_operand, but a LABEL_REF is valid, while PRE_DEC is
5915 invalid, as are subregs of system registers. */
5916
5917 int
5918 general_movsrc_operand (op, mode)
5919 rtx op;
5920 enum machine_mode mode;
5921 {
5922 if (GET_CODE (op) == MEM)
5923 {
5924 rtx inside = XEXP (op, 0);
5925 if (GET_CODE (inside) == CONST)
5926 inside = XEXP (inside, 0);
5927
5928 if (GET_CODE (inside) == LABEL_REF)
5929 return 1;
5930
5931 if (GET_CODE (inside) == PLUS
5932 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
5933 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
5934 return 1;
5935
5936 /* Only post inc allowed. */
5937 if (GET_CODE (inside) == PRE_DEC)
5938 return 0;
5939 }
5940
5941 if ((mode == QImode || mode == HImode)
5942 && (GET_CODE (op) == SUBREG
5943 && GET_CODE (XEXP (op, 0)) == REG
5944 && system_reg_operand (XEXP (op, 0), mode)))
5945 return 0;
5946
5947 return general_operand (op, mode);
5948 }
5949
5950 /* Returns 1 if OP can be the destination of a move.
5951 Same as general_operand, but no post-increment allowed. */
5952
5953 int
5954 general_movdst_operand (op, mode)
5955 rtx op;
5956 enum machine_mode mode;
5957 {
5958 /* Only pre dec allowed. */
5959 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
5960 return 0;
5961
5962 return general_operand (op, mode);
5963 }
5964
5965 /* Returns 1 if OP is a normal arithmetic register. */
5966
5967 int
5968 arith_reg_operand (op, mode)
5969 rtx op;
5970 enum machine_mode mode;
5971 {
5972 if (register_operand (op, mode))
5973 {
5974 int regno;
5975
5976 if (GET_CODE (op) == REG)
5977 regno = REGNO (op);
5978 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
5979 regno = REGNO (SUBREG_REG (op));
5980 else
5981 return 1;
5982
5983 return (regno != T_REG && regno != PR_REG
5984 && ! TARGET_REGISTER_P (regno)
5985 && (regno != FPUL_REG || TARGET_SH4)
5986 && regno != MACH_REG && regno != MACL_REG);
5987 }
5988 return 0;
5989 }
5990
5991 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
5992 because this would lead to missing sign extensions when truncating from
5993 DImode to SImode. */
5994 int
5995 arith_reg_dest (op, mode)
5996 rtx op;
5997 enum machine_mode mode;
5998 {
5999 if (mode == DImode && GET_CODE (op) == SUBREG
6000 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
6001 return 0;
6002 return arith_reg_operand (op, mode);
6003 }
6004
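/* Return 1 if OP is, after reload, a general register in an integer mode
   narrower than a word. */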
6005 int
6006 int_gpr_dest (op, mode)
6007 rtx op;
6008 enum machine_mode mode ATTRIBUTE_UNUSED;
6009 {
6010 enum machine_mode op_mode = GET_MODE (op);
6011
6012 if (GET_MODE_CLASS (op_mode) != MODE_INT
6013 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
6014 return 0;
6015 if (! reload_completed)
6016 return 0;
6017 return true_regnum (op) <= LAST_GENERAL_REG;
6018 }
6019
6020 int
6021 fp_arith_reg_operand (op, mode)
6022 rtx op;
6023 enum machine_mode mode;
6024 {
6025 if (register_operand (op, mode))
6026 {
6027 int regno;
6028
6029 if (GET_CODE (op) == REG)
6030 regno = REGNO (op);
6031 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6032 regno = REGNO (SUBREG_REG (op));
6033 else
6034 return 1;
6035
6036 return (regno >= FIRST_PSEUDO_REGISTER
6037 || FP_REGISTER_P (regno));
6038 }
6039 return 0;
6040 }
6041
6042 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
6043
6044 int
6045 arith_operand (op, mode)
6046 rtx op;
6047 enum machine_mode mode;
6048 {
6049 if (arith_reg_operand (op, mode))
6050 return 1;
6051
6052 if (TARGET_SHMEDIA)
6053 {
6054 /* FIXME: We should be checking whether the CONST_INT satisfies
6055 CONST_OK_FOR_J here, but this causes reload_cse to crash when
6056 attempting to transform a sequence of two 64-bit sets of the
6057 same register from literal constants into a set and an add,
6058 when the difference is too wide for an add. */
6059 if (GET_CODE (op) == CONST_INT
6060 || EXTRA_CONSTRAINT_S (op))
6061 return 1;
6062 else
6063 return 0;
6064 }
6065 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
6066 return 1;
6067
6068 return 0;
6069 }
6070
6071 /* Returns 1 if OP is a valid source operand for a compare insn. */
6072
6073 int
6074 arith_reg_or_0_operand (op, mode)
6075 rtx op;
6076 enum machine_mode mode;
6077 {
6078 if (arith_reg_operand (op, mode))
6079 return 1;
6080
6081 if (EXTRA_CONSTRAINT_U (op))
6082 return 1;
6083
6084 return 0;
6085 }
6086
6087 /* Return 1 if OP is a valid source operand for an SHmedia operation
6088 that takes either a register or a 6-bit immediate. */
6089
6090 int
6091 shmedia_6bit_operand (op, mode)
6092 rtx op;
6093 enum machine_mode mode;
6094 {
6095 return (arith_reg_operand (op, mode)
6096 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_O (INTVAL (op))));
6097 }
6098
6099 /* Returns 1 if OP is a valid source operand for a logical operation. */
6100
6101 int
6102 logical_operand (op, mode)
6103 rtx op;
6104 enum machine_mode mode;
6105 {
6106 if (arith_reg_operand (op, mode))
6107 return 1;
6108
6109 if (TARGET_SHMEDIA)
6110 {
6111 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_P (INTVAL (op)))
6112 return 1;
6113 else
6114 return 0;
6115 }
6116 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
6117 return 1;
6118
6119 return 0;
6120 }
6121
6122 int
6123 and_operand (op, mode)
6124 rtx op;
6125 enum machine_mode mode;
6126 {
6127 if (logical_operand (op, mode))
6128 return 1;
6129
6130 /* Check mshflo.l / mshfhi.l opportunities. */
6131 if (TARGET_SHMEDIA
6132 && mode == DImode
6133 && GET_CODE (op) == CONST_INT
6134 && (INTVAL (op) == (unsigned) 0xffffffff
6135 || INTVAL (op) == (HOST_WIDE_INT) -1 << 32))
6136 return 1;
6137
6138 return 0;
6139 }
6140
6141 /* Nonzero if OP is a floating point value with value 0.0. */
6142
6143 int
6144 fp_zero_operand (op)
6145 rtx op;
6146 {
6147 REAL_VALUE_TYPE r;
6148
6149 if (GET_MODE (op) != SFmode)
6150 return 0;
6151
6152 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6153 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
6154 }
6155
6156 /* Nonzero if OP is a floating point value with value 1.0. */
6157
6158 int
6159 fp_one_operand (op)
6160 rtx op;
6161 {
6162 REAL_VALUE_TYPE r;
6163
6164 if (GET_MODE (op) != SFmode)
6165 return 0;
6166
6167 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6168 return REAL_VALUES_EQUAL (r, dconst1);
6169 }
6170
6171 /* For -m4 and -m4-single-only, mode switching is used. If we are
6172 compiling without -mfmovd, movsf_ie isn't taken into account for
6173 mode switching. We could check in machine_dependent_reorg for
6174 cases where we know we are in single precision mode, but there is
6175 no interface to find that out during reload, so we must avoid
6176 choosing an fldi alternative during reload and thus failing to
6177 allocate a scratch register for the constant loading. */
6178 int
6179 fldi_ok ()
6180 {
6181 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
6182 }
6183
6184 int
6185 tertiary_reload_operand (op, mode)
6186 rtx op;
6187 enum machine_mode mode ATTRIBUTE_UNUSED;
6188 {
6189 enum rtx_code code = GET_CODE (op);
6190 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
6191 }
6192
6193 int
6194 fpscr_operand (op, mode)
6195 rtx op;
6196 enum machine_mode mode ATTRIBUTE_UNUSED;
6197 {
6198 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
6199 && GET_MODE (op) == PSImode);
6200 }
6201
6202 int
6203 fpul_operand (op, mode)
6204 rtx op;
6205 enum machine_mode mode;
6206 {
6207 if (TARGET_SHMEDIA)
6208 return fp_arith_reg_operand (op, mode);
6209
6210 return (GET_CODE (op) == REG
6211 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
6212 && GET_MODE (op) == mode);
6213 }
6214
6215 int
6216 symbol_ref_operand (op, mode)
6217 rtx op;
6218 enum machine_mode mode ATTRIBUTE_UNUSED;
6219 {
6220 return (GET_CODE (op) == SYMBOL_REF);
6221 }
6222
6223 int
6224 commutative_float_operator (op, mode)
6225 rtx op;
6226 enum machine_mode mode;
6227 {
6228 if (GET_MODE (op) != mode)
6229 return 0;
6230 switch (GET_CODE (op))
6231 {
6232 case PLUS:
6233 case MULT:
6234 return 1;
6235 default:
6236 break;
6237 }
6238 return 0;
6239 }
6240
6241 int
6242 noncommutative_float_operator (op, mode)
6243 rtx op;
6244 enum machine_mode mode;
6245 {
6246 if (GET_MODE (op) != mode)
6247 return 0;
6248 switch (GET_CODE (op))
6249 {
6250 case MINUS:
6251 case DIV:
6252 return 1;
6253 default:
6254 break;
6255 }
6256 return 0;
6257 }
6258
6259 int
6260 unary_float_operator (op, mode)
6261 rtx op;
6262 enum machine_mode mode;
6263 {
6264 if (GET_MODE (op) != mode)
6265 return 0;
6266 switch (GET_CODE (op))
6267 {
6268 case ABS:
6269 case NEG:
6270 case SQRT:
6271 return 1;
6272 default:
6273 break;
6274 }
6275 return 0;
6276 }
6277
6278 int
6279 binary_float_operator (op, mode)
6280 rtx op;
6281 enum machine_mode mode;
6282 {
6283 if (GET_MODE (op) != mode)
6284 return 0;
6285 switch (GET_CODE (op))
6286 {
6287 case PLUS:
6288 case MINUS:
6289 case MULT:
6290 case DIV:
6291 return 1;
6292 default:
6293 break;
6294 }
6295 return 0;
6296 }
6297
6298 int
6299 binary_logical_operator (op, mode)
6300 rtx op;
6301 enum machine_mode mode;
6302 {
6303 if (GET_MODE (op) != mode)
6304 return 0;
6305 switch (GET_CODE (op))
6306 {
6307 case IOR:
6308 case AND:
6309 case XOR:
6310 return 1;
6311 default:
6312 break;
6313 }
6314 return 0;
6315 }
6316
6317 int
6318 equality_comparison_operator (op, mode)
6319 rtx op;
6320 enum machine_mode mode;
6321 {
6322 return ((mode == VOIDmode || GET_MODE (op) == mode)
6323 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
6324 }
6325
6326 int greater_comparison_operator (op, mode)
6327 rtx op;
6328 enum machine_mode mode;
6329 {
6330 if (mode != VOIDmode && GET_MODE (op) != mode)
6331 return 0;
6332 switch (GET_CODE (op))
6333 {
6334 case GT:
6335 case GE:
6336 case GTU:
6337 case GEU:
6338 return 1;
6339 default:
6340 return 0;
6341 }
6342 }
6343
6344 int less_comparison_operator (op, mode)
6345 rtx op;
6346 enum machine_mode mode;
6347 {
6348 if (mode != VOIDmode && GET_MODE (op) != mode)
6349 return 0;
6350 switch (GET_CODE (op))
6351 {
6352 case LT:
6353 case LE:
6354 case LTU:
6355 case LEU:
6356 return 1;
6357 default:
6358 return 0;
6359 }
6360 }
6361
6362 /* Accept pseudos and branch target registers. */
6363 int
6364 target_reg_operand (op, mode)
6365 rtx op;
6366 enum machine_mode mode;
6367 {
6368 if (mode != DImode
6369 || GET_MODE (op) != DImode)
6370 return 0;
6371
6372 if (GET_CODE (op) == SUBREG)
6373 op = XEXP (op, 0);
6374
6375 if (GET_CODE (op) != REG)
6376 return 0;
6377
6378 /* We must protect ourselves from matching pseudos that are virtual
6379 registers, because they will eventually be replaced with hardware
6380 registers that aren't branch-target registers. */
6381 if (REGNO (op) > LAST_VIRTUAL_REGISTER
6382 || TARGET_REGISTER_P (REGNO (op)))
6383 return 1;
6384
6385 return 0;
6386 }
6387
6388 /* Same as target_reg_operand, except that label_refs and symbol_refs
6389 are accepted before reload. */
6390 int
6391 target_operand (op, mode)
6392 rtx op;
6393 enum machine_mode mode;
6394 {
6395 if (mode != DImode)
6396 return 0;
6397
6398 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
6399 && EXTRA_CONSTRAINT_T (op))
6400 return ! reload_completed;
6401
6402 return target_reg_operand (op, mode);
6403 }
6404
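/* Return 1 if OP is a CONST_INT that is a multiple of 8 in the range
   8 .. 56, i.e. a byte-aligned bit offset usable by the mextr patterns. */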
6405 int
6406 mextr_bit_offset (op, mode)
6407 rtx op;
6408 enum machine_mode mode ATTRIBUTE_UNUSED;
6409 {
6410 HOST_WIDE_INT i;
6411
6412 if (GET_CODE (op) != CONST_INT)
6413 return 0;
6414 i = INTVAL (op);
6415 return i >= 1*8 && i <= 7*8 && (i & 7) == 0;
6416 }
6417
6418 int
6419 extend_reg_operand (op, mode)
6420 rtx op;
6421 enum machine_mode mode;
6422 {
6423 return (GET_CODE (op) == TRUNCATE
6424 ? arith_operand
6425 : arith_reg_operand) (op, mode);
6426 }
6427
6428 int
6429 trunc_hi_operand (op, mode)
6430 rtx op;
6431 enum machine_mode mode;
6432 {
6433 enum machine_mode op_mode = GET_MODE (op);
6434
6435 if (op_mode != SImode && op_mode != DImode
6436 && op_mode != V4HImode && op_mode != V2SImode)
6437 return 0;
6438 return extend_reg_operand (op, mode);
6439 }
6440
6441 int
6442 extend_reg_or_0_operand (op, mode)
6443 rtx op;
6444 enum machine_mode mode;
6445 {
6446 return (GET_CODE (op) == TRUNCATE
6447 ? arith_operand
6448 : arith_reg_or_0_operand) (op, mode);
6449 }
6450
6451 int
6452 general_extend_operand (op, mode)
6453 rtx op;
6454 enum machine_mode mode;
6455 {
6456 return (GET_CODE (op) == TRUNCATE
6457 ? arith_operand
6458 : nonimmediate_operand) (op, mode);
6459 }
6460
6461 int
6462 inqhi_operand (op, mode)
6463 rtx op;
6464 enum machine_mode mode;
6465 {
6466 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
6467 return 0;
6468 op = XEXP (op, 0);
6469 /* Can't use true_regnum here because copy_cost wants to know about
6470 SECONDARY_INPUT_RELOAD_CLASS. */
6471 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
6472 }
6473
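/* Return 1 if V is a CONST_VECTOR or PARALLEL of mode MODE all of whose
   elements equal its last element; for vectors with byte-sized elements,
   the last two elements may instead repeat as a pair throughout. */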
6474 int
6475 sh_rep_vec (v, mode)
6476 rtx v;
6477 enum machine_mode mode;
6478 {
6479 int i;
6480 rtx x, y;
6481
6482 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
6483 || (GET_MODE (v) != mode && mode != VOIDmode))
6484 return 0;
6485 i = XVECLEN (v, 0) - 2;
6486 x = XVECEXP (v, 0, i + 1);
6487 if (GET_MODE_UNIT_SIZE (mode) == 1)
6488 {
6489 y = XVECEXP (v, 0, i);
6490 for (i -= 2 ; i >= 0; i -= 2)
6491 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
6492 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
6493 return 0;
6494 }
6495 else
6496 for (; i >= 0; i--)
6497 if (XVECEXP (v, 0, i) != x)
6498 return 0;
6499 return 1;
6500 }
6501
6502 /* Determine if V is a constant vector matching MODE with only one element
6503 that is not a sign extension. Two byte-sized elements count as one. */
6504 int
6505 sh_1el_vec (v, mode)
6506 rtx v;
6507 enum machine_mode mode;
6508 {
6509 int unit_size;
6510 int i, last, least, sign_ix;
6511 rtx sign;
6512
6513 if (GET_CODE (v) != CONST_VECTOR
6514 || (GET_MODE (v) != mode && mode != VOIDmode))
6515 return 0;
6516 /* Determine the numbers of the last and of the least significant elements. */
6517 last = XVECLEN (v, 0) - 1;
6518 least = TARGET_LITTLE_ENDIAN ? 0 : last;
6519 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
6520 return 0;
6521 sign_ix = least;
6522 if (GET_MODE_UNIT_SIZE (mode) == 1)
6523 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
6524 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
6525 return 0;
6526 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
6527 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
6528 ? constm1_rtx : const0_rtx);
6529 i = XVECLEN (v, 0) - 1;
6530 do
6531 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
6532 return 0;
6533 while (--i);
6534 return 1;
6535 }
6536
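/* Return 1 if V is a CONST_VECTOR of mode MODE all of whose elements are
   CONST_INTs. */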
6537 int
6538 sh_const_vec (v, mode)
6539 rtx v;
6540 enum machine_mode mode;
6541 {
6542 int i;
6543
6544 if (GET_CODE (v) != CONST_VECTOR
6545 || (GET_MODE (v) != mode && mode != VOIDmode))
6546 return 0;
6547 i = XVECLEN (v, 0) - 1;
6548 for (; i >= 0; i--)
6549 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
6550 return 0;
6551 return 1;
6552 }
6553 \f
6554 /* Return the destination address of a branch. */
6555
6556 static int
6557 branch_dest (branch)
6558 rtx branch;
6559 {
6560 rtx dest = SET_SRC (PATTERN (branch));
6561 int dest_uid;
6562
6563 if (GET_CODE (dest) == IF_THEN_ELSE)
6564 dest = XEXP (dest, 1);
6565 dest = XEXP (dest, 0);
6566 dest_uid = INSN_UID (dest);
6567 return INSN_ADDRESSES (dest_uid);
6568 }
6569 \f
6570 /* Return nonzero if REG is not used after INSN.
6571 We assume REG is a reload reg, and therefore does
6572 not live past labels. It may live past calls or jumps though. */
6573 int
6574 reg_unused_after (reg, insn)
6575 rtx reg;
6576 rtx insn;
6577 {
6578 enum rtx_code code;
6579 rtx set;
6580
6581 /* If the reg is set by this instruction, then it is safe for our
6582 case. Disregard the case where this is a store to memory, since
6583 we are checking a register used in the store address. */
6584 set = single_set (insn);
6585 if (set && GET_CODE (SET_DEST (set)) != MEM
6586 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6587 return 1;
6588
6589 while ((insn = NEXT_INSN (insn)))
6590 {
6591 code = GET_CODE (insn);
6592
6593 #if 0
6594 /* If this is a label that existed before reload, then the register
6595 is dead here. However, if this is a label added by reorg, then
6596 the register may still be live here. We can't tell the difference,
6597 so we just ignore labels completely. */
6598 if (code == CODE_LABEL)
6599 return 1;
6600 /* else */
6601 #endif
6602
6603 if (code == JUMP_INSN)
6604 return 0;
6605
6606 /* If this is a sequence, we must handle them all at once.
6607 We could have for instance a call that sets the target register,
6608 and an insn in a delay slot that uses the register. In this case,
6609 we must return 0. */
6610 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
6611 {
6612 int i;
6613 int retval = 0;
6614
6615 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
6616 {
6617 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
6618 rtx set = single_set (this_insn);
6619
6620 if (GET_CODE (this_insn) == CALL_INSN)
6621 code = CALL_INSN;
6622 else if (GET_CODE (this_insn) == JUMP_INSN)
6623 {
6624 if (INSN_ANNULLED_BRANCH_P (this_insn))
6625 return 0;
6626 code = JUMP_INSN;
6627 }
6628
6629 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6630 return 0;
6631 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6632 {
6633 if (GET_CODE (SET_DEST (set)) != MEM)
6634 retval = 1;
6635 else
6636 return 0;
6637 }
6638 if (set == 0
6639 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
6640 return 0;
6641 }
6642 if (retval == 1)
6643 return 1;
6644 else if (code == JUMP_INSN)
6645 return 0;
6646 }
6647 else if (GET_RTX_CLASS (code) == 'i')
6648 {
6649 rtx set = single_set (insn);
6650
6651 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6652 return 0;
6653 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6654 return GET_CODE (SET_DEST (set)) != MEM;
6655 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
6656 return 0;
6657 }
6658
6659 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
6660 return 1;
6661 }
6662 return 1;
6663 }
6664 \f
6665 #include "ggc.h"
6666
6667 static GTY(()) rtx fpscr_rtx;
6668 rtx
6669 get_fpscr_rtx ()
6670 {
6671 if (! fpscr_rtx)
6672 {
6673 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
6674 REG_USERVAR_P (fpscr_rtx) = 1;
6675 mark_user_reg (fpscr_rtx);
6676 }
6677 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
6678 mark_user_reg (fpscr_rtx);
6679 return fpscr_rtx;
6680 }
6681
6682 void
6683 emit_sf_insn (pat)
6684 rtx pat;
6685 {
6686 emit_insn (pat);
6687 }
6688
6689 void
6690 emit_df_insn (pat)
6691 rtx pat;
6692 {
6693 emit_insn (pat);
6694 }
6695
6696 void
6697 expand_sf_unop (fun, operands)
6698 rtx (*fun) PARAMS ((rtx, rtx, rtx));
6699 rtx *operands;
6700 {
6701 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
6702 }
6703
6704 void
6705 expand_sf_binop (fun, operands)
6706 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
6707 rtx *operands;
6708 {
6709 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
6710 get_fpscr_rtx ()));
6711 }
6712
6713 void
6714 expand_df_unop (fun, operands)
6715 rtx (*fun) PARAMS ((rtx, rtx, rtx));
6716 rtx *operands;
6717 {
6718 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
6719 }
6720
6721 void
6722 expand_df_binop (fun, operands)
6723 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
6724 rtx *operands;
6725 {
6726 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
6727 get_fpscr_rtx ()));
6728 }
6729 \f
6730 /* ??? gcc does flow analysis strictly after common subexpression
6731 elimination. As a result, common subexpression elimination fails
6732 when there are some intervening statements setting the same register.
6733 If we did nothing about this, this would hurt the precision switching
6734 for SH4 badly. There is some cse after reload, but it is unable to
6735 undo the extra register pressure from the unused instructions, and
6736 it cannot remove auto-increment loads.
6737
6738 A C code example that shows this flow/cse weakness for (at least) SH
6739 and sparc (as of gcc ss-970706) is this:
6740
6741 double
6742 f(double a)
6743 {
6744 double d;
6745 d = 0.1;
6746 a += d;
6747 d = 1.1;
6748 d = 0.1;
6749 a *= d;
6750 return a;
6751 }
6752
6753 So we add another pass before common subexpression elimination, to
6754 remove assignments that are dead due to a following assignment in the
6755 same basic block. */
6756
6757 static void
6758 mark_use (x, reg_set_block)
6759 rtx x, *reg_set_block;
6760 {
6761 enum rtx_code code;
6762
6763 if (! x)
6764 return;
6765 code = GET_CODE (x);
6766 switch (code)
6767 {
6768 case REG:
6769 {
6770 int regno = REGNO (x);
6771 int nregs = (regno < FIRST_PSEUDO_REGISTER
6772 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
6773 : 1);
6774 do
6775 {
6776 reg_set_block[regno + nregs - 1] = 0;
6777 }
6778 while (--nregs);
6779 break;
6780 }
6781 case SET:
6782 {
6783 rtx dest = SET_DEST (x);
6784
6785 if (GET_CODE (dest) == SUBREG)
6786 dest = SUBREG_REG (dest);
6787 if (GET_CODE (dest) != REG)
6788 mark_use (dest, reg_set_block);
6789 mark_use (SET_SRC (x), reg_set_block);
6790 break;
6791 }
6792 case CLOBBER:
6793 break;
6794 default:
6795 {
6796 const char *fmt = GET_RTX_FORMAT (code);
6797 int i, j;
6798 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
6799 {
6800 if (fmt[i] == 'e')
6801 mark_use (XEXP (x, i), reg_set_block);
6802 else if (fmt[i] == 'E')
6803 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6804 mark_use (XVECEXP (x, i, j), reg_set_block);
6805 }
6806 break;
6807 }
6808 }
6809 }
6810 \f
6811 static rtx get_free_reg PARAMS ((HARD_REG_SET));
6812
6813 /* This function returns a register to use for loading the address from
6814 which the fpscr is loaded. Currently it always returns r1 or r7, but
6815 when we are able to use pseudo registers after combine, or have a better
6816 mechanism for choosing a register, it should be done here. */
6817 /* REGS_LIVE is the liveness information for the point for which we
6818 need this allocation. In some bare-bones exit blocks, r1 is live at the
6819 start. We can even have all of r0..r3 being live:
6820 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
6821 The insn before which the new insns are placed will clobber the register
6822 we return. If a basic block consists only of setting the return value
6823 register to a pseudo and using that register, the return value is not
6824 live before or after this block, yet we'll insert our insns right in
6825 the middle. */
6826
6827 static rtx
6828 get_free_reg (regs_live)
6829 HARD_REG_SET regs_live;
6830 {
6831 if (! TEST_HARD_REG_BIT (regs_live, 1))
6832 return gen_rtx_REG (Pmode, 1);
6833
6834 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
6835 there shouldn't be anything but a jump before the function end. */
6836 if (! TEST_HARD_REG_BIT (regs_live, 7))
6837 return gen_rtx_REG (Pmode, 7);
6838
6839 abort ();
6840 }
6841
6842 /* This function will set the fpscr from memory.
6843 MODE is the mode we are setting it to. */
6844 void
6845 fpscr_set_from_mem (mode, regs_live)
6846 int mode;
6847 HARD_REG_SET regs_live;
6848 {
6849 enum attr_fp_mode fp_mode = mode;
6850 rtx addr_reg = get_free_reg (regs_live);
6851
6852 if (fp_mode == (enum attr_fp_mode) NORMAL_MODE (FP_MODE))
6853 emit_insn (gen_fpu_switch1 (addr_reg));
6854 else
6855 emit_insn (gen_fpu_switch0 (addr_reg));
6856 }
6857
6858 /* Is the given character a logical line separator for the assembler? */
6859 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
6860 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
6861 #endif
6862
6863 int
6864 sh_insn_length_adjustment (insn)
6865 rtx insn;
6866 {
6867 /* Instructions with unfilled delay slots take up an extra two bytes for
6868 the nop in the delay slot. */
6869 if (((GET_CODE (insn) == INSN
6870 && GET_CODE (PATTERN (insn)) != USE
6871 && GET_CODE (PATTERN (insn)) != CLOBBER)
6872 || GET_CODE (insn) == CALL_INSN
6873 || (GET_CODE (insn) == JUMP_INSN
6874 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
6875 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
6876 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
6877 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
6878 return 2;
6879
6880 /* sh-dsp parallel processing insns take four bytes instead of two. */
6881
6882 if (GET_CODE (insn) == INSN)
6883 {
6884 int sum = 0;
6885 rtx body = PATTERN (insn);
6886 const char *template;
6887 char c;
6888 int maybe_label = 1;
6889
6890 if (GET_CODE (body) == ASM_INPUT)
6891 template = XSTR (body, 0);
6892 else if (asm_noperands (body) >= 0)
6893 template
6894 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
6895 else
6896 return 0;
6897 do
6898 {
6899 int ppi_adjust = 0;
6900
6901 do
6902 c = *template++;
6903 while (c == ' ' || c == '\t');
6904 /* all sh-dsp parallel-processing insns start with p.
6905 The only non-ppi sh insn starting with p is pref.
6906 The only ppi starting with pr is prnd. */
6907 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
6908 ppi_adjust = 2;
6909 /* The repeat pseudo-insn expands to three insns, a total of
6910 six bytes in size. */
6911 else if ((c == 'r' || c == 'R')
6912 && ! strncasecmp ("epeat", template, 5))
6913 ppi_adjust = 4;
6914 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
6915 {
6916 /* If this is a label, it is obviously not a ppi insn. */
6917 if (c == ':' && maybe_label)
6918 {
6919 ppi_adjust = 0;
6920 break;
6921 }
6922 else if (c == '\'' || c == '"')
6923 maybe_label = 0;
6924 c = *template++;
6925 }
6926 sum += ppi_adjust;
6927 maybe_label = c != ':';
6928 }
6929 while (c);
6930 return sum;
6931 }
6932 return 0;
6933 }
6934 \f
6935 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
6936 isn't protected by a PIC unspec. */
6937 int
6938 nonpic_symbol_mentioned_p (x)
6939 rtx x;
6940 {
6941 register const char *fmt;
6942 register int i;
6943
6944 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
6945 || GET_CODE (x) == PC)
6946 return 1;
6947
6948 /* We don't want to look into the possible MEM location of a
6949 CONST_DOUBLE, since we're not going to use it, in general. */
6950 if (GET_CODE (x) == CONST_DOUBLE)
6951 return 0;
6952
6953 if (GET_CODE (x) == UNSPEC
6954 && (XINT (x, 1) == UNSPEC_PIC
6955 || XINT (x, 1) == UNSPEC_GOT
6956 || XINT (x, 1) == UNSPEC_GOTOFF
6957 || XINT (x, 1) == UNSPEC_GOTPLT
6958 || XINT (x, 1) == UNSPEC_PLT))
6959 return 0;
6960
6961 fmt = GET_RTX_FORMAT (GET_CODE (x));
6962 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6963 {
6964 if (fmt[i] == 'E')
6965 {
6966 register int j;
6967
6968 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6969 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
6970 return 1;
6971 }
6972 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
6973 return 1;
6974 }
6975
6976 return 0;
6977 }
6978
6979 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
6980 @GOTOFF in `reg'. */
6981 rtx
6982 legitimize_pic_address (orig, mode, reg)
6983 rtx orig;
6984 enum machine_mode mode ATTRIBUTE_UNUSED;
6985 rtx reg;
6986 {
6987 if (GET_CODE (orig) == LABEL_REF
6988 || (GET_CODE (orig) == SYMBOL_REF
6989 && (CONSTANT_POOL_ADDRESS_P (orig)
6990 /* SYMBOL_REF_FLAG is set on static symbols. */
6991 || SYMBOL_REF_FLAG (orig))))
6992 {
6993 if (reg == 0)
6994 reg = gen_reg_rtx (Pmode);
6995
6996 emit_insn (gen_symGOTOFF2reg (reg, orig));
6997 return reg;
6998 }
6999 else if (GET_CODE (orig) == SYMBOL_REF)
7000 {
7001 if (reg == 0)
7002 reg = gen_reg_rtx (Pmode);
7003
7004 emit_insn (gen_symGOT2reg (reg, orig));
7005 return reg;
7006 }
7007 return orig;
7008 }
7009
7010 /* Mark the use of a constant in the literal table. If the constant
7011 has multiple labels, make it unique. */
7012 static rtx
7013 mark_constant_pool_use (x)
7014 rtx x;
7015 {
7016 rtx insn, lab, pattern;
7017
7018 if (x == NULL)
7019 return x;
7020
7021 switch (GET_CODE (x))
7022 {
7023 case LABEL_REF:
7024 x = XEXP (x, 0);
7025 case CODE_LABEL:
7026 break;
7027 default:
7028 return x;
7029 }
7030
7031 /* Get the first label in the list of labels for the same constant
7032 and delete the other labels in the list. */
7033 lab = x;
7034 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
7035 {
7036 if (GET_CODE (insn) != CODE_LABEL
7037 || LABEL_REFS (insn) != NEXT_INSN (insn))
7038 break;
7039 lab = insn;
7040 }
7041
7042 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
7043 INSN_DELETED_P (insn) = 1;
7044
7045 /* Mark constants in a window. */
7046 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
7047 {
7048 if (GET_CODE (insn) != INSN)
7049 continue;
7050
7051 pattern = PATTERN (insn);
7052 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
7053 continue;
7054
7055 switch (XINT (pattern, 1))
7056 {
7057 case UNSPECV_CONST2:
7058 case UNSPECV_CONST4:
7059 case UNSPECV_CONST8:
7060 XVECEXP (pattern, 0, 1) = const1_rtx;
7061 break;
7062 case UNSPECV_WINDOW_END:
7063 if (XVECEXP (pattern, 0, 0) == x)
7064 return lab;
7065 break;
7066 case UNSPECV_CONST_END:
7067 return lab;
7068 default:
7069 break;
7070 }
7071 }
7072
7073 return lab;
7074 }
7075 \f
7076 /* Return true if it's possible to redirect BRANCH1 to the destination
7077 of an unconditional jump BRANCH2. We only want to do this if the
7078 resulting branch will have a short displacement. */
7079 int
7080 sh_can_redirect_branch (branch1, branch2)
7081 rtx branch1;
7082 rtx branch2;
7083 {
7084 if (flag_expensive_optimizations && simplejump_p (branch2))
7085 {
7086 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
7087 rtx insn;
7088 int distance;
7089
7090 for (distance = 0, insn = NEXT_INSN (branch1);
7091 insn && distance < 256;
7092 insn = PREV_INSN (insn))
7093 {
7094 if (insn == dest)
7095 return 1;
7096 else
7097 distance += get_attr_length (insn);
7098 }
7099 for (distance = 0, insn = NEXT_INSN (branch1);
7100 insn && distance < 256;
7101 insn = NEXT_INSN (insn))
7102 {
7103 if (insn == dest)
7104 return 1;
7105 else
7106 distance += get_attr_length (insn);
7107 }
7108 }
7109 return 0;
7110 }
7111
7112 /* Return nonzero if register old_reg can be renamed to register new_reg. */
7113 int
7114 sh_hard_regno_rename_ok (old_reg, new_reg)
7115 unsigned int old_reg ATTRIBUTE_UNUSED;
7116 unsigned int new_reg;
7117 {
7118
7119 /* Interrupt functions can only use registers that have already been
7120 saved by the prologue, even if they would normally be
7121 call-clobbered. */
7122
7123 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
7124 return 0;
7125
7126 return 1;
7127 }
7128
7129 /* Function to update the integer COST
7130 based on the relationship between INSN that is dependent on
7131 DEP_INSN through the dependence LINK. The default is to make no
7132 adjustment to COST. This can be used for example to specify to
7133 the scheduler that an output- or anti-dependence does not incur
7134 the same cost as a data-dependence. The return value should be
7135 the new value for COST. */
7136 static int
7137 sh_adjust_cost (insn, link, dep_insn, cost)
7138 rtx insn;
7139 rtx link ATTRIBUTE_UNUSED;
7140 rtx dep_insn;
7141 int cost;
7142 {
7143 rtx reg, use_pat;
7144
7145 if (TARGET_SHMEDIA)
7146 {
7147 /* On SHmedia, if the dependence is an anti-dependence or
7148 output-dependence, there is no cost. */
7149 if (REG_NOTE_KIND (link) != 0)
7150 cost = 0;
7151
7152 if (get_attr_is_mac_media (insn)
7153 && get_attr_is_mac_media (dep_insn))
7154 cost = 1;
7155 }
7156 else if (REG_NOTE_KIND (link) == 0)
7157 {
7158 enum attr_type dep_type, type;
7159
7160 if (recog_memoized (insn) < 0
7161 || recog_memoized (dep_insn) < 0)
7162 return cost;
7163
7164 dep_type = get_attr_type (dep_insn);
7165 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
7166 cost--;
7167 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
7168 && (type = get_attr_type (insn)) != TYPE_CALL
7169 && type != TYPE_SFUNC)
7170 cost--;
7171
7172 /* The only input for a call that is timing-critical is the
7173 function's address. */
7174 if (GET_CODE (insn) == CALL_INSN)
7175 {
7176 rtx call = PATTERN (insn);
7177
7178 if (GET_CODE (call) == PARALLEL)
7179 call = XVECEXP (call, 0 ,0);
7180 if (GET_CODE (call) == SET)
7181 call = SET_SRC (call);
7182 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
7183 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
7184 cost = 0;
7185 }
7186 /* Likewise, the most timing-critical input for an sfunc call
7187 is the function address. However, sfuncs typically start
7188 using their arguments pretty quickly.
7189 Assume a four cycle delay before they are needed. */
7190 /* All sfunc calls are parallels with at least four components.
7191 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
7192 else if (GET_CODE (PATTERN (insn)) == PARALLEL
7193 && XVECLEN (PATTERN (insn), 0) >= 4
7194 && (reg = sfunc_uses_reg (insn)))
7195 {
7196 if (! reg_set_p (reg, dep_insn))
7197 cost -= 4;
7198 }
7199 /* When the preceding instruction loads the shift amount of
7200 the following SHAD/SHLD, the latency of the load is increased
7201 by 1 cycle. */
7202 else if (TARGET_SH4
7203 && get_attr_type (insn) == TYPE_DYN_SHIFT
7204 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
7205 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
7206 XEXP (SET_SRC (single_set(insn)),
7207 1)))
7208 cost++;
7209 /* When an LS group instruction with a latency of less than
7210 3 cycles is followed by a double-precision floating-point
7211 instruction, FIPR, or FTRV, the latency of the first
7212 instruction is increased to 3 cycles. */
7213 else if (cost < 3
7214 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
7215 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
7216 cost = 3;
7217 /* The lsw register of a double-precision computation is ready one
7218 cycle earlier. */
7219 else if (reload_completed
7220 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
7221 && (use_pat = single_set (insn))
7222 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
7223 SET_SRC (use_pat)))
7224 cost -= 1;
7225
7226 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
7227 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
7228 cost -= 1;
7229 }
7230 /* An anti-dependence penalty of two applies if the first insn is a double
7231 precision fadd / fsub / fmul. */
7232 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7233 && recog_memoized (dep_insn) >= 0
7234 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
7235 /* A lot of alleged anti-flow dependences are fake,
7236 so check this one is real. */
7237 && flow_dependent_p (dep_insn, insn))
7238 cost = 2;
7239
7240
7241 return cost;
7242 }
7243
7244 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
7245 if DEP_INSN is anti-flow dependent on INSN. */
7246 static int
7247 flow_dependent_p (insn, dep_insn)
7248 rtx insn, dep_insn;
7249 {
7250 rtx tmp = PATTERN (insn);
7251
7252 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
7253 return tmp == NULL_RTX;
7254 }
7255
7256 /* A helper function for flow_dependent_p called through note_stores. */
7257 static void
7258 flow_dependent_p_1 (x, pat, data)
7259 rtx x;
7260 rtx pat ATTRIBUTE_UNUSED;
7261 void *data;
7262 {
7263 rtx * pinsn = (rtx *) data;
7264
7265 if (*pinsn && reg_referenced_p (x, *pinsn))
7266 *pinsn = NULL_RTX;
7267 }
7268
7269 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
7270 'special function' patterns (type sfunc) that clobber pr, but that
7271 do not look like function calls to leaf_function_p. Hence we must
7272 do this extra check. */
7273 int
7274 sh_pr_n_sets ()
7275 {
7276 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
7277 }
7278
7279 /* This function returns nonzero if the DFA-based scheduler interface
7280 is to be used. At present this is supported for the SH4 only. */
7281 static int
7282 sh_use_dfa_interface ()
7283 {
7284 if (TARGET_HARD_SH4)
7285 return 1;
7286 else
7287 return 0;
7288 }
7289
7290 /* This function returns "2" to indicate dual issue for the SH4
7291 processor. To be used by the DFA pipeline description. */
7292 static int
7293 sh_issue_rate ()
7294 {
7295 if (TARGET_SUPERSCALAR)
7296 return 2;
7297 else
7298 return 1;
7299 }
7300
7301 /* SHmedia requires registers for branches, so we can't generate new
7302 branches past reload. */
7303 static bool
7304 sh_cannot_modify_jumps_p ()
7305 {
7306 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
7307 }
7308
7309 static bool
7310 sh_ms_bitfield_layout_p (record_type)
7311 tree record_type ATTRIBUTE_UNUSED;
7312 {
7313 return TARGET_SH5;
7314 }
7315
7316 /* If using PIC, mark a SYMBOL_REF for a non-global symbol so that we
7317 may access it using GOTOFF instead of GOT. */
7318
7319 static void
7320 sh_encode_section_info (decl, first)
7321 tree decl;
7322 int first;
7323 {
7324 rtx rtl, symbol;
7325
7326 if (DECL_P (decl))
7327 rtl = DECL_RTL (decl);
7328 else
7329 rtl = TREE_CST_RTL (decl);
7330 if (GET_CODE (rtl) != MEM)
7331 return;
7332 symbol = XEXP (rtl, 0);
7333 if (GET_CODE (symbol) != SYMBOL_REF)
7334 return;
7335
7336 if (flag_pic)
7337 SYMBOL_REF_FLAG (symbol) = (*targetm.binds_local_p) (decl);
7338
7339 if (TARGET_SH5 && first && TREE_CODE (decl) != FUNCTION_DECL)
7340 XEXP (rtl, 0) = gen_datalabel_ref (symbol);
7341 }
7342
7343 /* Undo the effects of the above. */
7344
7345 static const char *
7346 sh_strip_name_encoding (str)
7347 const char *str;
7348 {
7349 STRIP_DATALABEL_ENCODING (str, str);
7350 str += *str == '*';
7351 return str;
7352 }
7353
7354 \f
7355 /*
7356 On the SH1..SH4, the trampoline looks like
7357 2 0002 D202 mov.l l2,r2
7358 1 0000 D301 mov.l l1,r3
7359 3 0004 422B jmp @r2
7360 4 0006 0009 nop
7361 5 0008 00000000 l1: .long area
7362 6 000c 00000000 l2: .long function
7363
7364 SH5 (compact) uses r1 instead of r3 for the static chain. */
7365
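/* In the SH1..SH4 case at the end of sh_initialize_trampoline below, the
   two instruction pairs above are stored as SImode words at offsets 0 and
   4 (with the halfwords swapped between little and big endian), the static
   chain (l1, "area") is stored at offset 8, and the function address (l2)
   at offset 12. */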
7366
7367 /* Emit RTL insns to initialize the variable parts of a trampoline.
7368 FNADDR is an RTX for the address of the function's pure code.
7369 CXT is an RTX for the static chain value for the function. */
7370
7371 void
7372 sh_initialize_trampoline (tramp, fnaddr, cxt)
7373 rtx tramp, fnaddr, cxt;
7374 {
7375 if (TARGET_SHMEDIA64)
7376 {
7377 rtx tramp_templ;
7378 int fixed_len;
7379
7380 rtx movi1 = GEN_INT (0xcc000010);
7381 rtx shori1 = GEN_INT (0xc8000010);
7382 rtx src, dst;
7383
7384 /* The following trampoline works within a +- 128 KB range for cxt:
7385 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
7386 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
7387 gettr tr1,r1; blink tr0,r63 */
7388 /* Address rounding makes it hard to compute the exact bounds of the
7389 offset for this trampoline, but we have a rather generous offset
7390 range, so frame_offset should do fine as an upper bound. */
7391 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
7392 {
7393 /* ??? could optimize this trampoline initialization
7394 by writing DImode words with two insns each. */
7395 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
7396 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
7397 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
7398 insn = gen_rtx_AND (DImode, insn, mask);
7399 /* Or in ptb/u .,tr1 pattern */
7400 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
7401 insn = force_operand (insn, NULL_RTX);
7402 insn = gen_lowpart (SImode, insn);
7403 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
7404 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
7405 insn = gen_rtx_AND (DImode, insn, mask);
7406 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
7407 insn = gen_lowpart (SImode, insn);
7408 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
7409 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
7410 insn = gen_rtx_AND (DImode, insn, mask);
7411 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7412 insn = gen_lowpart (SImode, insn);
7413 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
7414 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
7415 insn = gen_rtx_AND (DImode, insn, mask);
7416 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7417 insn = gen_lowpart (SImode, insn);
7418 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7419 insn);
7420 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
7421 insn = gen_rtx_AND (DImode, insn, mask);
7422 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7423 insn = gen_lowpart (SImode, insn);
7424 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
7425 insn);
7426 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
7427 GEN_INT (0x6bf10600));
7428 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
7429 GEN_INT (0x4415fc10));
7430 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
7431 GEN_INT (0x4401fff0));
7432 emit_insn (gen_ic_invalidate_line (tramp));
7433 return;
7434 }
7435 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
7436 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
7437
7438 tramp_templ = gen_datalabel_ref (tramp_templ);
7439 dst = gen_rtx_MEM (BLKmode, tramp);
7440 src = gen_rtx_MEM (BLKmode, tramp_templ);
7441 set_mem_align (dst, 256);
7442 set_mem_align (src, 64);
7443 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
7444
7445 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
7446 fnaddr);
7447 emit_move_insn (gen_rtx_MEM (Pmode,
7448 plus_constant (tramp,
7449 fixed_len
7450 + GET_MODE_SIZE (Pmode))),
7451 cxt);
7452 emit_insn (gen_ic_invalidate_line (tramp));
7453 return;
7454 }
7455 else if (TARGET_SHMEDIA)
7456 {
7457 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
7458 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
7459 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
7460 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
7461 /* movi 0,r1: 0xcc000010 shori 0,r1: 0xc8000010 concatenated,
7462 rotated right by 10, with the higher 16 bits of every 32 selected. */
7463 rtx movishori
7464 = force_reg (V2HImode, (simplify_gen_subreg
7465 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
7466 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
7467 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
7468
7469 tramp = force_reg (Pmode, tramp);
7470 fnaddr = force_reg (SImode, fnaddr);
7471 cxt = force_reg (SImode, cxt);
7472 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
7473 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
7474 movishori));
7475 emit_insn (gen_rotldi3_mextr (quad0, quad0,
7476 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7477 emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
7478 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
7479 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
7480 gen_rtx_SUBREG (V2HImode, cxt, 0),
7481 movishori));
7482 emit_insn (gen_rotldi3_mextr (cxtload, cxtload,
7483 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7484 emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
7485 if (TARGET_LITTLE_ENDIAN)
7486 {
7487 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
7488 emit_insn (gen_mextr4 (quad2, cxtload, blink));
7489 }
7490 else
7491 {
7492 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
7493 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
7494 }
7495 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
7496 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
7497 emit_insn (gen_ic_invalidate_line (tramp));
7498 return;
7499 }
7500 else if (TARGET_SHCOMPACT)
7501 {
7502 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
7503 return;
7504 }
7505 emit_move_insn (gen_rtx_MEM (SImode, tramp),
7506 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
7507 SImode));
7508 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
7509 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
7510 SImode));
7511 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
7512 cxt);
7513 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7514 fnaddr);
7515 if (TARGET_HARVARD)
7516 {
7517 if (TARGET_USERMODE)
7518 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__ic_invalidate"),
7519 0, VOIDmode, 1, tramp, SImode);
7520 else
7521 emit_insn (gen_ic_invalidate_line (tramp));
7522 }
7523 }
7524
7525 /* FIXME: This is overly conservative. A SHcompact function that
7526 receives arguments ``by reference'' will have them stored in its
7527 own stack frame, so it must not pass pointers or references to
7528 these arguments to other functions by means of sibling calls. */
7529 static bool
7530 sh_function_ok_for_sibcall (decl, exp)
7531 tree decl;
7532 tree exp ATTRIBUTE_UNUSED;
7533 {
7534 return (decl
7535 && (! TARGET_SHCOMPACT
7536 || current_function_args_info.stack_regs == 0));
7537 }
7538 \f
7539 /* Machine specific built-in functions. */
7540
7541 struct builtin_description
7542 {
7543 const enum insn_code icode;
7544 const char *const name;
7545 int signature;
7546 };
7547
7548 /* Describe the number and signedness of arguments; arg[0] == result
7549 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
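/* For example, SH_BLTIN_SUS below is { 2, 2, 1 }: such a builtin returns a
   signed value and takes a signed first argument and an unsigned second
   argument (cf. the mshards_q note after the table). */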
7550 static const char signature_args[][4] =
7551 {
7552 #define SH_BLTIN_V2SI2 0
7553 { 4, 4 },
7554 #define SH_BLTIN_V4HI2 1
7555 { 4, 4 },
7556 #define SH_BLTIN_V2SI3 2
7557 { 4, 4, 4 },
7558 #define SH_BLTIN_V4HI3 3
7559 { 4, 4, 4 },
7560 #define SH_BLTIN_V8QI3 4
7561 { 4, 4, 4 },
7562 #define SH_BLTIN_MAC_HISI 5
7563 { 1, 4, 4, 1 },
7564 #define SH_BLTIN_SH_HI 6
7565 { 4, 4, 1 },
7566 #define SH_BLTIN_SH_SI 7
7567 { 4, 4, 1 },
7568 #define SH_BLTIN_V4HI2V2SI 8
7569 { 4, 4, 4 },
7570 #define SH_BLTIN_V4HI2V8QI 9
7571 { 4, 4, 4 },
7572 #define SH_BLTIN_SISF 10
7573 { 4, 2 },
7574 #define SH_BLTIN_LDUA_L 11
7575 { 2, 8 },
7576 #define SH_BLTIN_LDUA_Q 12
7577 { 1, 8 },
7578 #define SH_BLTIN_STUA_L 13
7579 { 0, 8, 2 },
7580 #define SH_BLTIN_STUA_Q 14
7581 { 0, 8, 1 },
7582 #define SH_BLTIN_UDI 15
7583 { 0, 8, 1 },
7584 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
7585 #define SH_BLTIN_2 16
7586 #define SH_BLTIN_SU 16
7587 { 1, 2 },
7588 #define SH_BLTIN_3 17
7589 #define SH_BLTIN_SUS 17
7590 { 2, 2, 1 },
7591 #define SH_BLTIN_PSSV 18
7592 { 0, 8, 2, 2 },
7593 #define SH_BLTIN_XXUU 19
7594 #define SH_BLTIN_UUUU 19
7595 { 1, 1, 1, 1 },
7596 #define SH_BLTIN_PV 20
7597 { 0, 8 },
7598 };
7599 /* mcmv: operands considered unsigned. */
7600 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
7601 /* mperm: control value considered unsigned int. */
7602 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
7603 /* mshards_q: returns signed short. */
7604 /* nsb: takes long long arg, returns unsigned char. */
7605 static const struct builtin_description bdesc[] =
7606 {
7607 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
7608 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
7609 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
7610 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
7611 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
7612 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
7613 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
7614 #if 0
7615 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7616 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7617 #endif
7618 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
7619 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
7620 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
7621 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
7622 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
7623 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
7624 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
7625 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
7626 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
7627 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
7628 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
7629 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
7630 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
7631 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
7632 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
7633 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
7634 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
7635 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
7636 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
7637 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
7638 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
7639 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
7640 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
7641 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
7642 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
7643 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
7644 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
7645 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
7646 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
7647 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
7648 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
7649 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
7650 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
7651 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
7652 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
7653 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
7654 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
7655 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
7656 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
7657 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
7658 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
7659 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
7660 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
7661 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
7662 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
7663 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
7664 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
7665 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
7666 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
7667 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
7668 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
7669 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
7670 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
7671 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
7672 #if 0
7673 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7674 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7675 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7676 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
7677 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
7678 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
7679 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
7680 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
7681 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7682 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7683 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7684 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
7685 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
7686 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
7687 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
7688 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
7689 #endif
7690 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
7691 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
7692 #if 0
7693 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
7694 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
7695 #endif
7696 };
7697
7698 static void
7699 sh_media_init_builtins ()
7700 {
7701 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
7702 const struct builtin_description *d;
7703
7704 memset (shared, 0, sizeof shared);
7705 for (d = bdesc; d - bdesc < (int) (sizeof bdesc / sizeof bdesc[0]); d++)
7706 {
7707 tree type, arg_type;
7708 int signature = d->signature;
7709 int i;
7710
7711 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
7712 type = shared[signature];
7713 else
7714 {
7715 int has_result = signature_args[signature][0] != 0;
7716
7717 if (signature_args[signature][1] == 8
7718 && (insn_data[d->icode].operand[has_result].mode != Pmode))
7719 continue;
7720 if (! TARGET_FPU_ANY
7721 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
7722 continue;
7723 type = void_list_node;
7724 for (i = 3; ; i--)
7725 {
7726 int arg = signature_args[signature][i];
7727 int opno = i - 1 + has_result;
7728
7729 if (arg == 8)
7730 arg_type = ptr_type_node;
7731 else if (arg)
7732 arg_type = ((*lang_hooks.types.type_for_mode)
7733 (insn_data[d->icode].operand[opno].mode,
7734 (arg & 1)));
7735 else if (i)
7736 continue;
7737 else
7738 arg_type = void_type_node;
7739 if (i == 0)
7740 break;
7741 type = tree_cons (NULL_TREE, arg_type, type);
7742 }
7743 type = build_function_type (arg_type, type);
7744 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
7745 shared[signature] = type;
7746 }
7747 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
7748 NULL, NULL_TREE);
7749 }
7750 }
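/* A minimal sketch of what the loop above builds for a single bdesc entry,
   assuming (as the notes before the table suggest) that the nsb pattern's
   operands are QImode and DImode; illustrative only, not compiled:

     tree arg  = (*lang_hooks.types.type_for_mode) (DImode, 0);
     tree ret  = (*lang_hooks.types.type_for_mode) (QImode, 1);
     tree type = build_function_type (ret,
                                      tree_cons (NULL_TREE, arg,
                                                 void_list_node));
     builtin_function ("__builtin_sh_media_NSB", type, d - bdesc,
                       BUILT_IN_MD, NULL, NULL_TREE);  */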
7751
7752 static void
7753 sh_init_builtins ()
7754 {
7755 if (TARGET_SHMEDIA)
7756 sh_media_init_builtins ();
7757 }
7758
7759 /* Expand an expression EXP that calls a built-in function,
7760 with result going to TARGET if that's convenient
7761 (and in mode MODE if that's convenient).
7762 SUBTARGET may be used as the target for computing one of EXP's operands.
7763 IGNORE is nonzero if the value is to be ignored. */
7764
7765 static rtx
7766 sh_expand_builtin (exp, target, subtarget, mode, ignore)
7767 tree exp;
7768 rtx target;
7769 rtx subtarget ATTRIBUTE_UNUSED;
7770 enum machine_mode mode ATTRIBUTE_UNUSED;
7771 int ignore;
7772 {
7773 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7774 tree arglist = TREE_OPERAND (exp, 1);
7775 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
7776 const struct builtin_description *d = &bdesc[fcode];
7777 enum insn_code icode = d->icode;
7778 int signature = d->signature;
7779 enum machine_mode tmode = VOIDmode;
7780 int nop = 0, i;
7781 rtx op[4];
7782 rtx pat;
7783
7784 if (signature_args[signature][0])
7785 {
7786 if (ignore)
7787 return 0;
7788
7789 tmode = insn_data[icode].operand[0].mode;
7790 if (! target
7791 || GET_MODE (target) != tmode
7792 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7793 target = gen_reg_rtx (tmode);
7794 op[nop++] = target;
7795 }
7796 else
7797 target = 0;
7798
7799 for (i = 1; i <= 3; i++, nop++)
7800 {
7801 tree arg;
7802 enum machine_mode opmode, argmode;
7803
7804 if (! signature_args[signature][i])
7805 break;
7806 arg = TREE_VALUE (arglist);
7807 if (arg == error_mark_node)
7808 return const0_rtx;
7809 arglist = TREE_CHAIN (arglist);
7810 opmode = insn_data[icode].operand[nop].mode;
7811 argmode = TYPE_MODE (TREE_TYPE (arg));
7812 if (argmode != opmode)
7813 arg = build1 (NOP_EXPR,
7814 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
7815 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
7816 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
7817 op[nop] = copy_to_mode_reg (opmode, op[nop]);
7818 }
7819
7820 switch (nop)
7821 {
7822 case 1:
7823 pat = (*insn_data[d->icode].genfun) (op[0]);
7824 break;
7825 case 2:
7826 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
7827 break;
7828 case 3:
7829 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
7830 break;
7831 case 4:
7832 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
7833 break;
7834 default:
7835 abort ();
7836 }
7837 if (! pat)
7838 return 0;
7839 emit_insn (pat);
7840 return target;
7841 }
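/* Usage sketch (illustrative, not part of the original sources): a call
   such as

       unsigned char n = __builtin_sh_media_NSB (x);

   reaches sh_expand_builtin with FCODE indexing the bdesc entry for
   CODE_FOR_nsb.  The argument is converted to the insn operand mode when
   it differs, forced into a register if the operand predicate rejects it,
   and a single generated insn is emitted with its result in TARGET.  */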
7842
7843 void
7844 sh_expand_unop_v2sf (code, op0, op1)
7845 enum rtx_code code;
7846 rtx op0, op1;
7847 {
7848 rtx sel0 = const0_rtx;
7849 rtx sel1 = const1_rtx;
7850 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx)) = gen_unary_sf_op;
7851 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
7852
7853 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
7854 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
7855 }
7856
7857 void
7858 sh_expand_binop_v2sf (code, op0, op1, op2)
7859 enum rtx_code code;
7860 rtx op0, op1, op2;
7861 {
7862 rtx sel0 = const0_rtx;
7863 rtx sel1 = const1_rtx;
7864 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx, rtx, rtx)) = gen_binary_sf_op;
7865 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
7866
7867 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0));
7868 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1));
7869 }
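/* Illustrative expansion (added comment): for a V2SF addition,
   sh_expand_binop_v2sf (PLUS, dst, a, b) emits gen_binary_sf_op twice,
   once selecting element 0 of each operand and once selecting element 1,
   so the vector operation is carried out as two independent SFmode
   operations.  */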
7870
7871 /* Return the class of registers for which a mode change from FROM to TO
7872 is invalid. */
7873 enum reg_class
7874 sh_cannot_change_mode_class (from, to)
7875 enum machine_mode from, to;
7876 {
7877 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
7878 {
7879 if (TARGET_LITTLE_ENDIAN)
7880 {
7881 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
7882 return DF_REGS;
7883 }
7884 else
7885 {
7886 if (GET_MODE_SIZE (from) < 8)
7887 return DF_HI_REGS;
7888 }
7889 }
7890 return NO_REGS;
7891 }
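/* Worked example (added for illustration): for a change from SImode
   (4 bytes) to DFmode (8 bytes) the sizes differ, so a little-endian
   target gets DF_REGS back (the change is invalid for the double
   precision registers), while a big-endian target gets DF_HI_REGS.  For
   equal-sized modes the function always returns NO_REGS.  */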
7892
7893
7894 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
7895 that label is used. */
7896
7897 void
7898 sh_mark_label (address, nuses)
7899 rtx address;
7900 int nuses;
7901 {
7902 if (GOTOFF_P (address))
7903 {
7904 /* Extract the label or symbol. */
7905 address = XEXP (address, 0);
7906 if (GET_CODE (address) == PLUS)
7907 address = XEXP (address, 0);
7908 address = XVECEXP (address, 0, 0);
7909 }
7910 if (GET_CODE (address) == LABEL_REF
7911 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
7912 LABEL_NUSES (XEXP (address, 0)) += nuses;
7913 }
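/* Example shape (illustrative): for a GOT-offset address along the lines
   of (const (plus (unspec [(label_ref L)] ...) (const_int 8))), the code
   above strips the CONST and PLUS wrappers, pulls the LABEL_REF out of
   the UNSPEC vector, and then increments LABEL_NUSES of L by NUSES.  */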
7914
7915 /* Compute extra cost of moving data between one register class
7916 and another. */
7917
7918 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
7919 uses this information. Hence, the general register <-> floating point
7920 register information here is not used for SFmode. */
7921
7922 int
7923 sh_register_move_cost (mode, srcclass, dstclass)
7924 enum machine_mode mode;
7925 enum reg_class srcclass, dstclass;
7926 {
7927 if (dstclass == T_REGS || dstclass == PR_REGS)
7928 return 10;
7929
7930 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
7931 && REGCLASS_HAS_FP_REG (srcclass)
7932 && REGCLASS_HAS_FP_REG (dstclass))
7933 return 4;
7934
7935 if ((REGCLASS_HAS_FP_REG (dstclass)
7936 && REGCLASS_HAS_GENERAL_REG (srcclass))
7937 || (REGCLASS_HAS_GENERAL_REG (dstclass)
7938 && REGCLASS_HAS_FP_REG (srcclass)))
7939 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
7940 * ((GET_MODE_SIZE (mode) + 7) / 8U));
7941
7942 if ((dstclass == FPUL_REGS
7943 && REGCLASS_HAS_GENERAL_REG (srcclass))
7944 || (srcclass == FPUL_REGS
7945 && REGCLASS_HAS_GENERAL_REG (dstclass)))
7946 return 5;
7947
7948 if ((dstclass == FPUL_REGS
7949 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
7950 || (srcclass == FPUL_REGS
7951 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
7952 return 7;
7953
7954 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
7955 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
7956 return 20;
7957
7958 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
7959 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
7960 return 4;
7961
7962 if (TARGET_SHMEDIA
7963 || (TARGET_FMOVD
7964 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
7965 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
7966 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
7967
7968 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
7969 }
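/* Worked example (added for illustration): moving a DFmode value (8 bytes)
   between a general register class and a floating point register class
   when neither TARGET_SHMEDIA nor TARGET_FMOVD is set costs
   12 * ((8 + 7) / 8) = 12, whereas the fall-through general case for the
   same mode costs 2 * ((8 + 3) / 4) = 4.  */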
7970
7971 /* Like register_operand, but take into account that SHMEDIA can use
7972 the constant zero like a general register. */
7973 int
7974 sh_register_operand (op, mode)
7975 rtx op;
7976 enum machine_mode mode;
7977 {
7978 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
7979 return 1;
7980 return register_operand (op, mode);
7981 }
7982
7983 #include "gt-sh.h"