1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2014 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sstream>
23 #include <vector>
24 #include <algorithm>
25
26 #include "config.h"
27 #include "system.h"
28 #include "coretypes.h"
29 #include "tm.h"
30 #include "insn-config.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "stringpool.h"
34 #include "stor-layout.h"
35 #include "calls.h"
36 #include "varasm.h"
37 #include "flags.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "reload.h"
41 #include "function.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "output.h"
45 #include "insn-attr.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "dwarf2.h"
49 #include "tm_p.h"
50 #include "target.h"
51 #include "target-def.h"
52 #include "langhooks.h"
53 #include "basic-block.h"
54 #include "df.h"
55 #include "intl.h"
56 #include "sched-int.h"
57 #include "params.h"
58 #include "ggc.h"
59 #include "hash-table.h"
60 #include "tree-ssa-alias.h"
61 #include "internal-fn.h"
62 #include "gimple-fold.h"
63 #include "tree-eh.h"
64 #include "gimple-expr.h"
65 #include "is-a.h"
66 #include "gimple.h"
67 #include "gimplify.h"
68 #include "cfgloop.h"
69 #include "alloc-pool.h"
70 #include "tm-constrs.h"
71 #include "opts.h"
72 #include "tree-pass.h"
73 #include "pass_manager.h"
74 #include "context.h"
75 #include "builtins.h"
76
77 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
78
79 /* These are some macros to abstract register modes. */
80 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
81 && ((HOST_WIDE_INT)(VALUE)) <= 511)
82
83 #define CONST_OK_FOR_ADD(size) \
84 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
85 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
86 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
87 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
88
89 /* Used to simplify the logic below. Find the attributes wherever
90 they may be. */
91 #define SH_ATTRIBUTES(decl) \
92 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
93 : DECL_ATTRIBUTES (decl) \
94 ? (DECL_ATTRIBUTES (decl)) \
95 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
96
97 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
98 int current_function_interrupt;
99
100 tree sh_deferred_function_attributes;
101 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
102
103 /* Global variables for machine-dependent things. */
104
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu;
107
108 /* Definitions used in ready queue reordering for first scheduling pass. */
109
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
112
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
115
116 /* Number of r0 life regions. */
117 static int r0_life_regions;
118
119 /* If true, skip cycles for Q -> R movement. */
120 static int skip_cycles = 0;
121
122 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
123 and returned from sh_reorder2. */
124 static short cached_can_issue_more;
125
126 /* Unique number for UNSPEC_BBR pattern. */
127 static unsigned int unspec_bbr_uid = 1;
128
129 /* Provides the class number of the smallest class containing
130 reg number. */
131 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
132 {
133 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
170 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
171 GENERAL_REGS, GENERAL_REGS,
172 };
173
174 char sh_register_names[FIRST_PSEUDO_REGISTER] \
175 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
176
177 char sh_additional_register_names[ADDREGNAMES_SIZE] \
178 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
179 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
180
181 int assembler_dialect;
182
183 static bool shmedia_space_reserved_for_target_registers;
184
185 static void split_branches (rtx_insn *);
186 static int branch_dest (rtx);
187 static void print_slot (rtx);
188 static rtx_code_label *add_constant (rtx, enum machine_mode, rtx);
189 static void dump_table (rtx_insn *, rtx_insn *);
190 static bool broken_move (rtx_insn *);
191 static bool mova_p (rtx_insn *);
192 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
193 static bool noncall_uses_reg (rtx, rtx, rtx *);
194 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
195 static void sh_reorg (void);
196 static void sh_option_override (void);
197 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
198 static rtx_insn *frame_insn (rtx);
199 static rtx push (int);
200 static void pop (int);
201 static void push_regs (HARD_REG_SET *, int);
202 static int calc_live_regs (HARD_REG_SET *);
203 static HOST_WIDE_INT rounded_frame_size (int);
204 static bool sh_frame_pointer_required (void);
205 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
206 static int sh_mode_needed (int, rtx_insn *);
207 static int sh_mode_after (int, int, rtx_insn *);
208 static int sh_mode_entry (int);
209 static int sh_mode_exit (int);
210 static int sh_mode_priority (int entity, int n);
211
212 static rtx mark_constant_pool_use (rtx);
213 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
214 int, bool *);
215 static tree sh_handle_resbank_handler_attribute (tree *, tree,
216 tree, int, bool *);
217 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
218 tree, int, bool *);
219 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
220 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
221 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
222 static void sh_print_operand (FILE *, rtx, int);
223 static void sh_print_operand_address (FILE *, rtx);
224 static bool sh_print_operand_punct_valid_p (unsigned char code);
225 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
226 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
227 static void sh_insert_attributes (tree, tree *);
228 static const char *sh_check_pch_target_flags (int);
229 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
230 static int sh_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
231 static int sh_issue_rate (void);
232 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
233 static short find_set_regmode_weight (rtx, enum machine_mode);
234 static short find_insn_regmode_weight (rtx, enum machine_mode);
235 static void find_regmode_weight (basic_block, enum machine_mode);
236 static int find_r0_life_regions (basic_block);
237 static void sh_md_init_global (FILE *, int, int);
238 static void sh_md_finish_global (FILE *, int);
239 static int rank_for_reorder (const void *, const void *);
240 static void swap_reorder (rtx_insn **, int);
241 static void ready_reorder (rtx_insn **, int);
242 static bool high_pressure (enum machine_mode);
243 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
244 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
245 static void sh_md_init (FILE *, int, int);
246 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
247
248 static bool sh_function_ok_for_sibcall (tree, tree);
249
250 static bool sh_cannot_modify_jumps_p (void);
251 static reg_class_t sh_target_reg_class (void);
252 static bool sh_optimize_target_register_callee_saved (bool);
253 static bool sh_ms_bitfield_layout_p (const_tree);
254
255 static void sh_init_builtins (void);
256 static tree sh_builtin_decl (unsigned, bool);
257 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
258 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
259 HOST_WIDE_INT, tree);
260 static void sh_file_start (void);
261 static bool flow_dependent_p (rtx, rtx);
262 static void flow_dependent_p_1 (rtx, const_rtx, void *);
263 static int shiftcosts (rtx);
264 static int and_xor_ior_costs (rtx, int);
265 static int addsubcosts (rtx);
266 static int multcosts (rtx);
267 static bool unspec_caller_rtx_p (rtx);
268 static bool sh_cannot_copy_insn_p (rtx_insn *);
269 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
270 static int sh_address_cost (rtx, enum machine_mode, addr_space_t, bool);
271 static int sh_pr_n_sets (void);
272 static rtx sh_allocate_initial_value (rtx);
273 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
274 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
275 enum machine_mode,
276 struct secondary_reload_info *);
277 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
278 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
279 static rtx sh_delegitimize_address (rtx);
280 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
281 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
282 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
283 static int scavenge_reg (HARD_REG_SET *s);
284 struct save_schedule_s;
285 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
286 struct save_schedule_s *, int);
287
288 static rtx sh_struct_value_rtx (tree, int);
289 static rtx sh_function_value (const_tree, const_tree, bool);
290 static bool sh_function_value_regno_p (const unsigned int);
291 static rtx sh_libcall_value (enum machine_mode, const_rtx);
292 static bool sh_return_in_memory (const_tree, const_tree);
293 static rtx sh_builtin_saveregs (void);
294 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
295 tree, int *, int);
296 static bool sh_strict_argument_naming (cumulative_args_t);
297 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
298 static tree sh_build_builtin_va_list (void);
299 static void sh_va_start (tree, rtx);
300 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
301 static bool sh_promote_prototypes (const_tree);
302 static enum machine_mode sh_promote_function_mode (const_tree type,
303 enum machine_mode,
304 int *punsignedp,
305 const_tree funtype,
306 int for_return);
307 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
308 const_tree, bool);
309 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
310 const_tree, bool);
311 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
312 tree, bool);
313 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
314 const_tree, bool);
315 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
316 const_tree, bool);
317 static bool sh_scalar_mode_supported_p (enum machine_mode);
318 static int sh_dwarf_calling_convention (const_tree);
319 static void sh_encode_section_info (tree, rtx, int);
320 static bool sh2a_function_vector_p (tree);
321 static void sh_trampoline_init (rtx, tree, rtx);
322 static rtx sh_trampoline_adjust_address (rtx);
323 static void sh_conditional_register_usage (void);
324 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
325 static int mov_insn_size (enum machine_mode, bool);
326 static int mov_insn_alignment_mask (enum machine_mode, bool);
327 static bool sequence_insn_p (rtx_insn *);
328 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
329 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
330 enum machine_mode, bool);
331 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
332
333 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
334 \f
335 static const struct attribute_spec sh_attribute_table[] =
336 {
337 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
338 affects_type_identity } */
339 { "interrupt_handler", 0, 0, true, false, false,
340 sh_handle_interrupt_handler_attribute, false },
341 { "sp_switch", 1, 1, true, false, false,
342 sh_handle_sp_switch_attribute, false },
343 { "trap_exit", 1, 1, true, false, false,
344 sh_handle_trap_exit_attribute, false },
345 { "renesas", 0, 0, false, true, false,
346 sh_handle_renesas_attribute, false },
347 { "trapa_handler", 0, 0, true, false, false,
348 sh_handle_interrupt_handler_attribute, false },
349 { "nosave_low_regs", 0, 0, true, false, false,
350 sh_handle_interrupt_handler_attribute, false },
351 { "resbank", 0, 0, true, false, false,
352 sh_handle_resbank_handler_attribute, false },
353 { "function_vector", 1, 1, true, false, false,
354 sh2a_handle_function_vector_handler_attribute, false },
355 { NULL, 0, 0, false, false, false, NULL, false }
356 };
357 \f
358 /* Initialize the GCC target structure. */
359 #undef TARGET_ATTRIBUTE_TABLE
360 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
361
362 /* The next two are used for debug info when compiling with -gdwarf. */
363 #undef TARGET_ASM_UNALIGNED_HI_OP
364 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
365 #undef TARGET_ASM_UNALIGNED_SI_OP
366 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
367
368 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
369 #undef TARGET_ASM_UNALIGNED_DI_OP
370 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
371 #undef TARGET_ASM_ALIGNED_DI_OP
372 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
373
374 #undef TARGET_OPTION_OVERRIDE
375 #define TARGET_OPTION_OVERRIDE sh_option_override
376
377 #undef TARGET_PRINT_OPERAND
378 #define TARGET_PRINT_OPERAND sh_print_operand
379 #undef TARGET_PRINT_OPERAND_ADDRESS
380 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
381 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
382 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
383 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
384 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
385
386 #undef TARGET_ASM_FUNCTION_EPILOGUE
387 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
388
389 #undef TARGET_ASM_OUTPUT_MI_THUNK
390 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
391
392 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
393 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
394 hook_bool_const_tree_hwi_hwi_const_tree_true
395
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START sh_file_start
398 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
399 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
400
401 #undef TARGET_REGISTER_MOVE_COST
402 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
403
404 #undef TARGET_INSERT_ATTRIBUTES
405 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
406
407 #undef TARGET_SCHED_ADJUST_COST
408 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
409
410 #undef TARGET_SCHED_ISSUE_RATE
411 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
412
413 /* The next 5 hooks have been implemented for re-enabling sched1. With the
414 help of these macros we limit the movement of insns in sched1 in order to
415 reduce the register pressure. The overall idea is to keep count of the
416 SImode and SFmode regs required by already scheduled insns. When these
417 counts cross certain threshold values, priority is given to insns that
418 free registers. The insn that frees registers is most likely the insn
419 with the lowest LUID (original insn order), but such an insn might be
420 sitting in the stalled queue (Q) instead of the ready queue (R). To solve
421 this, we skip up to 8 cycles so that such insns may move from Q -> R.
422
423 The hooks are described below:
424
425 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
426 scheduler; it is called inside the sched_init function just after the
427 find_insn_reg_weights function call. It is used to calculate the SImode
428 and SFmode weights of the insns of basic blocks, much like what
429 find_insn_reg_weights does.
430 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
431
432 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
433 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
434 (Q)->(R).
435
436 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
437 high, reorder the ready queue so that the insn with the lowest LUID will
438 be issued next.
439
440 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
441 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
442
443 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
444 can be returned from TARGET_SCHED_REORDER2.
445
446 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
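/* Illustrative sketch (editorial addition, not from the original sources):
   the hooks above interact roughly like this, assuming the thresholds
   checked by high_pressure ():

     sh_reorder        - if high_pressure (SImode/SFmode), sort the ready
                         queue R so the lowest-LUID insn is issued first.
     sh_reorder2       - if pressure is still high, request cycle skipping.
     sh_dfa_new_cycle  - while skipping is requested (up to 8 cycles), let
                         the scheduler advance so stalled insns can move
                         from Q to R.
     sh_variable_issue - cache can_issue_more for use by sh_reorder2.  */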
447
448 #undef TARGET_SCHED_DFA_NEW_CYCLE
449 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
450
451 #undef TARGET_SCHED_INIT_GLOBAL
452 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
453
454 #undef TARGET_SCHED_FINISH_GLOBAL
455 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
456
457 #undef TARGET_SCHED_VARIABLE_ISSUE
458 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
459
460 #undef TARGET_SCHED_REORDER
461 #define TARGET_SCHED_REORDER sh_reorder
462
463 #undef TARGET_SCHED_REORDER2
464 #define TARGET_SCHED_REORDER2 sh_reorder2
465
466 #undef TARGET_SCHED_INIT
467 #define TARGET_SCHED_INIT sh_md_init
468
469 #undef TARGET_DELEGITIMIZE_ADDRESS
470 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
471
472 #undef TARGET_LEGITIMIZE_ADDRESS
473 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
474
475 #undef TARGET_CANNOT_MODIFY_JUMPS_P
476 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
477 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
478 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
479 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
480 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
481 sh_optimize_target_register_callee_saved
482
483 #undef TARGET_MS_BITFIELD_LAYOUT_P
484 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
485
486 #undef TARGET_INIT_BUILTINS
487 #define TARGET_INIT_BUILTINS sh_init_builtins
488 #undef TARGET_BUILTIN_DECL
489 #define TARGET_BUILTIN_DECL sh_builtin_decl
490 #undef TARGET_EXPAND_BUILTIN
491 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
492
493 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
494 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
495
496 #undef TARGET_CANNOT_COPY_INSN_P
497 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
498 #undef TARGET_RTX_COSTS
499 #define TARGET_RTX_COSTS sh_rtx_costs
500 #undef TARGET_ADDRESS_COST
501 #define TARGET_ADDRESS_COST sh_address_cost
502 #undef TARGET_ALLOCATE_INITIAL_VALUE
503 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
504
505 #undef TARGET_MACHINE_DEPENDENT_REORG
506 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
507
508 #undef TARGET_DWARF_REGISTER_SPAN
509 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
510
511 #ifdef HAVE_AS_TLS
512 #undef TARGET_HAVE_TLS
513 #define TARGET_HAVE_TLS true
514 #endif
515
516 #undef TARGET_PROMOTE_PROTOTYPES
517 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
518 #undef TARGET_PROMOTE_FUNCTION_MODE
519 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
520
521 #undef TARGET_FUNCTION_VALUE
522 #define TARGET_FUNCTION_VALUE sh_function_value
523 #undef TARGET_FUNCTION_VALUE_REGNO_P
524 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
525 #undef TARGET_LIBCALL_VALUE
526 #define TARGET_LIBCALL_VALUE sh_libcall_value
527 #undef TARGET_STRUCT_VALUE_RTX
528 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
531
532 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
533 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
534 #undef TARGET_SETUP_INCOMING_VARARGS
535 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
536 #undef TARGET_STRICT_ARGUMENT_NAMING
537 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
538 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
539 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
540 #undef TARGET_MUST_PASS_IN_STACK
541 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
542 #undef TARGET_PASS_BY_REFERENCE
543 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
544 #undef TARGET_CALLEE_COPIES
545 #define TARGET_CALLEE_COPIES sh_callee_copies
546 #undef TARGET_ARG_PARTIAL_BYTES
547 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
548 #undef TARGET_FUNCTION_ARG
549 #define TARGET_FUNCTION_ARG sh_function_arg
550 #undef TARGET_FUNCTION_ARG_ADVANCE
551 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
552
553 #undef TARGET_BUILD_BUILTIN_VA_LIST
554 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
555 #undef TARGET_EXPAND_BUILTIN_VA_START
556 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
557 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
558 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
559
560 #undef TARGET_SCALAR_MODE_SUPPORTED_P
561 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
562 #undef TARGET_VECTOR_MODE_SUPPORTED_P
563 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
564
565 #undef TARGET_CHECK_PCH_TARGET_FLAGS
566 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
567
568 #undef TARGET_DWARF_CALLING_CONVENTION
569 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
570
571 #undef TARGET_FRAME_POINTER_REQUIRED
572 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
573
574 #undef TARGET_MODE_EMIT
575 #define TARGET_MODE_EMIT sh_emit_mode_set
576
577 #undef TARGET_MODE_NEEDED
578 #define TARGET_MODE_NEEDED sh_mode_needed
579
580 #undef TARGET_MODE_AFTER
581 #define TARGET_MODE_AFTER sh_mode_after
582
583 #undef TARGET_MODE_ENTRY
584 #define TARGET_MODE_ENTRY sh_mode_entry
585
586 #undef TARGET_MODE_EXIT
587 #define TARGET_MODE_EXIT sh_mode_exit
588
589 #undef TARGET_MODE_PRIORITY
590 #define TARGET_MODE_PRIORITY sh_mode_priority
591
592 /* Return regmode weight for insn. */
593 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
594 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
595
596 /* Return current register pressure for regmode. */
597 #define CURR_REGMODE_PRESSURE(MODE)\
598 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
599
600 #undef TARGET_ENCODE_SECTION_INFO
601 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
602
603 #undef TARGET_SECONDARY_RELOAD
604 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
605
606 #undef TARGET_PREFERRED_RELOAD_CLASS
607 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
608
609 #undef TARGET_CONDITIONAL_REGISTER_USAGE
610 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
611
612 #undef TARGET_LEGITIMATE_ADDRESS_P
613 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
614
615 #undef TARGET_TRAMPOLINE_INIT
616 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
617 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
618 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
619
620 #undef TARGET_LEGITIMATE_CONSTANT_P
621 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
622
623 #undef TARGET_CANONICALIZE_COMPARISON
624 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
625
626 #undef TARGET_FIXED_CONDITION_CODE_REGS
627 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
628
629 /* Machine-specific symbol_ref flags. */
630 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
631
632 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
633 is used by optabs.c atomic op expansion code as well as in sync.md. */
634 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
635 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
636
637 struct gcc_target targetm = TARGET_INITIALIZER;
638 \f
639
640 /* Information on the currently selected atomic model.
641 This is initialized in sh_option_override. */
642 static sh_atomic_model selected_atomic_model_;
643
644 const sh_atomic_model&
645 selected_atomic_model (void)
646 {
647 return selected_atomic_model_;
648 }
649
650 static sh_atomic_model
651 parse_validate_atomic_model_option (const char* str)
652 {
653 const char* model_names[sh_atomic_model::num_models];
654 model_names[sh_atomic_model::none] = "none";
655 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
656 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
657 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
658 model_names[sh_atomic_model::soft_imask] = "soft-imask";
659
660 const char* model_cdef_names[sh_atomic_model::num_models];
661 model_cdef_names[sh_atomic_model::none] = "NONE";
662 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
663 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
664 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
665 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
666
667 sh_atomic_model ret;
668 ret.type = sh_atomic_model::none;
669 ret.name = model_names[sh_atomic_model::none];
670 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
671 ret.strict = false;
672 ret.tcb_gbr_offset = -1;
673
674 /* Handle empty string as 'none'. */
675 if (str == NULL || *str == '\0')
676 return ret;
677
678 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
679
680 std::vector<std::string> tokens;
681 for (std::stringstream ss (str); ss.good (); )
682 {
683 tokens.push_back (std::string ());
684 std::getline (ss, tokens.back (), ',');
685 }
686
687 if (tokens.empty ())
688 err_ret ("invalid atomic model option");
689
690 /* The first token must be the atomic model name. */
691 {
692 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
693 if (tokens.front () == model_names[i])
694 {
695 ret.type = (sh_atomic_model::enum_type)i;
696 ret.name = model_names[i];
697 ret.cdef_name = model_cdef_names[i];
698 goto got_mode_name;
699 }
700
701 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
702 got_mode_name:;
703 }
704
705 /* Go through the remaining tokens. */
706 for (size_t i = 1; i < tokens.size (); ++i)
707 {
708 if (tokens[i] == "strict")
709 ret.strict = true;
710 else if (tokens[i].find ("gbr-offset=") == 0)
711 {
712 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
713 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
714 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
715 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
716 "option", offset_str.c_str ());
717 }
718 else
719 err_ret ("unknown parameter \"%s\" in atomic model option",
720 tokens[i].c_str ());
721 }
722
723 /* Check that the selection makes sense. */
724 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
725 err_ret ("atomic operations are not supported on SHmedia");
726
727 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
728 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
729 ret.name);
730
731 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
732 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
733
734 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
735 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
736
737 if (ret.type == sh_atomic_model::soft_tcb
738 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
739 || (ret.tcb_gbr_offset & 3) != 0))
740 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
741 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
742 ret.name);
743
744 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
745 err_ret ("cannot use atomic model %s in user mode", ret.name);
746
747 return ret;
748
749 #undef err_ret
750 }
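/* Example (editorial addition, illustrative only): on a target where the
   model is valid, an option string such as
     -matomic-model=soft-tcb,gbr-offset=16,strict
   parsed by the function above yields
     ret.type = sh_atomic_model::soft_tcb
     ret.tcb_gbr_offset = 16
     ret.strict = true
   while an empty string or "none" selects sh_atomic_model::none.  */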
751
752 /* Register SH specific RTL passes. */
753 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
754 const char* name);
755 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
756 const char* name);
757 static void
758 register_sh_passes (void)
759 {
760 if (!TARGET_SH1)
761 return;
762
763 /* Running the sh_treg_combine pass after ce1 generates better code when
764 comparisons are combined and reg-reg moves are introduced, because
765 reg-reg moves will be eliminated afterwards. However, there are quite
766 a few cases where combine will be unable to fold comparison-related insns,
767 so for now this is not done.
768 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
769 PASS_POS_INSERT_AFTER, "ce1", 1);
770 */
771
772 /* Run sh_treg_combine pass after combine but before register allocation. */
773 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
774 PASS_POS_INSERT_AFTER, "split1", 1);
775
776 /* Run sh_treg_combine pass after register allocation and basic block
777 reordering as this sometimes creates new opportunities. */
778 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
779 PASS_POS_INSERT_AFTER, "split4", 1);
780
781 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
782 is known after a conditional branch.
783 This must be done after basic blocks and branch conditions have
784 stabilized and won't be changed by further passes. */
785 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
786 PASS_POS_INSERT_BEFORE, "sched2", 1);
787 }
788
789 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
790 various options, and do some machine dependent initialization. */
791 static void
792 sh_option_override (void)
793 {
794 int regno;
795
796 SUBTARGET_OVERRIDE_OPTIONS;
797 if (optimize > 1 && !optimize_size)
798 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
799
800 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
801 TARGET_CBRANCHDI4 = 1;
802 TARGET_CMPEQDI_T = 0;
803
804 sh_cpu = PROCESSOR_SH1;
805 assembler_dialect = 0;
806 if (TARGET_SH2)
807 sh_cpu = PROCESSOR_SH2;
808 if (TARGET_SH2E)
809 sh_cpu = PROCESSOR_SH2E;
810 if (TARGET_SH2A)
811 sh_cpu = PROCESSOR_SH2A;
812 if (TARGET_SH3)
813 sh_cpu = PROCESSOR_SH3;
814 if (TARGET_SH3E)
815 sh_cpu = PROCESSOR_SH3E;
816 if (TARGET_SH4)
817 {
818 assembler_dialect = 1;
819 sh_cpu = PROCESSOR_SH4;
820 }
821 if (TARGET_SH4A_ARCH)
822 {
823 assembler_dialect = 1;
824 sh_cpu = PROCESSOR_SH4A;
825 }
826 if (TARGET_SH5)
827 {
828 sh_cpu = PROCESSOR_SH5;
829 target_flags |= MASK_ALIGN_DOUBLE;
830 if (TARGET_SHMEDIA_FPU)
831 target_flags |= MASK_FMOVD;
832 if (TARGET_SHMEDIA)
833 {
834 /* There are no delay slots on SHmedia. */
835 flag_delayed_branch = 0;
836 /* Relaxation isn't yet supported for SHmedia */
837 target_flags &= ~MASK_RELAX;
838 /* After reload, if conversion does little good but can cause
839 ICEs:
840 - find_if_block doesn't do anything for SH because we don't
841 have conditional execution patterns. (We use conditional
842 move patterns, which are handled differently, and only
843 before reload).
844 - find_cond_trap doesn't do anything for the SH because we
845 don't have conditional traps.
846 - find_if_case_1 uses redirect_edge_and_branch_force in
847 the only path that does an optimization, and this causes
848 an ICE when branch targets are in registers.
849 - find_if_case_2 doesn't do anything for the SHmedia after
850 reload except when it can redirect a tablejump - and
851 that's rather rare. */
852 flag_if_conversion2 = 0;
853 if (! strcmp (sh_div_str, "call"))
854 sh_div_strategy = SH_DIV_CALL;
855 else if (! strcmp (sh_div_str, "call2"))
856 sh_div_strategy = SH_DIV_CALL2;
857 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
858 sh_div_strategy = SH_DIV_FP;
859 else if (! strcmp (sh_div_str, "inv"))
860 sh_div_strategy = SH_DIV_INV;
861 else if (! strcmp (sh_div_str, "inv:minlat"))
862 sh_div_strategy = SH_DIV_INV_MINLAT;
863 else if (! strcmp (sh_div_str, "inv20u"))
864 sh_div_strategy = SH_DIV_INV20U;
865 else if (! strcmp (sh_div_str, "inv20l"))
866 sh_div_strategy = SH_DIV_INV20L;
867 else if (! strcmp (sh_div_str, "inv:call2"))
868 sh_div_strategy = SH_DIV_INV_CALL2;
869 else if (! strcmp (sh_div_str, "inv:call"))
870 sh_div_strategy = SH_DIV_INV_CALL;
871 else if (! strcmp (sh_div_str, "inv:fp"))
872 {
873 if (TARGET_FPU_ANY)
874 sh_div_strategy = SH_DIV_INV_FP;
875 else
876 sh_div_strategy = SH_DIV_INV;
877 }
878 TARGET_CBRANCHDI4 = 0;
879 /* Assembler CFI isn't yet fully supported for SHmedia. */
880 flag_dwarf2_cfi_asm = 0;
881 }
882 }
883 else
884 {
885 /* Only the sh64-elf assembler fully supports .quad properly. */
886 targetm.asm_out.aligned_op.di = NULL;
887 targetm.asm_out.unaligned_op.di = NULL;
888 }
889
890 /* User/privileged mode is supported only on SH3*, SH4* and SH5*.
891 Disable it for everything else. */
892 if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
893 TARGET_USERMODE = false;
894
895 if (TARGET_SH1)
896 {
897 if (! strcmp (sh_div_str, "call-div1"))
898 sh_div_strategy = SH_DIV_CALL_DIV1;
899 else if (! strcmp (sh_div_str, "call-fp")
900 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
901 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
902 sh_div_strategy = SH_DIV_CALL_FP;
903 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
904 sh_div_strategy = SH_DIV_CALL_TABLE;
905 else
906 /* Pick the one that makes the most sense for the target in general.
907 It is of little use to choose different functions depending
908 on -Os, since then we'll end up with two different functions
909 when some of the code is compiled for size, and some for
910 speed. */
911
912 /* SH4 tends to emphasize speed. */
913 if (TARGET_HARD_SH4)
914 sh_div_strategy = SH_DIV_CALL_TABLE;
915 /* These have their own way of doing things. */
916 else if (TARGET_SH2A)
917 sh_div_strategy = SH_DIV_INTRINSIC;
918 /* ??? Should we use the integer SHmedia function instead? */
919 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
920 sh_div_strategy = SH_DIV_CALL_FP;
921 /* SH1 .. SH3 cores often go into small-footprint systems, so
922 default to the smallest implementation available. */
923 else
924 sh_div_strategy = SH_DIV_CALL_DIV1;
925 }
926 if (!TARGET_SH1)
927 TARGET_PRETEND_CMOVE = 0;
928 if (sh_divsi3_libfunc[0])
929 ; /* User supplied - leave it alone. */
930 else if (TARGET_DIVIDE_CALL_FP)
931 sh_divsi3_libfunc = "__sdivsi3_i4";
932 else if (TARGET_DIVIDE_CALL_TABLE)
933 sh_divsi3_libfunc = "__sdivsi3_i4i";
934 else if (TARGET_SH5)
935 sh_divsi3_libfunc = "__sdivsi3_1";
936 else
937 sh_divsi3_libfunc = "__sdivsi3";
938
939 if (sh_branch_cost == -1)
940 {
941 /* The SH1 does not have delay slots, hence we get a pipeline stall
942 at every branch. The SH4 is superscalar, so the single delay slot
943 is not sufficient to keep both pipelines filled.
944 In any case, set the default branch cost to '2', as it results in
945 slightly smaller code overall and also enables some if conversions
946 that are required for matching special T bit related insns. */
947 sh_branch_cost = 2;
948 }
949
950 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
951 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
952 TARGET_ZDCBRANCH = 1;
953
954 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
955 if (! VALID_REGISTER_P (regno))
956 sh_register_names[regno][0] = '\0';
957
958 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
959 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
960 sh_additional_register_names[regno][0] = '\0';
961
962 if ((flag_pic && ! TARGET_PREFERGOT)
963 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
964 flag_no_function_cse = 1;
965
966 if (targetm.small_register_classes_for_mode_p (VOIDmode))
967 {
968 /* Never run scheduling before reload, since that can
969 break global alloc, and generates slower code anyway due
970 to the pressure on R0. */
971 /* Enable sched1 for SH4 if the user explicitly requests it.
972 When sched1 is enabled, the ready queue will be reordered by
973 the target hooks if pressure is high. We cannot do this for
974 PIC, SH3 and lower as they give spill failures for R0. */
975 if (!TARGET_HARD_SH4 || flag_pic)
976 flag_schedule_insns = 0;
977 /* ??? Current exception handling places basic block boundaries
978 after call_insns. It causes the high pressure on R0 and gives
979 spill failures for R0 in reload. See PR 22553 and the thread
980 on gcc-patches
981 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
982 else if (flag_exceptions)
983 {
984 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
985 warning (0, "ignoring -fschedule-insns because of exception "
986 "handling bug");
987 flag_schedule_insns = 0;
988 }
989 else if (flag_schedule_insns
990 && !global_options_set.x_flag_schedule_insns)
991 flag_schedule_insns = 0;
992 }
993
994 /* Unwind info is not correct around the CFG unless either a frame
995 pointer is present or M_A_O_A is set. Fixing this requires rewriting
996 unwind info generation to be aware of the CFG and propagating states
997 around edges. */
998 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
999 || flag_exceptions || flag_non_call_exceptions)
1000 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
1001 {
1002 warning (0, "unwind tables currently require either a frame pointer "
1003 "or -maccumulate-outgoing-args for correctness");
1004 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1005 }
1006
1007 /* Unwinding with -freorder-blocks-and-partition does not work on this
1008 architecture, because it requires far jumps to label crossing between
1009 hot/cold sections which are rejected on this architecture. */
1010 if (flag_reorder_blocks_and_partition)
1011 {
1012 if (flag_exceptions)
1013 {
1014 inform (input_location,
1015 "-freorder-blocks-and-partition does not work with "
1016 "exceptions on this architecture");
1017 flag_reorder_blocks_and_partition = 0;
1018 flag_reorder_blocks = 1;
1019 }
1020 else if (flag_unwind_tables)
1021 {
1022 inform (input_location,
1023 "-freorder-blocks-and-partition does not support unwind "
1024 "info on this architecture");
1025 flag_reorder_blocks_and_partition = 0;
1026 flag_reorder_blocks = 1;
1027 }
1028 }
1029
1030 /* Adjust loop, jump and function alignment values (in bytes), if those
1031 were not specified by the user using -falign-loops, -falign-jumps
1032 and -falign-functions options.
1033 32 bit alignment is better for speed, because instructions can be
1034 fetched as a pair from a longword boundary. For size use 16 bit
1035 alignment to get more compact code.
1036 Aligning all jumps increases the code size, even if it might
1037 result in slightly faster code. Thus, it is set to the smallest
1038 alignment possible if not specified by the user. */
1039 if (align_loops == 0)
1040 {
1041 if (TARGET_SH5)
1042 align_loops = 8;
1043 else
1044 align_loops = optimize_size ? 2 : 4;
1045 }
1046
1047 if (align_jumps == 0)
1048 {
1049 if (TARGET_SHMEDIA)
1050 align_jumps = 1 << CACHE_LOG;
1051 else
1052 align_jumps = 2;
1053 }
1054 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1055 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1056
1057 if (align_functions == 0)
1058 {
1059 if (TARGET_SHMEDIA)
1060 align_functions = optimize_size
1061 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1062 else
1063 align_functions = optimize_size ? 2 : 4;
1064 }
1065
1066 /* The linker relaxation code breaks when a function contains
1067 alignments that are larger than that at the start of a
1068 compilation unit. */
1069 if (TARGET_RELAX)
1070 {
1071 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1072
1073 /* Also take possible .long constants / mova tables into account. */
1074 if (min_align < 4)
1075 min_align = 4;
1076 if (align_functions < min_align)
1077 align_functions = min_align;
1078 }
1079
1080 if (flag_unsafe_math_optimizations)
1081 {
1082 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1083 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1084 TARGET_FSCA = 1;
1085
1086 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1087 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1088 TARGET_FSRRA = 1;
1089 }
1090
1091 /* Allow fsrra insn only if -funsafe-math-optimizations and
1092 -ffinite-math-only is enabled. */
1093 TARGET_FSRRA = TARGET_FSRRA
1094 && flag_unsafe_math_optimizations
1095 && flag_finite_math_only;
1096
1097 /* If the -mieee option was not explicitly set by the user, turn it on
1098 unless -ffinite-math-only was specified. See also PR 33135. */
1099 if (! global_options_set.x_TARGET_IEEE)
1100 TARGET_IEEE = ! flag_finite_math_only;
1101
1102 if (sh_fixed_range_str)
1103 sh_fix_range (sh_fixed_range_str);
1104
1105 /* This target defaults to strict volatile bitfields. */
1106 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1107 flag_strict_volatile_bitfields = 1;
1108
1109 /* Parse atomic model option and make sure it is valid for the current
1110 target CPU. */
1111 selected_atomic_model_
1112 = parse_validate_atomic_model_option (sh_atomic_model_str);
1113
1114 register_sh_passes ();
1115 }
1116 \f
1117 /* Print the operand address in x to the stream. */
1118 static void
1119 sh_print_operand_address (FILE *stream, rtx x)
1120 {
1121 switch (GET_CODE (x))
1122 {
1123 case REG:
1124 case SUBREG:
1125 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1126 break;
1127
1128 case PLUS:
1129 {
1130 rtx base = XEXP (x, 0);
1131 rtx index = XEXP (x, 1);
1132
1133 switch (GET_CODE (index))
1134 {
1135 case CONST_INT:
1136 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1137 reg_names[true_regnum (base)]);
1138 break;
1139
1140 case REG:
1141 case SUBREG:
1142 {
1143 int base_num = true_regnum (base);
1144 int index_num = true_regnum (index);
1145
1146 fprintf (stream, "@(r0,%s)",
1147 reg_names[MAX (base_num, index_num)]);
1148 break;
1149 }
1150
1151 default:
1152 gcc_unreachable ();
1153 }
1154 }
1155 break;
1156
1157 case PRE_DEC:
1158 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1159 break;
1160
1161 case POST_INC:
1162 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1163 break;
1164
1165 default:
1166 x = mark_constant_pool_use (x);
1167 output_addr_const (stream, x);
1168 break;
1169 }
1170 }
1171
1172 /* Print operand x (an rtx) in assembler syntax to file stream
1173 according to modifier code.
1174
1175 '.' print a .s if insn needs delay slot
1176 ',' print LOCAL_LABEL_PREFIX
1177 '@' print trap, rte or rts depending on the function's interrupt / trap_exit attributes
1178 '#' output a nop if there is nothing to put in the delay slot
1179 ''' print likelihood suffix (/u for unlikely).
1180 '>' print branch target if -fverbose-asm
1181 'O' print a constant without the #
1182 'R' print the LSW of a dp value - changes if in little endian
1183 'S' print the MSW of a dp value - changes if in little endian
1184 'T' print the next word of a dp value - same as 'R' in big endian mode.
1185 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1186 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1187 'N' print 'r63' if the operand is (const_int 0).
1188 'd' print a V2SF reg as dN instead of fpN.
1189 'm' print a pair `base,offset' or `base,index', for LD and ST.
1190 'U' Likewise for {LD,ST}{HI,LO}.
1191 'V' print the position of a single bit set.
1192 'W' print the position of a single bit cleared.
1193 't' print a memory address which is a register.
1194 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1195 'o' output an operator. */
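/* Illustrative note (editorial addition): in output templates of the machine
   description these modifiers are used e.g. as "mov.l %1,%R0" and
   "mov.l %2,%S0", printing the register names of the least and most
   significant SImode halves of a double-word operand 0.  */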
1196 static void
1197 sh_print_operand (FILE *stream, rtx x, int code)
1198 {
1199 int regno;
1200 enum machine_mode mode;
1201
1202 switch (code)
1203 {
1204 tree trapa_attr;
1205
1206 case '.':
1207 if (final_sequence
1208 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1209 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1210 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1211 break;
1212 case ',':
1213 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1214 break;
1215 case '@':
1216 trapa_attr = lookup_attribute ("trap_exit",
1217 DECL_ATTRIBUTES (current_function_decl));
1218 if (trapa_attr)
1219 fprintf (stream, "trapa #%ld",
1220 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1221 else if (sh_cfun_interrupt_handler_p ())
1222 {
1223 if (sh_cfun_resbank_handler_p ())
1224 fprintf (stream, "resbank\n");
1225 fprintf (stream, "rte");
1226 }
1227 else
1228 fprintf (stream, "rts");
1229 break;
1230 case '#':
1231 /* Output a nop if there's nothing in the delay slot. */
1232 if (dbr_sequence_length () == 0)
1233 fprintf (stream, "\n\tnop");
1234 break;
1235 case '\'':
1236 {
1237 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1238
1239 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1240 fputs ("/u", stream);
1241 break;
1242 }
1243 case '>':
1244 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1245 {
1246 fputs ("\t! target: ", stream);
1247 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1248 }
1249 break;
1250 case 'O':
1251 x = mark_constant_pool_use (x);
1252 output_addr_const (stream, x);
1253 break;
1254 /* N.B.: %R / %S / %T adjust memory addresses by four.
1255 For SHMEDIA, that means they can be used to access the first and
1256 second 32 bit part of a 64 bit (or larger) value that
1257 might be held in floating point registers or memory.
1258 While they can be used to access 64 bit parts of a larger value
1259 held in general purpose registers, that won't work with memory -
1260 neither for fp registers, since the frxx names are used. */
1261 case 'R':
1262 if (REG_P (x) || GET_CODE (x) == SUBREG)
1263 {
1264 regno = true_regnum (x);
1265 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1266 fputs (reg_names[regno], (stream));
1267 }
1268 else if (MEM_P (x))
1269 {
1270 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1271 sh_print_operand_address (stream, XEXP (x, 0));
1272 }
1273 else
1274 {
1275 rtx sub = NULL_RTX;
1276
1277 mode = GET_MODE (x);
1278 if (mode == VOIDmode)
1279 mode = DImode;
1280 if (GET_MODE_SIZE (mode) >= 8)
1281 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1282 if (sub)
1283 sh_print_operand (stream, sub, 0);
1284 else
1285 output_operand_lossage ("invalid operand to %%R");
1286 }
1287 break;
1288 case 'S':
1289 if (REG_P (x) || GET_CODE (x) == SUBREG)
1290 {
1291 regno = true_regnum (x);
1292 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1293 fputs (reg_names[regno], (stream));
1294 }
1295 else if (MEM_P (x))
1296 {
1297 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1298 sh_print_operand_address (stream, XEXP (x, 0));
1299 }
1300 else
1301 {
1302 rtx sub = NULL_RTX;
1303
1304 mode = GET_MODE (x);
1305 if (mode == VOIDmode)
1306 mode = DImode;
1307 if (GET_MODE_SIZE (mode) >= 8)
1308 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1309 if (sub)
1310 sh_print_operand (stream, sub, 0);
1311 else
1312 output_operand_lossage ("invalid operand to %%S");
1313 }
1314 break;
1315 case 'T':
1316 /* Next word of a double. */
1317 switch (GET_CODE (x))
1318 {
1319 case REG:
1320 fputs (reg_names[REGNO (x) + 1], (stream));
1321 break;
1322 case MEM:
1323 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1324 && GET_CODE (XEXP (x, 0)) != POST_INC)
1325 x = adjust_address (x, SImode, 4);
1326 sh_print_operand_address (stream, XEXP (x, 0));
1327 break;
1328 default:
1329 break;
1330 }
1331 break;
1332
1333 case 't':
1334 gcc_assert (MEM_P (x));
1335 x = XEXP (x, 0);
1336 switch (GET_CODE (x))
1337 {
1338 case REG:
1339 case SUBREG:
1340 sh_print_operand (stream, x, 0);
1341 break;
1342 default:
1343 break;
1344 }
1345 break;
1346
1347 case 'o':
1348 switch (GET_CODE (x))
1349 {
1350 case PLUS: fputs ("add", stream); break;
1351 case MINUS: fputs ("sub", stream); break;
1352 case MULT: fputs ("mul", stream); break;
1353 case DIV: fputs ("div", stream); break;
1354 case EQ: fputs ("eq", stream); break;
1355 case NE: fputs ("ne", stream); break;
1356 case GT: case LT: fputs ("gt", stream); break;
1357 case GE: case LE: fputs ("ge", stream); break;
1358 case GTU: case LTU: fputs ("gtu", stream); break;
1359 case GEU: case LEU: fputs ("geu", stream); break;
1360 default:
1361 break;
1362 }
1363 break;
1364 case 'M':
1365 if (TARGET_SHMEDIA)
1366 {
1367 if (MEM_P (x)
1368 && GET_CODE (XEXP (x, 0)) == PLUS
1369 && (REG_P (XEXP (XEXP (x, 0), 1))
1370 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1371 fputc ('x', stream);
1372 }
1373 else
1374 {
1375 if (MEM_P (x))
1376 {
1377 switch (GET_MODE (x))
1378 {
1379 case QImode: fputs (".b", stream); break;
1380 case HImode: fputs (".w", stream); break;
1381 case SImode: fputs (".l", stream); break;
1382 case SFmode: fputs (".s", stream); break;
1383 case DFmode: fputs (".d", stream); break;
1384 default: gcc_unreachable ();
1385 }
1386 }
1387 }
1388 break;
1389
1390 case 'm':
1391 gcc_assert (MEM_P (x));
1392 x = XEXP (x, 0);
1393 /* Fall through. */
1394 case 'U':
1395 switch (GET_CODE (x))
1396 {
1397 case REG:
1398 case SUBREG:
1399 sh_print_operand (stream, x, 0);
1400 fputs (", 0", stream);
1401 break;
1402
1403 case PLUS:
1404 sh_print_operand (stream, XEXP (x, 0), 0);
1405 fputs (", ", stream);
1406 sh_print_operand (stream, XEXP (x, 1), 0);
1407 break;
1408
1409 default:
1410 gcc_unreachable ();
1411 }
1412 break;
1413
1414 case 'V':
1415 {
1416 int num = exact_log2 (INTVAL (x));
1417 gcc_assert (num >= 0);
1418 fprintf (stream, "#%d", num);
1419 }
1420 break;
1421
1422 case 'W':
1423 {
1424 int num = exact_log2 (~INTVAL (x));
1425 gcc_assert (num >= 0);
1426 fprintf (stream, "#%d", num);
1427 }
1428 break;
1429
1430 case 'd':
1431 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1432
1433 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1434 break;
1435
1436 case 'N':
1437 if (x == CONST0_RTX (GET_MODE (x)))
1438 {
1439 fprintf ((stream), "r63");
1440 break;
1441 }
1442 goto default_output;
1443 case 'u':
1444 if (CONST_INT_P (x))
1445 {
1446 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1447 break;
1448 }
1449 /* Fall through. */
1450
1451 default_output:
1452 default:
1453 regno = 0;
1454 mode = GET_MODE (x);
1455
1456 switch (GET_CODE (x))
1457 {
1458 case TRUNCATE:
1459 {
1460 rtx inner = XEXP (x, 0);
1461 int offset = 0;
1462 enum machine_mode inner_mode;
1463
1464 /* We might see SUBREGs with vector mode registers inside. */
1465 if (GET_CODE (inner) == SUBREG
1466 && (GET_MODE_SIZE (GET_MODE (inner))
1467 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1468 && subreg_lowpart_p (inner))
1469 inner = SUBREG_REG (inner);
1470 if (CONST_INT_P (inner))
1471 {
1472 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1473 goto default_output;
1474 }
1475 inner_mode = GET_MODE (inner);
1476 if (GET_CODE (inner) == SUBREG
1477 && (GET_MODE_SIZE (GET_MODE (inner))
1478 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1479 && REG_P (SUBREG_REG (inner)))
1480 {
1481 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1482 GET_MODE (SUBREG_REG (inner)),
1483 SUBREG_BYTE (inner),
1484 GET_MODE (inner));
1485 inner = SUBREG_REG (inner);
1486 }
1487 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1488 abort ();
1489 /* Floating point register pairs are always big endian;
1490 general purpose registers are 64 bit wide. */
1491 regno = REGNO (inner);
1492 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1493 - HARD_REGNO_NREGS (regno, mode))
1494 + offset;
1495 x = inner;
1496 goto reg;
1497 }
1498 case SIGN_EXTEND:
1499 x = XEXP (x, 0);
1500 goto reg;
1501 /* FIXME: We need this on SHmedia32 because reload generates
1502 some sign-extended HI or QI loads into DImode registers
1503 but, because Pmode is SImode, the address ends up with a
1504 subreg:SI of the DImode register. Maybe reload should be
1505 fixed so as to apply alter_subreg to such loads? */
1506 case IF_THEN_ELSE:
1507 gcc_assert (trapping_target_operand (x, VOIDmode));
1508 x = XEXP (XEXP (x, 2), 0);
1509 goto default_output;
1510 case SUBREG:
1511 gcc_assert (SUBREG_BYTE (x) == 0
1512 && REG_P (SUBREG_REG (x)));
1513
1514 x = SUBREG_REG (x);
1515 /* Fall through. */
1516
1517 reg:
1518 case REG:
1519 regno += REGNO (x);
1520 if (FP_REGISTER_P (regno)
1521 && mode == V16SFmode)
1522 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1523 else if (FP_REGISTER_P (REGNO (x))
1524 && mode == V4SFmode)
1525 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1526 else if (REG_P (x)
1527 && mode == V2SFmode)
1528 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1529 else if (FP_REGISTER_P (REGNO (x))
1530 && GET_MODE_SIZE (mode) > 4)
1531 fprintf ((stream), "d%s", reg_names[regno] + 1);
1532 else
1533 fputs (reg_names[regno], (stream));
1534 break;
1535
1536 case MEM:
1537 output_address (XEXP (x, 0));
1538 break;
1539
1540 default:
1541 if (TARGET_SH1)
1542 fputc ('#', stream);
1543 output_addr_const (stream, x);
1544 break;
1545 }
1546 break;
1547 }
1548 }
1549
1550 static bool
1551 sh_print_operand_punct_valid_p (unsigned char code)
1552 {
1553 return (code == '.' || code == '#' || code == '@' || code == ','
1554 || code == '$' || code == '\'' || code == '>');
1555 }
1556
1557 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1558 static bool
1559 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1560 {
1561 if (GET_CODE (x) == UNSPEC)
1562 {
1563 switch (XINT (x, 1))
1564 {
1565 case UNSPEC_DATALABEL:
1566 fputs ("datalabel ", file);
1567 output_addr_const (file, XVECEXP (x, 0, 0));
1568 break;
1569 case UNSPEC_PIC:
1570 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1571 output_addr_const (file, XVECEXP (x, 0, 0));
1572 break;
1573 case UNSPEC_GOT:
1574 output_addr_const (file, XVECEXP (x, 0, 0));
1575 fputs ("@GOT", file);
1576 break;
1577 case UNSPEC_GOTOFF:
1578 output_addr_const (file, XVECEXP (x, 0, 0));
1579 fputs ("@GOTOFF", file);
1580 break;
1581 case UNSPEC_PLT:
1582 output_addr_const (file, XVECEXP (x, 0, 0));
1583 fputs ("@PLT", file);
1584 break;
1585 case UNSPEC_GOTPLT:
1586 output_addr_const (file, XVECEXP (x, 0, 0));
1587 fputs ("@GOTPLT", file);
1588 break;
1589 case UNSPEC_DTPOFF:
1590 output_addr_const (file, XVECEXP (x, 0, 0));
1591 fputs ("@DTPOFF", file);
1592 break;
1593 case UNSPEC_GOTTPOFF:
1594 output_addr_const (file, XVECEXP (x, 0, 0));
1595 fputs ("@GOTTPOFF", file);
1596 break;
1597 case UNSPEC_TPOFF:
1598 output_addr_const (file, XVECEXP (x, 0, 0));
1599 fputs ("@TPOFF", file);
1600 break;
1601 case UNSPEC_CALLER:
1602 {
1603 char name[32];
1604 /* LPCS stands for Label for PIC Call Site. */
1605 targetm.asm_out.generate_internal_label (name, "LPCS",
1606 INTVAL (XVECEXP (x, 0, 0)));
1607 assemble_name (file, name);
1608 }
1609 break;
1610 case UNSPEC_EXTRACT_S16:
1611 case UNSPEC_EXTRACT_U16:
1612 {
1613 rtx val, shift;
1614
1615 val = XVECEXP (x, 0, 0);
1616 shift = XVECEXP (x, 0, 1);
1617 fputc ('(', file);
1618 if (shift != const0_rtx)
1619 fputc ('(', file);
1620 if (GET_CODE (val) == CONST
1621 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1622 {
1623 fputc ('(', file);
1624 output_addr_const (file, val);
1625 fputc (')', file);
1626 }
1627 else
1628 output_addr_const (file, val);
1629 if (shift != const0_rtx)
1630 {
1631 fputs (" >> ", file);
1632 output_addr_const (file, shift);
1633 fputc (')', file);
1634 }
1635 fputs (" & 65535)", file);
1636 }
1637 break;
1638 case UNSPEC_SYMOFF:
1639 output_addr_const (file, XVECEXP (x, 0, 0));
1640 fputc ('-', file);
1641 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1642 {
1643 fputc ('(', file);
1644 output_addr_const (file, XVECEXP (x, 0, 1));
1645 fputc (')', file);
1646 }
1647 else
1648 output_addr_const (file, XVECEXP (x, 0, 1));
1649 break;
1650 case UNSPEC_PCREL_SYMOFF:
1651 output_addr_const (file, XVECEXP (x, 0, 0));
1652 fputs ("-(", file);
1653 output_addr_const (file, XVECEXP (x, 0, 1));
1654 fputs ("-.)", file);
1655 break;
1656 default:
1657 return false;
1658 }
1659 return true;
1660 }
1661 else
1662 return false;
1663 }
1664 \f
1665 /* Encode symbol attributes of a SYMBOL_REF into its
1666 SYMBOL_REF_FLAGS. */
1667 static void
1668 sh_encode_section_info (tree decl, rtx rtl, int first)
1669 {
1670 default_encode_section_info (decl, rtl, first);
1671
1672 if (TREE_CODE (decl) == FUNCTION_DECL
1673 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1674 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1675 }
1676
1677 /* Prepare operands for a move define_expand; specifically, one of the
1678 operands must be in a register. */
1679 void
1680 prepare_move_operands (rtx operands[], enum machine_mode mode)
1681 {
1682 if ((mode == SImode || mode == DImode)
1683 && flag_pic
1684 && ! ((mode == Pmode || mode == ptr_mode)
1685 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1686 {
1687 rtx temp;
1688 if (SYMBOLIC_CONST_P (operands[1]))
1689 {
1690 if (MEM_P (operands[0]))
1691 operands[1] = force_reg (Pmode, operands[1]);
1692 else if (TARGET_SHMEDIA
1693 && GET_CODE (operands[1]) == LABEL_REF
1694 && target_reg_operand (operands[0], mode))
1695 /* It's ok. */;
1696 else
1697 {
1698 temp = (!can_create_pseudo_p ()
1699 ? operands[0]
1700 : gen_reg_rtx (Pmode));
1701 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1702 }
1703 }
1704 else if (GET_CODE (operands[1]) == CONST
1705 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1706 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1707 {
1708 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1709 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1710 mode, temp);
1711 operands[1] = expand_binop (mode, add_optab, temp,
1712 XEXP (XEXP (operands[1], 0), 1),
1713 (!can_create_pseudo_p ()
1714 ? temp
1715 : gen_reg_rtx (Pmode)),
1716 0, OPTAB_LIB_WIDEN);
1717 }
1718 }
1719
1720 if (! reload_in_progress && ! reload_completed)
1721 {
1722 /* Copy the source to a register if neither operand is a register. */
1723 if (! register_operand (operands[0], mode)
1724 && ! sh_register_operand (operands[1], mode))
1725 operands[1] = copy_to_mode_reg (mode, operands[1]);
1726
1727 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1728 {
1729 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1730 except that we can't use that function because it is static. */
1731 rtx new_rtx = change_address (operands[0], mode, 0);
1732 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1733 operands[0] = new_rtx;
1734 }
1735
1736 /* This case can happen while generating code to move the result
1737 of a library call to the target. Reject `st r0,@(rX,rY)' because
1738 reload will fail to find a spill register for rX, since r0 is already
1739 being used for the source. */
1740 else if (TARGET_SH1
1741 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1742 && MEM_P (operands[0])
1743 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1744 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1745 operands[1] = copy_to_mode_reg (mode, operands[1]);
1746 }
1747
1748 if (mode == Pmode || mode == ptr_mode)
1749 {
1750 rtx op0, op1, opc;
1751 enum tls_model tls_kind;
1752
1753 op0 = operands[0];
1754 op1 = operands[1];
1755 if (GET_CODE (op1) == CONST
1756 && GET_CODE (XEXP (op1, 0)) == PLUS
1757 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1758 != TLS_MODEL_NONE))
1759 {
1760 opc = XEXP (XEXP (op1, 0), 1);
1761 op1 = XEXP (XEXP (op1, 0), 0);
1762 }
1763 else
1764 opc = NULL_RTX;
1765
1766 if (! reload_in_progress && ! reload_completed
1767 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1768 {
1769 rtx tga_op1, tga_ret, tmp, tmp2;
1770
1771 if (! flag_pic
1772 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1773 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1774 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1775 {
1776 /* Don't schedule insns for getting the GOT address when
1777 the first scheduling pass is enabled, to avoid spill
1778 failures for R0. */
1779 if (flag_schedule_insns)
1780 emit_insn (gen_blockage ());
1781 emit_insn (gen_GOTaddr2picreg ());
1782 emit_use (gen_rtx_REG (SImode, PIC_REG));
1783 if (flag_schedule_insns)
1784 emit_insn (gen_blockage ());
1785 }
1786
1787 switch (tls_kind)
1788 {
1789 case TLS_MODEL_GLOBAL_DYNAMIC:
1790 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1791 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1792 tmp = gen_reg_rtx (Pmode);
1793 emit_move_insn (tmp, tga_ret);
1794 op1 = tmp;
1795 break;
1796
1797 case TLS_MODEL_LOCAL_DYNAMIC:
1798 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1799 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1800
1801 tmp = gen_reg_rtx (Pmode);
1802 emit_move_insn (tmp, tga_ret);
1803
1804 if (register_operand (op0, Pmode))
1805 tmp2 = op0;
1806 else
1807 tmp2 = gen_reg_rtx (Pmode);
1808
1809 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1810 op1 = tmp2;
1811 break;
1812
1813 case TLS_MODEL_INITIAL_EXEC:
1814 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1815 tmp = gen_sym2GOTTPOFF (op1);
1816 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1817 op1 = tga_op1;
1818 break;
1819
1820 case TLS_MODEL_LOCAL_EXEC:
1821 tmp2 = gen_reg_rtx (Pmode);
1822 emit_insn (gen_store_gbr (tmp2));
1823 tmp = gen_reg_rtx (Pmode);
1824 emit_insn (gen_symTPOFF2reg (tmp, op1));
1825
1826 if (register_operand (op0, Pmode))
1827 op1 = op0;
1828 else
1829 op1 = gen_reg_rtx (Pmode);
1830
1831 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1832 break;
1833
1834 default:
1835 gcc_unreachable ();
1836 }
1837 if (opc)
1838 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1839 operands[1] = op1;
1840 }
1841 }
1842 }
1843
1844 /* Implement the canonicalize_comparison target hook for the combine
1845 pass. For the target hook this function is invoked via
1846 sh_canonicalize_comparison. This function is also re-used to
1847 canonicalize comparisons in cbranch pattern expanders. */
1848 static void
1849 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1850 enum machine_mode mode,
1851 bool op0_preserve_value)
1852 {
1853 /* When invoked from within the combine pass the mode is not specified,
1854 so try to get it from one of the operands. */
1855 if (mode == VOIDmode)
1856 mode = GET_MODE (op0);
1857 if (mode == VOIDmode)
1858 mode = GET_MODE (op1);
1859
1860 // We need to have a mode to do something useful here.
1861 if (mode == VOIDmode)
1862 return;
1863
1864 // Currently, we don't deal with floats here.
1865 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1866 return;
1867
1868 // Make sure that the constant operand is the second operand.
1869 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1870 {
1871 if (op0_preserve_value)
1872 return;
1873
1874 std::swap (op0, op1);
1875 cmp = swap_condition (cmp);
1876 }
1877
1878 if (CONST_INT_P (op1))
1879 {
1880 /* Try to adjust the constant operand in such a way that available
1881 comparison insns can be utilized better and the constant can be
1882 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1883 constant pool. */
1884 const HOST_WIDE_INT val = INTVAL (op1);
1885
1886 /* x > -1 --> x >= 0
1887 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1888 x <= -1 --> x < 0
1889 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1890 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1891 {
1892 cmp = cmp == GT ? GE : LT;
1893 op1 = gen_int_mode (val + 1, mode);
1894 }
1895
1896 /* x >= 1 --> x > 0
1897 x >= 0x80 --> x > 0x7F
1898 x < 1 --> x <= 0
1899 x < 0x80 --> x <= 0x7F */
1900 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1901 {
1902 cmp = cmp == GE ? GT : LE;
1903 op1 = gen_int_mode (val - 1, mode);
1904 }
1905
1906 /* unsigned x >= 1 --> x != 0
1907 unsigned x < 1 --> x == 0 */
1908 else if (val == 1 && (cmp == GEU || cmp == LTU))
1909 {
1910 cmp = cmp == GEU ? NE : EQ;
1911 op1 = CONST0_RTX (mode);
1912 }
1913
1914 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1915 unsigned x < 0x80 --> unsigned x <= 0x7F */
1916 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1917 {
1918 cmp = cmp == GEU ? GTU : LEU;
1919 op1 = gen_int_mode (val - 1, mode);
1920 }
1921
1922 /* unsigned x > 0 --> x != 0
1923 unsigned x <= 0 --> x == 0 */
1924 else if (val == 0 && (cmp == GTU || cmp == LEU))
1925 cmp = cmp == GTU ? NE : EQ;
1926
1927 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1928 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1929 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1930 && val == 0x7FFFFFFF)
1931 {
1932 cmp = cmp == GTU ? LT : GE;
1933 op1 = const0_rtx;
1934 }
1935
1936 /* unsigned x >= 0x80000000 --> signed x < 0
1937 unsigned x < 0x80000000 --> signed x >= 0 */
1938 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1939 && (unsigned HOST_WIDE_INT)val
1940 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1941 {
1942 cmp = cmp == GEU ? LT : GE;
1943 op1 = const0_rtx;
1944 }
1945 }
1946 }
1947
1948 /* This function implements the canonicalize_comparison target hook.
1949 This wrapper around the internally used sh_canonicalize_comparison
1950 function is needed to do the enum rtx_code <-> int conversion.
1951 Target hooks cannot use enum rtx_code in their definitions. */
1952 static void
1953 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1954 bool op0_preserve_value)
1955 {
1956 enum rtx_code tmp_code = (enum rtx_code)*code;
1957 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1958 VOIDmode, op0_preserve_value);
1959 *code = (int)tmp_code;
1960 }
1961
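/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  The T bit (T_REG) is the
   only fixed condition code register on SH; there is no second one.  */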
1962 bool
1963 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1964 {
1965 *p1 = T_REG;
1966 *p2 = INVALID_REGNUM;
1967 return true;
1968 }
1969
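/* Prepare the operands of a conditional branch in mode MODE: canonicalize
   the comparison and force operands[1] / operands[2] into registers where
   the insn patterns require it.  Return the possibly adjusted comparison
   code.  */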
1970 enum rtx_code
1971 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1972 enum rtx_code comparison)
1973 {
1974 /* The scratch reg is only available when this is invoked from within
1975 the cbranchdi4_i splitter, through expand_cbranchdi4. */
1976 rtx scratch = NULL_RTX;
1977
1978 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1979 comparison = GET_CODE (operands[0]);
1980 else
1981 scratch = operands[4];
1982
1983 sh_canonicalize_comparison (comparison, operands[1], operands[2],
1984 mode, false);
1985
1986 /* Notice that this function is also invoked after reload by
1987 the cbranchdi4_i pattern, through expand_cbranchdi4. */
1988 rtx op1 = operands[1];
1989
1990 if (can_create_pseudo_p ())
1991 operands[1] = force_reg (mode, op1);
1992 /* When we are handling DImode comparisons, we want to keep constants so
1993 that we can optimize the component comparisons; however, memory loads
1994 are better issued as a whole so that they can be scheduled well.
1995 SImode equality comparisons allow I08 constants, but only when they
1996 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1997 into a register, that register might as well be r0, and we allow the
1998 constant. If it is already in a register, this is likely to be
1999 allocated to a different hard register, thus we load the constant into
2000 a register unless it is zero. */
2001 if (!REG_P (operands[2])
2002 && (!CONST_INT_P (operands[2])
2003 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2004 && ((comparison != EQ && comparison != NE)
2005 || (REG_P (op1) && REGNO (op1) != R0_REG)
2006 || !satisfies_constraint_I08 (operands[2])))))
2007 {
2008 if (scratch && GET_MODE (scratch) == mode)
2009 {
2010 emit_move_insn (scratch, operands[2]);
2011 operands[2] = scratch;
2012 }
2013 else if (can_create_pseudo_p ())
2014 operands[2] = force_reg (mode, operands[2]);
2015 }
2016 return comparison;
2017 }
2018
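/* Emit an SImode compare-and-branch: set the T bit from the comparison of
   operands[1] and operands[2] and branch to operands[3], attaching a
   branch probability note when PROBABILITY is non-negative.  */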
2019 void
2020 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2021 {
2022 rtx (*branch_expander) (rtx) = gen_branch_true;
2023 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2024 switch (comparison)
2025 {
2026 case NE: case LT: case LE: case LTU: case LEU:
2027 comparison = reverse_condition (comparison);
2028 branch_expander = gen_branch_false;
2029 default: ;
2030 }
2031 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
2032 gen_rtx_fmt_ee (comparison, SImode,
2033 operands[1], operands[2])));
2034 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2035 if (probability >= 0)
2036 add_int_reg_note (jump, REG_BR_PROB, probability);
2037 }
2038
2039 /* ??? How should we distribute probabilities when more than one branch
2040 is generated? So far we only have some ad-hoc observations:
2041 - If the operands are random, they are likely to differ in both parts.
2042 - If comparing items in a hash chain, the operands are random or equal;
2043 operation should be EQ or NE.
2044 - If items are searched in an ordered tree from the root, we can expect
2045 the highpart to be unequal about half of the time; operation should be
2046 an inequality comparison, operands non-constant, and overall probability
2047 about 50%. Likewise for quicksort.
2048 - Range checks will often be made against constants. Even if we assume for
2049 simplicity an even distribution of the non-constant operand over a
2050 sub-range here, the same probability could be generated with differently
2051 wide sub-ranges - as long as the ratio of the part of the subrange that
2052 is before the threshold to the part that comes after the threshold stays
2053 the same. Thus, we can't really tell anything here;
2054 assuming random distribution is at least simple.
2055 */
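/* Expand a DImode conditional branch as separate SImode comparisons of the
   high and low words, branching to operands[3].  Return true if a sequence
   was emitted, false if the comparison code is not handled here.  */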
2056 bool
2057 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2058 {
2059 enum rtx_code msw_taken, msw_skip, lsw_taken;
2060 rtx_code_label *skip_label = NULL;
2061 rtx op1h, op1l, op2h, op2l;
2062 int num_branches;
2063 int prob, rev_prob;
2064 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2065 rtx scratch = operands[4];
2066
2067 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2068 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2069 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2070 op1l = gen_lowpart (SImode, operands[1]);
2071 op2l = gen_lowpart (SImode, operands[2]);
2072 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2073 prob = split_branch_probability;
2074 rev_prob = REG_BR_PROB_BASE - prob;
2075 switch (comparison)
2076 {
2077 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2078 That costs 1 cycle more when the first branch can be predicted taken,
2079 but saves us mispredicts because only one branch needs prediction.
2080 It also enables generating the cmpeqdi_t-1 pattern. */
2081 case EQ:
2082 if (TARGET_CMPEQDI_T)
2083 {
2084 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2085 emit_jump_insn (gen_branch_true (operands[3]));
2086 return true;
2087 }
2088 msw_skip = NE;
2089 lsw_taken = EQ;
2090 if (prob >= 0)
2091 {
2092 // If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
2093 msw_skip_prob = rev_prob;
2094 if (REG_BR_PROB_BASE <= 65535)
2095 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2096 else
2097 {
2098 lsw_taken_prob
2099 = (prob
2100 ? (REG_BR_PROB_BASE
2101 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2102 / ((gcov_type) prob << 32)))
2103 : 0);
2104 }
2105 }
2106 break;
2107 case NE:
2108 if (TARGET_CMPEQDI_T)
2109 {
2110 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2111 emit_jump_insn (gen_branch_false (operands[3]));
2112 return true;
2113 }
2114 msw_taken = NE;
2115 msw_taken_prob = prob;
2116 lsw_taken = NE;
2117 lsw_taken_prob = 0;
2118 break;
2119 case GTU: case GT:
2120 msw_taken = comparison;
2121 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2122 break;
2123 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2124 msw_skip = swap_condition (msw_taken);
2125 lsw_taken = GTU;
2126 break;
2127 case GEU: case GE:
2128 if (op2l == CONST0_RTX (SImode))
2129 msw_taken = comparison;
2130 else
2131 {
2132 msw_taken = comparison == GE ? GT : GTU;
2133 msw_skip = swap_condition (msw_taken);
2134 lsw_taken = GEU;
2135 }
2136 break;
2137 case LTU: case LT:
2138 msw_taken = comparison;
2139 if (op2l == CONST0_RTX (SImode))
2140 break;
2141 msw_skip = swap_condition (msw_taken);
2142 lsw_taken = LTU;
2143 break;
2144 case LEU: case LE:
2145 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2146 msw_taken = comparison;
2147 else
2148 {
2149 lsw_taken = LEU;
2150 if (comparison == LE)
2151 msw_taken = LT;
2152 else if (op2h != CONST0_RTX (SImode))
2153 msw_taken = LTU;
2154 else
2155 {
2156 msw_skip = swap_condition (LTU);
2157 break;
2158 }
2159 msw_skip = swap_condition (msw_taken);
2160 }
2161 break;
2162 default: return false;
2163 }
2164 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2165 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2166 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2167 if (comparison != EQ && comparison != NE && num_branches > 1)
2168 {
2169 if (!CONSTANT_P (operands[2])
2170 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2171 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2172 {
2173 msw_taken_prob = prob / 2U;
2174 msw_skip_prob
2175 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2176 lsw_taken_prob = prob;
2177 }
2178 else
2179 {
2180 msw_taken_prob = prob;
2181 msw_skip_prob = REG_BR_PROB_BASE;
2182 /* ??? If we have a constant op2h, should we use that when
2183 calculating lsw_taken_prob? */
2184 lsw_taken_prob = prob;
2185 }
2186 }
2187 operands[1] = op1h;
2188 operands[2] = op2h;
2189 operands[4] = NULL_RTX;
2190 if (reload_completed
2191 && ! arith_reg_or_0_operand (op2h, SImode)
2192 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2193 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2194 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2195 {
2196 emit_move_insn (scratch, operands[2]);
2197 operands[2] = scratch;
2198 }
2199 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2200 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2201 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2202 {
2203 rtx taken_label = operands[3];
2204
2205 /* Operands were possibly modified, but msw_skip doesn't expect this.
2206 Always use the original ones. */
2207 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2208 {
2209 operands[1] = op1h;
2210 operands[2] = op2h;
2211 if (reload_completed
2212 && ! arith_reg_or_0_operand (op2h, SImode)
2213 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2214 {
2215 emit_move_insn (scratch, operands[2]);
2216 operands[2] = scratch;
2217 }
2218 }
2219
2220 operands[3] = skip_label = gen_label_rtx ();
2221 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2222 operands[3] = taken_label;
2223 }
2224 operands[1] = op1l;
2225 operands[2] = op2l;
2226 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2227 {
2228 if (reload_completed
2229 && ! arith_reg_or_0_operand (op2l, SImode)
2230 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2231 {
2232 emit_move_insn (scratch, operands[2]);
2233 operands[2] = scratch;
2234 }
2235 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2236 }
2237 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2238 emit_label (skip_label);
2239 return true;
2240 }
2241
2242 /* Given an operand, return 1 if the evaluated operand plugged into an
2243 if_then_else will result in a branch_true, 0 if branch_false, or
2244 -1 if neither applies. The truth table goes like this:
2245
2246 op | cmpval | code | result
2247 ---------+--------+---------+--------------------
2248 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2249 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2250 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2251 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2252 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2253 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2254 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2255 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2256 int
2257 sh_eval_treg_value (rtx op)
2258 {
2259 if (t_reg_operand (op, GET_MODE (op)))
2260 return 1;
2261 if (negt_reg_operand (op, GET_MODE (op)))
2262 return 0;
2263
2264 rtx_code code = GET_CODE (op);
2265 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2266 return -1;
2267
2268 int cmpop = code == EQ ? 1 : 0;
2269 int cmpval = INTVAL (XEXP (op, 1));
2270 if (cmpval != 0 && cmpval != 1)
2271 return -1;
2272
2273 int t;
2274 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2275 t = 0;
2276 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2277 t = 1;
2278 else
2279 return -1;
2280
2281 return t ^ (cmpval == cmpop);
2282 }
2283
2284 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2285
2286 static void
2287 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2288 {
2289 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2290 {
2291 insn = gen_rtx_PARALLEL (VOIDmode,
2292 gen_rtvec (2, insn,
2293 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2294 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2295 }
2296 else
2297 emit_insn (insn);
2298 }
2299
2300 /* Prepare the operands for an scc instruction; make sure that the
2301 compare has been done and the result is in T_REG. */
2302 void
2303 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2304 {
2305 rtx t_reg = get_t_reg_rtx ();
2306 enum rtx_code oldcode = code;
2307 enum machine_mode mode;
2308
2309 /* First need a compare insn. */
2310 switch (code)
2311 {
2312 case NE:
2313 /* It isn't possible to handle this case. */
2314 gcc_unreachable ();
2315 case LT:
2316 code = GT;
2317 break;
2318 case LE:
2319 code = GE;
2320 break;
2321 case LTU:
2322 code = GTU;
2323 break;
2324 case LEU:
2325 code = GEU;
2326 break;
2327 default:
2328 break;
2329 }
2330 if (code != oldcode)
2331 {
2332 rtx tmp = op0;
2333 op0 = op1;
2334 op1 = tmp;
2335 }
2336
2337 mode = GET_MODE (op0);
2338 if (mode == VOIDmode)
2339 mode = GET_MODE (op1);
2340
2341 op0 = force_reg (mode, op0);
2342 if ((code != EQ && code != NE
2343 && (op1 != const0_rtx
2344 || code == GTU || code == GEU || code == LTU || code == LEU))
2345 || (mode == DImode && op1 != const0_rtx)
2346 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2347 op1 = force_reg (mode, op1);
2348
2349 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2350 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2351 mode);
2352 }
2353
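/* SHmedia only: emit a cstore insn for CODE applied to OP0 and OP1 and
   return an rtx that tests the result register against zero, or NULL_RTX
   for comparison codes that cannot be handled cheaply.  */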
2354 rtx
2355 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2356 rtx op0, rtx op1)
2357 {
2358 rtx target = gen_reg_rtx (SImode);
2359 rtx tmp;
2360
2361 gcc_assert (TARGET_SHMEDIA);
2362 switch (code)
2363 {
2364 case EQ:
2365 case GT:
2366 case LT:
2367 case UNORDERED:
2368 case GTU:
2369 case LTU:
2370 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2371 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2372 code = NE;
2373 break;
2374
2375 case NE:
2376 case GE:
2377 case LE:
2378 case ORDERED:
2379 case GEU:
2380 case LEU:
2381 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2382 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2383 code = EQ;
2384 break;
2385
2386 case UNEQ:
2387 case UNGE:
2388 case UNGT:
2389 case UNLE:
2390 case UNLT:
2391 case LTGT:
2392 return NULL_RTX;
2393
2394 default:
2395 gcc_unreachable ();
2396 }
2397
2398 if (mode == DImode)
2399 {
2400 rtx t2 = gen_reg_rtx (DImode);
2401 emit_insn (gen_extendsidi2 (t2, target));
2402 target = t2;
2403 }
2404
2405 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2406 }
2407
2408 /* Called from the md file, set up the operands of a compare instruction. */
2409 void
2410 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2411 {
2412 enum rtx_code code = GET_CODE (operands[0]);
2413 enum rtx_code branch_code;
2414 rtx op0 = operands[1];
2415 rtx op1 = operands[2];
2416 rtx insn, tem;
2417 bool need_ccmpeq = false;
2418
2419 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2420 {
2421 op0 = force_reg (mode, op0);
2422 op1 = force_reg (mode, op1);
2423 }
2424 else
2425 {
2426 if (code != EQ || mode == DImode)
2427 {
2428 /* Force args into regs, since we can't use constants here. */
2429 op0 = force_reg (mode, op0);
2430 if (op1 != const0_rtx || code == GTU || code == GEU)
2431 op1 = force_reg (mode, op1);
2432 }
2433 }
2434
2435 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2436 {
2437 if (code == LT
2438 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2439 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2440 {
2441 tem = op0, op0 = op1, op1 = tem;
2442 code = swap_condition (code);
2443 }
2444
2445 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2446 if (code == GE)
2447 {
2448 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2449 need_ccmpeq = true;
2450 code = GT;
2451 }
2452
2453 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2454 to EQ/GT respectively. */
2455 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2456 }
2457
2458 switch (code)
2459 {
2460 case EQ:
2461 case GT:
2462 case GE:
2463 case GTU:
2464 case GEU:
2465 branch_code = code;
2466 break;
2467 case NE:
2468 case LT:
2469 case LE:
2470 case LTU:
2471 case LEU:
2472 branch_code = reverse_condition (code);
2473 break;
2474 default:
2475 gcc_unreachable ();
2476 }
2477
2478 insn = gen_rtx_SET (VOIDmode,
2479 get_t_reg_rtx (),
2480 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2481
2482 sh_emit_set_t_insn (insn, mode);
2483 if (need_ccmpeq)
2484 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2485
2486 if (branch_code == code)
2487 emit_jump_insn (gen_branch_true (operands[3]));
2488 else
2489 emit_jump_insn (gen_branch_false (operands[3]));
2490 }
2491
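/* Expand a store-flag operation: compare operands[2] and operands[3]
   according to the code in operands[1] and store the T bit result in
   operands[0], negating it where the comparison had to be inverted.  */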
2492 void
2493 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2494 {
2495 enum rtx_code code = GET_CODE (operands[1]);
2496 rtx op0 = operands[2];
2497 rtx op1 = operands[3];
2498 rtx_code_label *lab = NULL;
2499 bool invert = false;
2500 rtx tem;
2501
2502 op0 = force_reg (mode, op0);
2503 if ((code != EQ && code != NE
2504 && (op1 != const0_rtx
2505 || code == GTU || code == GEU || code == LTU || code == LEU))
2506 || (mode == DImode && op1 != const0_rtx)
2507 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2508 op1 = force_reg (mode, op1);
2509
2510 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2511 {
2512 if (code == LT || code == LE)
2513 {
2514 code = swap_condition (code);
2515 tem = op0, op0 = op1, op1 = tem;
2516 }
2517 if (code == GE)
2518 {
2519 if (TARGET_IEEE)
2520 {
2521 lab = gen_label_rtx ();
2522 sh_emit_scc_to_t (EQ, op0, op1);
2523 emit_jump_insn (gen_branch_true (lab));
2524 code = GT;
2525 }
2526 else
2527 {
2528 code = LT;
2529 invert = true;
2530 }
2531 }
2532 }
2533
2534 if (code == NE)
2535 {
2536 code = EQ;
2537 invert = true;
2538 }
2539
2540 sh_emit_scc_to_t (code, op0, op1);
2541 if (lab)
2542 emit_label (lab);
2543 if (invert)
2544 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2545 else
2546 emit_move_insn (operands[0], get_t_reg_rtx ());
2547 }
2548 \f
2549 /* Functions to output assembly code. */
2550
2551 /* Return a sequence of instructions to perform DI or DF move.
2552
2553 Since the SH cannot move a DI or DF in one instruction, we have
2554 to take care when we see overlapping source and dest registers. */
2555 const char *
2556 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2557 enum machine_mode mode)
2558 {
2559 rtx dst = operands[0];
2560 rtx src = operands[1];
2561
2562 if (MEM_P (dst)
2563 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2564 return "mov.l %T1,%0" "\n"
2565 " mov.l %1,%0";
2566
2567 if (register_operand (dst, mode)
2568 && register_operand (src, mode))
2569 {
2570 if (REGNO (src) == MACH_REG)
2571 return "sts mach,%S0" "\n"
2572 " sts macl,%R0";
2573
2574 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2575 when mov.d r1,r0 do r1->r0 then r2->r1. */
2576 if (REGNO (src) + 1 == REGNO (dst))
2577 return "mov %T1,%T0" "\n"
2578 " mov %1,%0";
2579 else
2580 return "mov %1,%0" "\n"
2581 " mov %T1,%T0";
2582 }
2583 else if (CONST_INT_P (src))
2584 {
2585 if (INTVAL (src) < 0)
2586 output_asm_insn ("mov #-1,%S0", operands);
2587 else
2588 output_asm_insn ("mov #0,%S0", operands);
2589
2590 return "mov %1,%R0";
2591 }
2592 else if (MEM_P (src))
2593 {
2594 int ptrreg = -1;
2595 int dreg = REGNO (dst);
2596 rtx inside = XEXP (src, 0);
2597
2598 switch (GET_CODE (inside))
2599 {
2600 case REG:
2601 ptrreg = REGNO (inside);
2602 break;
2603
2604 case SUBREG:
2605 ptrreg = subreg_regno (inside);
2606 break;
2607
2608 case PLUS:
2609 ptrreg = REGNO (XEXP (inside, 0));
2610 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2611 an offsettable address. Unfortunately, offsettable addresses use
2612 QImode to check the offset, and a QImode offsettable address
2613 requires r0 for the other operand, which is not currently
2614 supported, so we can't use the 'o' constraint.
2615 Thus we must check for and handle r0+REG addresses here.
2616 We punt for now, since this is likely very rare. */
2617 gcc_assert (!REG_P (XEXP (inside, 1)));
2618 break;
2619
2620 case LABEL_REF:
2621 return "mov.l %1,%0" "\n"
2622 " mov.l %1+4,%T0";
2623 case POST_INC:
2624 return "mov.l %1,%0" "\n"
2625 " mov.l %1,%T0";
2626 default:
2627 gcc_unreachable ();
2628 }
2629
2630 /* Work out the safe way to copy. Copy into the second half first. */
2631 if (dreg == ptrreg)
2632 return "mov.l %T1,%T0" "\n"
2633 " mov.l %1,%0";
2634 }
2635
2636 return "mov.l %1,%0" "\n"
2637 " mov.l %T1,%T0";
2638 }
2639
2640 /* Print an instruction which would have gone into a delay slot after
2641 another instruction, but couldn't because the other instruction expanded
2642 into a sequence where putting the slot insn at the end wouldn't work. */
2643 static void
2644 print_slot (rtx insn)
2645 {
2646 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2647
2648 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2649 }
2650
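/* Output assembly for a jump INSN to OP whose destination is too far away
   for a plain branch: the destination is emitted as .word/.long data after
   the jump and reached through a scratch register with braf or jmp.  */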
2651 const char *
2652 output_far_jump (rtx_insn *insn, rtx op)
2653 {
2654 struct { rtx lab, reg, op; } this_jmp;
2655 rtx braf_base_lab = NULL_RTX;
2656 const char *jump;
2657 int far;
2658 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2659 rtx_insn *prev;
2660
2661 this_jmp.lab = gen_label_rtx ();
2662
2663 if (TARGET_SH2
2664 && offset >= -32764
2665 && offset - get_attr_length (insn) <= 32766)
2666 {
2667 far = 0;
2668 jump = "mov.w %O0,%1" "\n"
2669 " braf %1";
2670 }
2671 else
2672 {
2673 far = 1;
2674 if (flag_pic)
2675 {
2676 if (TARGET_SH2)
2677 jump = "mov.l %O0,%1" "\n"
2678 " braf %1";
2679 else
2680 jump = "mov.l r0,@-r15" "\n"
2681 " mova %O0,r0" "\n"
2682 " mov.l @r0,%1" "\n"
2683 " add r0,%1" "\n"
2684 " mov.l @r15+,r0" "\n"
2685 " jmp @%1";
2686 }
2687 else
2688 jump = "mov.l %O0,%1" "\n"
2689 " jmp @%1";
2690 }
2691 /* If we have a scratch register available, use it. */
2692 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2693 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2694 {
2695 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2696 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2697 jump = "mov.l r1,@-r15" "\n"
2698 " mova %O0,r0" "\n"
2699 " mov.l @r0,r1" "\n"
2700 " add r1,r0" "\n"
2701 " mov.l @r15+,r1" "\n"
2702 " jmp @%1";
2703 output_asm_insn (jump, &this_jmp.lab);
2704 if (dbr_sequence_length ())
2705 print_slot (final_sequence);
2706 else
2707 output_asm_insn ("nop", 0);
2708 }
2709 else
2710 {
2711 /* Output the delay slot insn first if any. */
2712 if (dbr_sequence_length ())
2713 print_slot (final_sequence);
2714
2715 this_jmp.reg = gen_rtx_REG (SImode, 13);
2716 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2717 Fortunately, MACL is fixed and call-clobbered, and we never
2718 need its value across jumps, so save r13 in it instead of in
2719 the stack. */
2720 if (TARGET_SH5)
2721 output_asm_insn ("lds r13,macl", 0);
2722 else
2723 output_asm_insn ("mov.l r13,@-r15", 0);
2724 output_asm_insn (jump, &this_jmp.lab);
2725 if (TARGET_SH5)
2726 output_asm_insn ("sts macl,r13", 0);
2727 else
2728 output_asm_insn ("mov.l @r15+,r13", 0);
2729 }
2730 if (far && flag_pic && TARGET_SH2)
2731 {
2732 braf_base_lab = gen_label_rtx ();
2733 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2734 CODE_LABEL_NUMBER (braf_base_lab));
2735 }
2736 if (far)
2737 output_asm_insn (".align 2", 0);
2738 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2739 this_jmp.op = op;
2740 if (far && flag_pic)
2741 {
2742 if (TARGET_SH2)
2743 this_jmp.lab = braf_base_lab;
2744 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2745 }
2746 else
2747 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2748 return "";
2749 }
2750
2751 /* Local label counter, used for constants in the pool and inside
2752 pattern branches. */
2753 static int lf = 100;
2754
2755 /* Output code for ordinary branches. */
2756 const char *
2757 output_branch (int logic, rtx insn, rtx *operands)
2758 {
2759 switch (get_attr_length (insn))
2760 {
2761 case 6:
2762 /* This can happen if filling the delay slot has caused a forward
2763 branch to exceed its range (we could reverse it, but only
2764 when we know we won't overextend other branches; this should
2765 best be handled by relaxation).
2766 It can also happen when other condbranches hoist delay slot insn
2767 from their destination, thus leading to code size increase.
2768 But the branch will still be in the range -4092..+4098 bytes. */
2769 if (! TARGET_RELAX)
2770 {
2771 int label = lf++;
2772 /* The call to print_slot will clobber the operands. */
2773 rtx op0 = operands[0];
2774
2775 /* If the instruction in the delay slot is annulled (true), then
2776 there is no delay slot where we can put it now. The only safe
2777 place for it is after the label. final will do that by default. */
2778
2779 if (final_sequence
2780 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2781 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2782 {
2783 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2784 ASSEMBLER_DIALECT ? "/" : ".", label);
2785 print_slot (final_sequence);
2786 }
2787 else
2788 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2789
2790 output_asm_insn ("bra\t%l0", &op0);
2791 fprintf (asm_out_file, "\tnop\n");
2792 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2793
2794 return "";
2795 }
2796 /* When relaxing, handle this like a short branch. The linker
2797 will fix it up if it still doesn't fit after relaxation. */
2798 case 2:
2799 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2800
2801 /* These are for SH2e, in which we have to account for the
2802 extra nop because of the hardware bug in annulled branches. */
2803 case 8:
2804 if (! TARGET_RELAX)
2805 {
2806 int label = lf++;
2807
2808 gcc_assert (!final_sequence
2809 || !(INSN_ANNULLED_BRANCH_P
2810 (XVECEXP (final_sequence, 0, 0))));
2811 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2812 logic ? "f" : "t",
2813 ASSEMBLER_DIALECT ? "/" : ".", label);
2814 fprintf (asm_out_file, "\tnop\n");
2815 output_asm_insn ("bra\t%l0", operands);
2816 fprintf (asm_out_file, "\tnop\n");
2817 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2818
2819 return "";
2820 }
2821 /* When relaxing, fall through. */
2822 case 4:
2823 {
2824 char buffer[10];
2825
2826 sprintf (buffer, "b%s%ss\t%%l0",
2827 logic ? "t" : "f",
2828 ASSEMBLER_DIALECT ? "/" : ".");
2829 output_asm_insn (buffer, &operands[0]);
2830 return "nop";
2831 }
2832
2833 default:
2834 /* There should be no longer branches now - that would
2835 indicate that something has destroyed the branches set
2836 up in machine_dependent_reorg. */
2837 gcc_unreachable ();
2838 }
2839 }
2840
2841 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2842 fill in operand 9 with a label for the successor insn.
2843 We try to use jump threading where possible.
2844 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2845 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2846 follow jmp and bt, if the address is in range. */
2847 const char *
2848 output_branchy_insn (enum rtx_code code, const char *templ,
2849 rtx_insn *insn, rtx *operands)
2850 {
2851 rtx_insn *next_insn = NEXT_INSN (insn);
2852
2853 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2854 {
2855 rtx src = SET_SRC (PATTERN (next_insn));
2856 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2857 {
2858 /* Following branch not taken */
2859 operands[9] = gen_label_rtx ();
2860 emit_label_after (operands[9], next_insn);
2861 INSN_ADDRESSES_NEW (operands[9],
2862 INSN_ADDRESSES (INSN_UID (next_insn))
2863 + get_attr_length (next_insn));
2864 return templ;
2865 }
2866 else
2867 {
2868 int offset = (branch_dest (next_insn)
2869 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2870 if (offset >= -252 && offset <= 258)
2871 {
2872 if (GET_CODE (src) == IF_THEN_ELSE)
2873 /* branch_true */
2874 src = XEXP (src, 1);
2875 operands[9] = src;
2876 return templ;
2877 }
2878 }
2879 }
2880 operands[9] = gen_label_rtx ();
2881 emit_label_after (operands[9], insn);
2882 INSN_ADDRESSES_NEW (operands[9],
2883 INSN_ADDRESSES (INSN_UID (insn))
2884 + get_attr_length (insn));
2885 return templ;
2886 }
2887
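/* Output the follow-up compare of an IEEE-conformant floating point
   comparison: a bt that skips the trailing fcmp/eq when the T bit has
   already been set by the preceding comparison.  */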
2888 const char *
2889 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2890 {
2891 return output_branchy_insn (NE, "bt %l9" "\n"
2892 " fcmp/eq %1,%0",
2893 insn, operands);
2894 }
2895 \f
2896 /* Output the start of the assembler file. */
2897 static void
2898 sh_file_start (void)
2899 {
2900 default_file_start ();
2901
2902 if (TARGET_ELF)
2903 /* We need to show the text section with the proper
2904 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2905 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2906 will complain. We can teach GAS specifically about the
2907 default attributes for our choice of text section, but
2908 then we would have to change GAS again if/when we change
2909 the text section name. */
2910 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2911 else
2912 /* Switch to the data section so that the coffsem symbol
2913 isn't in the text section. */
2914 switch_to_section (data_section);
2915
2916 if (TARGET_LITTLE_ENDIAN)
2917 fputs ("\t.little\n", asm_out_file);
2918
2919 if (!TARGET_ELF)
2920 {
2921 if (TARGET_SHCOMPACT)
2922 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2923 else if (TARGET_SHMEDIA)
2924 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2925 TARGET_SHMEDIA64 ? 64 : 32);
2926 }
2927 }
2928 \f
2929 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2930 static bool
2931 unspec_caller_rtx_p (rtx pat)
2932 {
2933 rtx base, offset;
2934 int i;
2935
2936 split_const (pat, &base, &offset);
2937 if (GET_CODE (base) == UNSPEC)
2938 {
2939 if (XINT (base, 1) == UNSPEC_CALLER)
2940 return true;
2941 for (i = 0; i < XVECLEN (base, 0); i++)
2942 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2943 return true;
2944 }
2945 return false;
2946 }
2947
2948 /* Indicate that INSN cannot be duplicated. This is true for insns
2949 that generate a unique label. */
2950 static bool
2951 sh_cannot_copy_insn_p (rtx_insn *insn)
2952 {
2953 rtx pat;
2954
2955 if (!reload_completed || !flag_pic)
2956 return false;
2957
2958 if (!NONJUMP_INSN_P (insn))
2959 return false;
2960 if (asm_noperands (insn) >= 0)
2961 return false;
2962
2963 pat = PATTERN (insn);
2964 if (GET_CODE (pat) != SET)
2965 return false;
2966 pat = SET_SRC (pat);
2967
2968 if (unspec_caller_rtx_p (pat))
2969 return true;
2970
2971 return false;
2972 }
2973 \f
2974 /* Number of instructions used to make an arithmetic right shift by N. */
2975 static const char ashiftrt_insns[] =
2976 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2977
2978 /* Description of a logical left or right shift, when expanded to a sequence
2979 of 1/2/8/16 shifts.
2980 Notice that one bit right shifts clobber the T bit. One bit left shifts
2981 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
2982 enum
2983 {
2984 ASHL_CLOBBERS_T = 1 << 0,
2985 LSHR_CLOBBERS_T = 1 << 1
2986 };
2987
2988 struct ashl_lshr_sequence
2989 {
2990 char insn_count;
2991 char amount[6];
2992 char clobbers_t;
2993 };
2994
2995 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2996 {
2997 { 0, { 0 }, 0 }, // 0
2998 { 1, { 1 }, LSHR_CLOBBERS_T },
2999 { 1, { 2 }, 0 },
3000 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3001 { 2, { 2, 2 }, 0 }, // 4
3002 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3003 { 3, { 2, 2, 2 }, 0 },
3004 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3005 { 1, { 8 }, 0 }, // 8
3006 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3007 { 2, { 8, 2 }, 0 },
3008 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3009 { 3, { 8, 2, 2 }, 0 }, // 12
3010 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3011 { 3, { 8, -2, 8 }, 0 },
3012 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3013 { 1, { 16 }, 0 }, // 16
3014 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3015 { 2, { 16, 2 }, 0 },
3016 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3017 { 3, { 16, 2, 2 }, 0 }, // 20
3018 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3019 { 3, { 16, -2, 8 }, 0 },
3020 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3021 { 2, { 16, 8 }, 0 }, // 24
3022 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3023 { 3, { 16, 8, 2 }, 0 },
3024 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3025 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3026 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3027 { 3, { 16, -2, 16 }, 0 },
3028
3029 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3030 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3031 However, the shift-and combiner code needs this entry here to be in
3032 terms of real shift insns. */
3033 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3034 };
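/* As an example, the entry for a shift count of 13,
   { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T }, performs the shift as partial
   shifts by 8, 2, 1 and 2 bits; only the logical right shift variant
   clobbers T, because of the single-bit right shift in the sequence.  */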
3035
3036 /* Individual shift amounts for shift amounts < 16, where up to three of the
3037 highmost bits might be clobbered. This is typically used when combined with some
3038 kind of sign or zero extension. */
3039 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3040 {
3041 { 0, { 0 }, 0 }, // 0
3042 { 1, { 1 }, LSHR_CLOBBERS_T },
3043 { 1, { 2 }, 0 },
3044 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3045 { 2, { 2, 2 }, 0 }, // 4
3046 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3047 { 2, { 8, -2 }, 0 },
3048 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3049 { 1, { 8 }, 0 }, // 8
3050 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3051 { 2, { 8, 2 }, 0 },
3052 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3053 { 3, { 8, 2, 2 }, 0 }, // 12
3054 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3055 { 2, { 16, -2 }, 0 },
3056 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3057 { 1, { 16 }, 0 }, // 16
3058 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3059 { 2, { 16, 2 }, 0 },
3060 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3061 { 3, { 16, 2, 2 }, 0 }, // 20
3062 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3063 { 3, { 16, -2, 8 }, 0 },
3064 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3065 { 2, { 16, 8 }, 0 }, // 24
3066 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3067 { 3, { 16, 8, 2 }, 0 },
3068 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3069 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3070 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3071 { 3, { 16, -2, 16 }, 0 },
3072 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3073 };
3074
3075 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3076 will clobber the T bit. */
3077 bool
3078 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3079 {
3080 gcc_assert (CONST_INT_P (shift_amount));
3081
3082 const int shift_amount_i = INTVAL (shift_amount) & 31;
3083
3084 /* Special case for shift count of 31: use and-rotl sequence. */
3085 if (shift_amount_i == 31)
3086 return true;
3087
3088 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3089 & ASHL_CLOBBERS_T) != 0;
3090 }
3091
3092 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3093 instructions will clobber the T bit. */
3094 bool
3095 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3096 {
3097 gcc_assert (CONST_INT_P (shift_amount));
3098
3099 const int shift_amount_i = INTVAL (shift_amount) & 31;
3100
3101 /* Special case for shift count of 31: use shll-movt sequence. */
3102 if (shift_amount_i == 31)
3103 return true;
3104
3105 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3106 & LSHR_CLOBBERS_T) != 0;
3107 }
3108
3109 /* Return true if it is potentially beneficial to use a dynamic shift
3110 instruction (shad / shar) instead of a combination of 1/2/8/16
3111 shift instructions for the specified shift count.
3112 If dynamic shifts are not available, always return false. */
3113 bool
3114 sh_dynamicalize_shift_p (rtx count)
3115 {
3116 gcc_assert (CONST_INT_P (count));
3117
3118 const int shift_amount_i = INTVAL (count) & 31;
3119 int insn_count;
3120
3121 /* For left and right shifts, there are shorter 2 insn sequences for
3122 shift amounts of 31. */
3123 if (shift_amount_i == 31)
3124 insn_count = 2;
3125 else
3126 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3127
3128 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3129 }
3130
3131 /* Assuming we have a value that has been sign-extended by at least one bit,
3132 can we use the ext_ashl_lshr_seq sequences with the last shift turned to an
3133 arithmetic shift to shift it by N without data loss, and quicker than by
3134 other means? */
3135 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3136
3137 /* Return the cost of a shift. */
3138 static inline int
3139 shiftcosts (rtx x)
3140 {
3141 int value;
3142
3143 if (TARGET_SHMEDIA)
3144 return 1;
3145
3146 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3147 {
3148 if (GET_MODE (x) == DImode
3149 && CONST_INT_P (XEXP (x, 1))
3150 && INTVAL (XEXP (x, 1)) == 1)
3151 return 2;
3152
3153 /* Everything else is invalid, because there is no pattern for it. */
3154 return -1;
3155 }
3156 /* If shifting by a non-constant, then this will be expensive. */
3157 if (!CONST_INT_P (XEXP (x, 1)))
3158 return SH_DYNAMIC_SHIFT_COST;
3159
3160 /* Otherwise, return the true cost in instructions. Cope with out of range
3161 shift counts more or less arbitrarily. */
3162 value = INTVAL (XEXP (x, 1)) & 31;
3163
3164 if (GET_CODE (x) == ASHIFTRT)
3165 {
3166 int cost = ashiftrt_insns[value];
3167 /* If dynamic shifts are available and profitable in this case, then we
3168 put the constant in a reg and use shad. */
3169 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3170 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3171 return cost;
3172 }
3173 else
3174 return ashl_lshr_seq[value].insn_count;
3175 }
3176
3177 /* Return the cost of an AND/XOR/IOR operation. */
3178 static inline int
3179 and_xor_ior_costs (rtx x, int code)
3180 {
3181 /* On SH1-4 we have only max. SImode operations.
3182 Double the cost for modes > SImode. */
3183 const int cost_scale = !TARGET_SHMEDIA
3184 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3185 ? 2 : 1;
3186
3187 /* A logical operation with two registers is a single cycle
3188 instruction. */
3189 if (!CONST_INT_P (XEXP (x, 1)))
3190 return 1 * cost_scale;
3191
3192 int i = INTVAL (XEXP (x, 1));
3193
3194 if (TARGET_SHMEDIA)
3195 {
3196 if (satisfies_constraint_I10 (XEXP (x, 1))
3197 || satisfies_constraint_J16 (XEXP (x, 1)))
3198 return 1;
3199 else
3200 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3201 }
3202
3203 /* These constants are single cycle extu.[bw] instructions. */
3204 if ((i == 0xff || i == 0xffff) && code == AND)
3205 return 1 * cost_scale;
3206 /* Constants that can be used in an instruction as an immediate are
3207 a single cycle, but this requires r0, so make it a little more
3208 expensive. */
3209 if (CONST_OK_FOR_K08 (i))
3210 return 2 * cost_scale;
3211 /* Constants that can be loaded with a mov immediate need one more cycle.
3212 This case is probably unnecessary. */
3213 if (CONST_OK_FOR_I08 (i))
3214 return 2 * cost_scale;
3215 /* Any other constant requires an additional 2 cycle pc-relative load.
3216 This case is probably unnecessary. */
3217 return 3 * cost_scale;
3218 }
3219
3220 /* Return the cost of an addition or a subtraction. */
3221 static inline int
3222 addsubcosts (rtx x)
3223 {
3224 if (GET_MODE (x) == SImode)
3225 {
3226 /* The addc or subc patterns will eventually become one or two
3227 instructions. Below are some costs for some of the patterns
3228 which combine would reject because the costs of the individual
3229 insns in the patterns are lower.
3230
3231 FIXME: It would be much easier if we had something like insn cost
3232 attributes and the cost calculation machinery used those attributes
3233 in the first place. This would eliminate redundant recog-like C
3234 code to calculate costs of complex patterns. */
3235 rtx op0 = XEXP (x, 0);
3236 rtx op1 = XEXP (x, 1);
3237
3238 if (GET_CODE (x) == PLUS)
3239 {
3240 if (GET_CODE (op0) == AND
3241 && XEXP (op0, 1) == const1_rtx
3242 && (GET_CODE (op1) == PLUS
3243 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3244 return 1;
3245
3246 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3247 && GET_CODE (op1) == LSHIFTRT
3248 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3249 return 1;
3250 }
3251 }
3252
3253 /* On SH1-4 we have only max. SImode operations.
3254 Double the cost for modes > SImode. */
3255 const int cost_scale = !TARGET_SHMEDIA
3256 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3257 ? 2 : 1;
3258
3259 /* Adding a register is a single cycle insn. */
3260 if (REG_P (XEXP (x, 1))
3261 || GET_CODE (XEXP (x, 1)) == SUBREG)
3262 return 1 * cost_scale;
3263
3264 /* Likewise for small constants. */
3265 if (CONST_INT_P (XEXP (x, 1))
3266 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3267 return 1 * cost_scale;
3268
3269 if (TARGET_SHMEDIA)
3270 switch (GET_CODE (XEXP (x, 1)))
3271 {
3272 case CONST:
3273 case LABEL_REF:
3274 case SYMBOL_REF:
3275 return TARGET_SHMEDIA64 ? 5 : 3;
3276
3277 case CONST_INT:
3278 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3279 return 2;
3280 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3281 return 3;
3282 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3283 return 4;
3284
3285 /* Fall through. */
3286 default:
3287 return 5;
3288 }
3289
3290 /* Any other constant requires a 2 cycle pc-relative load plus an
3291 addition. */
3292 return 3 * cost_scale;
3293 }
3294
3295 /* Return the cost of a multiply. */
3296 static inline int
3297 multcosts (rtx x ATTRIBUTE_UNUSED)
3298 {
3299 if (sh_multcost >= 0)
3300 return sh_multcost;
3301 if (TARGET_SHMEDIA)
3302 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3303 accept constants. Ideally, we would use a cost of one or two and
3304 add the cost of the operand, but disregard the latter when inside loops
3305 and loop invariant code motion is still to follow.
3306 Using a multiply first and splitting it later if it's a loss
3307 doesn't work because of different sign / zero extension semantics
3308 of multiplies vs. shifts. */
3309 return optimize_size ? 2 : 3;
3310
3311 if (TARGET_SH2)
3312 {
3313 /* We have a mul insn, so we can never take more than the mul and the
3314 read of the mac reg, but count more because of the latency and extra
3315 reg usage. */
3316 if (optimize_size)
3317 return 2;
3318 return 3;
3319 }
3320
3321 /* If we're aiming at small code, then just count the number of
3322 insns in a multiply call sequence. */
3323 if (optimize_size)
3324 return 5;
3325
3326 /* Otherwise, count all the insns in the routine we'd be calling, too. */
3327 return 20;
3328 }
3329
3330 /* Compute a (partial) cost for rtx X. Return true if the complete
3331 cost has been computed, and false if subexpressions should be
3332 scanned. In either case, *TOTAL contains the cost result. */
3333 static bool
3334 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3335 int *total, bool speed ATTRIBUTE_UNUSED)
3336 {
3337 switch (code)
3338 {
3339 /* The lower-subreg pass decides whether to split multi-word regs
3340 into individual regs by looking at the cost for a SET of certain
3341 modes with the following patterns:
3342 (set (reg) (reg))
3343 (set (reg) (const_int 0))
3344 On machines that support vector-move operations a multi-word move
3345 is the same cost as individual reg move. On SH there is no
3346 vector-move, so we have to provide the correct cost in the number
3347 of move insns to load/store the reg of the mode in question. */
3348 case SET:
3349 if (register_operand (SET_DEST (x), VOIDmode)
3350 && (register_operand (SET_SRC (x), VOIDmode)
3351 || satisfies_constraint_Z (SET_SRC (x))))
3352 {
3353 const enum machine_mode mode = GET_MODE (SET_DEST (x));
3354 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3355 / mov_insn_size (mode, TARGET_SH2A));
3356 return true;
3357 }
3358 return false;
3359
3360 /* The cost of a mem access is mainly the cost of the address mode. */
3361 case MEM:
3362 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3363 true);
3364 return true;
3365
3366 /* The cost of a sign or zero extend depends on whether the source is a
3367 reg or a mem. In case of a mem take the address into account. */
3368 case SIGN_EXTEND:
3369 if (REG_P (XEXP (x, 0)))
3370 {
3371 *total = COSTS_N_INSNS (1);
3372 return true;
3373 }
3374 if (MEM_P (XEXP (x, 0)))
3375 {
3376 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3377 GET_MODE (XEXP (x, 0)),
3378 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3379 return true;
3380 }
3381 return false;
3382
3383 case ZERO_EXTEND:
3384 if (REG_P (XEXP (x, 0)))
3385 {
3386 *total = COSTS_N_INSNS (1);
3387 return true;
3388 }
3389 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3390 && (GET_MODE (XEXP (x, 0)) == QImode
3391 || GET_MODE (XEXP (x, 0)) == HImode))
3392 {
3393 /* Handle SH2A's movu.b and movu.w insn. */
3394 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3395 GET_MODE (XEXP (x, 0)),
3396 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3397 return true;
3398 }
3399 return false;
3400
3401 /* mems for SFmode and DFmode can be inside a parallel due to
3402 the way the fpscr is handled. */
3403 case PARALLEL:
3404 for (int i = 0; i < XVECLEN (x, 0); i++)
3405 {
3406 rtx xx = XVECEXP (x, 0, i);
3407 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3408 {
3409 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3410 GET_MODE (XEXP (xx, 0)),
3411 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3412 return true;
3413 }
3414 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3415 {
3416 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3417 GET_MODE (XEXP (xx, 1)),
3418 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3419 return true;
3420 }
3421 }
3422
3423 if (sh_1el_vec (x, VOIDmode))
3424 *total = outer_code != SET;
3425 else if (sh_rep_vec (x, VOIDmode))
3426 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3427 + (outer_code != SET));
3428 else
3429 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3430 return true;
3431
3432 case CONST_INT:
3433 if (TARGET_SHMEDIA)
3434 {
3435 if (INTVAL (x) == 0)
3436 *total = 0;
3437 else if (outer_code == AND && and_operand ((x), DImode))
3438 *total = 0;
3439 else if ((outer_code == IOR || outer_code == XOR
3440 || outer_code == PLUS)
3441 && CONST_OK_FOR_I10 (INTVAL (x)))
3442 *total = 0;
3443 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3444 *total = COSTS_N_INSNS (outer_code != SET);
3445 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3446 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3447 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3448 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3449 else
3450 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3451 return true;
3452 }
3453 if (CONST_OK_FOR_I08 (INTVAL (x)))
3454 *total = 0;
3455 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3456 && CONST_OK_FOR_K08 (INTVAL (x)))
3457 *total = 1;
3458 /* prepare_cmp_insn will force costly constants into registers before
3459 the cbranch[sd]i4 patterns can see them, so preserve potentially
3460 interesting ones not covered by I08 above. */
3461 else if (outer_code == COMPARE
3462 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3463 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3464 || INTVAL (x) == 0x7fffffff
3465 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3466 *total = 1;
3467 else
3468 *total = 8;
3469 return true;
3470
3471 case EQ:
3472 /* An and with a constant compared against zero is
3473 most likely going to be a TST #imm, R0 instruction.
3474 Notice that this does not catch the zero_extract variants from
3475 the md file. */
3476 if (GET_CODE (XEXP (x, 0)) == AND
3477 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3478 {
3479 *total = 1;
3480 return true;
3481 }
3482 else
3483 return false;
3484
3485 case SMIN:
3486 case SMAX:
3487 /* This is most likely a clips.b or clips.w insn that is being made up
3488 by combine. */
3489 if (TARGET_SH2A
3490 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3491 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3492 && REG_P (XEXP (XEXP (x, 0), 0))
3493 && CONST_INT_P (XEXP (x, 1)))
3494 {
3495 *total = COSTS_N_INSNS (1);
3496 return true;
3497 }
3498 else
3499 return false;
3500
3501 case CONST:
3502 case LABEL_REF:
3503 case SYMBOL_REF:
3504 if (TARGET_SHMEDIA64)
3505 *total = COSTS_N_INSNS (4);
3506 else if (TARGET_SHMEDIA32)
3507 *total = COSTS_N_INSNS (2);
3508 else
3509 *total = 5;
3510 return true;
3511
3512 case CONST_DOUBLE:
3513 if (TARGET_SHMEDIA)
3514 *total = COSTS_N_INSNS (4);
3515 /* prepare_cmp_insn will force costly constants into registers before
3516 the cbranchdi4 pattern can see them, so preserve potentially
3517 interesting ones. */
3518 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3519 *total = 1;
3520 else
3521 *total = 10;
3522 return true;
3523
3524 case CONST_VECTOR:
3525 /* FIXME: This looks broken. Only the last statement has any effect.
3526 Probably this could be folded with the PARALLEL case? */
3527 if (x == CONST0_RTX (GET_MODE (x)))
3528 *total = 0;
3529 else if (sh_1el_vec (x, VOIDmode))
3530 *total = outer_code != SET;
3531 if (sh_rep_vec (x, VOIDmode))
3532 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3533 + (outer_code != SET));
3534 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3535 return true;
3536
3537 case PLUS:
3538 case MINUS:
3539 *total = COSTS_N_INSNS (addsubcosts (x));
3540 return true;
3541
3542 case AND:
3543 case XOR:
3544 case IOR:
3545 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3546 return true;
3547
3548 case MULT:
3549 *total = COSTS_N_INSNS (multcosts (x));
3550 return true;
3551
3552 case LT:
3553 case GE:
3554 /* div0s sign comparison. */
3555 if (GET_CODE (XEXP (x, 0)) == XOR
3556 && REG_P ((XEXP (XEXP (x, 0), 0)))
3557 && REG_P ((XEXP (XEXP (x, 0), 1)))
3558 && satisfies_constraint_Z (XEXP (x, 1)))
3559 {
3560 *total = COSTS_N_INSNS (1);
3561 return true;
3562 }
3563 else
3564 return false;
3565
3566 case LSHIFTRT:
3567 /* div0s sign comparison. */
3568 if (GET_CODE (XEXP (x, 0)) == XOR
3569 && REG_P ((XEXP (XEXP (x, 0), 0)))
3570 && REG_P ((XEXP (XEXP (x, 0), 1)))
3571 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3572 {
3573 *total = COSTS_N_INSNS (1);
3574 return true;
3575 }
3576 /* Fall through to shiftcosts. */
3577 case ASHIFT:
3578 case ASHIFTRT:
3579 {
3580 int cost = shiftcosts (x);
3581 if (cost < 0)
3582 return false;
3583 *total = COSTS_N_INSNS (cost);
3584 return true;
3585 }
3586
3587 case DIV:
3588 case UDIV:
3589 case MOD:
3590 case UMOD:
3591 *total = COSTS_N_INSNS (20);
3592 return true;
3593
3594 case FLOAT:
3595 case FIX:
3596 *total = 100;
3597 return true;
3598
3599 default:
3600 return false;
3601 }
3602 }
3603
3604 /* Determine the size of the fundamental move insn that will be used
3605 for the specified mode. */
3606 static inline int
3607 mov_insn_size (enum machine_mode mode, bool consider_sh2a)
3608 {
3609 const int mode_sz = GET_MODE_SIZE (mode);
3610
3611 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3612 || (TARGET_FMOVD && mode == DFmode))
3613 return mode_sz;
3614 else
3615 {
3616 /* The max. available mode for actual move insns is SImode.
3617 Larger accesses will be split into multiple loads/stores. */
3618 const int max_mov_sz = GET_MODE_SIZE (SImode);
3619 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3620 }
3621 }
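/* For illustration: this yields 1 for QImode, 2 for HImode, 4 for SImode
and also 4 for DImode (which is split into two SImode moves); DFmode
yields 8 only when double precision fmov moves are available, and 4
otherwise. */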
3622
3623 /* Determine the maximum possible displacement for a move insn for the
3624 specified mode. */
3625 int
3626 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3627 {
3628 /* The 4 byte displacement move insns are the same as the 2 byte
3629 versions but take a 12 bit displacement. All we need to do is to
3630 scale the max. displacement value accordingly. */
3631 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3632
3633 /* SH2A supports FPU move insns with 12 bit displacements.
3634 Other variants do not support any kind of displacements for
3635 FPU move insns. */
3636 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3637 return 0;
3638 else
3639 {
3640 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3641 const int mode_sz = GET_MODE_SIZE (mode);
3642 int r = 15 * mov_insn_sz * disp_scale;
3643
3644 /* If the mov insn will be split into multiple loads/stores, the
3645 maximum possible displacement is a bit smaller. */
3646 if (mode_sz > mov_insn_sz)
3647 r -= mode_sz - mov_insn_sz;
3648 return r;
3649 }
3650 }
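/* Worked example (following the code above): for SImode without SH2A the
limit is 15 * 4 = 60 bytes; QImode gives 15 and HImode 30. A DImode
access is split into two SImode moves, so its limit shrinks to
60 - 4 = 56. With the SH2A 12 bit displacement forms the SImode limit
becomes 4095 * 4 = 16380. */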
3651
3652 /* Determine the alignment mask for a move insn of the
3653 specified mode. */
3654 static inline int
3655 mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
3656 {
3657 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3658 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3659 }
3660
3661 /* Return the displacement value of a displacement address. */
3662 HOST_WIDE_INT
3663 sh_disp_addr_displacement (rtx x)
3664 {
3665 gcc_assert (satisfies_constraint_Sdd (x));
3666 return INTVAL (XEXP (XEXP (x, 0), 1));
3667 }
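/* E.g. for an operand such as (mem:SI (plus:SI (reg:SI Rn) (const_int 8)))
this returns 8. */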
3668
3669 /* Compute the cost of an address. */
3670 static int
3671 sh_address_cost (rtx x, enum machine_mode mode,
3672 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3673 {
3674 /* 'GBR + 0'. Account one more because of R0 restriction. */
3675 if (REG_P (x) && REGNO (x) == GBR_REG)
3676 return 2;
3677
3678 /* Simple reg, post-inc, pre-dec addressing. */
3679 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3680 return 1;
3681
3682 /* 'reg + disp' addressing. */
3683 if (GET_CODE (x) == PLUS
3684 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3685 {
3686 /* 'GBR + disp'. Account one more because of R0 restriction. */
3687 if (REGNO (XEXP (x, 0)) == GBR_REG
3688 && gbr_displacement (XEXP (x, 1), mode))
3689 return 2;
3690
3691 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3692
3693 if (offset == 0)
3694 return 1;
3695
3696 /* The displacement would fit into a 2 byte move insn.
3697 HImode and QImode loads/stores with displacement put pressure on
3698 R0 which will most likely require another reg copy. Thus account
3699 a higher cost for that. */
3700 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3701 return (mode == HImode || mode == QImode) ? 2 : 1;
3702
3703 /* The displacement would fit into a 4 byte move insn (SH2A). */
3704 if (TARGET_SH2A
3705 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3706 return 2;
3707
3708 /* The displacement is probably out of range and will require extra
3709 calculations. */
3710 return 3;
3711 }
3712
3713 /* 'reg + reg' addressing. Account a slightly higher cost because of
3714 increased pressure on R0. */
3715 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3716 && ! TARGET_SHMEDIA)
3717 return 3;
3718
3719 /* Not sure what it is - probably expensive. */
3720 return 10;
3721 }
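/* Summary of the resulting costs (not exhaustive): @Rn, @Rn+ and @-Rn
cost 1; @(disp,Rn) costs 1 for SImode and 2 for QImode/HImode when the
displacement fits a 2 byte move insn; @(disp,GBR) costs 2; @(R0,Rn)
costs 3 on non-SHmedia targets; anything unrecognized costs 10. */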
3722
3723 /* Code to expand a shift. */
3724 static void
3725 gen_ashift (int type, int n, rtx reg)
3726 {
3727 rtx n_rtx;
3728
3729 /* Negative values here come from the shift_amounts array. */
3730 if (n < 0)
3731 {
3732 if (type == ASHIFT)
3733 type = LSHIFTRT;
3734 else
3735 type = ASHIFT;
3736 n = -n;
3737 }
3738
3739 n_rtx = GEN_INT (n);
3740 gcc_assert (satisfies_constraint_P27 (n_rtx));
3741
3742 switch (type)
3743 {
3744 case ASHIFTRT:
3745 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3746 break;
3747 case LSHIFTRT:
3748 if (n == 1)
3749 emit_insn (gen_shlr (reg, reg));
3750 else
3751 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3752 break;
3753 case ASHIFT:
3754 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3755 break;
3756 default:
3757 gcc_unreachable ();
3758 }
3759 }
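/* Note that a negative amount swaps the shift direction; e.g.
gen_ashift (ASHIFT, -2, reg) emits a 2 bit logical right shift
(typically a shlr2 insn). */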
3760
3761 /* Code to expand a HImode shift. */
3762 static void
3763 gen_ashift_hi (int type, int n, rtx reg)
3764 {
3765 /* Negative values here come from the shift_amounts array. */
3766 if (n < 0)
3767 {
3768 if (type == ASHIFT)
3769 type = LSHIFTRT;
3770 else
3771 type = ASHIFT;
3772 n = -n;
3773 }
3774
3775 switch (type)
3776 {
3777 case ASHIFTRT:
3778 case LSHIFTRT:
3779 /* We don't have HImode right shift operations because using the
3780 ordinary 32 bit shift instructions for that doesn't generate proper
3781 zero/sign extension.
3782 gen_ashift_hi is only called in contexts where we know that the
3783 sign extension works out correctly. */
3784 {
3785 int offset = 0;
3786 if (GET_CODE (reg) == SUBREG)
3787 {
3788 offset = SUBREG_BYTE (reg);
3789 reg = SUBREG_REG (reg);
3790 }
3791 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3792 break;
3793 }
3794 case ASHIFT:
3795 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3796 break;
3797 }
3798 }
3799
3800 /* Output RTL to split a constant shift into its component SH constant
3801 shift instructions. */
3802 void
3803 gen_shifty_op (int code, rtx *operands)
3804 {
3805 int value = INTVAL (operands[2]);
3806 int max, i;
3807
3808 /* Truncate the shift count in case it is out of bounds. */
3809 value = value & 31;
3810
3811 if (value == 31)
3812 {
3813 if (code == LSHIFTRT)
3814 {
3815 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3816 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3817 return;
3818 }
3819 else if (code == ASHIFT)
3820 {
3821 /* There is a two instruction sequence for 31 bit left shifts,
3822 but it requires r0. */
3823 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3824 {
3825 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3826 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3827 return;
3828 }
3829 }
3830 }
3831 else if (value == 0)
3832 {
3833 /* This can happen even when optimizing, if there were subregs before
3834 reload. Don't output a nop here, as this is never optimized away;
3835 use a no-op move instead. */
3836 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3837 return;
3838 }
3839
3840 max = ashl_lshr_seq[value].insn_count;
3841 for (i = 0; i < max; i++)
3842 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3843 }
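/* For example, a constant left shift by 20 can be emitted as the three
insn sequence shll16; shll2; shll2; the exact sequence used for each
shift count comes from ashl_lshr_seq. */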
3844
3845 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3846 don't matter. */
3847 void
3848 gen_shifty_hi_op (int code, rtx *operands)
3849 {
3850 int value = INTVAL (operands[2]);
3851 int max, i;
3852 void (*gen_fun) (int, int, rtx);
3853
3854 /* This operation is used by and_shl for SImode values with a few
3855 high bits known to be cleared. */
3856 value &= 31;
3857 if (value == 0)
3858 {
3859 emit_insn (gen_nop ());
3860 return;
3861 }
3862
3863 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3864 if (code == ASHIFT)
3865 {
3866 max = ext_ashl_lshr_seq[value].insn_count;
3867 for (i = 0; i < max; i++)
3868 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3869 }
3870 else
3871 /* When shifting right, emit the shifts in reverse order, so that
3872 solitary negative values come first. */
3873 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3874 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3875 }
3876
3877 /* Output RTL for an arithmetic right shift.
3878 ??? Rewrite to use super-optimizer sequences. */
3879 bool
3880 expand_ashiftrt (rtx *operands)
3881 {
3882 rtx wrk;
3883 char func[18];
3884 int value;
3885
3886 if (TARGET_DYNSHIFT)
3887 {
3888 if (!CONST_INT_P (operands[2]))
3889 {
3890 rtx count = copy_to_mode_reg (SImode, operands[2]);
3891 emit_insn (gen_negsi2 (count, count));
3892 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3893 return true;
3894 }
3895 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3896 > 1 + SH_DYNAMIC_SHIFT_COST)
3897 {
3898 rtx count
3899 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3900 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3901 return true;
3902 }
3903 }
3904 if (!CONST_INT_P (operands[2]))
3905 return false;
3906
3907 value = INTVAL (operands[2]) & 31;
3908
3909 if (value == 31)
3910 {
3911 /* If we are called from abs expansion, arrange things so that we
3912 can use a single MT instruction that doesn't clobber the source,
3913 if LICM can hoist out the load of the constant zero. */
3914 if (currently_expanding_to_rtl)
3915 {
3916 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3917 operands[1]));
3918 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3919 return true;
3920 }
3921 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3922 return true;
3923 }
3924 else if (value >= 16 && value <= 19)
3925 {
3926 wrk = gen_reg_rtx (SImode);
3927 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3928 value -= 16;
3929 while (value--)
3930 gen_ashift (ASHIFTRT, 1, wrk);
3931 emit_move_insn (operands[0], wrk);
3932 return true;
3933 }
3934 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3935 else if (value <= 5)
3936 {
3937 wrk = gen_reg_rtx (SImode);
3938 emit_move_insn (wrk, operands[1]);
3939 while (value--)
3940 gen_ashift (ASHIFTRT, 1, wrk);
3941 emit_move_insn (operands[0], wrk);
3942 return true;
3943 }
3944
3945 wrk = gen_reg_rtx (Pmode);
3946
3947 /* Load the value into an arg reg and call a helper. */
3948 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3949 sprintf (func, "__ashiftrt_r4_%d", value);
3950 function_symbol (wrk, func, SFUNC_STATIC);
3951 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3952 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3953 return true;
3954 }
3955
3956 /* Try to find a good way to implement the combiner pattern
3957 [(set (match_operand:SI 0 "register_operand" "r")
3958 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3959 (match_operand:SI 2 "const_int_operand" "n"))
3960 (match_operand:SI 3 "const_int_operand" "n"))) .
3961 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3962 return 0 for simple right / left or left/right shift combination.
3963 return 1 for a combination of shifts with zero_extend.
3964 return 2 for a combination of shifts with an AND that needs r0.
3965 return 3 for a combination of shifts with an AND that needs an extra
3966 scratch register, when the three highmost bits of the AND mask are clear.
3967 return 4 for a combination of shifts with an AND that needs an extra
3968 scratch register, when any of the three highmost bits of the AND mask
3969 is set.
3970 If ATTRP is set, store an initial right shift width in ATTRP[0],
3971 and the instruction length in ATTRP[1] . These values are not valid
3972 when returning 0.
3973 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3974 shift_amounts for the last shift value that is to be used before the
3975 sign extend. */
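/* An illustration of the pattern above: (x << 8) & 0xff00 equals the low
byte of x zero-extended and shifted left by 8, so it can be implemented
with a shift pair, with an extu.b plus a shift, or with a left shift
followed by an AND, whichever the cost comparison below favours. */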
3976 int
3977 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3978 {
3979 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3980 int left = INTVAL (left_rtx), right;
3981 int best = 0;
3982 int cost, best_cost = 10000;
3983 int best_right = 0, best_len = 0;
3984 int i;
3985 int can_ext;
3986
3987 if (left < 0 || left > 31)
3988 return 0;
3989 if (CONST_INT_P (mask_rtx))
3990 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3991 else
3992 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3993 /* Can this be expressed as a right shift / left shift pair? */
3994 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3995 right = exact_log2 (lsb);
3996 mask2 = ~(mask + lsb - 1);
3997 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3998 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
3999 if (! mask2)
4000 best_cost = ashl_lshr_seq[right].insn_count
4001 + ashl_lshr_seq[right + left].insn_count;
4002 /* mask has no trailing zeroes <==> ! right */
4003 else if (! right && mask2 == ~(lsb2 - 1))
4004 {
4005 int late_right = exact_log2 (lsb2);
4006 best_cost = ashl_lshr_seq[left + late_right].insn_count
4007 + ashl_lshr_seq[late_right].insn_count;
4008 }
4009 /* Try to use zero extend. */
4010 if (mask2 == ~(lsb2 - 1))
4011 {
4012 int width, first;
4013
4014 for (width = 8; width <= 16; width += 8)
4015 {
4016 /* Can we zero-extend right away? */
4017 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4018 {
4019 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4020 + ext_ashl_lshr_seq[left + right].insn_count;
4021 if (cost < best_cost)
4022 {
4023 best = 1;
4024 best_cost = cost;
4025 best_right = right;
4026 best_len = cost;
4027 if (attrp)
4028 attrp[2] = -1;
4029 }
4030 continue;
4031 }
4032 /* ??? Could try to put zero extend into initial right shift,
4033 or even shift a bit left before the right shift. */
4034 /* Determine value of first part of left shift, to get to the
4035 zero extend cut-off point. */
4036 first = width - exact_log2 (lsb2) + right;
4037 if (first >= 0 && right + left - first >= 0)
4038 {
4039 cost = ext_ashl_lshr_seq[right].insn_count
4040 + ext_ashl_lshr_seq[first].insn_count + 1
4041 + ext_ashl_lshr_seq[right + left - first].insn_count;
4042
4043 if (cost < best_cost)
4044 {
4045 best = 1;
4046 best_cost = cost;
4047 best_right = right;
4048 best_len = cost;
4049 if (attrp)
4050 attrp[2] = first;
4051 }
4052 }
4053 }
4054 }
4055 /* Try to use the r0 AND pattern. */
4056 for (i = 0; i <= 2; i++)
4057 {
4058 if (i > right)
4059 break;
4060 if (! CONST_OK_FOR_K08 (mask >> i))
4061 continue;
4062 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4063 if (cost < best_cost)
4064 {
4065 best = 2;
4066 best_cost = cost;
4067 best_right = i;
4068 best_len = cost - 1;
4069 }
4070 }
4071 /* Try to use a scratch register to hold the AND operand. */
4072 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4073 for (i = 0; i <= 2; i++)
4074 {
4075 if (i > right)
4076 break;
4077 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4078 + (can_ext
4079 ? ext_ashl_lshr_seq
4080 : ashl_lshr_seq)[left + i].insn_count;
4081 if (cost < best_cost)
4082 {
4083 best = 4 - can_ext;
4084 best_cost = cost;
4085 best_right = i;
4086 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4087 }
4088 }
4089
4090 if (attrp)
4091 {
4092 attrp[0] = best_right;
4093 attrp[1] = best_len;
4094 }
4095 return best;
4096 }
4097
4098 /* This is used in length attributes of the unnamed instructions
4099 corresponding to shl_and_kind return values of 1 and 2. */
4100 int
4101 shl_and_length (rtx insn)
4102 {
4103 rtx set_src, left_rtx, mask_rtx;
4104 int attributes[3];
4105
4106 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4107 left_rtx = XEXP (XEXP (set_src, 0), 1);
4108 mask_rtx = XEXP (set_src, 1);
4109 shl_and_kind (left_rtx, mask_rtx, attributes);
4110 return attributes[1];
4111 }
4112
4113 /* This is used in length attribute of the and_shl_scratch instruction. */
4114 int
4115 shl_and_scr_length (rtx insn)
4116 {
4117 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4118 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4119 rtx op = XEXP (set_src, 0);
4120 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4121 op = XEXP (XEXP (op, 0), 0);
4122 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4123 }
4124
4125 /* Generate rtl for instructions for which shl_and_kind advised a particular
4126 method of generating them, i.e. returned zero. */
4127 bool
4128 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4129 {
4130 int attributes[3];
4131 unsigned HOST_WIDE_INT mask;
4132 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4133 int right, total_shift;
4134 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4135
4136 right = attributes[0];
4137 total_shift = INTVAL (left_rtx) + right;
4138 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4139 switch (kind)
4140 {
4141 default:
4142 return true;
4143 case 1:
4144 {
4145 int first = attributes[2];
4146 rtx operands[3];
4147
4148 if (first < 0)
4149 {
4150 emit_insn ((mask << right) <= 0xff
4151 ? gen_zero_extendqisi2 (dest,
4152 gen_lowpart (QImode, source))
4153 : gen_zero_extendhisi2 (dest,
4154 gen_lowpart (HImode, source)));
4155 source = dest;
4156 }
4157 if (source != dest)
4158 emit_insn (gen_movsi (dest, source));
4159 operands[0] = dest;
4160 if (right)
4161 {
4162 operands[2] = GEN_INT (right);
4163 gen_shifty_hi_op (LSHIFTRT, operands);
4164 }
4165 if (first > 0)
4166 {
4167 operands[2] = GEN_INT (first);
4168 gen_shifty_hi_op (ASHIFT, operands);
4169 total_shift -= first;
4170 mask <<= first;
4171 }
4172 if (first >= 0)
4173 emit_insn (mask <= 0xff
4174 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4175 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4176 if (total_shift > 0)
4177 {
4178 operands[2] = GEN_INT (total_shift);
4179 gen_shifty_hi_op (ASHIFT, operands);
4180 }
4181 break;
4182 }
4183 case 4:
4184 shift_gen_fun = gen_shifty_op;
4185 case 3:
4186 /* If the topmost bit that matters is set, set the topmost bits
4187 that don't matter. This way, we might be able to get a shorter
4188 signed constant. */
4189 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4190 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4191 case 2:
4192 /* Don't expand fine-grained when combining, because that will
4193 make the pattern fail. */
4194 if (currently_expanding_to_rtl
4195 || reload_in_progress || reload_completed)
4196 {
4197 rtx operands[3];
4198
4199 /* Cases 3 and 4 should be handled by this split
4200 only while combining */
4201 gcc_assert (kind <= 2);
4202 if (right)
4203 {
4204 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4205 source = dest;
4206 }
4207 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4208 if (total_shift)
4209 {
4210 operands[0] = dest;
4211 operands[1] = dest;
4212 operands[2] = GEN_INT (total_shift);
4213 shift_gen_fun (ASHIFT, operands);
4214 }
4215 break;
4216 }
4217 else
4218 {
4219 int neg = 0;
4220 if (kind != 4 && total_shift < 16)
4221 {
4222 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4223 if (neg > 0)
4224 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4225 else
4226 neg = 0;
4227 }
4228 emit_insn (gen_and_shl_scratch (dest, source,
4229 GEN_INT (right),
4230 GEN_INT (mask),
4231 GEN_INT (total_shift + neg),
4232 GEN_INT (neg)));
4233 emit_insn (gen_movsi (dest, dest));
4234 break;
4235 }
4236 }
4237 return false;
4238 }
4239
4240 /* Try to find a good way to implement the combiner pattern
4241 [(set (match_operand:SI 0 "register_operand" "=r")
4242 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4243 (match_operand:SI 2 "const_int_operand" "n")
4244 (match_operand:SI 3 "const_int_operand" "n")
4245 (const_int 0)))
4246 (clobber (reg:SI T_REG))]
4247 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4248 return 0 for simple left / right shift combination.
4249 return 1 for left shift / 8 bit sign extend / left shift.
4250 return 2 for left shift / 16 bit sign extend / left shift.
4251 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4252 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4253 return 5 for left shift / 16 bit sign extend / right shift
4254 return 6 for < 8 bit sign extend / left shift.
4255 return 7 for < 8 bit sign extend / left shift / single right shift.
4256 If COSTP is nonzero, assign the calculated cost to *COSTP. */
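/* For instance, LEFT_RTX = 8 and SIZE_RTX = 24 describe a value with
insize = 16 significant input bits; the result equals the source
sign-extended from its low 16 bits and then shifted left by 8. */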
4257 int
4258 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4259 {
4260 int left, size, insize, ext;
4261 int cost = 0, best_cost;
4262 int kind;
4263
4264 left = INTVAL (left_rtx);
4265 size = INTVAL (size_rtx);
4266 insize = size - left;
4267 gcc_assert (insize > 0);
4268 /* Default to left / right shift. */
4269 kind = 0;
4270 best_cost = ashl_lshr_seq[32 - insize].insn_count
4271 + ashl_lshr_seq[32 - size].insn_count;
4272 if (size <= 16)
4273 {
4274 /* 16 bit shift / sign extend / 16 bit shift */
4275 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4276 + ashl_lshr_seq[16 - size].insn_count;
4277 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4278 below, by alternative 3 or something even better. */
4279 if (cost < best_cost)
4280 {
4281 kind = 5;
4282 best_cost = cost;
4283 }
4284 }
4285 /* Try a plain sign extend between two shifts. */
4286 for (ext = 16; ext >= insize; ext -= 8)
4287 {
4288 if (ext <= size)
4289 {
4290 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4291 + ashl_lshr_seq[size - ext].insn_count;
4292 if (cost < best_cost)
4293 {
4294 kind = ext / (unsigned) 8;
4295 best_cost = cost;
4296 }
4297 }
4298 /* Check if we can do a sloppy shift with a final signed shift
4299 restoring the sign. */
4300 if (EXT_SHIFT_SIGNED (size - ext))
4301 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4302 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4303 /* If not, maybe it's still cheaper to do the second shift sloppy,
4304 and do a final sign extend? */
4305 else if (size <= 16)
4306 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4307 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4308 + 1;
4309 else
4310 continue;
4311 if (cost < best_cost)
4312 {
4313 kind = ext / (unsigned) 8 + 2;
4314 best_cost = cost;
4315 }
4316 }
4317 /* Check if we can sign extend in r0 */
4318 if (insize < 8)
4319 {
4320 cost = 3 + ashl_lshr_seq[left].insn_count;
4321 if (cost < best_cost)
4322 {
4323 kind = 6;
4324 best_cost = cost;
4325 }
4326 /* Try the same with a final signed shift. */
4327 if (left < 31)
4328 {
4329 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4330 if (cost < best_cost)
4331 {
4332 kind = 7;
4333 best_cost = cost;
4334 }
4335 }
4336 }
4337 if (TARGET_DYNSHIFT)
4338 {
4339 /* Try to use a dynamic shift. */
4340 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4341 if (cost < best_cost)
4342 {
4343 kind = 0;
4344 best_cost = cost;
4345 }
4346 }
4347 if (costp)
4348 *costp = cost;
4349 return kind;
4350 }
4351
4352 /* Function to be used in the length attribute of the instructions
4353 implementing this pattern. */
4354 int
4355 shl_sext_length (rtx insn)
4356 {
4357 rtx set_src, left_rtx, size_rtx;
4358 int cost;
4359
4360 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4361 left_rtx = XEXP (XEXP (set_src, 0), 1);
4362 size_rtx = XEXP (set_src, 1);
4363 shl_sext_kind (left_rtx, size_rtx, &cost);
4364 return cost;
4365 }
4366
4367 /* Generate rtl for this pattern */
4368 bool
4369 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4370 {
4371 int kind;
4372 int left, size, insize, cost;
4373 rtx operands[3];
4374
4375 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4376 left = INTVAL (left_rtx);
4377 size = INTVAL (size_rtx);
4378 insize = size - left;
4379 switch (kind)
4380 {
4381 case 1:
4382 case 2:
4383 case 3:
4384 case 4:
4385 {
4386 int ext = kind & 1 ? 8 : 16;
4387 int shift2 = size - ext;
4388
4389 /* Don't expand fine-grained when combining, because that will
4390 make the pattern fail. */
4391 if (! currently_expanding_to_rtl
4392 && ! reload_in_progress && ! reload_completed)
4393 {
4394 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4395 emit_insn (gen_movsi (dest, source));
4396 break;
4397 }
4398 if (dest != source)
4399 emit_insn (gen_movsi (dest, source));
4400 operands[0] = dest;
4401 if (ext - insize)
4402 {
4403 operands[2] = GEN_INT (ext - insize);
4404 gen_shifty_hi_op (ASHIFT, operands);
4405 }
4406 emit_insn (kind & 1
4407 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4408 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4409 if (kind <= 2)
4410 {
4411 if (shift2)
4412 {
4413 operands[2] = GEN_INT (shift2);
4414 gen_shifty_op (ASHIFT, operands);
4415 }
4416 }
4417 else
4418 {
4419 if (shift2 > 0)
4420 {
4421 if (EXT_SHIFT_SIGNED (shift2))
4422 {
4423 operands[2] = GEN_INT (shift2 + 1);
4424 gen_shifty_op (ASHIFT, operands);
4425 operands[2] = const1_rtx;
4426 gen_shifty_op (ASHIFTRT, operands);
4427 break;
4428 }
4429 operands[2] = GEN_INT (shift2);
4430 gen_shifty_hi_op (ASHIFT, operands);
4431 }
4432 else if (shift2)
4433 {
4434 operands[2] = GEN_INT (-shift2);
4435 gen_shifty_hi_op (LSHIFTRT, operands);
4436 }
4437 emit_insn (size <= 8
4438 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4439 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4440 }
4441 break;
4442 }
4443 case 5:
4444 {
4445 int i = 16 - size;
4446 if (! currently_expanding_to_rtl
4447 && ! reload_in_progress && ! reload_completed)
4448 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4449 else
4450 {
4451 operands[0] = dest;
4452 operands[2] = GEN_INT (16 - insize);
4453 gen_shifty_hi_op (ASHIFT, operands);
4454 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4455 }
4456 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4457 while (--i >= 0)
4458 gen_ashift (ASHIFTRT, 1, dest);
4459 break;
4460 }
4461 case 6:
4462 case 7:
4463 /* Don't expand fine-grained when combining, because that will
4464 make the pattern fail. */
4465 if (! currently_expanding_to_rtl
4466 && ! reload_in_progress && ! reload_completed)
4467 {
4468 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4469 emit_insn (gen_movsi (dest, source));
4470 break;
4471 }
4472 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4473 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4474 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4475 operands[0] = dest;
4476 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4477 gen_shifty_op (ASHIFT, operands);
4478 if (kind == 7)
4479 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4480 break;
4481 default:
4482 return true;
4483 }
4484 return false;
4485 }
4486
4487 /* Prefix a symbol_ref name with "datalabel". */
4488 rtx
4489 gen_datalabel_ref (rtx sym)
4490 {
4491 const char *str;
4492
4493 if (GET_CODE (sym) == LABEL_REF)
4494 return gen_rtx_CONST (GET_MODE (sym),
4495 gen_rtx_UNSPEC (GET_MODE (sym),
4496 gen_rtvec (1, sym),
4497 UNSPEC_DATALABEL));
4498
4499 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4500
4501 str = XSTR (sym, 0);
4502 /* Share all SYMBOL_REF strings with the same value - that is important
4503 for cse. */
4504 str = IDENTIFIER_POINTER (get_identifier (str));
4505 XSTR (sym, 0) = str;
4506
4507 return sym;
4508 }
4509
4510 \f
4511 static alloc_pool label_ref_list_pool;
4512
4513 typedef struct label_ref_list_d
4514 {
4515 rtx_code_label *label;
4516 struct label_ref_list_d *next;
4517 } *label_ref_list_t;
4518
4519 /* The SH cannot load a large constant into a register; constants have to
4520 come from a pc relative load. The reference of a pc relative load
4521 instruction must be less than 1k in front of the instruction. This
4522 means that we often have to dump a constant inside a function, and
4523 generate code to branch around it.
4524
4525 It is important to minimize this, since the branches will slow things
4526 down and make things bigger.
4527
4528 Worst case code looks like:
4529
4530 mov.l L1,rn
4531 bra L2
4532 nop
4533 align
4534 L1: .long value
4535 L2:
4536 ..
4537
4538 mov.l L3,rn
4539 bra L4
4540 nop
4541 align
4542 L3: .long value
4543 L4:
4544 ..
4545
4546 We fix this by performing a scan before scheduling, which notices which
4547 instructions need to have their operands fetched from the constant table
4548 and builds the table.
4549
4550 The algorithm is:
4551
4552 scan, find an instruction which needs a pcrel move. Look forward, find the
4553 last barrier which is within MAX_COUNT bytes of the requirement.
4554 If there isn't one, make one. Process all the instructions between
4555 the find and the barrier.
4556
4557 In the above example, we can tell that L3 is within 1k of L1, so
4558 the first move can be shrunk from the 3 insn+constant sequence into
4559 just 1 insn, and the constant moved to L3 to make:
4560
4561 mov.l L1,rn
4562 ..
4563 mov.l L3,rn
4564 bra L4
4565 nop
4566 align
4567 L3:.long value
4568 L4:.long value
4569
4570 Then the second move becomes the target for the shortening process. */
4571
4572 typedef struct
4573 {
4574 rtx value; /* Value in table. */
4575 rtx_code_label *label; /* Label of value. */
4576 label_ref_list_t wend; /* End of window. */
4577 enum machine_mode mode; /* Mode of value. */
4578
4579 /* True if this constant is accessed as part of a post-increment
4580 sequence. Note that HImode constants are never accessed in this way. */
4581 bool part_of_sequence_p;
4582 } pool_node;
4583
4584 /* The maximum number of constants that can fit into one pool: constants
4585 in the range 0..510 are at least 2 bytes long, and those in the range
4586 from there to 1018 are at least 4 bytes. */
4587
4588 #define MAX_POOL_SIZE 372
4589 static pool_node pool_vector[MAX_POOL_SIZE];
4590 static int pool_size;
4591 static rtx_code_label *pool_window_label;
4592 static int pool_window_last;
4593
4594 static int max_labelno_before_reorg;
4595
4596 /* ??? If we need a constant in HImode which is the truncated value of a
4597 constant we need in SImode, we could combine the two entries thus saving
4598 two bytes. Is this common enough to be worth the effort of implementing
4599 it? */
4600
4601 /* ??? This stuff should be done at the same time that we shorten branches.
4602 As it is now, we must assume that all branches are the maximum size, and
4603 this causes us to almost always output constant pools sooner than
4604 necessary. */
4605
4606 /* Add a constant to the pool and return its label. */
4607 static rtx_code_label *
4608 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4609 {
4610 int i;
4611 rtx_code_label *lab, *new_rtx;
4612 label_ref_list_t ref, newref;
4613
4614 /* First see if we've already got it. */
4615 for (i = 0; i < pool_size; i++)
4616 {
4617 if (x->code == pool_vector[i].value->code
4618 && mode == pool_vector[i].mode)
4619 {
4620 if (x->code == CODE_LABEL)
4621 {
4622 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4623 continue;
4624 }
4625 if (rtx_equal_p (x, pool_vector[i].value))
4626 {
4627 lab = new_rtx = 0;
4628 if (! last_value
4629 || ! i
4630 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4631 {
4632 new_rtx = gen_label_rtx ();
4633 LABEL_REFS (new_rtx) = pool_vector[i].label;
4634 pool_vector[i].label = lab = new_rtx;
4635 }
4636 if (lab && pool_window_label)
4637 {
4638 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4639 newref->label = pool_window_label;
4640 ref = pool_vector[pool_window_last].wend;
4641 newref->next = ref;
4642 pool_vector[pool_window_last].wend = newref;
4643 }
4644 if (new_rtx)
4645 pool_window_label = new_rtx;
4646 pool_window_last = i;
4647 return lab;
4648 }
4649 }
4650 }
4651
4652 /* Need a new one. */
4653 pool_vector[pool_size].value = x;
4654 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4655 {
4656 lab = 0;
4657 pool_vector[pool_size - 1].part_of_sequence_p = true;
4658 }
4659 else
4660 lab = gen_label_rtx ();
4661 pool_vector[pool_size].mode = mode;
4662 pool_vector[pool_size].label = lab;
4663 pool_vector[pool_size].wend = NULL;
4664 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4665 if (lab && pool_window_label)
4666 {
4667 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4668 newref->label = pool_window_label;
4669 ref = pool_vector[pool_window_last].wend;
4670 newref->next = ref;
4671 pool_vector[pool_window_last].wend = newref;
4672 }
4673 if (lab)
4674 pool_window_label = lab;
4675 pool_window_last = pool_size;
4676 pool_size++;
4677 return lab;
4678 }
4679
4680 /* Output the literal table. START, if nonzero, is the first instruction
4681 this table is needed for, and also indicates that there is at least one
4682 casesi_worker_2 instruction; we have to emit the operand3 labels from
4683 these insns at a 4-byte aligned position. BARRIER is the barrier
4684 after which we are to place the table. */
4685 static void
4686 dump_table (rtx_insn *start, rtx_insn *barrier)
4687 {
4688 rtx_insn *scan = barrier;
4689 int i;
4690 bool need_align = true;
4691 rtx lab;
4692 label_ref_list_t ref;
4693 bool have_df = false;
4694
4695 /* Do two passes, first time dump out the HI sized constants. */
4696
4697 for (i = 0; i < pool_size; i++)
4698 {
4699 pool_node *p = &pool_vector[i];
4700
4701 if (p->mode == HImode)
4702 {
4703 if (need_align)
4704 {
4705 scan = emit_insn_after (gen_align_2 (), scan);
4706 need_align = false;
4707 }
4708 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4709 scan = emit_label_after (lab, scan);
4710 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4711 scan);
4712 for (ref = p->wend; ref; ref = ref->next)
4713 {
4714 lab = ref->label;
4715 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4716 }
4717 }
4718 else if (p->mode == DFmode)
4719 have_df = true;
4720 }
4721
4722 need_align = true;
4723
4724 if (start)
4725 {
4726 scan = emit_insn_after (gen_align_4 (), scan);
4727 need_align = false;
4728 for (; start != barrier; start = NEXT_INSN (start))
4729 if (NONJUMP_INSN_P (start)
4730 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4731 {
4732 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4733 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4734
4735 scan = emit_label_after (lab, scan);
4736 }
4737 }
4738 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4739 {
4740 rtx align_insn = NULL_RTX;
4741
4742 scan = emit_label_after (gen_label_rtx (), scan);
4743 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4744 need_align = false;
4745
4746 for (i = 0; i < pool_size; i++)
4747 {
4748 pool_node *p = &pool_vector[i];
4749
4750 switch (p->mode)
4751 {
4752 case HImode:
4753 break;
4754 case SImode:
4755 case SFmode:
4756 if (align_insn && !p->part_of_sequence_p)
4757 {
4758 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4759 emit_label_before (lab, align_insn);
4760 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4761 align_insn);
4762 for (ref = p->wend; ref; ref = ref->next)
4763 {
4764 lab = ref->label;
4765 emit_insn_before (gen_consttable_window_end (lab),
4766 align_insn);
4767 }
4768 delete_insn (align_insn);
4769 align_insn = NULL_RTX;
4770 continue;
4771 }
4772 else
4773 {
4774 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4775 scan = emit_label_after (lab, scan);
4776 scan = emit_insn_after (gen_consttable_4 (p->value,
4777 const0_rtx), scan);
4778 need_align = ! need_align;
4779 }
4780 break;
4781 case DFmode:
4782 if (need_align)
4783 {
4784 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4785 align_insn = scan;
4786 need_align = false;
4787 }
4788 case DImode:
4789 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4790 scan = emit_label_after (lab, scan);
4791 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4792 scan);
4793 break;
4794 default:
4795 gcc_unreachable ();
4796 }
4797
4798 if (p->mode != HImode)
4799 {
4800 for (ref = p->wend; ref; ref = ref->next)
4801 {
4802 lab = ref->label;
4803 scan = emit_insn_after (gen_consttable_window_end (lab),
4804 scan);
4805 }
4806 }
4807 }
4808
4809 pool_size = 0;
4810 }
4811
4812 for (i = 0; i < pool_size; i++)
4813 {
4814 pool_node *p = &pool_vector[i];
4815
4816 switch (p->mode)
4817 {
4818 case HImode:
4819 break;
4820 case SImode:
4821 case SFmode:
4822 if (need_align)
4823 {
4824 need_align = false;
4825 scan = emit_label_after (gen_label_rtx (), scan);
4826 scan = emit_insn_after (gen_align_4 (), scan);
4827 }
4828 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4829 scan = emit_label_after (lab, scan);
4830 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4831 scan);
4832 break;
4833 case DFmode:
4834 case DImode:
4835 if (need_align)
4836 {
4837 need_align = false;
4838 scan = emit_label_after (gen_label_rtx (), scan);
4839 scan = emit_insn_after (gen_align_4 (), scan);
4840 }
4841 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4842 scan = emit_label_after (lab, scan);
4843 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4844 scan);
4845 break;
4846 default:
4847 gcc_unreachable ();
4848 }
4849
4850 if (p->mode != HImode)
4851 {
4852 for (ref = p->wend; ref; ref = ref->next)
4853 {
4854 lab = ref->label;
4855 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4856 }
4857 }
4858 }
4859
4860 scan = emit_insn_after (gen_consttable_end (), scan);
4861 scan = emit_barrier_after (scan);
4862 pool_size = 0;
4863 pool_window_label = NULL;
4864 pool_window_last = 0;
4865 }
4866
4867 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4868
4869 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4870
4871 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4872 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4873 need to fix it if the input value is CONST_OK_FOR_I08. */
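/* For example, (set (reg:SI Rn) (const_int 0x12345678)) cannot be
synthesized from the 8 bit (or SH2A 20/28 bit) immediate forms and is
therefore considered broken; it will be loaded from the constant pool
instead. */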
4874 static bool
4875 broken_move (rtx_insn *insn)
4876 {
4877 if (NONJUMP_INSN_P (insn))
4878 {
4879 rtx pat = PATTERN (insn);
4880 if (GET_CODE (pat) == PARALLEL)
4881 pat = XVECEXP (pat, 0, 0);
4882 if (GET_CODE (pat) == SET
4883 /* We can load any 8-bit value if we don't care what the high
4884 order bits end up as. */
4885 && GET_MODE (SET_DEST (pat)) != QImode
4886 && (CONSTANT_P (SET_SRC (pat))
4887 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4888 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4889 /* Match mova_const. */
4890 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4891 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4892 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4893 && ! (TARGET_SH2E
4894 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4895 && (fp_zero_operand (SET_SRC (pat))
4896 || fp_one_operand (SET_SRC (pat)))
4897 /* In general we don't know the current setting of fpscr, so
4898 disable fldi.
4899 There is an exception if this was a register-register move
4900 before reload - and hence it was ascertained that we have
4901 single precision setting - and in a post-reload optimization
4902 we changed this to do a constant load. In that case
4903 we don't have an r0 clobber, hence we must use fldi. */
4904 && (TARGET_FMOVD
4905 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4906 == SCRATCH))
4907 && REG_P (SET_DEST (pat))
4908 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4909 && ! (TARGET_SH2A
4910 && GET_MODE (SET_DEST (pat)) == SImode
4911 && (satisfies_constraint_I20 (SET_SRC (pat))
4912 || satisfies_constraint_I28 (SET_SRC (pat))))
4913 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4914 return true;
4915 }
4916
4917 return false;
4918 }
4919
4920 /* Return true if the specified insn is a mova insn. */
4921 static bool
4922 mova_p (rtx_insn *insn)
4923 {
4924 return (NONJUMP_INSN_P (insn)
4925 && GET_CODE (PATTERN (insn)) == SET
4926 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4927 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4928 /* Don't match mova_const. */
4929 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4930 }
4931
4932 /* Fix up a mova from a switch that went out of range. */
4933 static void
4934 fixup_mova (rtx_insn *mova)
4935 {
4936 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4937 if (! flag_pic)
4938 {
4939 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4940 INSN_CODE (mova) = -1;
4941 }
4942 else
4943 {
4944 rtx_insn *worker = mova;
4945 rtx lab = gen_label_rtx ();
4946 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4947
4948 do
4949 {
4950 worker = NEXT_INSN (worker);
4951 gcc_assert (worker
4952 && !LABEL_P (worker)
4953 && !JUMP_P (worker));
4954 } while (NOTE_P (worker)
4955 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4956 wpat = PATTERN (worker);
4957 wpat0 = XVECEXP (wpat, 0, 0);
4958 wpat1 = XVECEXP (wpat, 0, 1);
4959 wsrc = SET_SRC (wpat0);
4960 PATTERN (worker) = (gen_casesi_worker_2
4961 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4962 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4963 XEXP (wpat1, 0)));
4964 INSN_CODE (worker) = -1;
4965 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4966 base = gen_rtx_LABEL_REF (Pmode, lab);
4967 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4968 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4969 INSN_CODE (mova) = -1;
4970 }
4971 }
4972
4973 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4974 *num_mova, and check that the new mova is not nested within the first one.
4975 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4976 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4977 static int
4978 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
4979 {
4980 int n_addr = 0; /* Initialization to shut up spurious warning. */
4981 int f_target, n_target = 0; /* Likewise. */
4982
4983 if (optimize)
4984 {
4985 /* If NEW_MOVA has no address yet, it will be handled later. */
4986 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4987 return -1;
4988
4989 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4990 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4991 if (n_addr > n_target || n_addr + 1022 < n_target)
4992 {
4993 /* Change the mova into a load.
4994 broken_move will then return true for it. */
4995 fixup_mova (new_mova);
4996 return 1;
4997 }
4998 }
4999 if (!(*num_mova)++)
5000 {
5001 *first_mova = new_mova;
5002 return 2;
5003 }
5004 if (!optimize
5005 || ((f_target
5006 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5007 >= n_target))
5008 return -1;
5009
5010 (*num_mova)--;
5011 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5012 > n_target - n_addr)
5013 {
5014 fixup_mova (*first_mova);
5015 return 0;
5016 }
5017 else
5018 {
5019 fixup_mova (new_mova);
5020 return 1;
5021 }
5022 }
5023
5024 /* Find the last barrier from insn FROM which is close enough to hold the
5025 constant pool. If we can't find one, then create one near the end of
5026 the range. */
5027 static rtx_insn *
5028 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
5029 {
5030 int count_si = 0;
5031 int count_hi = 0;
5032 int found_hi = 0;
5033 int found_si = 0;
5034 int found_di = 0;
5035 int hi_align = 2;
5036 int si_align = 2;
5037 int leading_mova = num_mova;
5038 rtx_insn *barrier_before_mova = NULL;
5039 rtx_insn *found_barrier = NULL;
5040 rtx_insn *good_barrier = NULL;
5041 int si_limit;
5042 int hi_limit;
5043 rtx_insn *orig = from;
5044 rtx_insn *last_got = NULL;
5045 rtx_insn *last_symoff = NULL;
5046
5047 /* For HImode: range is 510, add 4 because pc counts from address of
5048 second instruction after this one, subtract 2 for the jump instruction
5049 that we may need to emit before the table, subtract 2 for the instruction
5050 that fills the jump delay slot (in very rare cases, reorg will take an
5051 instruction from after the constant pool or will leave the delay slot
5052 empty). This gives 510.
5053 For SImode: range is 1020, add 4 because pc counts from address of
5054 second instruction after this one, subtract 2 in case pc is 2 byte
5055 aligned, subtract 2 for the jump instruction that we may need to emit
5056 before the table, subtract 2 for the instruction that fills the jump
5057 delay slot. This gives 1018. */
5058
5059 /* The branch will always be shortened now that the reference address for
5060 forward branches is the successor address, so we no longer need to make
5061 adjustments to the [sh]i_limit for -O0. */
5062
5063 si_limit = 1018;
5064 hi_limit = 510;
5065
5066 while (from && count_si < si_limit && count_hi < hi_limit)
5067 {
5068 int inc = get_attr_length (from);
5069 int new_align = 1;
5070
5071 /* If this is a label that existed at the time of the compute_alignments
5072 call, determine the alignment. N.B. When find_barrier recurses for
5073 an out-of-reach mova, we might see labels at the start of previously
5074 inserted constant tables. */
5075 if (LABEL_P (from)
5076 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5077 {
5078 if (optimize)
5079 new_align = 1 << label_to_alignment (from);
5080 else if (BARRIER_P (prev_nonnote_insn (from)))
5081 new_align = 1 << barrier_align (from);
5082 else
5083 new_align = 1;
5084 inc = 0;
5085 }
5086 /* In case we are scanning a constant table because of recursion, check
5087 for explicit alignments. If the table is long, we might be forced
5088 to emit the new table in front of it; the length of the alignment
5089 might be the last straw. */
5090 else if (NONJUMP_INSN_P (from)
5091 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5092 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5093 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5094 /* When we find the end of a constant table, paste the new constant
5095 at the end. That is better than putting it in front because
5096 this way, we don't need extra alignment for adding a 4-byte-aligned
5097 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5098 else if (NONJUMP_INSN_P (from)
5099 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5100 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5101 return from;
5102
5103 if (BARRIER_P (from))
5104 {
5105 rtx_insn *next;
5106
5107 found_barrier = from;
5108
5109 /* If we are at the end of the function, or in front of an alignment
5110 instruction, we need not insert an extra alignment. We prefer
5111 this kind of barrier. */
5112 if (barrier_align (from) > 2)
5113 good_barrier = from;
5114
5115 /* If we are at the end of a hot/cold block, dump the constants
5116 here. */
5117 next = NEXT_INSN (from);
5118 if (next
5119 && NOTE_P (next)
5120 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5121 break;
5122 }
5123
5124 if (broken_move (from))
5125 {
5126 rtx pat, src, dst;
5127 enum machine_mode mode;
5128
5129 pat = PATTERN (from);
5130 if (GET_CODE (pat) == PARALLEL)
5131 pat = XVECEXP (pat, 0, 0);
5132 src = SET_SRC (pat);
5133 dst = SET_DEST (pat);
5134 mode = GET_MODE (dst);
5135
5136 /* A GOT pc-relative setting comes in a pair of
5137 mova .L8,r0
5138 mov.l .L8,r12
5139 instructions (plus an add r0,r12).
5140 Remember if we see one without the other. */
5141 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5142 last_got = last_got ? NULL : from;
5143 else if (PIC_ADDR_P (src))
5144 last_got = last_got ? NULL : from;
5145
5146 /* We must explicitly check the mode, because sometimes the
5147 front end will generate code to load unsigned constants into
5148 HImode targets without properly sign extending them. */
5149 if (mode == HImode
5150 || (mode == SImode && satisfies_constraint_I16 (src)
5151 && REGNO (dst) != FPUL_REG))
5152 {
5153 found_hi += 2;
5154 /* We put the short constants before the long constants, so
5155 we must count the length of short constants in the range
5156 for the long constants. */
5157 /* ??? This isn't optimal, but is easy to do. */
5158 si_limit -= 2;
5159 }
5160 else
5161 {
5162 /* We dump DF/DI constants before SF/SI ones, because
5163 the limit is the same, but the alignment requirements
5164 are higher. We may waste up to 4 additional bytes
5165 for alignment, and the DF/DI constant may have
5166 another SF/SI constant placed before it. */
5167 if (TARGET_SHCOMPACT
5168 && ! found_di
5169 && (mode == DFmode || mode == DImode))
5170 {
5171 found_di = 1;
5172 si_limit -= 8;
5173 }
5174 while (si_align > 2 && found_si + si_align - 2 > count_si)
5175 si_align >>= 1;
5176 if (found_si > count_si)
5177 count_si = found_si;
5178 found_si += GET_MODE_SIZE (mode);
5179 if (num_mova)
5180 si_limit -= GET_MODE_SIZE (mode);
5181 }
5182 }
5183
5184 if (mova_p (from))
5185 {
5186 switch (untangle_mova (&num_mova, &mova, from))
5187 {
5188 case 1:
5189 if (flag_pic)
5190 {
5191 rtx src = SET_SRC (PATTERN (from));
5192 if (GET_CODE (src) == CONST
5193 && GET_CODE (XEXP (src, 0)) == UNSPEC
5194 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5195 last_symoff = from;
5196 }
5197 break;
5198 case 0: return find_barrier (0, 0, mova);
5199 case 2:
5200 {
5201 leading_mova = 0;
5202 barrier_before_mova
5203 = good_barrier ? good_barrier : found_barrier;
5204 }
5205 default: break;
5206 }
5207 if (found_si > count_si)
5208 count_si = found_si;
5209 }
5210 else if (JUMP_TABLE_DATA_P (from)
5211 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5212 {
5213 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5214 || (num_mova
5215 && (prev_nonnote_insn (from)
5216 == XEXP (MOVA_LABELREF (mova), 0))))
5217 num_mova--;
5218 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5219 {
5220 /* We have just passed the barrier in front of the
5221 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5222 the ADDR_DIFF_VEC is accessed as data, just like our pool
5223 constants, this is a good opportunity to accommodate what
5224 we have gathered so far.
5225 If we waited any longer, we could end up at a barrier in
5226 front of code, which gives worse cache usage for separated
5227 instruction / data caches. */
5228 good_barrier = found_barrier;
5229 break;
5230 }
5231 else
5232 {
5233 rtx body = PATTERN (from);
5234 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5235 }
5236 }
5237 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5238 else if (JUMP_P (from)
5239 && ! TARGET_SH2
5240 && ! optimize_size)
5241 new_align = 4;
5242
5243 /* There is a possibility that a bf is transformed into a bf/s by the
5244 delay slot scheduler. */
5245 if (JUMP_P (from)
5246 && get_attr_type (from) == TYPE_CBRANCH
5247 && ! sequence_insn_p (from))
5248 inc += 2;
5249
5250 if (found_si)
5251 {
5252 count_si += inc;
5253 if (new_align > si_align)
5254 {
5255 si_limit -= (count_si - 1) & (new_align - si_align);
5256 si_align = new_align;
5257 }
5258 count_si = (count_si + new_align - 1) & -new_align;
5259 }
5260 if (found_hi)
5261 {
5262 count_hi += inc;
5263 if (new_align > hi_align)
5264 {
5265 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5266 hi_align = new_align;
5267 }
5268 count_hi = (count_hi + new_align - 1) & -new_align;
5269 }
5270 from = NEXT_INSN (from);
5271 }
5272
5273 if (num_mova)
5274 {
5275 if (leading_mova)
5276 {
5277 /* Try as we might, the leading mova is out of range. Change
5278 it into a load (which will become a pcload) and retry. */
5279 fixup_mova (mova);
5280 return find_barrier (0, 0, mova);
5281 }
5282 else
5283 {
5284 /* Insert the constant pool table before the mova instruction,
5285 to prevent the mova label reference from going out of range. */
5286 from = mova;
5287 good_barrier = found_barrier = barrier_before_mova;
5288 }
5289 }
5290
5291 if (found_barrier)
5292 {
5293 if (good_barrier && next_real_insn (found_barrier))
5294 found_barrier = good_barrier;
5295 }
5296 else
5297 {
5298 /* We didn't find a barrier in time to dump our stuff,
5299 so we'll make one. */
5300 rtx_code_label *label = gen_label_rtx ();
5301
5302 /* Don't emit a constant table in the middle of insns for
5303 casesi_worker_2. This is a bit overkill but is enough
5304 because casesi_worker_2 wouldn't appear so frequently. */
5305 if (last_symoff)
5306 from = last_symoff;
5307
5308 /* If we exceeded the range, then we must back up over the last
5309 instruction we looked at. Otherwise, we just need to undo the
5310 NEXT_INSN at the end of the loop. */
5311 if (PREV_INSN (from) != orig
5312 && (count_hi > hi_limit || count_si > si_limit))
5313 from = PREV_INSN (PREV_INSN (from));
5314 else
5315 from = PREV_INSN (from);
5316
5317 /* Don't emit a constant table in the middle of global pointer setting,
5318 since that would move the addressing base GOT into another table.
5319 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5320 in the pool anyway, so just move up the whole constant pool.
5321
5322 However, avoid doing so when the last single GOT mov is the starting
5323 insn itself. Going back past the start insn would create a negative
5324 offset, causing errors. */
5325 if (last_got && last_got != orig)
5326 from = PREV_INSN (last_got);
5327
5328 /* Don't insert the constant pool table at the position which
5329 may be the landing pad. */
5330 if (flag_exceptions
5331 && CALL_P (from)
5332 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5333 from = PREV_INSN (from);
5334
5335 /* Walk back to be just before any jump or label.
5336 Putting it before a label reduces the number of times the branch
5337 around the constant pool table will be hit. Putting it before
5338 a jump makes it more likely that the bra delay slot will be
5339 filled. */
5340 while (NOTE_P (from) || JUMP_P (from)
5341 || LABEL_P (from))
5342 from = PREV_INSN (from);
5343
5344 /* Make sure we do not split between a call and its corresponding
5345 CALL_ARG_LOCATION note. */
5346 if (CALL_P (from))
5347 {
5348 rtx_insn *next = NEXT_INSN (from);
5349 if (next && NOTE_P (next)
5350 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5351 from = next;
5352 }
5353
5354 from = emit_jump_insn_after (gen_jump (label), from);
5355 JUMP_LABEL (from) = label;
5356 LABEL_NUSES (label) = 1;
5357 found_barrier = emit_barrier_after (from);
5358 emit_label_after (label, found_barrier);
5359 }
5360
5361 return found_barrier;
5362 }
5363
5364 /* If the instruction INSN is implemented by a special function, and we can
5365 positively find the register that is used to call the sfunc, and this
5366 register is not used anywhere else in this instruction - except as the
5367 destination of a set, return this register; else, return 0. */
5368 rtx
5369 sfunc_uses_reg (rtx insn)
5370 {
5371 int i;
5372 rtx pattern, part, reg_part, reg;
5373
5374 if (!NONJUMP_INSN_P (insn))
5375 return NULL_RTX;
5376 pattern = PATTERN (insn);
5377 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5378 return NULL_RTX;
5379
5380 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5381 {
5382 part = XVECEXP (pattern, 0, i);
5383 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5384 reg_part = part;
5385 }
5386 if (! reg_part)
5387 return NULL_RTX;
5388 reg = XEXP (reg_part, 0);
5389 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5390 {
5391 part = XVECEXP (pattern, 0, i);
5392 if (part == reg_part || GET_CODE (part) == CLOBBER)
5393 continue;
5394 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5395 && REG_P (SET_DEST (part)))
5396 ? SET_SRC (part) : part)))
5397 return NULL_RTX;
5398 }
5399 return reg;
5400 }
5401
5402 /* See if the only way in which INSN uses REG is by calling it, or by
5403 setting it while calling it. Set *SET to a SET rtx if the register
5404 is set by INSN. */
5405 static bool
5406 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
5407 {
5408 rtx pattern, reg2;
5409
5410 *set = NULL_RTX;
5411
5412 reg2 = sfunc_uses_reg (insn);
5413 if (reg2 && REGNO (reg2) == REGNO (reg))
5414 {
5415 pattern = single_set (insn);
5416 if (pattern
5417 && REG_P (SET_DEST (pattern))
5418 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5419 *set = pattern;
5420 return false;
5421 }
5422 if (!CALL_P (insn))
5423 {
5424 /* We don't use rtx_equal_p because we don't care if the mode is
5425 different. */
5426 pattern = single_set (insn);
5427 if (pattern
5428 && REG_P (SET_DEST (pattern))
5429 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5430 {
5431 rtx par, part;
5432 int i;
5433
5434 *set = pattern;
5435 par = PATTERN (insn);
5436 if (GET_CODE (par) == PARALLEL)
5437 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5438 {
5439 part = XVECEXP (par, 0, i);
5440 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5441 return true;
5442 }
5443 return reg_mentioned_p (reg, SET_SRC (pattern));
5444 }
5445
5446 return true;
5447 }
5448
5449 pattern = PATTERN (insn);
5450
5451 if (GET_CODE (pattern) == PARALLEL)
5452 {
5453 int i;
5454
5455 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5456 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5457 return true;
5458 pattern = XVECEXP (pattern, 0, 0);
5459 }
5460
5461 if (GET_CODE (pattern) == SET)
5462 {
5463 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5464 {
5465 /* We don't use rtx_equal_p, because we don't care if the
5466 mode is different. */
5467 if (!REG_P (SET_DEST (pattern))
5468 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5469 return true;
5470
5471 *set = pattern;
5472 }
5473
5474 pattern = SET_SRC (pattern);
5475 }
5476
5477 if (GET_CODE (pattern) != CALL
5478 || !MEM_P (XEXP (pattern, 0))
5479 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5480 return true;
5481
5482 return false;
5483 }
5484
5485 /* Given X, a pattern of an insn or a part of it, return a mask of used
5486 general registers. Bits 0..15 mean that the respective registers
5487 are used as inputs in the instruction. Bits 16..31 mean that the
5488 registers 0..15, respectively, are used as outputs, or are clobbered.
5489 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
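/* For example, for (set (reg:SI 1) (reg:SI 2)) this returns
   (1 << 17) | (1 << 2): bit 2 because r2 is read, and bit 17 because the
   output r1 is recorded at position 1 + 16 (SImode occupies a single
   hard register here).  */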
5490 int
5491 regs_used (rtx x, int is_dest)
5492 {
5493 enum rtx_code code;
5494 const char *fmt;
5495 int i, used = 0;
5496
5497 if (! x)
5498 return used;
5499 code = GET_CODE (x);
5500 switch (code)
5501 {
5502 case REG:
5503 if (REGNO (x) < 16)
5504 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5505 << (REGNO (x) + is_dest));
5506 return 0;
5507 case SUBREG:
5508 {
5509 rtx y = SUBREG_REG (x);
5510
5511 if (!REG_P (y))
5512 break;
5513 if (REGNO (y) < 16)
5514 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5515 << (REGNO (y) +
5516 subreg_regno_offset (REGNO (y),
5517 GET_MODE (y),
5518 SUBREG_BYTE (x),
5519 GET_MODE (x)) + is_dest));
5520 return 0;
5521 }
5522 case SET:
5523 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5524 case RETURN:
5525 /* If there was a return value, it must have been indicated with USE. */
5526 return 0x00ffff00;
5527 case CLOBBER:
5528 is_dest = 1;
5529 break;
5530 case MEM:
5531 is_dest = 0;
5532 break;
5533 case CALL:
5534 used |= 0x00ff00f0;
5535 break;
5536 default:
5537 break;
5538 }
5539
5540 fmt = GET_RTX_FORMAT (code);
5541
5542 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5543 {
5544 if (fmt[i] == 'E')
5545 {
5546 int j;
5547 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5548 used |= regs_used (XVECEXP (x, i, j), is_dest);
5549 }
5550 else if (fmt[i] == 'e')
5551 used |= regs_used (XEXP (x, i), is_dest);
5552 }
5553 return used;
5554 }
5555
5556 /* Create an instruction that prevents redirection of a conditional branch
5557 to the destination of the JUMP with address ADDR.
5558 If the branch needs to be implemented as an indirect jump, try to find
5559 a scratch register for it.
5560 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5561 If any preceding insn that doesn't fit into a delay slot is good enough,
5562 pass 1. Pass 2 if a definite blocking insn is needed.
5563 -1 is used internally to avoid deep recursion.
5564 If a blocking instruction is made or recognized, return it. */
5565 static rtx_insn *
5566 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5567 {
5568 int dead = 0;
5569 rtx_insn *prev = prev_nonnote_insn (jump);
5570 rtx dest;
5571
5572 /* First, check if we already have an instruction that satisfies our need. */
5573 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5574 {
5575 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5576 return prev;
5577 if (GET_CODE (PATTERN (prev)) == USE
5578 || GET_CODE (PATTERN (prev)) == CLOBBER
5579 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5580 prev = jump;
5581 else if ((need_block &= ~1) < 0)
5582 return prev;
5583 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5584 need_block = 0;
5585 }
5586 if (GET_CODE (PATTERN (jump)) == RETURN)
5587 {
5588 if (! need_block)
5589 return prev;
5590 /* Reorg even does nasty things with return insns that cause branches
5591 to go out of range - see find_end_label and callers. */
5592 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5593 }
5594 /* We can't use JUMP_LABEL here because it might be undefined
5595 when not optimizing. */
5596 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5597 /* If the branch is out of range, try to find a scratch register for it. */
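/* The addition of 4092 together with the unsigned comparison below folds
   both limits into a single test: displacements in [-4092, 4098] map to
   [0, 8190], so anything larger (including negative displacements, which
   wrap around) is treated as out of range.  */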
5598 if (optimize
5599 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5600 > 4092 + 4098))
5601 {
5602 rtx_insn *scan;
5603 /* Don't look for the stack pointer as a scratch register,
5604 it would cause trouble if an interrupt occurred. */
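/* 0x7fff has bits 0..14 set, i.e. r0..r14; bit 15 (r15, the stack
   pointer) is deliberately left clear, as per the comment above.  */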
5605 unsigned attempt = 0x7fff, used;
5606 int jump_left = flag_expensive_optimizations + 1;
5607
5608 /* It is likely that the most recent eligible instruction is wanted for
5609 the delay slot. Therefore, find out which registers it uses, and
5610 try to avoid using them. */
5611
5612 for (scan = jump; (scan = PREV_INSN (scan)); )
5613 {
5614 enum rtx_code code;
5615
5616 if (INSN_DELETED_P (scan))
5617 continue;
5618 code = GET_CODE (scan);
5619 if (code == CODE_LABEL || code == JUMP_INSN)
5620 break;
5621 if (code == INSN
5622 && GET_CODE (PATTERN (scan)) != USE
5623 && GET_CODE (PATTERN (scan)) != CLOBBER
5624 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5625 {
5626 attempt &= ~regs_used (PATTERN (scan), 0);
5627 break;
5628 }
5629 }
5630 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5631 (scan = NEXT_INSN (scan)); )
5632 {
5633 enum rtx_code code;
5634
5635 if (INSN_DELETED_P (scan))
5636 continue;
5637 code = GET_CODE (scan);
5638 if (INSN_P (scan))
5639 {
5640 used |= regs_used (PATTERN (scan), 0);
5641 if (code == CALL_INSN)
5642 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
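/* A register whose output bit (16..31) is set while its input bit is
   still clear has been written after the jump target without first being
   read, so its value at the jump is not needed; such registers accumulate
   in DEAD as scratch candidates.  */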
5643 dead |= (used >> 16) & ~used;
5644 if (dead & attempt)
5645 {
5646 dead &= attempt;
5647 break;
5648 }
5649 if (code == JUMP_INSN)
5650 {
5651 if (jump_left-- && simplejump_p (scan))
5652 scan = JUMP_LABEL_AS_INSN (scan);
5653 else
5654 break;
5655 }
5656 }
5657 }
5658 /* Mask out the stack pointer again, in case it was
5659 the only 'free' register we have found. */
5660 dead &= 0x7fff;
5661 }
5662 /* If the immediate destination is still in range, check for possible
5663 threading with a jump beyond the delay slot insn.
5664 Don't check if we are called recursively; the jump has been or will be
5665 checked in a different invocation in that case. */
5666
5667 else if (optimize && need_block >= 0)
5668 {
5669 rtx_insn *next = next_active_insn (next_active_insn (dest));
5670 if (next && JUMP_P (next)
5671 && GET_CODE (PATTERN (next)) == SET
5672 && recog_memoized (next) == CODE_FOR_jump_compact)
5673 {
5674 dest = JUMP_LABEL (next);
5675 if (dest
5676 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5677 > 4092 + 4098))
5678 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5679 }
5680 }
5681
5682 if (dead)
5683 {
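/* dead & -dead isolates the lowest set bit, so exact_log2 gives the
   lowest-numbered register that was found to be dead.  */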
5684 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5685
5686 /* It would be nice if we could convert the jump into an indirect
5687 jump / far branch right now, and thus exposing all constituent
5688 instructions to further optimization. However, reorg uses
5689 simplejump_p to determine if there is an unconditional jump where
5690 it should try to schedule instructions from the target of the
5691 branch; simplejump_p fails for indirect jumps even if they have
5692 a JUMP_LABEL. */
5693 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5694 (reg, GEN_INT (unspec_bbr_uid++)),
5695 jump);
5696 /* ??? We would like this to have the scope of the jump, but that
5697 scope will change when a delay slot insn of an inner scope is added.
5698 Hence, after delay slot scheduling, we'll have to expect
5699 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5700 the jump. */
5701
5702 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5703 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5704 return insn;
5705 }
5706 else if (need_block)
5707 /* We can't use JUMP_LABEL here because it might be undefined
5708 when not optimizing. */
5709 return emit_insn_before (gen_block_branch_redirect
5710 (GEN_INT (unspec_bbr_uid++)),
5711 jump);
5712 return prev;
5713 }
5714
5715 #define CONDJUMP_MIN -252
5716 #define CONDJUMP_MAX 262
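/* These bound the byte displacement over which the code below assumes a
   conditional branch can still reach its target directly; branches whose
   target ends up outside this window are rerouted through a far branch
   sequence built by gen_far_branch.  */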
5717 struct far_branch
5718 {
5719 /* A label (to be placed) in front of the jump
5720 that jumps to our ultimate destination. */
5721 rtx_insn *near_label;
5722 /* Where we are going to insert it if we cannot move the jump any farther,
5723 or the jump itself if we have picked up an existing jump. */
5724 rtx_insn *insert_place;
5725 /* The ultimate destination. */
5726 rtx_insn *far_label;
5727 struct far_branch *prev;
5728 /* If the branch has already been created, its address;
5729 else the address of its first prospective user. */
5730 int address;
5731 };
5732
5733 static void gen_far_branch (struct far_branch *);
5734 enum mdep_reorg_phase_e mdep_reorg_phase;
5735 static void
5736 gen_far_branch (struct far_branch *bp)
5737 {
5738 rtx insn = bp->insert_place;
5739 rtx_insn *jump;
5740 rtx label = gen_label_rtx ();
5741 int ok;
5742
5743 emit_label_after (label, insn);
5744 if (bp->far_label)
5745 {
5746 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5747 LABEL_NUSES (bp->far_label)++;
5748 }
5749 else
5750 jump = emit_jump_insn_after (gen_return (), insn);
5751
5752 /* Emit a barrier so that reorg knows that any following instructions
5753 are not reachable via a fall-through path.
5754 But don't do this when not optimizing, since we wouldn't suppress the
5755 alignment for the barrier then, and could end up with out-of-range
5756 pc-relative loads. */
5757 if (optimize)
5758 emit_barrier_after (jump);
5759 emit_label_after (bp->near_label, insn);
5760
5761 if (bp->far_label)
5762 JUMP_LABEL (jump) = bp->far_label;
5763 else
5764 {
5765 rtx pat = PATTERN (jump);
5766 gcc_assert (ANY_RETURN_P (pat));
5767 JUMP_LABEL (jump) = pat;
5768 }
5769
5770 ok = invert_jump (insn, label, 1);
5771 gcc_assert (ok);
5772
5773 /* If we are branching around a jump (rather than a return), prevent
5774 reorg from using an insn from the jump target as the delay slot insn -
5775 when reorg did this, it pessimized code (we would rather hide the delay slot)
5776 and it could cause branches to go out of range. */
5777 if (bp->far_label)
5778 (emit_insn_after
5779 (gen_stuff_delay_slot
5780 (GEN_INT (unspec_bbr_uid++),
5781 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5782 insn));
5783 /* Prevent reorg from undoing our splits. */
5784 gen_block_redirect (jump, bp->address += 2, 2);
5785 }
5786
5787 /* Fix up ADDR_DIFF_VECs. */
5788 void
5789 fixup_addr_diff_vecs (rtx_insn *first)
5790 {
5791 rtx_insn *insn;
5792
5793 for (insn = first; insn; insn = NEXT_INSN (insn))
5794 {
5795 rtx vec_lab, pat, prevpat, x, braf_label;
5796 rtx_insn *prev;
5797
5798 if (! JUMP_TABLE_DATA_P (insn)
5799 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5800 continue;
5801 pat = PATTERN (insn);
5802 vec_lab = XEXP (XEXP (pat, 0), 0);
5803
5804 /* Search the matching casesi_jump_2. */
5805 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5806 {
5807 if (!JUMP_P (prev))
5808 continue;
5809 prevpat = PATTERN (prev);
5810 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5811 continue;
5812 x = XVECEXP (prevpat, 0, 1);
5813 if (GET_CODE (x) != USE)
5814 continue;
5815 x = XEXP (x, 0);
5816 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5817 break;
5818 }
5819 /* FIXME: This is a bug in the optimizer, but it seems harmless
5820 to just avoid panicking. */
5821 if (!prev)
5822 continue;
5823
5824 /* Emit the reference label of the braf where it belongs, right after
5825 the casesi_jump_2 (i.e. braf). */
5826 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5827 emit_label_after (braf_label, prev);
5828
5829 /* Fix up the ADDR_DIF_VEC to be relative
5830 to the reference address of the braf. */
5831 XEXP (XEXP (pat, 0), 0) = braf_label;
5832 }
5833 }
5834
5835 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5836 a barrier. Return the base 2 logarithm of the desired alignment. */
5837 int
5838 barrier_align (rtx_insn *barrier_or_label)
5839 {
5840 rtx next, pat;
5841
5842 if (! barrier_or_label)
5843 return 0;
5844
5845 if (LABEL_P (barrier_or_label)
5846 && NEXT_INSN (barrier_or_label)
5847 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5848 return 2;
5849
5850 if (BARRIER_P (barrier_or_label)
5851 && PREV_INSN (barrier_or_label)
5852 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5853 {
5854 pat = PATTERN (PREV_INSN (barrier_or_label));
5855 /* If this is a very small table, we want to keep the alignment after
5856 the table to the minimum for proper code alignment. */
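/* XVECLEN (pat, 1) is the number of table entries and GET_MODE_SIZE the
   size of each entry, so this compares the table's total size in bytes
   against 1 << (CACHE_LOG - 2), presumably a quarter of a cache line.  */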
5857 return ((optimize_size
5858 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5859 <= (unsigned) 1 << (CACHE_LOG - 2)))
5860 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5861 }
5862
5863 next = next_active_insn (barrier_or_label);
5864
5865 if (! next)
5866 return 0;
5867
5868 pat = PATTERN (next);
5869
5870 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5871 /* This is a barrier in front of a constant table. */
5872 return 0;
5873
5874 if (optimize_size)
5875 return 0;
5876
5877 if (! TARGET_SH2 || ! optimize)
5878 return align_jumps_log;
5879
5880 /* When fixing up pcloads, a constant table might be inserted just before
5881 the basic block that ends with the barrier. Thus, we can't trust the
5882 instruction lengths before that. */
5883 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5884 {
5885 /* Check if there is an immediately preceding branch to the insn beyond
5886 the barrier. We must weigh the cost of discarding useful information
5887 from the current cache line when executing this branch and there is
5888 an alignment, against that of fetching unneeded insns in front of the
5889 branch target when there is no alignment. */
5890
5891 /* There are two delay_slot cases to consider. One is the simple case
5892 where the preceding branch is to the insn beyond the barrier (simple
5893 delay slot filling), and the other is where the preceding branch has
5894 a delay slot that is a duplicate of the insn after the barrier
5895 (fill_eager_delay_slots) and the branch is to the insn after the insn
5896 after the barrier. */
5897
5898 int slot, credit;
5899 bool jump_to_next = false;
5900
5901 /* Skip to the insn before the JUMP_INSN before the barrier under
5902 investigation. */
5903 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
5904
5905 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5906 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5907 prev = prev_real_insn (prev))
5908 {
5909 jump_to_next = false;
5910 if (GET_CODE (PATTERN (prev)) == USE
5911 || GET_CODE (PATTERN (prev)) == CLOBBER)
5912 continue;
5913 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
5914 {
5915 prev = prev_seq->insn (1);
5916 if (INSN_UID (prev) == INSN_UID (next))
5917 {
5918 /* Delay slot was filled with insn at jump target. */
5919 jump_to_next = true;
5920 continue;
5921 }
5922 }
5923
5924 if (slot
5925 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5926 slot = 0;
5927 credit -= get_attr_length (prev);
5928 }
5929 if (prev && jump_to_label_p (prev))
5930 {
5931 rtx_insn *x;
5932 if (jump_to_next
5933 || next_real_insn (JUMP_LABEL (prev)) == next
5934 /* If relax_delay_slots() decides NEXT was redundant
5935 with some previous instruction, it will have
5936 redirected PREV's jump to the following insn. */
5937 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5938 /* There is no upper bound on redundant instructions
5939 that might have been skipped, but we must not put an
5940 alignment where none had been before. */
5941 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5942 (INSN_P (x)
5943 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5944 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5945 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5946 {
5947 rtx pat = PATTERN (prev);
5948 if (GET_CODE (pat) == PARALLEL)
5949 pat = XVECEXP (pat, 0, 0);
5950 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5951 return 0;
5952 }
5953 }
5954 }
5955
5956 return align_jumps_log;
5957 }
5958
5959 /* If we are inside a phony loop, almost any kind of label can turn up as the
5960 first one in the loop. Aligning a braf label causes incorrect switch
5961 destination addresses; we can detect braf labels because they are
5962 followed by a BARRIER.
5963 Applying loop alignment to small constant or switch tables is a waste
5964 of space, so we suppress this too. */
5965 int
5966 sh_loop_align (rtx label)
5967 {
5968 rtx next = label;
5969
5970 if (! optimize || optimize_size)
5971 return 0;
5972
5973 do
5974 next = next_nonnote_insn (next);
5975 while (next && LABEL_P (next));
5976
5977 if (! next
5978 || ! INSN_P (next)
5979 || recog_memoized (next) == CODE_FOR_consttable_2)
5980 return 0;
5981
5982 return align_loops_log;
5983 }
5984
5985 /* Do a final pass over the function, just before delayed branch
5986 scheduling. */
5987 static void
5988 sh_reorg (void)
5989 {
5990 rtx_insn *first, *insn, *mova = NULL;
5991 int num_mova;
5992 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5993 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5994
5995 first = get_insns ();
5996 max_labelno_before_reorg = max_label_num ();
5997
5998 /* We must split call insns before introducing `mova's. If we're
5999 optimizing, they'll have already been split. Otherwise, make
6000 sure we don't split them too late. */
6001 if (! optimize)
6002 split_all_insns_noflow ();
6003
6004 if (TARGET_SHMEDIA)
6005 return;
6006
6007 /* If relaxing, generate pseudo-ops to associate function calls with
6008 the symbols they call. It does no harm to not generate these
6009 pseudo-ops. However, when we can generate them, it enables the
6010 linker to potentially relax the jsr to a bsr, and eliminate the
6011 register load and, possibly, the constant pool entry. */
6012
6013 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6014 if (TARGET_RELAX)
6015 {
6016 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6017 own purposes. This works because none of the remaining passes
6018 need to look at them.
6019
6020 ??? But it may break in the future. We should use a machine
6021 dependent REG_NOTE, or some other approach entirely. */
6022 for (insn = first; insn; insn = NEXT_INSN (insn))
6023 {
6024 if (INSN_P (insn))
6025 {
6026 rtx note;
6027
6028 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6029 NULL_RTX)) != 0)
6030 remove_note (insn, note);
6031 }
6032 }
6033
6034 for (insn = first; insn; insn = NEXT_INSN (insn))
6035 {
6036 rtx pattern, reg, set, dies, label;
6037 rtx_insn *link, *scan;
6038 int rescan = 0, foundinsn = 0;
6039
6040 if (CALL_P (insn))
6041 {
6042 pattern = PATTERN (insn);
6043
6044 if (GET_CODE (pattern) == PARALLEL)
6045 pattern = XVECEXP (pattern, 0, 0);
6046 if (GET_CODE (pattern) == SET)
6047 pattern = SET_SRC (pattern);
6048
6049 if (GET_CODE (pattern) != CALL
6050 || !MEM_P (XEXP (pattern, 0)))
6051 continue;
6052
6053 reg = XEXP (XEXP (pattern, 0), 0);
6054 }
6055 else
6056 {
6057 reg = sfunc_uses_reg (insn);
6058 if (! reg)
6059 continue;
6060 }
6061
6062 if (!REG_P (reg))
6063 continue;
6064
6065 /* Try scanning backward to find where the register is set. */
6066 link = NULL;
6067 for (scan = PREV_INSN (insn);
6068 scan && !LABEL_P (scan);
6069 scan = PREV_INSN (scan))
6070 {
6071 if (! INSN_P (scan))
6072 continue;
6073
6074 if (! reg_mentioned_p (reg, scan))
6075 continue;
6076
6077 if (noncall_uses_reg (reg, scan, &set))
6078 break;
6079
6080 if (set)
6081 {
6082 link = scan;
6083 break;
6084 }
6085 }
6086
6087 if (! link)
6088 continue;
6089
6090 /* The register is set at LINK. */
6091
6092 /* We can only optimize the function call if the register is
6093 being set to a symbol. In theory, we could sometimes
6094 optimize calls to a constant location, but the assembler
6095 and linker do not support that at present. */
6096 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6097 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6098 continue;
6099
6100 /* Scan forward from LINK to the place where REG dies, and
6101 make sure that the only insns which use REG are
6102 themselves function calls. */
6103
6104 /* ??? This doesn't work for call targets that were allocated
6105 by reload, since there may not be a REG_DEAD note for the
6106 register. */
6107
6108 dies = NULL_RTX;
6109 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6110 {
6111 rtx scanset;
6112
6113 /* Don't try to trace forward past a CODE_LABEL if we haven't
6114 seen INSN yet. Ordinarily, we will only find the setting insn
6115 if it is in the same basic block. However,
6116 cross-jumping can insert code labels in between the load and
6117 the call, and can result in situations where a single call
6118 insn may have two targets depending on where we came from. */
6119
6120 if (LABEL_P (scan) && ! foundinsn)
6121 break;
6122
6123 if (! INSN_P (scan))
6124 continue;
6125
6126 /* Don't try to trace forward past a JUMP. To optimize
6127 safely, we would have to check that all the
6128 instructions at the jump destination did not use REG. */
6129
6130 if (JUMP_P (scan))
6131 break;
6132
6133 if (! reg_mentioned_p (reg, scan))
6134 continue;
6135
6136 if (noncall_uses_reg (reg, scan, &scanset))
6137 break;
6138
6139 if (scan == insn)
6140 foundinsn = 1;
6141
6142 if (scan != insn
6143 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6144 {
6145 /* There is a function call to this register other
6146 than the one we are checking. If we optimize
6147 this call, we need to rescan again below. */
6148 rescan = 1;
6149 }
6150
6151 /* ??? We shouldn't have to worry about SCANSET here.
6152 We should just be able to check for a REG_DEAD note
6153 on a function call. However, the REG_DEAD notes are
6154 apparently not dependable around libcalls; c-torture
6155 execute/920501-2 is a test case. If SCANSET is set,
6156 then this insn sets the register, so it must have
6157 died earlier. Unfortunately, this will only handle
6158 the cases in which the register is, in fact, set in a
6159 later insn. */
6160
6161 /* ??? We shouldn't have to use FOUNDINSN here.
6162 This dates back to when we used LOG_LINKS to find
6163 the most recent insn which sets the register. */
6164
6165 if (foundinsn
6166 && (scanset
6167 || find_reg_note (scan, REG_DEAD, reg)))
6168 {
6169 dies = scan;
6170 break;
6171 }
6172 }
6173
6174 if (! dies)
6175 {
6176 /* Either there was a branch, or some insn used REG
6177 other than as a function call address. */
6178 continue;
6179 }
6180
6181 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6182 on the insn which sets the register, and on each call insn
6183 which uses the register. In final_prescan_insn we look for
6184 the REG_LABEL_OPERAND notes, and output the appropriate label
6185 or pseudo-op. */
6186
6187 label = gen_label_rtx ();
6188 add_reg_note (link, REG_LABEL_OPERAND, label);
6189 add_reg_note (insn, REG_LABEL_OPERAND, label);
6190 if (rescan)
6191 {
6192 scan = link;
6193 do
6194 {
6195 rtx reg2;
6196
6197 scan = NEXT_INSN (scan);
6198 if (scan != insn
6199 && ((CALL_P (scan)
6200 && reg_mentioned_p (reg, scan))
6201 || ((reg2 = sfunc_uses_reg (scan))
6202 && REGNO (reg2) == REGNO (reg))))
6203 add_reg_note (scan, REG_LABEL_OPERAND, label);
6204 }
6205 while (scan != dies);
6206 }
6207 }
6208 }
6209
6210 if (TARGET_SH2)
6211 fixup_addr_diff_vecs (first);
6212
6213 if (optimize)
6214 {
6215 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6216 shorten_branches (first);
6217 }
6218
6219 /* Scan the function looking for move instructions which have to be
6220 changed to pc-relative loads and insert the literal tables. */
6221 label_ref_list_pool = create_alloc_pool ("label references list",
6222 sizeof (struct label_ref_list_d),
6223 30);
6224 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6225 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6226 {
6227 if (mova_p (insn))
6228 {
6229 /* ??? basic block reordering can move a switch table dispatch
6230 below the switch table. Check if that has happened.
6231 We only have the addresses available when optimizing; but then,
6232 this check shouldn't be needed when not optimizing. */
6233 if (!untangle_mova (&num_mova, &mova, insn))
6234 {
6235 insn = mova;
6236 num_mova = 0;
6237 }
6238 }
6239 else if (JUMP_TABLE_DATA_P (insn)
6240 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6241 && num_mova
6242 /* ??? loop invariant motion can also move a mova out of a
6243 loop. Since loop does this code motion anyway, maybe we
6244 should wrap UNSPEC_MOVA into a CONST, so that reload can
6245 move it back. */
6246 && ((num_mova > 1
6247 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6248 || (prev_nonnote_insn (insn)
6249 == XEXP (MOVA_LABELREF (mova), 0))))
6250 {
6251 rtx_insn *scan;
6252 int total;
6253
6254 num_mova--;
6255
6256 /* Some code might have been inserted between the mova and
6257 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6258 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6259 total += get_attr_length (scan);
6260
6261 /* The range of mova is 1020; add 4 because the pc counts from the address
6262 of the second instruction after this one, and subtract 2 in case the pc
6263 is 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6264 cancels out with the alignment effects of the mova itself. */
6265 if (total > 1022)
6266 {
6267 /* Change the mova into a load, and restart scanning
6268 there. broken_move will then return true for mova. */
6269 fixup_mova (mova);
6270 insn = mova;
6271 }
6272 }
6273 if (broken_move (insn)
6274 || (NONJUMP_INSN_P (insn)
6275 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6276 {
6277 rtx_insn *scan;
6278 /* Scan ahead looking for a barrier to stick the constant table
6279 behind. */
6280 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6281 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
6282 int need_aligned_label = 0;
6283
6284 if (num_mova && ! mova_p (mova))
6285 {
6286 /* find_barrier had to change the first mova into a
6287 pcload; thus, we have to start with this new pcload. */
6288 insn = mova;
6289 num_mova = 0;
6290 }
6291 /* Now find all the moves between the points and modify them. */
6292 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6293 {
6294 if (LABEL_P (scan))
6295 last_float = 0;
6296 if (NONJUMP_INSN_P (scan)
6297 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6298 need_aligned_label = 1;
6299 if (broken_move (scan))
6300 {
6301 rtx *patp = &PATTERN (scan), pat = *patp;
6302 rtx src, dst;
6303 rtx lab;
6304 rtx newsrc;
6305 enum machine_mode mode;
6306
6307 if (GET_CODE (pat) == PARALLEL)
6308 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6309 src = SET_SRC (pat);
6310 dst = SET_DEST (pat);
6311 mode = GET_MODE (dst);
6312
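/* If an SImode constant fits in signed 16 bits (I16), narrow it to HImode
   so the pool entry only takes 2 bytes; the pc-relative mov.w load
   sign-extends it back to 32 bits.  FPUL is excluded, presumably because
   it cannot be loaded directly this way.  */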
6313 if (mode == SImode && satisfies_constraint_I16 (src)
6314 && REGNO (dst) != FPUL_REG)
6315 {
6316 int offset = 0;
6317
6318 mode = HImode;
6319 while (GET_CODE (dst) == SUBREG)
6320 {
6321 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6322 GET_MODE (SUBREG_REG (dst)),
6323 SUBREG_BYTE (dst),
6324 GET_MODE (dst));
6325 dst = SUBREG_REG (dst);
6326 }
6327 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6328 }
6329 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6330 {
6331 /* This must be an insn that clobbers r0. */
6332 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6333 XVECLEN (PATTERN (scan), 0)
6334 - 1);
6335 rtx clobber = *clobberp;
6336
6337 gcc_assert (GET_CODE (clobber) == CLOBBER
6338 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6339
6340 if (last_float
6341 && reg_set_between_p (r0_rtx, last_float_move, scan))
6342 last_float = 0;
6343 if (last_float
6344 && TARGET_SHCOMPACT
6345 && GET_MODE_SIZE (mode) != 4
6346 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6347 last_float = 0;
6348 lab = add_constant (src, mode, last_float);
6349 if (lab)
6350 emit_insn_before (gen_mova (lab), scan);
6351 else
6352 {
6353 /* There will be a REG_UNUSED note for r0 on
6354 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6355 because otherwise reorg:mark_target_live_regs would not
6356 consider r0 to be used, and we would end up with a delay
6357 slot insn in front of SCAN that clobbers r0. */
6358 rtx note
6359 = find_regno_note (last_float_move, REG_UNUSED, 0);
6360
6361 /* If we are not optimizing, then there may not be
6362 a note. */
6363 if (note)
6364 PUT_REG_NOTE_KIND (note, REG_INC);
6365
6366 *last_float_addr = r0_inc_rtx;
6367 }
6368 last_float_move = scan;
6369 last_float = src;
6370 newsrc = gen_const_mem (mode,
6371 (((TARGET_SH4 && ! TARGET_FMOVD)
6372 || REGNO (dst) == FPUL_REG)
6373 ? r0_inc_rtx
6374 : r0_rtx));
6375 last_float_addr = &XEXP (newsrc, 0);
6376
6377 /* Remove the clobber of r0. */
6378 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6379 gen_rtx_SCRATCH (Pmode));
6380 }
6381 /* This is a mova needing a label. Create it. */
6382 else if (GET_CODE (src) == UNSPEC
6383 && XINT (src, 1) == UNSPEC_MOVA
6384 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6385 {
6386 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6387 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6388 newsrc = gen_rtx_UNSPEC (SImode,
6389 gen_rtvec (1, newsrc),
6390 UNSPEC_MOVA);
6391 }
6392 else if (GET_CODE (src) == UNSPEC_VOLATILE
6393 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6394 {
6395 newsrc = XVECEXP (src, 0, 0);
6396 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6397 INSN_CODE (scan) = -1;
6398 continue;
6399 }
6400 else
6401 {
6402 lab = add_constant (src, mode, 0);
6403 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6404 newsrc = gen_const_mem (mode, newsrc);
6405 }
6406 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6407 INSN_CODE (scan) = -1;
6408 }
6409 }
6410 dump_table (need_aligned_label ? insn : 0, barrier);
6411 insn = barrier;
6412 }
6413 }
6414 free_alloc_pool (label_ref_list_pool);
6415 for (insn = first; insn; insn = NEXT_INSN (insn))
6416 PUT_MODE (insn, VOIDmode);
6417
6418 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6419 INSN_ADDRESSES_FREE ();
6420 split_branches (first);
6421
6422 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6423 also has an effect on the register that holds the address of the sfunc.
6424 Insert an extra dummy insn in front of each sfunc that pretends to
6425 use this register. */
6426 if (flag_delayed_branch)
6427 {
6428 for (insn = first; insn; insn = NEXT_INSN (insn))
6429 {
6430 rtx reg = sfunc_uses_reg (insn);
6431
6432 if (! reg)
6433 continue;
6434 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6435 }
6436 }
6437 #if 0
6438 /* fpscr is not actually a user variable, but we pretend it is for the
6439 sake of the previous optimization passes, since we want it handled like
6440 one. However, we don't have any debugging information for it, so turn
6441 it into a non-user variable now. */
6442 if (TARGET_SH4)
6443 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
6444 #endif
6445 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6446 }
6447
6448 /* Return the UID of the insn that follows the specified label. */
6449 int
6450 get_dest_uid (rtx label, int max_uid)
6451 {
6452 rtx_insn *dest = next_real_insn (label);
6453 int dest_uid;
6454 if (! dest)
6455 /* This can happen for an undefined label. */
6456 return 0;
6457 dest_uid = INSN_UID (dest);
6458 /* If this is a newly created branch redirection blocking instruction,
6459 we cannot index the branch_uid or insn_addresses arrays with its
6460 uid. But then, we won't need to, because the actual destination is
6461 the following branch. */
6462 while (dest_uid >= max_uid)
6463 {
6464 dest = NEXT_INSN (dest);
6465 dest_uid = INSN_UID (dest);
6466 }
6467 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6468 return 0;
6469 return dest_uid;
6470 }
6471
6472 /* Split condbranches that are out of range. Also add clobbers for
6473 scratch registers that are needed in far jumps.
6474 We do this before delay slot scheduling, so that it can take our
6475 newly created instructions into account. It also allows us to
6476 find branches with common targets more easily. */
6477 static void
6478 split_branches (rtx_insn *first)
6479 {
6480 rtx_insn *insn;
6481 struct far_branch **uid_branch, *far_branch_list = 0;
6482 int max_uid = get_max_uid ();
6483 int ok;
6484
6485 /* Find out which branches are out of range. */
6486 shorten_branches (first);
6487
6488 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6489 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6490
6491 for (insn = first; insn; insn = NEXT_INSN (insn))
6492 if (! INSN_P (insn))
6493 continue;
6494 else if (INSN_DELETED_P (insn))
6495 {
6496 /* Shorten_branches would split this instruction again,
6497 so transform it into a note. */
6498 SET_INSN_DELETED (insn);
6499 }
6500 else if (JUMP_P (insn))
6501 {
6502 enum attr_type type = get_attr_type (insn);
6503 if (type == TYPE_CBRANCH)
6504 {
6505 rtx_insn *next, *beyond;
6506
6507 if (get_attr_length (insn) > 4)
6508 {
6509 rtx src = SET_SRC (PATTERN (insn));
6510 rtx olabel = XEXP (XEXP (src, 1), 0);
6511 int addr = INSN_ADDRESSES (INSN_UID (insn));
6512 rtx_insn *label = 0;
6513 int dest_uid = get_dest_uid (olabel, max_uid);
6514 struct far_branch *bp = uid_branch[dest_uid];
6515
6516 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6517 the label if the LABEL_NUSES count drops to zero. There is
6518 always a jump_optimize pass that sets these values, but it
6519 proceeds to delete unreferenced code, and then if not
6520 optimizing, to un-delete the deleted instructions, thus
6521 leaving labels with use counts that are too low. */
6522 if (! optimize)
6523 {
6524 JUMP_LABEL (insn) = olabel;
6525 LABEL_NUSES (olabel)++;
6526 }
6527 if (! bp)
6528 {
6529 bp = (struct far_branch *) alloca (sizeof *bp);
6530 uid_branch[dest_uid] = bp;
6531 bp->prev = far_branch_list;
6532 far_branch_list = bp;
6533 bp->far_label = as_a <rtx_insn *> (
6534 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6535 0));
6536 LABEL_NUSES (bp->far_label)++;
6537 }
6538 else
6539 {
6540 label = bp->near_label;
6541 if (! label && bp->address - addr >= CONDJUMP_MIN)
6542 {
6543 rtx_insn *block = bp->insert_place;
6544
6545 if (GET_CODE (PATTERN (block)) == RETURN)
6546 block = PREV_INSN (block);
6547 else
6548 block = gen_block_redirect (block,
6549 bp->address, 2);
6550 label = emit_label_after (gen_label_rtx (),
6551 PREV_INSN (block));
6552 bp->near_label = label;
6553 }
6554 else if (label && ! NEXT_INSN (label))
6555 {
6556 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6557 bp->insert_place = insn;
6558 else
6559 gen_far_branch (bp);
6560 }
6561 }
6562 if (! label
6563 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6564 {
6565 bp->near_label = label = gen_label_rtx ();
6566 bp->insert_place = insn;
6567 bp->address = addr;
6568 }
6569 ok = redirect_jump (insn, label, 0);
6570 gcc_assert (ok);
6571 }
6572 else
6573 {
6574 /* get_attr_length (insn) == 2 */
6575 /* Check if we have a pattern where reorg wants to redirect
6576 the branch to a label from an unconditional branch that
6577 is too far away. */
6578 /* We can't use JUMP_LABEL here because it might be undefined
6579 when not optimizing. */
6580 /* A syntax error might cause beyond to be NULL_RTX. */
6581 beyond
6582 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6583 0));
6584
6585 if (beyond
6586 && (JUMP_P (beyond)
6587 || ((beyond = next_active_insn (beyond))
6588 && JUMP_P (beyond)))
6589 && GET_CODE (PATTERN (beyond)) == SET
6590 && recog_memoized (beyond) == CODE_FOR_jump_compact
6591 && ((INSN_ADDRESSES
6592 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6593 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6594 > 252 + 258 + 2))
6595 gen_block_redirect (beyond,
6596 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6597 }
6598
6599 next = next_active_insn (insn);
6600
6601 if (next
6602 && (JUMP_P (next)
6603 || ((next = next_active_insn (next))
6604 && JUMP_P (next)))
6605 && GET_CODE (PATTERN (next)) == SET
6606 && recog_memoized (next) == CODE_FOR_jump_compact
6607 && ((INSN_ADDRESSES
6608 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6609 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6610 > 252 + 258 + 2))
6611 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6612 }
6613 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6614 {
6615 int addr = INSN_ADDRESSES (INSN_UID (insn));
6616 rtx_insn *far_label = 0;
6617 int dest_uid = 0;
6618 struct far_branch *bp;
6619
6620 if (type == TYPE_JUMP)
6621 {
6622 far_label = as_a <rtx_insn *> (
6623 XEXP (SET_SRC (PATTERN (insn)), 0));
6624 dest_uid = get_dest_uid (far_label, max_uid);
6625 if (! dest_uid)
6626 {
6627 /* Parse errors can lead to labels outside
6628 the insn stream. */
6629 if (! NEXT_INSN (far_label))
6630 continue;
6631
6632 if (! optimize)
6633 {
6634 JUMP_LABEL (insn) = far_label;
6635 LABEL_NUSES (far_label)++;
6636 }
6637 redirect_jump (insn, ret_rtx, 1);
6638 far_label = 0;
6639 }
6640 }
6641 bp = uid_branch[dest_uid];
6642 if (! bp)
6643 {
6644 bp = (struct far_branch *) alloca (sizeof *bp);
6645 uid_branch[dest_uid] = bp;
6646 bp->prev = far_branch_list;
6647 far_branch_list = bp;
6648 bp->near_label = 0;
6649 bp->far_label = far_label;
6650 if (far_label)
6651 LABEL_NUSES (far_label)++;
6652 }
6653 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6654 if (addr - bp->address <= CONDJUMP_MAX)
6655 emit_label_after (bp->near_label, PREV_INSN (insn));
6656 else
6657 {
6658 gen_far_branch (bp);
6659 bp->near_label = 0;
6660 }
6661 else
6662 bp->near_label = 0;
6663 bp->address = addr;
6664 bp->insert_place = insn;
6665 if (! far_label)
6666 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6667 else
6668 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6669 }
6670 }
6671 /* Generate all pending far branches,
6672 and free our references to the far labels. */
6673 while (far_branch_list)
6674 {
6675 if (far_branch_list->near_label
6676 && ! NEXT_INSN (far_branch_list->near_label))
6677 gen_far_branch (far_branch_list);
6678 if (optimize
6679 && far_branch_list->far_label
6680 && ! --LABEL_NUSES (far_branch_list->far_label))
6681 delete_insn (far_branch_list->far_label);
6682 far_branch_list = far_branch_list->prev;
6683 }
6684
6685 /* Instruction length information is no longer valid due to the new
6686 instructions that have been generated. */
6687 init_insn_lengths ();
6688 }
6689
6690 /* Dump out instruction addresses, which is useful for debugging the
6691 constant pool table stuff.
6692
6693 If relaxing, output the label and pseudo-ops used to link together
6694 calls and the instruction which set the registers.
6695
6696 ??? The addresses printed by this routine for insns are nonsense for
6697 insns which are inside of a sequence where none of the inner insns have
6698 variable length. This is because the second pass of shorten_branches
6699 does not bother to update them. */
6700 void
6701 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6702 int noperands ATTRIBUTE_UNUSED)
6703 {
6704 if (TARGET_DUMPISIZE)
6705 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6706
6707 if (TARGET_RELAX)
6708 {
6709 rtx note;
6710
6711 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6712 if (note)
6713 {
6714 rtx pattern;
6715
6716 pattern = PATTERN (insn);
6717 if (GET_CODE (pattern) == PARALLEL)
6718 pattern = XVECEXP (pattern, 0, 0);
6719 switch (GET_CODE (pattern))
6720 {
6721 case SET:
6722 if (GET_CODE (SET_SRC (pattern)) != CALL
6723 && get_attr_type (insn) != TYPE_SFUNC)
6724 {
6725 targetm.asm_out.internal_label
6726 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6727 break;
6728 }
6729 /* else FALLTHROUGH */
6730 case CALL:
6731 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6732 CODE_LABEL_NUMBER (XEXP (note, 0)));
6733 break;
6734
6735 default:
6736 gcc_unreachable ();
6737 }
6738 }
6739 }
6740 }
6741
6742 /* Dump out any constants accumulated in the final pass. These will
6743 only be labels. */
6744 const char *
6745 output_jump_label_table (void)
6746 {
6747 int i;
6748
6749 if (pool_size)
6750 {
6751 fprintf (asm_out_file, "\t.align 2\n");
6752 for (i = 0; i < pool_size; i++)
6753 {
6754 pool_node *p = &pool_vector[i];
6755
6756 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6757 CODE_LABEL_NUMBER (p->label));
6758 output_asm_insn (".long %O0", &p->value);
6759 }
6760 pool_size = 0;
6761 }
6762
6763 return "";
6764 }
6765 \f
6766 /* A full frame looks like:
6767
6768 arg-5
6769 arg-4
6770 [ if current_function_anonymous_args
6771 arg-3
6772 arg-2
6773 arg-1
6774 arg-0 ]
6775 saved-fp
6776 saved-r10
6777 saved-r11
6778 saved-r12
6779 saved-pr
6780 local-n
6781 ..
6782 local-1
6783 local-0 <- fp points here.
6784
6785 Number of bytes pushed for anonymous args, used to pass information
6786 between expand_prologue and expand_epilogue.
6787
6788 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6789 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6790 for an epilogue and a negative value means that it's for a sibcall
6791 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6792 all the registers that are about to be restored, and hence dead. */
6793 static void
6794 output_stack_adjust (int size, rtx reg, int epilogue_p,
6795 HARD_REG_SET *live_regs_mask, bool frame_p)
6796 {
6797 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6798 if (size)
6799 {
6800 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6801
6802 /* This test is bogus, as output_stack_adjust is used to re-align the
6803 stack. */
6804 #if 0
6805 gcc_assert (!(size % align));
6806 #endif
6807
6808 if (CONST_OK_FOR_ADD (size))
6809 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6810 /* Try to do it with two partial adjustments; however, we must make
6811 sure that the stack is properly aligned at all times, in case
6812 an interrupt occurs between the two partial adjustments. */
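/* For instance, on a non-SHMEDIA target with size = 192 and align = 4,
   the single add does not satisfy CONST_OK_FOR_ADD, but 192 / 2 & -4 = 96
   and the remaining 96 both do; each partial adjustment is a multiple of
   the alignment, so the stack stays aligned even if an interrupt hits
   between the two adds.  */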
6813 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6814 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6815 {
6816 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6817 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6818 }
6819 else
6820 {
6821 rtx const_reg;
6822 rtx insn;
6823 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6824 int i;
6825
6826 /* If TEMP is invalid, we could temporarily save a general
6827 register to MACL. However, there is currently no need
6828 to handle this case, so just die when we see it. */
6829 if (epilogue_p < 0
6830 || current_function_interrupt
6831 || ! call_really_used_regs[temp] || fixed_regs[temp])
6832 temp = -1;
6833 if (temp < 0 && ! current_function_interrupt
6834 && (TARGET_SHMEDIA || epilogue_p >= 0))
6835 {
6836 HARD_REG_SET temps;
6837 COPY_HARD_REG_SET (temps, call_used_reg_set);
6838 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6839 if (epilogue_p > 0)
6840 {
6841 int nreg = 0;
6842 if (crtl->return_rtx)
6843 {
6844 enum machine_mode mode;
6845 mode = GET_MODE (crtl->return_rtx);
6846 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6847 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6848 }
6849 for (i = 0; i < nreg; i++)
6850 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6851 if (crtl->calls_eh_return)
6852 {
6853 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6854 for (i = 0; i <= 3; i++)
6855 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6856 }
6857 }
6858 if (TARGET_SHMEDIA && epilogue_p < 0)
6859 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6860 CLEAR_HARD_REG_BIT (temps, i);
6861 if (epilogue_p <= 0)
6862 {
6863 for (i = FIRST_PARM_REG;
6864 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6865 CLEAR_HARD_REG_BIT (temps, i);
6866 if (cfun->static_chain_decl != NULL)
6867 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6868 }
6869 temp = scavenge_reg (&temps);
6870 }
6871 if (temp < 0 && live_regs_mask)
6872 {
6873 HARD_REG_SET temps;
6874
6875 COPY_HARD_REG_SET (temps, *live_regs_mask);
6876 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6877 temp = scavenge_reg (&temps);
6878 }
6879 if (temp < 0)
6880 {
6881 rtx adj_reg, tmp_reg, mem;
6882
6883 /* If we reached here, the most likely case is the (sibcall)
6884 epilogue for non-SHmedia. Put a special push/pop sequence
6885 for such a case as a last resort. This looks lengthy, but it
6886 is not a problem because this case should be very
6887 rare. */
6888
6889 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6890
6891
6892 /* ??? There is still the slight possibility that r4 or
6893 r5 have been reserved as fixed registers or assigned
6894 as global registers, and they change during an
6895 interrupt. There are possible ways to handle this:
6896
6897 - If we are adjusting the frame pointer (r14), we can do
6898 with a single temp register and an ordinary push / pop
6899 on the stack.
6900 - Grab any call-used or call-saved registers (i.e. not
6901 fixed or globals) for the temps we need. We might
6902 also grab r14 if we are adjusting the stack pointer.
6903 If we can't find enough available registers, issue
6904 a diagnostic and die - the user must have reserved
6905 way too many registers.
6906 But since all this is rather unlikely to happen and
6907 would require extra testing, we just die if r4 / r5
6908 are not available. */
6909 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6910 && !global_regs[4] && !global_regs[5]);
6911
6912 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6913 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6914 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6915 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6916 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6917 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6918 emit_move_insn (mem, tmp_reg);
6919 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6920 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6921 emit_move_insn (mem, tmp_reg);
6922 emit_move_insn (reg, adj_reg);
6923 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6924 emit_move_insn (adj_reg, mem);
6925 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6926 emit_move_insn (tmp_reg, mem);
6927 /* Tell flow the insns that pop r4/r5 aren't dead. */
6928 emit_use (tmp_reg);
6929 emit_use (adj_reg);
6930 return;
6931 }
6932 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6933
6934 /* If SIZE is negative, subtract the positive value.
6935 This sometimes allows a constant pool entry to be shared
6936 between prologue and epilogue code. */
6937 if (size < 0)
6938 {
6939 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6940 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6941 }
6942 else
6943 {
6944 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6945 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6946 }
6947 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6948 gen_rtx_SET (VOIDmode, reg,
6949 gen_rtx_PLUS (SImode, reg,
6950 GEN_INT (size))));
6951 }
6952 }
6953 }
6954
6955 /* Emit the specified insn and mark it as frame related.
6956 FIXME: Rename this to emit_frame_insn. */
6957 static rtx_insn *
6958 frame_insn (rtx x)
6959 {
6960 rtx_insn *insn = emit_insn (x);
6961 RTX_FRAME_RELATED_P (insn) = 1;
6962 return insn;
6963 }
6964
6965 /* Output RTL to push register RN onto the stack. */
6966 static rtx
6967 push (int rn)
6968 {
6969 rtx x;
6970 if (rn == FPUL_REG)
6971 x = gen_push_fpul ();
6972 else if (rn == FPSCR_REG)
6973 x = gen_push_fpscr ();
6974 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6975 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6976 {
6977 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6978 return NULL_RTX;
6979 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6980 }
6981 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6982 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6983 else
6984 x = gen_push (gen_rtx_REG (SImode, rn));
6985
6986 x = frame_insn (x);
6987 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6988 return x;
6989 }
6990
6991 /* Output RTL to pop register RN from the stack. */
6992 static void
6993 pop (int rn)
6994 {
6995 rtx x, sp_reg, reg;
6996 if (rn == FPUL_REG)
6997 x = gen_pop_fpul ();
6998 else if (rn == FPSCR_REG)
6999 x = gen_pop_fpscr ();
7000 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7001 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7002 {
7003 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7004 return;
7005 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
7006 }
7007 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7008 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7009 else
7010 x = gen_pop (gen_rtx_REG (SImode, rn));
7011
7012 x = emit_insn (x);
7013
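/* Attach CFI notes by hand: REG_CFA_RESTORE records that the register's
   saved value has been restored, and REG_CFA_ADJUST_CFA records the stack
   pointer moving up by the size of the popped register.  */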
7014 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7015 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7016 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7017 : SET_DEST (PATTERN (x)));
7018 add_reg_note (x, REG_CFA_RESTORE, reg);
7019 add_reg_note (x, REG_CFA_ADJUST_CFA,
7020 gen_rtx_SET (SImode, sp_reg,
7021 plus_constant (SImode, sp_reg,
7022 GET_MODE_SIZE (GET_MODE (reg)))));
7023 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7024 RTX_FRAME_RELATED_P (x) = 1;
7025 }
7026
7027 /* Generate code to push the regs specified in the mask. */
7028 static void
7029 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7030 {
7031 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7032 int skip_fpscr = 0;
7033
7034 /* Push PR last; this gives better latencies after the prologue, and
7035 candidates for the return delay slot when there are no general
7036 registers pushed. */
7037 for (; i < FIRST_PSEUDO_REGISTER; i++)
7038 {
7039 /* If this is an interrupt handler, and the SZ bit varies,
7040 and we have to push any floating point register, we need
7041 to switch to the correct precision first. */
7042 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7043 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7044 {
7045 HARD_REG_SET unsaved;
7046
7047 push (FPSCR_REG);
7048 COMPL_HARD_REG_SET (unsaved, *mask);
7049 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7050 skip_fpscr = 1;
7051 }
7052 if (i != PR_REG
7053 && (i != FPSCR_REG || ! skip_fpscr)
7054 && TEST_HARD_REG_BIT (*mask, i))
7055 {
7056 /* If the ISR has RESBANK attribute assigned, don't push any of
7057 the following registers - R0-R14, MACH, MACL and GBR. */
7058 if (! (sh_cfun_resbank_handler_p ()
7059 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7060 || i == MACH_REG
7061 || i == MACL_REG
7062 || i == GBR_REG)))
7063 push (i);
7064 }
7065 }
7066
7067 /* Push banked registers last to improve delay slot opportunities. */
7068 if (interrupt_handler)
7069 {
7070 bool use_movml = false;
7071
7072 if (TARGET_SH2A)
7073 {
7074 unsigned int count = 0;
7075
7076 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7077 if (TEST_HARD_REG_BIT (*mask, i))
7078 count++;
7079 else
7080 break;
7081
7082 /* Use movml when all banked registers are pushed. */
7083 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7084 use_movml = true;
7085 }
7086
7087 if (sh_cfun_resbank_handler_p ())
7088 ; /* Do nothing. */
7089 else if (use_movml)
7090 {
7091 rtx x, mem, reg, set;
7092 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7093
7094 /* We must avoid scheduling the multiple-store insn together with
7095 other insns. */
7096 emit_insn (gen_blockage ());
7097 x = gen_movml_push_banked (sp_reg);
7098 x = frame_insn (x);
7099 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7100 {
7101 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7102 reg = gen_rtx_REG (SImode, i);
7103 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7104 }
7105
7106 set = gen_rtx_SET (SImode, sp_reg,
7107 plus_constant (Pmode, sp_reg, - 32));
7108 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7109 emit_insn (gen_blockage ());
7110 }
7111 else
7112 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7113 if (TEST_HARD_REG_BIT (*mask, i))
7114 push (i);
7115 }
7116
7117 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7118 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7119 push (PR_REG);
7120 }
7121
7122 /* Calculate how much extra space is needed to save all callee-saved
7123 target registers.
7124 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7125 static int
7126 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7127 {
7128 int reg;
7129 int stack_space = 0;
7130 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7131
7132 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7133 if ((! call_really_used_regs[reg] || interrupt_handler)
7134 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7135 /* Leave space to save this target register on the stack,
7136 in case target register allocation wants to use it. */
7137 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7138 return stack_space;
7139 }
7140
7141 /* Decide whether we should reserve space for callee-save target registers,
7142 in case target register allocation wants to use them. REGS_SAVED is
7143 the space, in bytes, that is already required for register saves.
7144 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7145 static int
7146 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7147 HARD_REG_SET *live_regs_mask)
7148 {
7149 if (optimize_size)
7150 return 0;
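/* Reserve the space only when it costs no more than what is already
   being spent on register saves.  */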
7151 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7152 }
7153
7154 /* Decide how much space to reserve for callee-save target registers
7155 in case target register allocation wants to use them.
7156 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7157 static int
7158 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7159 {
7160 if (shmedia_space_reserved_for_target_registers)
7161 return shmedia_target_regs_stack_space (live_regs_mask);
7162 else
7163 return 0;
7164 }
7165
7166 /* Work out the registers which need to be saved, both as a mask and a
7167 count of bytes saved. Return the count.
7168
7169 If doing a pragma interrupt function, then push all regs used by the
7170 function, and if we call another function (we can tell by looking at PR),
7171 make sure that all the regs it clobbers are safe too. */
7172 static int
7173 calc_live_regs (HARD_REG_SET *live_regs_mask)
7174 {
7175 unsigned int reg;
7176 int count;
7177 tree attrs;
7178 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7179 bool nosave_low_regs;
7180 int pr_live, has_call;
7181
7182 attrs = DECL_ATTRIBUTES (current_function_decl);
7183 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7184 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7185 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7186 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7187
7188 CLEAR_HARD_REG_SET (*live_regs_mask);
7189 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7190 && df_regs_ever_live_p (FPSCR_REG))
7191 target_flags &= ~MASK_FPU_SINGLE;
7192 /* If switching to double mode would save a lot of register saves, do that. */
7193 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7194 && TARGET_FPU_SINGLE)
7195 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7196 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7197 && (! call_really_used_regs[reg]
7198 || interrupt_handler)
7199 && ++count > 2)
7200 {
7201 target_flags &= ~MASK_FPU_SINGLE;
7202 break;
7203 }
7204 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7205 knows how to use it. That means the pseudo originally allocated for
7206 the initial value can become the PR_MEDIA_REG hard register, as seen for
7207 execute/20010122-1.c:test9. */
7208 if (TARGET_SHMEDIA)
7209 /* ??? This function is called from initial_elimination_offset, hence we
7210 can't use the result of sh_media_register_for_return here. */
7211 pr_live = sh_pr_n_sets ();
7212 else
7213 {
7214 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7215 pr_live = (pr_initial
7216 ? (!REG_P (pr_initial)
7217 || REGNO (pr_initial) != (PR_REG))
7218 : df_regs_ever_live_p (PR_REG));
7219 /* For SHcompact, if not optimizing, we end up with a memory reference
7220 using the return address pointer for __builtin_return_address even
7221 though there is no actual need to put the PR register on the stack. */
7222 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7223 }
7224 /* Force PR to be live if the prologue has to call the SHmedia
7225 argument decoder or register saver. */
7226 if (TARGET_SHCOMPACT
7227 && ((crtl->args.info.call_cookie
7228 & ~ CALL_COOKIE_RET_TRAMP (1))
7229 || crtl->saves_all_registers))
7230 pr_live = 1;
7231 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7232 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7233 {
7234 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7235 ? pr_live
7236 : interrupt_handler
7237 ? (/* Need to save all the regs ever live. */
7238 (df_regs_ever_live_p (reg)
7239 || (call_really_used_regs[reg]
7240 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7241 || reg == PIC_OFFSET_TABLE_REGNUM)
7242 && has_call)
7243 || (TARGET_SHMEDIA && has_call
7244 && REGISTER_NATURAL_MODE (reg) == SImode
7245 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7246 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7247 && reg != RETURN_ADDRESS_POINTER_REGNUM
7248 && reg != T_REG && reg != GBR_REG
7249 /* Push fpscr only on targets which have an FPU.  */
7250 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7251 : (/* Only push those regs which are used and need to be saved. */
7252 (TARGET_SHCOMPACT
7253 && flag_pic
7254 && crtl->args.info.call_cookie
7255 && reg == PIC_OFFSET_TABLE_REGNUM)
7256 || (df_regs_ever_live_p (reg)
7257 && ((!call_really_used_regs[reg]
7258 && !(reg != PIC_OFFSET_TABLE_REGNUM
7259 && fixed_regs[reg] && call_used_regs[reg]))
7260 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7261 || (crtl->calls_eh_return
7262 && (reg == EH_RETURN_DATA_REGNO (0)
7263 || reg == EH_RETURN_DATA_REGNO (1)
7264 || reg == EH_RETURN_DATA_REGNO (2)
7265 || reg == EH_RETURN_DATA_REGNO (3)))
7266 || ((reg == MACL_REG || reg == MACH_REG)
7267 && df_regs_ever_live_p (reg)
7268 && sh_cfun_attr_renesas_p ())
7269 ))
7270 {
7271 SET_HARD_REG_BIT (*live_regs_mask, reg);
7272 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7273
7274 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7275 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7276 {
7277 if (FP_REGISTER_P (reg))
7278 {
7279 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7280 {
7281 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7282 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7283 }
7284 }
7285 else if (XD_REGISTER_P (reg))
7286 {
7287 /* Must switch to double mode to access these registers. */
7288 target_flags &= ~MASK_FPU_SINGLE;
7289 }
7290 }
7291 }
7292 if (nosave_low_regs && reg == R8_REG)
7293 break;
7294 }
7295 /* If we have a target register optimization pass after prologue / epilogue
7296 threading, we need to assume all target registers will be live even if
7297 they aren't now. */
7298 if (flag_branch_target_load_optimize2
7299 && TARGET_SAVE_ALL_TARGET_REGS
7300 && shmedia_space_reserved_for_target_registers)
7301 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7302 if ((! call_really_used_regs[reg] || interrupt_handler)
7303 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7304 {
7305 SET_HARD_REG_BIT (*live_regs_mask, reg);
7306 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7307 }
7308 /* If this is an interrupt handler, we don't have any call-clobbered
7309 registers we can conveniently use for target register save/restore.
7310 Make sure we save at least one general purpose register when we need
7311 to save target registers. */
7312 if (interrupt_handler
7313 && hard_reg_set_intersect_p (*live_regs_mask,
7314 reg_class_contents[TARGET_REGS])
7315 && ! hard_reg_set_intersect_p (*live_regs_mask,
7316 reg_class_contents[GENERAL_REGS]))
7317 {
7318 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7319 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7320 }
7321
7322 return count;
7323 }
7324
7325 /* Code to generate prologue and epilogue sequences */
7326
7327 /* PUSHED is the number of bytes that are being pushed on the
7328 stack for register saves. Return the frame size, padded
7329 appropriately so that the stack stays properly aligned. */
7330 static HOST_WIDE_INT
7331 rounded_frame_size (int pushed)
7332 {
7333 HOST_WIDE_INT size = get_frame_size ();
7334 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7335
7336 if (ACCUMULATE_OUTGOING_ARGS)
7337 size += crtl->outgoing_args_size;
7338
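/* For example, with a frame size of 10 bytes, 20 bytes of pushed registers
   and a 4 byte stack boundary this yields ((10 + 20 + 3) & -4) - 20 = 12
   bytes of padded frame.  */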
7339 return ((size + pushed + align - 1) & -align) - pushed;
7340 }
7341
7342 /* Choose a call-clobbered target-branch register that remains
7343 unchanged along the whole function. We set it up as the return
7344 value in the prologue. */
7345 int
7346 sh_media_register_for_return (void)
7347 {
7348 int regno;
7349 int tr0_used;
7350
7351 if (! crtl->is_leaf)
7352 return -1;
7353 if (lookup_attribute ("interrupt_handler",
7354 DECL_ATTRIBUTES (current_function_decl)))
7355 return -1;
7356 if (sh_cfun_interrupt_handler_p ())
7357 return -1;
7358
7359 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7360
7361 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7362 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7363 return regno;
7364
7365 return -1;
7366 }
7367
7368 /* The maximum registers we need to save are:
7369 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7370 - 32 floating point registers (for each pair, we save none,
7371 one single precision value, or a double precision value).
7372 - 8 target registers
7373 - add 1 entry for a delimiter. */
7374 #define MAX_SAVED_REGS (62+32+8)
7375
7376 typedef struct save_entry_s
7377 {
7378 unsigned char reg;
7379 unsigned char mode;
7380 short offset;
7381 } save_entry;
7382
7383 #define MAX_TEMPS 4
7384
7385 /* There will be a delimiter entry with VOIDmode both at the start and the
7386 end of a filled in schedule. The end delimiter has the offset of the
7387 save with the smallest (i.e. most negative) offset. */
7388 typedef struct save_schedule_s
7389 {
7390 save_entry entries[MAX_SAVED_REGS + 2];
7391 int temps[MAX_TEMPS+1];
7392 } save_schedule;
7393
7394 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7395 use reverse order. Returns the last entry written to (not counting
7396 the delimiter). OFFSET_BASE is a number to be added to all offset
7397 entries. */
7398 static save_entry *
7399 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7400 int offset_base)
7401 {
7402 int align, i;
7403 save_entry *entry = schedule->entries;
7404 int tmpx = 0;
7405 int offset;
7406
7407 if (! current_function_interrupt)
7408 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7409 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7410 && ! FUNCTION_ARG_REGNO_P (i)
7411 && i != FIRST_RET_REG
7412 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7413 && ! (crtl->calls_eh_return
7414 && (i == EH_RETURN_STACKADJ_REGNO
7415 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7416 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7417 schedule->temps[tmpx++] = i;
7418 entry->reg = -1;
7419 entry->mode = VOIDmode;
7420 entry->offset = offset_base;
7421 entry++;
7422 /* We loop twice: first, we save 8-byte aligned registers in the
7423 higher addresses, which are known to be aligned. Then, we
7424 proceed to saving 32-bit registers that don't need 8-byte
7425 alignment.
7426 If this is an interrupt function, all registers that need saving
7427 need to be saved in full. Moreover, we need to postpone saving
7428 target registers until we have saved some general purpose registers
7429 that we can then use as scratch registers. */
7430 offset = offset_base;
7431 for (align = 1; align >= 0; align--)
7432 {
7433 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7434 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7435 {
7436 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
7437 int reg = i;
7438
7439 if (current_function_interrupt)
7440 {
7441 if (TARGET_REGISTER_P (i))
7442 continue;
7443 if (GENERAL_REGISTER_P (i))
7444 mode = DImode;
7445 }
7446 if (mode == SFmode && (i % 2) == 1
7447 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7448 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7449 {
7450 mode = DFmode;
7451 i--;
7452 reg--;
7453 }
7454
7455 /* If we're doing the aligned pass and this is not aligned,
7456 or we're doing the unaligned pass and this is aligned,
7457 skip it. */
7458 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7459 != align)
7460 continue;
7461
7462 if (current_function_interrupt
7463 && GENERAL_REGISTER_P (i)
7464 && tmpx < MAX_TEMPS)
7465 schedule->temps[tmpx++] = i;
7466
7467 offset -= GET_MODE_SIZE (mode);
7468 entry->reg = i;
7469 entry->mode = mode;
7470 entry->offset = offset;
7471 entry++;
7472 }
7473 if (align && current_function_interrupt)
7474 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7475 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7476 {
7477 offset -= GET_MODE_SIZE (DImode);
7478 entry->reg = i;
7479 entry->mode = DImode;
7480 entry->offset = offset;
7481 entry++;
7482 }
7483 }
7484 entry->reg = -1;
7485 entry->mode = VOIDmode;
7486 entry->offset = offset;
7487 schedule->temps[tmpx] = -1;
7488 return entry - 1;
7489 }
7490
7491 /* Expand code for the function prologue. */
7492 void
7493 sh_expand_prologue (void)
7494 {
7495 HARD_REG_SET live_regs_mask;
7496 int d, i;
7497 int d_rounding = 0;
7498 int save_flags = target_flags;
7499 int pretend_args;
7500 int stack_usage;
7501 tree sp_switch_attr
7502 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7503
7504 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7505
7506 /* We have pretend args if we had an object sent partially in registers
7507 and partially on the stack, e.g. a large structure. */
7508 pretend_args = crtl->args.pretend_args_size;
7509 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7510 && (NPARM_REGS(SImode)
7511 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7512 pretend_args = 0;
7513
7514 output_stack_adjust (-pretend_args
7515 - crtl->args.info.stack_regs * 8,
7516 stack_pointer_rtx, 0, NULL, true);
7517 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7518
7519 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7520 /* We're going to use the PIC register to load the address of the
7521 incoming-argument decoder and/or of the return trampoline from
7522 the GOT, so make sure the PIC register is preserved and
7523 initialized. */
7524 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7525
7526 if (TARGET_SHCOMPACT
7527 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7528 {
7529 int reg;
7530
7531 /* First, make all registers with incoming arguments that will
7532 be pushed onto the stack live, so that register renaming
7533 doesn't overwrite them. */
7534 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7535 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7536 >= NPARM_REGS (SImode) - reg)
7537 for (; reg < NPARM_REGS (SImode); reg++)
7538 emit_insn (gen_shcompact_preserve_incoming_args
7539 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7540 else if (CALL_COOKIE_INT_REG_GET
7541 (crtl->args.info.call_cookie, reg) == 1)
7542 emit_insn (gen_shcompact_preserve_incoming_args
7543 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7544
7545 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7546 stack_pointer_rtx);
7547 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7548 GEN_INT (crtl->args.info.call_cookie));
7549 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7550 gen_rtx_REG (SImode, R0_REG));
7551 }
7552 else if (TARGET_SHMEDIA)
7553 {
7554 int tr = sh_media_register_for_return ();
7555
7556 if (tr >= 0)
7557 emit_move_insn (gen_rtx_REG (DImode, tr),
7558 gen_rtx_REG (DImode, PR_MEDIA_REG));
7559 }
7560
7561 /* Emit the code for SETUP_VARARGS. */
7562 if (cfun->stdarg)
7563 {
7564 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7565 {
7566 /* Push arg regs as if they'd been provided by the caller on the stack. */
7567 for (i = 0; i < NPARM_REGS(SImode); i++)
7568 {
7569 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7570
7571 if (i >= (NPARM_REGS(SImode)
7572 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7573 ))
7574 break;
7575 push (rn);
7576 stack_usage += GET_MODE_SIZE (SImode);
7577 }
7578 }
7579 }
7580
7581 /* If we're supposed to switch stacks at function entry, do so now. */
7582 if (sp_switch_attr)
7583 {
7584 rtx lab, newsrc;
7585 /* The argument specifies a variable holding the address of the
7586 stack the interrupt function should switch to/from at entry/exit. */
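/* Typical usage, per the documented sp_switch function attribute (the names
   here are illustrative only):
     void *alt_stack;
     void handler (void) __attribute__ ((interrupt_handler,
                                          sp_switch ("alt_stack")));  */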
7587 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7588 const char *s
7589 = ggc_strdup (TREE_STRING_POINTER (arg));
7590 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7591
7592 lab = add_constant (sp_switch, SImode, 0);
7593 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7594
7595 emit_insn (gen_sp_switch_1 (newsrc));
7596 }
7597
7598 d = calc_live_regs (&live_regs_mask);
7599 /* ??? Maybe we could save some switching if we can move a mode switch
7600 that already happens to be at the function start into the prologue. */
7601 if (target_flags != save_flags && ! current_function_interrupt)
7602 emit_insn (gen_toggle_sz ());
7603
7604 if (TARGET_SH5)
7605 {
7606 int offset_base, offset;
7607 rtx r0 = NULL_RTX;
7608 int offset_in_r0 = -1;
7609 int sp_in_r0 = 0;
7610 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7611 int total_size, save_size;
7612 save_schedule schedule;
7613 save_entry *entry;
7614 int *tmp_pnt;
7615
7616 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7617 && ! current_function_interrupt)
7618 r0 = gen_rtx_REG (Pmode, R0_REG);
7619
7620 /* D is the actual number of bytes that we need for saving registers,
7621 however, in initial_elimination_offset we have committed to using
7622 an additional TREGS_SPACE amount of bytes - in order to keep both
7623 addresses to arguments supplied by the caller and local variables
7624 valid, we must keep this gap. Place it between the incoming
7625 arguments and the actually saved registers in a bid to optimize
7626 locality of reference. */
7627 total_size = d + tregs_space;
7628 total_size += rounded_frame_size (total_size);
7629 save_size = total_size - rounded_frame_size (d);
7630 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7631 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7632 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7633
7634 /* If adjusting the stack in a single step costs nothing extra, do so.
7635 I.e. either if a single addi is enough, or we need a movi anyway,
7636 and we don't exceed the maximum offset range (the test for the
7637 latter is conservative for simplicity). */
7638 if (TARGET_SHMEDIA
7639 && (CONST_OK_FOR_I10 (-total_size)
7640 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7641 && total_size <= 2044)))
7642 d_rounding = total_size - save_size;
7643
7644 offset_base = d + d_rounding;
7645
7646 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7647 0, NULL, true);
7648 stack_usage += save_size + d_rounding;
7649
7650 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7651 tmp_pnt = schedule.temps;
7652 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7653 {
7654 enum machine_mode mode = (enum machine_mode) entry->mode;
7655 unsigned int reg = entry->reg;
7656 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7657 rtx orig_reg_rtx;
7658
7659 offset = entry->offset;
7660
7661 reg_rtx = gen_rtx_REG (mode, reg);
7662
7663 mem_rtx = gen_frame_mem (mode,
7664 gen_rtx_PLUS (Pmode,
7665 stack_pointer_rtx,
7666 GEN_INT (offset)));
7667
7668 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7669 {
7670 gcc_assert (r0);
7671 mem_rtx = NULL_RTX;
7672 }
7673
7674 if (HAVE_PRE_DECREMENT
7675 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7676 || mem_rtx == NULL_RTX
7677 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7678 {
7679 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7680
7681 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7682 pre_dec = NULL_RTX;
7683 else
7684 {
7685 mem_rtx = NULL_RTX;
7686 offset += GET_MODE_SIZE (mode);
7687 }
7688 }
7689
7690 if (mem_rtx != NULL_RTX)
7691 goto addr_ok;
7692
7693 if (offset_in_r0 == -1)
7694 {
7695 emit_move_insn (r0, GEN_INT (offset));
7696 offset_in_r0 = offset;
7697 }
7698 else if (offset != offset_in_r0)
7699 {
7700 emit_move_insn (r0,
7701 gen_rtx_PLUS
7702 (Pmode, r0,
7703 GEN_INT (offset - offset_in_r0)));
7704 offset_in_r0 += offset - offset_in_r0;
7705 }
7706
7707 if (pre_dec != NULL_RTX)
7708 {
7709 if (! sp_in_r0)
7710 {
7711 emit_move_insn (r0,
7712 gen_rtx_PLUS
7713 (Pmode, r0, stack_pointer_rtx));
7714 sp_in_r0 = 1;
7715 }
7716
7717 offset -= GET_MODE_SIZE (mode);
7718 offset_in_r0 -= GET_MODE_SIZE (mode);
7719
7720 mem_rtx = pre_dec;
7721 }
7722 else if (sp_in_r0)
7723 mem_rtx = gen_frame_mem (mode, r0);
7724 else
7725 mem_rtx = gen_frame_mem (mode,
7726 gen_rtx_PLUS (Pmode,
7727 stack_pointer_rtx,
7728 r0));
7729
7730 /* We must not use an r0-based address for target-branch
7731 registers or for special registers without pre-dec
7732 memory addresses, since we store their values in r0
7733 first. */
7734 gcc_assert (!TARGET_REGISTER_P (reg)
7735 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7736 || mem_rtx == pre_dec));
7737
7738 addr_ok:
7739 orig_reg_rtx = reg_rtx;
7740 if (TARGET_REGISTER_P (reg)
7741 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7742 && mem_rtx != pre_dec))
7743 {
7744 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7745
7746 emit_move_insn (tmp_reg, reg_rtx);
7747
7748 if (REGNO (tmp_reg) == R0_REG)
7749 {
7750 offset_in_r0 = -1;
7751 sp_in_r0 = 0;
7752 gcc_assert (!refers_to_regno_p
7753 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7754 }
7755
7756 if (*++tmp_pnt <= 0)
7757 tmp_pnt = schedule.temps;
7758
7759 reg_rtx = tmp_reg;
7760 }
7761 {
7762 rtx insn;
7763
7764 /* Mark as interesting for the DWARF CFI generator.  */
7765 insn = emit_move_insn (mem_rtx, reg_rtx);
7766 RTX_FRAME_RELATED_P (insn) = 1;
7767 /* If we use an intermediate register for the save, we can't
7768 describe this exactly in cfi as a copy of the to-be-saved
7769 register into the temporary register and then the temporary
7770 register on the stack, because the temporary register can
7771 have a different natural size than the to-be-saved register.
7772 Thus, we gloss over the intermediate copy and pretend we do
7773 a direct save from the to-be-saved register. */
7774 if (REGNO (reg_rtx) != reg)
7775 {
7776 rtx set;
7777
7778 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7779 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7780 }
7781
7782 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7783 {
7784 rtx reg_rtx = gen_rtx_REG (mode, reg);
7785 rtx set;
7786 rtx mem_rtx = gen_frame_mem (mode,
7787 gen_rtx_PLUS (Pmode,
7788 stack_pointer_rtx,
7789 GEN_INT (offset)));
7790
7791 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7792 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7793 }
7794 }
7795 }
7796
7797 gcc_assert (entry->offset == d_rounding);
7798 }
7799 else
7800 {
7801 push_regs (&live_regs_mask, current_function_interrupt);
7802 stack_usage += d;
7803 }
7804
7805 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7806 emit_insn (gen_GOTaddr2picreg ());
7807
7808 if (SHMEDIA_REGS_STACK_ADJUST ())
7809 {
7810 /* This must NOT go through the PLT, otherwise mach and macl
7811 may be clobbered. */
7812 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7813 (TARGET_FPU_ANY
7814 ? "__GCC_push_shmedia_regs"
7815 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7816 emit_insn (gen_shmedia_save_restore_regs_compact
7817 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7818 }
7819
7820 if (target_flags != save_flags && ! current_function_interrupt)
7821 emit_insn (gen_toggle_sz ());
7822
7823 target_flags = save_flags;
7824
7825 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7826 stack_pointer_rtx, 0, NULL, true);
7827 stack_usage += rounded_frame_size (d) - d_rounding;
7828
7829 if (frame_pointer_needed)
7830 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7831
7832 if (TARGET_SHCOMPACT
7833 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7834 {
7835 /* This must NOT go through the PLT, otherwise mach and macl
7836 may be clobbered. */
7837 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7838 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7839 emit_insn (gen_shcompact_incoming_args ());
7840 }
7841
7842 /* If we are profiling, make sure no instructions are scheduled before
7843 the call to mcount. Similarly if some call instructions are swapped
7844 before frame related insns, it'll confuse the unwinder because
7845 currently SH has no unwind info for function epilogues. */
7846 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7847 emit_insn (gen_blockage ());
7848
7849 if (flag_stack_usage_info)
7850 current_function_static_stack_size = stack_usage;
7851 }
7852
7853 /* Expand code for the function epilogue. */
7854 void
7855 sh_expand_epilogue (bool sibcall_p)
7856 {
7857 HARD_REG_SET live_regs_mask;
7858 int d, i;
7859 int d_rounding = 0;
7860
7861 int save_flags = target_flags;
7862 int frame_size, save_size;
7863 int fpscr_deferred = 0;
7864 int e = sibcall_p ? -1 : 1;
7865
7866 d = calc_live_regs (&live_regs_mask);
7867
7868 save_size = d;
7869 frame_size = rounded_frame_size (d);
7870
7871 if (TARGET_SH5)
7872 {
7873 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7874 int total_size;
7875 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7876 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7877 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7878
7879 total_size = d + tregs_space;
7880 total_size += rounded_frame_size (total_size);
7881 save_size = total_size - frame_size;
7882
7883 /* If adjusting the stack in a single step costs nothing extra, do so.
7884 I.e. either if a single addi is enough, or we need a movi anyway,
7885 and we don't exceed the maximum offset range (the test for the
7886 latter is conservative for simplicity). */
7887 if (TARGET_SHMEDIA
7888 && ! frame_pointer_needed
7889 && (CONST_OK_FOR_I10 (total_size)
7890 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7891 && total_size <= 2044)))
7892 d_rounding = frame_size;
7893
7894 frame_size -= d_rounding;
7895 }
7896
7897 if (frame_pointer_needed)
7898 {
7899 /* We must avoid scheduling the epilogue with previous basic blocks.
7900 See PR/18032 and PR/40313. */
7901 emit_insn (gen_blockage ());
7902 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7903 &live_regs_mask, true);
7904
7905 /* We must avoid moving the stack pointer adjustment past code
7906 which reads from the local frame, else an interrupt could
7907 occur after the SP adjustment and clobber data in the local
7908 frame. */
7909 emit_insn (gen_blockage ());
7910 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7911 }
7912 else if (frame_size)
7913 {
7914 /* We must avoid moving the stack pointer adjustment past code
7915 which reads from the local frame, else an interrupt could
7916 occur after the SP adjustment and clobber data in the local
7917 frame. */
7918 emit_insn (gen_blockage ());
7919 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7920 &live_regs_mask, true);
7921 }
7922
7923 if (SHMEDIA_REGS_STACK_ADJUST ())
7924 {
7925 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7926 (TARGET_FPU_ANY
7927 ? "__GCC_pop_shmedia_regs"
7928 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7929 /* This must NOT go through the PLT, otherwise mach and macl
7930 may be clobbered. */
7931 emit_insn (gen_shmedia_save_restore_regs_compact
7932 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7933 }
7934
7935 /* Pop all the registers. */
7936
7937 if (target_flags != save_flags && ! current_function_interrupt)
7938 emit_insn (gen_toggle_sz ());
7939 if (TARGET_SH5)
7940 {
7941 int offset_base, offset;
7942 int offset_in_r0 = -1;
7943 int sp_in_r0 = 0;
7944 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7945 save_schedule schedule;
7946 save_entry *entry;
7947 int *tmp_pnt;
7948
7949 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7950 offset_base = -entry[1].offset + d_rounding;
7951 tmp_pnt = schedule.temps;
7952 for (; entry->mode != VOIDmode; entry--)
7953 {
7954 enum machine_mode mode = (enum machine_mode) entry->mode;
7955 int reg = entry->reg;
7956 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7957
7958 offset = offset_base + entry->offset;
7959 reg_rtx = gen_rtx_REG (mode, reg);
7960
7961 mem_rtx = gen_frame_mem (mode,
7962 gen_rtx_PLUS (Pmode,
7963 stack_pointer_rtx,
7964 GEN_INT (offset)));
7965
7966 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7967 mem_rtx = NULL_RTX;
7968
7969 if (HAVE_POST_INCREMENT
7970 && (offset == offset_in_r0
7971 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7972 && mem_rtx == NULL_RTX)
7973 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7974 {
7975 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7976
7977 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7978 post_inc = NULL_RTX;
7979 else
7980 mem_rtx = NULL_RTX;
7981 }
7982
7983 if (mem_rtx != NULL_RTX)
7984 goto addr_ok;
7985
7986 if (offset_in_r0 == -1)
7987 {
7988 emit_move_insn (r0, GEN_INT (offset));
7989 offset_in_r0 = offset;
7990 }
7991 else if (offset != offset_in_r0)
7992 {
7993 emit_move_insn (r0,
7994 gen_rtx_PLUS
7995 (Pmode, r0,
7996 GEN_INT (offset - offset_in_r0)));
7997 offset_in_r0 += offset - offset_in_r0;
7998 }
7999
8000 if (post_inc != NULL_RTX)
8001 {
8002 if (! sp_in_r0)
8003 {
8004 emit_move_insn (r0,
8005 gen_rtx_PLUS
8006 (Pmode, r0, stack_pointer_rtx));
8007 sp_in_r0 = 1;
8008 }
8009
8010 mem_rtx = post_inc;
8011
8012 offset_in_r0 += GET_MODE_SIZE (mode);
8013 }
8014 else if (sp_in_r0)
8015 mem_rtx = gen_frame_mem (mode, r0);
8016 else
8017 mem_rtx = gen_frame_mem (mode,
8018 gen_rtx_PLUS (Pmode,
8019 stack_pointer_rtx,
8020 r0));
8021
8022 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8023 || mem_rtx == post_inc);
8024
8025 addr_ok:
8026 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8027 && mem_rtx != post_inc)
8028 {
8029 emit_move_insn (r0, mem_rtx);
8030 mem_rtx = r0;
8031 }
8032 else if (TARGET_REGISTER_P (reg))
8033 {
8034 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8035
8036 /* Give the scheduler a bit of freedom by using up to
8037 MAX_TEMPS registers in a round-robin fashion. */
8038 emit_move_insn (tmp_reg, mem_rtx);
8039 mem_rtx = tmp_reg;
8040 if (*++tmp_pnt < 0)
8041 tmp_pnt = schedule.temps;
8042 }
8043
8044 emit_move_insn (reg_rtx, mem_rtx);
8045 }
8046
8047 gcc_assert (entry->offset + offset_base == d + d_rounding);
8048 }
8049 else /* ! TARGET_SH5 */
8050 {
8051 int last_reg;
8052
8053 save_size = 0;
8054 /* For an ISR with RESBANK attribute assigned, don't pop PR
8055 register. */
8056 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8057 && !sh_cfun_resbank_handler_p ())
8058 {
8059 if (!frame_pointer_needed)
8060 emit_insn (gen_blockage ());
8061 pop (PR_REG);
8062 }
8063
8064 /* Banked registers are popped first to avoid being scheduled in the
8065 delay slot; RTE switches banks before its delay slot instruction. */
8066 if (current_function_interrupt)
8067 {
8068 bool use_movml = false;
8069
8070 if (TARGET_SH2A)
8071 {
8072 unsigned int count = 0;
8073
8074 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8075 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8076 count++;
8077 else
8078 break;
8079
8080 /* Use movml when all banked registers are popped. */
8081 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8082 use_movml = true;
8083 }
8084
8085 if (sh_cfun_resbank_handler_p ())
8086 ; /* Do nothing. */
8087 else if (use_movml)
8088 {
8089 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8090
8091 /* We must avoid scheduling the multiple load insns together with
8092 other insns. */
8093 emit_insn (gen_blockage ());
8094 emit_insn (gen_movml_pop_banked (sp_reg));
8095 emit_insn (gen_blockage ());
8096 }
8097 else
8098 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8099 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8100 pop (i);
8101
8102 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8103 }
8104 else
8105 last_reg = FIRST_PSEUDO_REGISTER;
8106
8107 for (i = 0; i < last_reg; i++)
8108 {
8109 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8110
8111 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8112 && hard_reg_set_intersect_p (live_regs_mask,
8113 reg_class_contents[DF_REGS]))
8114 fpscr_deferred = 1;
8115 /* For an ISR with the RESBANK attribute assigned, don't pop the
8116 following registers: R0-R14, MACH, MACL and GBR. */
8117 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8118 && ! (sh_cfun_resbank_handler_p ()
8119 && ((j >= FIRST_GENERAL_REG
8120 && j < LAST_GENERAL_REG)
8121 || j == MACH_REG
8122 || j == MACL_REG
8123 || j == GBR_REG)))
8124 pop (j);
8125
8126 if (j == FIRST_FP_REG && fpscr_deferred)
8127 pop (FPSCR_REG);
8128 }
8129 }
8130 if (target_flags != save_flags && ! current_function_interrupt)
8131 emit_insn (gen_toggle_sz ());
8132 target_flags = save_flags;
8133
8134 output_stack_adjust (crtl->args.pretend_args_size
8135 + save_size + d_rounding
8136 + crtl->args.info.stack_regs * 8,
8137 stack_pointer_rtx, e, NULL, true);
8138
8139 if (crtl->calls_eh_return)
8140 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8141 EH_RETURN_STACKADJ_RTX));
8142
8143 /* Switch back to the normal stack if necessary. */
8144 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8145 emit_insn (gen_sp_switch_2 ());
8146
8147 /* Tell flow the insn that pops PR isn't dead. */
8148 /* PR_REG will never be live in SHmedia mode, and we don't need to
8149 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8150 by the return pattern. */
8151 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8152 emit_use (gen_rtx_REG (SImode, PR_REG));
8153 }
8154
8155 /* Emit code to change the current function's return address to RA.
8156 TEMP is available as a scratch register, if needed. */
8157 void
8158 sh_set_return_address (rtx ra, rtx tmp)
8159 {
8160 HARD_REG_SET live_regs_mask;
8161 int d;
8162 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8163 int pr_offset;
8164
8165 d = calc_live_regs (&live_regs_mask);
8166
8167 /* If pr_reg isn't live, we can set it (or the register given by
8168 sh_media_register_for_return) directly. */
8169 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8170 {
8171 rtx rr;
8172
8173 if (TARGET_SHMEDIA)
8174 {
8175 int rr_regno = sh_media_register_for_return ();
8176
8177 if (rr_regno < 0)
8178 rr_regno = pr_reg;
8179
8180 rr = gen_rtx_REG (DImode, rr_regno);
8181 }
8182 else
8183 rr = gen_rtx_REG (SImode, pr_reg);
8184
8185 emit_insn (GEN_MOV (rr, ra));
8186 /* Tell flow the register for return isn't dead. */
8187 emit_use (rr);
8188 return;
8189 }
8190
8191 if (TARGET_SH5)
8192 {
8193 int offset;
8194 save_schedule schedule;
8195 save_entry *entry;
8196
8197 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8198 offset = entry[1].offset;
8199 for (; entry->mode != VOIDmode; entry--)
8200 if (entry->reg == pr_reg)
8201 goto found;
8202
8203 /* We couldn't find the PR register. */
8204 gcc_unreachable ();
8205
8206 found:
8207 offset = entry->offset - offset;
8208 pr_offset = (rounded_frame_size (d) + offset
8209 + SHMEDIA_REGS_STACK_ADJUST ());
8210 }
8211 else
8212 pr_offset = rounded_frame_size (d);
8213
8214 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8215
8216 if (frame_pointer_needed)
8217 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8218 else
8219 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8220
8221 tmp = gen_frame_mem (Pmode, tmp);
8222 emit_insn (GEN_MOV (tmp, ra));
8223 /* Tell flow this store isn't dead. */
8224 emit_use (tmp);
8225 }
8226
8227 /* Clear variables at function end. */
8228 static void
8229 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8230 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8231 {
8232 }
8233
8234 static rtx
8235 sh_builtin_saveregs (void)
8236 {
8237 /* First unnamed integer register. */
8238 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8239 /* Number of integer registers we need to save. */
8240 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8241 /* First unnamed SFmode float reg */
8242 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8243 /* Number of SFmode float regs to save. */
8244 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8245 rtx regbuf, fpregs;
8246 int bufsize, regno;
8247 alias_set_type alias_set;
8248
8249 if (TARGET_SH5)
8250 {
8251 if (n_intregs)
8252 {
8253 int pushregs = n_intregs;
8254
8255 while (pushregs < NPARM_REGS (SImode) - 1
8256 && (CALL_COOKIE_INT_REG_GET
8257 (crtl->args.info.call_cookie,
8258 NPARM_REGS (SImode) - pushregs)
8259 == 1))
8260 {
8261 crtl->args.info.call_cookie
8262 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8263 - pushregs, 1);
8264 pushregs++;
8265 }
8266
8267 if (pushregs == NPARM_REGS (SImode))
8268 crtl->args.info.call_cookie
8269 |= (CALL_COOKIE_INT_REG (0, 1)
8270 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8271 else
8272 crtl->args.info.call_cookie
8273 |= CALL_COOKIE_STACKSEQ (pushregs);
8274
8275 crtl->args.pretend_args_size += 8 * n_intregs;
8276 }
8277 if (TARGET_SHCOMPACT)
8278 return const0_rtx;
8279 }
8280
8281 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8282 {
8283 error ("__builtin_saveregs not supported by this subtarget");
8284 return const0_rtx;
8285 }
8286
8287 if (TARGET_SHMEDIA)
8288 n_floatregs = 0;
8289
8290 /* Allocate block of memory for the regs. */
8291 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8292 Or can assign_stack_local accept a 0 SIZE argument? */
8293 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8294
8295 if (TARGET_SHMEDIA)
8296 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8297 else if (n_floatregs & 1)
8298 {
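/* Presumably: with an odd number of float registers to save, make the
   buffer start at an address that is 4 mod 8, so that the paired DFmode
   stores emitted further down stay 8-byte aligned; the extra word
   allocated below compensates for that offset.  */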
8299 rtx addr;
8300
8301 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8302 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8303 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8304 regbuf = change_address (regbuf, BLKmode, addr);
8305 }
8306 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8307 {
8308 rtx addr, mask;
8309
8310 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8311 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8312 XEXP (regbuf, 0), 4));
8313 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8314 emit_insn (gen_andsi3 (addr, addr, mask));
8315 regbuf = change_address (regbuf, BLKmode, addr);
8316 }
8317 else
8318 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8319 alias_set = get_varargs_alias_set ();
8320 set_mem_alias_set (regbuf, alias_set);
8321
8322 /* Save int args.
8323 This is optimized to only save the regs that are necessary. Explicitly
8324 named args need not be saved. */
8325 if (n_intregs > 0)
8326 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8327 adjust_address (regbuf, BLKmode,
8328 n_floatregs * UNITS_PER_WORD),
8329 n_intregs);
8330
8331 if (TARGET_SHMEDIA)
8332 /* Return the address of the regbuf. */
8333 return XEXP (regbuf, 0);
8334
8335 /* Save float args.
8336 This is optimized to only save the regs that are necessary. Explicitly
8337 named args need not be saved.
8338 We explicitly build a pointer to the buffer because it halves the insn
8339 count when not optimizing (otherwise the pointer is built for each reg
8340 saved).
8341 We emit the moves in reverse order so that we can use predecrement. */
8342
8343 fpregs = copy_to_mode_reg (Pmode,
8344 plus_constant (Pmode, XEXP (regbuf, 0),
8345 n_floatregs * UNITS_PER_WORD));
8346 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8347 {
8348 rtx mem;
8349 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8350 {
8351 emit_insn (gen_addsi3 (fpregs, fpregs,
8352 GEN_INT (-2 * UNITS_PER_WORD)));
8353 mem = change_address (regbuf, DFmode, fpregs);
8354 emit_move_insn (mem,
8355 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8356 }
8357 regno = first_floatreg;
8358 if (regno & 1)
8359 {
8360 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8361 mem = change_address (regbuf, SFmode, fpregs);
8362 emit_move_insn (mem,
8363 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8364 + regno - SH_REG_MSW_OFFSET));
8365 }
8366 }
8367 else
8368 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8369 {
8370 rtx mem;
8371
8372 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8373 mem = change_address (regbuf, SFmode, fpregs);
8374 emit_move_insn (mem,
8375 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8376 }
8377
8378 /* Return the address of the regbuf. */
8379 return XEXP (regbuf, 0);
8380 }
8381
8382 /* Define the `__builtin_va_list' type for the ABI. */
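/* On targets that use the SH-specific va_list (see below), the record built
   here corresponds roughly to:
     struct __va_list_tag {
       void *__va_next_o, *__va_next_o_limit;
       void *__va_next_fp, *__va_next_fp_limit;
       void *__va_next_stack;
     };  */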
8383 static tree
8384 sh_build_builtin_va_list (void)
8385 {
8386 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8387 tree record, type_decl;
8388
8389 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8390 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8391 return ptr_type_node;
8392
8393 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8394 type_decl = build_decl (BUILTINS_LOCATION,
8395 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8396
8397 f_next_o = build_decl (BUILTINS_LOCATION,
8398 FIELD_DECL, get_identifier ("__va_next_o"),
8399 ptr_type_node);
8400 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8401 FIELD_DECL,
8402 get_identifier ("__va_next_o_limit"),
8403 ptr_type_node);
8404 f_next_fp = build_decl (BUILTINS_LOCATION,
8405 FIELD_DECL, get_identifier ("__va_next_fp"),
8406 ptr_type_node);
8407 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8408 FIELD_DECL,
8409 get_identifier ("__va_next_fp_limit"),
8410 ptr_type_node);
8411 f_next_stack = build_decl (BUILTINS_LOCATION,
8412 FIELD_DECL, get_identifier ("__va_next_stack"),
8413 ptr_type_node);
8414
8415 DECL_FIELD_CONTEXT (f_next_o) = record;
8416 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8417 DECL_FIELD_CONTEXT (f_next_fp) = record;
8418 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8419 DECL_FIELD_CONTEXT (f_next_stack) = record;
8420
8421 TYPE_STUB_DECL (record) = type_decl;
8422 TYPE_NAME (record) = type_decl;
8423 TYPE_FIELDS (record) = f_next_o;
8424 DECL_CHAIN (f_next_o) = f_next_o_limit;
8425 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8426 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8427 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8428
8429 layout_type (record);
8430
8431 return record;
8432 }
8433
8434 /* Implement `va_start' for varargs and stdarg. */
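/* The buffer set up by __builtin_saveregs holds the unnamed FP argument
   registers at its start, followed by the unnamed integer argument
   registers.  next_fp / next_fp_limit and next_o / next_o_limit delimit
   those two areas, and next_stack points at the first anonymous argument
   passed on the stack.  */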
8435 static void
8436 sh_va_start (tree valist, rtx nextarg)
8437 {
8438 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8439 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8440 tree t, u;
8441 int nfp, nint;
8442
8443 if (TARGET_SH5)
8444 {
8445 expand_builtin_saveregs ();
8446 std_expand_builtin_va_start (valist, nextarg);
8447 return;
8448 }
8449
8450 if ((! TARGET_SH2E && ! TARGET_SH4)
8451 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8452 {
8453 std_expand_builtin_va_start (valist, nextarg);
8454 return;
8455 }
8456
8457 f_next_o = TYPE_FIELDS (va_list_type_node);
8458 f_next_o_limit = DECL_CHAIN (f_next_o);
8459 f_next_fp = DECL_CHAIN (f_next_o_limit);
8460 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8461 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8462
8463 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8464 NULL_TREE);
8465 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8466 valist, f_next_o_limit, NULL_TREE);
8467 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8468 NULL_TREE);
8469 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8470 valist, f_next_fp_limit, NULL_TREE);
8471 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8472 valist, f_next_stack, NULL_TREE);
8473
8474 /* Call __builtin_saveregs. */
8475 u = make_tree (sizetype, expand_builtin_saveregs ());
8476 u = fold_convert (ptr_type_node, u);
8477 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8478 TREE_SIDE_EFFECTS (t) = 1;
8479 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8480
8481 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8482 if (nfp < 8)
8483 nfp = 8 - nfp;
8484 else
8485 nfp = 0;
8486 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8487 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8488 TREE_SIDE_EFFECTS (t) = 1;
8489 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8490
8491 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8492 TREE_SIDE_EFFECTS (t) = 1;
8493 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8494
8495 nint = crtl->args.info.arg_count[SH_ARG_INT];
8496 if (nint < 4)
8497 nint = 4 - nint;
8498 else
8499 nint = 0;
8500 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8501 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8502 TREE_SIDE_EFFECTS (t) = 1;
8503 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8504
8505 u = make_tree (ptr_type_node, nextarg);
8506 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8507 TREE_SIDE_EFFECTS (t) = 1;
8508 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8509 }
8510
8511 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8512 member, return it. */
8513 static tree
8514 find_sole_member (tree type)
8515 {
8516 tree field, member = NULL_TREE;
8517
8518 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8519 {
8520 if (TREE_CODE (field) != FIELD_DECL)
8521 continue;
8522 if (!DECL_SIZE (field))
8523 return NULL_TREE;
8524 if (integer_zerop (DECL_SIZE (field)))
8525 continue;
8526 if (member)
8527 return NULL_TREE;
8528 member = field;
8529 }
8530 return member;
8531 }
8532
8533 /* Implement `va_arg'. */
8534 static tree
8535 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8536 gimple_seq *post_p ATTRIBUTE_UNUSED)
8537 {
8538 HOST_WIDE_INT size, rsize;
8539 tree tmp, pptr_type_node;
8540 tree addr, lab_over = NULL, result = NULL;
8541 bool pass_by_ref;
8542 tree eff_type;
8543
8544 if (!VOID_TYPE_P (type))
8545 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8546 else
8547 pass_by_ref = false;
8548
8549 if (pass_by_ref)
8550 type = build_pointer_type (type);
8551
8552 size = int_size_in_bytes (type);
8553 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8554 pptr_type_node = build_pointer_type (ptr_type_node);
8555
8556 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8557 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8558 {
8559 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8560 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8561 int pass_as_float;
8562 tree lab_false;
8563 tree member;
8564
8565 f_next_o = TYPE_FIELDS (va_list_type_node);
8566 f_next_o_limit = DECL_CHAIN (f_next_o);
8567 f_next_fp = DECL_CHAIN (f_next_o_limit);
8568 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8569 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8570
8571 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8572 NULL_TREE);
8573 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8574 valist, f_next_o_limit, NULL_TREE);
8575 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8576 valist, f_next_fp, NULL_TREE);
8577 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8578 valist, f_next_fp_limit, NULL_TREE);
8579 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8580 valist, f_next_stack, NULL_TREE);
8581
8582 /* Structures with a single member with a distinct mode are passed
8583 like their member. This is relevant if the latter has a REAL_TYPE
8584 or COMPLEX_TYPE type. */
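/* For example, struct { double d; } is handled here like a plain double.  */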
8585 eff_type = type;
8586 while (TREE_CODE (eff_type) == RECORD_TYPE
8587 && (member = find_sole_member (eff_type))
8588 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8589 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8590 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8591 {
8592 tree field_type = TREE_TYPE (member);
8593
8594 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8595 eff_type = field_type;
8596 else
8597 {
8598 gcc_assert ((TYPE_ALIGN (eff_type)
8599 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8600 || (TYPE_ALIGN (eff_type)
8601 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8602 break;
8603 }
8604 }
8605
8606 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8607 {
8608 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8609 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8610 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8611 && size <= 16));
8612 }
8613 else
8614 {
8615 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8616 }
8617
8618 addr = create_tmp_var (pptr_type_node, NULL);
8619 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8620 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8621
8622 valist = build_simple_mem_ref (addr);
8623
8624 if (pass_as_float)
8625 {
8626 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8627 tree cmp;
8628 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8629
8630 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8631 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8632
8633 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8634 tmp = next_fp_limit;
8635 if (size > 4 && !is_double)
8636 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8637 tmp = build2 (GE_EXPR, boolean_type_node,
8638 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8639 cmp = build3 (COND_EXPR, void_type_node, tmp,
8640 build1 (GOTO_EXPR, void_type_node,
8641 unshare_expr (lab_false)), NULL_TREE);
8642 if (!is_double)
8643 gimplify_and_add (cmp, pre_p);
8644
8645 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8646 || (is_double || size == 16))
8647 {
8648 tmp = fold_convert (sizetype, next_fp_tmp);
8649 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8650 size_int (UNITS_PER_WORD));
8651 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8652 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8653 }
8654 if (is_double)
8655 gimplify_and_add (cmp, pre_p);
8656
8657 #ifdef FUNCTION_ARG_SCmode_WART
8658 if (TYPE_MODE (eff_type) == SCmode
8659 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8660 {
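/* Fetch the imaginary part first, then the real part; the two halves of
   an SCmode argument apparently arrive in this swapped order on
   little-endian SH4 (see FUNCTION_ARG_SCmode_WART).  */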
8661 tree subtype = TREE_TYPE (eff_type);
8662 tree real, imag;
8663
8664 imag
8665 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8666 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8667
8668 real
8669 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8670 real = get_initialized_tmp_var (real, pre_p, NULL);
8671
8672 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8673 if (type != eff_type)
8674 result = build1 (VIEW_CONVERT_EXPR, type, result);
8675 result = get_initialized_tmp_var (result, pre_p, NULL);
8676 }
8677 #endif /* FUNCTION_ARG_SCmode_WART */
8678
8679 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8680 gimplify_and_add (tmp, pre_p);
8681
8682 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8683 gimplify_and_add (tmp, pre_p);
8684
8685 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8686 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8687 gimplify_assign (unshare_expr (next_fp_tmp),
8688 unshare_expr (valist), pre_p);
8689
8690 gimplify_assign (unshare_expr (valist),
8691 unshare_expr (next_fp_tmp), post_p);
8692 valist = next_fp_tmp;
8693 }
8694 else
8695 {
8696 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8697 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8698 unshare_expr (next_o_limit));
8699 tmp = build3 (COND_EXPR, void_type_node, tmp,
8700 build1 (GOTO_EXPR, void_type_node,
8701 unshare_expr (lab_false)),
8702 NULL_TREE);
8703 gimplify_and_add (tmp, pre_p);
8704
8705 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8706 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8707
8708 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8709 gimplify_and_add (tmp, pre_p);
8710
8711 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8712 gimplify_and_add (tmp, pre_p);
8713
8714 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8715 gimplify_assign (unshare_expr (next_o),
8716 unshare_expr (next_o_limit), pre_p);
8717
8718 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8719 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8720 }
8721
8722 if (!result)
8723 {
8724 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8725 gimplify_and_add (tmp, pre_p);
8726 }
8727 }
8728
8729 /* ??? In va-sh.h, there had been code to make values larger than
8730 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8731
8732 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8733 if (result)
8734 {
8735 gimplify_assign (result, tmp, pre_p);
8736 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8737 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8738 gimplify_and_add (tmp, pre_p);
8739 }
8740 else
8741 result = tmp;
8742
8743 if (pass_by_ref)
8744 result = build_va_arg_indirect_ref (result);
8745
8746 return result;
8747 }
8748
8749 /* 64 bit floating point memory transfers are done as pairs of single
8750 precision loads or stores. So the DWARF information needs fixing in
8751 little endian mode (unless PR=SZ=1 in FPSCR). */
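/* For example, in little endian mode a DFmode value living in fr0/fr1 is
   described to the unwinder as the register pair (fr1, fr0).  */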
8752 rtx
8753 sh_dwarf_register_span (rtx reg)
8754 {
8755 unsigned regno = REGNO (reg);
8756
8757 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8758 return NULL_RTX;
8759
8760 return
8761 gen_rtx_PARALLEL (VOIDmode,
8762 gen_rtvec (2,
8763 gen_rtx_REG (SFmode, regno + 1),
8764 gen_rtx_REG (SFmode, regno)));
8765 }
8766
8767 static enum machine_mode
8768 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8769 int *punsignedp, const_tree funtype,
8770 int for_return)
8771 {
8772 if (sh_promote_prototypes (funtype))
8773 return promote_mode (type, mode, punsignedp);
8774 else
8775 return default_promote_function_mode (type, mode, punsignedp, funtype,
8776 for_return);
8777 }
8778
8779 static bool
8780 sh_promote_prototypes (const_tree type)
8781 {
8782 if (TARGET_HITACHI)
8783 return false;
8784 if (! type)
8785 return true;
8786 return ! sh_attr_renesas_p (type);
8787 }
8788
8789 /* Whether an argument must be passed by reference. On SHcompact, we
8790 pretend arguments wider than 32-bits that would have been passed in
8791 registers are passed by reference, so that an SHmedia trampoline
8792 loads them into the full 64-bits registers. */
8793 static int
8794 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8795 const_tree type, bool named)
8796 {
8797 unsigned HOST_WIDE_INT size;
8798
8799 if (type)
8800 size = int_size_in_bytes (type);
8801 else
8802 size = GET_MODE_SIZE (mode);
8803
8804 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8805 && (!named
8806 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8807 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8808 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8809 && size > 4
8810 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8811 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8812 return size;
8813 else
8814 return 0;
8815 }
8816
8817 static bool
8818 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8819 const_tree type, bool named)
8820 {
8821 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8822
8823 if (targetm.calls.must_pass_in_stack (mode, type))
8824 return true;
8825
8826 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8827 wants to know about pass-by-reference semantics for incoming
8828 arguments. */
8829 if (! cum)
8830 return false;
8831
8832 if (TARGET_SHCOMPACT)
8833 {
8834 cum->byref = shcompact_byref (cum, mode, type, named);
8835 return cum->byref != 0;
8836 }
8837
8838 return false;
8839 }
8840
8841 static bool
8842 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8843 const_tree type, bool named ATTRIBUTE_UNUSED)
8844 {
8845 /* ??? How can it possibly be correct to return true only on the
8846 caller side of the equation? Is there someplace else in the
8847 sh backend that's magically producing the copies? */
8848 return (get_cumulative_args (cum)->outgoing
8849 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8850 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8851 }
8852
8853 /* Round a register number up to a proper boundary for an arg of mode
8854 MODE.
8855 The SH doesn't care about double alignment, so we only
8856 round doubles to even regs when explicitly asked to. */
8857 static int
8858 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
8859 {
8860 /* FIXME: This used to be a macro and has been copy pasted into this
8861 function as is. Make this more readable. */
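/* Roughly: when double-word alignment is in effect (either because
   TARGET_ALIGN_DOUBLE is set, or because this is a DFmode/DCmode argument
   that still fits in the FP argument registers on SH4/SH2A) and the mode is
   wider than one word, round the per-class argument counter up to an even
   register number; otherwise return the counter unchanged.  */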
8862 return
8863 (((TARGET_ALIGN_DOUBLE
8864 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
8865 && (mode == DFmode || mode == DCmode)
8866 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
8867 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
8868 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
8869 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
8870 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
8871 }
8872
8873 /* Return true if an arg of the specified mode should be passed in a register
8874 or false otherwise. */
8875 static bool
8876 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
8877 const_tree type)
8878 {
8879 /* FIXME: This used to be a macro and has been copy pasted into this
8880 function as is. Make this more readable. */
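/* In short (a reading of the condition below, not a separate specification):
   the argument is a register candidate if its type is not addressable, is not
   excluded by the Hitachi/Renesas ABI rules for aggregates and no-FPU wide FP
   modes, has not been forced to memory, and the rounded argument register
   counter plus the registers the argument needs still fits within NPARM_REGS
   for its class (with a byte-based check for BLKmode on SH2E).  */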
8881 return
8882 ((type == 0
8883 || (! TREE_ADDRESSABLE (type)
8884 && (! (TARGET_HITACHI || cum.renesas_abi)
8885 || ! (AGGREGATE_TYPE_P (type)
8886 || (!TARGET_FPU_ANY
8887 && (GET_MODE_CLASS (mode) == MODE_FLOAT
8888 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
8889 && ! cum.force_mem
8890 && (TARGET_SH2E
8891 ? ((mode) == BLKmode
8892 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
8893 + int_size_in_bytes (type))
8894 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
8895 : ((sh_round_reg (cum, mode)
8896 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
8897 <= NPARM_REGS (mode)))
8898 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
8899 }
8900
8901 static int
8902 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8903 tree type, bool named ATTRIBUTE_UNUSED)
8904 {
8905 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8906 int words = 0;
8907
8908 if (!TARGET_SH5
8909 && sh_pass_in_reg_p (*cum, mode, type)
8910 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8911 && (sh_round_reg (*cum, mode)
8912 + (mode != BLKmode
8913 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
8914 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
8915 > NPARM_REGS (mode)))
8916 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
8917
8918 else if (!TARGET_SHCOMPACT
8919 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8920 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8921
8922 return words * UNITS_PER_WORD;
8923 }
8924
8925
8926 /* Define where to put the arguments to a function.
8927 Value is zero to push the argument on the stack,
8928 or a hard register in which to store the argument.
8929
8930 MODE is the argument's machine mode.
8931 TYPE is the data type of the argument (as a tree).
8932 This is null for libcalls where that information may
8933 not be available.
8934 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8935 the preceding args and about the function being called.
8936 NAMED is nonzero if this argument is a named parameter
8937 (otherwise it is an extra parameter matching an ellipsis).
8938
8939 On SH the first args are normally in registers
8940 and the rest are pushed. Any arg that starts within the first
8941 NPARM_REGS words is at least partially passed in a register unless
8942 its data type forbids. */
8943 static rtx
8944 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8945 const_tree type, bool named)
8946 {
8947 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8948
8949 if (! TARGET_SH5 && mode == VOIDmode)
8950 return GEN_INT (ca->renesas_abi ? 1 : 0);
8951
8952 if (! TARGET_SH5
8953 && sh_pass_in_reg_p (*ca, mode, type)
8954 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8955 {
8956 int regno;
8957
8958 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8959 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8960 {
8961 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8962 gen_rtx_REG (SFmode,
8963 BASE_ARG_REG (mode)
8964 + (sh_round_reg (*ca, mode) ^ 1)),
8965 const0_rtx);
8966 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8967 gen_rtx_REG (SFmode,
8968 BASE_ARG_REG (mode)
8969 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8970 GEN_INT (4));
8971 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8972 }
8973
8974 /* If the alignment of a DF value causes an SF register to be
8975 skipped, we will use that skipped register for the next SF
8976 value. */
8977 if ((TARGET_HITACHI || ca->renesas_abi)
8978 && ca->free_single_fp_reg
8979 && mode == SFmode)
8980 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8981
8982 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8983 ^ (mode == SFmode && TARGET_SH4
8984 && TARGET_LITTLE_ENDIAN
8985 && ! TARGET_HITACHI && ! ca->renesas_abi);
8986 return gen_rtx_REG (mode, regno);
8987
8988 }
8989
8990 if (TARGET_SH5)
8991 {
8992 if (mode == VOIDmode && TARGET_SHCOMPACT)
8993 return GEN_INT (ca->call_cookie);
8994
8995 /* The following test assumes unnamed arguments are promoted to
8996 DFmode. */
8997 if (mode == SFmode && ca->free_single_fp_reg)
8998 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8999
9000 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
9001 && (named || ! ca->prototype_p)
9002 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
9003 {
9004 if (! ca->prototype_p && TARGET_SHMEDIA)
9005 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
9006
9007 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
9008 FIRST_FP_PARM_REG
9009 + ca->arg_count[(int) SH_ARG_FLOAT]);
9010 }
9011
9012 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9013 && (! TARGET_SHCOMPACT
9014 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9015 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9016 type, named))))
9017 {
9018 return gen_rtx_REG (mode, (FIRST_PARM_REG
9019 + ca->arg_count[(int) SH_ARG_INT]));
9020 }
9021
9022 return NULL_RTX;
9023 }
9024
9025 return NULL_RTX;
9026 }
9027
9028 /* Update the data in CUM to advance over an argument
9029 of mode MODE and data type TYPE.
9030 (TYPE is null for libcalls where that information may not be
9031 available.) */
9032 static void
9033 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
9034 const_tree type, bool named)
9035 {
9036 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9037
9038 if (ca->force_mem)
9039 ca->force_mem = 0;
9040 else if (TARGET_SH5)
9041 {
9042 const_tree type2 = (ca->byref && type
9043 ? TREE_TYPE (type)
9044 : type);
9045 enum machine_mode mode2 = (ca->byref && type
9046 ? TYPE_MODE (type2)
9047 : mode);
9048 int dwords = ((ca->byref
9049 ? ca->byref
9050 : mode2 == BLKmode
9051 ? int_size_in_bytes (type2)
9052 : GET_MODE_SIZE (mode2)) + 7) / 8;
9053 int numregs = MIN (dwords, NPARM_REGS (SImode)
9054 - ca->arg_count[(int) SH_ARG_INT]);
9055
9056 if (numregs)
9057 {
9058 ca->arg_count[(int) SH_ARG_INT] += numregs;
9059 if (TARGET_SHCOMPACT
9060 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9061 {
9062 ca->call_cookie
9063 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9064 - numregs, 1);
9065 /* N.B. We want this also for outgoing. */
9066 ca->stack_regs += numregs;
9067 }
9068 else if (ca->byref)
9069 {
9070 if (! ca->outgoing)
9071 ca->stack_regs += numregs;
9072 ca->byref_regs += numregs;
9073 ca->byref = 0;
9074 do
9075 ca->call_cookie
9076 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9077 - numregs, 2);
9078 while (--numregs);
9079 ca->call_cookie
9080 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9081 - 1, 1);
9082 }
9083 else if (dwords > numregs)
9084 {
9085 int pushregs = numregs;
9086
9087 if (TARGET_SHCOMPACT)
9088 ca->stack_regs += numregs;
9089 while (pushregs < NPARM_REGS (SImode) - 1
9090 && (CALL_COOKIE_INT_REG_GET
9091 (ca->call_cookie,
9092 NPARM_REGS (SImode) - pushregs)
9093 == 1))
9094 {
9095 ca->call_cookie
9096 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9097 - pushregs, 1);
9098 pushregs++;
9099 }
9100 if (numregs == NPARM_REGS (SImode))
9101 ca->call_cookie
9102 |= CALL_COOKIE_INT_REG (0, 1)
9103 | CALL_COOKIE_STACKSEQ (numregs - 1);
9104 else
9105 ca->call_cookie
9106 |= CALL_COOKIE_STACKSEQ (numregs);
9107 }
9108 }
9109 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9110 && (named || ! ca->prototype_p))
9111 {
9112 if (mode2 == SFmode && ca->free_single_fp_reg)
9113 ca->free_single_fp_reg = 0;
9114 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9115 < NPARM_REGS (SFmode))
9116 {
9117 int numfpregs
9118 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9119 NPARM_REGS (SFmode)
9120 - ca->arg_count[(int) SH_ARG_FLOAT]);
9121
9122 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9123
9124 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9125 {
9126 if (ca->outgoing && numregs > 0)
9127 do
9128 {
9129 ca->call_cookie
9130 |= (CALL_COOKIE_INT_REG
9131 (ca->arg_count[(int) SH_ARG_INT]
9132 - numregs + ((numfpregs - 2) / 2),
9133 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9134 - numfpregs) / 2));
9135 }
9136 while (numfpregs -= 2);
9137 }
9138 else if (mode2 == SFmode && (named)
9139 && (ca->arg_count[(int) SH_ARG_FLOAT]
9140 < NPARM_REGS (SFmode)))
9141 ca->free_single_fp_reg
9142 = FIRST_FP_PARM_REG - numfpregs
9143 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9144 }
9145 }
9146 return;
9147 }
9148
9149 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9150 {
9151 /* Note that we've used the skipped register. */
9152 if (mode == SFmode && ca->free_single_fp_reg)
9153 {
9154 ca->free_single_fp_reg = 0;
9155 return;
9156 }
9157 /* When we have a DF after an SF, there's an SF register that gets
9158 skipped in order to align the DF value. We note this skipped
9159 register, because the next SF value will use it, and not the
9160 SF that follows the DF. */
9161 if (mode == DFmode
9162 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9163 {
9164 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9165 + BASE_ARG_REG (mode));
9166 }
9167 }
9168
9169 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9170 || sh_pass_in_reg_p (*ca, mode, type))
9171 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9172 = (sh_round_reg (*ca, mode)
9173 + (mode == BLKmode
9174 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9175 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9176 }
9177
9178 /* The Renesas calling convention doesn't quite fit into this scheme since
9179 the address is passed like an invisible argument, but one that is always
9180 passed in memory. */
9181 static rtx
9182 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9183 {
9184 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9185 return NULL_RTX;
9186 return gen_rtx_REG (Pmode, 2);
9187 }
9188
9189 /* Worker function for TARGET_FUNCTION_VALUE.
9190
9191 For the SH, this is like LIBCALL_VALUE, except that we must change the
9192 mode like PROMOTE_MODE does.
9193 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9194 tested here has to be kept in sync with the one in
9195 explow.c:promote_mode. */
9196 static rtx
9197 sh_function_value (const_tree valtype,
9198 const_tree fn_decl_or_type,
9199 bool outgoing ATTRIBUTE_UNUSED)
9200 {
9201 if (fn_decl_or_type
9202 && !DECL_P (fn_decl_or_type))
9203 fn_decl_or_type = NULL;
9204
9205 return gen_rtx_REG (
9206 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9207 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9208 && (TREE_CODE (valtype) == INTEGER_TYPE
9209 || TREE_CODE (valtype) == ENUMERAL_TYPE
9210 || TREE_CODE (valtype) == BOOLEAN_TYPE
9211 || TREE_CODE (valtype) == REAL_TYPE
9212 || TREE_CODE (valtype) == OFFSET_TYPE))
9213 && sh_promote_prototypes (fn_decl_or_type)
9214 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9215 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9216 }
9217
9218 /* Worker function for TARGET_LIBCALL_VALUE. */
9219 static rtx
9220 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9221 {
9222 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9223 }
9224
9225 /* Return true if N is a possible register number of function value. */
9226 static bool
9227 sh_function_value_regno_p (const unsigned int regno)
9228 {
9229 return ((regno) == FIRST_RET_REG
9230 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9231 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9232 }
9233
9234 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9235 static bool
9236 sh_return_in_memory (const_tree type, const_tree fndecl)
9237 {
9238 if (TARGET_SH5)
9239 {
9240 if (TYPE_MODE (type) == BLKmode)
9241 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9242 else
9243 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9244 }
9245 else
9246 {
9247 return (TYPE_MODE (type) == BLKmode
9248 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9249 && TREE_CODE (type) == RECORD_TYPE));
9250 }
9251 }
9252
9253 /* We actually emit the code in sh_expand_prologue. We used to use
9254 a static variable to flag that we need to emit this code, but that
9255 doesn't work when inlining, when functions are deferred and then emitted
9256 later. Fortunately, we already have two flags that are part of struct
9257 function that tell if a function uses varargs or stdarg. */
9258 static void
9259 sh_setup_incoming_varargs (cumulative_args_t ca,
9260 enum machine_mode mode,
9261 tree type,
9262 int *pretend_arg_size,
9263 int second_time ATTRIBUTE_UNUSED)
9264 {
9265 gcc_assert (cfun->stdarg);
9266 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9267 {
9268 int named_parm_regs, anon_parm_regs;
9269
9270 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9271 + (mode == BLKmode
9272 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9273 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9274 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9275 if (anon_parm_regs > 0)
9276 *pretend_arg_size = anon_parm_regs * 4;
9277 }
9278 }
9279
9280 static bool
9281 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9282 {
9283 return TARGET_SH5;
9284 }
9285
9286 static bool
9287 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9288 {
9289 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9290
9291 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9292 }
9293
9294
9295 /* Define the offset between two registers, one to be eliminated, and
9296 the other its replacement, at the start of a routine. */
9297 int
9298 initial_elimination_offset (int from, int to)
9299 {
9300 int regs_saved;
9301 int regs_saved_rounding = 0;
9302 int total_saved_regs_space;
9303 int total_auto_space;
9304 int save_flags = target_flags;
9305 int copy_flags;
9306 HARD_REG_SET live_regs_mask;
9307
9308 shmedia_space_reserved_for_target_registers = false;
9309 regs_saved = calc_live_regs (&live_regs_mask);
9310 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9311
9312 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9313 {
9314 shmedia_space_reserved_for_target_registers = true;
9315 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9316 }
9317
9318 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9319 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9320 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9321
9322 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9323 copy_flags = target_flags;
9324 target_flags = save_flags;
9325
9326 total_saved_regs_space = regs_saved + regs_saved_rounding;
9327
9328 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9329 return total_saved_regs_space + total_auto_space
9330 + crtl->args.info.byref_regs * 8;
9331
9332 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9333 return total_saved_regs_space + total_auto_space
9334 + crtl->args.info.byref_regs * 8;
9335
9336 /* Initial gap between fp and sp is 0. */
9337 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9338 return 0;
9339
9340 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9341 return rounded_frame_size (0);
9342
9343 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9344 return rounded_frame_size (0);
9345
9346 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9347 && (to == HARD_FRAME_POINTER_REGNUM
9348 || to == STACK_POINTER_REGNUM));
9349 if (TARGET_SH5)
9350 {
9351 int n = total_saved_regs_space;
9352 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9353 save_schedule schedule;
9354 save_entry *entry;
9355
9356 n += total_auto_space;
9357
9358 /* If it wasn't saved, there's not much we can do. */
9359 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9360 return n;
9361
9362 target_flags = copy_flags;
9363
9364 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9365 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9366 if (entry->reg == pr_reg)
9367 {
9368 target_flags = save_flags;
9369 return entry->offset;
9370 }
9371 gcc_unreachable ();
9372 }
9373 else
9374 return total_auto_space;
9375 }
9376
9377 /* Parse the -mfixed-range= option string. */
9378 void
9379 sh_fix_range (const char *const_str)
9380 {
9381 int i, first, last;
9382 char *str, *dash, *comma;
9383
9384 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9385 REG2 are either register names or register numbers. The effect
9386 of this option is to mark the registers in the range from REG1 to
9387 REG2 as ``fixed'' so they won't be used by the compiler. */
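/* For example (illustrative), -mfixed-range=r10-r12,r14-r14 would mark
   r10, r11, r12 and r14 as fixed and call-used, assuming those names are
   accepted by decode_reg_name for this target.  */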
9388
9389 i = strlen (const_str);
9390 str = (char *) alloca (i + 1);
9391 memcpy (str, const_str, i + 1);
9392
9393 while (1)
9394 {
9395 dash = strchr (str, '-');
9396 if (!dash)
9397 {
9398 warning (0, "value of -mfixed-range must have form REG1-REG2");
9399 return;
9400 }
9401 *dash = '\0';
9402 comma = strchr (dash + 1, ',');
9403 if (comma)
9404 *comma = '\0';
9405
9406 first = decode_reg_name (str);
9407 if (first < 0)
9408 {
9409 warning (0, "unknown register name: %s", str);
9410 return;
9411 }
9412
9413 last = decode_reg_name (dash + 1);
9414 if (last < 0)
9415 {
9416 warning (0, "unknown register name: %s", dash + 1);
9417 return;
9418 }
9419
9420 *dash = '-';
9421
9422 if (first > last)
9423 {
9424 warning (0, "%s-%s is an empty range", str, dash + 1);
9425 return;
9426 }
9427
9428 for (i = first; i <= last; ++i)
9429 fixed_regs[i] = call_used_regs[i] = 1;
9430
9431 if (!comma)
9432 break;
9433
9434 *comma = ',';
9435 str = comma + 1;
9436 }
9437 }
9438 \f
9439 /* Insert any deferred function attributes from earlier pragmas. */
9440 static void
9441 sh_insert_attributes (tree node, tree *attributes)
9442 {
9443 tree attrs;
9444
9445 if (TREE_CODE (node) != FUNCTION_DECL)
9446 return;
9447
9448 /* We are only interested in declarations. */
9449 if (!DECL_P (node))
9450 return;
9451
9452 /* Append the attributes to the deferred attributes. */
9453 *sh_deferred_function_attributes_tail = *attributes;
9454 attrs = sh_deferred_function_attributes;
9455 if (!attrs)
9456 return;
9457
9458 /* Some attributes imply or require the interrupt attribute. */
9459 if (!lookup_attribute ("interrupt_handler", attrs)
9460 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9461 {
9462 /* If we have a trapa_handler, but no interrupt_handler attribute,
9463 insert an interrupt_handler attribute. */
9464 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9465 /* We can't use sh_pr_interrupt here because that's not in the
9466 java frontend. */
9467 attrs
9468 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9469 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9470 if the interrupt attribute is missing, we ignore the attribute
9471 and warn. */
9472 else if (lookup_attribute ("sp_switch", attrs)
9473 || lookup_attribute ("trap_exit", attrs)
9474 || lookup_attribute ("nosave_low_regs", attrs)
9475 || lookup_attribute ("resbank", attrs))
9476 {
9477 tree *tail;
9478
9479 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9480 {
9481 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9482 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9483 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9484 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9485 warning (OPT_Wattributes,
9486 "%qE attribute only applies to interrupt functions",
9487 TREE_PURPOSE (attrs));
9488 else
9489 {
9490 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9491 NULL_TREE);
9492 tail = &TREE_CHAIN (*tail);
9493 }
9494 }
9495 attrs = *attributes;
9496 }
9497 }
9498
9499 /* Install the processed list. */
9500 *attributes = attrs;
9501
9502 /* Clear deferred attributes. */
9503 sh_deferred_function_attributes = NULL_TREE;
9504 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9505
9506 return;
9507 }
9508
9509 /*------------------------------------------------------------------------------
9510 Target specific attributes
9511 Supported attributes are:
9512
9513 * interrupt_handler
9514 Specifies this function is an interrupt handler.
9515
9516 * trapa_handler
9517 Like interrupt_handler, but don't save all registers.
9518
9519 * sp_switch
9520 Specifies an alternate stack for an interrupt handler to run on.
9521
9522 * trap_exit
9523 Use a trapa to exit an interrupt function instead of rte.
9524
9525 * nosave_low_regs
9526 Don't save r0..r7 in an interrupt handler function.
9527 This is useful on SH3* and SH4*, which have a separate set of low
9528 regs for user and privileged modes.
9529 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9530 those that run with interrupts disabled and thus can't be
9531 interrupted themselves).
9532
9533 * renesas
9534 Use Renesas calling/layout conventions (functions and structures).
9535
9536 * resbank
9537 In case of an interrupt handler function, use a register bank to
9538 save registers R0-R14, MACH, MACL, GBR and PR.
9539 This is available only on SH2A targets.
9540
9541 * function_vector
9542 Declares a function to be called using the TBR relative addressing
9543 mode. Takes an argument that specifies the slot number in the table
9544 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
9545 */
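/* Illustrative uses of these attributes (made-up declarations, not taken
   from any particular source):

     void isr (void) __attribute__ ((interrupt_handler,
                                     sp_switch ("alt_stack"),
                                     trap_exit (11)));
     void bank_isr (void) __attribute__ ((interrupt_handler, resbank));
     void fv18 (void) __attribute__ ((function_vector (18)));

   Here "alt_stack", 11 and 18 are arbitrary example values; sp_switch expects
   a string constant, trap_exit an integer constant, and function_vector an
   integer constant in the range 0..255 (see the handlers below).  */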
9546
9547 /* Handle a 'resbank' attribute. */
9548 static tree
9549 sh_handle_resbank_handler_attribute (tree * node, tree name,
9550 tree args ATTRIBUTE_UNUSED,
9551 int flags ATTRIBUTE_UNUSED,
9552 bool * no_add_attrs)
9553 {
9554 if (!TARGET_SH2A)
9555 {
9556 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9557 name);
9558 *no_add_attrs = true;
9559 }
9560 if (TREE_CODE (*node) != FUNCTION_DECL)
9561 {
9562 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9563 name);
9564 *no_add_attrs = true;
9565 }
9566
9567 return NULL_TREE;
9568 }
9569
9570 /* Handle an "interrupt_handler" attribute; arguments as in
9571 struct attribute_spec.handler. */
9572 static tree
9573 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9574 tree args ATTRIBUTE_UNUSED,
9575 int flags ATTRIBUTE_UNUSED,
9576 bool *no_add_attrs)
9577 {
9578 if (TREE_CODE (*node) != FUNCTION_DECL)
9579 {
9580 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9581 name);
9582 *no_add_attrs = true;
9583 }
9584 else if (TARGET_SHCOMPACT)
9585 {
9586 error ("attribute interrupt_handler is not compatible with -m5-compact");
9587 *no_add_attrs = true;
9588 }
9589
9590 return NULL_TREE;
9591 }
9592
9593 /* Handle a 'function_vector' attribute; arguments as in
9594 struct attribute_spec.handler. */
9595 static tree
9596 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9597 tree args ATTRIBUTE_UNUSED,
9598 int flags ATTRIBUTE_UNUSED,
9599 bool * no_add_attrs)
9600 {
9601 if (!TARGET_SH2A)
9602 {
9603 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9604 name);
9605 *no_add_attrs = true;
9606 }
9607 else if (TREE_CODE (*node) != FUNCTION_DECL)
9608 {
9609 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9610 name);
9611 *no_add_attrs = true;
9612 }
9613 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9614 {
9615 /* The argument must be a constant integer. */
9616 warning (OPT_Wattributes,
9617 "%qE attribute argument not an integer constant",
9618 name);
9619 *no_add_attrs = true;
9620 }
9621 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9622 {
9623 /* The argument value must be between 0 and 255. */
9624 warning (OPT_Wattributes,
9625 "%qE attribute argument should be between 0 to 255",
9626 name);
9627 *no_add_attrs = true;
9628 }
9629 return NULL_TREE;
9630 }
9631
9632 /* Returns true if the symbol reference X refers to a function that has
9633 been assigned the 'function_vector' attribute. */
9634 bool
9635 sh2a_is_function_vector_call (rtx x)
9636 {
9637 if (GET_CODE (x) == SYMBOL_REF
9638 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9639 {
9640 tree tr = SYMBOL_REF_DECL (x);
9641
9642 if (sh2a_function_vector_p (tr))
9643 return true;
9644 }
9645
9646 return false;
9647 }
9648
9649 /* Returns the function vector number, if the attribute
9650 'function_vector' is assigned, otherwise returns zero. */
9651 int
9652 sh2a_get_function_vector_number (rtx x)
9653 {
9654 int num;
9655 tree list, t;
9656
9657 if ((GET_CODE (x) == SYMBOL_REF)
9658 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9659 {
9660 t = SYMBOL_REF_DECL (x);
9661
9662 if (TREE_CODE (t) != FUNCTION_DECL)
9663 return 0;
9664
9665 list = SH_ATTRIBUTES (t);
9666 while (list)
9667 {
9668 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9669 {
9670 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9671 return num;
9672 }
9673
9674 list = TREE_CHAIN (list);
9675 }
9676
9677 return 0;
9678 }
9679 else
9680 return 0;
9681 }
9682
9683 /* Handle an "sp_switch" attribute; arguments as in
9684 struct attribute_spec.handler. */
9685 static tree
9686 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9687 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9688 {
9689 if (TREE_CODE (*node) != FUNCTION_DECL)
9690 {
9691 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9692 name);
9693 *no_add_attrs = true;
9694 }
9695 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9696 {
9697 /* The argument must be a constant string. */
9698 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9699 name);
9700 *no_add_attrs = true;
9701 }
9702
9703 return NULL_TREE;
9704 }
9705
9706 /* Handle an "trap_exit" attribute; arguments as in
9707 struct attribute_spec.handler. */
9708 static tree
9709 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9710 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9711 {
9712 if (TREE_CODE (*node) != FUNCTION_DECL)
9713 {
9714 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9715 name);
9716 *no_add_attrs = true;
9717 }
9718 /* The argument specifies a trap number to be used in a trapa instruction
9719 at function exit (instead of an rte instruction). */
9720 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9721 {
9722 /* The argument must be a constant integer. */
9723 warning (OPT_Wattributes, "%qE attribute argument not an "
9724 "integer constant", name);
9725 *no_add_attrs = true;
9726 }
9727
9728 return NULL_TREE;
9729 }
9730
9731 static tree
9732 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9733 tree name ATTRIBUTE_UNUSED,
9734 tree args ATTRIBUTE_UNUSED,
9735 int flags ATTRIBUTE_UNUSED,
9736 bool *no_add_attrs ATTRIBUTE_UNUSED)
9737 {
9738 return NULL_TREE;
9739 }
9740
9741 /* True if __attribute__((renesas)) or -mrenesas. */
9742 bool
9743 sh_attr_renesas_p (const_tree td)
9744 {
9745 if (TARGET_HITACHI)
9746 return true;
9747 if (td == NULL_TREE)
9748 return false;
9749 if (DECL_P (td))
9750 td = TREE_TYPE (td);
9751 if (td == error_mark_node)
9752 return false;
9753 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9754 != NULL_TREE);
9755 }
9756
9757 /* True if __attribute__((renesas)) or -mrenesas, for the current
9758 function. */
9759 bool
9760 sh_cfun_attr_renesas_p (void)
9761 {
9762 return sh_attr_renesas_p (current_function_decl);
9763 }
9764
9765 /* Returns true if the current function has the "interrupt_handler"
9766 attribute set. */
9767 bool
9768 sh_cfun_interrupt_handler_p (void)
9769 {
9770 return (lookup_attribute ("interrupt_handler",
9771 DECL_ATTRIBUTES (current_function_decl))
9772 != NULL_TREE);
9773 }
9774
9775 /* Returns true if FUNC has been assigned the attribute
9776 "function_vector". */
9777 bool
9778 sh2a_function_vector_p (tree func)
9779 {
9780 tree list;
9781 if (TREE_CODE (func) != FUNCTION_DECL)
9782 return false;
9783
9784 list = SH_ATTRIBUTES (func);
9785 while (list)
9786 {
9787 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9788 return true;
9789
9790 list = TREE_CHAIN (list);
9791 }
9792 return false;
9793 }
9794
9795 /* Returns true if the current function has the "resbank" attribute set. */
9796 bool
9797 sh_cfun_resbank_handler_p (void)
9798 {
9799 return ((lookup_attribute ("resbank",
9800 DECL_ATTRIBUTES (current_function_decl))
9801 != NULL_TREE)
9802 && (lookup_attribute ("interrupt_handler",
9803 DECL_ATTRIBUTES (current_function_decl))
9804 != NULL_TREE) && TARGET_SH2A);
9805 }
9806
9807 /* Returns true if the current function has a "trap_exit" attribute set. */
9808 bool
9809 sh_cfun_trap_exit_p (void)
9810 {
9811 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9812 != NULL_TREE;
9813 }
9814
9815 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9816 static const char *
9817 sh_check_pch_target_flags (int old_flags)
9818 {
9819 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9820 | MASK_SH_E | MASK_HARD_SH4
9821 | MASK_FPU_SINGLE | MASK_SH4))
9822 return _("created and used with different architectures / ABIs");
9823 if ((old_flags ^ target_flags) & MASK_HITACHI)
9824 return _("created and used with different ABIs");
9825 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9826 return _("created and used with different endianness");
9827 return NULL;
9828 }
9829 \f
9830 /* Predicates used by the templates. */
9831
9832 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9833 Used only in general_movsrc_operand. */
9834 bool
9835 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9836 {
9837 switch (REGNO (op))
9838 {
9839 case PR_REG:
9840 case MACL_REG:
9841 case MACH_REG:
9842 return true;
9843 }
9844 return false;
9845 }
9846
9847 /* Returns true if OP is a floating point value with value 0.0. */
9848 bool
9849 fp_zero_operand (rtx op)
9850 {
9851 REAL_VALUE_TYPE r;
9852
9853 if (GET_MODE (op) != SFmode)
9854 return false;
9855
9856 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9857 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9858 }
9859
9860 /* Returns true if OP is a floating point value with value 1.0. */
9861 bool
9862 fp_one_operand (rtx op)
9863 {
9864 REAL_VALUE_TYPE r;
9865
9866 if (GET_MODE (op) != SFmode)
9867 return false;
9868
9869 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9870 return REAL_VALUES_EQUAL (r, dconst1);
9871 }
9872
9873 /* In general, mode switching is used. If we are
9874 compiling without -mfmovd, movsf_ie isn't taken into account for
9875 mode switching. We could check in machine_dependent_reorg for
9876 cases where we know we are in single precision mode, but there is no
9877 interface to find that out during reload, so we must avoid
9878 choosing an fldi alternative during reload and thus failing to
9879 allocate a scratch register for the constant loading. */
9880 bool
9881 fldi_ok (void)
9882 {
9883 return true;
9884 }
9885
9886 /* Return the TLS type for TLS symbols. */
9887 enum tls_model
9888 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9889 {
9890 if (GET_CODE (op) != SYMBOL_REF)
9891 return TLS_MODEL_NONE;
9892 return SYMBOL_REF_TLS_MODEL (op);
9893 }
9894 \f
9895 /* Return the destination address of a branch. */
9896 static int
9897 branch_dest (rtx branch)
9898 {
9899 rtx dest = SET_SRC (PATTERN (branch));
9900 int dest_uid;
9901
9902 if (GET_CODE (dest) == IF_THEN_ELSE)
9903 dest = XEXP (dest, 1);
9904 dest = XEXP (dest, 0);
9905 dest_uid = INSN_UID (dest);
9906 return INSN_ADDRESSES (dest_uid);
9907 }
9908 \f
9909 /* Return nonzero if REG is not used after INSN.
9910 We assume REG is a reload reg, and therefore does
9911 not live past labels. It may live past calls or jumps though. */
9912 bool
9913 reg_unused_after (rtx reg, rtx_insn *insn)
9914 {
9915 enum rtx_code code;
9916 rtx set;
9917
9918 /* If the reg is set by this instruction, then it is safe for our
9919 case. Disregard the case where this is a store to memory, since
9920 we are checking a register used in the store address. */
9921 set = single_set (insn);
9922 if (set && !MEM_P (SET_DEST (set))
9923 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9924 return true;
9925
9926 while ((insn = NEXT_INSN (insn)))
9927 {
9928 rtx set;
9929 if (!INSN_P (insn))
9930 continue;
9931
9932 code = GET_CODE (insn);
9933
9934 #if 0
9935 /* If this is a label that existed before reload, then the register
9936 is dead here. However, if this is a label added by reorg, then
9937 the register may still be live here. We can't tell the difference,
9938 so we just ignore labels completely. */
9939 if (code == CODE_LABEL)
9940 return 1;
9941 /* else */
9942 #endif
9943
9944 if (code == JUMP_INSN)
9945 return false;
9946
9947 /* If this is a sequence, we must handle them all at once.
9948 We could have for instance a call that sets the target register,
9949 and an insn in a delay slot that uses the register. In this case,
9950 we must return 0. */
9951 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9952 {
9953 int i;
9954 int retval = 0;
9955
9956 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9957 {
9958 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9959 rtx set = single_set (this_insn);
9960
9961 if (CALL_P (this_insn))
9962 code = CALL_INSN;
9963 else if (JUMP_P (this_insn))
9964 {
9965 if (INSN_ANNULLED_BRANCH_P (this_insn))
9966 return false;
9967 code = JUMP_INSN;
9968 }
9969
9970 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9971 return false;
9972 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9973 {
9974 if (!MEM_P (SET_DEST (set)))
9975 retval = true;
9976 else
9977 return false;
9978 }
9979 if (set == NULL_RTX
9980 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9981 return false;
9982 }
9983 if (retval == 1)
9984 return true;
9985 else if (code == JUMP_INSN)
9986 return false;
9987 }
9988
9989 set = single_set (insn);
9990 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9991 return false;
9992 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9993 return !MEM_P (SET_DEST (set));
9994 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9995 return false;
9996
9997 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9998 return true;
9999 }
10000 return true;
10001 }
10002 \f
10003 #include "ggc.h"
10004
10005 static GTY(()) rtx t_reg_rtx;
10006 rtx
10007 get_t_reg_rtx (void)
10008 {
10009 if (! t_reg_rtx)
10010 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
10011 return t_reg_rtx;
10012 }
10013
10014 static GTY(()) rtx fpscr_rtx;
10015 rtx
10016 get_fpscr_rtx (void)
10017 {
10018 if (! fpscr_rtx)
10019 {
10020 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
10021 REG_USERVAR_P (fpscr_rtx) = 1;
10022 mark_user_reg (fpscr_rtx);
10023 }
10024 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
10025 mark_user_reg (fpscr_rtx);
10026 return fpscr_rtx;
10027 }
10028
10029 static GTY(()) tree fpscr_values;
10030
10031 static void
10032 emit_fpu_switch (rtx scratch, int index)
10033 {
10034 rtx dst, src;
10035
10036 if (fpscr_values == NULL)
10037 {
10038 tree t;
10039
10040 t = build_index_type (integer_one_node);
10041 t = build_array_type (integer_type_node, t);
10042 t = build_decl (BUILTINS_LOCATION,
10043 VAR_DECL, get_identifier ("__fpscr_values"), t);
10044 DECL_ARTIFICIAL (t) = 1;
10045 DECL_IGNORED_P (t) = 1;
10046 DECL_EXTERNAL (t) = 1;
10047 TREE_STATIC (t) = 1;
10048 TREE_PUBLIC (t) = 1;
10049 TREE_USED (t) = 1;
10050
10051 fpscr_values = t;
10052 }
10053
10054 src = DECL_RTL (fpscr_values);
10055 if (!can_create_pseudo_p ())
10056 {
10057 emit_move_insn (scratch, XEXP (src, 0));
10058 if (index != 0)
10059 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10060 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
10061 }
10062 else
10063 src = adjust_address (src, PSImode, index * 4);
10064
10065 dst = get_fpscr_rtx ();
10066 emit_move_insn (dst, src);
10067 }
10068
10069 void
10070 emit_sf_insn (rtx pat)
10071 {
10072 emit_insn (pat);
10073 }
10074
10075 void
10076 emit_df_insn (rtx pat)
10077 {
10078 emit_insn (pat);
10079 }
10080
10081 void
10082 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10083 {
10084 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
10085 }
10086
10087 void
10088 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10089 {
10090 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
10091 get_fpscr_rtx ()));
10092 }
10093
10094 void
10095 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10096 {
10097 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
10098 }
10099
10100 void
10101 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10102 {
10103 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
10104 get_fpscr_rtx ()));
10105 }
10106 \f
10107 static rtx get_free_reg (HARD_REG_SET);
10108
10109 /* This function returns a register to use to load the address to load
10110 the fpscr from. Currently it always returns r1 or r7, but when we are
10111 able to use pseudo registers after combine, or have a better mechanism
10112 for choosing a register, it should be done here. */
10113 /* REGS_LIVE is the liveness information for the point for which we
10114 need this allocation. In some bare-bones exit blocks, r1 is live at the
10115 start. We can even have all of r0..r3 being live:
10116 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10117 The insn before which the new insns are placed will clobber the register
10118 we return. If a basic block consists only of setting the return value
10119 register to a pseudo and using that register, the return value is not
10120 live before or after this block, yet we'll insert our insns right in
10121 the middle. */
10122 static rtx
10123 get_free_reg (HARD_REG_SET regs_live)
10124 {
10125 if (! TEST_HARD_REG_BIT (regs_live, 1))
10126 return gen_rtx_REG (Pmode, 1);
10127
10128 /* Hard reg 1 is live; since this is a small register classes target,
10129 there shouldn't be anything but a jump before the function end. */
10130 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10131 return gen_rtx_REG (Pmode, 7);
10132 }
10133
10134 /* This function will set the fpscr from memory.
10135 MODE is the mode we are setting it to. */
10136 void
10137 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10138 {
10139 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10140 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10141 rtx addr_reg;
10142
10143 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10144 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10145 }
10146
10147 /* Is the given character a logical line separator for the assembler? */
10148 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10149 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10150 #endif
10151
10152 static bool
10153 sequence_insn_p (rtx_insn *insn)
10154 {
10155 rtx_insn *prev, *next;
10156
10157 prev = PREV_INSN (insn);
10158 if (prev == NULL)
10159 return false;
10160
10161 next = NEXT_INSN (prev);
10162 if (next == NULL)
10163 return false;
10164
10165 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10166 }
10167
10168 int
10169 sh_insn_length_adjustment (rtx_insn *insn)
10170 {
10171 /* Instructions with unfilled delay slots take up an extra two bytes for
10172 the nop in the delay slot. */
10173 if (((NONJUMP_INSN_P (insn)
10174 && GET_CODE (PATTERN (insn)) != USE
10175 && GET_CODE (PATTERN (insn)) != CLOBBER)
10176 || CALL_P (insn) || JUMP_P (insn))
10177 && ! sequence_insn_p (insn)
10178 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10179 return 2;
10180
10181 /* SH2e has a bug that prevents the use of annulled branches, so if
10182 the delay slot is not filled, we'll have to put a NOP in it. */
10183 if (sh_cpu_attr == CPU_SH2E
10184 && JUMP_P (insn)
10185 && get_attr_type (insn) == TYPE_CBRANCH
10186 && ! sequence_insn_p (insn))
10187 return 2;
10188
10189 /* sh-dsp parallel processing insns take four bytes instead of two. */
10190
10191 if (NONJUMP_INSN_P (insn))
10192 {
10193 int sum = 0;
10194 rtx body = PATTERN (insn);
10195 const char *templ;
10196 char c;
10197 bool maybe_label = true;
10198
10199 if (GET_CODE (body) == ASM_INPUT)
10200 templ = XSTR (body, 0);
10201 else if (asm_noperands (body) >= 0)
10202 templ
10203 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10204 else
10205 return 0;
10206 do
10207 {
10208 int ppi_adjust = 0;
10209
10210 do
10211 c = *templ++;
10212 while (c == ' ' || c == '\t');
10213 /* all sh-dsp parallel-processing insns start with p.
10214 The only non-ppi sh insn starting with p is pref.
10215 The only ppi starting with pr is prnd. */
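/* For example (illustrative asm text), "padd x0,y0,a0" would be counted as a
   4-byte ppi insn here, while "pref @r1" would not.  */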
10216 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10217 ppi_adjust = 2;
10218 /* The repeat pseudo-insn expands to three insns, a total of
10219 six bytes in size. */
10220 else if ((c == 'r' || c == 'R')
10221 && ! strncasecmp ("epeat", templ, 5))
10222 ppi_adjust = 4;
10223 while (c && c != '\n'
10224 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10225 {
10226 /* If this is a label, it is obviously not a ppi insn. */
10227 if (c == ':' && maybe_label)
10228 {
10229 ppi_adjust = 0;
10230 break;
10231 }
10232 else if (c == '\'' || c == '"')
10233 maybe_label = false;
10234 c = *templ++;
10235 }
10236 sum += ppi_adjust;
10237 maybe_label = c != ':';
10238 }
10239 while (c);
10240 return sum;
10241 }
10242 return 0;
10243 }
10244 \f
10245 /* Return TRUE for a valid displacement for the REG+disp addressing
10246 with MODE. */
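/* For instance, on non-SH2A targets an SImode mov insn reaches displacements
   0..60 in steps of 4; the max_disp / align_mask checks below encode exactly
   these kinds of limits.  */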
10247 bool
10248 sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
10249 bool allow_zero)
10250 {
10251 if (! CONST_INT_P (op))
10252 return false;
10253
10254 if (TARGET_SHMEDIA)
10255 {
10256 int size;
10257
10258 /* Check if this is the address of an unaligned load / store. */
10259 if (mode == VOIDmode)
10260 return satisfies_constraint_I06 (op);
10261
10262 size = GET_MODE_SIZE (mode);
10263 return (!(INTVAL (op) & (size - 1))
10264 && INTVAL (op) >= -512 * size
10265 && INTVAL (op) < 512 * size);
10266 }
10267 else
10268 {
10269 const HOST_WIDE_INT offset = INTVAL (op);
10270 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10271 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10272
10273 /* If the mode does not support any displacement always return false.
10274 Even though an index of '0' is actually always valid, it will cause
10275 troubles when e.g. a DFmode move is split into two SFmode moves,
10276 where one SFmode move will have index '0' and the other move will
10277 have index '4'. */
10278 if (!allow_zero && max_disp < 1)
10279 return false;
10280
10281 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10282 }
10283 }
10284
10285 /* Recognize an RTL expression that is a valid memory address for
10286 an instruction.
10287 The MODE argument is the machine mode for the MEM expression
10288 that wants to use this address.
10289 Allow REG
10290 REG+disp
10291 REG+r0
10292 REG++
10293 --REG
10294 GBR
10295 GBR+disp */
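/* In SH assembler terms these forms roughly correspond to @Rn, @(disp,Rn),
   @(R0,Rn), @Rn+, @-Rn, and GBR-based addressing such as @(disp,GBR).  */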
10296 static bool
10297 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
10298 {
10299 if (! ALLOW_INDEXED_ADDRESS
10300 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
10301 return false;
10302
10303 if (REG_P (x) && REGNO (x) == GBR_REG)
10304 return true;
10305
10306 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10307 return true;
10308 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10309 && ! TARGET_SHMEDIA
10310 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10311 return true;
10312 else if (GET_CODE (x) == PLUS
10313 && (mode != PSImode || reload_completed))
10314 {
10315 rtx xop0 = XEXP (x, 0);
10316 rtx xop1 = XEXP (x, 1);
10317
10318 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10319 return gbr_displacement (xop1, mode);
10320
10321 if (GET_MODE_SIZE (mode) <= 8
10322 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10323 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10324 return true;
10325
10326 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10327 || ((xop0 == stack_pointer_rtx
10328 || xop0 == hard_frame_pointer_rtx)
10329 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10330 || ((xop1 == stack_pointer_rtx
10331 || xop1 == hard_frame_pointer_rtx)
10332 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10333 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10334 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10335 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10336 && TARGET_FMOVD && mode == DFmode)))
10337 {
10338 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10339 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10340 return true;
10341 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10342 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10343 return true;
10344 }
10345 }
10346
10347 return false;
10348 }
10349 \f
10350 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10351 isn't protected by a PIC unspec. */
10352 bool
10353 nonpic_symbol_mentioned_p (rtx x)
10354 {
10355 const char *fmt;
10356 int i;
10357
10358 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10359 || GET_CODE (x) == PC)
10360 return true;
10361
10362 /* We don't want to look into the possible MEM location of a
10363 CONST_DOUBLE, since we're not going to use it, in general. */
10364 if (GET_CODE (x) == CONST_DOUBLE)
10365 return false;
10366
10367 if (GET_CODE (x) == UNSPEC
10368 && (XINT (x, 1) == UNSPEC_PIC
10369 || XINT (x, 1) == UNSPEC_GOT
10370 || XINT (x, 1) == UNSPEC_GOTOFF
10371 || XINT (x, 1) == UNSPEC_GOTPLT
10372 || XINT (x, 1) == UNSPEC_GOTTPOFF
10373 || XINT (x, 1) == UNSPEC_DTPOFF
10374 || XINT (x, 1) == UNSPEC_TPOFF
10375 || XINT (x, 1) == UNSPEC_PLT
10376 || XINT (x, 1) == UNSPEC_SYMOFF
10377 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10378 return false;
10379
10380 fmt = GET_RTX_FORMAT (GET_CODE (x));
10381 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10382 {
10383 if (fmt[i] == 'E')
10384 {
10385 int j;
10386 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10387 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10388 return true;
10389 }
10390 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10391 return true;
10392 }
10393
10394 return false;
10395 }
10396
10397 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10398 @GOTOFF in `reg'. */
10399 rtx
10400 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
10401 rtx reg)
10402 {
10403 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10404 return orig;
10405
10406 if (GET_CODE (orig) == LABEL_REF
10407 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10408 {
10409 if (reg == NULL_RTX)
10410 reg = gen_reg_rtx (Pmode);
10411
10412 emit_insn (gen_symGOTOFF2reg (reg, orig));
10413 return reg;
10414 }
10415 else if (GET_CODE (orig) == SYMBOL_REF)
10416 {
10417 if (reg == NULL_RTX)
10418 reg = gen_reg_rtx (Pmode);
10419
10420 emit_insn (gen_symGOT2reg (reg, orig));
10421 return reg;
10422 }
10423 return orig;
10424 }
10425
10426 /* Given a (logical) mode size and an offset in bytes, try to find the
10427 appropriate displacement value for a mov insn. On SH the displacements
10428 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10429 15 bytes in QImode. To compensate for this we create a new base address
10430 by adding an adjustment value to it.
10431
10432 If the originally requested offset is greater than 127 we prefer using
10433 values 124..127 over 128..131 to increase opportunities to use the
10434 add #imm, Rn insn.
10435
10436 In some cases it is possible that a requested offset might seem unaligned
10437 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10438 This is compensated by adjusting the base address so that the effective
10439 address of the displacement move insn will be aligned.
10440
10441 This is not the best possible way of rebasing the base address, as it
10442 does not take other nearby displacement addressings into account.
10443 In some cases this can create more base address adjustments than would
10444 actually be necessary. */
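/* A worked example (assuming SImode, where the mov insn reaches displacements
   up to 60): for a requested offset of 68, offset_adjust becomes 64 and
   mov_disp becomes 4, i.e. the base register is rebased by adding 64 and the
   remaining 4 bytes are expressed as the mov insn displacement.  */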
10445 struct disp_adjust
10446 {
10447 rtx offset_adjust;
10448 rtx mov_disp;
10449 };
10450
10451 static struct disp_adjust
10452 sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
10453 {
10454 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10455
10456 /* Do not try to use SH2A's large displacements here, because this would
10457 effectively disable the small displacement insns. */
10458 const int mode_sz = GET_MODE_SIZE (mode);
10459 const int mov_insn_sz = mov_insn_size (mode, false);
10460 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10461 const int max_disp_next = max_disp + mov_insn_sz;
10462 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10463 HOST_WIDE_INT offset_adjust;
10464
10465 /* In some cases this actually does happen and we must check for it. */
10466 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10467 return res;
10468
10469 /* Keeps the previous behavior for QImode displacement addressing.
10470 This just decides how the offset is re-based. Removing this special
10471 case will result in slightly bigger code on average, but it's not that
10472 bad actually. */
10473 if (mov_insn_sz == 1)
10474 align_modifier = 0;
10475
10476 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10477
10478 if (mode_sz + offset - offset_adjust <= max_disp_next)
10479 {
10480 res.offset_adjust = GEN_INT (offset_adjust);
10481 res.mov_disp = GEN_INT (offset - offset_adjust);
10482 }
10483
10484 return res;
10485 }
10486
10487 /* Try to modify an illegitimate address and make it legitimate.
10488 If we find one, return the new, valid address.
10489 Otherwise, return the original address. */
10490 static rtx
10491 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
10492 {
10493 if (flag_pic)
10494 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10495
10496 if (TARGET_SHMEDIA)
10497 return x;
10498
10499 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10500 || (TARGET_SH2E && mode == SFmode))
10501 return x;
10502
10503 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10504 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10505 {
10506 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10507 INTVAL (XEXP (x, 1)));
10508
10509 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10510 {
10511 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10512 adj.offset_adjust, NULL_RTX, 0,
10513 OPTAB_LIB_WIDEN);
10514 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10515 }
10516 }
10517
10518 return x;
10519 }
10520
10521 /* Attempt to replace *p, which is an address that needs reloading, with
10522 a valid memory address for an operand of mode MODE.
10523 Like for sh_legitimize_address, for the SH we try to get a normal form
10524 of the address. That will allow inheritance of the address reloads. */
10525 bool
10526 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
10527 int itype)
10528 {
10529 enum reload_type type = (enum reload_type) itype;
10530 const int mode_sz = GET_MODE_SIZE (mode);
10531
10532 if (! ALLOW_INDEXED_ADDRESS
10533 && GET_CODE (*p) == PLUS
10534 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1)))
10535 {
10536 *p = copy_rtx (*p);
10537 push_reload (*p, NULL_RTX, p, NULL,
10538 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10539 return true;
10540 }
10541
10542 if (! ALLOW_INDEXED_ADDRESS
10543 && GET_CODE (*p) == PLUS
10544 && GET_CODE (XEXP (*p, 0)) == PLUS)
10545 {
10546 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0),
10547 XEXP (XEXP (*p, 0), 1));
10548 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1));
10549 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10550 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10551 return true;
10552 }
10553
10554 if (TARGET_SHMEDIA)
10555 return false;
10556
10557 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10558 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10559 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
10560 && (ALLOW_INDEXED_ADDRESS
10561 || XEXP (*p, 0) == stack_pointer_rtx
10562 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10563 {
10564 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10565 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10566
10567 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10568 {
10569 push_reload (*p, NULL_RTX, p, NULL,
10570 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10571 return true;
10572 }
10573
10574 if (TARGET_SH2E && mode == SFmode)
10575 {
10576 *p = copy_rtx (*p);
10577 push_reload (*p, NULL_RTX, p, NULL,
10578 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10579 return true;
10580 }
10581
10582 /* FIXME: Do not allow legitimizing QImode and HImode displacement
10583 moves because then reload has a problem figuring the constraint
10584 that the move insn target/source reg must be R0.
10585 Or maybe some handling is wrong in sh_secondary_reload for this
10586 to work properly? */
10587 if ((mode_sz == 4 || mode_sz == 8)
10588 && ! (TARGET_SH4 && mode == DFmode)
10589 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10590 {
10591 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10592 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10593 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10594 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10595 return true;
10596 }
10597 }
10598
10599 /* We must re-recognize what we created before. */
10600 if (GET_CODE (*p) == PLUS
10601 && (mode_sz == 4 || mode_sz == 8)
10602 && GET_CODE (XEXP (*p, 0)) == PLUS
10603 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10604 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10605 && CONST_INT_P (XEXP (*p, 1))
10606 && ! (TARGET_SH2E && mode == SFmode))
10607 {
10608 /* Because this address is so complex, we know it must have
10609 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10610 it is already unshared, and needs no further unsharing. */
10611 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10612 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10613 return true;
10614 }
10615
10616 return false;
10617 }
10618
10619 /* In the name of slightly smaller debug output, and to cater to
10620 general assembler lossage, recognize various UNSPEC sequences
10621 and turn them back into a direct symbol reference. */
10622 static rtx
10623 sh_delegitimize_address (rtx orig_x)
10624 {
10625 rtx x, y;
10626
10627 orig_x = delegitimize_mem_from_attrs (orig_x);
10628
10629 x = orig_x;
10630 if (MEM_P (x))
10631 x = XEXP (x, 0);
10632 if (GET_CODE (x) == CONST)
10633 {
10634 y = XEXP (x, 0);
10635 if (GET_CODE (y) == UNSPEC)
10636 {
10637 if (XINT (y, 1) == UNSPEC_GOT
10638 || XINT (y, 1) == UNSPEC_GOTOFF
10639 || XINT (y, 1) == UNSPEC_SYMOFF)
10640 return XVECEXP (y, 0, 0);
10641 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10642 {
10643 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10644 {
10645 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10646
10647 if (GET_CODE (symplt) == UNSPEC
10648 && XINT (symplt, 1) == UNSPEC_PLT)
10649 return XVECEXP (symplt, 0, 0);
10650 }
10651 }
10652 else if (TARGET_SHMEDIA
10653 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10654 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10655 {
10656 rtx offset = XVECEXP (y, 0, 1);
10657
10658 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10659 if (MEM_P (orig_x))
10660 x = replace_equiv_address_nv (orig_x, x);
10661 return x;
10662 }
10663 }
10664 }
10665
10666 return orig_x;
10667 }
10668
10669 /* Mark the use of a constant in the literal table. If the constant
10670 has multiple labels, make it unique. */
10671 static rtx
10672 mark_constant_pool_use (rtx x)
10673 {
10674 rtx_insn *insn, *lab;
10675 rtx pattern;
10676
10677 if (x == NULL_RTX)
10678 return x;
10679
10680 switch (GET_CODE (x))
10681 {
10682 case LABEL_REF:
10683 x = XEXP (x, 0);
10684 case CODE_LABEL:
10685 break;
10686 default:
10687 return x;
10688 }
10689
10690 /* Get the first label in the list of labels for the same constant
10691 and delete the other labels in the list. */
10692 lab = as_a <rtx_insn *> (x);
10693 for (insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
10694 {
10695 if (!LABEL_P (insn)
10696 || LABEL_REFS (insn) != NEXT_INSN (insn))
10697 break;
10698 lab = insn;
10699 }
10700
10701 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10702 INSN_DELETED_P (insn) = 1;
10703
10704 /* Mark constants in a window. */
10705 for (insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; insn = NEXT_INSN (insn))
10706 {
10707 if (!NONJUMP_INSN_P (insn))
10708 continue;
10709
10710 pattern = PATTERN (insn);
10711 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10712 continue;
10713
10714 switch (XINT (pattern, 1))
10715 {
10716 case UNSPECV_CONST2:
10717 case UNSPECV_CONST4:
10718 case UNSPECV_CONST8:
10719 XVECEXP (pattern, 0, 1) = const1_rtx;
10720 break;
10721 case UNSPECV_WINDOW_END:
10722 if (XVECEXP (pattern, 0, 0) == x)
10723 return lab;
10724 break;
10725 case UNSPECV_CONST_END:
10726 return lab;
10727 default:
10728 break;
10729 }
10730 }
10731
10732 return lab;
10733 }
10734 \f
10735 /* Return true if it's possible to redirect BRANCH1 to the destination
10736 of an unconditional jump BRANCH2. We only want to do this if the
10737 resulting branch will have a short displacement. */
10738 bool
10739 sh_can_redirect_branch (rtx_insn *branch1, rtx_insn *branch2)
10740 {
10741 if (flag_expensive_optimizations && simplejump_p (branch2))
10742 {
10743 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10744 rtx_insn *insn;
10745 int distance;
10746
10747 for (distance = 0, insn = NEXT_INSN (branch1);
10748 insn && distance < 256;
10749 insn = PREV_INSN (insn))
10750 {
10751 if (insn == dest)
10752 return true;
10753 else
10754 distance += get_attr_length (insn);
10755 }
10756 for (distance = 0, insn = PREV_INSN (branch1);
10757 insn && distance < 256;
10758 insn = NEXT_INSN (insn))
10759 {
10760 if (insn == dest)
10761 return true;
10762 else
10763 distance += get_attr_length (insn);
10764 }
10765 }
10766 return false;
10767 }
10768
10769 /* Return true if register OLD_REG can be renamed to register NEW_REG. */
10770 bool
10771 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10772 unsigned int new_reg)
10773 {
10774 /* Interrupt functions can only use registers that have already been
10775 saved by the prologue, even if they would normally be
10776 call-clobbered. */
10777 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10778 return false;
10779
10780 return true;
10781 }
10782
10783 /* Function to update the integer COST
10784 based on the relationship between INSN that is dependent on
10785 DEP_INSN through the dependence LINK. The default is to make no
10786 adjustment to COST. This can be used for example to specify to
10787 the scheduler that an output- or anti-dependence does not incur
10788 the same cost as a data-dependence. The return value should be
10789 the new value for COST. */
10790 static int
10791 sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
10792 rtx_insn *dep_insn, int cost)
10793 {
10794 rtx reg, use_pat;
10795
10796 if (TARGET_SHMEDIA)
10797 {
10798 /* On SHmedia, if the dependence is an anti-dependence or
10799 output-dependence, there is no cost. */
10800 if (REG_NOTE_KIND (link) != 0)
10801 {
10802 /* However, dependencies between target register loads and
10803 uses of the register in a subsequent block that are separated
10804 by a conditional branch are not modelled - we have to make do
10805 with the anti-dependency between the target register load and
10806 the conditional branch that ends the current block. */
10807 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10808 && GET_CODE (PATTERN (dep_insn)) == SET
10809 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10810 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10811 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10812 {
10813 int orig_cost = cost;
10814 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10815 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10816 ? insn : JUMP_LABEL (insn));
10817 /* On the likely path, the branch costs 1; on the unlikely path,
10818 it costs 3. */
10819 cost--;
10820 do
10821 target = next_active_insn (target);
10822 while (target && ! flow_dependent_p (target, dep_insn)
10823 && --cost > 0);
10824 /* If two branches are executed in immediate succession, with the
10825 first branch properly predicted, this causes a stall at the
10826 second branch, hence we won't need the target for the
10827 second branch for two cycles after the launch of the first
10828 branch. */
10829 if (cost > orig_cost - 2)
10830 cost = orig_cost - 2;
10831 }
10832 else
10833 cost = 0;
10834 }
10835
10836 else if (get_attr_is_mac_media (insn)
10837 && get_attr_is_mac_media (dep_insn))
10838 cost = 1;
10839
10840 else if (! reload_completed
10841 && GET_CODE (PATTERN (insn)) == SET
10842 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10843 && GET_CODE (PATTERN (dep_insn)) == SET
10844 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10845 && cost < 4)
10846 cost = 4;
10847 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10848 that is needed at the target. */
10849 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10850 && ! flow_dependent_p (insn, dep_insn))
10851 cost--;
10852 }
10853 else if (REG_NOTE_KIND (link) == 0)
10854 {
10855 enum attr_type type;
10856 rtx dep_set;
10857
10858 if (recog_memoized (insn) < 0
10859 || recog_memoized (dep_insn) < 0)
10860 return cost;
10861
10862 dep_set = single_set (dep_insn);
10863
10864 /* The latency that we specify in the scheduling description refers
10865 to the actual output, not to an auto-increment register; for that,
10866 the latency is one. */
10867 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10868 {
10869 rtx set = single_set (insn);
10870
10871 if (set
10872 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10873 && (!MEM_P (SET_DEST (set))
10874 || !reg_mentioned_p (SET_DEST (dep_set),
10875 XEXP (SET_DEST (set), 0))))
10876 cost = 1;
10877 }
10878 /* The only input for a call that is timing-critical is the
10879 function's address. */
10880 if (CALL_P (insn))
10881 {
10882 rtx call = get_call_rtx_from (insn);
10883 if (call
10884 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10885 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10886 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10887 cost -= TARGET_SH4_300 ? 3 : 6;
10888 }
10889 /* Likewise, the most timing-critical input for an sfunc call
10890 is the function address. However, sfuncs typically start
10891 using their arguments pretty quickly.
10892 Assume a four cycle delay for SH4 before they are needed.
10893 Cached ST40-300 calls are quicker, so assume only a one
10894 cycle delay there.
10895 ??? Maybe we should encode the delays until input registers
10896 are needed by sfuncs into the sfunc call insn. */
10897 /* All sfunc calls are parallels with at least four components.
10898 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10899 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10900 && XVECLEN (PATTERN (insn), 0) >= 4
10901 && (reg = sfunc_uses_reg (insn)))
10902 {
10903 if (! reg_set_p (reg, dep_insn))
10904 cost -= TARGET_SH4_300 ? 1 : 4;
10905 }
10906 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10907 {
10908 enum attr_type dep_type = get_attr_type (dep_insn);
10909
10910 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10911 cost--;
10912 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10913 && (type = get_attr_type (insn)) != TYPE_CALL
10914 && type != TYPE_SFUNC)
10915 cost--;
10916 /* When the preceding instruction loads the shift amount of
10917 the following SHAD/SHLD, the latency of the load is increased
10918 by 1 cycle. */
10919 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10920 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10921 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10922 XEXP (SET_SRC (single_set (insn)),
10923 1)))
10924 cost++;
10925 /* When an LS group instruction with a latency of less than
10926 3 cycles is followed by a double-precision floating-point
10927 instruction, FIPR, or FTRV, the latency of the first
10928 instruction is increased to 3 cycles. */
10929 else if (cost < 3
10930 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10931 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10932 cost = 3;
10933 /* The lsw register of a double-precision computation is ready one
10934 cycle earlier. */
10935 else if (reload_completed
10936 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10937 && (use_pat = single_set (insn))
10938 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10939 SET_SRC (use_pat)))
10940 cost -= 1;
10941
10942 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10943 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10944 cost -= 1;
10945 }
10946 else if (TARGET_SH4_300)
10947 {
10948 /* Stores need their input register two cycles later. */
10949 if (dep_set && cost >= 1
10950 && ((type = get_attr_type (insn)) == TYPE_STORE
10951 || type == TYPE_PSTORE
10952 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10953 {
10954 rtx set = single_set (insn);
10955
10956 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10957 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10958 {
10959 cost -= 2;
10960 /* But don't reduce the cost below 1 if the address depends
10961 on a side effect of dep_insn. */
10962 if (cost < 1
10963 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10964 cost = 1;
10965 }
10966 }
10967 }
10968 }
10969 /* An anti-dependence penalty of two applies if the first insn is a double
10970 precision fadd / fsub / fmul. */
10971 else if (!TARGET_SH4_300
10972 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10973 && recog_memoized (dep_insn) >= 0
10974 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10975 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10976 /* A lot of alleged anti-flow dependences are fake,
10977 so check this one is real. */
10978 && flow_dependent_p (dep_insn, insn))
10979 cost = 2;
10980
10981 return cost;
10982 }
10983
10984 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10985 if DEP_INSN is anti-flow dependent on INSN. */
10986 static bool
10987 flow_dependent_p (rtx insn, rtx dep_insn)
10988 {
10989 rtx tmp = PATTERN (insn);
10990
10991 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10992 return tmp == NULL_RTX;
10993 }
10994
10995 /* A helper function for flow_dependent_p called through note_stores. */
10996 static void
10997 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10998 {
10999 rtx * pinsn = (rtx *) data;
11000
11001 if (*pinsn && reg_referenced_p (x, *pinsn))
11002 *pinsn = NULL_RTX;
11003 }
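/* In other words, note_stores calls flow_dependent_p_1 for every location
   stored by DEP_INSN's pattern; whenever one of those locations is
   referenced by INSN's pattern, the shared pointer is cleared, so
   flow_dependent_p reports a dependence exactly when *pinsn ends up
   NULL.  */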
11004
11005 /* For use by sh_allocate_initial_value. Note that sh.md contains some
11006 'special function' patterns (type sfunc) that clobber pr, but that
11007 do not look like function calls to leaf_function_p. Hence we must
11008 do this extra check. */
11009 static int
11010 sh_pr_n_sets (void)
11011 {
11012 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11013 }
11014
11015 /* Return where to allocate pseudo for a given hard register initial
11016 value. */
11017 static rtx
11018 sh_allocate_initial_value (rtx hard_reg)
11019 {
11020 rtx x;
11021
11022 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
11023 {
11024 if (crtl->is_leaf
11025 && ! sh_pr_n_sets ()
11026 && ! (TARGET_SHCOMPACT
11027 && ((crtl->args.info.call_cookie
11028 & ~ CALL_COOKIE_RET_TRAMP (1))
11029 || crtl->saves_all_registers)))
11030 x = hard_reg;
11031 else
11032 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
11033 }
11034 else
11035 x = NULL_RTX;
11036
11037 return x;
11038 }
11039
11040 /* This function returns "2" to indicate dual issue for the SH4
11041 processor. To be used by the DFA pipeline description. */
11042 static int
11043 sh_issue_rate (void)
11044 {
11045 if (TARGET_SUPERSCALAR)
11046 return 2;
11047 else
11048 return 1;
11049 }
11050
11051 /* Functions for ready queue reordering for sched1. */
11052
11053 /* Get weight for mode for a set x. */
11054 static short
11055 find_set_regmode_weight (rtx x, enum machine_mode mode)
11056 {
11057 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11058 return 1;
11059 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
11060 {
11061 if (REG_P (SET_DEST (x)))
11062 {
11063 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11064 return 1;
11065 else
11066 return 0;
11067 }
11068 return 1;
11069 }
11070 return 0;
11071 }
11072
11073 /* Get regmode weight for insn. */
11074 static short
11075 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
11076 {
11077 short reg_weight = 0;
11078 rtx x;
11079
11080 /* Increment weight for each register born here. */
11081 x = PATTERN (insn);
11082 reg_weight += find_set_regmode_weight (x, mode);
11083 if (GET_CODE (x) == PARALLEL)
11084 {
11085 int j;
11086 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11087 {
11088 x = XVECEXP (PATTERN (insn), 0, j);
11089 reg_weight += find_set_regmode_weight (x, mode);
11090 }
11091 }
11092 /* Decrement weight for each register that dies here. */
11093 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11094 {
11095 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11096 {
11097 rtx note = XEXP (x, 0);
11098 if (REG_P (note) && GET_MODE (note) == mode)
11099 reg_weight--;
11100 }
11101 }
11102 return reg_weight;
11103 }
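/* Illustrative example (not compiled): for an insn like
     (set (reg:SI r4) (plus:SI (reg:SI r5) (reg:SI r6)))
   that carries a REG_DEAD note for (reg:SI r5), the SImode weight is
   +1 for the value born in r4 and -1 for the value dying in r5,
   i.e. the insn adds no net SImode registers.  */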
11104
11105 /* Calculate regmode weights for all insns of a basic block. */
11106 static void
11107 find_regmode_weight (basic_block b, enum machine_mode mode)
11108 {
11109 rtx_insn *insn, *next_tail, *head, *tail;
11110
11111 get_ebb_head_tail (b, b, &head, &tail);
11112 next_tail = NEXT_INSN (tail);
11113
11114 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11115 {
11116 /* Handle register life information. */
11117 if (!INSN_P (insn))
11118 continue;
11119
11120 if (mode == SFmode)
11121 INSN_REGMODE_WEIGHT (insn, mode) =
11122 find_insn_regmode_weight (insn, mode)
11123 + 2 * find_insn_regmode_weight (insn, DFmode);
11124 else if (mode == SImode)
11125 INSN_REGMODE_WEIGHT (insn, mode) =
11126 find_insn_regmode_weight (insn, mode)
11127 + 2 * find_insn_regmode_weight (insn, DImode);
11128 }
11129 }
11130
11131 /* Comparison function for ready queue sorting. */
11132 static int
11133 rank_for_reorder (const void *x, const void *y)
11134 {
11135 rtx_insn *tmp = *(rtx_insn * const *) y;
11136 rtx_insn *tmp2 = *(rtx_insn * const *) x;
11137
11138 /* The insn in a schedule group should be issued first. */
11139 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11140 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11141
11142 /* If insns are equally good, sort by INSN_LUID (original insn order); this
11143 minimizes instruction movement, thus minimizing sched's effect on
11144 register pressure. */
11145 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11146 }
11147
11148 /* Resort the array A, in which only the last element (index N-1) may be out of order. */
11149 static void
11150 swap_reorder (rtx_insn **a, int n)
11151 {
11152 rtx_insn *insn = a[n - 1];
11153 int i = n - 2;
11154
11155 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11156 {
11157 a[i + 1] = a[i];
11158 i -= 1;
11159 }
11160 a[i + 1] = insn;
11161 }
11162
11163 /* Sort the ready list by ascending priority. */
11164 static void
11165 ready_reorder (rtx_insn **ready, int nready)
11166 {
11167 if (nready == 2)
11168 swap_reorder (ready, nready);
11169 else if (nready > 2)
11170 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
11171 }
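/* For example, given ready insns with LUIDs 7, 3 and 5 and no schedule
   groups, rank_for_reorder leaves them ordered 7, 5, 3; since the
   scheduler picks insns from the end of the ready vector, the insn that
   came first in the original order (lowest LUID) is issued first.  */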
11172
11173 /* Count life regions of r0 for a block. */
11174 static int
11175 find_r0_life_regions (basic_block b)
11176 {
11177 rtx_insn *end, *insn;
11178 rtx pset;
11179 rtx r0_reg;
11180 int live;
11181 int set;
11182 int death = 0;
11183
11184 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11185 {
11186 set = 1;
11187 live = 1;
11188 }
11189 else
11190 {
11191 set = 0;
11192 live = 0;
11193 }
11194
11195 insn = BB_HEAD (b);
11196 end = BB_END (b);
11197 r0_reg = gen_rtx_REG (SImode, R0_REG);
11198 while (1)
11199 {
11200 if (INSN_P (insn))
11201 {
11202 if (find_regno_note (insn, REG_DEAD, R0_REG))
11203 {
11204 death++;
11205 live = 0;
11206 }
11207 if (!live
11208 && (pset = single_set (insn))
11209 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11210 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11211 {
11212 set++;
11213 live = 1;
11214 }
11215 }
11216 if (insn == end)
11217 break;
11218 insn = NEXT_INSN (insn);
11219 }
11220 return set - death;
11221 }
11222
11223 /* Calculate regmode weights for all insns of all basic blocks. */
11224 static void
11225 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11226 int verbose ATTRIBUTE_UNUSED,
11227 int old_max_uid)
11228 {
11229 basic_block b;
11230
11231 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11232 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11233 r0_life_regions = 0;
11234
11235 FOR_EACH_BB_REVERSE_FN (b, cfun)
11236 {
11237 find_regmode_weight (b, SImode);
11238 find_regmode_weight (b, SFmode);
11239 if (!reload_completed)
11240 r0_life_regions += find_r0_life_regions (b);
11241 }
11242
11243 CURR_REGMODE_PRESSURE (SImode) = 0;
11244 CURR_REGMODE_PRESSURE (SFmode) = 0;
11245 }
11246
11247 /* Cleanup. */
11248 static void
11249 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11250 int verbose ATTRIBUTE_UNUSED)
11251 {
11252 if (regmode_weight[0])
11253 {
11254 free (regmode_weight[0]);
11255 regmode_weight[0] = NULL;
11256 }
11257 if (regmode_weight[1])
11258 {
11259 free (regmode_weight[1]);
11260 regmode_weight[1] = NULL;
11261 }
11262 }
11263
11264 /* The set of supported scalar modes differs from the default in that
11265 TImode is not supported for 32-bit SHMEDIA. */
11266 static bool
11267 sh_scalar_mode_supported_p (enum machine_mode mode)
11268 {
11269 if (TARGET_SHMEDIA32 && mode == TImode)
11270 return false;
11271
11272 return default_scalar_mode_supported_p (mode);
11273 }
11274
11275 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11276 keep count of register pressures on SImode and SFmode. */
11277 static int
11278 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11279 int sched_verbose ATTRIBUTE_UNUSED,
11280 rtx_insn *insn,
11281 int can_issue_more)
11282 {
11283 if (GET_CODE (PATTERN (insn)) != USE
11284 && GET_CODE (PATTERN (insn)) != CLOBBER)
11285 cached_can_issue_more = can_issue_more - 1;
11286 else
11287 cached_can_issue_more = can_issue_more;
11288
11289 if (reload_completed)
11290 return cached_can_issue_more;
11291
11292 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11293 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11294
11295 return cached_can_issue_more;
11296 }
11297
11298 static void
11299 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11300 int verbose ATTRIBUTE_UNUSED,
11301 int veclen ATTRIBUTE_UNUSED)
11302 {
11303 CURR_REGMODE_PRESSURE (SImode) = 0;
11304 CURR_REGMODE_PRESSURE (SFmode) = 0;
11305 }
11306
11307 /* Some magic numbers. */
11308 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11309 functions that already have high pressure on r0. */
11310 #define R0_MAX_LIFE_REGIONS 2
11311 /* Register Pressure thresholds for SImode and SFmode registers. */
11312 #define SIMODE_MAX_WEIGHT 5
11313 #define SFMODE_MAX_WEIGHT 10
11314
11315 /* Return true if the pressure is high for MODE. */
11316 static bool
11317 high_pressure (enum machine_mode mode)
11318 {
11319 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11320 functions that already have high pressure on r0. */
11321 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11322 return true;
11323
11324 if (mode == SFmode)
11325 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11326 else
11327 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11328 }
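/* For instance, once CURR_REGMODE_PRESSURE (SImode) exceeds
   SIMODE_MAX_WEIGHT (5), or r0_life_regions has reached
   R0_MAX_LIFE_REGIONS (2), sh_reorder below re-sorts the ready queue
   towards the original program order to keep sched1 from increasing
   register pressure further.  */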
11329
11330 /* Reorder ready queue if register pressure is high. */
11331 static int
11332 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11333 int sched_verbose ATTRIBUTE_UNUSED,
11334 rtx_insn **ready,
11335 int *n_readyp,
11336 int clock_var ATTRIBUTE_UNUSED)
11337 {
11338 if (reload_completed)
11339 return sh_issue_rate ();
11340
11341 if (high_pressure (SFmode) || high_pressure (SImode))
11342 {
11343 ready_reorder (ready, *n_readyp);
11344 }
11345
11346 return sh_issue_rate ();
11347 }
11348
11349 /* Skip cycles if the current register pressure is high. */
11350 static int
11351 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11352 int sched_verbose ATTRIBUTE_UNUSED,
11353 rtx_insn **ready ATTRIBUTE_UNUSED,
11354 int *n_readyp ATTRIBUTE_UNUSED,
11355 int clock_var ATTRIBUTE_UNUSED)
11356 {
11357 if (reload_completed)
11358 return cached_can_issue_more;
11359
11360 if (high_pressure(SFmode) || high_pressure (SImode))
11361 skip_cycles = 1;
11362
11363 return cached_can_issue_more;
11364 }
11365
11366 /* Skip cycles without sorting the ready queue. This will move insns from
11367 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
11368 queue by sh_reorder. */
11369
11370 /* Generally, skipping this many cycles is sufficient for all insns to move
11371 from Q -> R. */
11372 #define MAX_SKIPS 8
11373
11374 static int
11375 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11376 int sched_verbose ATTRIBUTE_UNUSED,
11377 rtx_insn *insn ATTRIBUTE_UNUSED,
11378 int last_clock_var,
11379 int clock_var,
11380 int *sort_p)
11381 {
11382 if (reload_completed)
11383 return 0;
11384
11385 if (skip_cycles)
11386 {
11387 if ((clock_var - last_clock_var) < MAX_SKIPS)
11388 {
11389 *sort_p = 0;
11390 return 1;
11391 }
11392 /* If this is the last cycle we are skipping, allow reordering of R. */
11393 if ((clock_var - last_clock_var) == MAX_SKIPS)
11394 {
11395 *sort_p = 1;
11396 return 1;
11397 }
11398 }
11399
11400 skip_cycles = 0;
11401
11402 return 0;
11403 }
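/* To illustrate the interplay: after sh_reorder2 sets skip_cycles, this
   hook keeps answering 1 with *SORT_P = 0 while (clock_var -
   last_clock_var) is below MAX_SKIPS, allows one sorted cycle when the
   difference hits exactly MAX_SKIPS, and clears skip_cycles after
   that.  */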
11404
11405 /* SHmedia requires registers for branches, so we can't generate new
11406 branches past reload. */
11407 static bool
11408 sh_cannot_modify_jumps_p (void)
11409 {
11410 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11411 }
11412
11413 static reg_class_t
11414 sh_target_reg_class (void)
11415 {
11416 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11417 }
11418
11419 static bool
11420 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11421 {
11422 if (! shmedia_space_reserved_for_target_registers)
11423 return 0;
11424 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11425 return 0;
11426
11427 HARD_REG_SET dummy;
11428 if (calc_live_regs (&dummy) >= 6 * 8)
11429 return 1;
11430 return 0;
11431 }
11432
11433 static bool
11434 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11435 {
11436 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11437 }
11438 \f
11439 /*
11440 On the SH1..SH4, the trampoline looks like
11441 2 0002 D202 mov.l l2,r2
11442 1 0000 D301 mov.l l1,r3
11443 3 0004 422B jmp @r2
11444 4 0006 0009 nop
11445 5 0008 00000000 l1: .long area
11446 6 000c 00000000 l2: .long function
11447
11448 SH5 (compact) uses r1 instead of r3 for the static chain. */
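/* The 16-bit opcodes listed above (0xd202, 0xd301, 0x422b, 0x0009) are
   exactly what sh_trampoline_init below packs into the first two SImode
   words, paired according to the target endianness, with the static
   chain and the function address stored at offsets 8 and 12.  */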
11449
11450
11451 /* Emit RTL insns to initialize the variable parts of a trampoline.
11452 FNADDR is an RTX for the address of the function's pure code.
11453 CXT is an RTX for the static chain value for the function. */
11454 static void
11455 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11456 {
11457 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11458 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11459
11460 if (TARGET_SHMEDIA64)
11461 {
11462 rtx tramp_templ;
11463 int fixed_len;
11464
11465 rtx movi1 = GEN_INT (0xcc000010);
11466 rtx shori1 = GEN_INT (0xc8000010);
11467 rtx src, dst;
11468
11469 /* The following trampoline works within a +- 128 KB range for cxt:
11470 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11471 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11472 gettr tr1,r1; blink tr0,r63 */
11473 /* Address rounding makes it hard to compute the exact bounds of the
11474 offset for this trampoline, but we have a rather generous offset
11475 range, so frame_offset should do fine as an upper bound. */
11476 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11477 {
11478 /* ??? could optimize this trampoline initialization
11479 by writing DImode words with two insns each. */
11480 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11481 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11482 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11483 insn = gen_rtx_AND (DImode, insn, mask);
11484 /* Or in ptb/u .,tr1 pattern */
11485 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11486 insn = force_operand (insn, NULL_RTX);
11487 insn = gen_lowpart (SImode, insn);
11488 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11489 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11490 insn = gen_rtx_AND (DImode, insn, mask);
11491 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11492 insn = gen_lowpart (SImode, insn);
11493 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11494 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11495 insn = gen_rtx_AND (DImode, insn, mask);
11496 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11497 insn = gen_lowpart (SImode, insn);
11498 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11499 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11500 insn = gen_rtx_AND (DImode, insn, mask);
11501 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11502 insn = gen_lowpart (SImode, insn);
11503 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11504 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11505 insn = gen_rtx_AND (DImode, insn, mask);
11506 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11507 insn = gen_lowpart (SImode, insn);
11508 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11509 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11510 GEN_INT (0x6bf10600));
11511 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11512 GEN_INT (0x4415fc10));
11513 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11514 GEN_INT (0x4401fff0));
11515 emit_insn (gen_ic_invalidate_line (tramp));
11516 return;
11517 }
11518 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11519 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11520
11521 tramp_templ = gen_datalabel_ref (tramp_templ);
11522 dst = tramp_mem;
11523 src = gen_const_mem (BLKmode, tramp_templ);
11524 set_mem_align (dst, 256);
11525 set_mem_align (src, 64);
11526 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11527
11528 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11529 emit_move_insn (adjust_address (tramp_mem, Pmode,
11530 fixed_len + GET_MODE_SIZE (Pmode)),
11531 cxt);
11532 emit_insn (gen_ic_invalidate_line (tramp));
11533 return;
11534 }
11535 else if (TARGET_SHMEDIA)
11536 {
11537 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11538 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11539 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11540 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11541 /* movi 0,r1: 0xcc000010 shori 0,r1: 0xc8000010 concatenated,
11542 rotated right by 10, and the higher 16 bits of every 32 selected. */
11543 rtx movishori
11544 = force_reg (V2HImode, (simplify_gen_subreg
11545 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11546 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11547 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11548
11549 fnaddr = force_reg (SImode, fnaddr);
11550 cxt = force_reg (SImode, cxt);
11551 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11552 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11553 movishori));
11554 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11555 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11556 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11557 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11558 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11559 gen_rtx_SUBREG (V2HImode, cxt, 0),
11560 movishori));
11561 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11562 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11563 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11564 if (TARGET_LITTLE_ENDIAN)
11565 {
11566 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11567 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11568 }
11569 else
11570 {
11571 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11572 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11573 }
11574 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11575 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11576 emit_insn (gen_ic_invalidate_line (tramp));
11577 return;
11578 }
11579 else if (TARGET_SHCOMPACT)
11580 {
11581 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11582 return;
11583 }
11584 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11585 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11586 SImode));
11587 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11588 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11589 SImode));
11590 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11591 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11592 if (TARGET_HARD_SH4 || TARGET_SH5)
11593 {
11594 if (!TARGET_INLINE_IC_INVALIDATE
11595 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
11596 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11597 FUNCTION_ORDINARY),
11598 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11599 else
11600 emit_insn (gen_ic_invalidate_line (tramp));
11601 }
11602 }
11603
11604 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11605 static rtx
11606 sh_trampoline_adjust_address (rtx tramp)
11607 {
11608 if (TARGET_SHMEDIA)
11609 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11610 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11611 return tramp;
11612 }
11613
11614 /* FIXME: This is overly conservative. A SHcompact function that
11615 receives arguments ``by reference'' will have them stored in its
11616 own stack frame, so it must not pass pointers or references to
11617 these arguments to other functions by means of sibling calls. */
11618 /* If PIC, we cannot make sibling calls to global functions
11619 because the PLT requires r12 to be live. */
11620 static bool
11621 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11622 {
11623 return (1
11624 && (! TARGET_SHCOMPACT
11625 || crtl->args.info.stack_regs == 0)
11626 && ! sh_cfun_interrupt_handler_p ()
11627 && (! flag_pic
11628 || (decl && ! TREE_PUBLIC (decl))
11629 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11630 }
11631 \f
11632 /* Machine specific built-in functions. */
11633
11634 struct builtin_description
11635 {
11636 bool (* const is_enabled) (void);
11637 const enum insn_code icode;
11638 const char *const name;
11639 int signature;
11640 tree fndecl;
11641 };
11642
11643 static bool
11644 shmedia_builtin_p (void)
11645 {
11646 return TARGET_SHMEDIA;
11647 }
11648
11649 /* This function can be used if there are any built-ins that are not for
11650 SHmedia. It's commented out to avoid the defined-but-unused warning.
11651 static bool
11652 sh1_builtin_p (void)
11653 {
11654 return TARGET_SH1;
11655 }
11656 */
11657
11658 /* Describe number and signedness of arguments; arg[0] == result
11659 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11660 /* 9: 64-bit pointer, 10: 32-bit pointer. */
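/* For example, SH_BLTIN_SH_HI below is { 4, 4, 1 }: the result and the
   first argument take whatever type corresponds to the insn operand's
   mode (signedness "don't care") while the second argument is unsigned.
   An entry whose arg[0] is 0, such as SH_BLTIN_PV { 0, 8 }, describes a
   builtin that returns void and takes a single pointer argument.  */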
11661 static const char signature_args[][4] =
11662 {
11663 #define SH_BLTIN_V2SI2 0
11664 { 4, 4 },
11665 #define SH_BLTIN_V4HI2 1
11666 { 4, 4 },
11667 #define SH_BLTIN_V2SI3 2
11668 { 4, 4, 4 },
11669 #define SH_BLTIN_V4HI3 3
11670 { 4, 4, 4 },
11671 #define SH_BLTIN_V8QI3 4
11672 { 4, 4, 4 },
11673 #define SH_BLTIN_MAC_HISI 5
11674 { 1, 4, 4, 1 },
11675 #define SH_BLTIN_SH_HI 6
11676 { 4, 4, 1 },
11677 #define SH_BLTIN_SH_SI 7
11678 { 4, 4, 1 },
11679 #define SH_BLTIN_V4HI2V2SI 8
11680 { 4, 4, 4 },
11681 #define SH_BLTIN_V4HI2V8QI 9
11682 { 4, 4, 4 },
11683 #define SH_BLTIN_SISF 10
11684 { 4, 2 },
11685 #define SH_BLTIN_LDUA_L 11
11686 { 2, 10 },
11687 #define SH_BLTIN_LDUA_Q 12
11688 { 1, 10 },
11689 #define SH_BLTIN_STUA_L 13
11690 { 0, 10, 2 },
11691 #define SH_BLTIN_STUA_Q 14
11692 { 0, 10, 1 },
11693 #define SH_BLTIN_LDUA_L64 15
11694 { 2, 9 },
11695 #define SH_BLTIN_LDUA_Q64 16
11696 { 1, 9 },
11697 #define SH_BLTIN_STUA_L64 17
11698 { 0, 9, 2 },
11699 #define SH_BLTIN_STUA_Q64 18
11700 { 0, 9, 1 },
11701 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11702 #define SH_BLTIN_2 19
11703 #define SH_BLTIN_SU 19
11704 { 1, 2 },
11705 #define SH_BLTIN_3 20
11706 #define SH_BLTIN_SUS 20
11707 { 2, 2, 1 },
11708 #define SH_BLTIN_PSSV 21
11709 { 0, 8, 2, 2 },
11710 #define SH_BLTIN_XXUU 22
11711 #define SH_BLTIN_UUUU 22
11712 { 1, 1, 1, 1 },
11713 #define SH_BLTIN_PV 23
11714 { 0, 8 },
11715 #define SH_BLTIN_VP 24
11716 { 8, 0 },
11717 };
11718 /* mcmv: operands considered unsigned. */
11719 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11720 /* mperm: control value considered unsigned int. */
11721 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11722 /* mshards_q: returns signed short. */
11723 /* nsb: takes long long arg, returns unsigned char. */
11724 static struct builtin_description bdesc[] =
11725 {
11726 { shmedia_builtin_p,
11727 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11728 { shmedia_builtin_p,
11729 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11730 { shmedia_builtin_p,
11731 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11732 { shmedia_builtin_p,
11733 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11734 { shmedia_builtin_p,
11735 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11736 { shmedia_builtin_p,
11737 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11738 { shmedia_builtin_p,
11739 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11740 { shmedia_builtin_p,
11741 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11742 { shmedia_builtin_p,
11743 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11744 { shmedia_builtin_p,
11745 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11746 { shmedia_builtin_p,
11747 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11748 { shmedia_builtin_p,
11749 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11750 { shmedia_builtin_p,
11751 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11752 { shmedia_builtin_p,
11753 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11754 { shmedia_builtin_p,
11755 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11756 { shmedia_builtin_p,
11757 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11758 { shmedia_builtin_p,
11759 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11760 { shmedia_builtin_p,
11761 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11762 { shmedia_builtin_p,
11763 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11764 { shmedia_builtin_p,
11765 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11766 { shmedia_builtin_p,
11767 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11768 { shmedia_builtin_p,
11769 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11770 { shmedia_builtin_p,
11771 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11772 { shmedia_builtin_p,
11773 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11774 { shmedia_builtin_p,
11775 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11776 { shmedia_builtin_p,
11777 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11778 { shmedia_builtin_p,
11779 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11780 { shmedia_builtin_p,
11781 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11782 { shmedia_builtin_p,
11783 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11784 { shmedia_builtin_p,
11785 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11786 { shmedia_builtin_p,
11787 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11788 { shmedia_builtin_p,
11789 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11790 { shmedia_builtin_p,
11791 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11792 { shmedia_builtin_p,
11793 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11794 { shmedia_builtin_p,
11795 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11796 { shmedia_builtin_p,
11797 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11798 { shmedia_builtin_p,
11799 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11800 { shmedia_builtin_p,
11801 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11802 { shmedia_builtin_p,
11803 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11804 { shmedia_builtin_p,
11805 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11806 { shmedia_builtin_p,
11807 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11808 { shmedia_builtin_p,
11809 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11810 { shmedia_builtin_p,
11811 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11812 { shmedia_builtin_p,
11813 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11814 { shmedia_builtin_p,
11815 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11816 { shmedia_builtin_p,
11817 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11818 { shmedia_builtin_p,
11819 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11820 { shmedia_builtin_p,
11821 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11822 { shmedia_builtin_p,
11823 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11824 { shmedia_builtin_p,
11825 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11826 { shmedia_builtin_p,
11827 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11828 { shmedia_builtin_p,
11829 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11830 { shmedia_builtin_p,
11831 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11832 { shmedia_builtin_p,
11833 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11834 { shmedia_builtin_p,
11835 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11836 { shmedia_builtin_p,
11837 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11838 { shmedia_builtin_p,
11839 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11840 { shmedia_builtin_p,
11841 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11842 { shmedia_builtin_p,
11843 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11844 { shmedia_builtin_p,
11845 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11846 { shmedia_builtin_p,
11847 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11848 { shmedia_builtin_p,
11849 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11850 { shmedia_builtin_p,
11851 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11852 { shmedia_builtin_p,
11853 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11854 { shmedia_builtin_p,
11855 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11856 { shmedia_builtin_p,
11857 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11858 { shmedia_builtin_p,
11859 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11860 { shmedia_builtin_p,
11861 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11862 { shmedia_builtin_p,
11863 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11864 { shmedia_builtin_p,
11865 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11866 { shmedia_builtin_p,
11867 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11868 { shmedia_builtin_p,
11869 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11870 { shmedia_builtin_p,
11871 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11872 { shmedia_builtin_p,
11873 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11874 { shmedia_builtin_p,
11875 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11876 { shmedia_builtin_p,
11877 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11878 { shmedia_builtin_p,
11879 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11880 { shmedia_builtin_p,
11881 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11882 { shmedia_builtin_p,
11883 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11884 { shmedia_builtin_p,
11885 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11886 { shmedia_builtin_p,
11887 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11888 { shmedia_builtin_p,
11889 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11890 { shmedia_builtin_p,
11891 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11892 };
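/* A minimal illustrative use of one of these builtins from user code,
   assuming an SHmedia target and GNU vector extensions (hypothetical
   example, not part of this file):

     typedef int v2si __attribute__ ((vector_size (8)));

     v2si
     vabs (v2si x)
     {
       return __builtin_absv2si2 (x);
     }
*/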
11893
11894 static void
11895 sh_init_builtins (void)
11896 {
11897 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11898 memset (shared, 0, sizeof shared);
11899
11900 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
11901 {
11902 builtin_description* d = &bdesc[di];
11903
11904 if (!d->is_enabled ())
11905 continue;
11906
11907 tree type, arg_type = NULL_TREE;
11908 int signature = d->signature;
11909
11910 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11911 type = shared[signature];
11912 else
11913 {
11914 int has_result = signature_args[signature][0] != 0;
11915 tree args[3];
11916
11917 if ((signature_args[signature][1] & 8)
11918 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11919 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11920 continue;
11921 if (! TARGET_FPU_ANY
11922 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11923 continue;
11924 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
11925 args[i] = NULL_TREE;
11926 for (int i = 3; ; i--)
11927 {
11928 int arg = signature_args[signature][i];
11929 int opno = i - 1 + has_result;
11930
11931 if (arg & 8)
11932 arg_type = ptr_type_node;
11933 else if (arg)
11934 arg_type = (*lang_hooks.types.type_for_mode)
11935 (insn_data[d->icode].operand[opno].mode, (arg & 1));
11936 else if (i)
11937 continue;
11938 else
11939 arg_type = void_type_node;
11940 if (i == 0)
11941 break;
11942 args[i-1] = arg_type;
11943 }
11944 type = build_function_type_list (arg_type, args[0], args[1],
11945 args[2], NULL_TREE);
11946 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11947 shared[signature] = type;
11948 }
11949 d->fndecl =
11950 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11951 NULL, NULL_TREE);
11952 }
11953 }
11954
11955 /* Implements target hook vector_mode_supported_p. */
11956 bool
11957 sh_vector_mode_supported_p (enum machine_mode mode)
11958 {
11959 if (TARGET_FPU_ANY
11960 && ((mode == V2SFmode)
11961 || (mode == V4SFmode)
11962 || (mode == V16SFmode)))
11963 return true;
11964
11965 else if (TARGET_SHMEDIA
11966 && ((mode == V8QImode)
11967 || (mode == V2HImode)
11968 || (mode == V4HImode)
11969 || (mode == V2SImode)))
11970 return true;
11971
11972 return false;
11973 }
11974
11975 bool
11976 sh_frame_pointer_required (void)
11977 {
11978 /* If needed override this in other tm.h files to cope with various OS
11979 lossage requiring a frame pointer. */
11980 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11981 return true;
11982
11983 if (crtl->profile)
11984 return true;
11985
11986 return false;
11987 }
11988
11989 /* Implements target hook dwarf_calling_convention. Return an enum
11990 dwarf_calling_convention value. */
11991 int
11992 sh_dwarf_calling_convention (const_tree func)
11993 {
11994 if (sh_attr_renesas_p (func))
11995 return DW_CC_GNU_renesas_sh;
11996
11997 return DW_CC_normal;
11998 }
11999
12000 /* Return the SH builtin decl for CODE. */
12001 static tree
12002 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
12003 {
12004 if (code >= ARRAY_SIZE (bdesc))
12005 return error_mark_node;
12006
12007 if (!bdesc[code].is_enabled ())
12008 return error_mark_node;
12009
12010 return bdesc[code].fndecl;
12011 }
12012
12013 /* Expand an expression EXP that calls a built-in function,
12014 with result going to TARGET if that's convenient
12015 (and in mode MODE if that's convenient).
12016 SUBTARGET may be used as the target for computing one of EXP's operands.
12017 IGNORE is nonzero if the value is to be ignored. */
12018 static rtx
12019 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12020 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
12021 {
12022 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12023 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12024 const struct builtin_description *d = &bdesc[fcode];
12025 enum insn_code icode = d->icode;
12026 int signature = d->signature;
12027 int nop = 0;
12028 rtx op[4];
12029
12030 if (signature_args[signature][0])
12031 {
12032 if (ignore)
12033 return NULL_RTX;
12034
12035 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12036 if (! target || GET_MODE (target) != tmode
12037 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12038 target = gen_reg_rtx (tmode);
12039 op[nop++] = target;
12040 }
12041 else
12042 target = NULL_RTX;
12043
12044 for (int i = 1; i <= 3; i++, nop++)
12045 {
12046 tree arg;
12047 enum machine_mode opmode, argmode;
12048 tree optype;
12049
12050 if (! signature_args[signature][i])
12051 break;
12052 arg = CALL_EXPR_ARG (exp, i - 1);
12053 if (arg == error_mark_node)
12054 return const0_rtx;
12055 if (signature_args[signature][i] & 8)
12056 {
12057 opmode = ptr_mode;
12058 optype = ptr_type_node;
12059 }
12060 else
12061 {
12062 opmode = insn_data[icode].operand[nop].mode;
12063 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12064 }
12065 argmode = TYPE_MODE (TREE_TYPE (arg));
12066 if (argmode != opmode)
12067 arg = build1 (NOP_EXPR, optype, arg);
12068 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12069 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12070 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12071 }
12072
12073 rtx pat = NULL_RTX;
12074
12075 switch (nop)
12076 {
12077 case 1:
12078 pat = (*insn_data[d->icode].genfun) (op[0]);
12079 break;
12080 case 2:
12081 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12082 break;
12083 case 3:
12084 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12085 break;
12086 case 4:
12087 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12088 break;
12089 default:
12090 gcc_unreachable ();
12091 }
12092 if (! pat)
12093 return NULL_RTX;
12094 emit_insn (pat);
12095 return target;
12096 }
12097
12098 void
12099 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12100 {
12101 rtx sel0 = const0_rtx;
12102 rtx sel1 = const1_rtx;
12103 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12104 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12105
12106 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12107 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
12108 }
12109
12110 void
12111 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12112 {
12113 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12114
12115 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12116 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12117 }
12118
12119 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12120 We can allow any mode in any general register. The special registers
12121 only allow SImode. Don't allow any mode in the PR.
12122
12123 We cannot hold DCmode values in the XD registers because alter_reg
12124 handles subregs of them incorrectly. We could work around this by
12125 spacing the XD registers like the DR registers, but this would require
12126 additional memory in every compilation to hold larger register vectors.
12127 We could hold SFmode / SCmode values in XD registers, but that
12128 would require a tertiary reload when reloading from / to memory,
12129 and a secondary reload to reload from / to general regs; that
12130 seems to be a losing proposition.
12131
12132 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12133 it won't be ferried through GP registers first. */
12134 bool
12135 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
12136 {
12137 if (SPECIAL_REGISTER_P (regno))
12138 return mode == SImode;
12139
12140 if (regno == FPUL_REG)
12141 return (mode == SImode || mode == SFmode);
12142
12143 if (FP_REGISTER_P (regno) && mode == SFmode)
12144 return true;
12145
12146 if (mode == V2SFmode)
12147 {
12148 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12149 || GENERAL_REGISTER_P (regno)))
12150 return true;
12151 else
12152 return false;
12153 }
12154
12155 if (mode == V4SFmode)
12156 {
12157 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12158 || GENERAL_REGISTER_P (regno))
12159 return true;
12160 else
12161 return false;
12162 }
12163
12164 if (mode == V16SFmode)
12165 {
12166 if (TARGET_SHMEDIA)
12167 {
12168 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12169 return true;
12170 else
12171 return false;
12172 }
12173 else
12174 return regno == FIRST_XD_REG;
12175 }
12176
12177 if (FP_REGISTER_P (regno))
12178 {
12179 if (mode == SFmode
12180 || mode == SImode
12181 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12182 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12183 || mode == DCmode
12184 || (TARGET_SHMEDIA
12185 && (mode == DFmode || mode == DImode
12186 || mode == V2SFmode || mode == TImode)))
12187 && ((regno - FIRST_FP_REG) & 1) == 0)
12188 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12189 && ((regno - FIRST_FP_REG) & 3) == 0))
12190 return true;
12191 else
12192 return false;
12193 }
12194
12195 if (XD_REGISTER_P (regno))
12196 return mode == DFmode;
12197
12198 if (TARGET_REGISTER_P (regno))
12199 return (mode == DImode || mode == SImode || mode == PDImode);
12200
12201 if (regno == PR_REG)
12202 return mode == SImode;
12203
12204 if (regno == FPSCR_REG)
12205 return mode == PSImode;
12206
12207 /* FIXME. This works around PR target/37633 for -O0. */
12208 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12209 {
12210 unsigned int n = GET_MODE_SIZE (mode) / 8;
12211
12212 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12213 && regno <= FIRST_GENERAL_REG + 14)
12214 return false;
12215 }
12216
12217 return true;
12218 }
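/* For example, on SH4 a DFmode value may only go in an FP register whose
   offset from FIRST_FP_REG is even (the start of a DR register pair),
   and TImode in FP registers requires an offset that is a multiple of
   four.  */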
12219
12220 /* Return true if a mode change from FROM to TO is invalid for registers
12221 in class RCLASS. */
12222 bool
12223 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
12224 enum reg_class rclass)
12225 {
12226 /* We want to enable the use of SUBREGs as a means to
12227 VEC_SELECT a single element of a vector. */
12228
12229 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12230 This can be problematic when SFmode vector subregs need to be accessed
12231 on the stack with displacement addressing, as it happens with -O0.
12232 Thus we disallow the mode change for -O0. */
12233 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12234 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12235
12236 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12237 {
12238 if (TARGET_LITTLE_ENDIAN)
12239 {
12240 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12241 return reg_classes_intersect_p (DF_REGS, rclass);
12242 }
12243 else
12244 {
12245 if (GET_MODE_SIZE (from) < 8)
12246 return reg_classes_intersect_p (DF_REGS, rclass);
12247 }
12248 }
12249 return false;
12250 }
12251
12252 /* Return true if values in machine mode MODE will likely be
12253 allocated to registers in small register classes. */
12254 bool
12255 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
12256 {
12257 return (! TARGET_SHMEDIA);
12258 }
12259
12260 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12261 that label is used. */
12262 void
12263 sh_mark_label (rtx address, int nuses)
12264 {
12265 if (GOTOFF_P (address))
12266 {
12267 /* Extract the label or symbol. */
12268 address = XEXP (address, 0);
12269 if (GET_CODE (address) == PLUS)
12270 address = XEXP (address, 0);
12271 address = XVECEXP (address, 0, 0);
12272 }
12273 if (GET_CODE (address) == LABEL_REF
12274 && LABEL_P (XEXP (address, 0)))
12275 LABEL_NUSES (XEXP (address, 0)) += nuses;
12276 }
12277
12278 /* Compute extra cost of moving data between one register class
12279 and another.
12280
12281 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12282 uses this information. Hence, the general register <-> floating point
12283 register information here is not used for SFmode. */
12284 static int
12285 sh_register_move_cost (enum machine_mode mode,
12286 reg_class_t srcclass, reg_class_t dstclass)
12287 {
12288 if (dstclass == T_REGS || dstclass == PR_REGS)
12289 return 10;
12290
12291 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12292 return 4;
12293
12294 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12295 && REGCLASS_HAS_FP_REG (srcclass)
12296 && REGCLASS_HAS_FP_REG (dstclass))
12297 return 4;
12298
12299 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12300 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12301
12302 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12303 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12304 return 9;
12305
12306 if ((REGCLASS_HAS_FP_REG (dstclass)
12307 && REGCLASS_HAS_GENERAL_REG (srcclass))
12308 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12309 && REGCLASS_HAS_FP_REG (srcclass)))
12310 {
12311 /* Discourage trying to use fp regs for a pointer. This also
12312 discourages fp regs with SImode because Pmode is an alias
12313 of SImode on this target. See PR target/48596. */
12314 int addend = (mode == Pmode) ? 40 : 0;
12315
12316 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12317 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12318 }
12319
12320 if ((dstclass == FPUL_REGS
12321 && REGCLASS_HAS_GENERAL_REG (srcclass))
12322 || (srcclass == FPUL_REGS
12323 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12324 return 5;
12325
12326 if ((dstclass == FPUL_REGS
12327 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12328 || (srcclass == FPUL_REGS
12329 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12330 return 7;
12331
12332 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12333 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12334 return 20;
12335
12336 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12337 if (TARGET_SHMEDIA
12338 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12339 {
12340 if (sh_gettrcost >= 0)
12341 return sh_gettrcost;
12342 else if (!TARGET_PT_FIXED)
12343 return 100;
12344 }
12345
12346 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12347 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12348 return 4;
12349
12350 if (TARGET_SHMEDIA
12351 || (TARGET_FMOVD
12352 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12353 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12354 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12355
12356 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
12357 }
12358
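/* Emit an insn that loads a pointer-sized value from memory at ADDR into REG,
   sign extending it if Pmode is wider than ptr_mode. */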
12359 static rtx
12360 emit_load_ptr (rtx reg, rtx addr)
12361 {
12362 rtx mem = gen_const_mem (ptr_mode, addr);
12363
12364 if (Pmode != ptr_mode)
12365 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12366 return emit_move_insn (reg, mem);
12367 }
12368
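/* Implement TARGET_ASM_OUTPUT_MI_THUNK: output the thunk for a virtual call.
   Adjust the incoming `this' pointer by DELTA and, if VCALL_OFFSET is nonzero,
   by an offset loaded through the vtable, then tail-call FUNCTION. */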
12369 static void
12370 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12371 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12372 tree function)
12373 {
12374 CUMULATIVE_ARGS cum;
12375 int structure_value_byref = 0;
12376 rtx this_rtx, this_value, sibcall, funexp;
12377 rtx_insn *insns;
12378 tree funtype = TREE_TYPE (function);
12379 int simple_add = CONST_OK_FOR_ADD (delta);
12380 int did_load = 0;
12381 rtx scratch0, scratch1, scratch2;
12382 unsigned i;
12383
12384 reload_completed = 1;
12385 epilogue_completed = 1;
12386 crtl->uses_only_leaf_regs = 1;
12387
12388 emit_note (NOTE_INSN_PROLOGUE_END);
12389
12390 /* Find the "this" pointer. We have such a wide range of ABIs for the
12391 SH that it's best to do this completely machine independently.
12392 "this" is passed as first argument, unless a structure return pointer
12393 comes first, in which case "this" comes second. */
12394 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12395 #ifndef PCC_STATIC_STRUCT_RETURN
12396 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12397 structure_value_byref = 1;
12398 #endif /* not PCC_STATIC_STRUCT_RETURN */
12399 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12400 {
12401 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12402
12403 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12404 }
12405 this_rtx
12406 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12407
12408 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12409 static chain pointer (even if you can't have nested virtual functions
12410 right now, someone might implement them sometime), and the rest of the
12411 registers are used for argument passing, are callee-saved, or reserved. */
12412 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12413 -ffixed-reg has been used. */
12414 if (! call_used_regs[0] || fixed_regs[0])
12415 error ("r0 needs to be available as a call-clobbered register");
12416 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12417 if (! TARGET_SH5)
12418 {
12419 if (call_used_regs[1] && ! fixed_regs[1])
12420 scratch1 = gen_rtx_REG (ptr_mode, 1);
12421 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12422 to the location where struct values are to be returned. */
12423 if (call_used_regs[3] && ! fixed_regs[3])
12424 scratch2 = gen_rtx_REG (Pmode, 3);
12425 }
12426 else if (TARGET_SHMEDIA)
12427 {
12428 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12429 if (i != REGNO (scratch0) &&
12430 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12431 {
12432 scratch1 = gen_rtx_REG (ptr_mode, i);
12433 break;
12434 }
12435 if (scratch1 == scratch0)
12436 error ("need a second call-clobbered general purpose register");
12437 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12438 if (call_used_regs[i] && ! fixed_regs[i])
12439 {
12440 scratch2 = gen_rtx_REG (Pmode, i);
12441 break;
12442 }
12443 if (scratch2 == scratch0)
12444 error ("need a call-clobbered target register");
12445 }
12446
12447 this_value = plus_constant (Pmode, this_rtx, delta);
12448 if (vcall_offset
12449 && (simple_add || scratch0 != scratch1)
12450 && strict_memory_address_p (ptr_mode, this_value))
12451 {
12452 emit_load_ptr (scratch0, this_value);
12453 did_load = 1;
12454 }
12455
12456 if (!delta)
12457 ; /* Do nothing. */
12458 else if (simple_add)
12459 emit_move_insn (this_rtx, this_value);
12460 else
12461 {
12462 emit_move_insn (scratch1, GEN_INT (delta));
12463 emit_insn (gen_add2_insn (this_rtx, scratch1));
12464 }
12465
12466 if (vcall_offset)
12467 {
12468 rtx offset_addr;
12469
12470 if (!did_load)
12471 emit_load_ptr (scratch0, this_rtx);
12472
12473 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12474 if (strict_memory_address_p (ptr_mode, offset_addr))
12475 ; /* Do nothing. */
12476 else if (! TARGET_SH5 && scratch0 != scratch1)
12477 {
12478 /* scratch0 != scratch1, and we have indexed loads. Get a better
12479 schedule by loading the offset into r1 and using an indexed
12480 load - then the load of r1 can issue before the load from
12481 (this_rtx + delta) finishes. */
12482 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12483 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12484 }
12485 else if (CONST_OK_FOR_ADD (vcall_offset))
12486 {
12487 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12488 offset_addr = scratch0;
12489 }
12490 else if (scratch0 != scratch1)
12491 {
12492 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12493 emit_insn (gen_add2_insn (scratch0, scratch1));
12494 offset_addr = scratch0;
12495 }
12496 else
12497 gcc_unreachable (); /* FIXME */
12498 emit_load_ptr (scratch0, offset_addr);
12499
12500 if (Pmode != ptr_mode)
12501 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12502 emit_insn (gen_add2_insn (this_rtx, scratch0));
12503 }
12504
12505 /* Generate a tail call to the target function. */
12506 if (! TREE_USED (function))
12507 {
12508 assemble_external (function);
12509 TREE_USED (function) = 1;
12510 }
12511 funexp = XEXP (DECL_RTL (function), 0);
12512 /* If the function is overridden, so is the thunk, hence we don't
12513 need GOT addressing even if this is a public symbol. */
12514 #if 0
12515 if (TARGET_SH1 && ! flag_weak)
12516 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12517 else
12518 #endif
12519 if (TARGET_SH2 && flag_pic)
12520 {
12521 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12522 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12523 }
12524 else
12525 {
12526 if (TARGET_SHMEDIA && flag_pic)
12527 {
12528 funexp = gen_sym2PIC (funexp);
12529 PUT_MODE (funexp, Pmode);
12530 }
12531 emit_move_insn (scratch2, funexp);
12532 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12533 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12534 }
12535 sibcall = emit_call_insn (sibcall);
12536 SIBLING_CALL_P (sibcall) = 1;
12537 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12538 emit_barrier ();
12539
12540 /* Run just enough of rest_of_compilation to do scheduling and get
12541 the insns emitted. Note that use_thunk calls
12542 assemble_start_function and assemble_end_function. */
12543
12544 insns = get_insns ();
12545
12546 if (optimize > 0)
12547 {
12548 if (! cfun->cfg)
12549 init_flow (cfun);
12550 split_all_insns_noflow ();
12551 }
12552
12553 sh_reorg ();
12554 shorten_branches (insns);
12555 final_start_function (insns, file, 1);
12556 final (insns, file, 1);
12557 final_end_function ();
12558
12559 reload_completed = 0;
12560 epilogue_completed = 0;
12561 }
12562
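/* Return an rtx for the address of the function NAME of kind KIND, loading it
   into TARGET (if given) when a register is needed. For PIC, SFUNC_GOT symbols
   are loaded through the GOT and SFUNC_STATIC symbols via GOTOFF relocations. */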
12563 rtx
12564 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12565 {
12566 rtx sym;
12567
12568 /* If this is not an ordinary function, the name usually comes from a
12569 string literal or an sprintf buffer. Make sure we use the same
12570 string consistently, so that cse will be able to unify address loads. */
12571 if (kind != FUNCTION_ORDINARY)
12572 name = IDENTIFIER_POINTER (get_identifier (name));
12573 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12574 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12575 if (flag_pic)
12576 switch (kind)
12577 {
12578 case FUNCTION_ORDINARY:
12579 break;
12580 case SFUNC_GOT:
12581 {
12582 rtx reg = target ? target : gen_reg_rtx (Pmode);
12583
12584 emit_insn (gen_symGOT2reg (reg, sym));
12585 sym = reg;
12586 break;
12587 }
12588 case SFUNC_STATIC:
12589 {
12590 /* ??? To allow cse to work, we use GOTOFF relocations.
12591 We could add combiner patterns to transform this into
12592 straight pc-relative calls with sym2PIC / bsrf when
12593 label load and function call are still 1:1 and in the
12594 same basic block during combine. */
12595 rtx reg = target ? target : gen_reg_rtx (Pmode);
12596
12597 emit_insn (gen_symGOTOFF2reg (reg, sym));
12598 sym = reg;
12599 break;
12600 }
12601 }
12602 if (target && sym != target)
12603 {
12604 emit_move_insn (target, sym);
12605 return target;
12606 }
12607 return sym;
12608 }
12609
12610 /* Find the number of a general purpose register in S; return -1 if none. */
12611 static int
12612 scavenge_reg (HARD_REG_SET *s)
12613 {
12614 int r;
12615 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12616 if (TEST_HARD_REG_BIT (*s, r))
12617 return r;
12618 return -1;
12619 }
12620
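/* Return an rtx that represents the value the PR register (the return
   address) had on entry to the current function. */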
12621 rtx
12622 sh_get_pr_initial_val (void)
12623 {
12624 rtx val;
12625
12626 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12627 PR register on SHcompact, because it might be clobbered by the prologue.
12628 We check first if that is known to be the case. */
12629 if (TARGET_SHCOMPACT
12630 && ((crtl->args.info.call_cookie
12631 & ~ CALL_COOKIE_RET_TRAMP (1))
12632 || crtl->saves_all_registers))
12633 return gen_frame_mem (SImode, return_address_pointer_rtx);
12634
12635 /* If we haven't finished rtl generation, there might be a nonlocal label
12636 that we haven't seen yet.
12637 ??? get_hard_reg_initial_val fails if it is called after register
12638 allocation has started, unless it has been called before for the
12639 same register. And even then, we end up in trouble if we didn't use
12640 the register in the same basic block before. So call
12641 get_hard_reg_initial_val now and wrap it in an unspec if we might
12642 need to replace it. */
12643 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12644 combine can put the pseudo returned by get_hard_reg_initial_val into
12645 instructions that need a general purpose register, which will fail to
12646 be recognized when the pseudo becomes allocated to PR. */
12647 val
12648 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12649 if (TARGET_SH1)
12650 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12651 return val;
12652 }
12653
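/* Try to expand an scc operation whose first compared operand is the T
   register itself: operands[0] is the target, operands[1] the comparison,
   operands[2] / operands[3] the compared values. Only EQ and NE comparisons
   of T against a constant are handled; return true on success, false if the
   caller must expand the operation some other way. */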
12654 bool
12655 sh_expand_t_scc (rtx operands[])
12656 {
12657 enum rtx_code code = GET_CODE (operands[1]);
12658 rtx target = operands[0];
12659 rtx op0 = operands[2];
12660 rtx op1 = operands[3];
12661 rtx result = target;
12662 HOST_WIDE_INT val;
12663
12664 if (!REG_P (op0) || REGNO (op0) != T_REG
12665 || !CONST_INT_P (op1))
12666 return false;
12667 if (!REG_P (result))
12668 result = gen_reg_rtx (SImode);
12669 val = INTVAL (op1);
12670 if ((code == EQ && val == 1) || (code == NE && val == 0))
12671 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12672 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12673 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12674 else if (code == EQ || code == NE)
12675 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12676 else
12677 return false;
12678 if (result != target)
12679 emit_move_insn (target, result);
12680 return true;
12681 }
12682
12683 /* INSN is an sfunc; return the rtx that describes the address used. */
12684 static rtx
12685 extract_sfunc_addr (rtx insn)
12686 {
12687 rtx pattern, part = NULL_RTX;
12688 int len, i;
12689
12690 pattern = PATTERN (insn);
12691 len = XVECLEN (pattern, 0);
12692 for (i = 0; i < len; i++)
12693 {
12694 part = XVECEXP (pattern, 0, i);
12695 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12696 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12697 return XEXP (part, 0);
12698 }
12699 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12700 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12701 }
12702
12703 /* Verify that the register in use_sfunc_addr still agrees with the address
12704 used in the sfunc. This prevents fill_slots_from_thread from changing
12705 use_sfunc_addr.
12706 INSN is the use_sfunc_addr instruction, and REG is the register it
12707 guards. */
12708 bool
12709 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
12710 {
12711 /* Search for the sfunc. It should really come right after INSN. */
12712 while ((insn = NEXT_INSN (insn)))
12713 {
12714 if (LABEL_P (insn) || JUMP_P (insn))
12715 break;
12716 if (! INSN_P (insn))
12717 continue;
12718
12719 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
12720 insn = seq->insn (0);
12721 if (GET_CODE (PATTERN (insn)) != PARALLEL
12722 || get_attr_type (insn) != TYPE_SFUNC)
12723 continue;
12724 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12725 }
12726 gcc_unreachable ();
12727 }
12728
12729 /* This function returns a constant rtx that represents 2**15 / pi in
12730 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
12731 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
12732 static GTY(()) rtx sh_fsca_sf2int_rtx;
12733
12734 rtx
12735 sh_fsca_sf2int (void)
12736 {
12737 if (! sh_fsca_sf2int_rtx)
12738 {
12739 REAL_VALUE_TYPE rv;
12740
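/* 2**15 / pi = 32768 / 3.14159265... ~= 10430.378350470453. */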
12741 real_from_string (&rv, "10430.378350470453");
12742 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12743 }
12744
12745 return sh_fsca_sf2int_rtx;
12746 }
12747
12748 /* This function returns a constant rtx that represents pi / 2**15 in
12749 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12750 of a full circle back to an SFmode value in radians, i.e. 0x10000
12751 maps to 2*pi. */
12752 static GTY(()) rtx sh_fsca_int2sf_rtx;
12753
12754 rtx
12755 sh_fsca_int2sf (void)
12756 {
12757 if (! sh_fsca_int2sf_rtx)
12758 {
12759 REAL_VALUE_TYPE rv;
12760
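/* pi / 2**15 = 3.14159265... / 32768 ~= 9.587379924285257e-5. */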
12761 real_from_string (&rv, "9.587379924285257e-5");
12762 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12763 }
12764
12765 return sh_fsca_int2sf_rtx;
12766 }
12767
12768 /* Initialize the CUMULATIVE_ARGS structure. */
12769 void
12770 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12771 tree fntype,
12772 rtx libname ATTRIBUTE_UNUSED,
12773 tree fndecl,
12774 signed int n_named_args,
12775 enum machine_mode mode)
12776 {
12777 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12778 pcum->free_single_fp_reg = 0;
12779 pcum->stack_regs = 0;
12780 pcum->byref_regs = 0;
12781 pcum->byref = 0;
12782 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12783
12784 /* XXX - Should we check TARGET_HITACHI here ??? */
12785 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12786
12787 if (fntype)
12788 {
12789 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12790 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12791 pcum->prototype_p = prototype_p (fntype);
12792 pcum->arg_count [(int) SH_ARG_INT]
12793 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12794
12795 pcum->call_cookie
12796 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12797 && pcum->arg_count [(int) SH_ARG_INT] == 0
12798 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12799 ? int_size_in_bytes (TREE_TYPE (fntype))
12800 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12801 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12802 == FIRST_RET_REG));
12803 }
12804 else
12805 {
12806 pcum->arg_count [(int) SH_ARG_INT] = 0;
12807 pcum->prototype_p = FALSE;
12808 if (mode != VOIDmode)
12809 {
12810 pcum->call_cookie =
12811 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12812 && GET_MODE_SIZE (mode) > 4
12813 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12814
12815 /* If the default ABI is the Renesas ABI then all library
12816 calls must assume that the library will be using the
12817 Renesas ABI. So if the function would return its result
12818 in memory then we must force the address of this memory
12819 block onto the stack. Ideally we would like to call
12820 targetm.calls.return_in_memory() here but we do not have
12821 the TYPE or the FNDECL available so we synthesize the
12822 contents of that function as best we can. */
12823 pcum->force_mem =
12824 (TARGET_DEFAULT & MASK_HITACHI)
12825 && (mode == BLKmode
12826 || (GET_MODE_SIZE (mode) > 4
12827 && !(mode == DFmode
12828 && TARGET_FPU_DOUBLE)));
12829 }
12830 else
12831 {
12832 pcum->call_cookie = 0;
12833 pcum->force_mem = FALSE;
12834 }
12835 }
12836 }
12837
12838 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12839 not descend into CONST_DOUBLEs when doing the replacement.
12840
12841 Note that copying is not done so X must not be shared unless all copies
12842 are to be modified.
12843
12844 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12845 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12846 replacements[n*2+1] - and that we take mode changes into account.
12847
12848 If a replacement is ambiguous, return NULL_RTX.
12849
12850 If MODIFY is zero, don't modify any rtl in place,
12851 just return zero or nonzero for failure / success. */
12852 rtx
12853 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12854 {
12855 int i, j;
12856 const char *fmt;
12857
12858 /* The following prevents loops from occurring when we change a MEM in a
12859 CONST_DOUBLE into the same CONST_DOUBLE. */
12860 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
12861 return x;
12862
12863 for (i = n_replacements - 1; i >= 0 ; i--)
12864 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12865 return replacements[i*2+1];
12866
12867 /* Allow this function to make replacements in EXPR_LISTs. */
12868 if (x == NULL_RTX)
12869 return NULL_RTX;
12870
12871 if (GET_CODE (x) == SUBREG)
12872 {
12873 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12874 n_replacements, modify);
12875
12876 if (CONST_INT_P (new_rtx))
12877 {
12878 x = simplify_subreg (GET_MODE (x), new_rtx,
12879 GET_MODE (SUBREG_REG (x)),
12880 SUBREG_BYTE (x));
12881 if (! x)
12882 abort ();
12883 }
12884 else if (modify)
12885 SUBREG_REG (x) = new_rtx;
12886
12887 return x;
12888 }
12889 else if (REG_P (x))
12890 {
12891 unsigned regno = REGNO (x);
12892 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12893 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12894 rtx result = NULL_RTX;
12895
12896 for (i = n_replacements - 1; i >= 0; i--)
12897 {
12898 rtx from = replacements[i*2];
12899 rtx to = replacements[i*2+1];
12900 unsigned from_regno, from_nregs, to_regno, new_regno;
12901
12902 if (!REG_P (from))
12903 continue;
12904 from_regno = REGNO (from);
12905 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12906 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12907 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12908 {
12909 if (regno < from_regno
12910 || regno + nregs > from_regno + nregs
12911 || !REG_P (to)
12912 || result)
12913 return NULL_RTX;
12914 to_regno = REGNO (to);
12915 if (to_regno < FIRST_PSEUDO_REGISTER)
12916 {
12917 new_regno = regno + to_regno - from_regno;
12918 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12919 != nregs)
12920 return NULL_RTX;
12921 result = gen_rtx_REG (GET_MODE (x), new_regno);
12922 }
12923 else if (GET_MODE (x) <= GET_MODE (to))
12924 result = gen_lowpart_common (GET_MODE (x), to);
12925 else
12926 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12927 }
12928 }
12929 return result ? result : x;
12930 }
12931 else if (GET_CODE (x) == ZERO_EXTEND)
12932 {
12933 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12934 n_replacements, modify);
12935
12936 if (CONST_INT_P (new_rtx))
12937 {
12938 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12939 new_rtx, GET_MODE (XEXP (x, 0)));
12940 if (! x)
12941 abort ();
12942 }
12943 else if (modify)
12944 XEXP (x, 0) = new_rtx;
12945
12946 return x;
12947 }
12948
12949 fmt = GET_RTX_FORMAT (GET_CODE (x));
12950 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12951 {
12952 rtx new_rtx;
12953
12954 if (fmt[i] == 'e')
12955 {
12956 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12957 n_replacements, modify);
12958 if (!new_rtx)
12959 return NULL_RTX;
12960 if (modify)
12961 XEXP (x, i) = new_rtx;
12962 }
12963 else if (fmt[i] == 'E')
12964 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12965 {
12966 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12967 n_replacements, modify);
12968 if (!new_rtx)
12969 return NULL_RTX;
12970 if (modify)
12971 XVECEXP (x, i, j) = new_rtx;
12972 }
12973 }
12974
12975 return x;
12976 }
12977
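/* Return an rtx that converts X to MODE, normally via TRUNCATE. If X is
   itself a zero or sign extension whose operand is at least as wide as MODE,
   truncate the inner operand instead; if the inner operand is narrower,
   re-extend it directly to MODE when NEED_SIGN_EXT allows it. */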
12978 rtx
12979 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12980 {
12981 enum rtx_code code = TRUNCATE;
12982
12983 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12984 {
12985 rtx inner = XEXP (x, 0);
12986 enum machine_mode inner_mode = GET_MODE (inner);
12987
12988 if (inner_mode == mode)
12989 return inner;
12990 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12991 x = inner;
12992 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12993 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12994 {
12995 code = GET_CODE (x);
12996 x = inner;
12997 }
12998 }
12999 return gen_rtx_fmt_e (code, mode, x);
13000 }
13001
13002 /* Called via for_each_rtx after reload, to clean up truncates of
13003 registers that span multiple actual hard registers. */
13004 int
13005 shmedia_cleanup_truncate (rtx *p, void *n_changes)
13006 {
13007 rtx x = *p, reg;
13008
13009 if (GET_CODE (x) != TRUNCATE)
13010 return 0;
13011 reg = XEXP (x, 0);
13012 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
13013 {
13014 enum machine_mode reg_mode = GET_MODE (reg);
13015 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
13016 subreg_lowpart_offset (DImode, reg_mode));
13017 *(int*) n_changes += 1;
13018 return -1;
13019 }
13020 return 0;
13021 }
13022
13023 /* Loads and stores depend on the highpart of the address. However,
13024 set_attr_alternative does not give well-defined results before reload,
13025 so we must look at the rtl ourselves to see if any of the feeding
13026 registers is used in a memref.
13027
13028 Called by sh_contains_memref_p via for_each_rtx. */
13029 static int
13030 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
13031 {
13032 return (MEM_P (*loc));
13033 }
13034
13035 /* Return true iff INSN contains a MEM. */
13036 bool
13037 sh_contains_memref_p (rtx insn)
13038 {
13039 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
13040 }
13041
13042 /* Return true iff INSN loads a banked register. */
13043 bool
13044 sh_loads_bankedreg_p (rtx insn)
13045 {
13046 if (GET_CODE (PATTERN (insn)) == SET)
13047 {
13048 rtx op = SET_DEST (PATTERN(insn));
13049 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
13050 return true;
13051 }
13052
13053 return false;
13054 }
13055
13056 /* FNADDR is the MEM expression from a call expander. Return an address
13057 to use in an SHmedia insn pattern. */
13058 rtx
13059 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
13060 {
13061 int is_sym;
13062
13063 fnaddr = XEXP (fnaddr, 0);
13064 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
13065 if (flag_pic && is_sym)
13066 {
13067 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13068 {
13069 rtx reg = gen_reg_rtx (Pmode);
13070
13071 /* We must not use GOTPLT for sibcalls, because PIC_REG
13072 must be restored before the PLT code gets to run. */
13073 if (is_sibcall)
13074 emit_insn (gen_symGOT2reg (reg, fnaddr));
13075 else
13076 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13077 fnaddr = reg;
13078 }
13079 else
13080 {
13081 fnaddr = gen_sym2PIC (fnaddr);
13082 PUT_MODE (fnaddr, Pmode);
13083 }
13084 }
13085 /* If ptabs might trap, make this visible to the rest of the compiler.
13086 We generally assume that symbols pertain to valid locations, but
13087 it is possible to generate invalid symbols with asm or linker tricks.
13088 In a list of functions where each returns its successor, an invalid
13089 symbol might denote an empty list. */
13090 if (!TARGET_PT_FIXED
13091 && (!is_sym || TARGET_INVALID_SYMBOLS)
13092 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13093 {
13094 rtx tr = gen_reg_rtx (PDImode);
13095
13096 emit_insn (gen_ptabs (tr, fnaddr));
13097 fnaddr = tr;
13098 }
13099 else if (! target_reg_operand (fnaddr, Pmode))
13100 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13101 return fnaddr;
13102 }
13103
13104 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13105 static reg_class_t
13106 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13107 {
13108 if (rclass == NO_REGS
13109 && TARGET_SHMEDIA
13110 && (CONST_DOUBLE_P (x)
13111 || GET_CODE (x) == SYMBOL_REF
13112 || PIC_ADDR_P (x)))
13113 return GENERAL_REGS;
13114
13115 return rclass;
13116 }
13117
13118 /* Implement TARGET_SECONDARY_RELOAD. */
13119 static reg_class_t
13120 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13121 enum machine_mode mode, secondary_reload_info *sri)
13122 {
13123 enum reg_class rclass = (enum reg_class) rclass_i;
13124
13125 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13126 && REG_P (XEXP (XEXP (x, 0), 0))
13127 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13128 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13129
13130 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13131 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13132
13133 if (REG_P (x) && REGNO (x) == GBR_REG)
13134 return NO_REGS;
13135
13136 if (in_p)
13137 {
13138 if (REGCLASS_HAS_FP_REG (rclass)
13139 && ! TARGET_SHMEDIA
13140 && immediate_operand ((x), mode)
13141 && ! ((fp_zero_operand (x) || fp_one_operand (x))
13142 && mode == SFmode && fldi_ok ()))
13143 switch (mode)
13144 {
13145 case SFmode:
13146 sri->icode = CODE_FOR_reload_insf__frn;
13147 return NO_REGS;
13148 case DFmode:
13149 sri->icode = CODE_FOR_reload_indf__frn;
13150 return NO_REGS;
13151 case SImode:
13152 /* ??? If we knew that we are in the appropriate mode -
13153 single precision - we could use a reload pattern directly. */
13154 return FPUL_REGS;
13155 default:
13156 abort ();
13157 }
13158 if (rclass == FPUL_REGS
13159 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13160 || REGNO (x) == T_REG))
13161 || GET_CODE (x) == PLUS))
13162 return GENERAL_REGS;
13163 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13164 {
13165 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13166 return GENERAL_REGS;
13167 else if (mode == SFmode)
13168 return FP_REGS;
13169 sri->icode = CODE_FOR_reload_insi__i_fpul;
13170 return NO_REGS;
13171 }
13172 if (rclass == FPSCR_REGS
13173 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13174 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13175 return GENERAL_REGS;
13176 if (REGCLASS_HAS_FP_REG (rclass)
13177 && TARGET_SHMEDIA
13178 && immediate_operand (x, mode)
13179 && x != CONST0_RTX (GET_MODE (x))
13180 && GET_MODE (x) != V4SFmode)
13181 return GENERAL_REGS;
13182 if ((mode == QImode || mode == HImode)
13183 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13184 {
13185 sri->icode = ((mode == QImode)
13186 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13187 return NO_REGS;
13188 }
13189 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13190 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13191 return TARGET_REGS;
13192 } /* end of input-only processing. */
13193
13194 if (((REGCLASS_HAS_FP_REG (rclass)
13195 && (REG_P (x)
13196 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13197 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13198 && TARGET_FMOVD))))
13199 || (REGCLASS_HAS_GENERAL_REG (rclass)
13200 && REG_P (x)
13201 && FP_REGISTER_P (REGNO (x))))
13202 && ! TARGET_SHMEDIA
13203 && (mode == SFmode || mode == SImode))
13204 return FPUL_REGS;
13205 if ((rclass == FPUL_REGS
13206 || (REGCLASS_HAS_FP_REG (rclass)
13207 && ! TARGET_SHMEDIA && mode == SImode))
13208 && (MEM_P (x)
13209 || (REG_P (x)
13210 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13211 || REGNO (x) == T_REG
13212 || system_reg_operand (x, VOIDmode)))))
13213 {
13214 if (rclass == FPUL_REGS)
13215 return GENERAL_REGS;
13216 return FPUL_REGS;
13217 }
13218 if ((rclass == TARGET_REGS
13219 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13220 && !satisfies_constraint_Csy (x)
13221 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13222 return GENERAL_REGS;
13223 if ((rclass == MAC_REGS || rclass == PR_REGS)
13224 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13225 && rclass != REGNO_REG_CLASS (REGNO (x)))
13226 return GENERAL_REGS;
13227 if (rclass != GENERAL_REGS && REG_P (x)
13228 && TARGET_REGISTER_P (REGNO (x)))
13229 return GENERAL_REGS;
13230
13231 /* If we get here, fall back to loading FPUL through general registers.
13232 This case can happen when movsi_ie insn is picked initially to
13233 load/store the FPUL register from/to another register, and then the
13234 other register is allocated on the stack. */
13235 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13236 return GENERAL_REGS;
13237
13238 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13239 the other operand.
13240 On SH2A we could also just leave it alone here, which would result in a
13241 4 byte move insn being generated instead. However, for this to work
13242 the insns must have the appropriate alternatives. */
13243 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13244 && satisfies_constraint_Sdd (x)
13245 && sh_disp_addr_displacement (x)
13246 <= sh_max_mov_insn_displacement (mode, false))
13247 return R0_REGS;
13248
13249 /* When reload is trying to address a QImode or HImode subreg on the stack,
13250 force any subreg byte into R0_REGS, as this is going to become a
13251 displacement address.
13252 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13253 is on the stack, the memref to it might already require a displacement
13254 and that has to be added to the final address. At this point we don't
13255 know the cumulative displacement so we assume the worst case. */
13256 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13257 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13258 return R0_REGS;
13259
13260 return NO_REGS;
13261 }
13262
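/* Implement TARGET_CONDITIONAL_REGISTER_USAGE: adjust the fixed, call-used
   and call-really-used register sets and the SIBCALL_REGS class according to
   the selected CPU, ABI and PIC options. */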
13263 static void
13264 sh_conditional_register_usage (void)
13265 {
13266 int regno;
13267 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13268 if (! VALID_REGISTER_P (regno))
13269 fixed_regs[regno] = call_used_regs[regno] = 1;
13270 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13271 if (TARGET_SH5)
13272 {
13273 call_used_regs[FIRST_GENERAL_REG + 8]
13274 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13275 call_really_used_regs[FIRST_GENERAL_REG + 8]
13276 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13277 }
13278 if (TARGET_SHMEDIA)
13279 {
13280 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13281 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13282 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13283 }
13284 if (flag_pic)
13285 {
13286 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13287 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13288 }
13289 /* Under the Renesas ABI the MAC registers are saved and restored across calls. */
13290 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13291 {
13292 call_really_used_regs[MACH_REG] = 0;
13293 call_really_used_regs[MACL_REG] = 0;
13294 }
13295
13296 if (TARGET_SHMEDIA)
13297 {
13298 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13299 if (! fixed_regs[regno] && call_really_used_regs[regno])
13300 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13301 }
13302 else
13303 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13304 if (! fixed_regs[regno] && call_really_used_regs[regno])
13305 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13306 }
13307
13308 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13309
13310 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13311 static bool
13312 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
13313 {
13314 return (TARGET_SHMEDIA
13315 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13316 || x == CONST0_RTX (mode)
13317 || !TARGET_SHMEDIA_FPU
13318 || TARGET_SHMEDIA64)
13319 : (GET_CODE (x) != CONST_DOUBLE
13320 || mode == DFmode || mode == SFmode
13321 || mode == DImode || GET_MODE (x) == VOIDmode));
13322 }
13323
13324 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13325
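/* Set up the out-of-line library functions used to expand the atomic /
   __sync operations, for operand sizes up to UNITS_PER_WORD. */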
13326 static void
13327 sh_init_sync_libfuncs (void)
13328 {
13329 init_sync_libfuncs (UNITS_PER_WORD);
13330 }
13331
13332 /* Return true if it is appropriate to emit `ret' instructions in the
13333 body of a function. */
13334 bool
13335 sh_can_use_simple_return_p (void)
13336 {
13337 HARD_REG_SET live_regs_mask;
13338 int d;
13339
13340 /* Some targets require special return insns. */
13341 if (TARGET_SHMEDIA
13342 || (TARGET_SHCOMPACT
13343 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13344 return false;
13345
13346 if (! reload_completed || frame_pointer_needed)
13347 return false;
13348
13349 /* Moving the prologue around doesn't reduce the size. */
13350 if (optimize_function_for_size_p (cfun))
13351 return false;
13352
13353 /* Finally, allow for pr save. */
13354 d = calc_live_regs (&live_regs_mask);
13355
13356 if (rounded_frame_size (d) > 4)
13357 return false;
13358
13359 return true;
13360 }
13361
13362 /*------------------------------------------------------------------------------
13363 Address mode optimization support code
13364 */
13365
13366 typedef HOST_WIDE_INT disp_t;
13367 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13368 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13369 static const disp_t INVALID_DISP = MAX_DISP;
13370
13371 /* A memory reference which is described by a base register and a
13372 displacement. */
13373 class base_reg_disp
13374 {
13375 public:
13376 base_reg_disp (rtx br, disp_t d);
13377
13378 bool is_reg (void) const;
13379 bool is_disp (void) const;
13380 rtx reg (void) const;
13381 disp_t disp (void) const;
13382
13383 private:
13384 rtx reg_;
13385 disp_t disp_;
13386 };
13387
13388 inline
13389 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13390 : reg_ (br), disp_ (d)
13391 {
13392 }
13393
13394 inline bool
13395 base_reg_disp::is_reg (void) const
13396 {
13397 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13398 }
13399
13400 inline bool
13401 base_reg_disp::is_disp (void) const
13402 {
13403 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13404 }
13405
13406 inline rtx
13407 base_reg_disp::reg (void) const
13408 {
13409 return reg_;
13410 }
13411
13412 inline disp_t
13413 base_reg_disp::disp (void) const
13414 {
13415 return disp_;
13416 }
13417
13418 /* Find the base register and calculate the displacement for a given
13419 address rtx 'x'.
13420 This is done by walking the insn list backwards and following SET insns
13421 that set the value of the specified reg 'x'. */
13422 static base_reg_disp
13423 sh_find_base_reg_disp (rtx insn, rtx x, disp_t disp = 0, rtx base_reg = NULL)
13424 {
13425 if (REG_P (x))
13426 {
13427 if (REGNO (x) == GBR_REG)
13428 return base_reg_disp (x, disp);
13429
13430 /* We've reached a hard-reg. This is probably the point where
13431 function args are copied to pseudos. Do not go any further and
13432 stick to the pseudo. If the original mem addr was in a hard reg
13433 from the beginning, it will become the base reg. */
13434 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13435 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13436
13437 /* Try to find the previous insn that sets the reg. */
13438 for (rtx i = prev_nonnote_insn (insn); i != NULL;
13439 i = prev_nonnote_insn (i))
13440 {
13441 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)
13442 && CALL_P (i))
13443 break;
13444
13445 if (!NONJUMP_INSN_P (i))
13446 continue;
13447
13448 rtx p = PATTERN (i);
13449 if (p != NULL && GET_CODE (p) == SET && REG_P (XEXP (p, 0))
13450 && REGNO (XEXP (p, 0)) == REGNO (x))
13451 {
13452 /* If the recursion can't find out any more details about the
13453 source of the set, then this reg becomes our new base reg. */
13454 return sh_find_base_reg_disp (i, XEXP (p, 1), disp, XEXP (p, 0));
13455 }
13456 }
13457
13458 /* If we get here, no previous insn was found that sets the reg.
13459 The input reg is already the base reg. */
13460 return base_reg_disp (x, disp);
13461 }
13462
13463 else if (GET_CODE (x) == PLUS)
13464 {
13465 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13466 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13467
13468 /* Either left or right val must be a reg.
13469 We don't handle the case of 'reg + reg' here. */
13470 if (left_val.is_reg () && right_val.is_disp ())
13471 return base_reg_disp (left_val.reg (), left_val.disp ()
13472 + right_val.disp () + disp);
13473 else if (right_val.is_reg () && left_val.is_disp ())
13474 return base_reg_disp (right_val.reg (), right_val.disp ()
13475 + left_val.disp () + disp);
13476 else
13477 return base_reg_disp (base_reg, disp);
13478 }
13479
13480 else if (CONST_INT_P (x))
13481 return base_reg_disp (NULL, disp + INTVAL (x));
13482
13483 /* Didn't find anything useful. */
13484 return base_reg_disp (base_reg, disp);
13485 }
13486
13487 /* Given an insn and a memory operand, try to find an equivalent GBR
13488 based memory address and return the corresponding new memory address.
13489 Return NULL_RTX if not found. */
13490 rtx
13491 sh_find_equiv_gbr_addr (rtx insn, rtx mem)
13492 {
13493 if (!MEM_P (mem))
13494 return NULL_RTX;
13495
13496 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13497 if (side_effects_p (XEXP (mem, 0)))
13498 return NULL_RTX;
13499
13500 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13501
13502 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13503 {
13504 rtx disp = GEN_INT (gbr_disp.disp ());
13505 if (gbr_displacement (disp, GET_MODE (mem)))
13506 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13507 }
13508
13509 return NULL_RTX;
13510 }
13511
13512 /*------------------------------------------------------------------------------
13513 Manual insn combine support code.
13514 */
13515
13516 /* Given a reg rtx and a start insn, try to find the insn that sets the
13517 specified reg by using the specified insn stepping function, such as
13518 'prev_nonnote_insn_bb'. When the insn is found, try to extract the rtx
13519 of the reg set. */
13520 set_of_reg
13521 sh_find_set_of_reg (rtx reg, rtx insn, rtx_insn *(*stepfunc)(rtx))
13522 {
13523 set_of_reg result;
13524 result.insn = insn;
13525 result.set_rtx = NULL_RTX;
13526 result.set_src = NULL_RTX;
13527
13528 if (!REG_P (reg) || insn == NULL_RTX)
13529 return result;
13530
13531 for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
13532 result.insn = stepfunc (result.insn))
13533 {
13534 if (BARRIER_P (result.insn))
13535 return result;
13536 if (!NONJUMP_INSN_P (result.insn))
13537 continue;
13538 if (reg_set_p (reg, result.insn))
13539 {
13540 result.set_rtx = set_of (reg, result.insn);
13541
13542 if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
13543 return result;
13544
13545 result.set_src = XEXP (result.set_rtx, 1);
13546 return result;
13547 }
13548 }
13549
13550 return result;
13551 }
13552
13553 /* Given an op rtx and an insn, try to find out whether the result of the
13554 specified op consists only of logical operations on T bit stores. */
13555 bool
13556 sh_is_logical_t_store_expr (rtx op, rtx insn)
13557 {
13558 if (!logical_operator (op, SImode))
13559 return false;
13560
13561 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13562 int op_is_t_count = 0;
13563
13564 for (int i = 0; i < 2; ++i)
13565 {
13566 if (t_reg_operand (ops[i], VOIDmode)
13567 || negt_reg_operand (ops[i], VOIDmode))
13568 op_is_t_count++;
13569
13570 else
13571 {
13572 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13573 prev_nonnote_insn_bb);
13574 if (op_set.set_src == NULL_RTX)
13575 continue;
13576
13577 if (t_reg_operand (op_set.set_src, VOIDmode)
13578 || negt_reg_operand (op_set.set_src, VOIDmode)
13579 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13580 op_is_t_count++;
13581 }
13582 }
13583
13584 return op_is_t_count == 2;
13585 }
13586
13587 /* Given the operand that is extended in a sign/zero extend insn, and the
13588 insn, try to figure out whether the sign/zero extension can be replaced
13589 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13590 NULL_RTX otherwise. */
13591 rtx
13592 sh_try_omit_signzero_extend (rtx extended_op, rtx insn)
13593 {
13594 if (REG_P (extended_op))
13595 extended_op = extended_op;
13596 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13597 extended_op = SUBREG_REG (extended_op);
13598 else
13599 return NULL_RTX;
13600
13601 /* Reg moves must be of the same mode. */
13602 if (GET_MODE (extended_op) != SImode)
13603 return NULL_RTX;
13604
13605 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13606 if (s.set_src == NULL_RTX)
13607 return NULL_RTX;
13608
13609 if (t_reg_operand (s.set_src, VOIDmode)
13610 || negt_reg_operand (s.set_src, VOIDmode))
13611 return extended_op;
13612
13613 /* If the zero extended reg was formed by a logical operation, check the
13614 operands of the logical operation. If both originated from T bit
13615 stores the zero extension can be eliminated. */
13616 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13617 return extended_op;
13618
13619 return NULL_RTX;
13620 }
13621
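/* Emit the insns that switch the FPSCR to floating point mode MODE, given
   that PREV_MODE was in effect before. On SH4A / SH4-300, when the previous
   mode is known, the PR bit (and with TARGET_FMOVD the SZ bit) is simply
   toggled; otherwise the FPSCR is reloaded from memory. */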
13622 static void
13623 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
13624 int prev_mode, HARD_REG_SET regs_live)
13625 {
13626 if ((TARGET_SH4A_FP || TARGET_SH4_300)
13627 && prev_mode != FP_MODE_NONE && prev_mode != mode)
13628 {
13629 emit_insn (gen_toggle_pr ());
13630 if (TARGET_FMOVD)
13631 emit_insn (gen_toggle_sz ());
13632 }
13633 else
13634 fpscr_set_from_mem (mode, regs_live);
13635 }
13636
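/* Return the floating point mode required by INSN (its fp_mode attribute),
   or FP_MODE_NONE if the insn is not recognized. */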
13637 static int
13638 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
13639 {
13640 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
13641 }
13642
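/* Return the floating point mode in effect after INSN executes, given that
   MODE was in effect before it; some insns switch the mode themselves, as
   indicated by their fp_set attribute. */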
13643 static int
13644 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
13645 {
13646 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
13647 get_attr_fp_set (insn) != FP_SET_NONE)
13648 return (int) get_attr_fp_set (insn);
13649 else
13650 return mode;
13651 }
13652
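/* Return the floating point mode assumed on entry to the current function. */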
13653 static int
13654 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
13655 {
13656 return NORMAL_MODE (entity);
13657 }
13658
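/* Return the floating point mode required on exit from the current function;
   functions with the 'renesas' attribute impose no particular exit mode. */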
13659 static int
13660 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
13661 {
13662 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
13663 }
13664
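/* Return the N-th floating point mode in order of preference: the mode
   matching the default FPU precision first, the other one second. */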
13665 static int
13666 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
13667 {
13668 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
13669 }
13670
13671 #include "gt-sh.h"