1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2013 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "insn-config.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "flags.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "reload.h"
37 #include "function.h"
38 #include "regs.h"
39 #include "hard-reg-set.h"
40 #include "output.h"
41 #include "insn-attr.h"
42 #include "diagnostic-core.h"
43 #include "recog.h"
44 #include "dwarf2.h"
45 #include "tm_p.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "df.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "params.h"
54 #include "ggc.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "tree-ssa-alias.h"
58 #include "internal-fn.h"
59 #include "gimple-fold.h"
60 #include "tree-eh.h"
61 #include "gimple-expr.h"
62 #include "is-a.h"
63 #include "gimple.h"
64 #include "gimplify.h"
65 #include "cfgloop.h"
66 #include "alloc-pool.h"
67 #include "tm-constrs.h"
68 #include "opts.h"
69 #include "tree-pass.h"
70 #include "pass_manager.h"
71 #include "context.h"
72
73 #include <sstream>
74 #include <vector>
75 #include <algorithm>
76
77 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
78
79 /* These are some macros to abstract register modes. */
80 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
81 && ((HOST_WIDE_INT)(VALUE)) <= 511)
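/* For instance, CONST_OK_FOR_I10 accepts any signed 10-bit constant:
   CONST_OK_FOR_I10 (511) and CONST_OK_FOR_I10 (-512) hold, while
   CONST_OK_FOR_I10 (512) does not.  */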
82
83 #define CONST_OK_FOR_ADD(size) \
84 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
85 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
86 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
87 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
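/* A minimal usage sketch of the GEN_* wrappers above (DST, SRC and OFS are
   hypothetical operands): something like
     emit_insn (GEN_ADD3 (dst, src, GEN_INT (ofs)));
   emits an adddi3 pattern on SHMEDIA64 and an addsi3 pattern otherwise, so
   callers do not have to check the pointer width themselves.  */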
88
89 /* Used to simplify the logic below. Find the attributes wherever
90 they may be. */
91 #define SH_ATTRIBUTES(decl) \
92 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
93 : DECL_ATTRIBUTES (decl) \
94 ? (DECL_ATTRIBUTES (decl)) \
95 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
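/* In other words, SH_ATTRIBUTES yields TYPE_ATTRIBUTES for a type node, and
   for a declaration it yields DECL_ATTRIBUTES when that list is non-empty,
   falling back to the attributes of the declaration's type.  */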
96
97 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
98 int current_function_interrupt;
99
100 tree sh_deferred_function_attributes;
101 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
102
103 /* Global variables for machine-dependent things. */
104
105 /* Which CPU we are scheduling for.  */
106 enum processor_type sh_cpu;
107
108 /* Definitions used in ready queue reordering for first scheduling pass. */
109
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
112
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
115
116 /* Number of r0 life regions. */
117 static int r0_life_regions;
118
119 /* If true, skip cycles for Q -> R movement. */
120 static int skip_cycles = 0;
121
122 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
123 and returned from sh_reorder2. */
124 static short cached_can_issue_more;
125
126 /* Unique number for UNSPEC_BBR pattern. */
127 static unsigned int unspec_bbr_uid = 1;
128
129 /* Provides the class number of the smallest class containing each
130 hard register number.  */
131 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
132 {
133 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
170 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
171 GENERAL_REGS, GENERAL_REGS,
172 };
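/* Rough layout of the table above: 64 entries for the general purpose
   registers (r0 getting its own R0_REGS class), 64 for the floating point
   registers (fr0 in FP0_REGS), 8 TARGET_REGS entries, 8 DF_REGS entries,
   and finally the special registers (PR, T, MAC, FPUL, FPSCR, etc.).  */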
173
174 char sh_register_names[FIRST_PSEUDO_REGISTER] \
175 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
176
177 char sh_additional_register_names[ADDREGNAMES_SIZE] \
178 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
179 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
180
181 int assembler_dialect;
182
183 static bool shmedia_space_reserved_for_target_registers;
184
185 static void split_branches (rtx);
186 static int branch_dest (rtx);
187 static void print_slot (rtx);
188 static rtx add_constant (rtx, enum machine_mode, rtx);
189 static void dump_table (rtx, rtx);
190 static bool broken_move (rtx);
191 static bool mova_p (rtx);
192 static rtx find_barrier (int, rtx, rtx);
193 static bool noncall_uses_reg (rtx, rtx, rtx *);
194 static rtx gen_block_redirect (rtx, int, int);
195 static void sh_reorg (void);
196 static void sh_option_override (void);
197 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
198 static rtx frame_insn (rtx);
199 static rtx push (int);
200 static void pop (int);
201 static void push_regs (HARD_REG_SET *, int);
202 static int calc_live_regs (HARD_REG_SET *);
203 static HOST_WIDE_INT rounded_frame_size (int);
204 static bool sh_frame_pointer_required (void);
205 static rtx mark_constant_pool_use (rtx);
206 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
207 int, bool *);
208 static tree sh_handle_resbank_handler_attribute (tree *, tree,
209 tree, int, bool *);
210 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
211 tree, int, bool *);
212 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
213 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
214 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
215 static void sh_print_operand (FILE *, rtx, int);
216 static void sh_print_operand_address (FILE *, rtx);
217 static bool sh_print_operand_punct_valid_p (unsigned char code);
218 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
219 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
220 static void sh_insert_attributes (tree, tree *);
221 static const char *sh_check_pch_target_flags (int);
222 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
223 static int sh_adjust_cost (rtx, rtx, rtx, int);
224 static int sh_issue_rate (void);
225 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
226 static short find_set_regmode_weight (rtx, enum machine_mode);
227 static short find_insn_regmode_weight (rtx, enum machine_mode);
228 static void find_regmode_weight (basic_block, enum machine_mode);
229 static int find_r0_life_regions (basic_block);
230 static void sh_md_init_global (FILE *, int, int);
231 static void sh_md_finish_global (FILE *, int);
232 static int rank_for_reorder (const void *, const void *);
233 static void swap_reorder (rtx *, int);
234 static void ready_reorder (rtx *, int);
235 static bool high_pressure (enum machine_mode);
236 static int sh_reorder (FILE *, int, rtx *, int *, int);
237 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
238 static void sh_md_init (FILE *, int, int);
239 static int sh_variable_issue (FILE *, int, rtx, int);
240
241 static bool sh_function_ok_for_sibcall (tree, tree);
242
243 static bool sh_cannot_modify_jumps_p (void);
244 static reg_class_t sh_target_reg_class (void);
245 static bool sh_optimize_target_register_callee_saved (bool);
246 static bool sh_ms_bitfield_layout_p (const_tree);
247
248 static void sh_init_builtins (void);
249 static tree sh_builtin_decl (unsigned, bool);
250 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
251 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
252 HOST_WIDE_INT, tree);
253 static void sh_file_start (void);
254 static bool flow_dependent_p (rtx, rtx);
255 static void flow_dependent_p_1 (rtx, const_rtx, void *);
256 static int shiftcosts (rtx);
257 static int and_xor_ior_costs (rtx, int);
258 static int addsubcosts (rtx);
259 static int multcosts (rtx);
260 static bool unspec_caller_rtx_p (rtx);
261 static bool sh_cannot_copy_insn_p (rtx);
262 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
263 static int sh_address_cost (rtx, enum machine_mode, addr_space_t, bool);
264 static int sh_pr_n_sets (void);
265 static rtx sh_allocate_initial_value (rtx);
266 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
267 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
268 enum machine_mode,
269 struct secondary_reload_info *);
270 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
271 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
272 static rtx sh_delegitimize_address (rtx);
273 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
274 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
275 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
276 static int scavenge_reg (HARD_REG_SET *s);
277 struct save_schedule_s;
278 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
279 struct save_schedule_s *, int);
280
281 static rtx sh_struct_value_rtx (tree, int);
282 static rtx sh_function_value (const_tree, const_tree, bool);
283 static bool sh_function_value_regno_p (const unsigned int);
284 static rtx sh_libcall_value (enum machine_mode, const_rtx);
285 static bool sh_return_in_memory (const_tree, const_tree);
286 static rtx sh_builtin_saveregs (void);
287 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
288 tree, int *, int);
289 static bool sh_strict_argument_naming (cumulative_args_t);
290 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
291 static tree sh_build_builtin_va_list (void);
292 static void sh_va_start (tree, rtx);
293 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
294 static bool sh_promote_prototypes (const_tree);
295 static enum machine_mode sh_promote_function_mode (const_tree type,
296 enum machine_mode,
297 int *punsignedp,
298 const_tree funtype,
299 int for_return);
300 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
301 const_tree, bool);
302 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
303 const_tree, bool);
304 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
305 tree, bool);
306 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
307 const_tree, bool);
308 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
309 const_tree, bool);
310 static bool sh_scalar_mode_supported_p (enum machine_mode);
311 static int sh_dwarf_calling_convention (const_tree);
312 static void sh_encode_section_info (tree, rtx, int);
313 static bool sh2a_function_vector_p (tree);
314 static void sh_trampoline_init (rtx, tree, rtx);
315 static rtx sh_trampoline_adjust_address (rtx);
316 static void sh_conditional_register_usage (void);
317 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
318 static int mov_insn_size (enum machine_mode, bool);
319 static int max_mov_insn_displacement (enum machine_mode, bool);
320 static int mov_insn_alignment_mask (enum machine_mode, bool);
321 static HOST_WIDE_INT disp_addr_displacement (rtx);
322 static bool sequence_insn_p (rtx);
323 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
324 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
325 enum machine_mode, bool);
326 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
327
328 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
329 \f
330 static const struct attribute_spec sh_attribute_table[] =
331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
333 affects_type_identity } */
334 { "interrupt_handler", 0, 0, true, false, false,
335 sh_handle_interrupt_handler_attribute, false },
336 { "sp_switch", 1, 1, true, false, false,
337 sh_handle_sp_switch_attribute, false },
338 { "trap_exit", 1, 1, true, false, false,
339 sh_handle_trap_exit_attribute, false },
340 { "renesas", 0, 0, false, true, false,
341 sh_handle_renesas_attribute, false },
342 { "trapa_handler", 0, 0, true, false, false,
343 sh_handle_interrupt_handler_attribute, false },
344 { "nosave_low_regs", 0, 0, true, false, false,
345 sh_handle_interrupt_handler_attribute, false },
346 { "resbank", 0, 0, true, false, false,
347 sh_handle_resbank_handler_attribute, false },
348 { "function_vector", 1, 1, true, false, false,
349 sh2a_handle_function_vector_handler_attribute, false },
350 { NULL, 0, 0, false, false, false, NULL, false }
351 };
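/* An illustrative (hypothetical) use of some of these attributes in user
   code, assuming the argument forms documented in the GCC manual:

     void handler (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (11)));

   The handler functions registered in the table above validate the argument
   counts and forms when such attributes are parsed.  */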
352 \f
353 /* Initialize the GCC target structure. */
354 #undef TARGET_ATTRIBUTE_TABLE
355 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
356
357 /* The next two are used for debug info when compiling with -gdwarf. */
358 #undef TARGET_ASM_UNALIGNED_HI_OP
359 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
360 #undef TARGET_ASM_UNALIGNED_SI_OP
361 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
362
363 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
364 #undef TARGET_ASM_UNALIGNED_DI_OP
365 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
366 #undef TARGET_ASM_ALIGNED_DI_OP
367 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
368
369 #undef TARGET_OPTION_OVERRIDE
370 #define TARGET_OPTION_OVERRIDE sh_option_override
371
372 #undef TARGET_PRINT_OPERAND
373 #define TARGET_PRINT_OPERAND sh_print_operand
374 #undef TARGET_PRINT_OPERAND_ADDRESS
375 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
376 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
377 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
378 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
379 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
380
381 #undef TARGET_ASM_FUNCTION_EPILOGUE
382 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
383
384 #undef TARGET_ASM_OUTPUT_MI_THUNK
385 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
386
387 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
388 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
389 hook_bool_const_tree_hwi_hwi_const_tree_true
390
391 #undef TARGET_ASM_FILE_START
392 #define TARGET_ASM_FILE_START sh_file_start
393 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
394 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
395
396 #undef TARGET_REGISTER_MOVE_COST
397 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
398
399 #undef TARGET_INSERT_ATTRIBUTES
400 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
401
402 #undef TARGET_SCHED_ADJUST_COST
403 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
404
405 #undef TARGET_SCHED_ISSUE_RATE
406 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
407
408 /* The next 5 hooks have been implemented for reenabling sched1. With the
409 help of these macros we are limiting the movement of insns in sched1 to
410 reduce the register pressure. The overall idea is to keep count of SImode
411 and SFmode regs required by already scheduled insns. When these counts
412 cross some threshold values, give priority to insns that free registers.
413 The insn that frees registers is most likely to be the insn with the lowest
414 LUID (original insn order), but such an insn might be in the stalled
415 queue (Q) instead of the ready queue (R).  To solve this, we skip up to
416 a maximum of 8 cycles so that such insns may move from Q -> R.
417
418 The hooks are described below:
419
420 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
421 scheduler; it is called inside the sched_init function just after the
422 find_insn_reg_weights function call.  It is used to calculate the SImode
423 and SFmode weights of insns of basic blocks, much like what
424 find_insn_reg_weights does.
425 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
426
427 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
428 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
429 (Q)->(R).
430
431 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
432 high, reorder the ready queue so that the insn with the lowest LUID will be
433 issued next.
434
435 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
436 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
437
438 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
439 can be returned from TARGET_SCHED_REORDER2.
440
441 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
442
443 #undef TARGET_SCHED_DFA_NEW_CYCLE
444 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
445
446 #undef TARGET_SCHED_INIT_GLOBAL
447 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
448
449 #undef TARGET_SCHED_FINISH_GLOBAL
450 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
451
452 #undef TARGET_SCHED_VARIABLE_ISSUE
453 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
454
455 #undef TARGET_SCHED_REORDER
456 #define TARGET_SCHED_REORDER sh_reorder
457
458 #undef TARGET_SCHED_REORDER2
459 #define TARGET_SCHED_REORDER2 sh_reorder2
460
461 #undef TARGET_SCHED_INIT
462 #define TARGET_SCHED_INIT sh_md_init
463
464 #undef TARGET_DELEGITIMIZE_ADDRESS
465 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
466
467 #undef TARGET_LEGITIMIZE_ADDRESS
468 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
469
470 #undef TARGET_CANNOT_MODIFY_JUMPS_P
471 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
472 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
473 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
474 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
475 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
476 sh_optimize_target_register_callee_saved
477
478 #undef TARGET_MS_BITFIELD_LAYOUT_P
479 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
480
481 #undef TARGET_INIT_BUILTINS
482 #define TARGET_INIT_BUILTINS sh_init_builtins
483 #undef TARGET_BUILTIN_DECL
484 #define TARGET_BUILTIN_DECL sh_builtin_decl
485 #undef TARGET_EXPAND_BUILTIN
486 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
487
488 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
489 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
490
491 #undef TARGET_CANNOT_COPY_INSN_P
492 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
493 #undef TARGET_RTX_COSTS
494 #define TARGET_RTX_COSTS sh_rtx_costs
495 #undef TARGET_ADDRESS_COST
496 #define TARGET_ADDRESS_COST sh_address_cost
497 #undef TARGET_ALLOCATE_INITIAL_VALUE
498 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
499
500 #undef TARGET_MACHINE_DEPENDENT_REORG
501 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
502
503 #undef TARGET_DWARF_REGISTER_SPAN
504 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
505
506 #ifdef HAVE_AS_TLS
507 #undef TARGET_HAVE_TLS
508 #define TARGET_HAVE_TLS true
509 #endif
510
511 #undef TARGET_PROMOTE_PROTOTYPES
512 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
513 #undef TARGET_PROMOTE_FUNCTION_MODE
514 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
515
516 #undef TARGET_FUNCTION_VALUE
517 #define TARGET_FUNCTION_VALUE sh_function_value
518 #undef TARGET_FUNCTION_VALUE_REGNO_P
519 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
520 #undef TARGET_LIBCALL_VALUE
521 #define TARGET_LIBCALL_VALUE sh_libcall_value
522 #undef TARGET_STRUCT_VALUE_RTX
523 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
524 #undef TARGET_RETURN_IN_MEMORY
525 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
526
527 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
528 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
529 #undef TARGET_SETUP_INCOMING_VARARGS
530 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
531 #undef TARGET_STRICT_ARGUMENT_NAMING
532 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
533 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
534 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
535 #undef TARGET_MUST_PASS_IN_STACK
536 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
537 #undef TARGET_PASS_BY_REFERENCE
538 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
539 #undef TARGET_CALLEE_COPIES
540 #define TARGET_CALLEE_COPIES sh_callee_copies
541 #undef TARGET_ARG_PARTIAL_BYTES
542 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
543 #undef TARGET_FUNCTION_ARG
544 #define TARGET_FUNCTION_ARG sh_function_arg
545 #undef TARGET_FUNCTION_ARG_ADVANCE
546 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
547
548 #undef TARGET_BUILD_BUILTIN_VA_LIST
549 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
550 #undef TARGET_EXPAND_BUILTIN_VA_START
551 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
552 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
553 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
554
555 #undef TARGET_SCALAR_MODE_SUPPORTED_P
556 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
557 #undef TARGET_VECTOR_MODE_SUPPORTED_P
558 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
559
560 #undef TARGET_CHECK_PCH_TARGET_FLAGS
561 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
562
563 #undef TARGET_DWARF_CALLING_CONVENTION
564 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
565
566 #undef TARGET_FRAME_POINTER_REQUIRED
567 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
568
569 /* Return regmode weight for insn. */
570 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
571 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
572
573 /* Return current register pressure for regmode. */
574 #define CURR_REGMODE_PRESSURE(MODE)\
575 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
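/* Both macros above map SImode to index 0 and any other mode (in practice
   SFmode) to index 1 of the regmode_weight / curr_regmode_pressure arrays
   declared near the top of this file.  */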
576
577 #undef TARGET_ENCODE_SECTION_INFO
578 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
579
580 #undef TARGET_SECONDARY_RELOAD
581 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
582
583 #undef TARGET_PREFERRED_RELOAD_CLASS
584 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
585
586 #undef TARGET_CONDITIONAL_REGISTER_USAGE
587 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
588
589 #undef TARGET_LEGITIMATE_ADDRESS_P
590 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
591
592 #undef TARGET_TRAMPOLINE_INIT
593 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
594 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
595 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
596
597 #undef TARGET_LEGITIMATE_CONSTANT_P
598 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
599
600 #undef TARGET_CANONICALIZE_COMPARISON
601 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
602
603 #undef TARGET_FIXED_CONDITION_CODE_REGS
604 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
605
606 /* Machine-specific symbol_ref flags. */
607 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
608
609 /* The tas.b instruction sets the most significant bit in the byte, i.e. 0x80.
610 This value is used by optabs.c atomic op expansion code as well as in sync.md. */
611 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
612 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
613
614 struct gcc_target targetm = TARGET_INITIALIZER;
615 \f
616
617 /* Information on the currently selected atomic model.
618 This is initialized in sh_option_override. */
619 static sh_atomic_model selected_atomic_model_;
620
621 const sh_atomic_model&
622 selected_atomic_model (void)
623 {
624 return selected_atomic_model_;
625 }
626
627 static sh_atomic_model
628 parse_validate_atomic_model_option (const char* str)
629 {
630 const char* model_names[sh_atomic_model::num_models];
631 model_names[sh_atomic_model::none] = "none";
632 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
633 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
634 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
635 model_names[sh_atomic_model::soft_imask] = "soft-imask";
636
637 const char* model_cdef_names[sh_atomic_model::num_models];
638 model_cdef_names[sh_atomic_model::none] = "NONE";
639 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
640 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
641 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
642 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
643
644 sh_atomic_model ret;
645 ret.type = sh_atomic_model::none;
646 ret.name = model_names[sh_atomic_model::none];
647 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
648 ret.strict = false;
649 ret.tcb_gbr_offset = -1;
650
651 /* Handle empty string as 'none'. */
652 if (str == NULL || *str == '\0')
653 return ret;
654
655 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
656
657 std::vector<std::string> tokens;
658 for (std::stringstream ss (str); ss.good (); )
659 {
660 tokens.push_back (std::string ());
661 std::getline (ss, tokens.back (), ',');
662 }
663
664 if (tokens.empty ())
665 err_ret ("invalid atomic model option");
666
667 /* The first token must be the atomic model name. */
668 {
669 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
670 if (tokens.front () == model_names[i])
671 {
672 ret.type = (sh_atomic_model::enum_type)i;
673 ret.name = model_names[i];
674 ret.cdef_name = model_cdef_names[i];
675 goto got_mode_name;
676 }
677
678 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
679 got_mode_name:;
680 }
681
682 /* Go through the remaining tokens. */
683 for (size_t i = 1; i < tokens.size (); ++i)
684 {
685 if (tokens[i] == "strict")
686 ret.strict = true;
687 else if (tokens[i].find ("gbr-offset=") == 0)
688 {
689 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
690 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
691 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
692 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
693 "option", offset_str.c_str ());
694 }
695 else
696 err_ret ("unknown parameter \"%s\" in atomic model option",
697 tokens[i].c_str ());
698 }
699
700 /* Check that the selection makes sense. */
701 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
702 err_ret ("atomic operations are not supported on SHmedia");
703
704 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
705 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
706 ret.name);
707
708 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
709 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
710
711 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
712 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
713
714 if (ret.type == sh_atomic_model::soft_tcb
715 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
716 || (ret.tcb_gbr_offset & 3) != 0))
717 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
718 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
719 ret.name);
720
721 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
722 err_ret ("cannot use atomic model %s in user mode", ret.name);
723
724 return ret;
725
726 #undef err_ret
727 }
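/* A worked example of the parsing above, assuming the -matomic-model=
   option spelling used by this port: the string
   "soft-tcb,gbr-offset=8,strict" yields type = soft_tcb,
   tcb_gbr_offset = 8 and strict = true, while "soft-tcb" alone is rejected
   because the gbr-offset parameter is required for that model.  */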
728
729 /* Register SH specific RTL passes. */
730 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
731 const char* name);
732 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
733 const char* name);
734 static void
735 register_sh_passes (void)
736 {
737 if (!TARGET_SH1)
738 return;
739
740 /* Running the sh_treg_combine pass after ce1 generates better code when
741 comparisons are combined and reg-reg moves are introduced, because
742 reg-reg moves will be eliminated afterwards.  However, there are quite
743 a few cases where combine will be unable to fold comparison-related insns,
744 thus for now don't do it.
745 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
746 PASS_POS_INSERT_AFTER, "ce1", 1);
747 */
748
749 /* Run sh_treg_combine pass after combine but before register allocation. */
750 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
751 PASS_POS_INSERT_AFTER, "split1", 1);
752
753 /* Run sh_treg_combine pass after register allocation and basic block
754 reordering as this sometimes creates new opportunities. */
755 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
756 PASS_POS_INSERT_AFTER, "split4", 1);
757
758 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
759 is known after a conditional branch.
760 This must be done after basic blocks and branch conditions have
761 stabilized and won't be changed by further passes. */
762 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
763 PASS_POS_INSERT_BEFORE, "sched2", 1);
764 }
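/* The register_pass calls above insert a single instance of each SH specific
   pass relative to an existing pass identified by name; e.g. the call using
   PASS_POS_INSERT_AFTER with "split1" runs sh_treg_combine2 once, right
   after instance 1 of the split1 pass.  */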
765
766 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
767 various options, and do some machine dependent initialization. */
768 static void
769 sh_option_override (void)
770 {
771 int regno;
772
773 SUBTARGET_OVERRIDE_OPTIONS;
774 if (optimize > 1 && !optimize_size)
775 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
776 sh_cpu = PROCESSOR_SH1;
777 assembler_dialect = 0;
778 if (TARGET_SH2)
779 sh_cpu = PROCESSOR_SH2;
780 if (TARGET_SH2E)
781 sh_cpu = PROCESSOR_SH2E;
782 if (TARGET_SH2A)
783 sh_cpu = PROCESSOR_SH2A;
784 if (TARGET_SH3)
785 sh_cpu = PROCESSOR_SH3;
786 if (TARGET_SH3E)
787 sh_cpu = PROCESSOR_SH3E;
788 if (TARGET_SH4)
789 {
790 assembler_dialect = 1;
791 sh_cpu = PROCESSOR_SH4;
792 }
793 if (TARGET_SH4A_ARCH)
794 {
795 assembler_dialect = 1;
796 sh_cpu = PROCESSOR_SH4A;
797 }
798 if (TARGET_SH5)
799 {
800 sh_cpu = PROCESSOR_SH5;
801 target_flags |= MASK_ALIGN_DOUBLE;
802 if (TARGET_SHMEDIA_FPU)
803 target_flags |= MASK_FMOVD;
804 if (TARGET_SHMEDIA)
805 {
806 /* There are no delay slots on SHmedia. */
807 flag_delayed_branch = 0;
808 /* Relaxation isn't yet supported for SHmedia.  */
809 target_flags &= ~MASK_RELAX;
810 /* After reload, if-conversion does little good but can cause
811 ICEs:
812 - find_if_block doesn't do anything for SH because we don't
813 have conditional execution patterns. (We use conditional
814 move patterns, which are handled differently, and only
815 before reload).
816 - find_cond_trap doesn't do anything for the SH because we
817 don't have conditional traps.
818 - find_if_case_1 uses redirect_edge_and_branch_force in
819 the only path that does an optimization, and this causes
820 an ICE when branch targets are in registers.
821 - find_if_case_2 doesn't do anything for the SHmedia after
822 reload except when it can redirect a tablejump - and
823 that's rather rare. */
824 flag_if_conversion2 = 0;
825 if (! strcmp (sh_div_str, "call"))
826 sh_div_strategy = SH_DIV_CALL;
827 else if (! strcmp (sh_div_str, "call2"))
828 sh_div_strategy = SH_DIV_CALL2;
829 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
830 sh_div_strategy = SH_DIV_FP;
831 else if (! strcmp (sh_div_str, "inv"))
832 sh_div_strategy = SH_DIV_INV;
833 else if (! strcmp (sh_div_str, "inv:minlat"))
834 sh_div_strategy = SH_DIV_INV_MINLAT;
835 else if (! strcmp (sh_div_str, "inv20u"))
836 sh_div_strategy = SH_DIV_INV20U;
837 else if (! strcmp (sh_div_str, "inv20l"))
838 sh_div_strategy = SH_DIV_INV20L;
839 else if (! strcmp (sh_div_str, "inv:call2"))
840 sh_div_strategy = SH_DIV_INV_CALL2;
841 else if (! strcmp (sh_div_str, "inv:call"))
842 sh_div_strategy = SH_DIV_INV_CALL;
843 else if (! strcmp (sh_div_str, "inv:fp"))
844 {
845 if (TARGET_FPU_ANY)
846 sh_div_strategy = SH_DIV_INV_FP;
847 else
848 sh_div_strategy = SH_DIV_INV;
849 }
850 TARGET_CBRANCHDI4 = 0;
851 /* Assembler CFI isn't yet fully supported for SHmedia. */
852 flag_dwarf2_cfi_asm = 0;
853 }
854 }
855 else
856 {
857 /* Only the sh64-elf assembler fully supports .quad properly. */
858 targetm.asm_out.aligned_op.di = NULL;
859 targetm.asm_out.unaligned_op.di = NULL;
860 }
861 if (TARGET_SH1)
862 {
863 if (! strcmp (sh_div_str, "call-div1"))
864 sh_div_strategy = SH_DIV_CALL_DIV1;
865 else if (! strcmp (sh_div_str, "call-fp")
866 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
867 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
868 sh_div_strategy = SH_DIV_CALL_FP;
869 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
870 sh_div_strategy = SH_DIV_CALL_TABLE;
871 else
872 /* Pick one that makes most sense for the target in general.
873 It is not much good to use different functions depending
874 on -Os, since then we'll end up with two different functions
875 when some of the code is compiled for size, and some for
876 speed. */
877
878 /* SH4 tends to emphasize speed. */
879 if (TARGET_HARD_SH4)
880 sh_div_strategy = SH_DIV_CALL_TABLE;
881 /* These have their own way of doing things. */
882 else if (TARGET_SH2A)
883 sh_div_strategy = SH_DIV_INTRINSIC;
884 /* ??? Should we use the integer SHmedia function instead? */
885 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
886 sh_div_strategy = SH_DIV_CALL_FP;
887 /* SH1 .. SH3 cores often go into small-footprint systems, so
888 default to the smallest implementation available. */
889 else
890 sh_div_strategy = SH_DIV_CALL_DIV1;
891 }
892 if (!TARGET_SH1)
893 TARGET_PRETEND_CMOVE = 0;
894 if (sh_divsi3_libfunc[0])
895 ; /* User supplied - leave it alone. */
896 else if (TARGET_DIVIDE_CALL_FP)
897 sh_divsi3_libfunc = "__sdivsi3_i4";
898 else if (TARGET_DIVIDE_CALL_TABLE)
899 sh_divsi3_libfunc = "__sdivsi3_i4i";
900 else if (TARGET_SH5)
901 sh_divsi3_libfunc = "__sdivsi3_1";
902 else
903 sh_divsi3_libfunc = "__sdivsi3";
904 if (sh_branch_cost == -1)
905 {
906 sh_branch_cost = 1;
907
908 /* The SH1 does not have delay slots, hence we get a pipeline stall
909 at every branch. The SH4 is superscalar, so the single delay slot
910 is not sufficient to keep both pipelines filled. */
911 if (! TARGET_SH2 || TARGET_HARD_SH4)
912 sh_branch_cost = 2;
913 }
914
915 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
916 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
917 TARGET_ZDCBRANCH = 1;
918
919 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
920 if (! VALID_REGISTER_P (regno))
921 sh_register_names[regno][0] = '\0';
922
923 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
924 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
925 sh_additional_register_names[regno][0] = '\0';
926
927 if ((flag_pic && ! TARGET_PREFERGOT)
928 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
929 flag_no_function_cse = 1;
930
931 if (targetm.small_register_classes_for_mode_p (VOIDmode))
932 {
933 /* Never run scheduling before reload, since that can
934 break global alloc, and generates slower code anyway due
935 to the pressure on R0. */
936 /* Enable sched1 for SH4 only if the user explicitly requests it.
937 When sched1 is enabled, the ready queue will be reordered by
938 the target hooks if pressure is high.  We cannot do this for
939 PIC, SH3 and lower, as they give spill failures for R0. */
940 if (!TARGET_HARD_SH4 || flag_pic)
941 flag_schedule_insns = 0;
942 /* ??? Current exception handling places basic block boundaries
943 after call_insns. It causes the high pressure on R0 and gives
944 spill failures for R0 in reload. See PR 22553 and the thread
945 on gcc-patches
946 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
947 else if (flag_exceptions)
948 {
949 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
950 warning (0, "ignoring -fschedule-insns because of exception "
951 "handling bug");
952 flag_schedule_insns = 0;
953 }
954 else if (flag_schedule_insns
955 && !global_options_set.x_flag_schedule_insns)
956 flag_schedule_insns = 0;
957 }
958
959 /* Unwind info is not correct around the CFG unless either a frame
960 pointer is present or -maccumulate-outgoing-args (M_A_O_A) is set.  Fixing this requires rewriting
961 unwind info generation to be aware of the CFG and propagating states
962 around edges. */
963 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
964 || flag_exceptions || flag_non_call_exceptions)
965 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
966 {
967 warning (0, "unwind tables currently require either a frame pointer "
968 "or -maccumulate-outgoing-args for correctness");
969 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
970 }
971
972 /* Unwinding with -freorder-blocks-and-partition does not work on this
973 architecture, because it requires far jumps to labels that cross between
974 the hot/cold sections, and such jumps are rejected on this architecture. */
975 if (flag_reorder_blocks_and_partition)
976 {
977 if (flag_exceptions)
978 {
979 inform (input_location,
980 "-freorder-blocks-and-partition does not work with "
981 "exceptions on this architecture");
982 flag_reorder_blocks_and_partition = 0;
983 flag_reorder_blocks = 1;
984 }
985 else if (flag_unwind_tables)
986 {
987 inform (input_location,
988 "-freorder-blocks-and-partition does not support unwind "
989 "info on this architecture");
990 flag_reorder_blocks_and_partition = 0;
991 flag_reorder_blocks = 1;
992 }
993 }
994
995 /* Adjust loop, jump and function alignment values (in bytes), if those
996 were not specified by the user using -falign-loops, -falign-jumps
997 and -falign-functions options.
998 32 bit alignment is better for speed, because instructions can be
999 fetched as a pair from a longword boundary. For size use 16 bit
1000 alignment to get more compact code.
1001 Aligning all jumps increases the code size, even if it might
1002 result in slightly faster code. Thus, it is set to the smallest
1003 alignment possible if not specified by the user. */
1004 if (align_loops == 0)
1005 {
1006 if (TARGET_SH5)
1007 align_loops = 8;
1008 else
1009 align_loops = optimize_size ? 2 : 4;
1010 }
1011
1012 if (align_jumps == 0)
1013 {
1014 if (TARGET_SHMEDIA)
1015 align_jumps = 1 << CACHE_LOG;
1016 else
1017 align_jumps = 2;
1018 }
1019 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1020 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1021
1022 if (align_functions == 0)
1023 {
1024 if (TARGET_SHMEDIA)
1025 align_functions = optimize_size
1026 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1027 else
1028 align_functions = optimize_size ? 2 : 4;
1029 }
1030
1031 /* The linker relaxation code breaks when a function contains
1032 alignments that are larger than that at the start of a
1033 compilation unit. */
1034 if (TARGET_RELAX)
1035 {
1036 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1037
1038 /* Also take possible .long constants / mova tables into account. */
1039 if (min_align < 4)
1040 min_align = 4;
1041 if (align_functions < min_align)
1042 align_functions = min_align;
1043 }
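/* A worked example of the adjustment above, assuming -Os -mrelax on a
   non-SH5 target: align_loops and align_jumps default to 2 bytes, so
   min_align starts at 2, is raised to 4 to cover constant tables, and
   align_functions (2 bytes under -Os) is then bumped up to 4 bytes.  */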
1044
1045 if (flag_unsafe_math_optimizations)
1046 {
1047 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1048 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1049 TARGET_FSCA = 1;
1050
1051 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1052 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1053 TARGET_FSRRA = 1;
1054 }
1055
1056 /* Allow fsrra insn only if -funsafe-math-optimizations and
1057 -ffinite-math-only are enabled. */
1058 TARGET_FSRRA = TARGET_FSRRA
1059 && flag_unsafe_math_optimizations
1060 && flag_finite_math_only;
1061
1062 /* If the -mieee option was not explicitly set by the user, turn it on
1063 unless -ffinite-math-only was specified. See also PR 33135. */
1064 if (! global_options_set.x_TARGET_IEEE)
1065 TARGET_IEEE = ! flag_finite_math_only;
1066
1067 if (sh_fixed_range_str)
1068 sh_fix_range (sh_fixed_range_str);
1069
1070 /* This target defaults to strict volatile bitfields. */
1071 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1072 flag_strict_volatile_bitfields = 1;
1073
1074 /* Parse atomic model option and make sure it is valid for the current
1075 target CPU. */
1076 selected_atomic_model_
1077 = parse_validate_atomic_model_option (sh_atomic_model_str);
1078
1079 register_sh_passes ();
1080 }
1081 \f
1082 /* Print the operand address in x to the stream. */
1083 static void
1084 sh_print_operand_address (FILE *stream, rtx x)
1085 {
1086 switch (GET_CODE (x))
1087 {
1088 case REG:
1089 case SUBREG:
1090 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1091 break;
1092
1093 case PLUS:
1094 {
1095 rtx base = XEXP (x, 0);
1096 rtx index = XEXP (x, 1);
1097
1098 switch (GET_CODE (index))
1099 {
1100 case CONST_INT:
1101 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1102 reg_names[true_regnum (base)]);
1103 break;
1104
1105 case REG:
1106 case SUBREG:
1107 {
1108 int base_num = true_regnum (base);
1109 int index_num = true_regnum (index);
1110
1111 fprintf (stream, "@(r0,%s)",
1112 reg_names[MAX (base_num, index_num)]);
1113 break;
1114 }
1115
1116 default:
1117 gcc_unreachable ();
1118 }
1119 }
1120 break;
1121
1122 case PRE_DEC:
1123 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1124 break;
1125
1126 case POST_INC:
1127 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1128 break;
1129
1130 default:
1131 x = mark_constant_pool_use (x);
1132 output_addr_const (stream, x);
1133 break;
1134 }
1135 }
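/* For reference, the address forms handled above print as follows
   (the register numbers are just examples):
     (reg r4)                      -> @r4
     (plus (reg r4) (const_int 8)) -> @(8,r4)
     (plus (reg r4) (reg r0))      -> @(r0,r4)
     (pre_dec (reg r15))           -> @-r15
     (post_inc (reg r15))          -> @r15+
   Anything else is emitted as a constant address via output_addr_const.  */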
1136
1137 /* Print operand x (an rtx) in assembler syntax to file stream
1138 according to modifier code.
1139
1140 '.' print a .s if insn needs delay slot
1141 ',' print LOCAL_LABEL_PREFIX
1142 '@' print trap, rte or rts depending upon pragma interruptness
1143 '#' output a nop if there is nothing to put in the delay slot
1144 ''' print likelihood suffix (/u for unlikely).
1145 '>' print branch target if -fverbose-asm
1146 'O' print a constant without the #
1147 'R' print the LSW of a dp value - changes if in little endian
1148 'S' print the MSW of a dp value - changes if in little endian
1149 'T' print the next word of a dp value - same as 'R' in big endian mode.
1150 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1151 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1152 'N' print 'r63' if the operand is (const_int 0).
1153 'd' print a V2SF reg as dN instead of fpN.
1154 'm' print a pair `base,offset' or `base,index', for LD and ST.
1155 'U' Likewise for {LD,ST}{HI,LO}.
1156 'V' print the position of a single bit set.
1157 'W' print the position of a single bit cleared.
1158 't' print a memory address which is a register.
1159 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1160 'o' output an operator. */
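/* A few concrete examples of the modifiers above (illustrative only):
   %V with (const_int 8) prints "#3" (the position of the set bit),
   %W with (const_int -9) prints "#3" (the position of the cleared bit),
   and %u with (const_int 0x12345) prints "9029", i.e. the low 16 bits
   (0x2345) as an unsigned decimal value.  */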
1161 static void
1162 sh_print_operand (FILE *stream, rtx x, int code)
1163 {
1164 int regno;
1165 enum machine_mode mode;
1166
1167 switch (code)
1168 {
1169 tree trapa_attr;
1170
1171 case '.':
1172 if (final_sequence
1173 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1174 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1175 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1176 break;
1177 case ',':
1178 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1179 break;
1180 case '@':
1181 trapa_attr = lookup_attribute ("trap_exit",
1182 DECL_ATTRIBUTES (current_function_decl));
1183 if (trapa_attr)
1184 fprintf (stream, "trapa #%ld",
1185 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1186 else if (sh_cfun_interrupt_handler_p ())
1187 {
1188 if (sh_cfun_resbank_handler_p ())
1189 fprintf (stream, "resbank\n");
1190 fprintf (stream, "rte");
1191 }
1192 else
1193 fprintf (stream, "rts");
1194 break;
1195 case '#':
1196 /* Output a nop if there's nothing in the delay slot. */
1197 if (dbr_sequence_length () == 0)
1198 fprintf (stream, "\n\tnop");
1199 break;
1200 case '\'':
1201 {
1202 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1203
1204 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1205 fputs ("/u", stream);
1206 break;
1207 }
1208 case '>':
1209 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1210 {
1211 fputs ("\t! target: ", stream);
1212 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1213 }
1214 break;
1215 case 'O':
1216 x = mark_constant_pool_use (x);
1217 output_addr_const (stream, x);
1218 break;
1219 /* N.B.: %R / %S / %T adjust memory addresses by four.
1220 For SHMEDIA, that means they can be used to access the first and
1221 second 32 bit part of a 64 bit (or larger) value that
1222 might be held in floating point registers or memory.
1223 While they can be used to access 64 bit parts of a larger value
1224 held in general purpose registers, that won't work with memory, nor
1225 with fp registers, since the frxx names are used. */
1226 case 'R':
1227 if (REG_P (x) || GET_CODE (x) == SUBREG)
1228 {
1229 regno = true_regnum (x);
1230 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1231 fputs (reg_names[regno], (stream));
1232 }
1233 else if (MEM_P (x))
1234 {
1235 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1236 sh_print_operand_address (stream, XEXP (x, 0));
1237 }
1238 else
1239 {
1240 rtx sub = NULL_RTX;
1241
1242 mode = GET_MODE (x);
1243 if (mode == VOIDmode)
1244 mode = DImode;
1245 if (GET_MODE_SIZE (mode) >= 8)
1246 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1247 if (sub)
1248 sh_print_operand (stream, sub, 0);
1249 else
1250 output_operand_lossage ("invalid operand to %%R");
1251 }
1252 break;
1253 case 'S':
1254 if (REG_P (x) || GET_CODE (x) == SUBREG)
1255 {
1256 regno = true_regnum (x);
1257 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1258 fputs (reg_names[regno], (stream));
1259 }
1260 else if (MEM_P (x))
1261 {
1262 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1263 sh_print_operand_address (stream, XEXP (x, 0));
1264 }
1265 else
1266 {
1267 rtx sub = NULL_RTX;
1268
1269 mode = GET_MODE (x);
1270 if (mode == VOIDmode)
1271 mode = DImode;
1272 if (GET_MODE_SIZE (mode) >= 8)
1273 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1274 if (sub)
1275 sh_print_operand (stream, sub, 0);
1276 else
1277 output_operand_lossage ("invalid operand to %%S");
1278 }
1279 break;
1280 case 'T':
1281 /* Next word of a double. */
1282 switch (GET_CODE (x))
1283 {
1284 case REG:
1285 fputs (reg_names[REGNO (x) + 1], (stream));
1286 break;
1287 case MEM:
1288 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1289 && GET_CODE (XEXP (x, 0)) != POST_INC)
1290 x = adjust_address (x, SImode, 4);
1291 sh_print_operand_address (stream, XEXP (x, 0));
1292 break;
1293 default:
1294 break;
1295 }
1296 break;
1297
1298 case 't':
1299 gcc_assert (MEM_P (x));
1300 x = XEXP (x, 0);
1301 switch (GET_CODE (x))
1302 {
1303 case REG:
1304 case SUBREG:
1305 sh_print_operand (stream, x, 0);
1306 break;
1307 default:
1308 break;
1309 }
1310 break;
1311
1312 case 'o':
1313 switch (GET_CODE (x))
1314 {
1315 case PLUS: fputs ("add", stream); break;
1316 case MINUS: fputs ("sub", stream); break;
1317 case MULT: fputs ("mul", stream); break;
1318 case DIV: fputs ("div", stream); break;
1319 case EQ: fputs ("eq", stream); break;
1320 case NE: fputs ("ne", stream); break;
1321 case GT: case LT: fputs ("gt", stream); break;
1322 case GE: case LE: fputs ("ge", stream); break;
1323 case GTU: case LTU: fputs ("gtu", stream); break;
1324 case GEU: case LEU: fputs ("geu", stream); break;
1325 default:
1326 break;
1327 }
1328 break;
1329 case 'M':
1330 if (TARGET_SHMEDIA)
1331 {
1332 if (MEM_P (x)
1333 && GET_CODE (XEXP (x, 0)) == PLUS
1334 && (REG_P (XEXP (XEXP (x, 0), 1))
1335 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1336 fputc ('x', stream);
1337 }
1338 else
1339 {
1340 if (MEM_P (x))
1341 {
1342 switch (GET_MODE (x))
1343 {
1344 case QImode: fputs (".b", stream); break;
1345 case HImode: fputs (".w", stream); break;
1346 case SImode: fputs (".l", stream); break;
1347 case SFmode: fputs (".s", stream); break;
1348 case DFmode: fputs (".d", stream); break;
1349 default: gcc_unreachable ();
1350 }
1351 }
1352 }
1353 break;
1354
1355 case 'm':
1356 gcc_assert (MEM_P (x));
1357 x = XEXP (x, 0);
1358 /* Fall through. */
1359 case 'U':
1360 switch (GET_CODE (x))
1361 {
1362 case REG:
1363 case SUBREG:
1364 sh_print_operand (stream, x, 0);
1365 fputs (", 0", stream);
1366 break;
1367
1368 case PLUS:
1369 sh_print_operand (stream, XEXP (x, 0), 0);
1370 fputs (", ", stream);
1371 sh_print_operand (stream, XEXP (x, 1), 0);
1372 break;
1373
1374 default:
1375 gcc_unreachable ();
1376 }
1377 break;
1378
1379 case 'V':
1380 {
1381 int num = exact_log2 (INTVAL (x));
1382 gcc_assert (num >= 0);
1383 fprintf (stream, "#%d", num);
1384 }
1385 break;
1386
1387 case 'W':
1388 {
1389 int num = exact_log2 (~INTVAL (x));
1390 gcc_assert (num >= 0);
1391 fprintf (stream, "#%d", num);
1392 }
1393 break;
1394
1395 case 'd':
1396 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1397
1398 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1399 break;
1400
1401 case 'N':
1402 if (x == CONST0_RTX (GET_MODE (x)))
1403 {
1404 fprintf ((stream), "r63");
1405 break;
1406 }
1407 goto default_output;
1408 case 'u':
1409 if (CONST_INT_P (x))
1410 {
1411 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1412 break;
1413 }
1414 /* Fall through. */
1415
1416 default_output:
1417 default:
1418 regno = 0;
1419 mode = GET_MODE (x);
1420
1421 switch (GET_CODE (x))
1422 {
1423 case TRUNCATE:
1424 {
1425 rtx inner = XEXP (x, 0);
1426 int offset = 0;
1427 enum machine_mode inner_mode;
1428
1429 /* We might see SUBREGs with vector mode registers inside. */
1430 if (GET_CODE (inner) == SUBREG
1431 && (GET_MODE_SIZE (GET_MODE (inner))
1432 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1433 && subreg_lowpart_p (inner))
1434 inner = SUBREG_REG (inner);
1435 if (CONST_INT_P (inner))
1436 {
1437 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1438 goto default_output;
1439 }
1440 inner_mode = GET_MODE (inner);
1441 if (GET_CODE (inner) == SUBREG
1442 && (GET_MODE_SIZE (GET_MODE (inner))
1443 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1444 && REG_P (SUBREG_REG (inner)))
1445 {
1446 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1447 GET_MODE (SUBREG_REG (inner)),
1448 SUBREG_BYTE (inner),
1449 GET_MODE (inner));
1450 inner = SUBREG_REG (inner);
1451 }
1452 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1453 abort ();
1454 /* Floating point register pairs are always big endian;
1455 general purpose registers are 64 bit wide. */
1456 regno = REGNO (inner);
1457 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1458 - HARD_REGNO_NREGS (regno, mode))
1459 + offset;
1460 x = inner;
1461 goto reg;
1462 }
1463 case SIGN_EXTEND:
1464 x = XEXP (x, 0);
1465 goto reg;
1466 /* FIXME: We need this on SHmedia32 because reload generates
1467 some sign-extended HI or QI loads into DImode registers
1468 but, because Pmode is SImode, the address ends up with a
1469 subreg:SI of the DImode register. Maybe reload should be
1470 fixed so as to apply alter_subreg to such loads? */
1471 case IF_THEN_ELSE:
1472 gcc_assert (trapping_target_operand (x, VOIDmode));
1473 x = XEXP (XEXP (x, 2), 0);
1474 goto default_output;
1475 case SUBREG:
1476 gcc_assert (SUBREG_BYTE (x) == 0
1477 && REG_P (SUBREG_REG (x)));
1478
1479 x = SUBREG_REG (x);
1480 /* Fall through. */
1481
1482 reg:
1483 case REG:
1484 regno += REGNO (x);
1485 if (FP_REGISTER_P (regno)
1486 && mode == V16SFmode)
1487 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1488 else if (FP_REGISTER_P (REGNO (x))
1489 && mode == V4SFmode)
1490 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1491 else if (REG_P (x)
1492 && mode == V2SFmode)
1493 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1494 else if (FP_REGISTER_P (REGNO (x))
1495 && GET_MODE_SIZE (mode) > 4)
1496 fprintf ((stream), "d%s", reg_names[regno] + 1);
1497 else
1498 fputs (reg_names[regno], (stream));
1499 break;
1500
1501 case MEM:
1502 output_address (XEXP (x, 0));
1503 break;
1504
1505 default:
1506 if (TARGET_SH1)
1507 fputc ('#', stream);
1508 output_addr_const (stream, x);
1509 break;
1510 }
1511 break;
1512 }
1513 }
1514
1515 static bool
1516 sh_print_operand_punct_valid_p (unsigned char code)
1517 {
1518 return (code == '.' || code == '#' || code == '@' || code == ','
1519 || code == '$' || code == '\'' || code == '>');
1520 }
1521
1522 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1523 static bool
1524 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1525 {
1526 if (GET_CODE (x) == UNSPEC)
1527 {
1528 switch (XINT (x, 1))
1529 {
1530 case UNSPEC_DATALABEL:
1531 fputs ("datalabel ", file);
1532 output_addr_const (file, XVECEXP (x, 0, 0));
1533 break;
1534 case UNSPEC_PIC:
1535 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1536 output_addr_const (file, XVECEXP (x, 0, 0));
1537 break;
1538 case UNSPEC_GOT:
1539 output_addr_const (file, XVECEXP (x, 0, 0));
1540 fputs ("@GOT", file);
1541 break;
1542 case UNSPEC_GOTOFF:
1543 output_addr_const (file, XVECEXP (x, 0, 0));
1544 fputs ("@GOTOFF", file);
1545 break;
1546 case UNSPEC_PLT:
1547 output_addr_const (file, XVECEXP (x, 0, 0));
1548 fputs ("@PLT", file);
1549 break;
1550 case UNSPEC_GOTPLT:
1551 output_addr_const (file, XVECEXP (x, 0, 0));
1552 fputs ("@GOTPLT", file);
1553 break;
1554 case UNSPEC_DTPOFF:
1555 output_addr_const (file, XVECEXP (x, 0, 0));
1556 fputs ("@DTPOFF", file);
1557 break;
1558 case UNSPEC_GOTTPOFF:
1559 output_addr_const (file, XVECEXP (x, 0, 0));
1560 fputs ("@GOTTPOFF", file);
1561 break;
1562 case UNSPEC_TPOFF:
1563 output_addr_const (file, XVECEXP (x, 0, 0));
1564 fputs ("@TPOFF", file);
1565 break;
1566 case UNSPEC_CALLER:
1567 {
1568 char name[32];
1569 /* LPCS stands for Label for PIC Call Site. */
1570 targetm.asm_out.generate_internal_label (name, "LPCS",
1571 INTVAL (XVECEXP (x, 0, 0)));
1572 assemble_name (file, name);
1573 }
1574 break;
1575 case UNSPEC_EXTRACT_S16:
1576 case UNSPEC_EXTRACT_U16:
1577 {
1578 rtx val, shift;
1579
1580 val = XVECEXP (x, 0, 0);
1581 shift = XVECEXP (x, 0, 1);
1582 fputc ('(', file);
1583 if (shift != const0_rtx)
1584 fputc ('(', file);
1585 if (GET_CODE (val) == CONST
1586 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1587 {
1588 fputc ('(', file);
1589 output_addr_const (file, val);
1590 fputc (')', file);
1591 }
1592 else
1593 output_addr_const (file, val);
1594 if (shift != const0_rtx)
1595 {
1596 fputs (" >> ", file);
1597 output_addr_const (file, shift);
1598 fputc (')', file);
1599 }
1600 fputs (" & 65535)", file);
1601 }
1602 break;
1603 case UNSPEC_SYMOFF:
1604 output_addr_const (file, XVECEXP (x, 0, 0));
1605 fputc ('-', file);
1606 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1607 {
1608 fputc ('(', file);
1609 output_addr_const (file, XVECEXP (x, 0, 1));
1610 fputc (')', file);
1611 }
1612 else
1613 output_addr_const (file, XVECEXP (x, 0, 1));
1614 break;
1615 case UNSPEC_PCREL_SYMOFF:
1616 output_addr_const (file, XVECEXP (x, 0, 0));
1617 fputs ("-(", file);
1618 output_addr_const (file, XVECEXP (x, 0, 1));
1619 fputs ("-.)", file);
1620 break;
1621 default:
1622 return false;
1623 }
1624 return true;
1625 }
1626 else
1627 return false;
1628 }
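/* For example, an (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF) operand is
   printed above as "foo@GOTOFF", and the UNSPEC_PCREL_SYMOFF form prints
   the PC-relative difference "sym-(lab-.)".  */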
1629 \f
1630 /* Encode symbol attributes of a SYMBOL_REF into its
1631 SYMBOL_REF_FLAGS. */
1632 static void
1633 sh_encode_section_info (tree decl, rtx rtl, int first)
1634 {
1635 default_encode_section_info (decl, rtl, first);
1636
1637 if (TREE_CODE (decl) == FUNCTION_DECL
1638 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1639 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1640 }
1641
1642 /* Prepare operands for a move define_expand; specifically, one of the
1643 operands must be in a register. */
1644 void
1645 prepare_move_operands (rtx operands[], enum machine_mode mode)
1646 {
1647 if ((mode == SImode || mode == DImode)
1648 && flag_pic
1649 && ! ((mode == Pmode || mode == ptr_mode)
1650 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1651 {
1652 rtx temp;
1653 if (SYMBOLIC_CONST_P (operands[1]))
1654 {
1655 if (MEM_P (operands[0]))
1656 operands[1] = force_reg (Pmode, operands[1]);
1657 else if (TARGET_SHMEDIA
1658 && GET_CODE (operands[1]) == LABEL_REF
1659 && target_reg_operand (operands[0], mode))
1660 /* It's ok. */;
1661 else
1662 {
1663 temp = (!can_create_pseudo_p ()
1664 ? operands[0]
1665 : gen_reg_rtx (Pmode));
1666 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1667 }
1668 }
1669 else if (GET_CODE (operands[1]) == CONST
1670 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1671 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1672 {
1673 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1674 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1675 mode, temp);
1676 operands[1] = expand_binop (mode, add_optab, temp,
1677 XEXP (XEXP (operands[1], 0), 1),
1678 (!can_create_pseudo_p ()
1679 ? temp
1680 : gen_reg_rtx (Pmode)),
1681 0, OPTAB_LIB_WIDEN);
1682 }
1683 }
1684
1685 if (! reload_in_progress && ! reload_completed)
1686 {
1687 /* Copy the source to a register if neither operand is a register. */
1688 if (! register_operand (operands[0], mode)
1689 && ! sh_register_operand (operands[1], mode))
1690 operands[1] = copy_to_mode_reg (mode, operands[1]);
1691
1692 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1693 {
1694 /* This is like change_address_1 (operands[0], mode, 0, 1),
1695 except that we can't use that function because it is static. */
1696 rtx new_rtx = change_address (operands[0], mode, 0);
1697 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1698 operands[0] = new_rtx;
1699 }
1700
1701 /* This case can happen while generating code to move the result
1702 of a library call to the target. Reject `st r0,@(rX,rY)' because
1703 reload will fail to find a spill register for rX, since r0 is already
1704 being used for the source. */
1705 else if (TARGET_SH1
1706 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1707 && MEM_P (operands[0])
1708 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1709 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1710 operands[1] = copy_to_mode_reg (mode, operands[1]);
1711 }
1712
1713 if (mode == Pmode || mode == ptr_mode)
1714 {
1715 rtx op0, op1, opc;
1716 enum tls_model tls_kind;
1717
1718 op0 = operands[0];
1719 op1 = operands[1];
1720 if (GET_CODE (op1) == CONST
1721 && GET_CODE (XEXP (op1, 0)) == PLUS
1722 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1723 != TLS_MODEL_NONE))
1724 {
1725 opc = XEXP (XEXP (op1, 0), 1);
1726 op1 = XEXP (XEXP (op1, 0), 0);
1727 }
1728 else
1729 opc = NULL_RTX;
1730
1731 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1732 {
1733 rtx tga_op1, tga_ret, tmp, tmp2;
1734
1735 if (! flag_pic
1736 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1737 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1738 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1739 {
1740 /* Don't schedule insns for getting GOT address when
1741 the first scheduling is enabled, to avoid spill
1742 failures for R0. */
1743 if (flag_schedule_insns)
1744 emit_insn (gen_blockage ());
1745 emit_insn (gen_GOTaddr2picreg ());
1746 emit_use (gen_rtx_REG (SImode, PIC_REG));
1747 if (flag_schedule_insns)
1748 emit_insn (gen_blockage ());
1749 }
1750
1751 switch (tls_kind)
1752 {
1753 case TLS_MODEL_GLOBAL_DYNAMIC:
1754 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1755 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1756 tmp = gen_reg_rtx (Pmode);
1757 emit_move_insn (tmp, tga_ret);
1758 op1 = tmp;
1759 break;
1760
1761 case TLS_MODEL_LOCAL_DYNAMIC:
1762 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1763 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1764
1765 tmp = gen_reg_rtx (Pmode);
1766 emit_move_insn (tmp, tga_ret);
1767
1768 if (register_operand (op0, Pmode))
1769 tmp2 = op0;
1770 else
1771 tmp2 = gen_reg_rtx (Pmode);
1772
1773 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1774 op1 = tmp2;
1775 break;
1776
1777 case TLS_MODEL_INITIAL_EXEC:
1778 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1779 tmp = gen_sym2GOTTPOFF (op1);
1780 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1781 op1 = tga_op1;
1782 break;
1783
1784 case TLS_MODEL_LOCAL_EXEC:
1785 tmp2 = gen_reg_rtx (Pmode);
1786 emit_insn (gen_store_gbr (tmp2));
1787 tmp = gen_reg_rtx (Pmode);
1788 emit_insn (gen_symTPOFF2reg (tmp, op1));
1789
1790 if (register_operand (op0, Pmode))
1791 op1 = op0;
1792 else
1793 op1 = gen_reg_rtx (Pmode);
1794
1795 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1796 break;
1797
1798 default:
1799 gcc_unreachable ();
1800 }
1801 if (opc)
1802 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1803 operands[1] = op1;
1804 }
1805 }
1806 }
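/* As a sketch of the TLS handling above: for the local-exec model the
   address of a thread-local symbol is computed as GBR + sym@TPOFF, i.e.
   the thread pointer is read with a store_gbr insn, the offset is
   materialized by symTPOFF2reg and the two are combined with an
   ordinary addsi3.  */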
1807
1808 /* Implement the canonicalize_comparison target hook for the combine
1809 pass. For the target hook this function is invoked via
1810 sh_canonicalize_comparison. This function is also re-used to
1811 canonicalize comparisons in cbranch pattern expanders. */
1812 static void
1813 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1814 enum machine_mode mode,
1815 bool op0_preserve_value)
1816 {
1817 /* When invoked from within the combine pass the mode is not specified,
1818 so try to get it from one of the operands. */
1819 if (mode == VOIDmode)
1820 mode = GET_MODE (op0);
1821 if (mode == VOIDmode)
1822 mode = GET_MODE (op1);
1823
1824 // We need to have a mode to do something useful here.
1825 if (mode == VOIDmode)
1826 return;
1827
1828 // Currently, we don't deal with floats here.
1829 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1830 return;
1831
1832 // Make sure that the constant operand is the second operand.
1833 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1834 {
1835 if (op0_preserve_value)
1836 return;
1837
1838 std::swap (op0, op1);
1839 cmp = swap_condition (cmp);
1840 }
1841
1842 if (CONST_INT_P (op1))
1843 {
1844 /* Try to adjust the constant operand in such a way that available
1845 comparison insns can be utilized better and the constant can be
1846 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1847 constant pool. */
1848 const HOST_WIDE_INT val = INTVAL (op1);
1849
1850 /* x > -1 --> x >= 0
1851 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1852 x <= -1 --> x < 0
1853 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1854 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1855 {
1856 cmp = cmp == GT ? GE : LT;
1857 op1 = gen_int_mode (val + 1, mode);
1858 }
1859
1860 /* x >= 1 --> x > 0
1861 x >= 0x80 --> x > 0x7F
1862 x < 1 --> x <= 0
1863 x < 0x80 --> x <= 0x7F */
1864 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1865 {
1866 cmp = cmp == GE ? GT : LE;
1867 op1 = gen_int_mode (val - 1, mode);
1868 }
1869
1870 /* unsigned x >= 1 --> x != 0
1871 unsigned x < 1 --> x == 0 */
1872 else if (val == 1 && (cmp == GEU || cmp == LTU))
1873 {
1874 cmp = cmp == GEU ? NE : EQ;
1875 op1 = CONST0_RTX (mode);
1876 }
1877
1878 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1879 unsigned x < 0x80 --> unsigned x <= 0x7F */
1880 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1881 {
1882 cmp = cmp == GEU ? GTU : LEU;
1883 op1 = gen_int_mode (val - 1, mode);
1884 }
1885
1886 /* unsigned x > 0 --> x != 0
1887 unsigned x <= 0 --> x == 0 */
1888 else if (val == 0 && (cmp == GTU || cmp == LEU))
1889 cmp = cmp == GTU ? NE : EQ;
1890
1891 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1892 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1893 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1894 && val == 0x7FFFFFFF)
1895 {
1896 cmp = cmp == GTU ? LT : GE;
1897 op1 = const0_rtx;
1898 }
1899
1900 /* unsigned x >= 0x80000000 --> signed x < 0
1901 unsigned x < 0x80000000 --> signed x >= 0 */
1902 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1903 && (unsigned HOST_WIDE_INT)val
1904 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1905 {
1906 cmp = cmp == GEU ? LT : GE;
1907 op1 = const0_rtx;
1908 }
1909 }
1910 }
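/* For example, with SImode operands the comparison (gtu x 0x7FFFFFFF) is
   canonicalized above into (lt x 0), so a single sign test can be used
   instead of loading the large constant from the constant pool.  */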
1911
1912 /* This function implements the canonicalize_comparison target hook.
1913 This wrapper around the internally used sh_canonicalize_comparison
1914 function is needed to do the enum rtx_code <-> int conversion.
1915 Target hooks cannot use enum rtx_code in its definition. */
1916 static void
1917 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1918 bool op0_preserve_value)
1919 {
1920 enum rtx_code tmp_code = (enum rtx_code)*code;
1921 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1922 VOIDmode, op0_preserve_value);
1923 *code = (int)tmp_code;
1924 }
1925
1926 bool
1927 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1928 {
1929 *p1 = T_REG;
1930 *p2 = INVALID_REGNUM;
1931 return true;
1932 }
1933
1934 enum rtx_code
1935 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1936 enum rtx_code comparison)
1937 {
1938 /* The scratch reg is only available when this is invoked from within
1939 the cbranchdi4_i splitter, through expand_cbranchdi4. */
1940 rtx scratch = NULL_RTX;
1941
1942 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1943 comparison = GET_CODE (operands[0]);
1944 else
1945 scratch = operands[4];
1946
1947 sh_canonicalize_comparison (comparison, operands[1], operands[2],
1948 mode, false);
1949
1950 /* Notice that this function is also invoked after reload by
1951 the cbranchdi4_i pattern, through expand_cbranchdi4. */
1952 rtx op1 = operands[1];
1953
1954 if (can_create_pseudo_p ())
1955 operands[1] = force_reg (mode, op1);
1956 /* When we are handling DImode comparisons, we want to keep constants so
1957 that we can optimize the component comparisons; however, memory loads
1958 are better issued as a whole so that they can be scheduled well.
1959 SImode equality comparisons allow I08 constants, but only when they
1960 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1961 into a register, that register might as well be r0, and we allow the
1962 constant. If it is already in a register, this is likely to be
1963 allocated to a different hard register, thus we load the constant into
1964 a register unless it is zero. */
1965 if (!REG_P (operands[2])
1966 && (!CONST_INT_P (operands[2])
1967 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1968 && ((comparison != EQ && comparison != NE)
1969 || (REG_P (op1) && REGNO (op1) != R0_REG)
1970 || !satisfies_constraint_I08 (operands[2])))))
1971 {
1972 if (scratch && GET_MODE (scratch) == mode)
1973 {
1974 emit_move_insn (scratch, operands[2]);
1975 operands[2] = scratch;
1976 }
1977 else if (can_create_pseudo_p ())
1978 operands[2] = force_reg (mode, operands[2]);
1979 }
1980 return comparison;
1981 }
1982
1983 void
1984 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1985 {
1986 rtx (*branch_expander) (rtx) = gen_branch_true;
1987 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1988 switch (comparison)
1989 {
1990 case NE: case LT: case LE: case LTU: case LEU:
1991 comparison = reverse_condition (comparison);
1992 branch_expander = gen_branch_false;
1993 default: ;
1994 }
1995 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
1996 gen_rtx_fmt_ee (comparison, SImode,
1997 operands[1], operands[2])));
1998 rtx jump = emit_jump_insn (branch_expander (operands[3]));
1999 if (probability >= 0)
2000 add_int_reg_note (jump, REG_BR_PROB, probability);
2001 }
2002
2003 /* ??? How should we distribute probabilities when more than one branch
2004 is generated? So far we only have some ad-hoc observations:
2005 - If the operands are random, they are likely to differ in both parts.
2006 - If comparing items in a hash chain, the operands are random or equal;
2007 operation should be EQ or NE.
2008 - If items are searched in an ordered tree from the root, we can expect
2009 the highpart to be unequal about half of the time; operation should be
2010 an inequality comparison, operands non-constant, and overall probability
2011 about 50%. Likewise for quicksort.
2012 - Range checks will often be made against constants. Even if we assume for
2013 simplicity an even distribution of the non-constant operand over a
2014 sub-range here, the same probability could be generated with differently
2015 wide sub-ranges - as long as the ratio of the part of the subrange that
2016 is before the threshold to the part that comes after the threshold stays
2017 the same. Thus, we can't really tell anything here;
2018 assuming random distribution is at least simple.
2019 */
2020 bool
2021 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2022 {
2023 enum rtx_code msw_taken, msw_skip, lsw_taken;
2024 rtx skip_label = NULL_RTX;
2025 rtx op1h, op1l, op2h, op2l;
2026 int num_branches;
2027 int prob, rev_prob;
2028 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2029 rtx scratch = operands[4];
2030
2031 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2032 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2033 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2034 op1l = gen_lowpart (SImode, operands[1]);
2035 op2l = gen_lowpart (SImode, operands[2]);
2036 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2037 prob = split_branch_probability;
2038 rev_prob = REG_BR_PROB_BASE - prob;
2039 switch (comparison)
2040 {
2041 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2042 That costs 1 cycle more when the first branch can be predicted taken,
2043 but saves us mispredicts because only one branch needs prediction.
2044 It also enables generating the cmpeqdi_t-1 pattern. */
2045 case EQ:
2046 if (TARGET_CMPEQDI_T)
2047 {
2048 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2049 emit_jump_insn (gen_branch_true (operands[3]));
2050 return true;
2051 }
2052 msw_skip = NE;
2053 lsw_taken = EQ;
2054 if (prob >= 0)
2055 {
2056 // If we had more precision, we'd use rev_prob - (rev_prob >> 32).
2057 msw_skip_prob = rev_prob;
2058 if (REG_BR_PROB_BASE <= 65535)
2059 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2060 else
2061 {
2062 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
2063 lsw_taken_prob
2064 = (prob
2065 ? (REG_BR_PROB_BASE
2066 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
2067 / ((HOST_WIDEST_INT) prob << 32)))
2068 : 0);
2069 }
2070 }
2071 break;
2072 case NE:
2073 if (TARGET_CMPEQDI_T)
2074 {
2075 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2076 emit_jump_insn (gen_branch_false (operands[3]));
2077 return true;
2078 }
2079 msw_taken = NE;
2080 msw_taken_prob = prob;
2081 lsw_taken = NE;
2082 lsw_taken_prob = 0;
2083 break;
2084 case GTU: case GT:
2085 msw_taken = comparison;
2086 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2087 break;
2088 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2089 msw_skip = swap_condition (msw_taken);
2090 lsw_taken = GTU;
2091 break;
2092 case GEU: case GE:
2093 if (op2l == CONST0_RTX (SImode))
2094 msw_taken = comparison;
2095 else
2096 {
2097 msw_taken = comparison == GE ? GT : GTU;
2098 msw_skip = swap_condition (msw_taken);
2099 lsw_taken = GEU;
2100 }
2101 break;
2102 case LTU: case LT:
2103 msw_taken = comparison;
2104 if (op2l == CONST0_RTX (SImode))
2105 break;
2106 msw_skip = swap_condition (msw_taken);
2107 lsw_taken = LTU;
2108 break;
2109 case LEU: case LE:
2110 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2111 msw_taken = comparison;
2112 else
2113 {
2114 lsw_taken = LEU;
2115 if (comparison == LE)
2116 msw_taken = LT;
2117 else if (op2h != CONST0_RTX (SImode))
2118 msw_taken = LTU;
2119 else
2120 {
2121 msw_skip = swap_condition (LTU);
2122 break;
2123 }
2124 msw_skip = swap_condition (msw_taken);
2125 }
2126 break;
2127 default: return false;
2128 }
2129 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2130 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2131 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2132 if (comparison != EQ && comparison != NE && num_branches > 1)
2133 {
2134 if (!CONSTANT_P (operands[2])
2135 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2136 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2137 {
2138 msw_taken_prob = prob / 2U;
2139 msw_skip_prob
2140 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2141 lsw_taken_prob = prob;
2142 }
2143 else
2144 {
2145 msw_taken_prob = prob;
2146 msw_skip_prob = REG_BR_PROB_BASE;
2147 /* ??? If we have a constant op2h, should we use that when
2148 calculating lsw_taken_prob? */
2149 lsw_taken_prob = prob;
2150 }
2151 }
2152 operands[1] = op1h;
2153 operands[2] = op2h;
2154 operands[4] = NULL_RTX;
2155 if (reload_completed
2156 && ! arith_reg_or_0_operand (op2h, SImode)
2157 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2158 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2159 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2160 {
2161 emit_move_insn (scratch, operands[2]);
2162 operands[2] = scratch;
2163 }
2164 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2165 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2166 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2167 {
2168 rtx taken_label = operands[3];
2169
2170 /* Operands were possibly modified, but msw_skip doesn't expect this.
2171 Always use the original ones. */
2172 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2173 {
2174 operands[1] = op1h;
2175 operands[2] = op2h;
2176 if (reload_completed
2177 && ! arith_reg_or_0_operand (op2h, SImode)
2178 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2179 {
2180 emit_move_insn (scratch, operands[2]);
2181 operands[2] = scratch;
2182 }
2183 }
2184
2185 operands[3] = skip_label = gen_label_rtx ();
2186 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2187 operands[3] = taken_label;
2188 }
2189 operands[1] = op1l;
2190 operands[2] = op2l;
2191 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2192 {
2193 if (reload_completed
2194 && ! arith_reg_or_0_operand (op2l, SImode)
2195 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2196 {
2197 emit_move_insn (scratch, operands[2]);
2198 operands[2] = scratch;
2199 }
2200 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2201 }
2202 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2203 emit_label (skip_label);
2204 return true;
2205 }
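/* Roughly, for a signed DImode comparison (gt x y) the expansion above
   emits the following sequence of SImode cbranches:

       if (x_hi >  y_hi) goto taken;     // msw_taken = GT
       if (x_hi <  y_hi) goto skip;      // msw_skip  = LT
       if (x_lo >u y_lo) goto taken;     // lsw_taken = GTU
     skip:

   where each individual comparison is emitted via expand_cbranchsi4.  */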
2206
2207 /* Given an operand, return 1 if the evaluated operand plugged into an
2208 if_then_else will result in a branch_true, 0 if branch_false, or
2209 -1 if neither applies. The truth table goes like this:
2210
2211 op | cmpval | code | result
2212 ---------+--------+---------+--------------------
2213 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2214 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2215 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2216 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2217 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2218 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2219 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2220 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2221 int
2222 sh_eval_treg_value (rtx op)
2223 {
2224 enum rtx_code code = GET_CODE (op);
2225 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2226 return -1;
2227
2228 int cmpop = code == EQ ? 1 : 0;
2229 int cmpval = INTVAL (XEXP (op, 1));
2230 if (cmpval != 0 && cmpval != 1)
2231 return -1;
2232
2233 int t;
2234 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2235 t = 0;
2236 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2237 t = 1;
2238 else
2239 return -1;
2240
2241 return t ^ (cmpval == cmpop);
2242 }
2243
2244 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2245
2246 static void
2247 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2248 {
2249 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2250 {
2251 insn = gen_rtx_PARALLEL (VOIDmode,
2252 gen_rtvec (2, insn,
2253 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2254 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2255 }
2256 else
2257 emit_insn (insn);
2258 }
2259
2260 /* Prepare the operands for an scc instruction; make sure that the
2261 compare has been done and the result is in T_REG. */
2262 void
2263 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2264 {
2265 rtx t_reg = get_t_reg_rtx ();
2266 enum rtx_code oldcode = code;
2267 enum machine_mode mode;
2268
2269 /* First need a compare insn. */
2270 switch (code)
2271 {
2272 case NE:
2273 /* It isn't possible to handle this case. */
2274 gcc_unreachable ();
2275 case LT:
2276 code = GT;
2277 break;
2278 case LE:
2279 code = GE;
2280 break;
2281 case LTU:
2282 code = GTU;
2283 break;
2284 case LEU:
2285 code = GEU;
2286 break;
2287 default:
2288 break;
2289 }
2290 if (code != oldcode)
2291 {
2292 rtx tmp = op0;
2293 op0 = op1;
2294 op1 = tmp;
2295 }
2296
2297 mode = GET_MODE (op0);
2298 if (mode == VOIDmode)
2299 mode = GET_MODE (op1);
2300
2301 op0 = force_reg (mode, op0);
2302 if ((code != EQ && code != NE
2303 && (op1 != const0_rtx
2304 || code == GTU || code == GEU || code == LTU || code == LEU))
2305 || (mode == DImode && op1 != const0_rtx)
2306 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2307 op1 = force_reg (mode, op1);
2308
2309 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2310 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2311 mode);
2312 }
2313
2314 rtx
2315 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2316 rtx op0, rtx op1)
2317 {
2318 rtx target = gen_reg_rtx (SImode);
2319 rtx tmp;
2320
2321 gcc_assert (TARGET_SHMEDIA);
2322 switch (code)
2323 {
2324 case EQ:
2325 case GT:
2326 case LT:
2327 case UNORDERED:
2328 case GTU:
2329 case LTU:
2330 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2331 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2332 code = NE;
2333 break;
2334
2335 case NE:
2336 case GE:
2337 case LE:
2338 case ORDERED:
2339 case GEU:
2340 case LEU:
2341 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2342 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2343 code = EQ;
2344 break;
2345
2346 case UNEQ:
2347 case UNGE:
2348 case UNGT:
2349 case UNLE:
2350 case UNLT:
2351 case LTGT:
2352 return NULL_RTX;
2353
2354 default:
2355 gcc_unreachable ();
2356 }
2357
2358 if (mode == DImode)
2359 {
2360 rtx t2 = gen_reg_rtx (DImode);
2361 emit_insn (gen_extendsidi2 (t2, target));
2362 target = t2;
2363 }
2364
2365 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2366 }
2367
2368 /* Called from the md file, set up the operands of a compare instruction. */
2369 void
2370 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2371 {
2372 enum rtx_code code = GET_CODE (operands[0]);
2373 enum rtx_code branch_code;
2374 rtx op0 = operands[1];
2375 rtx op1 = operands[2];
2376 rtx insn, tem;
2377 bool need_ccmpeq = false;
2378
2379 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2380 {
2381 op0 = force_reg (mode, op0);
2382 op1 = force_reg (mode, op1);
2383 }
2384 else
2385 {
2386 if (code != EQ || mode == DImode)
2387 {
2388 /* Force args into regs, since we can't use constants here. */
2389 op0 = force_reg (mode, op0);
2390 if (op1 != const0_rtx || code == GTU || code == GEU)
2391 op1 = force_reg (mode, op1);
2392 }
2393 }
2394
2395 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2396 {
2397 if (code == LT
2398 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2399 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2400 {
2401 tem = op0, op0 = op1, op1 = tem;
2402 code = swap_condition (code);
2403 }
2404
2405 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2406 if (code == GE)
2407 {
2408 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2409 need_ccmpeq = true;
2410 code = GT;
2411 }
2412
2413 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2414 to EQ/GT respectively. */
2415 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2416 }
2417
2418 switch (code)
2419 {
2420 case EQ:
2421 case GT:
2422 case GE:
2423 case GTU:
2424 case GEU:
2425 branch_code = code;
2426 break;
2427 case NE:
2428 case LT:
2429 case LE:
2430 case LTU:
2431 case LEU:
2432 branch_code = reverse_condition (code);
2433 break;
2434 default:
2435 gcc_unreachable ();
2436 }
2437
2438 insn = gen_rtx_SET (VOIDmode,
2439 get_t_reg_rtx (),
2440 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2441
2442 sh_emit_set_t_insn (insn, mode);
2443 if (need_ccmpeq)
2444 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2445
2446 if (branch_code == code)
2447 emit_jump_insn (gen_branch_true (operands[3]));
2448 else
2449 emit_jump_insn (gen_branch_false (operands[3]));
2450 }
2451
2452 void
2453 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2454 {
2455 enum rtx_code code = GET_CODE (operands[1]);
2456 rtx op0 = operands[2];
2457 rtx op1 = operands[3];
2458 rtx lab = NULL_RTX;
2459 bool invert = false;
2460 rtx tem;
2461
2462 op0 = force_reg (mode, op0);
2463 if ((code != EQ && code != NE
2464 && (op1 != const0_rtx
2465 || code == GTU || code == GEU || code == LTU || code == LEU))
2466 || (mode == DImode && op1 != const0_rtx)
2467 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2468 op1 = force_reg (mode, op1);
2469
2470 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2471 {
2472 if (code == LT || code == LE)
2473 {
2474 code = swap_condition (code);
2475 tem = op0, op0 = op1, op1 = tem;
2476 }
2477 if (code == GE)
2478 {
2479 if (TARGET_IEEE)
2480 {
2481 lab = gen_label_rtx ();
2482 sh_emit_scc_to_t (EQ, op0, op1);
2483 emit_jump_insn (gen_branch_true (lab));
2484 code = GT;
2485 }
2486 else
2487 {
2488 code = LT;
2489 invert = true;
2490 }
2491 }
2492 }
2493
2494 if (code == NE)
2495 {
2496 code = EQ;
2497 invert = true;
2498 }
2499
2500 sh_emit_scc_to_t (code, op0, op1);
2501 if (lab)
2502 emit_label (lab);
2503 if (invert)
2504 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2505 else
2506 emit_move_insn (operands[0], get_t_reg_rtx ());
2507 }
2508 \f
2509 /* Functions to output assembly code. */
2510
2511 /* Return a sequence of instructions to perform a DI or DF move.
2512
2513 Since the SH cannot move a DI or DF in one instruction, we have
2514 to take care when we see overlapping source and dest registers. */
2515 const char *
2516 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2517 enum machine_mode mode)
2518 {
2519 rtx dst = operands[0];
2520 rtx src = operands[1];
2521
2522 if (MEM_P (dst)
2523 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2524 return "mov.l %T1,%0" "\n"
2525 " mov.l %1,%0";
2526
2527 if (register_operand (dst, mode)
2528 && register_operand (src, mode))
2529 {
2530 if (REGNO (src) == MACH_REG)
2531 return "sts mach,%S0" "\n"
2532 " sts macl,%R0";
2533
2534 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2535 when mov.d r1,r0 do r1->r0 then r2->r1. */
2536 if (REGNO (src) + 1 == REGNO (dst))
2537 return "mov %T1,%T0" "\n"
2538 " mov %1,%0";
2539 else
2540 return "mov %1,%0" "\n"
2541 " mov %T1,%T0";
2542 }
2543 else if (CONST_INT_P (src))
2544 {
2545 if (INTVAL (src) < 0)
2546 output_asm_insn ("mov #-1,%S0", operands);
2547 else
2548 output_asm_insn ("mov #0,%S0", operands);
2549
2550 return "mov %1,%R0";
2551 }
2552 else if (MEM_P (src))
2553 {
2554 int ptrreg = -1;
2555 int dreg = REGNO (dst);
2556 rtx inside = XEXP (src, 0);
2557
2558 switch (GET_CODE (inside))
2559 {
2560 case REG:
2561 ptrreg = REGNO (inside);
2562 break;
2563
2564 case SUBREG:
2565 ptrreg = subreg_regno (inside);
2566 break;
2567
2568 case PLUS:
2569 ptrreg = REGNO (XEXP (inside, 0));
2570 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2571 an offsettable address. Unfortunately, offsettable addresses use
2572 QImode to check the offset, and a QImode offsettable address
2573 requires r0 for the other operand, which is not currently
2574 supported, so we can't use the 'o' constraint.
2575 Thus we must check for and handle r0+REG addresses here.
2576 We punt for now, since this is likely very rare. */
2577 gcc_assert (!REG_P (XEXP (inside, 1)));
2578 break;
2579
2580 case LABEL_REF:
2581 return "mov.l %1,%0" "\n"
2582 " mov.l %1+4,%T0";
2583 case POST_INC:
2584 return "mov.l %1,%0" "\n"
2585 " mov.l %1,%T0";
2586 default:
2587 gcc_unreachable ();
2588 }
2589
2590 /* Work out the safe way to copy. Copy into the second half first. */
2591 if (dreg == ptrreg)
2592 return "mov.l %T1,%T0" "\n"
2593 " mov.l %1,%0";
2594 }
2595
2596 return "mov.l %1,%0" "\n"
2597 " mov.l %T1,%T0";
2598 }
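/* Illustration of the overlap handling above: loading a DImode value at
   @r2 into the register pair r2/r3 (dreg == ptrreg) is emitted roughly as

       mov.l  @(4,r2),r3
       mov.l  @r2,r2

   so the pointer register is only overwritten by the last move.  */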
2599
2600 /* Print an instruction which would have gone into a delay slot after
2601 another instruction, but couldn't because the other instruction expanded
2602 into a sequence where putting the slot insn at the end wouldn't work. */
2603 static void
2604 print_slot (rtx insn)
2605 {
2606 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2607
2608 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2609 }
2610
2611 const char *
2612 output_far_jump (rtx insn, rtx op)
2613 {
2614 struct { rtx lab, reg, op; } this_jmp;
2615 rtx braf_base_lab = NULL_RTX;
2616 const char *jump;
2617 int far;
2618 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2619 rtx prev;
2620
2621 this_jmp.lab = gen_label_rtx ();
2622
2623 if (TARGET_SH2
2624 && offset >= -32764
2625 && offset - get_attr_length (insn) <= 32766)
2626 {
2627 far = 0;
2628 jump = "mov.w %O0,%1" "\n"
2629 " braf %1";
2630 }
2631 else
2632 {
2633 far = 1;
2634 if (flag_pic)
2635 {
2636 if (TARGET_SH2)
2637 jump = "mov.l %O0,%1" "\n"
2638 " braf %1";
2639 else
2640 jump = "mov.l r0,@-r15" "\n"
2641 " mova %O0,r0" "\n"
2642 " mov.l @r0,%1" "\n"
2643 " add r0,%1" "\n"
2644 " mov.l @r15+,r0" "\n"
2645 " jmp @%1";
2646 }
2647 else
2648 jump = "mov.l %O0,%1" "\n"
2649 " jmp @%1";
2650 }
2651 /* If we have a scratch register available, use it. */
2652 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2653 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2654 {
2655 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2656 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2657 jump = "mov.l r1,@-r15" "\n"
2658 " mova %O0,r0" "\n"
2659 " mov.l @r0,r1" "\n"
2660 " add r1,r0" "\n"
2661 " mov.l @r15+,r1" "\n"
2662 " jmp @%1";
2663 output_asm_insn (jump, &this_jmp.lab);
2664 if (dbr_sequence_length ())
2665 print_slot (final_sequence);
2666 else
2667 output_asm_insn ("nop", 0);
2668 }
2669 else
2670 {
2671 /* Output the delay slot insn first if any. */
2672 if (dbr_sequence_length ())
2673 print_slot (final_sequence);
2674
2675 this_jmp.reg = gen_rtx_REG (SImode, 13);
2676 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2677 Fortunately, MACL is fixed and call-clobbered, and we never
2678 need its value across jumps, so save r13 in it instead of on
2679 the stack. */
2680 if (TARGET_SH5)
2681 output_asm_insn ("lds r13,macl", 0);
2682 else
2683 output_asm_insn ("mov.l r13,@-r15", 0);
2684 output_asm_insn (jump, &this_jmp.lab);
2685 if (TARGET_SH5)
2686 output_asm_insn ("sts macl,r13", 0);
2687 else
2688 output_asm_insn ("mov.l @r15+,r13", 0);
2689 }
2690 if (far && flag_pic && TARGET_SH2)
2691 {
2692 braf_base_lab = gen_label_rtx ();
2693 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2694 CODE_LABEL_NUMBER (braf_base_lab));
2695 }
2696 if (far)
2697 output_asm_insn (".align 2", 0);
2698 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2699 this_jmp.op = op;
2700 if (far && flag_pic)
2701 {
2702 if (TARGET_SH2)
2703 this_jmp.lab = braf_base_lab;
2704 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2705 }
2706 else
2707 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2708 return "";
2709 }
2710
2711 /* Local label counter, used for constants in the pool and inside
2712 pattern branches. */
2713 static int lf = 100;
2714
2715 /* Output code for ordinary branches. */
2716 const char *
2717 output_branch (int logic, rtx insn, rtx *operands)
2718 {
2719 switch (get_attr_length (insn))
2720 {
2721 case 6:
2722 /* This can happen if filling the delay slot has caused a forward
2723 branch to exceed its range (we could reverse it, but only
2724 when we know we won't overextend other branches; this should
2725 best be handled by relaxation).
2726 It can also happen when other condbranches hoist delay slot insns
2727 from their destination, thus leading to code size increase.
2728 But the branch will still be in the range -4092..+4098 bytes. */
2729 if (! TARGET_RELAX)
2730 {
2731 int label = lf++;
2732 /* The call to print_slot will clobber the operands. */
2733 rtx op0 = operands[0];
2734
2735 /* If the instruction in the delay slot is annulled (true), then
2736 there is no delay slot where we can put it now. The only safe
2737 place for it is after the label. final will do that by default. */
2738
2739 if (final_sequence
2740 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2741 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2742 {
2743 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2744 ASSEMBLER_DIALECT ? "/" : ".", label);
2745 print_slot (final_sequence);
2746 }
2747 else
2748 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2749
2750 output_asm_insn ("bra\t%l0", &op0);
2751 fprintf (asm_out_file, "\tnop\n");
2752 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2753
2754 return "";
2755 }
2756 /* When relaxing, handle this like a short branch. The linker
2757 will fix it up if it still doesn't fit after relaxation. */
2758 case 2:
2759 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2760
2761 /* These are for SH2e, in which we have to account for the
2762 extra nop because of the hardware bug in annulled branches. */
2763 case 8:
2764 if (! TARGET_RELAX)
2765 {
2766 int label = lf++;
2767
2768 gcc_assert (!final_sequence
2769 || !(INSN_ANNULLED_BRANCH_P
2770 (XVECEXP (final_sequence, 0, 0))));
2771 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2772 logic ? "f" : "t",
2773 ASSEMBLER_DIALECT ? "/" : ".", label);
2774 fprintf (asm_out_file, "\tnop\n");
2775 output_asm_insn ("bra\t%l0", operands);
2776 fprintf (asm_out_file, "\tnop\n");
2777 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2778
2779 return "";
2780 }
2781 /* When relaxing, fall through. */
2782 case 4:
2783 {
2784 char buffer[10];
2785
2786 sprintf (buffer, "b%s%ss\t%%l0",
2787 logic ? "t" : "f",
2788 ASSEMBLER_DIALECT ? "/" : ".");
2789 output_asm_insn (buffer, &operands[0]);
2790 return "nop";
2791 }
2792
2793 default:
2794 /* There should be no longer branches now - that would
2795 indicate that something has destroyed the branches set
2796 up in machine_dependent_reorg. */
2797 gcc_unreachable ();
2798 }
2799 }
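/* As an illustration of the length 6 case above, an out-of-range
   "bt target" without a delay slot insn is rewritten as something like

       bf      .LF100
       bra     target
       nop
   .LF100:

   i.e. the condition is inverted around an unconditional branch that
   has the longer reach.  */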
2800
2801 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2802 fill in operands[9] with a label for the successor insn.
2803 We try to use jump threading where possible.
2804 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2805 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2806 follow jmp and bt, if the address is in range. */
2807 const char *
2808 output_branchy_insn (enum rtx_code code, const char *templ,
2809 rtx insn, rtx *operands)
2810 {
2811 rtx next_insn = NEXT_INSN (insn);
2812
2813 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2814 {
2815 rtx src = SET_SRC (PATTERN (next_insn));
2816 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2817 {
2818 /* Following branch not taken */
2819 operands[9] = gen_label_rtx ();
2820 emit_label_after (operands[9], next_insn);
2821 INSN_ADDRESSES_NEW (operands[9],
2822 INSN_ADDRESSES (INSN_UID (next_insn))
2823 + get_attr_length (next_insn));
2824 return templ;
2825 }
2826 else
2827 {
2828 int offset = (branch_dest (next_insn)
2829 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2830 if (offset >= -252 && offset <= 258)
2831 {
2832 if (GET_CODE (src) == IF_THEN_ELSE)
2833 /* branch_true */
2834 src = XEXP (src, 1);
2835 operands[9] = src;
2836 return templ;
2837 }
2838 }
2839 }
2840 operands[9] = gen_label_rtx ();
2841 emit_label_after (operands[9], insn);
2842 INSN_ADDRESSES_NEW (operands[9],
2843 INSN_ADDRESSES (INSN_UID (insn))
2844 + get_attr_length (insn));
2845 return templ;
2846 }
2847
2848 const char *
2849 output_ieee_ccmpeq (rtx insn, rtx *operands)
2850 {
2851 return output_branchy_insn (NE, "bt %l9" "\n"
2852 " fcmp/eq %1,%0",
2853 insn, operands);
2854 }
2855 \f
2856 /* Output the start of the assembler file. */
2857 static void
2858 sh_file_start (void)
2859 {
2860 default_file_start ();
2861
2862 if (TARGET_ELF)
2863 /* We need to show the text section with the proper
2864 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2865 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2866 will complain. We can teach GAS specifically about the
2867 default attributes for our choice of text section, but
2868 then we would have to change GAS again if/when we change
2869 the text section name. */
2870 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2871 else
2872 /* Switch to the data section so that the coffsem symbol
2873 isn't in the text section. */
2874 switch_to_section (data_section);
2875
2876 if (TARGET_LITTLE_ENDIAN)
2877 fputs ("\t.little\n", asm_out_file);
2878
2879 if (!TARGET_ELF)
2880 {
2881 if (TARGET_SHCOMPACT)
2882 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2883 else if (TARGET_SHMEDIA)
2884 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2885 TARGET_SHMEDIA64 ? 64 : 32);
2886 }
2887 }
2888 \f
2889 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2890 static bool
2891 unspec_caller_rtx_p (rtx pat)
2892 {
2893 rtx base, offset;
2894 int i;
2895
2896 split_const (pat, &base, &offset);
2897 if (GET_CODE (base) == UNSPEC)
2898 {
2899 if (XINT (base, 1) == UNSPEC_CALLER)
2900 return true;
2901 for (i = 0; i < XVECLEN (base, 0); i++)
2902 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2903 return true;
2904 }
2905 return false;
2906 }
2907
2908 /* Indicate that INSN cannot be duplicated. This is true for an insn
2909 that generates a unique label. */
2910 static bool
2911 sh_cannot_copy_insn_p (rtx insn)
2912 {
2913 rtx pat;
2914
2915 if (!reload_completed || !flag_pic)
2916 return false;
2917
2918 if (!NONJUMP_INSN_P (insn))
2919 return false;
2920 if (asm_noperands (insn) >= 0)
2921 return false;
2922
2923 pat = PATTERN (insn);
2924 if (GET_CODE (pat) != SET)
2925 return false;
2926 pat = SET_SRC (pat);
2927
2928 if (unspec_caller_rtx_p (pat))
2929 return true;
2930
2931 return false;
2932 }
2933 \f
2934 /* Number of instructions used to make an arithmetic right shift by N. */
2935 static const char ashiftrt_insns[] =
2936 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2937
2938 /* Description of a logical left or right shift, when expanded to a sequence
2939 of 1/2/8/16 shifts.
2940 Notice that one bit right shifts clobber the T bit. One bit left shifts
2941 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
2942 enum
2943 {
2944 ASHL_CLOBBERS_T = 1 << 0,
2945 LSHR_CLOBBERS_T = 1 << 1
2946 };
2947
2948 struct ashl_lshr_sequence
2949 {
2950 char insn_count;
2951 char amount[6];
2952 char clobbers_t;
2953 };
2954
2955 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2956 {
2957 { 0, { 0 }, 0 }, // 0
2958 { 1, { 1 }, LSHR_CLOBBERS_T },
2959 { 1, { 2 }, 0 },
2960 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2961 { 2, { 2, 2 }, 0 }, // 4
2962 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2963 { 3, { 2, 2, 2 }, 0 },
2964 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2965 { 1, { 8 }, 0 }, // 8
2966 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2967 { 2, { 8, 2 }, 0 },
2968 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2969 { 3, { 8, 2, 2 }, 0 }, // 12
2970 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
2971 { 3, { 8, -2, 8 }, 0 },
2972 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
2973 { 1, { 16 }, 0 }, // 16
2974 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2975 { 2, { 16, 2 }, 0 },
2976 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2977 { 3, { 16, 2, 2 }, 0 }, // 20
2978 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2979 { 3, { 16, -2, 8 }, 0 },
2980 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2981 { 2, { 16, 8 }, 0 }, // 24
2982 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2983 { 3, { 16, 8, 2 }, 0 },
2984 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2985 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2986 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2987 { 3, { 16, -2, 16 }, 0 },
2988
2989 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
2990 For a left shift by 31 a 2 insn and-rotl sequence can be used.
2991 However, the shift-and combiner code needs this entry here to be in
2992 terms of real shift insns. */
2993 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2994 };
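/* Reading the table above: ashl_lshr_seq[13] = { 4, { 8, 2, 1, 2 }, ... }
   means a shift by 13 is composed of shifts by 8, 2, 1 and 2 (four insns),
   and the logical right shift variant clobbers T because of the 1-bit
   shift.  A negative amount denotes a shift in the opposite direction,
   as in { 8, -2, 8 } for a shift by 14.  */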
2995
2996 /* Individual shift sequences for shift amounts < 16, where up to the
2997 three highest bits might be clobbered. This is typically used when
2998 combined with some kind of sign or zero extension. */
2999 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3000 {
3001 { 0, { 0 }, 0 }, // 0
3002 { 1, { 1 }, LSHR_CLOBBERS_T },
3003 { 1, { 2 }, 0 },
3004 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3005 { 2, { 2, 2 }, 0 }, // 4
3006 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3007 { 2, { 8, -2 }, 0 },
3008 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3009 { 1, { 8 }, 0 }, // 8
3010 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3011 { 2, { 8, 2 }, 0 },
3012 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3013 { 3, { 8, 2, 2 }, 0 }, // 12
3014 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3015 { 2, { 16, -2 }, 0 },
3016 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3017 { 1, { 16 }, 0 }, // 16
3018 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3019 { 2, { 16, 2 }, 0 },
3020 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3021 { 3, { 16, 2, 2 }, 0 }, // 20
3022 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3023 { 3, { 16, -2, 8 }, 0 },
3024 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3025 { 2, { 16, 8 }, 0 }, // 24
3026 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3027 { 3, { 16, 8, 2 }, 0 },
3028 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3029 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3030 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3031 { 3, { 16, -2, 16 }, 0 },
3032 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3033 };
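/* Compared to ashl_lshr_seq, the entries above trade the topmost result
   bits for shorter sequences, e.g. a left shift by 6 becomes { 8, -2 }
   (two insns) instead of { 2, 2, 2 } (three insns), zeroing the two most
   significant bits of the result -- harmless when the value is
   subsequently sign- or zero-extended from a narrower width.  */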
3034
3035 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3036 will clobber the T bit. */
3037 bool
3038 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3039 {
3040 gcc_assert (CONST_INT_P (shift_amount));
3041
3042 const int shift_amount_i = INTVAL (shift_amount) & 31;
3043
3044 /* Special case for shift count of 31: use and-rotl sequence. */
3045 if (shift_amount_i == 31)
3046 return true;
3047
3048 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3049 & ASHL_CLOBBERS_T) != 0;
3050 }
3051
3052 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3053 instructions will clobber the T bit. */
3054 bool
3055 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3056 {
3057 gcc_assert (CONST_INT_P (shift_amount));
3058
3059 const int shift_amount_i = INTVAL (shift_amount) & 31;
3060
3061 /* Special case for shift count of 31: use shll-movt sequence. */
3062 if (shift_amount_i == 31)
3063 return true;
3064
3065 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3066 & LSHR_CLOBBERS_T) != 0;
3067 }
3068
3069 /* Return true if it is potentially beneficial to use a dynamic shift
3070 instruction (shad / shar) instead of a combination of 1/2/8/16
3071 shift instructions for the specified shift count.
3072 If dynamic shifts are not available, always return false. */
3073 bool
3074 sh_dynamicalize_shift_p (rtx count)
3075 {
3076 gcc_assert (CONST_INT_P (count));
3077
3078 const int shift_amount_i = INTVAL (count) & 31;
3079 int insn_count;
3080
3081 /* For left and right shifts, there are shorter 2 insn sequences for
3082 shift amounts of 31. */
3083 if (shift_amount_i == 31)
3084 insn_count = 2;
3085 else
3086 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3087
3088 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3089 }
3090
3091 /* Assuming we have a value that has been sign-extended by at least one bit,
3092 can we use the ext_shift_amounts with the last shift turned to an
3093 arithmetic shift to shift it by N without data loss, and quicker than by
3094 other means? */
3095 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
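/* I.e. EXT_SHIFT_SIGNED (n) holds only for n == 7 and n == 15.  */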
3096
3097 /* Return the cost of a shift. */
3098 static inline int
3099 shiftcosts (rtx x)
3100 {
3101 int value;
3102
3103 if (TARGET_SHMEDIA)
3104 return 1;
3105
3106 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3107 {
3108 if (GET_MODE (x) == DImode
3109 && CONST_INT_P (XEXP (x, 1))
3110 && INTVAL (XEXP (x, 1)) == 1)
3111 return 2;
3112
3113 /* Everything else is invalid, because there is no pattern for it. */
3114 return -1;
3115 }
3116 /* If the shift is by a non-constant amount, it will be expensive. */
3117 if (!CONST_INT_P (XEXP (x, 1)))
3118 return SH_DYNAMIC_SHIFT_COST;
3119
3120 /* Otherwise, return the true cost in instructions. Cope with out of range
3121 shift counts more or less arbitrarily. */
3122 value = INTVAL (XEXP (x, 1)) & 31;
3123
3124 if (GET_CODE (x) == ASHIFTRT)
3125 {
3126 int cost = ashiftrt_insns[value];
3127 /* If dynamic shifts are available and profitable in this case, then we
3128 put the constant in a reg and use shad. */
3129 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3130 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3131 return cost;
3132 }
3133 else
3134 return ashl_lshr_seq[value].insn_count;
3135 }
3136
3137 /* Return the cost of an AND/XOR/IOR operation. */
3138 static inline int
3139 and_xor_ior_costs (rtx x, int code)
3140 {
3141 /* On SH1-4 we have only max. SImode operations.
3142 Double the cost for modes > SImode. */
3143 const int cost_scale = !TARGET_SHMEDIA
3144 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3145 ? 2 : 1;
3146
3147 /* A logical operation with two registers is a single cycle
3148 instruction. */
3149 if (!CONST_INT_P (XEXP (x, 1)))
3150 return 1 * cost_scale;
3151
3152 int i = INTVAL (XEXP (x, 1));
3153
3154 if (TARGET_SHMEDIA)
3155 {
3156 if (satisfies_constraint_I10 (XEXP (x, 1))
3157 || satisfies_constraint_J16 (XEXP (x, 1)))
3158 return 1;
3159 else
3160 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3161 }
3162
3163 /* These constants are single cycle extu.[bw] instructions. */
3164 if ((i == 0xff || i == 0xffff) && code == AND)
3165 return 1 * cost_scale;
3166 /* Constants that can be used in an instruction as an immediate are
3167 a single cycle, but this requires r0, so make it a little more
3168 expensive. */
3169 if (CONST_OK_FOR_K08 (i))
3170 return 2 * cost_scale;
3171 /* Constants that can be loaded with a mov immediate need one more cycle.
3172 This case is probably unnecessary. */
3173 if (CONST_OK_FOR_I08 (i))
3174 return 2 * cost_scale;
3175 /* Any other constant requires an additional 2 cycle pc-relative load.
3176 This case is probably unnecessary. */
3177 return 3 * cost_scale;
3178 }
3179
3180 /* Return the cost of an addition or a subtraction. */
3181 static inline int
3182 addsubcosts (rtx x)
3183 {
3184 if (GET_MODE (x) == SImode)
3185 {
3186 /* The addc or subc patterns will eventually become one or two
3187 instructions. Below are some costs for some of the patterns
3188 which combine would reject because the costs of the individual
3189 insns in the patterns are lower.
3190
3191 FIXME: It would be much easier if we had something like insn cost
3192 attributes and the cost calculation machinery used those attributes
3193 in the first place. This would eliminate redundant recog-like C
3194 code to calculate costs of complex patterns. */
3195 rtx op0 = XEXP (x, 0);
3196 rtx op1 = XEXP (x, 1);
3197
3198 if (GET_CODE (x) == PLUS)
3199 {
3200 if (GET_CODE (op0) == AND
3201 && XEXP (op0, 1) == const1_rtx
3202 && (GET_CODE (op1) == PLUS
3203 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3204 return 1;
3205
3206 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3207 && GET_CODE (op1) == LSHIFTRT
3208 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3209 return 1;
3210 }
3211 }
3212
3213 /* On SH1-4 we have only max. SImode operations.
3214 Double the cost for modes > SImode. */
3215 const int cost_scale = !TARGET_SHMEDIA
3216 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3217 ? 2 : 1;
3218
3219 /* Adding a register is a single cycle insn. */
3220 if (REG_P (XEXP (x, 1))
3221 || GET_CODE (XEXP (x, 1)) == SUBREG)
3222 return 1 * cost_scale;
3223
3224 /* Likewise for small constants. */
3225 if (CONST_INT_P (XEXP (x, 1))
3226 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3227 return 1 * cost_scale;
3228
3229 if (TARGET_SHMEDIA)
3230 switch (GET_CODE (XEXP (x, 1)))
3231 {
3232 case CONST:
3233 case LABEL_REF:
3234 case SYMBOL_REF:
3235 return TARGET_SHMEDIA64 ? 5 : 3;
3236
3237 case CONST_INT:
3238 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3239 return 2;
3240 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3241 return 3;
3242 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3243 return 4;
3244
3245 /* Fall through. */
3246 default:
3247 return 5;
3248 }
3249
3250 /* Any other constant requires a 2 cycle pc-relative load plus an
3251 addition. */
3252 return 3 * cost_scale;
3253 }
3254
3255 /* Return the cost of a multiply. */
3256 static inline int
3257 multcosts (rtx x ATTRIBUTE_UNUSED)
3258 {
3259 if (sh_multcost >= 0)
3260 return sh_multcost;
3261 if (TARGET_SHMEDIA)
3262 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3263 accept constants. Ideally, we would use a cost of one or two and
3264 add the cost of the operand, but disregard the latter when inside loops
3265 and loop invariant code motion is still to follow.
3266 Using a multiply first and splitting it later if it's a loss
3267 doesn't work because of different sign / zero extension semantics
3268 of multiplies vs. shifts. */
3269 return optimize_size ? 2 : 3;
3270
3271 if (TARGET_SH2)
3272 {
3273 /* We have a mul insn, so we can never take more than the mul and the
3274 read of the mac reg, but count more because of the latency and extra
3275 reg usage. */
3276 if (optimize_size)
3277 return 2;
3278 return 3;
3279 }
3280
3281 /* If we're aiming at small code, then just count the number of
3282 insns in a multiply call sequence. */
3283 if (optimize_size)
3284 return 5;
3285
3286 /* Otherwise count all the insns in the routine we'd be calling too. */
3287 return 20;
3288 }
3289
3290 /* Compute a (partial) cost for rtx X. Return true if the complete
3291 cost has been computed, and false if subexpressions should be
3292 scanned. In either case, *TOTAL contains the cost result. */
3293 static bool
3294 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3295 int *total, bool speed ATTRIBUTE_UNUSED)
3296 {
3297 switch (code)
3298 {
3299 /* The lower-subreg pass decides whether to split multi-word regs
3300 into individual regs by looking at the cost for a SET of certain
3301 modes with the following patterns:
3302 (set (reg) (reg))
3303 (set (reg) (const_int 0))
3304 On machines that support vector-move operations a multi-word move
3305 is the same cost as an individual reg move. On SH there is no
3306 vector-move, so we have to provide the correct cost in the number
3307 of move insns to load/store the reg of the mode in question. */
3308 case SET:
3309 if (register_operand (SET_DEST (x), VOIDmode)
3310 && (register_operand (SET_SRC (x), VOIDmode)
3311 || satisfies_constraint_Z (SET_SRC (x))))
3312 {
3313 const enum machine_mode mode = GET_MODE (SET_DEST (x));
3314 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3315 / mov_insn_size (mode, TARGET_SH2A));
3316 return true;
3317 }
3318 return false;
3319
3320 /* The cost of a mem access is mainly the cost of the address mode. */
3321 case MEM:
3322 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3323 true);
3324 return true;
3325
3326 /* The cost of a sign or zero extend depends on whether the source is a
3327 reg or a mem. In case of a mem take the address into account. */
3328 case SIGN_EXTEND:
3329 if (REG_P (XEXP (x, 0)))
3330 {
3331 *total = COSTS_N_INSNS (1);
3332 return true;
3333 }
3334 if (MEM_P (XEXP (x, 0)))
3335 {
3336 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3337 GET_MODE (XEXP (x, 0)),
3338 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3339 return true;
3340 }
3341 return false;
3342
3343 case ZERO_EXTEND:
3344 if (REG_P (XEXP (x, 0)))
3345 {
3346 *total = COSTS_N_INSNS (1);
3347 return true;
3348 }
3349 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3350 && (GET_MODE (XEXP (x, 0)) == QImode
3351 || GET_MODE (XEXP (x, 0)) == HImode))
3352 {
3353 /* Handle SH2A's movu.b and movu.w insn. */
3354 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3355 GET_MODE (XEXP (x, 0)),
3356 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3357 return true;
3358 }
3359 return false;
3360
3361 /* mems for SFmode and DFmode can be inside a parallel due to
3362 the way the fpscr is handled. */
3363 case PARALLEL:
3364 for (int i = 0; i < XVECLEN (x, 0); i++)
3365 {
3366 rtx xx = XVECEXP (x, 0, i);
3367 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3368 {
3369 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3370 GET_MODE (XEXP (xx, 0)),
3371 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3372 return true;
3373 }
3374 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3375 {
3376 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3377 GET_MODE (XEXP (xx, 1)),
3378 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3379 return true;
3380 }
3381 }
3382
3383 if (sh_1el_vec (x, VOIDmode))
3384 *total = outer_code != SET;
3385 else if (sh_rep_vec (x, VOIDmode))
3386 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3387 + (outer_code != SET));
3388 else
3389 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3390 return true;
3391
3392 case CONST_INT:
3393 if (TARGET_SHMEDIA)
3394 {
3395 if (INTVAL (x) == 0)
3396 *total = 0;
3397 else if (outer_code == AND && and_operand ((x), DImode))
3398 *total = 0;
3399 else if ((outer_code == IOR || outer_code == XOR
3400 || outer_code == PLUS)
3401 && CONST_OK_FOR_I10 (INTVAL (x)))
3402 *total = 0;
3403 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3404 *total = COSTS_N_INSNS (outer_code != SET);
3405 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3406 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3407 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3408 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3409 else
3410 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3411 return true;
3412 }
3413 if (CONST_OK_FOR_I08 (INTVAL (x)))
3414 *total = 0;
3415 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3416 && CONST_OK_FOR_K08 (INTVAL (x)))
3417 *total = 1;
3418 /* prepare_cmp_insn will force costly constants into registers before
3419 the cbranch[sd]i4 patterns can see them, so preserve potentially
3420 interesting ones not covered by I08 above. */
3421 else if (outer_code == COMPARE
3422 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3423 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3424 || INTVAL (x) == 0x7fffffff
3425 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3426 *total = 1;
3427 else
3428 *total = 8;
3429 return true;
3430
3431 case EQ:
3432 /* An and with a constant compared against zero is
3433 most likely going to be a TST #imm, R0 instruction.
3434 Notice that this does not catch the zero_extract variants from
3435 the md file. */
3436 if (GET_CODE (XEXP (x, 0)) == AND
3437 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3438 {
3439 *total = 1;
3440 return true;
3441 }
3442 else
3443 return false;
3444
3445 case SMIN:
3446 case SMAX:
3447 /* This is most likely a clips.b or clips.w insn that is being made up
3448 by combine. */
3449 if (TARGET_SH2A
3450 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3451 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3452 && REG_P (XEXP (XEXP (x, 0), 0))
3453 && CONST_INT_P (XEXP (x, 1)))
3454 {
3455 *total = COSTS_N_INSNS (1);
3456 return true;
3457 }
3458 else
3459 return false;
3460
3461 case CONST:
3462 case LABEL_REF:
3463 case SYMBOL_REF:
3464 if (TARGET_SHMEDIA64)
3465 *total = COSTS_N_INSNS (4);
3466 else if (TARGET_SHMEDIA32)
3467 *total = COSTS_N_INSNS (2);
3468 else
3469 *total = 5;
3470 return true;
3471
3472 case CONST_DOUBLE:
3473 if (TARGET_SHMEDIA)
3474 *total = COSTS_N_INSNS (4);
3475 /* prepare_cmp_insn will force costly constants into registers before
3476 the cbranchdi4 pattern can see them, so preserve potentially
3477 interesting ones. */
3478 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3479 *total = 1;
3480 else
3481 *total = 10;
3482 return true;
3483
3484 case CONST_VECTOR:
3485 /* These tests must form an if/else chain (cf. the PARALLEL case above). */
3486 if (x == CONST0_RTX (GET_MODE (x)))
3487 *total = 0;
3488 else if (sh_1el_vec (x, VOIDmode))
3489 *total = outer_code != SET;
3490 else if (sh_rep_vec (x, VOIDmode))
3491 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3492 + (outer_code != SET));
3493 else
3494 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3495 return true;
3496
3497 case PLUS:
3498 case MINUS:
3499 *total = COSTS_N_INSNS (addsubcosts (x));
3500 return true;
3501
3502 case AND:
3503 case XOR:
3504 case IOR:
3505 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3506 return true;
3507
3508 case MULT:
3509 *total = COSTS_N_INSNS (multcosts (x));
3510 return true;
3511
3512 case LT:
3513 case GE:
3514 /* div0s sign comparison. */
3515 if (GET_CODE (XEXP (x, 0)) == XOR
3516 && REG_P ((XEXP (XEXP (x, 0), 0)))
3517 && REG_P ((XEXP (XEXP (x, 0), 1)))
3518 && satisfies_constraint_Z (XEXP (x, 1)))
3519 {
3520 *total = COSTS_N_INSNS (1);
3521 return true;
3522 }
3523 else
3524 return false;
3525
3526 case LSHIFTRT:
3527 /* div0s sign comparison. */
3528 if (GET_CODE (XEXP (x, 0)) == XOR
3529 && REG_P ((XEXP (XEXP (x, 0), 0)))
3530 && REG_P ((XEXP (XEXP (x, 0), 1)))
3531 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3532 {
3533 *total = COSTS_N_INSNS (1);
3534 return true;
3535 }
3536 /* Fall through to shiftcosts. */
3537 case ASHIFT:
3538 case ASHIFTRT:
3539 {
3540 int cost = shiftcosts (x);
3541 if (cost < 0)
3542 return false;
3543 *total = COSTS_N_INSNS (cost);
3544 return true;
3545 }
3546
3547 case DIV:
3548 case UDIV:
3549 case MOD:
3550 case UMOD:
3551 *total = COSTS_N_INSNS (20);
3552 return true;
3553
3554 case FLOAT:
3555 case FIX:
3556 *total = 100;
3557 return true;
3558
3559 default:
3560 return false;
3561 }
3562 }
3563
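/* For illustration, on a non-SHMEDIA target the CONST_INT case above gives
   roughly these costs: (const_int 5) costs 0 (fits I08); (const_int 255)
   inside an AND costs 1 (fits K08); (const_int 0x80) inside a COMPARE is
   kept at cost 1 for the cbranch patterns; (const_int 0x12345) costs 8 and
   will end up as a constant pool load.  */
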
3564 /* Determine the size of the fundamental move insn that will be used
3565 for the specified mode. */
3566 static inline int
3567 mov_insn_size (enum machine_mode mode, bool consider_sh2a)
3568 {
3569 const int mode_sz = GET_MODE_SIZE (mode);
3570
3571 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3572 || (TARGET_FMOVD && mode == DFmode))
3573 return mode_sz;
3574 else
3575 {
3576 /* The max. available mode for actual move insns is SImode.
3577 Larger accesses will be split into multiple loads/stores. */
3578 const int max_mov_sz = GET_MODE_SIZE (SImode);
3579 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3580 }
3581 }
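
/* For example, this returns 1 for QImode, 2 for HImode and 4 for SImode.
   DImode, and DFmode without double-precision move support, also return 4
   because such values are moved as multiple SImode loads/stores; with FMOVD
   (or SH2A double support) DFmode returns 8.  */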
3582
3583 /* Determine the maximum possible displacement for a move insn for the
3584 specified mode. */
3585 static int
3586 max_mov_insn_displacement (enum machine_mode mode, bool consider_sh2a)
3587 {
3588 /* The 4 byte displacement move insns are the same as the 2 byte
3589 versions but take a 12 bit displacement. All we need to do is to
3590 scale the max. displacement value accordingly. */
3591 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3592
3593 /* SH2A supports FPU move insns with 12 bit displacements.
3594 Other variants do not support any kind of displacements for
3595 FPU move insns. */
3596 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3597 return 0;
3598 else
3599 {
3600 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3601 const int mode_sz = GET_MODE_SIZE (mode);
3602 int r = 15 * mov_insn_sz * disp_scale;
3603
3604 /* If the mov insn will be split into multiple loads/stores, the
3605 maximum possible displacement is a bit smaller. */
3606 if (mode_sz > mov_insn_sz)
3607 r -= mode_sz - mov_insn_sz;
3608 return r;
3609 }
3610 }
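
/* Worked examples (non-SH2A): SImode gives 15 * 4 = 60, HImode 15 * 2 = 30
   and QImode 15 * 1 = 15, matching the 4 bit scaled displacement fields of
   mov.l/mov.w/mov.b @(disp,Rn). A DFmode access without FMOVD is split into
   SImode moves, so its limit is 60 - (8 - 4) = 56. With the SH2A 4 byte
   insns (consider_sh2a == true) the scale factor 4095 / 15 = 273 raises the
   SImode limit to 15 * 4 * 273 = 16380.  */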
3611
3612 /* Determine the alignment mask for a move insn of the
3613 specified mode. */
3614 static inline int
3615 mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
3616 {
3617 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3618 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3619 }
3620
3621 /* Return the displacement value of a displacement address. */
3622 static inline HOST_WIDE_INT
3623 disp_addr_displacement (rtx x)
3624 {
3625 gcc_assert (satisfies_constraint_Sdd (x));
3626 return INTVAL (XEXP (XEXP (x, 0), 1));
3627 }
3628
3629 /* Compute the cost of an address. */
3630 static int
3631 sh_address_cost (rtx x, enum machine_mode mode,
3632 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3633 {
3634 /* 'GBR + 0'. Account one more because of R0 restriction. */
3635 if (REG_P (x) && REGNO (x) == GBR_REG)
3636 return 2;
3637
3638 /* Simple reg, post-inc, pre-dec addressing. */
3639 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3640 return 1;
3641
3642 /* 'reg + disp' addressing. */
3643 if (GET_CODE (x) == PLUS
3644 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3645 {
3646 /* 'GBR + disp'. Account one more because of R0 restriction. */
3647 if (REGNO (XEXP (x, 0)) == GBR_REG
3648 && gbr_displacement (XEXP (x, 1), mode))
3649 return 2;
3650
3651 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3652
3653 if (offset == 0)
3654 return 1;
3655
3656 /* The displacement would fit into a 2 byte move insn.
3657 HImode and QImode loads/stores with displacement put pressure on
3658 R0 which will most likely require another reg copy. Thus account
3659 a higher cost for that. */
3660 if (offset > 0 && offset <= max_mov_insn_displacement (mode, false))
3661 return (mode == HImode || mode == QImode) ? 2 : 1;
3662
3663 /* The displacement would fit into a 4 byte move insn (SH2A). */
3664 if (TARGET_SH2A
3665 && offset > 0 && offset <= max_mov_insn_displacement (mode, true))
3666 return 2;
3667
3668 /* The displacement is probably out of range and will require extra
3669 calculations. */
3670 return 3;
3671 }
3672
3673 /* 'reg + reg' addressing. Account a slightly higher cost because of
3674 increased pressure on R0. */
3675 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3676 && ! TARGET_SHMEDIA)
3677 return 3;
3678
3679 /* Not sure what it is - probably expensive. */
3680 return 10;
3681 }
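
/* Some example costs this produces (non-SH2A, non-SHMEDIA): @r4 and
   @(8,r4) for SImode cost 1; @(8,r4) for QImode or HImode costs 2 because
   of the implied R0 use; @(64,r4) for SImode costs 3 (out of range for the
   2 byte insn); @(r0,r4) costs 3; @(4,gbr) costs 2.  */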
3682
3683 /* Code to expand a shift. */
3684 static void
3685 gen_ashift (int type, int n, rtx reg)
3686 {
3687 rtx n_rtx;
3688
3689 /* Negative values here come from the ashl_lshr_seq / ext_ashl_lshr_seq tables. */
3690 if (n < 0)
3691 {
3692 if (type == ASHIFT)
3693 type = LSHIFTRT;
3694 else
3695 type = ASHIFT;
3696 n = -n;
3697 }
3698
3699 n_rtx = GEN_INT (n);
3700 gcc_assert (satisfies_constraint_P27 (n_rtx));
3701
3702 switch (type)
3703 {
3704 case ASHIFTRT:
3705 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3706 break;
3707 case LSHIFTRT:
3708 if (n == 1)
3709 emit_insn (gen_shlr (reg, reg));
3710 else
3711 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3712 break;
3713 case ASHIFT:
3714 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3715 break;
3716 default:
3717 gcc_unreachable ();
3718 }
3719 }
3720
3721 /* Code to expand a HImode shift. */
3722 static void
3723 gen_ashift_hi (int type, int n, rtx reg)
3724 {
3725 /* Negative values here come from the ashl_lshr_seq / ext_ashl_lshr_seq tables. */
3726 if (n < 0)
3727 {
3728 if (type == ASHIFT)
3729 type = LSHIFTRT;
3730 else
3731 type = ASHIFT;
3732 n = -n;
3733 }
3734
3735 switch (type)
3736 {
3737 case ASHIFTRT:
3738 case LSHIFTRT:
3739 /* We don't have HImode right shift operations because using the
3740 ordinary 32 bit shift instructions for that doesn't generate proper
3741 zero/sign extension.
3742 gen_ashift_hi is only called in contexts where we know that the
3743 sign extension works out correctly. */
3744 {
3745 int offset = 0;
3746 if (GET_CODE (reg) == SUBREG)
3747 {
3748 offset = SUBREG_BYTE (reg);
3749 reg = SUBREG_REG (reg);
3750 }
3751 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3752 break;
3753 }
3754 case ASHIFT:
3755 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3756 break;
3757 }
3758 }
3759
3760 /* Output RTL to split a constant shift into its component SH constant
3761 shift instructions. */
3762 void
3763 gen_shifty_op (int code, rtx *operands)
3764 {
3765 int value = INTVAL (operands[2]);
3766 int max, i;
3767
3768 /* Truncate the shift count in case it is out of bounds. */
3769 value = value & 31;
3770
3771 if (value == 31)
3772 {
3773 if (code == LSHIFTRT)
3774 {
3775 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3776 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3777 return;
3778 }
3779 else if (code == ASHIFT)
3780 {
3781 /* There is a two instruction sequence for 31 bit left shifts,
3782 but it requires r0. */
3783 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3784 {
3785 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3786 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3787 return;
3788 }
3789 }
3790 }
3791 else if (value == 0)
3792 {
3793 /* This can happen even when optimizing, if there were subregs before
3794 reload. Don't output a nop here, as this is never optimized away;
3795 use a no-op move instead. */
3796 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3797 return;
3798 }
3799
3800 max = ashl_lshr_seq[value].insn_count;
3801 for (i = 0; i < max; i++)
3802 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3803 }
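
/* For instance, a constant left shift by 10 is typically emitted as the
   two insns shll8 / shll2, while a logical right shift by 31 becomes the
   rotl / movt pair handled specially above.  */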
3804
3805 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3806 don't matter. */
3807 void
3808 gen_shifty_hi_op (int code, rtx *operands)
3809 {
3810 int value = INTVAL (operands[2]);
3811 int max, i;
3812 void (*gen_fun) (int, int, rtx);
3813
3814 /* This operation is used by and_shl for SImode values with a few
3815 high bits known to be cleared. */
3816 value &= 31;
3817 if (value == 0)
3818 {
3819 emit_insn (gen_nop ());
3820 return;
3821 }
3822
3823 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3824 if (code == ASHIFT)
3825 {
3826 max = ext_ashl_lshr_seq[value].insn_count;
3827 for (i = 0; i < max; i++)
3828 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3829 }
3830 else
3831 /* When shifting right, emit the shifts in reverse order, so that
3832 solitary negative values come first. */
3833 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3834 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3835 }
3836
3837 /* Output RTL for an arithmetic right shift.
3838 ??? Rewrite to use super-optimizer sequences. */
3839 bool
3840 expand_ashiftrt (rtx *operands)
3841 {
3842 rtx wrk;
3843 char func[18];
3844 int value;
3845
3846 if (TARGET_DYNSHIFT)
3847 {
3848 if (!CONST_INT_P (operands[2]))
3849 {
3850 rtx count = copy_to_mode_reg (SImode, operands[2]);
3851 emit_insn (gen_negsi2 (count, count));
3852 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3853 return true;
3854 }
3855 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3856 > 1 + SH_DYNAMIC_SHIFT_COST)
3857 {
3858 rtx count
3859 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3860 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3861 return true;
3862 }
3863 }
3864 if (!CONST_INT_P (operands[2]))
3865 return false;
3866
3867 value = INTVAL (operands[2]) & 31;
3868
3869 if (value == 31)
3870 {
3871 /* If we are called from abs expansion, arrange things so that we
3872 can use a single MT instruction that doesn't clobber the source,
3873 if LICM can hoist out the load of the constant zero. */
3874 if (currently_expanding_to_rtl)
3875 {
3876 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3877 operands[1]));
3878 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3879 return true;
3880 }
3881 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3882 return true;
3883 }
3884 else if (value >= 16 && value <= 19)
3885 {
3886 wrk = gen_reg_rtx (SImode);
3887 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3888 value -= 16;
3889 while (value--)
3890 gen_ashift (ASHIFTRT, 1, wrk);
3891 emit_move_insn (operands[0], wrk);
3892 return true;
3893 }
3894 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3895 else if (value <= 5)
3896 {
3897 wrk = gen_reg_rtx (SImode);
3898 emit_move_insn (wrk, operands[1]);
3899 while (value--)
3900 gen_ashift (ASHIFTRT, 1, wrk);
3901 emit_move_insn (operands[0], wrk);
3902 return true;
3903 }
3904
3905 wrk = gen_reg_rtx (Pmode);
3906
3907 /* Load the value into an arg reg and call a helper. */
3908 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3909 sprintf (func, "__ashiftrt_r4_%d", value);
3910 function_symbol (wrk, func, SFUNC_STATIC);
3911 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3912 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3913 return true;
3914 }
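
/* Examples of the resulting strategies: a shift by 2 emits two shar insns
   inline; a shift by 17 uses the 16 bit shift pattern plus one shar; a
   shift by 24 loads r4 and calls the __ashiftrt_r4_24 library routine
   (unless a dynamic shift is available and cheaper).  */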
3915
3916 /* Try to find a good way to implement the combiner pattern
3917 [(set (match_operand:SI 0 "register_operand" "r")
3918 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3919 (match_operand:SI 2 "const_int_operand" "n"))
3920 (match_operand:SI 3 "const_int_operand" "n"))) .
3921 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3922 return 0 for simple right / left or left/right shift combination.
3923 return 1 for a combination of shifts with zero_extend.
3924 return 2 for a combination of shifts with an AND that needs r0.
3925 return 3 for a combination of shifts with an AND that needs an extra
3926 scratch register, when the three highmost bits of the AND mask are clear.
3927 return 4 for a combination of shifts with an AND that needs an extra
3928 scratch register, when any of the three highmost bits of the AND mask
3929 is set.
3930 If ATTRP is set, store an initial right shift width in ATTRP[0],
3931 and the instruction length in ATTRP[1] . These values are not valid
3932 when returning 0.
3933 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3934 shift_amounts for the last shift value that is to be used before the
3935 sign extend. */
3936 int
3937 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3938 {
3939 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3940 int left = INTVAL (left_rtx), right;
3941 int best = 0;
3942 int cost, best_cost = 10000;
3943 int best_right = 0, best_len = 0;
3944 int i;
3945 int can_ext;
3946
3947 if (left < 0 || left > 31)
3948 return 0;
3949 if (CONST_INT_P (mask_rtx))
3950 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3951 else
3952 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3953 /* Can this be expressed as a right shift / left shift pair? */
3954 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3955 right = exact_log2 (lsb);
3956 mask2 = ~(mask + lsb - 1);
3957 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3958 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3959 if (! mask2)
3960 best_cost = ashl_lshr_seq[right].insn_count
3961 + ashl_lshr_seq[right + left].insn_count;
3962 /* mask has no trailing zeroes <==> ! right */
3963 else if (! right && mask2 == ~(lsb2 - 1))
3964 {
3965 int late_right = exact_log2 (lsb2);
3966 best_cost = ashl_lshr_seq[left + late_right].insn_count
3967 + ashl_lshr_seq[late_right].insn_count;
3968 }
3969 /* Try to use zero extend. */
3970 if (mask2 == ~(lsb2 - 1))
3971 {
3972 int width, first;
3973
3974 for (width = 8; width <= 16; width += 8)
3975 {
3976 /* Can we zero-extend right away? */
3977 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3978 {
3979 cost = 1 + ext_ashl_lshr_seq[right].insn_count
3980 + ext_ashl_lshr_seq[left + right].insn_count;
3981 if (cost < best_cost)
3982 {
3983 best = 1;
3984 best_cost = cost;
3985 best_right = right;
3986 best_len = cost;
3987 if (attrp)
3988 attrp[2] = -1;
3989 }
3990 continue;
3991 }
3992 /* ??? Could try to put zero extend into initial right shift,
3993 or even shift a bit left before the right shift. */
3994 /* Determine value of first part of left shift, to get to the
3995 zero extend cut-off point. */
3996 first = width - exact_log2 (lsb2) + right;
3997 if (first >= 0 && right + left - first >= 0)
3998 {
3999 cost = ext_ashl_lshr_seq[right].insn_count
4000 + ext_ashl_lshr_seq[first].insn_count + 1
4001 + ext_ashl_lshr_seq[right + left - first].insn_count;
4002
4003 if (cost < best_cost)
4004 {
4005 best = 1;
4006 best_cost = cost;
4007 best_right = right;
4008 best_len = cost;
4009 if (attrp)
4010 attrp[2] = first;
4011 }
4012 }
4013 }
4014 }
4015 /* Try to use the r0 AND pattern. */
4016 for (i = 0; i <= 2; i++)
4017 {
4018 if (i > right)
4019 break;
4020 if (! CONST_OK_FOR_K08 (mask >> i))
4021 continue;
4022 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4023 if (cost < best_cost)
4024 {
4025 best = 2;
4026 best_cost = cost;
4027 best_right = i;
4028 best_len = cost - 1;
4029 }
4030 }
4031 /* Try to use a scratch register to hold the AND operand. */
4032 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4033 for (i = 0; i <= 2; i++)
4034 {
4035 if (i > right)
4036 break;
4037 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4038 + (can_ext
4039 ? ext_ashl_lshr_seq
4040 : ashl_lshr_seq)[left + i].insn_count;
4041 if (cost < best_cost)
4042 {
4043 best = 4 - can_ext;
4044 best_cost = cost;
4045 best_right = i;
4046 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4047 }
4048 }
4049
4050 if (attrp)
4051 {
4052 attrp[0] = best_right;
4053 attrp[1] = best_len;
4054 }
4055 return best;
4056 }
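
/* As an example, for left = 2 and mask = 0x3fc (i.e. (x << 2) & 0x3fc,
   which equals (x & 0xff) << 2) the zero extend alternative should win
   and the function would return 1, describing an extu.b followed by a
   2 bit left shift.  */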
4057
4058 /* This is used in length attributes of the unnamed instructions
4059 corresponding to shl_and_kind return values of 1 and 2. */
4060 int
4061 shl_and_length (rtx insn)
4062 {
4063 rtx set_src, left_rtx, mask_rtx;
4064 int attributes[3];
4065
4066 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4067 left_rtx = XEXP (XEXP (set_src, 0), 1);
4068 mask_rtx = XEXP (set_src, 1);
4069 shl_and_kind (left_rtx, mask_rtx, attributes);
4070 return attributes[1];
4071 }
4072
4073 /* This is used in the length attribute of the and_shl_scratch instruction. */
4074 int
4075 shl_and_scr_length (rtx insn)
4076 {
4077 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4078 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4079 rtx op = XEXP (set_src, 0);
4080 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4081 op = XEXP (XEXP (op, 0), 0);
4082 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4083 }
4084
4085 /* Generate rtl for instructions for which shl_and_kind advised a particular
4086 method of generating them, i.e. returned nonzero. */
4087 bool
4088 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4089 {
4090 int attributes[3];
4091 unsigned HOST_WIDE_INT mask;
4092 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4093 int right, total_shift;
4094 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4095
4096 right = attributes[0];
4097 total_shift = INTVAL (left_rtx) + right;
4098 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4099 switch (kind)
4100 {
4101 default:
4102 return true;
4103 case 1:
4104 {
4105 int first = attributes[2];
4106 rtx operands[3];
4107
4108 if (first < 0)
4109 {
4110 emit_insn ((mask << right) <= 0xff
4111 ? gen_zero_extendqisi2 (dest,
4112 gen_lowpart (QImode, source))
4113 : gen_zero_extendhisi2 (dest,
4114 gen_lowpart (HImode, source)));
4115 source = dest;
4116 }
4117 if (source != dest)
4118 emit_insn (gen_movsi (dest, source));
4119 operands[0] = dest;
4120 if (right)
4121 {
4122 operands[2] = GEN_INT (right);
4123 gen_shifty_hi_op (LSHIFTRT, operands);
4124 }
4125 if (first > 0)
4126 {
4127 operands[2] = GEN_INT (first);
4128 gen_shifty_hi_op (ASHIFT, operands);
4129 total_shift -= first;
4130 mask <<= first;
4131 }
4132 if (first >= 0)
4133 emit_insn (mask <= 0xff
4134 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4135 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4136 if (total_shift > 0)
4137 {
4138 operands[2] = GEN_INT (total_shift);
4139 gen_shifty_hi_op (ASHIFT, operands);
4140 }
4141 break;
4142 }
4143 case 4:
4144 shift_gen_fun = gen_shifty_op;
4145 case 3:
4146 /* If the topmost bit that matters is set, set the topmost bits
4147 that don't matter. This way, we might be able to get a shorter
4148 signed constant. */
4149 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4150 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4151 case 2:
4152 /* Don't expand fine-grained when combining, because that will
4153 make the pattern fail. */
4154 if (currently_expanding_to_rtl
4155 || reload_in_progress || reload_completed)
4156 {
4157 rtx operands[3];
4158
4159 /* Cases 3 and 4 should be handled by this split
4160 only while combining. */
4161 gcc_assert (kind <= 2);
4162 if (right)
4163 {
4164 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4165 source = dest;
4166 }
4167 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4168 if (total_shift)
4169 {
4170 operands[0] = dest;
4171 operands[1] = dest;
4172 operands[2] = GEN_INT (total_shift);
4173 shift_gen_fun (ASHIFT, operands);
4174 }
4175 break;
4176 }
4177 else
4178 {
4179 int neg = 0;
4180 if (kind != 4 && total_shift < 16)
4181 {
4182 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4183 if (neg > 0)
4184 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4185 else
4186 neg = 0;
4187 }
4188 emit_insn (gen_and_shl_scratch (dest, source,
4189 GEN_INT (right),
4190 GEN_INT (mask),
4191 GEN_INT (total_shift + neg),
4192 GEN_INT (neg)));
4193 emit_insn (gen_movsi (dest, dest));
4194 break;
4195 }
4196 }
4197 return false;
4198 }
4199
4200 /* Try to find a good way to implement the combiner pattern
4201 [(set (match_operand:SI 0 "register_operand" "=r")
4202 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4203 (match_operand:SI 2 "const_int_operand" "n")
4204 (match_operand:SI 3 "const_int_operand" "n")
4205 (const_int 0)))
4206 (clobber (reg:SI T_REG))]
4207 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4208 return 0 for simple left / right shift combination.
4209 return 1 for left shift / 8 bit sign extend / left shift.
4210 return 2 for left shift / 16 bit sign extend / left shift.
4211 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4212 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4213 return 5 for left shift / 16 bit sign extend / right shift
4214 return 6 for < 8 bit sign extend / left shift.
4215 return 7 for < 8 bit sign extend / left shift / single right shift.
4216 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4217 int
4218 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4219 {
4220 int left, size, insize, ext;
4221 int cost = 0, best_cost;
4222 int kind;
4223
4224 left = INTVAL (left_rtx);
4225 size = INTVAL (size_rtx);
4226 insize = size - left;
4227 gcc_assert (insize > 0);
4228 /* Default to left / right shift. */
4229 kind = 0;
4230 best_cost = ashl_lshr_seq[32 - insize].insn_count
4231 + ashl_lshr_seq[32 - size].insn_count;
4232 if (size <= 16)
4233 {
4234 /* 16 bit shift / sign extend / 16 bit shift */
4235 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4236 + ashl_lshr_seq[16 - size].insn_count;
4237 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4238 below, by alternative 3 or something even better. */
4239 if (cost < best_cost)
4240 {
4241 kind = 5;
4242 best_cost = cost;
4243 }
4244 }
4245 /* Try a plain sign extend between two shifts. */
4246 for (ext = 16; ext >= insize; ext -= 8)
4247 {
4248 if (ext <= size)
4249 {
4250 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4251 + ashl_lshr_seq[size - ext].insn_count;
4252 if (cost < best_cost)
4253 {
4254 kind = ext / (unsigned) 8;
4255 best_cost = cost;
4256 }
4257 }
4258 /* Check if we can do a sloppy shift with a final signed shift
4259 restoring the sign. */
4260 if (EXT_SHIFT_SIGNED (size - ext))
4261 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4262 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4263 /* If not, maybe it's still cheaper to do the second shift sloppy,
4264 and do a final sign extend? */
4265 else if (size <= 16)
4266 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4267 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4268 + 1;
4269 else
4270 continue;
4271 if (cost < best_cost)
4272 {
4273 kind = ext / (unsigned) 8 + 2;
4274 best_cost = cost;
4275 }
4276 }
4277 /* Check if we can sign extend in r0 */
4278 if (insize < 8)
4279 {
4280 cost = 3 + ashl_lshr_seq[left].insn_count;
4281 if (cost < best_cost)
4282 {
4283 kind = 6;
4284 best_cost = cost;
4285 }
4286 /* Try the same with a final signed shift. */
4287 if (left < 31)
4288 {
4289 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4290 if (cost < best_cost)
4291 {
4292 kind = 7;
4293 best_cost = cost;
4294 }
4295 }
4296 }
4297 if (TARGET_DYNSHIFT)
4298 {
4299 /* Try to use a dynamic shift. */
4300 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4301 if (cost < best_cost)
4302 {
4303 kind = 0;
4304 best_cost = cost;
4305 }
4306 }
4307 if (costp)
4308 *costp = cost;
4309 return kind;
4310 }
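
/* Example: left = 2 and size = 10 give insize = 8, so the plain sign
   extend alternative with ext = 8 applies and the usual result is kind 1:
   an exts.b followed by a 2 bit left shift.  */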
4311
4312 /* Function to be used in the length attribute of the instructions
4313 implementing this pattern. */
4314 int
4315 shl_sext_length (rtx insn)
4316 {
4317 rtx set_src, left_rtx, size_rtx;
4318 int cost;
4319
4320 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4321 left_rtx = XEXP (XEXP (set_src, 0), 1);
4322 size_rtx = XEXP (set_src, 1);
4323 shl_sext_kind (left_rtx, size_rtx, &cost);
4324 return cost;
4325 }
4326
4327 /* Generate rtl for this pattern */
4328 bool
4329 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4330 {
4331 int kind;
4332 int left, size, insize, cost;
4333 rtx operands[3];
4334
4335 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4336 left = INTVAL (left_rtx);
4337 size = INTVAL (size_rtx);
4338 insize = size - left;
4339 switch (kind)
4340 {
4341 case 1:
4342 case 2:
4343 case 3:
4344 case 4:
4345 {
4346 int ext = kind & 1 ? 8 : 16;
4347 int shift2 = size - ext;
4348
4349 /* Don't expand fine-grained when combining, because that will
4350 make the pattern fail. */
4351 if (! currently_expanding_to_rtl
4352 && ! reload_in_progress && ! reload_completed)
4353 {
4354 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4355 emit_insn (gen_movsi (dest, source));
4356 break;
4357 }
4358 if (dest != source)
4359 emit_insn (gen_movsi (dest, source));
4360 operands[0] = dest;
4361 if (ext - insize)
4362 {
4363 operands[2] = GEN_INT (ext - insize);
4364 gen_shifty_hi_op (ASHIFT, operands);
4365 }
4366 emit_insn (kind & 1
4367 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4368 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4369 if (kind <= 2)
4370 {
4371 if (shift2)
4372 {
4373 operands[2] = GEN_INT (shift2);
4374 gen_shifty_op (ASHIFT, operands);
4375 }
4376 }
4377 else
4378 {
4379 if (shift2 > 0)
4380 {
4381 if (EXT_SHIFT_SIGNED (shift2))
4382 {
4383 operands[2] = GEN_INT (shift2 + 1);
4384 gen_shifty_op (ASHIFT, operands);
4385 operands[2] = const1_rtx;
4386 gen_shifty_op (ASHIFTRT, operands);
4387 break;
4388 }
4389 operands[2] = GEN_INT (shift2);
4390 gen_shifty_hi_op (ASHIFT, operands);
4391 }
4392 else if (shift2)
4393 {
4394 operands[2] = GEN_INT (-shift2);
4395 gen_shifty_hi_op (LSHIFTRT, operands);
4396 }
4397 emit_insn (size <= 8
4398 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4399 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4400 }
4401 break;
4402 }
4403 case 5:
4404 {
4405 int i = 16 - size;
4406 if (! currently_expanding_to_rtl
4407 && ! reload_in_progress && ! reload_completed)
4408 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4409 else
4410 {
4411 operands[0] = dest;
4412 operands[2] = GEN_INT (16 - insize);
4413 gen_shifty_hi_op (ASHIFT, operands);
4414 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4415 }
4416 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4417 while (--i >= 0)
4418 gen_ashift (ASHIFTRT, 1, dest);
4419 break;
4420 }
4421 case 6:
4422 case 7:
4423 /* Don't expand fine-grained when combining, because that will
4424 make the pattern fail. */
4425 if (! currently_expanding_to_rtl
4426 && ! reload_in_progress && ! reload_completed)
4427 {
4428 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4429 emit_insn (gen_movsi (dest, source));
4430 break;
4431 }
4432 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4433 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4434 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4435 operands[0] = dest;
4436 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4437 gen_shifty_op (ASHIFT, operands);
4438 if (kind == 7)
4439 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4440 break;
4441 default:
4442 return true;
4443 }
4444 return false;
4445 }
4446
4447 /* Prefix a symbol_ref name with "datalabel". */
4448 rtx
4449 gen_datalabel_ref (rtx sym)
4450 {
4451 const char *str;
4452
4453 if (GET_CODE (sym) == LABEL_REF)
4454 return gen_rtx_CONST (GET_MODE (sym),
4455 gen_rtx_UNSPEC (GET_MODE (sym),
4456 gen_rtvec (1, sym),
4457 UNSPEC_DATALABEL));
4458
4459 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4460
4461 str = XSTR (sym, 0);
4462 /* Share all SYMBOL_REF strings with the same value - that is important
4463 for cse. */
4464 str = IDENTIFIER_POINTER (get_identifier (str));
4465 XSTR (sym, 0) = str;
4466
4467 return sym;
4468 }
4469
4470 \f
4471 static alloc_pool label_ref_list_pool;
4472
4473 typedef struct label_ref_list_d
4474 {
4475 rtx label;
4476 struct label_ref_list_d *next;
4477 } *label_ref_list_t;
4478
4479 /* The SH cannot load a large constant into a register; constants have to
4480 come from a pc relative load. The reference of a pc relative load
4481 instruction must be less than 1k in front of the instruction. This
4482 means that we often have to dump a constant inside a function, and
4483 generate code to branch around it.
4484
4485 It is important to minimize this, since the branches will slow things
4486 down and make things bigger.
4487
4488 Worst case code looks like:
4489
4490 mov.l L1,rn
4491 bra L2
4492 nop
4493 align
4494 L1: .long value
4495 L2:
4496 ..
4497
4498 mov.l L3,rn
4499 bra L4
4500 nop
4501 align
4502 L3: .long value
4503 L4:
4504 ..
4505
4506 We fix this by performing a scan before scheduling, which notices which
4507 instructions need to have their operands fetched from the constant table
4508 and builds the table.
4509
4510 The algorithm is:
4511
4512 Scan to find an instruction which needs a pcrel move. Look forward to find
4513 the last barrier which is within MAX_COUNT bytes of the requirement.
4514 If there isn't one, make one. Process all the instructions between
4515 the insn we found and the barrier.
4516
4517 In the above example, we can tell that L3 is within 1k of L1, so
4518 the first move can be shrunk from the 3 insn+constant sequence into
4519 just 1 insn, and the constant moved to L3 to make:
4520
4521 mov.l L1,rn
4522 ..
4523 mov.l L3,rn
4524 bra L4
4525 nop
4526 align
4527 L3:.long value
4528 L4:.long value
4529
4530 Then the second move becomes the target for the shortening process. */
4531
4532 typedef struct
4533 {
4534 rtx value; /* Value in table. */
4535 rtx label; /* Label of value. */
4536 label_ref_list_t wend; /* End of window. */
4537 enum machine_mode mode; /* Mode of value. */
4538
4539 /* True if this constant is accessed as part of a post-increment
4540 sequence. Note that HImode constants are never accessed in this way. */
4541 bool part_of_sequence_p;
4542 } pool_node;
4543
4544 /* The maximum number of constants that can fit into one pool, since
4545 constants in the range 0..510 are at least 2 bytes long, and in the
4546 range from there to 1018 at least 4 bytes. */
4547
4548 #define MAX_POOL_SIZE 372
4549 static pool_node pool_vector[MAX_POOL_SIZE];
4550 static int pool_size;
4551 static rtx pool_window_label;
4552 static int pool_window_last;
4553
4554 static int max_labelno_before_reorg;
4555
4556 /* ??? If we need a constant in HImode which is the truncated value of a
4557 constant we need in SImode, we could combine the two entries thus saving
4558 two bytes. Is this common enough to be worth the effort of implementing
4559 it? */
4560
4561 /* ??? This stuff should be done at the same time that we shorten branches.
4562 As it is now, we must assume that all branches are the maximum size, and
4563 this causes us to almost always output constant pools sooner than
4564 necessary. */
4565
4566 /* Add a constant to the pool and return its label. */
4567 static rtx
4568 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4569 {
4570 int i;
4571 rtx lab, new_rtx;
4572 label_ref_list_t ref, newref;
4573
4574 /* First see if we've already got it. */
4575 for (i = 0; i < pool_size; i++)
4576 {
4577 if (x->code == pool_vector[i].value->code
4578 && mode == pool_vector[i].mode)
4579 {
4580 if (x->code == CODE_LABEL)
4581 {
4582 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4583 continue;
4584 }
4585 if (rtx_equal_p (x, pool_vector[i].value))
4586 {
4587 lab = new_rtx = 0;
4588 if (! last_value
4589 || ! i
4590 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4591 {
4592 new_rtx = gen_label_rtx ();
4593 LABEL_REFS (new_rtx) = pool_vector[i].label;
4594 pool_vector[i].label = lab = new_rtx;
4595 }
4596 if (lab && pool_window_label)
4597 {
4598 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4599 newref->label = pool_window_label;
4600 ref = pool_vector[pool_window_last].wend;
4601 newref->next = ref;
4602 pool_vector[pool_window_last].wend = newref;
4603 }
4604 if (new_rtx)
4605 pool_window_label = new_rtx;
4606 pool_window_last = i;
4607 return lab;
4608 }
4609 }
4610 }
4611
4612 /* Need a new one. */
4613 pool_vector[pool_size].value = x;
4614 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4615 {
4616 lab = 0;
4617 pool_vector[pool_size - 1].part_of_sequence_p = true;
4618 }
4619 else
4620 lab = gen_label_rtx ();
4621 pool_vector[pool_size].mode = mode;
4622 pool_vector[pool_size].label = lab;
4623 pool_vector[pool_size].wend = NULL;
4624 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4625 if (lab && pool_window_label)
4626 {
4627 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4628 newref->label = pool_window_label;
4629 ref = pool_vector[pool_window_last].wend;
4630 newref->next = ref;
4631 pool_vector[pool_window_last].wend = newref;
4632 }
4633 if (lab)
4634 pool_window_label = lab;
4635 pool_window_last = pool_size;
4636 pool_size++;
4637 return lab;
4638 }
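
/* For example, if two insns covered by the same pool need the same SImode
   constant, the second call finds the existing pool_vector entry and merely
   chains a fresh label onto it via LABEL_REFS instead of adding a second
   table entry.  */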
4639
4640 /* Output the literal table. START, if nonzero, is the first instruction
4641 this table is needed for, and also indicates that there is at least one
4642 casesi_worker_2 instruction; we have to emit the operand3 labels from
4643 these insns at a 4-byte aligned position. BARRIER is the barrier
4644 after which we are to place the table. */
4645 static void
4646 dump_table (rtx start, rtx barrier)
4647 {
4648 rtx scan = barrier;
4649 int i;
4650 bool need_align = true;
4651 rtx lab;
4652 label_ref_list_t ref;
4653 bool have_df = false;
4654
4655 /* Do two passes; the first time, dump out the HI sized constants. */
4656
4657 for (i = 0; i < pool_size; i++)
4658 {
4659 pool_node *p = &pool_vector[i];
4660
4661 if (p->mode == HImode)
4662 {
4663 if (need_align)
4664 {
4665 scan = emit_insn_after (gen_align_2 (), scan);
4666 need_align = false;
4667 }
4668 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4669 scan = emit_label_after (lab, scan);
4670 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4671 scan);
4672 for (ref = p->wend; ref; ref = ref->next)
4673 {
4674 lab = ref->label;
4675 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4676 }
4677 }
4678 else if (p->mode == DFmode)
4679 have_df = true;
4680 }
4681
4682 need_align = true;
4683
4684 if (start)
4685 {
4686 scan = emit_insn_after (gen_align_4 (), scan);
4687 need_align = false;
4688 for (; start != barrier; start = NEXT_INSN (start))
4689 if (NONJUMP_INSN_P (start)
4690 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4691 {
4692 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4693 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4694
4695 scan = emit_label_after (lab, scan);
4696 }
4697 }
4698 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4699 {
4700 rtx align_insn = NULL_RTX;
4701
4702 scan = emit_label_after (gen_label_rtx (), scan);
4703 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4704 need_align = false;
4705
4706 for (i = 0; i < pool_size; i++)
4707 {
4708 pool_node *p = &pool_vector[i];
4709
4710 switch (p->mode)
4711 {
4712 case HImode:
4713 break;
4714 case SImode:
4715 case SFmode:
4716 if (align_insn && !p->part_of_sequence_p)
4717 {
4718 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4719 emit_label_before (lab, align_insn);
4720 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4721 align_insn);
4722 for (ref = p->wend; ref; ref = ref->next)
4723 {
4724 lab = ref->label;
4725 emit_insn_before (gen_consttable_window_end (lab),
4726 align_insn);
4727 }
4728 delete_insn (align_insn);
4729 align_insn = NULL_RTX;
4730 continue;
4731 }
4732 else
4733 {
4734 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4735 scan = emit_label_after (lab, scan);
4736 scan = emit_insn_after (gen_consttable_4 (p->value,
4737 const0_rtx), scan);
4738 need_align = ! need_align;
4739 }
4740 break;
4741 case DFmode:
4742 if (need_align)
4743 {
4744 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4745 align_insn = scan;
4746 need_align = false;
4747 }
4748 case DImode:
4749 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4750 scan = emit_label_after (lab, scan);
4751 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4752 scan);
4753 break;
4754 default:
4755 gcc_unreachable ();
4756 }
4757
4758 if (p->mode != HImode)
4759 {
4760 for (ref = p->wend; ref; ref = ref->next)
4761 {
4762 lab = ref->label;
4763 scan = emit_insn_after (gen_consttable_window_end (lab),
4764 scan);
4765 }
4766 }
4767 }
4768
4769 pool_size = 0;
4770 }
4771
4772 for (i = 0; i < pool_size; i++)
4773 {
4774 pool_node *p = &pool_vector[i];
4775
4776 switch (p->mode)
4777 {
4778 case HImode:
4779 break;
4780 case SImode:
4781 case SFmode:
4782 if (need_align)
4783 {
4784 need_align = false;
4785 scan = emit_label_after (gen_label_rtx (), scan);
4786 scan = emit_insn_after (gen_align_4 (), scan);
4787 }
4788 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4789 scan = emit_label_after (lab, scan);
4790 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4791 scan);
4792 break;
4793 case DFmode:
4794 case DImode:
4795 if (need_align)
4796 {
4797 need_align = false;
4798 scan = emit_label_after (gen_label_rtx (), scan);
4799 scan = emit_insn_after (gen_align_4 (), scan);
4800 }
4801 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4802 scan = emit_label_after (lab, scan);
4803 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4804 scan);
4805 break;
4806 default:
4807 gcc_unreachable ();
4808 }
4809
4810 if (p->mode != HImode)
4811 {
4812 for (ref = p->wend; ref; ref = ref->next)
4813 {
4814 lab = ref->label;
4815 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4816 }
4817 }
4818 }
4819
4820 scan = emit_insn_after (gen_consttable_end (), scan);
4821 scan = emit_barrier_after (scan);
4822 pool_size = 0;
4823 pool_window_label = NULL_RTX;
4824 pool_window_last = 0;
4825 }
4826
4827 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4828
4829 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4830
4831 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4832 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4833 need to fix it if the input value is CONST_OK_FOR_I08. */
4834 static bool
4835 broken_move (rtx insn)
4836 {
4837 if (NONJUMP_INSN_P (insn))
4838 {
4839 rtx pat = PATTERN (insn);
4840 if (GET_CODE (pat) == PARALLEL)
4841 pat = XVECEXP (pat, 0, 0);
4842 if (GET_CODE (pat) == SET
4843 /* We can load any 8-bit value if we don't care what the high
4844 order bits end up as. */
4845 && GET_MODE (SET_DEST (pat)) != QImode
4846 && (CONSTANT_P (SET_SRC (pat))
4847 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4848 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4849 /* Match mova_const. */
4850 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4851 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4852 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4853 && ! (TARGET_SH2E
4854 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4855 && (fp_zero_operand (SET_SRC (pat))
4856 || fp_one_operand (SET_SRC (pat)))
4857 /* In general we don't know the current setting of fpscr, so
4858 disable fldi.
4859 There is an exception if this was a register-register move
4860 before reload - and hence it was ascertained that we have
4861 single precision setting - and in a post-reload optimization
4862 we changed this to do a constant load. In that case
4863 we don't have an r0 clobber, hence we must use fldi. */
4864 && (TARGET_FMOVD
4865 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4866 == SCRATCH))
4867 && REG_P (SET_DEST (pat))
4868 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4869 && ! (TARGET_SH2A
4870 && GET_MODE (SET_DEST (pat)) == SImode
4871 && (satisfies_constraint_I20 (SET_SRC (pat))
4872 || satisfies_constraint_I28 (SET_SRC (pat))))
4873 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4874 return true;
4875 }
4876
4877 return false;
4878 }
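
/* For example, (set (reg:SI r1) (const_int 0x12345678)) is a broken move:
   the constant satisfies neither I08 nor (on SH2A) I20/I28, so it must be
   placed in the pool and loaded pc-relative. A QImode destination or a
   constant such as 100 would be left alone.  */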
4879
4880 /* Return true if the specified insn is a mova insn. */
4881 static bool
4882 mova_p (rtx insn)
4883 {
4884 return (NONJUMP_INSN_P (insn)
4885 && GET_CODE (PATTERN (insn)) == SET
4886 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4887 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4888 /* Don't match mova_const. */
4889 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4890 }
4891
4892 /* Fix up a mova from a switch that went out of range. */
4893 static void
4894 fixup_mova (rtx mova)
4895 {
4896 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4897 if (! flag_pic)
4898 {
4899 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4900 INSN_CODE (mova) = -1;
4901 }
4902 else
4903 {
4904 rtx worker = mova;
4905 rtx lab = gen_label_rtx ();
4906 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4907
4908 do
4909 {
4910 worker = NEXT_INSN (worker);
4911 gcc_assert (worker
4912 && !LABEL_P (worker)
4913 && !JUMP_P (worker));
4914 } while (NOTE_P (worker)
4915 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4916 wpat = PATTERN (worker);
4917 wpat0 = XVECEXP (wpat, 0, 0);
4918 wpat1 = XVECEXP (wpat, 0, 1);
4919 wsrc = SET_SRC (wpat0);
4920 PATTERN (worker) = (gen_casesi_worker_2
4921 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4922 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4923 XEXP (wpat1, 0)));
4924 INSN_CODE (worker) = -1;
4925 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4926 base = gen_rtx_LABEL_REF (Pmode, lab);
4927 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4928 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4929 INSN_CODE (mova) = -1;
4930 }
4931 }
4932
4933 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4934 *num_mova, and check if the new mova is not nested within the first one.
4935 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4936 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4937 static int
4938 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4939 {
4940 int n_addr = 0; /* Initialization to shut up spurious warning. */
4941 int f_target, n_target = 0; /* Likewise. */
4942
4943 if (optimize)
4944 {
4945 /* If NEW_MOVA has no address yet, it will be handled later. */
4946 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4947 return -1;
4948
4949 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4950 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4951 if (n_addr > n_target || n_addr + 1022 < n_target)
4952 {
4953 /* Change the mova into a load.
4954 broken_move will then return true for it. */
4955 fixup_mova (new_mova);
4956 return 1;
4957 }
4958 }
4959 if (!(*num_mova)++)
4960 {
4961 *first_mova = new_mova;
4962 return 2;
4963 }
4964 if (!optimize
4965 || ((f_target
4966 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4967 >= n_target))
4968 return -1;
4969
4970 (*num_mova)--;
4971 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4972 > n_target - n_addr)
4973 {
4974 fixup_mova (*first_mova);
4975 return 0;
4976 }
4977 else
4978 {
4979 fixup_mova (new_mova);
4980 return 1;
4981 }
4982 }
4983
4984 /* Find the last barrier from insn FROM which is close enough to hold the
4985 constant pool. If we can't find one, then create one near the end of
4986 the range. */
4987 static rtx
4988 find_barrier (int num_mova, rtx mova, rtx from)
4989 {
4990 int count_si = 0;
4991 int count_hi = 0;
4992 int found_hi = 0;
4993 int found_si = 0;
4994 int found_di = 0;
4995 int hi_align = 2;
4996 int si_align = 2;
4997 int leading_mova = num_mova;
4998 rtx barrier_before_mova = NULL_RTX;
4999 rtx found_barrier = NULL_RTX;
5000 rtx good_barrier = NULL_RTX;
5001 int si_limit;
5002 int hi_limit;
5003 rtx orig = from;
5004 rtx last_got = NULL_RTX;
5005 rtx last_symoff = NULL_RTX;
5006
5007 /* For HImode: range is 510, add 4 because pc counts from address of
5008 second instruction after this one, subtract 2 for the jump instruction
5009 that we may need to emit before the table, subtract 2 for the instruction
5010 that fills the jump delay slot (in very rare cases, reorg will take an
5011 instruction from after the constant pool or will leave the delay slot
5012 empty). This gives 510.
5013 For SImode: range is 1020, add 4 because pc counts from address of
5014 second instruction after this one, subtract 2 in case pc is 2 byte
5015 aligned, subtract 2 for the jump instruction that we may need to emit
5016 before the table, subtract 2 for the instruction that fills the jump
5017 delay slot. This gives 1018. */
5018
5019 /* The branch will always be shortened now that the reference address for
5020 forward branches is the successor address, so we no longer need to make
5021 adjustments to the [sh]i_limit for -O0. */
5022
5023 si_limit = 1018;
5024 hi_limit = 510;
5025
5026 while (from && count_si < si_limit && count_hi < hi_limit)
5027 {
5028 int inc = get_attr_length (from);
5029 int new_align = 1;
5030
5031 /* If this is a label that existed at the time of the compute_alignments
5032 call, determine the alignment. N.B. When find_barrier recurses for
5033 an out-of-reach mova, we might see labels at the start of previously
5034 inserted constant tables. */
5035 if (LABEL_P (from)
5036 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5037 {
5038 if (optimize)
5039 new_align = 1 << label_to_alignment (from);
5040 else if (BARRIER_P (prev_nonnote_insn (from)))
5041 new_align = 1 << barrier_align (from);
5042 else
5043 new_align = 1;
5044 inc = 0;
5045 }
5046 /* In case we are scanning a constant table because of recursion, check
5047 for explicit alignments. If the table is long, we might be forced
5048 to emit the new table in front of it; the length of the alignment
5049 might be the last straw. */
5050 else if (NONJUMP_INSN_P (from)
5051 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5052 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5053 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5054 /* When we find the end of a constant table, paste the new constant
5055 at the end. That is better than putting it in front because
5056 this way, we don't need extra alignment for adding a 4-byte-aligned
5057 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5058 else if (NONJUMP_INSN_P (from)
5059 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5060 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5061 return from;
5062
5063 if (BARRIER_P (from))
5064 {
5065 rtx next;
5066
5067 found_barrier = from;
5068
5069 /* If we are at the end of the function, or in front of an alignment
5070 instruction, we need not insert an extra alignment. We prefer
5071 this kind of barrier. */
5072 if (barrier_align (from) > 2)
5073 good_barrier = from;
5074
5075 /* If we are at the end of a hot/cold block, dump the constants
5076 here. */
5077 next = NEXT_INSN (from);
5078 if (next
5079 && NOTE_P (next)
5080 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5081 break;
5082 }
5083
5084 if (broken_move (from))
5085 {
5086 rtx pat, src, dst;
5087 enum machine_mode mode;
5088
5089 pat = PATTERN (from);
5090 if (GET_CODE (pat) == PARALLEL)
5091 pat = XVECEXP (pat, 0, 0);
5092 src = SET_SRC (pat);
5093 dst = SET_DEST (pat);
5094 mode = GET_MODE (dst);
5095
5096 /* A GOT pc-relative setup comes in a pair of
5097 mova .L8,r0
5098 mov.l .L8,r12
5099 instructions (plus an add r0,r12).
5100 Remember if we see one without the other. */
5101 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5102 last_got = last_got ? NULL_RTX : from;
5103 else if (PIC_ADDR_P (src))
5104 last_got = last_got ? NULL_RTX : from;
5105
5106 /* We must explicitly check the mode, because sometimes the
5107 front end will generate code to load unsigned constants into
5108 HImode targets without properly sign extending them. */
5109 if (mode == HImode
5110 || (mode == SImode && satisfies_constraint_I16 (src)
5111 && REGNO (dst) != FPUL_REG))
5112 {
5113 found_hi += 2;
5114 /* We put the short constants before the long constants, so
5115 we must count the length of short constants in the range
5116 for the long constants. */
5117 /* ??? This isn't optimal, but is easy to do. */
5118 si_limit -= 2;
5119 }
5120 else
5121 {
5122 /* We dump DF/DI constants before SF/SI ones, because
5123 the limit is the same, but the alignment requirements
5124 are higher. We may waste up to 4 additional bytes
5125 for alignment, and the DF/DI constant may have
5126 another SF/SI constant placed before it. */
5127 if (TARGET_SHCOMPACT
5128 && ! found_di
5129 && (mode == DFmode || mode == DImode))
5130 {
5131 found_di = 1;
5132 si_limit -= 8;
5133 }
5134 while (si_align > 2 && found_si + si_align - 2 > count_si)
5135 si_align >>= 1;
5136 if (found_si > count_si)
5137 count_si = found_si;
5138 found_si += GET_MODE_SIZE (mode);
5139 if (num_mova)
5140 si_limit -= GET_MODE_SIZE (mode);
5141 }
5142 }
5143
5144 if (mova_p (from))
5145 {
5146 switch (untangle_mova (&num_mova, &mova, from))
5147 {
5148 case 1:
5149 if (flag_pic)
5150 {
5151 rtx src = SET_SRC (PATTERN (from));
5152 if (GET_CODE (src) == CONST
5153 && GET_CODE (XEXP (src, 0)) == UNSPEC
5154 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5155 last_symoff = from;
5156 }
5157 break;
5158 case 0: return find_barrier (0, 0, mova);
5159 case 2:
5160 {
5161 leading_mova = 0;
5162 barrier_before_mova
5163 = good_barrier ? good_barrier : found_barrier;
5164 }
5165 default: break;
5166 }
5167 if (found_si > count_si)
5168 count_si = found_si;
5169 }
5170 else if (JUMP_TABLE_DATA_P (from)
5171 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5172 {
5173 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5174 || (num_mova
5175 && (prev_nonnote_insn (from)
5176 == XEXP (MOVA_LABELREF (mova), 0))))
5177 num_mova--;
5178 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5179 {
5180 /* We have just passed the barrier in front of the
5181 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5182 the ADDR_DIFF_VEC is accessed as data, just like our pool
5183 constants, this is a good opportunity to accommodate what
5184 we have gathered so far.
5185 If we waited any longer, we could end up at a barrier in
5186 front of code, which gives worse cache usage for separated
5187 instruction / data caches. */
5188 good_barrier = found_barrier;
5189 break;
5190 }
5191 else
5192 {
5193 rtx body = PATTERN (from);
5194 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5195 }
5196 }
5197 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5198 else if (JUMP_P (from)
5199 && ! TARGET_SH2
5200 && ! optimize_size)
5201 new_align = 4;
5202
5203 /* There is a possibility that a bf is transformed into a bf/s by the
5204 delay slot scheduler. */
5205 if (JUMP_P (from)
5206 && get_attr_type (from) == TYPE_CBRANCH
5207 && ! sequence_insn_p (from))
5208 inc += 2;
5209
5210 if (found_si)
5211 {
5212 count_si += inc;
5213 if (new_align > si_align)
5214 {
5215 si_limit -= (count_si - 1) & (new_align - si_align);
5216 si_align = new_align;
5217 }
5218 count_si = (count_si + new_align - 1) & -new_align;
5219 }
5220 if (found_hi)
5221 {
5222 count_hi += inc;
5223 if (new_align > hi_align)
5224 {
5225 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5226 hi_align = new_align;
5227 }
5228 count_hi = (count_hi + new_align - 1) & -new_align;
5229 }
5230 from = NEXT_INSN (from);
5231 }
5232
5233 if (num_mova)
5234 {
5235 if (leading_mova)
5236 {
5237 /* Try as we might, the leading mova is out of range. Change
5238 it into a load (which will become a pcload) and retry. */
5239 fixup_mova (mova);
5240 return find_barrier (0, 0, mova);
5241 }
5242 else
5243 {
5244 /* Insert the constant pool table before the mova instruction,
5245 to prevent the mova label reference from going out of range. */
5246 from = mova;
5247 good_barrier = found_barrier = barrier_before_mova;
5248 }
5249 }
5250
5251 if (found_barrier)
5252 {
5253 if (good_barrier && next_real_insn (found_barrier))
5254 found_barrier = good_barrier;
5255 }
5256 else
5257 {
5258 /* We didn't find a barrier in time to dump our stuff,
5259 so we'll make one. */
5260 rtx label = gen_label_rtx ();
5261
5262 /* Don't emit a constant table in the middle of insns for
5263 casesi_worker_2. This is a bit overkill but is enough
5264 because casesi_worker_2 doesn't appear very frequently. */
5265 if (last_symoff)
5266 from = last_symoff;
5267
5268 /* If we exceeded the range, then we must back up over the last
5269 instruction we looked at. Otherwise, we just need to undo the
5270 NEXT_INSN at the end of the loop. */
5271 if (PREV_INSN (from) != orig
5272 && (count_hi > hi_limit || count_si > si_limit))
5273 from = PREV_INSN (PREV_INSN (from));
5274 else
5275 from = PREV_INSN (from);
5276
5277 /* Don't emit a constant table in the middle of the global pointer setup,
5278 since that would move the addressing base GOT into another table.
5279 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5280 in the pool anyway, so just move up the whole constant pool.
5281
5282 However, avoid doing so when the last single GOT mov is the starting
5283 insn itself. Going back past the start insn would create a negative
5284 offset, causing errors. */
5285 if (last_got && last_got != orig)
5286 from = PREV_INSN (last_got);
5287
5288 /* Don't insert the constant pool table at a position which
5289 may be a landing pad. */
5290 if (flag_exceptions
5291 && CALL_P (from)
5292 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5293 from = PREV_INSN (from);
5294
5295 /* Walk back to be just before any jump or label.
5296 Putting it before a label reduces the number of times the branch
5297 around the constant pool table will be hit. Putting it before
5298 a jump makes it more likely that the bra delay slot will be
5299 filled. */
5300 while (NOTE_P (from) || JUMP_P (from)
5301 || LABEL_P (from))
5302 from = PREV_INSN (from);
5303
5304 /* Make sure we do not split between a call and its corresponding
5305 CALL_ARG_LOCATION note. */
5306 if (CALL_P (from))
5307 {
5308 rtx next = NEXT_INSN (from);
5309 if (next && NOTE_P (next)
5310 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5311 from = next;
5312 }
5313
5314 from = emit_jump_insn_after (gen_jump (label), from);
5315 JUMP_LABEL (from) = label;
5316 LABEL_NUSES (label) = 1;
5317 found_barrier = emit_barrier_after (from);
5318 emit_label_after (label, found_barrier);
5319 }
5320
5321 return found_barrier;
5322 }
5323
5324 /* If the instruction INSN is implemented by a special function, and we can
5325 positively find the register that is used to call the sfunc, and this
5326 register is not used anywhere else in this instruction - except as the
5327 destination of a set, return this register; else, return 0. */
5328 rtx
5329 sfunc_uses_reg (rtx insn)
5330 {
5331 int i;
5332 rtx pattern, part, reg_part, reg;
5333
5334 if (!NONJUMP_INSN_P (insn))
5335 return NULL_RTX;
5336 pattern = PATTERN (insn);
5337 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5338 return NULL_RTX;
5339
5340 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5341 {
5342 part = XVECEXP (pattern, 0, i);
5343 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5344 reg_part = part;
5345 }
5346 if (! reg_part)
5347 return NULL_RTX;
5348 reg = XEXP (reg_part, 0);
5349 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5350 {
5351 part = XVECEXP (pattern, 0, i);
5352 if (part == reg_part || GET_CODE (part) == CLOBBER)
5353 continue;
5354 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5355 && REG_P (SET_DEST (part)))
5356 ? SET_SRC (part) : part)))
5357 return NULL_RTX;
5358 }
5359 return reg;
5360 }
5361
5362 /* See if the only way in which INSN uses REG is by calling it, or by
5363 setting it while calling it. Set *SET to a SET rtx if the register
5364 is set by INSN. */
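/* In other words, this returns false when the only uses of REG are as the
   call (or sfunc) address and, possibly, as the destination of a SET, and
   true if REG is used in any other way. */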
5365 static bool
5366 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
5367 {
5368 rtx pattern, reg2;
5369
5370 *set = NULL_RTX;
5371
5372 reg2 = sfunc_uses_reg (insn);
5373 if (reg2 && REGNO (reg2) == REGNO (reg))
5374 {
5375 pattern = single_set (insn);
5376 if (pattern
5377 && REG_P (SET_DEST (pattern))
5378 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5379 *set = pattern;
5380 return false;
5381 }
5382 if (!CALL_P (insn))
5383 {
5384 /* We don't use rtx_equal_p because we don't care if the mode is
5385 different. */
5386 pattern = single_set (insn);
5387 if (pattern
5388 && REG_P (SET_DEST (pattern))
5389 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5390 {
5391 rtx par, part;
5392 int i;
5393
5394 *set = pattern;
5395 par = PATTERN (insn);
5396 if (GET_CODE (par) == PARALLEL)
5397 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5398 {
5399 part = XVECEXP (par, 0, i);
5400 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5401 return true;
5402 }
5403 return reg_mentioned_p (reg, SET_SRC (pattern));
5404 }
5405
5406 return true;
5407 }
5408
5409 pattern = PATTERN (insn);
5410
5411 if (GET_CODE (pattern) == PARALLEL)
5412 {
5413 int i;
5414
5415 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5416 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5417 return true;
5418 pattern = XVECEXP (pattern, 0, 0);
5419 }
5420
5421 if (GET_CODE (pattern) == SET)
5422 {
5423 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5424 {
5425 /* We don't use rtx_equal_p, because we don't care if the
5426 mode is different. */
5427 if (!REG_P (SET_DEST (pattern))
5428 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5429 return true;
5430
5431 *set = pattern;
5432 }
5433
5434 pattern = SET_SRC (pattern);
5435 }
5436
5437 if (GET_CODE (pattern) != CALL
5438 || !MEM_P (XEXP (pattern, 0))
5439 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5440 return true;
5441
5442 return false;
5443 }
5444
5445 /* Given X, a pattern of an insn or a part of it, return a mask of used
5446 general registers. Bits 0..15 mean that the respective registers
5447 are used as inputs in the instruction. Bits 16..31 mean that the
5448 registers 0..15, respectively, are used as outputs, or are clobbered.
5449 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
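/* For example, assuming GET_MODE (X) occupies a single hard register (as
   SImode does here), a use of r3 as an input yields the mask
   1 << 3 == 0x00000008, while r3 as the destination of a SET
   (IS_DEST == 16) yields 1 << 19 == 0x00080000. */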
5450 int
5451 regs_used (rtx x, int is_dest)
5452 {
5453 enum rtx_code code;
5454 const char *fmt;
5455 int i, used = 0;
5456
5457 if (! x)
5458 return used;
5459 code = GET_CODE (x);
5460 switch (code)
5461 {
5462 case REG:
5463 if (REGNO (x) < 16)
5464 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5465 << (REGNO (x) + is_dest));
5466 return 0;
5467 case SUBREG:
5468 {
5469 rtx y = SUBREG_REG (x);
5470
5471 if (!REG_P (y))
5472 break;
5473 if (REGNO (y) < 16)
5474 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5475 << (REGNO (y) +
5476 subreg_regno_offset (REGNO (y),
5477 GET_MODE (y),
5478 SUBREG_BYTE (x),
5479 GET_MODE (x)) + is_dest));
5480 return 0;
5481 }
5482 case SET:
5483 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5484 case RETURN:
5485 /* If there was a return value, it must have been indicated with USE. */
5486 return 0x00ffff00;
5487 case CLOBBER:
5488 is_dest = 1;
5489 break;
5490 case MEM:
5491 is_dest = 0;
5492 break;
5493 case CALL:
5494 used |= 0x00ff00f0;
5495 break;
5496 default:
5497 break;
5498 }
5499
5500 fmt = GET_RTX_FORMAT (code);
5501
5502 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5503 {
5504 if (fmt[i] == 'E')
5505 {
5506 int j;
5507 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5508 used |= regs_used (XVECEXP (x, i, j), is_dest);
5509 }
5510 else if (fmt[i] == 'e')
5511 used |= regs_used (XEXP (x, i), is_dest);
5512 }
5513 return used;
5514 }
5515
5516 /* Create an instruction that prevents redirection of a conditional branch
5517 to the destination of the JUMP with address ADDR.
5518 If the branch needs to be implemented as an indirect jump, try to find
5519 a scratch register for it.
5520 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5521 If any preceding insn that doesn't fit into a delay slot is good enough,
5522 pass 1. Pass 2 if a definite blocking insn is needed.
5523 -1 is used internally to avoid deep recursion.
5524 If a blocking instruction is made or recognized, return it. */
5525 static rtx
5526 gen_block_redirect (rtx jump, int addr, int need_block)
5527 {
5528 int dead = 0;
5529 rtx prev = prev_nonnote_insn (jump);
5530 rtx dest;
5531
5532 /* First, check if we already have an instruction that satisfies our need. */
5533 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5534 {
5535 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5536 return prev;
5537 if (GET_CODE (PATTERN (prev)) == USE
5538 || GET_CODE (PATTERN (prev)) == CLOBBER
5539 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5540 prev = jump;
5541 else if ((need_block &= ~1) < 0)
5542 return prev;
5543 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5544 need_block = 0;
5545 }
5546 if (GET_CODE (PATTERN (jump)) == RETURN)
5547 {
5548 if (! need_block)
5549 return prev;
5550 /* Reorg even does nasty things with return insns that cause branches
5551 to go out of range - see find_end_label and callers. */
5552 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5553 }
5554 /* We can't use JUMP_LABEL here because it might be undefined
5555 when not optimizing. */
5556 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5557 /* If the branch is out of range, try to find a scratch register for it. */
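/* The unsigned comparison below folds the two-sided range test
   -4092 <= dest - addr <= 4098 into a single check: any displacement
   outside that window wraps around as an unsigned value and exceeds
   4092 + 4098. */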
5558 if (optimize
5559 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5560 > 4092 + 4098))
5561 {
5562 rtx scan;
5563 /* Don't look for the stack pointer as a scratch register,
5564 it would cause trouble if an interrupt occurred. */
5565 unsigned attempt = 0x7fff, used;
5566 int jump_left = flag_expensive_optimizations + 1;
5567
5568 /* It is likely that the most recent eligible instruction is wanted for
5569 the delay slot. Therefore, find out which registers it uses, and
5570 try to avoid using them. */
5571
5572 for (scan = jump; (scan = PREV_INSN (scan)); )
5573 {
5574 enum rtx_code code;
5575
5576 if (INSN_DELETED_P (scan))
5577 continue;
5578 code = GET_CODE (scan);
5579 if (code == CODE_LABEL || code == JUMP_INSN)
5580 break;
5581 if (code == INSN
5582 && GET_CODE (PATTERN (scan)) != USE
5583 && GET_CODE (PATTERN (scan)) != CLOBBER
5584 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5585 {
5586 attempt &= ~regs_used (PATTERN (scan), 0);
5587 break;
5588 }
5589 }
5590 for (used = dead = 0, scan = JUMP_LABEL (jump);
5591 (scan = NEXT_INSN (scan)); )
5592 {
5593 enum rtx_code code;
5594
5595 if (INSN_DELETED_P (scan))
5596 continue;
5597 code = GET_CODE (scan);
5598 if (INSN_P (scan))
5599 {
5600 used |= regs_used (PATTERN (scan), 0);
5601 if (code == CALL_INSN)
5602 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5603 dead |= (used >> 16) & ~used;
5604 if (dead & attempt)
5605 {
5606 dead &= attempt;
5607 break;
5608 }
5609 if (code == JUMP_INSN)
5610 {
5611 if (jump_left-- && simplejump_p (scan))
5612 scan = JUMP_LABEL (scan);
5613 else
5614 break;
5615 }
5616 }
5617 }
5618 /* Mask out the stack pointer again, in case it was
5619 the only 'free' register we have found. */
5620 dead &= 0x7fff;
5621 }
5622 /* If the immediate destination is still in range, check for possible
5623 threading with a jump beyond the delay slot insn.
5624 Don't check if we are called recursively; the jump has been or will be
5625 checked in a different invocation then. */
5626
5627 else if (optimize && need_block >= 0)
5628 {
5629 rtx next = next_active_insn (next_active_insn (dest));
5630 if (next && JUMP_P (next)
5631 && GET_CODE (PATTERN (next)) == SET
5632 && recog_memoized (next) == CODE_FOR_jump_compact)
5633 {
5634 dest = JUMP_LABEL (next);
5635 if (dest
5636 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5637 > 4092 + 4098))
5638 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5639 }
5640 }
5641
5642 if (dead)
5643 {
5644 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5645
5646 /* It would be nice if we could convert the jump into an indirect
5647 jump / far branch right now, thus exposing all constituent
5648 instructions to further optimization. However, reorg uses
5649 simplejump_p to determine if there is an unconditional jump where
5650 it should try to schedule instructions from the target of the
5651 branch; simplejump_p fails for indirect jumps even if they have
5652 a JUMP_LABEL. */
5653 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5654 (reg, GEN_INT (unspec_bbr_uid++)),
5655 jump);
5656 /* ??? We would like this to have the scope of the jump, but that
5657 scope will change when a delay slot insn of an inner scope is added.
5658 Hence, after delay slot scheduling, we'll have to expect
5659 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5660 the jump. */
5661
5662 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5663 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5664 return insn;
5665 }
5666 else if (need_block)
5667 /* We can't use JUMP_LABEL here because it might be undefined
5668 when not optimizing. */
5669 return emit_insn_before (gen_block_branch_redirect
5670 (GEN_INT (unspec_bbr_uid++)),
5671 jump);
5672 return prev;
5673 }
5674
5675 #define CONDJUMP_MIN -252
5676 #define CONDJUMP_MAX 262
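/* These presumably bracket the displacement that an SH conditional branch
   can reach from its own address, with a small safety margin; branches
   whose target falls outside [CONDJUMP_MIN, CONDJUMP_MAX] are rewritten
   through a far_branch record below. */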
5677 struct far_branch
5678 {
5679 /* A label (to be placed) in front of the jump
5680 that jumps to our ultimate destination. */
5681 rtx near_label;
5682 /* Where we are going to insert it if we cannot move the jump any farther,
5683 or the jump itself if we have picked up an existing jump. */
5684 rtx insert_place;
5685 /* The ultimate destination. */
5686 rtx far_label;
5687 struct far_branch *prev;
5688 /* If the branch has already been created, its address;
5689 else the address of its first prospective user. */
5690 int address;
5691 };
5692
5693 static void gen_far_branch (struct far_branch *);
5694 enum mdep_reorg_phase_e mdep_reorg_phase;
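/* Turn the conditional branch at BP->insert_place into a far branch: emit
   BP->near_label and an unconditional jump to BP->far_label (or a return)
   right after it, followed by a barrier, and invert the original branch so
   that it skips over the new jump. */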
5695 static void
5696 gen_far_branch (struct far_branch *bp)
5697 {
5698 rtx insn = bp->insert_place;
5699 rtx jump;
5700 rtx label = gen_label_rtx ();
5701 int ok;
5702
5703 emit_label_after (label, insn);
5704 if (bp->far_label)
5705 {
5706 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5707 LABEL_NUSES (bp->far_label)++;
5708 }
5709 else
5710 jump = emit_jump_insn_after (gen_return (), insn);
5711
5712 /* Emit a barrier so that reorg knows that any following instructions
5713 are not reachable via a fall-through path.
5714 But don't do this when not optimizing, since we wouldn't suppress the
5715 alignment for the barrier then, and could end up with out-of-range
5716 pc-relative loads. */
5717 if (optimize)
5718 emit_barrier_after (jump);
5719 emit_label_after (bp->near_label, insn);
5720
5721 if (bp->far_label)
5722 JUMP_LABEL (jump) = bp->far_label;
5723 else
5724 {
5725 rtx pat = PATTERN (jump);
5726 gcc_assert (ANY_RETURN_P (pat));
5727 JUMP_LABEL (jump) = pat;
5728 }
5729
5730 ok = invert_jump (insn, label, 1);
5731 gcc_assert (ok);
5732
5733 /* If we are branching around a jump (rather than a return), prevent
5734 reorg from using an insn from the jump target as the delay slot insn -
5735 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5736 and it could cause branches to go out of range. */
5737 if (bp->far_label)
5738 (emit_insn_after
5739 (gen_stuff_delay_slot
5740 (GEN_INT (unspec_bbr_uid++),
5741 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5742 insn));
5743 /* Prevent reorg from undoing our splits. */
5744 gen_block_redirect (jump, bp->address += 2, 2);
5745 }
5746
5747 /* Fix up ADDR_DIFF_VECs. */
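/* For each ADDR_DIFF_VEC jump table, locate the matching casesi_jump_2
   (the braf), emit the braf's reference label right after it, and make the
   table's offsets relative to that label instead of the vector label. */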
5748 void
5749 fixup_addr_diff_vecs (rtx first)
5750 {
5751 rtx insn;
5752
5753 for (insn = first; insn; insn = NEXT_INSN (insn))
5754 {
5755 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5756
5757 if (! JUMP_TABLE_DATA_P (insn)
5758 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5759 continue;
5760 pat = PATTERN (insn);
5761 vec_lab = XEXP (XEXP (pat, 0), 0);
5762
5763 /* Search the matching casesi_jump_2. */
5764 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5765 {
5766 if (!JUMP_P (prev))
5767 continue;
5768 prevpat = PATTERN (prev);
5769 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5770 continue;
5771 x = XVECEXP (prevpat, 0, 1);
5772 if (GET_CODE (x) != USE)
5773 continue;
5774 x = XEXP (x, 0);
5775 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5776 break;
5777 }
5778 /* FIXME: This is a bug in the optimizer, but it seems harmless
5779 to just avoid panicking. */
5780 if (!prev)
5781 continue;
5782
5783 /* Emit the reference label of the braf where it belongs, right after
5784 the casesi_jump_2 (i.e. braf). */
5785 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5786 emit_label_after (braf_label, prev);
5787
5788 /* Fix up the ADDR_DIF_VEC to be relative
5789 to the reference address of the braf. */
5790 XEXP (XEXP (pat, 0), 0) = braf_label;
5791 }
5792 }
5793
5794 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5795 a barrier. Return the base 2 logarithm of the desired alignment. */
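/* E.g. a return value of 2 requests 1 << 2 == 4 byte alignment, and 0
   requests no extra alignment at all. */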
5796 int
5797 barrier_align (rtx barrier_or_label)
5798 {
5799 rtx next, pat;
5800
5801 if (! barrier_or_label)
5802 return 0;
5803
5804 if (LABEL_P (barrier_or_label)
5805 && NEXT_INSN (barrier_or_label)
5806 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5807 return 2;
5808
5809 if (BARRIER_P (barrier_or_label)
5810 && PREV_INSN (barrier_or_label)
5811 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5812 {
5813 pat = PATTERN (PREV_INSN (barrier_or_label));
5814 /* If this is a very small table, we want to keep the alignment after
5815 the table to the minimum for proper code alignment. */
5816 return ((optimize_size
5817 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5818 <= (unsigned) 1 << (CACHE_LOG - 2)))
5819 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5820 }
5821
5822 next = next_active_insn (barrier_or_label);
5823
5824 if (! next)
5825 return 0;
5826
5827 pat = PATTERN (next);
5828
5829 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5830 /* This is a barrier in front of a constant table. */
5831 return 0;
5832
5833 if (optimize_size)
5834 return 0;
5835
5836 if (! TARGET_SH2 || ! optimize)
5837 return align_jumps_log;
5838
5839 /* When fixing up pcloads, a constant table might be inserted just before
5840 the basic block that ends with the barrier. Thus, we can't trust the
5841 instruction lengths before that. */
5842 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5843 {
5844 /* Check if there is an immediately preceding branch to the insn beyond
5845 the barrier. We must weigh the cost of discarding useful information
5846 from the current cache line when executing this branch and there is
5847 an alignment, against that of fetching unneeded insns in front of the
5848 branch target when there is no alignment. */
5849
5850 /* There are two delay_slot cases to consider. One is the simple case
5851 where the preceding branch is to the insn beyond the barrier (simple
5852 delay slot filling), and the other is where the preceding branch has
5853 a delay slot that is a duplicate of the insn after the barrier
5854 (fill_eager_delay_slots) and the branch is to the insn after the insn
5855 after the barrier. */
5856
5857 int slot, credit;
5858 bool jump_to_next = false;
5859
5860 /* Skip to the insn before the JUMP_INSN before the barrier under
5861 investigation. */
5862 rtx prev = prev_real_insn (prev_active_insn (barrier_or_label));
5863
5864 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5865 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5866 prev = prev_real_insn (prev))
5867 {
5868 jump_to_next = false;
5869 if (GET_CODE (PATTERN (prev)) == USE
5870 || GET_CODE (PATTERN (prev)) == CLOBBER)
5871 continue;
5872 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5873 {
5874 prev = XVECEXP (PATTERN (prev), 0, 1);
5875 if (INSN_UID (prev) == INSN_UID (next))
5876 {
5877 /* Delay slot was filled with insn at jump target. */
5878 jump_to_next = true;
5879 continue;
5880 }
5881 }
5882
5883 if (slot
5884 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5885 slot = 0;
5886 credit -= get_attr_length (prev);
5887 }
5888 if (prev && jump_to_label_p (prev))
5889 {
5890 rtx x;
5891 if (jump_to_next
5892 || next_real_insn (JUMP_LABEL (prev)) == next
5893 /* If relax_delay_slots() decides NEXT was redundant
5894 with some previous instruction, it will have
5895 redirected PREV's jump to the following insn. */
5896 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5897 /* There is no upper bound on redundant instructions
5898 that might have been skipped, but we must not put an
5899 alignment where none had been before. */
5900 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5901 (INSN_P (x)
5902 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5903 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5904 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5905 {
5906 rtx pat = PATTERN (prev);
5907 if (GET_CODE (pat) == PARALLEL)
5908 pat = XVECEXP (pat, 0, 0);
5909 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5910 return 0;
5911 }
5912 }
5913 }
5914
5915 return align_jumps_log;
5916 }
5917
5918 /* If we are inside a phony loop, almost any kind of label can turn up as the
5919 first one in the loop. Aligning a braf label causes incorrect switch
5920 destination addresses; we can detect braf labels because they are
5921 followed by a BARRIER.
5922 Applying loop alignment to small constant or switch tables is a waste
5923 of space, so we suppress this too. */
5924 int
5925 sh_loop_align (rtx label)
5926 {
5927 rtx next = label;
5928
5929 if (! optimize || optimize_size)
5930 return 0;
5931
5932 do
5933 next = next_nonnote_insn (next);
5934 while (next && LABEL_P (next));
5935
5936 if (! next
5937 || ! INSN_P (next)
5938 || recog_memoized (next) == CODE_FOR_consttable_2)
5939 return 0;
5940
5941 return align_loops_log;
5942 }
5943
5944 /* Do a final pass over the function, just before delayed branch
5945 scheduling. */
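/* In outline: when relaxing, this associates each function call with the
   insn that loads its target address (via REG_LABEL_OPERAND notes used
   later by final_prescan_insn); it then fixes up out-of-range mova
   instructions, emits the constant pool tables behind suitable barriers,
   and splits branches that have become too long. */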
5946 static void
5947 sh_reorg (void)
5948 {
5949 rtx first, insn, mova = NULL_RTX;
5950 int num_mova;
5951 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5952 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5953
5954 first = get_insns ();
5955 max_labelno_before_reorg = max_label_num ();
5956
5957 /* We must split call insns before introducing `mova's. If we're
5958 optimizing, they'll have already been split. Otherwise, make
5959 sure we don't split them too late. */
5960 if (! optimize)
5961 split_all_insns_noflow ();
5962
5963 if (TARGET_SHMEDIA)
5964 return;
5965
5966 /* If relaxing, generate pseudo-ops to associate function calls with
5967 the symbols they call. It does no harm to not generate these
5968 pseudo-ops. However, when we can generate them, it enables the
5969 linker to potentially relax the jsr to a bsr, and eliminate the
5970 register load and, possibly, the constant pool entry. */
5971
5972 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5973 if (TARGET_RELAX)
5974 {
5975 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5976 own purposes. This works because none of the remaining passes
5977 need to look at them.
5978
5979 ??? But it may break in the future. We should use a machine
5980 dependent REG_NOTE, or some other approach entirely. */
5981 for (insn = first; insn; insn = NEXT_INSN (insn))
5982 {
5983 if (INSN_P (insn))
5984 {
5985 rtx note;
5986
5987 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5988 NULL_RTX)) != 0)
5989 remove_note (insn, note);
5990 }
5991 }
5992
5993 for (insn = first; insn; insn = NEXT_INSN (insn))
5994 {
5995 rtx pattern, reg, link, set, scan, dies, label;
5996 int rescan = 0, foundinsn = 0;
5997
5998 if (CALL_P (insn))
5999 {
6000 pattern = PATTERN (insn);
6001
6002 if (GET_CODE (pattern) == PARALLEL)
6003 pattern = XVECEXP (pattern, 0, 0);
6004 if (GET_CODE (pattern) == SET)
6005 pattern = SET_SRC (pattern);
6006
6007 if (GET_CODE (pattern) != CALL
6008 || !MEM_P (XEXP (pattern, 0)))
6009 continue;
6010
6011 reg = XEXP (XEXP (pattern, 0), 0);
6012 }
6013 else
6014 {
6015 reg = sfunc_uses_reg (insn);
6016 if (! reg)
6017 continue;
6018 }
6019
6020 if (!REG_P (reg))
6021 continue;
6022
6023 /* Try scanning backward to find where the register is set. */
6024 link = NULL;
6025 for (scan = PREV_INSN (insn);
6026 scan && !LABEL_P (scan);
6027 scan = PREV_INSN (scan))
6028 {
6029 if (! INSN_P (scan))
6030 continue;
6031
6032 if (! reg_mentioned_p (reg, scan))
6033 continue;
6034
6035 if (noncall_uses_reg (reg, scan, &set))
6036 break;
6037
6038 if (set)
6039 {
6040 link = scan;
6041 break;
6042 }
6043 }
6044
6045 if (! link)
6046 continue;
6047
6048 /* The register is set at LINK. */
6049
6050 /* We can only optimize the function call if the register is
6051 being set to a symbol. In theory, we could sometimes
6052 optimize calls to a constant location, but the assembler
6053 and linker do not support that at present. */
6054 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6055 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6056 continue;
6057
6058 /* Scan forward from LINK to the place where REG dies, and
6059 make sure that the only insns which use REG are
6060 themselves function calls. */
6061
6062 /* ??? This doesn't work for call targets that were allocated
6063 by reload, since there may not be a REG_DEAD note for the
6064 register. */
6065
6066 dies = NULL_RTX;
6067 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6068 {
6069 rtx scanset;
6070
6071 /* Don't try to trace forward past a CODE_LABEL if we haven't
6072 seen INSN yet. Ordinarily, we will only find the setting insn
6073 if it is in the same basic block. However,
6074 cross-jumping can insert code labels in between the load and
6075 the call, and can result in situations where a single call
6076 insn may have two targets depending on where we came from. */
6077
6078 if (LABEL_P (scan) && ! foundinsn)
6079 break;
6080
6081 if (! INSN_P (scan))
6082 continue;
6083
6084 /* Don't try to trace forward past a JUMP. To optimize
6085 safely, we would have to check that all the
6086 instructions at the jump destination did not use REG. */
6087
6088 if (JUMP_P (scan))
6089 break;
6090
6091 if (! reg_mentioned_p (reg, scan))
6092 continue;
6093
6094 if (noncall_uses_reg (reg, scan, &scanset))
6095 break;
6096
6097 if (scan == insn)
6098 foundinsn = 1;
6099
6100 if (scan != insn
6101 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6102 {
6103 /* There is a function call to this register other
6104 than the one we are checking. If we optimize
6105 this call, we need to rescan again below. */
6106 rescan = 1;
6107 }
6108
6109 /* ??? We shouldn't have to worry about SCANSET here.
6110 We should just be able to check for a REG_DEAD note
6111 on a function call. However, the REG_DEAD notes are
6112 apparently not dependable around libcalls; c-torture
6113 execute/920501-2 is a test case. If SCANSET is set,
6114 then this insn sets the register, so it must have
6115 died earlier. Unfortunately, this will only handle
6116 the cases in which the register is, in fact, set in a
6117 later insn. */
6118
6119 /* ??? We shouldn't have to use FOUNDINSN here.
6120 This dates back to when we used LOG_LINKS to find
6121 the most recent insn which sets the register. */
6122
6123 if (foundinsn
6124 && (scanset
6125 || find_reg_note (scan, REG_DEAD, reg)))
6126 {
6127 dies = scan;
6128 break;
6129 }
6130 }
6131
6132 if (! dies)
6133 {
6134 /* Either there was a branch, or some insn used REG
6135 other than as a function call address. */
6136 continue;
6137 }
6138
6139 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6140 on the insn which sets the register, and on each call insn
6141 which uses the register. In final_prescan_insn we look for
6142 the REG_LABEL_OPERAND notes, and output the appropriate label
6143 or pseudo-op. */
6144
6145 label = gen_label_rtx ();
6146 add_reg_note (link, REG_LABEL_OPERAND, label);
6147 add_reg_note (insn, REG_LABEL_OPERAND, label);
6148 if (rescan)
6149 {
6150 scan = link;
6151 do
6152 {
6153 rtx reg2;
6154
6155 scan = NEXT_INSN (scan);
6156 if (scan != insn
6157 && ((CALL_P (scan)
6158 && reg_mentioned_p (reg, scan))
6159 || ((reg2 = sfunc_uses_reg (scan))
6160 && REGNO (reg2) == REGNO (reg))))
6161 add_reg_note (scan, REG_LABEL_OPERAND, label);
6162 }
6163 while (scan != dies);
6164 }
6165 }
6166 }
6167
6168 if (TARGET_SH2)
6169 fixup_addr_diff_vecs (first);
6170
6171 if (optimize)
6172 {
6173 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6174 shorten_branches (first);
6175 }
6176
6177 /* Scan the function looking for move instructions which have to be
6178 changed to pc-relative loads and insert the literal tables. */
6179 label_ref_list_pool = create_alloc_pool ("label references list",
6180 sizeof (struct label_ref_list_d),
6181 30);
6182 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6183 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6184 {
6185 if (mova_p (insn))
6186 {
6187 /* ??? basic block reordering can move a switch table dispatch
6188 below the switch table. Check if that has happened.
6189 We only have the addresses available when optimizing; but then,
6190 this check shouldn't be needed when not optimizing. */
6191 if (!untangle_mova (&num_mova, &mova, insn))
6192 {
6193 insn = mova;
6194 num_mova = 0;
6195 }
6196 }
6197 else if (JUMP_TABLE_DATA_P (insn)
6198 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6199 && num_mova
6200 /* ??? loop invariant motion can also move a mova out of a
6201 loop. Since loop does this code motion anyway, maybe we
6202 should wrap UNSPEC_MOVA into a CONST, so that reload can
6203 move it back. */
6204 && ((num_mova > 1
6205 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6206 || (prev_nonnote_insn (insn)
6207 == XEXP (MOVA_LABELREF (mova), 0))))
6208 {
6209 rtx scan;
6210 int total;
6211
6212 num_mova--;
6213
6214 /* Some code might have been inserted between the mova and
6215 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6216 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6217 total += get_attr_length (scan);
6218
6219 /* The range of mova is 1020; add 4 because pc counts from the address of
6220 the second instruction after this one, and subtract 2 in case pc is 2
6221 byte aligned (1020 + 4 - 2 = 1022). Possible alignment needed for the
6222 ADDR_DIFF_VEC cancels out with alignment effects of the mova itself. */
6223 if (total > 1022)
6224 {
6225 /* Change the mova into a load, and restart scanning
6226 there. broken_move will then return true for mova. */
6227 fixup_mova (mova);
6228 insn = mova;
6229 }
6230 }
6231 if (broken_move (insn)
6232 || (NONJUMP_INSN_P (insn)
6233 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6234 {
6235 rtx scan;
6236 /* Scan ahead looking for a barrier to stick the constant table
6237 behind. */
6238 rtx barrier = find_barrier (num_mova, mova, insn);
6239 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
6240 int need_aligned_label = 0;
6241
6242 if (num_mova && ! mova_p (mova))
6243 {
6244 /* find_barrier had to change the first mova into a
6245 pcload; thus, we have to start with this new pcload. */
6246 insn = mova;
6247 num_mova = 0;
6248 }
6249 /* Now find all the moves between the points and modify them. */
6250 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6251 {
6252 if (LABEL_P (scan))
6253 last_float = 0;
6254 if (NONJUMP_INSN_P (scan)
6255 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6256 need_aligned_label = 1;
6257 if (broken_move (scan))
6258 {
6259 rtx *patp = &PATTERN (scan), pat = *patp;
6260 rtx src, dst;
6261 rtx lab;
6262 rtx newsrc;
6263 enum machine_mode mode;
6264
6265 if (GET_CODE (pat) == PARALLEL)
6266 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6267 src = SET_SRC (pat);
6268 dst = SET_DEST (pat);
6269 mode = GET_MODE (dst);
6270
6271 if (mode == SImode && satisfies_constraint_I16 (src)
6272 && REGNO (dst) != FPUL_REG)
6273 {
6274 int offset = 0;
6275
6276 mode = HImode;
6277 while (GET_CODE (dst) == SUBREG)
6278 {
6279 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6280 GET_MODE (SUBREG_REG (dst)),
6281 SUBREG_BYTE (dst),
6282 GET_MODE (dst));
6283 dst = SUBREG_REG (dst);
6284 }
6285 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6286 }
6287 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6288 {
6289 /* This must be an insn that clobbers r0. */
6290 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6291 XVECLEN (PATTERN (scan), 0)
6292 - 1);
6293 rtx clobber = *clobberp;
6294
6295 gcc_assert (GET_CODE (clobber) == CLOBBER
6296 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6297
6298 if (last_float
6299 && reg_set_between_p (r0_rtx, last_float_move, scan))
6300 last_float = 0;
6301 if (last_float
6302 && TARGET_SHCOMPACT
6303 && GET_MODE_SIZE (mode) != 4
6304 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6305 last_float = 0;
6306 lab = add_constant (src, mode, last_float);
6307 if (lab)
6308 emit_insn_before (gen_mova (lab), scan);
6309 else
6310 {
6311 /* There will be a REG_UNUSED note for r0 on
6312 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6313 otherwise reorg:mark_target_live_regs will not
6314 consider r0 to be used, and we could end up with a delay
6315 slot insn in front of SCAN that clobbers r0. */
6316 rtx note
6317 = find_regno_note (last_float_move, REG_UNUSED, 0);
6318
6319 /* If we are not optimizing, then there may not be
6320 a note. */
6321 if (note)
6322 PUT_REG_NOTE_KIND (note, REG_INC);
6323
6324 *last_float_addr = r0_inc_rtx;
6325 }
6326 last_float_move = scan;
6327 last_float = src;
6328 newsrc = gen_const_mem (mode,
6329 (((TARGET_SH4 && ! TARGET_FMOVD)
6330 || REGNO (dst) == FPUL_REG)
6331 ? r0_inc_rtx
6332 : r0_rtx));
6333 last_float_addr = &XEXP (newsrc, 0);
6334
6335 /* Remove the clobber of r0. */
6336 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6337 gen_rtx_SCRATCH (Pmode));
6338 }
6339 /* This is a mova needing a label. Create it. */
6340 else if (GET_CODE (src) == UNSPEC
6341 && XINT (src, 1) == UNSPEC_MOVA
6342 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6343 {
6344 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6345 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6346 newsrc = gen_rtx_UNSPEC (SImode,
6347 gen_rtvec (1, newsrc),
6348 UNSPEC_MOVA);
6349 }
6350 else if (GET_CODE (src) == UNSPEC_VOLATILE
6351 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6352 {
6353 newsrc = XVECEXP (src, 0, 0);
6354 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6355 INSN_CODE (scan) = -1;
6356 continue;
6357 }
6358 else
6359 {
6360 lab = add_constant (src, mode, 0);
6361 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6362 newsrc = gen_const_mem (mode, newsrc);
6363 }
6364 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6365 INSN_CODE (scan) = -1;
6366 }
6367 }
6368 dump_table (need_aligned_label ? insn : 0, barrier);
6369 insn = barrier;
6370 }
6371 }
6372 free_alloc_pool (label_ref_list_pool);
6373 for (insn = first; insn; insn = NEXT_INSN (insn))
6374 PUT_MODE (insn, VOIDmode);
6375
6376 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6377 INSN_ADDRESSES_FREE ();
6378 split_branches (first);
6379
6380 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6381 also has an effect on the register that holds the address of the sfunc.
6382 Insert an extra dummy insn in front of each sfunc that pretends to
6383 use this register. */
6384 if (flag_delayed_branch)
6385 {
6386 for (insn = first; insn; insn = NEXT_INSN (insn))
6387 {
6388 rtx reg = sfunc_uses_reg (insn);
6389
6390 if (! reg)
6391 continue;
6392 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6393 }
6394 }
6395 #if 0
6396 /* fpscr is not actually a user variable, but we pretend it is for the
6397 sake of the previous optimization passes, since we want it handled like
6398 one. However, we don't have any debugging information for it, so turn
6399 it into a non-user variable now. */
6400 if (TARGET_SH4)
6401 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
6402 #endif
6403 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6404 }
6405
6406 /* Return the UID of the insn that follows the specified label. */
6407 int
6408 get_dest_uid (rtx label, int max_uid)
6409 {
6410 rtx dest = next_real_insn (label);
6411 int dest_uid;
6412 if (! dest)
6413 /* This can happen for an undefined label. */
6414 return 0;
6415 dest_uid = INSN_UID (dest);
6416 /* If this is a newly created branch redirection blocking instruction,
6417 we cannot index the branch_uid or insn_addresses arrays with its
6418 uid. But then, we won't need to, because the actual destination is
6419 the following branch. */
6420 while (dest_uid >= max_uid)
6421 {
6422 dest = NEXT_INSN (dest);
6423 dest_uid = INSN_UID (dest);
6424 }
6425 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6426 return 0;
6427 return dest_uid;
6428 }
6429
6430 /* Split condbranches that are out of range. Also add clobbers for
6431 scratch registers that are needed in far jumps.
6432 We do this before delay slot scheduling, so that it can take our
6433 newly created instructions into account. It also allows us to
6434 find branches with common targets more easily. */
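/* Branches to the same destination share one far_branch record, indexed by
   the uid of the destination insn, so a single near label and far jump can
   serve several out-of-range conditional branches. */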
6435 static void
6436 split_branches (rtx first)
6437 {
6438 rtx insn;
6439 struct far_branch **uid_branch, *far_branch_list = 0;
6440 int max_uid = get_max_uid ();
6441 int ok;
6442
6443 /* Find out which branches are out of range. */
6444 shorten_branches (first);
6445
6446 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6447 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6448
6449 for (insn = first; insn; insn = NEXT_INSN (insn))
6450 if (! INSN_P (insn))
6451 continue;
6452 else if (INSN_DELETED_P (insn))
6453 {
6454 /* Shorten_branches would split this instruction again,
6455 so transform it into a note. */
6456 SET_INSN_DELETED (insn);
6457 }
6458 else if (JUMP_P (insn))
6459 {
6460 enum attr_type type = get_attr_type (insn);
6461 if (type == TYPE_CBRANCH)
6462 {
6463 rtx next, beyond;
6464
6465 if (get_attr_length (insn) > 4)
6466 {
6467 rtx src = SET_SRC (PATTERN (insn));
6468 rtx olabel = XEXP (XEXP (src, 1), 0);
6469 int addr = INSN_ADDRESSES (INSN_UID (insn));
6470 rtx label = 0;
6471 int dest_uid = get_dest_uid (olabel, max_uid);
6472 struct far_branch *bp = uid_branch[dest_uid];
6473
6474 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6475 the label if the LABEL_NUSES count drops to zero. There is
6476 always a jump_optimize pass that sets these values, but it
6477 proceeds to delete unreferenced code, and then if not
6478 optimizing, to un-delete the deleted instructions, thus
6479 leaving labels with use counts that are too low. */
6480 if (! optimize)
6481 {
6482 JUMP_LABEL (insn) = olabel;
6483 LABEL_NUSES (olabel)++;
6484 }
6485 if (! bp)
6486 {
6487 bp = (struct far_branch *) alloca (sizeof *bp);
6488 uid_branch[dest_uid] = bp;
6489 bp->prev = far_branch_list;
6490 far_branch_list = bp;
6491 bp->far_label
6492 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6493 LABEL_NUSES (bp->far_label)++;
6494 }
6495 else
6496 {
6497 label = bp->near_label;
6498 if (! label && bp->address - addr >= CONDJUMP_MIN)
6499 {
6500 rtx block = bp->insert_place;
6501
6502 if (GET_CODE (PATTERN (block)) == RETURN)
6503 block = PREV_INSN (block);
6504 else
6505 block = gen_block_redirect (block,
6506 bp->address, 2);
6507 label = emit_label_after (gen_label_rtx (),
6508 PREV_INSN (block));
6509 bp->near_label = label;
6510 }
6511 else if (label && ! NEXT_INSN (label))
6512 {
6513 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6514 bp->insert_place = insn;
6515 else
6516 gen_far_branch (bp);
6517 }
6518 }
6519 if (! label
6520 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6521 {
6522 bp->near_label = label = gen_label_rtx ();
6523 bp->insert_place = insn;
6524 bp->address = addr;
6525 }
6526 ok = redirect_jump (insn, label, 0);
6527 gcc_assert (ok);
6528 }
6529 else
6530 {
6531 /* get_attr_length (insn) == 2 */
6532 /* Check if we have a pattern where reorg wants to redirect
6533 the branch to a label from an unconditional branch that
6534 is too far away. */
6535 /* We can't use JUMP_LABEL here because it might be undefined
6536 when not optimizing. */
6537 /* A syntax error might cause beyond to be NULL_RTX. */
6538 beyond
6539 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6540 0));
6541
6542 if (beyond
6543 && (JUMP_P (beyond)
6544 || ((beyond = next_active_insn (beyond))
6545 && JUMP_P (beyond)))
6546 && GET_CODE (PATTERN (beyond)) == SET
6547 && recog_memoized (beyond) == CODE_FOR_jump_compact
6548 && ((INSN_ADDRESSES
6549 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6550 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6551 > 252 + 258 + 2))
6552 gen_block_redirect (beyond,
6553 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6554 }
6555
6556 next = next_active_insn (insn);
6557
6558 if (next
6559 && (JUMP_P (next)
6560 || ((next = next_active_insn (next))
6561 && JUMP_P (next)))
6562 && GET_CODE (PATTERN (next)) == SET
6563 && recog_memoized (next) == CODE_FOR_jump_compact
6564 && ((INSN_ADDRESSES
6565 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6566 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6567 > 252 + 258 + 2))
6568 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6569 }
6570 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6571 {
6572 int addr = INSN_ADDRESSES (INSN_UID (insn));
6573 rtx far_label = 0;
6574 int dest_uid = 0;
6575 struct far_branch *bp;
6576
6577 if (type == TYPE_JUMP)
6578 {
6579 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6580 dest_uid = get_dest_uid (far_label, max_uid);
6581 if (! dest_uid)
6582 {
6583 /* Parse errors can lead to labels outside
6584 the insn stream. */
6585 if (! NEXT_INSN (far_label))
6586 continue;
6587
6588 if (! optimize)
6589 {
6590 JUMP_LABEL (insn) = far_label;
6591 LABEL_NUSES (far_label)++;
6592 }
6593 redirect_jump (insn, ret_rtx, 1);
6594 far_label = 0;
6595 }
6596 }
6597 bp = uid_branch[dest_uid];
6598 if (! bp)
6599 {
6600 bp = (struct far_branch *) alloca (sizeof *bp);
6601 uid_branch[dest_uid] = bp;
6602 bp->prev = far_branch_list;
6603 far_branch_list = bp;
6604 bp->near_label = 0;
6605 bp->far_label = far_label;
6606 if (far_label)
6607 LABEL_NUSES (far_label)++;
6608 }
6609 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6610 if (addr - bp->address <= CONDJUMP_MAX)
6611 emit_label_after (bp->near_label, PREV_INSN (insn));
6612 else
6613 {
6614 gen_far_branch (bp);
6615 bp->near_label = 0;
6616 }
6617 else
6618 bp->near_label = 0;
6619 bp->address = addr;
6620 bp->insert_place = insn;
6621 if (! far_label)
6622 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6623 else
6624 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6625 }
6626 }
6627 /* Generate all pending far branches,
6628 and free our references to the far labels. */
6629 while (far_branch_list)
6630 {
6631 if (far_branch_list->near_label
6632 && ! NEXT_INSN (far_branch_list->near_label))
6633 gen_far_branch (far_branch_list);
6634 if (optimize
6635 && far_branch_list->far_label
6636 && ! --LABEL_NUSES (far_branch_list->far_label))
6637 delete_insn (far_branch_list->far_label);
6638 far_branch_list = far_branch_list->prev;
6639 }
6640
6641 /* Instruction length information is no longer valid due to the new
6642 instructions that have been generated. */
6643 init_insn_lengths ();
6644 }
6645
6646 /* Dump out instruction addresses, which is useful for debugging the
6647 constant pool table stuff.
6648
6649 If relaxing, output the label and pseudo-ops used to link together
6650 calls and the instructions which set the registers.
6651
6652 ??? The addresses printed by this routine for insns are nonsense for
6653 insns which are inside of a sequence where none of the inner insns have
6654 variable length. This is because the second pass of shorten_branches
6655 does not bother to update them. */
6656 void
6657 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6658 int noperands ATTRIBUTE_UNUSED)
6659 {
6660 if (TARGET_DUMPISIZE)
6661 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6662
6663 if (TARGET_RELAX)
6664 {
6665 rtx note;
6666
6667 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6668 if (note)
6669 {
6670 rtx pattern;
6671
6672 pattern = PATTERN (insn);
6673 if (GET_CODE (pattern) == PARALLEL)
6674 pattern = XVECEXP (pattern, 0, 0);
6675 switch (GET_CODE (pattern))
6676 {
6677 case SET:
6678 if (GET_CODE (SET_SRC (pattern)) != CALL
6679 && get_attr_type (insn) != TYPE_SFUNC)
6680 {
6681 targetm.asm_out.internal_label
6682 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6683 break;
6684 }
6685 /* else FALLTHROUGH */
6686 case CALL:
6687 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6688 CODE_LABEL_NUMBER (XEXP (note, 0)));
6689 break;
6690
6691 default:
6692 gcc_unreachable ();
6693 }
6694 }
6695 }
6696 }
6697
6698 /* Dump out any constants accumulated in the final pass. These will
6699 only be labels. */
6700 const char *
6701 output_jump_label_table (void)
6702 {
6703 int i;
6704
6705 if (pool_size)
6706 {
6707 fprintf (asm_out_file, "\t.align 2\n");
6708 for (i = 0; i < pool_size; i++)
6709 {
6710 pool_node *p = &pool_vector[i];
6711
6712 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6713 CODE_LABEL_NUMBER (p->label));
6714 output_asm_insn (".long %O0", &p->value);
6715 }
6716 pool_size = 0;
6717 }
6718
6719 return "";
6720 }
6721 \f
6722 /* A full frame looks like:
6723
6724 arg-5
6725 arg-4
6726 [ if current_function_anonymous_args
6727 arg-3
6728 arg-2
6729 arg-1
6730 arg-0 ]
6731 saved-fp
6732 saved-r10
6733 saved-r11
6734 saved-r12
6735 saved-pr
6736 local-n
6737 ..
6738 local-1
6739 local-0 <- fp points here.
6740
6741 Number of bytes pushed for anonymous args, used to pass information
6742 between expand_prologue and expand_epilogue.
6743
6744 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6745 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6746 for an epilogue and a negative value means that it's for a sibcall
6747 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6748 all the registers that are about to be restored, and hence dead. */
6749 static void
6750 output_stack_adjust (int size, rtx reg, int epilogue_p,
6751 HARD_REG_SET *live_regs_mask, bool frame_p)
6752 {
6753 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6754 if (size)
6755 {
6756 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6757
6758 /* This test is bogus, as output_stack_adjust is used to re-align the
6759 stack. */
6760 #if 0
6761 gcc_assert (!(size % align));
6762 #endif
6763
6764 if (CONST_OK_FOR_ADD (size))
6765 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6766 /* Try to do it with two partial adjustments; however, we must make
6767 sure that the stack is properly aligned at all times, in case
6768 an interrupt occurs between the two partial adjustments. */
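/* For instance, with align == 4 and size == 100, the first add is
   100 / 2 & -4 == 48 and the second is 100 - 48 == 52; both steps are
   multiples of the alignment, so the stack stays aligned in between
   (assuming SIZE itself is a multiple of the alignment). */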
6769 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6770 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6771 {
6772 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6773 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6774 }
6775 else
6776 {
6777 rtx const_reg;
6778 rtx insn;
6779 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6780 int i;
6781
6782 /* If TEMP is invalid, we could temporarily save a general
6783 register to MACL. However, there is currently no need
6784 to handle this case, so just die when we see it. */
6785 if (epilogue_p < 0
6786 || current_function_interrupt
6787 || ! call_really_used_regs[temp] || fixed_regs[temp])
6788 temp = -1;
6789 if (temp < 0 && ! current_function_interrupt
6790 && (TARGET_SHMEDIA || epilogue_p >= 0))
6791 {
6792 HARD_REG_SET temps;
6793 COPY_HARD_REG_SET (temps, call_used_reg_set);
6794 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6795 if (epilogue_p > 0)
6796 {
6797 int nreg = 0;
6798 if (crtl->return_rtx)
6799 {
6800 enum machine_mode mode;
6801 mode = GET_MODE (crtl->return_rtx);
6802 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6803 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6804 }
6805 for (i = 0; i < nreg; i++)
6806 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6807 if (crtl->calls_eh_return)
6808 {
6809 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6810 for (i = 0; i <= 3; i++)
6811 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6812 }
6813 }
6814 if (TARGET_SHMEDIA && epilogue_p < 0)
6815 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6816 CLEAR_HARD_REG_BIT (temps, i);
6817 if (epilogue_p <= 0)
6818 {
6819 for (i = FIRST_PARM_REG;
6820 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6821 CLEAR_HARD_REG_BIT (temps, i);
6822 if (cfun->static_chain_decl != NULL)
6823 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6824 }
6825 temp = scavenge_reg (&temps);
6826 }
6827 if (temp < 0 && live_regs_mask)
6828 {
6829 HARD_REG_SET temps;
6830
6831 COPY_HARD_REG_SET (temps, *live_regs_mask);
6832 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6833 temp = scavenge_reg (&temps);
6834 }
6835 if (temp < 0)
6836 {
6837 rtx adj_reg, tmp_reg, mem;
6838
6839 /* If we reached here, the most likely case is the (sibcall)
6840 epilogue for non-SHmedia. Put a special push/pop sequence
6841 for such a case as the last resort. This looks lengthy but
6842 would not be a problem because it seems to be very
6843 rare. */
6844
6845 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6846
6847
6848 /* ??? There is still the slight possibility that r4 or
6849 r5 have been reserved as fixed registers or assigned
6850 as global registers, and they change during an
6851 interrupt. There are possible ways to handle this:
6852
6853 - If we are adjusting the frame pointer (r14), we can do
6854 with a single temp register and an ordinary push / pop
6855 on the stack.
6856 - Grab any call-used or call-saved registers (i.e. not
6857 fixed or globals) for the temps we need. We might
6858 also grab r14 if we are adjusting the stack pointer.
6859 If we can't find enough available registers, issue
6860 a diagnostic and die - the user must have reserved
6861 way too many registers.
6862 But since all this is rather unlikely to happen and
6863 would require extra testing, we just die if r4 / r5
6864 are not available. */
6865 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6866 && !global_regs[4] && !global_regs[5]);
6867
6868 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6869 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6870 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6871 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6872 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6873 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6874 emit_move_insn (mem, tmp_reg);
6875 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6876 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6877 emit_move_insn (mem, tmp_reg);
6878 emit_move_insn (reg, adj_reg);
6879 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6880 emit_move_insn (adj_reg, mem);
6881 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6882 emit_move_insn (tmp_reg, mem);
6883 /* Tell flow the insns that pop r4/r5 aren't dead. */
6884 emit_use (tmp_reg);
6885 emit_use (adj_reg);
6886 return;
6887 }
6888 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6889
6890 /* If SIZE is negative, subtract the positive value.
6891 This sometimes allows a constant pool entry to be shared
6892 between prologue and epilogue code. */
6893 if (size < 0)
6894 {
6895 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6896 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6897 }
6898 else
6899 {
6900 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6901 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6902 }
6903 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6904 gen_rtx_SET (VOIDmode, reg,
6905 gen_rtx_PLUS (SImode, reg,
6906 GEN_INT (size))));
6907 }
6908 }
6909 }
6910
6911 /* Emit the specified insn and mark it as frame related.
6912 FIXME: Rename this to emit_frame_insn. */
6913 static rtx
6914 frame_insn (rtx x)
6915 {
6916 x = emit_insn (x);
6917 RTX_FRAME_RELATED_P (x) = 1;
6918 return x;
6919 }
6920
6921 /* Output RTL to push register RN onto the stack. */
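/* Returns the frame insn that was emitted, or NULL_RTX for an odd-numbered
   FP register when double moves are in use, presumably because that
   register is already covered by the DFmode push of the preceding even
   register. */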
6922 static rtx
6923 push (int rn)
6924 {
6925 rtx x;
6926 if (rn == FPUL_REG)
6927 x = gen_push_fpul ();
6928 else if (rn == FPSCR_REG)
6929 x = gen_push_fpscr ();
6930 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6931 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6932 {
6933 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6934 return NULL_RTX;
6935 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6936 }
6937 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6938 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6939 else
6940 x = gen_push (gen_rtx_REG (SImode, rn));
6941
6942 x = frame_insn (x);
6943 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6944 return x;
6945 }
6946
6947 /* Output RTL to pop register RN from the stack. */
6948 static void
6949 pop (int rn)
6950 {
6951 rtx x, sp_reg, reg;
6952 if (rn == FPUL_REG)
6953 x = gen_pop_fpul ();
6954 else if (rn == FPSCR_REG)
6955 x = gen_pop_fpscr ();
6956 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6957 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6958 {
6959 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6960 return;
6961 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6962 }
6963 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6964 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6965 else
6966 x = gen_pop (gen_rtx_REG (SImode, rn));
6967
6968 x = emit_insn (x);
6969
6970 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6971 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6972 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6973 : SET_DEST (PATTERN (x)));
6974 add_reg_note (x, REG_CFA_RESTORE, reg);
6975 add_reg_note (x, REG_CFA_ADJUST_CFA,
6976 gen_rtx_SET (SImode, sp_reg,
6977 plus_constant (SImode, sp_reg,
6978 GET_MODE_SIZE (GET_MODE (reg)))));
6979 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6980 RTX_FRAME_RELATED_P (x) = 1;
6981 }
6982
6983 /* Generate code to push the regs specified in the mask. */
6984 static void
6985 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6986 {
6987 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6988 int skip_fpscr = 0;
6989
6990 /* Push PR last; this gives better latencies after the prologue, and
6991 candidates for the return delay slot when there are no general
6992 registers pushed. */
6993 for (; i < FIRST_PSEUDO_REGISTER; i++)
6994 {
6995 /* If this is an interrupt handler, and the SZ bit varies,
6996 and we have to push any floating point register, we need
6997 to switch to the correct precision first. */
6998 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6999 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7000 {
7001 HARD_REG_SET unsaved;
7002
7003 push (FPSCR_REG);
7004 COMPL_HARD_REG_SET (unsaved, *mask);
7005 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7006 skip_fpscr = 1;
7007 }
7008 if (i != PR_REG
7009 && (i != FPSCR_REG || ! skip_fpscr)
7010 && TEST_HARD_REG_BIT (*mask, i))
7011 {
7012 /* If the ISR has RESBANK attribute assigned, don't push any of
7013 the following registers - R0-R14, MACH, MACL and GBR. */
7014 if (! (sh_cfun_resbank_handler_p ()
7015 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7016 || i == MACH_REG
7017 || i == MACL_REG
7018 || i == GBR_REG)))
7019 push (i);
7020 }
7021 }
7022
7023 /* Push banked registers last to improve delay slot opportunities. */
7024 if (interrupt_handler)
7025 {
7026 bool use_movml = false;
7027
7028 if (TARGET_SH2A)
7029 {
7030 unsigned int count = 0;
7031
7032 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7033 if (TEST_HARD_REG_BIT (*mask, i))
7034 count++;
7035 else
7036 break;
7037
7038 /* Use movml when all banked registers are pushed. */
7039 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7040 use_movml = true;
7041 }
7042
7043 if (sh_cfun_resbank_handler_p ())
7044 ; /* Do nothing. */
7045 else if (use_movml)
7046 {
7047 rtx x, mem, reg, set;
7048 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7049
7050 /* We must avoid scheduling the multiple-store insn together with
7051 other insns. */
7052 emit_insn (gen_blockage ());
7053 x = gen_movml_push_banked (sp_reg);
7054 x = frame_insn (x);
7055 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7056 {
7057 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7058 reg = gen_rtx_REG (SImode, i);
7059 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7060 }
7061
7062 set = gen_rtx_SET (SImode, sp_reg,
7063 plus_constant (Pmode, sp_reg, - 32));
7064 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7065 emit_insn (gen_blockage ());
7066 }
7067 else
7068 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7069 if (TEST_HARD_REG_BIT (*mask, i))
7070 push (i);
7071 }
7072
7073 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7074 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7075 push (PR_REG);
7076 }
7077
7078 /* Calculate how much extra space is needed to save all callee-saved
7079 target registers.
7080 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7081 static int
7082 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7083 {
7084 int reg;
7085 int stack_space = 0;
7086 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7087
7088 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7089 if ((! call_really_used_regs[reg] || interrupt_handler)
7090 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7091 /* Leave space to save this target register on the stack,
7092 in case target register allocation wants to use it. */
7093 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7094 return stack_space;
7095 }
7096
7097 /* Decide whether we should reserve space for callee-save target registers,
7098 in case target register allocation wants to use them. REGS_SAVED is
7099 the space, in bytes, that is already required for register saves.
7100 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7101 static int
7102 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7103 HARD_REG_SET *live_regs_mask)
7104 {
7105 if (optimize_size)
7106 return 0;
7107 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7108 }
7109
7110 /* Decide how much space to reserve for callee-save target registers
7111 in case target register allocation wants to use them.
7112 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7113 static int
7114 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7115 {
7116 if (shmedia_space_reserved_for_target_registers)
7117 return shmedia_target_regs_stack_space (live_regs_mask);
7118 else
7119 return 0;
7120 }
7121
7122 /* Work out the registers which need to be saved, both as a mask and a
7123 count of saved words. Return the count.
7124
7125 If doing a pragma interrupt function, then push all regs used by the
7126 function, and if we call another function (we can tell by looking at PR),
7127 make sure that all the regs it clobbers are safe too. */
7128 static int
7129 calc_live_regs (HARD_REG_SET *live_regs_mask)
7130 {
7131 unsigned int reg;
7132 int count;
7133 tree attrs;
7134 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7135 bool nosave_low_regs;
7136 int pr_live, has_call;
7137
7138 attrs = DECL_ATTRIBUTES (current_function_decl);
7139 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7140 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7141 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7142 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7143
7144 CLEAR_HARD_REG_SET (*live_regs_mask);
7145 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7146 && df_regs_ever_live_p (FPSCR_REG))
7147 target_flags &= ~MASK_FPU_SINGLE;
7148 /* If switching to double mode avoids a lot of register saves, do that. */
7149 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7150 && TARGET_FPU_SINGLE)
7151 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7152 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7153 && (! call_really_used_regs[reg]
7154 || interrupt_handler)
7155 && ++count > 2)
7156 {
7157 target_flags &= ~MASK_FPU_SINGLE;
7158 break;
7159 }
7160 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7161 knows how to use it. That means the pseudo originally allocated for
7162 the initial value can become the PR_MEDIA_REG hard register, as seen for
7163 execute/20010122-1.c:test9. */
7164 if (TARGET_SHMEDIA)
7165 /* ??? This function is called from initial_elimination_offset, hence we
7166 can't use the result of sh_media_register_for_return here. */
7167 pr_live = sh_pr_n_sets ();
7168 else
7169 {
7170 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7171 pr_live = (pr_initial
7172 ? (!REG_P (pr_initial)
7173 || REGNO (pr_initial) != (PR_REG))
7174 : df_regs_ever_live_p (PR_REG));
7175 /* For SHcompact, if not optimizing, we end up with a memory reference
7176 using the return address pointer for __builtin_return_address even
7177 though there is no actual need to put the PR register on the stack. */
7178 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7179 }
7180 /* Force PR to be live if the prologue has to call the SHmedia
7181 argument decoder or register saver. */
7182 if (TARGET_SHCOMPACT
7183 && ((crtl->args.info.call_cookie
7184 & ~ CALL_COOKIE_RET_TRAMP (1))
7185 || crtl->saves_all_registers))
7186 pr_live = 1;
7187 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7188 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7189 {
7190 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7191 ? pr_live
7192 : interrupt_handler
7193 ? (/* Need to save all the regs ever live. */
7194 (df_regs_ever_live_p (reg)
7195 || (call_really_used_regs[reg]
7196 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7197 || reg == PIC_OFFSET_TABLE_REGNUM)
7198 && has_call)
7199 || (TARGET_SHMEDIA && has_call
7200 && REGISTER_NATURAL_MODE (reg) == SImode
7201 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7202 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7203 && reg != RETURN_ADDRESS_POINTER_REGNUM
7204 && reg != T_REG && reg != GBR_REG
7205 /* Push fpscr only on targets which have an FPU. */
7206 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7207 : (/* Only push those regs which are used and need to be saved. */
7208 (TARGET_SHCOMPACT
7209 && flag_pic
7210 && crtl->args.info.call_cookie
7211 && reg == PIC_OFFSET_TABLE_REGNUM)
7212 || (df_regs_ever_live_p (reg)
7213 && ((!call_really_used_regs[reg]
7214 && !(reg != PIC_OFFSET_TABLE_REGNUM
7215 && fixed_regs[reg] && call_used_regs[reg]))
7216 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7217 || (crtl->calls_eh_return
7218 && (reg == EH_RETURN_DATA_REGNO (0)
7219 || reg == EH_RETURN_DATA_REGNO (1)
7220 || reg == EH_RETURN_DATA_REGNO (2)
7221 || reg == EH_RETURN_DATA_REGNO (3)))
7222 || ((reg == MACL_REG || reg == MACH_REG)
7223 && df_regs_ever_live_p (reg)
7224 && sh_cfun_attr_renesas_p ())
7225 ))
7226 {
7227 SET_HARD_REG_BIT (*live_regs_mask, reg);
7228 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7229
7230 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7231 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7232 {
7233 if (FP_REGISTER_P (reg))
7234 {
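/* When saving in double precision mode, FP registers are handled in
pairs; make sure the other half of the pair (reg ^ 1) is marked
live and counted as well. */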
7235 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7236 {
7237 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7238 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7239 }
7240 }
7241 else if (XD_REGISTER_P (reg))
7242 {
7243 /* Must switch to double mode to access these registers. */
7244 target_flags &= ~MASK_FPU_SINGLE;
7245 }
7246 }
7247 }
7248 if (nosave_low_regs && reg == R8_REG)
7249 break;
7250 }
7251 /* If we have a target register optimization pass after prologue / epilogue
7252 threading, we need to assume all target registers will be live even if
7253 they aren't now. */
7254 if (flag_branch_target_load_optimize2
7255 && TARGET_SAVE_ALL_TARGET_REGS
7256 && shmedia_space_reserved_for_target_registers)
7257 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7258 if ((! call_really_used_regs[reg] || interrupt_handler)
7259 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7260 {
7261 SET_HARD_REG_BIT (*live_regs_mask, reg);
7262 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7263 }
7264 /* If this is an interrupt handler, we don't have any call-clobbered
7265 registers we can conveniently use for target register save/restore.
7266 Make sure we save at least one general purpose register when we need
7267 to save target registers. */
7268 if (interrupt_handler
7269 && hard_reg_set_intersect_p (*live_regs_mask,
7270 reg_class_contents[TARGET_REGS])
7271 && ! hard_reg_set_intersect_p (*live_regs_mask,
7272 reg_class_contents[GENERAL_REGS]))
7273 {
7274 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7275 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7276 }
7277
7278 return count;
7279 }
7280
7281 /* Code to generate prologue and epilogue sequences */
7282
7283 /* PUSHED is the number of bytes that are being pushed on the
7284 stack for register saves. Return the frame size, padded
7285 appropriately so that the stack stays properly aligned. */
7286 static HOST_WIDE_INT
7287 rounded_frame_size (int pushed)
7288 {
7289 HOST_WIDE_INT size = get_frame_size ();
7290 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7291
7292 if (ACCUMULATE_OUTGOING_ARGS)
7293 size += crtl->outgoing_args_size;
7294
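/* Round the frame size plus the pushed bytes up to the stack boundary,
then take the pushed bytes back out. E.g. with an 8-byte boundary,
a 20-byte frame and 12 pushed bytes: ((20 + 12 + 7) & -8) - 12 = 20,
so the overall 32 bytes stay aligned. */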
7295 return ((size + pushed + align - 1) & -align) - pushed;
7296 }
7297
7298 /* Choose a call-clobbered target-branch register that remains
7299 unchanged along the whole function. We set it up as the return
7300 value in the prologue. */
7301 int
7302 sh_media_register_for_return (void)
7303 {
7304 int regno;
7305 int tr0_used;
7306
7307 if (! crtl->is_leaf)
7308 return -1;
7309 if (lookup_attribute ("interrupt_handler",
7310 DECL_ATTRIBUTES (current_function_decl)))
7311 return -1;
7312 if (sh_cfun_interrupt_handler_p ())
7313 return -1;
7314
7315 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7316
7317 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7318 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7319 return regno;
7320
7321 return -1;
7322 }
7323
7324 /* The maximum registers we need to save are:
7325 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7326 - 32 floating point registers (for each pair, we save none,
7327 one single precision value, or a double precision value).
7328 - 8 target registers
7329 - add 1 entry for a delimiter. */
7330 #define MAX_SAVED_REGS (62+32+8)
7331
7332 typedef struct save_entry_s
7333 {
7334 unsigned char reg;
7335 unsigned char mode;
7336 short offset;
7337 } save_entry;
7338
7339 #define MAX_TEMPS 4
7340
7341 /* There will be a delimiter entry with VOIDmode both at the start and the
7342 end of a filled in schedule. The end delimiter has the offset of the
7343 save with the smallest (i.e. most negative) offset. */
7344 typedef struct save_schedule_s
7345 {
7346 save_entry entries[MAX_SAVED_REGS + 2];
7347 int temps[MAX_TEMPS+1];
7348 } save_schedule;
7349
7350 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7351 use reverse order. Returns the last entry written to (not counting
7352 the delimiter). OFFSET_BASE is a number to be added to all offset
7353 entries. */
7354 static save_entry *
7355 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7356 int offset_base)
7357 {
7358 int align, i;
7359 save_entry *entry = schedule->entries;
7360 int tmpx = 0;
7361 int offset;
7362
7363 if (! current_function_interrupt)
7364 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7365 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7366 && ! FUNCTION_ARG_REGNO_P (i)
7367 && i != FIRST_RET_REG
7368 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7369 && ! (crtl->calls_eh_return
7370 && (i == EH_RETURN_STACKADJ_REGNO
7371 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7372 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7373 schedule->temps[tmpx++] = i;
7374 entry->reg = -1;
7375 entry->mode = VOIDmode;
7376 entry->offset = offset_base;
7377 entry++;
7378 /* We loop twice: first, we save 8-byte aligned registers in the
7379 higher addresses, that are known to be aligned. Then, we
7380 proceed to saving 32-bit registers that don't need 8-byte
7381 alignment.
7382 If this is an interrupt function, all registers that need saving
7383 need to be saved in full. Moreover, we need to postpone saving
7384 target registers till we have saved some general purpose registers
7385 we can then use as scratch registers. */
7386 offset = offset_base;
7387 for (align = 1; align >= 0; align--)
7388 {
7389 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7390 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7391 {
7392 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
7393 int reg = i;
7394
7395 if (current_function_interrupt)
7396 {
7397 if (TARGET_REGISTER_P (i))
7398 continue;
7399 if (GENERAL_REGISTER_P (i))
7400 mode = DImode;
7401 }
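/* If both halves of an FP register pair are live and we are not in
single precision mode, save them together as one DFmode save
starting at the even numbered register. */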
7402 if (mode == SFmode && (i % 2) == 1
7403 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7404 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7405 {
7406 mode = DFmode;
7407 i--;
7408 reg--;
7409 }
7410
7411 /* If we're doing the aligned pass and this is not aligned,
7412 or we're doing the unaligned pass and this is aligned,
7413 skip it. */
7414 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7415 != align)
7416 continue;
7417
7418 if (current_function_interrupt
7419 && GENERAL_REGISTER_P (i)
7420 && tmpx < MAX_TEMPS)
7421 schedule->temps[tmpx++] = i;
7422
7423 offset -= GET_MODE_SIZE (mode);
7424 entry->reg = i;
7425 entry->mode = mode;
7426 entry->offset = offset;
7427 entry++;
7428 }
7429 if (align && current_function_interrupt)
7430 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7431 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7432 {
7433 offset -= GET_MODE_SIZE (DImode);
7434 entry->reg = i;
7435 entry->mode = DImode;
7436 entry->offset = offset;
7437 entry++;
7438 }
7439 }
7440 entry->reg = -1;
7441 entry->mode = VOIDmode;
7442 entry->offset = offset;
7443 schedule->temps[tmpx] = -1;
7444 return entry - 1;
7445 }
7446
7447 /* Expand code for the function prologue. */
7448 void
7449 sh_expand_prologue (void)
7450 {
7451 HARD_REG_SET live_regs_mask;
7452 int d, i;
7453 int d_rounding = 0;
7454 int save_flags = target_flags;
7455 int pretend_args;
7456 int stack_usage;
7457 tree sp_switch_attr
7458 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7459
7460 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7461
7462 /* We have pretend args if we had an object sent partially in registers
7463 and partially on the stack, e.g. a large structure. */
7464 pretend_args = crtl->args.pretend_args_size;
7465 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7466 && (NPARM_REGS(SImode)
7467 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7468 pretend_args = 0;
7469
7470 output_stack_adjust (-pretend_args
7471 - crtl->args.info.stack_regs * 8,
7472 stack_pointer_rtx, 0, NULL, true);
7473 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7474
7475 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7476 /* We're going to use the PIC register to load the address of the
7477 incoming-argument decoder and/or of the return trampoline from
7478 the GOT, so make sure the PIC register is preserved and
7479 initialized. */
7480 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7481
7482 if (TARGET_SHCOMPACT
7483 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7484 {
7485 int reg;
7486
7487 /* First, make all registers with incoming arguments that will
7488 be pushed onto the stack live, so that register renaming
7489 doesn't overwrite them. */
7490 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7491 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7492 >= NPARM_REGS (SImode) - reg)
7493 for (; reg < NPARM_REGS (SImode); reg++)
7494 emit_insn (gen_shcompact_preserve_incoming_args
7495 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7496 else if (CALL_COOKIE_INT_REG_GET
7497 (crtl->args.info.call_cookie, reg) == 1)
7498 emit_insn (gen_shcompact_preserve_incoming_args
7499 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7500
7501 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7502 stack_pointer_rtx);
7503 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7504 GEN_INT (crtl->args.info.call_cookie));
7505 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7506 gen_rtx_REG (SImode, R0_REG));
7507 }
7508 else if (TARGET_SHMEDIA)
7509 {
7510 int tr = sh_media_register_for_return ();
7511
7512 if (tr >= 0)
7513 emit_move_insn (gen_rtx_REG (DImode, tr),
7514 gen_rtx_REG (DImode, PR_MEDIA_REG));
7515 }
7516
7517 /* Emit the code for SETUP_VARARGS. */
7518 if (cfun->stdarg)
7519 {
7520 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7521 {
7522 /* Push arg regs as if they'd been provided by the caller on the stack. */
7523 for (i = 0; i < NPARM_REGS(SImode); i++)
7524 {
7525 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7526
7527 if (i >= (NPARM_REGS(SImode)
7528 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7529 ))
7530 break;
7531 push (rn);
7532 stack_usage += GET_MODE_SIZE (SImode);
7533 }
7534 }
7535 }
7536
7537 /* If we're supposed to switch stacks at function entry, do so now. */
7538 if (sp_switch_attr)
7539 {
7540 rtx lab, newsrc;
7541 /* The argument specifies a variable holding the address of the
7542 stack the interrupt function should switch to/from at entry/exit. */
7543 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7544 const char *s
7545 = ggc_strdup (TREE_STRING_POINTER (arg));
7546 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7547
7548 lab = add_constant (sp_switch, SImode, 0);
7549 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7550
7551 emit_insn (gen_sp_switch_1 (newsrc));
7552 }
7553
7554 d = calc_live_regs (&live_regs_mask);
7555 /* ??? Maybe we could save some switching if we can move a mode switch
7556 that already happens to be at the function start into the prologue. */
7557 if (target_flags != save_flags && ! current_function_interrupt)
7558 emit_insn (gen_toggle_sz ());
7559
7560 if (TARGET_SH5)
7561 {
7562 int offset_base, offset;
7563 rtx r0 = NULL_RTX;
7564 int offset_in_r0 = -1;
7565 int sp_in_r0 = 0;
7566 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7567 int total_size, save_size;
7568 save_schedule schedule;
7569 save_entry *entry;
7570 int *tmp_pnt;
7571
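/* Saves whose offsets do not fit the addressing modes are done through
r0: offset_in_r0 tracks the constant currently loaded into r0 and
sp_in_r0 records whether the stack pointer has been added to it. */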
7572 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7573 && ! current_function_interrupt)
7574 r0 = gen_rtx_REG (Pmode, R0_REG);
7575
7576 /* D is the actual number of bytes that we need for saving registers,
7577 however, in initial_elimination_offset we have committed to using
7578 an additional TREGS_SPACE amount of bytes - in order to keep both
7579 addresses to arguments supplied by the caller and local variables
7580 valid, we must keep this gap. Place it between the incoming
7581 arguments and the actually saved registers in a bid to optimize
7582 locality of reference. */
7583 total_size = d + tregs_space;
7584 total_size += rounded_frame_size (total_size);
7585 save_size = total_size - rounded_frame_size (d);
7586 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7587 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7588 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7589
7590 /* If adjusting the stack in a single step costs nothing extra, do so.
7591 I.e. either if a single addi is enough, or we need a movi anyway,
7592 and we don't exceed the maximum offset range (the test for the
7593 latter is conservative for simplicity). */
7594 if (TARGET_SHMEDIA
7595 && (CONST_OK_FOR_I10 (-total_size)
7596 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7597 && total_size <= 2044)))
7598 d_rounding = total_size - save_size;
7599
7600 offset_base = d + d_rounding;
7601
7602 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7603 0, NULL, true);
7604 stack_usage += save_size + d_rounding;
7605
7606 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7607 tmp_pnt = schedule.temps;
7608 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7609 {
7610 enum machine_mode mode = (enum machine_mode) entry->mode;
7611 unsigned int reg = entry->reg;
7612 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7613 rtx orig_reg_rtx;
7614
7615 offset = entry->offset;
7616
7617 reg_rtx = gen_rtx_REG (mode, reg);
7618
7619 mem_rtx = gen_frame_mem (mode,
7620 gen_rtx_PLUS (Pmode,
7621 stack_pointer_rtx,
7622 GEN_INT (offset)));
7623
7624 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7625 {
7626 gcc_assert (r0);
7627 mem_rtx = NULL_RTX;
7628 }
7629
7630 if (HAVE_PRE_DECREMENT
7631 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7632 || mem_rtx == NULL_RTX
7633 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7634 {
7635 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7636
7637 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7638 pre_dec = NULL_RTX;
7639 else
7640 {
7641 mem_rtx = NULL_RTX;
7642 offset += GET_MODE_SIZE (mode);
7643 }
7644 }
7645
7646 if (mem_rtx != NULL_RTX)
7647 goto addr_ok;
7648
7649 if (offset_in_r0 == -1)
7650 {
7651 emit_move_insn (r0, GEN_INT (offset));
7652 offset_in_r0 = offset;
7653 }
7654 else if (offset != offset_in_r0)
7655 {
7656 emit_move_insn (r0,
7657 gen_rtx_PLUS
7658 (Pmode, r0,
7659 GEN_INT (offset - offset_in_r0)));
7660 offset_in_r0 += offset - offset_in_r0;
7661 }
7662
7663 if (pre_dec != NULL_RTX)
7664 {
7665 if (! sp_in_r0)
7666 {
7667 emit_move_insn (r0,
7668 gen_rtx_PLUS
7669 (Pmode, r0, stack_pointer_rtx));
7670 sp_in_r0 = 1;
7671 }
7672
7673 offset -= GET_MODE_SIZE (mode);
7674 offset_in_r0 -= GET_MODE_SIZE (mode);
7675
7676 mem_rtx = pre_dec;
7677 }
7678 else if (sp_in_r0)
7679 mem_rtx = gen_frame_mem (mode, r0);
7680 else
7681 mem_rtx = gen_frame_mem (mode,
7682 gen_rtx_PLUS (Pmode,
7683 stack_pointer_rtx,
7684 r0));
7685
7686 /* We must not use an r0-based address for target-branch
7687 registers or for special registers without pre-dec
7688 memory addresses, since we store their values in r0
7689 first. */
7690 gcc_assert (!TARGET_REGISTER_P (reg)
7691 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7692 || mem_rtx == pre_dec));
7693
7694 addr_ok:
7695 orig_reg_rtx = reg_rtx;
7696 if (TARGET_REGISTER_P (reg)
7697 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7698 && mem_rtx != pre_dec))
7699 {
7700 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7701
7702 emit_move_insn (tmp_reg, reg_rtx);
7703
7704 if (REGNO (tmp_reg) == R0_REG)
7705 {
7706 offset_in_r0 = -1;
7707 sp_in_r0 = 0;
7708 gcc_assert (!refers_to_regno_p
7709 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7710 }
7711
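/* Advance round-robin through the scratch registers, wrapping back to
the start of the temps list at its terminator. */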
7712 if (*++tmp_pnt <= 0)
7713 tmp_pnt = schedule.temps;
7714
7715 reg_rtx = tmp_reg;
7716 }
7717 {
7718 rtx insn;
7719
7720 /* Mark as interesting for dwarf cfi generator */
7721 insn = emit_move_insn (mem_rtx, reg_rtx);
7722 RTX_FRAME_RELATED_P (insn) = 1;
7723 /* If we use an intermediate register for the save, we can't
7724 describe this exactly in cfi as a copy of the to-be-saved
7725 register into the temporary register and then the temporary
7726 register on the stack, because the temporary register can
7727 have a different natural size than the to-be-saved register.
7728 Thus, we gloss over the intermediate copy and pretend we do
7729 a direct save from the to-be-saved register. */
7730 if (REGNO (reg_rtx) != reg)
7731 {
7732 rtx set;
7733
7734 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7735 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7736 }
7737
7738 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7739 {
7740 rtx reg_rtx = gen_rtx_REG (mode, reg);
7741 rtx set;
7742 rtx mem_rtx = gen_frame_mem (mode,
7743 gen_rtx_PLUS (Pmode,
7744 stack_pointer_rtx,
7745 GEN_INT (offset)));
7746
7747 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7748 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7749 }
7750 }
7751 }
7752
7753 gcc_assert (entry->offset == d_rounding);
7754 }
7755 else
7756 {
7757 push_regs (&live_regs_mask, current_function_interrupt);
7758 stack_usage += d;
7759 }
7760
7761 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7762 emit_insn (gen_GOTaddr2picreg ());
7763
7764 if (SHMEDIA_REGS_STACK_ADJUST ())
7765 {
7766 /* This must NOT go through the PLT, otherwise mach and macl
7767 may be clobbered. */
7768 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7769 (TARGET_FPU_ANY
7770 ? "__GCC_push_shmedia_regs"
7771 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7772 emit_insn (gen_shmedia_save_restore_regs_compact
7773 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7774 }
7775
7776 if (target_flags != save_flags && ! current_function_interrupt)
7777 emit_insn (gen_toggle_sz ());
7778
7779 target_flags = save_flags;
7780
7781 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7782 stack_pointer_rtx, 0, NULL, true);
7783 stack_usage += rounded_frame_size (d) - d_rounding;
7784
7785 if (frame_pointer_needed)
7786 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7787
7788 if (TARGET_SHCOMPACT
7789 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7790 {
7791 /* This must NOT go through the PLT, otherwise mach and macl
7792 may be clobbered. */
7793 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7794 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7795 emit_insn (gen_shcompact_incoming_args ());
7796 }
7797
7798 /* If we are profiling, make sure no instructions are scheduled before
7799 the call to mcount. Similarly, if some call instructions are swapped
7800 before frame-related insns, it'll confuse the unwinder because
7801 currently SH has no unwind info for function epilogues. */
7802 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7803 emit_insn (gen_blockage ());
7804
7805 if (flag_stack_usage_info)
7806 current_function_static_stack_size = stack_usage;
7807 }
7808
7809 /* Expand code for the function epilogue. */
7810 void
7811 sh_expand_epilogue (bool sibcall_p)
7812 {
7813 HARD_REG_SET live_regs_mask;
7814 int d, i;
7815 int d_rounding = 0;
7816
7817 int save_flags = target_flags;
7818 int frame_size, save_size;
7819 int fpscr_deferred = 0;
7820 int e = sibcall_p ? -1 : 1;
7821
7822 d = calc_live_regs (&live_regs_mask);
7823
7824 save_size = d;
7825 frame_size = rounded_frame_size (d);
7826
7827 if (TARGET_SH5)
7828 {
7829 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7830 int total_size;
7831 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7832 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7833 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7834
7835 total_size = d + tregs_space;
7836 total_size += rounded_frame_size (total_size);
7837 save_size = total_size - frame_size;
7838
7839 /* If adjusting the stack in a single step costs nothing extra, do so.
7840 I.e. either if a single addi is enough, or we need a movi anyway,
7841 and we don't exceed the maximum offset range (the test for the
7842 latter is conservative for simplicity). */
7843 if (TARGET_SHMEDIA
7844 && ! frame_pointer_needed
7845 && (CONST_OK_FOR_I10 (total_size)
7846 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7847 && total_size <= 2044)))
7848 d_rounding = frame_size;
7849
7850 frame_size -= d_rounding;
7851 }
7852
7853 if (frame_pointer_needed)
7854 {
7855 /* We must avoid scheduling the epilogue with previous basic blocks.
7856 See PR/18032 and PR/40313. */
7857 emit_insn (gen_blockage ());
7858 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7859 &live_regs_mask, true);
7860
7861 /* We must avoid moving the stack pointer adjustment past code
7862 which reads from the local frame, else an interrupt could
7863 occur after the SP adjustment and clobber data in the local
7864 frame. */
7865 emit_insn (gen_blockage ());
7866 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7867 }
7868 else if (frame_size)
7869 {
7870 /* We must avoid moving the stack pointer adjustment past code
7871 which reads from the local frame, else an interrupt could
7872 occur after the SP adjustment and clobber data in the local
7873 frame. */
7874 emit_insn (gen_blockage ());
7875 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7876 &live_regs_mask, true);
7877 }
7878
7879 if (SHMEDIA_REGS_STACK_ADJUST ())
7880 {
7881 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7882 (TARGET_FPU_ANY
7883 ? "__GCC_pop_shmedia_regs"
7884 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7885 /* This must NOT go through the PLT, otherwise mach and macl
7886 may be clobbered. */
7887 emit_insn (gen_shmedia_save_restore_regs_compact
7888 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7889 }
7890
7891 /* Pop all the registers. */
7892
7893 if (target_flags != save_flags && ! current_function_interrupt)
7894 emit_insn (gen_toggle_sz ());
7895 if (TARGET_SH5)
7896 {
7897 int offset_base, offset;
7898 int offset_in_r0 = -1;
7899 int sp_in_r0 = 0;
7900 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7901 save_schedule schedule;
7902 save_entry *entry;
7903 int *tmp_pnt;
7904
7905 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
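/* entry[1] is the end delimiter, whose offset is the most negative one
in the schedule; offset_base converts the schedule's offsets into
nonnegative offsets from the current stack pointer. */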
7906 offset_base = -entry[1].offset + d_rounding;
7907 tmp_pnt = schedule.temps;
7908 for (; entry->mode != VOIDmode; entry--)
7909 {
7910 enum machine_mode mode = (enum machine_mode) entry->mode;
7911 int reg = entry->reg;
7912 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7913
7914 offset = offset_base + entry->offset;
7915 reg_rtx = gen_rtx_REG (mode, reg);
7916
7917 mem_rtx = gen_frame_mem (mode,
7918 gen_rtx_PLUS (Pmode,
7919 stack_pointer_rtx,
7920 GEN_INT (offset)));
7921
7922 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7923 mem_rtx = NULL_RTX;
7924
7925 if (HAVE_POST_INCREMENT
7926 && (offset == offset_in_r0
7927 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7928 && mem_rtx == NULL_RTX)
7929 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7930 {
7931 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7932
7933 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7934 post_inc = NULL_RTX;
7935 else
7936 mem_rtx = NULL_RTX;
7937 }
7938
7939 if (mem_rtx != NULL_RTX)
7940 goto addr_ok;
7941
7942 if (offset_in_r0 == -1)
7943 {
7944 emit_move_insn (r0, GEN_INT (offset));
7945 offset_in_r0 = offset;
7946 }
7947 else if (offset != offset_in_r0)
7948 {
7949 emit_move_insn (r0,
7950 gen_rtx_PLUS
7951 (Pmode, r0,
7952 GEN_INT (offset - offset_in_r0)));
7953 offset_in_r0 += offset - offset_in_r0;
7954 }
7955
7956 if (post_inc != NULL_RTX)
7957 {
7958 if (! sp_in_r0)
7959 {
7960 emit_move_insn (r0,
7961 gen_rtx_PLUS
7962 (Pmode, r0, stack_pointer_rtx));
7963 sp_in_r0 = 1;
7964 }
7965
7966 mem_rtx = post_inc;
7967
7968 offset_in_r0 += GET_MODE_SIZE (mode);
7969 }
7970 else if (sp_in_r0)
7971 mem_rtx = gen_frame_mem (mode, r0);
7972 else
7973 mem_rtx = gen_frame_mem (mode,
7974 gen_rtx_PLUS (Pmode,
7975 stack_pointer_rtx,
7976 r0));
7977
7978 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7979 || mem_rtx == post_inc);
7980
7981 addr_ok:
7982 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7983 && mem_rtx != post_inc)
7984 {
7985 emit_move_insn (r0, mem_rtx);
7986 mem_rtx = r0;
7987 }
7988 else if (TARGET_REGISTER_P (reg))
7989 {
7990 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7991
7992 /* Give the scheduler a bit of freedom by using up to
7993 MAX_TEMPS registers in a round-robin fashion. */
7994 emit_move_insn (tmp_reg, mem_rtx);
7995 mem_rtx = tmp_reg;
7996 if (*++tmp_pnt < 0)
7997 tmp_pnt = schedule.temps;
7998 }
7999
8000 emit_move_insn (reg_rtx, mem_rtx);
8001 }
8002
8003 gcc_assert (entry->offset + offset_base == d + d_rounding);
8004 }
8005 else /* ! TARGET_SH5 */
8006 {
8007 int last_reg;
8008
8009 save_size = 0;
8010 /* For an ISR with RESBANK attribute assigned, don't pop PR
8011 register. */
8012 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8013 && !sh_cfun_resbank_handler_p ())
8014 {
8015 if (!frame_pointer_needed)
8016 emit_insn (gen_blockage ());
8017 pop (PR_REG);
8018 }
8019
8020 /* Banked registers are popped first to avoid being scheduled in the
8021 delay slot. RTE switches banks before the instruction in the delay slot. */
8022 if (current_function_interrupt)
8023 {
8024 bool use_movml = false;
8025
8026 if (TARGET_SH2A)
8027 {
8028 unsigned int count = 0;
8029
8030 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8031 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8032 count++;
8033 else
8034 break;
8035
8036 /* Use movml when all banked registers are popped. */
8037 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8038 use_movml = true;
8039 }
8040
8041 if (sh_cfun_resbank_handler_p ())
8042 ; /* Do nothing. */
8043 else if (use_movml)
8044 {
8045 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8046
8047 /* We must avoid scheduling the multiple-register load insn
8048 together with other insns. */
8049 emit_insn (gen_blockage ());
8050 emit_insn (gen_movml_pop_banked (sp_reg));
8051 emit_insn (gen_blockage ());
8052 }
8053 else
8054 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8055 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8056 pop (i);
8057
8058 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8059 }
8060 else
8061 last_reg = FIRST_PSEUDO_REGISTER;
8062
8063 for (i = 0; i < last_reg; i++)
8064 {
8065 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8066
8067 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8068 && hard_reg_set_intersect_p (live_regs_mask,
8069 reg_class_contents[DF_REGS]))
8070 fpscr_deferred = 1;
8071 /* For an ISR with RESBANK attribute assigned, don't pop
8072 the following registers: R0-R14, MACH, MACL and GBR. */
8073 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8074 && ! (sh_cfun_resbank_handler_p ()
8075 && ((j >= FIRST_GENERAL_REG
8076 && j < LAST_GENERAL_REG)
8077 || j == MACH_REG
8078 || j == MACL_REG
8079 || j == GBR_REG)))
8080 pop (j);
8081
8082 if (j == FIRST_FP_REG && fpscr_deferred)
8083 pop (FPSCR_REG);
8084 }
8085 }
8086 if (target_flags != save_flags && ! current_function_interrupt)
8087 emit_insn (gen_toggle_sz ());
8088 target_flags = save_flags;
8089
8090 output_stack_adjust (crtl->args.pretend_args_size
8091 + save_size + d_rounding
8092 + crtl->args.info.stack_regs * 8,
8093 stack_pointer_rtx, e, NULL, true);
8094
8095 if (crtl->calls_eh_return)
8096 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8097 EH_RETURN_STACKADJ_RTX));
8098
8099 /* Switch back to the normal stack if necessary. */
8100 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8101 emit_insn (gen_sp_switch_2 ());
8102
8103 /* Tell flow the insn that pops PR isn't dead. */
8104 /* PR_REG will never be live in SHmedia mode, and we don't need to
8105 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8106 by the return pattern. */
8107 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8108 emit_use (gen_rtx_REG (SImode, PR_REG));
8109 }
8110
8111 /* Emit code to change the current function's return address to RA.
8112 TEMP is available as a scratch register, if needed. */
8113 void
8114 sh_set_return_address (rtx ra, rtx tmp)
8115 {
8116 HARD_REG_SET live_regs_mask;
8117 int d;
8118 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8119 int pr_offset;
8120
8121 d = calc_live_regs (&live_regs_mask);
8122
8123 /* If pr_reg isn't live, we can set it (or the register given in
8124 sh_media_register_for_return) directly. */
8125 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8126 {
8127 rtx rr;
8128
8129 if (TARGET_SHMEDIA)
8130 {
8131 int rr_regno = sh_media_register_for_return ();
8132
8133 if (rr_regno < 0)
8134 rr_regno = pr_reg;
8135
8136 rr = gen_rtx_REG (DImode, rr_regno);
8137 }
8138 else
8139 rr = gen_rtx_REG (SImode, pr_reg);
8140
8141 emit_insn (GEN_MOV (rr, ra));
8142 /* Tell flow the register for return isn't dead. */
8143 emit_use (rr);
8144 return;
8145 }
8146
8147 if (TARGET_SH5)
8148 {
8149 int offset;
8150 save_schedule schedule;
8151 save_entry *entry;
8152
8153 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8154 offset = entry[1].offset;
8155 for (; entry->mode != VOIDmode; entry--)
8156 if (entry->reg == pr_reg)
8157 goto found;
8158
8159 /* We can't find the PR register. */
8160 gcc_unreachable ();
8161
8162 found:
8163 offset = entry->offset - offset;
8164 pr_offset = (rounded_frame_size (d) + offset
8165 + SHMEDIA_REGS_STACK_ADJUST ());
8166 }
8167 else
8168 pr_offset = rounded_frame_size (d);
8169
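/* PR_OFFSET is now the offset from the stack (or frame) pointer to the
slot where the return address was saved; form that address in TMP
and store RA into it. */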
8170 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8171
8172 if (frame_pointer_needed)
8173 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8174 else
8175 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8176
8177 tmp = gen_frame_mem (Pmode, tmp);
8178 emit_insn (GEN_MOV (tmp, ra));
8179 /* Tell flow this store isn't dead. */
8180 emit_use (tmp);
8181 }
8182
8183 /* Clear variables at function end. */
8184 static void
8185 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8186 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8187 {
8188 }
8189
8190 static rtx
8191 sh_builtin_saveregs (void)
8192 {
8193 /* First unnamed integer register. */
8194 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8195 /* Number of integer registers we need to save. */
8196 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8197 /* First unnamed SFmode float reg. */
8198 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8199 /* Number of SFmode float regs to save. */
8200 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8201 rtx regbuf, fpregs;
8202 int bufsize, regno;
8203 alias_set_type alias_set;
8204
8205 if (TARGET_SH5)
8206 {
8207 if (n_intregs)
8208 {
8209 int pushregs = n_intregs;
8210
8211 while (pushregs < NPARM_REGS (SImode) - 1
8212 && (CALL_COOKIE_INT_REG_GET
8213 (crtl->args.info.call_cookie,
8214 NPARM_REGS (SImode) - pushregs)
8215 == 1))
8216 {
8217 crtl->args.info.call_cookie
8218 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8219 - pushregs, 1);
8220 pushregs++;
8221 }
8222
8223 if (pushregs == NPARM_REGS (SImode))
8224 crtl->args.info.call_cookie
8225 |= (CALL_COOKIE_INT_REG (0, 1)
8226 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8227 else
8228 crtl->args.info.call_cookie
8229 |= CALL_COOKIE_STACKSEQ (pushregs);
8230
8231 crtl->args.pretend_args_size += 8 * n_intregs;
8232 }
8233 if (TARGET_SHCOMPACT)
8234 return const0_rtx;
8235 }
8236
8237 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8238 {
8239 error ("__builtin_saveregs not supported by this subtarget");
8240 return const0_rtx;
8241 }
8242
8243 if (TARGET_SHMEDIA)
8244 n_floatregs = 0;
8245
8246 /* Allocate block of memory for the regs. */
8247 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8248 Or can assign_stack_local accept a 0 SIZE argument? */
8249 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8250
8251 if (TARGET_SHMEDIA)
8252 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8253 else if (n_floatregs & 1)
8254 {
8255 rtx addr;
8256
8257 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8258 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8259 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8260 regbuf = change_address (regbuf, BLKmode, addr);
8261 }
8262 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8263 {
8264 rtx addr, mask;
8265
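/* Over-allocate by one word and round the address up to an 8-byte
boundary, since the stack itself only guarantees word alignment
here. */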
8266 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8267 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8268 XEXP (regbuf, 0), 4));
8269 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8270 emit_insn (gen_andsi3 (addr, addr, mask));
8271 regbuf = change_address (regbuf, BLKmode, addr);
8272 }
8273 else
8274 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8275 alias_set = get_varargs_alias_set ();
8276 set_mem_alias_set (regbuf, alias_set);
8277
8278 /* Save int args.
8279 This is optimized to only save the regs that are necessary. Explicitly
8280 named args need not be saved. */
8281 if (n_intregs > 0)
8282 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8283 adjust_address (regbuf, BLKmode,
8284 n_floatregs * UNITS_PER_WORD),
8285 n_intregs);
8286
8287 if (TARGET_SHMEDIA)
8288 /* Return the address of the regbuf. */
8289 return XEXP (regbuf, 0);
8290
8291 /* Save float args.
8292 This is optimized to only save the regs that are necessary. Explicitly
8293 named args need not be saved.
8294 We explicitly build a pointer to the buffer because it halves the insn
8295 count when not optimizing (otherwise the pointer is built for each reg
8296 saved).
8297 We emit the moves in reverse order so that we can use predecrement. */
8298
8299 fpregs = copy_to_mode_reg (Pmode,
8300 plus_constant (Pmode, XEXP (regbuf, 0),
8301 n_floatregs * UNITS_PER_WORD));
8302 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8303 {
8304 rtx mem;
8305 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8306 {
8307 emit_insn (gen_addsi3 (fpregs, fpregs,
8308 GEN_INT (-2 * UNITS_PER_WORD)));
8309 mem = change_address (regbuf, DFmode, fpregs);
8310 emit_move_insn (mem,
8311 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8312 }
8313 regno = first_floatreg;
8314 if (regno & 1)
8315 {
8316 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8317 mem = change_address (regbuf, SFmode, fpregs);
8318 emit_move_insn (mem,
8319 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8320 + regno - SH_REG_MSW_OFFSET));
8321 }
8322 }
8323 else
8324 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8325 {
8326 rtx mem;
8327
8328 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8329 mem = change_address (regbuf, SFmode, fpregs);
8330 emit_move_insn (mem,
8331 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8332 }
8333
8334 /* Return the address of the regbuf. */
8335 return XEXP (regbuf, 0);
8336 }
8337
8338 /* Define the `__builtin_va_list' type for the ABI. */
8339 static tree
8340 sh_build_builtin_va_list (void)
8341 {
8342 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8343 tree record, type_decl;
8344
8345 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8346 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8347 return ptr_type_node;
8348
8349 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8350 type_decl = build_decl (BUILTINS_LOCATION,
8351 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8352
8353 f_next_o = build_decl (BUILTINS_LOCATION,
8354 FIELD_DECL, get_identifier ("__va_next_o"),
8355 ptr_type_node);
8356 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8357 FIELD_DECL,
8358 get_identifier ("__va_next_o_limit"),
8359 ptr_type_node);
8360 f_next_fp = build_decl (BUILTINS_LOCATION,
8361 FIELD_DECL, get_identifier ("__va_next_fp"),
8362 ptr_type_node);
8363 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8364 FIELD_DECL,
8365 get_identifier ("__va_next_fp_limit"),
8366 ptr_type_node);
8367 f_next_stack = build_decl (BUILTINS_LOCATION,
8368 FIELD_DECL, get_identifier ("__va_next_stack"),
8369 ptr_type_node);
8370
8371 DECL_FIELD_CONTEXT (f_next_o) = record;
8372 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8373 DECL_FIELD_CONTEXT (f_next_fp) = record;
8374 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8375 DECL_FIELD_CONTEXT (f_next_stack) = record;
8376
8377 TYPE_STUB_DECL (record) = type_decl;
8378 TYPE_NAME (record) = type_decl;
8379 TYPE_FIELDS (record) = f_next_o;
8380 DECL_CHAIN (f_next_o) = f_next_o_limit;
8381 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8382 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8383 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8384
8385 layout_type (record);
8386
8387 return record;
8388 }
8389
8390 /* Implement `va_start' for varargs and stdarg. */
8391 static void
8392 sh_va_start (tree valist, rtx nextarg)
8393 {
8394 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8395 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8396 tree t, u;
8397 int nfp, nint;
8398
8399 if (TARGET_SH5)
8400 {
8401 expand_builtin_saveregs ();
8402 std_expand_builtin_va_start (valist, nextarg);
8403 return;
8404 }
8405
8406 if ((! TARGET_SH2E && ! TARGET_SH4)
8407 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8408 {
8409 std_expand_builtin_va_start (valist, nextarg);
8410 return;
8411 }
8412
8413 f_next_o = TYPE_FIELDS (va_list_type_node);
8414 f_next_o_limit = DECL_CHAIN (f_next_o);
8415 f_next_fp = DECL_CHAIN (f_next_o_limit);
8416 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8417 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8418
8419 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8420 NULL_TREE);
8421 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8422 valist, f_next_o_limit, NULL_TREE);
8423 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8424 NULL_TREE);
8425 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8426 valist, f_next_fp_limit, NULL_TREE);
8427 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8428 valist, f_next_stack, NULL_TREE);
8429
8430 /* Call __builtin_saveregs. */
8431 u = make_tree (sizetype, expand_builtin_saveregs ());
8432 u = fold_convert (ptr_type_node, u);
8433 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8434 TREE_SIDE_EFFECTS (t) = 1;
8435 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8436
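/* NFP is the number of SFmode argument registers that were not used for
named arguments; the fp save area built by __builtin_saveregs is that
many words long, so next_fp_limit ends up just past it. */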
8437 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8438 if (nfp < 8)
8439 nfp = 8 - nfp;
8440 else
8441 nfp = 0;
8442 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8443 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8444 TREE_SIDE_EFFECTS (t) = 1;
8445 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8446
8447 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8448 TREE_SIDE_EFFECTS (t) = 1;
8449 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8450
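/* Likewise for the integer argument registers: next_o starts right
after the fp save area and next_o_limit lies NINT words beyond it. */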
8451 nint = crtl->args.info.arg_count[SH_ARG_INT];
8452 if (nint < 4)
8453 nint = 4 - nint;
8454 else
8455 nint = 0;
8456 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8457 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8458 TREE_SIDE_EFFECTS (t) = 1;
8459 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8460
8461 u = make_tree (ptr_type_node, nextarg);
8462 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8463 TREE_SIDE_EFFECTS (t) = 1;
8464 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8465 }
8466
8467 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8468 member, return it. */
8469 static tree
8470 find_sole_member (tree type)
8471 {
8472 tree field, member = NULL_TREE;
8473
8474 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8475 {
8476 if (TREE_CODE (field) != FIELD_DECL)
8477 continue;
8478 if (!DECL_SIZE (field))
8479 return NULL_TREE;
8480 if (integer_zerop (DECL_SIZE (field)))
8481 continue;
8482 if (member)
8483 return NULL_TREE;
8484 member = field;
8485 }
8486 return member;
8487 }
8488
8489 /* Implement `va_arg'. */
8490 static tree
8491 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8492 gimple_seq *post_p ATTRIBUTE_UNUSED)
8493 {
8494 HOST_WIDE_INT size, rsize;
8495 tree tmp, pptr_type_node;
8496 tree addr, lab_over = NULL, result = NULL;
8497 bool pass_by_ref;
8498 tree eff_type;
8499
8500 if (!VOID_TYPE_P (type))
8501 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8502 else
8503 pass_by_ref = false;
8504
8505 if (pass_by_ref)
8506 type = build_pointer_type (type);
8507
8508 size = int_size_in_bytes (type);
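/* RSIZE is the size rounded up to a whole number of words. */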
8509 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8510 pptr_type_node = build_pointer_type (ptr_type_node);
8511
8512 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8513 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8514 {
8515 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8516 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8517 int pass_as_float;
8518 tree lab_false;
8519 tree member;
8520
8521 f_next_o = TYPE_FIELDS (va_list_type_node);
8522 f_next_o_limit = DECL_CHAIN (f_next_o);
8523 f_next_fp = DECL_CHAIN (f_next_o_limit);
8524 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8525 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8526
8527 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8528 NULL_TREE);
8529 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8530 valist, f_next_o_limit, NULL_TREE);
8531 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8532 valist, f_next_fp, NULL_TREE);
8533 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8534 valist, f_next_fp_limit, NULL_TREE);
8535 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8536 valist, f_next_stack, NULL_TREE);
8537
8538 /* Structures with a single member with a distinct mode are passed
8539 like their member. This is relevant if the latter has a REAL_TYPE
8540 or COMPLEX_TYPE type. */
8541 eff_type = type;
8542 while (TREE_CODE (eff_type) == RECORD_TYPE
8543 && (member = find_sole_member (eff_type))
8544 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8545 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8546 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8547 {
8548 tree field_type = TREE_TYPE (member);
8549
8550 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8551 eff_type = field_type;
8552 else
8553 {
8554 gcc_assert ((TYPE_ALIGN (eff_type)
8555 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8556 || (TYPE_ALIGN (eff_type)
8557 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8558 break;
8559 }
8560 }
8561
8562 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8563 {
8564 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8565 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8566 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8567 && size <= 16));
8568 }
8569 else
8570 {
8571 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8572 }
8573
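/* ADDR is a pointer to a pointer: the code below stores the address of
whichever va_list field is chosen (fp, int or overflow area) into it,
and VALIST is then accessed through that pointer. */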
8574 addr = create_tmp_var (pptr_type_node, NULL);
8575 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8576 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8577
8578 valist = build_simple_mem_ref (addr);
8579
8580 if (pass_as_float)
8581 {
8582 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8583 tree cmp;
8584 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8585
8586 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8587 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8588
8589 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8590 tmp = next_fp_limit;
8591 if (size > 4 && !is_double)
8592 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8593 tmp = build2 (GE_EXPR, boolean_type_node,
8594 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8595 cmp = build3 (COND_EXPR, void_type_node, tmp,
8596 build1 (GOTO_EXPR, void_type_node,
8597 unshare_expr (lab_false)), NULL_TREE);
8598 if (!is_double)
8599 gimplify_and_add (cmp, pre_p);
8600
8601 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8602 || (is_double || size == 16))
8603 {
8604 tmp = fold_convert (sizetype, next_fp_tmp);
8605 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8606 size_int (UNITS_PER_WORD));
8607 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8608 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8609 }
8610 if (is_double)
8611 gimplify_and_add (cmp, pre_p);
8612
8613 #ifdef FUNCTION_ARG_SCmode_WART
8614 if (TYPE_MODE (eff_type) == SCmode
8615 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8616 {
8617 tree subtype = TREE_TYPE (eff_type);
8618 tree real, imag;
8619
8620 imag
8621 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8622 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8623
8624 real
8625 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8626 real = get_initialized_tmp_var (real, pre_p, NULL);
8627
8628 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8629 if (type != eff_type)
8630 result = build1 (VIEW_CONVERT_EXPR, type, result);
8631 result = get_initialized_tmp_var (result, pre_p, NULL);
8632 }
8633 #endif /* FUNCTION_ARG_SCmode_WART */
8634
8635 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8636 gimplify_and_add (tmp, pre_p);
8637
8638 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8639 gimplify_and_add (tmp, pre_p);
8640
8641 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8642 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8643 gimplify_assign (unshare_expr (next_fp_tmp),
8644 unshare_expr (valist), pre_p);
8645
8646 gimplify_assign (unshare_expr (valist),
8647 unshare_expr (next_fp_tmp), post_p);
8648 valist = next_fp_tmp;
8649 }
8650 else
8651 {
8652 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8653 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8654 unshare_expr (next_o_limit));
8655 tmp = build3 (COND_EXPR, void_type_node, tmp,
8656 build1 (GOTO_EXPR, void_type_node,
8657 unshare_expr (lab_false)),
8658 NULL_TREE);
8659 gimplify_and_add (tmp, pre_p);
8660
8661 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8662 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8663
8664 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8665 gimplify_and_add (tmp, pre_p);
8666
8667 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8668 gimplify_and_add (tmp, pre_p);
8669
8670 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8671 gimplify_assign (unshare_expr (next_o),
8672 unshare_expr (next_o_limit), pre_p);
8673
8674 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8675 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8676 }
8677
8678 if (!result)
8679 {
8680 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8681 gimplify_and_add (tmp, pre_p);
8682 }
8683 }
8684
8685 /* ??? In va-sh.h, there had been code to make values larger than
8686 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8687
8688 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8689 if (result)
8690 {
8691 gimplify_assign (result, tmp, pre_p);
8692 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8693 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8694 gimplify_and_add (tmp, pre_p);
8695 }
8696 else
8697 result = tmp;
8698
8699 if (pass_by_ref)
8700 result = build_va_arg_indirect_ref (result);
8701
8702 return result;
8703 }
8704
8705 /* 64-bit floating point memory transfers are paired single precision loads
8706 or stores. So DWARF information needs fixing in little endian mode (unless
8707 PR=SZ=1 in FPSCR). */
8708 rtx
8709 sh_dwarf_register_span (rtx reg)
8710 {
8711 unsigned regno = REGNO (reg);
8712
8713 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8714 return NULL_RTX;
8715
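/* Return a PARALLEL naming the two SFmode halves (regno + 1 and regno)
that make up the DFmode register. */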
8716 return
8717 gen_rtx_PARALLEL (VOIDmode,
8718 gen_rtvec (2,
8719 gen_rtx_REG (SFmode, regno + 1),
8720 gen_rtx_REG (SFmode, regno)));
8721 }
8722
8723 static enum machine_mode
8724 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8725 int *punsignedp, const_tree funtype,
8726 int for_return)
8727 {
8728 if (sh_promote_prototypes (funtype))
8729 return promote_mode (type, mode, punsignedp);
8730 else
8731 return default_promote_function_mode (type, mode, punsignedp, funtype,
8732 for_return);
8733 }
8734
8735 static bool
8736 sh_promote_prototypes (const_tree type)
8737 {
8738 if (TARGET_HITACHI)
8739 return false;
8740 if (! type)
8741 return true;
8742 return ! sh_attr_renesas_p (type);
8743 }
8744
8745 /* Whether an argument must be passed by reference. On SHcompact, we
8746 pretend arguments wider than 32 bits that would have been passed in
8747 registers are passed by reference, so that an SHmedia trampoline
8748 loads them into the full 64-bit registers. */
8749 static int
8750 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8751 const_tree type, bool named)
8752 {
8753 unsigned HOST_WIDE_INT size;
8754
8755 if (type)
8756 size = int_size_in_bytes (type);
8757 else
8758 size = GET_MODE_SIZE (mode);
8759
8760 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8761 && (!named
8762 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8763 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8764 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8765 && size > 4
8766 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8767 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8768 return size;
8769 else
8770 return 0;
8771 }
8772
8773 static bool
8774 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8775 const_tree type, bool named)
8776 {
8777 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8778
8779 if (targetm.calls.must_pass_in_stack (mode, type))
8780 return true;
8781
8782 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8783 wants to know about pass-by-reference semantics for incoming
8784 arguments. */
8785 if (! cum)
8786 return false;
8787
8788 if (TARGET_SHCOMPACT)
8789 {
8790 cum->byref = shcompact_byref (cum, mode, type, named);
8791 return cum->byref != 0;
8792 }
8793
8794 return false;
8795 }
8796
8797 static bool
8798 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8799 const_tree type, bool named ATTRIBUTE_UNUSED)
8800 {
8801 /* ??? How can it possibly be correct to return true only on the
8802 caller side of the equation? Is there someplace else in the
8803 sh backend that's magically producing the copies? */
8804 return (get_cumulative_args (cum)->outgoing
8805 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8806 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8807 }
8808
8809 static int
8810 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8811 tree type, bool named ATTRIBUTE_UNUSED)
8812 {
8813 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8814 int words = 0;
8815
8816 if (!TARGET_SH5
8817 && PASS_IN_REG_P (*cum, mode, type)
8818 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8819 && (ROUND_REG (*cum, mode)
8820 + (mode != BLKmode
8821 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8822 : ROUND_ADVANCE (int_size_in_bytes (type)))
8823 > NPARM_REGS (mode)))
8824 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8825
8826 else if (!TARGET_SHCOMPACT
8827 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8828 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8829
8830 return words * UNITS_PER_WORD;
8831 }
8832
8833
8834 /* Define where to put the arguments to a function.
8835 Value is zero to push the argument on the stack,
8836 or a hard register in which to store the argument.
8837
8838 MODE is the argument's machine mode.
8839 TYPE is the data type of the argument (as a tree).
8840 This is null for libcalls where that information may
8841 not be available.
8842 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8843 the preceding args and about the function being called.
8844 NAMED is nonzero if this argument is a named parameter
8845 (otherwise it is an extra parameter matching an ellipsis).
8846
8847 On SH the first args are normally in registers
8848 and the rest are pushed. Any arg that starts within the first
8849 NPARM_REGS words is at least partially passed in a register unless
8850 its data type forbids. */
8851 static rtx
8852 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8853 const_tree type, bool named)
8854 {
8855 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8856
8857 if (! TARGET_SH5 && mode == VOIDmode)
8858 return GEN_INT (ca->renesas_abi ? 1 : 0);
8859
8860 if (! TARGET_SH5
8861 && PASS_IN_REG_P (*ca, mode, type)
8862 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8863 {
8864 int regno;
8865
8866 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8867 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8868 {
8869 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8870 gen_rtx_REG (SFmode,
8871 BASE_ARG_REG (mode)
8872 + (ROUND_REG (*ca, mode) ^ 1)),
8873 const0_rtx);
8874 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8875 gen_rtx_REG (SFmode,
8876 BASE_ARG_REG (mode)
8877 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8878 GEN_INT (4));
8879 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8880 }
8881
8882 /* If the alignment of a DF value causes an SF register to be
8883 skipped, we will use that skipped register for the next SF
8884 value. */
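      /* For illustration: with arguments (float, double, float) the double
	 is aligned to an even/odd register pair, skipping one
	 single-precision register; that register is recorded in
	 free_single_fp_reg and reused here for the second float
	 (illustrative example only).  */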
8885 if ((TARGET_HITACHI || ca->renesas_abi)
8886 && ca->free_single_fp_reg
8887 && mode == SFmode)
8888 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8889
8890 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8891 ^ (mode == SFmode && TARGET_SH4
8892 && TARGET_LITTLE_ENDIAN
8893 && ! TARGET_HITACHI && ! ca->renesas_abi);
8894 return gen_rtx_REG (mode, regno);
8895
8896 }
8897
8898 if (TARGET_SH5)
8899 {
8900 if (mode == VOIDmode && TARGET_SHCOMPACT)
8901 return GEN_INT (ca->call_cookie);
8902
8903 /* The following test assumes unnamed arguments are promoted to
8904 DFmode. */
8905 if (mode == SFmode && ca->free_single_fp_reg)
8906 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8907
8908 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8909 && (named || ! ca->prototype_p)
8910 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8911 {
8912 if (! ca->prototype_p && TARGET_SHMEDIA)
8913 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8914
8915 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8916 FIRST_FP_PARM_REG
8917 + ca->arg_count[(int) SH_ARG_FLOAT]);
8918 }
8919
8920 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8921 && (! TARGET_SHCOMPACT
8922 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8923 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8924 type, named))))
8925 {
8926 return gen_rtx_REG (mode, (FIRST_PARM_REG
8927 + ca->arg_count[(int) SH_ARG_INT]));
8928 }
8929
8930 return NULL_RTX;
8931 }
8932
8933 return NULL_RTX;
8934 }
8935
8936 /* Update the data in CUM to advance over an argument
8937 of mode MODE and data type TYPE.
8938 (TYPE is null for libcalls where that information may not be
8939 available.) */
8940 static void
8941 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
8942 const_tree type, bool named)
8943 {
8944 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8945
8946 if (ca->force_mem)
8947 ca->force_mem = 0;
8948 else if (TARGET_SH5)
8949 {
8950 const_tree type2 = (ca->byref && type
8951 ? TREE_TYPE (type)
8952 : type);
8953 enum machine_mode mode2 = (ca->byref && type
8954 ? TYPE_MODE (type2)
8955 : mode);
8956 int dwords = ((ca->byref
8957 ? ca->byref
8958 : mode2 == BLKmode
8959 ? int_size_in_bytes (type2)
8960 : GET_MODE_SIZE (mode2)) + 7) / 8;
8961 int numregs = MIN (dwords, NPARM_REGS (SImode)
8962 - ca->arg_count[(int) SH_ARG_INT]);
8963
8964 if (numregs)
8965 {
8966 ca->arg_count[(int) SH_ARG_INT] += numregs;
8967 if (TARGET_SHCOMPACT
8968 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8969 {
8970 ca->call_cookie
8971 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8972 - numregs, 1);
8973 /* N.B. We want this also for outgoing. */
8974 ca->stack_regs += numregs;
8975 }
8976 else if (ca->byref)
8977 {
8978 if (! ca->outgoing)
8979 ca->stack_regs += numregs;
8980 ca->byref_regs += numregs;
8981 ca->byref = 0;
8982 do
8983 ca->call_cookie
8984 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8985 - numregs, 2);
8986 while (--numregs);
8987 ca->call_cookie
8988 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8989 - 1, 1);
8990 }
8991 else if (dwords > numregs)
8992 {
8993 int pushregs = numregs;
8994
8995 if (TARGET_SHCOMPACT)
8996 ca->stack_regs += numregs;
8997 while (pushregs < NPARM_REGS (SImode) - 1
8998 && (CALL_COOKIE_INT_REG_GET
8999 (ca->call_cookie,
9000 NPARM_REGS (SImode) - pushregs)
9001 == 1))
9002 {
9003 ca->call_cookie
9004 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9005 - pushregs, 1);
9006 pushregs++;
9007 }
9008 if (numregs == NPARM_REGS (SImode))
9009 ca->call_cookie
9010 |= CALL_COOKIE_INT_REG (0, 1)
9011 | CALL_COOKIE_STACKSEQ (numregs - 1);
9012 else
9013 ca->call_cookie
9014 |= CALL_COOKIE_STACKSEQ (numregs);
9015 }
9016 }
9017 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9018 && (named || ! ca->prototype_p))
9019 {
9020 if (mode2 == SFmode && ca->free_single_fp_reg)
9021 ca->free_single_fp_reg = 0;
9022 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9023 < NPARM_REGS (SFmode))
9024 {
9025 int numfpregs
9026 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9027 NPARM_REGS (SFmode)
9028 - ca->arg_count[(int) SH_ARG_FLOAT]);
9029
9030 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9031
9032 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9033 {
9034 if (ca->outgoing && numregs > 0)
9035 do
9036 {
9037 ca->call_cookie
9038 |= (CALL_COOKIE_INT_REG
9039 (ca->arg_count[(int) SH_ARG_INT]
9040 - numregs + ((numfpregs - 2) / 2),
9041 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9042 - numfpregs) / 2));
9043 }
9044 while (numfpregs -= 2);
9045 }
9046 else if (mode2 == SFmode && (named)
9047 && (ca->arg_count[(int) SH_ARG_FLOAT]
9048 < NPARM_REGS (SFmode)))
9049 ca->free_single_fp_reg
9050 = FIRST_FP_PARM_REG - numfpregs
9051 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9052 }
9053 }
9054 return;
9055 }
9056
9057 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9058 {
9059 /* Note that we've used the skipped register. */
9060 if (mode == SFmode && ca->free_single_fp_reg)
9061 {
9062 ca->free_single_fp_reg = 0;
9063 return;
9064 }
9065 /* When we have a DF after an SF, there's an SF register that gets
9066 skipped in order to align the DF value. We note this skipped
9067 register, because the next SF value will use it, and not the
9068 SF that follows the DF. */
9069 if (mode == DFmode
9070 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
9071 {
9072 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
9073 + BASE_ARG_REG (mode));
9074 }
9075 }
9076
9077 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9078 || PASS_IN_REG_P (*ca, mode, type))
9079 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9080 = (ROUND_REG (*ca, mode)
9081 + (mode == BLKmode
9082 ? ROUND_ADVANCE (int_size_in_bytes (type))
9083 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
9084 }
9085
9086 /* The Renesas calling convention doesn't quite fit into this scheme since
9087 the address is passed like an invisible argument, but one that is always
9088 passed in memory. */
9089 static rtx
9090 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9091 {
9092 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9093 return NULL_RTX;
9094 return gen_rtx_REG (Pmode, 2);
9095 }
9096
9097 /* Worker function for TARGET_FUNCTION_VALUE.
9098
9099 For the SH, this is like LIBCALL_VALUE, except that we must change the
9100 mode like PROMOTE_MODE does.
9101 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9102 tested here has to be kept in sync with the one in
9103 explow.c:promote_mode. */
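/* E.g. with promoted prototypes, a function declared to return 'short'
   has its value promoted and returned in SImode in the integer return
   register (typically r0), matching the mode change done below.  */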
9104 static rtx
9105 sh_function_value (const_tree valtype,
9106 const_tree fn_decl_or_type,
9107 bool outgoing ATTRIBUTE_UNUSED)
9108 {
9109 if (fn_decl_or_type
9110 && !DECL_P (fn_decl_or_type))
9111 fn_decl_or_type = NULL;
9112
9113 return gen_rtx_REG (
9114 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9115 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9116 && (TREE_CODE (valtype) == INTEGER_TYPE
9117 || TREE_CODE (valtype) == ENUMERAL_TYPE
9118 || TREE_CODE (valtype) == BOOLEAN_TYPE
9119 || TREE_CODE (valtype) == REAL_TYPE
9120 || TREE_CODE (valtype) == OFFSET_TYPE))
9121 && sh_promote_prototypes (fn_decl_or_type)
9122 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9123 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9124 }
9125
9126 /* Worker function for TARGET_LIBCALL_VALUE. */
9127 static rtx
9128 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9129 {
9130 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9131 }
9132
9133 /* Return true if N is a possible register number of function value. */
9134 static bool
9135 sh_function_value_regno_p (const unsigned int regno)
9136 {
9137 return ((regno) == FIRST_RET_REG
9138 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9139 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9140 }
9141
9142 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9143 static bool
9144 sh_return_in_memory (const_tree type, const_tree fndecl)
9145 {
9146 if (TARGET_SH5)
9147 {
9148 if (TYPE_MODE (type) == BLKmode)
9149 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9150 else
9151 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9152 }
9153 else
9154 {
9155 return (TYPE_MODE (type) == BLKmode
9156 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9157 && TREE_CODE (type) == RECORD_TYPE));
9158 }
9159 }
9160
9161 /* We actually emit the code in sh_expand_prologue. We used to use
9162 a static variable to flag that we need to emit this code, but that
9163 doesn't work when inlining, or when functions are deferred and then emitted
9164 later. Fortunately, we already have two flags that are part of struct
9165 function that tell if a function uses varargs or stdarg. */
9166 static void
9167 sh_setup_incoming_varargs (cumulative_args_t ca,
9168 enum machine_mode mode,
9169 tree type,
9170 int *pretend_arg_size,
9171 int second_time ATTRIBUTE_UNUSED)
9172 {
9173 gcc_assert (cfun->stdarg);
9174 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9175 {
9176 int named_parm_regs, anon_parm_regs;
9177
9178 named_parm_regs = (ROUND_REG (*get_cumulative_args (ca), mode)
9179 + (mode == BLKmode
9180 ? ROUND_ADVANCE (int_size_in_bytes (type))
9181 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
9182 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9183 if (anon_parm_regs > 0)
9184 *pretend_arg_size = anon_parm_regs * 4;
9185 }
9186 }
9187
9188 static bool
9189 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9190 {
9191 return TARGET_SH5;
9192 }
9193
9194 static bool
9195 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9196 {
9197 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9198
9199 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9200 }
9201
9202
9203 /* Define the offset between two registers, one to be eliminated, and
9204 the other its replacement, at the start of a routine. */
9205 int
9206 initial_elimination_offset (int from, int to)
9207 {
9208 int regs_saved;
9209 int regs_saved_rounding = 0;
9210 int total_saved_regs_space;
9211 int total_auto_space;
9212 int save_flags = target_flags;
9213 int copy_flags;
9214 HARD_REG_SET live_regs_mask;
9215
9216 shmedia_space_reserved_for_target_registers = false;
9217 regs_saved = calc_live_regs (&live_regs_mask);
9218 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9219
9220 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9221 {
9222 shmedia_space_reserved_for_target_registers = true;
9223 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9224 }
9225
9226 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9227 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9228 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9229
9230 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9231 copy_flags = target_flags;
9232 target_flags = save_flags;
9233
9234 total_saved_regs_space = regs_saved + regs_saved_rounding;
9235
9236 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9237 return total_saved_regs_space + total_auto_space
9238 + crtl->args.info.byref_regs * 8;
9239
9240 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9241 return total_saved_regs_space + total_auto_space
9242 + crtl->args.info.byref_regs * 8;
9243
9244 /* Initial gap between fp and sp is 0. */
9245 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9246 return 0;
9247
9248 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9249 return rounded_frame_size (0);
9250
9251 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9252 return rounded_frame_size (0);
9253
9254 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9255 && (to == HARD_FRAME_POINTER_REGNUM
9256 || to == STACK_POINTER_REGNUM));
9257 if (TARGET_SH5)
9258 {
9259 int n = total_saved_regs_space;
9260 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9261 save_schedule schedule;
9262 save_entry *entry;
9263
9264 n += total_auto_space;
9265
9266 /* If it wasn't saved, there's not much we can do. */
9267 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9268 return n;
9269
9270 target_flags = copy_flags;
9271
9272 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9273 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9274 if (entry->reg == pr_reg)
9275 {
9276 target_flags = save_flags;
9277 return entry->offset;
9278 }
9279 gcc_unreachable ();
9280 }
9281 else
9282 return total_auto_space;
9283 }
9284
9285 /* Parse the -mfixed-range= option string. */
9286 void
9287 sh_fix_range (const char *const_str)
9288 {
9289 int i, first, last;
9290 char *str, *dash, *comma;
9291
9292 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9293 REG2 are either register names or register numbers. The effect
9294 of this option is to mark the registers in the range from REG1 to
9295 REG2 as ``fixed'' so they won't be used by the compiler. */
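  /* For example (illustrative values only): -mfixed-range=r10-r13 would
     mark r10, r11, r12 and r13 as fixed and call-used, so the compiler
     never allocates them.  */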
9296
9297 i = strlen (const_str);
9298 str = (char *) alloca (i + 1);
9299 memcpy (str, const_str, i + 1);
9300
9301 while (1)
9302 {
9303 dash = strchr (str, '-');
9304 if (!dash)
9305 {
9306 warning (0, "value of -mfixed-range must have form REG1-REG2");
9307 return;
9308 }
9309 *dash = '\0';
9310 comma = strchr (dash + 1, ',');
9311 if (comma)
9312 *comma = '\0';
9313
9314 first = decode_reg_name (str);
9315 if (first < 0)
9316 {
9317 warning (0, "unknown register name: %s", str);
9318 return;
9319 }
9320
9321 last = decode_reg_name (dash + 1);
9322 if (last < 0)
9323 {
9324 warning (0, "unknown register name: %s", dash + 1);
9325 return;
9326 }
9327
9328 *dash = '-';
9329
9330 if (first > last)
9331 {
9332 warning (0, "%s-%s is an empty range", str, dash + 1);
9333 return;
9334 }
9335
9336 for (i = first; i <= last; ++i)
9337 fixed_regs[i] = call_used_regs[i] = 1;
9338
9339 if (!comma)
9340 break;
9341
9342 *comma = ',';
9343 str = comma + 1;
9344 }
9345 }
9346 \f
9347 /* Insert any deferred function attributes from earlier pragmas. */
9348 static void
9349 sh_insert_attributes (tree node, tree *attributes)
9350 {
9351 tree attrs;
9352
9353 if (TREE_CODE (node) != FUNCTION_DECL)
9354 return;
9355
9356 /* We are only interested in declarations. */
9357 if (!DECL_P (node))
9358 return;
9359
9360 /* Append the attributes to the deferred attributes. */
9361 *sh_deferred_function_attributes_tail = *attributes;
9362 attrs = sh_deferred_function_attributes;
9363 if (!attrs)
9364 return;
9365
9366 /* Some attributes imply or require the interrupt attribute. */
9367 if (!lookup_attribute ("interrupt_handler", attrs)
9368 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9369 {
9370 /* If we have a trapa_handler, but no interrupt_handler attribute,
9371 insert an interrupt_handler attribute. */
9372 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9373 /* We can't use sh_pr_interrupt here because that's not in the
9374 java frontend. */
9375 attrs
9376 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9377 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9378 if the interrupt attribute is missing, we ignore the attribute
9379 and warn. */
9380 else if (lookup_attribute ("sp_switch", attrs)
9381 || lookup_attribute ("trap_exit", attrs)
9382 || lookup_attribute ("nosave_low_regs", attrs)
9383 || lookup_attribute ("resbank", attrs))
9384 {
9385 tree *tail;
9386
9387 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9388 {
9389 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9390 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9391 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9392 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9393 warning (OPT_Wattributes,
9394 "%qE attribute only applies to interrupt functions",
9395 TREE_PURPOSE (attrs));
9396 else
9397 {
9398 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9399 NULL_TREE);
9400 tail = &TREE_CHAIN (*tail);
9401 }
9402 }
9403 attrs = *attributes;
9404 }
9405 }
9406
9407 /* Install the processed list. */
9408 *attributes = attrs;
9409
9410 /* Clear deferred attributes. */
9411 sh_deferred_function_attributes = NULL_TREE;
9412 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9413
9414 return;
9415 }
9416
9417 /*------------------------------------------------------------------------------
9418 Target specific attributes
9419 Supported attributes are:
9420
9421 * interrupt_handler
9422 Specifies this function is an interrupt handler.
9423
9424 * trapa_handler
9425 Like interrupt_handler, but don't save all registers.
9426
9427 * sp_switch
9428 Specifies an alternate stack for an interrupt handler to run on.
9429
9430 * trap_exit
9431 Use a trapa to exit an interrupt function instead of rte.
9432
9433 * nosave_low_regs
9434 Don't save r0..r7 in an interrupt handler function.
9435 This is useful on SH3* and SH4*, which have a separate set of low
9436 regs for user and privileged modes.
9437 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9438 those that run with interrupts disabled and thus can't be
9439 interrupted themselves).
9440
9441 * renesas
9442 Use Renesas calling/layout conventions (functions and structures).
9443
9444 * resbank
9445 In case of an interrupt handler function, use a register bank to
9446 save registers R0-R14, MACH, MACL, GBR and PR.
9447 This is available only on SH2A targets.
9448
9449 * function_vector
9450 Declares a function to be called using the TBR relative addressing
9451 mode. Takes an argument that specifies the slot number in the table
9452 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
9453 */
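/* For illustration, hypothetical declarations using these attributes:
     void __attribute__ ((interrupt_handler)) isr (void);
     void __attribute__ ((interrupt_handler, trap_exit (11))) isr2 (void);
     void __attribute__ ((function_vector (18))) tbr_func (void);
   As checked by the handlers below, sp_switch expects a string constant,
   trap_exit and function_vector an integer constant, and the
   function_vector argument must lie in the range 0..255.  */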
9454
9455 /* Handle a 'resbank' attribute. */
9456 static tree
9457 sh_handle_resbank_handler_attribute (tree * node, tree name,
9458 tree args ATTRIBUTE_UNUSED,
9459 int flags ATTRIBUTE_UNUSED,
9460 bool * no_add_attrs)
9461 {
9462 if (!TARGET_SH2A)
9463 {
9464 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9465 name);
9466 *no_add_attrs = true;
9467 }
9468 if (TREE_CODE (*node) != FUNCTION_DECL)
9469 {
9470 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9471 name);
9472 *no_add_attrs = true;
9473 }
9474
9475 return NULL_TREE;
9476 }
9477
9478 /* Handle an "interrupt_handler" attribute; arguments as in
9479 struct attribute_spec.handler. */
9480 static tree
9481 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9482 tree args ATTRIBUTE_UNUSED,
9483 int flags ATTRIBUTE_UNUSED,
9484 bool *no_add_attrs)
9485 {
9486 if (TREE_CODE (*node) != FUNCTION_DECL)
9487 {
9488 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9489 name);
9490 *no_add_attrs = true;
9491 }
9492 else if (TARGET_SHCOMPACT)
9493 {
9494 error ("attribute interrupt_handler is not compatible with -m5-compact");
9495 *no_add_attrs = true;
9496 }
9497
9498 return NULL_TREE;
9499 }
9500
9501 /* Handle an 'function_vector' attribute; arguments as in
9502 struct attribute_spec.handler. */
9503 static tree
9504 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9505 tree args ATTRIBUTE_UNUSED,
9506 int flags ATTRIBUTE_UNUSED,
9507 bool * no_add_attrs)
9508 {
9509 if (!TARGET_SH2A)
9510 {
9511 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9512 name);
9513 *no_add_attrs = true;
9514 }
9515 else if (TREE_CODE (*node) != FUNCTION_DECL)
9516 {
9517 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9518 name);
9519 *no_add_attrs = true;
9520 }
9521 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9522 {
9523 /* The argument must be a constant integer. */
9524 warning (OPT_Wattributes,
9525 "%qE attribute argument not an integer constant",
9526 name);
9527 *no_add_attrs = true;
9528 }
9529 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9530 {
9531 /* The argument value must be between 0 and 255. */
9532 warning (OPT_Wattributes,
9533 "%qE attribute argument should be between 0 to 255",
9534 name);
9535 *no_add_attrs = true;
9536 }
9537 return NULL_TREE;
9538 }
9539
9540 /* Returns true if the symbol X refers to a function that has been
9541 assigned the 'function_vector' attribute. */
9542 bool
9543 sh2a_is_function_vector_call (rtx x)
9544 {
9545 if (GET_CODE (x) == SYMBOL_REF
9546 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9547 {
9548 tree tr = SYMBOL_REF_DECL (x);
9549
9550 if (sh2a_function_vector_p (tr))
9551 return true;
9552 }
9553
9554 return false;
9555 }
9556
9557 /* Returns the function vector number, if the attribute
9558 'function_vector' is assigned, otherwise returns zero. */
9559 int
9560 sh2a_get_function_vector_number (rtx x)
9561 {
9562 int num;
9563 tree list, t;
9564
9565 if ((GET_CODE (x) == SYMBOL_REF)
9566 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9567 {
9568 t = SYMBOL_REF_DECL (x);
9569
9570 if (TREE_CODE (t) != FUNCTION_DECL)
9571 return 0;
9572
9573 list = SH_ATTRIBUTES (t);
9574 while (list)
9575 {
9576 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9577 {
9578 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9579 return num;
9580 }
9581
9582 list = TREE_CHAIN (list);
9583 }
9584
9585 return 0;
9586 }
9587 else
9588 return 0;
9589 }
9590
9591 /* Handle an "sp_switch" attribute; arguments as in
9592 struct attribute_spec.handler. */
9593 static tree
9594 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9595 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9596 {
9597 if (TREE_CODE (*node) != FUNCTION_DECL)
9598 {
9599 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9600 name);
9601 *no_add_attrs = true;
9602 }
9603 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9604 {
9605 /* The argument must be a constant string. */
9606 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9607 name);
9608 *no_add_attrs = true;
9609 }
9610
9611 return NULL_TREE;
9612 }
9613
9614 /* Handle an "trap_exit" attribute; arguments as in
9615 struct attribute_spec.handler. */
9616 static tree
9617 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9618 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9619 {
9620 if (TREE_CODE (*node) != FUNCTION_DECL)
9621 {
9622 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9623 name);
9624 *no_add_attrs = true;
9625 }
9626 /* The argument specifies a trap number to be used in a trapa instruction
9627 at function exit (instead of an rte instruction). */
9628 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9629 {
9630 /* The argument must be a constant integer. */
9631 warning (OPT_Wattributes, "%qE attribute argument not an "
9632 "integer constant", name);
9633 *no_add_attrs = true;
9634 }
9635
9636 return NULL_TREE;
9637 }
9638
9639 static tree
9640 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9641 tree name ATTRIBUTE_UNUSED,
9642 tree args ATTRIBUTE_UNUSED,
9643 int flags ATTRIBUTE_UNUSED,
9644 bool *no_add_attrs ATTRIBUTE_UNUSED)
9645 {
9646 return NULL_TREE;
9647 }
9648
9649 /* True if __attribute__((renesas)) or -mrenesas. */
9650 bool
9651 sh_attr_renesas_p (const_tree td)
9652 {
9653 if (TARGET_HITACHI)
9654 return true;
9655 if (td == NULL_TREE)
9656 return false;
9657 if (DECL_P (td))
9658 td = TREE_TYPE (td);
9659 if (td == error_mark_node)
9660 return false;
9661 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9662 != NULL_TREE);
9663 }
9664
9665 /* True if __attribute__((renesas)) or -mrenesas, for the current
9666 function. */
9667 bool
9668 sh_cfun_attr_renesas_p (void)
9669 {
9670 return sh_attr_renesas_p (current_function_decl);
9671 }
9672
9673 /* Returns true if the current function has the "interrupt_handler"
9674 attribute set. */
9675 bool
9676 sh_cfun_interrupt_handler_p (void)
9677 {
9678 return (lookup_attribute ("interrupt_handler",
9679 DECL_ATTRIBUTES (current_function_decl))
9680 != NULL_TREE);
9681 }
9682
9683 /* Returns true if FUNC has been assigned the attribute
9684 "function_vector". */
9685 bool
9686 sh2a_function_vector_p (tree func)
9687 {
9688 tree list;
9689 if (TREE_CODE (func) != FUNCTION_DECL)
9690 return false;
9691
9692 list = SH_ATTRIBUTES (func);
9693 while (list)
9694 {
9695 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9696 return true;
9697
9698 list = TREE_CHAIN (list);
9699 }
9700 return false;
9701 }
9702
9703 /* Returns true if the current function has the "resbank" and "interrupt_handler" attributes set and the target is SH2A. */
9704 bool
9705 sh_cfun_resbank_handler_p (void)
9706 {
9707 return ((lookup_attribute ("resbank",
9708 DECL_ATTRIBUTES (current_function_decl))
9709 != NULL_TREE)
9710 && (lookup_attribute ("interrupt_handler",
9711 DECL_ATTRIBUTES (current_function_decl))
9712 != NULL_TREE) && TARGET_SH2A);
9713 }
9714
9715 /* Returns true if the current function has a "trap_exit" attribute set. */
9716 bool
9717 sh_cfun_trap_exit_p (void)
9718 {
9719 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9720 != NULL_TREE;
9721 }
9722
9723 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9724 static const char *
9725 sh_check_pch_target_flags (int old_flags)
9726 {
9727 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9728 | MASK_SH_E | MASK_HARD_SH4
9729 | MASK_FPU_SINGLE | MASK_SH4))
9730 return _("created and used with different architectures / ABIs");
9731 if ((old_flags ^ target_flags) & MASK_HITACHI)
9732 return _("created and used with different ABIs");
9733 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9734 return _("created and used with different endianness");
9735 return NULL;
9736 }
9737 \f
9738 /* Predicates used by the templates. */
9739
9740 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9741 Used only in general_movsrc_operand. */
9742 bool
9743 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9744 {
9745 switch (REGNO (op))
9746 {
9747 case PR_REG:
9748 case MACL_REG:
9749 case MACH_REG:
9750 return true;
9751 }
9752 return false;
9753 }
9754
9755 /* Returns true if OP is a floating point value with value 0.0. */
9756 bool
9757 fp_zero_operand (rtx op)
9758 {
9759 REAL_VALUE_TYPE r;
9760
9761 if (GET_MODE (op) != SFmode)
9762 return false;
9763
9764 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9765 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9766 }
9767
9768 /* Returns true if OP is a floating point value with value 1.0. */
9769 bool
9770 fp_one_operand (rtx op)
9771 {
9772 REAL_VALUE_TYPE r;
9773
9774 if (GET_MODE (op) != SFmode)
9775 return false;
9776
9777 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9778 return REAL_VALUES_EQUAL (r, dconst1);
9779 }
9780
9781 /* In general mode switching is used. If we are
9782 compiling without -mfmovd, movsf_ie isn't taken into account for
9783 mode switching. We could check in machine_dependent_reorg for
9784 cases where we know we are in single precision mode, but there is
9785 no interface to find that out during reload, so we must avoid
9786 choosing an fldi alternative during reload and thus failing to
9787 allocate a scratch register for the constant loading. */
9788 bool
9789 fldi_ok (void)
9790 {
9791 return true;
9792 }
9793
9794 /* Return the TLS type for TLS symbols. */
9795 enum tls_model
9796 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9797 {
9798 if (GET_CODE (op) != SYMBOL_REF)
9799 return TLS_MODEL_NONE;
9800 return SYMBOL_REF_TLS_MODEL (op);
9801 }
9802 \f
9803 /* Return the destination address of a branch. */
9804 static int
9805 branch_dest (rtx branch)
9806 {
9807 rtx dest = SET_SRC (PATTERN (branch));
9808 int dest_uid;
9809
9810 if (GET_CODE (dest) == IF_THEN_ELSE)
9811 dest = XEXP (dest, 1);
9812 dest = XEXP (dest, 0);
9813 dest_uid = INSN_UID (dest);
9814 return INSN_ADDRESSES (dest_uid);
9815 }
9816 \f
9817 /* Return nonzero if REG is not used after INSN.
9818 We assume REG is a reload reg, and therefore does
9819 not live past labels. It may live past calls or jumps though. */
9820 bool
9821 reg_unused_after (rtx reg, rtx insn)
9822 {
9823 enum rtx_code code;
9824 rtx set;
9825
9826 /* If the reg is set by this instruction, then it is safe for our
9827 case. Disregard the case where this is a store to memory, since
9828 we are checking a register used in the store address. */
9829 set = single_set (insn);
9830 if (set && !MEM_P (SET_DEST (set))
9831 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9832 return true;
9833
9834 while ((insn = NEXT_INSN (insn)))
9835 {
9836 rtx set;
9837 if (!INSN_P (insn))
9838 continue;
9839
9840 code = GET_CODE (insn);
9841
9842 #if 0
9843 /* If this is a label that existed before reload, then the register
9844 is dead here. However, if this is a label added by reorg, then
9845 the register may still be live here. We can't tell the difference,
9846 so we just ignore labels completely. */
9847 if (code == CODE_LABEL)
9848 return 1;
9849 /* else */
9850 #endif
9851
9852 if (code == JUMP_INSN)
9853 return false;
9854
9855 /* If this is a sequence, we must handle them all at once.
9856 We could have for instance a call that sets the target register,
9857 and an insn in a delay slot that uses the register. In this case,
9858 we must return 0. */
9859 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9860 {
9861 int i;
9862 int retval = 0;
9863
9864 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9865 {
9866 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9867 rtx set = single_set (this_insn);
9868
9869 if (CALL_P (this_insn))
9870 code = CALL_INSN;
9871 else if (JUMP_P (this_insn))
9872 {
9873 if (INSN_ANNULLED_BRANCH_P (this_insn))
9874 return false;
9875 code = JUMP_INSN;
9876 }
9877
9878 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9879 return false;
9880 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9881 {
9882 if (!MEM_P (SET_DEST (set)))
9883 retval = true;
9884 else
9885 return false;
9886 }
9887 if (set == NULL_RTX
9888 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9889 return false;
9890 }
9891 if (retval == 1)
9892 return true;
9893 else if (code == JUMP_INSN)
9894 return false;
9895 }
9896
9897 set = single_set (insn);
9898 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9899 return false;
9900 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9901 return !MEM_P (SET_DEST (set));
9902 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9903 return false;
9904
9905 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9906 return true;
9907 }
9908 return true;
9909 }
9910 \f
9911 #include "ggc.h"
9912
9913 static GTY(()) rtx t_reg_rtx;
9914 rtx
9915 get_t_reg_rtx (void)
9916 {
9917 if (! t_reg_rtx)
9918 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
9919 return t_reg_rtx;
9920 }
9921
9922 static GTY(()) rtx fpscr_rtx;
9923 rtx
9924 get_fpscr_rtx (void)
9925 {
9926 if (! fpscr_rtx)
9927 {
9928 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9929 REG_USERVAR_P (fpscr_rtx) = 1;
9930 mark_user_reg (fpscr_rtx);
9931 }
9932 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9933 mark_user_reg (fpscr_rtx);
9934 return fpscr_rtx;
9935 }
9936
9937 static GTY(()) tree fpscr_values;
9938
9939 static void
9940 emit_fpu_switch (rtx scratch, int index)
9941 {
9942 rtx dst, src;
9943
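  /* Lazily create a declaration for the external __fpscr_values array:
     two entries, one FPSCR setting per FP precision mode.  The array
     itself is expected to be supplied by the runtime library.  */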
9944 if (fpscr_values == NULL)
9945 {
9946 tree t;
9947
9948 t = build_index_type (integer_one_node);
9949 t = build_array_type (integer_type_node, t);
9950 t = build_decl (BUILTINS_LOCATION,
9951 VAR_DECL, get_identifier ("__fpscr_values"), t);
9952 DECL_ARTIFICIAL (t) = 1;
9953 DECL_IGNORED_P (t) = 1;
9954 DECL_EXTERNAL (t) = 1;
9955 TREE_STATIC (t) = 1;
9956 TREE_PUBLIC (t) = 1;
9957 TREE_USED (t) = 1;
9958
9959 fpscr_values = t;
9960 }
9961
9962 src = DECL_RTL (fpscr_values);
9963 if (!can_create_pseudo_p ())
9964 {
9965 emit_move_insn (scratch, XEXP (src, 0));
9966 if (index != 0)
9967 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9968 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9969 }
9970 else
9971 src = adjust_address (src, PSImode, index * 4);
9972
9973 dst = get_fpscr_rtx ();
9974 emit_move_insn (dst, src);
9975 }
9976
9977 void
9978 emit_sf_insn (rtx pat)
9979 {
9980 emit_insn (pat);
9981 }
9982
9983 void
9984 emit_df_insn (rtx pat)
9985 {
9986 emit_insn (pat);
9987 }
9988
9989 void
9990 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9991 {
9992 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9993 }
9994
9995 void
9996 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9997 {
9998 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9999 get_fpscr_rtx ()));
10000 }
10001
10002 void
10003 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10004 {
10005 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
10006 }
10007
10008 void
10009 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10010 {
10011 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
10012 get_fpscr_rtx ()));
10013 }
10014 \f
10015 static rtx get_free_reg (HARD_REG_SET);
10016
10017 /* This function returns a register to use for loading the address from
10018 which the fpscr is to be loaded. Currently it always returns r1 or r7,
10019 but when we are able to use pseudo registers after combine, or have a better mechanism
10020 for choosing a register, it should be done here. */
10021 /* REGS_LIVE is the liveness information for the point for which we
10022 need this allocation. In some bare-bones exit blocks, r1 is live at the
10023 start. We can even have all of r0..r3 being live:
10024 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10025 The insn before which the new insns are placed will clobber the register
10026 we return. If a basic block consists only of setting the return value
10027 register to a pseudo and using that register, the return value is not
10028 live before or after this block, yet we'll insert our insns right in
10029 the middle. */
10030 static rtx
10031 get_free_reg (HARD_REG_SET regs_live)
10032 {
10033 if (! TEST_HARD_REG_BIT (regs_live, 1))
10034 return gen_rtx_REG (Pmode, 1);
10035
10036 /* Hard reg 1 is live; since this is a small register classes target,
10037 there shouldn't be anything but a jump before the function end. */
10038 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10039 return gen_rtx_REG (Pmode, 7);
10040 }
10041
10042 /* This function will set the fpscr from memory.
10043 MODE is the mode we are setting it to. */
10044 void
10045 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10046 {
10047 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10048 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10049 rtx addr_reg;
10050
10051 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10052 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10053 }
10054
10055 /* Is the given character a logical line separator for the assembler? */
10056 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10057 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10058 #endif
10059
10060 static bool
10061 sequence_insn_p (rtx insn)
10062 {
10063 rtx prev, next;
10064
10065 prev = PREV_INSN (insn);
10066 if (prev == NULL)
10067 return false;
10068
10069 next = NEXT_INSN (prev);
10070 if (next == NULL)
10071 return false;
10072
10073 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10074 }
10075
10076 int
10077 sh_insn_length_adjustment (rtx insn)
10078 {
10079 /* Instructions with unfilled delay slots take up an extra two bytes for
10080 the nop in the delay slot. */
10081 if (((NONJUMP_INSN_P (insn)
10082 && GET_CODE (PATTERN (insn)) != USE
10083 && GET_CODE (PATTERN (insn)) != CLOBBER)
10084 || CALL_P (insn) || JUMP_P (insn))
10085 && ! sequence_insn_p (insn)
10086 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10087 return 2;
10088
10089 /* SH2e has a bug that prevents the use of annulled branches, so if
10090 the delay slot is not filled, we'll have to put a NOP in it. */
10091 if (sh_cpu_attr == CPU_SH2E
10092 && JUMP_P (insn)
10093 && get_attr_type (insn) == TYPE_CBRANCH
10094 && ! sequence_insn_p (insn))
10095 return 2;
10096
10097 /* sh-dsp parallel processing insns take four bytes instead of two. */
10098
10099 if (NONJUMP_INSN_P (insn))
10100 {
10101 int sum = 0;
10102 rtx body = PATTERN (insn);
10103 const char *templ;
10104 char c;
10105 bool maybe_label = true;
10106
10107 if (GET_CODE (body) == ASM_INPUT)
10108 templ = XSTR (body, 0);
10109 else if (asm_noperands (body) >= 0)
10110 templ
10111 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10112 else
10113 return 0;
10114 do
10115 {
10116 int ppi_adjust = 0;
10117
10118 do
10119 c = *templ++;
10120 while (c == ' ' || c == '\t');
10121 /* all sh-dsp parallel-processing insns start with p.
10122 The only non-ppi sh insn starting with p is pref.
10123 The only ppi starting with pr is prnd. */
10124 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10125 ppi_adjust = 2;
10126 /* The repeat pseudo-insn expands to three insns, a total of
10127 six bytes in size. */
10128 else if ((c == 'r' || c == 'R')
10129 && ! strncasecmp ("epeat", templ, 5))
10130 ppi_adjust = 4;
10131 while (c && c != '\n'
10132 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10133 {
10134 /* If this is a label, it is obviously not a ppi insn. */
10135 if (c == ':' && maybe_label)
10136 {
10137 ppi_adjust = 0;
10138 break;
10139 }
10140 else if (c == '\'' || c == '"')
10141 maybe_label = false;
10142 c = *templ++;
10143 }
10144 sum += ppi_adjust;
10145 maybe_label = c != ':';
10146 }
10147 while (c);
10148 return sum;
10149 }
10150 return 0;
10151 }
10152 \f
10153 /* Return TRUE for a valid displacement for the REG+disp addressing
10154 with MODE. */
10155 bool
10156 sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
10157 bool allow_zero)
10158 {
10159 if (! CONST_INT_P (op))
10160 return false;
10161
10162 if (TARGET_SHMEDIA)
10163 {
10164 int size;
10165
10166 /* Check if this is the address of an unaligned load / store. */
10167 if (mode == VOIDmode)
10168 return satisfies_constraint_I06 (op);
10169
10170 size = GET_MODE_SIZE (mode);
10171 return (!(INTVAL (op) & (size - 1))
10172 && INTVAL (op) >= -512 * size
10173 && INTVAL (op) < 512 * size);
10174 }
10175 else
10176 {
10177 const HOST_WIDE_INT offset = INTVAL (op);
10178 const int max_disp = max_mov_insn_displacement (mode, consider_sh2a);
10179 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10180
10181 /* If the mode does not support any displacement always return false.
10182 Even though an index of '0' is actually always valid, it will cause
10183 troubles when e.g. a DFmode move is split into two SFmode moves,
10184 where one SFmode move will have index '0' and the other move will
10185 have index '4'. */
10186 if (!allow_zero && max_disp < 1)
10187 return false;
10188
10189 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10190 }
10191 }
10192
10193 /* Recognize an RTL expression that is a valid memory address for
10194 an instruction.
10195 The MODE argument is the machine mode for the MEM expression
10196 that wants to use this address.
10197 Allow REG
10198 REG+disp
10199 REG+r0
10200 REG++
10201 --REG
10202 GBR
10203 GBR+disp */
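/* In SH assembler syntax these forms correspond roughly to @Rn,
   @(disp,Rn), @(R0,Rn), @Rn+, @-Rn and @(disp,GBR) operands.  */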
10204 static bool
10205 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
10206 {
10207 if (REG_P (x) && REGNO (x) == GBR_REG)
10208 return true;
10209
10210 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10211 return true;
10212 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10213 && ! TARGET_SHMEDIA
10214 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10215 return true;
10216 else if (GET_CODE (x) == PLUS
10217 && (mode != PSImode || reload_completed))
10218 {
10219 rtx xop0 = XEXP (x, 0);
10220 rtx xop1 = XEXP (x, 1);
10221
10222 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10223 return gbr_displacement (xop1, mode);
10224
10225 if (GET_MODE_SIZE (mode) <= 8
10226 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10227 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10228 return true;
10229
10230 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10231 || ((xop0 == stack_pointer_rtx
10232 || xop0 == hard_frame_pointer_rtx)
10233 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10234 || ((xop1 == stack_pointer_rtx
10235 || xop1 == hard_frame_pointer_rtx)
10236 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10237 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10238 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10239 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10240 && TARGET_FMOVD && mode == DFmode)))
10241 {
10242 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10243 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10244 return true;
10245 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10246 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10247 return true;
10248 }
10249 }
10250
10251 return false;
10252 }
10253 \f
10254 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10255 isn't protected by a PIC unspec. */
10256 bool
10257 nonpic_symbol_mentioned_p (rtx x)
10258 {
10259 const char *fmt;
10260 int i;
10261
10262 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10263 || GET_CODE (x) == PC)
10264 return true;
10265
10266 /* We don't want to look into the possible MEM location of a
10267 CONST_DOUBLE, since we're not going to use it, in general. */
10268 if (GET_CODE (x) == CONST_DOUBLE)
10269 return false;
10270
10271 if (GET_CODE (x) == UNSPEC
10272 && (XINT (x, 1) == UNSPEC_PIC
10273 || XINT (x, 1) == UNSPEC_GOT
10274 || XINT (x, 1) == UNSPEC_GOTOFF
10275 || XINT (x, 1) == UNSPEC_GOTPLT
10276 || XINT (x, 1) == UNSPEC_GOTTPOFF
10277 || XINT (x, 1) == UNSPEC_DTPOFF
10278 || XINT (x, 1) == UNSPEC_TPOFF
10279 || XINT (x, 1) == UNSPEC_PLT
10280 || XINT (x, 1) == UNSPEC_SYMOFF
10281 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10282 return false;
10283
10284 fmt = GET_RTX_FORMAT (GET_CODE (x));
10285 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10286 {
10287 if (fmt[i] == 'E')
10288 {
10289 int j;
10290 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10291 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10292 return true;
10293 }
10294 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10295 return true;
10296 }
10297
10298 return false;
10299 }
10300
10301 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10302 @GOTOFF in `reg'. */
10303 rtx
10304 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
10305 rtx reg)
10306 {
10307 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10308 return orig;
10309
10310 if (GET_CODE (orig) == LABEL_REF
10311 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10312 {
10313 if (reg == NULL_RTX)
10314 reg = gen_reg_rtx (Pmode);
10315
10316 emit_insn (gen_symGOTOFF2reg (reg, orig));
10317 return reg;
10318 }
10319 else if (GET_CODE (orig) == SYMBOL_REF)
10320 {
10321 if (reg == NULL_RTX)
10322 reg = gen_reg_rtx (Pmode);
10323
10324 emit_insn (gen_symGOT2reg (reg, orig));
10325 return reg;
10326 }
10327 return orig;
10328 }
10329
10330 /* Given a (logical) mode size and an offset in bytes, try to find the
10331 appropriate displacement value for a mov insn. On SH the displacements
10332 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10333 15 bytes in QImode. To compensate for this we create a new base address by
10334 adding an adjustment value to it.
10335
10336 If the originally requested offset is greater than 127 we prefer using
10337 values 124..127 over 128..131 to increase opportunities to use the
10338 add #imm, Rn insn.
10339
10340 In some cases it is possible that a requested offset might seem unaligned
10341 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10342 This is compensated by adjusting the base address so that the effective
10343 address of the displacement move insn will be aligned.
10344
10345 This is not the best possible way of rebasing the base address, as it
10346 does not look at other present displacement addressings around it.
10347 In some cases this can create more base address adjustments than would
10348 actually be necessary. */
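/* For illustration: an SImode access at offset 68 exceeds the maximum
   mov.l displacement of 60, so it is rebased with offset_adjust = 64 and
   mov_disp = 4, i.e. add #64 to a copy of the base register and use a
   displacement of 4 in the move (numbers are illustrative only).  */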
10349 struct disp_adjust
10350 {
10351 rtx offset_adjust;
10352 rtx mov_disp;
10353 };
10354
10355 static struct disp_adjust
10356 sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
10357 {
10358 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10359
10360 /* Do not try to use SH2A's large displacements here, because this would
10361 effectively disable the small displacement insns. */
10362 const int mode_sz = GET_MODE_SIZE (mode);
10363 const int mov_insn_sz = mov_insn_size (mode, false);
10364 const int max_disp = max_mov_insn_displacement (mode, false);
10365 const int max_disp_next = max_disp + mov_insn_sz;
10366 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10367 HOST_WIDE_INT offset_adjust;
10368
10369 /* In some cases this actually does happen and we must check for it. */
10370 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10371 return res;
10372
10373 /* Keeps the previous behavior for QImode displacement addressing.
10374 This just decides how the offset is re-based. Removing this special
10375 case will result in slightly bigger code on average, but it's not that
10376 bad actually. */
10377 if (mov_insn_sz == 1)
10378 align_modifier = 0;
10379
10380 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10381
10382 if (mode_sz + offset - offset_adjust <= max_disp_next)
10383 {
10384 res.offset_adjust = GEN_INT (offset_adjust);
10385 res.mov_disp = GEN_INT (offset - offset_adjust);
10386 }
10387
10388 return res;
10389 }
10390
10391 /* Try to modify an illegitimate address and make it legitimate.
10392 If we find one, return the new, valid address.
10393 Otherwise, return the original address. */
10394 static rtx
10395 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
10396 {
10397 if (flag_pic)
10398 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10399
10400 if (TARGET_SHMEDIA)
10401 return x;
10402
10403 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10404 || (TARGET_SH2E && mode == SFmode))
10405 return x;
10406
10407 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10408 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10409 {
10410 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10411 INTVAL (XEXP (x, 1)));
10412
10413 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10414 {
10415 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10416 adj.offset_adjust, NULL_RTX, 0,
10417 OPTAB_LIB_WIDEN);
10418 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10419 }
10420 }
10421
10422 return x;
10423 }
10424
10425 /* Attempt to replace *p, which is an address that needs reloading, with
10426 a valid memory address for an operand of mode MODE.
10427 Like for sh_legitimize_address, for the SH we try to get a normal form
10428 of the address. That will allow inheritance of the address reloads. */
10429 bool
10430 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
10431 int itype)
10432 {
10433 enum reload_type type = (enum reload_type) itype;
10434 const int mode_sz = GET_MODE_SIZE (mode);
10435
10436 if (TARGET_SHMEDIA)
10437 return false;
10438
10439 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10440 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10441 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
10442 && (ALLOW_INDEXED_ADDRESS
10443 || XEXP (*p, 0) == stack_pointer_rtx
10444 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10445 {
10446 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10447 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10448
10449 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10450 {
10451 push_reload (*p, NULL_RTX, p, NULL,
10452 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10453 return true;
10454 }
10455
10456 if (TARGET_SH2E && mode == SFmode)
10457 {
10458 *p = copy_rtx (*p);
10459 push_reload (*p, NULL_RTX, p, NULL,
10460 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10461 return true;
10462 }
10463
10464 /* FIXME: Do not allow to legitimize QImode and HImode displacement
10465 moves because then reload has a problem figuring the constraint
10466 that the move insn target/source reg must be R0.
10467 Or maybe some handling is wrong in sh_secondary_reload for this
10468 to work properly? */
10469 if ((mode_sz == 4 || mode_sz == 8)
10470 && ! (TARGET_SH4 && mode == DFmode)
10471 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10472 {
10473 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10474 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10475 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10476 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10477 return true;
10478 }
10479 }
10480
10481 /* We must re-recognize what we created before. */
10482 if (GET_CODE (*p) == PLUS
10483 && (mode_sz == 4 || mode_sz == 8)
10484 && GET_CODE (XEXP (*p, 0)) == PLUS
10485 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10486 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10487 && CONST_INT_P (XEXP (*p, 1))
10488 && ! (TARGET_SH2E && mode == SFmode))
10489 {
10490 /* Because this address is so complex, we know it must have
10491 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10492 it is already unshared, and needs no further unsharing. */
10493 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10494 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10495 return true;
10496 }
10497
10498 return false;
10499 }
10500
10501 /* In the name of slightly smaller debug output, and to cater to
10502 general assembler lossage, recognize various UNSPEC sequences
10503 and turn them back into a direct symbol reference. */
10504 static rtx
10505 sh_delegitimize_address (rtx orig_x)
10506 {
10507 rtx x, y;
10508
10509 orig_x = delegitimize_mem_from_attrs (orig_x);
10510
10511 x = orig_x;
10512 if (MEM_P (x))
10513 x = XEXP (x, 0);
10514 if (GET_CODE (x) == CONST)
10515 {
10516 y = XEXP (x, 0);
10517 if (GET_CODE (y) == UNSPEC)
10518 {
10519 if (XINT (y, 1) == UNSPEC_GOT
10520 || XINT (y, 1) == UNSPEC_GOTOFF
10521 || XINT (y, 1) == UNSPEC_SYMOFF)
10522 return XVECEXP (y, 0, 0);
10523 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10524 {
10525 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10526 {
10527 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10528
10529 if (GET_CODE (symplt) == UNSPEC
10530 && XINT (symplt, 1) == UNSPEC_PLT)
10531 return XVECEXP (symplt, 0, 0);
10532 }
10533 }
10534 else if (TARGET_SHMEDIA
10535 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10536 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10537 {
10538 rtx offset = XVECEXP (y, 0, 1);
10539
10540 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10541 if (MEM_P (orig_x))
10542 x = replace_equiv_address_nv (orig_x, x);
10543 return x;
10544 }
10545 }
10546 }
10547
10548 return orig_x;
10549 }
10550
10551 /* Mark the use of a constant in the literal table. If the constant
10552 has multiple labels, make it unique. */
10553 static rtx
10554 mark_constant_pool_use (rtx x)
10555 {
10556 rtx insn, lab, pattern;
10557
10558 if (x == NULL_RTX)
10559 return x;
10560
10561 switch (GET_CODE (x))
10562 {
10563 case LABEL_REF:
10564 x = XEXP (x, 0);
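      /* Fall through - the label referenced by the LABEL_REF is handled
	 as a CODE_LABEL.  */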
10565 case CODE_LABEL:
10566 break;
10567 default:
10568 return x;
10569 }
10570
10571 /* Get the first label in the list of labels for the same constant
10572 and delete the other labels in the list. */
10573 lab = x;
10574 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
10575 {
10576 if (!LABEL_P (insn)
10577 || LABEL_REFS (insn) != NEXT_INSN (insn))
10578 break;
10579 lab = insn;
10580 }
10581
10582 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10583 INSN_DELETED_P (insn) = 1;
10584
10585 /* Mark constants in a window. */
10586 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
10587 {
10588 if (!NONJUMP_INSN_P (insn))
10589 continue;
10590
10591 pattern = PATTERN (insn);
10592 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10593 continue;
10594
10595 switch (XINT (pattern, 1))
10596 {
10597 case UNSPECV_CONST2:
10598 case UNSPECV_CONST4:
10599 case UNSPECV_CONST8:
10600 XVECEXP (pattern, 0, 1) = const1_rtx;
10601 break;
10602 case UNSPECV_WINDOW_END:
10603 if (XVECEXP (pattern, 0, 0) == x)
10604 return lab;
10605 break;
10606 case UNSPECV_CONST_END:
10607 return lab;
10608 default:
10609 break;
10610 }
10611 }
10612
10613 return lab;
10614 }
10615 \f
10616 /* Return true if it's possible to redirect BRANCH1 to the destination
10617 of an unconditional jump BRANCH2. We only want to do this if the
10618 resulting branch will have a short displacement. */
10619 bool
10620 sh_can_redirect_branch (rtx branch1, rtx branch2)
10621 {
10622 if (flag_expensive_optimizations && simplejump_p (branch2))
10623 {
10624 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10625 rtx insn;
10626 int distance;
10627
10628 for (distance = 0, insn = NEXT_INSN (branch1);
10629 insn && distance < 256;
10630 insn = PREV_INSN (insn))
10631 {
10632 if (insn == dest)
10633 return true;
10634 else
10635 distance += get_attr_length (insn);
10636 }
10637 for (distance = 0, insn = NEXT_INSN (branch1);
10638 insn && distance < 256;
10639 insn = NEXT_INSN (insn))
10640 {
10641 if (insn == dest)
10642 return true;
10643 else
10644 distance += get_attr_length (insn);
10645 }
10646 }
10647 return false;
10648 }
10649
10650 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10651 bool
10652 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10653 unsigned int new_reg)
10654 {
10655 /* Interrupt functions can only use registers that have already been
10656 saved by the prologue, even if they would normally be
10657 call-clobbered. */
10658 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10659 return false;
10660
10661 return true;
10662 }
10663
10664 /* Function to update the integer COST
10665 based on the relationship between INSN that is dependent on
10666 DEP_INSN through the dependence LINK. The default is to make no
10667 adjustment to COST. This can be used for example to specify to
10668 the scheduler that an output- or anti-dependence does not incur
10669 the same cost as a data-dependence. The return value should be
10670 the new value for COST. */
10671 static int
10672 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10673 {
10674 rtx reg, use_pat;
10675
10676 if (TARGET_SHMEDIA)
10677 {
10678 /* On SHmedia, if the dependence is an anti-dependence or
10679 output-dependence, there is no cost. */
10680 if (REG_NOTE_KIND (link) != 0)
10681 {
10682 /* However, dependencies between target register loads and
10683 uses of the register in a subsequent block that are separated
10684 by a conditional branch are not modelled - we have to make do with
10685 the anti-dependency between the target register load and the
10686 conditional branch that ends the current block. */
10687 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10688 && GET_CODE (PATTERN (dep_insn)) == SET
10689 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10690 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10691 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10692 {
10693 int orig_cost = cost;
10694 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10695 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10696 ? insn : JUMP_LABEL (insn));
10697 /* On the likely path, the branch costs 1, on the unlikely path,
10698 it costs 3. */
10699 cost--;
10700 do
10701 target = next_active_insn (target);
10702 while (target && ! flow_dependent_p (target, dep_insn)
10703 && --cost > 0);
10704 /* If two branches are executed in immediate succession, with the
10705 first branch properly predicted, this causes a stall at the
10706 second branch, hence we won't need the target for the
10707 second branch for two cycles after the launch of the first
10708 branch. */
10709 if (cost > orig_cost - 2)
10710 cost = orig_cost - 2;
10711 }
10712 else
10713 cost = 0;
10714 }
10715
10716 else if (get_attr_is_mac_media (insn)
10717 && get_attr_is_mac_media (dep_insn))
10718 cost = 1;
10719
10720 else if (! reload_completed
10721 && GET_CODE (PATTERN (insn)) == SET
10722 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10723 && GET_CODE (PATTERN (dep_insn)) == SET
10724 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10725 && cost < 4)
10726 cost = 4;
10727 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10728 that is needed at the target. */
10729 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10730 && ! flow_dependent_p (insn, dep_insn))
10731 cost--;
10732 }
10733 else if (REG_NOTE_KIND (link) == 0)
10734 {
10735 enum attr_type type;
10736 rtx dep_set;
10737
10738 if (recog_memoized (insn) < 0
10739 || recog_memoized (dep_insn) < 0)
10740 return cost;
10741
10742 dep_set = single_set (dep_insn);
10743
10744 /* The latency that we specify in the scheduling description refers
10745 to the actual output, not to an auto-increment register; for that,
10746 the latency is one. */
10747 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10748 {
10749 rtx set = single_set (insn);
10750
10751 if (set
10752 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10753 && (!MEM_P (SET_DEST (set))
10754 || !reg_mentioned_p (SET_DEST (dep_set),
10755 XEXP (SET_DEST (set), 0))))
10756 cost = 1;
10757 }
10758 /* The only input for a call that is timing-critical is the
10759 function's address. */
10760 if (CALL_P (insn))
10761 {
10762 rtx call = get_call_rtx_from (insn);
10763 if (call
10764 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10765 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10766 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10767 cost -= TARGET_SH4_300 ? 3 : 6;
10768 }
10769 /* Likewise, the most timing-critical input for an sfunc call
10770 is the function address. However, sfuncs typically start
10771 using their arguments pretty quickly.
10772 Assume a four cycle delay for SH4 before they are needed.
10773 Cached ST40-300 calls are quicker, so assume only a one
10774 cycle delay there.
10775 ??? Maybe we should encode the delays till input registers
10776 are needed by sfuncs into the sfunc call insn. */
10777 /* All sfunc calls are parallels with at least four components.
10778 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10779 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10780 && XVECLEN (PATTERN (insn), 0) >= 4
10781 && (reg = sfunc_uses_reg (insn)))
10782 {
10783 if (! reg_set_p (reg, dep_insn))
10784 cost -= TARGET_SH4_300 ? 1 : 4;
10785 }
10786 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10787 {
10788 enum attr_type dep_type = get_attr_type (dep_insn);
10789
10790 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10791 cost--;
10792 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10793 && (type = get_attr_type (insn)) != TYPE_CALL
10794 && type != TYPE_SFUNC)
10795 cost--;
10796 /* When the preceding instruction loads the shift amount of
10797 the following SHAD/SHLD, the latency of the load is increased
10798 by 1 cycle. */
10799 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10800 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10801 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10802 XEXP (SET_SRC (single_set (insn)),
10803 1)))
10804 cost++;
10805 /* When an LS group instruction with a latency of less than
10806 3 cycles is followed by a double-precision floating-point
10807 instruction, FIPR, or FTRV, the latency of the first
10808 instruction is increased to 3 cycles. */
10809 else if (cost < 3
10810 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10811 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10812 cost = 3;
10813 /* The lsw register of a double-precision computation is ready one
10814 cycle earlier. */
10815 else if (reload_completed
10816 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10817 && (use_pat = single_set (insn))
10818 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10819 SET_SRC (use_pat)))
10820 cost -= 1;
10821
10822 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10823 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10824 cost -= 1;
10825 }
10826 else if (TARGET_SH4_300)
10827 {
10828 /* Stores need their input register two cycles later. */
10829 if (dep_set && cost >= 1
10830 && ((type = get_attr_type (insn)) == TYPE_STORE
10831 || type == TYPE_PSTORE
10832 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10833 {
10834 rtx set = single_set (insn);
10835
10836 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10837 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10838 {
10839 cost -= 2;
10840 /* But don't reduce the cost below 1 if the address depends
10841 on a side effect of dep_insn. */
10842 if (cost < 1
10843 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10844 cost = 1;
10845 }
10846 }
10847 }
10848 }
10849 /* An anti-dependence penalty of two applies if the first insn is a double
10850 precision fadd / fsub / fmul. */
10851 else if (!TARGET_SH4_300
10852 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10853 && recog_memoized (dep_insn) >= 0
10854 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10855 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10856 /* A lot of alleged anti-flow dependences are fake,
10857 so check this one is real. */
10858 && flow_dependent_p (dep_insn, insn))
10859 cost = 2;
10860
10861 return cost;
10862 }
10863
10864 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10865 if DEP_INSN is anti-flow dependent on INSN. */
10866 static bool
10867 flow_dependent_p (rtx insn, rtx dep_insn)
10868 {
10869 rtx tmp = PATTERN (insn);
10870
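/* flow_dependent_p_1 clears *pinsn when it finds something set by DEP_INSN
   that is referenced by INSN, so a NULL result below means a true dependence
   was found.  */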
10871 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10872 return tmp == NULL_RTX;
10873 }
10874
10875 /* A helper function for flow_dependent_p called through note_stores. */
10876 static void
10877 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10878 {
10879 rtx * pinsn = (rtx *) data;
10880
10881 if (*pinsn && reg_referenced_p (x, *pinsn))
10882 *pinsn = NULL_RTX;
10883 }
10884
10885 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10886 'special function' patterns (type sfunc) that clobber pr, but that
10887 do not look like function calls to leaf_function_p. Hence we must
10888 do this extra check. */
10889 static int
10890 sh_pr_n_sets (void)
10891 {
10892 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10893 }
10894
10895 /* Return where to allocate pseudo for a given hard register initial
10896 value. */
10897 static rtx
10898 sh_allocate_initial_value (rtx hard_reg)
10899 {
10900 rtx x;
10901
10902 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10903 {
10904 if (crtl->is_leaf
10905 && ! sh_pr_n_sets ()
10906 && ! (TARGET_SHCOMPACT
10907 && ((crtl->args.info.call_cookie
10908 & ~ CALL_COOKIE_RET_TRAMP (1))
10909 || crtl->saves_all_registers)))
10910 x = hard_reg;
10911 else
10912 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10913 }
10914 else
10915 x = NULL_RTX;
10916
10917 return x;
10918 }
10919
10920 /* This function returns "2" to indicate dual issue for the SH4
10921 processor. To be used by the DFA pipeline description. */
10922 static int
10923 sh_issue_rate (void)
10924 {
10925 if (TARGET_SUPERSCALAR)
10926 return 2;
10927 else
10928 return 1;
10929 }
10930
10931 /* Functions for ready queue reordering for sched1. */
10932
10933 /* Get weight for mode for a set x. */
10934 static short
10935 find_set_regmode_weight (rtx x, enum machine_mode mode)
10936 {
10937 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10938 return 1;
10939 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10940 {
10941 if (REG_P (SET_DEST (x)))
10942 {
10943 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10944 return 1;
10945 else
10946 return 0;
10947 }
10948 return 1;
10949 }
10950 return 0;
10951 }
10952
10953 /* Get regmode weight for insn. */
10954 static short
10955 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10956 {
10957 short reg_weight = 0;
10958 rtx x;
10959
10960 /* Increment weight for each register born here. */
10961 x = PATTERN (insn);
10962 reg_weight += find_set_regmode_weight (x, mode);
10963 if (GET_CODE (x) == PARALLEL)
10964 {
10965 int j;
10966 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10967 {
10968 x = XVECEXP (PATTERN (insn), 0, j);
10969 reg_weight += find_set_regmode_weight (x, mode);
10970 }
10971 }
10972 /* Decrement weight for each register that dies here. */
10973 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10974 {
10975 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10976 {
10977 rtx note = XEXP (x, 0);
10978 if (REG_P (note) && GET_MODE (note) == mode)
10979 reg_weight--;
10980 }
10981 }
10982 return reg_weight;
10983 }
10984
10985 /* Calculate regmode weights for all insns of a basic block. */
10986 static void
10987 find_regmode_weight (basic_block b, enum machine_mode mode)
10988 {
10989 rtx insn, next_tail, head, tail;
10990
10991 get_ebb_head_tail (b, b, &head, &tail);
10992 next_tail = NEXT_INSN (tail);
10993
10994 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10995 {
10996 /* Handle register life information. */
10997 if (!INSN_P (insn))
10998 continue;
10999
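/* A DFmode (DImode) value occupies two SFmode (SImode) registers, hence
   the doubled DFmode / DImode weight below.  */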
11000 if (mode == SFmode)
11001 INSN_REGMODE_WEIGHT (insn, mode) =
11002 find_insn_regmode_weight (insn, mode)
11003 + 2 * find_insn_regmode_weight (insn, DFmode);
11004 else if (mode == SImode)
11005 INSN_REGMODE_WEIGHT (insn, mode) =
11006 find_insn_regmode_weight (insn, mode)
11007 + 2 * find_insn_regmode_weight (insn, DImode);
11008 }
11009 }
11010
11011 /* Comparison function for ready queue sorting. */
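/* Note that X and Y are deliberately compared the other way round: the
   scheduler issues insns from the end of the ready array, so the most
   desirable insn has to end up last after sorting.  */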
11012 static int
11013 rank_for_reorder (const void *x, const void *y)
11014 {
11015 rtx tmp = *(const rtx *) y;
11016 rtx tmp2 = *(const rtx *) x;
11017
11018 /* The insn in a schedule group should be issued first. */
11019 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11020 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11021
11022 /* If insns are equally good, sort by INSN_LUID (original insn order). This
11023 minimizes instruction movement, thus minimizing sched's effect on
11024 register pressure. */
11025 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11026 }
11027
11028 /* Resort the array A in which only the element at index N may be out of order. */
11029 static void
11030 swap_reorder (rtx *a, int n)
11031 {
11032 rtx insn = a[n - 1];
11033 int i = n - 2;
11034
11035 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11036 {
11037 a[i + 1] = a[i];
11038 i -= 1;
11039 }
11040 a[i + 1] = insn;
11041 }
11042
11043 /* Sort the ready list by ascending priority. */
11044 static void
11045 ready_reorder (rtx *ready, int nready)
11046 {
11047 if (nready == 2)
11048 swap_reorder (ready, nready);
11049 else if (nready > 2)
11050 qsort (ready, nready, sizeof (rtx), rank_for_reorder);
11051 }
11052
11053 /* Count life regions of r0 for a block. */
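/* The value returned is the number of points at which r0 becomes live within
   the block (counting liveness on entry) minus the number of insns carrying a
   REG_DEAD note for r0.  */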
11054 static int
11055 find_r0_life_regions (basic_block b)
11056 {
11057 rtx end, insn;
11058 rtx pset;
11059 rtx r0_reg;
11060 int live;
11061 int set;
11062 int death = 0;
11063
11064 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11065 {
11066 set = 1;
11067 live = 1;
11068 }
11069 else
11070 {
11071 set = 0;
11072 live = 0;
11073 }
11074
11075 insn = BB_HEAD (b);
11076 end = BB_END (b);
11077 r0_reg = gen_rtx_REG (SImode, R0_REG);
11078 while (1)
11079 {
11080 if (INSN_P (insn))
11081 {
11082 if (find_regno_note (insn, REG_DEAD, R0_REG))
11083 {
11084 death++;
11085 live = 0;
11086 }
11087 if (!live
11088 && (pset = single_set (insn))
11089 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11090 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11091 {
11092 set++;
11093 live = 1;
11094 }
11095 }
11096 if (insn == end)
11097 break;
11098 insn = NEXT_INSN (insn);
11099 }
11100 return set - death;
11101 }
11102
11103 /* Calculate regmode weights for all insns of all basic blocks. */
11104 static void
11105 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11106 int verbose ATTRIBUTE_UNUSED,
11107 int old_max_uid)
11108 {
11109 basic_block b;
11110
11111 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11112 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11113 r0_life_regions = 0;
11114
11115 FOR_EACH_BB_REVERSE (b)
11116 {
11117 find_regmode_weight (b, SImode);
11118 find_regmode_weight (b, SFmode);
11119 if (!reload_completed)
11120 r0_life_regions += find_r0_life_regions (b);
11121 }
11122
11123 CURR_REGMODE_PRESSURE (SImode) = 0;
11124 CURR_REGMODE_PRESSURE (SFmode) = 0;
11125 }
11126
11127 /* Cleanup. */
11128 static void
11129 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11130 int verbose ATTRIBUTE_UNUSED)
11131 {
11132 if (regmode_weight[0])
11133 {
11134 free (regmode_weight[0]);
11135 regmode_weight[0] = NULL;
11136 }
11137 if (regmode_weight[1])
11138 {
11139 free (regmode_weight[1]);
11140 regmode_weight[1] = NULL;
11141 }
11142 }
11143
11144 /* The set of supported scalar modes differs from the default in that TImode
11145 is not supported for 32-bit SHmedia. */
11146 static bool
11147 sh_scalar_mode_supported_p (enum machine_mode mode)
11148 {
11149 if (TARGET_SHMEDIA32 && mode == TImode)
11150 return false;
11151
11152 return default_scalar_mode_supported_p (mode);
11153 }
11154
11155 /* Cache can_issue_more so that we can return it from reorder2. Also,
11156 keep count of register pressure for SImode and SFmode. */
11157 static int
11158 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11159 int sched_verbose ATTRIBUTE_UNUSED,
11160 rtx insn,
11161 int can_issue_more)
11162 {
11163 if (GET_CODE (PATTERN (insn)) != USE
11164 && GET_CODE (PATTERN (insn)) != CLOBBER)
11165 cached_can_issue_more = can_issue_more - 1;
11166 else
11167 cached_can_issue_more = can_issue_more;
11168
11169 if (reload_completed)
11170 return cached_can_issue_more;
11171
11172 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11173 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11174
11175 return cached_can_issue_more;
11176 }
11177
11178 static void
11179 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11180 int verbose ATTRIBUTE_UNUSED,
11181 int veclen ATTRIBUTE_UNUSED)
11182 {
11183 CURR_REGMODE_PRESSURE (SImode) = 0;
11184 CURR_REGMODE_PRESSURE (SFmode) = 0;
11185 }
11186
11187 /* Some magic numbers. */
11188 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11189 functions that already have high pressure on r0. */
11190 #define R0_MAX_LIFE_REGIONS 2
11191 /* Register Pressure thresholds for SImode and SFmode registers. */
11192 #define SIMODE_MAX_WEIGHT 5
11193 #define SFMODE_MAX_WEIGHT 10
11194
11195 /* Return true if the pressure is high for MODE. */
11196 static bool
11197 high_pressure (enum machine_mode mode)
11198 {
11199 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11200 functions that already have high pressure on r0. */
11201 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11202 return true;
11203
11204 if (mode == SFmode)
11205 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11206 else
11207 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11208 }
11209
11210 /* Reorder ready queue if register pressure is high. */
11211 static int
11212 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11213 int sched_verbose ATTRIBUTE_UNUSED,
11214 rtx *ready,
11215 int *n_readyp,
11216 int clock_var ATTRIBUTE_UNUSED)
11217 {
11218 if (reload_completed)
11219 return sh_issue_rate ();
11220
11221 if (high_pressure (SFmode) || high_pressure (SImode))
11222 {
11223 ready_reorder (ready, *n_readyp);
11224 }
11225
11226 return sh_issue_rate ();
11227 }
11228
11229 /* Skip cycles if the current register pressure is high. */
11230 static int
11231 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11232 int sched_verbose ATTRIBUTE_UNUSED,
11233 rtx *ready ATTRIBUTE_UNUSED,
11234 int *n_readyp ATTRIBUTE_UNUSED,
11235 int clock_var ATTRIBUTE_UNUSED)
11236 {
11237 if (reload_completed)
11238 return cached_can_issue_more;
11239
11240 if (high_pressure(SFmode) || high_pressure (SImode))
11241 skip_cycles = 1;
11242
11243 return cached_can_issue_more;
11244 }
11245
11246 /* Skip cycles without sorting the ready queue. This will move insns from
11247 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
11248 queue by sh_reorder. */
11249
11250 /* Generally, skipping this many cycles is sufficient for all insns to move
11251 from Q -> R. */
11252 #define MAX_SKIPS 8
11253
11254 static int
11255 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11256 int sched_verbose ATTRIBUTE_UNUSED,
11257 rtx insn ATTRIBUTE_UNUSED,
11258 int last_clock_var,
11259 int clock_var,
11260 int *sort_p)
11261 {
11262 if (reload_completed)
11263 return 0;
11264
11265 if (skip_cycles)
11266 {
11267 if ((clock_var - last_clock_var) < MAX_SKIPS)
11268 {
11269 *sort_p = 0;
11270 return 1;
11271 }
11272 /* If this is the last cycle we are skipping, allow reordering of R. */
11273 if ((clock_var - last_clock_var) == MAX_SKIPS)
11274 {
11275 *sort_p = 1;
11276 return 1;
11277 }
11278 }
11279
11280 skip_cycles = 0;
11281
11282 return 0;
11283 }
11284
11285 /* SHmedia requires registers for branches, so we can't generate new
11286 branches past reload. */
11287 static bool
11288 sh_cannot_modify_jumps_p (void)
11289 {
11290 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11291 }
11292
11293 static reg_class_t
11294 sh_target_reg_class (void)
11295 {
11296 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11297 }
11298
11299 static bool
11300 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11301 {
11302 if (! shmedia_space_reserved_for_target_registers)
11303 return 0;
11304 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11305 return 0;
11306
11307 HARD_REG_SET dummy;
11308 if (calc_live_regs (&dummy) >= 6 * 8)
11309 return 1;
11310 return 0;
11311 }
11312
11313 static bool
11314 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11315 {
11316 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11317 }
11318 \f
11319 /*
11320 On the SH1..SH4, the trampoline looks like
11321 2 0002 D202 mov.l l2,r2
11322 1 0000 D301 mov.l l1,r3
11323 3 0004 422B jmp @r2
11324 4 0006 0009 nop
11325 5 0008 00000000 l1: .long area
11326 6 000c 00000000 l2: .long function
11327
11328 SH5 (compact) uses r1 instead of r3 for the static chain. */
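/* In the generic SH1..SH4 case handled at the end of sh_trampoline_init,
   the two instruction words go at offsets 0 and 4, the static chain value
   (CXT) at offset 8 (l1) and the function address at offset 12 (l2).  */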
11329
11330
11331 /* Emit RTL insns to initialize the variable parts of a trampoline.
11332 FNADDR is an RTX for the address of the function's pure code.
11333 CXT is an RTX for the static chain value for the function. */
11334 static void
11335 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11336 {
11337 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11338 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11339
11340 if (TARGET_SHMEDIA64)
11341 {
11342 rtx tramp_templ;
11343 int fixed_len;
11344
11345 rtx movi1 = GEN_INT (0xcc000010);
11346 rtx shori1 = GEN_INT (0xc8000010);
11347 rtx src, dst;
11348
11349 /* The following trampoline works within a +- 128 KB range for cxt:
11350 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11351 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11352 gettr tr1,r1; blink tr0,r63 */
11353 /* Address rounding makes it hard to compute the exact bounds of the
11354 offset for this trampoline, but we have a rather generous offset
11355 range, so frame_offset should do fine as an upper bound. */
11356 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11357 {
11358 /* ??? could optimize this trampoline initialization
11359 by writing DImode words with two insns each. */
11360 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11361 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11362 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11363 insn = gen_rtx_AND (DImode, insn, mask);
11364 /* Or in ptb/u .,tr1 pattern */
11365 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11366 insn = force_operand (insn, NULL_RTX);
11367 insn = gen_lowpart (SImode, insn);
11368 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11369 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11370 insn = gen_rtx_AND (DImode, insn, mask);
11371 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11372 insn = gen_lowpart (SImode, insn);
11373 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11374 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11375 insn = gen_rtx_AND (DImode, insn, mask);
11376 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11377 insn = gen_lowpart (SImode, insn);
11378 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11379 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11380 insn = gen_rtx_AND (DImode, insn, mask);
11381 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11382 insn = gen_lowpart (SImode, insn);
11383 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11384 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11385 insn = gen_rtx_AND (DImode, insn, mask);
11386 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11387 insn = gen_lowpart (SImode, insn);
11388 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11389 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11390 GEN_INT (0x6bf10600));
11391 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11392 GEN_INT (0x4415fc10));
11393 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11394 GEN_INT (0x4401fff0));
11395 emit_insn (gen_ic_invalidate_line (tramp));
11396 return;
11397 }
11398 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11399 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11400
11401 tramp_templ = gen_datalabel_ref (tramp_templ);
11402 dst = tramp_mem;
11403 src = gen_const_mem (BLKmode, tramp_templ);
11404 set_mem_align (dst, 256);
11405 set_mem_align (src, 64);
11406 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11407
11408 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11409 emit_move_insn (adjust_address (tramp_mem, Pmode,
11410 fixed_len + GET_MODE_SIZE (Pmode)),
11411 cxt);
11412 emit_insn (gen_ic_invalidate_line (tramp));
11413 return;
11414 }
11415 else if (TARGET_SHMEDIA)
11416 {
11417 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11418 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11419 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11420 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11421 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11422 rotated 10 right, and the higher 16 bits of every 32 selected. */
11423 rtx movishori
11424 = force_reg (V2HImode, (simplify_gen_subreg
11425 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11426 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11427 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11428
11429 fnaddr = force_reg (SImode, fnaddr);
11430 cxt = force_reg (SImode, cxt);
11431 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11432 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11433 movishori));
11434 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11435 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11436 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11437 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11438 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11439 gen_rtx_SUBREG (V2HImode, cxt, 0),
11440 movishori));
11441 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11442 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11443 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11444 if (TARGET_LITTLE_ENDIAN)
11445 {
11446 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11447 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11448 }
11449 else
11450 {
11451 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11452 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11453 }
11454 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11455 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11456 emit_insn (gen_ic_invalidate_line (tramp));
11457 return;
11458 }
11459 else if (TARGET_SHCOMPACT)
11460 {
11461 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11462 return;
11463 }
11464 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11465 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11466 SImode));
11467 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11468 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11469 SImode));
11470 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11471 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11472 if (TARGET_HARD_SH4 || TARGET_SH5)
11473 {
11474 if (!TARGET_INLINE_IC_INVALIDATE
11475 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
11476 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11477 FUNCTION_ORDINARY),
11478 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11479 else
11480 emit_insn (gen_ic_invalidate_line (tramp));
11481 }
11482 }
11483
11484 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11485 static rtx
11486 sh_trampoline_adjust_address (rtx tramp)
11487 {
11488 if (TARGET_SHMEDIA)
11489 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11490 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11491 return tramp;
11492 }
11493
11494 /* FIXME: This is overly conservative. A SHcompact function that
11495 receives arguments ``by reference'' will have them stored in its
11496 own stack frame, so it must not pass pointers or references to
11497 these arguments to other functions by means of sibling calls. */
11498 /* If PIC, we cannot make sibling calls to global functions
11499 because the PLT requires r12 to be live. */
11500 static bool
11501 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11502 {
11503 return (1
11504 && (! TARGET_SHCOMPACT
11505 || crtl->args.info.stack_regs == 0)
11506 && ! sh_cfun_interrupt_handler_p ()
11507 && (! flag_pic
11508 || (decl && ! TREE_PUBLIC (decl))
11509 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11510 }
11511 \f
11512 /* Machine specific built-in functions. */
11513
11514 struct builtin_description
11515 {
11516 bool (* const is_enabled) (void);
11517 const enum insn_code icode;
11518 const char *const name;
11519 int signature;
11520 tree fndecl;
11521 };
11522
11523 static bool
11524 shmedia_builtin_p (void)
11525 {
11526 return TARGET_SHMEDIA;
11527 }
11528
11529 /* This function can be used if there are any built-ins that are not for
11530 SHmedia. It's commented out to avoid the defined-but-unused warning.
11531 static bool
11532 sh1_builtin_p (void)
11533 {
11534 return TARGET_SH1;
11535 }
11536 */
11537
11538 /* Describe the number and signedness of arguments; arg[0] == result
11539 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11540 /* 9: 64-bit pointer, 10: 32-bit pointer */
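/* For example, SH_BLTIN_SH_HI == { 4, 4, 1 } describes a builtin whose
   result and first argument have 'don't care' signedness and whose second
   argument is unsigned; the modes themselves are taken from the insn
   pattern in sh_init_builtins.  */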
11541 static const char signature_args[][4] =
11542 {
11543 #define SH_BLTIN_V2SI2 0
11544 { 4, 4 },
11545 #define SH_BLTIN_V4HI2 1
11546 { 4, 4 },
11547 #define SH_BLTIN_V2SI3 2
11548 { 4, 4, 4 },
11549 #define SH_BLTIN_V4HI3 3
11550 { 4, 4, 4 },
11551 #define SH_BLTIN_V8QI3 4
11552 { 4, 4, 4 },
11553 #define SH_BLTIN_MAC_HISI 5
11554 { 1, 4, 4, 1 },
11555 #define SH_BLTIN_SH_HI 6
11556 { 4, 4, 1 },
11557 #define SH_BLTIN_SH_SI 7
11558 { 4, 4, 1 },
11559 #define SH_BLTIN_V4HI2V2SI 8
11560 { 4, 4, 4 },
11561 #define SH_BLTIN_V4HI2V8QI 9
11562 { 4, 4, 4 },
11563 #define SH_BLTIN_SISF 10
11564 { 4, 2 },
11565 #define SH_BLTIN_LDUA_L 11
11566 { 2, 10 },
11567 #define SH_BLTIN_LDUA_Q 12
11568 { 1, 10 },
11569 #define SH_BLTIN_STUA_L 13
11570 { 0, 10, 2 },
11571 #define SH_BLTIN_STUA_Q 14
11572 { 0, 10, 1 },
11573 #define SH_BLTIN_LDUA_L64 15
11574 { 2, 9 },
11575 #define SH_BLTIN_LDUA_Q64 16
11576 { 1, 9 },
11577 #define SH_BLTIN_STUA_L64 17
11578 { 0, 9, 2 },
11579 #define SH_BLTIN_STUA_Q64 18
11580 { 0, 9, 1 },
11581 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11582 #define SH_BLTIN_2 19
11583 #define SH_BLTIN_SU 19
11584 { 1, 2 },
11585 #define SH_BLTIN_3 20
11586 #define SH_BLTIN_SUS 20
11587 { 2, 2, 1 },
11588 #define SH_BLTIN_PSSV 21
11589 { 0, 8, 2, 2 },
11590 #define SH_BLTIN_XXUU 22
11591 #define SH_BLTIN_UUUU 22
11592 { 1, 1, 1, 1 },
11593 #define SH_BLTIN_PV 23
11594 { 0, 8 },
11595 #define SH_BLTIN_VP 24
11596 { 8, 0 },
11597 };
11598 /* mcmv: operands considered unsigned. */
11599 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11600 /* mperm: control value considered unsigned int. */
11601 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11602 /* mshards_q: returns signed short. */
11603 /* nsb: takes long long arg, returns unsigned char. */
11604 static struct builtin_description bdesc[] =
11605 {
11606 { shmedia_builtin_p,
11607 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11608 { shmedia_builtin_p,
11609 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11610 { shmedia_builtin_p,
11611 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11612 { shmedia_builtin_p,
11613 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11614 { shmedia_builtin_p,
11615 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11616 { shmedia_builtin_p,
11617 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11618 { shmedia_builtin_p,
11619 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11620 { shmedia_builtin_p,
11621 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11622 { shmedia_builtin_p,
11623 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11624 { shmedia_builtin_p,
11625 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11626 { shmedia_builtin_p,
11627 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11628 { shmedia_builtin_p,
11629 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11630 { shmedia_builtin_p,
11631 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11632 { shmedia_builtin_p,
11633 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11634 { shmedia_builtin_p,
11635 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11636 { shmedia_builtin_p,
11637 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11638 { shmedia_builtin_p,
11639 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11640 { shmedia_builtin_p,
11641 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11642 { shmedia_builtin_p,
11643 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11644 { shmedia_builtin_p,
11645 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11646 { shmedia_builtin_p,
11647 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11648 { shmedia_builtin_p,
11649 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11650 { shmedia_builtin_p,
11651 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11652 { shmedia_builtin_p,
11653 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11654 { shmedia_builtin_p,
11655 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11656 { shmedia_builtin_p,
11657 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11658 { shmedia_builtin_p,
11659 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11660 { shmedia_builtin_p,
11661 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11662 { shmedia_builtin_p,
11663 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11664 { shmedia_builtin_p,
11665 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11666 { shmedia_builtin_p,
11667 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11668 { shmedia_builtin_p,
11669 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11670 { shmedia_builtin_p,
11671 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11672 { shmedia_builtin_p,
11673 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11674 { shmedia_builtin_p,
11675 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11676 { shmedia_builtin_p,
11677 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11678 { shmedia_builtin_p,
11679 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11680 { shmedia_builtin_p,
11681 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11682 { shmedia_builtin_p,
11683 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11684 { shmedia_builtin_p,
11685 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11686 { shmedia_builtin_p,
11687 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11688 { shmedia_builtin_p,
11689 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11690 { shmedia_builtin_p,
11691 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11692 { shmedia_builtin_p,
11693 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11694 { shmedia_builtin_p,
11695 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11696 { shmedia_builtin_p,
11697 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11698 { shmedia_builtin_p,
11699 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11700 { shmedia_builtin_p,
11701 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11702 { shmedia_builtin_p,
11703 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11704 { shmedia_builtin_p,
11705 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11706 { shmedia_builtin_p,
11707 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11708 { shmedia_builtin_p,
11709 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11710 { shmedia_builtin_p,
11711 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11712 { shmedia_builtin_p,
11713 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11714 { shmedia_builtin_p,
11715 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11716 { shmedia_builtin_p,
11717 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11718 { shmedia_builtin_p,
11719 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11720 { shmedia_builtin_p,
11721 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11722 { shmedia_builtin_p,
11723 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11724 { shmedia_builtin_p,
11725 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11726 { shmedia_builtin_p,
11727 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11728 { shmedia_builtin_p,
11729 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11730 { shmedia_builtin_p,
11731 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11732 { shmedia_builtin_p,
11733 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11734 { shmedia_builtin_p,
11735 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11736 { shmedia_builtin_p,
11737 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11738 { shmedia_builtin_p,
11739 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11740 { shmedia_builtin_p,
11741 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11742 { shmedia_builtin_p,
11743 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11744 { shmedia_builtin_p,
11745 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11746 { shmedia_builtin_p,
11747 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11748 { shmedia_builtin_p,
11749 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11750 { shmedia_builtin_p,
11751 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11752 { shmedia_builtin_p,
11753 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11754 { shmedia_builtin_p,
11755 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11756 { shmedia_builtin_p,
11757 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11758 { shmedia_builtin_p,
11759 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11760 { shmedia_builtin_p,
11761 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11762 { shmedia_builtin_p,
11763 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11764 { shmedia_builtin_p,
11765 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11766 { shmedia_builtin_p,
11767 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11768 { shmedia_builtin_p,
11769 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11770 { shmedia_builtin_p,
11771 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11772 };
11773
11774 static void
11775 sh_init_builtins (void)
11776 {
11777 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11778 memset (shared, 0, sizeof shared);
11779
11780 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
11781 {
11782 builtin_description* d = &bdesc[di];
11783
11784 if (!d->is_enabled ())
11785 continue;
11786
11787 tree type, arg_type = NULL_TREE;
11788 int signature = d->signature;
11789
11790 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11791 type = shared[signature];
11792 else
11793 {
11794 int has_result = signature_args[signature][0] != 0;
11795 tree args[3];
11796
11797 if ((signature_args[signature][1] & 8)
11798 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11799 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11800 continue;
11801 if (! TARGET_FPU_ANY
11802 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11803 continue;
11804 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
11805 args[i] = NULL_TREE;
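/* Walk the signature from the last possible argument (i == 3) down to the
   result (i == 0); zero entries mean the corresponding slot is unused.  */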
11806 for (int i = 3; ; i--)
11807 {
11808 int arg = signature_args[signature][i];
11809 int opno = i - 1 + has_result;
11810
11811 if (arg & 8)
11812 arg_type = ptr_type_node;
11813 else if (arg)
11814 arg_type = (*lang_hooks.types.type_for_mode)
11815 (insn_data[d->icode].operand[opno].mode, (arg & 1));
11816 else if (i)
11817 continue;
11818 else
11819 arg_type = void_type_node;
11820 if (i == 0)
11821 break;
11822 args[i-1] = arg_type;
11823 }
11824 type = build_function_type_list (arg_type, args[0], args[1],
11825 args[2], NULL_TREE);
11826 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11827 shared[signature] = type;
11828 }
11829 d->fndecl =
11830 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11831 NULL, NULL_TREE);
11832 }
11833 }
11834
11835 /* Implements target hook vector_mode_supported_p. */
11836 bool
11837 sh_vector_mode_supported_p (enum machine_mode mode)
11838 {
11839 if (TARGET_FPU_ANY
11840 && ((mode == V2SFmode)
11841 || (mode == V4SFmode)
11842 || (mode == V16SFmode)))
11843 return true;
11844
11845 else if (TARGET_SHMEDIA
11846 && ((mode == V8QImode)
11847 || (mode == V2HImode)
11848 || (mode == V4HImode)
11849 || (mode == V2SImode)))
11850 return true;
11851
11852 return false;
11853 }
11854
11855 bool
11856 sh_frame_pointer_required (void)
11857 {
11858 /* If needed, override this in other tm.h files to cope with various OS
11859 lossage requiring a frame pointer. */
11860 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11861 return true;
11862
11863 if (crtl->profile)
11864 return true;
11865
11866 return false;
11867 }
11868
11869 /* Implements target hook dwarf_calling_convention. Return an enum
11870 of dwarf_calling_convention. */
11871 int
11872 sh_dwarf_calling_convention (const_tree func)
11873 {
11874 if (sh_attr_renesas_p (func))
11875 return DW_CC_GNU_renesas_sh;
11876
11877 return DW_CC_normal;
11878 }
11879
11880 /* Returns the sh builtin decl for CODE. */
11881 static tree
11882 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11883 {
11884 if (code >= ARRAY_SIZE (bdesc))
11885 return error_mark_node;
11886
11887 if (!bdesc[code].is_enabled ())
11888 return error_mark_node;
11889
11890 return bdesc[code].fndecl;
11891 }
11892
11893 /* Expand an expression EXP that calls a built-in function,
11894 with result going to TARGET if that's convenient
11895 (and in mode MODE if that's convenient).
11896 SUBTARGET may be used as the target for computing one of EXP's operands.
11897 IGNORE is nonzero if the value is to be ignored. */
11898 static rtx
11899 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11900 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11901 {
11902 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11903 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11904 const struct builtin_description *d = &bdesc[fcode];
11905 enum insn_code icode = d->icode;
11906 int signature = d->signature;
11907 int nop = 0;
11908 rtx op[4];
11909
11910 if (signature_args[signature][0])
11911 {
11912 if (ignore)
11913 return NULL_RTX;
11914
11915 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11916 if (! target || GET_MODE (target) != tmode
11917 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11918 target = gen_reg_rtx (tmode);
11919 op[nop++] = target;
11920 }
11921 else
11922 target = NULL_RTX;
11923
11924 for (int i = 1; i <= 3; i++, nop++)
11925 {
11926 tree arg;
11927 enum machine_mode opmode, argmode;
11928 tree optype;
11929
11930 if (! signature_args[signature][i])
11931 break;
11932 arg = CALL_EXPR_ARG (exp, i - 1);
11933 if (arg == error_mark_node)
11934 return const0_rtx;
11935 if (signature_args[signature][i] & 8)
11936 {
11937 opmode = ptr_mode;
11938 optype = ptr_type_node;
11939 }
11940 else
11941 {
11942 opmode = insn_data[icode].operand[nop].mode;
11943 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11944 }
11945 argmode = TYPE_MODE (TREE_TYPE (arg));
11946 if (argmode != opmode)
11947 arg = build1 (NOP_EXPR, optype, arg);
11948 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11949 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11950 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11951 }
11952
11953 rtx pat = NULL_RTX;
11954
11955 switch (nop)
11956 {
11957 case 1:
11958 pat = (*insn_data[d->icode].genfun) (op[0]);
11959 break;
11960 case 2:
11961 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11962 break;
11963 case 3:
11964 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11965 break;
11966 case 4:
11967 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11968 break;
11969 default:
11970 gcc_unreachable ();
11971 }
11972 if (! pat)
11973 return NULL_RTX;
11974 emit_insn (pat);
11975 return target;
11976 }
11977
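/* Expand a V2SF unary operation CODE on OP1 into OP0 as two SFmode
   operations, one per vector element (selectors 0 and 1).  */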
11978 void
11979 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11980 {
11981 rtx sel0 = const0_rtx;
11982 rtx sel1 = const1_rtx;
11983 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11984 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11985
11986 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11987 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11988 }
11989
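/* Likewise for a V2SF binary operation: expand CODE on OP1 and OP2 into OP0
   as two SFmode operations, one per vector element.  */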
11990 void
11991 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11992 {
11993 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11994
11995 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11996 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11997 }
11998
11999 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12000 We can allow any mode in any general register. The special registers
12001 only allow SImode. Don't allow any mode in the PR.
12002
12003 We cannot hold DCmode values in the XD registers because alter_reg
12004 handles subregs of them incorrectly. We could work around this by
12005 spacing the XD registers like the DR registers, but this would require
12006 additional memory in every compilation to hold larger register vectors.
12007 We could hold SFmode / SCmode values in XD registers, but that
12008 would require a tertiary reload when reloading from / to memory,
12009 and a secondary reload to reload from / to general regs; that
12010 seems to be a losing proposition.
12011
12012 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12013 it won't be ferried through GP registers first. */
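/* For example, DFmode values are only allowed in even-numbered FP registers,
   and TImode FP values only in FP registers whose number (relative to
   FIRST_FP_REG) is a multiple of four, as checked below.  */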
12014 bool
12015 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
12016 {
12017 if (SPECIAL_REGISTER_P (regno))
12018 return mode == SImode;
12019
12020 if (regno == FPUL_REG)
12021 return (mode == SImode || mode == SFmode);
12022
12023 if (FP_REGISTER_P (regno) && mode == SFmode)
12024 return true;
12025
12026 if (mode == V2SFmode)
12027 {
12028 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12029 || GENERAL_REGISTER_P (regno)))
12030 return true;
12031 else
12032 return false;
12033 }
12034
12035 if (mode == V4SFmode)
12036 {
12037 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12038 || GENERAL_REGISTER_P (regno))
12039 return true;
12040 else
12041 return false;
12042 }
12043
12044 if (mode == V16SFmode)
12045 {
12046 if (TARGET_SHMEDIA)
12047 {
12048 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12049 return true;
12050 else
12051 return false;
12052 }
12053 else
12054 return regno == FIRST_XD_REG;
12055 }
12056
12057 if (FP_REGISTER_P (regno))
12058 {
12059 if (mode == SFmode
12060 || mode == SImode
12061 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12062 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12063 || mode == DCmode
12064 || (TARGET_SHMEDIA
12065 && (mode == DFmode || mode == DImode
12066 || mode == V2SFmode || mode == TImode)))
12067 && ((regno - FIRST_FP_REG) & 1) == 0)
12068 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12069 && ((regno - FIRST_FP_REG) & 3) == 0))
12070 return true;
12071 else
12072 return false;
12073 }
12074
12075 if (XD_REGISTER_P (regno))
12076 return mode == DFmode;
12077
12078 if (TARGET_REGISTER_P (regno))
12079 return (mode == DImode || mode == SImode || mode == PDImode);
12080
12081 if (regno == PR_REG)
12082 return mode == SImode;
12083
12084 if (regno == FPSCR_REG)
12085 return mode == PSImode;
12086
12087 /* FIXME. This works around PR target/37633 for -O0. */
12088 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12089 {
12090 unsigned int n = GET_MODE_SIZE (mode) / 8;
12091
12092 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12093 && regno <= FIRST_GENERAL_REG + 14)
12094 return false;
12095 }
12096
12097 return true;
12098 }
12099
12100 /* Return true if a mode change from FROM to TO is invalid for registers
12101 in class RCLASS. */
12102 bool
12103 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
12104 enum reg_class rclass)
12105 {
12106 /* We want to enable the use of SUBREGs as a means to
12107 VEC_SELECT a single element of a vector. */
12108
12109 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12110 This can be problematic when SFmode vector subregs need to be accessed
12111 on the stack with displacement addressing, as it happens with -O0.
12112 Thus we disallow the mode change for -O0. */
12113 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12114 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12115
12116 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12117 {
12118 if (TARGET_LITTLE_ENDIAN)
12119 {
12120 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12121 return reg_classes_intersect_p (DF_REGS, rclass);
12122 }
12123 else
12124 {
12125 if (GET_MODE_SIZE (from) < 8)
12126 return reg_classes_intersect_p (DF_REGS, rclass);
12127 }
12128 }
12129 return false;
12130 }
12131
12132 /* Return true if registers in machine mode MODE will likely be
12133 allocated to registers in small register classes. */
12134 bool
12135 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
12136 {
12137 return (! TARGET_SHMEDIA);
12138 }
12139
12140 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12141 that label is used. */
12142 void
12143 sh_mark_label (rtx address, int nuses)
12144 {
12145 if (GOTOFF_P (address))
12146 {
12147 /* Extract the label or symbol. */
12148 address = XEXP (address, 0);
12149 if (GET_CODE (address) == PLUS)
12150 address = XEXP (address, 0);
12151 address = XVECEXP (address, 0, 0);
12152 }
12153 if (GET_CODE (address) == LABEL_REF
12154 && LABEL_P (XEXP (address, 0)))
12155 LABEL_NUSES (XEXP (address, 0)) += nuses;
12156 }
12157
12158 /* Compute extra cost of moving data between one register class
12159 and another.
12160
12161 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12162 uses this information. Hence, the general register <-> floating point
12163 register information here is not used for SFmode. */
12164 static int
12165 sh_register_move_cost (enum machine_mode mode,
12166 reg_class_t srcclass, reg_class_t dstclass)
12167 {
12168 if (dstclass == T_REGS || dstclass == PR_REGS)
12169 return 10;
12170
12171 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12172 return 4;
12173
12174 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12175 && REGCLASS_HAS_FP_REG (srcclass)
12176 && REGCLASS_HAS_FP_REG (dstclass))
12177 return 4;
12178
12179 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12180 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12181
12182 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12183 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12184 return 9;
12185
12186 if ((REGCLASS_HAS_FP_REG (dstclass)
12187 && REGCLASS_HAS_GENERAL_REG (srcclass))
12188 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12189 && REGCLASS_HAS_FP_REG (srcclass)))
12190 {
12191 /* Discourage trying to use fp regs for a pointer. This also
12192 discourages fp regs with SImode because Pmode is an alias
12193 of SImode on this target. See PR target/48596. */
12194 int addend = (mode == Pmode) ? 40 : 0;
12195
12196 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12197 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12198 }
12199
12200 if ((dstclass == FPUL_REGS
12201 && REGCLASS_HAS_GENERAL_REG (srcclass))
12202 || (srcclass == FPUL_REGS
12203 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12204 return 5;
12205
12206 if ((dstclass == FPUL_REGS
12207 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12208 || (srcclass == FPUL_REGS
12209 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12210 return 7;
12211
12212 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12213 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12214 return 20;
12215
12216 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12217 if (TARGET_SHMEDIA
12218 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12219 {
12220 if (sh_gettrcost >= 0)
12221 return sh_gettrcost;
12222 else if (!TARGET_PT_FIXED)
12223 return 100;
12224 }
12225
12226 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12227 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12228 return 4;
12229
12230 if (TARGET_SHMEDIA
12231 || (TARGET_FMOVD
12232 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12233 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12234 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12235
12236 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
12237 }
12238
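/* Load a pointer-sized value from ADDR into REG, sign-extending from
   ptr_mode to Pmode when the two differ.  */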
12239 static rtx
12240 emit_load_ptr (rtx reg, rtx addr)
12241 {
12242 rtx mem = gen_const_mem (ptr_mode, addr);
12243
12244 if (Pmode != ptr_mode)
12245 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12246 return emit_move_insn (reg, mem);
12247 }
12248
12249 static void
12250 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12251 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12252 tree function)
12253 {
12254 CUMULATIVE_ARGS cum;
12255 int structure_value_byref = 0;
12256 rtx this_rtx, this_value, sibcall, insns, funexp;
12257 tree funtype = TREE_TYPE (function);
12258 int simple_add = CONST_OK_FOR_ADD (delta);
12259 int did_load = 0;
12260 rtx scratch0, scratch1, scratch2;
12261 unsigned i;
12262
12263 reload_completed = 1;
12264 epilogue_completed = 1;
12265 crtl->uses_only_leaf_regs = 1;
12266
12267 emit_note (NOTE_INSN_PROLOGUE_END);
12268
12269 /* Find the "this" pointer. We have such a wide range of ABIs for the
12270 SH that it's best to do this completely machine independently.
12271 "this" is passed as first argument, unless a structure return pointer
12272 comes first, in which case "this" comes second. */
12273 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12274 #ifndef PCC_STATIC_STRUCT_RETURN
12275 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12276 structure_value_byref = 1;
12277 #endif /* not PCC_STATIC_STRUCT_RETURN */
12278 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12279 {
12280 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12281
12282 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12283 }
12284 this_rtx
12285 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12286
12287 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12288 static chain pointer (even if you can't have nested virtual functions
12289 right now, someone might implement them sometime), and the rest of the
12290 registers are used for argument passing, are callee-saved, or reserved. */
12291 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12292 -ffixed-reg has been used. */
12293 if (! call_used_regs[0] || fixed_regs[0])
12294 error ("r0 needs to be available as a call-clobbered register");
12295 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12296 if (! TARGET_SH5)
12297 {
12298 if (call_used_regs[1] && ! fixed_regs[1])
12299 scratch1 = gen_rtx_REG (ptr_mode, 1);
12300 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12301 to the location where struct values are returned. */
12302 if (call_used_regs[3] && ! fixed_regs[3])
12303 scratch2 = gen_rtx_REG (Pmode, 3);
12304 }
12305 else if (TARGET_SHMEDIA)
12306 {
12307 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12308 if (i != REGNO (scratch0) &&
12309 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12310 {
12311 scratch1 = gen_rtx_REG (ptr_mode, i);
12312 break;
12313 }
12314 if (scratch1 == scratch0)
12315 error ("need a second call-clobbered general purpose register");
12316 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12317 if (call_used_regs[i] && ! fixed_regs[i])
12318 {
12319 scratch2 = gen_rtx_REG (Pmode, i);
12320 break;
12321 }
12322 if (scratch2 == scratch0)
12323 error ("need a call-clobbered target register");
12324 }
12325
12326 this_value = plus_constant (Pmode, this_rtx, delta);
12327 if (vcall_offset
12328 && (simple_add || scratch0 != scratch1)
12329 && strict_memory_address_p (ptr_mode, this_value))
12330 {
12331 emit_load_ptr (scratch0, this_value);
12332 did_load = 1;
12333 }
12334
12335 if (!delta)
12336 ; /* Do nothing. */
12337 else if (simple_add)
12338 emit_move_insn (this_rtx, this_value);
12339 else
12340 {
12341 emit_move_insn (scratch1, GEN_INT (delta));
12342 emit_insn (gen_add2_insn (this_rtx, scratch1));
12343 }
12344
12345 if (vcall_offset)
12346 {
12347 rtx offset_addr;
12348
12349 if (!did_load)
12350 emit_load_ptr (scratch0, this_rtx);
12351
12352 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12353 if (strict_memory_address_p (ptr_mode, offset_addr))
12354 ; /* Do nothing. */
12355 else if (! TARGET_SH5 && scratch0 != scratch1)
12356 {
12357 /* scratch0 != scratch1, and we have indexed loads. Get better
12358 schedule by loading the offset into r1 and using an indexed
12359 load - then the load of r1 can issue before the load from
12360 (this_rtx + delta) finishes. */
12361 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12362 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12363 }
12364 else if (CONST_OK_FOR_ADD (vcall_offset))
12365 {
12366 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12367 offset_addr = scratch0;
12368 }
12369 else if (scratch0 != scratch1)
12370 {
12371 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12372 emit_insn (gen_add2_insn (scratch0, scratch1));
12373 offset_addr = scratch0;
12374 }
12375 else
12376 gcc_unreachable (); /* FIXME */
12377 emit_load_ptr (scratch0, offset_addr);
12378
12379 if (Pmode != ptr_mode)
12380 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12381 emit_insn (gen_add2_insn (this_rtx, scratch0));
12382 }
12383
12384 /* Generate a tail call to the target function. */
12385 if (! TREE_USED (function))
12386 {
12387 assemble_external (function);
12388 TREE_USED (function) = 1;
12389 }
12390 funexp = XEXP (DECL_RTL (function), 0);
12391 /* If the function is overridden, so is the thunk, hence we don't
12392 need GOT addressing even if this is a public symbol. */
12393 #if 0
12394 if (TARGET_SH1 && ! flag_weak)
12395 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12396 else
12397 #endif
12398 if (TARGET_SH2 && flag_pic)
12399 {
12400 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12401 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12402 }
12403 else
12404 {
12405 if (TARGET_SHMEDIA && flag_pic)
12406 {
12407 funexp = gen_sym2PIC (funexp);
12408 PUT_MODE (funexp, Pmode);
12409 }
12410 emit_move_insn (scratch2, funexp);
12411 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12412 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12413 }
12414 sibcall = emit_call_insn (sibcall);
12415 SIBLING_CALL_P (sibcall) = 1;
12416 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12417 emit_barrier ();
12418
12419 /* Run just enough of rest_of_compilation to do scheduling and get
12420 the insns emitted. Note that use_thunk calls
12421 assemble_start_function and assemble_end_function. */
12422
12423 insns = get_insns ();
12424
12425 if (optimize > 0)
12426 {
12427 if (! cfun->cfg)
12428 init_flow (cfun);
12429 split_all_insns_noflow ();
12430 }
12431
12432 sh_reorg ();
12433 shorten_branches (insns);
12434 final_start_function (insns, file, 1);
12435 final (insns, file, 1);
12436 final_end_function ();
12437
12438 reload_completed = 0;
12439 epilogue_completed = 0;
12440 }
12441
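/* Return an rtx for the address of the function named NAME, of kind KIND.
   Under PIC, SFUNC_GOT / SFUNC_STATIC symbols are loaded through the GOT or
   with a GOTOFF relocation into TARGET (or a fresh register); if TARGET is
   non-null, the result is left in TARGET. */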
12442 rtx
12443 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12444 {
12445 rtx sym;
12446
12447 /* If this is not an ordinary function, the name usually comes from a
12448 string literal or an sprintf buffer. Make sure we use the same
12449 string consistently, so that cse will be able to unify address loads. */
12450 if (kind != FUNCTION_ORDINARY)
12451 name = IDENTIFIER_POINTER (get_identifier (name));
12452 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12453 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12454 if (flag_pic)
12455 switch (kind)
12456 {
12457 case FUNCTION_ORDINARY:
12458 break;
12459 case SFUNC_GOT:
12460 {
12461 rtx reg = target ? target : gen_reg_rtx (Pmode);
12462
12463 emit_insn (gen_symGOT2reg (reg, sym));
12464 sym = reg;
12465 break;
12466 }
12467 case SFUNC_STATIC:
12468 {
12469 /* ??? To allow cse to work, we use GOTOFF relocations.
12470 We could add combiner patterns to transform this into
12471 straight pc-relative calls with sym2PIC / bsrf when
12472 label load and function call are still 1:1 and in the
12473 same basic block during combine. */
12474 rtx reg = target ? target : gen_reg_rtx (Pmode);
12475
12476 emit_insn (gen_symGOTOFF2reg (reg, sym));
12477 sym = reg;
12478 break;
12479 }
12480 }
12481 if (target && sym != target)
12482 {
12483 emit_move_insn (target, sym);
12484 return target;
12485 }
12486 return sym;
12487 }
12488
12489 /* Find the number of a general purpose register in S. */
12490 static int
12491 scavenge_reg (HARD_REG_SET *s)
12492 {
12493 int r;
12494 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12495 if (TEST_HARD_REG_BIT (*s, r))
12496 return r;
12497 return -1;
12498 }
12499
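/* Return an rtx that refers to the value the PR register (the return
   address) had on entry to the current function. */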
12500 rtx
12501 sh_get_pr_initial_val (void)
12502 {
12503 rtx val;
12504
12505 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12506 PR register on SHcompact, because it might be clobbered by the prologue.
12507 We check first if that is known to be the case. */
12508 if (TARGET_SHCOMPACT
12509 && ((crtl->args.info.call_cookie
12510 & ~ CALL_COOKIE_RET_TRAMP (1))
12511 || crtl->saves_all_registers))
12512 return gen_frame_mem (SImode, return_address_pointer_rtx);
12513
12514 /* If we haven't finished rtl generation, there might be a nonlocal label
12515 that we haven't seen yet.
12516 ??? get_hard_reg_initial_val fails if it is called after register
12517 allocation has started, unless it has been called before for the
12518 same register. And even then, we end up in trouble if we didn't use
12519 the register in the same basic block before. So call
12520 get_hard_reg_initial_val now and wrap it in an unspec if we might
12521 need to replace it. */
12522 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12523 combine can put the pseudo returned by get_hard_reg_initial_val into
12524 instructions that need a general purpose register, which will fail to
12525 be recognized when the pseudo becomes allocated to PR. */
12526 val
12527 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12528 if (TARGET_SH1)
12529 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12530 return val;
12531 }
12532
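/* Expand an scc operation: OPERANDS[0] is the target, OPERANDS[1] the
   comparison code, and OPERANDS[2] / OPERANDS[3] the compared values.
   Only comparisons of the T bit register against a constant are handled;
   return true if the expansion succeeded, false otherwise. */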
12533 bool
12534 sh_expand_t_scc (rtx operands[])
12535 {
12536 enum rtx_code code = GET_CODE (operands[1]);
12537 rtx target = operands[0];
12538 rtx op0 = operands[2];
12539 rtx op1 = operands[3];
12540 rtx result = target;
12541 HOST_WIDE_INT val;
12542
12543 if (!REG_P (op0) || REGNO (op0) != T_REG
12544 || !CONST_INT_P (op1))
12545 return false;
12546 if (!REG_P (result))
12547 result = gen_reg_rtx (SImode);
12548 val = INTVAL (op1);
12549 if ((code == EQ && val == 1) || (code == NE && val == 0))
12550 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12551 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12552 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12553 else if (code == EQ || code == NE)
12554 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12555 else
12556 return false;
12557 if (result != target)
12558 emit_move_insn (target, result);
12559 return true;
12560 }
12561
12562 /* INSN is an sfunc; return the rtx that describes the address used. */
12563 static rtx
12564 extract_sfunc_addr (rtx insn)
12565 {
12566 rtx pattern, part = NULL_RTX;
12567 int len, i;
12568
12569 pattern = PATTERN (insn);
12570 len = XVECLEN (pattern, 0);
12571 for (i = 0; i < len; i++)
12572 {
12573 part = XVECEXP (pattern, 0, i);
12574 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12575 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12576 return XEXP (part, 0);
12577 }
12578 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12579 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12580 }
12581
12582 /* Verify that the register in use_sfunc_addr still agrees with the address
12583 used in the sfunc. This prevents fill_slots_from_thread from changing
12584 use_sfunc_addr.
12585 INSN is the use_sfunc_addr instruction, and REG is the register it
12586 guards. */
12587 bool
12588 check_use_sfunc_addr (rtx insn, rtx reg)
12589 {
12590 /* Search for the sfunc. It should really come right after INSN. */
12591 while ((insn = NEXT_INSN (insn)))
12592 {
12593 if (LABEL_P (insn) || JUMP_P (insn))
12594 break;
12595 if (! INSN_P (insn))
12596 continue;
12597
12598 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
12599 insn = XVECEXP (PATTERN (insn), 0, 0);
12600 if (GET_CODE (PATTERN (insn)) != PARALLEL
12601 || get_attr_type (insn) != TYPE_SFUNC)
12602 continue;
12603 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12604 }
12605 gcc_unreachable ();
12606 }
12607
12608 /* This function returns a constant rtx that represents 2**15 / pi in
12609 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
12610 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
12611 static GTY(()) rtx sh_fsca_sf2int_rtx;
12612
12613 rtx
12614 sh_fsca_sf2int (void)
12615 {
12616 if (! sh_fsca_sf2int_rtx)
12617 {
12618 REAL_VALUE_TYPE rv;
12619
12620 real_from_string (&rv, "10430.378350470453");
12621 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12622 }
12623
12624 return sh_fsca_sf2int_rtx;
12625 }
12626
12627 /* This function returns a constant rtx that represents pi / 2**15 in
12628 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12629 of a full circle back to an SFmode angle in radians, i.e. 0x10000
12630 maps to 2*pi. */
12631 static GTY(()) rtx sh_fsca_int2sf_rtx;
12632
12633 rtx
12634 sh_fsca_int2sf (void)
12635 {
12636 if (! sh_fsca_int2sf_rtx)
12637 {
12638 REAL_VALUE_TYPE rv;
12639
12640 real_from_string (&rv, "9.587379924285257e-5");
12641 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12642 }
12643
12644 return sh_fsca_int2sf_rtx;
12645 }
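/* For reference, the fsca fixed-point angle unit is 2*pi / 2**16 radians,
   so the radians -> fixed-point factor is 2**16 / (2*pi) = 2**15 / pi
   = 32768 / 3.14159265... ~= 10430.378350470453, and the fixed-point ->
   radians factor is its reciprocal, pi / 2**15 ~= 9.587379924285257e-5. */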
12646
12647 /* Initialize the CUMULATIVE_ARGS structure. */
12648 void
12649 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12650 tree fntype,
12651 rtx libname ATTRIBUTE_UNUSED,
12652 tree fndecl,
12653 signed int n_named_args,
12654 enum machine_mode mode)
12655 {
12656 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12657 pcum->free_single_fp_reg = 0;
12658 pcum->stack_regs = 0;
12659 pcum->byref_regs = 0;
12660 pcum->byref = 0;
12661 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12662
12663 /* XXX - Should we check TARGET_HITACHI here ??? */
12664 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12665
12666 if (fntype)
12667 {
12668 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12669 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12670 pcum->prototype_p = prototype_p (fntype);
12671 pcum->arg_count [(int) SH_ARG_INT]
12672 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12673
12674 pcum->call_cookie
12675 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12676 && pcum->arg_count [(int) SH_ARG_INT] == 0
12677 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12678 ? int_size_in_bytes (TREE_TYPE (fntype))
12679 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12680 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12681 == FIRST_RET_REG));
12682 }
12683 else
12684 {
12685 pcum->arg_count [(int) SH_ARG_INT] = 0;
12686 pcum->prototype_p = FALSE;
12687 if (mode != VOIDmode)
12688 {
12689 pcum->call_cookie =
12690 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12691 && GET_MODE_SIZE (mode) > 4
12692 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12693
12694 /* If the default ABI is the Renesas ABI then all library
12695 calls must assume that the library will be using the
12696 Renesas ABI. So if the function would return its result
12697 in memory then we must force the address of this memory
12698 block onto the stack. Ideally we would like to call
12699 targetm.calls.return_in_memory() here but we do not have
12700 the TYPE or the FNDECL available so we synthesize the
12701 contents of that function as best we can. */
12702 pcum->force_mem =
12703 (TARGET_DEFAULT & MASK_HITACHI)
12704 && (mode == BLKmode
12705 || (GET_MODE_SIZE (mode) > 4
12706 && !(mode == DFmode
12707 && TARGET_FPU_DOUBLE)));
12708 }
12709 else
12710 {
12711 pcum->call_cookie = 0;
12712 pcum->force_mem = FALSE;
12713 }
12714 }
12715 }
12716
12717 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12718 not descend into CONST_DOUBLEs when doing the replacement.
12719
12720 Note that copying is not done so X must not be shared unless all copies
12721 are to be modified.
12722
12723 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12724 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12725 replacements[n*2+1] - and that we take mode changes into account.
12726
12727 If a replacement is ambiguous, return NULL_RTX.
12728
12729 If MODIFY is zero, don't modify any rtl in place,
12730 just return zero or nonzero for failure / success. */
12731 rtx
12732 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12733 {
12734 int i, j;
12735 const char *fmt;
12736
12737 /* The following prevents infinite loops when we change a MEM inside
12738 a CONST_DOUBLE into the same CONST_DOUBLE. */
12739 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
12740 return x;
12741
12742 for (i = n_replacements - 1; i >= 0 ; i--)
12743 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12744 return replacements[i*2+1];
12745
12746 /* Allow this function to make replacements in EXPR_LISTs. */
12747 if (x == NULL_RTX)
12748 return NULL_RTX;
12749
12750 if (GET_CODE (x) == SUBREG)
12751 {
12752 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12753 n_replacements, modify);
12754
12755 if (CONST_INT_P (new_rtx))
12756 {
12757 x = simplify_subreg (GET_MODE (x), new_rtx,
12758 GET_MODE (SUBREG_REG (x)),
12759 SUBREG_BYTE (x));
12760 if (! x)
12761 abort ();
12762 }
12763 else if (modify)
12764 SUBREG_REG (x) = new_rtx;
12765
12766 return x;
12767 }
12768 else if (REG_P (x))
12769 {
12770 unsigned regno = REGNO (x);
12771 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12772 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12773 rtx result = NULL_RTX;
12774
12775 for (i = n_replacements - 1; i >= 0; i--)
12776 {
12777 rtx from = replacements[i*2];
12778 rtx to = replacements[i*2+1];
12779 unsigned from_regno, from_nregs, to_regno, new_regno;
12780
12781 if (!REG_P (from))
12782 continue;
12783 from_regno = REGNO (from);
12784 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12785 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
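/* If X overlaps FROM at all, the replacement is only well-defined when X
   lies entirely within FROM, TO is itself a register, and no earlier
   replacement has already matched; anything else is ambiguous. */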
12786 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12787 {
12788 if (regno < from_regno
12789 || regno + nregs > from_regno + from_nregs
12790 || !REG_P (to)
12791 || result)
12792 return NULL_RTX;
12793 to_regno = REGNO (to);
12794 if (to_regno < FIRST_PSEUDO_REGISTER)
12795 {
12796 new_regno = regno + to_regno - from_regno;
12797 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12798 != nregs)
12799 return NULL_RTX;
12800 result = gen_rtx_REG (GET_MODE (x), new_regno);
12801 }
12802 else if (GET_MODE (x) <= GET_MODE (to))
12803 result = gen_lowpart_common (GET_MODE (x), to);
12804 else
12805 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12806 }
12807 }
12808 return result ? result : x;
12809 }
12810 else if (GET_CODE (x) == ZERO_EXTEND)
12811 {
12812 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12813 n_replacements, modify);
12814
12815 if (CONST_INT_P (new_rtx))
12816 {
12817 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12818 new_rtx, GET_MODE (XEXP (x, 0)));
12819 if (! x)
12820 abort ();
12821 }
12822 else if (modify)
12823 XEXP (x, 0) = new_rtx;
12824
12825 return x;
12826 }
12827
12828 fmt = GET_RTX_FORMAT (GET_CODE (x));
12829 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12830 {
12831 rtx new_rtx;
12832
12833 if (fmt[i] == 'e')
12834 {
12835 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12836 n_replacements, modify);
12837 if (!new_rtx)
12838 return NULL_RTX;
12839 if (modify)
12840 XEXP (x, i) = new_rtx;
12841 }
12842 else if (fmt[i] == 'E')
12843 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12844 {
12845 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12846 n_replacements, modify);
12847 if (!new_rtx)
12848 return NULL_RTX;
12849 if (modify)
12850 XVECEXP (x, i, j) = new_rtx;
12851 }
12852 }
12853
12854 return x;
12855 }
12856
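/* Return an rtx that converts X to MODE, folding away a redundant sign or
   zero extension of X where possible instead of emitting a plain TRUNCATE. */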
12857 rtx
12858 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12859 {
12860 enum rtx_code code = TRUNCATE;
12861
12862 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12863 {
12864 rtx inner = XEXP (x, 0);
12865 enum machine_mode inner_mode = GET_MODE (inner);
12866
12867 if (inner_mode == mode)
12868 return inner;
12869 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12870 x = inner;
12871 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12872 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12873 {
12874 code = GET_CODE (x);
12875 x = inner;
12876 }
12877 }
12878 return gen_rtx_fmt_e (code, mode, x);
12879 }
12880
12881 /* Called via for_each_rtx after reload, to clean up truncates of
12882 registers that span multiple actual hard registers. */
12883 int
12884 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12885 {
12886 rtx x = *p, reg;
12887
12888 if (GET_CODE (x) != TRUNCATE)
12889 return 0;
12890 reg = XEXP (x, 0);
12891 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12892 {
12893 enum machine_mode reg_mode = GET_MODE (reg);
12894 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12895 subreg_lowpart_offset (DImode, reg_mode));
12896 *(int*) n_changes += 1;
12897 return -1;
12898 }
12899 return 0;
12900 }
12901
12902 /* Load and store depend on the highpart of the address. However,
12903 set_attr_alternative does not give well-defined results before reload,
12904 so we must look at the rtl ourselves to see if any of the feeding
12905 registers is used in a memref.
12906
12907 Called by sh_contains_memref_p via for_each_rtx. */
12908 static int
12909 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12910 {
12911 return (MEM_P (*loc));
12912 }
12913
12914 /* Return true iff INSN contains a MEM. */
12915 bool
12916 sh_contains_memref_p (rtx insn)
12917 {
12918 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12919 }
12920
12921 /* Return true iff INSN loads a banked register. */
12922 bool
12923 sh_loads_bankedreg_p (rtx insn)
12924 {
12925 if (GET_CODE (PATTERN (insn)) == SET)
12926 {
12927 rtx op = SET_DEST (PATTERN (insn));
12928 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12929 return true;
12930 }
12931
12932 return false;
12933 }
12934
12935 /* FNADDR is the MEM expression from a call expander. Return an address
12936 to use in an SHmedia insn pattern. */
12937 rtx
12938 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12939 {
12940 int is_sym;
12941
12942 fnaddr = XEXP (fnaddr, 0);
12943 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12944 if (flag_pic && is_sym)
12945 {
12946 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12947 {
12948 rtx reg = gen_reg_rtx (Pmode);
12949
12950 /* We must not use GOTPLT for sibcalls, because PIC_REG
12951 must be restored before the PLT code gets to run. */
12952 if (is_sibcall)
12953 emit_insn (gen_symGOT2reg (reg, fnaddr));
12954 else
12955 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12956 fnaddr = reg;
12957 }
12958 else
12959 {
12960 fnaddr = gen_sym2PIC (fnaddr);
12961 PUT_MODE (fnaddr, Pmode);
12962 }
12963 }
12964 /* If ptabs might trap, make this visible to the rest of the compiler.
12965 We generally assume that symbols pertain to valid locations, but
12966 it is possible to generate invalid symbols with asm or linker tricks.
12967 In a list of functions where each returns its successor, an invalid
12968 symbol might denote an empty list. */
12969 if (!TARGET_PT_FIXED
12970 && (!is_sym || TARGET_INVALID_SYMBOLS)
12971 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12972 {
12973 rtx tr = gen_reg_rtx (PDImode);
12974
12975 emit_insn (gen_ptabs (tr, fnaddr));
12976 fnaddr = tr;
12977 }
12978 else if (! target_reg_operand (fnaddr, Pmode))
12979 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12980 return fnaddr;
12981 }
12982
12983 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
12984 static reg_class_t
12985 sh_preferred_reload_class (rtx x, reg_class_t rclass)
12986 {
12987 if (rclass == NO_REGS
12988 && TARGET_SHMEDIA
12989 && (CONST_DOUBLE_P (x)
12990 || GET_CODE (x) == SYMBOL_REF
12991 || PIC_ADDR_P (x)))
12992 return GENERAL_REGS;
12993
12994 return rclass;
12995 }
12996
12997 /* Implement TARGET_SECONDARY_RELOAD. */
12998 static reg_class_t
12999 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13000 enum machine_mode mode, secondary_reload_info *sri)
13001 {
13002 enum reg_class rclass = (enum reg_class) rclass_i;
13003
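/* Reloads of GBR-based memory operands have to go through R0; the GBR
   register itself never needs a secondary reload register. */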
13004 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13005 && REG_P (XEXP (XEXP (x, 0), 0))
13006 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13007 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13008
13009 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13010 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13011
13012 if (REG_P (x) && REGNO (x) == GBR_REG)
13013 return NO_REGS;
13014
13015 if (in_p)
13016 {
13017 if (REGCLASS_HAS_FP_REG (rclass)
13018 && ! TARGET_SHMEDIA
13019 && immediate_operand ((x), mode)
13020 && ! ((fp_zero_operand (x) || fp_one_operand (x))
13021 && mode == SFmode && fldi_ok ()))
13022 switch (mode)
13023 {
13024 case SFmode:
13025 sri->icode = CODE_FOR_reload_insf__frn;
13026 return NO_REGS;
13027 case DFmode:
13028 sri->icode = CODE_FOR_reload_indf__frn;
13029 return NO_REGS;
13030 case SImode:
13031 /* ??? If we knew that we were in the appropriate mode -
13032 single precision - we could use a reload pattern directly. */
13033 return FPUL_REGS;
13034 default:
13035 abort ();
13036 }
13037 if (rclass == FPUL_REGS
13038 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13039 || REGNO (x) == T_REG))
13040 || GET_CODE (x) == PLUS))
13041 return GENERAL_REGS;
13042 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13043 {
13044 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13045 return GENERAL_REGS;
13046 else if (mode == SFmode)
13047 return FP_REGS;
13048 sri->icode = CODE_FOR_reload_insi__i_fpul;
13049 return NO_REGS;
13050 }
13051 if (rclass == FPSCR_REGS
13052 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13053 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13054 return GENERAL_REGS;
13055 if (REGCLASS_HAS_FP_REG (rclass)
13056 && TARGET_SHMEDIA
13057 && immediate_operand (x, mode)
13058 && x != CONST0_RTX (GET_MODE (x))
13059 && GET_MODE (x) != V4SFmode)
13060 return GENERAL_REGS;
13061 if ((mode == QImode || mode == HImode)
13062 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13063 {
13064 sri->icode = ((mode == QImode)
13065 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13066 return NO_REGS;
13067 }
13068 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13069 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13070 return TARGET_REGS;
13071 } /* end of input-only processing. */
13072
13073 if (((REGCLASS_HAS_FP_REG (rclass)
13074 && (REG_P (x)
13075 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13076 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13077 && TARGET_FMOVD))))
13078 || (REGCLASS_HAS_GENERAL_REG (rclass)
13079 && REG_P (x)
13080 && FP_REGISTER_P (REGNO (x))))
13081 && ! TARGET_SHMEDIA
13082 && (mode == SFmode || mode == SImode))
13083 return FPUL_REGS;
13084 if ((rclass == FPUL_REGS
13085 || (REGCLASS_HAS_FP_REG (rclass)
13086 && ! TARGET_SHMEDIA && mode == SImode))
13087 && (MEM_P (x)
13088 || (REG_P (x)
13089 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13090 || REGNO (x) == T_REG
13091 || system_reg_operand (x, VOIDmode)))))
13092 {
13093 if (rclass == FPUL_REGS)
13094 return GENERAL_REGS;
13095 return FPUL_REGS;
13096 }
13097 if ((rclass == TARGET_REGS
13098 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13099 && !satisfies_constraint_Csy (x)
13100 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13101 return GENERAL_REGS;
13102 if ((rclass == MAC_REGS || rclass == PR_REGS)
13103 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13104 && rclass != REGNO_REG_CLASS (REGNO (x)))
13105 return GENERAL_REGS;
13106 if (rclass != GENERAL_REGS && REG_P (x)
13107 && TARGET_REGISTER_P (REGNO (x)))
13108 return GENERAL_REGS;
13109
13110 /* If we get here, fall back to loading the FPUL register through general
13111 registers. This case can happen when the movsi_ie insn is picked
13112 initially to load/store the FPUL register from/to another register,
13113 and then the other register is allocated on the stack. */
13114 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13115 return GENERAL_REGS;
13116
13117 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13118 the other operand.
13119 On SH2A we could also just leave it alone here, which would result in a
13120 4 byte move insn being generated instead. However, for this to work
13121 the insns must have the appropriate alternatives. */
13122 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13123 && satisfies_constraint_Sdd (x)
13124 && disp_addr_displacement (x) <= max_mov_insn_displacement (mode, false))
13125 return R0_REGS;
13126
13127 /* When reload is trying to address a QImode or HImode subreg on the stack,
13128 force any subreg byte into R0_REGS, as this is going to become a
13129 displacement address.
13130 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13131 is on the stack, the memref to it might already require a displacement
13132 and that has to be added to the final address. At this point we don't
13133 know the cumulative displacement so we assume the worst case. */
13134 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13135 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13136 return R0_REGS;
13137
13138 return NO_REGS;
13139 }
13140
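/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */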
13141 static void
13142 sh_conditional_register_usage (void)
13143 {
13144 int regno;
13145 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13146 if (! VALID_REGISTER_P (regno))
13147 fixed_regs[regno] = call_used_regs[regno] = 1;
13148 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13149 if (TARGET_SH5)
13150 {
13151 call_used_regs[FIRST_GENERAL_REG + 8]
13152 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13153 call_really_used_regs[FIRST_GENERAL_REG + 8]
13154 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13155 }
13156 if (TARGET_SHMEDIA)
13157 {
13158 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13159 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13160 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13161 }
13162 if (flag_pic)
13163 {
13164 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13165 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13166 }
13167 /* Renesas saves and restores mac registers on call. */
13168 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13169 {
13170 call_really_used_regs[MACH_REG] = 0;
13171 call_really_used_regs[MACL_REG] = 0;
13172 }
13173
13174 if (TARGET_SHMEDIA)
13175 {
13176 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13177 if (! fixed_regs[regno] && call_really_used_regs[regno])
13178 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13179 }
13180 else
13181 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13182 if (! fixed_regs[regno] && call_really_used_regs[regno])
13183 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13184 }
13185
13186 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13187
13188 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13189 static bool
13190 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
13191 {
13192 return (TARGET_SHMEDIA
13193 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13194 || x == CONST0_RTX (mode)
13195 || !TARGET_SHMEDIA_FPU
13196 || TARGET_SHMEDIA64)
13197 : (GET_CODE (x) != CONST_DOUBLE
13198 || mode == DFmode || mode == SFmode
13199 || mode == DImode || GET_MODE (x) == VOIDmode));
13200 }
13201
13202 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13203
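/* Register the out-of-line __sync_* library functions for operations of up
   to UNITS_PER_WORD bytes. */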
13204 static void
13205 sh_init_sync_libfuncs (void)
13206 {
13207 init_sync_libfuncs (UNITS_PER_WORD);
13208 }
13209
13210 /* Return true if it is appropriate to emit `ret' instructions in the
13211 body of a function. */
13212 bool
13213 sh_can_use_simple_return_p (void)
13214 {
13215 HARD_REG_SET live_regs_mask;
13216 int d;
13217
13218 /* Some targets require special return insns. */
13219 if (TARGET_SHMEDIA
13220 || (TARGET_SHCOMPACT
13221 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13222 return false;
13223
13224 if (! reload_completed || frame_pointer_needed)
13225 return false;
13226
13227 /* Moving the prologue around doesn't reduce the size. */
13228 if (optimize_function_for_size_p (cfun))
13229 return false;
13230
13231 /* Finally, allow for the PR save. */
13232 d = calc_live_regs (&live_regs_mask);
13233
13234 if (rounded_frame_size (d) > 4)
13235 return false;
13236
13237 return true;
13238 }
13239
13240 /*------------------------------------------------------------------------------
13241 Address mode optimization support code
13242 */
13243
13244 typedef HOST_WIDE_INT disp_t;
13245 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13246 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13247 static const disp_t INVALID_DISP = MAX_DISP;
13248
13249 /* A memory reference which is described by a base register and a
13250 displacement. */
13251 class base_reg_disp
13252 {
13253 public:
13254 base_reg_disp (rtx br, disp_t d);
13255
13256 bool is_reg (void) const;
13257 bool is_disp (void) const;
13258 rtx reg (void) const;
13259 disp_t disp (void) const;
13260
13261 private:
13262 rtx reg_;
13263 disp_t disp_;
13264 };
13265
13266 inline
13267 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13268 : reg_ (br), disp_ (d)
13269 {
13270 }
13271
13272 inline bool
13273 base_reg_disp::is_reg (void) const
13274 {
13275 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13276 }
13277
13278 inline bool
13279 base_reg_disp::is_disp (void) const
13280 {
13281 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13282 }
13283
13284 inline rtx
13285 base_reg_disp::reg (void) const
13286 {
13287 return reg_;
13288 }
13289
13290 inline disp_t
13291 base_reg_disp::disp (void) const
13292 {
13293 return disp_;
13294 }
13295
13296 /* Find the base register and calculate the displacement for a given
13297 address rtx 'x'.
13298 This is done by walking the insn list backwards and following SET insns
13299 that set the value of the specified reg 'x'. */
13300 static base_reg_disp
13301 sh_find_base_reg_disp (rtx insn, rtx x, disp_t disp = 0, rtx base_reg = NULL)
13302 {
13303 if (REG_P (x))
13304 {
13305 if (REGNO (x) == GBR_REG)
13306 return base_reg_disp (x, disp);
13307
13308 /* We've reached a hard-reg. This is probably the point where
13309 function args are copied to pseudos. Do not go any further and
13310 stick to the pseudo. If the original mem addr was in a hard reg
13311 from the beginning, it will become the base reg. */
13312 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13313 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13314
13315 /* Try to find the previous insn that sets the reg. */
13316 for (rtx i = prev_nonnote_insn (insn); i != NULL;
13317 i = prev_nonnote_insn (i))
13318 {
13319 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)
13320 && CALL_P (i))
13321 break;
13322
13323 if (!NONJUMP_INSN_P (i))
13324 continue;
13325
13326 rtx p = PATTERN (i);
13327 if (p != NULL && GET_CODE (p) == SET && REG_P (XEXP (p, 0))
13328 && REGNO (XEXP (p, 0)) == REGNO (x))
13329 {
13330 /* If the recursion can't find out any more details about the
13331 source of the set, then this reg becomes our new base reg. */
13332 return sh_find_base_reg_disp (i, XEXP (p, 1), disp, XEXP (p, 0));
13333 }
13334 }
13335
13336 /* If we get here, no previous insn was found that sets the reg.
13337 The input reg is already the base reg. */
13338 return base_reg_disp (x, disp);
13339 }
13340
13341 else if (GET_CODE (x) == PLUS)
13342 {
13343 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13344 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13345
13346 /* Either left or right val must be a reg.
13347 We don't handle the case of 'reg + reg' here. */
13348 if (left_val.is_reg () && right_val.is_disp ())
13349 return base_reg_disp (left_val.reg (), left_val.disp ()
13350 + right_val.disp () + disp);
13351 else if (right_val.is_reg () && left_val.is_disp ())
13352 return base_reg_disp (right_val.reg (), right_val.disp ()
13353 + left_val.disp () + disp);
13354 else
13355 return base_reg_disp (base_reg, disp);
13356 }
13357
13358 else if (CONST_INT_P (x))
13359 return base_reg_disp (NULL, disp + INTVAL (x));
13360
13361 /* Didn't find anything useful. */
13362 return base_reg_disp (base_reg, disp);
13363 }
13364
13365 /* Given an insn and a memory operand, try to find an equivalent GBR
13366 based memory address and return the corresponding new memory address.
13367 Return NULL_RTX if not found. */
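/* For example (with Rn standing for some pseudo register): if an earlier
   insn set Rn to GBR + 4 and MEM's address is Rn + 8, this returns
   (plus (reg GBR) (const_int 12)), provided 12 is a valid GBR displacement
   for MEM's mode. */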
13368 rtx
13369 sh_find_equiv_gbr_addr (rtx insn, rtx mem)
13370 {
13371 if (!MEM_P (mem))
13372 return NULL_RTX;
13373
13374 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13375 if (side_effects_p (XEXP (mem, 0)))
13376 return NULL_RTX;
13377
13378 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13379
13380 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13381 {
13382 rtx disp = GEN_INT (gbr_disp.disp ());
13383 if (gbr_displacement (disp, GET_MODE (mem)))
13384 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13385 }
13386
13387 return NULL_RTX;
13388 }
13389
13390 /*------------------------------------------------------------------------------
13391 Manual insn combine support code.
13392 */
13393
13394 /* Given a reg rtx and a start insn, try to find the insn that sets the
13395 specified reg by using the specified insn stepping function, such as
13396 'prev_nonnote_insn_bb'. When the insn is found, try to extract the rtx
13397 of the reg set. */
13398 set_of_reg
13399 sh_find_set_of_reg (rtx reg, rtx insn, rtx(*stepfunc)(rtx))
13400 {
13401 set_of_reg result;
13402 result.insn = insn;
13403 result.set_rtx = NULL_RTX;
13404 result.set_src = NULL_RTX;
13405
13406 if (!REG_P (reg) || insn == NULL_RTX)
13407 return result;
13408
13409 for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
13410 result.insn = stepfunc (result.insn))
13411 {
13412 if (BARRIER_P (result.insn))
13413 return result;
13414 if (!NONJUMP_INSN_P (result.insn))
13415 continue;
13416 if (reg_set_p (reg, result.insn))
13417 {
13418 result.set_rtx = set_of (reg, result.insn);
13419
13420 if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
13421 return result;
13422
13423 result.set_src = XEXP (result.set_rtx, 1);
13424 return result;
13425 }
13426 }
13427
13428 return result;
13429 }
13430
13431 /* Given an op rtx and an insn, try to find out whether the result of the
13432 specified op consists only of logical operations on T bit stores. */
13433 bool
13434 sh_is_logical_t_store_expr (rtx op, rtx insn)
13435 {
13436 if (!logical_operator (op, SImode))
13437 return false;
13438
13439 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13440 int op_is_t_count = 0;
13441
13442 for (int i = 0; i < 2; ++i)
13443 {
13444 if (t_reg_operand (ops[i], VOIDmode)
13445 || negt_reg_operand (ops[i], VOIDmode))
13446 op_is_t_count++;
13447
13448 else
13449 {
13450 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13451 prev_nonnote_insn_bb);
13452 if (op_set.set_src == NULL_RTX)
13453 continue;
13454
13455 if (t_reg_operand (op_set.set_src, VOIDmode)
13456 || negt_reg_operand (op_set.set_src, VOIDmode)
13457 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13458 op_is_t_count++;
13459 }
13460 }
13461
13462 return op_is_t_count == 2;
13463 }
13464
13465 /* Given the operand that is extended in a sign/zero extend insn, and the
13466 insn, try to figure out whether the sign/zero extension can be replaced
13467 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13468 NULL_RTX otherwise. */
13469 rtx
13470 sh_try_omit_signzero_extend (rtx extended_op, rtx insn)
13471 {
13472 if (REG_P (extended_op))
13473 ; /* It's already a plain reg - nothing to do. */
13474 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13475 extended_op = SUBREG_REG (extended_op);
13476 else
13477 return NULL_RTX;
13478
13479 /* Reg moves must be of the same mode. */
13480 if (GET_MODE (extended_op) != SImode)
13481 return NULL_RTX;
13482
13483 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13484 if (s.set_src == NULL_RTX)
13485 return NULL_RTX;
13486
13487 if (t_reg_operand (s.set_src, VOIDmode)
13488 || negt_reg_operand (s.set_src, VOIDmode))
13489 return extended_op;
13490
13491 /* If the zero extended reg was formed by a logical operation, check the
13492 operands of the logical operation. If both originated from T bit
13493 stores the zero extension can be eliminated. */
13494 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13495 return extended_op;
13496
13497 return NULL_RTX;
13498 }
13499
13500 #include "gt-sh.h"