1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2013 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "insn-config.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "flags.h"
30 #include "expr.h"
31 #include "optabs.h"
32 #include "reload.h"
33 #include "function.h"
34 #include "regs.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "diagnostic-core.h"
39 #include "recog.h"
40 #include "dwarf2.h"
41 #include "tm_p.h"
42 #include "target.h"
43 #include "target-def.h"
44 #include "langhooks.h"
45 #include "basic-block.h"
46 #include "df.h"
47 #include "intl.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "ggc.h"
51 #include "gimple.h"
52 #include "cfgloop.h"
53 #include "alloc-pool.h"
54 #include "tm-constrs.h"
55 #include "opts.h"
56 #include "tree-pass.h"
57 #include "pass_manager.h"
58 #include "context.h"
59
60 #include <sstream>
61 #include <vector>
62 #include <algorithm>
63
64 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
65
66 /* These are some macros to abstract register modes. */
67 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
68 && ((HOST_WIDE_INT)(VALUE)) <= 511)
69
70 #define CONST_OK_FOR_ADD(size) \
71 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
72 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
73 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
74 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
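/* As a quick illustration, CONST_OK_FOR_I10 above accepts the signed 10-bit
   immediate range:
     CONST_OK_FOR_I10 (511)  -> true
     CONST_OK_FOR_I10 (-512) -> true
     CONST_OK_FOR_I10 (512)  -> false
   which is the range CONST_OK_FOR_ADD relies on when TARGET_SHMEDIA is
   set. */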
75
76 /* Used to simplify the logic below. Find the attributes wherever
77 they may be. */
78 #define SH_ATTRIBUTES(decl) \
79 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
80 : DECL_ATTRIBUTES (decl) \
81 ? (DECL_ATTRIBUTES (decl)) \
82 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
83
84 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
85 int current_function_interrupt;
86
87 tree sh_deferred_function_attributes;
88 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
89
90 /* Global variables for machine-dependent things. */
91
92 /* Which cpu are we scheduling for. */
93 enum processor_type sh_cpu;
94
95 /* Definitions used in ready queue reordering for first scheduling pass. */
96
97 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
98 static short *regmode_weight[2];
99
100 /* Total SFmode and SImode weights of scheduled insns. */
101 static int curr_regmode_pressure[2];
102
103 /* Number of r0 life regions. */
104 static int r0_life_regions;
105
106 /* If true, skip cycles for Q -> R movement. */
107 static int skip_cycles = 0;
108
109 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
110 and returned from sh_reorder2. */
111 static short cached_can_issue_more;
112
113 /* Unique number for UNSPEC_BBR pattern. */
114 static unsigned int unspec_bbr_uid = 1;
115
116 /* Provides the class number of the smallest class containing
117 reg number. */
118 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 {
120 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136   FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
156 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
157 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
158 GENERAL_REGS, GENERAL_REGS,
159 };
160
161 char sh_register_names[FIRST_PSEUDO_REGISTER] \
162 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
163
164 char sh_additional_register_names[ADDREGNAMES_SIZE] \
165 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
166 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
167
168 int assembler_dialect;
169
170 static bool shmedia_space_reserved_for_target_registers;
171
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void print_slot (rtx);
175 static rtx add_constant (rtx, enum machine_mode, rtx);
176 static void dump_table (rtx, rtx);
177 static bool broken_move (rtx);
178 static bool mova_p (rtx);
179 static rtx find_barrier (int, rtx, rtx);
180 static bool noncall_uses_reg (rtx, rtx, rtx *);
181 static rtx gen_block_redirect (rtx, int, int);
182 static void sh_reorg (void);
183 static void sh_option_override (void);
184 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
185 static rtx frame_insn (rtx);
186 static rtx push (int);
187 static void pop (int);
188 static void push_regs (HARD_REG_SET *, int);
189 static int calc_live_regs (HARD_REG_SET *);
190 static HOST_WIDE_INT rounded_frame_size (int);
191 static bool sh_frame_pointer_required (void);
192 static rtx mark_constant_pool_use (rtx);
193 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
194 int, bool *);
195 static tree sh_handle_resbank_handler_attribute (tree *, tree,
196 tree, int, bool *);
197 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
198 tree, int, bool *);
199 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
202 static void sh_print_operand (FILE *, rtx, int);
203 static void sh_print_operand_address (FILE *, rtx);
204 static bool sh_print_operand_punct_valid_p (unsigned char code);
205 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
206 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
207 static void sh_insert_attributes (tree, tree *);
208 static const char *sh_check_pch_target_flags (int);
209 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
210 static int sh_adjust_cost (rtx, rtx, rtx, int);
211 static int sh_issue_rate (void);
212 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
213 static short find_set_regmode_weight (rtx, enum machine_mode);
214 static short find_insn_regmode_weight (rtx, enum machine_mode);
215 static void find_regmode_weight (basic_block, enum machine_mode);
216 static int find_r0_life_regions (basic_block);
217 static void sh_md_init_global (FILE *, int, int);
218 static void sh_md_finish_global (FILE *, int);
219 static int rank_for_reorder (const void *, const void *);
220 static void swap_reorder (rtx *, int);
221 static void ready_reorder (rtx *, int);
222 static bool high_pressure (enum machine_mode);
223 static int sh_reorder (FILE *, int, rtx *, int *, int);
224 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
225 static void sh_md_init (FILE *, int, int);
226 static int sh_variable_issue (FILE *, int, rtx, int);
227
228 static bool sh_function_ok_for_sibcall (tree, tree);
229
230 static bool sh_cannot_modify_jumps_p (void);
231 static reg_class_t sh_target_reg_class (void);
232 static bool sh_optimize_target_register_callee_saved (bool);
233 static bool sh_ms_bitfield_layout_p (const_tree);
234
235 static void sh_init_builtins (void);
236 static tree sh_builtin_decl (unsigned, bool);
237 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
238 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
239 HOST_WIDE_INT, tree);
240 static void sh_file_start (void);
241 static bool flow_dependent_p (rtx, rtx);
242 static void flow_dependent_p_1 (rtx, const_rtx, void *);
243 static int shiftcosts (rtx);
244 static int and_xor_ior_costs (rtx, int);
245 static int addsubcosts (rtx);
246 static int multcosts (rtx);
247 static bool unspec_caller_rtx_p (rtx);
248 static bool sh_cannot_copy_insn_p (rtx);
249 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
250 static int sh_address_cost (rtx, enum machine_mode, addr_space_t, bool);
251 static int sh_pr_n_sets (void);
252 static rtx sh_allocate_initial_value (rtx);
253 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
254 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
255 enum machine_mode,
256 struct secondary_reload_info *);
257 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
258 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
259 static rtx sh_delegitimize_address (rtx);
260 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
261 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
262 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
263 static int scavenge_reg (HARD_REG_SET *s);
264 struct save_schedule_s;
265 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
266 struct save_schedule_s *, int);
267
268 static rtx sh_struct_value_rtx (tree, int);
269 static rtx sh_function_value (const_tree, const_tree, bool);
270 static bool sh_function_value_regno_p (const unsigned int);
271 static rtx sh_libcall_value (enum machine_mode, const_rtx);
272 static bool sh_return_in_memory (const_tree, const_tree);
273 static rtx sh_builtin_saveregs (void);
274 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
275 tree, int *, int);
276 static bool sh_strict_argument_naming (cumulative_args_t);
277 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
278 static tree sh_build_builtin_va_list (void);
279 static void sh_va_start (tree, rtx);
280 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
281 static bool sh_promote_prototypes (const_tree);
282 static enum machine_mode sh_promote_function_mode (const_tree type,
283 enum machine_mode,
284 int *punsignedp,
285 const_tree funtype,
286 int for_return);
287 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
288 const_tree, bool);
289 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
290 const_tree, bool);
291 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
292 tree, bool);
293 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
294 const_tree, bool);
295 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
296 const_tree, bool);
297 static bool sh_scalar_mode_supported_p (enum machine_mode);
298 static int sh_dwarf_calling_convention (const_tree);
299 static void sh_encode_section_info (tree, rtx, int);
300 static bool sh2a_function_vector_p (tree);
301 static void sh_trampoline_init (rtx, tree, rtx);
302 static rtx sh_trampoline_adjust_address (rtx);
303 static void sh_conditional_register_usage (void);
304 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
305 static int mov_insn_size (enum machine_mode, bool);
306 static int max_mov_insn_displacement (enum machine_mode, bool);
307 static int mov_insn_alignment_mask (enum machine_mode, bool);
308 static HOST_WIDE_INT disp_addr_displacement (rtx);
309 static bool sequence_insn_p (rtx);
310 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
311 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
312 enum machine_mode, bool);
313 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
314
315 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
316 \f
317 static const struct attribute_spec sh_attribute_table[] =
318 {
319 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
320 affects_type_identity } */
321 { "interrupt_handler", 0, 0, true, false, false,
322 sh_handle_interrupt_handler_attribute, false },
323 { "sp_switch", 1, 1, true, false, false,
324 sh_handle_sp_switch_attribute, false },
325 { "trap_exit", 1, 1, true, false, false,
326 sh_handle_trap_exit_attribute, false },
327 { "renesas", 0, 0, false, true, false,
328 sh_handle_renesas_attribute, false },
329 { "trapa_handler", 0, 0, true, false, false,
330 sh_handle_interrupt_handler_attribute, false },
331 { "nosave_low_regs", 0, 0, true, false, false,
332 sh_handle_interrupt_handler_attribute, false },
333 { "resbank", 0, 0, true, false, false,
334 sh_handle_resbank_handler_attribute, false },
335 { "function_vector", 1, 1, true, false, false,
336 sh2a_handle_function_vector_handler_attribute, false },
337 { NULL, 0, 0, false, false, false, NULL, false }
338 };
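/* Usage sketch (the function names and argument values below are made up
   for illustration): user code attaches these attributes to declarations,
   e.g.
     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));
   and the handler functions listed in the table validate the argument
   counts and the decl/type requirements given in each row. */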
339 \f
340 /* Initialize the GCC target structure. */
341 #undef TARGET_ATTRIBUTE_TABLE
342 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
343
344 /* The next two are used for debug info when compiling with -gdwarf. */
345 #undef TARGET_ASM_UNALIGNED_HI_OP
346 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
347 #undef TARGET_ASM_UNALIGNED_SI_OP
348 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
349
350 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
351 #undef TARGET_ASM_UNALIGNED_DI_OP
352 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
353 #undef TARGET_ASM_ALIGNED_DI_OP
354 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
355
356 #undef TARGET_OPTION_OVERRIDE
357 #define TARGET_OPTION_OVERRIDE sh_option_override
358
359 #undef TARGET_PRINT_OPERAND
360 #define TARGET_PRINT_OPERAND sh_print_operand
361 #undef TARGET_PRINT_OPERAND_ADDRESS
362 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
363 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
364 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
365 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
366 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
367
368 #undef TARGET_ASM_FUNCTION_EPILOGUE
369 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
370
371 #undef TARGET_ASM_OUTPUT_MI_THUNK
372 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
373
374 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
375 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
376 hook_bool_const_tree_hwi_hwi_const_tree_true
377
378 #undef TARGET_ASM_FILE_START
379 #define TARGET_ASM_FILE_START sh_file_start
380 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
381 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
382
383 #undef TARGET_REGISTER_MOVE_COST
384 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
385
386 #undef TARGET_INSERT_ATTRIBUTES
387 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
388
389 #undef TARGET_SCHED_ADJUST_COST
390 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
391
392 #undef TARGET_SCHED_ISSUE_RATE
393 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
394
395 /* The next 5 hooks have been implemented for reenabling sched1. With the
396 help of these macros we are limiting the movement of insns in sched1 to
397 reduce the register pressure. The overall idea is to keep count of SImode
398 and SFmode regs required by already scheduled insns. When these counts
399    cross some threshold values, give priority to insns that free registers.
400 The insn that frees registers is most likely to be the insn with lowest
401    LUID (original insn order), but such an insn might be in the stalled
402 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
403 up to a max of 8 cycles so that such insns may move from Q -> R.
404
405    The descriptions of the hooks are as follows:
406
407 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
408 scheduler; it is called inside the sched_init function just after
409 find_insn_reg_weights function call. It is used to calculate the SImode
410    and SFmode weights of insns of basic blocks, much like what
411 find_insn_reg_weights does.
412 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
413
414 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
415 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
416 (Q)->(R).
417
418 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
419 high; reorder the ready queue so that the insn with lowest LUID will be
420 issued next.
421
422 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
423 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
424
425 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
426 can be returned from TARGET_SCHED_REORDER2.
427
428 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
429
430 #undef TARGET_SCHED_DFA_NEW_CYCLE
431 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
432
433 #undef TARGET_SCHED_INIT_GLOBAL
434 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
435
436 #undef TARGET_SCHED_FINISH_GLOBAL
437 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
438
439 #undef TARGET_SCHED_VARIABLE_ISSUE
440 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
441
442 #undef TARGET_SCHED_REORDER
443 #define TARGET_SCHED_REORDER sh_reorder
444
445 #undef TARGET_SCHED_REORDER2
446 #define TARGET_SCHED_REORDER2 sh_reorder2
447
448 #undef TARGET_SCHED_INIT
449 #define TARGET_SCHED_INIT sh_md_init
450
451 #undef TARGET_DELEGITIMIZE_ADDRESS
452 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
453
454 #undef TARGET_LEGITIMIZE_ADDRESS
455 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
456
457 #undef TARGET_CANNOT_MODIFY_JUMPS_P
458 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
459 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
460 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
461 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
462 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
463 sh_optimize_target_register_callee_saved
464
465 #undef TARGET_MS_BITFIELD_LAYOUT_P
466 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
467
468 #undef TARGET_INIT_BUILTINS
469 #define TARGET_INIT_BUILTINS sh_init_builtins
470 #undef TARGET_BUILTIN_DECL
471 #define TARGET_BUILTIN_DECL sh_builtin_decl
472 #undef TARGET_EXPAND_BUILTIN
473 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
474
475 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
476 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
477
478 #undef TARGET_CANNOT_COPY_INSN_P
479 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
480 #undef TARGET_RTX_COSTS
481 #define TARGET_RTX_COSTS sh_rtx_costs
482 #undef TARGET_ADDRESS_COST
483 #define TARGET_ADDRESS_COST sh_address_cost
484 #undef TARGET_ALLOCATE_INITIAL_VALUE
485 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
486
487 #undef TARGET_MACHINE_DEPENDENT_REORG
488 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
489
490 #undef TARGET_DWARF_REGISTER_SPAN
491 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
492
493 #ifdef HAVE_AS_TLS
494 #undef TARGET_HAVE_TLS
495 #define TARGET_HAVE_TLS true
496 #endif
497
498 #undef TARGET_PROMOTE_PROTOTYPES
499 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
500 #undef TARGET_PROMOTE_FUNCTION_MODE
501 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
502
503 #undef TARGET_FUNCTION_VALUE
504 #define TARGET_FUNCTION_VALUE sh_function_value
505 #undef TARGET_FUNCTION_VALUE_REGNO_P
506 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
507 #undef TARGET_LIBCALL_VALUE
508 #define TARGET_LIBCALL_VALUE sh_libcall_value
509 #undef TARGET_STRUCT_VALUE_RTX
510 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
511 #undef TARGET_RETURN_IN_MEMORY
512 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
513
514 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
515 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
516 #undef TARGET_SETUP_INCOMING_VARARGS
517 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
518 #undef TARGET_STRICT_ARGUMENT_NAMING
519 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
520 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
521 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
522 #undef TARGET_MUST_PASS_IN_STACK
523 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
524 #undef TARGET_PASS_BY_REFERENCE
525 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
526 #undef TARGET_CALLEE_COPIES
527 #define TARGET_CALLEE_COPIES sh_callee_copies
528 #undef TARGET_ARG_PARTIAL_BYTES
529 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
530 #undef TARGET_FUNCTION_ARG
531 #define TARGET_FUNCTION_ARG sh_function_arg
532 #undef TARGET_FUNCTION_ARG_ADVANCE
533 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
534
535 #undef TARGET_BUILD_BUILTIN_VA_LIST
536 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
537 #undef TARGET_EXPAND_BUILTIN_VA_START
538 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
539 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
540 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
541
542 #undef TARGET_SCALAR_MODE_SUPPORTED_P
543 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
544 #undef TARGET_VECTOR_MODE_SUPPORTED_P
545 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
546
547 #undef TARGET_CHECK_PCH_TARGET_FLAGS
548 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
549
550 #undef TARGET_DWARF_CALLING_CONVENTION
551 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
552
553 #undef TARGET_FRAME_POINTER_REQUIRED
554 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
555
556 /* Return regmode weight for insn. */
557 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
558 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
559
560 /* Return current register pressure for regmode. */
561 #define CURR_REGMODE_PRESSURE(MODE)\
562 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
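/* Rough sketch of how these macros interact (thresholds are checked by
   high_pressure, defined later in this file): as insns are scheduled,
   CURR_REGMODE_PRESSURE (mode) is adjusted by INSN_REGMODE_WEIGHT (insn,
   mode), and once the SImode or SFmode counter crosses its threshold the
   reorder / new-cycle hooks start favoring low-LUID insns, as described in
   the sched1 comment block above. */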
563
564 #undef TARGET_ENCODE_SECTION_INFO
565 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
566
567 #undef TARGET_SECONDARY_RELOAD
568 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
569
570 #undef TARGET_PREFERRED_RELOAD_CLASS
571 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
572
573 #undef TARGET_CONDITIONAL_REGISTER_USAGE
574 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
575
576 #undef TARGET_LEGITIMATE_ADDRESS_P
577 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
578
579 #undef TARGET_TRAMPOLINE_INIT
580 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
581 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
582 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
583
584 #undef TARGET_LEGITIMATE_CONSTANT_P
585 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
586
587 #undef TARGET_CANONICALIZE_COMPARISON
588 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
589
590 #undef TARGET_FIXED_CONDITION_CODE_REGS
591 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
592
593 /* Machine-specific symbol_ref flags. */
594 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
595
596 /* The tas.b instruction sets the top bit (bit 7) of the byte, i.e. 0x80.  This value
597 is used by optabs.c atomic op expansion code as well as in sync.md. */
598 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
599 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
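/* Small usage sketch (the builtin call is illustrative, not from this
   file): after something like
     bool was_set = __atomic_test_and_set (&flag, __ATOMIC_ACQUIRE);
   the flag byte holds 0x80 rather than 1, which is why the expansion code
   in optabs.c and the patterns in sync.md must be told this trueval instead
   of assuming the default of 1. */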
600
601 struct gcc_target targetm = TARGET_INITIALIZER;
602 \f
603
604 /* Information on the currently selected atomic model.
605 This is initialized in sh_option_override. */
606 static sh_atomic_model selected_atomic_model_;
607
608 const sh_atomic_model&
609 selected_atomic_model (void)
610 {
611 return selected_atomic_model_;
612 }
613
614 static sh_atomic_model
615 parse_validate_atomic_model_option (const char* str)
616 {
617 const char* model_names[sh_atomic_model::num_models];
618 model_names[sh_atomic_model::none] = "none";
619 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
620 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
621 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
622 model_names[sh_atomic_model::soft_imask] = "soft-imask";
623
624 const char* model_cdef_names[sh_atomic_model::num_models];
625 model_cdef_names[sh_atomic_model::none] = "NONE";
626 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
627 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
628 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
629 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
630
631 sh_atomic_model ret;
632 ret.type = sh_atomic_model::none;
633 ret.name = model_names[sh_atomic_model::none];
634 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
635 ret.strict = false;
636 ret.tcb_gbr_offset = -1;
637
638 /* Handle empty string as 'none'. */
639 if (str == NULL || *str == '\0')
640 return ret;
641
642 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
643
644 std::vector<std::string> tokens;
645 for (std::stringstream ss (str); ss.good (); )
646 {
647 tokens.push_back (std::string ());
648 std::getline (ss, tokens.back (), ',');
649 }
650
651 if (tokens.empty ())
652 err_ret ("invalid atomic model option");
653
654 /* The first token must be the atomic model name. */
655 {
656 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
657 if (tokens.front () == model_names[i])
658 {
659 ret.type = (sh_atomic_model::enum_type)i;
660 ret.name = model_names[i];
661 ret.cdef_name = model_cdef_names[i];
662 goto got_mode_name;
663 }
664
665 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
666 got_mode_name:;
667 }
668
669 /* Go through the remaining tokens. */
670 for (size_t i = 1; i < tokens.size (); ++i)
671 {
672 if (tokens[i] == "strict")
673 ret.strict = true;
674 else if (tokens[i].find ("gbr-offset=") == 0)
675 {
676 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
677 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
678 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
679 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
680 "option", offset_str.c_str ());
681 }
682 else
683 err_ret ("unknown parameter \"%s\" in atomic model option",
684 tokens[i].c_str ());
685 }
686
687 /* Check that the selection makes sense. */
688 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
689 err_ret ("atomic operations are not supported on SHmedia");
690
691 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
692 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
693 ret.name);
694
695 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
696 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
697
698 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
699 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
700
701 if (ret.type == sh_atomic_model::soft_tcb
702 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
703 || (ret.tcb_gbr_offset & 3) != 0))
704 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
705 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
706 ret.name);
707
708 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
709 err_ret ("cannot use atomic model %s in user mode", ret.name);
710
711 return ret;
712
713 #undef err_ret
714 }
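/* Worked example for the parser above (assuming the usual -matomic-model=
   command line syntax): the string "soft-tcb,gbr-offset=16,strict" yields
     ret.type           = sh_atomic_model::soft_tcb
     ret.tcb_gbr_offset = 16
     ret.strict         = true
   while a NULL or empty string selects the "none" model, and a gbr-offset
   that is missing, out of range or not a multiple of 4 is rejected with an
   error. */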
715
716 /* Register SH specific RTL passes. */
717 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
718 const char* name);
719 static void
720 register_sh_passes (void)
721 {
722 if (!TARGET_SH1)
723 return;
724
725 /* Running the sh_treg_combine pass after ce1 generates better code when
726 comparisons are combined and reg-reg moves are introduced, because
727 reg-reg moves will be eliminated afterwards. However, there are quite
728      a few cases where combine will be unable to fold comparison-related insns,
729 thus for now don't do it.
730 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
731 PASS_POS_INSERT_AFTER, "ce1", 1);
732 */
733
734 /* Run sh_treg_combine pass after combine but before register allocation. */
735 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
736 PASS_POS_INSERT_AFTER, "split1", 1);
737
738 /* Run sh_treg_combine pass after register allocation and basic block
739 reordering as this sometimes creates new opportunities. */
740 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
741 PASS_POS_INSERT_AFTER, "split4", 1);
742 }
743
744 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
745 various options, and do some machine dependent initialization. */
746 static void
747 sh_option_override (void)
748 {
749 int regno;
750
751 SUBTARGET_OVERRIDE_OPTIONS;
752 if (optimize > 1 && !optimize_size)
753 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
754 sh_cpu = PROCESSOR_SH1;
755 assembler_dialect = 0;
756 if (TARGET_SH2)
757 sh_cpu = PROCESSOR_SH2;
758 if (TARGET_SH2E)
759 sh_cpu = PROCESSOR_SH2E;
760 if (TARGET_SH2A)
761 sh_cpu = PROCESSOR_SH2A;
762 if (TARGET_SH3)
763 sh_cpu = PROCESSOR_SH3;
764 if (TARGET_SH3E)
765 sh_cpu = PROCESSOR_SH3E;
766 if (TARGET_SH4)
767 {
768 assembler_dialect = 1;
769 sh_cpu = PROCESSOR_SH4;
770 }
771 if (TARGET_SH4A_ARCH)
772 {
773 assembler_dialect = 1;
774 sh_cpu = PROCESSOR_SH4A;
775 }
776 if (TARGET_SH5)
777 {
778 sh_cpu = PROCESSOR_SH5;
779 target_flags |= MASK_ALIGN_DOUBLE;
780 if (TARGET_SHMEDIA_FPU)
781 target_flags |= MASK_FMOVD;
782 if (TARGET_SHMEDIA)
783 {
784 /* There are no delay slots on SHmedia. */
785 flag_delayed_branch = 0;
786           /* Relaxation isn't yet supported for SHmedia. */
787 target_flags &= ~MASK_RELAX;
788           /* After reload, if-conversion does little good but can cause
789 ICEs:
790 - find_if_block doesn't do anything for SH because we don't
791 have conditional execution patterns. (We use conditional
792 move patterns, which are handled differently, and only
793 before reload).
794 - find_cond_trap doesn't do anything for the SH because we
795 don't have conditional traps.
796 - find_if_case_1 uses redirect_edge_and_branch_force in
797 the only path that does an optimization, and this causes
798 an ICE when branch targets are in registers.
799 - find_if_case_2 doesn't do anything for the SHmedia after
800 reload except when it can redirect a tablejump - and
801 that's rather rare. */
802 flag_if_conversion2 = 0;
803 if (! strcmp (sh_div_str, "call"))
804 sh_div_strategy = SH_DIV_CALL;
805 else if (! strcmp (sh_div_str, "call2"))
806 sh_div_strategy = SH_DIV_CALL2;
807 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
808 sh_div_strategy = SH_DIV_FP;
809 else if (! strcmp (sh_div_str, "inv"))
810 sh_div_strategy = SH_DIV_INV;
811 else if (! strcmp (sh_div_str, "inv:minlat"))
812 sh_div_strategy = SH_DIV_INV_MINLAT;
813 else if (! strcmp (sh_div_str, "inv20u"))
814 sh_div_strategy = SH_DIV_INV20U;
815 else if (! strcmp (sh_div_str, "inv20l"))
816 sh_div_strategy = SH_DIV_INV20L;
817 else if (! strcmp (sh_div_str, "inv:call2"))
818 sh_div_strategy = SH_DIV_INV_CALL2;
819 else if (! strcmp (sh_div_str, "inv:call"))
820 sh_div_strategy = SH_DIV_INV_CALL;
821 else if (! strcmp (sh_div_str, "inv:fp"))
822 {
823 if (TARGET_FPU_ANY)
824 sh_div_strategy = SH_DIV_INV_FP;
825 else
826 sh_div_strategy = SH_DIV_INV;
827 }
828 TARGET_CBRANCHDI4 = 0;
829 /* Assembler CFI isn't yet fully supported for SHmedia. */
830 flag_dwarf2_cfi_asm = 0;
831 }
832 }
833 else
834 {
835       /* Only the sh64-elf assembler supports .quad properly. */
836 targetm.asm_out.aligned_op.di = NULL;
837 targetm.asm_out.unaligned_op.di = NULL;
838 }
839 if (TARGET_SH1)
840 {
841 if (! strcmp (sh_div_str, "call-div1"))
842 sh_div_strategy = SH_DIV_CALL_DIV1;
843 else if (! strcmp (sh_div_str, "call-fp")
844 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
845 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
846 sh_div_strategy = SH_DIV_CALL_FP;
847 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
848 sh_div_strategy = SH_DIV_CALL_TABLE;
849 else
850 /* Pick one that makes most sense for the target in general.
851          It does little good to use different functions depending
852 on -Os, since then we'll end up with two different functions
853 when some of the code is compiled for size, and some for
854 speed. */
855
856 /* SH4 tends to emphasize speed. */
857 if (TARGET_HARD_SH4)
858 sh_div_strategy = SH_DIV_CALL_TABLE;
859 /* These have their own way of doing things. */
860 else if (TARGET_SH2A)
861 sh_div_strategy = SH_DIV_INTRINSIC;
862 /* ??? Should we use the integer SHmedia function instead? */
863 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
864 sh_div_strategy = SH_DIV_CALL_FP;
865 /* SH1 .. SH3 cores often go into small-footprint systems, so
866 default to the smallest implementation available. */
867 else
868 sh_div_strategy = SH_DIV_CALL_DIV1;
869 }
870 if (!TARGET_SH1)
871 TARGET_PRETEND_CMOVE = 0;
872 if (sh_divsi3_libfunc[0])
873 ; /* User supplied - leave it alone. */
874 else if (TARGET_DIVIDE_CALL_FP)
875 sh_divsi3_libfunc = "__sdivsi3_i4";
876 else if (TARGET_DIVIDE_CALL_TABLE)
877 sh_divsi3_libfunc = "__sdivsi3_i4i";
878 else if (TARGET_SH5)
879 sh_divsi3_libfunc = "__sdivsi3_1";
880 else
881 sh_divsi3_libfunc = "__sdivsi3";
882 if (sh_branch_cost == -1)
883 {
884 sh_branch_cost = 1;
885
886 /* The SH1 does not have delay slots, hence we get a pipeline stall
887 at every branch. The SH4 is superscalar, so the single delay slot
888 is not sufficient to keep both pipelines filled. */
889 if (! TARGET_SH2 || TARGET_HARD_SH4)
890 sh_branch_cost = 2;
891 }
892
893 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
894 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
895 TARGET_ZDCBRANCH = 1;
896
897 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
898 if (! VALID_REGISTER_P (regno))
899 sh_register_names[regno][0] = '\0';
900
901 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
902 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
903 sh_additional_register_names[regno][0] = '\0';
904
905 if ((flag_pic && ! TARGET_PREFERGOT)
906 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
907 flag_no_function_cse = 1;
908
909 if (targetm.small_register_classes_for_mode_p (VOIDmode))
910 {
911 /* Never run scheduling before reload, since that can
912 break global alloc, and generates slower code anyway due
913 to the pressure on R0. */
914       /* Enable sched1 for SH4 only if the user explicitly requests it.
915 When sched1 is enabled, the ready queue will be reordered by
916          the target hooks if pressure is high.  We cannot do this for
917 PIC, SH3 and lower as they give spill failures for R0. */
918 if (!TARGET_HARD_SH4 || flag_pic)
919 flag_schedule_insns = 0;
920 /* ??? Current exception handling places basic block boundaries
921 after call_insns. It causes the high pressure on R0 and gives
922 spill failures for R0 in reload. See PR 22553 and the thread
923 on gcc-patches
924 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
925 else if (flag_exceptions)
926 {
927 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
928 warning (0, "ignoring -fschedule-insns because of exception "
929 "handling bug");
930 flag_schedule_insns = 0;
931 }
932 else if (flag_schedule_insns
933 && !global_options_set.x_flag_schedule_insns)
934 flag_schedule_insns = 0;
935 }
936
937 /* Unwind info is not correct around the CFG unless either a frame
938 pointer is present or M_A_O_A is set. Fixing this requires rewriting
939 unwind info generation to be aware of the CFG and propagating states
940 around edges. */
941 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
942 || flag_exceptions || flag_non_call_exceptions)
943 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
944 {
945 warning (0, "unwind tables currently require either a frame pointer "
946 "or -maccumulate-outgoing-args for correctness");
947 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
948 }
949
950 /* Unwinding with -freorder-blocks-and-partition does not work on this
951      architecture, because it requires far jumps to labels crossing between
952      hot/cold sections, which are rejected on this architecture. */
953 if (flag_reorder_blocks_and_partition)
954 {
955 if (flag_exceptions)
956 {
957 inform (input_location,
958 "-freorder-blocks-and-partition does not work with "
959 "exceptions on this architecture");
960 flag_reorder_blocks_and_partition = 0;
961 flag_reorder_blocks = 1;
962 }
963 else if (flag_unwind_tables)
964 {
965 inform (input_location,
966 "-freorder-blocks-and-partition does not support unwind "
967 "info on this architecture");
968 flag_reorder_blocks_and_partition = 0;
969 flag_reorder_blocks = 1;
970 }
971 }
972
973 /* Adjust loop, jump and function alignment values (in bytes), if those
974 were not specified by the user using -falign-loops, -falign-jumps
975 and -falign-functions options.
976 32 bit alignment is better for speed, because instructions can be
977 fetched as a pair from a longword boundary. For size use 16 bit
978 alignment to get more compact code.
979 Aligning all jumps increases the code size, even if it might
980 result in slightly faster code. Thus, it is set to the smallest
981 alignment possible if not specified by the user. */
982 if (align_loops == 0)
983 {
984 if (TARGET_SH5)
985 align_loops = 8;
986 else
987 align_loops = optimize_size ? 2 : 4;
988 }
989
990 if (align_jumps == 0)
991 {
992 if (TARGET_SHMEDIA)
993 align_jumps = 1 << CACHE_LOG;
994 else
995 align_jumps = 2;
996 }
997 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
998 align_jumps = TARGET_SHMEDIA ? 4 : 2;
999
1000 if (align_functions == 0)
1001 {
1002 if (TARGET_SHMEDIA)
1003 align_functions = optimize_size
1004 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1005 else
1006 align_functions = optimize_size ? 2 : 4;
1007 }
1008
1009 /* The linker relaxation code breaks when a function contains
1010 alignments that are larger than that at the start of a
1011 compilation unit. */
1012 if (TARGET_RELAX)
1013 {
1014 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1015
1016 /* Also take possible .long constants / mova tables into account. */
1017 if (min_align < 4)
1018 min_align = 4;
1019 if (align_functions < min_align)
1020 align_functions = min_align;
1021 }
1022
1023 if (flag_unsafe_math_optimizations)
1024 {
1025 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1026 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1027 TARGET_FSCA = 1;
1028
1029 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1030 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1031 TARGET_FSRRA = 1;
1032 }
1033
1034 /* Allow fsrra insn only if -funsafe-math-optimizations and
1035 -ffinite-math-only is enabled. */
1036 TARGET_FSRRA = TARGET_FSRRA
1037 && flag_unsafe_math_optimizations
1038 && flag_finite_math_only;
1039
1040 /* If the -mieee option was not explicitly set by the user, turn it on
1041 unless -ffinite-math-only was specified. See also PR 33135. */
1042 if (! global_options_set.x_TARGET_IEEE)
1043 TARGET_IEEE = ! flag_finite_math_only;
1044
1045 if (sh_fixed_range_str)
1046 sh_fix_range (sh_fixed_range_str);
1047
1048 /* This target defaults to strict volatile bitfields. */
1049 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1050 flag_strict_volatile_bitfields = 1;
1051
1052 /* Parse atomic model option and make sure it is valid for the current
1053 target CPU. */
1054 selected_atomic_model_
1055 = parse_validate_atomic_model_option (sh_atomic_model_str);
1056
1057 register_sh_passes ();
1058 }
1059 \f
1060 /* Print the operand address in x to the stream. */
1061 static void
1062 sh_print_operand_address (FILE *stream, rtx x)
1063 {
1064 switch (GET_CODE (x))
1065 {
1066 case REG:
1067 case SUBREG:
1068 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1069 break;
1070
1071 case PLUS:
1072 {
1073 rtx base = XEXP (x, 0);
1074 rtx index = XEXP (x, 1);
1075
1076 switch (GET_CODE (index))
1077 {
1078 case CONST_INT:
1079 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1080 reg_names[true_regnum (base)]);
1081 break;
1082
1083 case REG:
1084 case SUBREG:
1085 {
1086 int base_num = true_regnum (base);
1087 int index_num = true_regnum (index);
1088
1089 fprintf (stream, "@(r0,%s)",
1090 reg_names[MAX (base_num, index_num)]);
1091 break;
1092 }
1093
1094 default:
1095 gcc_unreachable ();
1096 }
1097 }
1098 break;
1099
1100 case PRE_DEC:
1101 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1102 break;
1103
1104 case POST_INC:
1105 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1106 break;
1107
1108 default:
1109 x = mark_constant_pool_use (x);
1110 output_addr_const (stream, x);
1111 break;
1112 }
1113 }
1114
1115 /* Print operand x (an rtx) in assembler syntax to file stream
1116 according to modifier code.
1117
1118 '.' print a .s if insn needs delay slot
1119 ',' print LOCAL_LABEL_PREFIX
1120    '@'  print trapa, rte or rts depending upon the interrupt handler attributes
1121 '#' output a nop if there is nothing to put in the delay slot
1122 ''' print likelihood suffix (/u for unlikely).
1123 '>' print branch target if -fverbose-asm
1124 'O' print a constant without the #
1125 'R' print the LSW of a dp value - changes if in little endian
1126 'S' print the MSW of a dp value - changes if in little endian
1127 'T' print the next word of a dp value - same as 'R' in big endian mode.
1128 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1129 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1130 'N' print 'r63' if the operand is (const_int 0).
1131 'd' print a V2SF reg as dN instead of fpN.
1132 'm' print a pair `base,offset' or `base,index', for LD and ST.
1133 'U' Likewise for {LD,ST}{HI,LO}.
1134 'V' print the position of a single bit set.
1135 'W' print the position of a single bit cleared.
1136 't' print a memory address which is a register.
1137 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1138 'o' output an operator. */
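/* Hedged example of how these codes are used: an output template in sh.md
   written roughly as "%@ %#" would print "rts" (or "rte" / "trapa #n" for
   interrupt handlers) followed by "\n\tnop" when there is nothing to put in
   the delay slot; the actual templates live in sh.md, not here. */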
1139 static void
1140 sh_print_operand (FILE *stream, rtx x, int code)
1141 {
1142 int regno;
1143 enum machine_mode mode;
1144
1145 switch (code)
1146 {
1147 tree trapa_attr;
1148
1149 case '.':
1150 if (final_sequence
1151 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1152 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1153 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1154 break;
1155 case ',':
1156 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1157 break;
1158 case '@':
1159 trapa_attr = lookup_attribute ("trap_exit",
1160 DECL_ATTRIBUTES (current_function_decl));
1161 if (trapa_attr)
1162 fprintf (stream, "trapa #%ld",
1163 (long) tree_to_hwi (TREE_VALUE (TREE_VALUE (trapa_attr))));
1164 else if (sh_cfun_interrupt_handler_p ())
1165 {
1166 if (sh_cfun_resbank_handler_p ())
1167 fprintf (stream, "resbank\n");
1168 fprintf (stream, "rte");
1169 }
1170 else
1171 fprintf (stream, "rts");
1172 break;
1173 case '#':
1174 /* Output a nop if there's nothing in the delay slot. */
1175 if (dbr_sequence_length () == 0)
1176 fprintf (stream, "\n\tnop");
1177 break;
1178 case '\'':
1179 {
1180 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1181
1182 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1183 fputs ("/u", stream);
1184 break;
1185 }
1186 case '>':
1187 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1188 {
1189 fputs ("\t! target: ", stream);
1190 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1191 }
1192 break;
1193 case 'O':
1194 x = mark_constant_pool_use (x);
1195 output_addr_const (stream, x);
1196 break;
1197 /* N.B.: %R / %S / %T adjust memory addresses by four.
1198 For SHMEDIA, that means they can be used to access the first and
1199 second 32 bit part of a 64 bit (or larger) value that
1200 might be held in floating point registers or memory.
1201 While they can be used to access 64 bit parts of a larger value
1202 held in general purpose registers, that won't work with memory -
1203 neither for fp registers, since the frxx names are used. */
1204 case 'R':
1205 if (REG_P (x) || GET_CODE (x) == SUBREG)
1206 {
1207 regno = true_regnum (x);
1208 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1209 fputs (reg_names[regno], (stream));
1210 }
1211 else if (MEM_P (x))
1212 {
1213 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1214 sh_print_operand_address (stream, XEXP (x, 0));
1215 }
1216 else
1217 {
1218 rtx sub = NULL_RTX;
1219
1220 mode = GET_MODE (x);
1221 if (mode == VOIDmode)
1222 mode = DImode;
1223 if (GET_MODE_SIZE (mode) >= 8)
1224 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1225 if (sub)
1226 sh_print_operand (stream, sub, 0);
1227 else
1228 output_operand_lossage ("invalid operand to %%R");
1229 }
1230 break;
1231 case 'S':
1232 if (REG_P (x) || GET_CODE (x) == SUBREG)
1233 {
1234 regno = true_regnum (x);
1235 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1236 fputs (reg_names[regno], (stream));
1237 }
1238 else if (MEM_P (x))
1239 {
1240 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1241 sh_print_operand_address (stream, XEXP (x, 0));
1242 }
1243 else
1244 {
1245 rtx sub = NULL_RTX;
1246
1247 mode = GET_MODE (x);
1248 if (mode == VOIDmode)
1249 mode = DImode;
1250 if (GET_MODE_SIZE (mode) >= 8)
1251 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1252 if (sub)
1253 sh_print_operand (stream, sub, 0);
1254 else
1255 output_operand_lossage ("invalid operand to %%S");
1256 }
1257 break;
1258 case 'T':
1259 /* Next word of a double. */
1260 switch (GET_CODE (x))
1261 {
1262 case REG:
1263 fputs (reg_names[REGNO (x) + 1], (stream));
1264 break;
1265 case MEM:
1266 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1267 && GET_CODE (XEXP (x, 0)) != POST_INC)
1268 x = adjust_address (x, SImode, 4);
1269 sh_print_operand_address (stream, XEXP (x, 0));
1270 break;
1271 default:
1272 break;
1273 }
1274 break;
1275
1276 case 't':
1277 gcc_assert (MEM_P (x));
1278 x = XEXP (x, 0);
1279 switch (GET_CODE (x))
1280 {
1281 case REG:
1282 case SUBREG:
1283 sh_print_operand (stream, x, 0);
1284 break;
1285 default:
1286 break;
1287 }
1288 break;
1289
1290 case 'o':
1291 switch (GET_CODE (x))
1292 {
1293 case PLUS: fputs ("add", stream); break;
1294 case MINUS: fputs ("sub", stream); break;
1295 case MULT: fputs ("mul", stream); break;
1296 case DIV: fputs ("div", stream); break;
1297 case EQ: fputs ("eq", stream); break;
1298 case NE: fputs ("ne", stream); break;
1299 case GT: case LT: fputs ("gt", stream); break;
1300 case GE: case LE: fputs ("ge", stream); break;
1301 case GTU: case LTU: fputs ("gtu", stream); break;
1302 case GEU: case LEU: fputs ("geu", stream); break;
1303 default:
1304 break;
1305 }
1306 break;
1307 case 'M':
1308 if (TARGET_SHMEDIA)
1309 {
1310 if (MEM_P (x)
1311 && GET_CODE (XEXP (x, 0)) == PLUS
1312 && (REG_P (XEXP (XEXP (x, 0), 1))
1313 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1314 fputc ('x', stream);
1315 }
1316 else
1317 {
1318 if (MEM_P (x))
1319 {
1320 switch (GET_MODE (x))
1321 {
1322 case QImode: fputs (".b", stream); break;
1323 case HImode: fputs (".w", stream); break;
1324 case SImode: fputs (".l", stream); break;
1325 case SFmode: fputs (".s", stream); break;
1326 case DFmode: fputs (".d", stream); break;
1327 default: gcc_unreachable ();
1328 }
1329 }
1330 }
1331 break;
1332
1333 case 'm':
1334 gcc_assert (MEM_P (x));
1335 x = XEXP (x, 0);
1336 /* Fall through. */
1337 case 'U':
1338 switch (GET_CODE (x))
1339 {
1340 case REG:
1341 case SUBREG:
1342 sh_print_operand (stream, x, 0);
1343 fputs (", 0", stream);
1344 break;
1345
1346 case PLUS:
1347 sh_print_operand (stream, XEXP (x, 0), 0);
1348 fputs (", ", stream);
1349 sh_print_operand (stream, XEXP (x, 1), 0);
1350 break;
1351
1352 default:
1353 gcc_unreachable ();
1354 }
1355 break;
1356
1357 case 'V':
1358 {
1359 int num = exact_log2 (INTVAL (x));
1360 gcc_assert (num >= 0);
1361 fprintf (stream, "#%d", num);
1362 }
1363 break;
1364
1365 case 'W':
1366 {
1367 int num = exact_log2 (~INTVAL (x));
1368 gcc_assert (num >= 0);
1369 fprintf (stream, "#%d", num);
1370 }
1371 break;
1372
1373 case 'd':
1374 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1375
1376 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1377 break;
1378
1379 case 'N':
1380 if (x == CONST0_RTX (GET_MODE (x)))
1381 {
1382 fprintf ((stream), "r63");
1383 break;
1384 }
1385 goto default_output;
1386 case 'u':
1387 if (CONST_INT_P (x))
1388 {
1389 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1390 break;
1391 }
1392 /* Fall through. */
1393
1394 default_output:
1395 default:
1396 regno = 0;
1397 mode = GET_MODE (x);
1398
1399 switch (GET_CODE (x))
1400 {
1401 case TRUNCATE:
1402 {
1403 rtx inner = XEXP (x, 0);
1404 int offset = 0;
1405 enum machine_mode inner_mode;
1406
1407 /* We might see SUBREGs with vector mode registers inside. */
1408 if (GET_CODE (inner) == SUBREG
1409 && (GET_MODE_SIZE (GET_MODE (inner))
1410 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1411 && subreg_lowpart_p (inner))
1412 inner = SUBREG_REG (inner);
1413 if (CONST_INT_P (inner))
1414 {
1415 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1416 goto default_output;
1417 }
1418 inner_mode = GET_MODE (inner);
1419 if (GET_CODE (inner) == SUBREG
1420 && (GET_MODE_SIZE (GET_MODE (inner))
1421 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1422 && REG_P (SUBREG_REG (inner)))
1423 {
1424 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1425 GET_MODE (SUBREG_REG (inner)),
1426 SUBREG_BYTE (inner),
1427 GET_MODE (inner));
1428 inner = SUBREG_REG (inner);
1429 }
1430 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1431 abort ();
1432 /* Floating point register pairs are always big endian;
1433 general purpose registers are 64 bit wide. */
1434 regno = REGNO (inner);
1435 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1436 - HARD_REGNO_NREGS (regno, mode))
1437 + offset;
1438 x = inner;
1439 goto reg;
1440 }
1441 case SIGN_EXTEND:
1442 x = XEXP (x, 0);
1443 goto reg;
1444 /* FIXME: We need this on SHmedia32 because reload generates
1445 some sign-extended HI or QI loads into DImode registers
1446 but, because Pmode is SImode, the address ends up with a
1447 subreg:SI of the DImode register. Maybe reload should be
1448 fixed so as to apply alter_subreg to such loads? */
1449 case IF_THEN_ELSE:
1450 gcc_assert (trapping_target_operand (x, VOIDmode));
1451 x = XEXP (XEXP (x, 2), 0);
1452 goto default_output;
1453 case SUBREG:
1454 gcc_assert (SUBREG_BYTE (x) == 0
1455 && REG_P (SUBREG_REG (x)));
1456
1457 x = SUBREG_REG (x);
1458 /* Fall through. */
1459
1460 reg:
1461 case REG:
1462 regno += REGNO (x);
1463 if (FP_REGISTER_P (regno)
1464 && mode == V16SFmode)
1465 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1466 else if (FP_REGISTER_P (REGNO (x))
1467 && mode == V4SFmode)
1468 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1469 else if (REG_P (x)
1470 && mode == V2SFmode)
1471 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1472 else if (FP_REGISTER_P (REGNO (x))
1473 && GET_MODE_SIZE (mode) > 4)
1474 fprintf ((stream), "d%s", reg_names[regno] + 1);
1475 else
1476 fputs (reg_names[regno], (stream));
1477 break;
1478
1479 case MEM:
1480 output_address (XEXP (x, 0));
1481 break;
1482
1483 default:
1484 if (TARGET_SH1)
1485 fputc ('#', stream);
1486 output_addr_const (stream, x);
1487 break;
1488 }
1489 break;
1490 }
1491 }
1492
1493 static bool
1494 sh_print_operand_punct_valid_p (unsigned char code)
1495 {
1496 return (code == '.' || code == '#' || code == '@' || code == ','
1497 || code == '$' || code == '\'' || code == '>');
1498 }
1499
1500 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1501 static bool
1502 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1503 {
1504 if (GET_CODE (x) == UNSPEC)
1505 {
1506 switch (XINT (x, 1))
1507 {
1508 case UNSPEC_DATALABEL:
1509 fputs ("datalabel ", file);
1510 output_addr_const (file, XVECEXP (x, 0, 0));
1511 break;
1512 case UNSPEC_PIC:
1513 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1514 output_addr_const (file, XVECEXP (x, 0, 0));
1515 break;
1516 case UNSPEC_GOT:
1517 output_addr_const (file, XVECEXP (x, 0, 0));
1518 fputs ("@GOT", file);
1519 break;
1520 case UNSPEC_GOTOFF:
1521 output_addr_const (file, XVECEXP (x, 0, 0));
1522 fputs ("@GOTOFF", file);
1523 break;
1524 case UNSPEC_PLT:
1525 output_addr_const (file, XVECEXP (x, 0, 0));
1526 fputs ("@PLT", file);
1527 break;
1528 case UNSPEC_GOTPLT:
1529 output_addr_const (file, XVECEXP (x, 0, 0));
1530 fputs ("@GOTPLT", file);
1531 break;
1532 case UNSPEC_DTPOFF:
1533 output_addr_const (file, XVECEXP (x, 0, 0));
1534 fputs ("@DTPOFF", file);
1535 break;
1536 case UNSPEC_GOTTPOFF:
1537 output_addr_const (file, XVECEXP (x, 0, 0));
1538 fputs ("@GOTTPOFF", file);
1539 break;
1540 case UNSPEC_TPOFF:
1541 output_addr_const (file, XVECEXP (x, 0, 0));
1542 fputs ("@TPOFF", file);
1543 break;
1544 case UNSPEC_CALLER:
1545 {
1546 char name[32];
1547 /* LPCS stands for Label for PIC Call Site. */
1548 targetm.asm_out.generate_internal_label (name, "LPCS",
1549 INTVAL (XVECEXP (x, 0, 0)));
1550 assemble_name (file, name);
1551 }
1552 break;
1553 case UNSPEC_EXTRACT_S16:
1554 case UNSPEC_EXTRACT_U16:
1555 {
1556 rtx val, shift;
1557
1558 val = XVECEXP (x, 0, 0);
1559 shift = XVECEXP (x, 0, 1);
1560 fputc ('(', file);
1561 if (shift != const0_rtx)
1562 fputc ('(', file);
1563 if (GET_CODE (val) == CONST
1564 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1565 {
1566 fputc ('(', file);
1567 output_addr_const (file, val);
1568 fputc (')', file);
1569 }
1570 else
1571 output_addr_const (file, val);
1572 if (shift != const0_rtx)
1573 {
1574 fputs (" >> ", file);
1575 output_addr_const (file, shift);
1576 fputc (')', file);
1577 }
1578 fputs (" & 65535)", file);
1579 }
1580 break;
1581 case UNSPEC_SYMOFF:
1582 output_addr_const (file, XVECEXP (x, 0, 0));
1583 fputc ('-', file);
1584 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1585 {
1586 fputc ('(', file);
1587 output_addr_const (file, XVECEXP (x, 0, 1));
1588 fputc (')', file);
1589 }
1590 else
1591 output_addr_const (file, XVECEXP (x, 0, 1));
1592 break;
1593 case UNSPEC_PCREL_SYMOFF:
1594 output_addr_const (file, XVECEXP (x, 0, 0));
1595 fputs ("-(", file);
1596 output_addr_const (file, XVECEXP (x, 0, 1));
1597 fputs ("-.)", file);
1598 break;
1599 default:
1600 return false;
1601 }
1602 return true;
1603 }
1604 else
1605 return false;
1606 }
1607 \f
1608 /* Encode symbol attributes of a SYMBOL_REF into its
1609 SYMBOL_REF_FLAGS. */
1610 static void
1611 sh_encode_section_info (tree decl, rtx rtl, int first)
1612 {
1613 default_encode_section_info (decl, rtl, first);
1614
1615 if (TREE_CODE (decl) == FUNCTION_DECL
1616 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1617 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1618 }
1619
1620 /* Prepare operands for a move define_expand; specifically, one of the
1621 operands must be in a register. */
1622 void
1623 prepare_move_operands (rtx operands[], enum machine_mode mode)
1624 {
1625 if ((mode == SImode || mode == DImode)
1626 && flag_pic
1627 && ! ((mode == Pmode || mode == ptr_mode)
1628 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1629 {
1630 rtx temp;
1631 if (SYMBOLIC_CONST_P (operands[1]))
1632 {
1633 if (MEM_P (operands[0]))
1634 operands[1] = force_reg (Pmode, operands[1]);
1635 else if (TARGET_SHMEDIA
1636 && GET_CODE (operands[1]) == LABEL_REF
1637 && target_reg_operand (operands[0], mode))
1638 /* It's ok. */;
1639 else
1640 {
1641 temp = (!can_create_pseudo_p ()
1642 ? operands[0]
1643 : gen_reg_rtx (Pmode));
1644 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1645 }
1646 }
1647 else if (GET_CODE (operands[1]) == CONST
1648 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1649 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1650 {
1651 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1652 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1653 mode, temp);
1654 operands[1] = expand_binop (mode, add_optab, temp,
1655 XEXP (XEXP (operands[1], 0), 1),
1656 (!can_create_pseudo_p ()
1657 ? temp
1658 : gen_reg_rtx (Pmode)),
1659 0, OPTAB_LIB_WIDEN);
1660 }
1661 }
1662
1663 if (! reload_in_progress && ! reload_completed)
1664 {
1665 /* Copy the source to a register if both operands aren't registers. */
1666 if (! register_operand (operands[0], mode)
1667 && ! sh_register_operand (operands[1], mode))
1668 operands[1] = copy_to_mode_reg (mode, operands[1]);
1669
1670 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1671 {
1672 /* This is like change_address_1 (operands[0], mode, 0, 1),
1673 except that we can't use that function because it is static. */
1674 rtx new_rtx = change_address (operands[0], mode, 0);
1675 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1676 operands[0] = new_rtx;
1677 }
1678
1679 /* This case can happen while generating code to move the result
1680 of a library call to the target. Reject `st r0,@(rX,rY)' because
1681 reload will fail to find a spill register for rX, since r0 is already
1682 being used for the source. */
1683 else if (TARGET_SH1
1684 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1685 && MEM_P (operands[0])
1686 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1687 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1688 operands[1] = copy_to_mode_reg (mode, operands[1]);
1689 }
1690
1691 if (mode == Pmode || mode == ptr_mode)
1692 {
1693 rtx op0, op1, opc;
1694 enum tls_model tls_kind;
1695
1696 op0 = operands[0];
1697 op1 = operands[1];
1698 if (GET_CODE (op1) == CONST
1699 && GET_CODE (XEXP (op1, 0)) == PLUS
1700 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1701 != TLS_MODEL_NONE))
1702 {
1703 opc = XEXP (XEXP (op1, 0), 1);
1704 op1 = XEXP (XEXP (op1, 0), 0);
1705 }
1706 else
1707 opc = NULL_RTX;
1708
1709 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1710 {
1711 rtx tga_op1, tga_ret, tmp, tmp2;
1712
1713 if (! flag_pic
1714 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1715 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1716 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1717 {
1718 /* Don't schedule insns for getting GOT address when
1719 the first scheduling is enabled, to avoid spill
1720 failures for R0. */
1721 if (flag_schedule_insns)
1722 emit_insn (gen_blockage ());
1723 emit_insn (gen_GOTaddr2picreg ());
1724 emit_use (gen_rtx_REG (SImode, PIC_REG));
1725 if (flag_schedule_insns)
1726 emit_insn (gen_blockage ());
1727 }
1728
1729 switch (tls_kind)
1730 {
1731 case TLS_MODEL_GLOBAL_DYNAMIC:
1732 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1733 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1734 tmp = gen_reg_rtx (Pmode);
1735 emit_move_insn (tmp, tga_ret);
1736 op1 = tmp;
1737 break;
1738
1739 case TLS_MODEL_LOCAL_DYNAMIC:
1740 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1741 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1742
1743 tmp = gen_reg_rtx (Pmode);
1744 emit_move_insn (tmp, tga_ret);
1745
1746 if (register_operand (op0, Pmode))
1747 tmp2 = op0;
1748 else
1749 tmp2 = gen_reg_rtx (Pmode);
1750
1751 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1752 op1 = tmp2;
1753 break;
1754
1755 case TLS_MODEL_INITIAL_EXEC:
1756 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1757 tmp = gen_sym2GOTTPOFF (op1);
1758 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1759 op1 = tga_op1;
1760 break;
1761
1762 case TLS_MODEL_LOCAL_EXEC:
1763 tmp2 = gen_reg_rtx (Pmode);
1764 emit_insn (gen_store_gbr (tmp2));
1765 tmp = gen_reg_rtx (Pmode);
1766 emit_insn (gen_symTPOFF2reg (tmp, op1));
1767
1768 if (register_operand (op0, Pmode))
1769 op1 = op0;
1770 else
1771 op1 = gen_reg_rtx (Pmode);
1772
1773 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1774 break;
1775
1776 default:
1777 gcc_unreachable ();
1778 }
1779 if (opc)
1780 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1781 operands[1] = op1;
1782 }
1783 }
1784 }
1785
1786 /* Implement the canonicalize_comparison target hook for the combine
1787 pass. For the target hook this function is invoked via
1788 sh_canonicalize_comparison. This function is also re-used to
1789 canonicalize comparisons in cbranch pattern expanders. */
1790 static void
1791 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1792 enum machine_mode mode,
1793 bool op0_preserve_value)
1794 {
1795 /* When invoked from within the combine pass the mode is not specified,
1796 so try to get it from one of the operands. */
1797 if (mode == VOIDmode)
1798 mode = GET_MODE (op0);
1799 if (mode == VOIDmode)
1800 mode = GET_MODE (op1);
1801
1802 // We need to have a mode to do something useful here.
1803 if (mode == VOIDmode)
1804 return;
1805
1806 // Currently, we don't deal with floats here.
1807 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1808 return;
1809
1810 // Make sure that the constant operand is the second operand.
1811 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1812 {
1813 if (op0_preserve_value)
1814 return;
1815
1816 std::swap (op0, op1);
1817 cmp = swap_condition (cmp);
1818 }
1819
1820 if (CONST_INT_P (op1))
1821 {
1822 /* Try to adjust the constant operand in such a way that available
1823 comparison insns can be utilized better and the constant can be
1824 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1825 constant pool. */
1826 const HOST_WIDE_INT val = INTVAL (op1);
1827
1828 /* x > -1 --> x >= 0
1829 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1830 x <= -1 --> x < 0
1831 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1832 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1833 {
1834 cmp = cmp == GT ? GE : LT;
1835 op1 = gen_int_mode (val + 1, mode);
1836 }
1837
1838 /* x >= 1 --> x > 0
1839 x >= 0x80 --> x > 0x7F
1840 x < 1 --> x <= 0
1841 x < 0x80 --> x <= 0x7F */
1842 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1843 {
1844 cmp = cmp == GE ? GT : LE;
1845 op1 = gen_int_mode (val - 1, mode);
1846 }
1847
1848 /* unsigned x >= 1 --> x != 0
1849 unsigned x < 1 --> x == 0 */
1850 else if (val == 1 && (cmp == GEU || cmp == LTU))
1851 {
1852 cmp = cmp == GEU ? NE : EQ;
1853 op1 = CONST0_RTX (mode);
1854 }
1855
1856 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1857 unsigned x < 0x80 --> unsigned x <= 0x7F */
1858 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1859 {
1860 cmp = cmp == GEU ? GTU : LEU;
1861 op1 = gen_int_mode (val - 1, mode);
1862 }
1863
1864 /* unsigned x > 0 --> x != 0
1865 unsigned x <= 0 --> x == 0 */
1866 else if (val == 0 && (cmp == GTU || cmp == LEU))
1867 cmp = cmp == GTU ? NE : EQ;
1868
1869 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1870 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1871 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1872 && val == 0x7FFFFFFF)
1873 {
1874 cmp = cmp == GTU ? LT : GE;
1875 op1 = const0_rtx;
1876 }
1877
1878 /* unsigned x >= 0x80000000 --> signed x < 0
1879 unsigned x < 0x80000000 --> signed x >= 0 */
1880 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1881 && (unsigned HOST_WIDE_INT)val
1882 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1883 {
1884 cmp = cmp == GEU ? LT : GE;
1885 op1 = const0_rtx;
1886 }
1887 }
1888 }
1889
1890 /* This function implements the canonicalize_comparison target hook.
1891 This wrapper around the internally used sh_canonicalize_comparison
1892 function is needed to do the enum rtx_code <-> int conversion.
1893 Target hooks cannot use enum rtx_code in their definitions. */
1894 static void
1895 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1896 bool op0_preserve_value)
1897 {
1898 enum rtx_code tmp_code = (enum rtx_code)*code;
1899 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1900 VOIDmode, op0_preserve_value);
1901 *code = (int)tmp_code;
1902 }
1903
1904 bool
1905 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1906 {
1907 *p1 = T_REG;
1908 *p2 = INVALID_REGNUM;
1909 return true;
1910 }
1911
1912 enum rtx_code
1913 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1914 enum rtx_code comparison)
1915 {
1916 /* The scratch reg is only available when this is invoked from within
1917 the cbranchdi4_i splitter, through expand_cbranchdi4. */
1918 rtx scratch = NULL_RTX;
1919
1920 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1921 comparison = GET_CODE (operands[0]);
1922 else
1923 scratch = operands[4];
1924
1925 sh_canonicalize_comparison (comparison, operands[1], operands[2],
1926 mode, false);
1927
1928 /* Notice that this function is also invoked after reload by
1929 the cbranchdi4_i pattern, through expand_cbranchdi4. */
1930 rtx op1 = operands[1];
1931
1932 if (can_create_pseudo_p ())
1933 operands[1] = force_reg (mode, op1);
1934 /* When we are handling DImode comparisons, we want to keep constants so
1935 that we can optimize the component comparisons; however, memory loads
1936 are better issued as a whole so that they can be scheduled well.
1937 SImode equality comparisons allow I08 constants, but only when they
1938 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1939 into a register, that register might as well be r0, and we allow the
1940 constant. If it is already in a register, this is likely to be
1941 allocated to a different hard register, thus we load the constant into
1942 a register unless it is zero. */
1943 if (!REG_P (operands[2])
1944 && (!CONST_INT_P (operands[2])
1945 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1946 && ((comparison != EQ && comparison != NE)
1947 || (REG_P (op1) && REGNO (op1) != R0_REG)
1948 || !satisfies_constraint_I08 (operands[2])))))
1949 {
1950 if (scratch && GET_MODE (scratch) == mode)
1951 {
1952 emit_move_insn (scratch, operands[2]);
1953 operands[2] = scratch;
1954 }
1955 else if (can_create_pseudo_p ())
1956 operands[2] = force_reg (mode, operands[2]);
1957 }
1958 return comparison;
1959 }
1960
1961 void
1962 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1963 {
1964 rtx (*branch_expander) (rtx) = gen_branch_true;
1965 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1966 switch (comparison)
1967 {
1968 case NE: case LT: case LE: case LTU: case LEU:
1969 comparison = reverse_condition (comparison);
1970 branch_expander = gen_branch_false;
1971 default: ;
1972 }
1973 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
1974 gen_rtx_fmt_ee (comparison, SImode,
1975 operands[1], operands[2])));
1976 rtx jump = emit_jump_insn (branch_expander (operands[3]));
1977 if (probability >= 0)
1978 add_int_reg_note (jump, REG_BR_PROB, probability);
1979 }
1980
1981 /* ??? How should we distribute probabilities when more than one branch
1982 is generated? So far we only have some ad-hoc observations:
1983 - If the operands are random, they are likely to differ in both parts.
1984 - If comparing items in a hash chain, the operands are random or equal;
1985 operation should be EQ or NE.
1986 - If items are searched in an ordered tree from the root, we can expect
1987 the highpart to be unequal about half of the time; operation should be
1988 an inequality comparison, operands non-constant, and overall probability
1989 about 50%. Likewise for quicksort.
1990 - Range checks will often be made against constants. Even if we assume for
1991 simplicity an even distribution of the non-constant operand over a
1992 sub-range here, the same probability could be generated with differently
1993 wide sub-ranges - as long as the ratio of the part of the subrange that
1994 is before the threshold to the part that comes after the threshold stays
1995 the same. Thus, we can't really tell anything here;
1996 assuming random distribution is at least simple.
1997 */
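/* A rough sketch of the expansion done below (ignoring the TARGET_CMPEQDI_T
   and constant special cases): a DImode branch on x > y is split into
   SImode compares roughly like

       if (hi(x) >  hi(y)) goto taken;   // msw_taken
       if (hi(x) <  hi(y)) goto skip;    // msw_skip
       if (lo(x) >u lo(y)) goto taken;   // lsw_taken, unsigned low compare
     skip:;
*/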
1998 bool
1999 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2000 {
2001 enum rtx_code msw_taken, msw_skip, lsw_taken;
2002 rtx skip_label = NULL_RTX;
2003 rtx op1h, op1l, op2h, op2l;
2004 int num_branches;
2005 int prob, rev_prob;
2006 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2007 rtx scratch = operands[4];
2008
2009 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2010 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2011 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2012 op1l = gen_lowpart (SImode, operands[1]);
2013 op2l = gen_lowpart (SImode, operands[2]);
2014 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2015 prob = split_branch_probability;
2016 rev_prob = REG_BR_PROB_BASE - prob;
2017 switch (comparison)
2018 {
2019 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2020 That costs 1 cycle more when the first branch can be predicted taken,
2021 but saves us mispredicts because only one branch needs prediction.
2022 It also enables generating the cmpeqdi_t-1 pattern. */
2023 case EQ:
2024 if (TARGET_CMPEQDI_T)
2025 {
2026 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2027 emit_jump_insn (gen_branch_true (operands[3]));
2028 return true;
2029 }
2030 msw_skip = NE;
2031 lsw_taken = EQ;
2032 if (prob >= 0)
2033 {
2034 // If we had more precision, we'd use rev_prob - (rev_prob >> 32).
2035 msw_skip_prob = rev_prob;
2036 if (REG_BR_PROB_BASE <= 65535)
2037 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2038 else
2039 {
2040 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
2041 lsw_taken_prob
2042 = (prob
2043 ? (REG_BR_PROB_BASE
2044 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
2045 / ((HOST_WIDEST_INT) prob << 32)))
2046 : 0);
2047 }
2048 }
2049 break;
2050 case NE:
2051 if (TARGET_CMPEQDI_T)
2052 {
2053 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2054 emit_jump_insn (gen_branch_false (operands[3]));
2055 return true;
2056 }
2057 msw_taken = NE;
2058 msw_taken_prob = prob;
2059 lsw_taken = NE;
2060 lsw_taken_prob = 0;
2061 break;
2062 case GTU: case GT:
2063 msw_taken = comparison;
2064 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2065 break;
2066 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2067 msw_skip = swap_condition (msw_taken);
2068 lsw_taken = GTU;
2069 break;
2070 case GEU: case GE:
2071 if (op2l == CONST0_RTX (SImode))
2072 msw_taken = comparison;
2073 else
2074 {
2075 msw_taken = comparison == GE ? GT : GTU;
2076 msw_skip = swap_condition (msw_taken);
2077 lsw_taken = GEU;
2078 }
2079 break;
2080 case LTU: case LT:
2081 msw_taken = comparison;
2082 if (op2l == CONST0_RTX (SImode))
2083 break;
2084 msw_skip = swap_condition (msw_taken);
2085 lsw_taken = LTU;
2086 break;
2087 case LEU: case LE:
2088 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2089 msw_taken = comparison;
2090 else
2091 {
2092 lsw_taken = LEU;
2093 if (comparison == LE)
2094 msw_taken = LT;
2095 else if (op2h != CONST0_RTX (SImode))
2096 msw_taken = LTU;
2097 else
2098 {
2099 msw_skip = swap_condition (LTU);
2100 break;
2101 }
2102 msw_skip = swap_condition (msw_taken);
2103 }
2104 break;
2105 default: return false;
2106 }
2107 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2108 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2109 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2110 if (comparison != EQ && comparison != NE && num_branches > 1)
2111 {
2112 if (!CONSTANT_P (operands[2])
2113 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2114 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2115 {
2116 msw_taken_prob = prob / 2U;
2117 msw_skip_prob
2118 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2119 lsw_taken_prob = prob;
2120 }
2121 else
2122 {
2123 msw_taken_prob = prob;
2124 msw_skip_prob = REG_BR_PROB_BASE;
2125 /* ??? If we have a constant op2h, should we use that when
2126 calculating lsw_taken_prob? */
2127 lsw_taken_prob = prob;
2128 }
2129 }
2130 operands[1] = op1h;
2131 operands[2] = op2h;
2132 operands[4] = NULL_RTX;
2133 if (reload_completed
2134 && ! arith_reg_or_0_operand (op2h, SImode)
2135 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2136 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2137 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2138 {
2139 emit_move_insn (scratch, operands[2]);
2140 operands[2] = scratch;
2141 }
2142 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2143 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2144 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2145 {
2146 rtx taken_label = operands[3];
2147
2148 /* Operands were possibly modified, but msw_skip doesn't expect this.
2149 Always use the original ones. */
2150 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2151 {
2152 operands[1] = op1h;
2153 operands[2] = op2h;
2154 if (reload_completed
2155 && ! arith_reg_or_0_operand (op2h, SImode)
2156 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2157 {
2158 emit_move_insn (scratch, operands[2]);
2159 operands[2] = scratch;
2160 }
2161 }
2162
2163 operands[3] = skip_label = gen_label_rtx ();
2164 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2165 operands[3] = taken_label;
2166 }
2167 operands[1] = op1l;
2168 operands[2] = op2l;
2169 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2170 {
2171 if (reload_completed
2172 && ! arith_reg_or_0_operand (op2l, SImode)
2173 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2174 {
2175 emit_move_insn (scratch, operands[2]);
2176 operands[2] = scratch;
2177 }
2178 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2179 }
2180 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2181 emit_label (skip_label);
2182 return true;
2183 }
2184
2185 /* Given an operand, return 1 if the evaluated operand plugged into an
2186 if_then_else will result in a branch_true, 0 if branch_false, or
2187 -1 if neither applies. The truth table goes like this:
2188
2189 op | cmpval | code | result
2190 ---------+--------+---------+--------------------
2191 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2192 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2193 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2194 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2195 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2196 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2197 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2198 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
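/* For example, (eq (reg T) (const_int 0)) gives op = T (0), cmpval = 0 and
   code = EQ (cmpop = 1), so the result is 0 ^ (0 == 1) = 0, i.e. the
   expression acts like a branch_false.  */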
2199 int
2200 sh_eval_treg_value (rtx op)
2201 {
2202 enum rtx_code code = GET_CODE (op);
2203 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2204 return -1;
2205
2206 int cmpop = code == EQ ? 1 : 0;
2207 int cmpval = INTVAL (XEXP (op, 1));
2208 if (cmpval != 0 && cmpval != 1)
2209 return -1;
2210
2211 int t;
2212 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2213 t = 0;
2214 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2215 t = 1;
2216 else
2217 return -1;
2218
2219 return t ^ (cmpval == cmpop);
2220 }
2221
2222 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2223
2224 static void
2225 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2226 {
2227 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2228 {
2229 insn = gen_rtx_PARALLEL (VOIDmode,
2230 gen_rtvec (2, insn,
2231 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2232 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2233 }
2234 else
2235 emit_insn (insn);
2236 }
2237
2238 /* Prepare the operands for an scc instruction; make sure that the
2239 compare has been done and the result is in T_REG. */
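/* The SH compare insns directly test only the EQ, GT, GE, GTU and GEU style
   conditions, so LT, LE, LTU and LEU are handled below by swapping the
   operands and using the mirrored condition.  */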
2240 void
2241 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2242 {
2243 rtx t_reg = get_t_reg_rtx ();
2244 enum rtx_code oldcode = code;
2245 enum machine_mode mode;
2246
2247 /* First need a compare insn. */
2248 switch (code)
2249 {
2250 case NE:
2251 /* It isn't possible to handle this case. */
2252 gcc_unreachable ();
2253 case LT:
2254 code = GT;
2255 break;
2256 case LE:
2257 code = GE;
2258 break;
2259 case LTU:
2260 code = GTU;
2261 break;
2262 case LEU:
2263 code = GEU;
2264 break;
2265 default:
2266 break;
2267 }
2268 if (code != oldcode)
2269 {
2270 rtx tmp = op0;
2271 op0 = op1;
2272 op1 = tmp;
2273 }
2274
2275 mode = GET_MODE (op0);
2276 if (mode == VOIDmode)
2277 mode = GET_MODE (op1);
2278
2279 op0 = force_reg (mode, op0);
2280 if ((code != EQ && code != NE
2281 && (op1 != const0_rtx
2282 || code == GTU || code == GEU || code == LTU || code == LEU))
2283 || (mode == DImode && op1 != const0_rtx)
2284 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2285 op1 = force_reg (mode, op1);
2286
2287 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2288 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2289 mode);
2290 }
2291
2292 rtx
2293 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2294 rtx op0, rtx op1)
2295 {
2296 rtx target = gen_reg_rtx (SImode);
2297 rtx tmp;
2298
2299 gcc_assert (TARGET_SHMEDIA);
2300 switch (code)
2301 {
2302 case EQ:
2303 case GT:
2304 case LT:
2305 case UNORDERED:
2306 case GTU:
2307 case LTU:
2308 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2309 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2310 code = NE;
2311 break;
2312
2313 case NE:
2314 case GE:
2315 case LE:
2316 case ORDERED:
2317 case GEU:
2318 case LEU:
2319 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2320 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2321 code = EQ;
2322 break;
2323
2324 case UNEQ:
2325 case UNGE:
2326 case UNGT:
2327 case UNLE:
2328 case UNLT:
2329 case LTGT:
2330 return NULL_RTX;
2331
2332 default:
2333 gcc_unreachable ();
2334 }
2335
2336 if (mode == DImode)
2337 {
2338 rtx t2 = gen_reg_rtx (DImode);
2339 emit_insn (gen_extendsidi2 (t2, target));
2340 target = t2;
2341 }
2342
2343 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2344 }
2345
2346 /* Called from the md file, set up the operands of a compare instruction. */
2347 void
2348 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2349 {
2350 enum rtx_code code = GET_CODE (operands[0]);
2351 enum rtx_code branch_code;
2352 rtx op0 = operands[1];
2353 rtx op1 = operands[2];
2354 rtx insn, tem;
2355 bool need_ccmpeq = false;
2356
2357 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2358 {
2359 op0 = force_reg (mode, op0);
2360 op1 = force_reg (mode, op1);
2361 }
2362 else
2363 {
2364 if (code != EQ || mode == DImode)
2365 {
2366 /* Force args into regs, since we can't use constants here. */
2367 op0 = force_reg (mode, op0);
2368 if (op1 != const0_rtx || code == GTU || code == GEU)
2369 op1 = force_reg (mode, op1);
2370 }
2371 }
2372
2373 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2374 {
2375 if (code == LT
2376 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2377 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2378 {
2379 tem = op0, op0 = op1, op1 = tem;
2380 code = swap_condition (code);
2381 }
2382
2383 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2384 if (code == GE)
2385 {
2386 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2387 need_ccmpeq = true;
2388 code = GT;
2389 }
2390
2391 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2392 to EQ/GT respectively. */
2393 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2394 }
2395
2396 switch (code)
2397 {
2398 case EQ:
2399 case GT:
2400 case GE:
2401 case GTU:
2402 case GEU:
2403 branch_code = code;
2404 break;
2405 case NE:
2406 case LT:
2407 case LE:
2408 case LTU:
2409 case LEU:
2410 branch_code = reverse_condition (code);
2411 break;
2412 default:
2413 gcc_unreachable ();
2414 }
2415
2416 insn = gen_rtx_SET (VOIDmode,
2417 get_t_reg_rtx (),
2418 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2419
2420 sh_emit_set_t_insn (insn, mode);
2421 if (need_ccmpeq)
2422 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2423
2424 if (branch_code == code)
2425 emit_jump_insn (gen_branch_true (operands[3]));
2426 else
2427 emit_jump_insn (gen_branch_false (operands[3]));
2428 }
2429
2430 void
2431 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2432 {
2433 enum rtx_code code = GET_CODE (operands[1]);
2434 rtx op0 = operands[2];
2435 rtx op1 = operands[3];
2436 rtx lab = NULL_RTX;
2437 bool invert = false;
2438 rtx tem;
2439
2440 op0 = force_reg (mode, op0);
2441 if ((code != EQ && code != NE
2442 && (op1 != const0_rtx
2443 || code == GTU || code == GEU || code == LTU || code == LEU))
2444 || (mode == DImode && op1 != const0_rtx)
2445 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2446 op1 = force_reg (mode, op1);
2447
2448 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2449 {
2450 if (code == LT || code == LE)
2451 {
2452 code = swap_condition (code);
2453 tem = op0, op0 = op1, op1 = tem;
2454 }
2455 if (code == GE)
2456 {
2457 if (TARGET_IEEE)
2458 {
2459 lab = gen_label_rtx ();
2460 sh_emit_scc_to_t (EQ, op0, op1);
2461 emit_jump_insn (gen_branch_true (lab));
2462 code = GT;
2463 }
2464 else
2465 {
2466 code = LT;
2467 invert = true;
2468 }
2469 }
2470 }
2471
2472 if (code == NE)
2473 {
2474 code = EQ;
2475 invert = true;
2476 }
2477
2478 sh_emit_scc_to_t (code, op0, op1);
2479 if (lab)
2480 emit_label (lab);
2481 if (invert)
2482 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2483 else
2484 emit_move_insn (operands[0], get_t_reg_rtx ());
2485 }
2486 \f
2487 /* Functions to output assembly code. */
2488
2489 /* Return a sequence of instructions to perform DI or DF move.
2490
2491 Since the SH cannot move a DI or DF in one instruction, we have
2492 to take care when we see overlapping source and dest registers. */
2493 const char *
2494 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2495 enum machine_mode mode)
2496 {
2497 rtx dst = operands[0];
2498 rtx src = operands[1];
2499
2500 if (MEM_P (dst)
2501 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2502 return "mov.l %T1,%0" "\n"
2503 " mov.l %1,%0";
2504
2505 if (register_operand (dst, mode)
2506 && register_operand (src, mode))
2507 {
2508 if (REGNO (src) == MACH_REG)
2509 return "sts mach,%S0" "\n"
2510 " sts macl,%R0";
2511
2512 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2513 when mov.d r1,r0 do r1->r0 then r2->r1. */
2514 if (REGNO (src) + 1 == REGNO (dst))
2515 return "mov %T1,%T0" "\n"
2516 " mov %1,%0";
2517 else
2518 return "mov %1,%0" "\n"
2519 " mov %T1,%T0";
2520 }
2521 else if (CONST_INT_P (src))
2522 {
2523 if (INTVAL (src) < 0)
2524 output_asm_insn ("mov #-1,%S0", operands);
2525 else
2526 output_asm_insn ("mov #0,%S0", operands);
2527
2528 return "mov %1,%R0";
2529 }
2530 else if (MEM_P (src))
2531 {
2532 int ptrreg = -1;
2533 int dreg = REGNO (dst);
2534 rtx inside = XEXP (src, 0);
2535
2536 switch (GET_CODE (inside))
2537 {
2538 case REG:
2539 ptrreg = REGNO (inside);
2540 break;
2541
2542 case SUBREG:
2543 ptrreg = subreg_regno (inside);
2544 break;
2545
2546 case PLUS:
2547 ptrreg = REGNO (XEXP (inside, 0));
2548 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2549 an offsettable address. Unfortunately, offsettable addresses use
2550 QImode to check the offset, and a QImode offsettable address
2551 requires r0 for the other operand, which is not currently
2552 supported, so we can't use the 'o' constraint.
2553 Thus we must check for and handle r0+REG addresses here.
2554 We punt for now, since this is likely very rare. */
2555 gcc_assert (!REG_P (XEXP (inside, 1)));
2556 break;
2557
2558 case LABEL_REF:
2559 return "mov.l %1,%0" "\n"
2560 " mov.l %1+4,%T0";
2561 case POST_INC:
2562 return "mov.l %1,%0" "\n"
2563 " mov.l %1,%T0";
2564 default:
2565 gcc_unreachable ();
2566 }
2567
2568 /* Work out the safe way to copy. Copy into the second half first. */
2569 if (dreg == ptrreg)
2570 return "mov.l %T1,%T0" "\n"
2571 " mov.l %1,%0";
2572 }
2573
2574 return "mov.l %1,%0" "\n"
2575 " mov.l %T1,%T0";
2576 }
2577
2578 /* Print an instruction which would have gone into a delay slot after
2579 another instruction, but couldn't because the other instruction expanded
2580 into a sequence where putting the slot insn at the end wouldn't work. */
2581 static void
2582 print_slot (rtx insn)
2583 {
2584 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2585
2586 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2587 }
2588
2589 const char *
2590 output_far_jump (rtx insn, rtx op)
2591 {
2592 struct { rtx lab, reg, op; } this_jmp;
2593 rtx braf_base_lab = NULL_RTX;
2594 const char *jump;
2595 int far;
2596 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2597 rtx prev;
2598
2599 this_jmp.lab = gen_label_rtx ();
2600
2601 if (TARGET_SH2
2602 && offset >= -32764
2603 && offset - get_attr_length (insn) <= 32766)
2604 {
2605 far = 0;
2606 jump = "mov.w %O0,%1" "\n"
2607 " braf %1";
2608 }
2609 else
2610 {
2611 far = 1;
2612 if (flag_pic)
2613 {
2614 if (TARGET_SH2)
2615 jump = "mov.l %O0,%1" "\n"
2616 " braf %1";
2617 else
2618 jump = "mov.l r0,@-r15" "\n"
2619 " mova %O0,r0" "\n"
2620 " mov.l @r0,%1" "\n"
2621 " add r0,%1" "\n"
2622 " mov.l @r15+,r0" "\n"
2623 " jmp @%1";
2624 }
2625 else
2626 jump = "mov.l %O0,%1" "\n"
2627 " jmp @%1";
2628 }
2629 /* If we have a scratch register available, use it. */
2630 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2631 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2632 {
2633 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2634 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2635 jump = "mov.l r1,@-r15" "\n"
2636 " mova %O0,r0" "\n"
2637 " mov.l @r0,r1" "\n"
2638 " add r1,r0" "\n"
2639 " mov.l @r15+,r1" "\n"
2640 " jmp @%1";
2641 output_asm_insn (jump, &this_jmp.lab);
2642 if (dbr_sequence_length ())
2643 print_slot (final_sequence);
2644 else
2645 output_asm_insn ("nop", 0);
2646 }
2647 else
2648 {
2649 /* Output the delay slot insn first if any. */
2650 if (dbr_sequence_length ())
2651 print_slot (final_sequence);
2652
2653 this_jmp.reg = gen_rtx_REG (SImode, 13);
2654 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2655 Fortunately, MACL is fixed and call-clobbered, and we never
2656 need its value across jumps, so save r13 in it instead of in
2657 the stack. */
2658 if (TARGET_SH5)
2659 output_asm_insn ("lds r13,macl", 0);
2660 else
2661 output_asm_insn ("mov.l r13,@-r15", 0);
2662 output_asm_insn (jump, &this_jmp.lab);
2663 if (TARGET_SH5)
2664 output_asm_insn ("sts macl,r13", 0);
2665 else
2666 output_asm_insn ("mov.l @r15+,r13", 0);
2667 }
2668 if (far && flag_pic && TARGET_SH2)
2669 {
2670 braf_base_lab = gen_label_rtx ();
2671 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2672 CODE_LABEL_NUMBER (braf_base_lab));
2673 }
2674 if (far)
2675 output_asm_insn (".align 2", 0);
2676 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2677 this_jmp.op = op;
2678 if (far && flag_pic)
2679 {
2680 if (TARGET_SH2)
2681 this_jmp.lab = braf_base_lab;
2682 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2683 }
2684 else
2685 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2686 return "";
2687 }
2688
2689 /* Local label counter, used for constants in the pool and inside
2690 pattern branches. */
2691 static int lf = 100;
2692
2693 /* Output code for ordinary branches. */
2694 const char *
2695 output_branch (int logic, rtx insn, rtx *operands)
2696 {
2697 switch (get_attr_length (insn))
2698 {
2699 case 6:
2700 /* This can happen if filling the delay slot has caused a forward
2701 branch to exceed its range (we could reverse it, but only
2702 when we know we won't overextend other branches; this should
2703 best be handled by relaxation).
2704 It can also happen when other condbranches hoist delay slot insn
2705 from their destination, thus leading to code size increase.
2706 But the branch will still be in the range -4092..+4098 bytes. */
2707 if (! TARGET_RELAX)
2708 {
2709 int label = lf++;
2710 /* The call to print_slot will clobber the operands. */
2711 rtx op0 = operands[0];
2712
2713 /* If the instruction in the delay slot is annulled (true), then
2714 there is no delay slot where we can put it now. The only safe
2715 place for it is after the label. final will do that by default. */
2716
2717 if (final_sequence
2718 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2719 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2720 {
2721 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2722 ASSEMBLER_DIALECT ? "/" : ".", label);
2723 print_slot (final_sequence);
2724 }
2725 else
2726 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2727
2728 output_asm_insn ("bra\t%l0", &op0);
2729 fprintf (asm_out_file, "\tnop\n");
2730 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2731
2732 return "";
2733 }
2734 /* When relaxing, handle this like a short branch. The linker
2735 will fix it up if it still doesn't fit after relaxation. */
2736 case 2:
2737 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2738
2739 /* These are for SH2e, in which we have to account for the
2740 extra nop because of the hardware bug in annulled branches. */
2741 case 8:
2742 if (! TARGET_RELAX)
2743 {
2744 int label = lf++;
2745
2746 gcc_assert (!final_sequence
2747 || !(INSN_ANNULLED_BRANCH_P
2748 (XVECEXP (final_sequence, 0, 0))));
2749 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2750 logic ? "f" : "t",
2751 ASSEMBLER_DIALECT ? "/" : ".", label);
2752 fprintf (asm_out_file, "\tnop\n");
2753 output_asm_insn ("bra\t%l0", operands);
2754 fprintf (asm_out_file, "\tnop\n");
2755 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2756
2757 return "";
2758 }
2759 /* When relaxing, fall through. */
2760 case 4:
2761 {
2762 char buffer[10];
2763
2764 sprintf (buffer, "b%s%ss\t%%l0",
2765 logic ? "t" : "f",
2766 ASSEMBLER_DIALECT ? "/" : ".");
2767 output_asm_insn (buffer, &operands[0]);
2768 return "nop";
2769 }
2770
2771 default:
2772 /* There should be no longer branches now - that would
2773 indicate that something has destroyed the branches set
2774 up in machine_dependent_reorg. */
2775 gcc_unreachable ();
2776 }
2777 }
2778
2779 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2780 fill in operand 9 as a label to the successor insn.
2781 We try to use jump threading where possible.
2782 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2783 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2784 follow jmp and bt, if the address is in range. */
2785 const char *
2786 output_branchy_insn (enum rtx_code code, const char *templ,
2787 rtx insn, rtx *operands)
2788 {
2789 rtx next_insn = NEXT_INSN (insn);
2790
2791 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2792 {
2793 rtx src = SET_SRC (PATTERN (next_insn));
2794 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2795 {
2796 /* Following branch not taken */
2797 operands[9] = gen_label_rtx ();
2798 emit_label_after (operands[9], next_insn);
2799 INSN_ADDRESSES_NEW (operands[9],
2800 INSN_ADDRESSES (INSN_UID (next_insn))
2801 + get_attr_length (next_insn));
2802 return templ;
2803 }
2804 else
2805 {
2806 int offset = (branch_dest (next_insn)
2807 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2808 if (offset >= -252 && offset <= 258)
2809 {
2810 if (GET_CODE (src) == IF_THEN_ELSE)
2811 /* branch_true */
2812 src = XEXP (src, 1);
2813 operands[9] = src;
2814 return templ;
2815 }
2816 }
2817 }
2818 operands[9] = gen_label_rtx ();
2819 emit_label_after (operands[9], insn);
2820 INSN_ADDRESSES_NEW (operands[9],
2821 INSN_ADDRESSES (INSN_UID (insn))
2822 + get_attr_length (insn));
2823 return templ;
2824 }
2825
2826 const char *
2827 output_ieee_ccmpeq (rtx insn, rtx *operands)
2828 {
2829 return output_branchy_insn (NE, "bt %l9" "\n"
2830 " fcmp/eq %1,%0",
2831 insn, operands);
2832 }
2833 \f
2834 /* Output the start of the assembler file. */
2835 static void
2836 sh_file_start (void)
2837 {
2838 default_file_start ();
2839
2840 if (TARGET_ELF)
2841 /* We need to show the text section with the proper
2842 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2843 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2844 will complain. We can teach GAS specifically about the
2845 default attributes for our choice of text section, but
2846 then we would have to change GAS again if/when we change
2847 the text section name. */
2848 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2849 else
2850 /* Switch to the data section so that the coffsem symbol
2851 isn't in the text section. */
2852 switch_to_section (data_section);
2853
2854 if (TARGET_LITTLE_ENDIAN)
2855 fputs ("\t.little\n", asm_out_file);
2856
2857 if (!TARGET_ELF)
2858 {
2859 if (TARGET_SHCOMPACT)
2860 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2861 else if (TARGET_SHMEDIA)
2862 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2863 TARGET_SHMEDIA64 ? 64 : 32);
2864 }
2865 }
2866 \f
2867 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2868 static bool
2869 unspec_caller_rtx_p (rtx pat)
2870 {
2871 rtx base, offset;
2872 int i;
2873
2874 split_const (pat, &base, &offset);
2875 if (GET_CODE (base) == UNSPEC)
2876 {
2877 if (XINT (base, 1) == UNSPEC_CALLER)
2878 return true;
2879 for (i = 0; i < XVECLEN (base, 0); i++)
2880 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2881 return true;
2882 }
2883 return false;
2884 }
2885
2886 /* Indicate that INSN cannot be duplicated. This is true for an insn
2887 that generates a unique label. */
2888 static bool
2889 sh_cannot_copy_insn_p (rtx insn)
2890 {
2891 rtx pat;
2892
2893 if (!reload_completed || !flag_pic)
2894 return false;
2895
2896 if (!NONJUMP_INSN_P (insn))
2897 return false;
2898 if (asm_noperands (insn) >= 0)
2899 return false;
2900
2901 pat = PATTERN (insn);
2902 if (GET_CODE (pat) != SET)
2903 return false;
2904 pat = SET_SRC (pat);
2905
2906 if (unspec_caller_rtx_p (pat))
2907 return true;
2908
2909 return false;
2910 }
2911 \f
2912 /* Number of instructions used to make an arithmetic right shift by N. */
2913 static const char ashiftrt_insns[] =
2914 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
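/* E.g. ashiftrt_insns[1] == 1 (a single shar insn), while shift counts in
   the 6..15 range are all costed at 8 insns in the table above.  */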
2915
2916 /* Description of a logical left or right shift, when expanded to a sequence
2917 of 1/2/8/16 shifts.
2918 Notice that one bit right shifts clobber the T bit. One bit left shifts
2919 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
2920 enum
2921 {
2922 ASHL_CLOBBERS_T = 1 << 0,
2923 LSHR_CLOBBERS_T = 1 << 1
2924 };
2925
2926 struct ashl_lshr_sequence
2927 {
2928 char insn_count;
2929 char amount[6];
2930 char clobbers_t;
2931 };
2932
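/* How to read the table: e.g. entry 13, { 4, { 8, 2, 1, 2 }, ... }, performs
   a shift by 13 as four partial shifts of 8 + 2 + 1 + 2 bits.  A negative
   amount denotes a shift in the opposite direction; e.g. entry 14,
   { 3, { 8, -2, 8 }, 0 }, does a left shift by 14 as left 8, right 2,
   left 8 (and the mirrored sequence for a logical right shift by 14).  */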
2933 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2934 {
2935 { 0, { 0 }, 0 }, // 0
2936 { 1, { 1 }, LSHR_CLOBBERS_T },
2937 { 1, { 2 }, 0 },
2938 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2939 { 2, { 2, 2 }, 0 }, // 4
2940 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2941 { 3, { 2, 2, 2 }, 0 },
2942 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2943 { 1, { 8 }, 0 }, // 8
2944 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2945 { 2, { 8, 2 }, 0 },
2946 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2947 { 3, { 8, 2, 2 }, 0 }, // 12
2948 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
2949 { 3, { 8, -2, 8 }, 0 },
2950 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
2951 { 1, { 16 }, 0 }, // 16
2952 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2953 { 2, { 16, 2 }, 0 },
2954 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2955 { 3, { 16, 2, 2 }, 0 }, // 20
2956 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2957 { 3, { 16, -2, 8 }, 0 },
2958 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2959 { 2, { 16, 8 }, 0 }, // 24
2960 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2961 { 3, { 16, 8, 2 }, 0 },
2962 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2963 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2964 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2965 { 3, { 16, -2, 16 }, 0 },
2966
2967 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
2968 For a left shift by 31 a 2 insn and-rotl sequence can be used.
2969 However, the shift-and combiner code needs this entry here to be in
2970 terms of real shift insns. */
2971 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2972 };
2973
2974 /* Individual shift amounts for shift amounts < 16, where up to the three
2975 highmost bits might be clobbered. This is typically used when combined with some
2976 kind of sign or zero extension. */
2977 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
2978 {
2979 { 0, { 0 }, 0 }, // 0
2980 { 1, { 1 }, LSHR_CLOBBERS_T },
2981 { 1, { 2 }, 0 },
2982 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2983 { 2, { 2, 2 }, 0 }, // 4
2984 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2985 { 2, { 8, -2 }, 0 },
2986 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
2987 { 1, { 8 }, 0 }, // 8
2988 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2989 { 2, { 8, 2 }, 0 },
2990 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2991 { 3, { 8, 2, 2 }, 0 }, // 12
2992 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
2993 { 2, { 16, -2 }, 0 },
2994 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
2995 { 1, { 16 }, 0 }, // 16
2996 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2997 { 2, { 16, 2 }, 0 },
2998 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2999 { 3, { 16, 2, 2 }, 0 }, // 20
3000 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3001 { 3, { 16, -2, 8 }, 0 },
3002 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3003 { 2, { 16, 8 }, 0 }, // 24
3004 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3005 { 3, { 16, 8, 2 }, 0 },
3006 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3007 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3008 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3009 { 3, { 16, -2, 16 }, 0 },
3010 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3011 };
3012
3013 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3014 will clobber the T bit. */
3015 bool
3016 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3017 {
3018 gcc_assert (CONST_INT_P (shift_amount));
3019
3020 const int shift_amount_i = INTVAL (shift_amount) & 31;
3021
3022 /* Special case for shift count of 31: use and-rotl sequence. */
3023 if (shift_amount_i == 31)
3024 return true;
3025
3026 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3027 & ASHL_CLOBBERS_T) != 0;
3028 }
3029
3030 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3031 instructions will clobber the T bit. */
3032 bool
3033 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3034 {
3035 gcc_assert (CONST_INT_P (shift_amount));
3036
3037 const int shift_amount_i = INTVAL (shift_amount) & 31;
3038
3039 /* Special case for shift count of 31: use shll-movt sequence. */
3040 if (shift_amount_i == 31)
3041 return true;
3042
3043 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3044 & LSHR_CLOBBERS_T) != 0;
3045 }
3046
3047 /* Return true if it is potentially beneficial to use a dynamic shift
3048 instruction (shad / shar) instead of a combination of 1/2/8/16
3049 shift instructions for the specified shift count.
3050 If dynamic shifts are not available, always return false. */
3051 bool
3052 sh_dynamicalize_shift_p (rtx count)
3053 {
3054 gcc_assert (CONST_INT_P (count));
3055
3056 const int shift_amount_i = INTVAL (count) & 31;
3057 int insn_count;
3058
3059 /* For left and right shifts, there are shorter 2 insn sequences for
3060 shift amounts of 31. */
3061 if (shift_amount_i == 31)
3062 insn_count = 2;
3063 else
3064 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3065
3066 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3067 }
3068
3069 /* Assuming we have a value that has been sign-extended by at least one bit,
3070 can we use the ext_ashl_lshr_seq sequences with the last shift turned to an
3071 arithmetic shift to shift it by N without data loss, and quicker than by
3072 other means? */
3073 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
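/* ((N) | 8) == 15 holds exactly for N == 7 and N == 15.  */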
3074
3075 /* Return the cost of a shift. */
3076 static inline int
3077 shiftcosts (rtx x)
3078 {
3079 int value;
3080
3081 if (TARGET_SHMEDIA)
3082 return 1;
3083
3084 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3085 {
3086 if (GET_MODE (x) == DImode
3087 && CONST_INT_P (XEXP (x, 1))
3088 && INTVAL (XEXP (x, 1)) == 1)
3089 return 2;
3090
3091 /* Everything else is invalid, because there is no pattern for it. */
3092 return -1;
3093 }
3094 /* If shifting by a non-constant, this will be expensive. */
3095 if (!CONST_INT_P (XEXP (x, 1)))
3096 return SH_DYNAMIC_SHIFT_COST;
3097
3098 /* Otherwise, return the true cost in instructions. Cope with out of range
3099 shift counts more or less arbitrarily. */
3100 value = INTVAL (XEXP (x, 1)) & 31;
3101
3102 if (GET_CODE (x) == ASHIFTRT)
3103 {
3104 int cost = ashiftrt_insns[value];
3105 /* If dynamic shifts are available and profitable in this case, then we
3106 put the constant in a reg and use shad. */
3107 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3108 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3109 return cost;
3110 }
3111 else
3112 return ashl_lshr_seq[value].insn_count;
3113 }
3114
3115 /* Return the cost of an AND/XOR/IOR operation. */
3116 static inline int
3117 and_xor_ior_costs (rtx x, int code)
3118 {
3119 /* On SH1-4 we have only max. SImode operations.
3120 Double the cost for modes > SImode. */
3121 const int cost_scale = !TARGET_SHMEDIA
3122 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3123 ? 2 : 1;
3124
3125 /* A logical operation with two registers is a single cycle
3126 instruction. */
3127 if (!CONST_INT_P (XEXP (x, 1)))
3128 return 1 * cost_scale;
3129
3130 int i = INTVAL (XEXP (x, 1));
3131
3132 if (TARGET_SHMEDIA)
3133 {
3134 if (satisfies_constraint_I10 (XEXP (x, 1))
3135 || satisfies_constraint_J16 (XEXP (x, 1)))
3136 return 1;
3137 else
3138 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3139 }
3140
3141 /* These constants are single cycle extu.[bw] instructions. */
3142 if ((i == 0xff || i == 0xffff) && code == AND)
3143 return 1 * cost_scale;
3144 /* Constants that can be used in an instruction as an immediate are
3145 a single cycle, but this requires r0, so make it a little more
3146 expensive. */
3147 if (CONST_OK_FOR_K08 (i))
3148 return 2 * cost_scale;
3149 /* Constants that can be loaded with a mov immediate need one more cycle.
3150 This case is probably unnecessary. */
3151 if (CONST_OK_FOR_I08 (i))
3152 return 2 * cost_scale;
3153 /* Any other constant requires an additional 2 cycle pc-relative load.
3154 This case is probably unnecessary. */
3155 return 3 * cost_scale;
3156 }
3157
3158 /* Return the cost of an addition or a subtraction. */
3159 static inline int
3160 addsubcosts (rtx x)
3161 {
3162 if (GET_MODE (x) == SImode)
3163 {
3164 /* The addc or subc patterns will eventually become one or two
3165 instructions. Below are some costs for some of the patterns
3166 which combine would reject because the costs of the individual
3167 insns in the patterns are lower.
3168
3169 FIXME: It would be much easier if we had something like insn cost
3170 attributes and the cost calculation machinery used those attributes
3171 in the first place. This would eliminate redundant recog-like C
3172 code to calculate costs of complex patterns. */
3173 rtx op0 = XEXP (x, 0);
3174 rtx op1 = XEXP (x, 1);
3175
3176 if (GET_CODE (x) == PLUS)
3177 {
3178 if (GET_CODE (op0) == AND
3179 && XEXP (op0, 1) == const1_rtx
3180 && (GET_CODE (op1) == PLUS
3181 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3182 return 1;
3183
3184 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3185 && GET_CODE (op1) == LSHIFTRT
3186 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3187 return 1;
3188 }
3189 }
3190
3191 /* On SH1-4 we have only max. SImode operations.
3192 Double the cost for modes > SImode. */
3193 const int cost_scale = !TARGET_SHMEDIA
3194 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3195 ? 2 : 1;
3196
3197 /* Adding a register is a single cycle insn. */
3198 if (REG_P (XEXP (x, 1))
3199 || GET_CODE (XEXP (x, 1)) == SUBREG)
3200 return 1 * cost_scale;
3201
3202 /* Likewise for small constants. */
3203 if (CONST_INT_P (XEXP (x, 1))
3204 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3205 return 1 * cost_scale;
3206
3207 if (TARGET_SHMEDIA)
3208 switch (GET_CODE (XEXP (x, 1)))
3209 {
3210 case CONST:
3211 case LABEL_REF:
3212 case SYMBOL_REF:
3213 return TARGET_SHMEDIA64 ? 5 : 3;
3214
3215 case CONST_INT:
3216 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3217 return 2;
3218 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3219 return 3;
3220 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3221 return 4;
3222
3223 /* Fall through. */
3224 default:
3225 return 5;
3226 }
3227
3228 /* Any other constant requires a 2 cycle pc-relative load plus an
3229 addition. */
3230 return 3 * cost_scale;
3231 }
3232
3233 /* Return the cost of a multiply. */
3234 static inline int
3235 multcosts (rtx x ATTRIBUTE_UNUSED)
3236 {
3237 if (sh_multcost >= 0)
3238 return sh_multcost;
3239 if (TARGET_SHMEDIA)
3240 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3241 accept constants. Ideally, we would use a cost of one or two and
3242 add the cost of the operand, but disregard the latter when inside loops
3243 and loop invariant code motion is still to follow.
3244 Using a multiply first and splitting it later if it's a loss
3245 doesn't work because of different sign / zero extension semantics
3246 of multiplies vs. shifts. */
3247 return optimize_size ? 2 : 3;
3248
3249 if (TARGET_SH2)
3250 {
3251 /* We have a mul insn, so we can never take more than the mul and the
3252 read of the mac reg, but count more because of the latency and extra
3253 reg usage. */
3254 if (optimize_size)
3255 return 2;
3256 return 3;
3257 }
3258
3259 /* If we're aiming at small code, then just count the number of
3260 insns in a multiply call sequence. */
3261 if (optimize_size)
3262 return 5;
3263
3264 /* Otherwise count all the insns in the routine we'd be calling too. */
3265 return 20;
3266 }
3267
3268 /* Compute a (partial) cost for rtx X. Return true if the complete
3269 cost has been computed, and false if subexpressions should be
3270 scanned. In either case, *TOTAL contains the cost result. */
3271 static bool
3272 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3273 int *total, bool speed ATTRIBUTE_UNUSED)
3274 {
3275 switch (code)
3276 {
3277 /* The lower-subreg pass decides whether to split multi-word regs
3278 into individual regs by looking at the cost for a SET of certain
3279 modes with the following patterns:
3280 (set (reg) (reg))
3281 (set (reg) (const_int 0))
3282 On machines that support vector-move operations a multi-word move
3283 is the same cost as individual reg move. On SH there is no
3284 vector-move, so we have to provide the correct cost in the number
3285 of move insns to load/store the reg of the mode in question. */
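/* E.g. a DImode reg-reg set is costed below as 8 / 4 = 2 move insns,
   while an SImode set costs a single move insn.  */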
3286 case SET:
3287 if (register_operand (SET_DEST (x), VOIDmode)
3288 && (register_operand (SET_SRC (x), VOIDmode)
3289 || satisfies_constraint_Z (SET_SRC (x))))
3290 {
3291 const enum machine_mode mode = GET_MODE (SET_DEST (x));
3292 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3293 / mov_insn_size (mode, TARGET_SH2A));
3294 return true;
3295 }
3296 return false;
3297
3298 /* The cost of a mem access is mainly the cost of the address mode. */
3299 case MEM:
3300 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3301 true);
3302 return true;
3303
3304 /* The cost of a sign or zero extend depends on whether the source is a
3305 reg or a mem. In case of a mem take the address into account. */
3306 case SIGN_EXTEND:
3307 if (REG_P (XEXP (x, 0)))
3308 {
3309 *total = COSTS_N_INSNS (1);
3310 return true;
3311 }
3312 if (MEM_P (XEXP (x, 0)))
3313 {
3314 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3315 GET_MODE (XEXP (x, 0)),
3316 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3317 return true;
3318 }
3319 return false;
3320
3321 case ZERO_EXTEND:
3322 if (REG_P (XEXP (x, 0)))
3323 {
3324 *total = COSTS_N_INSNS (1);
3325 return true;
3326 }
3327 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3328 && (GET_MODE (XEXP (x, 0)) == QImode
3329 || GET_MODE (XEXP (x, 0)) == HImode))
3330 {
3331 /* Handle SH2A's movu.b and movu.w insn. */
3332 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3333 GET_MODE (XEXP (x, 0)),
3334 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3335 return true;
3336 }
3337 return false;
3338
3339 /* mems for SFmode and DFmode can be inside a parallel due to
3340 the way the fpscr is handled. */
3341 case PARALLEL:
3342 for (int i = 0; i < XVECLEN (x, 0); i++)
3343 {
3344 rtx xx = XVECEXP (x, 0, i);
3345 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3346 {
3347 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3348 GET_MODE (XEXP (xx, 0)),
3349 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3350 return true;
3351 }
3352 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3353 {
3354 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3355 GET_MODE (XEXP (xx, 1)),
3356 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3357 return true;
3358 }
3359 }
3360
3361 if (sh_1el_vec (x, VOIDmode))
3362 *total = outer_code != SET;
3363 else if (sh_rep_vec (x, VOIDmode))
3364 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3365 + (outer_code != SET));
3366 else
3367 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3368 return true;
3369
3370 case CONST_INT:
3371 if (TARGET_SHMEDIA)
3372 {
3373 if (INTVAL (x) == 0)
3374 *total = 0;
3375 else if (outer_code == AND && and_operand ((x), DImode))
3376 *total = 0;
3377 else if ((outer_code == IOR || outer_code == XOR
3378 || outer_code == PLUS)
3379 && CONST_OK_FOR_I10 (INTVAL (x)))
3380 *total = 0;
3381 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3382 *total = COSTS_N_INSNS (outer_code != SET);
3383 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3384 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3385 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3386 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3387 else
3388 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3389 return true;
3390 }
3391 if (CONST_OK_FOR_I08 (INTVAL (x)))
3392 *total = 0;
3393 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3394 && CONST_OK_FOR_K08 (INTVAL (x)))
3395 *total = 1;
3396 /* prepare_cmp_insn will force costly constants into registers before
3397 the cbranch[sd]i4 patterns can see them, so preserve potentially
3398 interesting ones not covered by I08 above. */
3399 else if (outer_code == COMPARE
3400 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3401 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3402 || INTVAL (x) == 0x7fffffff
3403 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3404 *total = 1;
3405 else
3406 *total = 8;
3407 return true;
3408
3409 case EQ:
3410 /* An and with a constant compared against zero is
3411 most likely going to be a TST #imm, R0 instruction.
3412 Notice that this does not catch the zero_extract variants from
3413 the md file. */
3414 if (GET_CODE (XEXP (x, 0)) == AND
3415 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3416 {
3417 *total = 1;
3418 return true;
3419 }
3420 else
3421 return false;
3422
3423 case SMIN:
3424 case SMAX:
3425 /* This is most likely a clips.b or clips.w insn that is being made up
3426 by combine. */
3427 if (TARGET_SH2A
3428 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3429 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3430 && REG_P (XEXP (XEXP (x, 0), 0))
3431 && CONST_INT_P (XEXP (x, 1)))
3432 {
3433 *total = COSTS_N_INSNS (1);
3434 return true;
3435 }
3436 else
3437 return false;
3438
3439 case CONST:
3440 case LABEL_REF:
3441 case SYMBOL_REF:
3442 if (TARGET_SHMEDIA64)
3443 *total = COSTS_N_INSNS (4);
3444 else if (TARGET_SHMEDIA32)
3445 *total = COSTS_N_INSNS (2);
3446 else
3447 *total = 5;
3448 return true;
3449
3450 case CONST_DOUBLE:
3451 if (TARGET_SHMEDIA)
3452 *total = COSTS_N_INSNS (4);
3453 /* prepare_cmp_insn will force costly constants into registers before
3454 the cbranchdi4 pattern can see them, so preserve potentially
3455 interesting ones. */
3456 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3457 *total = 1;
3458 else
3459 *total = 10;
3460 return true;
3461
3462 case CONST_VECTOR:
3463 /* FIXME: This looks broken. Only the last statement has any effect.
3464 Probably this could be folded with the PARALLEL case? */
3465 if (x == CONST0_RTX (GET_MODE (x)))
3466 *total = 0;
3467 else if (sh_1el_vec (x, VOIDmode))
3468 *total = outer_code != SET;
3469 if (sh_rep_vec (x, VOIDmode))
3470 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3471 + (outer_code != SET));
3472 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3473 return true;
3474
3475 case PLUS:
3476 case MINUS:
3477 *total = COSTS_N_INSNS (addsubcosts (x));
3478 return true;
3479
3480 case AND:
3481 case XOR:
3482 case IOR:
3483 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3484 return true;
3485
3486 case MULT:
3487 *total = COSTS_N_INSNS (multcosts (x));
3488 return true;
3489
3490 case LT:
3491 case GE:
3492 /* div0s sign comparison. */
3493 if (GET_CODE (XEXP (x, 0)) == XOR
3494 && REG_P ((XEXP (XEXP (x, 0), 0)))
3495 && REG_P ((XEXP (XEXP (x, 0), 1)))
3496 && satisfies_constraint_Z (XEXP (x, 1)))
3497 {
3498 *total = COSTS_N_INSNS (1);
3499 return true;
3500 }
3501 else
3502 return false;
3503
3504 case LSHIFTRT:
3505 /* div0s sign comparison. */
3506 if (GET_CODE (XEXP (x, 0)) == XOR
3507 && REG_P ((XEXP (XEXP (x, 0), 0)))
3508 && REG_P ((XEXP (XEXP (x, 0), 1)))
3509 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3510 {
3511 *total = COSTS_N_INSNS (1);
3512 return true;
3513 }
3514 /* Fall through to shiftcosts. */
3515 case ASHIFT:
3516 case ASHIFTRT:
3517 {
3518 int cost = shiftcosts (x);
3519 if (cost < 0)
3520 return false;
3521 *total = COSTS_N_INSNS (cost);
3522 return true;
3523 }
3524
3525 case DIV:
3526 case UDIV:
3527 case MOD:
3528 case UMOD:
3529 *total = COSTS_N_INSNS (20);
3530 return true;
3531
3532 case FLOAT:
3533 case FIX:
3534 *total = 100;
3535 return true;
3536
3537 default:
3538 return false;
3539 }
3540 }
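/* Illustrative outcomes of the CONST_INT case above (a sketch, not an
   exhaustive table): a small constant such as 5 satisfies CONST_OK_FOR_I08
   and is reported as free, whereas an arbitrary constant like 0x12345 that
   fits none of the special cases gets the catch-all cost of 8, reflecting
   that it will have to come from the constant pool.  */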
3541
3542 /* Determine the size of the fundamental move insn that will be used
3543 for the specified mode. */
3544 static inline int
3545 mov_insn_size (enum machine_mode mode, bool consider_sh2a)
3546 {
3547 const int mode_sz = GET_MODE_SIZE (mode);
3548
3549 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3550 || (TARGET_FMOVD && mode == DFmode))
3551 return mode_sz;
3552 else
3553 {
3554 /* The max. available mode for actual move insns is SImode.
3555 Larger accesses will be split into multiple loads/stores. */
3556 const int max_mov_sz = GET_MODE_SIZE (SImode);
3557 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3558 }
3559 }
3560
3561 /* Determine the maximum possible displacement for a move insn for the
3562 specified mode. */
3563 static int
3564 max_mov_insn_displacement (enum machine_mode mode, bool consider_sh2a)
3565 {
3566 /* The 4 byte displacement move insns are the same as the 2 byte
3567 versions but take a 12 bit displacement. All we need to do is to
3568 scale the max. displacement value accordingly. */
3569 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3570
3571 /* SH2A supports FPU move insns with 12 bit displacements.
3572 Other variants do not support any kind of displacements for
3573 FPU move insns. */
3574 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3575 return 0;
3576 else
3577 {
3578 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3579 const int mode_sz = GET_MODE_SIZE (mode);
3580 int r = 15 * mov_insn_sz * disp_scale;
3581
3582 /* If the mov insn will be split into multiple loads/stores, the
3583 maximum possible displacement is a bit smaller. */
3584 if (mode_sz > mov_insn_sz)
3585 r -= mode_sz - mov_insn_sz;
3586 return r;
3587 }
3588 }
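/* Some example limits computed by the function above (a sketch, read off
   the 4 bit scaled displacement encoding of the classic move insns): for
   QImode the result is 15 * 1 = 15, for HImode 15 * 2 = 30 and for SImode
   15 * 4 = 60.  A DImode move is split into SImode parts, so its limit is
   reduced to 60 - 4 = 56.  With consider_sh2a the 12 bit encodings scale
   these by 4095 / 15 = 273, e.g. 15 * 4 * 273 = 16380 for SImode.  */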
3589
3590 /* Determine the alignment mask for a move insn of the
3591 specified mode. */
3592 static inline int
3593 mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
3594 {
3595 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3596 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3597 }
3598
3599 /* Return the displacement value of a displacement address. */
3600 static inline HOST_WIDE_INT
3601 disp_addr_displacement (rtx x)
3602 {
3603 gcc_assert (satisfies_constraint_Sdd (x));
3604 return INTVAL (XEXP (XEXP (x, 0), 1));
3605 }
3606
3607 /* Compute the cost of an address. */
3608 static int
3609 sh_address_cost (rtx x, enum machine_mode mode,
3610 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3611 {
3612 /* 'GBR + 0'. Account one more because of R0 restriction. */
3613 if (REG_P (x) && REGNO (x) == GBR_REG)
3614 return 2;
3615
3616 /* Simple reg, post-inc, pre-dec addressing. */
3617 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3618 return 1;
3619
3620 /* 'reg + disp' addressing. */
3621 if (GET_CODE (x) == PLUS
3622 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3623 {
3624 /* 'GBR + disp'. Account one more because of R0 restriction. */
3625 if (REGNO (XEXP (x, 0)) == GBR_REG
3626 && gbr_displacement (XEXP (x, 1), mode))
3627 return 2;
3628
3629 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3630
3631 if (offset == 0)
3632 return 1;
3633
3634 /* The displacement would fit into a 2 byte move insn.
3635 HImode and QImode loads/stores with displacement put pressure on
3636 R0 which will most likely require another reg copy. Thus account
3637 a higher cost for that. */
3638 if (offset > 0 && offset <= max_mov_insn_displacement (mode, false))
3639 return (mode == HImode || mode == QImode) ? 2 : 1;
3640
3641 /* The displacement would fit into a 4 byte move insn (SH2A). */
3642 if (TARGET_SH2A
3643 && offset > 0 && offset <= max_mov_insn_displacement (mode, true))
3644 return 2;
3645
3646 /* The displacement is probably out of range and will require extra
3647 calculations. */
3648 return 3;
3649 }
3650
3651 /* 'reg + reg' addressing. Account a slightly higher cost because of
3652 increased pressure on R0. */
3653 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3654 && ! TARGET_SHMEDIA)
3655 return 3;
3656
3657 /* Not sure what it is - probably expensive. */
3658 return 10;
3659 }
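/* A few example costs produced by the function above (illustrative only,
   read off the cases handled there): plain @Rn, @Rn+ and @-Rn addresses
   cost 1; @(disp,Rn) costs 1 for an SImode access with a fitting
   displacement but 2 for QImode/HImode because of the implied R0 usage;
   a GBR based address costs 2; @(R0,Rn) style reg + reg addressing costs
   3; anything unrecognized is assumed expensive and costs 10.  */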
3660
3661 /* Code to expand a shift. */
3662 static void
3663 gen_ashift (int type, int n, rtx reg)
3664 {
3665 rtx n_rtx;
3666
3667 /* Negative values here come from the amount entries of the shift
3668 sequence tables (ashl_lshr_seq and ext_ashl_lshr_seq). */
3668 if (n < 0)
3669 {
3670 if (type == ASHIFT)
3671 type = LSHIFTRT;
3672 else
3673 type = ASHIFT;
3674 n = -n;
3675 }
3676
3677 n_rtx = GEN_INT (n);
3678 gcc_assert (satisfies_constraint_P27 (n_rtx));
3679
3680 switch (type)
3681 {
3682 case ASHIFTRT:
3683 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3684 break;
3685 case LSHIFTRT:
3686 if (n == 1)
3687 emit_insn (gen_shlr (reg, reg));
3688 else
3689 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3690 break;
3691 case ASHIFT:
3692 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3693 break;
3694 default:
3695 gcc_unreachable ();
3696 }
3697 }
3698
3699 /* Code to expand a HImode shift. */
3700 static void
3701 gen_ashift_hi (int type, int n, rtx reg)
3702 {
3703 /* Negative values here come from the amount entries of the shift
3704 sequence tables (ashl_lshr_seq and ext_ashl_lshr_seq). */
3704 if (n < 0)
3705 {
3706 if (type == ASHIFT)
3707 type = LSHIFTRT;
3708 else
3709 type = ASHIFT;
3710 n = -n;
3711 }
3712
3713 switch (type)
3714 {
3715 case ASHIFTRT:
3716 case LSHIFTRT:
3717 /* We don't have HImode right shift operations because using the
3718 ordinary 32 bit shift instructions for that doesn't generate proper
3719 zero/sign extension.
3720 gen_ashift_hi is only called in contexts where we know that the
3721 sign extension works out correctly. */
3722 {
3723 int offset = 0;
3724 if (GET_CODE (reg) == SUBREG)
3725 {
3726 offset = SUBREG_BYTE (reg);
3727 reg = SUBREG_REG (reg);
3728 }
3729 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3730 break;
3731 }
3732 case ASHIFT:
3733 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3734 break;
3735 }
3736 }
3737
3738 /* Output RTL to split a constant shift into its component SH constant
3739 shift instructions. */
3740 void
3741 gen_shifty_op (int code, rtx *operands)
3742 {
3743 int value = INTVAL (operands[2]);
3744 int max, i;
3745
3746 /* Truncate the shift count in case it is out of bounds. */
3747 value = value & 31;
3748
3749 if (value == 31)
3750 {
3751 if (code == LSHIFTRT)
3752 {
3753 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3754 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3755 return;
3756 }
3757 else if (code == ASHIFT)
3758 {
3759 /* There is a two instruction sequence for 31 bit left shifts,
3760 but it requires r0. */
3761 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3762 {
3763 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3764 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3765 return;
3766 }
3767 }
3768 }
3769 else if (value == 0)
3770 {
3771 /* This can happen even when optimizing, if there were subregs before
3772 reload. Don't output a nop here, as this is never optimized away;
3773 use a no-op move instead. */
3774 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3775 return;
3776 }
3777
3778 max = ashl_lshr_seq[value].insn_count;
3779 for (i = 0; i < max; i++)
3780 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3781 }
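/* For example (a sketch of what the above typically produces): the SH only
   has constant shift insns for counts of 1, 2, 8 and 16, so a shift by 10
   is described in ashl_lshr_seq as the two step sequence 8 + 2, and
   gen_shifty_op emits one gen_ashift call per step.  */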
3782
3783 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3784 don't matter. */
3785 void
3786 gen_shifty_hi_op (int code, rtx *operands)
3787 {
3788 int value = INTVAL (operands[2]);
3789 int max, i;
3790 void (*gen_fun) (int, int, rtx);
3791
3792 /* This operation is used by and_shl for SImode values with a few
3793 high bits known to be cleared. */
3794 value &= 31;
3795 if (value == 0)
3796 {
3797 emit_insn (gen_nop ());
3798 return;
3799 }
3800
3801 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3802 if (code == ASHIFT)
3803 {
3804 max = ext_ashl_lshr_seq[value].insn_count;
3805 for (i = 0; i < max; i++)
3806 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3807 }
3808 else
3809 /* When shifting right, emit the shifts in reverse order, so that
3810 solitary negative values come first. */
3811 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3812 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3813 }
3814
3815 /* Output RTL for an arithmetic right shift.
3816 ??? Rewrite to use super-optimizer sequences. */
3817 bool
3818 expand_ashiftrt (rtx *operands)
3819 {
3820 rtx wrk;
3821 char func[18];
3822 int value;
3823
3824 if (TARGET_DYNSHIFT)
3825 {
3826 if (!CONST_INT_P (operands[2]))
3827 {
3828 rtx count = copy_to_mode_reg (SImode, operands[2]);
3829 emit_insn (gen_negsi2 (count, count));
3830 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3831 return true;
3832 }
3833 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3834 > 1 + SH_DYNAMIC_SHIFT_COST)
3835 {
3836 rtx count
3837 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3838 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3839 return true;
3840 }
3841 }
3842 if (!CONST_INT_P (operands[2]))
3843 return false;
3844
3845 value = INTVAL (operands[2]) & 31;
3846
3847 if (value == 31)
3848 {
3849 /* If we are called from abs expansion, arrange things so that we
3850 can use a single MT instruction that doesn't clobber the source,
3851 if LICM can hoist out the load of the constant zero. */
3852 if (currently_expanding_to_rtl)
3853 {
3854 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3855 operands[1]));
3856 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3857 return true;
3858 }
3859 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3860 return true;
3861 }
3862 else if (value >= 16 && value <= 19)
3863 {
3864 wrk = gen_reg_rtx (SImode);
3865 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3866 value -= 16;
3867 while (value--)
3868 gen_ashift (ASHIFTRT, 1, wrk);
3869 emit_move_insn (operands[0], wrk);
3870 return true;
3871 }
3872 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3873 else if (value <= 5)
3874 {
3875 wrk = gen_reg_rtx (SImode);
3876 emit_move_insn (wrk, operands[1]);
3877 while (value--)
3878 gen_ashift (ASHIFTRT, 1, wrk);
3879 emit_move_insn (operands[0], wrk);
3880 return true;
3881 }
3882
3883 wrk = gen_reg_rtx (Pmode);
3884
3885 /* Load the value into an arg reg and call a helper. */
3886 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3887 sprintf (func, "__ashiftrt_r4_%d", value);
3888 function_symbol (wrk, func, SFUNC_STATIC);
3889 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3890 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3891 return true;
3892 }
3893
3894 /* Try to find a good way to implement the combiner pattern
3895 [(set (match_operand:SI 0 "register_operand" "r")
3896 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3897 (match_operand:SI 2 "const_int_operand" "n"))
3898 (match_operand:SI 3 "const_int_operand" "n"))) .
3899 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3900 return 0 for simple right / left or left/right shift combination.
3901 return 1 for a combination of shifts with zero_extend.
3902 return 2 for a combination of shifts with an AND that needs r0.
3903 return 3 for a combination of shifts with an AND that needs an extra
3904 scratch register, when the three highmost bits of the AND mask are clear.
3905 return 4 for a combination of shifts with an AND that needs an extra
3906 scratch register, when any of the three highmost bits of the AND mask
3907 is set.
3908 If ATTRP is set, store an initial right shift width in ATTRP[0],
3909 and the instruction length in ATTRP[1] . These values are not valid
3910 when returning 0.
3911 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3912 shift_amounts for the last shift value that is to be used before the
3913 sign extend. */
3914 int
3915 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3916 {
3917 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3918 int left = INTVAL (left_rtx), right;
3919 int best = 0;
3920 int cost, best_cost = 10000;
3921 int best_right = 0, best_len = 0;
3922 int i;
3923 int can_ext;
3924
3925 if (left < 0 || left > 31)
3926 return 0;
3927 if (CONST_INT_P (mask_rtx))
3928 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3929 else
3930 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3931 /* Can this be expressed as a right shift / left shift pair? */
3932 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3933 right = exact_log2 (lsb);
3934 mask2 = ~(mask + lsb - 1);
3935 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3936 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
3937 if (! mask2)
3938 best_cost = ashl_lshr_seq[right].insn_count
3939 + ashl_lshr_seq[right + left].insn_count;
3940 /* mask has no trailing zeroes <==> ! right */
3941 else if (! right && mask2 == ~(lsb2 - 1))
3942 {
3943 int late_right = exact_log2 (lsb2);
3944 best_cost = ashl_lshr_seq[left + late_right].insn_count
3945 + ashl_lshr_seq[late_right].insn_count;
3946 }
3947 /* Try to use zero extend. */
3948 if (mask2 == ~(lsb2 - 1))
3949 {
3950 int width, first;
3951
3952 for (width = 8; width <= 16; width += 8)
3953 {
3954 /* Can we zero-extend right away? */
3955 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3956 {
3957 cost = 1 + ext_ashl_lshr_seq[right].insn_count
3958 + ext_ashl_lshr_seq[left + right].insn_count;
3959 if (cost < best_cost)
3960 {
3961 best = 1;
3962 best_cost = cost;
3963 best_right = right;
3964 best_len = cost;
3965 if (attrp)
3966 attrp[2] = -1;
3967 }
3968 continue;
3969 }
3970 /* ??? Could try to put zero extend into initial right shift,
3971 or even shift a bit left before the right shift. */
3972 /* Determine value of first part of left shift, to get to the
3973 zero extend cut-off point. */
3974 first = width - exact_log2 (lsb2) + right;
3975 if (first >= 0 && right + left - first >= 0)
3976 {
3977 cost = ext_ashl_lshr_seq[right].insn_count
3978 + ext_ashl_lshr_seq[first].insn_count + 1
3979 + ext_ashl_lshr_seq[right + left - first].insn_count;
3980
3981 if (cost < best_cost)
3982 {
3983 best = 1;
3984 best_cost = cost;
3985 best_right = right;
3986 best_len = cost;
3987 if (attrp)
3988 attrp[2] = first;
3989 }
3990 }
3991 }
3992 }
3993 /* Try to use r0 AND pattern */
3994 for (i = 0; i <= 2; i++)
3995 {
3996 if (i > right)
3997 break;
3998 if (! CONST_OK_FOR_K08 (mask >> i))
3999 continue;
4000 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4001 if (cost < best_cost)
4002 {
4003 best = 2;
4004 best_cost = cost;
4005 best_right = i;
4006 best_len = cost - 1;
4007 }
4008 }
4009 /* Try to use a scratch register to hold the AND operand. */
4010 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4011 for (i = 0; i <= 2; i++)
4012 {
4013 if (i > right)
4014 break;
4015 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4016 + (can_ext
4017 ? ext_ashl_lshr_seq
4018 : ashl_lshr_seq)[left + i].insn_count;
4019 if (cost < best_cost)
4020 {
4021 best = 4 - can_ext;
4022 best_cost = cost;
4023 best_right = i;
4024 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4025 }
4026 }
4027
4028 if (attrp)
4029 {
4030 attrp[0] = best_right;
4031 attrp[1] = best_len;
4032 }
4033 return best;
4034 }
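/* An illustration of the classification above (a sketch, assuming the
   usual one-instruction table entries for shifts by 8 and 16): for the
   combination (x << 8) & 0xff00 a plain shift pair, x << 16 followed by a
   logical right shift by 8, already produces the result in two insns, so
   the simple combination (return value 0) is no worse than the zero
   extend variant and is what gets selected.  */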
4035
4036 /* This is used in length attributes of the unnamed instructions
4037 corresponding to shl_and_kind return values of 1 and 2. */
4038 int
4039 shl_and_length (rtx insn)
4040 {
4041 rtx set_src, left_rtx, mask_rtx;
4042 int attributes[3];
4043
4044 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4045 left_rtx = XEXP (XEXP (set_src, 0), 1);
4046 mask_rtx = XEXP (set_src, 1);
4047 shl_and_kind (left_rtx, mask_rtx, attributes);
4048 return attributes[1];
4049 }
4050
4051 /* This is used in length attribute of the and_shl_scratch instruction. */
4052 int
4053 shl_and_scr_length (rtx insn)
4054 {
4055 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4056 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4057 rtx op = XEXP (set_src, 0);
4058 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4059 op = XEXP (XEXP (op, 0), 0);
4060 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4061 }
4062
4063 /* Generate rtl for instructions for which shl_and_kind advised a particular
4064 method of generating them, i.e. returned zero. */
4065 bool
4066 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4067 {
4068 int attributes[3];
4069 unsigned HOST_WIDE_INT mask;
4070 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4071 int right, total_shift;
4072 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4073
4074 right = attributes[0];
4075 total_shift = INTVAL (left_rtx) + right;
4076 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4077 switch (kind)
4078 {
4079 default:
4080 return true;
4081 case 1:
4082 {
4083 int first = attributes[2];
4084 rtx operands[3];
4085
4086 if (first < 0)
4087 {
4088 emit_insn ((mask << right) <= 0xff
4089 ? gen_zero_extendqisi2 (dest,
4090 gen_lowpart (QImode, source))
4091 : gen_zero_extendhisi2 (dest,
4092 gen_lowpart (HImode, source)));
4093 source = dest;
4094 }
4095 if (source != dest)
4096 emit_insn (gen_movsi (dest, source));
4097 operands[0] = dest;
4098 if (right)
4099 {
4100 operands[2] = GEN_INT (right);
4101 gen_shifty_hi_op (LSHIFTRT, operands);
4102 }
4103 if (first > 0)
4104 {
4105 operands[2] = GEN_INT (first);
4106 gen_shifty_hi_op (ASHIFT, operands);
4107 total_shift -= first;
4108 mask <<= first;
4109 }
4110 if (first >= 0)
4111 emit_insn (mask <= 0xff
4112 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4113 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4114 if (total_shift > 0)
4115 {
4116 operands[2] = GEN_INT (total_shift);
4117 gen_shifty_hi_op (ASHIFT, operands);
4118 }
4119 break;
4120 }
4121 case 4:
4122 shift_gen_fun = gen_shifty_op;
4123 case 3:
4124 /* If the topmost bit that matters is set, set the topmost bits
4125 that don't matter. This way, we might be able to get a shorter
4126 signed constant. */
4127 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4128 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4129 case 2:
4130 /* Don't expand fine-grained when combining, because that will
4131 make the pattern fail. */
4132 if (currently_expanding_to_rtl
4133 || reload_in_progress || reload_completed)
4134 {
4135 rtx operands[3];
4136
4137 /* Cases 3 and 4 should be handled by this split
4138 only while combining. */
4139 gcc_assert (kind <= 2);
4140 if (right)
4141 {
4142 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4143 source = dest;
4144 }
4145 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4146 if (total_shift)
4147 {
4148 operands[0] = dest;
4149 operands[1] = dest;
4150 operands[2] = GEN_INT (total_shift);
4151 shift_gen_fun (ASHIFT, operands);
4152 }
4153 break;
4154 }
4155 else
4156 {
4157 int neg = 0;
4158 if (kind != 4 && total_shift < 16)
4159 {
4160 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4161 if (neg > 0)
4162 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4163 else
4164 neg = 0;
4165 }
4166 emit_insn (gen_and_shl_scratch (dest, source,
4167 GEN_INT (right),
4168 GEN_INT (mask),
4169 GEN_INT (total_shift + neg),
4170 GEN_INT (neg)));
4171 emit_insn (gen_movsi (dest, dest));
4172 break;
4173 }
4174 }
4175 return false;
4176 }
4177
4178 /* Try to find a good way to implement the combiner pattern
4179 [(set (match_operand:SI 0 "register_operand" "=r")
4180 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4181 (match_operand:SI 2 "const_int_operand" "n")
4182 (match_operand:SI 3 "const_int_operand" "n")
4183 (const_int 0)))
4184 (clobber (reg:SI T_REG))]
4185 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4186 return 0 for simple left / right shift combination.
4187 return 1 for left shift / 8 bit sign extend / left shift.
4188 return 2 for left shift / 16 bit sign extend / left shift.
4189 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4190 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4191 return 5 for left shift / 16 bit sign extend / right shift
4192 return 6 for < 8 bit sign extend / left shift.
4193 return 7 for < 8 bit sign extend / left shift / single right shift.
4194 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4195 int
4196 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4197 {
4198 int left, size, insize, ext;
4199 int cost = 0, best_cost;
4200 int kind;
4201
4202 left = INTVAL (left_rtx);
4203 size = INTVAL (size_rtx);
4204 insize = size - left;
4205 gcc_assert (insize > 0);
4206 /* Default to left / right shift. */
4207 kind = 0;
4208 best_cost = ashl_lshr_seq[32 - insize].insn_count
4209 + ashl_lshr_seq[32 - size].insn_count;
4210 if (size <= 16)
4211 {
4212 /* 16 bit shift / sign extend / 16 bit shift */
4213 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4214 + ashl_lshr_seq[16 - size].insn_count;
4215 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4216 below, by alternative 3 or something even better. */
4217 if (cost < best_cost)
4218 {
4219 kind = 5;
4220 best_cost = cost;
4221 }
4222 }
4223 /* Try a plain sign extend between two shifts. */
4224 for (ext = 16; ext >= insize; ext -= 8)
4225 {
4226 if (ext <= size)
4227 {
4228 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4229 + ashl_lshr_seq[size - ext].insn_count;
4230 if (cost < best_cost)
4231 {
4232 kind = ext / (unsigned) 8;
4233 best_cost = cost;
4234 }
4235 }
4236 /* Check if we can do a sloppy shift with a final signed shift
4237 restoring the sign. */
4238 if (EXT_SHIFT_SIGNED (size - ext))
4239 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4240 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4241 /* If not, maybe it's still cheaper to do the second shift sloppy,
4242 and do a final sign extend? */
4243 else if (size <= 16)
4244 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4245 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4246 + 1;
4247 else
4248 continue;
4249 if (cost < best_cost)
4250 {
4251 kind = ext / (unsigned) 8 + 2;
4252 best_cost = cost;
4253 }
4254 }
4255 /* Check if we can sign extend in r0 */
4256 if (insize < 8)
4257 {
4258 cost = 3 + ashl_lshr_seq[left].insn_count;
4259 if (cost < best_cost)
4260 {
4261 kind = 6;
4262 best_cost = cost;
4263 }
4264 /* Try the same with a final signed shift. */
4265 if (left < 31)
4266 {
4267 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4268 if (cost < best_cost)
4269 {
4270 kind = 7;
4271 best_cost = cost;
4272 }
4273 }
4274 }
4275 if (TARGET_DYNSHIFT)
4276 {
4277 /* Try to use a dynamic shift. */
4278 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4279 if (cost < best_cost)
4280 {
4281 kind = 0;
4282 best_cost = cost;
4283 }
4284 }
4285 if (costp)
4286 *costp = cost;
4287 return kind;
4288 }
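/* A small worked example for the function above (a sketch, assuming the
   shift tables record zero insns for a shift count of zero): with LEFT = 0
   and SIZE = 8 the pattern is just an 8 bit sign extension; the loop over
   ext = 16, 8 finds a cost of 1 at ext = 8 and the returned kind is 1,
   i.e. a single exts.b with both surrounding shifts degenerating to
   nothing.  */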
4289
4290 /* Function to be used in the length attribute of the instructions
4291 implementing this pattern. */
4292 int
4293 shl_sext_length (rtx insn)
4294 {
4295 rtx set_src, left_rtx, size_rtx;
4296 int cost;
4297
4298 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4299 left_rtx = XEXP (XEXP (set_src, 0), 1);
4300 size_rtx = XEXP (set_src, 1);
4301 shl_sext_kind (left_rtx, size_rtx, &cost);
4302 return cost;
4303 }
4304
4305 /* Generate rtl for this pattern */
4306 bool
4307 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4308 {
4309 int kind;
4310 int left, size, insize, cost;
4311 rtx operands[3];
4312
4313 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4314 left = INTVAL (left_rtx);
4315 size = INTVAL (size_rtx);
4316 insize = size - left;
4317 switch (kind)
4318 {
4319 case 1:
4320 case 2:
4321 case 3:
4322 case 4:
4323 {
4324 int ext = kind & 1 ? 8 : 16;
4325 int shift2 = size - ext;
4326
4327 /* Don't expand fine-grained when combining, because that will
4328 make the pattern fail. */
4329 if (! currently_expanding_to_rtl
4330 && ! reload_in_progress && ! reload_completed)
4331 {
4332 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4333 emit_insn (gen_movsi (dest, source));
4334 break;
4335 }
4336 if (dest != source)
4337 emit_insn (gen_movsi (dest, source));
4338 operands[0] = dest;
4339 if (ext - insize)
4340 {
4341 operands[2] = GEN_INT (ext - insize);
4342 gen_shifty_hi_op (ASHIFT, operands);
4343 }
4344 emit_insn (kind & 1
4345 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4346 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4347 if (kind <= 2)
4348 {
4349 if (shift2)
4350 {
4351 operands[2] = GEN_INT (shift2);
4352 gen_shifty_op (ASHIFT, operands);
4353 }
4354 }
4355 else
4356 {
4357 if (shift2 > 0)
4358 {
4359 if (EXT_SHIFT_SIGNED (shift2))
4360 {
4361 operands[2] = GEN_INT (shift2 + 1);
4362 gen_shifty_op (ASHIFT, operands);
4363 operands[2] = const1_rtx;
4364 gen_shifty_op (ASHIFTRT, operands);
4365 break;
4366 }
4367 operands[2] = GEN_INT (shift2);
4368 gen_shifty_hi_op (ASHIFT, operands);
4369 }
4370 else if (shift2)
4371 {
4372 operands[2] = GEN_INT (-shift2);
4373 gen_shifty_hi_op (LSHIFTRT, operands);
4374 }
4375 emit_insn (size <= 8
4376 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4377 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4378 }
4379 break;
4380 }
4381 case 5:
4382 {
4383 int i = 16 - size;
4384 if (! currently_expanding_to_rtl
4385 && ! reload_in_progress && ! reload_completed)
4386 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4387 else
4388 {
4389 operands[0] = dest;
4390 operands[2] = GEN_INT (16 - insize);
4391 gen_shifty_hi_op (ASHIFT, operands);
4392 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4393 }
4394 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4395 while (--i >= 0)
4396 gen_ashift (ASHIFTRT, 1, dest);
4397 break;
4398 }
4399 case 6:
4400 case 7:
4401 /* Don't expand fine-grained when combining, because that will
4402 make the pattern fail. */
4403 if (! currently_expanding_to_rtl
4404 && ! reload_in_progress && ! reload_completed)
4405 {
4406 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4407 emit_insn (gen_movsi (dest, source));
4408 break;
4409 }
4410 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4411 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4412 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4413 operands[0] = dest;
4414 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4415 gen_shifty_op (ASHIFT, operands);
4416 if (kind == 7)
4417 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4418 break;
4419 default:
4420 return true;
4421 }
4422 return false;
4423 }
4424
4425 /* Prefix a symbol_ref name with "datalabel". */
4426 rtx
4427 gen_datalabel_ref (rtx sym)
4428 {
4429 const char *str;
4430
4431 if (GET_CODE (sym) == LABEL_REF)
4432 return gen_rtx_CONST (GET_MODE (sym),
4433 gen_rtx_UNSPEC (GET_MODE (sym),
4434 gen_rtvec (1, sym),
4435 UNSPEC_DATALABEL));
4436
4437 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4438
4439 str = XSTR (sym, 0);
4440 /* Share all SYMBOL_REF strings with the same value - that is important
4441 for cse. */
4442 str = IDENTIFIER_POINTER (get_identifier (str));
4443 XSTR (sym, 0) = str;
4444
4445 return sym;
4446 }
4447
4448 \f
4449 static alloc_pool label_ref_list_pool;
4450
4451 typedef struct label_ref_list_d
4452 {
4453 rtx label;
4454 struct label_ref_list_d *next;
4455 } *label_ref_list_t;
4456
4457 /* The SH cannot load a large constant into a register; constants have to
4458 come from a pc relative load. The reference of a pc relative load
4459 instruction must be less than 1k in front of the instruction. This
4460 means that we often have to dump a constant inside a function, and
4461 generate code to branch around it.
4462
4463 It is important to minimize this, since the branches will slow things
4464 down and make things bigger.
4465
4466 Worst case code looks like:
4467
4468 mov.l L1,rn
4469 bra L2
4470 nop
4471 align
4472 L1: .long value
4473 L2:
4474 ..
4475
4476 mov.l L3,rn
4477 bra L4
4478 nop
4479 align
4480 L3: .long value
4481 L4:
4482 ..
4483
4484 We fix this by performing a scan before scheduling, which notices which
4485 instructions need to have their operands fetched from the constant table
4486 and builds the table.
4487
4488 The algorithm is:
4489
4490 scan, find an instruction which needs a pcrel move. Look forward, find the
4491 last barrier which is within MAX_COUNT bytes of the requirement.
4492 If there isn't one, make one. Process all the instructions between
4493 the find and the barrier.
4494
4495 In the above example, we can tell that L3 is within 1k of L1, so
4496 the first move can be shrunk from the 3 insn+constant sequence into
4497 just 1 insn, and the constant moved to L3 to make:
4498
4499 mov.l L1,rn
4500 ..
4501 mov.l L3,rn
4502 bra L4
4503 nop
4504 align
4505 L3:.long value
4506 L4:.long value
4507
4508 Then the second move becomes the target for the shortening process. */
4509
4510 typedef struct
4511 {
4512 rtx value; /* Value in table. */
4513 rtx label; /* Label of value. */
4514 label_ref_list_t wend; /* End of window. */
4515 enum machine_mode mode; /* Mode of value. */
4516
4517 /* True if this constant is accessed as part of a post-increment
4518 sequence. Note that HImode constants are never accessed in this way. */
4519 bool part_of_sequence_p;
4520 } pool_node;
4521
4522 /* The maximum number of constants that can fit into one pool, since
4523 constants in the range 0..510 are at least 2 bytes long, and in the
4524 range from there to 1018 at least 4 bytes. */
4525
4526 #define MAX_POOL_SIZE 372
4527 static pool_node pool_vector[MAX_POOL_SIZE];
4528 static int pool_size;
4529 static rtx pool_window_label;
4530 static int pool_window_last;
4531
4532 static int max_labelno_before_reorg;
4533
4534 /* ??? If we need a constant in HImode which is the truncated value of a
4535 constant we need in SImode, we could combine the two entries thus saving
4536 two bytes. Is this common enough to be worth the effort of implementing
4537 it? */
4538
4539 /* ??? This stuff should be done at the same time that we shorten branches.
4540 As it is now, we must assume that all branches are the maximum size, and
4541 this causes us to almost always output constant pools sooner than
4542 necessary. */
4543
4544 /* Add a constant to the pool and return its label. */
4545 static rtx
4546 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4547 {
4548 int i;
4549 rtx lab, new_rtx;
4550 label_ref_list_t ref, newref;
4551
4552 /* First see if we've already got it. */
4553 for (i = 0; i < pool_size; i++)
4554 {
4555 if (x->code == pool_vector[i].value->code
4556 && mode == pool_vector[i].mode)
4557 {
4558 if (x->code == CODE_LABEL)
4559 {
4560 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4561 continue;
4562 }
4563 if (rtx_equal_p (x, pool_vector[i].value))
4564 {
4565 lab = new_rtx = 0;
4566 if (! last_value
4567 || ! i
4568 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4569 {
4570 new_rtx = gen_label_rtx ();
4571 LABEL_REFS (new_rtx) = pool_vector[i].label;
4572 pool_vector[i].label = lab = new_rtx;
4573 }
4574 if (lab && pool_window_label)
4575 {
4576 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4577 newref->label = pool_window_label;
4578 ref = pool_vector[pool_window_last].wend;
4579 newref->next = ref;
4580 pool_vector[pool_window_last].wend = newref;
4581 }
4582 if (new_rtx)
4583 pool_window_label = new_rtx;
4584 pool_window_last = i;
4585 return lab;
4586 }
4587 }
4588 }
4589
4590 /* Need a new one. */
4591 pool_vector[pool_size].value = x;
4592 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4593 {
4594 lab = 0;
4595 pool_vector[pool_size - 1].part_of_sequence_p = true;
4596 }
4597 else
4598 lab = gen_label_rtx ();
4599 pool_vector[pool_size].mode = mode;
4600 pool_vector[pool_size].label = lab;
4601 pool_vector[pool_size].wend = NULL;
4602 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4603 if (lab && pool_window_label)
4604 {
4605 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4606 newref->label = pool_window_label;
4607 ref = pool_vector[pool_window_last].wend;
4608 newref->next = ref;
4609 pool_vector[pool_window_last].wend = newref;
4610 }
4611 if (lab)
4612 pool_window_label = lab;
4613 pool_window_last = pool_size;
4614 pool_size++;
4615 return lab;
4616 }
4617
4618 /* Output the literal table. START, if nonzero, is the first instruction
4619 this table is needed for, and also indicates that there is at least one
4620 casesi_worker_2 instruction; we have to emit the operand3 labels from
4621 these insns at a 4-byte aligned position. BARRIER is the barrier
4622 after which we are to place the table. */
4623 static void
4624 dump_table (rtx start, rtx barrier)
4625 {
4626 rtx scan = barrier;
4627 int i;
4628 bool need_align = true;
4629 rtx lab;
4630 label_ref_list_t ref;
4631 bool have_df = false;
4632
4633 /* Do two passes, first time dump out the HI sized constants. */
4634
4635 for (i = 0; i < pool_size; i++)
4636 {
4637 pool_node *p = &pool_vector[i];
4638
4639 if (p->mode == HImode)
4640 {
4641 if (need_align)
4642 {
4643 scan = emit_insn_after (gen_align_2 (), scan);
4644 need_align = false;
4645 }
4646 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4647 scan = emit_label_after (lab, scan);
4648 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4649 scan);
4650 for (ref = p->wend; ref; ref = ref->next)
4651 {
4652 lab = ref->label;
4653 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4654 }
4655 }
4656 else if (p->mode == DFmode)
4657 have_df = true;
4658 }
4659
4660 need_align = true;
4661
4662 if (start)
4663 {
4664 scan = emit_insn_after (gen_align_4 (), scan);
4665 need_align = false;
4666 for (; start != barrier; start = NEXT_INSN (start))
4667 if (NONJUMP_INSN_P (start)
4668 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4669 {
4670 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4671 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4672
4673 scan = emit_label_after (lab, scan);
4674 }
4675 }
4676 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4677 {
4678 rtx align_insn = NULL_RTX;
4679
4680 scan = emit_label_after (gen_label_rtx (), scan);
4681 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4682 need_align = false;
4683
4684 for (i = 0; i < pool_size; i++)
4685 {
4686 pool_node *p = &pool_vector[i];
4687
4688 switch (p->mode)
4689 {
4690 case HImode:
4691 break;
4692 case SImode:
4693 case SFmode:
4694 if (align_insn && !p->part_of_sequence_p)
4695 {
4696 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4697 emit_label_before (lab, align_insn);
4698 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4699 align_insn);
4700 for (ref = p->wend; ref; ref = ref->next)
4701 {
4702 lab = ref->label;
4703 emit_insn_before (gen_consttable_window_end (lab),
4704 align_insn);
4705 }
4706 delete_insn (align_insn);
4707 align_insn = NULL_RTX;
4708 continue;
4709 }
4710 else
4711 {
4712 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4713 scan = emit_label_after (lab, scan);
4714 scan = emit_insn_after (gen_consttable_4 (p->value,
4715 const0_rtx), scan);
4716 need_align = ! need_align;
4717 }
4718 break;
4719 case DFmode:
4720 if (need_align)
4721 {
4722 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4723 align_insn = scan;
4724 need_align = false;
4725 }
4726 case DImode:
4727 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4728 scan = emit_label_after (lab, scan);
4729 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4730 scan);
4731 break;
4732 default:
4733 gcc_unreachable ();
4734 }
4735
4736 if (p->mode != HImode)
4737 {
4738 for (ref = p->wend; ref; ref = ref->next)
4739 {
4740 lab = ref->label;
4741 scan = emit_insn_after (gen_consttable_window_end (lab),
4742 scan);
4743 }
4744 }
4745 }
4746
4747 pool_size = 0;
4748 }
4749
4750 for (i = 0; i < pool_size; i++)
4751 {
4752 pool_node *p = &pool_vector[i];
4753
4754 switch (p->mode)
4755 {
4756 case HImode:
4757 break;
4758 case SImode:
4759 case SFmode:
4760 if (need_align)
4761 {
4762 need_align = false;
4763 scan = emit_label_after (gen_label_rtx (), scan);
4764 scan = emit_insn_after (gen_align_4 (), scan);
4765 }
4766 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4767 scan = emit_label_after (lab, scan);
4768 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4769 scan);
4770 break;
4771 case DFmode:
4772 case DImode:
4773 if (need_align)
4774 {
4775 need_align = false;
4776 scan = emit_label_after (gen_label_rtx (), scan);
4777 scan = emit_insn_after (gen_align_4 (), scan);
4778 }
4779 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4780 scan = emit_label_after (lab, scan);
4781 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4782 scan);
4783 break;
4784 default:
4785 gcc_unreachable ();
4786 }
4787
4788 if (p->mode != HImode)
4789 {
4790 for (ref = p->wend; ref; ref = ref->next)
4791 {
4792 lab = ref->label;
4793 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4794 }
4795 }
4796 }
4797
4798 scan = emit_insn_after (gen_consttable_end (), scan);
4799 scan = emit_barrier_after (scan);
4800 pool_size = 0;
4801 pool_window_label = NULL_RTX;
4802 pool_window_last = 0;
4803 }
4804
4805 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4806
4807 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4808
4809 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4810 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4811 need to fix it if the input value is CONST_OK_FOR_I08. */
4812 static bool
4813 broken_move (rtx insn)
4814 {
4815 if (NONJUMP_INSN_P (insn))
4816 {
4817 rtx pat = PATTERN (insn);
4818 if (GET_CODE (pat) == PARALLEL)
4819 pat = XVECEXP (pat, 0, 0);
4820 if (GET_CODE (pat) == SET
4821 /* We can load any 8-bit value if we don't care what the high
4822 order bits end up as. */
4823 && GET_MODE (SET_DEST (pat)) != QImode
4824 && (CONSTANT_P (SET_SRC (pat))
4825 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4826 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4827 /* Match mova_const. */
4828 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4829 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4830 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4831 && ! (TARGET_SH2E
4832 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4833 && (fp_zero_operand (SET_SRC (pat))
4834 || fp_one_operand (SET_SRC (pat)))
4835 /* In general we don't know the current setting of fpscr, so
4836 disable fldi.
4837 There is an exception if this was a register-register move
4838 before reload - and hence it was ascertained that we have
4839 single precision setting - and in a post-reload optimization
4840 we changed this to do a constant load. In that case
4841 we don't have an r0 clobber, hence we must use fldi. */
4842 && (TARGET_FMOVD
4843 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4844 == SCRATCH))
4845 && REG_P (SET_DEST (pat))
4846 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4847 && ! (TARGET_SH2A
4848 && GET_MODE (SET_DEST (pat)) == SImode
4849 && (satisfies_constraint_I20 (SET_SRC (pat))
4850 || satisfies_constraint_I28 (SET_SRC (pat))))
4851 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4852 return true;
4853 }
4854
4855 return false;
4856 }
4857
4858 /* Return true if the specified insn is a mova insn. */
4859 static bool
4860 mova_p (rtx insn)
4861 {
4862 return (NONJUMP_INSN_P (insn)
4863 && GET_CODE (PATTERN (insn)) == SET
4864 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4865 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4866 /* Don't match mova_const. */
4867 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4868 }
4869
4870 /* Fix up a mova from a switch that went out of range. */
4871 static void
4872 fixup_mova (rtx mova)
4873 {
4874 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4875 if (! flag_pic)
4876 {
4877 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4878 INSN_CODE (mova) = -1;
4879 }
4880 else
4881 {
4882 rtx worker = mova;
4883 rtx lab = gen_label_rtx ();
4884 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4885
4886 do
4887 {
4888 worker = NEXT_INSN (worker);
4889 gcc_assert (worker
4890 && !LABEL_P (worker)
4891 && !JUMP_P (worker));
4892 } while (NOTE_P (worker)
4893 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4894 wpat = PATTERN (worker);
4895 wpat0 = XVECEXP (wpat, 0, 0);
4896 wpat1 = XVECEXP (wpat, 0, 1);
4897 wsrc = SET_SRC (wpat0);
4898 PATTERN (worker) = (gen_casesi_worker_2
4899 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4900 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4901 XEXP (wpat1, 0)));
4902 INSN_CODE (worker) = -1;
4903 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4904 base = gen_rtx_LABEL_REF (Pmode, lab);
4905 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4906 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4907 INSN_CODE (mova) = -1;
4908 }
4909 }
4910
4911 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4912 *num_mova, and check if the new mova is not nested within the first one.
4913 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4914 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4915 static int
4916 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4917 {
4918 int n_addr = 0; /* Initialization to shut up spurious warning. */
4919 int f_target, n_target = 0; /* Likewise. */
4920
4921 if (optimize)
4922 {
4923 /* If NEW_MOVA has no address yet, it will be handled later. */
4924 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4925 return -1;
4926
4927 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4928 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4929 if (n_addr > n_target || n_addr + 1022 < n_target)
4930 {
4931 /* Change the mova into a load.
4932 broken_move will then return true for it. */
4933 fixup_mova (new_mova);
4934 return 1;
4935 }
4936 }
4937 if (!(*num_mova)++)
4938 {
4939 *first_mova = new_mova;
4940 return 2;
4941 }
4942 if (!optimize
4943 || ((f_target
4944 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4945 >= n_target))
4946 return -1;
4947
4948 (*num_mova)--;
4949 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4950 > n_target - n_addr)
4951 {
4952 fixup_mova (*first_mova);
4953 return 0;
4954 }
4955 else
4956 {
4957 fixup_mova (new_mova);
4958 return 1;
4959 }
4960 }
4961
4962 /* Find the last barrier from insn FROM which is close enough to hold the
4963 constant pool. If we can't find one, then create one near the end of
4964 the range. */
4965 static rtx
4966 find_barrier (int num_mova, rtx mova, rtx from)
4967 {
4968 int count_si = 0;
4969 int count_hi = 0;
4970 int found_hi = 0;
4971 int found_si = 0;
4972 int found_di = 0;
4973 int hi_align = 2;
4974 int si_align = 2;
4975 int leading_mova = num_mova;
4976 rtx barrier_before_mova = NULL_RTX;
4977 rtx found_barrier = NULL_RTX;
4978 rtx good_barrier = NULL_RTX;
4979 int si_limit;
4980 int hi_limit;
4981 rtx orig = from;
4982 rtx last_got = NULL_RTX;
4983 rtx last_symoff = NULL_RTX;
4984
4985 /* For HImode: range is 510, add 4 because pc counts from address of
4986 second instruction after this one, subtract 2 for the jump instruction
4987 that we may need to emit before the table, subtract 2 for the instruction
4988 that fills the jump delay slot (in very rare cases, reorg will take an
4989 instruction from after the constant pool or will leave the delay slot
4990 empty). This gives 510.
4991 For SImode: range is 1020, add 4 because pc counts from address of
4992 second instruction after this one, subtract 2 in case pc is 2 byte
4993 aligned, subtract 2 for the jump instruction that we may need to emit
4994 before the table, subtract 2 for the instruction that fills the jump
4995 delay slot. This gives 1018. */
4996
4997 /* The branch will always be shortened now that the reference address for
4998 forward branches is the successor address, so we no longer need to make
4999 adjustments to the [sh]i_limit for -O0. */
5000
5001 si_limit = 1018;
5002 hi_limit = 510;
5003
5004 while (from && count_si < si_limit && count_hi < hi_limit)
5005 {
5006 int inc = get_attr_length (from);
5007 int new_align = 1;
5008
5009 /* If this is a label that existed at the time of the compute_alignments
5010 call, determine the alignment. N.B. When find_barrier recurses for
5011 an out-of-reach mova, we might see labels at the start of previously
5012 inserted constant tables. */
5013 if (LABEL_P (from)
5014 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5015 {
5016 if (optimize)
5017 new_align = 1 << label_to_alignment (from);
5018 else if (BARRIER_P (prev_nonnote_insn (from)))
5019 new_align = 1 << barrier_align (from);
5020 else
5021 new_align = 1;
5022 inc = 0;
5023 }
5024 /* In case we are scanning a constant table because of recursion, check
5025 for explicit alignments. If the table is long, we might be forced
5026 to emit the new table in front of it; the length of the alignment
5027 might be the last straw. */
5028 else if (NONJUMP_INSN_P (from)
5029 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5030 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5031 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5032 /* When we find the end of a constant table, paste the new constant
5033 at the end. That is better than putting it in front because
5034 this way, we don't need extra alignment for adding a 4-byte-aligned
5035 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5036 else if (NONJUMP_INSN_P (from)
5037 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5038 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5039 return from;
5040
5041 if (BARRIER_P (from))
5042 {
5043 rtx next;
5044
5045 found_barrier = from;
5046
5047 /* If we are at the end of the function, or in front of an alignment
5048 instruction, we need not insert an extra alignment. We prefer
5049 this kind of barrier. */
5050 if (barrier_align (from) > 2)
5051 good_barrier = from;
5052
5053 /* If we are at the end of a hot/cold block, dump the constants
5054 here. */
5055 next = NEXT_INSN (from);
5056 if (next
5057 && NOTE_P (next)
5058 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5059 break;
5060 }
5061
5062 if (broken_move (from))
5063 {
5064 rtx pat, src, dst;
5065 enum machine_mode mode;
5066
5067 pat = PATTERN (from);
5068 if (GET_CODE (pat) == PARALLEL)
5069 pat = XVECEXP (pat, 0, 0);
5070 src = SET_SRC (pat);
5071 dst = SET_DEST (pat);
5072 mode = GET_MODE (dst);
5073
5074 /* A GOT pc-relative setting comes in a pair of
5075 mova .L8,r0
5076 mov.l .L8,r12
5077 instructions (plus an add r0,r12).
5078 Remember if we see one without the other. */
5079 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5080 last_got = last_got ? NULL_RTX : from;
5081 else if (PIC_ADDR_P (src))
5082 last_got = last_got ? NULL_RTX : from;
5083
5084 /* We must explicitly check the mode, because sometimes the
5085 front end will generate code to load unsigned constants into
5086 HImode targets without properly sign extending them. */
5087 if (mode == HImode
5088 || (mode == SImode && satisfies_constraint_I16 (src)
5089 && REGNO (dst) != FPUL_REG))
5090 {
5091 found_hi += 2;
5092 /* We put the short constants before the long constants, so
5093 we must count the length of short constants in the range
5094 for the long constants. */
5095 /* ??? This isn't optimal, but is easy to do. */
5096 si_limit -= 2;
5097 }
5098 else
5099 {
5100 /* We dump DF/DI constants before SF/SI ones, because
5101 the limit is the same, but the alignment requirements
5102 are higher. We may waste up to 4 additional bytes
5103 for alignment, and the DF/DI constant may have
5104 another SF/SI constant placed before it. */
5105 if (TARGET_SHCOMPACT
5106 && ! found_di
5107 && (mode == DFmode || mode == DImode))
5108 {
5109 found_di = 1;
5110 si_limit -= 8;
5111 }
5112 while (si_align > 2 && found_si + si_align - 2 > count_si)
5113 si_align >>= 1;
5114 if (found_si > count_si)
5115 count_si = found_si;
5116 found_si += GET_MODE_SIZE (mode);
5117 if (num_mova)
5118 si_limit -= GET_MODE_SIZE (mode);
5119 }
5120 }
5121
5122 if (mova_p (from))
5123 {
5124 switch (untangle_mova (&num_mova, &mova, from))
5125 {
5126 case 1:
5127 if (flag_pic)
5128 {
5129 rtx src = SET_SRC (PATTERN (from));
5130 if (GET_CODE (src) == CONST
5131 && GET_CODE (XEXP (src, 0)) == UNSPEC
5132 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5133 last_symoff = from;
5134 }
5135 break;
5136 case 0: return find_barrier (0, 0, mova);
5137 case 2:
5138 {
5139 leading_mova = 0;
5140 barrier_before_mova
5141 = good_barrier ? good_barrier : found_barrier;
5142 }
5143 default: break;
5144 }
5145 if (found_si > count_si)
5146 count_si = found_si;
5147 }
5148 else if (JUMP_TABLE_DATA_P (from)
5149 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5150 {
5151 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5152 || (num_mova
5153 && (prev_nonnote_insn (from)
5154 == XEXP (MOVA_LABELREF (mova), 0))))
5155 num_mova--;
5156 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5157 {
5158 /* We have just passed the barrier in front of the
5159 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5160 the ADDR_DIFF_VEC is accessed as data, just like our pool
5161 constants, this is a good opportunity to accommodate what
5162 we have gathered so far.
5163 If we waited any longer, we could end up at a barrier in
5164 front of code, which gives worse cache usage for separated
5165 instruction / data caches. */
5166 good_barrier = found_barrier;
5167 break;
5168 }
5169 else
5170 {
5171 rtx body = PATTERN (from);
5172 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5173 }
5174 }
5175 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5176 else if (JUMP_P (from)
5177 && ! TARGET_SH2
5178 && ! optimize_size)
5179 new_align = 4;
5180
5181 /* There is a possibility that a bf is transformed into a bf/s by the
5182 delay slot scheduler. */
5183 if (JUMP_P (from)
5184 && get_attr_type (from) == TYPE_CBRANCH
5185 && ! sequence_insn_p (from))
5186 inc += 2;
5187
5188 if (found_si)
5189 {
5190 count_si += inc;
5191 if (new_align > si_align)
5192 {
5193 si_limit -= (count_si - 1) & (new_align - si_align);
5194 si_align = new_align;
5195 }
5196 count_si = (count_si + new_align - 1) & -new_align;
5197 }
5198 if (found_hi)
5199 {
5200 count_hi += inc;
5201 if (new_align > hi_align)
5202 {
5203 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5204 hi_align = new_align;
5205 }
5206 count_hi = (count_hi + new_align - 1) & -new_align;
5207 }
5208 from = NEXT_INSN (from);
5209 }
5210
5211 if (num_mova)
5212 {
5213 if (leading_mova)
5214 {
5215 /* Try as we might, the leading mova is out of range. Change
5216 it into a load (which will become a pcload) and retry. */
5217 fixup_mova (mova);
5218 return find_barrier (0, 0, mova);
5219 }
5220 else
5221 {
5222 /* Insert the constant pool table before the mova instruction,
5223 to prevent the mova label reference from going out of range. */
5224 from = mova;
5225 good_barrier = found_barrier = barrier_before_mova;
5226 }
5227 }
5228
5229 if (found_barrier)
5230 {
5231 if (good_barrier && next_real_insn (found_barrier))
5232 found_barrier = good_barrier;
5233 }
5234 else
5235 {
5236 /* We didn't find a barrier in time to dump our stuff,
5237 so we'll make one. */
5238 rtx label = gen_label_rtx ();
5239
5240 /* Don't emit a constant table in the middle of insns for
5241 casesi_worker_2. This is a bit of overkill, but it is enough
5242 because casesi_worker_2 does not appear very frequently.
5243 if (last_symoff)
5244 from = last_symoff;
5245
5246 /* If we exceeded the range, then we must back up over the last
5247 instruction we looked at. Otherwise, we just need to undo the
5248 NEXT_INSN at the end of the loop. */
5249 if (PREV_INSN (from) != orig
5250 && (count_hi > hi_limit || count_si > si_limit))
5251 from = PREV_INSN (PREV_INSN (from));
5252 else
5253 from = PREV_INSN (from);
5254
5255 /* Don't emit a constant table in the middle of global pointer setting,
5256 since that would move the addressing base GOT into another table.
5257 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5258 in the pool anyway, so just move up the whole constant pool.
5259
5260 However, avoid doing so when the last single GOT mov is the starting
5261 insn itself. Going back past the start insn would create a negative
5262 offset, causing errors. */
5263 if (last_got && last_got != orig)
5264 from = PREV_INSN (last_got);
5265
5266 /* Don't insert the constant pool table at the position which
5267 may be the landing pad. */
5268 if (flag_exceptions
5269 && CALL_P (from)
5270 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5271 from = PREV_INSN (from);
5272
5273 /* Walk back to be just before any jump or label.
5274 Putting it before a label reduces the number of times the branch
5275 around the constant pool table will be hit. Putting it before
5276 a jump makes it more likely that the bra delay slot will be
5277 filled. */
5278 while (NOTE_P (from) || JUMP_P (from)
5279 || LABEL_P (from))
5280 from = PREV_INSN (from);
5281
5282 /* Make sure we do not split between a call and its corresponding
5283 CALL_ARG_LOCATION note. */
5284 if (CALL_P (from))
5285 {
5286 rtx next = NEXT_INSN (from);
5287 if (next && NOTE_P (next)
5288 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5289 from = next;
5290 }
5291
5292 from = emit_jump_insn_after (gen_jump (label), from);
5293 JUMP_LABEL (from) = label;
5294 LABEL_NUSES (label) = 1;
5295 found_barrier = emit_barrier_after (from);
5296 emit_label_after (label, found_barrier);
5297 }
5298
5299 return found_barrier;
5300 }
5301
5302 /* If the instruction INSN is implemented by a special function, and we can
5303 positively find the register that is used to call the sfunc, and this
5304 register is not used anywhere else in this instruction - except as the
5305 destination of a set, return this register; else, return 0. */
5306 rtx
5307 sfunc_uses_reg (rtx insn)
5308 {
5309 int i;
5310 rtx pattern, part, reg_part, reg;
5311
5312 if (!NONJUMP_INSN_P (insn))
5313 return NULL_RTX;
5314 pattern = PATTERN (insn);
5315 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5316 return NULL_RTX;
5317
5318 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5319 {
5320 part = XVECEXP (pattern, 0, i);
5321 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5322 reg_part = part;
5323 }
5324 if (! reg_part)
5325 return NULL_RTX;
5326 reg = XEXP (reg_part, 0);
5327 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5328 {
5329 part = XVECEXP (pattern, 0, i);
5330 if (part == reg_part || GET_CODE (part) == CLOBBER)
5331 continue;
5332 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5333 && REG_P (SET_DEST (part)))
5334 ? SET_SRC (part) : part)))
5335 return NULL_RTX;
5336 }
5337 return reg;
5338 }
5339
5340 /* See if the only way in which INSN uses REG is by calling it, or by
5341 setting it while calling it. Set *SET to a SET rtx if the register
5342 is set by INSN. */
5343 static bool
5344 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
5345 {
5346 rtx pattern, reg2;
5347
5348 *set = NULL_RTX;
5349
5350 reg2 = sfunc_uses_reg (insn);
5351 if (reg2 && REGNO (reg2) == REGNO (reg))
5352 {
5353 pattern = single_set (insn);
5354 if (pattern
5355 && REG_P (SET_DEST (pattern))
5356 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5357 *set = pattern;
5358 return false;
5359 }
5360 if (!CALL_P (insn))
5361 {
5362 /* We don't use rtx_equal_p because we don't care if the mode is
5363 different. */
5364 pattern = single_set (insn);
5365 if (pattern
5366 && REG_P (SET_DEST (pattern))
5367 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5368 {
5369 rtx par, part;
5370 int i;
5371
5372 *set = pattern;
5373 par = PATTERN (insn);
5374 if (GET_CODE (par) == PARALLEL)
5375 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5376 {
5377 part = XVECEXP (par, 0, i);
5378 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5379 return true;
5380 }
5381 return reg_mentioned_p (reg, SET_SRC (pattern));
5382 }
5383
5384 return true;
5385 }
5386
5387 pattern = PATTERN (insn);
5388
5389 if (GET_CODE (pattern) == PARALLEL)
5390 {
5391 int i;
5392
5393 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5394 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5395 return true;
5396 pattern = XVECEXP (pattern, 0, 0);
5397 }
5398
5399 if (GET_CODE (pattern) == SET)
5400 {
5401 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5402 {
5403 /* We don't use rtx_equal_p, because we don't care if the
5404 mode is different. */
5405 if (!REG_P (SET_DEST (pattern))
5406 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5407 return true;
5408
5409 *set = pattern;
5410 }
5411
5412 pattern = SET_SRC (pattern);
5413 }
5414
5415 if (GET_CODE (pattern) != CALL
5416 || !MEM_P (XEXP (pattern, 0))
5417 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5418 return true;
5419
5420 return false;
5421 }
5422
5423 /* Given X, a pattern of an insn or a part of it, return a mask of used
5424 general registers. Bits 0..15 mean that the respective registers
5425 are used as inputs in the instruction. Bits 16..31 mean that the
5426 registers 0..15, respectively, are used as outputs, or are clobbered.
5427 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
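/* For example (an illustrative sketch derived from the code below): the
   SImode move (set (reg:SI 1) (reg:SI 2)) yields 0x00020004 - bit 2 for
   r2 used as an input and bit 17 (1 + 16) for r1 being set.  A DImode
   register would contribute two adjacent bits, since HARD_REGNO_NREGS
   is 2 in that case.  */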
5428 int
5429 regs_used (rtx x, int is_dest)
5430 {
5431 enum rtx_code code;
5432 const char *fmt;
5433 int i, used = 0;
5434
5435 if (! x)
5436 return used;
5437 code = GET_CODE (x);
5438 switch (code)
5439 {
5440 case REG:
5441 if (REGNO (x) < 16)
5442 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5443 << (REGNO (x) + is_dest));
5444 return 0;
5445 case SUBREG:
5446 {
5447 rtx y = SUBREG_REG (x);
5448
5449 if (!REG_P (y))
5450 break;
5451 if (REGNO (y) < 16)
5452 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5453 << (REGNO (y) +
5454 subreg_regno_offset (REGNO (y),
5455 GET_MODE (y),
5456 SUBREG_BYTE (x),
5457 GET_MODE (x)) + is_dest));
5458 return 0;
5459 }
5460 case SET:
5461 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5462 case RETURN:
5463 /* If there was a return value, it must have been indicated with USE. */
5464 return 0x00ffff00;
5465 case CLOBBER:
5466 is_dest = 1;
5467 break;
5468 case MEM:
5469 is_dest = 0;
5470 break;
5471 case CALL:
5472 used |= 0x00ff00f0;
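/* That is, a call is treated as reading the argument registers r4-r7
   (bits 4-7) and as setting or clobbering the call-clobbered registers
   r0-r7 (bits 16-23).  */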
5473 break;
5474 default:
5475 break;
5476 }
5477
5478 fmt = GET_RTX_FORMAT (code);
5479
5480 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5481 {
5482 if (fmt[i] == 'E')
5483 {
5484 int j;
5485 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5486 used |= regs_used (XVECEXP (x, i, j), is_dest);
5487 }
5488 else if (fmt[i] == 'e')
5489 used |= regs_used (XEXP (x, i), is_dest);
5490 }
5491 return used;
5492 }
5493
5494 /* Create an instruction that prevents redirection of a conditional branch
5495 to the destination of the JUMP with address ADDR.
5496 If the branch needs to be implemented as an indirect jump, try to find
5497 a scratch register for it.
5498 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5499 Pass 1 if any preceding insn that doesn't fit into a delay slot is good
5500 enough; pass 2 if a definite blocking insn is needed.
5501 -1 is used internally to avoid deep recursion.
5502 If a blocking instruction is made or recognized, return it. */
5503 static rtx
5504 gen_block_redirect (rtx jump, int addr, int need_block)
5505 {
5506 int dead = 0;
5507 rtx prev = prev_nonnote_insn (jump);
5508 rtx dest;
5509
5510 /* First, check if we already have an instruction that satisfies our need. */
5511 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5512 {
5513 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5514 return prev;
5515 if (GET_CODE (PATTERN (prev)) == USE
5516 || GET_CODE (PATTERN (prev)) == CLOBBER
5517 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5518 prev = jump;
5519 else if ((need_block &= ~1) < 0)
5520 return prev;
5521 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5522 need_block = 0;
5523 }
5524 if (GET_CODE (PATTERN (jump)) == RETURN)
5525 {
5526 if (! need_block)
5527 return prev;
5528 /* Reorg even does nasty things with return insns that cause branches
5529 to go out of range - see find_end_label and callers. */
5530 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5531 }
5532 /* We can't use JUMP_LABEL here because it might be undefined
5533 when not optimizing. */
5534 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5535 /* If the branch is out of range, try to find a scratch register for it. */
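/* A note on the test below: with d = destination address - ADDR, adding
   4092 and comparing unsigned against 4092 + 4098 folds the signed range
   check d < -4092 || d > 4098 into a single comparison, i.e. it detects
   displacements outside the roughly +/-4 KiB reach of a pc-relative
   branch.  */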
5536 if (optimize
5537 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5538 > 4092 + 4098))
5539 {
5540 rtx scan;
5541 /* Don't look for the stack pointer as a scratch register;
5542 it would cause trouble if an interrupt occurred. */
5543 unsigned attempt = 0x7fff, used;
5544 int jump_left = flag_expensive_optimizations + 1;
5545
5546 /* It is likely that the most recent eligible instruction is wanted for
5547 the delay slot. Therefore, find out which registers it uses, and
5548 try to avoid using them. */
5549
5550 for (scan = jump; (scan = PREV_INSN (scan)); )
5551 {
5552 enum rtx_code code;
5553
5554 if (INSN_DELETED_P (scan))
5555 continue;
5556 code = GET_CODE (scan);
5557 if (code == CODE_LABEL || code == JUMP_INSN)
5558 break;
5559 if (code == INSN
5560 && GET_CODE (PATTERN (scan)) != USE
5561 && GET_CODE (PATTERN (scan)) != CLOBBER
5562 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5563 {
5564 attempt &= ~regs_used (PATTERN (scan), 0);
5565 break;
5566 }
5567 }
5568 for (used = dead = 0, scan = JUMP_LABEL (jump);
5569 (scan = NEXT_INSN (scan)); )
5570 {
5571 enum rtx_code code;
5572
5573 if (INSN_DELETED_P (scan))
5574 continue;
5575 code = GET_CODE (scan);
5576 if (INSN_P (scan))
5577 {
5578 used |= regs_used (PATTERN (scan), 0);
5579 if (code == CALL_INSN)
5580 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5581 dead |= (used >> 16) & ~used;
5582 if (dead & attempt)
5583 {
5584 dead &= attempt;
5585 break;
5586 }
5587 if (code == JUMP_INSN)
5588 {
5589 if (jump_left-- && simplejump_p (scan))
5590 scan = JUMP_LABEL (scan);
5591 else
5592 break;
5593 }
5594 }
5595 }
5596 /* Mask out the stack pointer again, in case it was
5597 the only 'free' register we have found. */
5598 dead &= 0x7fff;
5599 }
5600 /* If the immediate destination is still in range, check for possible
5601 threading with a jump beyond the delay slot insn.
5602 Don't check if we are called recursively; the jump has been or will be
5603 checked in a different invocation in that case. */
5604
5605 else if (optimize && need_block >= 0)
5606 {
5607 rtx next = next_active_insn (next_active_insn (dest));
5608 if (next && JUMP_P (next)
5609 && GET_CODE (PATTERN (next)) == SET
5610 && recog_memoized (next) == CODE_FOR_jump_compact)
5611 {
5612 dest = JUMP_LABEL (next);
5613 if (dest
5614 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5615 > 4092 + 4098))
5616 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5617 }
5618 }
5619
5620 if (dead)
5621 {
5622 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5623
5624 /* It would be nice if we could convert the jump into an indirect
5625 jump / far branch right now, and thus expose all constituent
5626 instructions to further optimization. However, reorg uses
5627 simplejump_p to determine if there is an unconditional jump where
5628 it should try to schedule instructions from the target of the
5629 branch; simplejump_p fails for indirect jumps even if they have
5630 a JUMP_LABEL. */
5631 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5632 (reg, GEN_INT (unspec_bbr_uid++)),
5633 jump);
5634 /* ??? We would like this to have the scope of the jump, but that
5635 scope will change when a delay slot insn of an inner scope is added.
5636 Hence, after delay slot scheduling, we'll have to expect
5637 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5638 the jump. */
5639
5640 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5641 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5642 return insn;
5643 }
5644 else if (need_block)
5645 /* We can't use JUMP_LABEL here because it might be undefined
5646 when not optimizing. */
5647 return emit_insn_before (gen_block_branch_redirect
5648 (GEN_INT (unspec_bbr_uid++)),
5649 jump);
5650 return prev;
5651 }
5652
5653 #define CONDJUMP_MIN -252
5654 #define CONDJUMP_MAX 262
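/* These bounds appear to reflect the reach of the SH bt/bf conditional
   branches: an 8-bit displacement, scaled by 2 and applied relative to the
   branch address + 4, gives about -252 .. +258 bytes measured from the
   branch itself, with CONDJUMP_MAX leaving a few bytes of slack for the
   insns inserted around a far branch.  */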
5655 struct far_branch
5656 {
5657 /* A label (to be placed) in front of the jump
5658 that jumps to our ultimate destination. */
5659 rtx near_label;
5660 /* Where we are going to insert it if we cannot move the jump any farther,
5661 or the jump itself if we have picked up an existing jump. */
5662 rtx insert_place;
5663 /* The ultimate destination. */
5664 rtx far_label;
5665 struct far_branch *prev;
5666 /* If the branch has already been created, its address;
5667 else the address of its first prospective user. */
5668 int address;
5669 };
5670
5671 static void gen_far_branch (struct far_branch *);
5672 enum mdep_reorg_phase_e mdep_reorg_phase;
5673 static void
5674 gen_far_branch (struct far_branch *bp)
5675 {
5676 rtx insn = bp->insert_place;
5677 rtx jump;
5678 rtx label = gen_label_rtx ();
5679 int ok;
5680
5681 emit_label_after (label, insn);
5682 if (bp->far_label)
5683 {
5684 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5685 LABEL_NUSES (bp->far_label)++;
5686 }
5687 else
5688 jump = emit_jump_insn_after (gen_return (), insn);
5689
5690 /* Emit a barrier so that reorg knows that any following instructions
5691 are not reachable via a fall-through path.
5692 But don't do this when not optimizing, since we wouldn't suppress the
5693 alignment for the barrier then, and could end up with out-of-range
5694 pc-relative loads. */
5695 if (optimize)
5696 emit_barrier_after (jump);
5697 emit_label_after (bp->near_label, insn);
5698
5699 if (bp->far_label)
5700 JUMP_LABEL (jump) = bp->far_label;
5701 else
5702 {
5703 rtx pat = PATTERN (jump);
5704 gcc_assert (ANY_RETURN_P (pat));
5705 JUMP_LABEL (jump) = pat;
5706 }
5707
5708 ok = invert_jump (insn, label, 1);
5709 gcc_assert (ok);
5710
5711 /* If we are branching around a jump (rather than a return), prevent
5712 reorg from using an insn from the jump target as the delay slot insn -
5713 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5714 and it could cause branches to go out of range. */
5715 if (bp->far_label)
5716 (emit_insn_after
5717 (gen_stuff_delay_slot
5718 (GEN_INT (unspec_bbr_uid++),
5719 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5720 insn));
5721 /* Prevent reorg from undoing our splits. */
5722 gen_block_redirect (jump, bp->address += 2, 2);
5723 }
5724
5725 /* Fix up ADDR_DIFF_VECs. */
5726 void
5727 fixup_addr_diff_vecs (rtx first)
5728 {
5729 rtx insn;
5730
5731 for (insn = first; insn; insn = NEXT_INSN (insn))
5732 {
5733 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5734
5735 if (! JUMP_TABLE_DATA_P (insn)
5736 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5737 continue;
5738 pat = PATTERN (insn);
5739 vec_lab = XEXP (XEXP (pat, 0), 0);
5740
5741 /* Search for the matching casesi_jump_2. */
5742 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5743 {
5744 if (!JUMP_P (prev))
5745 continue;
5746 prevpat = PATTERN (prev);
5747 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5748 continue;
5749 x = XVECEXP (prevpat, 0, 1);
5750 if (GET_CODE (x) != USE)
5751 continue;
5752 x = XEXP (x, 0);
5753 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5754 break;
5755 }
5756 /* FIXME: This is a bug in the optimizer, but it seems harmless
5757 to just avoid panicking. */
5758 if (!prev)
5759 continue;
5760
5761 /* Emit the reference label of the braf where it belongs, right after
5762 the casesi_jump_2 (i.e. braf). */
5763 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5764 emit_label_after (braf_label, prev);
5765
5766 /* Fix up the ADDR_DIFF_VEC to be relative
5767 to the reference address of the braf. */
5768 XEXP (XEXP (pat, 0), 0) = braf_label;
5769 }
5770 }
5771
5772 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5773 a barrier. Return the base 2 logarithm of the desired alignment. */
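/* E.g. a return value of 2 requests 2^2 = 4 byte alignment.  */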
5774 int
5775 barrier_align (rtx barrier_or_label)
5776 {
5777 rtx next = next_active_insn (barrier_or_label), pat, prev;
5778
5779 if (! next)
5780 return 0;
5781
5782 pat = PATTERN (next);
5783
5784 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5785 return 2;
5786
5787 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5788 /* This is a barrier in front of a constant table. */
5789 return 0;
5790
5791 prev = prev_active_insn (barrier_or_label);
5792 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5793 {
5794 pat = PATTERN (prev);
5795 /* If this is a very small table, we want to keep the alignment after
5796 the table to the minimum for proper code alignment. */
5797 return ((optimize_size
5798 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5799 <= (unsigned) 1 << (CACHE_LOG - 2)))
5800 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5801 }
5802
5803 if (optimize_size)
5804 return 0;
5805
5806 if (! TARGET_SH2 || ! optimize)
5807 return align_jumps_log;
5808
5809 /* When fixing up pcloads, a constant table might be inserted just before
5810 the basic block that ends with the barrier. Thus, we can't trust the
5811 instruction lengths before that. */
5812 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5813 {
5814 /* Check if there is an immediately preceding branch to the insn beyond
5815 the barrier. We must weigh the cost of discarding useful information
5816 from the current cache line when executing this branch and there is
5817 an alignment, against that of fetching unneeded insns in front of the
5818 branch target when there is no alignment. */
5819
5820 /* There are two delay_slot cases to consider. One is the simple case
5821 where the preceding branch is to the insn beyond the barrier (simple
5822 delay slot filling), and the other is where the preceding branch has
5823 a delay slot that is a duplicate of the insn after the barrier
5824 (fill_eager_delay_slots) and the branch is to the insn after the insn
5825 after the barrier. */
5826
5827 /* PREV is presumed to be the JUMP_INSN for the barrier under
5828 investigation. Skip to the insn before it. */
5829
5830 int slot, credit;
5831 bool jump_to_next = false;
5832
5833 prev = prev_real_insn (prev);
5834
5835 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5836 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5837 prev = prev_real_insn (prev))
5838 {
5839 jump_to_next = false;
5840 if (GET_CODE (PATTERN (prev)) == USE
5841 || GET_CODE (PATTERN (prev)) == CLOBBER)
5842 continue;
5843 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5844 {
5845 prev = XVECEXP (PATTERN (prev), 0, 1);
5846 if (INSN_UID (prev) == INSN_UID (next))
5847 {
5848 /* Delay slot was filled with insn at jump target. */
5849 jump_to_next = true;
5850 continue;
5851 }
5852 }
5853
5854 if (slot &&
5855 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5856 slot = 0;
5857 credit -= get_attr_length (prev);
5858 }
5859 if (prev && jump_to_label_p (prev))
5860 {
5861 rtx x;
5862 if (jump_to_next
5863 || next_real_insn (JUMP_LABEL (prev)) == next
5864 /* If relax_delay_slots() decides NEXT was redundant
5865 with some previous instruction, it will have
5866 redirected PREV's jump to the following insn. */
5867 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5868 /* There is no upper bound on redundant instructions
5869 that might have been skipped, but we must not put an
5870 alignment where none had been before. */
5871 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5872 (INSN_P (x)
5873 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5874 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5875 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5876 {
5877 rtx pat = PATTERN (prev);
5878 if (GET_CODE (pat) == PARALLEL)
5879 pat = XVECEXP (pat, 0, 0);
5880 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5881 return 0;
5882 }
5883 }
5884 }
5885
5886 return align_jumps_log;
5887 }
5888
5889 /* If we are inside a phony loop, almost any kind of label can turn up as the
5890 first one in the loop. Aligning a braf label causes incorrect switch
5891 destination addresses; we can detect braf labels because they are
5892 followed by a BARRIER.
5893 Applying loop alignment to small constant or switch tables is a waste
5894 of space, so we suppress this too. */
5895 int
5896 sh_loop_align (rtx label)
5897 {
5898 rtx next = label;
5899
5900 if (! optimize || optimize_size)
5901 return 0;
5902
5903 do
5904 next = next_nonnote_insn (next);
5905 while (next && LABEL_P (next));
5906
5907 if (! next
5908 || ! INSN_P (next)
5909 || recog_memoized (next) == CODE_FOR_consttable_2)
5910 return 0;
5911
5912 return align_loops_log;
5913 }
5914
5915 /* Do a final pass over the function, just before delayed branch
5916 scheduling. */
5917 static void
5918 sh_reorg (void)
5919 {
5920 rtx first, insn, mova = NULL_RTX;
5921 int num_mova;
5922 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5923 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5924
5925 first = get_insns ();
5926 max_labelno_before_reorg = max_label_num ();
5927
5928 /* We must split call insns before introducing `mova's. If we're
5929 optimizing, they'll have already been split. Otherwise, make
5930 sure we don't split them too late. */
5931 if (! optimize)
5932 split_all_insns_noflow ();
5933
5934 if (TARGET_SHMEDIA)
5935 return;
5936
5937 /* If relaxing, generate pseudo-ops to associate function calls with
5938 the symbols they call. It does no harm to not generate these
5939 pseudo-ops. However, when we can generate them, it enables the
5940 linker to potentially relax the jsr to a bsr, and eliminate the
5941 register load and, possibly, the constant pool entry. */
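/* For illustration (an assembly sketch with assumed label names, not
   output copied from the compiler): with -mrelax the emitted code looks
   roughly like
       .L1:
               mov.l   .Lc,r1          ! r1 <- address of the callee
               ...
               .uses   .L1
               jsr     @r1
   and the linker, guided by the .uses annotation, may turn the jsr into a
   pc-relative bsr and delete the register load and the constant pool
   entry.  */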
5942
5943 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5944 if (TARGET_RELAX)
5945 {
5946 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5947 own purposes. This works because none of the remaining passes
5948 need to look at them.
5949
5950 ??? But it may break in the future. We should use a machine
5951 dependent REG_NOTE, or some other approach entirely. */
5952 for (insn = first; insn; insn = NEXT_INSN (insn))
5953 {
5954 if (INSN_P (insn))
5955 {
5956 rtx note;
5957
5958 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5959 NULL_RTX)) != 0)
5960 remove_note (insn, note);
5961 }
5962 }
5963
5964 for (insn = first; insn; insn = NEXT_INSN (insn))
5965 {
5966 rtx pattern, reg, link, set, scan, dies, label;
5967 int rescan = 0, foundinsn = 0;
5968
5969 if (CALL_P (insn))
5970 {
5971 pattern = PATTERN (insn);
5972
5973 if (GET_CODE (pattern) == PARALLEL)
5974 pattern = XVECEXP (pattern, 0, 0);
5975 if (GET_CODE (pattern) == SET)
5976 pattern = SET_SRC (pattern);
5977
5978 if (GET_CODE (pattern) != CALL
5979 || !MEM_P (XEXP (pattern, 0)))
5980 continue;
5981
5982 reg = XEXP (XEXP (pattern, 0), 0);
5983 }
5984 else
5985 {
5986 reg = sfunc_uses_reg (insn);
5987 if (! reg)
5988 continue;
5989 }
5990
5991 if (!REG_P (reg))
5992 continue;
5993
5994 /* Try scanning backward to find where the register is set. */
5995 link = NULL;
5996 for (scan = PREV_INSN (insn);
5997 scan && !LABEL_P (scan);
5998 scan = PREV_INSN (scan))
5999 {
6000 if (! INSN_P (scan))
6001 continue;
6002
6003 if (! reg_mentioned_p (reg, scan))
6004 continue;
6005
6006 if (noncall_uses_reg (reg, scan, &set))
6007 break;
6008
6009 if (set)
6010 {
6011 link = scan;
6012 break;
6013 }
6014 }
6015
6016 if (! link)
6017 continue;
6018
6019 /* The register is set at LINK. */
6020
6021 /* We can only optimize the function call if the register is
6022 being set to a symbol. In theory, we could sometimes
6023 optimize calls to a constant location, but the assembler
6024 and linker do not support that at present. */
6025 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6026 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6027 continue;
6028
6029 /* Scan forward from LINK to the place where REG dies, and
6030 make sure that the only insns which use REG are
6031 themselves function calls. */
6032
6033 /* ??? This doesn't work for call targets that were allocated
6034 by reload, since there may not be a REG_DEAD note for the
6035 register. */
6036
6037 dies = NULL_RTX;
6038 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6039 {
6040 rtx scanset;
6041
6042 /* Don't try to trace forward past a CODE_LABEL if we haven't
6043 seen INSN yet. Ordinarily, we will only find the setting insn
6044 if it is in the same basic block. However,
6045 cross-jumping can insert code labels in between the load and
6046 the call, and can result in situations where a single call
6047 insn may have two targets depending on where we came from. */
6048
6049 if (LABEL_P (scan) && ! foundinsn)
6050 break;
6051
6052 if (! INSN_P (scan))
6053 continue;
6054
6055 /* Don't try to trace forward past a JUMP. To optimize
6056 safely, we would have to check that all the
6057 instructions at the jump destination did not use REG. */
6058
6059 if (JUMP_P (scan))
6060 break;
6061
6062 if (! reg_mentioned_p (reg, scan))
6063 continue;
6064
6065 if (noncall_uses_reg (reg, scan, &scanset))
6066 break;
6067
6068 if (scan == insn)
6069 foundinsn = 1;
6070
6071 if (scan != insn
6072 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6073 {
6074 /* There is a function call to this register other
6075 than the one we are checking. If we optimize
6076 this call, we need to rescan again below. */
6077 rescan = 1;
6078 }
6079
6080 /* ??? We shouldn't have to worry about SCANSET here.
6081 We should just be able to check for a REG_DEAD note
6082 on a function call. However, the REG_DEAD notes are
6083 apparently not dependable around libcalls; c-torture
6084 execute/920501-2 is a test case. If SCANSET is set,
6085 then this insn sets the register, so it must have
6086 died earlier. Unfortunately, this will only handle
6087 the cases in which the register is, in fact, set in a
6088 later insn. */
6089
6090 /* ??? We shouldn't have to use FOUNDINSN here.
6091 This dates back to when we used LOG_LINKS to find
6092 the most recent insn which sets the register. */
6093
6094 if (foundinsn
6095 && (scanset
6096 || find_reg_note (scan, REG_DEAD, reg)))
6097 {
6098 dies = scan;
6099 break;
6100 }
6101 }
6102
6103 if (! dies)
6104 {
6105 /* Either there was a branch, or some insn used REG
6106 other than as a function call address. */
6107 continue;
6108 }
6109
6110 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6111 on the insn which sets the register, and on each call insn
6112 which uses the register. In final_prescan_insn we look for
6113 the REG_LABEL_OPERAND notes, and output the appropriate label
6114 or pseudo-op. */
6115
6116 label = gen_label_rtx ();
6117 add_reg_note (link, REG_LABEL_OPERAND, label);
6118 add_reg_note (insn, REG_LABEL_OPERAND, label);
6119 if (rescan)
6120 {
6121 scan = link;
6122 do
6123 {
6124 rtx reg2;
6125
6126 scan = NEXT_INSN (scan);
6127 if (scan != insn
6128 && ((CALL_P (scan)
6129 && reg_mentioned_p (reg, scan))
6130 || ((reg2 = sfunc_uses_reg (scan))
6131 && REGNO (reg2) == REGNO (reg))))
6132 add_reg_note (scan, REG_LABEL_OPERAND, label);
6133 }
6134 while (scan != dies);
6135 }
6136 }
6137 }
6138
6139 if (TARGET_SH2)
6140 fixup_addr_diff_vecs (first);
6141
6142 if (optimize)
6143 {
6144 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6145 shorten_branches (first);
6146 }
6147
6148 /* Scan the function looking for move instructions which have to be
6149 changed to pc-relative loads and insert the literal tables. */
6150 label_ref_list_pool = create_alloc_pool ("label references list",
6151 sizeof (struct label_ref_list_d),
6152 30);
6153 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6154 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6155 {
6156 if (mova_p (insn))
6157 {
6158 /* ??? basic block reordering can move a switch table dispatch
6159 below the switch table. Check if that has happened.
6160 We only have the addresses available when optimizing; but then,
6161 this check shouldn't be needed when not optimizing. */
6162 if (!untangle_mova (&num_mova, &mova, insn))
6163 {
6164 insn = mova;
6165 num_mova = 0;
6166 }
6167 }
6168 else if (JUMP_TABLE_DATA_P (insn)
6169 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6170 && num_mova
6171 /* ??? Loop invariant motion can also move a mova out of a
6172 loop. Since the loop pass does this code motion anyway, maybe we
6173 should wrap UNSPEC_MOVA into a CONST, so that reload can
6174 move it back. */
6175 && ((num_mova > 1
6176 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6177 || (prev_nonnote_insn (insn)
6178 == XEXP (MOVA_LABELREF (mova), 0))))
6179 {
6180 rtx scan;
6181 int total;
6182
6183 num_mova--;
6184
6185 /* Some code might have been inserted between the mova and
6186 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6187 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6188 total += get_attr_length (scan);
6189
6190 /* The range of the mova is 1020; add 4 because the pc counts from the
6191 address of the second instruction after this one, and subtract 2 in
6192 case the pc is 2-byte aligned. Any alignment needed for the
6193 ADDR_DIFF_VEC cancels out with the alignment effects of the mova itself. */
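/* That is, 1020 + 4 - 2 = 1022 is used below as the worst-case distance
   the mova can still cover.  */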
6194 if (total > 1022)
6195 {
6196 /* Change the mova into a load, and restart scanning
6197 there. broken_move will then return true for mova. */
6198 fixup_mova (mova);
6199 insn = mova;
6200 }
6201 }
6202 if (broken_move (insn)
6203 || (NONJUMP_INSN_P (insn)
6204 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6205 {
6206 rtx scan;
6207 /* Scan ahead looking for a barrier to stick the constant table
6208 behind. */
6209 rtx barrier = find_barrier (num_mova, mova, insn);
6210 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
6211 int need_aligned_label = 0;
6212
6213 if (num_mova && ! mova_p (mova))
6214 {
6215 /* find_barrier had to change the first mova into a
6216 pcload; thus, we have to start with this new pcload. */
6217 insn = mova;
6218 num_mova = 0;
6219 }
6220 /* Now find all the moves between the points and modify them. */
6221 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6222 {
6223 if (LABEL_P (scan))
6224 last_float = 0;
6225 if (NONJUMP_INSN_P (scan)
6226 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6227 need_aligned_label = 1;
6228 if (broken_move (scan))
6229 {
6230 rtx *patp = &PATTERN (scan), pat = *patp;
6231 rtx src, dst;
6232 rtx lab;
6233 rtx newsrc;
6234 enum machine_mode mode;
6235
6236 if (GET_CODE (pat) == PARALLEL)
6237 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6238 src = SET_SRC (pat);
6239 dst = SET_DEST (pat);
6240 mode = GET_MODE (dst);
6241
6242 if (mode == SImode && satisfies_constraint_I16 (src)
6243 && REGNO (dst) != FPUL_REG)
6244 {
6245 int offset = 0;
6246
6247 mode = HImode;
6248 while (GET_CODE (dst) == SUBREG)
6249 {
6250 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6251 GET_MODE (SUBREG_REG (dst)),
6252 SUBREG_BYTE (dst),
6253 GET_MODE (dst));
6254 dst = SUBREG_REG (dst);
6255 }
6256 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6257 }
6258 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6259 {
6260 /* This must be an insn that clobbers r0. */
6261 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6262 XVECLEN (PATTERN (scan), 0)
6263 - 1);
6264 rtx clobber = *clobberp;
6265
6266 gcc_assert (GET_CODE (clobber) == CLOBBER
6267 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6268
6269 if (last_float
6270 && reg_set_between_p (r0_rtx, last_float_move, scan))
6271 last_float = 0;
6272 if (last_float
6273 && TARGET_SHCOMPACT
6274 && GET_MODE_SIZE (mode) != 4
6275 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6276 last_float = 0;
6277 lab = add_constant (src, mode, last_float);
6278 if (lab)
6279 emit_insn_before (gen_mova (lab), scan);
6280 else
6281 {
6282 /* There will be a REG_UNUSED note for r0 on
6283 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6284 otherwise reorg:mark_target_live_regs will not
6285 consider r0 to be used, and we could end up with a delay
6286 slot insn in front of SCAN that clobbers r0. */
6287 rtx note
6288 = find_regno_note (last_float_move, REG_UNUSED, 0);
6289
6290 /* If we are not optimizing, then there may not be
6291 a note. */
6292 if (note)
6293 PUT_REG_NOTE_KIND (note, REG_INC);
6294
6295 *last_float_addr = r0_inc_rtx;
6296 }
6297 last_float_move = scan;
6298 last_float = src;
6299 newsrc = gen_const_mem (mode,
6300 (((TARGET_SH4 && ! TARGET_FMOVD)
6301 || REGNO (dst) == FPUL_REG)
6302 ? r0_inc_rtx
6303 : r0_rtx));
6304 last_float_addr = &XEXP (newsrc, 0);
6305
6306 /* Remove the clobber of r0. */
6307 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6308 gen_rtx_SCRATCH (Pmode));
6309 }
6310 /* This is a mova needing a label. Create it. */
6311 else if (GET_CODE (src) == UNSPEC
6312 && XINT (src, 1) == UNSPEC_MOVA
6313 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6314 {
6315 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6316 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6317 newsrc = gen_rtx_UNSPEC (SImode,
6318 gen_rtvec (1, newsrc),
6319 UNSPEC_MOVA);
6320 }
6321 else if (GET_CODE (src) == UNSPEC_VOLATILE
6322 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6323 {
6324 newsrc = XVECEXP (src, 0, 0);
6325 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6326 INSN_CODE (scan) = -1;
6327 continue;
6328 }
6329 else
6330 {
6331 lab = add_constant (src, mode, 0);
6332 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6333 newsrc = gen_const_mem (mode, newsrc);
6334 }
6335 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6336 INSN_CODE (scan) = -1;
6337 }
6338 }
6339 dump_table (need_aligned_label ? insn : 0, barrier);
6340 insn = barrier;
6341 }
6342 }
6343 free_alloc_pool (label_ref_list_pool);
6344 for (insn = first; insn; insn = NEXT_INSN (insn))
6345 PUT_MODE (insn, VOIDmode);
6346
6347 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6348 INSN_ADDRESSES_FREE ();
6349 split_branches (first);
6350
6351 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6352 also has an effect on the register that holds the address of the sfunc.
6353 Insert an extra dummy insn in front of each sfunc that pretends to
6354 use this register. */
6355 if (flag_delayed_branch)
6356 {
6357 for (insn = first; insn; insn = NEXT_INSN (insn))
6358 {
6359 rtx reg = sfunc_uses_reg (insn);
6360
6361 if (! reg)
6362 continue;
6363 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6364 }
6365 }
6366 #if 0
6367 /* fpscr is not actually a user variable, but we pretend it is for the
6368 sake of the previous optimization passes, since we want it handled like
6369 one. However, we don't have any debugging information for it, so turn
6370 it into a non-user variable now. */
6371 if (TARGET_SH4)
6372 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
6373 #endif
6374 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6375 }
6376
6377 /* Return the UID of the insn that follows the specified label. */
6378 int
6379 get_dest_uid (rtx label, int max_uid)
6380 {
6381 rtx dest = next_real_insn (label);
6382 int dest_uid;
6383 if (! dest)
6384 /* This can happen for an undefined label. */
6385 return 0;
6386 dest_uid = INSN_UID (dest);
6387 /* If this is a newly created branch redirection blocking instruction,
6388 we cannot index the branch_uid or insn_addresses arrays with its
6389 uid. But then, we won't need to, because the actual destination is
6390 the following branch. */
6391 while (dest_uid >= max_uid)
6392 {
6393 dest = NEXT_INSN (dest);
6394 dest_uid = INSN_UID (dest);
6395 }
6396 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6397 return 0;
6398 return dest_uid;
6399 }
6400
6401 /* Split condbranches that are out of range. Also add clobbers for
6402 scratch registers that are needed in far jumps.
6403 We do this before delay slot scheduling, so that it can take our
6404 newly created instructions into account. It also allows us to
6405 find branches with common targets more easily. */
6406 static void
6407 split_branches (rtx first)
6408 {
6409 rtx insn;
6410 struct far_branch **uid_branch, *far_branch_list = 0;
6411 int max_uid = get_max_uid ();
6412 int ok;
6413
6414 /* Find out which branches are out of range. */
6415 shorten_branches (first);
6416
6417 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6418 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6419
6420 for (insn = first; insn; insn = NEXT_INSN (insn))
6421 if (! INSN_P (insn))
6422 continue;
6423 else if (INSN_DELETED_P (insn))
6424 {
6425 /* Shorten_branches would split this instruction again,
6426 so transform it into a note. */
6427 SET_INSN_DELETED (insn);
6428 }
6429 else if (JUMP_P (insn))
6430 {
6431 enum attr_type type = get_attr_type (insn);
6432 if (type == TYPE_CBRANCH)
6433 {
6434 rtx next, beyond;
6435
6436 if (get_attr_length (insn) > 4)
6437 {
6438 rtx src = SET_SRC (PATTERN (insn));
6439 rtx olabel = XEXP (XEXP (src, 1), 0);
6440 int addr = INSN_ADDRESSES (INSN_UID (insn));
6441 rtx label = 0;
6442 int dest_uid = get_dest_uid (olabel, max_uid);
6443 struct far_branch *bp = uid_branch[dest_uid];
6444
6445 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6446 the label if the LABEL_NUSES count drops to zero. There is
6447 always a jump_optimize pass that sets these values, but it
6448 proceeds to delete unreferenced code, and then if not
6449 optimizing, to un-delete the deleted instructions, thus
6450 leaving labels with use counts that are too low. */
6451 if (! optimize)
6452 {
6453 JUMP_LABEL (insn) = olabel;
6454 LABEL_NUSES (olabel)++;
6455 }
6456 if (! bp)
6457 {
6458 bp = (struct far_branch *) alloca (sizeof *bp);
6459 uid_branch[dest_uid] = bp;
6460 bp->prev = far_branch_list;
6461 far_branch_list = bp;
6462 bp->far_label
6463 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6464 LABEL_NUSES (bp->far_label)++;
6465 }
6466 else
6467 {
6468 label = bp->near_label;
6469 if (! label && bp->address - addr >= CONDJUMP_MIN)
6470 {
6471 rtx block = bp->insert_place;
6472
6473 if (GET_CODE (PATTERN (block)) == RETURN)
6474 block = PREV_INSN (block);
6475 else
6476 block = gen_block_redirect (block,
6477 bp->address, 2);
6478 label = emit_label_after (gen_label_rtx (),
6479 PREV_INSN (block));
6480 bp->near_label = label;
6481 }
6482 else if (label && ! NEXT_INSN (label))
6483 {
6484 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6485 bp->insert_place = insn;
6486 else
6487 gen_far_branch (bp);
6488 }
6489 }
6490 if (! label
6491 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6492 {
6493 bp->near_label = label = gen_label_rtx ();
6494 bp->insert_place = insn;
6495 bp->address = addr;
6496 }
6497 ok = redirect_jump (insn, label, 0);
6498 gcc_assert (ok);
6499 }
6500 else
6501 {
6502 /* get_attr_length (insn) == 2 */
6503 /* Check if we have a pattern where reorg wants to redirect
6504 the branch to a label from an unconditional branch that
6505 is too far away. */
6506 /* We can't use JUMP_LABEL here because it might be undefined
6507 when not optimizing. */
6508 /* A syntax error might cause beyond to be NULL_RTX. */
6509 beyond
6510 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6511 0));
6512
6513 if (beyond
6514 && (JUMP_P (beyond)
6515 || ((beyond = next_active_insn (beyond))
6516 && JUMP_P (beyond)))
6517 && GET_CODE (PATTERN (beyond)) == SET
6518 && recog_memoized (beyond) == CODE_FOR_jump_compact
6519 && ((INSN_ADDRESSES
6520 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6521 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6522 > 252 + 258 + 2))
6523 gen_block_redirect (beyond,
6524 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6525 }
6526
6527 next = next_active_insn (insn);
6528
6529 if (next
6530 && (JUMP_P (next)
6531 || ((next = next_active_insn (next))
6532 && JUMP_P (next)))
6533 && GET_CODE (PATTERN (next)) == SET
6534 && recog_memoized (next) == CODE_FOR_jump_compact
6535 && ((INSN_ADDRESSES
6536 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6537 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6538 > 252 + 258 + 2))
6539 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6540 }
6541 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6542 {
6543 int addr = INSN_ADDRESSES (INSN_UID (insn));
6544 rtx far_label = 0;
6545 int dest_uid = 0;
6546 struct far_branch *bp;
6547
6548 if (type == TYPE_JUMP)
6549 {
6550 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6551 dest_uid = get_dest_uid (far_label, max_uid);
6552 if (! dest_uid)
6553 {
6554 /* Parse errors can lead to labels outside
6555 the insn stream. */
6556 if (! NEXT_INSN (far_label))
6557 continue;
6558
6559 if (! optimize)
6560 {
6561 JUMP_LABEL (insn) = far_label;
6562 LABEL_NUSES (far_label)++;
6563 }
6564 redirect_jump (insn, ret_rtx, 1);
6565 far_label = 0;
6566 }
6567 }
6568 bp = uid_branch[dest_uid];
6569 if (! bp)
6570 {
6571 bp = (struct far_branch *) alloca (sizeof *bp);
6572 uid_branch[dest_uid] = bp;
6573 bp->prev = far_branch_list;
6574 far_branch_list = bp;
6575 bp->near_label = 0;
6576 bp->far_label = far_label;
6577 if (far_label)
6578 LABEL_NUSES (far_label)++;
6579 }
6580 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6581 if (addr - bp->address <= CONDJUMP_MAX)
6582 emit_label_after (bp->near_label, PREV_INSN (insn));
6583 else
6584 {
6585 gen_far_branch (bp);
6586 bp->near_label = 0;
6587 }
6588 else
6589 bp->near_label = 0;
6590 bp->address = addr;
6591 bp->insert_place = insn;
6592 if (! far_label)
6593 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6594 else
6595 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6596 }
6597 }
6598 /* Generate all pending far branches,
6599 and free our references to the far labels. */
6600 while (far_branch_list)
6601 {
6602 if (far_branch_list->near_label
6603 && ! NEXT_INSN (far_branch_list->near_label))
6604 gen_far_branch (far_branch_list);
6605 if (optimize
6606 && far_branch_list->far_label
6607 && ! --LABEL_NUSES (far_branch_list->far_label))
6608 delete_insn (far_branch_list->far_label);
6609 far_branch_list = far_branch_list->prev;
6610 }
6611
6612 /* Instruction length information is no longer valid due to the new
6613 instructions that have been generated. */
6614 init_insn_lengths ();
6615 }
6616
6617 /* Dump out instruction addresses, which is useful for debugging the
6618 constant pool table stuff.
6619
6620 If relaxing, output the label and pseudo-ops used to link together
6621 calls and the instruction which set the registers.
6622
6623 ??? The addresses printed by this routine for insns are nonsense for
6624 insns which are inside a sequence where none of the inner insns have
6625 variable length. This is because the second pass of shorten_branches
6626 does not bother to update them. */
6627 void
6628 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6629 int noperands ATTRIBUTE_UNUSED)
6630 {
6631 if (TARGET_DUMPISIZE)
6632 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6633
6634 if (TARGET_RELAX)
6635 {
6636 rtx note;
6637
6638 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6639 if (note)
6640 {
6641 rtx pattern;
6642
6643 pattern = PATTERN (insn);
6644 if (GET_CODE (pattern) == PARALLEL)
6645 pattern = XVECEXP (pattern, 0, 0);
6646 switch (GET_CODE (pattern))
6647 {
6648 case SET:
6649 if (GET_CODE (SET_SRC (pattern)) != CALL
6650 && get_attr_type (insn) != TYPE_SFUNC)
6651 {
6652 targetm.asm_out.internal_label
6653 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6654 break;
6655 }
6656 /* else FALLTHROUGH */
6657 case CALL:
6658 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6659 CODE_LABEL_NUMBER (XEXP (note, 0)));
6660 break;
6661
6662 default:
6663 gcc_unreachable ();
6664 }
6665 }
6666 }
6667 }
6668
6669 /* Dump out any constants accumulated in the final pass. These will
6670 only be labels. */
6671 const char *
6672 output_jump_label_table (void)
6673 {
6674 int i;
6675
6676 if (pool_size)
6677 {
6678 fprintf (asm_out_file, "\t.align 2\n");
6679 for (i = 0; i < pool_size; i++)
6680 {
6681 pool_node *p = &pool_vector[i];
6682
6683 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6684 CODE_LABEL_NUMBER (p->label));
6685 output_asm_insn (".long %O0", &p->value);
6686 }
6687 pool_size = 0;
6688 }
6689
6690 return "";
6691 }
6692 \f
6693 /* A full frame looks like:
6694
6695 arg-5
6696 arg-4
6697 [ if current_function_anonymous_args
6698 arg-3
6699 arg-2
6700 arg-1
6701 arg-0 ]
6702 saved-fp
6703 saved-r10
6704 saved-r11
6705 saved-r12
6706 saved-pr
6707 local-n
6708 ..
6709 local-1
6710 local-0 <- fp points here.
6711
6712 Number of bytes pushed for anonymous args, used to pass information
6713 between expand_prologue and expand_epilogue.
6714
6715 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6716 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6717 for an epilogue and a negative value means that it's for a sibcall
6718 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6719 all the registers that are about to be restored, and hence dead. */
6720 static void
6721 output_stack_adjust (int size, rtx reg, int epilogue_p,
6722 HARD_REG_SET *live_regs_mask, bool frame_p)
6723 {
6724 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6725 if (size)
6726 {
6727 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6728
6729 /* This test is bogus, as output_stack_adjust is used to re-align the
6730 stack. */
6731 #if 0
6732 gcc_assert (!(size % align));
6733 #endif
6734
6735 if (CONST_OK_FOR_ADD (size))
6736 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6737 /* Try to do it with two partial adjustments; however, we must make
6738 sure that the stack is properly aligned at all times, in case
6739 an interrupt occurs between the two partial adjustments. */
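/* For example (an illustrative figure, not taken from the sources): the
   non-SHmedia add immediate accepts roughly an 8-bit signed constant, so
   a 200 byte adjustment that does not fit in a single add is split into
   200 / 2 & -4 = 100 followed by 200 - 100 = 100; each partial add fits
   the immediate and keeps the stack 4-byte aligned throughout.  */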
6740 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6741 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6742 {
6743 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6744 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6745 }
6746 else
6747 {
6748 rtx const_reg;
6749 rtx insn;
6750 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6751 int i;
6752
6753 /* If TEMP is invalid, we could temporarily save a general
6754 register to MACL. However, there is currently no need
6755 to handle this case, so just die when we see it. */
6756 if (epilogue_p < 0
6757 || current_function_interrupt
6758 || ! call_really_used_regs[temp] || fixed_regs[temp])
6759 temp = -1;
6760 if (temp < 0 && ! current_function_interrupt
6761 && (TARGET_SHMEDIA || epilogue_p >= 0))
6762 {
6763 HARD_REG_SET temps;
6764 COPY_HARD_REG_SET (temps, call_used_reg_set);
6765 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6766 if (epilogue_p > 0)
6767 {
6768 int nreg = 0;
6769 if (crtl->return_rtx)
6770 {
6771 enum machine_mode mode;
6772 mode = GET_MODE (crtl->return_rtx);
6773 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6774 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6775 }
6776 for (i = 0; i < nreg; i++)
6777 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6778 if (crtl->calls_eh_return)
6779 {
6780 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6781 for (i = 0; i <= 3; i++)
6782 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6783 }
6784 }
6785 if (TARGET_SHMEDIA && epilogue_p < 0)
6786 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6787 CLEAR_HARD_REG_BIT (temps, i);
6788 if (epilogue_p <= 0)
6789 {
6790 for (i = FIRST_PARM_REG;
6791 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6792 CLEAR_HARD_REG_BIT (temps, i);
6793 if (cfun->static_chain_decl != NULL)
6794 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6795 }
6796 temp = scavenge_reg (&temps);
6797 }
6798 if (temp < 0 && live_regs_mask)
6799 {
6800 HARD_REG_SET temps;
6801
6802 COPY_HARD_REG_SET (temps, *live_regs_mask);
6803 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6804 temp = scavenge_reg (&temps);
6805 }
6806 if (temp < 0)
6807 {
6808 rtx adj_reg, tmp_reg, mem;
6809
6810 /* If we reached here, the most likely case is the (sibcall)
6811 epilogue for non-SHmedia. Emit a special push/pop sequence
6812 for such a case as a last resort. This looks lengthy, but it
6813 should not be a problem in practice because the case is very
6814 rare. */
6815
6816 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6817
6818
6819 /* ??? There is still the slight possibility that r4 or
6820 r5 have been reserved as fixed registers or assigned
6821 as global registers, and they change during an
6822 interrupt. There are possible ways to handle this:
6823
6824 - If we are adjusting the frame pointer (r14), we can do
6825 with a single temp register and an ordinary push / pop
6826 on the stack.
6827 - Grab any call-used or call-saved registers (i.e. not
6828 fixed or globals) for the temps we need. We might
6829 also grab r14 if we are adjusting the stack pointer.
6830 If we can't find enough available registers, issue
6831 a diagnostic and die - the user must have reserved
6832 way too many registers.
6833 But since all this is rather unlikely to happen and
6834 would require extra testing, we just die if r4 / r5
6835 are not available. */
6836 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6837 && !global_regs[4] && !global_regs[5]);
6838
6839 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6840 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6841 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6842 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6843 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6844 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6845 emit_move_insn (mem, tmp_reg);
6846 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6847 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6848 emit_move_insn (mem, tmp_reg);
6849 emit_move_insn (reg, adj_reg);
6850 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6851 emit_move_insn (adj_reg, mem);
6852 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6853 emit_move_insn (tmp_reg, mem);
6854 /* Tell flow the insns that pop r4/r5 aren't dead. */
6855 emit_use (tmp_reg);
6856 emit_use (adj_reg);
6857 return;
6858 }
6859 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6860
6861 /* If SIZE is negative, subtract the positive value.
6862 This sometimes allows a constant pool entry to be shared
6863 between prologue and epilogue code. */
6864 if (size < 0)
6865 {
6866 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6867 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6868 }
6869 else
6870 {
6871 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6872 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6873 }
6874 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6875 gen_rtx_SET (VOIDmode, reg,
6876 gen_rtx_PLUS (SImode, reg,
6877 GEN_INT (size))));
6878 }
6879 }
6880 }
6881
6882 /* Emit the specified insn and mark it as frame related.
6883 FIXME: Rename this to emit_frame_insn. */
6884 static rtx
6885 frame_insn (rtx x)
6886 {
6887 x = emit_insn (x);
6888 RTX_FRAME_RELATED_P (x) = 1;
6889 return x;
6890 }
6891
6892 /* Output RTL to push register RN onto the stack. */
6893 static rtx
6894 push (int rn)
6895 {
6896 rtx x;
6897 if (rn == FPUL_REG)
6898 x = gen_push_fpul ();
6899 else if (rn == FPSCR_REG)
6900 x = gen_push_fpscr ();
6901 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6902 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6903 {
6904 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6905 return NULL_RTX;
6906 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6907 }
6908 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6909 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6910 else
6911 x = gen_push (gen_rtx_REG (SImode, rn));
6912
6913 x = frame_insn (x);
6914 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6915 return x;
6916 }
6917
6918 /* Output RTL to pop register RN from the stack. */
6919 static void
6920 pop (int rn)
6921 {
6922 rtx x, sp_reg, reg;
6923 if (rn == FPUL_REG)
6924 x = gen_pop_fpul ();
6925 else if (rn == FPSCR_REG)
6926 x = gen_pop_fpscr ();
6927 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6928 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6929 {
6930 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6931 return;
6932 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6933 }
6934 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6935 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6936 else
6937 x = gen_pop (gen_rtx_REG (SImode, rn));
6938
6939 x = emit_insn (x);
6940
6941 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6942 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6943 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6944 : SET_DEST (PATTERN (x)));
6945 add_reg_note (x, REG_CFA_RESTORE, reg);
6946 add_reg_note (x, REG_CFA_ADJUST_CFA,
6947 gen_rtx_SET (SImode, sp_reg,
6948 plus_constant (SImode, sp_reg,
6949 GET_MODE_SIZE (GET_MODE (reg)))));
6950 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6951 RTX_FRAME_RELATED_P (x) = 1;
6952 }
6953
6954 /* Generate code to push the regs specified in the mask. */
6955 static void
6956 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6957 {
6958 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6959 int skip_fpscr = 0;
6960
6961 /* Push PR last; this gives better latencies after the prologue, and
6962 provides candidates for the return delay slot when there are no general
6963 registers pushed. */
6964 for (; i < FIRST_PSEUDO_REGISTER; i++)
6965 {
6966 /* If this is an interrupt handler, and the SZ bit varies,
6967 and we have to push any floating point register, we need
6968 to switch to the correct precision first. */
6969 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6970 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6971 {
6972 HARD_REG_SET unsaved;
6973
6974 push (FPSCR_REG);
6975 COMPL_HARD_REG_SET (unsaved, *mask);
6976 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6977 skip_fpscr = 1;
6978 }
6979 if (i != PR_REG
6980 && (i != FPSCR_REG || ! skip_fpscr)
6981 && TEST_HARD_REG_BIT (*mask, i))
6982 {
6983 /* If the ISR has the RESBANK attribute assigned, don't push any of
6984 the following registers - R0-R14, MACH, MACL and GBR. */
6985 if (! (sh_cfun_resbank_handler_p ()
6986 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6987 || i == MACH_REG
6988 || i == MACL_REG
6989 || i == GBR_REG)))
6990 push (i);
6991 }
6992 }
6993
6994 /* Push banked registers last to improve delay slot opportunities. */
6995 if (interrupt_handler)
6996 {
6997 bool use_movml = false;
6998
6999 if (TARGET_SH2A)
7000 {
7001 unsigned int count = 0;
7002
7003 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7004 if (TEST_HARD_REG_BIT (*mask, i))
7005 count++;
7006 else
7007 break;
7008
7009 /* Use movml when all banked registers are pushed. */
7010 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7011 use_movml = true;
7012 }
7013
7014 if (sh_cfun_resbank_handler_p ())
7015 ; /* Do nothing. */
7016 else if (use_movml)
7017 {
7018 rtx x, mem, reg, set;
7019 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7020
7021 /* We must avoid scheduling the multiple-store insn together
7022 with other insns. */
7023 emit_insn (gen_blockage ());
7024 x = gen_movml_push_banked (sp_reg);
7025 x = frame_insn (x);
7026 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7027 {
7028 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7029 reg = gen_rtx_REG (SImode, i);
7030 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7031 }
7032
7033 set = gen_rtx_SET (SImode, sp_reg,
7034 plus_constant (Pmode, sp_reg, - 32));
7035 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7036 emit_insn (gen_blockage ());
7037 }
7038 else
7039 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7040 if (TEST_HARD_REG_BIT (*mask, i))
7041 push (i);
7042 }
7043
7044 /* Don't push the PR register for an ISR with the RESBANK attribute assigned. */
7045 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7046 push (PR_REG);
7047 }
7048
7049 /* Calculate how much extra space is needed to save all callee-saved
7050 target registers.
7051 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7052 static int
7053 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7054 {
7055 int reg;
7056 int stack_space = 0;
7057 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7058
7059 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7060 if ((! call_really_used_regs[reg] || interrupt_handler)
7061 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7062 /* Leave space to save this target register on the stack,
7063 in case target register allocation wants to use it. */
7064 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7065 return stack_space;
7066 }
7067
7068 /* Decide whether we should reserve space for callee-save target registers,
7069 in case target register allocation wants to use them. REGS_SAVED is
7070 the space, in bytes, that is already required for register saves.
7071 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7072 static int
7073 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7074 HARD_REG_SET *live_regs_mask)
7075 {
7076 if (optimize_size)
7077 return 0;
7078 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7079 }
7080
7081 /* Decide how much space to reserve for callee-save target registers
7082 in case target register allocation wants to use them.
7083 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7084 static int
7085 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7086 {
7087 if (shmedia_space_reserved_for_target_registers)
7088 return shmedia_target_regs_stack_space (live_regs_mask);
7089 else
7090 return 0;
7091 }
7092
7093 /* Work out the registers which need to be saved, both as a mask and a
7094 count of bytes needed to save them. Return the count.
7095
7096 If doing a pragma interrupt function, then push all regs used by the
7097 function, and if we call another function (we can tell by looking at PR),
7098 make sure that all the regs it clobbers are safe too. */
7099 static int
7100 calc_live_regs (HARD_REG_SET *live_regs_mask)
7101 {
7102 unsigned int reg;
7103 int count;
7104 tree attrs;
7105 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7106 bool nosave_low_regs;
7107 int pr_live, has_call;
7108
7109 attrs = DECL_ATTRIBUTES (current_function_decl);
7110 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7111 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7112 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7113 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7114
7115 CLEAR_HARD_REG_SET (*live_regs_mask);
7116 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7117 && df_regs_ever_live_p (FPSCR_REG))
7118 target_flags &= ~MASK_FPU_SINGLE;
7119 /* If switching to double mode would avoid a lot of register saves, do that. */
7120 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7121 && TARGET_FPU_SINGLE)
7122 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7123 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7124 && (! call_really_used_regs[reg]
7125 || interrupt_handler)
7126 && ++count > 2)
7127 {
7128 target_flags &= ~MASK_FPU_SINGLE;
7129 break;
7130 }
7131 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7132 knows how to use it. That means the pseudo originally allocated for
7133 the initial value can become the PR_MEDIA_REG hard register, as seen for
7134 execute/20010122-1.c:test9. */
7135 if (TARGET_SHMEDIA)
7136 /* ??? this function is called from initial_elimination_offset, hence we
7137 can't use the result of sh_media_register_for_return here. */
7138 pr_live = sh_pr_n_sets ();
7139 else
7140 {
7141 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7142 pr_live = (pr_initial
7143 ? (!REG_P (pr_initial)
7144 || REGNO (pr_initial) != (PR_REG))
7145 : df_regs_ever_live_p (PR_REG));
7146 /* For SHcompact, if not optimizing, we end up with a memory reference
7147 using the return address pointer for __builtin_return_address even
7148 though there is no actual need to put the PR register on the stack. */
7149 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7150 }
7151 /* Force PR to be live if the prologue has to call the SHmedia
7152 argument decoder or register saver. */
7153 if (TARGET_SHCOMPACT
7154 && ((crtl->args.info.call_cookie
7155 & ~ CALL_COOKIE_RET_TRAMP (1))
7156 || crtl->saves_all_registers))
7157 pr_live = 1;
7158 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
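/* Scan all hard registers from the highest-numbered one downwards and decide
   for each whether it must be saved.  Interrupt handlers save everything that
   is live or could be clobbered by a nested call, while ordinary functions
   save only the used callee-saved registers (plus the EH return data
   registers and, for the Renesas ABI, MACH / MACL).  */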
7159 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7160 {
7161 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7162 ? pr_live
7163 : interrupt_handler
7164 ? (/* Need to save all the regs ever live. */
7165 (df_regs_ever_live_p (reg)
7166 || (call_really_used_regs[reg]
7167 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7168 || reg == PIC_OFFSET_TABLE_REGNUM)
7169 && has_call)
7170 || (TARGET_SHMEDIA && has_call
7171 && REGISTER_NATURAL_MODE (reg) == SImode
7172 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7173 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7174 && reg != RETURN_ADDRESS_POINTER_REGNUM
7175 && reg != T_REG && reg != GBR_REG
7176 /* Push fpscr only on targets which have an FPU. */
7177 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7178 : (/* Only push those regs which are used and need to be saved. */
7179 (TARGET_SHCOMPACT
7180 && flag_pic
7181 && crtl->args.info.call_cookie
7182 && reg == PIC_OFFSET_TABLE_REGNUM)
7183 || (df_regs_ever_live_p (reg)
7184 && ((!call_really_used_regs[reg]
7185 && !(reg != PIC_OFFSET_TABLE_REGNUM
7186 && fixed_regs[reg] && call_used_regs[reg]))
7187 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7188 || (crtl->calls_eh_return
7189 && (reg == EH_RETURN_DATA_REGNO (0)
7190 || reg == EH_RETURN_DATA_REGNO (1)
7191 || reg == EH_RETURN_DATA_REGNO (2)
7192 || reg == EH_RETURN_DATA_REGNO (3)))
7193 || ((reg == MACL_REG || reg == MACH_REG)
7194 && df_regs_ever_live_p (reg)
7195 && sh_cfun_attr_renesas_p ())
7196 ))
7197 {
7198 SET_HARD_REG_BIT (*live_regs_mask, reg);
7199 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7200
7201 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7202 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7203 {
7204 if (FP_REGISTER_P (reg))
7205 {
7206 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7207 {
7208 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7209 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7210 }
7211 }
7212 else if (XD_REGISTER_P (reg))
7213 {
7214 /* Must switch to double mode to access these registers. */
7215 target_flags &= ~MASK_FPU_SINGLE;
7216 }
7217 }
7218 }
7219 if (nosave_low_regs && reg == R8_REG)
7220 break;
7221 }
7222 /* If we have a target register optimization pass after prologue / epilogue
7223 threading, we need to assume all target registers will be live even if
7224 they aren't now. */
7225 if (flag_branch_target_load_optimize2
7226 && TARGET_SAVE_ALL_TARGET_REGS
7227 && shmedia_space_reserved_for_target_registers)
7228 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7229 if ((! call_really_used_regs[reg] || interrupt_handler)
7230 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7231 {
7232 SET_HARD_REG_BIT (*live_regs_mask, reg);
7233 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7234 }
7235 /* If this is an interrupt handler, we don't have any call-clobbered
7236 registers we can conveniently use for target register save/restore.
7237 Make sure we save at least one general purpose register when we need
7238 to save target registers. */
7239 if (interrupt_handler
7240 && hard_reg_set_intersect_p (*live_regs_mask,
7241 reg_class_contents[TARGET_REGS])
7242 && ! hard_reg_set_intersect_p (*live_regs_mask,
7243 reg_class_contents[GENERAL_REGS]))
7244 {
7245 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7246 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7247 }
7248
7249 return count;
7250 }
7251
7252 /* Code to generate prologue and epilogue sequences */
7253
7254 /* PUSHED is the number of bytes that are being pushed on the
7255 stack for register saves. Return the frame size, padded
7256 appropriately so that the stack stays properly aligned. */
7257 static HOST_WIDE_INT
7258 rounded_frame_size (int pushed)
7259 {
7260 HOST_WIDE_INT size = get_frame_size ();
7261 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7262
7263 if (ACCUMULATE_OUTGOING_ARGS)
7264 size += crtl->outgoing_args_size;
7265
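/* Round size + pushed up to the stack boundary and subtract the bytes already
   pushed; e.g. with a 4 byte boundary, pushed = 8 and size = 10, the total of
   18 rounds up to 20 and the frame size returned is 12.  */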
7266 return ((size + pushed + align - 1) & -align) - pushed;
7267 }
7268
7269 /* Choose a call-clobbered target-branch register that remains
7270 unchanged along the whole function. We set it up to hold the
7271 return address in the prologue. */
7272 int
7273 sh_media_register_for_return (void)
7274 {
7275 int regno;
7276 int tr0_used;
7277
7278 if (! crtl->is_leaf)
7279 return -1;
7280 if (lookup_attribute ("interrupt_handler",
7281 DECL_ATTRIBUTES (current_function_decl)))
7282 return -1;
7283 if (sh_cfun_interrupt_handler_p ())
7284 return -1;
7285
7286 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7287
7288 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7289 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7290 return regno;
7291
7292 return -1;
7293 }
7294
7295 /* The maximum registers we need to save are:
7296 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7297 - 32 floating point registers (for each pair, we save none,
7298 one single precision value, or a double precision value).
7299 - 8 target registers
7300 - add 1 entry for a delimiter. */
7301 #define MAX_SAVED_REGS (62+32+8)
7302
7303 typedef struct save_entry_s
7304 {
7305 unsigned char reg;
7306 unsigned char mode;
7307 short offset;
7308 } save_entry;
7309
7310 #define MAX_TEMPS 4
7311
7312 /* There will be a delimiter entry with VOIDmode both at the start and the
7313 end of a filled in schedule. The end delimiter has the offset of the
7314 save with the smallest (i.e. most negative) offset. */
7315 typedef struct save_schedule_s
7316 {
7317 save_entry entries[MAX_SAVED_REGS + 2];
7318 int temps[MAX_TEMPS+1];
7319 } save_schedule;
7320
7321 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7322 use reverse order. Returns the last entry written to (not counting
7323 the delimiter). OFFSET_BASE is a number to be added to all offset
7324 entries. */
7325 static save_entry *
7326 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7327 int offset_base)
7328 {
7329 int align, i;
7330 save_entry *entry = schedule->entries;
7331 int tmpx = 0;
7332 int offset;
7333
7334 if (! current_function_interrupt)
7335 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7336 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7337 && ! FUNCTION_ARG_REGNO_P (i)
7338 && i != FIRST_RET_REG
7339 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7340 && ! (crtl->calls_eh_return
7341 && (i == EH_RETURN_STACKADJ_REGNO
7342 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7343 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7344 schedule->temps[tmpx++] = i;
7345 entry->reg = -1;
7346 entry->mode = VOIDmode;
7347 entry->offset = offset_base;
7348 entry++;
7349 /* We loop twice: first, we save 8-byte aligned registers at the
7350 higher addresses, which are known to be aligned. Then, we
7351 proceed to saving 32-bit registers that don't need 8-byte
7352 alignment.
7353 If this is an interrupt function, all registers that need saving
7354 need to be saved in full. Moreover, we need to postpone saving
7355 target registers until we have saved some general purpose registers
7356 that we can then use as scratch registers. */
7357 offset = offset_base;
7358 for (align = 1; align >= 0; align--)
7359 {
7360 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7361 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7362 {
7363 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
7364 int reg = i;
7365
7366 if (current_function_interrupt)
7367 {
7368 if (TARGET_REGISTER_P (i))
7369 continue;
7370 if (GENERAL_REGISTER_P (i))
7371 mode = DImode;
7372 }
7373 if (mode == SFmode && (i % 2) == 1
7374 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7375 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7376 {
7377 mode = DFmode;
7378 i--;
7379 reg--;
7380 }
7381
7382 /* If we're doing the aligned pass and this is not aligned,
7383 or we're doing the unaligned pass and this is aligned,
7384 skip it. */
7385 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7386 != align)
7387 continue;
7388
7389 if (current_function_interrupt
7390 && GENERAL_REGISTER_P (i)
7391 && tmpx < MAX_TEMPS)
7392 schedule->temps[tmpx++] = i;
7393
7394 offset -= GET_MODE_SIZE (mode);
7395 entry->reg = i;
7396 entry->mode = mode;
7397 entry->offset = offset;
7398 entry++;
7399 }
7400 if (align && current_function_interrupt)
7401 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7402 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7403 {
7404 offset -= GET_MODE_SIZE (DImode);
7405 entry->reg = i;
7406 entry->mode = DImode;
7407 entry->offset = offset;
7408 entry++;
7409 }
7410 }
7411 entry->reg = -1;
7412 entry->mode = VOIDmode;
7413 entry->offset = offset;
7414 schedule->temps[tmpx] = -1;
7415 return entry - 1;
7416 }
7417
7418 /* Expand code for the function prologue. */
7419 void
7420 sh_expand_prologue (void)
7421 {
7422 HARD_REG_SET live_regs_mask;
7423 int d, i;
7424 int d_rounding = 0;
7425 int save_flags = target_flags;
7426 int pretend_args;
7427 int stack_usage;
7428 tree sp_switch_attr
7429 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7430
7431 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7432
7433 /* We have pretend args if we had an object sent partially in registers
7434 and partially on the stack, e.g. a large structure. */
7435 pretend_args = crtl->args.pretend_args_size;
7436 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7437 && (NPARM_REGS(SImode)
7438 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7439 pretend_args = 0;
7440
7441 output_stack_adjust (-pretend_args
7442 - crtl->args.info.stack_regs * 8,
7443 stack_pointer_rtx, 0, NULL, true);
7444 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7445
7446 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7447 /* We're going to use the PIC register to load the address of the
7448 incoming-argument decoder and/or of the return trampoline from
7449 the GOT, so make sure the PIC register is preserved and
7450 initialized. */
7451 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7452
7453 if (TARGET_SHCOMPACT
7454 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7455 {
7456 int reg;
7457
7458 /* First, make all registers with incoming arguments that will
7459 be pushed onto the stack live, so that register renaming
7460 doesn't overwrite them. */
7461 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7462 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7463 >= NPARM_REGS (SImode) - reg)
7464 for (; reg < NPARM_REGS (SImode); reg++)
7465 emit_insn (gen_shcompact_preserve_incoming_args
7466 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7467 else if (CALL_COOKIE_INT_REG_GET
7468 (crtl->args.info.call_cookie, reg) == 1)
7469 emit_insn (gen_shcompact_preserve_incoming_args
7470 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7471
7472 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7473 stack_pointer_rtx);
7474 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7475 GEN_INT (crtl->args.info.call_cookie));
7476 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7477 gen_rtx_REG (SImode, R0_REG));
7478 }
7479 else if (TARGET_SHMEDIA)
7480 {
7481 int tr = sh_media_register_for_return ();
7482
7483 if (tr >= 0)
7484 emit_move_insn (gen_rtx_REG (DImode, tr),
7485 gen_rtx_REG (DImode, PR_MEDIA_REG));
7486 }
7487
7488 /* Emit the code for SETUP_VARARGS. */
7489 if (cfun->stdarg)
7490 {
7491 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7492 {
7493 /* Push arg regs as if they'd been provided by the caller on the stack. */
7494 for (i = 0; i < NPARM_REGS(SImode); i++)
7495 {
7496 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7497
7498 if (i >= (NPARM_REGS(SImode)
7499 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7500 ))
7501 break;
7502 push (rn);
7503 stack_usage += GET_MODE_SIZE (SImode);
7504 }
7505 }
7506 }
7507
7508 /* If we're supposed to switch stacks at function entry, do so now. */
7509 if (sp_switch_attr)
7510 {
7511 rtx lab, newsrc;
7512 /* The argument specifies a variable holding the address of the
7513 stack the interrupt function should switch to/from at entry/exit. */
7514 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7515 const char *s
7516 = ggc_strdup (TREE_STRING_POINTER (arg));
7517 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7518
7519 lab = add_constant (sp_switch, SImode, 0);
7520 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7521
7522 emit_insn (gen_sp_switch_1 (newsrc));
7523 }
7524
7525 d = calc_live_regs (&live_regs_mask);
7526 /* ??? Maybe we could save some switching if we can move a mode switch
7527 that already happens to be at the function start into the prologue. */
7528 if (target_flags != save_flags && ! current_function_interrupt)
7529 emit_insn (gen_toggle_sz ());
7530
7531 if (TARGET_SH5)
7532 {
7533 int offset_base, offset;
7534 rtx r0 = NULL_RTX;
7535 int offset_in_r0 = -1;
7536 int sp_in_r0 = 0;
7537 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7538 int total_size, save_size;
7539 save_schedule schedule;
7540 save_entry *entry;
7541 int *tmp_pnt;
7542
7543 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7544 && ! current_function_interrupt)
7545 r0 = gen_rtx_REG (Pmode, R0_REG);
7546
7547 /* D is the actual number of bytes that we need for saving registers,
7548 however, in initial_elimination_offset we have committed to using
7549 an additional TREGS_SPACE amount of bytes - in order to keep both
7550 addresses to arguments supplied by the caller and local variables
7551 valid, we must keep this gap. Place it between the incoming
7552 arguments and the actually saved registers in a bid to optimize
7553 locality of reference. */
7554 total_size = d + tregs_space;
7555 total_size += rounded_frame_size (total_size);
7556 save_size = total_size - rounded_frame_size (d);
7557 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7558 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7559 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7560
7561 /* If adjusting the stack in a single step costs nothing extra, do so.
7562 I.e. either if a single addi is enough, or we need a movi anyway,
7563 and we don't exceed the maximum offset range (the test for the
7564 latter is conservative for simplicity). */
7565 if (TARGET_SHMEDIA
7566 && (CONST_OK_FOR_I10 (-total_size)
7567 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7568 && total_size <= 2044)))
7569 d_rounding = total_size - save_size;
7570
7571 offset_base = d + d_rounding;
7572
7573 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7574 0, NULL, true);
7575 stack_usage += save_size + d_rounding;
7576
7577 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7578 tmp_pnt = schedule.temps;
7579 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7580 {
7581 enum machine_mode mode = (enum machine_mode) entry->mode;
7582 unsigned int reg = entry->reg;
7583 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7584 rtx orig_reg_rtx;
7585
7586 offset = entry->offset;
7587
7588 reg_rtx = gen_rtx_REG (mode, reg);
7589
7590 mem_rtx = gen_frame_mem (mode,
7591 gen_rtx_PLUS (Pmode,
7592 stack_pointer_rtx,
7593 GEN_INT (offset)));
7594
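/* Prefer a direct sp + offset address for the save.  If that offset is not a
   valid displacement for this mode, fall back to addressing through r0,
   using pre-decrement where the target supports it.  */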
7595 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7596 {
7597 gcc_assert (r0);
7598 mem_rtx = NULL_RTX;
7599 }
7600
7601 if (HAVE_PRE_DECREMENT
7602 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7603 || mem_rtx == NULL_RTX
7604 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7605 {
7606 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7607
7608 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7609 pre_dec = NULL_RTX;
7610 else
7611 {
7612 mem_rtx = NULL_RTX;
7613 offset += GET_MODE_SIZE (mode);
7614 }
7615 }
7616
7617 if (mem_rtx != NULL_RTX)
7618 goto addr_ok;
7619
7620 if (offset_in_r0 == -1)
7621 {
7622 emit_move_insn (r0, GEN_INT (offset));
7623 offset_in_r0 = offset;
7624 }
7625 else if (offset != offset_in_r0)
7626 {
7627 emit_move_insn (r0,
7628 gen_rtx_PLUS
7629 (Pmode, r0,
7630 GEN_INT (offset - offset_in_r0)));
7631 offset_in_r0 += offset - offset_in_r0;
7632 }
7633
7634 if (pre_dec != NULL_RTX)
7635 {
7636 if (! sp_in_r0)
7637 {
7638 emit_move_insn (r0,
7639 gen_rtx_PLUS
7640 (Pmode, r0, stack_pointer_rtx));
7641 sp_in_r0 = 1;
7642 }
7643
7644 offset -= GET_MODE_SIZE (mode);
7645 offset_in_r0 -= GET_MODE_SIZE (mode);
7646
7647 mem_rtx = pre_dec;
7648 }
7649 else if (sp_in_r0)
7650 mem_rtx = gen_frame_mem (mode, r0);
7651 else
7652 mem_rtx = gen_frame_mem (mode,
7653 gen_rtx_PLUS (Pmode,
7654 stack_pointer_rtx,
7655 r0));
7656
7657 /* We must not use an r0-based address for target-branch
7658 registers or for special registers without pre-dec
7659 memory addresses, since we store their values in r0
7660 first. */
7661 gcc_assert (!TARGET_REGISTER_P (reg)
7662 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7663 || mem_rtx == pre_dec));
7664
7665 addr_ok:
7666 orig_reg_rtx = reg_rtx;
7667 if (TARGET_REGISTER_P (reg)
7668 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7669 && mem_rtx != pre_dec))
7670 {
7671 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7672
7673 emit_move_insn (tmp_reg, reg_rtx);
7674
7675 if (REGNO (tmp_reg) == R0_REG)
7676 {
7677 offset_in_r0 = -1;
7678 sp_in_r0 = 0;
7679 gcc_assert (!refers_to_regno_p
7680 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7681 }
7682
7683 if (*++tmp_pnt <= 0)
7684 tmp_pnt = schedule.temps;
7685
7686 reg_rtx = tmp_reg;
7687 }
7688 {
7689 rtx insn;
7690
7691 /* Mark as interesting for the DWARF CFI generator. */
7692 insn = emit_move_insn (mem_rtx, reg_rtx);
7693 RTX_FRAME_RELATED_P (insn) = 1;
7694 /* If we use an intermediate register for the save, we can't
7695 describe this exactly in cfi as a copy of the to-be-saved
7696 register into the temporary register and then the temporary
7697 register on the stack, because the temporary register can
7698 have a different natural size than the to-be-saved register.
7699 Thus, we gloss over the intermediate copy and pretend we do
7700 a direct save from the to-be-saved register. */
7701 if (REGNO (reg_rtx) != reg)
7702 {
7703 rtx set;
7704
7705 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7706 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7707 }
7708
7709 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7710 {
7711 rtx reg_rtx = gen_rtx_REG (mode, reg);
7712 rtx set;
7713 rtx mem_rtx = gen_frame_mem (mode,
7714 gen_rtx_PLUS (Pmode,
7715 stack_pointer_rtx,
7716 GEN_INT (offset)));
7717
7718 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7719 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7720 }
7721 }
7722 }
7723
7724 gcc_assert (entry->offset == d_rounding);
7725 }
7726 else
7727 {
7728 push_regs (&live_regs_mask, current_function_interrupt);
7729 stack_usage += d;
7730 }
7731
7732 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7733 emit_insn (gen_GOTaddr2picreg ());
7734
7735 if (SHMEDIA_REGS_STACK_ADJUST ())
7736 {
7737 /* This must NOT go through the PLT, otherwise mach and macl
7738 may be clobbered. */
7739 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7740 (TARGET_FPU_ANY
7741 ? "__GCC_push_shmedia_regs"
7742 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7743 emit_insn (gen_shmedia_save_restore_regs_compact
7744 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7745 }
7746
7747 if (target_flags != save_flags && ! current_function_interrupt)
7748 emit_insn (gen_toggle_sz ());
7749
7750 target_flags = save_flags;
7751
7752 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7753 stack_pointer_rtx, 0, NULL, true);
7754 stack_usage += rounded_frame_size (d) - d_rounding;
7755
7756 if (frame_pointer_needed)
7757 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7758
7759 if (TARGET_SHCOMPACT
7760 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7761 {
7762 /* This must NOT go through the PLT, otherwise mach and macl
7763 may be clobbered. */
7764 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7765 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7766 emit_insn (gen_shcompact_incoming_args ());
7767 }
7768
7769 /* If we are profiling, make sure no instructions are scheduled before
7770 the call to mcount. Similarly, if call instructions get scheduled
7771 before frame-related insns, they'll confuse the unwinder, because
7772 currently SH has no unwind info for function epilogues. */
7773 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7774 emit_insn (gen_blockage ());
7775
7776 if (flag_stack_usage_info)
7777 current_function_static_stack_size = stack_usage;
7778 }
7779
7780 /* Expand code for the function epilogue. */
7781 void
7782 sh_expand_epilogue (bool sibcall_p)
7783 {
7784 HARD_REG_SET live_regs_mask;
7785 int d, i;
7786 int d_rounding = 0;
7787
7788 int save_flags = target_flags;
7789 int frame_size, save_size;
7790 int fpscr_deferred = 0;
7791 int e = sibcall_p ? -1 : 1;
7792
7793 d = calc_live_regs (&live_regs_mask);
7794
7795 save_size = d;
7796 frame_size = rounded_frame_size (d);
7797
7798 if (TARGET_SH5)
7799 {
7800 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7801 int total_size;
7802 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7803 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7804 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7805
7806 total_size = d + tregs_space;
7807 total_size += rounded_frame_size (total_size);
7808 save_size = total_size - frame_size;
7809
7810 /* If adjusting the stack in a single step costs nothing extra, do so.
7811 I.e. either if a single addi is enough, or we need a movi anyway,
7812 and we don't exceed the maximum offset range (the test for the
7813 latter is conservative for simplicity). */
7814 if (TARGET_SHMEDIA
7815 && ! frame_pointer_needed
7816 && (CONST_OK_FOR_I10 (total_size)
7817 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7818 && total_size <= 2044)))
7819 d_rounding = frame_size;
7820
7821 frame_size -= d_rounding;
7822 }
7823
7824 if (frame_pointer_needed)
7825 {
7826 /* We must avoid scheduling the epilogue with previous basic blocks.
7827 See PR/18032 and PR/40313. */
7828 emit_insn (gen_blockage ());
7829 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7830 &live_regs_mask, true);
7831
7832 /* We must avoid moving the stack pointer adjustment past code
7833 which reads from the local frame, else an interrupt could
7834 occur after the SP adjustment and clobber data in the local
7835 frame. */
7836 emit_insn (gen_blockage ());
7837 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7838 }
7839 else if (frame_size)
7840 {
7841 /* We must avoid moving the stack pointer adjustment past code
7842 which reads from the local frame, else an interrupt could
7843 occur after the SP adjustment and clobber data in the local
7844 frame. */
7845 emit_insn (gen_blockage ());
7846 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7847 &live_regs_mask, true);
7848 }
7849
7850 if (SHMEDIA_REGS_STACK_ADJUST ())
7851 {
7852 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7853 (TARGET_FPU_ANY
7854 ? "__GCC_pop_shmedia_regs"
7855 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7856 /* This must NOT go through the PLT, otherwise mach and macl
7857 may be clobbered. */
7858 emit_insn (gen_shmedia_save_restore_regs_compact
7859 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7860 }
7861
7862 /* Pop all the registers. */
7863
7864 if (target_flags != save_flags && ! current_function_interrupt)
7865 emit_insn (gen_toggle_sz ());
7866 if (TARGET_SH5)
7867 {
7868 int offset_base, offset;
7869 int offset_in_r0 = -1;
7870 int sp_in_r0 = 0;
7871 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7872 save_schedule schedule;
7873 save_entry *entry;
7874 int *tmp_pnt;
7875
7876 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7877 offset_base = -entry[1].offset + d_rounding;
7878 tmp_pnt = schedule.temps;
7879 for (; entry->mode != VOIDmode; entry--)
7880 {
7881 enum machine_mode mode = (enum machine_mode) entry->mode;
7882 int reg = entry->reg;
7883 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7884
7885 offset = offset_base + entry->offset;
7886 reg_rtx = gen_rtx_REG (mode, reg);
7887
7888 mem_rtx = gen_frame_mem (mode,
7889 gen_rtx_PLUS (Pmode,
7890 stack_pointer_rtx,
7891 GEN_INT (offset)));
7892
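/* As in the prologue, prefer a direct sp + offset address and fall back to an
   r0-based address (with post-increment where possible) when the offset is
   not a valid displacement for this mode.  */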
7893 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7894 mem_rtx = NULL_RTX;
7895
7896 if (HAVE_POST_INCREMENT
7897 && (offset == offset_in_r0
7898 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7899 && mem_rtx == NULL_RTX)
7900 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7901 {
7902 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7903
7904 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7905 post_inc = NULL_RTX;
7906 else
7907 mem_rtx = NULL_RTX;
7908 }
7909
7910 if (mem_rtx != NULL_RTX)
7911 goto addr_ok;
7912
7913 if (offset_in_r0 == -1)
7914 {
7915 emit_move_insn (r0, GEN_INT (offset));
7916 offset_in_r0 = offset;
7917 }
7918 else if (offset != offset_in_r0)
7919 {
7920 emit_move_insn (r0,
7921 gen_rtx_PLUS
7922 (Pmode, r0,
7923 GEN_INT (offset - offset_in_r0)));
7924 offset_in_r0 += offset - offset_in_r0;
7925 }
7926
7927 if (post_inc != NULL_RTX)
7928 {
7929 if (! sp_in_r0)
7930 {
7931 emit_move_insn (r0,
7932 gen_rtx_PLUS
7933 (Pmode, r0, stack_pointer_rtx));
7934 sp_in_r0 = 1;
7935 }
7936
7937 mem_rtx = post_inc;
7938
7939 offset_in_r0 += GET_MODE_SIZE (mode);
7940 }
7941 else if (sp_in_r0)
7942 mem_rtx = gen_frame_mem (mode, r0);
7943 else
7944 mem_rtx = gen_frame_mem (mode,
7945 gen_rtx_PLUS (Pmode,
7946 stack_pointer_rtx,
7947 r0));
7948
7949 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7950 || mem_rtx == post_inc);
7951
7952 addr_ok:
7953 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7954 && mem_rtx != post_inc)
7955 {
7956 emit_move_insn (r0, mem_rtx);
7957 mem_rtx = r0;
7958 }
7959 else if (TARGET_REGISTER_P (reg))
7960 {
7961 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7962
7963 /* Give the scheduler a bit of freedom by using up to
7964 MAX_TEMPS registers in a round-robin fashion. */
7965 emit_move_insn (tmp_reg, mem_rtx);
7966 mem_rtx = tmp_reg;
7967 if (*++tmp_pnt < 0)
7968 tmp_pnt = schedule.temps;
7969 }
7970
7971 emit_move_insn (reg_rtx, mem_rtx);
7972 }
7973
7974 gcc_assert (entry->offset + offset_base == d + d_rounding);
7975 }
7976 else /* ! TARGET_SH5 */
7977 {
7978 int last_reg;
7979
7980 save_size = 0;
7981 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
7982 register. */
7983 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7984 && !sh_cfun_resbank_handler_p ())
7985 {
7986 if (!frame_pointer_needed)
7987 emit_insn (gen_blockage ());
7988 pop (PR_REG);
7989 }
7990
7991 /* Banked registers are popped first to avoid being scheduled in the
7992 delay slot. RTE switches banks before the delay slot instruction. */
7993 if (current_function_interrupt)
7994 {
7995 bool use_movml = false;
7996
7997 if (TARGET_SH2A)
7998 {
7999 unsigned int count = 0;
8000
8001 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8002 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8003 count++;
8004 else
8005 break;
8006
8007 /* Use movml when all banked registers are popped. */
8008 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8009 use_movml = true;
8010 }
8011
8012 if (sh_cfun_resbank_handler_p ())
8013 ; /* Do nothing. */
8014 else if (use_movml)
8015 {
8016 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8017
8018 /* We must avoid scheduling the multiple-register load insn
8019 together with other insns. */
8020 emit_insn (gen_blockage ());
8021 emit_insn (gen_movml_pop_banked (sp_reg));
8022 emit_insn (gen_blockage ());
8023 }
8024 else
8025 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8026 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8027 pop (i);
8028
8029 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8030 }
8031 else
8032 last_reg = FIRST_PSEUDO_REGISTER;
8033
8034 for (i = 0; i < last_reg; i++)
8035 {
8036 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8037
8038 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8039 && hard_reg_set_intersect_p (live_regs_mask,
8040 reg_class_contents[DF_REGS]))
8041 fpscr_deferred = 1;
8042 /* For an ISR with the RESBANK attribute assigned, don't pop the
8043 following registers: R0-R14, MACH, MACL and GBR. */
8044 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8045 && ! (sh_cfun_resbank_handler_p ()
8046 && ((j >= FIRST_GENERAL_REG
8047 && j < LAST_GENERAL_REG)
8048 || j == MACH_REG
8049 || j == MACL_REG
8050 || j == GBR_REG)))
8051 pop (j);
8052
8053 if (j == FIRST_FP_REG && fpscr_deferred)
8054 pop (FPSCR_REG);
8055 }
8056 }
8057 if (target_flags != save_flags && ! current_function_interrupt)
8058 emit_insn (gen_toggle_sz ());
8059 target_flags = save_flags;
8060
8061 output_stack_adjust (crtl->args.pretend_args_size
8062 + save_size + d_rounding
8063 + crtl->args.info.stack_regs * 8,
8064 stack_pointer_rtx, e, NULL, true);
8065
8066 if (crtl->calls_eh_return)
8067 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8068 EH_RETURN_STACKADJ_RTX));
8069
8070 /* Switch back to the normal stack if necessary. */
8071 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8072 emit_insn (gen_sp_switch_2 ());
8073
8074 /* Tell flow the insn that pops PR isn't dead. */
8075 /* PR_REG will never be live in SHmedia mode, and we don't need to
8076 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8077 by the return pattern. */
8078 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8079 emit_use (gen_rtx_REG (SImode, PR_REG));
8080 }
8081
8082 /* Emit code to change the current function's return address to RA.
8083 TEMP is available as a scratch register, if needed. */
8084 void
8085 sh_set_return_address (rtx ra, rtx tmp)
8086 {
8087 HARD_REG_SET live_regs_mask;
8088 int d;
8089 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8090 int pr_offset;
8091
8092 d = calc_live_regs (&live_regs_mask);
8093
8094 /* If pr_reg isn't live, we can set it (or the register given in
8095 sh_media_register_for_return) directly. */
8096 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8097 {
8098 rtx rr;
8099
8100 if (TARGET_SHMEDIA)
8101 {
8102 int rr_regno = sh_media_register_for_return ();
8103
8104 if (rr_regno < 0)
8105 rr_regno = pr_reg;
8106
8107 rr = gen_rtx_REG (DImode, rr_regno);
8108 }
8109 else
8110 rr = gen_rtx_REG (SImode, pr_reg);
8111
8112 emit_insn (GEN_MOV (rr, ra));
8113 /* Tell flow the register for return isn't dead. */
8114 emit_use (rr);
8115 return;
8116 }
8117
8118 if (TARGET_SH5)
8119 {
8120 int offset;
8121 save_schedule schedule;
8122 save_entry *entry;
8123
8124 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8125 offset = entry[1].offset;
8126 for (; entry->mode != VOIDmode; entry--)
8127 if (entry->reg == pr_reg)
8128 goto found;
8129
8130 /* We can't find the PR register. */
8131 gcc_unreachable ();
8132
8133 found:
8134 offset = entry->offset - offset;
8135 pr_offset = (rounded_frame_size (d) + offset
8136 + SHMEDIA_REGS_STACK_ADJUST ());
8137 }
8138 else
8139 pr_offset = rounded_frame_size (d);
8140
8141 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8142
8143 if (frame_pointer_needed)
8144 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8145 else
8146 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8147
8148 tmp = gen_frame_mem (Pmode, tmp);
8149 emit_insn (GEN_MOV (tmp, ra));
8150 /* Tell flow this store isn't dead. */
8151 emit_use (tmp);
8152 }
8153
8154 /* Clear variables at function end. */
8155 static void
8156 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8157 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8158 {
8159 }
8160
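/* Expand __builtin_saveregs (TARGET_EXPAND_BUILTIN_SAVEREGS): save the unnamed
   argument registers into a stack buffer and return the address of that
   buffer.  */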
8161 static rtx
8162 sh_builtin_saveregs (void)
8163 {
8164 /* First unnamed integer register. */
8165 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8166 /* Number of integer registers we need to save. */
8167 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8168 /* First unnamed SFmode float reg. */
8169 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8170 /* Number of SFmode float regs to save. */
8171 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8172 rtx regbuf, fpregs;
8173 int bufsize, regno;
8174 alias_set_type alias_set;
8175
8176 if (TARGET_SH5)
8177 {
8178 if (n_intregs)
8179 {
8180 int pushregs = n_intregs;
8181
8182 while (pushregs < NPARM_REGS (SImode) - 1
8183 && (CALL_COOKIE_INT_REG_GET
8184 (crtl->args.info.call_cookie,
8185 NPARM_REGS (SImode) - pushregs)
8186 == 1))
8187 {
8188 crtl->args.info.call_cookie
8189 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8190 - pushregs, 1);
8191 pushregs++;
8192 }
8193
8194 if (pushregs == NPARM_REGS (SImode))
8195 crtl->args.info.call_cookie
8196 |= (CALL_COOKIE_INT_REG (0, 1)
8197 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8198 else
8199 crtl->args.info.call_cookie
8200 |= CALL_COOKIE_STACKSEQ (pushregs);
8201
8202 crtl->args.pretend_args_size += 8 * n_intregs;
8203 }
8204 if (TARGET_SHCOMPACT)
8205 return const0_rtx;
8206 }
8207
8208 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8209 {
8210 error ("__builtin_saveregs not supported by this subtarget");
8211 return const0_rtx;
8212 }
8213
8214 if (TARGET_SHMEDIA)
8215 n_floatregs = 0;
8216
8217 /* Allocate block of memory for the regs. */
8218 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8219 Or can assign_stack_local accept a 0 SIZE argument? */
8220 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8221
8222 if (TARGET_SHMEDIA)
8223 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
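/* With an odd number of float registers to save, allocate one extra word and
   nudge the buffer to an odd word boundary, so that the paired
   double-precision stores below can land on 8-byte boundaries.  */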
8224 else if (n_floatregs & 1)
8225 {
8226 rtx addr;
8227
8228 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8229 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8230 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8231 regbuf = change_address (regbuf, BLKmode, addr);
8232 }
8233 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8234 {
8235 rtx addr, mask;
8236
8237 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8238 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8239 XEXP (regbuf, 0), 4));
8240 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8241 emit_insn (gen_andsi3 (addr, addr, mask));
8242 regbuf = change_address (regbuf, BLKmode, addr);
8243 }
8244 else
8245 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8246 alias_set = get_varargs_alias_set ();
8247 set_mem_alias_set (regbuf, alias_set);
8248
8249 /* Save int args.
8250 This is optimized to only save the regs that are necessary. Explicitly
8251 named args need not be saved. */
8252 if (n_intregs > 0)
8253 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8254 adjust_address (regbuf, BLKmode,
8255 n_floatregs * UNITS_PER_WORD),
8256 n_intregs);
8257
8258 if (TARGET_SHMEDIA)
8259 /* Return the address of the regbuf. */
8260 return XEXP (regbuf, 0);
8261
8262 /* Save float args.
8263 This is optimized to only save the regs that are necessary. Explicitly
8264 named args need not be saved.
8265 We explicitly build a pointer to the buffer because it halves the insn
8266 count when not optimizing (otherwise the pointer is built for each reg
8267 saved).
8268 We emit the moves in reverse order so that we can use predecrement. */
8269
8270 fpregs = copy_to_mode_reg (Pmode,
8271 plus_constant (Pmode, XEXP (regbuf, 0),
8272 n_floatregs * UNITS_PER_WORD));
8273 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8274 {
8275 rtx mem;
8276 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8277 {
8278 emit_insn (gen_addsi3 (fpregs, fpregs,
8279 GEN_INT (-2 * UNITS_PER_WORD)));
8280 mem = change_address (regbuf, DFmode, fpregs);
8281 emit_move_insn (mem,
8282 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8283 }
8284 regno = first_floatreg;
8285 if (regno & 1)
8286 {
8287 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8288 mem = change_address (regbuf, SFmode, fpregs);
8289 emit_move_insn (mem,
8290 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8291 + regno - SH_REG_MSW_OFFSET));
8292 }
8293 }
8294 else
8295 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8296 {
8297 rtx mem;
8298
8299 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8300 mem = change_address (regbuf, SFmode, fpregs);
8301 emit_move_insn (mem,
8302 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8303 }
8304
8305 /* Return the address of the regbuf. */
8306 return XEXP (regbuf, 0);
8307 }
8308
8309 /* Define the `__builtin_va_list' type for the ABI. */
8310 static tree
8311 sh_build_builtin_va_list (void)
8312 {
8313 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8314 tree record, type_decl;
8315
8316 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8317 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8318 return ptr_type_node;
8319
8320 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8321 type_decl = build_decl (BUILTINS_LOCATION,
8322 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8323
8324 f_next_o = build_decl (BUILTINS_LOCATION,
8325 FIELD_DECL, get_identifier ("__va_next_o"),
8326 ptr_type_node);
8327 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8328 FIELD_DECL,
8329 get_identifier ("__va_next_o_limit"),
8330 ptr_type_node);
8331 f_next_fp = build_decl (BUILTINS_LOCATION,
8332 FIELD_DECL, get_identifier ("__va_next_fp"),
8333 ptr_type_node);
8334 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8335 FIELD_DECL,
8336 get_identifier ("__va_next_fp_limit"),
8337 ptr_type_node);
8338 f_next_stack = build_decl (BUILTINS_LOCATION,
8339 FIELD_DECL, get_identifier ("__va_next_stack"),
8340 ptr_type_node);
8341
8342 DECL_FIELD_CONTEXT (f_next_o) = record;
8343 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8344 DECL_FIELD_CONTEXT (f_next_fp) = record;
8345 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8346 DECL_FIELD_CONTEXT (f_next_stack) = record;
8347
8348 TYPE_STUB_DECL (record) = type_decl;
8349 TYPE_NAME (record) = type_decl;
8350 TYPE_FIELDS (record) = f_next_o;
8351 DECL_CHAIN (f_next_o) = f_next_o_limit;
8352 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8353 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8354 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8355
8356 layout_type (record);
8357
8358 return record;
8359 }
8360
8361 /* Implement `va_start' for varargs and stdarg. */
8362 static void
8363 sh_va_start (tree valist, rtx nextarg)
8364 {
8365 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8366 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8367 tree t, u;
8368 int nfp, nint;
8369
8370 if (TARGET_SH5)
8371 {
8372 expand_builtin_saveregs ();
8373 std_expand_builtin_va_start (valist, nextarg);
8374 return;
8375 }
8376
8377 if ((! TARGET_SH2E && ! TARGET_SH4)
8378 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8379 {
8380 std_expand_builtin_va_start (valist, nextarg);
8381 return;
8382 }
8383
8384 f_next_o = TYPE_FIELDS (va_list_type_node);
8385 f_next_o_limit = DECL_CHAIN (f_next_o);
8386 f_next_fp = DECL_CHAIN (f_next_o_limit);
8387 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8388 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8389
8390 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8391 NULL_TREE);
8392 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8393 valist, f_next_o_limit, NULL_TREE);
8394 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8395 NULL_TREE);
8396 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8397 valist, f_next_fp_limit, NULL_TREE);
8398 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8399 valist, f_next_stack, NULL_TREE);
8400
8401 /* Call __builtin_saveregs. */
8402 u = make_tree (sizetype, expand_builtin_saveregs ());
8403 u = fold_convert (ptr_type_node, u);
8404 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8405 TREE_SIDE_EFFECTS (t) = 1;
8406 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8407
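/* The buffer filled in by __builtin_saveregs holds the unnamed float
   registers first, followed by the unnamed integer registers; derive the FP
   and integer cursors and limits from the number of named args already
   used.  */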
8408 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8409 if (nfp < 8)
8410 nfp = 8 - nfp;
8411 else
8412 nfp = 0;
8413 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8414 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8415 TREE_SIDE_EFFECTS (t) = 1;
8416 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8417
8418 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8419 TREE_SIDE_EFFECTS (t) = 1;
8420 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8421
8422 nint = crtl->args.info.arg_count[SH_ARG_INT];
8423 if (nint < 4)
8424 nint = 4 - nint;
8425 else
8426 nint = 0;
8427 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8428 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8429 TREE_SIDE_EFFECTS (t) = 1;
8430 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8431
8432 u = make_tree (ptr_type_node, nextarg);
8433 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8434 TREE_SIDE_EFFECTS (t) = 1;
8435 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8436 }
8437
8438 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8439 member, return it. */
8440 static tree
8441 find_sole_member (tree type)
8442 {
8443 tree field, member = NULL_TREE;
8444
8445 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8446 {
8447 if (TREE_CODE (field) != FIELD_DECL)
8448 continue;
8449 if (!DECL_SIZE (field))
8450 return NULL_TREE;
8451 if (integer_zerop (DECL_SIZE (field)))
8452 continue;
8453 if (member)
8454 return NULL_TREE;
8455 member = field;
8456 }
8457 return member;
8458 }
8459
8460 /* Implement `va_arg'. */
8461 static tree
8462 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8463 gimple_seq *post_p ATTRIBUTE_UNUSED)
8464 {
8465 HOST_WIDE_INT size, rsize;
8466 tree tmp, pptr_type_node;
8467 tree addr, lab_over = NULL, result = NULL;
8468 bool pass_by_ref;
8469 tree eff_type;
8470
8471 if (!VOID_TYPE_P (type))
8472 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8473 else
8474 pass_by_ref = false;
8475
8476 if (pass_by_ref)
8477 type = build_pointer_type (type);
8478
8479 size = int_size_in_bytes (type);
8480 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8481 pptr_type_node = build_pointer_type (ptr_type_node);
8482
8483 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8484 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8485 {
8486 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8487 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8488 int pass_as_float;
8489 tree lab_false;
8490 tree member;
8491
8492 f_next_o = TYPE_FIELDS (va_list_type_node);
8493 f_next_o_limit = DECL_CHAIN (f_next_o);
8494 f_next_fp = DECL_CHAIN (f_next_o_limit);
8495 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8496 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8497
8498 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8499 NULL_TREE);
8500 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8501 valist, f_next_o_limit, NULL_TREE);
8502 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8503 valist, f_next_fp, NULL_TREE);
8504 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8505 valist, f_next_fp_limit, NULL_TREE);
8506 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8507 valist, f_next_stack, NULL_TREE);
8508
8509 /* Structures with a single member with a distinct mode are passed
8510 like their member. This is relevant if the latter has a REAL_TYPE
8511 or COMPLEX_TYPE type. */
8512 eff_type = type;
8513 while (TREE_CODE (eff_type) == RECORD_TYPE
8514 && (member = find_sole_member (eff_type))
8515 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8516 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8517 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8518 {
8519 tree field_type = TREE_TYPE (member);
8520
8521 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8522 eff_type = field_type;
8523 else
8524 {
8525 gcc_assert ((TYPE_ALIGN (eff_type)
8526 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8527 || (TYPE_ALIGN (eff_type)
8528 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8529 break;
8530 }
8531 }
8532
8533 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8534 {
8535 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8536 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8537 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8538 && size <= 16));
8539 }
8540 else
8541 {
8542 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8543 }
8544
8545 addr = create_tmp_var (pptr_type_node, NULL);
8546 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8547 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8548
8549 valist = build_simple_mem_ref (addr);
8550
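/* Emit the equivalent of: if advancing the relevant register cursor would run
   past its limit, fetch the argument from __va_next_stack instead; ADDR is
   left pointing at whichever cursor the value is to be read through.  */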
8551 if (pass_as_float)
8552 {
8553 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8554 tree cmp;
8555 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8556
8557 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8558 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8559
8560 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8561 tmp = next_fp_limit;
8562 if (size > 4 && !is_double)
8563 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8564 tmp = build2 (GE_EXPR, boolean_type_node,
8565 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8566 cmp = build3 (COND_EXPR, void_type_node, tmp,
8567 build1 (GOTO_EXPR, void_type_node,
8568 unshare_expr (lab_false)), NULL_TREE);
8569 if (!is_double)
8570 gimplify_and_add (cmp, pre_p);
8571
8572 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8573 || (is_double || size == 16))
8574 {
8575 tmp = fold_convert (sizetype, next_fp_tmp);
8576 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8577 size_int (UNITS_PER_WORD));
8578 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8579 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8580 }
8581 if (is_double)
8582 gimplify_and_add (cmp, pre_p);
8583
8584 #ifdef FUNCTION_ARG_SCmode_WART
8585 if (TYPE_MODE (eff_type) == SCmode
8586 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8587 {
8588 tree subtype = TREE_TYPE (eff_type);
8589 tree real, imag;
8590
8591 imag
8592 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8593 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8594
8595 real
8596 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8597 real = get_initialized_tmp_var (real, pre_p, NULL);
8598
8599 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8600 if (type != eff_type)
8601 result = build1 (VIEW_CONVERT_EXPR, type, result);
8602 result = get_initialized_tmp_var (result, pre_p, NULL);
8603 }
8604 #endif /* FUNCTION_ARG_SCmode_WART */
8605
8606 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8607 gimplify_and_add (tmp, pre_p);
8608
8609 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8610 gimplify_and_add (tmp, pre_p);
8611
8612 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8613 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8614 gimplify_assign (unshare_expr (next_fp_tmp),
8615 unshare_expr (valist), pre_p);
8616
8617 gimplify_assign (unshare_expr (valist),
8618 unshare_expr (next_fp_tmp), post_p);
8619 valist = next_fp_tmp;
8620 }
8621 else
8622 {
8623 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8624 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8625 unshare_expr (next_o_limit));
8626 tmp = build3 (COND_EXPR, void_type_node, tmp,
8627 build1 (GOTO_EXPR, void_type_node,
8628 unshare_expr (lab_false)),
8629 NULL_TREE);
8630 gimplify_and_add (tmp, pre_p);
8631
8632 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8633 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8634
8635 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8636 gimplify_and_add (tmp, pre_p);
8637
8638 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8639 gimplify_and_add (tmp, pre_p);
8640
8641 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8642 gimplify_assign (unshare_expr (next_o),
8643 unshare_expr (next_o_limit), pre_p);
8644
8645 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8646 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8647 }
8648
8649 if (!result)
8650 {
8651 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8652 gimplify_and_add (tmp, pre_p);
8653 }
8654 }
8655
8656 /* ??? In va-sh.h, there had been code to make values larger than
8657 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8658
8659 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8660 if (result)
8661 {
8662 gimplify_assign (result, tmp, pre_p);
8663 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8664 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8665 gimplify_and_add (tmp, pre_p);
8666 }
8667 else
8668 result = tmp;
8669
8670 if (pass_by_ref)
8671 result = build_va_arg_indirect_ref (result);
8672
8673 return result;
8674 }
8675
8676 /* 64-bit floating point memory transfers are paired single precision loads
8677 or stores. So DWARF information needs fixing in little endian mode (unless
8678 PR=SZ=1 in FPSCR). */
8679 rtx
8680 sh_dwarf_register_span (rtx reg)
8681 {
8682 unsigned regno = REGNO (reg);
8683
8684 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8685 return NULL_RTX;
8686
8687 return
8688 gen_rtx_PARALLEL (VOIDmode,
8689 gen_rtvec (2,
8690 gen_rtx_REG (SFmode, regno + 1),
8691 gen_rtx_REG (SFmode, regno)));
8692 }
8693
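/* Implement TARGET_PROMOTE_FUNCTION_MODE.  */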
8694 static enum machine_mode
8695 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8696 int *punsignedp, const_tree funtype,
8697 int for_return)
8698 {
8699 if (sh_promote_prototypes (funtype))
8700 return promote_mode (type, mode, punsignedp);
8701 else
8702 return default_promote_function_mode (type, mode, punsignedp, funtype,
8703 for_return);
8704 }
8705
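/* Implement TARGET_PROMOTE_PROTOTYPES.  */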
8706 static bool
8707 sh_promote_prototypes (const_tree type)
8708 {
8709 if (TARGET_HITACHI)
8710 return false;
8711 if (! type)
8712 return true;
8713 return ! sh_attr_renesas_p (type);
8714 }
8715
8716 /* Whether an argument must be passed by reference. On SHcompact, we
8717 pretend that arguments wider than 32 bits that would have been passed in
8718 registers are passed by reference, so that an SHmedia trampoline
8719 loads them into the full 64-bit registers. */
8720 static int
8721 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8722 const_tree type, bool named)
8723 {
8724 unsigned HOST_WIDE_INT size;
8725
8726 if (type)
8727 size = int_size_in_bytes (type);
8728 else
8729 size = GET_MODE_SIZE (mode);
8730
8731 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8732 && (!named
8733 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8734 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8735 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8736 && size > 4
8737 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8738 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8739 return size;
8740 else
8741 return 0;
8742 }
8743
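/* Implement TARGET_PASS_BY_REFERENCE.  */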
8744 static bool
8745 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8746 const_tree type, bool named)
8747 {
8748 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8749
8750 if (targetm.calls.must_pass_in_stack (mode, type))
8751 return true;
8752
8753 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8754 wants to know about pass-by-reference semantics for incoming
8755 arguments. */
8756 if (! cum)
8757 return false;
8758
8759 if (TARGET_SHCOMPACT)
8760 {
8761 cum->byref = shcompact_byref (cum, mode, type, named);
8762 return cum->byref != 0;
8763 }
8764
8765 return false;
8766 }
8767
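/* Implement TARGET_CALLEE_COPIES.  */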
8768 static bool
8769 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8770 const_tree type, bool named ATTRIBUTE_UNUSED)
8771 {
8772 /* ??? How can it possibly be correct to return true only on the
8773 caller side of the equation? Is there someplace else in the
8774 sh backend that's magically producing the copies? */
8775 return (get_cumulative_args (cum)->outgoing
8776 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8777 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8778 }
8779
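/* Implement TARGET_ARG_PARTIAL_BYTES: return how many bytes, if any, of an
   argument that is split between registers and the stack are passed in
   registers.  */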
8780 static int
8781 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8782 tree type, bool named ATTRIBUTE_UNUSED)
8783 {
8784 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8785 int words = 0;
8786
8787 if (!TARGET_SH5
8788 && PASS_IN_REG_P (*cum, mode, type)
8789 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8790 && (ROUND_REG (*cum, mode)
8791 + (mode != BLKmode
8792 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8793 : ROUND_ADVANCE (int_size_in_bytes (type)))
8794 > NPARM_REGS (mode)))
8795 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8796
8797 else if (!TARGET_SHCOMPACT
8798 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8799 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8800
8801 return words * UNITS_PER_WORD;
8802 }
8803
8804
8805 /* Define where to put the arguments to a function.
8806 Value is zero to push the argument on the stack,
8807 or a hard register in which to store the argument.
8808
8809 MODE is the argument's machine mode.
8810 TYPE is the data type of the argument (as a tree).
8811 This is null for libcalls where that information may
8812 not be available.
8813 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8814 the preceding args and about the function being called.
8815 NAMED is nonzero if this argument is a named parameter
8816 (otherwise it is an extra parameter matching an ellipsis).
8817
8818 On SH the first args are normally in registers
8819 and the rest are pushed. Any arg that starts within the first
8820 NPARM_REGS words is at least partially passed in a register unless
8821 its data type forbids. */
8822 static rtx
8823 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8824 const_tree type, bool named)
8825 {
8826 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8827
8828 if (! TARGET_SH5 && mode == VOIDmode)
8829 return GEN_INT (ca->renesas_abi ? 1 : 0);
8830
8831 if (! TARGET_SH5
8832 && PASS_IN_REG_P (*ca, mode, type)
8833 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8834 {
8835 int regno;
8836
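/* On little-endian SH4, the two SFmode halves of an SCmode argument do not go
   into consecutive registers in memory order, so describe the argument as an
   explicit two-register PARALLEL.  */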
8837 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8838 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8839 {
8840 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8841 gen_rtx_REG (SFmode,
8842 BASE_ARG_REG (mode)
8843 + (ROUND_REG (*ca, mode) ^ 1)),
8844 const0_rtx);
8845 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8846 gen_rtx_REG (SFmode,
8847 BASE_ARG_REG (mode)
8848 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8849 GEN_INT (4));
8850 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8851 }
8852
8853 /* If the alignment of a DF value causes an SF register to be
8854 skipped, we will use that skipped register for the next SF
8855 value. */
8856 if ((TARGET_HITACHI || ca->renesas_abi)
8857 && ca->free_single_fp_reg
8858 && mode == SFmode)
8859 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8860
8861 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8862 ^ (mode == SFmode && TARGET_SH4
8863 && TARGET_LITTLE_ENDIAN
8864 && ! TARGET_HITACHI && ! ca->renesas_abi);
8865 return gen_rtx_REG (mode, regno);
8866
8867 }
8868
8869 if (TARGET_SH5)
8870 {
8871 if (mode == VOIDmode && TARGET_SHCOMPACT)
8872 return GEN_INT (ca->call_cookie);
8873
8874 /* The following test assumes unnamed arguments are promoted to
8875 DFmode. */
8876 if (mode == SFmode && ca->free_single_fp_reg)
8877 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8878
8879 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8880 && (named || ! ca->prototype_p)
8881 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8882 {
8883 if (! ca->prototype_p && TARGET_SHMEDIA)
8884 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8885
8886 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8887 FIRST_FP_PARM_REG
8888 + ca->arg_count[(int) SH_ARG_FLOAT]);
8889 }
8890
8891 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8892 && (! TARGET_SHCOMPACT
8893 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8894 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8895 type, named))))
8896 {
8897 return gen_rtx_REG (mode, (FIRST_PARM_REG
8898 + ca->arg_count[(int) SH_ARG_INT]));
8899 }
8900
8901 return NULL_RTX;
8902 }
8903
8904 return NULL_RTX;
8905 }
8906
8907 /* Update the data in CUM to advance over an argument
8908 of mode MODE and data type TYPE.
8909 (TYPE is null for libcalls where that information may not be
8910 available.) */
8911 static void
8912 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
8913 const_tree type, bool named)
8914 {
8915 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8916
8917 if (ca->force_mem)
8918 ca->force_mem = 0;
8919 else if (TARGET_SH5)
8920 {
8921 const_tree type2 = (ca->byref && type
8922 ? TREE_TYPE (type)
8923 : type);
8924 enum machine_mode mode2 = (ca->byref && type
8925 ? TYPE_MODE (type2)
8926 : mode);
8927 int dwords = ((ca->byref
8928 ? ca->byref
8929 : mode2 == BLKmode
8930 ? int_size_in_bytes (type2)
8931 : GET_MODE_SIZE (mode2)) + 7) / 8;
8932 int numregs = MIN (dwords, NPARM_REGS (SImode)
8933 - ca->arg_count[(int) SH_ARG_INT]);
8934
8935 if (numregs)
8936 {
8937 ca->arg_count[(int) SH_ARG_INT] += numregs;
8938 if (TARGET_SHCOMPACT
8939 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8940 {
8941 ca->call_cookie
8942 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8943 - numregs, 1);
8944 /* N.B. We want this also for outgoing. */
8945 ca->stack_regs += numregs;
8946 }
8947 else if (ca->byref)
8948 {
8949 if (! ca->outgoing)
8950 ca->stack_regs += numregs;
8951 ca->byref_regs += numregs;
8952 ca->byref = 0;
8953 do
8954 ca->call_cookie
8955 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8956 - numregs, 2);
8957 while (--numregs);
8958 ca->call_cookie
8959 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8960 - 1, 1);
8961 }
8962 else if (dwords > numregs)
8963 {
8964 int pushregs = numregs;
8965
8966 if (TARGET_SHCOMPACT)
8967 ca->stack_regs += numregs;
8968 while (pushregs < NPARM_REGS (SImode) - 1
8969 && (CALL_COOKIE_INT_REG_GET
8970 (ca->call_cookie,
8971 NPARM_REGS (SImode) - pushregs)
8972 == 1))
8973 {
8974 ca->call_cookie
8975 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8976 - pushregs, 1);
8977 pushregs++;
8978 }
8979 if (numregs == NPARM_REGS (SImode))
8980 ca->call_cookie
8981 |= CALL_COOKIE_INT_REG (0, 1)
8982 | CALL_COOKIE_STACKSEQ (numregs - 1);
8983 else
8984 ca->call_cookie
8985 |= CALL_COOKIE_STACKSEQ (numregs);
8986 }
8987 }
8988 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8989 && (named || ! ca->prototype_p))
8990 {
8991 if (mode2 == SFmode && ca->free_single_fp_reg)
8992 ca->free_single_fp_reg = 0;
8993 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8994 < NPARM_REGS (SFmode))
8995 {
8996 int numfpregs
8997 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8998 NPARM_REGS (SFmode)
8999 - ca->arg_count[(int) SH_ARG_FLOAT]);
9000
9001 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9002
9003 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9004 {
9005 if (ca->outgoing && numregs > 0)
9006 do
9007 {
9008 ca->call_cookie
9009 |= (CALL_COOKIE_INT_REG
9010 (ca->arg_count[(int) SH_ARG_INT]
9011 - numregs + ((numfpregs - 2) / 2),
9012 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9013 - numfpregs) / 2));
9014 }
9015 while (numfpregs -= 2);
9016 }
9017 else if (mode2 == SFmode && (named)
9018 && (ca->arg_count[(int) SH_ARG_FLOAT]
9019 < NPARM_REGS (SFmode)))
9020 ca->free_single_fp_reg
9021 = FIRST_FP_PARM_REG - numfpregs
9022 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9023 }
9024 }
9025 return;
9026 }
9027
9028 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9029 {
9030 /* Note that we've used the skipped register. */
9031 if (mode == SFmode && ca->free_single_fp_reg)
9032 {
9033 ca->free_single_fp_reg = 0;
9034 return;
9035 }
9036 /* When we have a DF after an SF, there's an SF register that gets
9037 skipped in order to align the DF value. We note this skipped
9038 register, because the next SF value will use it, and not the
9039 SF that follows the DF. */
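/* For example (illustrative, Renesas/Hitachi convention with a double
   precision FPU): for  f (float a, double b, float c)  A takes the first
   single-precision register, B is aligned to the following register pair
   and thereby skips one single-precision register, and C is then placed
   in that skipped register rather than after B.  */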
9040 if (mode == DFmode
9041 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
9042 {
9043 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
9044 + BASE_ARG_REG (mode));
9045 }
9046 }
9047
9048 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9049 || PASS_IN_REG_P (*ca, mode, type))
9050 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9051 = (ROUND_REG (*ca, mode)
9052 + (mode == BLKmode
9053 ? ROUND_ADVANCE (int_size_in_bytes (type))
9054 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
9055 }
9056
9057 /* The Renesas calling convention doesn't quite fit into this scheme since
9058 the address is passed like an invisible argument, but one that is always
9059 passed in memory. */
9060 static rtx
9061 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9062 {
9063 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9064 return NULL_RTX;
9065 return gen_rtx_REG (Pmode, 2);
9066 }
9067
9068 /* Worker function for TARGET_FUNCTION_VALUE.
9069
9070 For the SH, this is like LIBCALL_VALUE, except that we must change the
9071 mode like PROMOTE_MODE does.
9072 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9073 tested here has to be kept in sync with the one in
9074 explow.c:promote_mode. */
9075 static rtx
9076 sh_function_value (const_tree valtype,
9077 const_tree fn_decl_or_type,
9078 bool outgoing ATTRIBUTE_UNUSED)
9079 {
9080 if (fn_decl_or_type
9081 && !DECL_P (fn_decl_or_type))
9082 fn_decl_or_type = NULL;
9083
9084 return gen_rtx_REG (
9085 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9086 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9087 && (TREE_CODE (valtype) == INTEGER_TYPE
9088 || TREE_CODE (valtype) == ENUMERAL_TYPE
9089 || TREE_CODE (valtype) == BOOLEAN_TYPE
9090 || TREE_CODE (valtype) == REAL_TYPE
9091 || TREE_CODE (valtype) == OFFSET_TYPE))
9092 && sh_promote_prototypes (fn_decl_or_type)
9093 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9094 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9095 }
9096
9097 /* Worker function for TARGET_LIBCALL_VALUE. */
9098 static rtx
9099 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9100 {
9101 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9102 }
9103
9104 /* Return true if N is a possible register number of function value. */
9105 static bool
9106 sh_function_value_regno_p (const unsigned int regno)
9107 {
9108 return ((regno) == FIRST_RET_REG
9109 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9110 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9111 }
9112
9113 /* Worker function for TARGET_RETURN_IN_MEMORY. */
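/* Roughly: on SH5 only values larger than 8 bytes are returned in memory;
   on the other targets every BLKmode value goes to memory, and under the
   Renesas/Hitachi ABI every struct is returned in memory even when it is
   small enough to have a scalar mode.  (Illustrative summary of the code
   below.)  */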
9114 static bool
9115 sh_return_in_memory (const_tree type, const_tree fndecl)
9116 {
9117 if (TARGET_SH5)
9118 {
9119 if (TYPE_MODE (type) == BLKmode)
9120 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9121 else
9122 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9123 }
9124 else
9125 {
9126 return (TYPE_MODE (type) == BLKmode
9127 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9128 && TREE_CODE (type) == RECORD_TYPE));
9129 }
9130 }
9131
9132 /* We actually emit the code in sh_expand_prologue. We used to use
9133 a static variable to flag that we need to emit this code, but that
9134 doesn't work when inlining, when functions are deferred and then emitted
9135 later. Fortunately, we already have two flags that are part of struct
9136 function that tell if a function uses varargs or stdarg. */
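/* For example (assuming the four integer argument registers r4..r7):
   for  int f (int a, int b, ...)  two registers remain for anonymous
   arguments, so *pretend_arg_size is set to 8, allowing the prologue to
   spill r6 and r7 next to any stack-passed arguments.  */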
9137 static void
9138 sh_setup_incoming_varargs (cumulative_args_t ca,
9139 enum machine_mode mode,
9140 tree type,
9141 int *pretend_arg_size,
9142 int second_time ATTRIBUTE_UNUSED)
9143 {
9144 gcc_assert (cfun->stdarg);
9145 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9146 {
9147 int named_parm_regs, anon_parm_regs;
9148
9149 named_parm_regs = (ROUND_REG (*get_cumulative_args (ca), mode)
9150 + (mode == BLKmode
9151 ? ROUND_ADVANCE (int_size_in_bytes (type))
9152 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
9153 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9154 if (anon_parm_regs > 0)
9155 *pretend_arg_size = anon_parm_regs * 4;
9156 }
9157 }
9158
9159 static bool
9160 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9161 {
9162 return TARGET_SH5;
9163 }
9164
9165 static bool
9166 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9167 {
9168 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9169
9170 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9171 }
9172
9173
9174 /* Define the offset between two registers, one to be eliminated, and
9175 the other its replacement, at the start of a routine. */
9176 int
9177 initial_elimination_offset (int from, int to)
9178 {
9179 int regs_saved;
9180 int regs_saved_rounding = 0;
9181 int total_saved_regs_space;
9182 int total_auto_space;
9183 int save_flags = target_flags;
9184 int copy_flags;
9185 HARD_REG_SET live_regs_mask;
9186
9187 shmedia_space_reserved_for_target_registers = false;
9188 regs_saved = calc_live_regs (&live_regs_mask);
9189 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9190
9191 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9192 {
9193 shmedia_space_reserved_for_target_registers = true;
9194 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9195 }
9196
9197 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9198 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9199 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9200
9201 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9202 copy_flags = target_flags;
9203 target_flags = save_flags;
9204
9205 total_saved_regs_space = regs_saved + regs_saved_rounding;
9206
9207 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9208 return total_saved_regs_space + total_auto_space
9209 + crtl->args.info.byref_regs * 8;
9210
9211 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9212 return total_saved_regs_space + total_auto_space
9213 + crtl->args.info.byref_regs * 8;
9214
9215 /* Initial gap between fp and sp is 0. */
9216 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9217 return 0;
9218
9219 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9220 return rounded_frame_size (0);
9221
9222 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9223 return rounded_frame_size (0);
9224
9225 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9226 && (to == HARD_FRAME_POINTER_REGNUM
9227 || to == STACK_POINTER_REGNUM));
9228 if (TARGET_SH5)
9229 {
9230 int n = total_saved_regs_space;
9231 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9232 save_schedule schedule;
9233 save_entry *entry;
9234
9235 n += total_auto_space;
9236
9237 /* If it wasn't saved, there's not much we can do. */
9238 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9239 return n;
9240
9241 target_flags = copy_flags;
9242
9243 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9244 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9245 if (entry->reg == pr_reg)
9246 {
9247 target_flags = save_flags;
9248 return entry->offset;
9249 }
9250 gcc_unreachable ();
9251 }
9252 else
9253 return total_auto_space;
9254 }
9255
9256 /* Parse the -mfixed-range= option string. */
9257 void
9258 sh_fix_range (const char *const_str)
9259 {
9260 int i, first, last;
9261 char *str, *dash, *comma;
9262
9263 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9264 REG2 are either register names or register numbers. The effect
9265 of this option is to mark the registers in the range from REG1 to
9266 REG2 as ``fixed'' so they won't be used by the compiler. */
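/* For example,  -mfixed-range=r10-r13  reserves r10..r13, and several
   ranges may be given separated by commas, e.g.
   -mfixed-range=r10-r11,r14-r14.  (Illustrative; any register names
   accepted by decode_reg_name can be used.)  */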
9267
9268 i = strlen (const_str);
9269 str = (char *) alloca (i + 1);
9270 memcpy (str, const_str, i + 1);
9271
9272 while (1)
9273 {
9274 dash = strchr (str, '-');
9275 if (!dash)
9276 {
9277 warning (0, "value of -mfixed-range must have form REG1-REG2");
9278 return;
9279 }
9280 *dash = '\0';
9281 comma = strchr (dash + 1, ',');
9282 if (comma)
9283 *comma = '\0';
9284
9285 first = decode_reg_name (str);
9286 if (first < 0)
9287 {
9288 warning (0, "unknown register name: %s", str);
9289 return;
9290 }
9291
9292 last = decode_reg_name (dash + 1);
9293 if (last < 0)
9294 {
9295 warning (0, "unknown register name: %s", dash + 1);
9296 return;
9297 }
9298
9299 *dash = '-';
9300
9301 if (first > last)
9302 {
9303 warning (0, "%s-%s is an empty range", str, dash + 1);
9304 return;
9305 }
9306
9307 for (i = first; i <= last; ++i)
9308 fixed_regs[i] = call_used_regs[i] = 1;
9309
9310 if (!comma)
9311 break;
9312
9313 *comma = ',';
9314 str = comma + 1;
9315 }
9316 }
9317 \f
9318 /* Insert any deferred function attributes from earlier pragmas. */
9319 static void
9320 sh_insert_attributes (tree node, tree *attributes)
9321 {
9322 tree attrs;
9323
9324 if (TREE_CODE (node) != FUNCTION_DECL)
9325 return;
9326
9327 /* We are only interested in function declarations. */
9328 if (!DECL_P (node))
9329 return;
9330
9331 /* Append the attributes to the deferred attributes. */
9332 *sh_deferred_function_attributes_tail = *attributes;
9333 attrs = sh_deferred_function_attributes;
9334 if (!attrs)
9335 return;
9336
9337 /* Some attributes imply or require the interrupt attribute. */
9338 if (!lookup_attribute ("interrupt_handler", attrs)
9339 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9340 {
9341 /* If we have a trapa_handler, but no interrupt_handler attribute,
9342 insert an interrupt_handler attribute. */
9343 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9344 /* We can't use sh_pr_interrupt here because that's not in the
9345 java frontend. */
9346 attrs
9347 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9348 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9349 if the interrupt attribute is missing, we ignore the attribute
9350 and warn. */
9351 else if (lookup_attribute ("sp_switch", attrs)
9352 || lookup_attribute ("trap_exit", attrs)
9353 || lookup_attribute ("nosave_low_regs", attrs)
9354 || lookup_attribute ("resbank", attrs))
9355 {
9356 tree *tail;
9357
9358 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9359 {
9360 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9361 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9362 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9363 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9364 warning (OPT_Wattributes,
9365 "%qE attribute only applies to interrupt functions",
9366 TREE_PURPOSE (attrs));
9367 else
9368 {
9369 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9370 NULL_TREE);
9371 tail = &TREE_CHAIN (*tail);
9372 }
9373 }
9374 attrs = *attributes;
9375 }
9376 }
9377
9378 /* Install the processed list. */
9379 *attributes = attrs;
9380
9381 /* Clear deferred attributes. */
9382 sh_deferred_function_attributes = NULL_TREE;
9383 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9384
9385 return;
9386 }
9387
9388 /*------------------------------------------------------------------------------
9389 Target specific attributes
9390 Supported attributes are:
9391
9392 * interrupt_handler
9393 Specifies this function is an interrupt handler.
9394
9395 * trapa_handler
9396 Like interrupt_handler, but don't save all registers.
9397
9398 * sp_switch
9399 Specifies an alternate stack for an interrupt handler to run on.
9400
9401 * trap_exit
9402 Use a trapa to exit an interrupt function instead of rte.
9403
9404 * nosave_low_regs
9405 Don't save r0..r7 in an interrupt handler function.
9406 This is useful on SH3* and SH4*, which have a separate set of low
9407 regs for user and privileged modes.
9408 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9409 those that run with interrupts disabled and thus can't be
9410 interrupted themselves).
9411
9412 * renesas
9413 Use Renesas calling/layout conventions (functions and structures).
9414
9415 * resbank
9416 In case of an interrupt handler function, use a register bank to
9417 save registers R0-R14, MACH, MACL, GBR and PR.
9418 This is available only on SH2A targets.
9419
9420 * function_vector
9421 Declares a function to be called using the TBR relative addressing
9422 mode. Takes an argument that specifies the slot number in the table
9423 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
9424 */
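/* Illustrative usage from user code (examples only, not part of this file):

     void __attribute__ ((interrupt_handler, nosave_low_regs)) isr (void);
     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                          trap_exit (11))) isr2 (void);
     void __attribute__ ((function_vector (42))) tbr_func (void);
     void __attribute__ ((renesas)) rfunc (int);

   sp_switch names a variable holding the alternate stack pointer, and
   function_vector is only accepted for SH2A (see the handlers below).  */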
9425
9426 /* Handle a 'resbank' attribute. */
9427 static tree
9428 sh_handle_resbank_handler_attribute (tree * node, tree name,
9429 tree args ATTRIBUTE_UNUSED,
9430 int flags ATTRIBUTE_UNUSED,
9431 bool * no_add_attrs)
9432 {
9433 if (!TARGET_SH2A)
9434 {
9435 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9436 name);
9437 *no_add_attrs = true;
9438 }
9439 if (TREE_CODE (*node) != FUNCTION_DECL)
9440 {
9441 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9442 name);
9443 *no_add_attrs = true;
9444 }
9445
9446 return NULL_TREE;
9447 }
9448
9449 /* Handle an "interrupt_handler" attribute; arguments as in
9450 struct attribute_spec.handler. */
9451 static tree
9452 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9453 tree args ATTRIBUTE_UNUSED,
9454 int flags ATTRIBUTE_UNUSED,
9455 bool *no_add_attrs)
9456 {
9457 if (TREE_CODE (*node) != FUNCTION_DECL)
9458 {
9459 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9460 name);
9461 *no_add_attrs = true;
9462 }
9463 else if (TARGET_SHCOMPACT)
9464 {
9465 error ("attribute interrupt_handler is not compatible with -m5-compact");
9466 *no_add_attrs = true;
9467 }
9468
9469 return NULL_TREE;
9470 }
9471
9472 /* Handle a 'function_vector' attribute; arguments as in
9473 struct attribute_spec.handler. */
9474 static tree
9475 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9476 tree args ATTRIBUTE_UNUSED,
9477 int flags ATTRIBUTE_UNUSED,
9478 bool * no_add_attrs)
9479 {
9480 if (!TARGET_SH2A)
9481 {
9482 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9483 name);
9484 *no_add_attrs = true;
9485 }
9486 else if (TREE_CODE (*node) != FUNCTION_DECL)
9487 {
9488 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9489 name);
9490 *no_add_attrs = true;
9491 }
9492 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9493 {
9494 /* The argument must be a constant integer. */
9495 warning (OPT_Wattributes,
9496 "%qE attribute argument not an integer constant",
9497 name);
9498 *no_add_attrs = true;
9499 }
9500 else if (tree_to_hwi (TREE_VALUE (args)) > 255)
9501 {
9502 /* The argument value must be between 0 and 255. */
9503 warning (OPT_Wattributes,
9504 "%qE attribute argument should be between 0 to 255",
9505 name);
9506 *no_add_attrs = true;
9507 }
9508 return NULL_TREE;
9509 }
9510
9511 /* Returns true if the call target X refers to a function that has been
9512 assigned the 'function_vector' attribute. */
9513 bool
9514 sh2a_is_function_vector_call (rtx x)
9515 {
9516 if (GET_CODE (x) == SYMBOL_REF
9517 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9518 {
9519 tree tr = SYMBOL_REF_DECL (x);
9520
9521 if (sh2a_function_vector_p (tr))
9522 return true;
9523 }
9524
9525 return false;
9526 }
9527
9528 /* Returns the function vector number, if the attribute
9529 'function_vector' is assigned, otherwise returns zero. */
9530 int
9531 sh2a_get_function_vector_number (rtx x)
9532 {
9533 int num;
9534 tree list, t;
9535
9536 if ((GET_CODE (x) == SYMBOL_REF)
9537 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9538 {
9539 t = SYMBOL_REF_DECL (x);
9540
9541 if (TREE_CODE (t) != FUNCTION_DECL)
9542 return 0;
9543
9544 list = SH_ATTRIBUTES (t);
9545 while (list)
9546 {
9547 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9548 {
9549 num = tree_to_hwi (TREE_VALUE (TREE_VALUE (list)));
9550 return num;
9551 }
9552
9553 list = TREE_CHAIN (list);
9554 }
9555
9556 return 0;
9557 }
9558 else
9559 return 0;
9560 }
9561
9562 /* Handle an "sp_switch" attribute; arguments as in
9563 struct attribute_spec.handler. */
9564 static tree
9565 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9566 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9567 {
9568 if (TREE_CODE (*node) != FUNCTION_DECL)
9569 {
9570 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9571 name);
9572 *no_add_attrs = true;
9573 }
9574 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9575 {
9576 /* The argument must be a constant string. */
9577 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9578 name);
9579 *no_add_attrs = true;
9580 }
9581
9582 return NULL_TREE;
9583 }
9584
9585 /* Handle a "trap_exit" attribute; arguments as in
9586 struct attribute_spec.handler. */
9587 static tree
9588 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9589 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9590 {
9591 if (TREE_CODE (*node) != FUNCTION_DECL)
9592 {
9593 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9594 name);
9595 *no_add_attrs = true;
9596 }
9597 /* The argument specifies a trap number to be used in a trapa instruction
9598 at function exit (instead of an rte instruction). */
9599 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9600 {
9601 /* The argument must be a constant integer. */
9602 warning (OPT_Wattributes, "%qE attribute argument not an "
9603 "integer constant", name);
9604 *no_add_attrs = true;
9605 }
9606
9607 return NULL_TREE;
9608 }
9609
9610 static tree
9611 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9612 tree name ATTRIBUTE_UNUSED,
9613 tree args ATTRIBUTE_UNUSED,
9614 int flags ATTRIBUTE_UNUSED,
9615 bool *no_add_attrs ATTRIBUTE_UNUSED)
9616 {
9617 return NULL_TREE;
9618 }
9619
9620 /* True if __attribute__((renesas)) or -mrenesas. */
9621 bool
9622 sh_attr_renesas_p (const_tree td)
9623 {
9624 if (TARGET_HITACHI)
9625 return true;
9626 if (td == NULL_TREE)
9627 return false;
9628 if (DECL_P (td))
9629 td = TREE_TYPE (td);
9630 if (td == error_mark_node)
9631 return false;
9632 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9633 != NULL_TREE);
9634 }
9635
9636 /* True if __attribute__((renesas)) or -mrenesas, for the current
9637 function. */
9638 bool
9639 sh_cfun_attr_renesas_p (void)
9640 {
9641 return sh_attr_renesas_p (current_function_decl);
9642 }
9643
9644 /* Returns true if the current function has the "interrupt_handler"
9645 attribute set. */
9646 bool
9647 sh_cfun_interrupt_handler_p (void)
9648 {
9649 return (lookup_attribute ("interrupt_handler",
9650 DECL_ATTRIBUTES (current_function_decl))
9651 != NULL_TREE);
9652 }
9653
9654 /* Returns true if FUNC has been assigned the attribute
9655 "function_vector". */
9656 bool
9657 sh2a_function_vector_p (tree func)
9658 {
9659 tree list;
9660 if (TREE_CODE (func) != FUNCTION_DECL)
9661 return false;
9662
9663 list = SH_ATTRIBUTES (func);
9664 while (list)
9665 {
9666 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9667 return true;
9668
9669 list = TREE_CHAIN (list);
9670 }
9671 return false;
9672 }
9673
9674 /* Returns true if the current function has the "resbank" attribute set. */
9675 bool
9676 sh_cfun_resbank_handler_p (void)
9677 {
9678 return ((lookup_attribute ("resbank",
9679 DECL_ATTRIBUTES (current_function_decl))
9680 != NULL_TREE)
9681 && (lookup_attribute ("interrupt_handler",
9682 DECL_ATTRIBUTES (current_function_decl))
9683 != NULL_TREE) && TARGET_SH2A);
9684 }
9685
9686 /* Returns true if the current function has a "trap_exit" attribute set. */
9687 bool
9688 sh_cfun_trap_exit_p (void)
9689 {
9690 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9691 != NULL_TREE;
9692 }
9693
9694 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9695 static const char *
9696 sh_check_pch_target_flags (int old_flags)
9697 {
9698 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9699 | MASK_SH_E | MASK_HARD_SH4
9700 | MASK_FPU_SINGLE | MASK_SH4))
9701 return _("created and used with different architectures / ABIs");
9702 if ((old_flags ^ target_flags) & MASK_HITACHI)
9703 return _("created and used with different ABIs");
9704 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9705 return _("created and used with different endianness");
9706 return NULL;
9707 }
9708 \f
9709 /* Predicates used by the templates. */
9710
9711 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9712 Used only in general_movsrc_operand. */
9713 bool
9714 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9715 {
9716 switch (REGNO (op))
9717 {
9718 case PR_REG:
9719 case MACL_REG:
9720 case MACH_REG:
9721 return true;
9722 }
9723 return false;
9724 }
9725
9726 /* Returns true if OP is a floating point value with value 0.0. */
9727 bool
9728 fp_zero_operand (rtx op)
9729 {
9730 REAL_VALUE_TYPE r;
9731
9732 if (GET_MODE (op) != SFmode)
9733 return false;
9734
9735 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9736 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9737 }
9738
9739 /* Returns true if OP is a floating point value with value 1.0. */
9740 bool
9741 fp_one_operand (rtx op)
9742 {
9743 REAL_VALUE_TYPE r;
9744
9745 if (GET_MODE (op) != SFmode)
9746 return false;
9747
9748 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9749 return REAL_VALUES_EQUAL (r, dconst1);
9750 }
9751
9752 /* In general mode switching is used. If we are
9753 compiling without -mfmovd, movsf_ie isn't taken into account for
9754 mode switching. We could check in machine_dependent_reorg for
9755 cases where we know we are in single precision mode, but there is
9756 no interface to find that out during reload, so we must avoid
9757 choosing an fldi alternative during reload and thus failing to
9758 allocate a scratch register for the constant loading. */
9759 bool
9760 fldi_ok (void)
9761 {
9762 return true;
9763 }
9764
9765 /* Return the TLS type for TLS symbols. */
9766 enum tls_model
9767 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9768 {
9769 if (GET_CODE (op) != SYMBOL_REF)
9770 return TLS_MODEL_NONE;
9771 return SYMBOL_REF_TLS_MODEL (op);
9772 }
9773 \f
9774 /* Return the destination address of a branch. */
9775 static int
9776 branch_dest (rtx branch)
9777 {
9778 rtx dest = SET_SRC (PATTERN (branch));
9779 int dest_uid;
9780
9781 if (GET_CODE (dest) == IF_THEN_ELSE)
9782 dest = XEXP (dest, 1);
9783 dest = XEXP (dest, 0);
9784 dest_uid = INSN_UID (dest);
9785 return INSN_ADDRESSES (dest_uid);
9786 }
9787 \f
9788 /* Return nonzero if REG is not used after INSN.
9789 We assume REG is a reload reg, and therefore does
9790 not live past labels. It may live past calls or jumps though. */
9791 bool
9792 reg_unused_after (rtx reg, rtx insn)
9793 {
9794 enum rtx_code code;
9795 rtx set;
9796
9797 /* If the reg is set by this instruction, then it is safe for our
9798 case. Disregard the case where this is a store to memory, since
9799 we are checking a register used in the store address. */
9800 set = single_set (insn);
9801 if (set && !MEM_P (SET_DEST (set))
9802 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9803 return true;
9804
9805 while ((insn = NEXT_INSN (insn)))
9806 {
9807 rtx set;
9808 if (!INSN_P (insn))
9809 continue;
9810
9811 code = GET_CODE (insn);
9812
9813 #if 0
9814 /* If this is a label that existed before reload, then the register
9815 is dead here. However, if this is a label added by reorg, then
9816 the register may still be live here. We can't tell the difference,
9817 so we just ignore labels completely. */
9818 if (code == CODE_LABEL)
9819 return 1;
9820 /* else */
9821 #endif
9822
9823 if (code == JUMP_INSN)
9824 return false;
9825
9826 /* If this is a sequence, we must handle them all at once.
9827 We could have for instance a call that sets the target register,
9828 and an insn in a delay slot that uses the register. In this case,
9829 we must return 0. */
9830 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9831 {
9832 int i;
9833 int retval = 0;
9834
9835 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9836 {
9837 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9838 rtx set = single_set (this_insn);
9839
9840 if (CALL_P (this_insn))
9841 code = CALL_INSN;
9842 else if (JUMP_P (this_insn))
9843 {
9844 if (INSN_ANNULLED_BRANCH_P (this_insn))
9845 return false;
9846 code = JUMP_INSN;
9847 }
9848
9849 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9850 return false;
9851 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9852 {
9853 if (!MEM_P (SET_DEST (set)))
9854 retval = true;
9855 else
9856 return false;
9857 }
9858 if (set == NULL_RTX
9859 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9860 return false;
9861 }
9862 if (retval == 1)
9863 return true;
9864 else if (code == JUMP_INSN)
9865 return false;
9866 }
9867
9868 set = single_set (insn);
9869 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9870 return false;
9871 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9872 return !MEM_P (SET_DEST (set));
9873 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9874 return false;
9875
9876 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9877 return true;
9878 }
9879 return true;
9880 }
9881 \f
9882 #include "ggc.h"
9883
9884 static GTY(()) rtx t_reg_rtx;
9885 rtx
9886 get_t_reg_rtx (void)
9887 {
9888 if (! t_reg_rtx)
9889 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
9890 return t_reg_rtx;
9891 }
9892
9893 static GTY(()) rtx fpscr_rtx;
9894 rtx
9895 get_fpscr_rtx (void)
9896 {
9897 if (! fpscr_rtx)
9898 {
9899 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9900 REG_USERVAR_P (fpscr_rtx) = 1;
9901 mark_user_reg (fpscr_rtx);
9902 }
9903 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9904 mark_user_reg (fpscr_rtx);
9905 return fpscr_rtx;
9906 }
9907
9908 static GTY(()) tree fpscr_values;
9909
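/* Descriptive note: emit_fpu_switch below loads FPSCR from the external
   array __fpscr_values, whose two entries (provided by the runtime, e.g.
   libgcc's SH support code) hold the FPSCR settings for the two precision
   modes; INDEX selects which entry to load.  */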
9910 static void
9911 emit_fpu_switch (rtx scratch, int index)
9912 {
9913 rtx dst, src;
9914
9915 if (fpscr_values == NULL)
9916 {
9917 tree t;
9918
9919 t = build_index_type (integer_one_node);
9920 t = build_array_type (integer_type_node, t);
9921 t = build_decl (BUILTINS_LOCATION,
9922 VAR_DECL, get_identifier ("__fpscr_values"), t);
9923 DECL_ARTIFICIAL (t) = 1;
9924 DECL_IGNORED_P (t) = 1;
9925 DECL_EXTERNAL (t) = 1;
9926 TREE_STATIC (t) = 1;
9927 TREE_PUBLIC (t) = 1;
9928 TREE_USED (t) = 1;
9929
9930 fpscr_values = t;
9931 }
9932
9933 src = DECL_RTL (fpscr_values);
9934 if (!can_create_pseudo_p ())
9935 {
9936 emit_move_insn (scratch, XEXP (src, 0));
9937 if (index != 0)
9938 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9939 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9940 }
9941 else
9942 src = adjust_address (src, PSImode, index * 4);
9943
9944 dst = get_fpscr_rtx ();
9945 emit_move_insn (dst, src);
9946 }
9947
9948 void
9949 emit_sf_insn (rtx pat)
9950 {
9951 emit_insn (pat);
9952 }
9953
9954 void
9955 emit_df_insn (rtx pat)
9956 {
9957 emit_insn (pat);
9958 }
9959
9960 void
9961 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9962 {
9963 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9964 }
9965
9966 void
9967 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9968 {
9969 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9970 get_fpscr_rtx ()));
9971 }
9972
9973 void
9974 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9975 {
9976 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9977 }
9978
9979 void
9980 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9981 {
9982 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9983 get_fpscr_rtx ()));
9984 }
9985 \f
9986 static rtx get_free_reg (HARD_REG_SET);
9987
9988 /* This function returns a register to use for loading the address
9989 from which the fpscr is loaded. Currently it always returns r1 or r7, but when we are
9990 able to use pseudo registers after combine, or have a better mechanism
9991 for choosing a register, it should be done here. */
9992 /* REGS_LIVE is the liveness information for the point for which we
9993 need this allocation. In some bare-bones exit blocks, r1 is live at the
9994 start. We can even have all of r0..r3 being live:
9995 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9996 The insn before which new insns are placed will clobber the register
9997 we return. If a basic block consists only of setting the return value
9998 register to a pseudo and using that register, the return value is not
9999 live before or after this block, yet we'll insert our insns right in
10000 the middle. */
10001 static rtx
10002 get_free_reg (HARD_REG_SET regs_live)
10003 {
10004 if (! TEST_HARD_REG_BIT (regs_live, 1))
10005 return gen_rtx_REG (Pmode, 1);
10006
10007 /* Hard reg 1 is live; since this is a small register classes target,
10008 there shouldn't be anything but a jump before the function end. */
10009 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10010 return gen_rtx_REG (Pmode, 7);
10011 }
10012
10013 /* This function will set the fpscr from memory.
10014 MODE is the mode we are setting it to. */
10015 void
10016 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10017 {
10018 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10019 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10020 rtx addr_reg;
10021
10022 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10023 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10024 }
10025
10026 /* Is the given character a logical line separator for the assembler? */
10027 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10028 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10029 #endif
10030
10031 static bool
10032 sequence_insn_p (rtx insn)
10033 {
10034 rtx prev, next;
10035
10036 prev = PREV_INSN (insn);
10037 if (prev == NULL)
10038 return false;
10039
10040 next = NEXT_INSN (prev);
10041 if (next == NULL)
10042 return false;
10043
10044 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10045 }
10046
10047 int
10048 sh_insn_length_adjustment (rtx insn)
10049 {
10050 /* Instructions with unfilled delay slots take up an extra two bytes for
10051 the nop in the delay slot. */
10052 if (((NONJUMP_INSN_P (insn)
10053 && GET_CODE (PATTERN (insn)) != USE
10054 && GET_CODE (PATTERN (insn)) != CLOBBER)
10055 || CALL_P (insn) || JUMP_P (insn))
10056 && ! sequence_insn_p (insn)
10057 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10058 return 2;
10059
10060 /* SH2e has a bug that prevents the use of annulled branches, so if
10061 the delay slot is not filled, we'll have to put a NOP in it. */
10062 if (sh_cpu_attr == CPU_SH2E
10063 && JUMP_P (insn)
10064 && get_attr_type (insn) == TYPE_CBRANCH
10065 && ! sequence_insn_p (insn))
10066 return 2;
10067
10068 /* sh-dsp parallel processing insns take four bytes instead of two. */
10069
10070 if (NONJUMP_INSN_P (insn))
10071 {
10072 int sum = 0;
10073 rtx body = PATTERN (insn);
10074 const char *templ;
10075 char c;
10076 bool maybe_label = true;
10077
10078 if (GET_CODE (body) == ASM_INPUT)
10079 templ = XSTR (body, 0);
10080 else if (asm_noperands (body) >= 0)
10081 templ
10082 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10083 else
10084 return 0;
10085 do
10086 {
10087 int ppi_adjust = 0;
10088
10089 do
10090 c = *templ++;
10091 while (c == ' ' || c == '\t');
10092 /* all sh-dsp parallel-processing insns start with p.
10093 The only non-ppi sh insn starting with p is pref.
10094 The only ppi starting with pr is prnd. */
10095 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10096 ppi_adjust = 2;
10097 /* The repeat pseudo-insn expands to three insns, a total of
10098 six bytes in size. */
10099 else if ((c == 'r' || c == 'R')
10100 && ! strncasecmp ("epeat", templ, 5))
10101 ppi_adjust = 4;
10102 while (c && c != '\n'
10103 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10104 {
10105 /* If this is a label, it is obviously not a ppi insn. */
10106 if (c == ':' && maybe_label)
10107 {
10108 ppi_adjust = 0;
10109 break;
10110 }
10111 else if (c == '\'' || c == '"')
10112 maybe_label = false;
10113 c = *templ++;
10114 }
10115 sum += ppi_adjust;
10116 maybe_label = c != ':';
10117 }
10118 while (c);
10119 return sum;
10120 }
10121 return 0;
10122 }
10123 \f
10124 /* Return TRUE for a valid displacement for the REG+disp addressing
10125 with MODE. */
10126 bool
10127 sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
10128 bool allow_zero)
10129 {
10130 if (! CONST_INT_P (op))
10131 return false;
10132
10133 if (TARGET_SHMEDIA)
10134 {
10135 int size;
10136
10137 /* Check if this is the address of an unaligned load / store. */
10138 if (mode == VOIDmode)
10139 return satisfies_constraint_I06 (op);
10140
10141 size = GET_MODE_SIZE (mode);
10142 return (!(INTVAL (op) & (size - 1))
10143 && INTVAL (op) >= -512 * size
10144 && INTVAL (op) < 512 * size);
10145 }
10146 else
10147 {
10148 const HOST_WIDE_INT offset = INTVAL (op);
10149 const int max_disp = max_mov_insn_displacement (mode, consider_sh2a);
10150 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10151
10152 /* If the mode does not support any displacement always return false.
10153 Even though an index of '0' is actually always valid, it will cause
10154 troubles when e.g. a DFmode move is split into two SFmode moves,
10155 where one SFmode move will have index '0' and the other move will
10156 have index '4'. */
10157 if (!allow_zero && max_disp < 1)
10158 return false;
10159
10160 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10161 }
10162 }
10163
10164 /* Recognize an RTL expression that is a valid memory address for
10165 an instruction.
10166 The MODE argument is the machine mode for the MEM expression
10167 that wants to use this address.
10168 Allow REG
10169 REG+disp
10170 REG+r0
10171 REG++
10172 --REG
10173 GBR
10174 GBR+disp */
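/* In SH assembler syntax these correspond to operands like (illustrative):
     @Rn   @(disp,Rn)   @(R0,Rn)   @Rn+   @-Rn   @(disp,GBR)
   e.g.  mov.l @(8,r4),r1   or   mov.b @(r0,r5),r2 .  */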
10175 static bool
10176 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
10177 {
10178 if (REG_P (x) && REGNO (x) == GBR_REG)
10179 return true;
10180
10181 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10182 return true;
10183 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10184 && ! TARGET_SHMEDIA
10185 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10186 return true;
10187 else if (GET_CODE (x) == PLUS
10188 && (mode != PSImode || reload_completed))
10189 {
10190 rtx xop0 = XEXP (x, 0);
10191 rtx xop1 = XEXP (x, 1);
10192
10193 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10194 return gbr_displacement (xop1, mode);
10195
10196 if (GET_MODE_SIZE (mode) <= 8
10197 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10198 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10199 return true;
10200
10201 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10202 || ((xop0 == stack_pointer_rtx
10203 || xop0 == hard_frame_pointer_rtx)
10204 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10205 || ((xop1 == stack_pointer_rtx
10206 || xop1 == hard_frame_pointer_rtx)
10207 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10208 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10209 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10210 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10211 && TARGET_FMOVD && mode == DFmode)))
10212 {
10213 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10214 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10215 return true;
10216 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10217 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10218 return true;
10219 }
10220 }
10221
10222 return false;
10223 }
10224 \f
10225 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10226 isn't protected by a PIC unspec. */
10227 bool
10228 nonpic_symbol_mentioned_p (rtx x)
10229 {
10230 const char *fmt;
10231 int i;
10232
10233 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10234 || GET_CODE (x) == PC)
10235 return true;
10236
10237 /* We don't want to look into the possible MEM location of a
10238 CONST_DOUBLE, since we're not going to use it, in general. */
10239 if (GET_CODE (x) == CONST_DOUBLE)
10240 return false;
10241
10242 if (GET_CODE (x) == UNSPEC
10243 && (XINT (x, 1) == UNSPEC_PIC
10244 || XINT (x, 1) == UNSPEC_GOT
10245 || XINT (x, 1) == UNSPEC_GOTOFF
10246 || XINT (x, 1) == UNSPEC_GOTPLT
10247 || XINT (x, 1) == UNSPEC_GOTTPOFF
10248 || XINT (x, 1) == UNSPEC_DTPOFF
10249 || XINT (x, 1) == UNSPEC_TPOFF
10250 || XINT (x, 1) == UNSPEC_PLT
10251 || XINT (x, 1) == UNSPEC_SYMOFF
10252 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10253 return false;
10254
10255 fmt = GET_RTX_FORMAT (GET_CODE (x));
10256 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10257 {
10258 if (fmt[i] == 'E')
10259 {
10260 int j;
10261 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10262 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10263 return true;
10264 }
10265 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10266 return true;
10267 }
10268
10269 return false;
10270 }
10271
10272 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10273 @GOTOFF in `reg'. */
10274 rtx
10275 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
10276 rtx reg)
10277 {
10278 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10279 return orig;
10280
10281 if (GET_CODE (orig) == LABEL_REF
10282 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10283 {
10284 if (reg == NULL_RTX)
10285 reg = gen_reg_rtx (Pmode);
10286
10287 emit_insn (gen_symGOTOFF2reg (reg, orig));
10288 return reg;
10289 }
10290 else if (GET_CODE (orig) == SYMBOL_REF)
10291 {
10292 if (reg == NULL_RTX)
10293 reg = gen_reg_rtx (Pmode);
10294
10295 emit_insn (gen_symGOT2reg (reg, orig));
10296 return reg;
10297 }
10298 return orig;
10299 }
10300
10301 /* Given a (logical) mode size and an offset in bytes, try to find the
10302 appropriate displacement value for a mov insn. On SH the displacements
10303 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10304 15 bytes in QImode. To compensate for this we create a new base address by
10305 adding an adjustment value to it.
10306
10307 If the originally requested offset is greater than 127 we prefer using
10308 values 124..127 over 128..131 to increase opportunities to use the
10309 add #imm, Rn insn.
10310
10311 In some cases it is possible that a requested offset might seem unaligned
10312 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10313 This is compensated by adjusting the base address so that the effective
10314 address of the displacement move insn will be aligned.
10315
10316 This is not the best possible way of rebasing the base address, as it
10317 does not look at other present displacement addressings around it.
10318 In some cases this can create more base address adjustments than would
10319 actually be necessary. */
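/* A worked example (illustrative): an SImode access at offset 68 exceeds
   the 60 byte limit, so the address is rebased by an adjustment of 64 and
   the move insn uses the remaining displacement of 4, i.e. something like
   add #64,Rn / mov.l @(4,Rn),Rm instead of a full address calculation.  */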
10320 struct disp_adjust
10321 {
10322 rtx offset_adjust;
10323 rtx mov_disp;
10324 };
10325
10326 static struct disp_adjust
10327 sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
10328 {
10329 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10330
10331 /* Do not try to use SH2A's large displacements here, because this would
10332 effectively disable the small displacement insns. */
10333 const int mode_sz = GET_MODE_SIZE (mode);
10334 const int mov_insn_sz = mov_insn_size (mode, false);
10335 const int max_disp = max_mov_insn_displacement (mode, false);
10336 const int max_disp_next = max_disp + mov_insn_sz;
10337 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10338 HOST_WIDE_INT offset_adjust;
10339
10340 /* In some cases this actually does happen and we must check for it. */
10341 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10342 return res;
10343
10344 /* Keeps the previous behavior for QImode displacement addressing.
10345 This just decides how the offset is re-based. Removing this special
10346 case will result in slightly bigger code on average, but it's not that
10347 bad actually. */
10348 if (mov_insn_sz == 1)
10349 align_modifier = 0;
10350
10351 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10352
10353 if (mode_sz + offset - offset_adjust <= max_disp_next)
10354 {
10355 res.offset_adjust = GEN_INT (offset_adjust);
10356 res.mov_disp = GEN_INT (offset - offset_adjust);
10357 }
10358
10359 return res;
10360 }
10361
10362 /* Try to modify an illegitimate address and make it legitimate.
10363 If we find one, return the new, valid address.
10364 Otherwise, return the original address. */
10365 static rtx
10366 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
10367 {
10368 if (flag_pic)
10369 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10370
10371 if (TARGET_SHMEDIA)
10372 return x;
10373
10374 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10375 || (TARGET_SH2E && mode == SFmode))
10376 return x;
10377
10378 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10379 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10380 {
10381 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10382 INTVAL (XEXP (x, 1)));
10383
10384 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10385 {
10386 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10387 adj.offset_adjust, NULL_RTX, 0,
10388 OPTAB_LIB_WIDEN);
10389 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10390 }
10391 }
10392
10393 return x;
10394 }
10395
10396 /* Attempt to replace *p, which is an address that needs reloading, with
10397 a valid memory address for an operand of mode MODE.
10398 Like for sh_legitimize_address, for the SH we try to get a normal form
10399 of the address. That will allow inheritance of the address reloads. */
10400 bool
10401 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
10402 int itype)
10403 {
10404 enum reload_type type = (enum reload_type) itype;
10405 const int mode_sz = GET_MODE_SIZE (mode);
10406
10407 if (TARGET_SHMEDIA)
10408 return false;
10409
10410 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10411 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10412 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
10413 && (ALLOW_INDEXED_ADDRESS
10414 || XEXP (*p, 0) == stack_pointer_rtx
10415 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10416 {
10417 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10418 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10419
10420 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10421 {
10422 push_reload (*p, NULL_RTX, p, NULL,
10423 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10424 return true;
10425 }
10426
10427 if (TARGET_SH2E && mode == SFmode)
10428 {
10429 *p = copy_rtx (*p);
10430 push_reload (*p, NULL_RTX, p, NULL,
10431 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10432 return true;
10433 }
10434
10435 /* FIXME: Do not allow to legitimize QImode and HImode displacement
10436 moves because then reload has a problem figuring the constraint
10437 that the move insn target/source reg must be R0.
10438 Or maybe some handling is wrong in sh_secondary_reload for this
10439 to work properly? */
10440 if ((mode_sz == 4 || mode_sz == 8)
10441 && ! (TARGET_SH4 && mode == DFmode)
10442 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10443 {
10444 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10445 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10446 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10447 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10448 return true;
10449 }
10450 }
10451
10452 /* We must re-recognize what we created before. */
10453 if (GET_CODE (*p) == PLUS
10454 && (mode_sz == 4 || mode_sz == 8)
10455 && GET_CODE (XEXP (*p, 0)) == PLUS
10456 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10457 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10458 && CONST_INT_P (XEXP (*p, 1))
10459 && ! (TARGET_SH2E && mode == SFmode))
10460 {
10461 /* Because this address is so complex, we know it must have
10462 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10463 it is already unshared, and needs no further unsharing. */
10464 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10465 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10466 return true;
10467 }
10468
10469 return false;
10470 }
10471
10472 /* In the name of slightly smaller debug output, and to cater to
10473 general assembler lossage, recognize various UNSPEC sequences
10474 and turn them back into a direct symbol reference. */
10475 static rtx
10476 sh_delegitimize_address (rtx orig_x)
10477 {
10478 rtx x, y;
10479
10480 orig_x = delegitimize_mem_from_attrs (orig_x);
10481
10482 x = orig_x;
10483 if (MEM_P (x))
10484 x = XEXP (x, 0);
10485 if (GET_CODE (x) == CONST)
10486 {
10487 y = XEXP (x, 0);
10488 if (GET_CODE (y) == UNSPEC)
10489 {
10490 if (XINT (y, 1) == UNSPEC_GOT
10491 || XINT (y, 1) == UNSPEC_GOTOFF
10492 || XINT (y, 1) == UNSPEC_SYMOFF)
10493 return XVECEXP (y, 0, 0);
10494 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10495 {
10496 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10497 {
10498 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10499
10500 if (GET_CODE (symplt) == UNSPEC
10501 && XINT (symplt, 1) == UNSPEC_PLT)
10502 return XVECEXP (symplt, 0, 0);
10503 }
10504 }
10505 else if (TARGET_SHMEDIA
10506 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10507 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10508 {
10509 rtx offset = XVECEXP (y, 0, 1);
10510
10511 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10512 if (MEM_P (orig_x))
10513 x = replace_equiv_address_nv (orig_x, x);
10514 return x;
10515 }
10516 }
10517 }
10518
10519 return orig_x;
10520 }
10521
10522 /* Mark the use of a constant in the literal table. If the constant
10523 has multiple labels, make it unique. */
10524 static rtx
10525 mark_constant_pool_use (rtx x)
10526 {
10527 rtx insn, lab, pattern;
10528
10529 if (x == NULL_RTX)
10530 return x;
10531
10532 switch (GET_CODE (x))
10533 {
10534 case LABEL_REF:
10535 x = XEXP (x, 0);
10536 case CODE_LABEL:
10537 break;
10538 default:
10539 return x;
10540 }
10541
10542 /* Get the first label in the list of labels for the same constant
10543 and delete the other labels in the list. */
10544 lab = x;
10545 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
10546 {
10547 if (!LABEL_P (insn)
10548 || LABEL_REFS (insn) != NEXT_INSN (insn))
10549 break;
10550 lab = insn;
10551 }
10552
10553 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10554 INSN_DELETED_P (insn) = 1;
10555
10556 /* Mark constants in a window. */
10557 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
10558 {
10559 if (!NONJUMP_INSN_P (insn))
10560 continue;
10561
10562 pattern = PATTERN (insn);
10563 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10564 continue;
10565
10566 switch (XINT (pattern, 1))
10567 {
10568 case UNSPECV_CONST2:
10569 case UNSPECV_CONST4:
10570 case UNSPECV_CONST8:
10571 XVECEXP (pattern, 0, 1) = const1_rtx;
10572 break;
10573 case UNSPECV_WINDOW_END:
10574 if (XVECEXP (pattern, 0, 0) == x)
10575 return lab;
10576 break;
10577 case UNSPECV_CONST_END:
10578 return lab;
10579 default:
10580 break;
10581 }
10582 }
10583
10584 return lab;
10585 }
10586 \f
10587 /* Return true if it's possible to redirect BRANCH1 to the destination
10588 of an unconditional jump BRANCH2. We only want to do this if the
10589 resulting branch will have a short displacement. */
10590 bool
10591 sh_can_redirect_branch (rtx branch1, rtx branch2)
10592 {
10593 if (flag_expensive_optimizations && simplejump_p (branch2))
10594 {
10595 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10596 rtx insn;
10597 int distance;
10598
10599 for (distance = 0, insn = NEXT_INSN (branch1);
10600 insn && distance < 256;
10601 insn = PREV_INSN (insn))
10602 {
10603 if (insn == dest)
10604 return true;
10605 else
10606 distance += get_attr_length (insn);
10607 }
10608 for (distance = 0, insn = NEXT_INSN (branch1);
10609 insn && distance < 256;
10610 insn = NEXT_INSN (insn))
10611 {
10612 if (insn == dest)
10613 return true;
10614 else
10615 distance += get_attr_length (insn);
10616 }
10617 }
10618 return false;
10619 }
10620
10621 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10622 bool
10623 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10624 unsigned int new_reg)
10625 {
10626 /* Interrupt functions can only use registers that have already been
10627 saved by the prologue, even if they would normally be
10628 call-clobbered. */
10629 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10630 return false;
10631
10632 return true;
10633 }
10634
10635 /* Function to update the integer COST
10636 based on the relationship between INSN that is dependent on
10637 DEP_INSN through the dependence LINK. The default is to make no
10638 adjustment to COST. This can be used for example to specify to
10639 the scheduler that an output- or anti-dependence does not incur
10640 the same cost as a data-dependence. The return value should be
10641 the new value for COST. */
10642 static int
10643 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10644 {
10645 rtx reg, use_pat;
10646
10647 if (TARGET_SHMEDIA)
10648 {
10649 /* On SHmedia, if the dependence is an anti-dependence or
10650 output-dependence, there is no cost. */
10651 if (REG_NOTE_KIND (link) != 0)
10652 {
10653 /* However, dependencies between target register loads and
10654 uses of the register in a subsequent block that are separated
10655 by a conditional branch are not modelled - we have to make do with
10656 the anti-dependency between the target register load and the
10657 conditional branch that ends the current block. */
10658 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10659 && GET_CODE (PATTERN (dep_insn)) == SET
10660 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10661 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10662 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10663 {
10664 int orig_cost = cost;
10665 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10666 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10667 ? insn : JUMP_LABEL (insn));
10668 /* On the likely path, the branch costs 1, on the unlikely path,
10669 it costs 3. */
10670 cost--;
10671 do
10672 target = next_active_insn (target);
10673 while (target && ! flow_dependent_p (target, dep_insn)
10674 && --cost > 0);
10675 /* If two branches are executed in immediate succession, with the
10676 first branch properly predicted, this causes a stall at the
10677 second branch, hence we won't need the target for the
10678 second branch for two cycles after the launch of the first
10679 branch. */
10680 if (cost > orig_cost - 2)
10681 cost = orig_cost - 2;
10682 }
10683 else
10684 cost = 0;
10685 }
10686
10687 else if (get_attr_is_mac_media (insn)
10688 && get_attr_is_mac_media (dep_insn))
10689 cost = 1;
10690
10691 else if (! reload_completed
10692 && GET_CODE (PATTERN (insn)) == SET
10693 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10694 && GET_CODE (PATTERN (dep_insn)) == SET
10695 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10696 && cost < 4)
10697 cost = 4;
10698 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10699 that is needed at the target. */
10700 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10701 && ! flow_dependent_p (insn, dep_insn))
10702 cost--;
10703 }
10704 else if (REG_NOTE_KIND (link) == 0)
10705 {
10706 enum attr_type type;
10707 rtx dep_set;
10708
10709 if (recog_memoized (insn) < 0
10710 || recog_memoized (dep_insn) < 0)
10711 return cost;
10712
10713 dep_set = single_set (dep_insn);
10714
10715 /* The latency that we specify in the scheduling description refers
10716 to the actual output, not to an auto-increment register; for that,
10717 the latency is one. */
10718 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10719 {
10720 rtx set = single_set (insn);
10721
10722 if (set
10723 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10724 && (!MEM_P (SET_DEST (set))
10725 || !reg_mentioned_p (SET_DEST (dep_set),
10726 XEXP (SET_DEST (set), 0))))
10727 cost = 1;
10728 }
10729 /* The only input for a call that is timing-critical is the
10730 function's address. */
10731 if (CALL_P (insn))
10732 {
10733 rtx call = get_call_rtx_from (insn);
10734 if (call
10735 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10736 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10737 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10738 cost -= TARGET_SH4_300 ? 3 : 6;
10739 }
10740 /* Likewise, the most timing-critical input for an sfunc call
10741 is the function address. However, sfuncs typically start
10742 using their arguments pretty quickly.
10743 Assume a four cycle delay for SH4 before they are needed.
10744 Cached ST40-300 calls are quicker, so assume only a one
10745 cycle delay there.
10746 ??? Maybe we should encode the delays till input registers
10747 are needed by sfuncs into the sfunc call insn. */
10748 /* All sfunc calls are parallels with at least four components.
10749 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10750 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10751 && XVECLEN (PATTERN (insn), 0) >= 4
10752 && (reg = sfunc_uses_reg (insn)))
10753 {
10754 if (! reg_set_p (reg, dep_insn))
10755 cost -= TARGET_SH4_300 ? 1 : 4;
10756 }
10757 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10758 {
10759 enum attr_type dep_type = get_attr_type (dep_insn);
10760
10761 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10762 cost--;
10763 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10764 && (type = get_attr_type (insn)) != TYPE_CALL
10765 && type != TYPE_SFUNC)
10766 cost--;
10767 /* When the preceding instruction loads the shift amount of
10768 the following SHAD/SHLD, the latency of the load is increased
10769 by 1 cycle. */
10770 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10771 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10772 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10773 XEXP (SET_SRC (single_set (insn)),
10774 1)))
10775 cost++;
10776 /* When an LS group instruction with a latency of less than
10777 3 cycles is followed by a double-precision floating-point
10778 instruction, FIPR, or FTRV, the latency of the first
10779 instruction is increased to 3 cycles. */
10780 else if (cost < 3
10781 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10782 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10783 cost = 3;
10784 /* The lsw register of a double-precision computation is ready one
10785 cycle earlier. */
10786 else if (reload_completed
10787 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10788 && (use_pat = single_set (insn))
10789 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10790 SET_SRC (use_pat)))
10791 cost -= 1;
10792
10793 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10794 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10795 cost -= 1;
10796 }
10797 else if (TARGET_SH4_300)
10798 {
10799 /* Stores need their input register two cycles later. */
10800 if (dep_set && cost >= 1
10801 && ((type = get_attr_type (insn)) == TYPE_STORE
10802 || type == TYPE_PSTORE
10803 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10804 {
10805 rtx set = single_set (insn);
10806
10807 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10808 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10809 {
10810 cost -= 2;
10811 /* But don't reduce the cost below 1 if the address depends
10812 on a side effect of dep_insn. */
10813 if (cost < 1
10814 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10815 cost = 1;
10816 }
10817 }
10818 }
10819 }
10820 /* An anti-dependence penalty of two applies if the first insn is a double
10821 precision fadd / fsub / fmul. */
10822 else if (!TARGET_SH4_300
10823 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10824 && recog_memoized (dep_insn) >= 0
10825 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10826 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10827 /* A lot of alleged anti-flow dependences are fake,
10828 so check this one is real. */
10829 && flow_dependent_p (dep_insn, insn))
10830 cost = 2;
10831
10832 return cost;
10833 }
10834
10835 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10836 if DEP_INSN is anti-flow dependent on INSN. */
10837 static bool
10838 flow_dependent_p (rtx insn, rtx dep_insn)
10839 {
10840 rtx tmp = PATTERN (insn);
10841
10842 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10843 return tmp == NULL_RTX;
10844 }
10845
10846 /* A helper function for flow_dependent_p called through note_stores. */
10847 static void
10848 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10849 {
10850 rtx * pinsn = (rtx *) data;
10851
10852 if (*pinsn && reg_referenced_p (x, *pinsn))
10853 *pinsn = NULL_RTX;
10854 }
10855
10856 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10857 'special function' patterns (type sfunc) that clobber pr, but that
10858 do not look like function calls to leaf_function_p. Hence we must
10859 do this extra check. */
10860 static int
10861 sh_pr_n_sets (void)
10862 {
10863 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10864 }
10865
10866 /* Return where to allocate pseudo for a given hard register initial
10867 value. */
10868 static rtx
10869 sh_allocate_initial_value (rtx hard_reg)
10870 {
10871 rtx x;
10872
10873 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10874 {
10875 if (crtl->is_leaf
10876 && ! sh_pr_n_sets ()
10877 && ! (TARGET_SHCOMPACT
10878 && ((crtl->args.info.call_cookie
10879 & ~ CALL_COOKIE_RET_TRAMP (1))
10880 || crtl->saves_all_registers)))
10881 x = hard_reg;
10882 else
10883 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10884 }
10885 else
10886 x = NULL_RTX;
10887
10888 return x;
10889 }
10890
10891 /* This function returns "2" to indicate dual issue for the SH4
10892 processor. To be used by the DFA pipeline description. */
10893 static int
10894 sh_issue_rate (void)
10895 {
10896 if (TARGET_SUPERSCALAR)
10897 return 2;
10898 else
10899 return 1;
10900 }
10901
10902 /* Functions for ready queue reordering for sched1. */
10903
10904 /* Get weight for mode for a set x. */
10905 static short
10906 find_set_regmode_weight (rtx x, enum machine_mode mode)
10907 {
10908 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10909 return 1;
10910 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10911 {
10912 if (REG_P (SET_DEST (x)))
10913 {
10914 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10915 return 1;
10916 else
10917 return 0;
10918 }
10919 return 1;
10920 }
10921 return 0;
10922 }
10923
10924 /* Get regmode weight for insn. */
10925 static short
10926 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10927 {
10928 short reg_weight = 0;
10929 rtx x;
10930
10931 /* Increment weight for each register born here. */
10932 x = PATTERN (insn);
10933 reg_weight += find_set_regmode_weight (x, mode);
10934 if (GET_CODE (x) == PARALLEL)
10935 {
10936 int j;
10937 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10938 {
10939 x = XVECEXP (PATTERN (insn), 0, j);
10940 reg_weight += find_set_regmode_weight (x, mode);
10941 }
10942 }
10943 /* Decrement weight for each register that dies here. */
10944 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10945 {
10946 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10947 {
10948 rtx note = XEXP (x, 0);
10949 if (REG_P (note) && GET_MODE (note) == mode)
10950 reg_weight--;
10951 }
10952 }
10953 return reg_weight;
10954 }
10955
10956 /* Calculate regmode weights for all insns of a basic block. */
10957 static void
10958 find_regmode_weight (basic_block b, enum machine_mode mode)
10959 {
10960 rtx insn, next_tail, head, tail;
10961
10962 get_ebb_head_tail (b, b, &head, &tail);
10963 next_tail = NEXT_INSN (tail);
10964
10965 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10966 {
10967 /* Handle register life information. */
10968 if (!INSN_P (insn))
10969 continue;
10970
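/* A DFmode (DImode) value occupies two SFmode (SImode) registers, so count
its births and deaths twice toward the SFmode (SImode) weight. */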
10971 if (mode == SFmode)
10972 INSN_REGMODE_WEIGHT (insn, mode) =
10973 find_insn_regmode_weight (insn, mode)
10974 + 2 * find_insn_regmode_weight (insn, DFmode);
10975 else if (mode == SImode)
10976 INSN_REGMODE_WEIGHT (insn, mode) =
10977 find_insn_regmode_weight (insn, mode)
10978 + 2 * find_insn_regmode_weight (insn, DImode);
10979 }
10980 }
10981
10982 /* Comparison function for ready queue sorting. */
10983 static int
10984 rank_for_reorder (const void *x, const void *y)
10985 {
10986 rtx tmp = *(const rtx *) y;
10987 rtx tmp2 = *(const rtx *) x;
10988
10989 /* An insn in a schedule group should be issued first. */
10990 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10991 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10992
10993 /* If insns are equally good, sort by INSN_LUID (original insn order); this
10994 minimizes instruction movement, thus minimizing sched's effect on
10995 register pressure. */
10996 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10997 }
10998
10999 /* Resort the N-element array A in which only the last element may be out of order. */
11000 static void
11001 swap_reorder (rtx *a, int n)
11002 {
11003 rtx insn = a[n - 1];
11004 int i = n - 2;
11005
11006 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11007 {
11008 a[i + 1] = a[i];
11009 i -= 1;
11010 }
11011 a[i + 1] = insn;
11012 }
11013
11014 /* Sort the ready list using rank_for_reorder, so insns are issued in original program order. */
11015 static void
11016 ready_reorder (rtx *ready, int nready)
11017 {
11018 if (nready == 2)
11019 swap_reorder (ready, nready);
11020 else if (nready > 2)
11021 qsort (ready, nready, sizeof (rtx), rank_for_reorder);
11022 }
11023
11024 /* Count life regions of r0 for a block. */
11025 static int
11026 find_r0_life_regions (basic_block b)
11027 {
11028 rtx end, insn;
11029 rtx pset;
11030 rtx r0_reg;
11031 int live;
11032 int set;
11033 int death = 0;
11034
11035 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11036 {
11037 set = 1;
11038 live = 1;
11039 }
11040 else
11041 {
11042 set = 0;
11043 live = 0;
11044 }
11045
11046 insn = BB_HEAD (b);
11047 end = BB_END (b);
11048 r0_reg = gen_rtx_REG (SImode, R0_REG);
11049 while (1)
11050 {
11051 if (INSN_P (insn))
11052 {
11053 if (find_regno_note (insn, REG_DEAD, R0_REG))
11054 {
11055 death++;
11056 live = 0;
11057 }
11058 if (!live
11059 && (pset = single_set (insn))
11060 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11061 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11062 {
11063 set++;
11064 live = 1;
11065 }
11066 }
11067 if (insn == end)
11068 break;
11069 insn = NEXT_INSN (insn);
11070 }
11071 return set - death;
11072 }
11073
11074 /* Calculate regmode weights for all insns of all basic blocks. */
11075 static void
11076 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11077 int verbose ATTRIBUTE_UNUSED,
11078 int old_max_uid)
11079 {
11080 basic_block b;
11081
11082 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11083 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11084 r0_life_regions = 0;
11085
11086 FOR_EACH_BB_REVERSE (b)
11087 {
11088 find_regmode_weight (b, SImode);
11089 find_regmode_weight (b, SFmode);
11090 if (!reload_completed)
11091 r0_life_regions += find_r0_life_regions (b);
11092 }
11093
11094 CURR_REGMODE_PRESSURE (SImode) = 0;
11095 CURR_REGMODE_PRESSURE (SFmode) = 0;
11096 }
11097
11098 /* Cleanup. */
11099 static void
11100 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11101 int verbose ATTRIBUTE_UNUSED)
11102 {
11103 if (regmode_weight[0])
11104 {
11105 free (regmode_weight[0]);
11106 regmode_weight[0] = NULL;
11107 }
11108 if (regmode_weight[1])
11109 {
11110 free (regmode_weight[1]);
11111 regmode_weight[1] = NULL;
11112 }
11113 }
11114
11115 /* The set of supported scalar modes differs from the default in that TImode
11116 is not supported for 32-bit SHmedia. */
11117 static bool
11118 sh_scalar_mode_supported_p (enum machine_mode mode)
11119 {
11120 if (TARGET_SHMEDIA32 && mode == TImode)
11121 return false;
11122
11123 return default_scalar_mode_supported_p (mode);
11124 }
11125
11126 /* Cache can_issue_more so that we can return it from reorder2. Also,
11127 keep count of register pressure for SImode and SFmode. */
11128 static int
11129 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11130 int sched_verbose ATTRIBUTE_UNUSED,
11131 rtx insn,
11132 int can_issue_more)
11133 {
11134 if (GET_CODE (PATTERN (insn)) != USE
11135 && GET_CODE (PATTERN (insn)) != CLOBBER)
11136 cached_can_issue_more = can_issue_more - 1;
11137 else
11138 cached_can_issue_more = can_issue_more;
11139
11140 if (reload_completed)
11141 return cached_can_issue_more;
11142
11143 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11144 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11145
11146 return cached_can_issue_more;
11147 }
11148
11149 static void
11150 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11151 int verbose ATTRIBUTE_UNUSED,
11152 int veclen ATTRIBUTE_UNUSED)
11153 {
11154 CURR_REGMODE_PRESSURE (SImode) = 0;
11155 CURR_REGMODE_PRESSURE (SFmode) = 0;
11156 }
11157
11158 /* Some magic numbers. */
11159 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11160 functions that already have high pressure on r0. */
11161 #define R0_MAX_LIFE_REGIONS 2
11162 /* Register Pressure thresholds for SImode and SFmode registers. */
11163 #define SIMODE_MAX_WEIGHT 5
11164 #define SFMODE_MAX_WEIGHT 10
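/* For illustration: with these thresholds, high_pressure below reports high
pressure once the net number of SImode (SFmode) values born by the insns
issued so far in the current scheduling region exceeds SIMODE_MAX_WEIGHT
(SFMODE_MAX_WEIGHT), or as soon as the function has R0_MAX_LIFE_REGIONS
or more r0 life regions. */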
11165
11166 /* Return true if the pressure is high for MODE. */
11167 static bool
11168 high_pressure (enum machine_mode mode)
11169 {
11170 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11171 functions that already have high pressure on r0. */
11172 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11173 return true;
11174
11175 if (mode == SFmode)
11176 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11177 else
11178 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11179 }
11180
11181 /* Reorder ready queue if register pressure is high. */
11182 static int
11183 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11184 int sched_verbose ATTRIBUTE_UNUSED,
11185 rtx *ready,
11186 int *n_readyp,
11187 int clock_var ATTRIBUTE_UNUSED)
11188 {
11189 if (reload_completed)
11190 return sh_issue_rate ();
11191
11192 if (high_pressure (SFmode) || high_pressure (SImode))
11193 {
11194 ready_reorder (ready, *n_readyp);
11195 }
11196
11197 return sh_issue_rate ();
11198 }
11199
11200 /* Skip cycles if the current register pressure is high. */
11201 static int
11202 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11203 int sched_verbose ATTRIBUTE_UNUSED,
11204 rtx *ready ATTRIBUTE_UNUSED,
11205 int *n_readyp ATTRIBUTE_UNUSED,
11206 int clock_var ATTRIBUTE_UNUSED)
11207 {
11208 if (reload_completed)
11209 return cached_can_issue_more;
11210
11211 if (high_pressure(SFmode) || high_pressure (SImode))
11212 skip_cycles = 1;
11213
11214 return cached_can_issue_more;
11215 }
11216
11217 /* Skip cycles without sorting the ready queue. This will move insns from
11218 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
11219 queue by sh_reorder. */
11220
11221 /* Generally, skipping this many cycles is sufficient for all insns to move
11222 from Q -> R. */
11223 #define MAX_SKIPS 8
11224
11225 static int
11226 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11227 int sched_verbose ATTRIBUTE_UNUSED,
11228 rtx insn ATTRIBUTE_UNUSED,
11229 int last_clock_var,
11230 int clock_var,
11231 int *sort_p)
11232 {
11233 if (reload_completed)
11234 return 0;
11235
11236 if (skip_cycles)
11237 {
11238 if ((clock_var - last_clock_var) < MAX_SKIPS)
11239 {
11240 *sort_p = 0;
11241 return 1;
11242 }
11243 /* If this is the last cycle we are skipping, allow reordering of R. */
11244 if ((clock_var - last_clock_var) == MAX_SKIPS)
11245 {
11246 *sort_p = 1;
11247 return 1;
11248 }
11249 }
11250
11251 skip_cycles = 0;
11252
11253 return 0;
11254 }
11255
11256 /* SHmedia requires registers for branches, so we can't generate new
11257 branches past reload. */
11258 static bool
11259 sh_cannot_modify_jumps_p (void)
11260 {
11261 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11262 }
11263
11264 static reg_class_t
11265 sh_target_reg_class (void)
11266 {
11267 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11268 }
11269
11270 static bool
11271 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11272 {
11273 if (! shmedia_space_reserved_for_target_registers)
11274 return 0;
11275 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11276 return 0;
11277
11278 HARD_REG_SET dummy;
11279 if (calc_live_regs (&dummy) >= 6 * 8)
11280 return 1;
11281 return 0;
11282 }
11283
11284 static bool
11285 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11286 {
11287 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11288 }
11289 \f
11290 /*
11291 On the SH1..SH4, the trampoline looks like
11292 2 0002 D202 mov.l l2,r2
11293 1 0000 D301 mov.l l1,r3
11294 3 0004 422B jmp @r2
11295 4 0006 0009 nop
11296 5 0008 00000000 l1: .long area
11297 6 000c 00000000 l2: .long function
11298
11299 SH5 (compact) uses r1 instead of r3 for the static chain. */
11300
11301
11302 /* Emit RTL insns to initialize the variable parts of a trampoline.
11303 FNADDR is an RTX for the address of the function's pure code.
11304 CXT is an RTX for the static chain value for the function. */
11305 static void
11306 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11307 {
11308 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11309 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11310
11311 if (TARGET_SHMEDIA64)
11312 {
11313 rtx tramp_templ;
11314 int fixed_len;
11315
11316 rtx movi1 = GEN_INT (0xcc000010);
11317 rtx shori1 = GEN_INT (0xc8000010);
11318 rtx src, dst;
11319
11320 /* The following trampoline works within a +- 128 KB range for cxt:
11321 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11322 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11323 gettr tr1,r1; blink tr0,r63 */
11324 /* Address rounding makes it hard to compute the exact bounds of the
11325 offset for this trampoline, but we have a rather generous offset
11326 range, so frame_offset should do fine as an upper bound. */
11327 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11328 {
11329 /* ??? could optimize this trampoline initialization
11330 by writing DImode words with two insns each. */
11331 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11332 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11333 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11334 insn = gen_rtx_AND (DImode, insn, mask);
11335 /* Or in ptb/u .,tr1 pattern */
11336 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11337 insn = force_operand (insn, NULL_RTX);
11338 insn = gen_lowpart (SImode, insn);
11339 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11340 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11341 insn = gen_rtx_AND (DImode, insn, mask);
11342 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11343 insn = gen_lowpart (SImode, insn);
11344 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11345 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11346 insn = gen_rtx_AND (DImode, insn, mask);
11347 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11348 insn = gen_lowpart (SImode, insn);
11349 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11350 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11351 insn = gen_rtx_AND (DImode, insn, mask);
11352 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11353 insn = gen_lowpart (SImode, insn);
11354 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11355 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11356 insn = gen_rtx_AND (DImode, insn, mask);
11357 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11358 insn = gen_lowpart (SImode, insn);
11359 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11360 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11361 GEN_INT (0x6bf10600));
11362 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11363 GEN_INT (0x4415fc10));
11364 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11365 GEN_INT (0x4401fff0));
11366 emit_insn (gen_ic_invalidate_line (tramp));
11367 return;
11368 }
11369 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11370 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11371
11372 tramp_templ = gen_datalabel_ref (tramp_templ);
11373 dst = tramp_mem;
11374 src = gen_const_mem (BLKmode, tramp_templ);
11375 set_mem_align (dst, 256);
11376 set_mem_align (src, 64);
11377 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11378
11379 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11380 emit_move_insn (adjust_address (tramp_mem, Pmode,
11381 fixed_len + GET_MODE_SIZE (Pmode)),
11382 cxt);
11383 emit_insn (gen_ic_invalidate_line (tramp));
11384 return;
11385 }
11386 else if (TARGET_SHMEDIA)
11387 {
11388 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11389 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11390 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11391 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11392 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11393 rotated 10 right, and the higher 16 bits of every 32 selected. */
11394 rtx movishori
11395 = force_reg (V2HImode, (simplify_gen_subreg
11396 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11397 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11398 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11399
11400 fnaddr = force_reg (SImode, fnaddr);
11401 cxt = force_reg (SImode, cxt);
11402 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11403 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11404 movishori));
11405 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11406 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11407 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11408 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11409 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11410 gen_rtx_SUBREG (V2HImode, cxt, 0),
11411 movishori));
11412 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11413 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11414 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11415 if (TARGET_LITTLE_ENDIAN)
11416 {
11417 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11418 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11419 }
11420 else
11421 {
11422 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11423 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11424 }
11425 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11426 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11427 emit_insn (gen_ic_invalidate_line (tramp));
11428 return;
11429 }
11430 else if (TARGET_SHCOMPACT)
11431 {
11432 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11433 return;
11434 }
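/* Plain SH1..SH4 case: pack the mov.l / mov.l / jmp / nop opcodes from the
layout comment above into two SImode words (halves arranged according to
endianness), then store the static chain into the l1 slot at offset 8 and
the function address into the l2 slot at offset 12. */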
11435 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11436 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11437 SImode));
11438 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11439 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11440 SImode));
11441 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11442 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11443 if (TARGET_HARD_SH4 || TARGET_SH5)
11444 {
11445 if (!TARGET_INLINE_IC_INVALIDATE
11446 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
11447 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11448 FUNCTION_ORDINARY),
11449 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11450 else
11451 emit_insn (gen_ic_invalidate_line (tramp));
11452 }
11453 }
11454
11455 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11456 static rtx
11457 sh_trampoline_adjust_address (rtx tramp)
11458 {
11459 if (TARGET_SHMEDIA)
11460 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11461 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11462 return tramp;
11463 }
11464
11465 /* FIXME: This is overly conservative. A SHcompact function that
11466 receives arguments ``by reference'' will have them stored in its
11467 own stack frame, so it must not pass pointers or references to
11468 these arguments to other functions by means of sibling calls. */
11469 /* If PIC, we cannot make sibling calls to global functions
11470 because the PLT requires r12 to be live. */
11471 static bool
11472 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11473 {
11474 return (1
11475 && (! TARGET_SHCOMPACT
11476 || crtl->args.info.stack_regs == 0)
11477 && ! sh_cfun_interrupt_handler_p ()
11478 && (! flag_pic
11479 || (decl && ! TREE_PUBLIC (decl))
11480 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11481 }
11482 \f
11483 /* Machine specific built-in functions. */
11484
11485 struct builtin_description
11486 {
11487 bool (* const is_enabled) (void);
11488 const enum insn_code icode;
11489 const char *const name;
11490 int signature;
11491 tree fndecl;
11492 };
11493
11494 static bool
11495 shmedia_builtin_p (void)
11496 {
11497 return TARGET_SHMEDIA;
11498 }
11499
11500 /* This function can be used if there are any built-ins that are not for
11501 SHmedia. It's commented out to avoid the defined-but-unused warning.
11502 static bool
11503 sh1_builtin_p (void)
11504 {
11505 return TARGET_SH1;
11506 }
11507 */
11508
11509 /* Describe number and signedness of arguments; arg[0] == result
11510 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11511 /* 9: 64-bit pointer, 10: 32-bit pointer. */
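/* A worked example of the encoding above: { 2, 10 } (used below as
SH_BLTIN_LDUA_L) describes a builtin returning a signed value and taking one
32-bit pointer argument, while { 0, 10, 2 } (SH_BLTIN_STUA_L) returns void
and takes a 32-bit pointer and a signed value. */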
11512 static const char signature_args[][4] =
11513 {
11514 #define SH_BLTIN_V2SI2 0
11515 { 4, 4 },
11516 #define SH_BLTIN_V4HI2 1
11517 { 4, 4 },
11518 #define SH_BLTIN_V2SI3 2
11519 { 4, 4, 4 },
11520 #define SH_BLTIN_V4HI3 3
11521 { 4, 4, 4 },
11522 #define SH_BLTIN_V8QI3 4
11523 { 4, 4, 4 },
11524 #define SH_BLTIN_MAC_HISI 5
11525 { 1, 4, 4, 1 },
11526 #define SH_BLTIN_SH_HI 6
11527 { 4, 4, 1 },
11528 #define SH_BLTIN_SH_SI 7
11529 { 4, 4, 1 },
11530 #define SH_BLTIN_V4HI2V2SI 8
11531 { 4, 4, 4 },
11532 #define SH_BLTIN_V4HI2V8QI 9
11533 { 4, 4, 4 },
11534 #define SH_BLTIN_SISF 10
11535 { 4, 2 },
11536 #define SH_BLTIN_LDUA_L 11
11537 { 2, 10 },
11538 #define SH_BLTIN_LDUA_Q 12
11539 { 1, 10 },
11540 #define SH_BLTIN_STUA_L 13
11541 { 0, 10, 2 },
11542 #define SH_BLTIN_STUA_Q 14
11543 { 0, 10, 1 },
11544 #define SH_BLTIN_LDUA_L64 15
11545 { 2, 9 },
11546 #define SH_BLTIN_LDUA_Q64 16
11547 { 1, 9 },
11548 #define SH_BLTIN_STUA_L64 17
11549 { 0, 9, 2 },
11550 #define SH_BLTIN_STUA_Q64 18
11551 { 0, 9, 1 },
11552 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11553 #define SH_BLTIN_2 19
11554 #define SH_BLTIN_SU 19
11555 { 1, 2 },
11556 #define SH_BLTIN_3 20
11557 #define SH_BLTIN_SUS 20
11558 { 2, 2, 1 },
11559 #define SH_BLTIN_PSSV 21
11560 { 0, 8, 2, 2 },
11561 #define SH_BLTIN_XXUU 22
11562 #define SH_BLTIN_UUUU 22
11563 { 1, 1, 1, 1 },
11564 #define SH_BLTIN_PV 23
11565 { 0, 8 },
11566 #define SH_BLTIN_VP 24
11567 { 8, 0 },
11568 };
11569 /* mcmv: operands considered unsigned. */
11570 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11571 /* mperm: control value considered unsigned int. */
11572 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11573 /* mshards_q: returns signed short. */
11574 /* nsb: takes long long arg, returns unsigned char. */
11575 static struct builtin_description bdesc[] =
11576 {
11577 { shmedia_builtin_p,
11578 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11579 { shmedia_builtin_p,
11580 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11581 { shmedia_builtin_p,
11582 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11583 { shmedia_builtin_p,
11584 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11585 { shmedia_builtin_p,
11586 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11587 { shmedia_builtin_p,
11588 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11589 { shmedia_builtin_p,
11590 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11591 { shmedia_builtin_p,
11592 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11593 { shmedia_builtin_p,
11594 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11595 { shmedia_builtin_p,
11596 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11597 { shmedia_builtin_p,
11598 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11599 { shmedia_builtin_p,
11600 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11601 { shmedia_builtin_p,
11602 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11603 { shmedia_builtin_p,
11604 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11605 { shmedia_builtin_p,
11606 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11607 { shmedia_builtin_p,
11608 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11609 { shmedia_builtin_p,
11610 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11611 { shmedia_builtin_p,
11612 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11613 { shmedia_builtin_p,
11614 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11615 { shmedia_builtin_p,
11616 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11617 { shmedia_builtin_p,
11618 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11619 { shmedia_builtin_p,
11620 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11621 { shmedia_builtin_p,
11622 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11623 { shmedia_builtin_p,
11624 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11625 { shmedia_builtin_p,
11626 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11627 { shmedia_builtin_p,
11628 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11629 { shmedia_builtin_p,
11630 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11631 { shmedia_builtin_p,
11632 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11633 { shmedia_builtin_p,
11634 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11635 { shmedia_builtin_p,
11636 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11637 { shmedia_builtin_p,
11638 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11639 { shmedia_builtin_p,
11640 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11641 { shmedia_builtin_p,
11642 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11643 { shmedia_builtin_p,
11644 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11645 { shmedia_builtin_p,
11646 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11647 { shmedia_builtin_p,
11648 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11649 { shmedia_builtin_p,
11650 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11651 { shmedia_builtin_p,
11652 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11653 { shmedia_builtin_p,
11654 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11655 { shmedia_builtin_p,
11656 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11657 { shmedia_builtin_p,
11658 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11659 { shmedia_builtin_p,
11660 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11661 { shmedia_builtin_p,
11662 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11663 { shmedia_builtin_p,
11664 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11665 { shmedia_builtin_p,
11666 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11667 { shmedia_builtin_p,
11668 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11669 { shmedia_builtin_p,
11670 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11671 { shmedia_builtin_p,
11672 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11673 { shmedia_builtin_p,
11674 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11675 { shmedia_builtin_p,
11676 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11677 { shmedia_builtin_p,
11678 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11679 { shmedia_builtin_p,
11680 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11681 { shmedia_builtin_p,
11682 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11683 { shmedia_builtin_p,
11684 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11685 { shmedia_builtin_p,
11686 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11687 { shmedia_builtin_p,
11688 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11689 { shmedia_builtin_p,
11690 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11691 { shmedia_builtin_p,
11692 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11693 { shmedia_builtin_p,
11694 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11695 { shmedia_builtin_p,
11696 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11697 { shmedia_builtin_p,
11698 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11699 { shmedia_builtin_p,
11700 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11701 { shmedia_builtin_p,
11702 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11703 { shmedia_builtin_p,
11704 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11705 { shmedia_builtin_p,
11706 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11707 { shmedia_builtin_p,
11708 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11709 { shmedia_builtin_p,
11710 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11711 { shmedia_builtin_p,
11712 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11713 { shmedia_builtin_p,
11714 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11715 { shmedia_builtin_p,
11716 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11717 { shmedia_builtin_p,
11718 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11719 { shmedia_builtin_p,
11720 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11721 { shmedia_builtin_p,
11722 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11723 { shmedia_builtin_p,
11724 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11725 { shmedia_builtin_p,
11726 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11727 { shmedia_builtin_p,
11728 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11729 { shmedia_builtin_p,
11730 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11731 { shmedia_builtin_p,
11732 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11733 { shmedia_builtin_p,
11734 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11735 { shmedia_builtin_p,
11736 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11737 { shmedia_builtin_p,
11738 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11739 { shmedia_builtin_p,
11740 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11741 { shmedia_builtin_p,
11742 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11743 };
11744
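/* The index of an entry in bdesc also serves as the builtin's function code:
sh_init_builtins passes d - bdesc to add_builtin_function, and
sh_builtin_decl / sh_expand_builtin map DECL_FUNCTION_CODE back to the
entry. */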
11745 static void
11746 sh_init_builtins (void)
11747 {
11748 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11749 memset (shared, 0, sizeof shared);
11750
11751 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
11752 {
11753 builtin_description* d = &bdesc[di];
11754
11755 if (!d->is_enabled ())
11756 continue;
11757
11758 tree type, arg_type = NULL_TREE;
11759 int signature = d->signature;
11760
11761 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11762 type = shared[signature];
11763 else
11764 {
11765 int has_result = signature_args[signature][0] != 0;
11766 tree args[3];
11767
11768 if ((signature_args[signature][1] & 8)
11769 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11770 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11771 continue;
11772 if (! TARGET_FPU_ANY
11773 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11774 continue;
11775 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
11776 args[i] = NULL_TREE;
11777 for (int i = 3; ; i--)
11778 {
11779 int arg = signature_args[signature][i];
11780 int opno = i - 1 + has_result;
11781
11782 if (arg & 8)
11783 arg_type = ptr_type_node;
11784 else if (arg)
11785 arg_type = (*lang_hooks.types.type_for_mode)
11786 (insn_data[d->icode].operand[opno].mode, (arg & 1));
11787 else if (i)
11788 continue;
11789 else
11790 arg_type = void_type_node;
11791 if (i == 0)
11792 break;
11793 args[i-1] = arg_type;
11794 }
11795 type = build_function_type_list (arg_type, args[0], args[1],
11796 args[2], NULL_TREE);
11797 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11798 shared[signature] = type;
11799 }
11800 d->fndecl =
11801 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11802 NULL, NULL_TREE);
11803 }
11804 }
11805
11806 /* Implements target hook vector_mode_supported_p. */
11807 bool
11808 sh_vector_mode_supported_p (enum machine_mode mode)
11809 {
11810 if (TARGET_FPU_ANY
11811 && ((mode == V2SFmode)
11812 || (mode == V4SFmode)
11813 || (mode == V16SFmode)))
11814 return true;
11815
11816 else if (TARGET_SHMEDIA
11817 && ((mode == V8QImode)
11818 || (mode == V2HImode)
11819 || (mode == V4HImode)
11820 || (mode == V2SImode)))
11821 return true;
11822
11823 return false;
11824 }
11825
11826 bool
11827 sh_frame_pointer_required (void)
11828 {
11829 /* If needed override this in other tm.h files to cope with various OS
11830 lossage requiring a frame pointer. */
11831 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11832 return true;
11833
11834 if (crtl->profile)
11835 return true;
11836
11837 return false;
11838 }
11839
11840 /* Implements target hook dwarf_calling_convention. Return an enum
11841 dwarf_calling_convention value. */
11842 int
11843 sh_dwarf_calling_convention (const_tree func)
11844 {
11845 if (sh_attr_renesas_p (func))
11846 return DW_CC_GNU_renesas_sh;
11847
11848 return DW_CC_normal;
11849 }
11850
11851 /* Return the SH builtin decl for CODE. */
11852 static tree
11853 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11854 {
11855 if (code >= ARRAY_SIZE (bdesc))
11856 return error_mark_node;
11857
11858 if (!bdesc[code].is_enabled ())
11859 return error_mark_node;
11860
11861 return bdesc[code].fndecl;
11862 }
11863
11864 /* Expand an expression EXP that calls a built-in function,
11865 with result going to TARGET if that's convenient
11866 (and in mode MODE if that's convenient).
11867 SUBTARGET may be used as the target for computing one of EXP's operands.
11868 IGNORE is nonzero if the value is to be ignored. */
11869 static rtx
11870 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11871 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11872 {
11873 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11874 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11875 const struct builtin_description *d = &bdesc[fcode];
11876 enum insn_code icode = d->icode;
11877 int signature = d->signature;
11878 int nop = 0;
11879 rtx op[4];
11880
11881 if (signature_args[signature][0])
11882 {
11883 if (ignore)
11884 return NULL_RTX;
11885
11886 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11887 if (! target || GET_MODE (target) != tmode
11888 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11889 target = gen_reg_rtx (tmode);
11890 op[nop++] = target;
11891 }
11892 else
11893 target = NULL_RTX;
11894
11895 for (int i = 1; i <= 3; i++, nop++)
11896 {
11897 tree arg;
11898 enum machine_mode opmode, argmode;
11899 tree optype;
11900
11901 if (! signature_args[signature][i])
11902 break;
11903 arg = CALL_EXPR_ARG (exp, i - 1);
11904 if (arg == error_mark_node)
11905 return const0_rtx;
11906 if (signature_args[signature][i] & 8)
11907 {
11908 opmode = ptr_mode;
11909 optype = ptr_type_node;
11910 }
11911 else
11912 {
11913 opmode = insn_data[icode].operand[nop].mode;
11914 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11915 }
11916 argmode = TYPE_MODE (TREE_TYPE (arg));
11917 if (argmode != opmode)
11918 arg = build1 (NOP_EXPR, optype, arg);
11919 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11920 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11921 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11922 }
11923
11924 rtx pat = NULL_RTX;
11925
11926 switch (nop)
11927 {
11928 case 1:
11929 pat = (*insn_data[d->icode].genfun) (op[0]);
11930 break;
11931 case 2:
11932 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11933 break;
11934 case 3:
11935 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11936 break;
11937 case 4:
11938 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11939 break;
11940 default:
11941 gcc_unreachable ();
11942 }
11943 if (! pat)
11944 return NULL_RTX;
11945 emit_insn (pat);
11946 return target;
11947 }
11948
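/* Expand a V2SFmode unary operation CODE on OP1 into OP0 by applying the
corresponding SFmode operation to each of the two lanes;
sh_expand_binop_v2sf below does the same for binary operations. */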
11949 void
11950 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11951 {
11952 rtx sel0 = const0_rtx;
11953 rtx sel1 = const1_rtx;
11954 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11955 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11956
11957 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11958 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11959 }
11960
11961 void
11962 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11963 {
11964 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11965
11966 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11967 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11968 }
11969
11970 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11971 We can allow any mode in any general register. The special registers
11972 only allow SImode. Don't allow any mode in the PR.
11973
11974 We cannot hold DCmode values in the XD registers because alter_reg
11975 handles subregs of them incorrectly. We could work around this by
11976 spacing the XD registers like the DR registers, but this would require
11977 additional memory in every compilation to hold larger register vectors.
11978 We could hold SFmode / SCmode values in XD registers, but that
11979 would require a tertiary reload when reloading from / to memory,
11980 and a secondary reload to reload from / to general regs; that
11981 seems to be a losing proposition.
11982
11983 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11984 it won't be ferried through GP registers first. */
11985 bool
11986 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11987 {
11988 if (SPECIAL_REGISTER_P (regno))
11989 return mode == SImode;
11990
11991 if (regno == FPUL_REG)
11992 return (mode == SImode || mode == SFmode);
11993
11994 if (FP_REGISTER_P (regno) && mode == SFmode)
11995 return true;
11996
11997 if (mode == V2SFmode)
11998 {
11999 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12000 || GENERAL_REGISTER_P (regno)))
12001 return true;
12002 else
12003 return false;
12004 }
12005
12006 if (mode == V4SFmode)
12007 {
12008 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12009 || GENERAL_REGISTER_P (regno))
12010 return true;
12011 else
12012 return false;
12013 }
12014
12015 if (mode == V16SFmode)
12016 {
12017 if (TARGET_SHMEDIA)
12018 {
12019 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12020 return true;
12021 else
12022 return false;
12023 }
12024 else
12025 return regno == FIRST_XD_REG;
12026 }
12027
12028 if (FP_REGISTER_P (regno))
12029 {
12030 if (mode == SFmode
12031 || mode == SImode
12032 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12033 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12034 || mode == DCmode
12035 || (TARGET_SHMEDIA
12036 && (mode == DFmode || mode == DImode
12037 || mode == V2SFmode || mode == TImode)))
12038 && ((regno - FIRST_FP_REG) & 1) == 0)
12039 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12040 && ((regno - FIRST_FP_REG) & 3) == 0))
12041 return true;
12042 else
12043 return false;
12044 }
12045
12046 if (XD_REGISTER_P (regno))
12047 return mode == DFmode;
12048
12049 if (TARGET_REGISTER_P (regno))
12050 return (mode == DImode || mode == SImode || mode == PDImode);
12051
12052 if (regno == PR_REG)
12053 return mode == SImode;
12054
12055 if (regno == FPSCR_REG)
12056 return mode == PSImode;
12057
12058 /* FIXME. This works around PR target/37633 for -O0. */
12059 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12060 {
12061 unsigned int n = GET_MODE_SIZE (mode) / 8;
12062
12063 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12064 && regno <= FIRST_GENERAL_REG + 14)
12065 return false;
12066 }
12067
12068 return true;
12069 }
12070
12071 /* Return true if a mode change from FROM to TO is invalid for registers
12072 in class RCLASS. */
12073 bool
12074 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
12075 enum reg_class rclass)
12076 {
12077 /* We want to enable the use of SUBREGs as a means to
12078 VEC_SELECT a single element of a vector. */
12079
12080 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12081 This can be problematic when SFmode vector subregs need to be accessed
12082 on the stack with displacement addressing, as it happens with -O0.
12083 Thus we disallow the mode change for -O0. */
12084 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12085 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12086
12087 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12088 {
12089 if (TARGET_LITTLE_ENDIAN)
12090 {
12091 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12092 return reg_classes_intersect_p (DF_REGS, rclass);
12093 }
12094 else
12095 {
12096 if (GET_MODE_SIZE (from) < 8)
12097 return reg_classes_intersect_p (DF_REGS, rclass);
12098 }
12099 }
12100 return false;
12101 }
12102
12103 /* Return true if values in machine mode MODE will likely be
12104 allocated to registers in small register classes. */
12105 bool
12106 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
12107 {
12108 return (! TARGET_SHMEDIA);
12109 }
12110
12111 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12112 that label is used. */
12113 void
12114 sh_mark_label (rtx address, int nuses)
12115 {
12116 if (GOTOFF_P (address))
12117 {
12118 /* Extract the label or symbol. */
12119 address = XEXP (address, 0);
12120 if (GET_CODE (address) == PLUS)
12121 address = XEXP (address, 0);
12122 address = XVECEXP (address, 0, 0);
12123 }
12124 if (GET_CODE (address) == LABEL_REF
12125 && LABEL_P (XEXP (address, 0)))
12126 LABEL_NUSES (XEXP (address, 0)) += nuses;
12127 }
12128
12129 /* Compute extra cost of moving data between one register class
12130 and another.
12131
12132 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12133 uses this information. Hence, the general register <-> floating point
12134 register information here is not used for SFmode. */
12135 static int
12136 sh_register_move_cost (enum machine_mode mode,
12137 reg_class_t srcclass, reg_class_t dstclass)
12138 {
12139 if (dstclass == T_REGS || dstclass == PR_REGS)
12140 return 10;
12141
12142 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12143 return 4;
12144
12145 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12146 && REGCLASS_HAS_FP_REG (srcclass)
12147 && REGCLASS_HAS_FP_REG (dstclass))
12148 return 4;
12149
12150 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12151 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12152
12153 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12154 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12155 return 9;
12156
12157 if ((REGCLASS_HAS_FP_REG (dstclass)
12158 && REGCLASS_HAS_GENERAL_REG (srcclass))
12159 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12160 && REGCLASS_HAS_FP_REG (srcclass)))
12161 {
12162 /* Discourage trying to use fp regs for a pointer. This also
12163 discourages fp regs with SImode because Pmode is an alias
12164 of SImode on this target. See PR target/48596. */
12165 int addend = (mode == Pmode) ? 40 : 0;
12166
12167 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12168 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12169 }
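/* For illustration (derived from the branch above): on SH4 without -mfmovd,
moving a DFmode value between a general and a floating-point register costs
12, while moving a Pmode pointer that way costs 12 + 40 = 52, strongly
discouraging pointers in FP regs. */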
12170
12171 if ((dstclass == FPUL_REGS
12172 && REGCLASS_HAS_GENERAL_REG (srcclass))
12173 || (srcclass == FPUL_REGS
12174 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12175 return 5;
12176
12177 if ((dstclass == FPUL_REGS
12178 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12179 || (srcclass == FPUL_REGS
12180 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12181 return 7;
12182
12183 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12184 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12185 return 20;
12186
12187 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12188 if (TARGET_SHMEDIA
12189 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12190 {
12191 if (sh_gettrcost >= 0)
12192 return sh_gettrcost;
12193 else if (!TARGET_PT_FIXED)
12194 return 100;
12195 }
12196
12197 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12198 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12199 return 4;
12200
12201 if (TARGET_SHMEDIA
12202 || (TARGET_FMOVD
12203 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12204 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12205 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12206
12207 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
12208 }
12209
12210 static rtx
12211 emit_load_ptr (rtx reg, rtx addr)
12212 {
12213 rtx mem = gen_const_mem (ptr_mode, addr);
12214
12215 if (Pmode != ptr_mode)
12216 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12217 return emit_move_insn (reg, mem);
12218 }
12219
12220 static void
12221 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12222 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12223 tree function)
12224 {
12225 CUMULATIVE_ARGS cum;
12226 int structure_value_byref = 0;
12227 rtx this_rtx, this_value, sibcall, insns, funexp;
12228 tree funtype = TREE_TYPE (function);
12229 int simple_add = CONST_OK_FOR_ADD (delta);
12230 int did_load = 0;
12231 rtx scratch0, scratch1, scratch2;
12232 unsigned i;
12233
12234 reload_completed = 1;
12235 epilogue_completed = 1;
12236 crtl->uses_only_leaf_regs = 1;
12237
12238 emit_note (NOTE_INSN_PROLOGUE_END);
12239
12240 /* Find the "this" pointer. We have such a wide range of ABIs for the
12241 SH that it's best to do this completely machine independently.
12242 "this" is passed as first argument, unless a structure return pointer
12243 comes first, in which case "this" comes second. */
12244 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12245 #ifndef PCC_STATIC_STRUCT_RETURN
12246 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12247 structure_value_byref = 1;
12248 #endif /* not PCC_STATIC_STRUCT_RETURN */
12249 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12250 {
12251 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12252
12253 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12254 }
12255 this_rtx
12256 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12257
12258 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12259 static chain pointer (even if you can't have nested virtual functions
12260 right now, someone might implement them sometime), and the rest of the
12261 registers are used for argument passing, are callee-saved, or reserved. */
12262 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12263 -ffixed-reg has been used. */
12264 if (! call_used_regs[0] || fixed_regs[0])
12265 error ("r0 needs to be available as a call-clobbered register");
12266 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12267 if (! TARGET_SH5)
12268 {
12269 if (call_used_regs[1] && ! fixed_regs[1])
12270 scratch1 = gen_rtx_REG (ptr_mode, 1);
12271 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12272 to the location where struct values are to be returned. */
12273 if (call_used_regs[3] && ! fixed_regs[3])
12274 scratch2 = gen_rtx_REG (Pmode, 3);
12275 }
12276 else if (TARGET_SHMEDIA)
12277 {
12278 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12279 if (i != REGNO (scratch0) &&
12280 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12281 {
12282 scratch1 = gen_rtx_REG (ptr_mode, i);
12283 break;
12284 }
12285 if (scratch1 == scratch0)
12286 error ("need a second call-clobbered general purpose register");
12287 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12288 if (call_used_regs[i] && ! fixed_regs[i])
12289 {
12290 scratch2 = gen_rtx_REG (Pmode, i);
12291 break;
12292 }
12293 if (scratch2 == scratch0)
12294 error ("need a call-clobbered target register");
12295 }
12296
12297 this_value = plus_constant (Pmode, this_rtx, delta);
12298 if (vcall_offset
12299 && (simple_add || scratch0 != scratch1)
12300 && strict_memory_address_p (ptr_mode, this_value))
12301 {
12302 emit_load_ptr (scratch0, this_value);
12303 did_load = 1;
12304 }
12305
12306 if (!delta)
12307 ; /* Do nothing. */
12308 else if (simple_add)
12309 emit_move_insn (this_rtx, this_value);
12310 else
12311 {
12312 emit_move_insn (scratch1, GEN_INT (delta));
12313 emit_insn (gen_add2_insn (this_rtx, scratch1));
12314 }
12315
12316 if (vcall_offset)
12317 {
12318 rtx offset_addr;
12319
12320 if (!did_load)
12321 emit_load_ptr (scratch0, this_rtx);
12322
12323 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12324 if (strict_memory_address_p (ptr_mode, offset_addr))
12325 ; /* Do nothing. */
12326 else if (! TARGET_SH5 && scratch0 != scratch1)
12327 {
12328 /* scratch0 != scratch1, and we have indexed loads. Get better
12329 schedule by loading the offset into r1 and using an indexed
12330 load - then the load of r1 can issue before the load from
12331 (this_rtx + delta) finishes. */
12332 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12333 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12334 }
12335 else if (CONST_OK_FOR_ADD (vcall_offset))
12336 {
12337 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12338 offset_addr = scratch0;
12339 }
12340 else if (scratch0 != scratch1)
12341 {
12342 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12343 emit_insn (gen_add2_insn (scratch0, scratch1));
12344 offset_addr = scratch0;
12345 }
12346 else
12347 gcc_unreachable (); /* FIXME */
12348 emit_load_ptr (scratch0, offset_addr);
12349
12350 if (Pmode != ptr_mode)
12351 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12352 emit_insn (gen_add2_insn (this_rtx, scratch0));
12353 }
12354
12355 /* Generate a tail call to the target function. */
12356 if (! TREE_USED (function))
12357 {
12358 assemble_external (function);
12359 TREE_USED (function) = 1;
12360 }
12361 funexp = XEXP (DECL_RTL (function), 0);
12362 /* If the function is overridden, so is the thunk, hence we don't
12363 need GOT addressing even if this is a public symbol. */
12364 #if 0
12365 if (TARGET_SH1 && ! flag_weak)
12366 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12367 else
12368 #endif
12369 if (TARGET_SH2 && flag_pic)
12370 {
12371 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12372 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12373 }
12374 else
12375 {
12376 if (TARGET_SHMEDIA && flag_pic)
12377 {
12378 funexp = gen_sym2PIC (funexp);
12379 PUT_MODE (funexp, Pmode);
12380 }
12381 emit_move_insn (scratch2, funexp);
12382 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12383 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12384 }
12385 sibcall = emit_call_insn (sibcall);
12386 SIBLING_CALL_P (sibcall) = 1;
12387 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12388 emit_barrier ();
12389
12390 /* Run just enough of rest_of_compilation to do scheduling and get
12391 the insns emitted. Note that use_thunk calls
12392 assemble_start_function and assemble_end_function. */
12393
12394 insns = get_insns ();
12395
12396 if (optimize > 0)
12397 {
12398 if (! cfun->cfg)
12399 init_flow (cfun);
12400 split_all_insns_noflow ();
12401 }
12402
12403 sh_reorg ();
12404 shorten_branches (insns);
12405 final_start_function (insns, file, 1);
12406 final (insns, file, 1);
12407 final_end_function ();
12408
12409 reload_completed = 0;
12410 epilogue_completed = 0;
12411 }
12412
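/* Return an rtx holding the address of the function named NAME.  KIND
   selects how the address is materialized when generating PIC code:
   SFUNC_GOT loads it through the GOT, SFUNC_STATIC uses a GOTOFF
   relocation, and FUNCTION_ORDINARY leaves the bare symbol.  If TARGET
   is non-null the address ends up in TARGET.  */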
12413 rtx
12414 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12415 {
12416 rtx sym;
12417
12418 /* If this is not an ordinary function, the name usually comes from a
12419 string literal or an sprintf buffer. Make sure we use the same
12420 string consistently, so that cse will be able to unify address loads. */
12421 if (kind != FUNCTION_ORDINARY)
12422 name = IDENTIFIER_POINTER (get_identifier (name));
12423 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12424 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12425 if (flag_pic)
12426 switch (kind)
12427 {
12428 case FUNCTION_ORDINARY:
12429 break;
12430 case SFUNC_GOT:
12431 {
12432 rtx reg = target ? target : gen_reg_rtx (Pmode);
12433
12434 emit_insn (gen_symGOT2reg (reg, sym));
12435 sym = reg;
12436 break;
12437 }
12438 case SFUNC_STATIC:
12439 {
12440 /* ??? To allow cse to work, we use GOTOFF relocations.
12441 We could add combiner patterns to transform this into
12442 straight pc-relative calls with sym2PIC / bsrf when
12443 label load and function call are still 1:1 and in the
12444 same basic block during combine. */
12445 rtx reg = target ? target : gen_reg_rtx (Pmode);
12446
12447 emit_insn (gen_symGOTOFF2reg (reg, sym));
12448 sym = reg;
12449 break;
12450 }
12451 }
12452 if (target && sym != target)
12453 {
12454 emit_move_insn (target, sym);
12455 return target;
12456 }
12457 return sym;
12458 }
12459
12460 /* Return the number of the first general purpose register set in S, or -1 if none. */
12461 static int
12462 scavenge_reg (HARD_REG_SET *s)
12463 {
12464 int r;
12465 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12466 if (TEST_HARD_REG_BIT (*s, r))
12467 return r;
12468 return -1;
12469 }
12470
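/* Return an rtx for the value that the PR (return address) register had
   on entry to the current function.  On SHcompact a frame memory slot is
   used when PR may have been clobbered by the prologue; otherwise the
   hard register's initial value is used, wrapped in an UNSPEC_RA for
   TARGET_SH1 so that it can still be replaced later if needed.  */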
12471 rtx
12472 sh_get_pr_initial_val (void)
12473 {
12474 rtx val;
12475
12476 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12477 PR register on SHcompact, because it might be clobbered by the prologue.
12478 We check first if that is known to be the case. */
12479 if (TARGET_SHCOMPACT
12480 && ((crtl->args.info.call_cookie
12481 & ~ CALL_COOKIE_RET_TRAMP (1))
12482 || crtl->saves_all_registers))
12483 return gen_frame_mem (SImode, return_address_pointer_rtx);
12484
12485 /* If we haven't finished rtl generation, there might be a nonlocal label
12486 that we haven't seen yet.
12487 ??? get_hard_reg_initial_val fails if it is called after register
12488 allocation has started, unless it has been called before for the
12489 same register. And even then, we end up in trouble if we didn't use
12490 the register in the same basic block before. So call
12491 get_hard_reg_initial_val now and wrap it in an unspec if we might
12492 need to replace it. */
12493 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12494 combine can put the pseudo returned by get_hard_reg_initial_val into
12495 instructions that need a general purpose register, which will fail to
12496 be recognized when the pseudo becomes allocated to PR. */
12497 val
12498 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12499 if (TARGET_SH1)
12500 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12501 return val;
12502 }
12503
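/* Try to expand an scc-style operation in which operands[1] is the
   comparison code, operands[2] is the T bit register and operands[3] is
   a constant.  For EQ / NE comparisons, emit a movt, movnegt or constant
   move into operands[0] and return true; return false otherwise.  */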
12504 bool
12505 sh_expand_t_scc (rtx operands[])
12506 {
12507 enum rtx_code code = GET_CODE (operands[1]);
12508 rtx target = operands[0];
12509 rtx op0 = operands[2];
12510 rtx op1 = operands[3];
12511 rtx result = target;
12512 HOST_WIDE_INT val;
12513
12514 if (!REG_P (op0) || REGNO (op0) != T_REG
12515 || !CONST_INT_P (op1))
12516 return false;
12517 if (!REG_P (result))
12518 result = gen_reg_rtx (SImode);
12519 val = INTVAL (op1);
12520 if ((code == EQ && val == 1) || (code == NE && val == 0))
12521 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12522 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12523 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12524 else if (code == EQ || code == NE)
12525 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12526 else
12527 return false;
12528 if (result != target)
12529 emit_move_insn (target, result);
12530 return true;
12531 }
12532
12533 /* INSN is an sfunc; return the rtx that describes the address used. */
12534 static rtx
12535 extract_sfunc_addr (rtx insn)
12536 {
12537 rtx pattern, part = NULL_RTX;
12538 int len, i;
12539
12540 pattern = PATTERN (insn);
12541 len = XVECLEN (pattern, 0);
12542 for (i = 0; i < len; i++)
12543 {
12544 part = XVECEXP (pattern, 0, i);
12545 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12546 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12547 return XEXP (part, 0);
12548 }
12549 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12550 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12551 }
12552
12553 /* Verify that the register in use_sfunc_addr still agrees with the address
12554 used in the sfunc. This prevents fill_slots_from_thread from changing
12555 use_sfunc_addr.
12556 INSN is the use_sfunc_addr instruction, and REG is the register it
12557 guards. */
12558 bool
12559 check_use_sfunc_addr (rtx insn, rtx reg)
12560 {
12561 /* Search for the sfunc. It should really come right after INSN. */
12562 while ((insn = NEXT_INSN (insn)))
12563 {
12564 if (LABEL_P (insn) || JUMP_P (insn))
12565 break;
12566 if (! INSN_P (insn))
12567 continue;
12568
12569 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
12570 insn = XVECEXP (PATTERN (insn), 0, 0);
12571 if (GET_CODE (PATTERN (insn)) != PARALLEL
12572 || get_attr_type (insn) != TYPE_SFUNC)
12573 continue;
12574 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12575 }
12576 gcc_unreachable ();
12577 }
12578
12579 /* This function returns a constant rtx that represents 2**15 / pi in
12580 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
12581 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
12582 static GTY(()) rtx sh_fsca_sf2int_rtx;
12583
12584 rtx
12585 sh_fsca_sf2int (void)
12586 {
12587 if (! sh_fsca_sf2int_rtx)
12588 {
12589 REAL_VALUE_TYPE rv;
12590
12591 real_from_string (&rv, "10430.378350470453");
12592 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12593 }
12594
12595 return sh_fsca_sf2int_rtx;
12596 }
12597
12598 /* This function returns a constant rtx that represents pi / 2**15 in
12599 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction of
12600 a full circle back to an SFmode value in radians, i.e. 0x10000 maps
12601 to 2*pi. */
12602 static GTY(()) rtx sh_fsca_int2sf_rtx;
12603
12604 rtx
12605 sh_fsca_int2sf (void)
12606 {
12607 if (! sh_fsca_int2sf_rtx)
12608 {
12609 REAL_VALUE_TYPE rv;
12610
12611 real_from_string (&rv, "9.587379924285257e-5");
12612 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12613 }
12614
12615 return sh_fsca_int2sf_rtx;
12616 }
12617
12618 /* Initialize the CUMULATIVE_ARGS structure. */
12619 void
12620 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12621 tree fntype,
12622 rtx libname ATTRIBUTE_UNUSED,
12623 tree fndecl,
12624 signed int n_named_args,
12625 enum machine_mode mode)
12626 {
12627 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12628 pcum->free_single_fp_reg = 0;
12629 pcum->stack_regs = 0;
12630 pcum->byref_regs = 0;
12631 pcum->byref = 0;
12632 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12633
12634 /* XXX - Should we check TARGET_HITACHI here ??? */
12635 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12636
12637 if (fntype)
12638 {
12639 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12640 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12641 pcum->prototype_p = prototype_p (fntype);
12642 pcum->arg_count [(int) SH_ARG_INT]
12643 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12644
12645 pcum->call_cookie
12646 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12647 && pcum->arg_count [(int) SH_ARG_INT] == 0
12648 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12649 ? int_size_in_bytes (TREE_TYPE (fntype))
12650 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12651 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12652 == FIRST_RET_REG));
12653 }
12654 else
12655 {
12656 pcum->arg_count [(int) SH_ARG_INT] = 0;
12657 pcum->prototype_p = FALSE;
12658 if (mode != VOIDmode)
12659 {
12660 pcum->call_cookie =
12661 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12662 && GET_MODE_SIZE (mode) > 4
12663 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12664
12665 /* If the default ABI is the Renesas ABI then all library
12666 calls must assume that the library will be using the
12667 Renesas ABI. So if the function would return its result
12668 in memory then we must force the address of this memory
12669 block onto the stack. Ideally we would like to call
12670 targetm.calls.return_in_memory() here but we do not have
12671 the TYPE or the FNDECL available so we synthesize the
12672 contents of that function as best we can. */
12673 pcum->force_mem =
12674 (TARGET_DEFAULT & MASK_HITACHI)
12675 && (mode == BLKmode
12676 || (GET_MODE_SIZE (mode) > 4
12677 && !(mode == DFmode
12678 && TARGET_FPU_DOUBLE)));
12679 }
12680 else
12681 {
12682 pcum->call_cookie = 0;
12683 pcum->force_mem = FALSE;
12684 }
12685 }
12686 }
12687
12688 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12689 not descend into CONST_DOUBLE when doing the replacement.
12690
12691 Note that copying is not done so X must not be shared unless all copies
12692 are to be modified.
12693
12694 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12695 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12696 replacements[n*2+1] - and that we take mode changes into account.
12697
12698 If a replacement is ambiguous, return NULL_RTX.
12699
12700 If MODIFY is zero, don't modify any rtl in place,
12701 just return zero or nonzero for failure / success. */
12702 rtx
12703 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12704 {
12705 int i, j;
12706 const char *fmt;
12707
12708 /* The following prevents loops from occurring when we change a MEM
12709 inside a CONST_DOUBLE into the same CONST_DOUBLE. */
12710 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
12711 return x;
12712
12713 for (i = n_replacements - 1; i >= 0 ; i--)
12714 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12715 return replacements[i*2+1];
12716
12717 /* Allow this function to make replacements in EXPR_LISTs. */
12718 if (x == NULL_RTX)
12719 return NULL_RTX;
12720
12721 if (GET_CODE (x) == SUBREG)
12722 {
12723 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12724 n_replacements, modify);
12725
12726 if (CONST_INT_P (new_rtx))
12727 {
12728 x = simplify_subreg (GET_MODE (x), new_rtx,
12729 GET_MODE (SUBREG_REG (x)),
12730 SUBREG_BYTE (x));
12731 if (! x)
12732 abort ();
12733 }
12734 else if (modify)
12735 SUBREG_REG (x) = new_rtx;
12736
12737 return x;
12738 }
12739 else if (REG_P (x))
12740 {
12741 unsigned regno = REGNO (x);
12742 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12743 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12744 rtx result = NULL_RTX;
12745
12746 for (i = n_replacements - 1; i >= 0; i--)
12747 {
12748 rtx from = replacements[i*2];
12749 rtx to = replacements[i*2+1];
12750 unsigned from_regno, from_nregs, to_regno, new_regno;
12751
12752 if (!REG_P (from))
12753 continue;
12754 from_regno = REGNO (from);
12755 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12756 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12757 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12758 {
12759 if (regno < from_regno
12760 || regno + nregs > from_regno + from_nregs
12761 || !REG_P (to)
12762 || result)
12763 return NULL_RTX;
12764 to_regno = REGNO (to);
12765 if (to_regno < FIRST_PSEUDO_REGISTER)
12766 {
12767 new_regno = regno + to_regno - from_regno;
12768 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12769 != nregs)
12770 return NULL_RTX;
12771 result = gen_rtx_REG (GET_MODE (x), new_regno);
12772 }
12773 else if (GET_MODE (x) <= GET_MODE (to))
12774 result = gen_lowpart_common (GET_MODE (x), to);
12775 else
12776 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12777 }
12778 }
12779 return result ? result : x;
12780 }
12781 else if (GET_CODE (x) == ZERO_EXTEND)
12782 {
12783 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12784 n_replacements, modify);
12785
12786 if (CONST_INT_P (new_rtx))
12787 {
12788 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12789 new_rtx, GET_MODE (XEXP (x, 0)));
12790 if (! x)
12791 abort ();
12792 }
12793 else if (modify)
12794 XEXP (x, 0) = new_rtx;
12795
12796 return x;
12797 }
12798
12799 fmt = GET_RTX_FORMAT (GET_CODE (x));
12800 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12801 {
12802 rtx new_rtx;
12803
12804 if (fmt[i] == 'e')
12805 {
12806 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12807 n_replacements, modify);
12808 if (!new_rtx)
12809 return NULL_RTX;
12810 if (modify)
12811 XEXP (x, i) = new_rtx;
12812 }
12813 else if (fmt[i] == 'E')
12814 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12815 {
12816 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12817 n_replacements, modify);
12818 if (!new_rtx)
12819 return NULL_RTX;
12820 if (modify)
12821 XVECEXP (x, i, j) = new_rtx;
12822 }
12823 }
12824
12825 return x;
12826 }
12827
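/* Return an rtx that converts X to MODE via TRUNCATE, folding the case
   where X is itself a zero or sign extension: the extension is peeled
   off and, if the inner value is narrower than MODE, the inner value is
   re-extended directly to MODE instead.  When NEED_SIGN_EXT is set, only
   a sign extension may be reused that way.  */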
12828 rtx
12829 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12830 {
12831 enum rtx_code code = TRUNCATE;
12832
12833 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12834 {
12835 rtx inner = XEXP (x, 0);
12836 enum machine_mode inner_mode = GET_MODE (inner);
12837
12838 if (inner_mode == mode)
12839 return inner;
12840 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12841 x = inner;
12842 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12843 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12844 {
12845 code = GET_CODE (x);
12846 x = inner;
12847 }
12848 }
12849 return gen_rtx_fmt_e (code, mode, x);
12850 }
12851
12852 /* Called via for_each_rtx after reload, to clean up truncates of
12853 registers that span multiple actual hard registers. */
12854 int
12855 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12856 {
12857 rtx x = *p, reg;
12858
12859 if (GET_CODE (x) != TRUNCATE)
12860 return 0;
12861 reg = XEXP (x, 0);
12862 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12863 {
12864 enum machine_mode reg_mode = GET_MODE (reg);
12865 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12866 subreg_lowpart_offset (DImode, reg_mode));
12867 *(int*) n_changes += 1;
12868 return -1;
12869 }
12870 return 0;
12871 }
12872
12873 /* Load and store depend on the highpart of the address. However,
12874 set_attr_alternative does not give well-defined results before reload,
12875 so we must look at the rtl ourselves to see if any of the feeding
12876 registers is used in a memref.
12877
12878 Called by sh_contains_memref_p via for_each_rtx. */
12879 static int
12880 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12881 {
12882 return (MEM_P (*loc));
12883 }
12884
12885 /* Return true iff INSN contains a MEM. */
12886 bool
12887 sh_contains_memref_p (rtx insn)
12888 {
12889 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12890 }
12891
12892 /* Return true iff INSN loads a banked register. */
12893 bool
12894 sh_loads_bankedreg_p (rtx insn)
12895 {
12896 if (GET_CODE (PATTERN (insn)) == SET)
12897 {
12898 rtx op = SET_DEST (PATTERN(insn));
12899 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12900 return true;
12901 }
12902
12903 return false;
12904 }
12905
12906 /* FNADDR is the MEM expression from a call expander. Return an address
12907 to use in an SHmedia insn pattern. */
12908 rtx
12909 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12910 {
12911 int is_sym;
12912
12913 fnaddr = XEXP (fnaddr, 0);
12914 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12915 if (flag_pic && is_sym)
12916 {
12917 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12918 {
12919 rtx reg = gen_reg_rtx (Pmode);
12920
12921 /* We must not use GOTPLT for sibcalls, because PIC_REG
12922 must be restored before the PLT code gets to run. */
12923 if (is_sibcall)
12924 emit_insn (gen_symGOT2reg (reg, fnaddr));
12925 else
12926 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12927 fnaddr = reg;
12928 }
12929 else
12930 {
12931 fnaddr = gen_sym2PIC (fnaddr);
12932 PUT_MODE (fnaddr, Pmode);
12933 }
12934 }
12935 /* If ptabs might trap, make this visible to the rest of the compiler.
12936 We generally assume that symbols pertain to valid locations, but
12937 it is possible to generate invalid symbols with asm or linker tricks.
12938 In a list of functions where each returns its successor, an invalid
12939 symbol might denote an empty list. */
12940 if (!TARGET_PT_FIXED
12941 && (!is_sym || TARGET_INVALID_SYMBOLS)
12942 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12943 {
12944 rtx tr = gen_reg_rtx (PDImode);
12945
12946 emit_insn (gen_ptabs (tr, fnaddr));
12947 fnaddr = tr;
12948 }
12949 else if (! target_reg_operand (fnaddr, Pmode))
12950 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12951 return fnaddr;
12952 }
12953
12954 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
12955 static reg_class_t
12956 sh_preferred_reload_class (rtx x, reg_class_t rclass)
12957 {
12958 if (rclass == NO_REGS
12959 && TARGET_SHMEDIA
12960 && (CONST_DOUBLE_P (x)
12961 || GET_CODE (x) == SYMBOL_REF
12962 || PIC_ADDR_P (x)))
12963 return GENERAL_REGS;
12964
12965 return rclass;
12966 }
12967
12968 /* Implement TARGET_SECONDARY_RELOAD. */
12969 static reg_class_t
12970 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12971 enum machine_mode mode, secondary_reload_info *sri)
12972 {
12973 enum reg_class rclass = (enum reg_class) rclass_i;
12974
12975 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
12976 && REG_P (XEXP (XEXP (x, 0), 0))
12977 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
12978 return rclass == R0_REGS ? NO_REGS : R0_REGS;
12979
12980 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
12981 return rclass == R0_REGS ? NO_REGS : R0_REGS;
12982
12983 if (REG_P (x) && REGNO (x) == GBR_REG)
12984 return NO_REGS;
12985
12986 if (in_p)
12987 {
12988 if (REGCLASS_HAS_FP_REG (rclass)
12989 && ! TARGET_SHMEDIA
12990 && immediate_operand ((x), mode)
12991 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12992 && mode == SFmode && fldi_ok ()))
12993 switch (mode)
12994 {
12995 case SFmode:
12996 sri->icode = CODE_FOR_reload_insf__frn;
12997 return NO_REGS;
12998 case DFmode:
12999 sri->icode = CODE_FOR_reload_indf__frn;
13000 return NO_REGS;
13001 case SImode:
13002 /* ??? If we knew that we are in the appropriate mode -
13003 single precision - we could use a reload pattern directly. */
13004 return FPUL_REGS;
13005 default:
13006 abort ();
13007 }
13008 if (rclass == FPUL_REGS
13009 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13010 || REGNO (x) == T_REG))
13011 || GET_CODE (x) == PLUS))
13012 return GENERAL_REGS;
13013 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13014 {
13015 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13016 return GENERAL_REGS;
13017 else if (mode == SFmode)
13018 return FP_REGS;
13019 sri->icode = CODE_FOR_reload_insi__i_fpul;
13020 return NO_REGS;
13021 }
13022 if (rclass == FPSCR_REGS
13023 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13024 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13025 return GENERAL_REGS;
13026 if (REGCLASS_HAS_FP_REG (rclass)
13027 && TARGET_SHMEDIA
13028 && immediate_operand (x, mode)
13029 && x != CONST0_RTX (GET_MODE (x))
13030 && GET_MODE (x) != V4SFmode)
13031 return GENERAL_REGS;
13032 if ((mode == QImode || mode == HImode)
13033 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13034 {
13035 sri->icode = ((mode == QImode)
13036 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13037 return NO_REGS;
13038 }
13039 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13040 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13041 return TARGET_REGS;
13042 } /* end of input-only processing. */
13043
13044 if (((REGCLASS_HAS_FP_REG (rclass)
13045 && (REG_P (x)
13046 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13047 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13048 && TARGET_FMOVD))))
13049 || (REGCLASS_HAS_GENERAL_REG (rclass)
13050 && REG_P (x)
13051 && FP_REGISTER_P (REGNO (x))))
13052 && ! TARGET_SHMEDIA
13053 && (mode == SFmode || mode == SImode))
13054 return FPUL_REGS;
13055 if ((rclass == FPUL_REGS
13056 || (REGCLASS_HAS_FP_REG (rclass)
13057 && ! TARGET_SHMEDIA && mode == SImode))
13058 && (MEM_P (x)
13059 || (REG_P (x)
13060 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13061 || REGNO (x) == T_REG
13062 || system_reg_operand (x, VOIDmode)))))
13063 {
13064 if (rclass == FPUL_REGS)
13065 return GENERAL_REGS;
13066 return FPUL_REGS;
13067 }
13068 if ((rclass == TARGET_REGS
13069 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13070 && !satisfies_constraint_Csy (x)
13071 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13072 return GENERAL_REGS;
13073 if ((rclass == MAC_REGS || rclass == PR_REGS)
13074 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13075 && rclass != REGNO_REG_CLASS (REGNO (x)))
13076 return GENERAL_REGS;
13077 if (rclass != GENERAL_REGS && REG_P (x)
13078 && TARGET_REGISTER_P (REGNO (x)))
13079 return GENERAL_REGS;
13080
13081 /* If we get here, fall back to loading the FPUL register through general registers.
13082 This case can happen when movsi_ie insn is picked initially to
13083 load/store the FPUL register from/to another register, and then the
13084 other register is allocated on the stack. */
13085 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13086 return GENERAL_REGS;
13087
13088 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13089 the other operand.
13090 On SH2A we could also just leave it alone here, which would result in a
13091 4 byte move insn being generated instead. However, for this to work
13092 the insns must have the appropriate alternatives. */
13093 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13094 && satisfies_constraint_Sdd (x)
13095 && disp_addr_displacement (x) <= max_mov_insn_displacement (mode, false))
13096 return R0_REGS;
13097
13098 /* When reload is trying to address a QImode or HImode subreg on the stack,
13099 force any subreg byte into R0_REGS, as this is going to become a
13100 displacement address.
13101 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13102 is on the stack, the memref to it might already require a displacement
13103 and that has to be added to the final address. At this point we don't
13104 know the cumulative displacement so we assume the worst case. */
13105 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13106 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13107 return R0_REGS;
13108
13109 return NO_REGS;
13110 }
13111
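/* Implement the TARGET_CONDITIONAL_REGISTER_USAGE hook: adjust the fixed,
   call-used and sibcall register sets for the selected CPU, ABI and PIC
   settings.  */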
13112 static void
13113 sh_conditional_register_usage (void)
13114 {
13115 int regno;
13116 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13117 if (! VALID_REGISTER_P (regno))
13118 fixed_regs[regno] = call_used_regs[regno] = 1;
13119 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13120 if (TARGET_SH5)
13121 {
13122 call_used_regs[FIRST_GENERAL_REG + 8]
13123 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13124 call_really_used_regs[FIRST_GENERAL_REG + 8]
13125 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13126 }
13127 if (TARGET_SHMEDIA)
13128 {
13129 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13130 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13131 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13132 }
13133 if (flag_pic)
13134 {
13135 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13136 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13137 }
13138 /* Renesas saves and restores mac registers on call. */
13139 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13140 {
13141 call_really_used_regs[MACH_REG] = 0;
13142 call_really_used_regs[MACL_REG] = 0;
13143 }
13144
13145 if (TARGET_SHMEDIA)
13146 {
13147 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13148 if (! fixed_regs[regno] && call_really_used_regs[regno])
13149 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13150 }
13151 else
13152 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13153 if (! fixed_regs[regno] && call_really_used_regs[regno])
13154 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13155 }
13156
13157 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13158
13159 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13160 static bool
13161 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
13162 {
13163 return (TARGET_SHMEDIA
13164 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13165 || x == CONST0_RTX (mode)
13166 || !TARGET_SHMEDIA_FPU
13167 || TARGET_SHMEDIA64)
13168 : (GET_CODE (x) != CONST_DOUBLE
13169 || mode == DFmode || mode == SFmode
13170 || mode == DImode || GET_MODE (x) == VOIDmode));
13171 }
13172
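/* The division strategy in effect for the current compilation, normally
   chosen during option processing (the -mdiv= option).  */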
13173 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13174
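/* Register the out-of-line __sync_* library functions for operand sizes
   up to UNITS_PER_WORD.  */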
13175 static void
13176 sh_init_sync_libfuncs (void)
13177 {
13178 init_sync_libfuncs (UNITS_PER_WORD);
13179 }
13180
13181 /* Return true if it is appropriate to emit `ret' instructions in the
13182 body of a function. */
13183 bool
13184 sh_can_use_simple_return_p (void)
13185 {
13186 HARD_REG_SET live_regs_mask;
13187 int d;
13188
13189 /* Some targets require special return insns. */
13190 if (TARGET_SHMEDIA
13191 || (TARGET_SHCOMPACT
13192 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13193 return false;
13194
13195 if (! reload_completed || frame_pointer_needed)
13196 return false;
13197
13198 /* Moving the prologue around doesn't reduce the size. */
13199 if (optimize_function_for_size_p (cfun))
13200 return false;
13201
13202 /* Finally, allow for the PR save. */
13203 d = calc_live_regs (&live_regs_mask);
13204
13205 if (rounded_frame_size (d) > 4)
13206 return false;
13207
13208 return true;
13209 }
13210
13211 /*------------------------------------------------------------------------------
13212 Address mode optimization support code
13213 */
13214
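/* Displacements are tracked as HOST_WIDE_INT values; INVALID_DISP serves
   as a sentinel for an address whose displacement could not be determined.  */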
13215 typedef HOST_WIDE_INT disp_t;
13216 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13217 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13218 static const disp_t INVALID_DISP = MAX_DISP;
13219
13220 /* A memory reference which is described by a base register and a
13221 displacement. */
13222 class base_reg_disp
13223 {
13224 public:
13225 base_reg_disp (rtx br, disp_t d);
13226
13227 bool is_reg (void) const;
13228 bool is_disp (void) const;
13229 rtx reg (void) const;
13230 disp_t disp (void) const;
13231
13232 private:
13233 rtx reg_;
13234 disp_t disp_;
13235 };
13236
13237 inline
13238 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13239 : reg_ (br), disp_ (d)
13240 {
13241 }
13242
13243 inline bool
13244 base_reg_disp::is_reg (void) const
13245 {
13246 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13247 }
13248
13249 inline bool
13250 base_reg_disp::is_disp (void) const
13251 {
13252 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13253 }
13254
13255 inline rtx
13256 base_reg_disp::reg (void) const
13257 {
13258 return reg_;
13259 }
13260
13261 inline disp_t
13262 base_reg_disp::disp (void) const
13263 {
13264 return disp_;
13265 }
13266
13267 /* Find the base register and calculate the displacement for a given
13268 address rtx 'x'.
13269 This is done by walking the insn list backwards and following SET insns
13270 that set the value of the specified reg 'x'. */
13271 static base_reg_disp
13272 sh_find_base_reg_disp (rtx insn, rtx x, disp_t disp = 0, rtx base_reg = NULL)
13273 {
13274 if (REG_P (x))
13275 {
13276 if (REGNO (x) == GBR_REG)
13277 return base_reg_disp (x, disp);
13278
13279 /* We've reached a hard-reg. This is probably the point where
13280 function args are copied to pseudos. Do not go any further and
13281 stick to the pseudo. If the original mem addr was in a hard reg
13282 from the beginning, it will become the base reg. */
13283 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13284 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13285
13286 /* Try to find the previous insn that sets the reg. */
13287 for (rtx i = prev_nonnote_insn (insn); i != NULL;
13288 i = prev_nonnote_insn (i))
13289 {
13290 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)
13291 && CALL_P (i))
13292 break;
13293
13294 if (!NONJUMP_INSN_P (i))
13295 continue;
13296
13297 rtx p = PATTERN (i);
13298 if (p != NULL && GET_CODE (p) == SET && REG_P (XEXP (p, 0))
13299 && REGNO (XEXP (p, 0)) == REGNO (x))
13300 {
13301 /* If the recursion can't find out any more details about the
13302 source of the set, then this reg becomes our new base reg. */
13303 return sh_find_base_reg_disp (i, XEXP (p, 1), disp, XEXP (p, 0));
13304 }
13305 }
13306
13307 /* If we get here, no previous insn that sets the reg was found.
13308 The input reg is already the base reg. */
13309 return base_reg_disp (x, disp);
13310 }
13311
13312 else if (GET_CODE (x) == PLUS)
13313 {
13314 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13315 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13316
13317 /* Either left or right val must be a reg.
13318 We don't handle the case of 'reg + reg' here. */
13319 if (left_val.is_reg () && right_val.is_disp ())
13320 return base_reg_disp (left_val.reg (), left_val.disp ()
13321 + right_val.disp () + disp);
13322 else if (right_val.is_reg () && left_val.is_disp ())
13323 return base_reg_disp (right_val.reg (), right_val.disp ()
13324 + left_val.disp () + disp);
13325 else
13326 return base_reg_disp (base_reg, disp);
13327 }
13328
13329 else if (CONST_INT_P (x))
13330 return base_reg_disp (NULL, disp + INTVAL (x));
13331
13332 /* Didn't find anything useful. */
13333 return base_reg_disp (base_reg, disp);
13334 }
13335
13336 /* Given an insn and a memory operand, try to find an equivalent GBR
13337 based memory address and return the corresponding new memory address.
13338 Return NULL_RTX if not found. */
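/* On success the returned address has the form
   (plus:SI (reg:SI GBR) (const_int D)), where D has been validated by
   gbr_displacement for the access mode.  */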
13339 rtx
13340 sh_find_equiv_gbr_addr (rtx insn, rtx mem)
13341 {
13342 if (!MEM_P (mem))
13343 return NULL_RTX;
13344
13345 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13346 if (side_effects_p (XEXP (mem, 0)))
13347 return NULL_RTX;
13348
13349 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13350
13351 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13352 {
13353 rtx disp = GEN_INT (gbr_disp.disp ());
13354 if (gbr_displacement (disp, GET_MODE (mem)))
13355 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13356 }
13357
13358 return NULL_RTX;
13359 }
13360
13361 /*------------------------------------------------------------------------------
13362 Manual insn combine support code.
13363 */
13364
13365 /* Given a reg rtx and a start insn, try to find the insn that sets the
13366 specified reg by using the specified insn stepping function, such as
13367 'prev_nonnote_insn_bb'. When the insn is found, try to extract the rtx
13368 of the reg set. */
13369 set_of_reg
13370 sh_find_set_of_reg (rtx reg, rtx insn, rtx(*stepfunc)(rtx))
13371 {
13372 set_of_reg result;
13373 result.insn = insn;
13374 result.set_rtx = NULL_RTX;
13375 result.set_src = NULL_RTX;
13376
13377 if (!REG_P (reg) || insn == NULL_RTX)
13378 return result;
13379
13380 for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
13381 result.insn = stepfunc (result.insn))
13382 {
13383 if (BARRIER_P (result.insn))
13384 return result;
13385 if (!NONJUMP_INSN_P (result.insn))
13386 continue;
13387 if (reg_set_p (reg, result.insn))
13388 {
13389 result.set_rtx = set_of (reg, result.insn);
13390
13391 if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
13392 return result;
13393
13394 result.set_src = XEXP (result.set_rtx, 1);
13395 return result;
13396 }
13397 }
13398
13399 return result;
13400 }
13401
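/* A sketch of typical use (OP and CURR_INSN are placeholders for an
   operand rtx and the insn being analyzed, not names used elsewhere in
   this file):

     set_of_reg s = sh_find_set_of_reg (op, curr_insn, prev_nonnote_insn_bb);
     if (s.set_src != NULL_RTX && t_reg_operand (s.set_src, VOIDmode))
       ... OP was last set from the T bit within this basic block ...  */
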
13402 /* Given an op rtx and an insn, try to find out whether the result of the
13403 specified op consists only of logical operations on T bit stores. */
13404 bool
13405 sh_is_logical_t_store_expr (rtx op, rtx insn)
13406 {
13407 if (!logical_operator (op, SImode))
13408 return false;
13409
13410 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13411 int op_is_t_count = 0;
13412
13413 for (int i = 0; i < 2; ++i)
13414 {
13415 if (t_reg_operand (ops[i], VOIDmode)
13416 || negt_reg_operand (ops[i], VOIDmode))
13417 op_is_t_count++;
13418
13419 else
13420 {
13421 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13422 prev_nonnote_insn_bb);
13423 if (op_set.set_src == NULL_RTX)
13424 continue;
13425
13426 if (t_reg_operand (op_set.set_src, VOIDmode)
13427 || negt_reg_operand (op_set.set_src, VOIDmode)
13428 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13429 op_is_t_count++;
13430 }
13431 }
13432
13433 return op_is_t_count == 2;
13434 }
13435
13436 /* Given the operand that is extended in a sign/zero extend insn, and the
13437 insn, try to figure out whether the sign/zero extension can be replaced
13438 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13439 NULL_RTX otherwise. */
13440 rtx
13441 sh_try_omit_signzero_extend (rtx extended_op, rtx insn)
13442 {
13443 if (REG_P (extended_op))
13444 ; /* Do nothing - use the reg as it is. */
13445 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13446 extended_op = SUBREG_REG (extended_op);
13447 else
13448 return NULL_RTX;
13449
13450 /* Reg moves must be of the same mode. */
13451 if (GET_MODE (extended_op) != SImode)
13452 return NULL_RTX;
13453
13454 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13455 if (s.set_src == NULL_RTX)
13456 return NULL_RTX;
13457
13458 if (t_reg_operand (s.set_src, VOIDmode)
13459 || negt_reg_operand (s.set_src, VOIDmode))
13460 return extended_op;
13461
13462 /* If the zero extended reg was formed by a logical operation, check the
13463 operands of the logical operation. If both originated from T bit
13464 stores the zero extension can be eliminated. */
13465 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13466 return extended_op;
13467
13468 return NULL_RTX;
13469 }
13470
13471 #include "gt-sh.h"