1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2015 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sstream>
23 #include <vector>
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "cfghooks.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "rtl.h"
33 #include "df.h"
34 #include "insn-config.h"
35 #include "alias.h"
36 #include "fold-const.h"
37 #include "stringpool.h"
38 #include "stor-layout.h"
39 #include "calls.h"
40 #include "varasm.h"
41 #include "flags.h"
42 #include "expmed.h"
43 #include "dojump.h"
44 #include "explow.h"
45 #include "emit-rtl.h"
46 #include "stmt.h"
47 #include "expr.h"
48 #include "insn-codes.h"
49 #include "optabs.h"
50 #include "reload.h"
51 #include "regs.h"
52 #include "output.h"
53 #include "insn-attr.h"
54 #include "diagnostic-core.h"
55 #include "recog.h"
56 #include "dwarf2.h"
57 #include "tm_p.h"
58 #include "target.h"
59 #include "langhooks.h"
60 #include "cfgrtl.h"
61 #include "cfganal.h"
62 #include "lcm.h"
63 #include "cfgbuild.h"
64 #include "cfgcleanup.h"
65 #include "intl.h"
66 #include "sched-int.h"
67 #include "params.h"
68 #include "internal-fn.h"
69 #include "gimple-fold.h"
70 #include "tree-eh.h"
71 #include "gimplify.h"
72 #include "cfgloop.h"
73 #include "alloc-pool.h"
74 #include "tm-constrs.h"
75 #include "opts.h"
76 #include "tree-pass.h"
77 #include "pass_manager.h"
78 #include "context.h"
79 #include "builtins.h"
80 #include "rtl-iter.h"
81
82 /* This file should be included last. */
83 #include "target-def.h"
84
85 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
86
87 /* These are some macros to abstract register modes. */
88 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
89 && ((HOST_WIDE_INT)(VALUE)) <= 511)
90
91 #define CONST_OK_FOR_ADD(size) \
92 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
93 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
94 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
95 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
96
97 /* Used to simplify the logic below. Find the attributes wherever
98 they may be. */
99 #define SH_ATTRIBUTES(decl) \
100 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
101 : DECL_ATTRIBUTES (decl) \
102 ? (DECL_ATTRIBUTES (decl)) \
103 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
104
105 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
106 int current_function_interrupt;
107
108 tree sh_deferred_function_attributes;
109 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
110
111 /* Global variables for machine-dependent things. */
112
113 /* Which cpu are we scheduling for. */
114 enum processor_type sh_cpu;
115
116 /* Definitions used in ready queue reordering for first scheduling pass. */
117
118 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
119 static short *regmode_weight[2];
120
121 /* Total SFmode and SImode weights of scheduled insns. */
122 static int curr_regmode_pressure[2];
123
124 /* Number of r0 life regions. */
125 static int r0_life_regions;
126
127 /* If true, skip cycles for Q -> R movement. */
128 static int skip_cycles = 0;
129
130 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
131 and returned from sh_reorder2. */
132 static short cached_can_issue_more;
133
134 /* Unique number for UNSPEC_BBR pattern. */
135 static unsigned int unspec_bbr_uid = 1;
136
137 /* Provides the class number of the smallest class containing
138 reg number. */
139 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
140 {
141 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
151 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
152 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
153 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
154 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
155 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
156 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
157 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
167 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
168 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
169 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
170 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
171 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
172 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
173 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
174 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
175 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
176 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
177 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
178 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
179 GENERAL_REGS, GENERAL_REGS,
180 };
181
182 char sh_register_names[FIRST_PSEUDO_REGISTER] \
183 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
184
185 char sh_additional_register_names[ADDREGNAMES_SIZE] \
186 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
187 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
188
189 int assembler_dialect;
190
191 static bool shmedia_space_reserved_for_target_registers;
192
193 static void split_branches (rtx_insn *);
194 static int branch_dest (rtx);
195 static void print_slot (rtx_sequence *);
196 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
197 static void dump_table (rtx_insn *, rtx_insn *);
198 static bool broken_move (rtx_insn *);
199 static bool mova_p (rtx_insn *);
200 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
201 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
202 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
203 static void sh_reorg (void);
204 static void sh_option_override (void);
205 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
206 static rtx_insn *frame_insn (rtx);
207 static rtx push (int);
208 static void pop (int);
209 static void push_regs (HARD_REG_SET *, int);
210 static int calc_live_regs (HARD_REG_SET *);
211 static HOST_WIDE_INT rounded_frame_size (int);
212 static bool sh_frame_pointer_required (void);
213 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
214 static int sh_mode_needed (int, rtx_insn *);
215 static int sh_mode_after (int, int, rtx_insn *);
216 static int sh_mode_entry (int);
217 static int sh_mode_exit (int);
218 static int sh_mode_priority (int entity, int n);
219
220 static rtx mark_constant_pool_use (rtx);
221 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
222 int, bool *);
223 static tree sh_handle_resbank_handler_attribute (tree *, tree,
224 tree, int, bool *);
225 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
226 tree, int, bool *);
227 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
228 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
229 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
230 static void sh_print_operand (FILE *, rtx, int);
231 static void sh_print_operand_address (FILE *, rtx);
232 static bool sh_print_operand_punct_valid_p (unsigned char code);
233 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
234 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
235 static void sh_insert_attributes (tree, tree *);
236 static const char *sh_check_pch_target_flags (int);
237 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
238 static int sh_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
239 static int sh_issue_rate (void);
240 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
241 static short find_set_regmode_weight (rtx, machine_mode);
242 static short find_insn_regmode_weight (rtx, machine_mode);
243 static void find_regmode_weight (basic_block, machine_mode);
244 static int find_r0_life_regions (basic_block);
245 static void sh_md_init_global (FILE *, int, int);
246 static void sh_md_finish_global (FILE *, int);
247 static int rank_for_reorder (const void *, const void *);
248 static void swap_reorder (rtx_insn **, int);
249 static void ready_reorder (rtx_insn **, int);
250 static bool high_pressure (machine_mode);
251 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
252 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
253 static void sh_md_init (FILE *, int, int);
254 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
255
256 static bool sh_function_ok_for_sibcall (tree, tree);
257
258 static bool sh_cannot_modify_jumps_p (void);
259 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
260 static reg_class_t sh_target_reg_class (void);
261 static bool sh_optimize_target_register_callee_saved (bool);
262 static bool sh_ms_bitfield_layout_p (const_tree);
263
264 static void sh_init_builtins (void);
265 static tree sh_builtin_decl (unsigned, bool);
266 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
267 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
268 HOST_WIDE_INT, tree);
269 static void sh_file_start (void);
270 static bool flow_dependent_p (rtx, rtx);
271 static void flow_dependent_p_1 (rtx, const_rtx, void *);
272 static int shiftcosts (rtx);
273 static int and_xor_ior_costs (rtx, int);
274 static int addsubcosts (rtx);
275 static int multcosts (rtx);
276 static bool unspec_caller_rtx_p (rtx);
277 static bool sh_cannot_copy_insn_p (rtx_insn *);
278 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
279 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
280 static int sh_pr_n_sets (void);
281 static rtx sh_allocate_initial_value (rtx);
282 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
283 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
284 machine_mode,
285 struct secondary_reload_info *);
286 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
287 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
288 static rtx sh_delegitimize_address (rtx);
289 static bool sh_cannot_substitute_mem_equiv_p (rtx);
290 static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
291 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
292 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
293 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
294 static int scavenge_reg (HARD_REG_SET *s);
295 struct save_schedule_s;
296 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
297 struct save_schedule_s *, int);
298
299 static rtx sh_struct_value_rtx (tree, int);
300 static rtx sh_function_value (const_tree, const_tree, bool);
301 static bool sh_function_value_regno_p (const unsigned int);
302 static rtx sh_libcall_value (machine_mode, const_rtx);
303 static bool sh_return_in_memory (const_tree, const_tree);
304 static rtx sh_builtin_saveregs (void);
305 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
306 tree, int *, int);
307 static bool sh_strict_argument_naming (cumulative_args_t);
308 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
309 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
310 static tree sh_build_builtin_va_list (void);
311 static void sh_va_start (tree, rtx);
312 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
313 static bool sh_promote_prototypes (const_tree);
314 static machine_mode sh_promote_function_mode (const_tree type,
315 machine_mode,
316 int *punsignedp,
317 const_tree funtype,
318 int for_return);
319 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
320 const_tree, bool);
321 static bool sh_callee_copies (cumulative_args_t, machine_mode,
322 const_tree, bool);
323 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
324 tree, bool);
325 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
326 const_tree, bool);
327 static rtx sh_function_arg (cumulative_args_t, machine_mode,
328 const_tree, bool);
329 static bool sh_scalar_mode_supported_p (machine_mode);
330 static int sh_dwarf_calling_convention (const_tree);
331 static void sh_encode_section_info (tree, rtx, int);
332 static bool sh2a_function_vector_p (tree);
333 static void sh_trampoline_init (rtx, tree, rtx);
334 static rtx sh_trampoline_adjust_address (rtx);
335 static void sh_conditional_register_usage (void);
336 static bool sh_legitimate_constant_p (machine_mode, rtx);
337 static int mov_insn_size (machine_mode, bool);
338 static int mov_insn_alignment_mask (machine_mode, bool);
339 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
340 unsigned int,
341 enum by_pieces_operation,
342 bool);
343 static bool sequence_insn_p (rtx_insn *);
344 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
345 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
346 machine_mode, bool);
347 static bool sh_legitimate_combined_insn (rtx_insn* insn);
348
349 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
350
351 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
352 \f
353 static const struct attribute_spec sh_attribute_table[] =
354 {
355 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
356 affects_type_identity } */
357 { "interrupt_handler", 0, 0, true, false, false,
358 sh_handle_interrupt_handler_attribute, false },
359 { "sp_switch", 1, 1, true, false, false,
360 sh_handle_sp_switch_attribute, false },
361 { "trap_exit", 1, 1, true, false, false,
362 sh_handle_trap_exit_attribute, false },
363 { "renesas", 0, 0, false, true, false,
364 sh_handle_renesas_attribute, false },
365 { "trapa_handler", 0, 0, true, false, false,
366 sh_handle_interrupt_handler_attribute, false },
367 { "nosave_low_regs", 0, 0, true, false, false,
368 sh_handle_interrupt_handler_attribute, false },
369 { "resbank", 0, 0, true, false, false,
370 sh_handle_resbank_handler_attribute, false },
371 { "function_vector", 1, 1, true, false, false,
372 sh2a_handle_function_vector_handler_attribute, false },
373 { NULL, 0, 0, false, false, false, NULL, false }
374 };
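/* Illustrative usage sketch (not taken from this file): the attributes in the
   table above are attached to functions in user code roughly as follows.  The
   argument counts match the min/max columns of the table; the exact accepted
   values are what the handler functions declared earlier validate.

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler, sp_switch ("alt_stack")));
     void isr3 (void) __attribute__ ((interrupt_handler, trap_exit (4)));
     void vec_fn (void) __attribute__ ((function_vector (18)));   // SH2A only  */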
375 \f
376 /* Initialize the GCC target structure. */
377 #undef TARGET_ATTRIBUTE_TABLE
378 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
379
380 /* The next two are used for debug info when compiling with -gdwarf. */
381 #undef TARGET_ASM_UNALIGNED_HI_OP
382 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
383 #undef TARGET_ASM_UNALIGNED_SI_OP
384 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
385
386 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
387 #undef TARGET_ASM_UNALIGNED_DI_OP
388 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
389 #undef TARGET_ASM_ALIGNED_DI_OP
390 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
391
392 #undef TARGET_OPTION_OVERRIDE
393 #define TARGET_OPTION_OVERRIDE sh_option_override
394
395 #undef TARGET_PRINT_OPERAND
396 #define TARGET_PRINT_OPERAND sh_print_operand
397 #undef TARGET_PRINT_OPERAND_ADDRESS
398 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
399 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
400 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
401 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
402 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
403
404 #undef TARGET_ASM_FUNCTION_EPILOGUE
405 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
406
407 #undef TARGET_ASM_OUTPUT_MI_THUNK
408 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
409
410 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
411 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
412 hook_bool_const_tree_hwi_hwi_const_tree_true
413
414 #undef TARGET_ASM_FILE_START
415 #define TARGET_ASM_FILE_START sh_file_start
416 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
417 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
418
419 #undef TARGET_REGISTER_MOVE_COST
420 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
421
422 #undef TARGET_INSERT_ATTRIBUTES
423 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
424
425 #undef TARGET_SCHED_ADJUST_COST
426 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
427
428 #undef TARGET_SCHED_ISSUE_RATE
429 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
430
 431 /* The next 5 hooks have been implemented for re-enabling sched1. With the
 432 help of these hooks we limit the movement of insns in sched1 to
 433 reduce the register pressure. The overall idea is to keep count of SImode
 434 and SFmode regs required by already scheduled insns. When these counts
 435 cross certain threshold values, give priority to insns that free registers.
 436 The insn that frees registers is most likely to be the insn with the lowest
 437 LUID (original insn order), but such an insn might be sitting in the stalled
 438 queue (Q) instead of the ready queue (R). To solve this, we skip up to a
 439 maximum of 8 cycles so that such insns may move from Q -> R.
 440
 441 The hooks are described below:
 442
 443 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
 444 scheduler; it is called inside the sched_init function just after
 445 the find_insn_reg_weights function call. It is used to calculate the SImode
 446 and SFmode weights of insns of basic blocks, much like what
 447 find_insn_reg_weights does.
448 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
449
450 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
451 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
452 (Q)->(R).
453
454 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
455 high; reorder the ready queue so that the insn with lowest LUID will be
456 issued next.
457
458 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
459 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
460
461 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
462 can be returned from TARGET_SCHED_REORDER2.
463
464 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
465
466 #undef TARGET_SCHED_DFA_NEW_CYCLE
467 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
468
469 #undef TARGET_SCHED_INIT_GLOBAL
470 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
471
472 #undef TARGET_SCHED_FINISH_GLOBAL
473 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
474
475 #undef TARGET_SCHED_VARIABLE_ISSUE
476 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
477
478 #undef TARGET_SCHED_REORDER
479 #define TARGET_SCHED_REORDER sh_reorder
480
481 #undef TARGET_SCHED_REORDER2
482 #define TARGET_SCHED_REORDER2 sh_reorder2
483
484 #undef TARGET_SCHED_INIT
485 #define TARGET_SCHED_INIT sh_md_init
486
487 #undef TARGET_DELEGITIMIZE_ADDRESS
488 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
489
490 #undef TARGET_LEGITIMIZE_ADDRESS
491 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
492
493 #undef TARGET_CANNOT_MODIFY_JUMPS_P
494 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
495 #undef TARGET_CAN_FOLLOW_JUMP
496 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
497 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
498 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
499 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
500 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
501 sh_optimize_target_register_callee_saved
502
503 #undef TARGET_MS_BITFIELD_LAYOUT_P
504 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
505
506 #undef TARGET_INIT_BUILTINS
507 #define TARGET_INIT_BUILTINS sh_init_builtins
508 #undef TARGET_BUILTIN_DECL
509 #define TARGET_BUILTIN_DECL sh_builtin_decl
510 #undef TARGET_EXPAND_BUILTIN
511 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
512
513 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
514 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
515
516 #undef TARGET_CANNOT_COPY_INSN_P
517 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
518 #undef TARGET_RTX_COSTS
519 #define TARGET_RTX_COSTS sh_rtx_costs
520 #undef TARGET_ADDRESS_COST
521 #define TARGET_ADDRESS_COST sh_address_cost
522 #undef TARGET_ALLOCATE_INITIAL_VALUE
523 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
524
525 #undef TARGET_MACHINE_DEPENDENT_REORG
526 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
527
528 #undef TARGET_DWARF_REGISTER_SPAN
529 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
530
531 #ifdef HAVE_AS_TLS
532 #undef TARGET_HAVE_TLS
533 #define TARGET_HAVE_TLS true
534 #endif
535
536 #undef TARGET_PROMOTE_PROTOTYPES
537 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
538 #undef TARGET_PROMOTE_FUNCTION_MODE
539 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
540
541 #undef TARGET_FUNCTION_VALUE
542 #define TARGET_FUNCTION_VALUE sh_function_value
543 #undef TARGET_FUNCTION_VALUE_REGNO_P
544 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
545 #undef TARGET_LIBCALL_VALUE
546 #define TARGET_LIBCALL_VALUE sh_libcall_value
547 #undef TARGET_STRUCT_VALUE_RTX
548 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
549 #undef TARGET_RETURN_IN_MEMORY
550 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
551
552 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
553 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
554 #undef TARGET_SETUP_INCOMING_VARARGS
555 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
556 #undef TARGET_STRICT_ARGUMENT_NAMING
557 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
558 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
559 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
560 #undef TARGET_MUST_PASS_IN_STACK
561 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
562 #undef TARGET_PASS_BY_REFERENCE
563 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
564 #undef TARGET_CALLEE_COPIES
565 #define TARGET_CALLEE_COPIES sh_callee_copies
566 #undef TARGET_ARG_PARTIAL_BYTES
567 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
568 #undef TARGET_FUNCTION_ARG
569 #define TARGET_FUNCTION_ARG sh_function_arg
570 #undef TARGET_FUNCTION_ARG_ADVANCE
571 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
572
573 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
574 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
575
576 #undef TARGET_BUILD_BUILTIN_VA_LIST
577 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
578 #undef TARGET_EXPAND_BUILTIN_VA_START
579 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
580 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
581 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
582
583 #undef TARGET_SCALAR_MODE_SUPPORTED_P
584 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
585 #undef TARGET_VECTOR_MODE_SUPPORTED_P
586 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
587
588 #undef TARGET_CHECK_PCH_TARGET_FLAGS
589 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
590
591 #undef TARGET_DWARF_CALLING_CONVENTION
592 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
593
594 #undef TARGET_FRAME_POINTER_REQUIRED
595 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
596
597 #undef TARGET_MODE_EMIT
598 #define TARGET_MODE_EMIT sh_emit_mode_set
599
600 #undef TARGET_MODE_NEEDED
601 #define TARGET_MODE_NEEDED sh_mode_needed
602
603 #undef TARGET_MODE_AFTER
604 #define TARGET_MODE_AFTER sh_mode_after
605
606 #undef TARGET_MODE_ENTRY
607 #define TARGET_MODE_ENTRY sh_mode_entry
608
609 #undef TARGET_MODE_EXIT
610 #define TARGET_MODE_EXIT sh_mode_exit
611
612 #undef TARGET_MODE_PRIORITY
613 #define TARGET_MODE_PRIORITY sh_mode_priority
614
615 /* Return regmode weight for insn. */
616 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
617 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
618
619 /* Return current register pressure for regmode. */
620 #define CURR_REGMODE_PRESSURE(MODE)\
621 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
622
623 #undef TARGET_ENCODE_SECTION_INFO
624 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
625
626 #undef TARGET_LRA_P
627 #define TARGET_LRA_P sh_lra_p
628
629 #undef TARGET_SECONDARY_RELOAD
630 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
631
632 #undef TARGET_PREFERRED_RELOAD_CLASS
633 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
634
635 #undef TARGET_CONDITIONAL_REGISTER_USAGE
636 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
637
638 #undef TARGET_LEGITIMATE_ADDRESS_P
639 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
640
641 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
642 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
643
644 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
645 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
646 sh_legitimize_address_displacement
647
648 #undef TARGET_TRAMPOLINE_INIT
649 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
650 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
651 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
652
653 #undef TARGET_LEGITIMATE_CONSTANT_P
654 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
655
656 #undef TARGET_CANONICALIZE_COMPARISON
657 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
658
659 #undef TARGET_LEGITIMATE_COMBINED_INSN
660 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
661
662 #undef TARGET_FIXED_CONDITION_CODE_REGS
663 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
664
665 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
666 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
667 sh_use_by_pieces_infrastructure_p
668
669 /* Machine-specific symbol_ref flags. */
670 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
671
 672 /* The tas.b instruction sets bit 7 in the byte, i.e. 0x80. This value
673 is used by optabs.c atomic op expansion code as well as in sync.md. */
674 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
675 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
676
677 struct gcc_target targetm = TARGET_INITIALIZER;
678 \f
679
680 /* Information on the currently selected atomic model.
681 This is initialized in sh_option_override. */
682 static sh_atomic_model selected_atomic_model_;
683
684 const sh_atomic_model&
685 selected_atomic_model (void)
686 {
687 return selected_atomic_model_;
688 }
689
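/* Usage note (an illustrative sketch, with the option spelling assumed rather
   than taken from this file): the string parsed by the function below comes
   from the -matomic-model= command line option and has the form
   "<model>[,<parameter>...]", where <model> is one of the names set up in
   model_names and the optional parameters are "strict" and "gbr-offset=<n>",
   e.g. "soft-gusa", "hard-llcs,strict" or "soft-tcb,gbr-offset=32".  */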
690 static sh_atomic_model
691 parse_validate_atomic_model_option (const char* str)
692 {
693 const char* model_names[sh_atomic_model::num_models];
694 model_names[sh_atomic_model::none] = "none";
695 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
696 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
697 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
698 model_names[sh_atomic_model::soft_imask] = "soft-imask";
699
700 const char* model_cdef_names[sh_atomic_model::num_models];
701 model_cdef_names[sh_atomic_model::none] = "NONE";
702 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
703 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
704 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
705 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
706
707 sh_atomic_model ret;
708 ret.type = sh_atomic_model::none;
709 ret.name = model_names[sh_atomic_model::none];
710 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
711 ret.strict = false;
712 ret.tcb_gbr_offset = -1;
713
714 /* Handle empty string as 'none'. */
715 if (str == NULL || *str == '\0')
716 return ret;
717
718 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
719
720 std::vector<std::string> tokens;
721 for (std::stringstream ss (str); ss.good (); )
722 {
723 tokens.push_back (std::string ());
724 std::getline (ss, tokens.back (), ',');
725 }
726
727 if (tokens.empty ())
728 err_ret ("invalid atomic model option");
729
730 /* The first token must be the atomic model name. */
731 {
732 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
733 if (tokens.front () == model_names[i])
734 {
735 ret.type = (sh_atomic_model::enum_type)i;
736 ret.name = model_names[i];
737 ret.cdef_name = model_cdef_names[i];
738 goto got_mode_name;
739 }
740
741 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
742 got_mode_name:;
743 }
744
745 /* Go through the remaining tokens. */
746 for (size_t i = 1; i < tokens.size (); ++i)
747 {
748 if (tokens[i] == "strict")
749 ret.strict = true;
750 else if (tokens[i].find ("gbr-offset=") == 0)
751 {
752 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
753 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
754 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
755 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
756 "option", offset_str.c_str ());
757 }
758 else
759 err_ret ("unknown parameter \"%s\" in atomic model option",
760 tokens[i].c_str ());
761 }
762
763 /* Check that the selection makes sense. */
764 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
765 err_ret ("atomic operations are not supported on SHmedia");
766
767 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
768 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
769 ret.name);
770
771 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
772 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
773
774 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
775 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
776
777 if (ret.type == sh_atomic_model::soft_tcb
778 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
779 || (ret.tcb_gbr_offset & 3) != 0))
780 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
781 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
782 ret.name);
783
784 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
785 err_ret ("cannot use atomic model %s in user mode", ret.name);
786
787 return ret;
788
789 #undef err_ret
790 }
791
792 /* Register SH specific RTL passes. */
793 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
794 const char* name);
795 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
796 const char* name);
797 static void
798 register_sh_passes (void)
799 {
800 if (!TARGET_SH1)
801 return;
802
803 /* Running the sh_treg_combine pass after ce1 generates better code when
804 comparisons are combined and reg-reg moves are introduced, because
 805 reg-reg moves will be eliminated afterwards. However, there are quite
 806 a few cases where combine will be unable to fold comparison-related insns,
 807 so for now don't do it.
808 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
809 PASS_POS_INSERT_AFTER, "ce1", 1);
810 */
811
812 /* Run sh_treg_combine pass after combine but before register allocation. */
813 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
814 PASS_POS_INSERT_AFTER, "split1", 1);
815
816 /* Run sh_treg_combine pass after register allocation and basic block
817 reordering as this sometimes creates new opportunities. */
818 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
819 PASS_POS_INSERT_AFTER, "split4", 1);
820
821 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
822 is known after a conditional branch.
823 This must be done after basic blocks and branch conditions have
824 stabilized and won't be changed by further passes. */
825 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
826 PASS_POS_INSERT_BEFORE, "sched2", 1);
827 }
828
829 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
830 various options, and do some machine dependent initialization. */
831 static void
832 sh_option_override (void)
833 {
834 int regno;
835
836 SUBTARGET_OVERRIDE_OPTIONS;
837 if (optimize > 1 && !optimize_size)
838 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
839
840 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
841 TARGET_CBRANCHDI4 = 1;
842 TARGET_CMPEQDI_T = 0;
843
844 sh_cpu = PROCESSOR_SH1;
845 assembler_dialect = 0;
846 if (TARGET_SH2)
847 sh_cpu = PROCESSOR_SH2;
848 if (TARGET_SH2E)
849 sh_cpu = PROCESSOR_SH2E;
850 if (TARGET_SH2A)
851 sh_cpu = PROCESSOR_SH2A;
852 if (TARGET_SH3)
853 sh_cpu = PROCESSOR_SH3;
854 if (TARGET_SH3E)
855 sh_cpu = PROCESSOR_SH3E;
856 if (TARGET_SH4)
857 {
858 assembler_dialect = 1;
859 sh_cpu = PROCESSOR_SH4;
860 }
861 if (TARGET_SH4A)
862 {
863 assembler_dialect = 1;
864 sh_cpu = PROCESSOR_SH4A;
865 }
866 if (TARGET_SH5)
867 {
868 sh_cpu = PROCESSOR_SH5;
869 target_flags |= MASK_ALIGN_DOUBLE;
870 if (TARGET_SHMEDIA_FPU)
871 target_flags |= MASK_FMOVD;
872 if (TARGET_SHMEDIA)
873 {
874 /* There are no delay slots on SHmedia. */
875 flag_delayed_branch = 0;
 876 /* Relaxation isn't yet supported for SHmedia. */
877 target_flags &= ~MASK_RELAX;
 878 /* After reload, if-conversion does little good but can cause
879 ICEs:
880 - find_if_block doesn't do anything for SH because we don't
881 have conditional execution patterns. (We use conditional
882 move patterns, which are handled differently, and only
883 before reload).
884 - find_cond_trap doesn't do anything for the SH because we
885 don't have conditional traps.
886 - find_if_case_1 uses redirect_edge_and_branch_force in
887 the only path that does an optimization, and this causes
888 an ICE when branch targets are in registers.
889 - find_if_case_2 doesn't do anything for the SHmedia after
890 reload except when it can redirect a tablejump - and
891 that's rather rare. */
892 flag_if_conversion2 = 0;
893 if (! strcmp (sh_div_str, "call"))
894 sh_div_strategy = SH_DIV_CALL;
895 else if (! strcmp (sh_div_str, "call2"))
896 sh_div_strategy = SH_DIV_CALL2;
897 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
898 sh_div_strategy = SH_DIV_FP;
899 else if (! strcmp (sh_div_str, "inv"))
900 sh_div_strategy = SH_DIV_INV;
901 else if (! strcmp (sh_div_str, "inv:minlat"))
902 sh_div_strategy = SH_DIV_INV_MINLAT;
903 else if (! strcmp (sh_div_str, "inv20u"))
904 sh_div_strategy = SH_DIV_INV20U;
905 else if (! strcmp (sh_div_str, "inv20l"))
906 sh_div_strategy = SH_DIV_INV20L;
907 else if (! strcmp (sh_div_str, "inv:call2"))
908 sh_div_strategy = SH_DIV_INV_CALL2;
909 else if (! strcmp (sh_div_str, "inv:call"))
910 sh_div_strategy = SH_DIV_INV_CALL;
911 else if (! strcmp (sh_div_str, "inv:fp"))
912 {
913 if (TARGET_FPU_ANY)
914 sh_div_strategy = SH_DIV_INV_FP;
915 else
916 sh_div_strategy = SH_DIV_INV;
917 }
918 TARGET_CBRANCHDI4 = 0;
919 /* Assembler CFI isn't yet fully supported for SHmedia. */
920 flag_dwarf2_cfi_asm = 0;
921 }
922 }
923 else
924 {
 925 /* Only the sh64-elf assembler supports .quad properly. */
926 targetm.asm_out.aligned_op.di = NULL;
927 targetm.asm_out.unaligned_op.di = NULL;
928 }
929
 930 /* User/privileged mode is supported only on SH3*, SH4* and SH5*.
931 Disable it for everything else. */
932 if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
933 TARGET_USERMODE = false;
934
935 if (TARGET_SH1)
936 {
937 if (! strcmp (sh_div_str, "call-div1"))
938 sh_div_strategy = SH_DIV_CALL_DIV1;
939 else if (! strcmp (sh_div_str, "call-fp")
940 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
941 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
942 sh_div_strategy = SH_DIV_CALL_FP;
943 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
944 sh_div_strategy = SH_DIV_CALL_TABLE;
945 else
946 /* Pick one that makes most sense for the target in general.
947 It is not much good to use different functions depending
948 on -Os, since then we'll end up with two different functions
949 when some of the code is compiled for size, and some for
950 speed. */
951
952 /* SH4 tends to emphasize speed. */
953 if (TARGET_HARD_SH4)
954 sh_div_strategy = SH_DIV_CALL_TABLE;
955 /* These have their own way of doing things. */
956 else if (TARGET_SH2A)
957 sh_div_strategy = SH_DIV_INTRINSIC;
958 /* ??? Should we use the integer SHmedia function instead? */
959 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
960 sh_div_strategy = SH_DIV_CALL_FP;
961 /* SH1 .. SH3 cores often go into small-footprint systems, so
962 default to the smallest implementation available. */
963 else
964 sh_div_strategy = SH_DIV_CALL_DIV1;
965 }
966 if (!TARGET_SH1)
967 TARGET_PRETEND_CMOVE = 0;
968 if (sh_divsi3_libfunc[0])
969 ; /* User supplied - leave it alone. */
970 else if (TARGET_DIVIDE_CALL_FP)
971 sh_divsi3_libfunc = "__sdivsi3_i4";
972 else if (TARGET_DIVIDE_CALL_TABLE)
973 sh_divsi3_libfunc = "__sdivsi3_i4i";
974 else if (TARGET_SH5)
975 sh_divsi3_libfunc = "__sdivsi3_1";
976 else
977 sh_divsi3_libfunc = "__sdivsi3";
978
979 if (sh_branch_cost == -1)
980 {
981 /* The SH1 does not have delay slots, hence we get a pipeline stall
982 at every branch. The SH4 is superscalar, so the single delay slot
983 is not sufficient to keep both pipelines filled.
984 In any case, set the default branch cost to '2', as it results in
 985 slightly smaller code overall and also enables some if-conversions
986 that are required for matching special T bit related insns. */
987 sh_branch_cost = 2;
988 }
989
990 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
991 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
992 TARGET_ZDCBRANCH = 1;
993
994 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
995 if (! VALID_REGISTER_P (regno))
996 sh_register_names[regno][0] = '\0';
997
998 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
999 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
1000 sh_additional_register_names[regno][0] = '\0';
1001
1002 if ((flag_pic && ! TARGET_PREFERGOT)
1003 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
1004 flag_no_function_cse = 1;
1005
1006 if (targetm.small_register_classes_for_mode_p (VOIDmode))
1007 {
1008 /* Never run scheduling before reload, since that can
1009 break global alloc, and generates slower code anyway due
1010 to the pressure on R0. */
 1011 /* Enable sched1 for SH4 if the user explicitly requests it.
 1012 When sched1 is enabled, the ready queue will be reordered by
 1013 the target hooks if pressure is high. We cannot do this for
 1014 PIC, SH3 and lower as they give spill failures for R0. */
1015 if (!TARGET_HARD_SH4 || flag_pic)
1016 flag_schedule_insns = 0;
1017 /* ??? Current exception handling places basic block boundaries
 1018 after call_insns. This causes high register pressure on R0 and gives
1019 spill failures for R0 in reload. See PR 22553 and the thread
1020 on gcc-patches
1021 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
1022 else if (flag_exceptions)
1023 {
1024 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
1025 warning (0, "ignoring -fschedule-insns because of exception "
1026 "handling bug");
1027 flag_schedule_insns = 0;
1028 }
1029 else if (flag_schedule_insns
1030 && !global_options_set.x_flag_schedule_insns)
1031 flag_schedule_insns = 0;
1032 }
1033
1034 /* Unwind info is not correct around the CFG unless either a frame
1035 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1036 unwind info generation to be aware of the CFG and propagating states
1037 around edges. */
1038 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1039 || flag_exceptions || flag_non_call_exceptions)
1040 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
1041 {
1042 warning (0, "unwind tables currently require either a frame pointer "
1043 "or -maccumulate-outgoing-args for correctness");
1044 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1045 }
1046
1047 /* Adjust loop, jump and function alignment values (in bytes), if those
1048 were not specified by the user using -falign-loops, -falign-jumps
1049 and -falign-functions options.
1050 32 bit alignment is better for speed, because instructions can be
1051 fetched as a pair from a longword boundary. For size use 16 bit
1052 alignment to get more compact code.
1053 Aligning all jumps increases the code size, even if it might
1054 result in slightly faster code. Thus, it is set to the smallest
1055 alignment possible if not specified by the user. */
1056 if (align_loops == 0)
1057 {
1058 if (TARGET_SH5)
1059 align_loops = 8;
1060 else
1061 align_loops = optimize_size ? 2 : 4;
1062 }
1063
1064 if (align_jumps == 0)
1065 {
1066 if (TARGET_SHMEDIA)
1067 align_jumps = 1 << CACHE_LOG;
1068 else
1069 align_jumps = 2;
1070 }
1071 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1072 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1073
1074 if (align_functions == 0)
1075 {
1076 if (TARGET_SHMEDIA)
1077 align_functions = optimize_size
1078 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1079 else
1080 align_functions = optimize_size ? 2 : 4;
1081 }
1082
1083 /* The linker relaxation code breaks when a function contains
1084 alignments that are larger than that at the start of a
1085 compilation unit. */
1086 if (TARGET_RELAX)
1087 {
1088 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1089
1090 /* Also take possible .long constants / mova tables into account. */
1091 if (min_align < 4)
1092 min_align = 4;
1093 if (align_functions < min_align)
1094 align_functions = min_align;
1095 }
1096
1097 if (flag_unsafe_math_optimizations)
1098 {
1099 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1100 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1101 TARGET_FSCA = 1;
1102
1103 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1104 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1105 TARGET_FSRRA = 1;
1106 }
1107
1108 /* Allow fsrra insn only if -funsafe-math-optimizations and
 1109 -ffinite-math-only are enabled. */
1110 TARGET_FSRRA = TARGET_FSRRA
1111 && flag_unsafe_math_optimizations
1112 && flag_finite_math_only;
1113
1114 /* If the -mieee option was not explicitly set by the user, turn it on
1115 unless -ffinite-math-only was specified. See also PR 33135. */
1116 if (! global_options_set.x_TARGET_IEEE)
1117 TARGET_IEEE = ! flag_finite_math_only;
1118
1119 if (sh_fixed_range_str)
1120 sh_fix_range (sh_fixed_range_str);
1121
1122 /* This target defaults to strict volatile bitfields. */
1123 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1124 flag_strict_volatile_bitfields = 1;
1125
1126 /* Parse atomic model option and make sure it is valid for the current
1127 target CPU. */
1128 selected_atomic_model_
1129 = parse_validate_atomic_model_option (sh_atomic_model_str);
1130
1131 register_sh_passes ();
1132 }
1133 \f
1134 /* Print the operand address in x to the stream. */
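/* For reference (derived directly from the fprintf format strings below), the
   address syntaxes emitted by this function are: "@rN" for a plain register,
   "@(disp,rN)" for register plus constant displacement, "@(r0,rN)" for
   register plus register, "@-rN" for pre-decrement and "@rN+" for
   post-increment; anything else is printed as a constant address.  */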
1135 static void
1136 sh_print_operand_address (FILE *stream, rtx x)
1137 {
1138 switch (GET_CODE (x))
1139 {
1140 case REG:
1141 case SUBREG:
1142 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1143 break;
1144
1145 case PLUS:
1146 {
1147 rtx base = XEXP (x, 0);
1148 rtx index = XEXP (x, 1);
1149
1150 switch (GET_CODE (index))
1151 {
1152 case CONST_INT:
1153 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1154 reg_names[true_regnum (base)]);
1155 break;
1156
1157 case REG:
1158 case SUBREG:
1159 {
1160 int base_num = true_regnum (base);
1161 int index_num = true_regnum (index);
1162
1163 fprintf (stream, "@(r0,%s)",
1164 reg_names[MAX (base_num, index_num)]);
1165 break;
1166 }
1167
1168 default:
1169 gcc_unreachable ();
1170 }
1171 }
1172 break;
1173
1174 case PRE_DEC:
1175 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1176 break;
1177
1178 case POST_INC:
1179 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1180 break;
1181
1182 default:
1183 x = mark_constant_pool_use (x);
1184 output_addr_const (stream, x);
1185 break;
1186 }
1187 }
1188
1189 /* Print operand x (an rtx) in assembler syntax to file stream
1190 according to modifier code.
1191
1192 '.' print a .s if insn needs delay slot
1193 ',' print LOCAL_LABEL_PREFIX
1194 '@' print trap, rte or rts depending upon pragma interruptness
1195 '#' output a nop if there is nothing to put in the delay slot
1196 ''' print likelihood suffix (/u for unlikely).
1197 '>' print branch target if -fverbose-asm
1198 'O' print a constant without the #
1199 'R' print the LSW of a dp value - changes if in little endian
1200 'S' print the MSW of a dp value - changes if in little endian
1201 'T' print the next word of a dp value - same as 'R' in big endian mode.
1202 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1203 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1204 'N' print 'r63' if the operand is (const_int 0).
1205 'd' print a V2SF reg as dN instead of fpN.
1206 'm' print a pair `base,offset' or `base,index', for LD and ST.
1207 'U' Likewise for {LD,ST}{HI,LO}.
1208 'V' print the position of a single bit set.
1209 'W' print the position of a single bit cleared.
1210 't' print a memory address which is a register.
1211 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1212 'o' output an operator. */
1213 static void
1214 sh_print_operand (FILE *stream, rtx x, int code)
1215 {
1216 int regno;
1217 machine_mode mode;
1218
1219 switch (code)
1220 {
1221 tree trapa_attr;
1222
1223 case '.':
1224 if (final_sequence
1225 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1226 && get_attr_length (final_sequence->insn (1)))
1227 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1228 break;
1229 case ',':
1230 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1231 break;
1232 case '@':
1233 trapa_attr = lookup_attribute ("trap_exit",
1234 DECL_ATTRIBUTES (current_function_decl));
1235 if (trapa_attr)
1236 fprintf (stream, "trapa #%ld",
1237 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1238 else if (sh_cfun_interrupt_handler_p ())
1239 {
1240 if (sh_cfun_resbank_handler_p ())
1241 fprintf (stream, "resbank\n");
1242 fprintf (stream, "rte");
1243 }
1244 else
1245 fprintf (stream, "rts");
1246 break;
1247 case '#':
1248 /* Output a nop if there's nothing in the delay slot. */
1249 if (dbr_sequence_length () == 0)
1250 fprintf (stream, "\n\tnop");
1251 break;
1252 case '\'':
1253 {
1254 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1255
1256 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1257 fputs ("/u", stream);
1258 break;
1259 }
1260 case '>':
1261 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1262 {
1263 fputs ("\t! target: ", stream);
1264 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1265 }
1266 break;
1267 case 'O':
1268 x = mark_constant_pool_use (x);
1269 output_addr_const (stream, x);
1270 break;
1271 /* N.B.: %R / %S / %T adjust memory addresses by four.
1272 For SHMEDIA, that means they can be used to access the first and
1273 second 32 bit part of a 64 bit (or larger) value that
1274 might be held in floating point registers or memory.
1275 While they can be used to access 64 bit parts of a larger value
 1276 held in general purpose registers, that won't work with memory,
 1277 nor with fp registers, since the frxx names are used. */
1278 case 'R':
1279 if (REG_P (x) || GET_CODE (x) == SUBREG)
1280 {
1281 regno = true_regnum (x);
1282 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1283 fputs (reg_names[regno], (stream));
1284 }
1285 else if (MEM_P (x))
1286 {
1287 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1288 sh_print_operand_address (stream, XEXP (x, 0));
1289 }
1290 else
1291 {
1292 rtx sub = NULL_RTX;
1293
1294 mode = GET_MODE (x);
1295 if (mode == VOIDmode)
1296 mode = DImode;
1297 if (GET_MODE_SIZE (mode) >= 8)
1298 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1299 if (sub)
1300 sh_print_operand (stream, sub, 0);
1301 else
1302 output_operand_lossage ("invalid operand to %%R");
1303 }
1304 break;
1305 case 'S':
1306 if (REG_P (x) || GET_CODE (x) == SUBREG)
1307 {
1308 regno = true_regnum (x);
1309 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1310 fputs (reg_names[regno], (stream));
1311 }
1312 else if (MEM_P (x))
1313 {
1314 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1315 sh_print_operand_address (stream, XEXP (x, 0));
1316 }
1317 else
1318 {
1319 rtx sub = NULL_RTX;
1320
1321 mode = GET_MODE (x);
1322 if (mode == VOIDmode)
1323 mode = DImode;
1324 if (GET_MODE_SIZE (mode) >= 8)
1325 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1326 if (sub)
1327 sh_print_operand (stream, sub, 0);
1328 else
1329 output_operand_lossage ("invalid operand to %%S");
1330 }
1331 break;
1332 case 'T':
1333 /* Next word of a double. */
1334 switch (GET_CODE (x))
1335 {
1336 case REG:
1337 fputs (reg_names[REGNO (x) + 1], (stream));
1338 break;
1339 case MEM:
1340 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1341 && GET_CODE (XEXP (x, 0)) != POST_INC)
1342 x = adjust_address (x, SImode, 4);
1343 sh_print_operand_address (stream, XEXP (x, 0));
1344 break;
1345 default:
1346 break;
1347 }
1348 break;
1349
1350 case 't':
1351 gcc_assert (MEM_P (x));
1352 x = XEXP (x, 0);
1353 switch (GET_CODE (x))
1354 {
1355 case REG:
1356 case SUBREG:
1357 sh_print_operand (stream, x, 0);
1358 break;
1359 default:
1360 break;
1361 }
1362 break;
1363
1364 case 'o':
1365 switch (GET_CODE (x))
1366 {
1367 case PLUS: fputs ("add", stream); break;
1368 case MINUS: fputs ("sub", stream); break;
1369 case MULT: fputs ("mul", stream); break;
1370 case DIV: fputs ("div", stream); break;
1371 case EQ: fputs ("eq", stream); break;
1372 case NE: fputs ("ne", stream); break;
1373 case GT: case LT: fputs ("gt", stream); break;
1374 case GE: case LE: fputs ("ge", stream); break;
1375 case GTU: case LTU: fputs ("gtu", stream); break;
1376 case GEU: case LEU: fputs ("geu", stream); break;
1377 default:
1378 break;
1379 }
1380 break;
1381 case 'M':
1382 if (TARGET_SHMEDIA)
1383 {
1384 if (MEM_P (x)
1385 && GET_CODE (XEXP (x, 0)) == PLUS
1386 && (REG_P (XEXP (XEXP (x, 0), 1))
1387 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1388 fputc ('x', stream);
1389 }
1390 else
1391 {
1392 if (MEM_P (x))
1393 {
1394 switch (GET_MODE (x))
1395 {
1396 case QImode: fputs (".b", stream); break;
1397 case HImode: fputs (".w", stream); break;
1398 case SImode: fputs (".l", stream); break;
1399 case SFmode: fputs (".s", stream); break;
1400 case DFmode: fputs (".d", stream); break;
1401 default: gcc_unreachable ();
1402 }
1403 }
1404 }
1405 break;
1406
1407 case 'm':
1408 gcc_assert (MEM_P (x));
1409 x = XEXP (x, 0);
1410 /* Fall through. */
1411 case 'U':
1412 switch (GET_CODE (x))
1413 {
1414 case REG:
1415 case SUBREG:
1416 sh_print_operand (stream, x, 0);
1417 fputs (", 0", stream);
1418 break;
1419
1420 case PLUS:
1421 sh_print_operand (stream, XEXP (x, 0), 0);
1422 fputs (", ", stream);
1423 sh_print_operand (stream, XEXP (x, 1), 0);
1424 break;
1425
1426 default:
1427 gcc_unreachable ();
1428 }
1429 break;
1430
1431 case 'V':
1432 {
1433 int num = exact_log2 (INTVAL (x));
1434 gcc_assert (num >= 0);
1435 fprintf (stream, "#%d", num);
1436 }
1437 break;
1438
1439 case 'W':
1440 {
1441 int num = exact_log2 (~INTVAL (x));
1442 gcc_assert (num >= 0);
1443 fprintf (stream, "#%d", num);
1444 }
1445 break;
1446
1447 case 'd':
1448 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1449
1450 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1451 break;
1452
1453 case 'N':
1454 if (x == CONST0_RTX (GET_MODE (x)))
1455 {
1456 fprintf ((stream), "r63");
1457 break;
1458 }
1459 goto default_output;
1460 case 'u':
1461 if (CONST_INT_P (x))
1462 {
1463 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1464 break;
1465 }
1466 /* Fall through. */
1467
1468 default_output:
1469 default:
1470 regno = 0;
1471 mode = GET_MODE (x);
1472
1473 switch (GET_CODE (x))
1474 {
1475 case TRUNCATE:
1476 {
1477 rtx inner = XEXP (x, 0);
1478 int offset = 0;
1479 machine_mode inner_mode;
1480
1481 /* We might see SUBREGs with vector mode registers inside. */
1482 if (GET_CODE (inner) == SUBREG
1483 && (GET_MODE_SIZE (GET_MODE (inner))
1484 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1485 && subreg_lowpart_p (inner))
1486 inner = SUBREG_REG (inner);
1487 if (CONST_INT_P (inner))
1488 {
1489 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1490 goto default_output;
1491 }
1492 inner_mode = GET_MODE (inner);
1493 if (GET_CODE (inner) == SUBREG
1494 && (GET_MODE_SIZE (GET_MODE (inner))
1495 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1496 && REG_P (SUBREG_REG (inner)))
1497 {
1498 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1499 GET_MODE (SUBREG_REG (inner)),
1500 SUBREG_BYTE (inner),
1501 GET_MODE (inner));
1502 inner = SUBREG_REG (inner);
1503 }
1504 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1505 abort ();
1506 /* Floating point register pairs are always big endian;
1507 general purpose registers are 64 bit wide. */
1508 regno = REGNO (inner);
1509 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1510 - HARD_REGNO_NREGS (regno, mode))
1511 + offset;
1512 x = inner;
1513 goto reg;
1514 }
1515 case SIGN_EXTEND:
1516 x = XEXP (x, 0);
1517 goto reg;
1518 /* FIXME: We need this on SHmedia32 because reload generates
1519 some sign-extended HI or QI loads into DImode registers
1520 but, because Pmode is SImode, the address ends up with a
1521 subreg:SI of the DImode register. Maybe reload should be
1522 fixed so as to apply alter_subreg to such loads? */
1523 case IF_THEN_ELSE:
1524 gcc_assert (trapping_target_operand (x, VOIDmode));
1525 x = XEXP (XEXP (x, 2), 0);
1526 goto default_output;
1527 case SUBREG:
1528 gcc_assert (SUBREG_BYTE (x) == 0
1529 && REG_P (SUBREG_REG (x)));
1530
1531 x = SUBREG_REG (x);
1532 /* Fall through. */
1533
1534 reg:
1535 case REG:
1536 regno += REGNO (x);
1537 if (FP_REGISTER_P (regno)
1538 && mode == V16SFmode)
1539 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1540 else if (FP_REGISTER_P (REGNO (x))
1541 && mode == V4SFmode)
1542 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1543 else if (REG_P (x)
1544 && mode == V2SFmode)
1545 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1546 else if (FP_REGISTER_P (REGNO (x))
1547 && GET_MODE_SIZE (mode) > 4)
1548 fprintf ((stream), "d%s", reg_names[regno] + 1);
1549 else
1550 fputs (reg_names[regno], (stream));
1551 break;
1552
1553 case MEM:
1554 output_address (XEXP (x, 0));
1555 break;
1556
1557 default:
1558 if (TARGET_SH1)
1559 fputc ('#', stream);
1560 output_addr_const (stream, x);
1561 break;
1562 }
1563 break;
1564 }
1565 }
1566
1567 static bool
1568 sh_print_operand_punct_valid_p (unsigned char code)
1569 {
1570 return (code == '.' || code == '#' || code == '@' || code == ','
1571 || code == '$' || code == '\'' || code == '>');
1572 }
1573
1574 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1575 static bool
1576 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1577 {
1578 if (GET_CODE (x) == UNSPEC)
1579 {
1580 switch (XINT (x, 1))
1581 {
1582 case UNSPEC_DATALABEL:
1583 fputs ("datalabel ", file);
1584 output_addr_const (file, XVECEXP (x, 0, 0));
1585 break;
1586 case UNSPEC_PIC:
1587 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1588 output_addr_const (file, XVECEXP (x, 0, 0));
1589 break;
1590 case UNSPEC_GOT:
1591 output_addr_const (file, XVECEXP (x, 0, 0));
1592 fputs ("@GOT", file);
1593 break;
1594 case UNSPEC_GOTOFF:
1595 output_addr_const (file, XVECEXP (x, 0, 0));
1596 fputs ("@GOTOFF", file);
1597 break;
1598 case UNSPEC_PLT:
1599 output_addr_const (file, XVECEXP (x, 0, 0));
1600 fputs ("@PLT", file);
1601 break;
1602 case UNSPEC_GOTPLT:
1603 output_addr_const (file, XVECEXP (x, 0, 0));
1604 fputs ("@GOTPLT", file);
1605 break;
1606 case UNSPEC_PCREL:
1607 output_addr_const (file, XVECEXP (x, 0, 0));
1608 fputs ("@PCREL", file);
1609 break;
1610 case UNSPEC_DTPOFF:
1611 output_addr_const (file, XVECEXP (x, 0, 0));
1612 fputs ("@DTPOFF", file);
1613 break;
1614 case UNSPEC_GOTTPOFF:
1615 output_addr_const (file, XVECEXP (x, 0, 0));
1616 fputs ("@GOTTPOFF", file);
1617 break;
1618 case UNSPEC_TPOFF:
1619 output_addr_const (file, XVECEXP (x, 0, 0));
1620 fputs ("@TPOFF", file);
1621 break;
1622 case UNSPEC_CALLER:
1623 {
1624 char name[32];
1625 /* LPCS stands for Label for PIC Call Site. */
1626 targetm.asm_out.generate_internal_label (name, "LPCS",
1627 INTVAL (XVECEXP (x, 0, 0)));
1628 assemble_name (file, name);
1629 }
1630 break;
1631 case UNSPEC_EXTRACT_S16:
1632 case UNSPEC_EXTRACT_U16:
1633 {
1634 rtx val, shift;
1635
1636 val = XVECEXP (x, 0, 0);
1637 shift = XVECEXP (x, 0, 1);
1638 fputc ('(', file);
1639 if (shift != const0_rtx)
1640 fputc ('(', file);
1641 if (GET_CODE (val) == CONST
1642 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1643 {
1644 fputc ('(', file);
1645 output_addr_const (file, val);
1646 fputc (')', file);
1647 }
1648 else
1649 output_addr_const (file, val);
1650 if (shift != const0_rtx)
1651 {
1652 fputs (" >> ", file);
1653 output_addr_const (file, shift);
1654 fputc (')', file);
1655 }
1656 fputs (" & 65535)", file);
1657 }
1658 break;
1659 case UNSPEC_SYMOFF:
1660 output_addr_const (file, XVECEXP (x, 0, 0));
1661 fputc ('-', file);
1662 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1663 {
1664 fputc ('(', file);
1665 output_addr_const (file, XVECEXP (x, 0, 1));
1666 fputc (')', file);
1667 }
1668 else
1669 output_addr_const (file, XVECEXP (x, 0, 1));
1670 break;
1671 case UNSPEC_PCREL_SYMOFF:
1672 output_addr_const (file, XVECEXP (x, 0, 0));
1673 fputs ("-(", file);
1674 output_addr_const (file, XVECEXP (x, 0, 1));
1675 fputs ("-.)", file);
1676 break;
1677 default:
1678 return false;
1679 }
1680 return true;
1681 }
1682 else
1683 return false;
1684 }
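/* Illustrative note (added, not part of the original sources): given the
   cases above, an UNSPEC_GOT wrapping a symbol `foo' is printed as
   `foo@GOT', UNSPEC_GOTOFF as `foo@GOTOFF', and UNSPEC_PCREL_SYMOFF with
   operands `foo' and `bar' as `foo-(bar-.)'.  */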
1685 \f
1686 /* Encode symbol attributes of a SYMBOL_REF into its
1687 SYMBOL_REF_FLAGS. */
1688 static void
1689 sh_encode_section_info (tree decl, rtx rtl, int first)
1690 {
1691 default_encode_section_info (decl, rtl, first);
1692
1693 if (TREE_CODE (decl) == FUNCTION_DECL
1694 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1695 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1696 }
1697
1698 /* Prepare operands for a move define_expand; specifically, one of the
1699 operands must be in a register. */
1700 void
1701 prepare_move_operands (rtx operands[], machine_mode mode)
1702 {
1703 if ((mode == SImode || mode == DImode)
1704 && flag_pic
1705 && ! ((mode == Pmode || mode == ptr_mode)
1706 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1707 {
1708 rtx temp;
1709 if (SYMBOLIC_CONST_P (operands[1]))
1710 {
1711 if (MEM_P (operands[0]))
1712 operands[1] = force_reg (Pmode, operands[1]);
1713 else if (TARGET_SHMEDIA
1714 && GET_CODE (operands[1]) == LABEL_REF
1715 && target_reg_operand (operands[0], mode))
1716 /* It's ok. */;
1717 else
1718 {
1719 temp = (!can_create_pseudo_p ()
1720 ? operands[0]
1721 : gen_reg_rtx (Pmode));
1722 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1723 }
1724 }
1725 else if (GET_CODE (operands[1]) == CONST
1726 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1727 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1728 {
1729 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1730 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1731 mode, temp);
1732 operands[1] = expand_binop (mode, add_optab, temp,
1733 XEXP (XEXP (operands[1], 0), 1),
1734 (!can_create_pseudo_p ()
1735 ? temp
1736 : gen_reg_rtx (Pmode)),
1737 0, OPTAB_LIB_WIDEN);
1738 }
1739 }
1740
1741 if (! reload_in_progress && ! reload_completed)
1742 {
1743 /* Copy the source to a register if neither operand is a register. */
1744 if (! register_operand (operands[0], mode)
1745 && ! sh_register_operand (operands[1], mode))
1746 operands[1] = copy_to_mode_reg (mode, operands[1]);
1747
1748 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1749 {
1750 /* This is like change_address_1 (operands[0], mode, 0, 1),
1751 except that we can't use that function because it is static. */
1752 rtx new_rtx = change_address (operands[0], mode, 0);
1753 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1754 operands[0] = new_rtx;
1755 }
1756
1757 /* This case can happen while generating code to move the result
1758 of a library call to the target. Reject `st r0,@(rX,rY)' because
1759 reload will fail to find a spill register for rX, since r0 is already
1760 being used for the source. */
1761 else if (TARGET_SH1
1762 && refers_to_regno_p (R0_REG, operands[1])
1763 && MEM_P (operands[0])
1764 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1765 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1766 operands[1] = copy_to_mode_reg (mode, operands[1]);
1767
1768 /* When displacement addressing is used, RA will assign r0 to
1769 the pseudo register operand for the QI/HImode load/store.
1770 This tends to make a long live range for R0 and might cause
1771 anomalous register spills in some cases with LRA. See PR
1772 target/55212.
1773 We split such a load/store into two move insns via r0 so as to
1774 shorten R0's live range. This makes some code worse, but wins
1775 on average for LRA.
1776 Also, when base+index addressing is used and the index term is
1777 a subreg, LRA assumes that more hard registers can be made
1778 available in some situations, which is not the case for SH in the
1779 problematic case. We can pre-allocate R0 for that index term to
1780 avoid the issue. See PR target/66591. */
1781 else if (sh_lra_p ()
1782 && TARGET_SH1 && ! TARGET_SH2A
1783 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1784 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1785 {
1786 bool load_p = REG_P (operands[0]);
1787 rtx reg = operands[load_p ? 0 : 1];
1788 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1789
1790 if ((mode == QImode || mode == HImode)
1791 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1792 && GET_CODE (adr) == PLUS
1793 && REG_P (XEXP (adr, 0))
1794 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1795 && CONST_INT_P (XEXP (adr, 1))
1796 && INTVAL (XEXP (adr, 1)) != 0
1797 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1798 {
1799 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1800 emit_move_insn (r0_rtx, operands[1]);
1801 operands[1] = r0_rtx;
1802 }
1803 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1804 && GET_CODE (adr) == PLUS
1805 && REG_P (XEXP (adr, 0))
1806 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1807 && SUBREG_P (XEXP (adr, 1))
1808 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1809 {
1810 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1811 emit_move_insn (r0_rtx, XEXP (adr, 1));
1812 XEXP (adr, 1) = r0_rtx;
1813 }
1814 }
1815 }
1816
1817 if (mode == Pmode || mode == ptr_mode)
1818 {
1819 rtx op0, op1, opc;
1820 enum tls_model tls_kind;
1821
1822 op0 = operands[0];
1823 op1 = operands[1];
1824 if (GET_CODE (op1) == CONST
1825 && GET_CODE (XEXP (op1, 0)) == PLUS
1826 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1827 != TLS_MODEL_NONE))
1828 {
1829 opc = XEXP (XEXP (op1, 0), 1);
1830 op1 = XEXP (XEXP (op1, 0), 0);
1831 }
1832 else
1833 opc = NULL_RTX;
1834
1835 if (! reload_in_progress && ! reload_completed
1836 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1837 {
1838 rtx tga_op1, tga_ret, tmp, tmp2;
1839
1840 if (! flag_pic
1841 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1842 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1843 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1844 {
1845 static int got_labelno;
1846 /* Don't schedule insns for getting the GOT address when
1847 the first scheduling pass is enabled, to avoid spill
1848 failures for R0. */
1849 if (flag_schedule_insns)
1850 emit_insn (gen_blockage ());
1851 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
1852 emit_use (gen_rtx_REG (SImode, PIC_REG));
1853 if (flag_schedule_insns)
1854 emit_insn (gen_blockage ());
1855 }
1856
1857 switch (tls_kind)
1858 {
1859 case TLS_MODEL_GLOBAL_DYNAMIC:
1860 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1861 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1862 tmp = gen_reg_rtx (Pmode);
1863 emit_move_insn (tmp, tga_ret);
1864 op1 = tmp;
1865 break;
1866
1867 case TLS_MODEL_LOCAL_DYNAMIC:
1868 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1869 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1870
1871 tmp = gen_reg_rtx (Pmode);
1872 emit_move_insn (tmp, tga_ret);
1873
1874 if (register_operand (op0, Pmode))
1875 tmp2 = op0;
1876 else
1877 tmp2 = gen_reg_rtx (Pmode);
1878
1879 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1880 op1 = tmp2;
1881 break;
1882
1883 case TLS_MODEL_INITIAL_EXEC:
1884 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1885 tmp = gen_sym2GOTTPOFF (op1);
1886 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1887 op1 = tga_op1;
1888 break;
1889
1890 case TLS_MODEL_LOCAL_EXEC:
1891 tmp2 = gen_reg_rtx (Pmode);
1892 emit_insn (gen_store_gbr (tmp2));
1893 tmp = gen_reg_rtx (Pmode);
1894 emit_insn (gen_symTPOFF2reg (tmp, op1));
1895
1896 if (register_operand (op0, Pmode))
1897 op1 = op0;
1898 else
1899 op1 = gen_reg_rtx (Pmode);
1900
1901 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1902 break;
1903
1904 default:
1905 gcc_unreachable ();
1906 }
1907 if (opc)
1908 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1909 operands[1] = op1;
1910 }
1911 }
1912 }
1913
1914 /* Implement the canonicalize_comparison target hook for the combine
1915 pass. For the target hook this function is invoked via
1916 sh_canonicalize_comparison. This function is also re-used to
1917 canonicalize comparisons in cbranch pattern expanders. */
1918 static void
1919 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1920 machine_mode mode,
1921 bool op0_preserve_value)
1922 {
1923 /* When invoked from within the combine pass the mode is not specified,
1924 so try to get it from one of the operands. */
1925 if (mode == VOIDmode)
1926 mode = GET_MODE (op0);
1927 if (mode == VOIDmode)
1928 mode = GET_MODE (op1);
1929
1930 // We need to have a mode to do something useful here.
1931 if (mode == VOIDmode)
1932 return;
1933
1934 // Currently, we don't deal with floats here.
1935 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1936 return;
1937
1938 // Make sure that the constant operand is the second operand.
1939 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1940 {
1941 if (op0_preserve_value)
1942 return;
1943
1944 std::swap (op0, op1);
1945 cmp = swap_condition (cmp);
1946 }
1947
1948 if (CONST_INT_P (op1))
1949 {
1950 /* Try to adjust the constant operand in such a way that available
1951 comparison insns can be utilized better and the constant can be
1952 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1953 constant pool. */
1954 const HOST_WIDE_INT val = INTVAL (op1);
1955
1956 /* x > -1 --> x >= 0
1957 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1958 x <= -1 --> x < 0
1959 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1960 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1961 {
1962 cmp = cmp == GT ? GE : LT;
1963 op1 = gen_int_mode (val + 1, mode);
1964 }
1965
1966 /* x >= 1 --> x > 0
1967 x >= 0x80 --> x > 0x7F
1968 x < 1 --> x <= 0
1969 x < 0x80 --> x <= 0x7F */
1970 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1971 {
1972 cmp = cmp == GE ? GT : LE;
1973 op1 = gen_int_mode (val - 1, mode);
1974 }
1975
1976 /* unsigned x >= 1 --> x != 0
1977 unsigned x < 1 --> x == 0 */
1978 else if (val == 1 && (cmp == GEU || cmp == LTU))
1979 {
1980 cmp = cmp == GEU ? NE : EQ;
1981 op1 = CONST0_RTX (mode);
1982 }
1983
1984 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1985 unsigned x < 0x80 --> unsigned x <= 0x7F */
1986 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1987 {
1988 cmp = cmp == GEU ? GTU : LEU;
1989 op1 = gen_int_mode (val - 1, mode);
1990 }
1991
1992 /* unsigned x > 0 --> x != 0
1993 unsigned x <= 0 --> x == 0 */
1994 else if (val == 0 && (cmp == GTU || cmp == LEU))
1995 cmp = cmp == GTU ? NE : EQ;
1996
1997 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1998 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1999 else if (mode == SImode && (cmp == GTU || cmp == LEU)
2000 && val == 0x7FFFFFFF)
2001 {
2002 cmp = cmp == GTU ? LT : GE;
2003 op1 = const0_rtx;
2004 }
2005
2006 /* unsigned x >= 0x80000000 --> signed x < 0
2007 unsigned x < 0x80000000 --> signed x >= 0 */
2008 else if (mode == SImode && (cmp == GEU || cmp == LTU)
2009 && (unsigned HOST_WIDE_INT)val
2010 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
2011 {
2012 cmp = cmp == GEU ? LT : GE;
2013 op1 = const0_rtx;
2014 }
2015 }
2016 }
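/* Illustrative example (added note, not original code): with the rules
   above, an SImode test `unsigned x > 0x7FFFFFFF' is canonicalized to the
   signed test `x < 0', so it reduces to a sign-bit check and no
   constant-pool load of 0x7FFFFFFF is needed.  Likewise
   `unsigned x >= 1' becomes `x != 0'.  */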
2017
2018 /* This function implements the canonicalize_comparison target hook.
2019 This wrapper around the internally used sh_canonicalize_comparison
2020 function is needed to do the enum rtx_code <-> int conversion.
2021 Target hooks cannot use enum rtx_code in its definition. */
2022 static void
2023 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
2024 bool op0_preserve_value)
2025 {
2026 enum rtx_code tmp_code = (enum rtx_code)*code;
2027 sh_canonicalize_comparison (tmp_code, *op0, *op1,
2028 VOIDmode, op0_preserve_value);
2029 *code = (int)tmp_code;
2030 }
2031
2032 /* This function implements the legitimate_combined_insn target hook,
2033 which the combine pass uses to early reject combined insns, before
2034 it tries to recog the insn and determine its cost. */
2035 static bool
2036 sh_legitimate_combined_insn (rtx_insn* insn)
2037 {
2038 /* Reject combinations of memory loads and zero extensions, as these
2039 interfere with other combine patterns such as zero extracts and bit
2040 tests. The SH2A movu.{b|w} insns are formed later in the
2041 'sh_optimize_extu_exts' pass after combine/split1. */
2042 rtx p = PATTERN (insn);
2043 if (GET_CODE (p) == SET
2044 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
2045 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
2046 && MEM_P (XEXP (XEXP (p, 1), 0)))
2047 return false;
2048
2049 return true;
2050 }
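/* For illustration (added note): the shape rejected above is roughly
     (set (reg:SI rN) (zero_extend:SI (mem:QI ...)))
   i.e. a zero-extending load, which would otherwise hide the memory
   operand from the zero-extract and bit-test combine patterns.  */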
2051
2052 bool
2053 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
2054 {
2055 *p1 = T_REG;
2056 *p2 = INVALID_REGNUM;
2057 return true;
2058 }
2059
2060 enum rtx_code
2061 prepare_cbranch_operands (rtx *operands, machine_mode mode,
2062 enum rtx_code comparison)
2063 {
2064 /* The scratch reg is only available when this is invoked from within
2065 the cbranchdi4_i splitter, through expand_cbranchdi4. */
2066 rtx scratch = NULL_RTX;
2067
2068 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2069 comparison = GET_CODE (operands[0]);
2070 else
2071 scratch = operands[4];
2072
2073 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2074 mode, false);
2075
2076 /* Notice that this function is also invoked after reload by
2077 the cbranchdi4_i pattern, through expand_cbranchdi4. */
2078 rtx op1 = operands[1];
2079
2080 if (can_create_pseudo_p ())
2081 operands[1] = force_reg (mode, op1);
2082 /* When we are handling DImode comparisons, we want to keep constants so
2083 that we can optimize the component comparisons; however, memory loads
2084 are better issued as a whole so that they can be scheduled well.
2085 SImode equality comparisons allow I08 constants, but only when they
2086 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2087 into a register, that register might as well be r0, and we allow the
2088 constant. If it is already in a register, this is likely to be
2089 allocated to a different hard register, thus we load the constant into
2090 a register unless it is zero. */
2091 if (!REG_P (operands[2])
2092 && (!CONST_INT_P (operands[2])
2093 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2094 && ((comparison != EQ && comparison != NE)
2095 || (REG_P (op1) && REGNO (op1) != R0_REG)
2096 || !satisfies_constraint_I08 (operands[2])))))
2097 {
2098 if (scratch && GET_MODE (scratch) == mode)
2099 {
2100 emit_move_insn (scratch, operands[2]);
2101 operands[2] = scratch;
2102 }
2103 else if (can_create_pseudo_p ())
2104 operands[2] = force_reg (mode, operands[2]);
2105 }
2106 return comparison;
2107 }
2108
2109 void
2110 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2111 {
2112 rtx (*branch_expander) (rtx) = gen_branch_true;
2113 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2114 switch (comparison)
2115 {
2116 case NE: case LT: case LE: case LTU: case LEU:
2117 comparison = reverse_condition (comparison);
2118 branch_expander = gen_branch_false;
2119 default: ;
2120 }
2121 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2122 gen_rtx_fmt_ee (comparison, SImode,
2123 operands[1], operands[2])));
2124 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2125 if (probability >= 0)
2126 add_int_reg_note (jump, REG_BR_PROB, probability);
2127 }
2128
2129 /* ??? How should we distribute probabilities when more than one branch
2130 is generated? So far we only have some ad-hoc observations:
2131 - If the operands are random, they are likely to differ in both parts.
2132 - If comparing items in a hash chain, the operands are random or equal;
2133 operation should be EQ or NE.
2134 - If items are searched in an ordered tree from the root, we can expect
2135 the highpart to be unequal about half of the time; operation should be
2136 an inequality comparison, operands non-constant, and overall probability
2137 about 50%. Likewise for quicksort.
2138 - Range checks will often be made against constants. Even if we assume for
2139 simplicity an even distribution of the non-constant operand over a
2140 sub-range here, the same probability could be generated with differently
2141 wide sub-ranges - as long as the ratio of the part of the subrange that
2142 is before the threshold to the part that comes after the threshold stays
2143 the same. Thus, we can't really tell anything here;
2144 assuming random distribution is at least simple.
2145 */
2146 bool
2147 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2148 {
2149 enum rtx_code msw_taken, msw_skip, lsw_taken;
2150 rtx_code_label *skip_label = NULL;
2151 rtx op1h, op1l, op2h, op2l;
2152 int num_branches;
2153 int prob, rev_prob;
2154 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2155 rtx scratch = operands[4];
2156
2157 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2158 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2159 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2160 op1l = gen_lowpart (SImode, operands[1]);
2161 op2l = gen_lowpart (SImode, operands[2]);
2162 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2163 prob = split_branch_probability;
2164 rev_prob = REG_BR_PROB_BASE - prob;
2165 switch (comparison)
2166 {
2167 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2168 That costs 1 cycle more when the first branch can be predicted taken,
2169 but saves us mispredicts because only one branch needs prediction.
2170 It also enables generating the cmpeqdi_t-1 pattern. */
2171 case EQ:
2172 if (TARGET_CMPEQDI_T)
2173 {
2174 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2175 emit_jump_insn (gen_branch_true (operands[3]));
2176 return true;
2177 }
2178 msw_skip = NE;
2179 lsw_taken = EQ;
2180 if (prob >= 0)
2181 {
2182 // If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
2183 msw_skip_prob = rev_prob;
2184 if (REG_BR_PROB_BASE <= 65535)
2185 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2186 else
2187 {
2188 lsw_taken_prob
2189 = (prob
2190 ? (REG_BR_PROB_BASE
2191 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2192 / ((gcov_type) prob << 32)))
2193 : 0);
2194 }
2195 }
2196 break;
2197 case NE:
2198 if (TARGET_CMPEQDI_T)
2199 {
2200 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2201 emit_jump_insn (gen_branch_false (operands[3]));
2202 return true;
2203 }
2204 msw_taken = NE;
2205 msw_taken_prob = prob;
2206 lsw_taken = NE;
2207 lsw_taken_prob = 0;
2208 break;
2209 case GTU: case GT:
2210 msw_taken = comparison;
2211 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2212 break;
2213 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2214 msw_skip = swap_condition (msw_taken);
2215 lsw_taken = GTU;
2216 break;
2217 case GEU: case GE:
2218 if (op2l == CONST0_RTX (SImode))
2219 msw_taken = comparison;
2220 else
2221 {
2222 msw_taken = comparison == GE ? GT : GTU;
2223 msw_skip = swap_condition (msw_taken);
2224 lsw_taken = GEU;
2225 }
2226 break;
2227 case LTU: case LT:
2228 msw_taken = comparison;
2229 if (op2l == CONST0_RTX (SImode))
2230 break;
2231 msw_skip = swap_condition (msw_taken);
2232 lsw_taken = LTU;
2233 break;
2234 case LEU: case LE:
2235 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2236 msw_taken = comparison;
2237 else
2238 {
2239 lsw_taken = LEU;
2240 if (comparison == LE)
2241 msw_taken = LT;
2242 else if (op2h != CONST0_RTX (SImode))
2243 msw_taken = LTU;
2244 else
2245 {
2246 msw_skip = swap_condition (LTU);
2247 break;
2248 }
2249 msw_skip = swap_condition (msw_taken);
2250 }
2251 break;
2252 default: return false;
2253 }
2254 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2255 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2256 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2257 if (comparison != EQ && comparison != NE && num_branches > 1)
2258 {
2259 if (!CONSTANT_P (operands[2])
2260 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2261 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2262 {
2263 msw_taken_prob = prob / 2U;
2264 msw_skip_prob
2265 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2266 lsw_taken_prob = prob;
2267 }
2268 else
2269 {
2270 msw_taken_prob = prob;
2271 msw_skip_prob = REG_BR_PROB_BASE;
2272 /* ??? If we have a constant op2h, should we use that when
2273 calculating lsw_taken_prob? */
2274 lsw_taken_prob = prob;
2275 }
2276 }
2277 operands[1] = op1h;
2278 operands[2] = op2h;
2279 operands[4] = NULL_RTX;
2280 if (reload_completed
2281 && ! arith_reg_or_0_operand (op2h, SImode)
2282 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2283 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2284 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2285 {
2286 emit_move_insn (scratch, operands[2]);
2287 operands[2] = scratch;
2288 }
2289 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2290 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2291 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2292 {
2293 rtx taken_label = operands[3];
2294
2295 /* Operands were possibly modified, but msw_skip doesn't expect this.
2296 Always use the original ones. */
2297 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2298 {
2299 operands[1] = op1h;
2300 operands[2] = op2h;
2301 if (reload_completed
2302 && ! arith_reg_or_0_operand (op2h, SImode)
2303 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2304 {
2305 emit_move_insn (scratch, operands[2]);
2306 operands[2] = scratch;
2307 }
2308 }
2309
2310 operands[3] = skip_label = gen_label_rtx ();
2311 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2312 operands[3] = taken_label;
2313 }
2314 operands[1] = op1l;
2315 operands[2] = op2l;
2316 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2317 {
2318 if (reload_completed
2319 && ! arith_reg_or_0_operand (op2l, SImode)
2320 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2321 {
2322 emit_move_insn (scratch, operands[2]);
2323 operands[2] = scratch;
2324 }
2325 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2326 }
2327 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2328 emit_label (skip_label);
2329 return true;
2330 }
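/* Rough sketch (added note) of how the code above expands a DImode
   signed `>' when no special constants are involved:
     compare the high words with GT   -> branch to the target if true
     compare the high words with LT   -> skip over the low-word test
     compare the low words with GTU   -> branch to the target if true
   msw_taken / msw_skip / lsw_taken select exactly these component
   comparisons; the actual codes vary with the constants involved, as
   computed in the switch above.  */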
2331
2332 /* Given an operand, return 1 if the evaluated operand plugged into an
2333 if_then_else will result in a branch_true, 0 if branch_false, or
2334 -1 if neither applies. The truth table goes like this:
2335
2336 op | cmpval | code | result
2337 ---------+--------+---------+--------------------
2338 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2339 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2340 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2341 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2342 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2343 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2344 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2345 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2346 int
2347 sh_eval_treg_value (rtx op)
2348 {
2349 if (t_reg_operand (op, GET_MODE (op)))
2350 return 1;
2351 if (negt_reg_operand (op, GET_MODE (op)))
2352 return 0;
2353
2354 rtx_code code = GET_CODE (op);
2355 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2356 return -1;
2357
2358 int cmpop = code == EQ ? 1 : 0;
2359 int cmpval = INTVAL (XEXP (op, 1));
2360 if (cmpval != 0 && cmpval != 1)
2361 return -1;
2362
2363 int t;
2364 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2365 t = 0;
2366 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2367 t = 1;
2368 else
2369 return -1;
2370
2371 return t ^ (cmpval == cmpop);
2372 }
2373
2374 /* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case
2375 of floating-point comparisons. */
2376 static void
2377 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2378 {
2379 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2380 && GET_CODE (insn) != PARALLEL)
2381 {
2382 insn = gen_rtx_PARALLEL (VOIDmode,
2383 gen_rtvec (3, insn,
2384 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2385 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2386 }
2387 emit_insn (insn);
2388 }
2389
2390 /* Prepare the operands for an scc instruction; make sure that the
2391 compare has been done and the result is in T_REG. */
2392 void
2393 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2394 {
2395 rtx t_reg = get_t_reg_rtx ();
2396 enum rtx_code oldcode = code;
2397 machine_mode mode;
2398
2399 /* First need a compare insn. */
2400 switch (code)
2401 {
2402 case NE:
2403 /* It isn't possible to handle this case. */
2404 gcc_unreachable ();
2405 case LT:
2406 code = GT;
2407 break;
2408 case LE:
2409 code = GE;
2410 break;
2411 case LTU:
2412 code = GTU;
2413 break;
2414 case LEU:
2415 code = GEU;
2416 break;
2417 default:
2418 break;
2419 }
2420 if (code != oldcode)
2421 std::swap (op0, op1);
2422
2423 mode = GET_MODE (op0);
2424 if (mode == VOIDmode)
2425 mode = GET_MODE (op1);
2426
2427 op0 = force_reg (mode, op0);
2428 if ((code != EQ && code != NE
2429 && (op1 != const0_rtx
2430 || code == GTU || code == GEU || code == LTU || code == LEU))
2431 || (mode == DImode && op1 != const0_rtx)
2432 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2433 op1 = force_reg (mode, op1);
2434
2435 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2436 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2437 mode);
2438 }
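/* For example (added note): the SH has no less-than compare insns, so a
   request for LT above is turned into GT with the operands swapped;
   `a < b' becomes `b > a', which maps onto the cmp/gt insn and leaves the
   result in the T bit as usual.  */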
2439
2440 rtx
2441 sh_emit_cheap_store_flag (machine_mode mode, enum rtx_code code,
2442 rtx op0, rtx op1)
2443 {
2444 rtx target = gen_reg_rtx (SImode);
2445 rtx tmp;
2446
2447 gcc_assert (TARGET_SHMEDIA);
2448 switch (code)
2449 {
2450 case EQ:
2451 case GT:
2452 case LT:
2453 case UNORDERED:
2454 case GTU:
2455 case LTU:
2456 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2457 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2458 code = NE;
2459 break;
2460
2461 case NE:
2462 case GE:
2463 case LE:
2464 case ORDERED:
2465 case GEU:
2466 case LEU:
2467 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2468 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2469 code = EQ;
2470 break;
2471
2472 case UNEQ:
2473 case UNGE:
2474 case UNGT:
2475 case UNLE:
2476 case UNLT:
2477 case LTGT:
2478 return NULL_RTX;
2479
2480 default:
2481 gcc_unreachable ();
2482 }
2483
2484 if (mode == DImode)
2485 {
2486 rtx t2 = gen_reg_rtx (DImode);
2487 emit_insn (gen_extendsidi2 (t2, target));
2488 target = t2;
2489 }
2490
2491 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2492 }
2493
2494 /* Called from the md file, set up the operands of a compare instruction. */
2495 void
2496 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2497 {
2498 enum rtx_code code = GET_CODE (operands[0]);
2499 enum rtx_code branch_code;
2500 rtx op0 = operands[1];
2501 rtx op1 = operands[2];
2502 rtx insn;
2503 bool need_ccmpeq = false;
2504
2505 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2506 {
2507 op0 = force_reg (mode, op0);
2508 op1 = force_reg (mode, op1);
2509 }
2510 else
2511 {
2512 if (code != EQ || mode == DImode)
2513 {
2514 /* Force args into regs, since we can't use constants here. */
2515 op0 = force_reg (mode, op0);
2516 if (op1 != const0_rtx || code == GTU || code == GEU)
2517 op1 = force_reg (mode, op1);
2518 }
2519 }
2520
2521 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2522 {
2523 if (code == LT
2524 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2525 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2526 {
2527 std::swap (op0, op1);
2528 code = swap_condition (code);
2529 }
2530
2531 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2532 if (code == GE)
2533 {
2534 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2535 need_ccmpeq = true;
2536 code = GT;
2537 }
2538
2539 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2540 to EQ/GT respectively. */
2541 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2542 }
2543
2544 switch (code)
2545 {
2546 case EQ:
2547 case GT:
2548 case GE:
2549 case GTU:
2550 case GEU:
2551 branch_code = code;
2552 break;
2553 case NE:
2554 case LT:
2555 case LE:
2556 case LTU:
2557 case LEU:
2558 branch_code = reverse_condition (code);
2559 break;
2560 default:
2561 gcc_unreachable ();
2562 }
2563
2564 insn = gen_rtx_SET (get_t_reg_rtx (),
2565 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2566
2567 sh_emit_set_t_insn (insn, mode);
2568 if (need_ccmpeq)
2569 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2570
2571 if (branch_code == code)
2572 emit_jump_insn (gen_branch_true (operands[3]));
2573 else
2574 emit_jump_insn (gen_branch_false (operands[3]));
2575 }
2576
2577 void
2578 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2579 {
2580 enum rtx_code code = GET_CODE (operands[1]);
2581 rtx op0 = operands[2];
2582 rtx op1 = operands[3];
2583 rtx_code_label *lab = NULL;
2584 bool invert = false;
2585
2586 op0 = force_reg (mode, op0);
2587 if ((code != EQ && code != NE
2588 && (op1 != const0_rtx
2589 || code == GTU || code == GEU || code == LTU || code == LEU))
2590 || (mode == DImode && op1 != const0_rtx)
2591 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2592 op1 = force_reg (mode, op1);
2593
2594 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2595 {
2596 if (code == LT || code == LE)
2597 {
2598 std::swap (op0, op1);
2599 code = swap_condition (code);
2600 }
2601 if (code == GE)
2602 {
2603 if (TARGET_IEEE)
2604 {
2605 lab = gen_label_rtx ();
2606 sh_emit_scc_to_t (EQ, op0, op1);
2607 emit_jump_insn (gen_branch_true (lab));
2608 code = GT;
2609 }
2610 else
2611 {
2612 code = LT;
2613 invert = true;
2614 }
2615 }
2616 }
2617
2618 if (code == NE)
2619 {
2620 code = EQ;
2621 invert = true;
2622 }
2623
2624 sh_emit_scc_to_t (code, op0, op1);
2625 if (lab)
2626 emit_label (lab);
2627 if (invert)
2628 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2629 else
2630 emit_move_insn (operands[0], get_t_reg_rtx ());
2631 }
2632 \f
2633 /* Functions to output assembly code. */
2634
2635 /* Return a sequence of instructions to perform DI or DF move.
2636
2637 Since the SH cannot move a DI or DF in one instruction, we have
2638 to take care when we see overlapping source and dest registers. */
2639 const char *
2640 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2641 machine_mode mode)
2642 {
2643 rtx dst = operands[0];
2644 rtx src = operands[1];
2645
2646 if (MEM_P (dst)
2647 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2648 return "mov.l %T1,%0" "\n"
2649 " mov.l %1,%0";
2650
2651 if (register_operand (dst, mode)
2652 && register_operand (src, mode))
2653 {
2654 if (REGNO (src) == MACH_REG)
2655 return "sts mach,%S0" "\n"
2656 " sts macl,%R0";
2657
2658 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2659 when mov.d r1,r0 do r1->r0 then r2->r1. */
2660 if (REGNO (src) + 1 == REGNO (dst))
2661 return "mov %T1,%T0" "\n"
2662 " mov %1,%0";
2663 else
2664 return "mov %1,%0" "\n"
2665 " mov %T1,%T0";
2666 }
2667 else if (CONST_INT_P (src))
2668 {
2669 if (INTVAL (src) < 0)
2670 output_asm_insn ("mov #-1,%S0", operands);
2671 else
2672 output_asm_insn ("mov #0,%S0", operands);
2673
2674 return "mov %1,%R0";
2675 }
2676 else if (MEM_P (src))
2677 {
2678 int ptrreg = -1;
2679 int dreg = REGNO (dst);
2680 rtx inside = XEXP (src, 0);
2681
2682 switch (GET_CODE (inside))
2683 {
2684 case REG:
2685 ptrreg = REGNO (inside);
2686 break;
2687
2688 case SUBREG:
2689 ptrreg = subreg_regno (inside);
2690 break;
2691
2692 case PLUS:
2693 ptrreg = REGNO (XEXP (inside, 0));
2694 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2695 an offsettable address. Unfortunately, offsettable addresses use
2696 QImode to check the offset, and a QImode offsettable address
2697 requires r0 for the other operand, which is not currently
2698 supported, so we can't use the 'o' constraint.
2699 Thus we must check for and handle r0+REG addresses here.
2700 We punt for now, since this is likely very rare. */
2701 gcc_assert (!REG_P (XEXP (inside, 1)));
2702 break;
2703
2704 case LABEL_REF:
2705 return "mov.l %1,%0" "\n"
2706 " mov.l %1+4,%T0";
2707 case POST_INC:
2708 return "mov.l %1,%0" "\n"
2709 " mov.l %1,%T0";
2710 default:
2711 gcc_unreachable ();
2712 }
2713
2714 /* Work out the safe way to copy. Copy into the second half first. */
2715 if (dreg == ptrreg)
2716 return "mov.l %T1,%T0" "\n"
2717 " mov.l %1,%0";
2718 }
2719
2720 return "mov.l %1,%0" "\n"
2721 " mov.l %T1,%T0";
2722 }
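/* Illustrative example (added note): per the register-register case
   above, moving the pair r1/r2 into r2/r3 (REGNO (src) + 1 == REGNO (dst))
   emits
       mov r2,r3
       mov r1,r2
   i.e. the overlapping second half is copied first so the source is not
   clobbered before it is read.  */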
2723
2724 /* Print an instruction which would have gone into a delay slot after
2725 another instruction, but couldn't because the other instruction expanded
2726 into a sequence where putting the slot insn at the end wouldn't work. */
2727 static void
2728 print_slot (rtx_sequence *seq)
2729 {
2730 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2731
2732 seq->insn (1)->set_deleted ();
2733 }
2734
2735 const char *
2736 output_far_jump (rtx_insn *insn, rtx op)
2737 {
2738 struct { rtx lab, reg, op; } this_jmp;
2739 rtx_code_label *braf_base_lab = NULL;
2740 const char *jump;
2741 int far;
2742 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2743 rtx_insn *prev;
2744
2745 this_jmp.lab = gen_label_rtx ();
2746
2747 if (TARGET_SH2
2748 && offset >= -32764
2749 && offset - get_attr_length (insn) <= 32766
2750 && ! CROSSING_JUMP_P (insn))
2751 {
2752 far = 0;
2753 jump = "mov.w %O0,%1" "\n"
2754 " braf %1";
2755 }
2756 else
2757 {
2758 far = 1;
2759 if (flag_pic)
2760 {
2761 if (TARGET_SH2)
2762 jump = "mov.l %O0,%1" "\n"
2763 " braf %1";
2764 else
2765 jump = "mov.l r0,@-r15" "\n"
2766 " mova %O0,r0" "\n"
2767 " mov.l @r0,%1" "\n"
2768 " add r0,%1" "\n"
2769 " mov.l @r15+,r0" "\n"
2770 " jmp @%1";
2771 }
2772 else
2773 jump = "mov.l %O0,%1" "\n"
2774 " jmp @%1";
2775 }
2776 /* If we have a scratch register available, use it. */
2777 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2778 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2779 {
2780 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2781 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2782 jump = "mov.l r1,@-r15" "\n"
2783 " mova %O0,r0" "\n"
2784 " mov.l @r0,r1" "\n"
2785 " add r1,r0" "\n"
2786 " mov.l @r15+,r1" "\n"
2787 " jmp @%1";
2788 output_asm_insn (jump, &this_jmp.lab);
2789 if (dbr_sequence_length ())
2790 print_slot (final_sequence);
2791 else
2792 output_asm_insn ("nop", 0);
2793 }
2794 else
2795 {
2796 /* Output the delay slot insn first if any. */
2797 if (dbr_sequence_length ())
2798 print_slot (final_sequence);
2799
2800 this_jmp.reg = gen_rtx_REG (SImode, 13);
2801 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2802 Fortunately, MACL is fixed and call-clobbered, and we never
2803 need its value across jumps, so save r13 in it instead of on
2804 the stack. */
2805 if (TARGET_SH5)
2806 output_asm_insn ("lds r13,macl", 0);
2807 else
2808 output_asm_insn ("mov.l r13,@-r15", 0);
2809 output_asm_insn (jump, &this_jmp.lab);
2810 if (TARGET_SH5)
2811 output_asm_insn ("sts macl,r13", 0);
2812 else
2813 output_asm_insn ("mov.l @r15+,r13", 0);
2814 }
2815 if (far && flag_pic && TARGET_SH2)
2816 {
2817 braf_base_lab = gen_label_rtx ();
2818 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2819 CODE_LABEL_NUMBER (braf_base_lab));
2820 }
2821 if (far)
2822 output_asm_insn (".align 2", 0);
2823 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2824 this_jmp.op = op;
2825 if (far && flag_pic)
2826 {
2827 if (TARGET_SH2)
2828 this_jmp.lab = braf_base_lab;
2829 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2830 }
2831 else
2832 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2833 return "";
2834 }
2835
2836 /* Local label counter, used for constants in the pool and inside
2837 pattern branches. */
2838 static int lf = 100;
2839
2840 /* Output code for ordinary branches. */
2841 const char *
2842 output_branch (int logic, rtx_insn *insn, rtx *operands)
2843 {
2844 switch (get_attr_length (insn))
2845 {
2846 case 6:
2847 /* This can happen if filling the delay slot has caused a forward
2848 branch to exceed its range (we could reverse it, but only
2849 when we know we won't overextend other branches; this should
2850 best be handled by relaxation).
2851 It can also happen when other condbranches hoist delay slot insns
2852 from their destinations, thus increasing code size.
2853 But the branch will still be in the range -4092..+4098 bytes. */
2854 if (! TARGET_RELAX)
2855 {
2856 int label = lf++;
2857 /* The call to print_slot will clobber the operands. */
2858 rtx op0 = operands[0];
2859
2860 /* If the instruction in the delay slot is annulled (true), then
2861 there is no delay slot where we can put it now. The only safe
2862 place for it is after the label. final will do that by default. */
2863
2864 if (final_sequence
2865 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2866 && get_attr_length (final_sequence->insn (1)))
2867 {
2868 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2869 ASSEMBLER_DIALECT ? "/" : ".", label);
2870 print_slot (final_sequence);
2871 }
2872 else
2873 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2874
2875 output_asm_insn ("bra\t%l0", &op0);
2876 fprintf (asm_out_file, "\tnop\n");
2877 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2878
2879 return "";
2880 }
2881 /* When relaxing, handle this like a short branch. The linker
2882 will fix it up if it still doesn't fit after relaxation. */
2883 case 2:
2884 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2885
2886 /* These are for SH2e, in which we have to account for the
2887 extra nop because of the hardware bug in annulled branches. */
2888 case 8:
2889 if (! TARGET_RELAX)
2890 {
2891 int label = lf++;
2892
2893 gcc_assert (!final_sequence
2894 || !(INSN_ANNULLED_BRANCH_P
2895 (XVECEXP (final_sequence, 0, 0))));
2896 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2897 logic ? "f" : "t",
2898 ASSEMBLER_DIALECT ? "/" : ".", label);
2899 fprintf (asm_out_file, "\tnop\n");
2900 output_asm_insn ("bra\t%l0", operands);
2901 fprintf (asm_out_file, "\tnop\n");
2902 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2903
2904 return "";
2905 }
2906 /* When relaxing, fall through. */
2907 case 4:
2908 {
2909 char buffer[10];
2910
2911 sprintf (buffer, "b%s%ss\t%%l0",
2912 logic ? "t" : "f",
2913 ASSEMBLER_DIALECT ? "/" : ".");
2914 output_asm_insn (buffer, &operands[0]);
2915 return "nop";
2916 }
2917
2918 default:
2919 /* There should be no branches longer than this now; anything
2920 longer would indicate that something has destroyed the branches
2921 set up in machine_dependent_reorg. */
2922 gcc_unreachable ();
2923 }
2924 }
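/* Sketch (added note) of the 6-byte case above when not relaxing: the
   condition is inverted into a short branch around an unconditional
   `bra', roughly
       bf    LFnn        ! or bt, depending on LOGIC
       bra   <target>
       nop
     LFnn:
   which trades two extra insns for the longer displacement range of
   `bra'.  */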
2925
2926 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2927 fill in operand 9 as a label to the successor insn.
2928 We try to use jump threading where possible.
2929 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2930 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2931 follow jmp and bt, if the address is in range. */
2932 const char *
2933 output_branchy_insn (enum rtx_code code, const char *templ,
2934 rtx_insn *insn, rtx *operands)
2935 {
2936 rtx_insn *next_insn = NEXT_INSN (insn);
2937
2938 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2939 {
2940 rtx src = SET_SRC (PATTERN (next_insn));
2941 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2942 {
2943 /* The following branch is not taken. */
2944 rtx_code_label *lab = gen_label_rtx ();
2945 emit_label_after (lab, next_insn);
2946 INSN_ADDRESSES_NEW (lab,
2947 INSN_ADDRESSES (INSN_UID (next_insn))
2948 + get_attr_length (next_insn));
2949 operands[9] = lab;
2950 return templ;
2951 }
2952 else
2953 {
2954 int offset = (branch_dest (next_insn)
2955 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2956 if (offset >= -252 && offset <= 258)
2957 {
2958 if (GET_CODE (src) == IF_THEN_ELSE)
2959 /* branch_true */
2960 src = XEXP (src, 1);
2961 operands[9] = src;
2962 return templ;
2963 }
2964 }
2965 }
2966 rtx_code_label *lab = gen_label_rtx ();
2967 emit_label_after (lab, insn);
2968 INSN_ADDRESSES_NEW (lab,
2969 INSN_ADDRESSES (INSN_UID (insn))
2970 + get_attr_length (insn));
2971 operands[9] = lab;
2972 return templ;
2973 }
2974
2975 const char *
2976 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2977 {
2978 return output_branchy_insn (NE, "bt %l9" "\n"
2979 " fcmp/eq %1,%0",
2980 insn, operands);
2981 }
2982 \f
2983 /* Output the start of the assembler file. */
2984 static void
2985 sh_file_start (void)
2986 {
2987 default_file_start ();
2988
2989 if (TARGET_ELF)
2990 /* We need to show the text section with the proper
2991 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2992 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2993 will complain. We can teach GAS specifically about the
2994 default attributes for our choice of text section, but
2995 then we would have to change GAS again if/when we change
2996 the text section name. */
2997 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2998 else
2999 /* Switch to the data section so that the coffsem symbol
3000 isn't in the text section. */
3001 switch_to_section (data_section);
3002
3003 if (TARGET_LITTLE_ENDIAN)
3004 fputs ("\t.little\n", asm_out_file);
3005
3006 if (!TARGET_ELF)
3007 {
3008 if (TARGET_SHCOMPACT)
3009 fputs ("\t.mode\tSHcompact\n", asm_out_file);
3010 else if (TARGET_SHMEDIA)
3011 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
3012 TARGET_SHMEDIA64 ? 64 : 32);
3013 }
3014 }
3015 \f
3016 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
3017 static bool
3018 unspec_caller_rtx_p (rtx pat)
3019 {
3020 rtx base, offset;
3021 int i;
3022
3023 split_const (pat, &base, &offset);
3024 if (GET_CODE (base) == UNSPEC)
3025 {
3026 if (XINT (base, 1) == UNSPEC_CALLER)
3027 return true;
3028 for (i = 0; i < XVECLEN (base, 0); i++)
3029 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
3030 return true;
3031 }
3032 return false;
3033 }
3034
3035 /* Indicate that INSN cannot be duplicated. This is true for insns
3036 that generate a unique label. */
3037 static bool
3038 sh_cannot_copy_insn_p (rtx_insn *insn)
3039 {
3040 rtx pat;
3041
3042 if (!reload_completed || !flag_pic)
3043 return false;
3044
3045 if (!NONJUMP_INSN_P (insn))
3046 return false;
3047 if (asm_noperands (insn) >= 0)
3048 return false;
3049
3050 pat = PATTERN (insn);
3051 if (GET_CODE (pat) != SET)
3052 return false;
3053 pat = SET_SRC (pat);
3054
3055 if (unspec_caller_rtx_p (pat))
3056 return true;
3057
3058 return false;
3059 }
3060 \f
3061 /* Number of instructions used to make an arithmetic right shift by N. */
3062 static const char ashiftrt_insns[] =
3063 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
3064
3065 /* Description of a logical left or right shift, when expanded to a sequence
3066 of 1/2/8/16 shifts.
3067 Notice that one bit right shifts clobber the T bit. One bit left shifts
3068 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
3069 enum
3070 {
3071 ASHL_CLOBBERS_T = 1 << 0,
3072 LSHR_CLOBBERS_T = 1 << 1
3073 };
3074
3075 struct ashl_lshr_sequence
3076 {
3077 char insn_count;
3078 signed char amount[6];
3079 char clobbers_t;
3080 };
3081
3082 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
3083 {
3084 { 0, { 0 }, 0 }, // 0
3085 { 1, { 1 }, LSHR_CLOBBERS_T },
3086 { 1, { 2 }, 0 },
3087 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3088 { 2, { 2, 2 }, 0 }, // 4
3089 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3090 { 3, { 2, 2, 2 }, 0 },
3091 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3092 { 1, { 8 }, 0 }, // 8
3093 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3094 { 2, { 8, 2 }, 0 },
3095 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3096 { 3, { 8, 2, 2 }, 0 }, // 12
3097 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3098 { 3, { 8, -2, 8 }, 0 },
3099 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3100 { 1, { 16 }, 0 }, // 16
3101 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3102 { 2, { 16, 2 }, 0 },
3103 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3104 { 3, { 16, 2, 2 }, 0 }, // 20
3105 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3106 { 3, { 16, -2, 8 }, 0 },
3107 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3108 { 2, { 16, 8 }, 0 }, // 24
3109 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3110 { 3, { 16, 8, 2 }, 0 },
3111 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3112 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3113 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3114 { 3, { 16, -2, 16 }, 0 },
3115
3116 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3117 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3118 However, the shift-and combiner code needs this entry here to be in
3119 terms of real shift insns. */
3120 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3121 };
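/* Worked example (added note): per the table above, a left shift by 12
   uses entry { 3, { 8, 2, 2 }, 0 }, i.e. shift-by-8 + shift-by-2 +
   shift-by-2, and a left shift by 15 uses { 3, { 8, -1, 8 },
   ASHL_CLOBBERS_T }, i.e. shift left 8, shift right 1 (clobbering T),
   then shift left 8 again.  Negative amounts denote shifts in the
   opposite direction.  */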
3122
3123 /* Individual shift sequences for shift amounts < 16, where up to three of
3124 the highmost bits might be clobbered. This is typically used in
3125 combination with some kind of sign or zero extension. */
3126 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3127 {
3128 { 0, { 0 }, 0 }, // 0
3129 { 1, { 1 }, LSHR_CLOBBERS_T },
3130 { 1, { 2 }, 0 },
3131 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3132 { 2, { 2, 2 }, 0 }, // 4
3133 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3134 { 2, { 8, -2 }, 0 },
3135 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3136 { 1, { 8 }, 0 }, // 8
3137 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3138 { 2, { 8, 2 }, 0 },
3139 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3140 { 3, { 8, 2, 2 }, 0 }, // 12
3141 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3142 { 2, { 16, -2 }, 0 },
3143 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3144 { 1, { 16 }, 0 }, // 16
3145 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3146 { 2, { 16, 2 }, 0 },
3147 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3148 { 3, { 16, 2, 2 }, 0 }, // 20
3149 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3150 { 3, { 16, -2, 8 }, 0 },
3151 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3152 { 2, { 16, 8 }, 0 }, // 24
3153 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3154 { 3, { 16, 8, 2 }, 0 },
3155 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3156 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3157 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3158 { 3, { 16, -2, 16 }, 0 },
3159 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3160 };
3161
3162 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3163 will clobber the T bit. */
3164 bool
3165 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3166 {
3167 gcc_assert (CONST_INT_P (shift_amount));
3168
3169 const int shift_amount_i = INTVAL (shift_amount) & 31;
3170
3171 /* Special case for shift count of 31: use and-rotl sequence. */
3172 if (shift_amount_i == 31)
3173 return true;
3174
3175 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3176 & ASHL_CLOBBERS_T) != 0;
3177 }
3178
3179 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3180 instructions will clobber the T bit. */
3181 bool
3182 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3183 {
3184 gcc_assert (CONST_INT_P (shift_amount));
3185
3186 const int shift_amount_i = INTVAL (shift_amount) & 31;
3187
3188 /* Special case for shift count of 31: use shll-movt sequence. */
3189 if (shift_amount_i == 31)
3190 return true;
3191
3192 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3193 & LSHR_CLOBBERS_T) != 0;
3194 }
3195
3196 /* Return true if it is potentially beneficial to use a dynamic shift
3197 instruction (shad / shld) instead of a combination of 1/2/8/16
3198 shift instructions for the specified shift count.
3199 If dynamic shifts are not available, always return false. */
3200 bool
3201 sh_dynamicalize_shift_p (rtx count)
3202 {
3203 gcc_assert (CONST_INT_P (count));
3204
3205 const int shift_amount_i = INTVAL (count) & 31;
3206 int insn_count;
3207
3208 /* For left and right shifts, there are shorter 2 insn sequences for
3209 shift amounts of 31. */
3210 if (shift_amount_i == 31)
3211 insn_count = 2;
3212 else
3213 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3214
3215 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3216 }
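/* Illustrative example (added note; the exact value of
   SH_DYNAMIC_SHIFT_COST depends on the target and is assumed to be 1
   here): a constant shift by 13 takes 4 insns per ashl_lshr_seq, so when
   dynamic shifts are available, loading the count and using shad/shld
   (2 insns in total) is preferred; a shift by 2 (1 insn) is not.  */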
3217
3218 /* Assuming we have a value that has been sign-extended by at least one bit,
3219 can we use the ext_ashl_lshr_seq sequences with the last shift turned to an
3220 arithmetic shift to shift it by N without data loss, and quicker than by
3221 other means? */
3222 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
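/* Added note: ((n) | 8) == 15 holds exactly for n == 7 and n == 15,
   which are the shift amounts this heuristic accepts.  */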
3223
3224 /* Return the cost of a shift. */
3225 static inline int
3226 shiftcosts (rtx x)
3227 {
3228 int value;
3229
3230 if (TARGET_SHMEDIA)
3231 return 1;
3232
3233 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3234 {
3235 if (GET_MODE (x) == DImode
3236 && CONST_INT_P (XEXP (x, 1))
3237 && INTVAL (XEXP (x, 1)) == 1)
3238 return 2;
3239
3240 /* Everything else is invalid, because there is no pattern for it. */
3241 return -1;
3242 }
3243 /* If shifting by a non-constant, then this will be expensive. */
3244 if (!CONST_INT_P (XEXP (x, 1)))
3245 return SH_DYNAMIC_SHIFT_COST;
3246
3247 /* Otherwise, return the true cost in instructions. Cope with out of range
3248 shift counts more or less arbitrarily. */
3249 value = INTVAL (XEXP (x, 1)) & 31;
3250
3251 if (GET_CODE (x) == ASHIFTRT)
3252 {
3253 int cost = ashiftrt_insns[value];
3254 /* If dynamic shifts are available and profitable in this case, then we
3255 put the constant in a reg and use shad. */
3256 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3257 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3258 return cost;
3259 }
3260 else
3261 return ashl_lshr_seq[value].insn_count;
3262 }
3263
3264 /* Return the cost of an AND/XOR/IOR operation. */
3265 static inline int
3266 and_xor_ior_costs (rtx x, int code)
3267 {
3268 /* On SH1-4 operations are at most SImode wide.
3269 Double the cost for modes wider than SImode. */
3270 const int cost_scale = !TARGET_SHMEDIA
3271 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3272 ? 2 : 1;
3273
3274 /* A logical operation with two registers is a single cycle
3275 instruction. */
3276 if (!CONST_INT_P (XEXP (x, 1)))
3277 return 1 * cost_scale;
3278
3279 int i = INTVAL (XEXP (x, 1));
3280
3281 if (TARGET_SHMEDIA)
3282 {
3283 if (satisfies_constraint_I10 (XEXP (x, 1))
3284 || satisfies_constraint_J16 (XEXP (x, 1)))
3285 return 1;
3286 else
3287 return 1 + rtx_cost (XEXP (x, 1), GET_MODE (x), AND, 1, !optimize_size);
3288 }
3289
3290 /* These constants are single cycle extu.[bw] instructions. */
3291 if ((i == 0xff || i == 0xffff) && code == AND)
3292 return 1 * cost_scale;
3293 /* Constants that can be used in an instruction as an immediate are
3294 a single cycle, but this requires r0, so make it a little more
3295 expensive. */
3296 if (CONST_OK_FOR_K08 (i))
3297 return 2 * cost_scale;
3298 /* Constants that can be loaded with a mov immediate need one more cycle.
3299 This case is probably unnecessary. */
3300 if (CONST_OK_FOR_I08 (i))
3301 return 2 * cost_scale;
3302 /* Any other constant requires an additional 2 cycle pc-relative load.
3303 This case is probably unnecessary. */
3304 return 3 * cost_scale;
3305 }
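/* Quick summary of the non-SHmedia SImode cases above (added note):
     and with 0xff / 0xffff      -> 1  (single extu.b / extu.w)
     K08 or I08 constants        -> 2  (immediate form, may tie up r0)
     any other constant          -> 3  (extra pc-relative load)
   Register operands always cost 1.  */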
3306
3307 /* Return the cost of an addition or a subtraction. */
3308 static inline int
3309 addsubcosts (rtx x)
3310 {
3311 if (GET_MODE (x) == SImode)
3312 {
3313 /* The addc or subc patterns will eventually become one or two
3314 instructions. Below are some costs for some of the patterns
3315 which combine would reject because the costs of the individual
3316 insns in the patterns are lower.
3317
3318 FIXME: It would be much easier if we had something like insn cost
3319 attributes and the cost calculation machinery used those attributes
3320 in the first place. This would eliminate redundant recog-like C
3321 code to calculate costs of complex patterns. */
3322 rtx op0 = XEXP (x, 0);
3323 rtx op1 = XEXP (x, 1);
3324
3325 if (GET_CODE (x) == PLUS)
3326 {
3327 if (GET_CODE (op0) == AND
3328 && XEXP (op0, 1) == const1_rtx
3329 && (GET_CODE (op1) == PLUS
3330 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3331 return 1;
3332
3333 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3334 && GET_CODE (op1) == LSHIFTRT
3335 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3336 return 1;
3337 }
3338 /* Let's assume that adding the result of an insn that stores into
3339 the T bit is cheap. */
3340 if (treg_set_expr (op1, SImode))
3341 return 1;
3342 if (treg_set_expr (op0, SImode))
3343 return 1;
3344 }
3345
3346 /* On SH1-4 operations are at most SImode wide.
3347 Double the cost for modes wider than SImode. */
3348 const int cost_scale = !TARGET_SHMEDIA
3349 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3350 ? 2 : 1;
3351
3352 /* Adding a register is a single cycle insn. */
3353 if (REG_P (XEXP (x, 1))
3354 || GET_CODE (XEXP (x, 1)) == SUBREG)
3355 return 1 * cost_scale;
3356
3357 /* Likewise for small constants. */
3358 if (CONST_INT_P (XEXP (x, 1))
3359 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3360 return 1 * cost_scale;
3361
3362 if (TARGET_SHMEDIA)
3363 switch (GET_CODE (XEXP (x, 1)))
3364 {
3365 case CONST:
3366 case LABEL_REF:
3367 case SYMBOL_REF:
3368 return TARGET_SHMEDIA64 ? 5 : 3;
3369
3370 case CONST_INT:
3371 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3372 return 2;
3373 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3374 return 3;
3375 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3376 return 4;
3377
3378 /* Fall through. */
3379 default:
3380 return 5;
3381 }
3382
3383 /* Any other constant requires a 2 cycle pc-relative load plus an
3384 addition. */
3385 return 3 * cost_scale;
3386 }
3387
3388 /* Return the cost of a multiply. */
3389 static inline int
3390 multcosts (rtx x ATTRIBUTE_UNUSED)
3391 {
3392 if (sh_multcost >= 0)
3393 return sh_multcost;
3394 if (TARGET_SHMEDIA)
3395 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3396 accept constants. Ideally, we would use a cost of one or two and
3397 add the cost of the operand, but disregard the latter when inside loops
3398 and loop invariant code motion is still to follow.
3399 Using a multiply first and splitting it later if it's a loss
3400 doesn't work because of different sign / zero extension semantics
3401 of multiplies vs. shifts. */
3402 return optimize_size ? 2 : 3;
3403
3404 if (TARGET_SH2)
3405 {
3406 /* We have a mul insn, so we can never take more than the mul and the
3407 read of the mac reg, but count more because of the latency and extra
3408 reg usage. */
3409 if (optimize_size)
3410 return 2;
3411 return 3;
3412 }
3413
3414 /* If we're aiming at small code, then just count the number of
3415 insns in a multiply call sequence. */
3416 if (optimize_size)
3417 return 5;
3418
3419 /* Otherwise count all the insns in the routine we'd be calling too. */
3420 return 20;
3421 }
3422
3423 /* Compute a (partial) cost for rtx X. Return true if the complete
3424 cost has been computed, and false if subexpressions should be
3425 scanned. In either case, *TOTAL contains the cost result. */
3426 static bool
3427 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
3428 int opno ATTRIBUTE_UNUSED,
3429 int *total, bool speed ATTRIBUTE_UNUSED)
3430 {
3431 int code = GET_CODE (x);
3432
3433 switch (code)
3434 {
3435 /* The lower-subreg pass decides whether to split multi-word regs
3436 into individual regs by looking at the cost for a SET of certain
3437 modes with the following patterns:
3438 (set (reg) (reg))
3439 (set (reg) (const_int 0))
3440 On machines that support vector-move operations a multi-word move
3441 is the same cost as an individual reg move. On SH there is no
3442 vector-move, so we have to provide the correct cost in the number
3443 of move insns to load/store the reg of the mode in question. */
3444 case SET:
3445 if (register_operand (SET_DEST (x), VOIDmode)
3446 && (register_operand (SET_SRC (x), VOIDmode)
3447 || satisfies_constraint_Z (SET_SRC (x))))
3448 {
3449 const machine_mode mode = GET_MODE (SET_DEST (x));
3450 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3451 / mov_insn_size (mode, TARGET_SH2A));
3452 return true;
3453 }
3454 return false;
3455
3456 /* The cost of a mem access is mainly the cost of the address mode. */
3457 case MEM:
3458 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3459 true);
3460 return true;
3461
3462 case IF_THEN_ELSE:
3463 /* This case is required for the if_then_else negc pattern. */
3464 if (treg_set_expr (XEXP (x, 0), SImode))
3465 {
3466 *total = COSTS_N_INSNS (1);
3467 return true;
3468 }
3469 else
3470 return false;
3471
3472 /* Zero extracts of single bits are usually combine patterns for the
3473 tst insns. */
3474 case ZERO_EXTRACT:
3475 if (GET_CODE (XEXP (x, 0)) == XOR
3476 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3477 && XEXP (x, 1) == const1_rtx
3478 && CONST_INT_P (XEXP (x, 2))
3479 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3480 /* Check that the xor constant overlaps with the extracted bit. */
3481 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3482 {
3483 *total = 1; //COSTS_N_INSNS (1);
3484 return true;
3485 }
3486 return false;
3487
3488 /* The cost of a sign or zero extend depends on whether the source is a
3489 reg or a mem. In case of a mem take the address into account. */
3490 case SIGN_EXTEND:
3491 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3492 {
3493 *total = COSTS_N_INSNS (1);
3494 return true;
3495 }
3496 if (MEM_P (XEXP (x, 0)))
3497 {
3498 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3499 GET_MODE (XEXP (x, 0)),
3500 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3501 return true;
3502 }
3503 return false;
3504
3505 case ZERO_EXTEND:
3506 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3507 {
3508 *total = COSTS_N_INSNS (1);
3509 return true;
3510 }
3511 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3512 && (GET_MODE (XEXP (x, 0)) == QImode
3513 || GET_MODE (XEXP (x, 0)) == HImode))
3514 {
3515 /* Handle SH2A's movu.b and movu.w insn. */
3516 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3517 GET_MODE (XEXP (x, 0)),
3518 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3519 return true;
3520 }
3521 return false;
3522
3523 /* mems for SFmode and DFmode can be inside a parallel due to
3524 the way the fpscr is handled. */
3525 case PARALLEL:
3526 for (int i = 0; i < XVECLEN (x, 0); i++)
3527 {
3528 rtx xx = XVECEXP (x, 0, i);
3529 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3530 {
3531 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3532 GET_MODE (XEXP (xx, 0)),
3533 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3534 return true;
3535 }
3536 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3537 {
3538 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3539 GET_MODE (XEXP (xx, 1)),
3540 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3541 return true;
3542 }
3543 }
3544
3545 if (sh_1el_vec (x, VOIDmode))
3546 *total = outer_code != SET;
3547 else if (sh_rep_vec (x, VOIDmode))
3548 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3549 + (outer_code != SET));
3550 else
3551 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3552 return true;
3553
3554 case CONST_INT:
3555 if (TARGET_SHMEDIA)
3556 {
3557 if (INTVAL (x) == 0)
3558 *total = 0;
3559 else if (outer_code == AND && and_operand ((x), DImode))
3560 *total = 0;
3561 else if ((outer_code == IOR || outer_code == XOR
3562 || outer_code == PLUS)
3563 && CONST_OK_FOR_I10 (INTVAL (x)))
3564 *total = 0;
3565 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3566 *total = COSTS_N_INSNS (outer_code != SET);
3567 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3568 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3569 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3570 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3571 else
3572 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3573 return true;
3574 }
3575 if (CONST_OK_FOR_I08 (INTVAL (x)))
3576 *total = 0;
3577 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3578 && CONST_OK_FOR_K08 (INTVAL (x)))
3579 *total = 1;
3580 /* prepare_cmp_insn will force costly constants into registers before
3581 the cbranch[sd]i4 patterns can see them, so preserve potentially
3582 interesting ones not covered by I08 above. */
3583 else if (outer_code == COMPARE
3584 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3585 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3586 || INTVAL (x) == 0x7fffffff
3587 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3588 *total = 1;
3589 else
3590 *total = 8;
3591 return true;
3592
3593 case EQ:
3594 /* An and with a constant compared against zero is
3595 most likely going to be a TST #imm, R0 instruction. */
3596 if (XEXP (x, 1) == const0_rtx
3597 && ((GET_CODE (XEXP (x, 0)) == AND
3598 || (SUBREG_P (XEXP (x, 0))
3599 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND))
3600 || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT))
3601 {
3602 *total = 1;
3603 return true;
3604 }
3605
3606 else if (XEXP (x, 1) == const0_rtx
3607 && GET_CODE (XEXP (x, 0)) == AND
3608 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3609 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3610 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3611 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3612 {
3613 *total = 1;
3614 return true;
3615 }
3616 else
3617 return false;
3618
3619 case SMIN:
3620 case SMAX:
3621 /* This is most likely a clips.b or clips.w insn that is being made up
3622 by combine. */
3623 if (TARGET_SH2A
3624 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3625 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3626 && REG_P (XEXP (XEXP (x, 0), 0))
3627 && CONST_INT_P (XEXP (x, 1)))
3628 {
3629 *total = COSTS_N_INSNS (1);
3630 return true;
3631 }
3632 else
3633 return false;
3634
3635 case CONST:
3636 case LABEL_REF:
3637 case SYMBOL_REF:
3638 if (TARGET_SHMEDIA64)
3639 *total = COSTS_N_INSNS (4);
3640 else if (TARGET_SHMEDIA32)
3641 *total = COSTS_N_INSNS (2);
3642 else
3643 *total = 5;
3644 return true;
3645
3646 case CONST_DOUBLE:
3647 if (TARGET_SHMEDIA)
3648 *total = COSTS_N_INSNS (4);
3649 /* prepare_cmp_insn will force costly constants into registers before
3650 the cbranchdi4 pattern can see them, so preserve potentially
3651 interesting ones. */
3652 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3653 *total = 1;
3654 else
3655 *total = 10;
3656 return true;
3657
3658 case CONST_VECTOR:
3659 /* FIXME: This looks broken. Only the last statement has any effect.
3660 Probably this could be folded with the PARALLEL case? */
3661 if (x == CONST0_RTX (GET_MODE (x)))
3662 *total = 0;
3663 else if (sh_1el_vec (x, VOIDmode))
3664 *total = outer_code != SET;
3665 if (sh_rep_vec (x, VOIDmode))
3666 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3667 + (outer_code != SET));
3668 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3669 return true;
3670
3671 case PLUS:
3672 case MINUS:
3673 *total = COSTS_N_INSNS (addsubcosts (x));
3674 return true;
3675
3676 case AND:
3677 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3678 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3679 {
3680 *total = COSTS_N_INSNS (1);
3681 return true;
3682 }
3683 /* Fall through. */
3684
3685 case XOR:
3686 case IOR:
3687 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3688 return true;
3689
3690 case MULT:
3691 *total = COSTS_N_INSNS (multcosts (x));
3692 return true;
3693
3694 case LT:
3695 case GE:
3696 /* div0s sign comparison. */
3697 if (GET_CODE (XEXP (x, 0)) == XOR
3698 && REG_P ((XEXP (XEXP (x, 0), 0)))
3699 && REG_P ((XEXP (XEXP (x, 0), 1)))
3700 && satisfies_constraint_Z (XEXP (x, 1)))
3701 {
3702 *total = COSTS_N_INSNS (1);
3703 return true;
3704 }
3705 else
3706 return false;
3707
3708 case LSHIFTRT:
3709 /* div0s sign comparison. */
3710 if (GET_CODE (XEXP (x, 0)) == XOR
3711 && REG_P ((XEXP (XEXP (x, 0), 0)))
3712 && REG_P ((XEXP (XEXP (x, 0), 1)))
3713 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3714 {
3715 *total = COSTS_N_INSNS (1);
3716 return true;
3717 }
3718 /* Fall through to shiftcosts. */
3719 case ASHIFT:
3720 case ASHIFTRT:
3721 {
3722 int cost = shiftcosts (x);
3723 if (cost < 0)
3724 return false;
3725 *total = COSTS_N_INSNS (cost);
3726 return true;
3727 }
3728
3729 case DIV:
3730 case UDIV:
3731 case MOD:
3732 case UMOD:
3733 *total = COSTS_N_INSNS (20);
3734 return true;
3735
3736 case FLOAT:
3737 case FIX:
3738 *total = 100;
3739 return true;
3740
3741 default:
3742 return false;
3743 }
3744 }
3745
3746 /* Determine the size of the fundamental move insn that will be used
3747 for the specified mode. */
3748 static inline int
3749 mov_insn_size (machine_mode mode, bool consider_sh2a)
3750 {
3751 const int mode_sz = GET_MODE_SIZE (mode);
3752
3753 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3754 || (TARGET_FMOVD && mode == DFmode))
3755 return mode_sz;
3756 else
3757 {
3758 /* The max. available mode for actual move insns is SImode.
3759 Larger accesses will be split into multiple loads/stores. */
3760 const int max_mov_sz = GET_MODE_SIZE (SImode);
3761 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3762 }
3763 }
3764
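/* For illustration: with the rule above, a DImode or DFmode value on a
   target without FMOVD or SH2A double moves is transferred as SImode
   pieces, so mov_insn_size returns 4 and the SET case of sh_rtx_costs
   counts 8 / 4 = 2 move insns for such a copy, while with TARGET_FMOVD a
   DFmode value is treated as a single 8 byte move and counts as 1. */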
3765 /* Determine the maximum possible displacement for a move insn for the
3766 specified mode. */
3767 int
3768 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3769 {
3770 /* The 4 byte displacement move insns are the same as the 2 byte
3771 versions but take a 12 bit displacement. All we need to do is to
3772 scale the max. displacement value accordingly. */
3773 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3774
3775 /* SH2A supports FPU move insns with 12 bit displacements.
3776 Other variants do not support any kind of displacement for
3777 FPU move insns. */
3778 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3779 return 0;
3780 else
3781 {
3782 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3783 const int mode_sz = GET_MODE_SIZE (mode);
3784 int r = 15 * mov_insn_sz * disp_scale;
3785
3786 /* If the mov insn will be split into multiple loads/stores, the
3787 maximum possible displacement is a bit smaller. */
3788 if (mode_sz > mov_insn_sz)
3789 r -= mode_sz - mov_insn_sz;
3790 return r;
3791 }
3792 }
3793
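/* For illustration: with the formula above, the maximum displacements for
   the classic 4 bit displacement forms are 15 bytes for QImode, 30 for
   HImode and 60 for SImode; a DImode access, which is split into two
   SImode moves, gets 60 - 4 = 56 so that the second half is still
   reachable. When consider_sh2a is true the values are scaled by
   4095 / 15 = 273, giving e.g. 4095 for QImode. */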
3794 /* Determine the alignment mask for a move insn of the
3795 specified mode. */
3796 static inline int
3797 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3798 {
3799 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3800 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3801 }
3802
3803 /* Return the displacement value of a displacement address. */
3804 HOST_WIDE_INT
3805 sh_disp_addr_displacement (rtx x)
3806 {
3807 gcc_assert (satisfies_constraint_Sdd (x));
3808 return INTVAL (XEXP (XEXP (x, 0), 1));
3809 }
3810
3811 /* Compute the cost of an address. */
3812 static int
3813 sh_address_cost (rtx x, machine_mode mode,
3814 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3815 {
3816 /* 'GBR + 0'. Account one more because of R0 restriction. */
3817 if (REG_P (x) && REGNO (x) == GBR_REG)
3818 return 2;
3819
3820 /* Simple reg, post-inc, pre-dec addressing. */
3821 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3822 return 1;
3823
3824 /* 'reg + disp' addressing. */
3825 if (GET_CODE (x) == PLUS
3826 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3827 {
3828 /* 'GBR + disp'. Account one more because of R0 restriction. */
3829 if (REGNO (XEXP (x, 0)) == GBR_REG
3830 && gbr_displacement (XEXP (x, 1), mode))
3831 return 2;
3832
3833 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3834
3835 if (offset == 0)
3836 return 1;
3837
3838 /* The displacement would fit into a 2 byte move insn.
3839 HImode and QImode loads/stores with displacement put pressure on
3840 R0 which will most likely require another reg copy. Thus account
3841 a higher cost for that. */
3842 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3843 return (mode == HImode || mode == QImode) ? 2 : 1;
3844
3845 /* The displacement would fit into a 4 byte move insn (SH2A). */
3846 if (TARGET_SH2A
3847 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3848 return 2;
3849
3850 /* The displacement is probably out of range and will require extra
3851 calculations. */
3852 return 3;
3853 }
3854
3855 /* 'reg + reg' addressing. Account a slightly higher cost because of
3856 increased pressure on R0. */
3857 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3858 && ! TARGET_SHMEDIA)
3859 return 3;
3860
3861 /* Not sure what it is - probably expensive. */
3862 return 10;
3863 }
3864
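/* For illustration, typical results of the cost function above:
   @Rn, @Rn+ and @-Rn cost 1; GBR based addresses cost 2 because of the
   R0 restriction; @(disp,Rn) costs 1 for an SImode access with a small
   displacement, 2 for QImode/HImode (R0 pressure) or when only the SH2A
   12 bit form reaches it, and 3 when the displacement is out of range;
   @(R0,Rn) costs 3 and anything unrecognized costs 10. */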
3865 /* Code to expand a shift. */
3866 static void
3867 gen_ashift (int type, int n, rtx reg)
3868 {
3869 rtx n_rtx;
3870
3871 /* Negative values here come from the shift_amounts array. */
3872 if (n < 0)
3873 {
3874 if (type == ASHIFT)
3875 type = LSHIFTRT;
3876 else
3877 type = ASHIFT;
3878 n = -n;
3879 }
3880
3881 n_rtx = GEN_INT (n);
3882 gcc_assert (satisfies_constraint_P27 (n_rtx));
3883
3884 switch (type)
3885 {
3886 case ASHIFTRT:
3887 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3888 break;
3889 case LSHIFTRT:
3890 if (n == 1)
3891 emit_insn (gen_shlr (reg, reg));
3892 else
3893 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3894 break;
3895 case ASHIFT:
3896 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3897 break;
3898 default:
3899 gcc_unreachable ();
3900 }
3901 }
3902
3903 /* Code to expand a HImode shift. */
3904 static void
3905 gen_ashift_hi (int type, int n, rtx reg)
3906 {
3907 /* Negative values here come from the shift_amounts array. */
3908 if (n < 0)
3909 {
3910 if (type == ASHIFT)
3911 type = LSHIFTRT;
3912 else
3913 type = ASHIFT;
3914 n = -n;
3915 }
3916
3917 switch (type)
3918 {
3919 case ASHIFTRT:
3920 case LSHIFTRT:
3921 /* We don't have HImode right shift operations because using the
3922 ordinary 32 bit shift instructions for that doesn't generate proper
3923 zero/sign extension.
3924 gen_ashift_hi is only called in contexts where we know that the
3925 sign extension works out correctly. */
3926 {
3927 int offset = 0;
3928 if (GET_CODE (reg) == SUBREG)
3929 {
3930 offset = SUBREG_BYTE (reg);
3931 reg = SUBREG_REG (reg);
3932 }
3933 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3934 break;
3935 }
3936 case ASHIFT:
3937 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3938 break;
3939 }
3940 }
3941
3942 /* Output RTL to split a constant shift into its component SH constant
3943 shift instructions. */
3944 void
3945 gen_shifty_op (int code, rtx *operands)
3946 {
3947 int value = INTVAL (operands[2]);
3948 int max, i;
3949
3950 /* Truncate the shift count in case it is out of bounds. */
3951 value = value & 31;
3952
3953 if (value == 31)
3954 {
3955 if (code == LSHIFTRT)
3956 {
3957 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3958 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3959 return;
3960 }
3961 else if (code == ASHIFT)
3962 {
3963 /* There is a two instruction sequence for 31 bit left shifts,
3964 but it requires r0. */
3965 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3966 {
3967 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3968 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3969 return;
3970 }
3971 }
3972 }
3973 else if (value == 0)
3974 {
3975 /* This can happen even when optimizing, if there were subregs before
3976 reload. Don't output a nop here, as this is never optimized away;
3977 use a no-op move instead. */
3978 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3979 return;
3980 }
3981
3982 max = ashl_lshr_seq[value].insn_count;
3983 for (i = 0; i < max; i++)
3984 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3985 }
3986
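/* A rough, self-contained sketch of the idea behind gen_shifty_op, kept
   out of the build: SH1-4 only have constant shifts by 1, 2, 8 and 16, so
   an arbitrary constant shift is emitted as a short sequence of those,
   taken from the precomputed ashl_lshr_seq table. The greedy decomposition
   below is an illustration only; the real table may use different (and
   occasionally opposite-direction) steps, and the function name and use of
   <stdio.h> are assumptions of this sketch. */
#if 0
#include <stdio.h>

static void
print_left_shift_sequence (int count)
{
  static const int steps[] = { 16, 8, 2, 1 };
  count &= 31;
  for (unsigned int i = 0; i < sizeof (steps) / sizeof (steps[0]); i++)
    while (count >= steps[i])
      {
        /* shll shifts by 1; shll2/shll8/shll16 by the indicated amount. */
        if (steps[i] == 1)
          printf ("shll\n");
        else
          printf ("shll%d\n", steps[i]);
        count -= steps[i];
      }
}
#endif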
3987 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3988 don't matter. */
3989 void
3990 gen_shifty_hi_op (int code, rtx *operands)
3991 {
3992 int value = INTVAL (operands[2]);
3993 int max, i;
3994 void (*gen_fun) (int, int, rtx);
3995
3996 /* This operation is used by and_shl for SImode values with a few
3997 high bits known to be cleared. */
3998 value &= 31;
3999 if (value == 0)
4000 {
4001 emit_insn (gen_nop ());
4002 return;
4003 }
4004
4005 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
4006 if (code == ASHIFT)
4007 {
4008 max = ext_ashl_lshr_seq[value].insn_count;
4009 for (i = 0; i < max; i++)
4010 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4011 }
4012 else
4013 /* When shifting right, emit the shifts in reverse order, so that
4014 solitary negative values come first. */
4015 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
4016 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4017 }
4018
4019 /* Output RTL for an arithmetic right shift.
4020 ??? Rewrite to use super-optimizer sequences. */
4021 bool
4022 expand_ashiftrt (rtx *operands)
4023 {
4024 rtx wrk;
4025 char func[18];
4026 int value;
4027
4028 if (TARGET_DYNSHIFT)
4029 {
4030 if (!CONST_INT_P (operands[2]))
4031 {
4032 rtx count = copy_to_mode_reg (SImode, operands[2]);
4033 emit_insn (gen_negsi2 (count, count));
4034 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4035 return true;
4036 }
4037 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
4038 > 1 + SH_DYNAMIC_SHIFT_COST)
4039 {
4040 rtx count
4041 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
4042 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4043 return true;
4044 }
4045 }
4046 if (!CONST_INT_P (operands[2]))
4047 return false;
4048
4049 value = INTVAL (operands[2]) & 31;
4050
4051 if (value == 31)
4052 {
4053 /* If we are called from abs expansion, arrange things so that
4054 we can use a single MT instruction that doesn't clobber the source,
4055 if LICM can hoist out the load of the constant zero. */
4056 if (currently_expanding_to_rtl)
4057 {
4058 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
4059 operands[1]));
4060 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
4061 return true;
4062 }
4063 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
4064 return true;
4065 }
4066 else if (value >= 16 && value <= 19)
4067 {
4068 wrk = gen_reg_rtx (SImode);
4069 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
4070 value -= 16;
4071 while (value--)
4072 gen_ashift (ASHIFTRT, 1, wrk);
4073 emit_move_insn (operands[0], wrk);
4074 return true;
4075 }
4076 /* Expand a short sequence inline; for a longer one, call a library helper routine. */
4077 else if (value <= 5)
4078 {
4079 wrk = gen_reg_rtx (SImode);
4080 emit_move_insn (wrk, operands[1]);
4081 while (value--)
4082 gen_ashift (ASHIFTRT, 1, wrk);
4083 emit_move_insn (operands[0], wrk);
4084 return true;
4085 }
4086
4087 wrk = gen_reg_rtx (Pmode);
4088
4089 /* Load the value into an arg reg and call a helper. */
4090 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
4091 sprintf (func, "__ashiftrt_r4_%d", value);
4092 function_symbol (wrk, func, SFUNC_STATIC);
4093 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
4094 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
4095 return true;
4096 }
4097
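/* A minimal sketch, kept out of the build: for a shift count of 31 the
   expander above does not shift at all; it emits a compare against zero
   whose T bit result is then negated, which yields the same value as the
   reference computation below (the function name is an assumption of this
   illustration). */
#if 0
static int
ashiftrt_31_reference (int x)
{
  /* An arithmetic right shift of a 32 bit value by 31 leaves only copies
     of the sign bit: 0 for non-negative inputs, -1 for negative ones. */
  return x < 0 ? -1 : 0;
}
#endif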
4098 /* Try to find a good way to implement the combiner pattern
4099 [(set (match_operand:SI 0 "register_operand" "r")
4100 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4101 (match_operand:SI 2 "const_int_operand" "n"))
4102 (match_operand:SI 3 "const_int_operand" "n"))) .
4103 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
4104 return 0 for simple right / left or left/right shift combination.
4105 return 1 for a combination of shifts with zero_extend.
4106 return 2 for a combination of shifts with an AND that needs r0.
4107 return 3 for a combination of shifts with an AND that needs an extra
4108 scratch register, when the three highmost bits of the AND mask are clear.
4109 return 4 for a combination of shifts with an AND that needs an extra
4110 scratch register, when any of the three highmost bits of the AND mask
4111 is set.
4112 If ATTRP is set, store an initial right shift width in ATTRP[0],
4113 and the instruction length in ATTRP[1] . These values are not valid
4114 when returning 0.
4115 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
4116 shift_amounts for the last shift value that is to be used before the
4117 sign extend. */
4118 int
4119 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
4120 {
4121 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
4122 int left = INTVAL (left_rtx), right;
4123 int best = 0;
4124 int cost, best_cost = 10000;
4125 int best_right = 0, best_len = 0;
4126 int i;
4127 int can_ext;
4128
4129 if (left < 0 || left > 31)
4130 return 0;
4131 if (CONST_INT_P (mask_rtx))
4132 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
4133 else
4134 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
4135 /* Can this be expressed as a right shift / left shift pair? */
4136 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
4137 right = exact_log2 (lsb);
4138 mask2 = ~(mask + lsb - 1);
4139 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
4140 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
4141 if (! mask2)
4142 best_cost = ashl_lshr_seq[right].insn_count
4143 + ashl_lshr_seq[right + left].insn_count;
4144 /* mask has no trailing zeroes <==> ! right */
4145 else if (! right && mask2 == ~(lsb2 - 1))
4146 {
4147 int late_right = exact_log2 (lsb2);
4148 best_cost = ashl_lshr_seq[left + late_right].insn_count
4149 + ashl_lshr_seq[late_right].insn_count;
4150 }
4151 /* Try to use zero extend. */
4152 if (mask2 == ~(lsb2 - 1))
4153 {
4154 int width, first;
4155
4156 for (width = 8; width <= 16; width += 8)
4157 {
4158 /* Can we zero-extend right away? */
4159 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4160 {
4161 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4162 + ext_ashl_lshr_seq[left + right].insn_count;
4163 if (cost < best_cost)
4164 {
4165 best = 1;
4166 best_cost = cost;
4167 best_right = right;
4168 best_len = cost;
4169 if (attrp)
4170 attrp[2] = -1;
4171 }
4172 continue;
4173 }
4174 /* ??? Could try to put zero extend into initial right shift,
4175 or even shift a bit left before the right shift. */
4176 /* Determine value of first part of left shift, to get to the
4177 zero extend cut-off point. */
4178 first = width - exact_log2 (lsb2) + right;
4179 if (first >= 0 && right + left - first >= 0)
4180 {
4181 cost = ext_ashl_lshr_seq[right].insn_count
4182 + ext_ashl_lshr_seq[first].insn_count + 1
4183 + ext_ashl_lshr_seq[right + left - first].insn_count;
4184
4185 if (cost < best_cost)
4186 {
4187 best = 1;
4188 best_cost = cost;
4189 best_right = right;
4190 best_len = cost;
4191 if (attrp)
4192 attrp[2] = first;
4193 }
4194 }
4195 }
4196 }
4197 /* Try to use the r0 AND pattern. */
4198 for (i = 0; i <= 2; i++)
4199 {
4200 if (i > right)
4201 break;
4202 if (! CONST_OK_FOR_K08 (mask >> i))
4203 continue;
4204 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4205 if (cost < best_cost)
4206 {
4207 best = 2;
4208 best_cost = cost;
4209 best_right = i;
4210 best_len = cost - 1;
4211 }
4212 }
4213 /* Try to use a scratch register to hold the AND operand. */
4214 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4215 for (i = 0; i <= 2; i++)
4216 {
4217 if (i > right)
4218 break;
4219 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4220 + (can_ext
4221 ? ext_ashl_lshr_seq
4222 : ashl_lshr_seq)[left + i].insn_count;
4223 if (cost < best_cost)
4224 {
4225 best = 4 - can_ext;
4226 best_cost = cost;
4227 best_right = i;
4228 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4229 }
4230 }
4231
4232 if (attrp)
4233 {
4234 attrp[0] = best_right;
4235 attrp[1] = best_len;
4236 }
4237 return best;
4238 }
4239
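/* A self-contained sketch of the simplest case above (return value 0),
   kept out of the build: for a mask consisting of ones from bit
   LEFT + RIGHT up to bit 31,
   (X << LEFT) & MASK == (X >> RIGHT) << (RIGHT + LEFT),
   so the AND can be replaced by a logical right/left shift pair. The
   function name and the uint32_t types are assumptions of this
   illustration, which expects 0 <= LEFT, 0 <= RIGHT and LEFT + RIGHT < 32. */
#if 0
#include <stdint.h>

static uint32_t
shl_and_pair_reference (uint32_t x, int left, int right)
{
  /* Same value as (x << left) & (0xffffffffu << (left + right)):
     drop the low RIGHT bits of X, then shift the rest into place. */
  return (x >> right) << (right + left);
}
#endif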
4240 /* This is used in length attributes of the unnamed instructions
4241 corresponding to shl_and_kind return values of 1 and 2. */
4242 int
4243 shl_and_length (rtx insn)
4244 {
4245 rtx set_src, left_rtx, mask_rtx;
4246 int attributes[3];
4247
4248 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4249 left_rtx = XEXP (XEXP (set_src, 0), 1);
4250 mask_rtx = XEXP (set_src, 1);
4251 shl_and_kind (left_rtx, mask_rtx, attributes);
4252 return attributes[1];
4253 }
4254
4255 /* This is used in length attribute of the and_shl_scratch instruction. */
4256 int
4257 shl_and_scr_length (rtx insn)
4258 {
4259 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4260 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4261 rtx op = XEXP (set_src, 0);
4262 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4263 op = XEXP (XEXP (op, 0), 0);
4264 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4265 }
4266
4267 /* Generate rtl for instructions for which shl_and_kind advised a particular
4268 method of generating them, i.e. returned zero. */
4269 bool
4270 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4271 {
4272 int attributes[3];
4273 unsigned HOST_WIDE_INT mask;
4274 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4275 int right, total_shift;
4276 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4277
4278 right = attributes[0];
4279 total_shift = INTVAL (left_rtx) + right;
4280 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4281 switch (kind)
4282 {
4283 default:
4284 return true;
4285 case 1:
4286 {
4287 int first = attributes[2];
4288 rtx operands[3];
4289
4290 if (first < 0)
4291 {
4292 emit_insn ((mask << right) <= 0xff
4293 ? gen_zero_extendqisi2 (dest,
4294 gen_lowpart (QImode, source))
4295 : gen_zero_extendhisi2 (dest,
4296 gen_lowpart (HImode, source)));
4297 source = dest;
4298 }
4299 if (source != dest)
4300 emit_insn (gen_movsi (dest, source));
4301 operands[0] = dest;
4302 if (right)
4303 {
4304 operands[2] = GEN_INT (right);
4305 gen_shifty_hi_op (LSHIFTRT, operands);
4306 }
4307 if (first > 0)
4308 {
4309 operands[2] = GEN_INT (first);
4310 gen_shifty_hi_op (ASHIFT, operands);
4311 total_shift -= first;
4312 mask <<= first;
4313 }
4314 if (first >= 0)
4315 emit_insn (mask <= 0xff
4316 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4317 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4318 if (total_shift > 0)
4319 {
4320 operands[2] = GEN_INT (total_shift);
4321 gen_shifty_hi_op (ASHIFT, operands);
4322 }
4323 break;
4324 }
4325 case 4:
4326 shift_gen_fun = gen_shifty_op;
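/* Fall through. */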
4327 case 3:
4328 /* If the topmost bit that matters is set, set the topmost bits
4329 that don't matter. This way, we might be able to get a shorter
4330 signed constant. */
4331 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4332 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
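/* Fall through. */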
4333 case 2:
4334 /* Don't expand fine-grained when combining, because that will
4335 make the pattern fail. */
4336 if (currently_expanding_to_rtl
4337 || reload_in_progress || reload_completed)
4338 {
4339 rtx operands[3];
4340
4341 /* Cases 3 and 4 should be handled by this split
4342 only while combining. */
4343 gcc_assert (kind <= 2);
4344 if (right)
4345 {
4346 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4347 source = dest;
4348 }
4349 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4350 if (total_shift)
4351 {
4352 operands[0] = dest;
4353 operands[1] = dest;
4354 operands[2] = GEN_INT (total_shift);
4355 shift_gen_fun (ASHIFT, operands);
4356 }
4357 break;
4358 }
4359 else
4360 {
4361 int neg = 0;
4362 if (kind != 4 && total_shift < 16)
4363 {
4364 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4365 if (neg > 0)
4366 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4367 else
4368 neg = 0;
4369 }
4370 emit_insn (gen_and_shl_scratch (dest, source,
4371 GEN_INT (right),
4372 GEN_INT (mask),
4373 GEN_INT (total_shift + neg),
4374 GEN_INT (neg)));
4375 emit_insn (gen_movsi (dest, dest));
4376 break;
4377 }
4378 }
4379 return false;
4380 }
4381
4382 /* Try to find a good way to implement the combiner pattern
4383 [(set (match_operand:SI 0 "register_operand" "=r")
4384 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4385 (match_operand:SI 2 "const_int_operand" "n")
4386 (match_operand:SI 3 "const_int_operand" "n")
4387 (const_int 0)))
4388 (clobber (reg:SI T_REG))]
4389 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4390 return 0 for simple left / right shift combination.
4391 return 1 for left shift / 8 bit sign extend / left shift.
4392 return 2 for left shift / 16 bit sign extend / left shift.
4393 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4394 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4395 return 5 for left shift / 16 bit sign extend / right shift
4396 return 6 for < 8 bit sign extend / left shift.
4397 return 7 for < 8 bit sign extend / left shift / single right shift.
4398 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4399 int
4400 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4401 {
4402 int left, size, insize, ext;
4403 int cost = 0, best_cost;
4404 int kind;
4405
4406 left = INTVAL (left_rtx);
4407 size = INTVAL (size_rtx);
4408 insize = size - left;
4409 gcc_assert (insize > 0);
4410 /* Default to left / right shift. */
4411 kind = 0;
4412 best_cost = ashl_lshr_seq[32 - insize].insn_count
4413 + ashl_lshr_seq[32 - size].insn_count;
4414 if (size <= 16)
4415 {
4416 /* 16 bit shift / sign extend / 16 bit shift */
4417 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4418 + ashl_lshr_seq[16 - size].insn_count;
4419 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4420 below, by alternative 3 or something even better. */
4421 if (cost < best_cost)
4422 {
4423 kind = 5;
4424 best_cost = cost;
4425 }
4426 }
4427 /* Try a plain sign extend between two shifts. */
4428 for (ext = 16; ext >= insize; ext -= 8)
4429 {
4430 if (ext <= size)
4431 {
4432 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4433 + ashl_lshr_seq[size - ext].insn_count;
4434 if (cost < best_cost)
4435 {
4436 kind = ext / (unsigned) 8;
4437 best_cost = cost;
4438 }
4439 }
4440 /* Check if we can do a sloppy shift with a final signed shift
4441 restoring the sign. */
4442 if (EXT_SHIFT_SIGNED (size - ext))
4443 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4444 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4445 /* If not, maybe it's still cheaper to do the second shift sloppy,
4446 and do a final sign extend? */
4447 else if (size <= 16)
4448 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4449 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4450 + 1;
4451 else
4452 continue;
4453 if (cost < best_cost)
4454 {
4455 kind = ext / (unsigned) 8 + 2;
4456 best_cost = cost;
4457 }
4458 }
4459 /* Check if we can sign extend in r0 */
4460 if (insize < 8)
4461 {
4462 cost = 3 + ashl_lshr_seq[left].insn_count;
4463 if (cost < best_cost)
4464 {
4465 kind = 6;
4466 best_cost = cost;
4467 }
4468 /* Try the same with a final signed shift. */
4469 if (left < 31)
4470 {
4471 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4472 if (cost < best_cost)
4473 {
4474 kind = 7;
4475 best_cost = cost;
4476 }
4477 }
4478 }
4479 if (TARGET_DYNSHIFT)
4480 {
4481 /* Try to use a dynamic shift. */
4482 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4483 if (cost < best_cost)
4484 {
4485 kind = 0;
4486 best_cost = cost;
4487 }
4488 }
4489 if (costp)
4490 *costp = cost;
4491 return kind;
4492 }
4493
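/* A minimal sketch, kept out of the build: the pattern handled here
   sign-extends the low SIZE bits of (X << LEFT). The default strategy
   (return value 0) realizes this as a left shift by 32 - INSIZE followed
   by an arithmetic right shift by 32 - SIZE, as in the self-contained
   reference below. The function name is an assumption of this
   illustration, and it relies on >> of a negative int behaving as an
   arithmetic shift, as it does on SH. */
#if 0
#include <stdint.h>

static int32_t
shl_sext_reference (int32_t x, int left, int size)
{
  int insize = size - left;     /* Number of bits of X that matter. */
  int32_t hi = (int32_t) ((uint32_t) x << (32 - insize));
  return hi >> (32 - size);     /* Arithmetic shift back into place. */
}
#endif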
4494 /* Function to be used in the length attribute of the instructions
4495 implementing this pattern. */
4496 int
4497 shl_sext_length (rtx insn)
4498 {
4499 rtx set_src, left_rtx, size_rtx;
4500 int cost;
4501
4502 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4503 left_rtx = XEXP (XEXP (set_src, 0), 1);
4504 size_rtx = XEXP (set_src, 1);
4505 shl_sext_kind (left_rtx, size_rtx, &cost);
4506 return cost;
4507 }
4508
4509 /* Generate rtl for this pattern */
4510 bool
4511 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4512 {
4513 int kind;
4514 int left, size, insize, cost;
4515 rtx operands[3];
4516
4517 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4518 left = INTVAL (left_rtx);
4519 size = INTVAL (size_rtx);
4520 insize = size - left;
4521 switch (kind)
4522 {
4523 case 1:
4524 case 2:
4525 case 3:
4526 case 4:
4527 {
4528 int ext = kind & 1 ? 8 : 16;
4529 int shift2 = size - ext;
4530
4531 /* Don't expand fine-grained when combining, because that will
4532 make the pattern fail. */
4533 if (! currently_expanding_to_rtl
4534 && ! reload_in_progress && ! reload_completed)
4535 {
4536 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4537 emit_insn (gen_movsi (dest, source));
4538 break;
4539 }
4540 if (dest != source)
4541 emit_insn (gen_movsi (dest, source));
4542 operands[0] = dest;
4543 if (ext - insize)
4544 {
4545 operands[2] = GEN_INT (ext - insize);
4546 gen_shifty_hi_op (ASHIFT, operands);
4547 }
4548 emit_insn (kind & 1
4549 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4550 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4551 if (kind <= 2)
4552 {
4553 if (shift2)
4554 {
4555 operands[2] = GEN_INT (shift2);
4556 gen_shifty_op (ASHIFT, operands);
4557 }
4558 }
4559 else
4560 {
4561 if (shift2 > 0)
4562 {
4563 if (EXT_SHIFT_SIGNED (shift2))
4564 {
4565 operands[2] = GEN_INT (shift2 + 1);
4566 gen_shifty_op (ASHIFT, operands);
4567 operands[2] = const1_rtx;
4568 gen_shifty_op (ASHIFTRT, operands);
4569 break;
4570 }
4571 operands[2] = GEN_INT (shift2);
4572 gen_shifty_hi_op (ASHIFT, operands);
4573 }
4574 else if (shift2)
4575 {
4576 operands[2] = GEN_INT (-shift2);
4577 gen_shifty_hi_op (LSHIFTRT, operands);
4578 }
4579 emit_insn (size <= 8
4580 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4581 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4582 }
4583 break;
4584 }
4585 case 5:
4586 {
4587 int i = 16 - size;
4588 if (! currently_expanding_to_rtl
4589 && ! reload_in_progress && ! reload_completed)
4590 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4591 else
4592 {
4593 operands[0] = dest;
4594 operands[2] = GEN_INT (16 - insize);
4595 gen_shifty_hi_op (ASHIFT, operands);
4596 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4597 }
4598 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4599 while (--i >= 0)
4600 gen_ashift (ASHIFTRT, 1, dest);
4601 break;
4602 }
4603 case 6:
4604 case 7:
4605 /* Don't expand fine-grained when combining, because that will
4606 make the pattern fail. */
4607 if (! currently_expanding_to_rtl
4608 && ! reload_in_progress && ! reload_completed)
4609 {
4610 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4611 emit_insn (gen_movsi (dest, source));
4612 break;
4613 }
4614 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4615 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4616 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4617 operands[0] = dest;
4618 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4619 gen_shifty_op (ASHIFT, operands);
4620 if (kind == 7)
4621 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4622 break;
4623 default:
4624 return true;
4625 }
4626 return false;
4627 }
4628
4629 /* Prefix a symbol_ref name with "datalabel". */
4630 rtx
4631 gen_datalabel_ref (rtx sym)
4632 {
4633 const char *str;
4634
4635 if (GET_CODE (sym) == LABEL_REF)
4636 return gen_rtx_CONST (GET_MODE (sym),
4637 gen_rtx_UNSPEC (GET_MODE (sym),
4638 gen_rtvec (1, sym),
4639 UNSPEC_DATALABEL));
4640
4641 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4642
4643 str = XSTR (sym, 0);
4644 /* Share all SYMBOL_REF strings with the same value - that is important
4645 for cse. */
4646 str = IDENTIFIER_POINTER (get_identifier (str));
4647 XSTR (sym, 0) = str;
4648
4649 return sym;
4650 }
4651
4652 \f
4653 typedef struct label_ref_list_d
4654 {
4655 rtx_code_label *label;
4656 struct label_ref_list_d *next;
4657 } *label_ref_list_t;
4658
4659 static object_allocator<label_ref_list_d> label_ref_list_d_pool
4660 ("label references list");
4661
4662 /* The SH cannot load a large constant into a register, constants have to
4663 come from a pc relative load. The reference of a pc relative load
4664 instruction must be less than 1k in front of the instruction. This
4665 means that we often have to dump a constant inside a function, and
4666 generate code to branch around it.
4667
4668 It is important to minimize this, since the branches will slow things
4669 down and make things bigger.
4670
4671 Worst case code looks like:
4672
4673 mov.l L1,rn
4674 bra L2
4675 nop
4676 align
4677 L1: .long value
4678 L2:
4679 ..
4680
4681 mov.l L3,rn
4682 bra L4
4683 nop
4684 align
4685 L3: .long value
4686 L4:
4687 ..
4688
4689 We fix this by performing a scan before scheduling, which notices which
4690 instructions need to have their operands fetched from the constant table
4691 and builds the table.
4692
4693 The algorithm is:
4694
4695 scan, find an instruction which needs a pcrel move. Look forward, find the
4696 last barrier which is within MAX_COUNT bytes of the requirement.
4697 If there isn't one, make one. Process all the instructions between
4698 the insn we found and the barrier.
4699
4700 In the above example, we can tell that L3 is within 1k of L1, so
4701 the first move can be shrunk from the 3 insn+constant sequence into
4702 just 1 insn, and the constant moved to L3 to make:
4703
4704 mov.l L1,rn
4705 ..
4706 mov.l L3,rn
4707 bra L4
4708 nop
4709 align
4710 L3:.long value
4711 L4:.long value
4712
4713 Then the second move becomes the target for the shortening process. */
4714
4715 typedef struct
4716 {
4717 rtx value; /* Value in table. */
4718 rtx_code_label *label; /* Label of value. */
4719 label_ref_list_t wend; /* End of window. */
4720 machine_mode mode; /* Mode of value. */
4721
4722 /* True if this constant is accessed as part of a post-increment
4723 sequence. Note that HImode constants are never accessed in this way. */
4724 bool part_of_sequence_p;
4725 } pool_node;
4726
4727 /* The maximum number of constants that can fit into one pool, since
4728 constants in the range 0..510 are at least 2 bytes long, and in the
4729 range from there to 1018 at least 4 bytes. */
4730
4731 #define MAX_POOL_SIZE 372
4732 static pool_node pool_vector[MAX_POOL_SIZE];
4733 static int pool_size;
4734 static rtx_code_label *pool_window_label;
4735 static int pool_window_last;
4736
4737 static int max_labelno_before_reorg;
4738
4739 /* ??? If we need a constant in HImode which is the truncated value of a
4740 constant we need in SImode, we could combine the two entries thus saving
4741 two bytes. Is this common enough to be worth the effort of implementing
4742 it? */
4743
4744 /* ??? This stuff should be done at the same time that we shorten branches.
4745 As it is now, we must assume that all branches are the maximum size, and
4746 this causes us to almost always output constant pools sooner than
4747 necessary. */
4748
4749 /* Add a constant to the pool and return its label. */
4750 static rtx_code_label *
4751 add_constant (rtx x, machine_mode mode, rtx last_value)
4752 {
4753 int i;
4754 rtx_code_label *lab, *new_rtx;
4755 label_ref_list_t ref, newref;
4756
4757 /* First see if we've already got it. */
4758 for (i = 0; i < pool_size; i++)
4759 {
4760 if (x->code == pool_vector[i].value->code
4761 && mode == pool_vector[i].mode)
4762 {
4763 if (x->code == CODE_LABEL)
4764 {
4765 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4766 continue;
4767 }
4768 if (rtx_equal_p (x, pool_vector[i].value))
4769 {
4770 lab = new_rtx = 0;
4771 if (! last_value
4772 || ! i
4773 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4774 {
4775 new_rtx = gen_label_rtx ();
4776 LABEL_REFS (new_rtx) = pool_vector[i].label;
4777 pool_vector[i].label = lab = new_rtx;
4778 }
4779 if (lab && pool_window_label)
4780 {
4781 newref = label_ref_list_d_pool.allocate ();
4782 newref->label = pool_window_label;
4783 ref = pool_vector[pool_window_last].wend;
4784 newref->next = ref;
4785 pool_vector[pool_window_last].wend = newref;
4786 }
4787 if (new_rtx)
4788 pool_window_label = new_rtx;
4789 pool_window_last = i;
4790 return lab;
4791 }
4792 }
4793 }
4794
4795 /* Need a new one. */
4796 pool_vector[pool_size].value = x;
4797 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4798 {
4799 lab = 0;
4800 pool_vector[pool_size - 1].part_of_sequence_p = true;
4801 }
4802 else
4803 lab = gen_label_rtx ();
4804 pool_vector[pool_size].mode = mode;
4805 pool_vector[pool_size].label = lab;
4806 pool_vector[pool_size].wend = NULL;
4807 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4808 if (lab && pool_window_label)
4809 {
4810 newref = label_ref_list_d_pool.allocate ();
4811 newref->label = pool_window_label;
4812 ref = pool_vector[pool_window_last].wend;
4813 newref->next = ref;
4814 pool_vector[pool_window_last].wend = newref;
4815 }
4816 if (lab)
4817 pool_window_label = lab;
4818 pool_window_last = pool_size;
4819 pool_size++;
4820 return lab;
4821 }
4822
4823 /* Output the literal table. START, if nonzero, is the first instruction
4824 this table is needed for, and also indicates that there is at least one
4825 casesi_worker_2 instruction; we have to emit the operand3 labels from
4826 these insns at a 4-byte aligned position. BARRIER is the barrier
4827 after which we are to place the table. */
4828 static void
4829 dump_table (rtx_insn *start, rtx_insn *barrier)
4830 {
4831 rtx_insn *scan = barrier;
4832 int i;
4833 bool need_align = true;
4834 rtx lab;
4835 label_ref_list_t ref;
4836 bool have_df = false;
4837
4838 /* Do two passes, first time dump out the HI sized constants. */
4839
4840 for (i = 0; i < pool_size; i++)
4841 {
4842 pool_node *p = &pool_vector[i];
4843
4844 if (p->mode == HImode)
4845 {
4846 if (need_align)
4847 {
4848 scan = emit_insn_after (gen_align_2 (), scan);
4849 need_align = false;
4850 }
4851 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4852 scan = emit_label_after (lab, scan);
4853 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4854 scan);
4855 for (ref = p->wend; ref; ref = ref->next)
4856 {
4857 lab = ref->label;
4858 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4859 }
4860 }
4861 else if (p->mode == DFmode)
4862 have_df = true;
4863 }
4864
4865 need_align = true;
4866
4867 if (start)
4868 {
4869 scan = emit_insn_after (gen_align_4 (), scan);
4870 need_align = false;
4871 for (; start != barrier; start = NEXT_INSN (start))
4872 if (NONJUMP_INSN_P (start)
4873 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4874 {
4875 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4876 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4877
4878 scan = emit_label_after (lab, scan);
4879 }
4880 }
4881 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4882 {
4883 rtx_insn *align_insn = NULL;
4884
4885 scan = emit_label_after (gen_label_rtx (), scan);
4886 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4887 need_align = false;
4888
4889 for (i = 0; i < pool_size; i++)
4890 {
4891 pool_node *p = &pool_vector[i];
4892
4893 switch (p->mode)
4894 {
4895 case HImode:
4896 break;
4897 case SImode:
4898 case SFmode:
4899 if (align_insn && !p->part_of_sequence_p)
4900 {
4901 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4902 emit_label_before (lab, align_insn);
4903 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4904 align_insn);
4905 for (ref = p->wend; ref; ref = ref->next)
4906 {
4907 lab = ref->label;
4908 emit_insn_before (gen_consttable_window_end (lab),
4909 align_insn);
4910 }
4911 delete_insn (align_insn);
4912 align_insn = NULL;
4913 continue;
4914 }
4915 else
4916 {
4917 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4918 scan = emit_label_after (lab, scan);
4919 scan = emit_insn_after (gen_consttable_4 (p->value,
4920 const0_rtx), scan);
4921 need_align = ! need_align;
4922 }
4923 break;
4924 case DFmode:
4925 if (need_align)
4926 {
4927 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4928 align_insn = scan;
4929 need_align = false;
4930 }
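/* Fall through. */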
4931 case DImode:
4932 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4933 scan = emit_label_after (lab, scan);
4934 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4935 scan);
4936 break;
4937 default:
4938 gcc_unreachable ();
4939 }
4940
4941 if (p->mode != HImode)
4942 {
4943 for (ref = p->wend; ref; ref = ref->next)
4944 {
4945 lab = ref->label;
4946 scan = emit_insn_after (gen_consttable_window_end (lab),
4947 scan);
4948 }
4949 }
4950 }
4951
4952 pool_size = 0;
4953 }
4954
4955 for (i = 0; i < pool_size; i++)
4956 {
4957 pool_node *p = &pool_vector[i];
4958
4959 switch (p->mode)
4960 {
4961 case HImode:
4962 break;
4963 case SImode:
4964 case SFmode:
4965 if (need_align)
4966 {
4967 need_align = false;
4968 scan = emit_label_after (gen_label_rtx (), scan);
4969 scan = emit_insn_after (gen_align_4 (), scan);
4970 }
4971 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4972 scan = emit_label_after (lab, scan);
4973 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4974 scan);
4975 break;
4976 case DFmode:
4977 case DImode:
4978 if (need_align)
4979 {
4980 need_align = false;
4981 scan = emit_label_after (gen_label_rtx (), scan);
4982 scan = emit_insn_after (gen_align_4 (), scan);
4983 }
4984 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4985 scan = emit_label_after (lab, scan);
4986 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4987 scan);
4988 break;
4989 default:
4990 gcc_unreachable ();
4991 }
4992
4993 if (p->mode != HImode)
4994 {
4995 for (ref = p->wend; ref; ref = ref->next)
4996 {
4997 lab = ref->label;
4998 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4999 }
5000 }
5001 }
5002
5003 scan = emit_insn_after (gen_consttable_end (), scan);
5004 scan = emit_barrier_after (scan);
5005 pool_size = 0;
5006 pool_window_label = NULL;
5007 pool_window_last = 0;
5008 }
5009
5010 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
5011
5012 /* Nonzero if the insn is a move instruction which needs to be fixed. */
5013
5014 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
5015 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
5016 need to fix it if the input value is CONST_OK_FOR_I08. */
5017 static bool
5018 broken_move (rtx_insn *insn)
5019 {
5020 if (NONJUMP_INSN_P (insn))
5021 {
5022 rtx pat = PATTERN (insn);
5023 if (GET_CODE (pat) == PARALLEL)
5024 pat = XVECEXP (pat, 0, 0);
5025 if (GET_CODE (pat) == SET
5026 /* We can load any 8-bit value if we don't care what the high
5027 order bits end up as. */
5028 && GET_MODE (SET_DEST (pat)) != QImode
5029 && (CONSTANT_P (SET_SRC (pat))
5030 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
5031 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
5032 /* Match mova_const. */
5033 || (GET_CODE (SET_SRC (pat)) == UNSPEC
5034 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
5035 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
5036 && ! (TARGET_SH2E
5037 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
5038 && (fp_zero_operand (SET_SRC (pat))
5039 || fp_one_operand (SET_SRC (pat)))
5040 /* In general we don't know the current setting of fpscr, so
5041 disable fldi.
5042 There is an exception if this was a register-register move
5043 before reload - and hence it was ascertained that we have
5044 single precision setting - and in a post-reload optimization
5045 we changed this to do a constant load. In that case
5046 we don't have an r0 clobber, hence we must use fldi. */
5047 && (TARGET_FMOVD
5048 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
5049 == SCRATCH))
5050 && REG_P (SET_DEST (pat))
5051 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
5052 && ! (TARGET_SH2A
5053 && GET_MODE (SET_DEST (pat)) == SImode
5054 && (satisfies_constraint_I20 (SET_SRC (pat))
5055 || satisfies_constraint_I28 (SET_SRC (pat))))
5056 && ! satisfies_constraint_I08 (SET_SRC (pat)))
5057 return true;
5058 }
5059
5060 return false;
5061 }
5062
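/* For illustration: a move such as "mov #17,r1" satisfies CONST_OK_FOR_I08
   and is left alone, whereas loading a constant like 0x12345678 into an
   SImode reg has no immediate form and is what broken_move flags, so that
   the constant gets placed in a pool and the insn becomes a pc-relative
   mov.l. */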
5063 /* Return true if the specified insn is a mova insn. */
5064 static bool
5065 mova_p (rtx_insn *insn)
5066 {
5067 return (NONJUMP_INSN_P (insn)
5068 && GET_CODE (PATTERN (insn)) == SET
5069 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
5070 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
5071 /* Don't match mova_const. */
5072 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
5073 }
5074
5075 /* Fix up a mova from a switch that went out of range. */
5076 static void
5077 fixup_mova (rtx_insn *mova)
5078 {
5079 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
5080 if (! flag_pic)
5081 {
5082 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
5083 INSN_CODE (mova) = -1;
5084 }
5085 else
5086 {
5087 rtx_insn *worker = mova;
5088 rtx_code_label *lab = gen_label_rtx ();
5089 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
5090
5091 do
5092 {
5093 worker = NEXT_INSN (worker);
5094 gcc_assert (worker
5095 && !LABEL_P (worker)
5096 && !JUMP_P (worker));
5097 } while (NOTE_P (worker)
5098 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
5099 wpat = PATTERN (worker);
5100 wpat0 = XVECEXP (wpat, 0, 0);
5101 wpat1 = XVECEXP (wpat, 0, 1);
5102 wsrc = SET_SRC (wpat0);
5103 PATTERN (worker) = (gen_casesi_worker_2
5104 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
5105 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
5106 XEXP (wpat1, 0)));
5107 INSN_CODE (worker) = -1;
5108 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
5109 base = gen_rtx_LABEL_REF (Pmode, lab);
5110 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
5111 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
5112 INSN_CODE (mova) = -1;
5113 }
5114 }
5115
5116 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
5117 *num_mova, and check if the new mova is not nested within the first one.
5118 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
5119 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
5120 static int
5121 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
5122 {
5123 int n_addr = 0; /* Initialization to shut up spurious warning. */
5124 int f_target, n_target = 0; /* Likewise. */
5125
5126 if (optimize)
5127 {
5128 /* If NEW_MOVA has no address yet, it will be handled later. */
5129 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
5130 return -1;
5131
5132 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
5133 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
5134 if (n_addr > n_target || n_addr + 1022 < n_target)
5135 {
5136 /* Change the mova into a load.
5137 broken_move will then return true for it. */
5138 fixup_mova (new_mova);
5139 return 1;
5140 }
5141 }
5142 if (!(*num_mova)++)
5143 {
5144 *first_mova = new_mova;
5145 return 2;
5146 }
5147 if (!optimize
5148 || ((f_target
5149 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5150 >= n_target))
5151 return -1;
5152
5153 (*num_mova)--;
5154 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5155 > n_target - n_addr)
5156 {
5157 fixup_mova (*first_mova);
5158 return 0;
5159 }
5160 else
5161 {
5162 fixup_mova (new_mova);
5163 return 1;
5164 }
5165 }
5166
5167 /* Find the last barrier from insn FROM which is close enough to hold the
5168 constant pool. If we can't find one, then create one near the end of
5169 the range. */
5170 static rtx_insn *
5171 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
5172 {
5173 int count_si = 0;
5174 int count_hi = 0;
5175 int found_hi = 0;
5176 int found_si = 0;
5177 int found_di = 0;
5178 int hi_align = 2;
5179 int si_align = 2;
5180 int leading_mova = num_mova;
5181 rtx_insn *barrier_before_mova = NULL;
5182 rtx_insn *found_barrier = NULL;
5183 rtx_insn *good_barrier = NULL;
5184 int si_limit;
5185 int hi_limit;
5186 rtx_insn *orig = from;
5187 rtx_insn *last_got = NULL;
5188 rtx_insn *last_symoff = NULL;
5189
5190 /* For HImode: range is 510, add 4 because pc counts from address of
5191 second instruction after this one, subtract 2 for the jump instruction
5192 that we may need to emit before the table, subtract 2 for the instruction
5193 that fills the jump delay slot (in very rare cases, reorg will take an
5194 instruction from after the constant pool or will leave the delay slot
5195 empty). This gives 510.
5196 For SImode: range is 1020, add 4 because pc counts from address of
5197 second instruction after this one, subtract 2 in case pc is 2 byte
5198 aligned, subtract 2 for the jump instruction that we may need to emit
5199 before the table, subtract 2 for the instruction that fills the jump
5200 delay slot. This gives 1018. */
5201
5202 /* The branch will always be shortened now that the reference address for
5203 forward branches is the successor address, thus we no longer need to make
5204 adjustments to the [sh]i_limit for -O0. */
5205
5206 si_limit = 1018;
5207 hi_limit = 510;
5208
5209 while (from && count_si < si_limit && count_hi < hi_limit)
5210 {
5211 int inc = get_attr_length (from);
5212 int new_align = 1;
5213
5214 /* If this is a label that existed at the time of the compute_alignments
5215 call, determine the alignment. N.B. When find_barrier recurses for
5216 an out-of-reach mova, we might see labels at the start of previously
5217 inserted constant tables. */
5218 if (LABEL_P (from)
5219 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5220 {
5221 if (optimize)
5222 new_align = 1 << label_to_alignment (from);
5223 else if (BARRIER_P (prev_nonnote_insn (from)))
5224 new_align = 1 << barrier_align (from);
5225 else
5226 new_align = 1;
5227 inc = 0;
5228 }
5229 /* In case we are scanning a constant table because of recursion, check
5230 for explicit alignments. If the table is long, we might be forced
5231 to emit the new table in front of it; the length of the alignment
5232 might be the last straw. */
5233 else if (NONJUMP_INSN_P (from)
5234 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5235 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5236 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5237 /* When we find the end of a constant table, paste the new constant
5238 at the end. That is better than putting it in front because
5239 this way, we don't need extra alignment for adding a 4-byte-aligned
5240 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5241 else if (NONJUMP_INSN_P (from)
5242 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5243 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5244 return from;
5245
5246 if (BARRIER_P (from))
5247 {
5248 rtx_insn *next;
5249
5250 found_barrier = from;
5251
5252 /* If we are at the end of the function, or in front of an alignment
5253 instruction, we need not insert an extra alignment. We prefer
5254 this kind of barrier. */
5255 if (barrier_align (from) > 2)
5256 good_barrier = from;
5257
5258 /* If we are at the end of a hot/cold block, dump the constants
5259 here. */
5260 next = NEXT_INSN (from);
5261 if (next
5262 && NOTE_P (next)
5263 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5264 break;
5265 }
5266
5267 if (broken_move (from))
5268 {
5269 rtx pat, src, dst;
5270 machine_mode mode;
5271
5272 pat = PATTERN (from);
5273 if (GET_CODE (pat) == PARALLEL)
5274 pat = XVECEXP (pat, 0, 0);
5275 src = SET_SRC (pat);
5276 dst = SET_DEST (pat);
5277 mode = GET_MODE (dst);
5278
5279 /* A GOT pc-relative setup comes as a pair of
5280 mova .L8,r0
5281 mov.l .L8,r12
5282 instructions (plus an add r0,r12).
5283 Remember if we see one without the other. */
5284 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5285 last_got = last_got ? NULL : from;
5286 else if (PIC_ADDR_P (src))
5287 last_got = last_got ? NULL : from;
5288
5289 /* We must explicitly check the mode, because sometimes the
5290 front end will generate code to load unsigned constants into
5291 HImode targets without properly sign extending them. */
5292 if (mode == HImode
5293 || (mode == SImode && satisfies_constraint_I16 (src)
5294 && REGNO (dst) != FPUL_REG))
5295 {
5296 found_hi += 2;
5297 /* We put the short constants before the long constants, so
5298 we must count the length of short constants in the range
5299 for the long constants. */
5300 /* ??? This isn't optimal, but is easy to do. */
5301 si_limit -= 2;
5302 }
5303 else
5304 {
5305 /* We dump DF/DI constants before SF/SI ones, because
5306 the limit is the same, but the alignment requirements
5307 are higher. We may waste up to 4 additional bytes
5308 for alignment, and the DF/DI constant may have
5309 another SF/SI constant placed before it. */
5310 if (TARGET_SHCOMPACT
5311 && ! found_di
5312 && (mode == DFmode || mode == DImode))
5313 {
5314 found_di = 1;
5315 si_limit -= 8;
5316 }
5317 while (si_align > 2 && found_si + si_align - 2 > count_si)
5318 si_align >>= 1;
5319 if (found_si > count_si)
5320 count_si = found_si;
5321 found_si += GET_MODE_SIZE (mode);
5322 if (num_mova)
5323 si_limit -= GET_MODE_SIZE (mode);
5324 }
5325 }
5326
5327 if (mova_p (from))
5328 {
5329 switch (untangle_mova (&num_mova, &mova, from))
5330 {
5331 case 1:
5332 if (flag_pic)
5333 {
5334 rtx src = SET_SRC (PATTERN (from));
5335 if (GET_CODE (src) == CONST
5336 && GET_CODE (XEXP (src, 0)) == UNSPEC
5337 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5338 last_symoff = from;
5339 }
5340 break;
5341 case 0: return find_barrier (0, 0, mova);
5342 case 2:
5343 {
5344 leading_mova = 0;
5345 barrier_before_mova
5346 = good_barrier ? good_barrier : found_barrier;
5347 }
5348 default: break;
5349 }
5350 if (found_si > count_si)
5351 count_si = found_si;
5352 }
5353 else if (JUMP_TABLE_DATA_P (from)
5354 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5355 {
5356 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5357 || (num_mova
5358 && (prev_nonnote_insn (from)
5359 == XEXP (MOVA_LABELREF (mova), 0))))
5360 num_mova--;
5361 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5362 {
5363 /* We have just passed the barrier in front of the
5364 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5365 the ADDR_DIFF_VEC is accessed as data, just like our pool
5366 constants, this is a good opportunity to accommodate what
5367 we have gathered so far.
5368 If we waited any longer, we could end up at a barrier in
5369 front of code, which gives worse cache usage for separated
5370 instruction / data caches. */
5371 good_barrier = found_barrier;
5372 break;
5373 }
5374 else
5375 {
5376 rtx body = PATTERN (from);
5377 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5378 }
5379 }
5380 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5381 else if (JUMP_P (from)
5382 && ! TARGET_SH2
5383 && ! optimize_size)
5384 new_align = 4;
5385
5386 /* There is a possibility that a bf is transformed into a bf/s by the
5387 delay slot scheduler. */
5388 if (JUMP_P (from)
5389 && get_attr_type (from) == TYPE_CBRANCH
5390 && ! sequence_insn_p (from))
5391 inc += 2;
5392
5393 if (found_si)
5394 {
5395 count_si += inc;
5396 if (new_align > si_align)
5397 {
5398 si_limit -= (count_si - 1) & (new_align - si_align);
5399 si_align = new_align;
5400 }
5401 count_si = (count_si + new_align - 1) & -new_align;
5402 }
5403 if (found_hi)
5404 {
5405 count_hi += inc;
5406 if (new_align > hi_align)
5407 {
5408 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5409 hi_align = new_align;
5410 }
5411 count_hi = (count_hi + new_align - 1) & -new_align;
5412 }
5413 from = NEXT_INSN (from);
5414 }
5415
5416 if (num_mova)
5417 {
5418 if (leading_mova)
5419 {
5420 /* Try as we might, the leading mova is out of range. Change
5421 it into a load (which will become a pcload) and retry. */
5422 fixup_mova (mova);
5423 return find_barrier (0, 0, mova);
5424 }
5425 else
5426 {
5427 /* Insert the constant pool table before the mova instruction,
5428 to prevent the mova label reference from going out of range. */
5429 from = mova;
5430 good_barrier = found_barrier = barrier_before_mova;
5431 }
5432 }
5433
5434 if (found_barrier)
5435 {
5436 if (good_barrier && next_real_insn (found_barrier))
5437 found_barrier = good_barrier;
5438 }
5439 else
5440 {
5441 /* We didn't find a barrier in time to dump our stuff,
5442 so we'll make one. */
5443 rtx_code_label *label = gen_label_rtx ();
5444
5445 /* Don't emit a constant table in the middle of insns for
5446 casesi_worker_2. This is a bit of overkill, but it is enough
5447 because casesi_worker_2 does not appear very frequently. */
5448 if (last_symoff)
5449 from = last_symoff;
5450
5451 /* If we exceeded the range, then we must back up over the last
5452 instruction we looked at. Otherwise, we just need to undo the
5453 NEXT_INSN at the end of the loop. */
5454 if (PREV_INSN (from) != orig
5455 && (count_hi > hi_limit || count_si > si_limit))
5456 from = PREV_INSN (PREV_INSN (from));
5457 else
5458 from = PREV_INSN (from);
5459
5460 /* Don't emit a constant table in the middle of global pointer setting,
5461 since that would move the addressing base GOT into another table.
5462 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5463 in the pool anyway, so just move up the whole constant pool.
5464
5465 However, avoid doing so when the last single GOT mov is the starting
5466 insn itself. Going back past the start insn would create a negative
5467 offset, causing errors. */
5468 if (last_got && last_got != orig)
5469 from = PREV_INSN (last_got);
5470
5471 /* Don't insert the constant pool table at the position which
5472 may be the landing pad. */
5473 if (flag_exceptions
5474 && CALL_P (from)
5475 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5476 from = PREV_INSN (from);
5477
5478 /* Walk back until we are just before any jump or label.
5479 Putting it before a label reduces the number of times the branch
5480 around the constant pool table will be hit. Putting it before
5481 a jump makes it more likely that the bra delay slot will be
5482 filled. */
5483 while (NOTE_P (from) || JUMP_P (from)
5484 || LABEL_P (from))
5485 from = PREV_INSN (from);
5486
5487 /* Make sure we do not split between a call and its corresponding
5488 CALL_ARG_LOCATION note. */
5489 if (CALL_P (from))
5490 {
5491 rtx_insn *next = NEXT_INSN (from);
5492 if (next && NOTE_P (next)
5493 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5494 from = next;
5495 }
5496
5497 from = emit_jump_insn_after (gen_jump (label), from);
5498 JUMP_LABEL (from) = label;
5499 LABEL_NUSES (label) = 1;
5500 found_barrier = emit_barrier_after (from);
5501 emit_label_after (label, found_barrier);
5502 }
5503
5504 return found_barrier;
5505 }
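 /* For illustration, the fallback emitted above ends up looking roughly
    like this once dump_table has filled in the pool (labels are
    arbitrary):
 	bra	.L_skip
 	 nop
 	.align	2
    .L_pool:	.long	<constant>
    .L_skip:
    i.e. a branch around the newly created constant table.  */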
5506
5507 /* If the instruction INSN is implemented by a special function, and we can
5508 positively find the register that is used to call the sfunc, and this
5509 register is not used anywhere else in this instruction - except as the
5510 destination of a set, return this register; else, return 0. */
5511 rtx
5512 sfunc_uses_reg (rtx_insn *insn)
5513 {
5514 int i;
5515 rtx pattern, part, reg_part, reg;
5516
5517 if (!NONJUMP_INSN_P (insn))
5518 return NULL_RTX;
5519 pattern = PATTERN (insn);
5520 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5521 return NULL_RTX;
5522
5523 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5524 {
5525 part = XVECEXP (pattern, 0, i);
5526 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5527 reg_part = part;
5528 }
5529 if (! reg_part)
5530 return NULL_RTX;
5531 reg = XEXP (reg_part, 0);
5532 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5533 {
5534 part = XVECEXP (pattern, 0, i);
5535 if (part == reg_part || GET_CODE (part) == CLOBBER)
5536 continue;
5537 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5538 && REG_P (SET_DEST (part)))
5539 ? SET_SRC (part) : part)))
5540 return NULL_RTX;
5541 }
5542 return reg;
5543 }
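 /* For illustration, a typical sfunc pattern looks roughly like this
    (register numbers are arbitrary):
      (parallel [(set (reg:SI 0) (udiv:SI (reg:SI 4) (reg:SI 5)))
                 (clobber (reg:SI pr))
                 (use (reg:SI 2))])
    The (use (reg:SI 2)) names the register holding the sfunc's address;
    that is the register sfunc_uses_reg returns, provided it is not
    mentioned anywhere else in the pattern.  */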
5544
5545 /* See if the only way in which INSN uses REG is by calling it, or by
5546 setting it while calling it. Set *SET to a SET rtx if the register
5547 is set by INSN. */
5548 static bool
5549 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5550 {
5551 rtx pattern, reg2;
5552
5553 *set = NULL_RTX;
5554
5555 reg2 = sfunc_uses_reg (insn);
5556 if (reg2 && REGNO (reg2) == REGNO (reg))
5557 {
5558 pattern = single_set (insn);
5559 if (pattern
5560 && REG_P (SET_DEST (pattern))
5561 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5562 *set = pattern;
5563 return false;
5564 }
5565 if (!CALL_P (insn))
5566 {
5567 /* We don't use rtx_equal_p because we don't care if the mode is
5568 different. */
5569 pattern = single_set (insn);
5570 if (pattern
5571 && REG_P (SET_DEST (pattern))
5572 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5573 {
5574 rtx par, part;
5575 int i;
5576
5577 *set = pattern;
5578 par = PATTERN (insn);
5579 if (GET_CODE (par) == PARALLEL)
5580 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5581 {
5582 part = XVECEXP (par, 0, i);
5583 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5584 return true;
5585 }
5586 return reg_mentioned_p (reg, SET_SRC (pattern));
5587 }
5588
5589 return true;
5590 }
5591
5592 pattern = PATTERN (insn);
5593
5594 if (GET_CODE (pattern) == PARALLEL)
5595 {
5596 int i;
5597
5598 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5599 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5600 return true;
5601 pattern = XVECEXP (pattern, 0, 0);
5602 }
5603
5604 if (GET_CODE (pattern) == SET)
5605 {
5606 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5607 {
5608 /* We don't use rtx_equal_p, because we don't care if the
5609 mode is different. */
5610 if (!REG_P (SET_DEST (pattern))
5611 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5612 return true;
5613
5614 *set = pattern;
5615 }
5616
5617 pattern = SET_SRC (pattern);
5618 }
5619
5620 if (GET_CODE (pattern) != CALL
5621 || !MEM_P (XEXP (pattern, 0))
5622 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5623 return true;
5624
5625 return false;
5626 }
5627
5628 /* Given X, a pattern of an insn or a part of it, return a mask of used
5629 general registers. Bits 0..15 mean that the respective registers
5630 are used as inputs in the instruction. Bits 16..31 mean that the
5631 registers 0..15, respectively, are used as outputs, or are clobbered.
5632 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5633 int
5634 regs_used (rtx x, int is_dest)
5635 {
5636 enum rtx_code code;
5637 const char *fmt;
5638 int i, used = 0;
5639
5640 if (! x)
5641 return used;
5642 code = GET_CODE (x);
5643 switch (code)
5644 {
5645 case REG:
5646 if (REGNO (x) < 16)
5647 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5648 << (REGNO (x) + is_dest));
5649 return 0;
5650 case SUBREG:
5651 {
5652 rtx y = SUBREG_REG (x);
5653
5654 if (!REG_P (y))
5655 break;
5656 if (REGNO (y) < 16)
5657 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5658 << (REGNO (y) +
5659 subreg_regno_offset (REGNO (y),
5660 GET_MODE (y),
5661 SUBREG_BYTE (x),
5662 GET_MODE (x)) + is_dest));
5663 return 0;
5664 }
5665 case SET:
5666 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5667 case RETURN:
5668 /* If there was a return value, it must have been indicated with USE. */
5669 return 0x00ffff00;
5670 case CLOBBER:
5671 is_dest = 1;
5672 break;
5673 case MEM:
5674 is_dest = 0;
5675 break;
5676 case CALL:
5677 used |= 0x00ff00f0;
5678 break;
5679 default:
5680 break;
5681 }
5682
5683 fmt = GET_RTX_FORMAT (code);
5684
5685 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5686 {
5687 if (fmt[i] == 'E')
5688 {
5689 int j;
5690 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5691 used |= regs_used (XVECEXP (x, i, j), is_dest);
5692 }
5693 else if (fmt[i] == 'e')
5694 used |= regs_used (XEXP (x, i), is_dest);
5695 }
5696 return used;
5697 }
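 /* For example, with arbitrary register numbers, the pattern
      (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
    yields 0x0002000c: bits 2 and 3 for the input registers, and
    bit 1 + 16 for the single-register SImode destination.  */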
5698
5699 /* Create an instruction that prevents redirection of a conditional branch
5700 to the destination of the JUMP with address ADDR.
5701 If the branch needs to be implemented as an indirect jump, try to find
5702 a scratch register for it.
5703 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5704 If any preceding insn that doesn't fit into a delay slot is good enough,
5705 pass 1. Pass 2 if a definite blocking insn is needed.
5706 -1 is used internally to avoid deep recursion.
5707 If a blocking instruction is made or recognized, return it. */
5708 static rtx_insn *
5709 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5710 {
5711 int dead = 0;
5712 rtx_insn *prev = prev_nonnote_insn (jump);
5713 rtx dest;
5714
5715 /* First, check if we already have an instruction that satisfies our need. */
5716 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5717 {
5718 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5719 return prev;
5720 if (GET_CODE (PATTERN (prev)) == USE
5721 || GET_CODE (PATTERN (prev)) == CLOBBER
5722 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5723 prev = jump;
5724 else if ((need_block &= ~1) < 0)
5725 return prev;
5726 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5727 need_block = 0;
5728 }
5729 if (GET_CODE (PATTERN (jump)) == RETURN)
5730 {
5731 if (! need_block)
5732 return prev;
5733 /* Reorg even does nasty things with return insns that cause branches
5734 to go out of range - see find_end_label and callers. */
5735 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5736 }
5737 /* We can't use JUMP_LABEL here because it might be undefined
5738 when not optimizing. */
5739 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5740 /* If the branch is out of range, try to find a scratch register for it. */
5741 if (optimize
5742 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5743 > 4092 + 4098))
5744 {
5745 rtx_insn *scan;
5746 /* Don't look for the stack pointer as a scratch register,
5747 it would cause trouble if an interrupt occurred. */
5748 unsigned attempt = 0x7fff, used;
5749 int jump_left = flag_expensive_optimizations + 1;
5750
5751 /* It is likely that the most recent eligible instruction is wanted for
5752 the delay slot. Therefore, find out which registers it uses, and
5753 try to avoid using them. */
5754
5755 for (scan = jump; (scan = PREV_INSN (scan)); )
5756 {
5757 enum rtx_code code;
5758
5759 if (scan->deleted ())
5760 continue;
5761 code = GET_CODE (scan);
5762 if (code == CODE_LABEL || code == JUMP_INSN)
5763 break;
5764 if (code == INSN
5765 && GET_CODE (PATTERN (scan)) != USE
5766 && GET_CODE (PATTERN (scan)) != CLOBBER
5767 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5768 {
5769 attempt &= ~regs_used (PATTERN (scan), 0);
5770 break;
5771 }
5772 }
5773 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5774 (scan = NEXT_INSN (scan)); )
5775 {
5776 enum rtx_code code;
5777
5778 if (scan->deleted ())
5779 continue;
5780 code = GET_CODE (scan);
5781 if (INSN_P (scan))
5782 {
5783 used |= regs_used (PATTERN (scan), 0);
5784 if (code == CALL_INSN)
5785 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5786 dead |= (used >> 16) & ~used;
5787 if (dead & attempt)
5788 {
5789 dead &= attempt;
5790 break;
5791 }
5792 if (code == JUMP_INSN)
5793 {
5794 if (jump_left-- && simplejump_p (scan))
5795 scan = JUMP_LABEL_AS_INSN (scan);
5796 else
5797 break;
5798 }
5799 }
5800 }
5801 /* Mask out the stack pointer again, in case it was
5802 the only 'free' register we have found. */
5803 dead &= 0x7fff;
5804 }
5805 /* If the immediate destination is still in range, check for possible
5806 threading with a jump beyond the delay slot insn.
5807 Don't check if we are called recursively; in that case the jump has been,
5808 or will be, checked in a different invocation. */
5809
5810 else if (optimize && need_block >= 0)
5811 {
5812 rtx_insn *next = next_active_insn (next_active_insn (dest));
5813 if (next && JUMP_P (next)
5814 && GET_CODE (PATTERN (next)) == SET
5815 && recog_memoized (next) == CODE_FOR_jump_compact)
5816 {
5817 dest = JUMP_LABEL (next);
5818 if (dest
5819 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5820 > 4092 + 4098))
5821 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5822 }
5823 }
5824
5825 if (dead)
5826 {
5827 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5828
5829 /* It would be nice if we could convert the jump into an indirect
5830 jump / far branch right now, thus exposing all constituent
5831 instructions to further optimization. However, reorg uses
5832 simplejump_p to determine if there is an unconditional jump where
5833 it should try to schedule instructions from the target of the
5834 branch; simplejump_p fails for indirect jumps even if they have
5835 a JUMP_LABEL. */
5836 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5837 (reg, GEN_INT (unspec_bbr_uid++)),
5838 jump);
5839 /* ??? We would like this to have the scope of the jump, but that
5840 scope will change when a delay slot insn of an inner scope is added.
5841 Hence, after delay slot scheduling, we'll have to expect
5842 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5843 the jump. */
5844
5845 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5846 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5847 return insn;
5848 }
5849 else if (need_block)
5850 /* We can't use JUMP_LABEL here because it might be undefined
5851 when not optimizing. */
5852 return emit_insn_before (gen_block_branch_redirect
5853 (GEN_INT (unspec_bbr_uid++)),
5854 jump);
5855 return prev;
5856 }
5857
5858 #define CONDJUMP_MIN -252
5859 #define CONDJUMP_MAX 262
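 /* These bounds correspond roughly to the reach of the bt/bf conditional
    branches, whose signed 8-bit displacement is scaled by 2 and applied
    to PC + 4, i.e. about -252 .. +258 bytes from the branch address.  */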
5860 struct far_branch
5861 {
5862 /* A label (to be placed) in front of the jump
5863 that jumps to our ultimate destination. */
5864 rtx_insn *near_label;
5865 /* Where we are going to insert it if we cannot move the jump any farther,
5866 or the jump itself if we have picked up an existing jump. */
5867 rtx_insn *insert_place;
5868 /* The ultimate destination. */
5869 rtx_insn *far_label;
5870 struct far_branch *prev;
5871 /* If the branch has already been created, its address;
5872 else the address of its first prospective user. */
5873 int address;
5874 };
5875
5876 static void gen_far_branch (struct far_branch *);
5877 enum mdep_reorg_phase_e mdep_reorg_phase;
5878 static void
5879 gen_far_branch (struct far_branch *bp)
5880 {
5881 rtx_insn *insn = bp->insert_place;
5882 rtx_jump_insn *jump;
5883 rtx_code_label *label = gen_label_rtx ();
5884 int ok;
5885
5886 emit_label_after (label, insn);
5887 if (bp->far_label)
5888 {
5889 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5890 LABEL_NUSES (bp->far_label)++;
5891 }
5892 else
5893 jump = emit_jump_insn_after (gen_return (), insn);
5894
5895 /* Emit a barrier so that reorg knows that any following instructions
5896 are not reachable via a fall-through path.
5897 But don't do this when not optimizing, since we wouldn't suppress the
5898 alignment for the barrier then, and could end up with out-of-range
5899 pc-relative loads. */
5900 if (optimize)
5901 emit_barrier_after (jump);
5902 emit_label_after (bp->near_label, insn);
5903
5904 if (bp->far_label)
5905 JUMP_LABEL (jump) = bp->far_label;
5906 else
5907 {
5908 rtx pat = PATTERN (jump);
5909 gcc_assert (ANY_RETURN_P (pat));
5910 JUMP_LABEL (jump) = pat;
5911 }
5912
5913 ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5914 gcc_assert (ok);
5915
5916 /* If we are branching around a jump (rather than a return), prevent
5917 reorg from using an insn from the jump target as the delay slot insn -
5918 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5919 and it could cause branches to go out of range. */
5920 if (bp->far_label)
5921 (emit_insn_after
5922 (gen_stuff_delay_slot
5923 (GEN_INT (unspec_bbr_uid++),
5924 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5925 insn));
5926 /* Prevent reorg from undoing our splits. */
5927 gen_block_redirect (jump, bp->address += 2, 2);
5928 }
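 /* For illustration (labels and the condition are arbitrary), an
    out-of-range
 	bt	.L_far
    is rewritten by the code above into
 	bf	.L_over
    .L_near:
 	bra	.L_far
 	 nop
    .L_over:
    where .L_near can then serve as a nearby stand-in target for other
    branches that cannot reach .L_far directly.  */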
5929
5930 /* Fix up ADDR_DIFF_VECs. */
5931 void
5932 fixup_addr_diff_vecs (rtx_insn *first)
5933 {
5934 rtx_insn *insn;
5935
5936 for (insn = first; insn; insn = NEXT_INSN (insn))
5937 {
5938 rtx vec_lab, pat, prevpat, x, braf_label;
5939 rtx_insn *prev;
5940
5941 if (! JUMP_TABLE_DATA_P (insn)
5942 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5943 continue;
5944 pat = PATTERN (insn);
5945 vec_lab = XEXP (XEXP (pat, 0), 0);
5946
5947 /* Search the matching casesi_jump_2. */
5948 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5949 {
5950 if (!JUMP_P (prev))
5951 continue;
5952 prevpat = PATTERN (prev);
5953 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5954 continue;
5955 x = XVECEXP (prevpat, 0, 1);
5956 if (GET_CODE (x) != USE)
5957 continue;
5958 x = XEXP (x, 0);
5959 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5960 break;
5961 }
5962 /* FIXME: This is a bug in the optimizer, but it seems harmless
5963 to just avoid panicking. */
5964 if (!prev)
5965 continue;
5966
5967 /* Emit the reference label of the braf where it belongs, right after
5968 the casesi_jump_2 (i.e. braf). */
5969 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5970 emit_label_after (braf_label, prev);
5971
5972 /* Fix up the ADDR_DIFF_VEC to be relative
5973 to the reference address of the braf. */
5974 XEXP (XEXP (pat, 0), 0) = braf_label;
5975 }
5976 }
5977
5978 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5979 a barrier. Return the base 2 logarithm of the desired alignment. */
5980 int
5981 barrier_align (rtx_insn *barrier_or_label)
5982 {
5983 rtx next, pat;
5984
5985 if (! barrier_or_label)
5986 return 0;
5987
5988 if (LABEL_P (barrier_or_label)
5989 && NEXT_INSN (barrier_or_label)
5990 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5991 return 2;
5992
5993 if (BARRIER_P (barrier_or_label)
5994 && PREV_INSN (barrier_or_label)
5995 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5996 {
5997 pat = PATTERN (PREV_INSN (barrier_or_label));
5998 /* If this is a very small table, we want to keep the alignment after
5999 the table to the minimum for proper code alignment. */
6000 return ((optimize_size
6001 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
6002 <= (unsigned) 1 << (CACHE_LOG - 2)))
6003 ? 1 << TARGET_SHMEDIA : align_jumps_log);
6004 }
6005
6006 next = next_active_insn (barrier_or_label);
6007
6008 if (! next)
6009 return 0;
6010
6011 pat = PATTERN (next);
6012
6013 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
6014 /* This is a barrier in front of a constant table. */
6015 return 0;
6016
6017 if (optimize_size)
6018 return 0;
6019
6020 if (! TARGET_SH2 || ! optimize)
6021 return align_jumps_log;
6022
6023 /* When fixing up pcloads, a constant table might be inserted just before
6024 the basic block that ends with the barrier. Thus, we can't trust the
6025 instruction lengths before that. */
6026 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
6027 {
6028 /* Check if there is an immediately preceding branch to the insn beyond
6029 the barrier. We must weigh the cost of discarding useful information
6030 from the current cache line when executing this branch and there is
6031 an alignment, against that of fetching unneeded insns in front of the
6032 branch target when there is no alignment. */
6033
6034 /* There are two delay_slot cases to consider. One is the simple case
6035 where the preceding branch is to the insn beyond the barrier (simple
6036 delay slot filling), and the other is where the preceding branch has
6037 a delay slot that is a duplicate of the insn after the barrier
6038 (fill_eager_delay_slots) and the branch is to the insn after the insn
6039 after the barrier. */
6040
6041 int slot, credit;
6042 bool jump_to_next = false;
6043
6044 /* Skip to the insn before the JUMP_INSN before the barrier under
6045 investigation. */
6046 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
6047
6048 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
6049 credit >= 0 && prev && NONJUMP_INSN_P (prev);
6050 prev = prev_real_insn (prev))
6051 {
6052 jump_to_next = false;
6053 if (GET_CODE (PATTERN (prev)) == USE
6054 || GET_CODE (PATTERN (prev)) == CLOBBER)
6055 continue;
6056 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
6057 {
6058 prev = prev_seq->insn (1);
6059 if (INSN_UID (prev) == INSN_UID (next))
6060 {
6061 /* Delay slot was filled with insn at jump target. */
6062 jump_to_next = true;
6063 continue;
6064 }
6065 }
6066
6067 if (slot
6068 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
6069 slot = 0;
6070 credit -= get_attr_length (prev);
6071 }
6072 if (prev && jump_to_label_p (prev))
6073 {
6074 rtx_insn *x;
6075 if (jump_to_next
6076 || next_real_insn (JUMP_LABEL (prev)) == next
6077 /* If relax_delay_slots() decides NEXT was redundant
6078 with some previous instruction, it will have
6079 redirected PREV's jump to the following insn. */
6080 || JUMP_LABEL (prev) == next_nonnote_insn (next)
6081 /* There is no upper bound on redundant instructions
6082 that might have been skipped, but we must not put an
6083 alignment where none had been before. */
6084 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
6085 (INSN_P (x)
6086 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
6087 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
6088 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
6089 {
6090 rtx pat = PATTERN (prev);
6091 if (GET_CODE (pat) == PARALLEL)
6092 pat = XVECEXP (pat, 0, 0);
6093 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
6094 return 0;
6095 }
6096 }
6097 }
6098
6099 return align_jumps_log;
6100 }
6101
6102 /* If we are inside a phony loop, almost any kind of label can turn up as the
6103 first one in the loop. Aligning a braf label causes incorrect switch
6104 destination addresses; we can detect braf labels because they are
6105 followed by a BARRIER.
6106 Applying loop alignment to small constant or switch tables is a waste
6107 of space, so we suppress this too. */
6108 int
6109 sh_loop_align (rtx_insn *label)
6110 {
6111 rtx_insn *next = label;
6112
6113 if (! optimize || optimize_size)
6114 return 0;
6115
6116 do
6117 next = next_nonnote_insn (next);
6118 while (next && LABEL_P (next));
6119
6120 if (! next
6121 || ! INSN_P (next)
6122 || recog_memoized (next) == CODE_FOR_consttable_2)
6123 return 0;
6124
6125 return align_loops_log;
6126 }
6127
6128 /* Do a final pass over the function, just before delayed branch
6129 scheduling. */
6130 static void
6131 sh_reorg (void)
6132 {
6133 rtx_insn *first, *insn, *mova = NULL;
6134 int num_mova;
6135 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
6136 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
6137
6138 first = get_insns ();
6139 max_labelno_before_reorg = max_label_num ();
6140
6141 /* We must split call insns before introducing `mova's. If we're
6142 optimizing, they'll have already been split. Otherwise, make
6143 sure we don't split them too late. */
6144 if (! optimize)
6145 split_all_insns_noflow ();
6146
6147 if (TARGET_SHMEDIA)
6148 return;
6149
6150 /* If relaxing, generate pseudo-ops to associate function calls with
6151 the symbols they call. It does no harm to not generate these
6152 pseudo-ops. However, when we can generate them, it enables the
6153 linker to potentially relax the jsr to a bsr, and eliminate the
6154 register load and, possibly, the constant pool entry. */
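 /* For illustration (label numbers and the register are arbitrary),
    the resulting -mrelax output looks roughly like
    .L4:
 	mov.l	.L5,r1
 	...
 	.uses	.L4
 	jsr	@r1
 	 nop
    final_prescan_insn emits the label on the insn that loads the call
    address and the .uses pseudo-op on the call that consumes it, which
    is what allows the linker to turn the jsr into a bsr.  */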
6155
6156 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6157 if (TARGET_RELAX)
6158 {
6159 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6160 own purposes. This works because none of the remaining passes
6161 need to look at them.
6162
6163 ??? But it may break in the future. We should use a machine
6164 dependent REG_NOTE, or some other approach entirely. */
6165 for (insn = first; insn; insn = NEXT_INSN (insn))
6166 {
6167 if (INSN_P (insn))
6168 {
6169 rtx note;
6170
6171 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6172 NULL_RTX)) != 0)
6173 remove_note (insn, note);
6174 }
6175 }
6176
6177 for (insn = first; insn; insn = NEXT_INSN (insn))
6178 {
6179 rtx pattern, reg, set, dies;
6180 rtx_code_label *label;
6181 rtx_insn *link, *scan;
6182 int rescan = 0, foundinsn = 0;
6183
6184 if (CALL_P (insn))
6185 {
6186 pattern = PATTERN (insn);
6187
6188 if (GET_CODE (pattern) == PARALLEL)
6189 pattern = XVECEXP (pattern, 0, 0);
6190 if (GET_CODE (pattern) == SET)
6191 pattern = SET_SRC (pattern);
6192
6193 if (GET_CODE (pattern) != CALL
6194 || !MEM_P (XEXP (pattern, 0)))
6195 continue;
6196
6197 reg = XEXP (XEXP (pattern, 0), 0);
6198 }
6199 else
6200 {
6201 reg = sfunc_uses_reg (insn);
6202 if (! reg)
6203 continue;
6204 }
6205
6206 if (!REG_P (reg))
6207 continue;
6208
6209 /* Try scanning backward to find where the register is set. */
6210 link = NULL;
6211 for (scan = PREV_INSN (insn);
6212 scan && !LABEL_P (scan);
6213 scan = PREV_INSN (scan))
6214 {
6215 if (! INSN_P (scan))
6216 continue;
6217
6218 if (! reg_mentioned_p (reg, scan))
6219 continue;
6220
6221 if (noncall_uses_reg (reg, scan, &set))
6222 break;
6223
6224 if (set)
6225 {
6226 link = scan;
6227 break;
6228 }
6229 }
6230
6231 if (! link)
6232 continue;
6233
6234 /* The register is set at LINK. */
6235
6236 /* We can only optimize the function call if the register is
6237 being set to a symbol. In theory, we could sometimes
6238 optimize calls to a constant location, but the assembler
6239 and linker do not support that at present. */
6240 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6241 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6242 continue;
6243
6244 /* Scan forward from LINK to the place where REG dies, and
6245 make sure that the only insns which use REG are
6246 themselves function calls. */
6247
6248 /* ??? This doesn't work for call targets that were allocated
6249 by reload, since there may not be a REG_DEAD note for the
6250 register. */
6251
6252 dies = NULL_RTX;
6253 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6254 {
6255 rtx scanset;
6256
6257 /* Don't try to trace forward past a CODE_LABEL if we haven't
6258 seen INSN yet. Ordinarily, we will only find the setting insn
6259 if it is in the same basic block. However,
6260 cross-jumping can insert code labels in between the load and
6261 the call, and can result in situations where a single call
6262 insn may have two targets depending on where we came from. */
6263
6264 if (LABEL_P (scan) && ! foundinsn)
6265 break;
6266
6267 if (! INSN_P (scan))
6268 continue;
6269
6270 /* Don't try to trace forward past a JUMP. To optimize
6271 safely, we would have to check that all the
6272 instructions at the jump destination did not use REG. */
6273
6274 if (JUMP_P (scan))
6275 break;
6276
6277 if (! reg_mentioned_p (reg, scan))
6278 continue;
6279
6280 if (noncall_uses_reg (reg, scan, &scanset))
6281 break;
6282
6283 if (scan == insn)
6284 foundinsn = 1;
6285
6286 if (scan != insn
6287 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6288 {
6289 /* There is a function call to this register other
6290 than the one we are checking. If we optimize
6291 this call, we need to rescan again below. */
6292 rescan = 1;
6293 }
6294
6295 /* ??? We shouldn't have to worry about SCANSET here.
6296 We should just be able to check for a REG_DEAD note
6297 on a function call. However, the REG_DEAD notes are
6298 apparently not dependable around libcalls; c-torture
6299 execute/920501-2 is a test case. If SCANSET is set,
6300 then this insn sets the register, so it must have
6301 died earlier. Unfortunately, this will only handle
6302 the cases in which the register is, in fact, set in a
6303 later insn. */
6304
6305 /* ??? We shouldn't have to use FOUNDINSN here.
6306 This dates back to when we used LOG_LINKS to find
6307 the most recent insn which sets the register. */
6308
6309 if (foundinsn
6310 && (scanset
6311 || find_reg_note (scan, REG_DEAD, reg)))
6312 {
6313 dies = scan;
6314 break;
6315 }
6316 }
6317
6318 if (! dies)
6319 {
6320 /* Either there was a branch, or some insn used REG
6321 other than as a function call address. */
6322 continue;
6323 }
6324
6325 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6326 on the insn which sets the register, and on each call insn
6327 which uses the register. In final_prescan_insn we look for
6328 the REG_LABEL_OPERAND notes, and output the appropriate label
6329 or pseudo-op. */
6330
6331 label = gen_label_rtx ();
6332 add_reg_note (link, REG_LABEL_OPERAND, label);
6333 add_reg_note (insn, REG_LABEL_OPERAND, label);
6334 if (rescan)
6335 {
6336 scan = link;
6337 do
6338 {
6339 rtx reg2;
6340
6341 scan = NEXT_INSN (scan);
6342 if (scan != insn
6343 && ((CALL_P (scan)
6344 && reg_mentioned_p (reg, scan))
6345 || ((reg2 = sfunc_uses_reg (scan))
6346 && REGNO (reg2) == REGNO (reg))))
6347 add_reg_note (scan, REG_LABEL_OPERAND, label);
6348 }
6349 while (scan != dies);
6350 }
6351 }
6352 }
6353
6354 if (TARGET_SH2)
6355 fixup_addr_diff_vecs (first);
6356
6357 if (optimize)
6358 {
6359 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6360 shorten_branches (first);
6361 }
6362
6363 /* Scan the function looking for move instructions which have to be
6364 changed to pc-relative loads and insert the literal tables. */
6365 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6366 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6367 {
6368 if (mova_p (insn))
6369 {
6370 /* ??? basic block reordering can move a switch table dispatch
6371 below the switch table. Check if that has happened.
6372 We only have the addresses available when optimizing; but then,
6373 this check shouldn't be needed when not optimizing. */
6374 if (!untangle_mova (&num_mova, &mova, insn))
6375 {
6376 insn = mova;
6377 num_mova = 0;
6378 }
6379 }
6380 else if (JUMP_TABLE_DATA_P (insn)
6381 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6382 && num_mova
6383 /* ??? loop invariant motion can also move a mova out of a
6384 loop. Since loop does this code motion anyway, maybe we
6385 should wrap UNSPEC_MOVA into a CONST, so that reload can
6386 move it back. */
6387 && ((num_mova > 1
6388 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6389 || (prev_nonnote_insn (insn)
6390 == XEXP (MOVA_LABELREF (mova), 0))))
6391 {
6392 rtx_insn *scan;
6393 int total;
6394
6395 num_mova--;
6396
6397 /* Some code might have been inserted between the mova and
6398 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6399 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6400 total += get_attr_length (scan);
6401
6402 /* range of mova is 1020, add 4 because pc counts from address of
6403 second instruction after this one, subtract 2 in case pc is 2
6404 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6405 cancels out with alignment effects of the mova itself. */
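 /* That is, 1020 + 4 - 2 = 1022, the threshold tested below.  */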
6406 if (total > 1022)
6407 {
6408 /* Change the mova into a load, and restart scanning
6409 there. broken_move will then return true for mova. */
6410 fixup_mova (mova);
6411 insn = mova;
6412 }
6413 }
6414 if (broken_move (insn)
6415 || (NONJUMP_INSN_P (insn)
6416 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6417 {
6418 rtx_insn *scan;
6419 /* Scan ahead looking for a barrier to stick the constant table
6420 behind. */
6421 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6422 rtx_insn *last_float_move = NULL;
6423 rtx last_float = 0, *last_float_addr = NULL;
6424 int need_aligned_label = 0;
6425
6426 if (num_mova && ! mova_p (mova))
6427 {
6428 /* find_barrier had to change the first mova into a
6429 pcload; thus, we have to start with this new pcload. */
6430 insn = mova;
6431 num_mova = 0;
6432 }
6433 /* Now find all the moves between the points and modify them. */
6434 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6435 {
6436 if (LABEL_P (scan))
6437 last_float = 0;
6438 if (NONJUMP_INSN_P (scan)
6439 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6440 need_aligned_label = 1;
6441 if (broken_move (scan))
6442 {
6443 rtx *patp = &PATTERN (scan), pat = *patp;
6444 rtx src, dst;
6445 rtx lab;
6446 rtx newsrc;
6447 machine_mode mode;
6448
6449 if (GET_CODE (pat) == PARALLEL)
6450 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6451 src = SET_SRC (pat);
6452 dst = SET_DEST (pat);
6453 mode = GET_MODE (dst);
6454
6455 if (mode == SImode && satisfies_constraint_I16 (src)
6456 && REGNO (dst) != FPUL_REG)
6457 {
6458 int offset = 0;
6459
6460 mode = HImode;
6461 while (GET_CODE (dst) == SUBREG)
6462 {
6463 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6464 GET_MODE (SUBREG_REG (dst)),
6465 SUBREG_BYTE (dst),
6466 GET_MODE (dst));
6467 dst = SUBREG_REG (dst);
6468 }
6469 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6470 }
6471 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6472 {
6473 /* This must be an insn that clobbers r0. */
6474 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6475 XVECLEN (PATTERN (scan), 0)
6476 - 1);
6477 rtx clobber = *clobberp;
6478
6479 gcc_assert (GET_CODE (clobber) == CLOBBER
6480 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6481
6482 if (last_float
6483 && reg_set_between_p (r0_rtx, last_float_move, scan))
6484 last_float = 0;
6485 if (last_float
6486 && TARGET_SHCOMPACT
6487 && GET_MODE_SIZE (mode) != 4
6488 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6489 last_float = 0;
6490 lab = add_constant (src, mode, last_float);
6491 if (lab)
6492 emit_insn_before (gen_mova (lab), scan);
6493 else
6494 {
6495 /* There will be a REG_UNUSED note for r0 on
6496 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6497 otherwise reorg's mark_target_live_regs will not
6498 consider r0 to be used, and we would end up with a
6499 delay slot insn in front of SCAN that clobbers r0. */
6500 rtx note
6501 = find_regno_note (last_float_move, REG_UNUSED, 0);
6502
6503 /* If we are not optimizing, then there may not be
6504 a note. */
6505 if (note)
6506 PUT_REG_NOTE_KIND (note, REG_INC);
6507
6508 *last_float_addr = r0_inc_rtx;
6509 }
6510 last_float_move = scan;
6511 last_float = src;
6512 newsrc = gen_const_mem (mode,
6513 (((TARGET_SH4 && ! TARGET_FMOVD)
6514 || REGNO (dst) == FPUL_REG)
6515 ? r0_inc_rtx
6516 : r0_rtx));
6517 last_float_addr = &XEXP (newsrc, 0);
6518
6519 /* Remove the clobber of r0. */
6520 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6521 gen_rtx_SCRATCH (Pmode));
6522 }
6523 /* This is a mova needing a label. Create it. */
6524 else if (GET_CODE (src) == UNSPEC
6525 && XINT (src, 1) == UNSPEC_MOVA
6526 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6527 {
6528 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6529 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6530 newsrc = gen_rtx_UNSPEC (SImode,
6531 gen_rtvec (1, newsrc),
6532 UNSPEC_MOVA);
6533 }
6534 else if (GET_CODE (src) == UNSPEC_VOLATILE
6535 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6536 {
6537 newsrc = XVECEXP (src, 0, 0);
6538 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6539 INSN_CODE (scan) = -1;
6540 continue;
6541 }
6542 else
6543 {
6544 lab = add_constant (src, mode, 0);
6545 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6546 newsrc = gen_const_mem (mode, newsrc);
6547 }
6548 *patp = gen_rtx_SET (dst, newsrc);
6549 INSN_CODE (scan) = -1;
6550 }
6551 }
6552 dump_table (need_aligned_label ? insn : 0, barrier);
6553 insn = barrier;
6554 }
6555 }
6556 label_ref_list_d_pool.release ();
6557 for (insn = first; insn; insn = NEXT_INSN (insn))
6558 PUT_MODE (insn, VOIDmode);
6559
6560 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6561 INSN_ADDRESSES_FREE ();
6562 split_branches (first);
6563
6564 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6565 also has an effect on the register that holds the address of the sfunc.
6566 Insert an extra dummy insn in front of each sfunc that pretends to
6567 use this register. */
6568 if (flag_delayed_branch)
6569 {
6570 for (insn = first; insn; insn = NEXT_INSN (insn))
6571 {
6572 rtx reg = sfunc_uses_reg (insn);
6573
6574 if (! reg)
6575 continue;
6576 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6577 }
6578 }
6579 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6580 }
6581
6582 /* Return the UID of the insn that follows the specified label. */
6583 int
6584 get_dest_uid (rtx label, int max_uid)
6585 {
6586 rtx_insn *dest = next_real_insn (label);
6587 int dest_uid;
6588 if (! dest)
6589 /* This can happen for an undefined label. */
6590 return 0;
6591 dest_uid = INSN_UID (dest);
6592 /* If this is a newly created branch redirection blocking instruction,
6593 we cannot index the branch_uid or insn_addresses arrays with its
6594 uid. But then, we won't need to, because the actual destination is
6595 the following branch. */
6596 while (dest_uid >= max_uid)
6597 {
6598 dest = NEXT_INSN (dest);
6599 dest_uid = INSN_UID (dest);
6600 }
6601 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6602 return 0;
6603 return dest_uid;
6604 }
6605
6606 /* Split condbranches that are out of range. Also add clobbers for
6607 scratch registers that are needed in far jumps.
6608 We do this before delay slot scheduling, so that it can take our
6609 newly created instructions into account. It also allows us to
6610 find branches with common targets more easily. */
6611 static void
6612 split_branches (rtx_insn *first)
6613 {
6614 rtx_insn *insn;
6615 struct far_branch **uid_branch, *far_branch_list = 0;
6616 int max_uid = get_max_uid ();
6617 int ok;
6618
6619 /* Find out which branches are out of range. */
6620 shorten_branches (first);
6621
6622 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6623 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6624
6625 for (insn = first; insn; insn = NEXT_INSN (insn))
6626 if (! INSN_P (insn))
6627 continue;
6628 else if (insn->deleted ())
6629 {
6630 /* Shorten_branches would split this instruction again,
6631 so transform it into a note. */
6632 SET_INSN_DELETED (insn);
6633 }
6634 else if (JUMP_P (insn))
6635 {
6636 enum attr_type type = get_attr_type (insn);
6637 if (type == TYPE_CBRANCH)
6638 {
6639 rtx_insn *next, *beyond;
6640
6641 if (get_attr_length (insn) > 4)
6642 {
6643 rtx src = SET_SRC (PATTERN (insn));
6644 rtx olabel = XEXP (XEXP (src, 1), 0);
6645 int addr = INSN_ADDRESSES (INSN_UID (insn));
6646 rtx_insn *label = 0;
6647 int dest_uid = get_dest_uid (olabel, max_uid);
6648 struct far_branch *bp = uid_branch[dest_uid];
6649
6650 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6651 the label if the LABEL_NUSES count drops to zero. There is
6652 always a jump_optimize pass that sets these values, but it
6653 proceeds to delete unreferenced code, and then if not
6654 optimizing, to un-delete the deleted instructions, thus
6655 leaving labels with too low uses counts. */
6656 if (! optimize)
6657 {
6658 JUMP_LABEL (insn) = olabel;
6659 LABEL_NUSES (olabel)++;
6660 }
6661 if (! bp)
6662 {
6663 bp = (struct far_branch *) alloca (sizeof *bp);
6664 uid_branch[dest_uid] = bp;
6665 bp->prev = far_branch_list;
6666 far_branch_list = bp;
6667 bp->far_label = as_a <rtx_insn *> (
6668 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6669 0));
6670 LABEL_NUSES (bp->far_label)++;
6671 }
6672 else
6673 {
6674 label = bp->near_label;
6675 if (! label && bp->address - addr >= CONDJUMP_MIN)
6676 {
6677 rtx_insn *block = bp->insert_place;
6678
6679 if (GET_CODE (PATTERN (block)) == RETURN)
6680 block = PREV_INSN (block);
6681 else
6682 block = gen_block_redirect (block,
6683 bp->address, 2);
6684 label = emit_label_after (gen_label_rtx (),
6685 PREV_INSN (block));
6686 bp->near_label = label;
6687 }
6688 else if (label && ! NEXT_INSN (label))
6689 {
6690 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6691 bp->insert_place = insn;
6692 else
6693 gen_far_branch (bp);
6694 }
6695 }
6696 if (! label
6697 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6698 {
6699 bp->near_label = label = gen_label_rtx ();
6700 bp->insert_place = insn;
6701 bp->address = addr;
6702 }
6703 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6704 gcc_assert (ok);
6705 }
6706 else
6707 {
6708 /* get_attr_length (insn) == 2 */
6709 /* Check if we have a pattern where reorg wants to redirect
6710 the branch to a label from an unconditional branch that
6711 is too far away. */
6712 /* We can't use JUMP_LABEL here because it might be undefined
6713 when not optimizing. */
6714 /* A syntax error might cause beyond to be NULL_RTX. */
6715 beyond
6716 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6717 0));
6718
6719 if (beyond
6720 && (JUMP_P (beyond)
6721 || ((beyond = next_active_insn (beyond))
6722 && JUMP_P (beyond)))
6723 && GET_CODE (PATTERN (beyond)) == SET
6724 && recog_memoized (beyond) == CODE_FOR_jump_compact
6725 && ((INSN_ADDRESSES
6726 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6727 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6728 > 252 + 258 + 2))
6729 gen_block_redirect (beyond,
6730 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6731 }
6732
6733 next = next_active_insn (insn);
6734
6735 if (next
6736 && (JUMP_P (next)
6737 || ((next = next_active_insn (next))
6738 && JUMP_P (next)))
6739 && GET_CODE (PATTERN (next)) == SET
6740 && recog_memoized (next) == CODE_FOR_jump_compact
6741 && ((INSN_ADDRESSES
6742 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6743 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6744 > 252 + 258 + 2))
6745 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6746 }
6747 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6748 {
6749 int addr = INSN_ADDRESSES (INSN_UID (insn));
6750 rtx_insn *far_label = 0;
6751 int dest_uid = 0;
6752 struct far_branch *bp;
6753
6754 if (type == TYPE_JUMP)
6755 {
6756 if (CROSSING_JUMP_P (insn))
6757 {
6758 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6759 insn);
6760 continue;
6761 }
6762
6763 far_label = as_a <rtx_insn *> (
6764 XEXP (SET_SRC (PATTERN (insn)), 0));
6765 dest_uid = get_dest_uid (far_label, max_uid);
6766 if (! dest_uid)
6767 {
6768 /* Parse errors can lead to labels outside
6769 the insn stream. */
6770 if (! NEXT_INSN (far_label))
6771 continue;
6772
6773 if (! optimize)
6774 {
6775 JUMP_LABEL (insn) = far_label;
6776 LABEL_NUSES (far_label)++;
6777 }
6778 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6779 far_label = 0;
6780 }
6781 }
6782 bp = uid_branch[dest_uid];
6783 if (! bp)
6784 {
6785 bp = (struct far_branch *) alloca (sizeof *bp);
6786 uid_branch[dest_uid] = bp;
6787 bp->prev = far_branch_list;
6788 far_branch_list = bp;
6789 bp->near_label = 0;
6790 bp->far_label = far_label;
6791 if (far_label)
6792 LABEL_NUSES (far_label)++;
6793 }
6794 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6795 if (addr - bp->address <= CONDJUMP_MAX)
6796 emit_label_after (bp->near_label, PREV_INSN (insn));
6797 else
6798 {
6799 gen_far_branch (bp);
6800 bp->near_label = 0;
6801 }
6802 else
6803 bp->near_label = 0;
6804 bp->address = addr;
6805 bp->insert_place = insn;
6806 if (! far_label)
6807 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6808 else
6809 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6810 }
6811 }
6812 /* Generate all pending far branches,
6813 and free our references to the far labels. */
6814 while (far_branch_list)
6815 {
6816 if (far_branch_list->near_label
6817 && ! NEXT_INSN (far_branch_list->near_label))
6818 gen_far_branch (far_branch_list);
6819 if (optimize
6820 && far_branch_list->far_label
6821 && ! --LABEL_NUSES (far_branch_list->far_label))
6822 delete_insn (far_branch_list->far_label);
6823 far_branch_list = far_branch_list->prev;
6824 }
6825
6826 /* Instruction length information is no longer valid due to the new
6827 instructions that have been generated. */
6828 init_insn_lengths ();
6829 }
6830
6831 /* Dump out instruction addresses, which is useful for debugging the
6832 constant pool table stuff.
6833
6834 If relaxing, output the label and pseudo-ops used to link together
6835 calls and the instruction which set the registers.
6836
6837 ??? The addresses printed by this routine for insns are nonsense for
6838 insns which are inside of a sequence where none of the inner insns have
6839 variable length. This is because the second pass of shorten_branches
6840 does not bother to update them. */
6841 void
6842 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6843 int noperands ATTRIBUTE_UNUSED)
6844 {
6845 if (TARGET_DUMPISIZE)
6846 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6847
6848 if (TARGET_RELAX)
6849 {
6850 rtx note;
6851
6852 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6853 if (note)
6854 {
6855 rtx pattern;
6856
6857 pattern = PATTERN (insn);
6858 if (GET_CODE (pattern) == PARALLEL)
6859 pattern = XVECEXP (pattern, 0, 0);
6860 switch (GET_CODE (pattern))
6861 {
6862 case SET:
6863 if (GET_CODE (SET_SRC (pattern)) != CALL
6864 && get_attr_type (insn) != TYPE_SFUNC)
6865 {
6866 targetm.asm_out.internal_label
6867 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6868 break;
6869 }
6870 /* else FALLTHROUGH */
6871 case CALL:
6872 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6873 CODE_LABEL_NUMBER (XEXP (note, 0)));
6874 break;
6875
6876 default:
6877 gcc_unreachable ();
6878 }
6879 }
6880 }
6881 }
6882
6883 /* Dump out any constants accumulated in the final pass. These will
6884 only be labels. */
6885 const char *
6886 output_jump_label_table (void)
6887 {
6888 int i;
6889
6890 if (pool_size)
6891 {
6892 fprintf (asm_out_file, "\t.align 2\n");
6893 for (i = 0; i < pool_size; i++)
6894 {
6895 pool_node *p = &pool_vector[i];
6896
6897 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6898 CODE_LABEL_NUMBER (p->label));
6899 output_asm_insn (".long %O0", &p->value);
6900 }
6901 pool_size = 0;
6902 }
6903
6904 return "";
6905 }
6906 \f
6907 /* A full frame looks like:
6908
6909 arg-5
6910 arg-4
6911 [ if current_function_anonymous_args
6912 arg-3
6913 arg-2
6914 arg-1
6915 arg-0 ]
6916 saved-fp
6917 saved-r10
6918 saved-r11
6919 saved-r12
6920 saved-pr
6921 local-n
6922 ..
6923 local-1
6924 local-0 <- fp points here.
6925
6926 Number of bytes pushed for anonymous args, used to pass information
6927 between expand_prologue and expand_epilogue.
6928
6929 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6930 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6931 for an epilogue and a negative value means that it's for a sibcall
6932 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6933 all the registers that are about to be restored, and hence dead. */
6934 static void
6935 output_stack_adjust (int size, rtx reg, int epilogue_p,
6936 HARD_REG_SET *live_regs_mask, bool frame_p)
6937 {
6938 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6939 if (size)
6940 {
6941 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6942
6943 /* This test is bogus, as output_stack_adjust is used to re-align the
6944 stack. */
6945 #if 0
6946 gcc_assert (!(size % align));
6947 #endif
6948
6949 if (CONST_OK_FOR_ADD (size))
6950 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6951 /* Try to do it with two partial adjustments; however, we must make
6952 sure that the stack is properly aligned at all times, in case
6953 an interrupt occurs between the two partial adjustments. */
6954 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6955 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6956 {
6957 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6958 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
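 /* For example, assuming the usual -128..127 add-immediate range and
    4-byte stack alignment, size == 200 is split into 200/2 & -4 == 100
    followed by 200 - 100 == 100, so the stack stays aligned between
    the two additions.  */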
6959 }
6960 else
6961 {
6962 rtx const_reg;
6963 rtx insn;
6964 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6965 int i;
6966
6967 /* If TEMP is invalid, we could temporarily save a general
6968 register to MACL. However, there is currently no need
6969 to handle this case, so just die when we see it. */
6970 if (epilogue_p < 0
6971 || current_function_interrupt
6972 || ! call_really_used_regs[temp] || fixed_regs[temp])
6973 temp = -1;
6974 if (temp < 0 && ! current_function_interrupt
6975 && (TARGET_SHMEDIA || epilogue_p >= 0))
6976 {
6977 HARD_REG_SET temps;
6978 COPY_HARD_REG_SET (temps, call_used_reg_set);
6979 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6980 if (epilogue_p > 0)
6981 {
6982 int nreg = 0;
6983 if (crtl->return_rtx)
6984 {
6985 machine_mode mode;
6986 mode = GET_MODE (crtl->return_rtx);
6987 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6988 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6989 }
6990 for (i = 0; i < nreg; i++)
6991 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6992 if (crtl->calls_eh_return)
6993 {
6994 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6995 for (i = 0; i <= 3; i++)
6996 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6997 }
6998 }
6999 if (TARGET_SHMEDIA && epilogue_p < 0)
7000 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
7001 CLEAR_HARD_REG_BIT (temps, i);
7002 if (epilogue_p <= 0)
7003 {
7004 for (i = FIRST_PARM_REG;
7005 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
7006 CLEAR_HARD_REG_BIT (temps, i);
7007 if (cfun->static_chain_decl != NULL)
7008 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
7009 }
7010 temp = scavenge_reg (&temps);
7011 }
7012 if (temp < 0 && live_regs_mask)
7013 {
7014 HARD_REG_SET temps;
7015
7016 COPY_HARD_REG_SET (temps, *live_regs_mask);
7017 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
7018 temp = scavenge_reg (&temps);
7019 }
7020 if (temp < 0)
7021 {
7022 rtx adj_reg, tmp_reg, mem;
7023
7024 /* If we reached here, the most likely case is the (sibcall)
7025 epilogue for non-SHmedia. Put a special push/pop sequence
7026 for such a case as a last resort. This looks lengthy, but
7027 it is not a problem because it seems to be very
7028 rare. */
7029
7030 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
7031
7032
7033 /* ??? There is still the slight possibility that r4 or
7034 r5 have been reserved as fixed registers or assigned
7035 as global registers, and they change during an
7036 interrupt. There are possible ways to handle this:
7037
7038 - If we are adjusting the frame pointer (r14), we can do
7039 with a single temp register and an ordinary push / pop
7040 on the stack.
7041 - Grab any call-used or call-saved registers (i.e. not
7042 fixed or globals) for the temps we need. We might
7043 also grab r14 if we are adjusting the stack pointer.
7044 If we can't find enough available registers, issue
7045 a diagnostic and die - the user must have reserved
7046 way too many registers.
7047 But since all this is rather unlikely to happen and
7048 would require extra testing, we just die if r4 / r5
7049 are not available. */
7050 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
7051 && !global_regs[4] && !global_regs[5]);
7052
7053 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
7054 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
7055 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
7056 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
7057 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
7058 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7059 emit_move_insn (mem, tmp_reg);
7060 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
7061 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7062 emit_move_insn (mem, tmp_reg);
7063 emit_move_insn (reg, adj_reg);
7064 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7065 emit_move_insn (adj_reg, mem);
7066 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7067 emit_move_insn (tmp_reg, mem);
7068 /* Tell flow the insns that pop r4/r5 aren't dead. */
7069 emit_use (tmp_reg);
7070 emit_use (adj_reg);
7071 return;
7072 }
7073 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
7074
7075 /* If SIZE is negative, subtract the positive value.
7076 This sometimes allows a constant pool entry to be shared
7077 between prologue and epilogue code. */
7078 if (size < 0)
7079 {
7080 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
7081 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
7082 }
7083 else
7084 {
7085 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
7086 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
7087 }
7088 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7089 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
7090 GEN_INT (size))));
7091 }
7092 }
7093 }
7094
7095 /* Emit the specified insn and mark it as frame related.
7096 FIXME: Rename this to emit_frame_insn. */
7097 static rtx_insn *
7098 frame_insn (rtx x)
7099 {
7100 rtx_insn *insn = emit_insn (x);
7101 RTX_FRAME_RELATED_P (insn) = 1;
7102 return insn;
7103 }
7104
7105 /* Output RTL to push register RN onto the stack. */
7106 static rtx
7107 push (int rn)
7108 {
7109 rtx x;
7110 if (rn == FPUL_REG)
7111 x = gen_push_fpul ();
7112 else if (rn == FPSCR_REG)
7113 x = gen_push_fpscr ();
7114 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7115 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7116 {
7117 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7118 return NULL_RTX;
7119 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
7120 }
7121 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7122 x = gen_push_e (gen_rtx_REG (SFmode, rn));
7123 else
7124 x = gen_push (gen_rtx_REG (SImode, rn));
7125
7126 x = frame_insn (x);
7127 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7128 return x;
7129 }
7130
7131 /* Output RTL to pop register RN from the stack. */
7132 static void
7133 pop (int rn)
7134 {
7135 rtx x, sp_reg, reg;
7136 if (rn == FPUL_REG)
7137 x = gen_pop_fpul ();
7138 else if (rn == FPSCR_REG)
7139 x = gen_pop_fpscr ();
7140 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7141 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7142 {
7143 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7144 return;
7145 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
7146 }
7147 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7148 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7149 else
7150 x = gen_pop (gen_rtx_REG (SImode, rn));
7151
7152 x = emit_insn (x);
7153
7154 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7155 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7156 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7157 : SET_DEST (PATTERN (x)));
7158 add_reg_note (x, REG_CFA_RESTORE, reg);
7159 add_reg_note (x, REG_CFA_ADJUST_CFA,
7160 gen_rtx_SET (sp_reg,
7161 plus_constant (SImode, sp_reg,
7162 GET_MODE_SIZE (GET_MODE (reg)))));
7163 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7164 RTX_FRAME_RELATED_P (x) = 1;
7165 }
7166
7167 /* Generate code to push the regs specified in the mask. */
7168 static void
7169 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7170 {
7171 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7172 int skip_fpscr = 0;
7173
7174 /* Push PR last; this gives better latencies after the prologue, and
7175 leaves candidates for the return delay slot when no general
7176 registers are pushed. */
7177 for (; i < FIRST_PSEUDO_REGISTER; i++)
7178 {
7179 /* If this is an interrupt handler, and the SZ bit varies,
7180 and we have to push any floating point register, we need
7181 to switch to the correct precision first. */
7182 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7183 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7184 {
7185 HARD_REG_SET unsaved;
7186
7187 push (FPSCR_REG);
7188 COMPL_HARD_REG_SET (unsaved, *mask);
7189 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7190 skip_fpscr = 1;
7191 }
7192 if (i != PR_REG
7193 && (i != FPSCR_REG || ! skip_fpscr)
7194 && TEST_HARD_REG_BIT (*mask, i))
7195 {
7196 /* If the ISR has the RESBANK attribute assigned, don't push any of
7197 the following registers: R0-R14, MACH, MACL and GBR. */
7198 if (! (sh_cfun_resbank_handler_p ()
7199 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7200 || i == MACH_REG
7201 || i == MACL_REG
7202 || i == GBR_REG)))
7203 push (i);
7204 }
7205 }
7206
7207 /* Push banked registers last to improve delay slot opportunities. */
7208 if (interrupt_handler)
7209 {
7210 bool use_movml = false;
7211
7212 if (TARGET_SH2A)
7213 {
7214 unsigned int count = 0;
7215
7216 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7217 if (TEST_HARD_REG_BIT (*mask, i))
7218 count++;
7219 else
7220 break;
7221
7222 /* Use movml when all banked registers are pushed. */
7223 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7224 use_movml = true;
7225 }
7226
7227 if (sh_cfun_resbank_handler_p ())
7228 ; /* Do nothing. */
7229 else if (use_movml)
7230 {
7231 rtx x, mem, reg, set;
7232 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7233
7234 /* We must avoid scheduling the multiple store insn together with
7235 other insns. */
7236 emit_insn (gen_blockage ());
7237 x = gen_movml_push_banked (sp_reg);
7238 x = frame_insn (x);
7239 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7240 {
7241 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7242 reg = gen_rtx_REG (SImode, i);
7243 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7244 }
7245
7246 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
7247 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7248 emit_insn (gen_blockage ());
7249 }
7250 else
7251 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7252 if (TEST_HARD_REG_BIT (*mask, i))
7253 push (i);
7254 }
7255
7256 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7257 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7258 push (PR_REG);
7259 }
7260
7261 /* Calculate how much extra space is needed to save all callee-saved
7262 target registers.
7263 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7264 static int
7265 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7266 {
7267 int reg;
7268 int stack_space = 0;
7269 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7270
7271 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7272 if ((! call_really_used_regs[reg] || interrupt_handler)
7273 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7274 /* Leave space to save this target register on the stack,
7275 in case target register allocation wants to use it. */
7276 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7277 return stack_space;
7278 }
7279
7280 /* Decide whether we should reserve space for callee-save target registers,
7281 in case target register allocation wants to use them. REGS_SAVED is
7282 the space, in bytes, that is already required for register saves.
7283 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7284 static int
7285 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7286 HARD_REG_SET *live_regs_mask)
7287 {
7288 if (optimize_size)
7289 return 0;
7290 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7291 }
7292
7293 /* Decide how much space to reserve for callee-save target registers
7294 in case target register allocation wants to use them.
7295 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7296 static int
7297 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7298 {
7299 if (shmedia_space_reserved_for_target_registers)
7300 return shmedia_target_regs_stack_space (live_regs_mask);
7301 else
7302 return 0;
7303 }
7304
7305 /* Work out the registers which need to be saved, both as a mask and a
7306 count of saved words. Return the count.
7307
7308 If doing a pragma interrupt function, then push all regs used by the
7309 function, and if we call another function (we can tell by looking at PR),
7310 make sure that all the regs it clobbers are safe too. */
7311 static int
7312 calc_live_regs (HARD_REG_SET *live_regs_mask)
7313 {
7314 unsigned int reg;
7315 int count;
7316 tree attrs;
7317 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7318 bool nosave_low_regs;
7319 int pr_live, has_call;
7320
7321 attrs = DECL_ATTRIBUTES (current_function_decl);
7322 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7323 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7324 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7325 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7326
7327 CLEAR_HARD_REG_SET (*live_regs_mask);
7328 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7329 && df_regs_ever_live_p (FPSCR_REG))
7330 target_flags &= ~MASK_FPU_SINGLE;
7331 /* If we can avoid a lot of save insns by switching to double mode, do that. */
7332 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7333 && TARGET_FPU_SINGLE)
7334 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7335 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7336 && (! call_really_used_regs[reg]
7337 || interrupt_handler)
7338 && ++count > 2)
7339 {
7340 target_flags &= ~MASK_FPU_SINGLE;
7341 break;
7342 }
7343 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7344 knows how to use it. That means the pseudo originally allocated for
7345 the initial value can become the PR_MEDIA_REG hard register, as seen for
7346 execute/20010122-1.c:test9. */
7347 if (TARGET_SHMEDIA)
7348 /* ??? This function is called from initial_elimination_offset, hence we
7349 can't use the result of sh_media_register_for_return here. */
7350 pr_live = sh_pr_n_sets ();
7351 else
7352 {
7353 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7354 pr_live = (pr_initial
7355 ? (!REG_P (pr_initial)
7356 || REGNO (pr_initial) != (PR_REG))
7357 : df_regs_ever_live_p (PR_REG));
7358 /* For SHcompact, if not optimizing, we end up with a memory reference
7359 using the return address pointer for __builtin_return_address even
7360 though there is no actual need to put the PR register on the stack. */
7361 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7362 }
7363 /* Force PR to be live if the prologue has to call the SHmedia
7364 argument decoder or register saver. */
7365 if (TARGET_SHCOMPACT
7366 && ((crtl->args.info.call_cookie
7367 & ~ CALL_COOKIE_RET_TRAMP (1))
7368 || crtl->saves_all_registers))
7369 pr_live = 1;
7370 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7371 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7372 {
7373 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7374 ? pr_live
7375 : interrupt_handler
7376 ? (/* Need to save all the regs ever live. */
7377 (df_regs_ever_live_p (reg)
7378 || (call_really_used_regs[reg]
7379 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7380 || reg == PIC_OFFSET_TABLE_REGNUM)
7381 && has_call)
7382 || (TARGET_SHMEDIA && has_call
7383 && REGISTER_NATURAL_MODE (reg) == SImode
7384 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7385 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7386 && reg != RETURN_ADDRESS_POINTER_REGNUM
7387 && reg != T_REG && reg != GBR_REG
7388 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7389 /* Push fpscr only on targets which have an FPU. */
7390 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7391 : (/* Only push those regs which are used and need to be saved. */
7392 (TARGET_SHCOMPACT
7393 && flag_pic
7394 && crtl->args.info.call_cookie
7395 && reg == PIC_OFFSET_TABLE_REGNUM)
7396 || (df_regs_ever_live_p (reg)
7397 && ((!call_really_used_regs[reg]
7398 && !(reg != PIC_OFFSET_TABLE_REGNUM
7399 && fixed_regs[reg] && call_used_regs[reg]))
7400 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7401 || (crtl->calls_eh_return
7402 && (reg == EH_RETURN_DATA_REGNO (0)
7403 || reg == EH_RETURN_DATA_REGNO (1)
7404 || reg == EH_RETURN_DATA_REGNO (2)
7405 || reg == EH_RETURN_DATA_REGNO (3)))
7406 || ((reg == MACL_REG || reg == MACH_REG)
7407 && df_regs_ever_live_p (reg)
7408 && sh_cfun_attr_renesas_p ())
7409 ))
7410 {
7411 SET_HARD_REG_BIT (*live_regs_mask, reg);
7412 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7413
7414 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7415 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7416 {
7417 if (FP_REGISTER_P (reg))
7418 {
7419 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7420 {
7421 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7422 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7423 }
7424 }
7425 else if (XD_REGISTER_P (reg))
7426 {
7427 /* Must switch to double mode to access these registers. */
7428 target_flags &= ~MASK_FPU_SINGLE;
7429 }
7430 }
7431 }
7432 if (nosave_low_regs && reg == R8_REG)
7433 break;
7434 }
7435 /* If we have a target register optimization pass after prologue / epilogue
7436 threading, we need to assume all target registers will be live even if
7437 they aren't now. */
7438 if (flag_branch_target_load_optimize2
7439 && TARGET_SAVE_ALL_TARGET_REGS
7440 && shmedia_space_reserved_for_target_registers)
7441 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7442 if ((! call_really_used_regs[reg] || interrupt_handler)
7443 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7444 {
7445 SET_HARD_REG_BIT (*live_regs_mask, reg);
7446 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7447 }
7448 /* If this is an interrupt handler, we don't have any call-clobbered
7449 registers we can conveniently use for target register save/restore.
7450 Make sure we save at least one general purpose register when we need
7451 to save target registers. */
7452 if (interrupt_handler
7453 && hard_reg_set_intersect_p (*live_regs_mask,
7454 reg_class_contents[TARGET_REGS])
7455 && ! hard_reg_set_intersect_p (*live_regs_mask,
7456 reg_class_contents[GENERAL_REGS]))
7457 {
7458 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7459 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7460 }
7461
7462 return count;
7463 }
7464
7465 /* Code to generate prologue and epilogue sequences */
7466
7467 /* PUSHED is the number of bytes that are being pushed on the
7468 stack for register saves. Return the frame size, padded
7469 appropriately so that the stack stays properly aligned. */
7470 static HOST_WIDE_INT
7471 rounded_frame_size (int pushed)
7472 {
7473 HOST_WIDE_INT size = get_frame_size ();
7474 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7475
7476 if (ACCUMULATE_OUTGOING_ARGS)
7477 size += crtl->outgoing_args_size;
7478
7479 return ((size + pushed + align - 1) & -align) - pushed;
7480 }
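
/* For illustration (a sketch, not part of the original source): with
get_frame_size () = 10, PUSHED = 8 and a 128-bit STACK_BOUNDARY
(so align = 16), the expression above computes
((10 + 8 + 15) & -16) - 8 = 32 - 8 = 24,
i.e. a 24-byte frame, so that the frame plus the 8 bytes of register
saves keeps the stack pointer 16-byte aligned. */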
7481
7482 /* Choose a call-clobbered target-branch register that remains
7483 unchanged throughout the function. We set it up as the return
7484 value in the prologue. */
7485 int
7486 sh_media_register_for_return (void)
7487 {
7488 int regno;
7489 int tr0_used;
7490
7491 if (! crtl->is_leaf)
7492 return -1;
7493 if (lookup_attribute ("interrupt_handler",
7494 DECL_ATTRIBUTES (current_function_decl)))
7495 return -1;
7496 if (sh_cfun_interrupt_handler_p ())
7497 return -1;
7498
7499 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7500
7501 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7502 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7503 return regno;
7504
7505 return -1;
7506 }
7507
7508 /* The maximum registers we need to save are:
7509 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7510 - 32 floating point registers (for each pair, we save none,
7511 one single precision value, or a double precision value).
7512 - 8 target registers
7513 Delimiter entries are accounted for separately in save_schedule below. */
7514 #define MAX_SAVED_REGS (62+32+8)
7515
7516 typedef struct save_entry_s
7517 {
7518 unsigned char reg;
7519 unsigned char mode;
7520 short offset;
7521 } save_entry;
7522
7523 #define MAX_TEMPS 4
7524
7525 /* There will be a delimiter entry with VOIDmode both at the start and the
7526 end of a filled in schedule. The end delimiter has the offset of the
7527 save with the smallest (i.e. most negative) offset. */
7528 typedef struct save_schedule_s
7529 {
7530 save_entry entries[MAX_SAVED_REGS + 2];
7531 int temps[MAX_TEMPS+1];
7532 } save_schedule;
7533
7534 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7535 use reverse order. Returns the last entry written to (not counting
7536 the delimiter). OFFSET_BASE is a number to be added to all offset
7537 entries. */
7538 static save_entry *
7539 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7540 int offset_base)
7541 {
7542 int align, i;
7543 save_entry *entry = schedule->entries;
7544 int tmpx = 0;
7545 int offset;
7546
7547 if (! current_function_interrupt)
7548 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7549 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7550 && ! FUNCTION_ARG_REGNO_P (i)
7551 && i != FIRST_RET_REG
7552 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7553 && ! (crtl->calls_eh_return
7554 && (i == EH_RETURN_STACKADJ_REGNO
7555 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7556 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7557 schedule->temps[tmpx++] = i;
7558 entry->reg = -1;
7559 entry->mode = VOIDmode;
7560 entry->offset = offset_base;
7561 entry++;
7562 /* We loop twice: first, we save 8-byte aligned registers at the
7563 higher addresses, which are known to be aligned. Then, we
7564 proceed to saving 32-bit registers that don't need 8-byte
7565 alignment.
7566 If this is an interrupt function, all registers that need saving
7567 need to be saved in full. Moreover, we need to postpone saving
7568 target registers until we have saved some general purpose registers
7569 we can then use as scratch registers. */
7570 offset = offset_base;
7571 for (align = 1; align >= 0; align--)
7572 {
7573 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7574 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7575 {
7576 machine_mode mode = REGISTER_NATURAL_MODE (i);
7577 int reg = i;
7578
7579 if (current_function_interrupt)
7580 {
7581 if (TARGET_REGISTER_P (i))
7582 continue;
7583 if (GENERAL_REGISTER_P (i))
7584 mode = DImode;
7585 }
7586 if (mode == SFmode && (i % 2) == 1
7587 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7588 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7589 {
7590 mode = DFmode;
7591 i--;
7592 reg--;
7593 }
7594
7595 /* If we're doing the aligned pass and this is not aligned,
7596 or we're doing the unaligned pass and this is aligned,
7597 skip it. */
7598 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7599 != align)
7600 continue;
7601
7602 if (current_function_interrupt
7603 && GENERAL_REGISTER_P (i)
7604 && tmpx < MAX_TEMPS)
7605 schedule->temps[tmpx++] = i;
7606
7607 offset -= GET_MODE_SIZE (mode);
7608 entry->reg = i;
7609 entry->mode = mode;
7610 entry->offset = offset;
7611 entry++;
7612 }
7613 if (align && current_function_interrupt)
7614 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7615 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7616 {
7617 offset -= GET_MODE_SIZE (DImode);
7618 entry->reg = i;
7619 entry->mode = DImode;
7620 entry->offset = offset;
7621 entry++;
7622 }
7623 }
7624 entry->reg = -1;
7625 entry->mode = VOIDmode;
7626 entry->offset = offset;
7627 schedule->temps[tmpx] = -1;
7628 return entry - 1;
7629 }
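
/* A sketch of a filled-in schedule (not part of the original source):
entries[0] is the start delimiter at OFFSET_BASE, the following
entries hold the saved registers in order of decreasing offset
(8-byte aligned saves first), and the final entry is the end
delimiter carrying the lowest (most negative) offset. temps[] lists
the general registers usable as scratch registers, terminated by -1;
the function returns a pointer to the last non-delimiter entry. */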
7630
7631 /* Expand code for the function prologue. */
7632 void
7633 sh_expand_prologue (void)
7634 {
7635 HARD_REG_SET live_regs_mask;
7636 int d, i;
7637 int d_rounding = 0;
7638 int save_flags = target_flags;
7639 int pretend_args;
7640 int stack_usage;
7641 tree sp_switch_attr
7642 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7643
7644 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7645
7646 /* We have pretend args if we had an object sent partially in registers
7647 and partially on the stack, e.g. a large structure. */
7648 pretend_args = crtl->args.pretend_args_size;
7649 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7650 && (NPARM_REGS(SImode)
7651 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7652 pretend_args = 0;
7653
7654 output_stack_adjust (-pretend_args
7655 - crtl->args.info.stack_regs * 8,
7656 stack_pointer_rtx, 0, NULL, true);
7657 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7658
7659 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7660 /* We're going to use the PIC register to load the address of the
7661 incoming-argument decoder and/or of the return trampoline from
7662 the GOT, so make sure the PIC register is preserved and
7663 initialized. */
7664 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7665
7666 if (TARGET_SHCOMPACT
7667 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7668 {
7669 int reg;
7670
7671 /* First, make all registers with incoming arguments that will
7672 be pushed onto the stack live, so that register renaming
7673 doesn't overwrite them. */
7674 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7675 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7676 >= NPARM_REGS (SImode) - reg)
7677 for (; reg < NPARM_REGS (SImode); reg++)
7678 emit_insn (gen_shcompact_preserve_incoming_args
7679 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7680 else if (CALL_COOKIE_INT_REG_GET
7681 (crtl->args.info.call_cookie, reg) == 1)
7682 emit_insn (gen_shcompact_preserve_incoming_args
7683 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7684
7685 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7686 stack_pointer_rtx);
7687 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7688 GEN_INT (crtl->args.info.call_cookie));
7689 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7690 gen_rtx_REG (SImode, R0_REG));
7691 }
7692 else if (TARGET_SHMEDIA)
7693 {
7694 int tr = sh_media_register_for_return ();
7695
7696 if (tr >= 0)
7697 emit_move_insn (gen_rtx_REG (DImode, tr),
7698 gen_rtx_REG (DImode, PR_MEDIA_REG));
7699 }
7700
7701 /* Emit the code for SETUP_VARARGS. */
7702 if (cfun->stdarg)
7703 {
7704 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7705 {
7706 /* Push arg regs as if they'd been provided by the caller on the stack. */
7707 for (i = 0; i < NPARM_REGS(SImode); i++)
7708 {
7709 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7710
7711 if (i >= (NPARM_REGS(SImode)
7712 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7713 ))
7714 break;
7715 push (rn);
7716 stack_usage += GET_MODE_SIZE (SImode);
7717 }
7718 }
7719 }
7720
7721 /* If we're supposed to switch stacks at function entry, do so now. */
7722 if (sp_switch_attr)
7723 {
7724 rtx lab, newsrc;
7725 /* The argument specifies a variable holding the address of the
7726 stack the interrupt function should switch to/from at entry/exit. */
7727 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7728 const char *s
7729 = ggc_strdup (TREE_STRING_POINTER (arg));
7730 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7731
7732 lab = add_constant (sp_switch, SImode, 0);
7733 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7734
7735 emit_insn (gen_sp_switch_1 (newsrc));
7736 }
7737
7738 d = calc_live_regs (&live_regs_mask);
7739 /* ??? Maybe we could save some switching if we can move a mode switch
7740 that already happens to be at the function start into the prologue. */
7741 if (target_flags != save_flags && ! current_function_interrupt)
7742 emit_insn (gen_toggle_sz ());
7743
7744 if (TARGET_SH5)
7745 {
7746 int offset_base, offset;
7747 rtx r0 = NULL_RTX;
7748 int offset_in_r0 = -1;
7749 int sp_in_r0 = 0;
7750 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7751 int total_size, save_size;
7752 save_schedule schedule;
7753 save_entry *entry;
7754 int *tmp_pnt;
7755
7756 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7757 && ! current_function_interrupt)
7758 r0 = gen_rtx_REG (Pmode, R0_REG);
7759
7760 /* D is the actual number of bytes that we need for saving registers;
7761 however, in initial_elimination_offset we have committed to using
7762 an additional TREGS_SPACE bytes. In order to keep both the
7763 addresses of arguments supplied by the caller and local variables
7764 valid, we must keep this gap. Place it between the incoming
7765 arguments and the actually saved registers in a bid to optimize
7766 locality of reference. */
7767 total_size = d + tregs_space;
7768 total_size += rounded_frame_size (total_size);
7769 save_size = total_size - rounded_frame_size (d);
7770 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7771 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7772 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7773
7774 /* If adjusting the stack in a single step costs nothing extra, do so.
7775 I.e. either if a single addi is enough, or we need a movi anyway,
7776 and we don't exceed the maximum offset range (the test for the
7777 latter is conservative for simplicity). */
7778 if (TARGET_SHMEDIA
7779 && (CONST_OK_FOR_I10 (-total_size)
7780 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7781 && total_size <= 2044)))
7782 d_rounding = total_size - save_size;
7783
7784 offset_base = d + d_rounding;
7785
7786 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7787 0, NULL, true);
7788 stack_usage += save_size + d_rounding;
7789
7790 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7791 tmp_pnt = schedule.temps;
7792 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7793 {
7794 machine_mode mode = (machine_mode) entry->mode;
7795 unsigned int reg = entry->reg;
7796 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7797 rtx orig_reg_rtx;
7798
7799 offset = entry->offset;
7800
7801 reg_rtx = gen_rtx_REG (mode, reg);
7802
7803 mem_rtx = gen_frame_mem (mode,
7804 gen_rtx_PLUS (Pmode,
7805 stack_pointer_rtx,
7806 GEN_INT (offset)));
7807
7808 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7809 {
7810 gcc_assert (r0);
7811 mem_rtx = NULL_RTX;
7812 }
7813
7814 if (HAVE_PRE_DECREMENT
7815 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7816 || mem_rtx == NULL_RTX
7817 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7818 {
7819 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7820
7821 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7822 pre_dec = NULL_RTX;
7823 else
7824 {
7825 mem_rtx = NULL_RTX;
7826 offset += GET_MODE_SIZE (mode);
7827 }
7828 }
7829
7830 if (mem_rtx != NULL_RTX)
7831 goto addr_ok;
7832
7833 if (offset_in_r0 == -1)
7834 {
7835 emit_move_insn (r0, GEN_INT (offset));
7836 offset_in_r0 = offset;
7837 }
7838 else if (offset != offset_in_r0)
7839 {
7840 emit_move_insn (r0,
7841 gen_rtx_PLUS
7842 (Pmode, r0,
7843 GEN_INT (offset - offset_in_r0)));
7844 offset_in_r0 += offset - offset_in_r0;
7845 }
7846
7847 if (pre_dec != NULL_RTX)
7848 {
7849 if (! sp_in_r0)
7850 {
7851 emit_move_insn (r0,
7852 gen_rtx_PLUS
7853 (Pmode, r0, stack_pointer_rtx));
7854 sp_in_r0 = 1;
7855 }
7856
7857 offset -= GET_MODE_SIZE (mode);
7858 offset_in_r0 -= GET_MODE_SIZE (mode);
7859
7860 mem_rtx = pre_dec;
7861 }
7862 else if (sp_in_r0)
7863 mem_rtx = gen_frame_mem (mode, r0);
7864 else
7865 mem_rtx = gen_frame_mem (mode,
7866 gen_rtx_PLUS (Pmode,
7867 stack_pointer_rtx,
7868 r0));
7869
7870 /* We must not use an r0-based address for target-branch
7871 registers or for special registers without pre-dec
7872 memory addresses, since we store their values in r0
7873 first. */
7874 gcc_assert (!TARGET_REGISTER_P (reg)
7875 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7876 || mem_rtx == pre_dec));
7877
7878 addr_ok:
7879 orig_reg_rtx = reg_rtx;
7880 if (TARGET_REGISTER_P (reg)
7881 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7882 && mem_rtx != pre_dec))
7883 {
7884 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7885
7886 emit_move_insn (tmp_reg, reg_rtx);
7887
7888 if (REGNO (tmp_reg) == R0_REG)
7889 {
7890 offset_in_r0 = -1;
7891 sp_in_r0 = 0;
7892 gcc_assert (!refers_to_regno_p (R0_REG, mem_rtx));
7893 }
7894
7895 if (*++tmp_pnt <= 0)
7896 tmp_pnt = schedule.temps;
7897
7898 reg_rtx = tmp_reg;
7899 }
7900 {
7901 rtx insn;
7902
7903 /* Mark as interesting for the DWARF CFI generator. */
7904 insn = emit_move_insn (mem_rtx, reg_rtx);
7905 RTX_FRAME_RELATED_P (insn) = 1;
7906 /* If we use an intermediate register for the save, we can't
7907 describe this exactly in the CFI as a copy of the to-be-saved
7908 register into the temporary register followed by a store of the
7909 temporary register to the stack, because the temporary register
7910 can have a different natural size than the to-be-saved register.
7911 Thus, we gloss over the intermediate copy and pretend we do
7912 a direct save from the to-be-saved register. */
7913 if (REGNO (reg_rtx) != reg)
7914 {
7915 rtx set;
7916
7917 set = gen_rtx_SET (mem_rtx, orig_reg_rtx);
7918 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7919 }
7920
7921 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7922 {
7923 rtx reg_rtx = gen_rtx_REG (mode, reg);
7924 rtx set;
7925 rtx mem_rtx = gen_frame_mem (mode,
7926 gen_rtx_PLUS (Pmode,
7927 stack_pointer_rtx,
7928 GEN_INT (offset)));
7929
7930 set = gen_rtx_SET (mem_rtx, reg_rtx);
7931 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7932 }
7933 }
7934 }
7935
7936 gcc_assert (entry->offset == d_rounding);
7937 }
7938 else
7939 {
7940 push_regs (&live_regs_mask, current_function_interrupt);
7941 stack_usage += d;
7942 }
7943
7944 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7945 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7946
7947 if (SHMEDIA_REGS_STACK_ADJUST ())
7948 {
7949 /* This must NOT go through the PLT, otherwise mach and macl
7950 may be clobbered. */
7951 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7952 (TARGET_FPU_ANY
7953 ? "__GCC_push_shmedia_regs"
7954 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7955 emit_insn (gen_shmedia_save_restore_regs_compact
7956 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7957 }
7958
7959 if (target_flags != save_flags && ! current_function_interrupt)
7960 emit_insn (gen_toggle_sz ());
7961
7962 target_flags = save_flags;
7963
7964 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7965 stack_pointer_rtx, 0, NULL, true);
7966 stack_usage += rounded_frame_size (d) - d_rounding;
7967
7968 if (frame_pointer_needed)
7969 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7970
7971 if (TARGET_SHCOMPACT
7972 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7973 {
7974 /* This must NOT go through the PLT, otherwise mach and macl
7975 may be clobbered. */
7976 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7977 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7978 emit_insn (gen_shcompact_incoming_args ());
7979 }
7980
7981 /* If we are profiling, make sure no instructions are scheduled before
7982 the call to mcount. Similarly, if some call instructions are moved
7983 before frame-related insns, it will confuse the unwinder because
7984 currently SH has no unwind info for function epilogues. */
7985 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7986 emit_insn (gen_blockage ());
7987
7988 if (flag_stack_usage_info)
7989 current_function_static_stack_size = stack_usage;
7990 }
7991
7992 /* Expand code for the function epilogue. */
7993 void
7994 sh_expand_epilogue (bool sibcall_p)
7995 {
7996 HARD_REG_SET live_regs_mask;
7997 int d, i;
7998 int d_rounding = 0;
7999
8000 int save_flags = target_flags;
8001 int frame_size, save_size;
8002 int fpscr_deferred = 0;
8003 int e = sibcall_p ? -1 : 1;
8004
8005 d = calc_live_regs (&live_regs_mask);
8006
8007 save_size = d;
8008 frame_size = rounded_frame_size (d);
8009
8010 if (TARGET_SH5)
8011 {
8012 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
8013 int total_size;
8014 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
8015 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8016 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
8017
8018 total_size = d + tregs_space;
8019 total_size += rounded_frame_size (total_size);
8020 save_size = total_size - frame_size;
8021
8022 /* If adjusting the stack in a single step costs nothing extra, do so.
8023 I.e. either if a single addi is enough, or we need a movi anyway,
8024 and we don't exceed the maximum offset range (the test for the
8025 latter is conservative for simplicity). */
8026 if (TARGET_SHMEDIA
8027 && ! frame_pointer_needed
8028 && (CONST_OK_FOR_I10 (total_size)
8029 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
8030 && total_size <= 2044)))
8031 d_rounding = frame_size;
8032
8033 frame_size -= d_rounding;
8034 }
8035
8036 if (frame_pointer_needed)
8037 {
8038 /* We must avoid scheduling the epilogue with previous basic blocks.
8039 See PR/18032 and PR/40313. */
8040 emit_insn (gen_blockage ());
8041 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
8042 &live_regs_mask, true);
8043
8044 /* We must avoid moving the stack pointer adjustment past code
8045 which reads from the local frame, else an interrupt could
8046 occur after the SP adjustment and clobber data in the local
8047 frame. */
8048 emit_insn (gen_blockage ());
8049 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
8050 }
8051 else if (frame_size)
8052 {
8053 /* We must avoid moving the stack pointer adjustment past code
8054 which reads from the local frame, else an interrupt could
8055 occur after the SP adjustment and clobber data in the local
8056 frame. */
8057 emit_insn (gen_blockage ());
8058 output_stack_adjust (frame_size, stack_pointer_rtx, e,
8059 &live_regs_mask, true);
8060 }
8061
8062 if (SHMEDIA_REGS_STACK_ADJUST ())
8063 {
8064 function_symbol (gen_rtx_REG (Pmode, R0_REG),
8065 (TARGET_FPU_ANY
8066 ? "__GCC_pop_shmedia_regs"
8067 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
8068 /* This must NOT go through the PLT, otherwise mach and macl
8069 may be clobbered. */
8070 emit_insn (gen_shmedia_save_restore_regs_compact
8071 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
8072 }
8073
8074 /* Pop all the registers. */
8075
8076 if (target_flags != save_flags && ! current_function_interrupt)
8077 emit_insn (gen_toggle_sz ());
8078 if (TARGET_SH5)
8079 {
8080 int offset_base, offset;
8081 int offset_in_r0 = -1;
8082 int sp_in_r0 = 0;
8083 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
8084 save_schedule schedule;
8085 save_entry *entry;
8086 int *tmp_pnt;
8087
8088 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
8089 offset_base = -entry[1].offset + d_rounding;
8090 tmp_pnt = schedule.temps;
8091 for (; entry->mode != VOIDmode; entry--)
8092 {
8093 machine_mode mode = (machine_mode) entry->mode;
8094 int reg = entry->reg;
8095 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
8096
8097 offset = offset_base + entry->offset;
8098 reg_rtx = gen_rtx_REG (mode, reg);
8099
8100 mem_rtx = gen_frame_mem (mode,
8101 gen_rtx_PLUS (Pmode,
8102 stack_pointer_rtx,
8103 GEN_INT (offset)));
8104
8105 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
8106 mem_rtx = NULL_RTX;
8107
8108 if (HAVE_POST_INCREMENT
8109 && (offset == offset_in_r0
8110 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
8111 && mem_rtx == NULL_RTX)
8112 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
8113 {
8114 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
8115
8116 if (!memory_address_p (mode, XEXP (post_inc, 0)))
8117 post_inc = NULL_RTX;
8118 else
8119 mem_rtx = NULL_RTX;
8120 }
8121
8122 if (mem_rtx != NULL_RTX)
8123 goto addr_ok;
8124
8125 if (offset_in_r0 == -1)
8126 {
8127 emit_move_insn (r0, GEN_INT (offset));
8128 offset_in_r0 = offset;
8129 }
8130 else if (offset != offset_in_r0)
8131 {
8132 emit_move_insn (r0,
8133 gen_rtx_PLUS
8134 (Pmode, r0,
8135 GEN_INT (offset - offset_in_r0)));
8136 offset_in_r0 += offset - offset_in_r0;
8137 }
8138
8139 if (post_inc != NULL_RTX)
8140 {
8141 if (! sp_in_r0)
8142 {
8143 emit_move_insn (r0,
8144 gen_rtx_PLUS
8145 (Pmode, r0, stack_pointer_rtx));
8146 sp_in_r0 = 1;
8147 }
8148
8149 mem_rtx = post_inc;
8150
8151 offset_in_r0 += GET_MODE_SIZE (mode);
8152 }
8153 else if (sp_in_r0)
8154 mem_rtx = gen_frame_mem (mode, r0);
8155 else
8156 mem_rtx = gen_frame_mem (mode,
8157 gen_rtx_PLUS (Pmode,
8158 stack_pointer_rtx,
8159 r0));
8160
8161 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8162 || mem_rtx == post_inc);
8163
8164 addr_ok:
8165 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8166 && mem_rtx != post_inc)
8167 {
8168 emit_move_insn (r0, mem_rtx);
8169 mem_rtx = r0;
8170 }
8171 else if (TARGET_REGISTER_P (reg))
8172 {
8173 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8174
8175 /* Give the scheduler a bit of freedom by using up to
8176 MAX_TEMPS registers in a round-robin fashion. */
8177 emit_move_insn (tmp_reg, mem_rtx);
8178 mem_rtx = tmp_reg;
8179 if (*++tmp_pnt < 0)
8180 tmp_pnt = schedule.temps;
8181 }
8182
8183 emit_move_insn (reg_rtx, mem_rtx);
8184 }
8185
8186 gcc_assert (entry->offset + offset_base == d + d_rounding);
8187 }
8188 else /* ! TARGET_SH5 */
8189 {
8190 int last_reg;
8191
8192 save_size = 0;
8193 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
8194 register. */
8195 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8196 && !sh_cfun_resbank_handler_p ())
8197 {
8198 if (!frame_pointer_needed)
8199 emit_insn (gen_blockage ());
8200 pop (PR_REG);
8201 }
8202
8203 /* Banked registers are popped first to avoid being scheduled in the
8204 delay slot. RTE switches banks before executing its delay slot insn. */
8205 if (current_function_interrupt)
8206 {
8207 bool use_movml = false;
8208
8209 if (TARGET_SH2A)
8210 {
8211 unsigned int count = 0;
8212
8213 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8214 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8215 count++;
8216 else
8217 break;
8218
8219 /* Use movml when all banked registers are popped. */
8220 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8221 use_movml = true;
8222 }
8223
8224 if (sh_cfun_resbank_handler_p ())
8225 ; /* Do nothing. */
8226 else if (use_movml)
8227 {
8228 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8229
8230 /* We must avoid scheduling the multiple load insn together with
8231 other insns. */
8232 emit_insn (gen_blockage ());
8233 emit_insn (gen_movml_pop_banked (sp_reg));
8234 emit_insn (gen_blockage ());
8235 }
8236 else
8237 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8238 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8239 pop (i);
8240
8241 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8242 }
8243 else
8244 last_reg = FIRST_PSEUDO_REGISTER;
8245
8246 for (i = 0; i < last_reg; i++)
8247 {
8248 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8249
8250 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8251 && hard_reg_set_intersect_p (live_regs_mask,
8252 reg_class_contents[DF_REGS]))
8253 fpscr_deferred = 1;
8254 /* For an ISR with the RESBANK attribute assigned, don't pop the
8255 following registers: R0-R14, MACH, MACL and GBR. */
8256 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8257 && ! (sh_cfun_resbank_handler_p ()
8258 && ((j >= FIRST_GENERAL_REG
8259 && j < LAST_GENERAL_REG)
8260 || j == MACH_REG
8261 || j == MACL_REG
8262 || j == GBR_REG)))
8263 pop (j);
8264
8265 if (j == FIRST_FP_REG && fpscr_deferred)
8266 pop (FPSCR_REG);
8267 }
8268 }
8269 if (target_flags != save_flags && ! current_function_interrupt)
8270 emit_insn (gen_toggle_sz ());
8271 target_flags = save_flags;
8272
8273 output_stack_adjust (crtl->args.pretend_args_size
8274 + save_size + d_rounding
8275 + crtl->args.info.stack_regs * 8,
8276 stack_pointer_rtx, e, NULL, true);
8277
8278 if (crtl->calls_eh_return)
8279 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8280 EH_RETURN_STACKADJ_RTX));
8281
8282 /* Switch back to the normal stack if necessary. */
8283 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8284 emit_insn (gen_sp_switch_2 ());
8285
8286 /* Tell flow the insn that pops PR isn't dead. */
8287 /* PR_REG will never be live in SHmedia mode, and we don't need to
8288 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8289 by the return pattern. */
8290 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8291 emit_use (gen_rtx_REG (SImode, PR_REG));
8292 }
8293
8294 /* Emit code to change the current function's return address to RA.
8295 TEMP is available as a scratch register, if needed. */
8296 void
8297 sh_set_return_address (rtx ra, rtx tmp)
8298 {
8299 HARD_REG_SET live_regs_mask;
8300 int d;
8301 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8302 int pr_offset;
8303
8304 d = calc_live_regs (&live_regs_mask);
8305
8306 /* If pr_reg isn't live, we can set it (or the register given in
8307 sh_media_register_for_return) directly. */
8308 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8309 {
8310 rtx rr;
8311
8312 if (TARGET_SHMEDIA)
8313 {
8314 int rr_regno = sh_media_register_for_return ();
8315
8316 if (rr_regno < 0)
8317 rr_regno = pr_reg;
8318
8319 rr = gen_rtx_REG (DImode, rr_regno);
8320 }
8321 else
8322 rr = gen_rtx_REG (SImode, pr_reg);
8323
8324 emit_insn (GEN_MOV (rr, ra));
8325 /* Tell flow the register for return isn't dead. */
8326 emit_use (rr);
8327 return;
8328 }
8329
8330 if (TARGET_SH5)
8331 {
8332 int offset;
8333 save_schedule schedule;
8334 save_entry *entry;
8335
8336 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8337 offset = entry[1].offset;
8338 for (; entry->mode != VOIDmode; entry--)
8339 if (entry->reg == pr_reg)
8340 goto found;
8341
8342 /* We couldn't find the PR register. */
8343 gcc_unreachable ();
8344
8345 found:
8346 offset = entry->offset - offset;
8347 pr_offset = (rounded_frame_size (d) + offset
8348 + SHMEDIA_REGS_STACK_ADJUST ());
8349 }
8350 else
8351 pr_offset = rounded_frame_size (d);
8352
8353 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8354
8355 if (frame_pointer_needed)
8356 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8357 else
8358 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8359
8360 tmp = gen_frame_mem (Pmode, tmp);
8361 emit_insn (GEN_MOV (tmp, ra));
8362 /* Tell flow this store isn't dead. */
8363 emit_use (tmp);
8364 }
8365
8366 /* Clear variables at function end. */
8367 static void
8368 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8369 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8370 {
8371 }
8372
8373 static rtx
8374 sh_builtin_saveregs (void)
8375 {
8376 /* First unnamed integer register. */
8377 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8378 /* Number of integer registers we need to save. */
8379 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8380 /* First unnamed SFmode float reg. */
8381 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8382 /* Number of SFmode float regs to save. */
8383 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8384 rtx regbuf, fpregs;
8385 int bufsize, regno;
8386 alias_set_type alias_set;
8387
8388 if (TARGET_SH5)
8389 {
8390 if (n_intregs)
8391 {
8392 int pushregs = n_intregs;
8393
8394 while (pushregs < NPARM_REGS (SImode) - 1
8395 && (CALL_COOKIE_INT_REG_GET
8396 (crtl->args.info.call_cookie,
8397 NPARM_REGS (SImode) - pushregs)
8398 == 1))
8399 {
8400 crtl->args.info.call_cookie
8401 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8402 - pushregs, 1);
8403 pushregs++;
8404 }
8405
8406 if (pushregs == NPARM_REGS (SImode))
8407 crtl->args.info.call_cookie
8408 |= (CALL_COOKIE_INT_REG (0, 1)
8409 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8410 else
8411 crtl->args.info.call_cookie
8412 |= CALL_COOKIE_STACKSEQ (pushregs);
8413
8414 crtl->args.pretend_args_size += 8 * n_intregs;
8415 }
8416 if (TARGET_SHCOMPACT)
8417 return const0_rtx;
8418 }
8419
8420 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8421 {
8422 error ("__builtin_saveregs not supported by this subtarget");
8423 return const0_rtx;
8424 }
8425
8426 if (TARGET_SHMEDIA)
8427 n_floatregs = 0;
8428
8429 /* Allocate block of memory for the regs. */
8430 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8431 Or can assign_stack_local accept a 0 SIZE argument? */
8432 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8433
8434 if (TARGET_SHMEDIA)
8435 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8436 else if (n_floatregs & 1)
8437 {
8438 rtx addr;
8439
8440 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8441 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8442 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8443 regbuf = change_address (regbuf, BLKmode, addr);
8444 }
8445 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8446 {
8447 rtx addr, mask;
8448
8449 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
/* Round the buffer address up to an 8-byte boundary: assuming the slot
is at least 4-byte aligned, (addr + 4) & -8 yields whichever of addr
and addr + 4 is 8-byte aligned. The extra UNITS_PER_WORD allocated
above leaves room for this adjustment. */
8450 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8451 XEXP (regbuf, 0), 4));
8452 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8453 emit_insn (gen_andsi3 (addr, addr, mask));
8454 regbuf = change_address (regbuf, BLKmode, addr);
8455 }
8456 else
8457 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8458 alias_set = get_varargs_alias_set ();
8459 set_mem_alias_set (regbuf, alias_set);
8460
8461 /* Save int args.
8462 This is optimized to only save the regs that are necessary. Explicitly
8463 named args need not be saved. */
8464 if (n_intregs > 0)
8465 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8466 adjust_address (regbuf, BLKmode,
8467 n_floatregs * UNITS_PER_WORD),
8468 n_intregs);
8469
8470 if (TARGET_SHMEDIA)
8471 /* Return the address of the regbuf. */
8472 return XEXP (regbuf, 0);
8473
8474 /* Save float args.
8475 This is optimized to only save the regs that are necessary. Explicitly
8476 named args need not be saved.
8477 We explicitly build a pointer to the buffer because it halves the insn
8478 count when not optimizing (otherwise the pointer is built for each reg
8479 saved).
8480 We emit the moves in reverse order so that we can use predecrement. */
8481
8482 fpregs = copy_to_mode_reg (Pmode,
8483 plus_constant (Pmode, XEXP (regbuf, 0),
8484 n_floatregs * UNITS_PER_WORD));
8485 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8486 {
8487 rtx mem;
8488 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8489 {
8490 emit_insn (gen_addsi3 (fpregs, fpregs,
8491 GEN_INT (-2 * UNITS_PER_WORD)));
8492 mem = change_address (regbuf, DFmode, fpregs);
8493 emit_move_insn (mem,
8494 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8495 }
8496 regno = first_floatreg;
8497 if (regno & 1)
8498 {
8499 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8500 mem = change_address (regbuf, SFmode, fpregs);
8501 emit_move_insn (mem,
8502 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8503 + regno - SH_REG_MSW_OFFSET));
8504 }
8505 }
8506 else
8507 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8508 {
8509 rtx mem;
8510
8511 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8512 mem = change_address (regbuf, SFmode, fpregs);
8513 emit_move_insn (mem,
8514 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8515 }
8516
8517 /* Return the address of the regbuf. */
8518 return XEXP (regbuf, 0);
8519 }
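
/* A sketch of the resulting save area (not part of the original source),
for the non-SH5 case: the buffer whose address is returned above starts
with n_floatregs words holding the unnamed FP argument registers
(stored in reverse order via predecrement), followed by n_intregs
words holding the unnamed integer argument registers:

  regbuf + 0                             FP register save area
  regbuf + n_floatregs * UNITS_PER_WORD  integer register save area
  regbuf + bufsize                       end of the buffer

sh_va_start below hands these boundaries out through the va_list
fields. */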
8520
8521 /* Define the `__builtin_va_list' type for the ABI. */
8522 static tree
8523 sh_build_builtin_va_list (void)
8524 {
8525 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8526 tree record, type_decl;
8527
8528 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8529 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8530 return ptr_type_node;
8531
8532 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8533 type_decl = build_decl (BUILTINS_LOCATION,
8534 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8535
8536 f_next_o = build_decl (BUILTINS_LOCATION,
8537 FIELD_DECL, get_identifier ("__va_next_o"),
8538 ptr_type_node);
8539 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8540 FIELD_DECL,
8541 get_identifier ("__va_next_o_limit"),
8542 ptr_type_node);
8543 f_next_fp = build_decl (BUILTINS_LOCATION,
8544 FIELD_DECL, get_identifier ("__va_next_fp"),
8545 ptr_type_node);
8546 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8547 FIELD_DECL,
8548 get_identifier ("__va_next_fp_limit"),
8549 ptr_type_node);
8550 f_next_stack = build_decl (BUILTINS_LOCATION,
8551 FIELD_DECL, get_identifier ("__va_next_stack"),
8552 ptr_type_node);
8553
8554 DECL_FIELD_CONTEXT (f_next_o) = record;
8555 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8556 DECL_FIELD_CONTEXT (f_next_fp) = record;
8557 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8558 DECL_FIELD_CONTEXT (f_next_stack) = record;
8559
8560 TYPE_STUB_DECL (record) = type_decl;
8561 TYPE_NAME (record) = type_decl;
8562 TYPE_FIELDS (record) = f_next_o;
8563 DECL_CHAIN (f_next_o) = f_next_o_limit;
8564 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8565 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8566 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8567
8568 layout_type (record);
8569
8570 return record;
8571 }
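
/* For reference, a sketch (not part of the original source) of the
C-level layout the record built above corresponds to; the field names
match the FIELD_DECLs created here and the roles follow the way
sh_va_start below initializes them:

  struct __va_list_tag
  {
    void *__va_next_o;        (next unnamed integer argument register slot)
    void *__va_next_o_limit;  (end of the integer register save area)
    void *__va_next_fp;       (next unnamed FP argument register slot)
    void *__va_next_fp_limit; (end of the FP register save area)
    void *__va_next_stack;    (remaining arguments passed on the stack)
  };  */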
8572
8573 /* Implement `va_start' for varargs and stdarg. */
8574 static void
8575 sh_va_start (tree valist, rtx nextarg)
8576 {
8577 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8578 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8579 tree t, u;
8580 int nfp, nint;
8581
8582 if (TARGET_SH5)
8583 {
8584 expand_builtin_saveregs ();
8585 std_expand_builtin_va_start (valist, nextarg);
8586 return;
8587 }
8588
8589 if ((! TARGET_SH2E && ! TARGET_SH4)
8590 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8591 {
8592 std_expand_builtin_va_start (valist, nextarg);
8593 return;
8594 }
8595
8596 f_next_o = TYPE_FIELDS (va_list_type_node);
8597 f_next_o_limit = DECL_CHAIN (f_next_o);
8598 f_next_fp = DECL_CHAIN (f_next_o_limit);
8599 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8600 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8601
8602 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8603 NULL_TREE);
8604 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8605 valist, f_next_o_limit, NULL_TREE);
8606 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8607 NULL_TREE);
8608 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8609 valist, f_next_fp_limit, NULL_TREE);
8610 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8611 valist, f_next_stack, NULL_TREE);
8612
8613 /* Call __builtin_saveregs. */
8614 u = make_tree (sizetype, expand_builtin_saveregs ());
8615 u = fold_convert (ptr_type_node, u);
8616 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8617 TREE_SIDE_EFFECTS (t) = 1;
8618 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8619
8620 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8621 if (nfp < 8)
8622 nfp = 8 - nfp;
8623 else
8624 nfp = 0;
8625 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8626 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8627 TREE_SIDE_EFFECTS (t) = 1;
8628 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8629
8630 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8631 TREE_SIDE_EFFECTS (t) = 1;
8632 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8633
8634 nint = crtl->args.info.arg_count[SH_ARG_INT];
8635 if (nint < 4)
8636 nint = 4 - nint;
8637 else
8638 nint = 0;
8639 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8640 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8641 TREE_SIDE_EFFECTS (t) = 1;
8642 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8643
8644 u = make_tree (ptr_type_node, nextarg);
8645 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8646 TREE_SIDE_EFFECTS (t) = 1;
8647 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8648 }
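
/* In summary (a sketch, not part of the original source): after
sh_va_start the fields describe the register save area built by
sh_builtin_saveregs, with the unused-register counts clamped at zero:

  next_fp       = start of the save buffer (FP save area)
  next_fp_limit = next_fp + (8 - named FP args) * UNITS_PER_WORD
  next_o        = next_fp_limit (the integer save area follows)
  next_o_limit  = next_o + (4 - named int args) * UNITS_PER_WORD
  next_stack    = NEXTARG, the first stack-passed argument  */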
8649
8650 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8651 member, return it. */
8652 static tree
8653 find_sole_member (tree type)
8654 {
8655 tree field, member = NULL_TREE;
8656
8657 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8658 {
8659 if (TREE_CODE (field) != FIELD_DECL)
8660 continue;
8661 if (!DECL_SIZE (field))
8662 return NULL_TREE;
8663 if (integer_zerop (DECL_SIZE (field)))
8664 continue;
8665 if (member)
8666 return NULL_TREE;
8667 member = field;
8668 }
8669 return member;
8670 }
8671
8672 /* Implement `va_arg'. */
8673 static tree
8674 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8675 gimple_seq *post_p ATTRIBUTE_UNUSED)
8676 {
8677 HOST_WIDE_INT size, rsize;
8678 tree tmp, pptr_type_node;
8679 tree addr, lab_over = NULL, result = NULL;
8680 bool pass_by_ref;
8681 tree eff_type;
8682
8683 if (!VOID_TYPE_P (type))
8684 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8685 else
8686 pass_by_ref = false;
8687
8688 if (pass_by_ref)
8689 type = build_pointer_type (type);
8690
8691 size = int_size_in_bytes (type);
8692 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8693 pptr_type_node = build_pointer_type (ptr_type_node);
8694
8695 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8696 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8697 {
8698 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8699 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8700 int pass_as_float;
8701 tree lab_false;
8702 tree member;
8703
8704 f_next_o = TYPE_FIELDS (va_list_type_node);
8705 f_next_o_limit = DECL_CHAIN (f_next_o);
8706 f_next_fp = DECL_CHAIN (f_next_o_limit);
8707 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8708 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8709
8710 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8711 NULL_TREE);
8712 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8713 valist, f_next_o_limit, NULL_TREE);
8714 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8715 valist, f_next_fp, NULL_TREE);
8716 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8717 valist, f_next_fp_limit, NULL_TREE);
8718 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8719 valist, f_next_stack, NULL_TREE);
8720
8721 /* Structures with a single member with a distinct mode are passed
8722 like their member. This is relevant if the latter has a REAL_TYPE
8723 or COMPLEX_TYPE type. */
8724 eff_type = type;
8725 while (TREE_CODE (eff_type) == RECORD_TYPE
8726 && (member = find_sole_member (eff_type))
8727 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8728 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8729 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8730 {
8731 tree field_type = TREE_TYPE (member);
8732
8733 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8734 eff_type = field_type;
8735 else
8736 {
8737 gcc_assert ((TYPE_ALIGN (eff_type)
8738 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8739 || (TYPE_ALIGN (eff_type)
8740 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8741 break;
8742 }
8743 }
8744
8745 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8746 {
8747 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8748 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8749 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8750 && size <= 16));
8751 }
8752 else
8753 {
8754 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8755 }
8756
8757 addr = create_tmp_var (pptr_type_node);
8758 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8759 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8760
8761 valist = build_simple_mem_ref (addr);
8762
8763 if (pass_as_float)
8764 {
8765 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
8766 tree cmp;
8767 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8768
8769 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8770 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8771
8772 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8773 tmp = next_fp_limit;
8774 if (size > 4 && !is_double)
8775 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8776 tmp = build2 (GE_EXPR, boolean_type_node,
8777 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8778 cmp = build3 (COND_EXPR, void_type_node, tmp,
8779 build1 (GOTO_EXPR, void_type_node,
8780 unshare_expr (lab_false)), NULL_TREE);
8781 if (!is_double)
8782 gimplify_and_add (cmp, pre_p);
8783
8784 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8785 || (is_double || size == 16))
8786 {
8787 tmp = fold_convert (sizetype, next_fp_tmp);
8788 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8789 size_int (UNITS_PER_WORD));
8790 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8791 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8792 }
8793 if (is_double)
8794 gimplify_and_add (cmp, pre_p);
8795
8796 #ifdef FUNCTION_ARG_SCmode_WART
8797 if (TYPE_MODE (eff_type) == SCmode
8798 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8799 {
8800 tree subtype = TREE_TYPE (eff_type);
8801 tree real, imag;
8802
8803 imag
8804 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8805 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8806
8807 real
8808 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8809 real = get_initialized_tmp_var (real, pre_p, NULL);
8810
8811 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8812 if (type != eff_type)
8813 result = build1 (VIEW_CONVERT_EXPR, type, result);
8814 result = get_initialized_tmp_var (result, pre_p, NULL);
8815 }
8816 #endif /* FUNCTION_ARG_SCmode_WART */
8817
8818 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8819 gimplify_and_add (tmp, pre_p);
8820
8821 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8822 gimplify_and_add (tmp, pre_p);
8823
8824 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8825 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8826 gimplify_assign (unshare_expr (next_fp_tmp),
8827 unshare_expr (valist), pre_p);
8828
8829 gimplify_assign (unshare_expr (valist),
8830 unshare_expr (next_fp_tmp), post_p);
8831 valist = next_fp_tmp;
8832 }
8833 else
8834 {
8835 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8836 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8837 unshare_expr (next_o_limit));
8838 tmp = build3 (COND_EXPR, void_type_node, tmp,
8839 build1 (GOTO_EXPR, void_type_node,
8840 unshare_expr (lab_false)),
8841 NULL_TREE);
8842 gimplify_and_add (tmp, pre_p);
8843
8844 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8845 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8846
8847 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8848 gimplify_and_add (tmp, pre_p);
8849
8850 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8851 gimplify_and_add (tmp, pre_p);
8852
8853 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8854 gimplify_assign (unshare_expr (next_o),
8855 unshare_expr (next_o_limit), pre_p);
8856
8857 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8858 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8859 }
8860
8861 if (!result)
8862 {
8863 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8864 gimplify_and_add (tmp, pre_p);
8865 }
8866 }
8867
8868 /* ??? In va-sh.h, there had been code to make values larger than
8869 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8870
8871 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8872 if (result)
8873 {
8874 gimplify_assign (result, tmp, pre_p);
8875 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8876 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8877 gimplify_and_add (tmp, pre_p);
8878 }
8879 else
8880 result = tmp;
8881
8882 if (pass_by_ref)
8883 result = build_va_arg_indirect_ref (result);
8884
8885 return result;
8886 }
8887
8888 /* 64-bit floating point memory transfers are paired single precision loads
8889 or stores. So DWARF information needs fixing in little endian (unless
8890 PR=SZ=1 in FPSCR). */
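/* Illustrative example (not from the original sources): a DFmode value in
   the register pair fr0/fr1 is transferred as two SFmode halves, so in
   little endian the span below describes it to DWARF as (fr1, fr0) rather
   than as a single 64-bit register. */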
8891 rtx
8892 sh_dwarf_register_span (rtx reg)
8893 {
8894 unsigned regno = REGNO (reg);
8895
8896 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8897 return NULL_RTX;
8898
8899 return
8900 gen_rtx_PARALLEL (VOIDmode,
8901 gen_rtvec (2,
8902 gen_rtx_REG (SFmode, regno + 1),
8903 gen_rtx_REG (SFmode, regno)));
8904 }
8905
8906 static machine_mode
8907 sh_promote_function_mode (const_tree type, machine_mode mode,
8908 int *punsignedp, const_tree funtype,
8909 int for_return)
8910 {
8911 if (sh_promote_prototypes (funtype))
8912 return promote_mode (type, mode, punsignedp);
8913 else
8914 return default_promote_function_mode (type, mode, punsignedp, funtype,
8915 for_return);
8916 }
8917
8918 static bool
8919 sh_promote_prototypes (const_tree type)
8920 {
8921 if (TARGET_HITACHI)
8922 return false;
8923 if (! type)
8924 return true;
8925 return ! sh_attr_renesas_p (type);
8926 }
8927
8928 /* Whether an argument must be passed by reference. On SHcompact, we
8929 pretend arguments wider than 32 bits that would have been passed in
8930 registers are passed by reference, so that an SHmedia trampoline
8931 loads them into the full 64-bit registers. */
8932 static int
8933 shcompact_byref (const CUMULATIVE_ARGS *cum, machine_mode mode,
8934 const_tree type, bool named)
8935 {
8936 unsigned HOST_WIDE_INT size;
8937
8938 if (type)
8939 size = int_size_in_bytes (type);
8940 else
8941 size = GET_MODE_SIZE (mode);
8942
8943 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8944 && (!named
8945 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8946 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8947 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8948 && size > 4
8949 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8950 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8951 return size;
8952 else
8953 return 0;
8954 }
8955
8956 static bool
8957 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8958 const_tree type, bool named)
8959 {
8960 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8961
8962 if (targetm.calls.must_pass_in_stack (mode, type))
8963 return true;
8964
8965 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8966 wants to know about pass-by-reference semantics for incoming
8967 arguments. */
8968 if (! cum)
8969 return false;
8970
8971 if (TARGET_SHCOMPACT)
8972 {
8973 cum->byref = shcompact_byref (cum, mode, type, named);
8974 return cum->byref != 0;
8975 }
8976
8977 return false;
8978 }
8979
8980 static bool
8981 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
8982 const_tree type, bool named ATTRIBUTE_UNUSED)
8983 {
8984 /* ??? How can it possibly be correct to return true only on the
8985 caller side of the equation? Is there someplace else in the
8986 sh backend that's magically producing the copies? */
8987 return (get_cumulative_args (cum)->outgoing
8988 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8989 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8990 }
8991
8992 /* Round a register number up to a proper boundary for an arg of mode
8993 MODE.
8994 The SH doesn't care about double alignment, so we only
8995 round doubles to even regs when asked to explicitly. */
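/* Illustrative example: with double alignment in effect, a DFmode argument
   that arrives after a single SFmode argument has been passed is rounded up
   from float-register index 1 to index 2, leaving a one-register gap. */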
8996 static int
8997 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
8998 {
8999 /* FIXME: This used to be a macro and has been copy pasted into this
9000 function as is. Make this more readable. */
9001 return
9002 (((TARGET_ALIGN_DOUBLE
9003 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9004 && (mode == DFmode || mode == DCmode)
9005 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
9006 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
9007 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
9008 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
9009 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
9010 }
9011
9012 /* Return true if arg of the specified mode should be passed in a register
9013 or false otherwise. */
9014 static bool
9015 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
9016 const_tree type)
9017 {
9018 /* FIXME: This used to be a macro and has been copy pasted into this
9019 function as is. Make this more readable. */
9020 return
9021 ((type == 0
9022 || (! TREE_ADDRESSABLE (type)
9023 && (! (TARGET_HITACHI || cum.renesas_abi)
9024 || ! (AGGREGATE_TYPE_P (type)
9025 || (!TARGET_FPU_ANY
9026 && (GET_MODE_CLASS (mode) == MODE_FLOAT
9027 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
9028 && ! cum.force_mem
9029 && (TARGET_SH2E
9030 ? ((mode) == BLKmode
9031 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
9032 + int_size_in_bytes (type))
9033 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
9034 : ((sh_round_reg (cum, mode)
9035 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
9036 <= NPARM_REGS (mode)))
9037 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
9038 }
9039
9040 static int
9041 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9042 tree type, bool named ATTRIBUTE_UNUSED)
9043 {
9044 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9045 int words = 0;
9046
9047 if (!TARGET_SH5
9048 && sh_pass_in_reg_p (*cum, mode, type)
9049 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
9050 && (sh_round_reg (*cum, mode)
9051 + (mode != BLKmode
9052 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
9053 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
9054 > NPARM_REGS (mode)))
9055 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
9056
9057 else if (!TARGET_SHCOMPACT
9058 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
9059 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
9060
9061 return words * UNITS_PER_WORD;
9062 }
9063
9064
9065 /* Define where to put the arguments to a function.
9066 Value is zero to push the argument on the stack,
9067 or a hard register in which to store the argument.
9068
9069 MODE is the argument's machine mode.
9070 TYPE is the data type of the argument (as a tree).
9071 This is null for libcalls where that information may
9072 not be available.
9073 CUM is a variable of type CUMULATIVE_ARGS which gives info about
9074 the preceding args and about the function being called.
9075 NAMED is nonzero if this argument is a named parameter
9076 (otherwise it is an extra parameter matching an ellipsis).
9077
9078 On SH the first args are normally in registers
9079 and the rest are pushed. Any arg that starts within the first
9080 NPARM_REGS words is at least partially passed in a register unless
9081 its data type forbids. */
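/* Illustrative note, assuming a typical non-SH5 configuration: the first
   NPARM_REGS (SImode) integer arguments go into r4..r7 and, with an FPU,
   the first NPARM_REGS (SFmode) float arguments into fr4..fr11; anything
   beyond that goes on the stack. */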
9082 static rtx
9083 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
9084 const_tree type, bool named)
9085 {
9086 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9087
9088 if (! TARGET_SH5 && mode == VOIDmode)
9089 return GEN_INT (ca->renesas_abi ? 1 : 0);
9090
9091 if (! TARGET_SH5
9092 && sh_pass_in_reg_p (*ca, mode, type)
9093 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
9094 {
9095 int regno;
9096
9097 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
9098 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
9099 {
9100 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
9101 gen_rtx_REG (SFmode,
9102 BASE_ARG_REG (mode)
9103 + (sh_round_reg (*ca, mode) ^ 1)),
9104 const0_rtx);
9105 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
9106 gen_rtx_REG (SFmode,
9107 BASE_ARG_REG (mode)
9108 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
9109 GEN_INT (4));
9110 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
9111 }
9112
9113 /* If the alignment of a DF value causes an SF register to be
9114 skipped, we will use that skipped register for the next SF
9115 value. */
9116 if ((TARGET_HITACHI || ca->renesas_abi)
9117 && ca->free_single_fp_reg
9118 && mode == SFmode)
9119 return gen_rtx_REG (mode, ca->free_single_fp_reg);
9120
9121 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
9122 ^ (mode == SFmode && TARGET_SH4
9123 && TARGET_LITTLE_ENDIAN
9124 && ! TARGET_HITACHI && ! ca->renesas_abi);
9125 return gen_rtx_REG (mode, regno);
9126
9127 }
9128
9129 if (TARGET_SH5)
9130 {
9131 if (mode == VOIDmode && TARGET_SHCOMPACT)
9132 return GEN_INT (ca->call_cookie);
9133
9134 /* The following test assumes unnamed arguments are promoted to
9135 DFmode. */
9136 if (mode == SFmode && ca->free_single_fp_reg)
9137 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
9138
9139 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
9140 && (named || ! ca->prototype_p)
9141 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
9142 {
9143 if (! ca->prototype_p && TARGET_SHMEDIA)
9144 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
9145
9146 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
9147 FIRST_FP_PARM_REG
9148 + ca->arg_count[(int) SH_ARG_FLOAT]);
9149 }
9150
9151 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9152 && (! TARGET_SHCOMPACT
9153 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9154 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9155 type, named))))
9156 {
9157 return gen_rtx_REG (mode, (FIRST_PARM_REG
9158 + ca->arg_count[(int) SH_ARG_INT]));
9159 }
9160
9161 return NULL_RTX;
9162 }
9163
9164 return NULL_RTX;
9165 }
9166
9167 /* Update the data in CUM to advance over an argument
9168 of mode MODE and data type TYPE.
9169 (TYPE is null for libcalls where that information may not be
9170 available.) */
9171 static void
9172 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
9173 const_tree type, bool named)
9174 {
9175 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9176
9177 if (ca->force_mem)
9178 ca->force_mem = 0;
9179 else if (TARGET_SH5)
9180 {
9181 const_tree type2 = (ca->byref && type
9182 ? TREE_TYPE (type)
9183 : type);
9184 machine_mode mode2 = (ca->byref && type
9185 ? TYPE_MODE (type2)
9186 : mode);
9187 int dwords = ((ca->byref
9188 ? ca->byref
9189 : mode2 == BLKmode
9190 ? int_size_in_bytes (type2)
9191 : GET_MODE_SIZE (mode2)) + 7) / 8;
9192 int numregs = MIN (dwords, NPARM_REGS (SImode)
9193 - ca->arg_count[(int) SH_ARG_INT]);
9194
9195 if (numregs)
9196 {
9197 ca->arg_count[(int) SH_ARG_INT] += numregs;
9198 if (TARGET_SHCOMPACT
9199 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9200 {
9201 ca->call_cookie
9202 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9203 - numregs, 1);
9204 /* N.B. We want this also for outgoing. */
9205 ca->stack_regs += numregs;
9206 }
9207 else if (ca->byref)
9208 {
9209 if (! ca->outgoing)
9210 ca->stack_regs += numregs;
9211 ca->byref_regs += numregs;
9212 ca->byref = 0;
9213 do
9214 ca->call_cookie
9215 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9216 - numregs, 2);
9217 while (--numregs);
9218 ca->call_cookie
9219 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9220 - 1, 1);
9221 }
9222 else if (dwords > numregs)
9223 {
9224 int pushregs = numregs;
9225
9226 if (TARGET_SHCOMPACT)
9227 ca->stack_regs += numregs;
9228 while (pushregs < NPARM_REGS (SImode) - 1
9229 && (CALL_COOKIE_INT_REG_GET
9230 (ca->call_cookie,
9231 NPARM_REGS (SImode) - pushregs)
9232 == 1))
9233 {
9234 ca->call_cookie
9235 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9236 - pushregs, 1);
9237 pushregs++;
9238 }
9239 if (numregs == NPARM_REGS (SImode))
9240 ca->call_cookie
9241 |= CALL_COOKIE_INT_REG (0, 1)
9242 | CALL_COOKIE_STACKSEQ (numregs - 1);
9243 else
9244 ca->call_cookie
9245 |= CALL_COOKIE_STACKSEQ (numregs);
9246 }
9247 }
9248 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9249 && (named || ! ca->prototype_p))
9250 {
9251 if (mode2 == SFmode && ca->free_single_fp_reg)
9252 ca->free_single_fp_reg = 0;
9253 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9254 < NPARM_REGS (SFmode))
9255 {
9256 int numfpregs
9257 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9258 NPARM_REGS (SFmode)
9259 - ca->arg_count[(int) SH_ARG_FLOAT]);
9260
9261 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9262
9263 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9264 {
9265 if (ca->outgoing && numregs > 0)
9266 do
9267 {
9268 ca->call_cookie
9269 |= (CALL_COOKIE_INT_REG
9270 (ca->arg_count[(int) SH_ARG_INT]
9271 - numregs + ((numfpregs - 2) / 2),
9272 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9273 - numfpregs) / 2));
9274 }
9275 while (numfpregs -= 2);
9276 }
9277 else if (mode2 == SFmode && (named)
9278 && (ca->arg_count[(int) SH_ARG_FLOAT]
9279 < NPARM_REGS (SFmode)))
9280 ca->free_single_fp_reg
9281 = FIRST_FP_PARM_REG - numfpregs
9282 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9283 }
9284 }
9285 return;
9286 }
9287
9288 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9289 {
9290 /* Note that we've used the skipped register. */
9291 if (mode == SFmode && ca->free_single_fp_reg)
9292 {
9293 ca->free_single_fp_reg = 0;
9294 return;
9295 }
9296 /* When we have a DF after an SF, there's an SF register that gets
9297 skipped in order to align the DF value. We note this skipped
9298 register, because the next SF value will use it, and not the
9299 SF that follows the DF. */
9300 if (mode == DFmode
9301 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9302 {
9303 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9304 + BASE_ARG_REG (mode));
9305 }
9306 }
9307
9308 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9309 || sh_pass_in_reg_p (*ca, mode, type))
9310 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9311 = (sh_round_reg (*ca, mode)
9312 + (mode == BLKmode
9313 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9314 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9315 }
9316
9317 /* The Renesas calling convention doesn't quite fit into this scheme since
9318 the address is passed like an invisible argument, but one that is always
9319 passed in memory. */
9320 static rtx
9321 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9322 {
9323 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9324 return NULL_RTX;
9325 return gen_rtx_REG (Pmode, 2);
9326 }
9327
9328 /* Worker function for TARGET_FUNCTION_VALUE.
9329
9330 For the SH, this is like LIBCALL_VALUE, except that we must change the
9331 mode like PROMOTE_MODE does.
9332 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9333 tested here has to be kept in sync with the one in
9334 explow.c:promote_mode. */
9335 static rtx
9336 sh_function_value (const_tree valtype,
9337 const_tree fn_decl_or_type,
9338 bool outgoing ATTRIBUTE_UNUSED)
9339 {
9340 if (fn_decl_or_type
9341 && !DECL_P (fn_decl_or_type))
9342 fn_decl_or_type = NULL;
9343
9344 return gen_rtx_REG (
9345 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9346 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9347 && (TREE_CODE (valtype) == INTEGER_TYPE
9348 || TREE_CODE (valtype) == ENUMERAL_TYPE
9349 || TREE_CODE (valtype) == BOOLEAN_TYPE
9350 || TREE_CODE (valtype) == REAL_TYPE
9351 || TREE_CODE (valtype) == OFFSET_TYPE))
9352 && sh_promote_prototypes (fn_decl_or_type)
9353 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9354 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9355 }
9356
9357 /* Worker function for TARGET_LIBCALL_VALUE. */
9358 static rtx
9359 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9360 {
9361 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9362 }
9363
9364 /* Return true if N is a possible register number of function value. */
9365 static bool
9366 sh_function_value_regno_p (const unsigned int regno)
9367 {
9368 return ((regno) == FIRST_RET_REG
9369 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9370 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9371 }
9372
9373 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9374 static bool
9375 sh_return_in_memory (const_tree type, const_tree fndecl)
9376 {
9377 if (TARGET_SH5)
9378 {
9379 if (TYPE_MODE (type) == BLKmode)
9380 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9381 else
9382 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9383 }
9384 else
9385 {
9386 return (TYPE_MODE (type) == BLKmode
9387 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9388 && TREE_CODE (type) == RECORD_TYPE));
9389 }
9390 }
9391
9392 /* We actually emit the code in sh_expand_prologue. We used to use
9393 a static variable to flag that we need to emit this code, but that
9394 doesn't work when inlining, when functions are deferred and then emitted
9395 later. Fortunately, we already have two flags that are part of struct
9396 function that tell if a function uses varargs or stdarg. */
9397 static void
9398 sh_setup_incoming_varargs (cumulative_args_t ca,
9399 machine_mode mode,
9400 tree type,
9401 int *pretend_arg_size,
9402 int second_time ATTRIBUTE_UNUSED)
9403 {
9404 gcc_assert (cfun->stdarg);
9405 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9406 {
9407 int named_parm_regs, anon_parm_regs;
9408
9409 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9410 + (mode == BLKmode
9411 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9412 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9413 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9414 if (anon_parm_regs > 0)
9415 *pretend_arg_size = anon_parm_regs * 4;
9416 }
9417 }
9418
9419 static bool
9420 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9421 {
9422 return TARGET_SH5;
9423 }
9424
9425 static bool
9426 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9427 {
9428 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9429
9430 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9431 }
9432
9433
9434 /* Define the offset between two registers, one to be eliminated, and
9435 the other its replacement, at the start of a routine. */
9436 int
9437 initial_elimination_offset (int from, int to)
9438 {
9439 int regs_saved;
9440 int regs_saved_rounding = 0;
9441 int total_saved_regs_space;
9442 int total_auto_space;
9443 int save_flags = target_flags;
9444 int copy_flags;
9445 HARD_REG_SET live_regs_mask;
9446
9447 shmedia_space_reserved_for_target_registers = false;
9448 regs_saved = calc_live_regs (&live_regs_mask);
9449 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9450
9451 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9452 {
9453 shmedia_space_reserved_for_target_registers = true;
9454 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9455 }
9456
9457 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9458 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9459 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9460
9461 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9462 copy_flags = target_flags;
9463 target_flags = save_flags;
9464
9465 total_saved_regs_space = regs_saved + regs_saved_rounding;
9466
9467 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9468 return total_saved_regs_space + total_auto_space
9469 + crtl->args.info.byref_regs * 8;
9470
9471 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9472 return total_saved_regs_space + total_auto_space
9473 + crtl->args.info.byref_regs * 8;
9474
9475 /* Initial gap between fp and sp is 0. */
9476 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9477 return 0;
9478
9479 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9480 return rounded_frame_size (0);
9481
9482 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9483 return rounded_frame_size (0);
9484
9485 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9486 && (to == HARD_FRAME_POINTER_REGNUM
9487 || to == STACK_POINTER_REGNUM));
9488 if (TARGET_SH5)
9489 {
9490 int n = total_saved_regs_space;
9491 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9492 save_schedule schedule;
9493 save_entry *entry;
9494
9495 n += total_auto_space;
9496
9497 /* If it wasn't saved, there's not much we can do. */
9498 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9499 return n;
9500
9501 target_flags = copy_flags;
9502
9503 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9504 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9505 if (entry->reg == pr_reg)
9506 {
9507 target_flags = save_flags;
9508 return entry->offset;
9509 }
9510 gcc_unreachable ();
9511 }
9512 else
9513 return total_auto_space;
9514 }
9515
9516 /* Parse the -mfixed-range= option string. */
9517 void
9518 sh_fix_range (const char *const_str)
9519 {
9520 int i, first, last;
9521 char *str, *dash, *comma;
9522
9523 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9524 REG2 are either register names or register numbers. The effect
9525 of this option is to mark the registers in the range from REG1 to
9526 REG2 as ``fixed'' so they won't be used by the compiler. */
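/* For example (illustrative), -mfixed-range=r4-r6,r10-r10 marks r4, r5, r6
   and r10 as fixed, so the compiler never allocates them. */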
9527
9528 i = strlen (const_str);
9529 str = (char *) alloca (i + 1);
9530 memcpy (str, const_str, i + 1);
9531
9532 while (1)
9533 {
9534 dash = strchr (str, '-');
9535 if (!dash)
9536 {
9537 warning (0, "value of -mfixed-range must have form REG1-REG2");
9538 return;
9539 }
9540 *dash = '\0';
9541 comma = strchr (dash + 1, ',');
9542 if (comma)
9543 *comma = '\0';
9544
9545 first = decode_reg_name (str);
9546 if (first < 0)
9547 {
9548 warning (0, "unknown register name: %s", str);
9549 return;
9550 }
9551
9552 last = decode_reg_name (dash + 1);
9553 if (last < 0)
9554 {
9555 warning (0, "unknown register name: %s", dash + 1);
9556 return;
9557 }
9558
9559 *dash = '-';
9560
9561 if (first > last)
9562 {
9563 warning (0, "%s-%s is an empty range", str, dash + 1);
9564 return;
9565 }
9566
9567 for (i = first; i <= last; ++i)
9568 fixed_regs[i] = call_used_regs[i] = 1;
9569
9570 if (!comma)
9571 break;
9572
9573 *comma = ',';
9574 str = comma + 1;
9575 }
9576 }
9577 \f
9578 /* Insert any deferred function attributes from earlier pragmas. */
9579 static void
9580 sh_insert_attributes (tree node, tree *attributes)
9581 {
9582 tree attrs;
9583
9584 if (TREE_CODE (node) != FUNCTION_DECL)
9585 return;
9586
9587 /* We are only interested in fields. */
9588 if (!DECL_P (node))
9589 return;
9590
9591 /* Append the attributes to the deferred attributes. */
9592 *sh_deferred_function_attributes_tail = *attributes;
9593 attrs = sh_deferred_function_attributes;
9594 if (!attrs)
9595 return;
9596
9597 /* Some attributes imply or require the interrupt attribute. */
9598 if (!lookup_attribute ("interrupt_handler", attrs)
9599 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9600 {
9601 /* If we have a trapa_handler, but no interrupt_handler attribute,
9602 insert an interrupt_handler attribute. */
9603 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9604 /* We can't use sh_pr_interrupt here because that's not in the
9605 java frontend. */
9606 attrs
9607 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9608 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9609 if the interrupt attribute is missing, we ignore the attribute
9610 and warn. */
9611 else if (lookup_attribute ("sp_switch", attrs)
9612 || lookup_attribute ("trap_exit", attrs)
9613 || lookup_attribute ("nosave_low_regs", attrs)
9614 || lookup_attribute ("resbank", attrs))
9615 {
9616 tree *tail;
9617
9618 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9619 {
9620 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9621 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9622 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9623 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9624 warning (OPT_Wattributes,
9625 "%qE attribute only applies to interrupt functions",
9626 TREE_PURPOSE (attrs));
9627 else
9628 {
9629 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9630 NULL_TREE);
9631 tail = &TREE_CHAIN (*tail);
9632 }
9633 }
9634 attrs = *attributes;
9635 }
9636 }
9637
9638 /* Install the processed list. */
9639 *attributes = attrs;
9640
9641 /* Clear deferred attributes. */
9642 sh_deferred_function_attributes = NULL_TREE;
9643 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9644
9645 return;
9646 }
9647
9648 /*------------------------------------------------------------------------------
9649 Target specific attributes
9650 Supported attributes are:
9651
9652 * interrupt_handler
9653 Specifies this function is an interrupt handler.
9654
9655 * trapa_handler
9656 Like interrupt_handler, but don't save all registers.
9657
9658 * sp_switch
9659 Specifies an alternate stack for an interrupt handler to run on.
9660
9661 * trap_exit
9662 Use a trapa to exit an interrupt function instead of rte.
9663
9664 * nosave_low_regs
9665 Don't save r0..r7 in an interrupt handler function.
9666 This is useful on SH3* and SH4*, which have a separate set of low
9667 regs for user and privileged modes.
9668 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9669 those that run with interrupts disabled and thus can't be
9670 interrupted themselves).
9671
9672 * renesas
9673 Use Renesas calling/layout conventions (functions and structures).
9674
9675 * resbank
9676 In case of an interrupt handler function, use a register bank to
9677 save registers R0-R14, MACH, MACL, GBR and PR.
9678 This is available only on SH2A targets.
9679
9680 * function_vector
9681 Declares a function to be called using the TBR relative addressing
9682 mode. Takes an argument that specifies the slot number in the table
9683 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
9684 */
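/* Illustrative usage of some of these attributes (hypothetical
   declarations, not from the original sources):

     void __attribute__ ((interrupt_handler)) irq_isr (void);
     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                          trap_exit (11))) nmi_isr (void);
     void __attribute__ ((function_vector (18))) tbr_callee (void);  */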
9685
9686 /* Handle a 'resbank' attribute. */
9687 static tree
9688 sh_handle_resbank_handler_attribute (tree * node, tree name,
9689 tree args ATTRIBUTE_UNUSED,
9690 int flags ATTRIBUTE_UNUSED,
9691 bool * no_add_attrs)
9692 {
9693 if (!TARGET_SH2A)
9694 {
9695 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9696 name);
9697 *no_add_attrs = true;
9698 }
9699 if (TREE_CODE (*node) != FUNCTION_DECL)
9700 {
9701 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9702 name);
9703 *no_add_attrs = true;
9704 }
9705
9706 return NULL_TREE;
9707 }
9708
9709 /* Handle an "interrupt_handler" attribute; arguments as in
9710 struct attribute_spec.handler. */
9711 static tree
9712 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9713 tree args ATTRIBUTE_UNUSED,
9714 int flags ATTRIBUTE_UNUSED,
9715 bool *no_add_attrs)
9716 {
9717 if (TREE_CODE (*node) != FUNCTION_DECL)
9718 {
9719 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9720 name);
9721 *no_add_attrs = true;
9722 }
9723 else if (TARGET_SHCOMPACT)
9724 {
9725 error ("attribute interrupt_handler is not compatible with -m5-compact");
9726 *no_add_attrs = true;
9727 }
9728
9729 return NULL_TREE;
9730 }
9731
9732 /* Handle a 'function_vector' attribute; arguments as in
9733 struct attribute_spec.handler. */
9734 static tree
9735 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9736 tree args ATTRIBUTE_UNUSED,
9737 int flags ATTRIBUTE_UNUSED,
9738 bool * no_add_attrs)
9739 {
9740 if (!TARGET_SH2A)
9741 {
9742 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9743 name);
9744 *no_add_attrs = true;
9745 }
9746 else if (TREE_CODE (*node) != FUNCTION_DECL)
9747 {
9748 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9749 name);
9750 *no_add_attrs = true;
9751 }
9752 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9753 {
9754 /* The argument must be a constant integer. */
9755 warning (OPT_Wattributes,
9756 "%qE attribute argument not an integer constant",
9757 name);
9758 *no_add_attrs = true;
9759 }
9760 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9761 {
9762 /* The argument value must be between 0 and 255. */
9763 warning (OPT_Wattributes,
9764 "%qE attribute argument should be between 0 to 255",
9765 name);
9766 *no_add_attrs = true;
9767 }
9768 return NULL_TREE;
9769 }
9770
9771 /* Returns true if the rtx X is a SYMBOL_REF for a function that has been
9772 assigned the 'function_vector' attribute. */
9773 bool
9774 sh2a_is_function_vector_call (rtx x)
9775 {
9776 if (GET_CODE (x) == SYMBOL_REF
9777 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9778 {
9779 tree tr = SYMBOL_REF_DECL (x);
9780
9781 if (sh2a_function_vector_p (tr))
9782 return true;
9783 }
9784
9785 return false;
9786 }
9787
9788 /* Returns the function vector number, if the attribute
9789 'function_vector' is assigned, otherwise returns zero. */
9790 int
9791 sh2a_get_function_vector_number (rtx x)
9792 {
9793 int num;
9794 tree list, t;
9795
9796 if ((GET_CODE (x) == SYMBOL_REF)
9797 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9798 {
9799 t = SYMBOL_REF_DECL (x);
9800
9801 if (TREE_CODE (t) != FUNCTION_DECL)
9802 return 0;
9803
9804 list = SH_ATTRIBUTES (t);
9805 while (list)
9806 {
9807 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9808 {
9809 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9810 return num;
9811 }
9812
9813 list = TREE_CHAIN (list);
9814 }
9815
9816 return 0;
9817 }
9818 else
9819 return 0;
9820 }
9821
9822 /* Handle an "sp_switch" attribute; arguments as in
9823 struct attribute_spec.handler. */
9824 static tree
9825 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9826 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9827 {
9828 if (TREE_CODE (*node) != FUNCTION_DECL)
9829 {
9830 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9831 name);
9832 *no_add_attrs = true;
9833 }
9834 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9835 {
9836 /* The argument must be a constant string. */
9837 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9838 name);
9839 *no_add_attrs = true;
9840 }
9841
9842 return NULL_TREE;
9843 }
9844
9845 /* Handle a "trap_exit" attribute; arguments as in
9846 struct attribute_spec.handler. */
9847 static tree
9848 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9849 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9850 {
9851 if (TREE_CODE (*node) != FUNCTION_DECL)
9852 {
9853 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9854 name);
9855 *no_add_attrs = true;
9856 }
9857 /* The argument specifies a trap number to be used in a trapa instruction
9858 at function exit (instead of an rte instruction). */
9859 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9860 {
9861 /* The argument must be a constant integer. */
9862 warning (OPT_Wattributes, "%qE attribute argument not an "
9863 "integer constant", name);
9864 *no_add_attrs = true;
9865 }
9866
9867 return NULL_TREE;
9868 }
9869
9870 static tree
9871 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9872 tree name ATTRIBUTE_UNUSED,
9873 tree args ATTRIBUTE_UNUSED,
9874 int flags ATTRIBUTE_UNUSED,
9875 bool *no_add_attrs ATTRIBUTE_UNUSED)
9876 {
9877 return NULL_TREE;
9878 }
9879
9880 /* True if __attribute__((renesas)) or -mrenesas. */
9881 bool
9882 sh_attr_renesas_p (const_tree td)
9883 {
9884 if (TARGET_HITACHI)
9885 return true;
9886 if (td == NULL_TREE)
9887 return false;
9888 if (DECL_P (td))
9889 td = TREE_TYPE (td);
9890 if (td == error_mark_node)
9891 return false;
9892 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9893 != NULL_TREE);
9894 }
9895
9896 /* True if __attribute__((renesas)) or -mrenesas, for the current
9897 function. */
9898 bool
9899 sh_cfun_attr_renesas_p (void)
9900 {
9901 return sh_attr_renesas_p (current_function_decl);
9902 }
9903
9904 /* Returns true if the current function has the "interrupt_handler"
9905 attribute set. */
9906 bool
9907 sh_cfun_interrupt_handler_p (void)
9908 {
9909 return (lookup_attribute ("interrupt_handler",
9910 DECL_ATTRIBUTES (current_function_decl))
9911 != NULL_TREE);
9912 }
9913
9914 /* Returns true if FUNC has been assigned the attribute
9915 "function_vector". */
9916 bool
9917 sh2a_function_vector_p (tree func)
9918 {
9919 tree list;
9920 if (TREE_CODE (func) != FUNCTION_DECL)
9921 return false;
9922
9923 list = SH_ATTRIBUTES (func);
9924 while (list)
9925 {
9926 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9927 return true;
9928
9929 list = TREE_CHAIN (list);
9930 }
9931 return false;
9932 }
9933
9934 /* Returns true if the current function has the "resbank" attribute set. */
9935 bool
9936 sh_cfun_resbank_handler_p (void)
9937 {
9938 return ((lookup_attribute ("resbank",
9939 DECL_ATTRIBUTES (current_function_decl))
9940 != NULL_TREE)
9941 && (lookup_attribute ("interrupt_handler",
9942 DECL_ATTRIBUTES (current_function_decl))
9943 != NULL_TREE) && TARGET_SH2A);
9944 }
9945
9946 /* Returns true if the current function has a "trap_exit" attribute set. */
9947 bool
9948 sh_cfun_trap_exit_p (void)
9949 {
9950 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9951 != NULL_TREE;
9952 }
9953
9954 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9955 static const char *
9956 sh_check_pch_target_flags (int old_flags)
9957 {
9958 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9959 | MASK_SH_E | MASK_HARD_SH4
9960 | MASK_FPU_SINGLE | MASK_SH4))
9961 return _("created and used with different architectures / ABIs");
9962 if ((old_flags ^ target_flags) & MASK_HITACHI)
9963 return _("created and used with different ABIs");
9964 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9965 return _("created and used with different endianness");
9966 return NULL;
9967 }
9968 \f
9969 /* Predicates used by the templates. */
9970
9971 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9972 Used only in general_movsrc_operand. */
9973 bool
9974 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
9975 {
9976 switch (REGNO (op))
9977 {
9978 case PR_REG:
9979 case MACL_REG:
9980 case MACH_REG:
9981 return true;
9982 }
9983 return false;
9984 }
9985
9986 /* Returns true if OP is a floating point value with value 0.0. */
9987 bool
9988 fp_zero_operand (rtx op)
9989 {
9990 REAL_VALUE_TYPE r;
9991
9992 if (GET_MODE (op) != SFmode)
9993 return false;
9994
9995 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9996 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9997 }
9998
9999 /* Returns true if OP is a floating point value with value 1.0. */
10000 bool
10001 fp_one_operand (rtx op)
10002 {
10003 REAL_VALUE_TYPE r;
10004
10005 if (GET_MODE (op) != SFmode)
10006 return false;
10007
10008 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
10009 return REAL_VALUES_EQUAL (r, dconst1);
10010 }
10011
10012 /* Return the TLS type for TLS symbols. */
10013 enum tls_model
10014 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
10015 {
10016 if (GET_CODE (op) != SYMBOL_REF)
10017 return TLS_MODEL_NONE;
10018 return SYMBOL_REF_TLS_MODEL (op);
10019 }
10020 \f
10021 /* Return the destination address of a branch. */
10022 static int
10023 branch_dest (rtx branch)
10024 {
10025 rtx dest = SET_SRC (PATTERN (branch));
10026 int dest_uid;
10027
10028 if (GET_CODE (dest) == IF_THEN_ELSE)
10029 dest = XEXP (dest, 1);
10030 dest = XEXP (dest, 0);
10031 dest_uid = INSN_UID (dest);
10032 return INSN_ADDRESSES (dest_uid);
10033 }
10034 \f
10035 /* Return nonzero if REG is not used after INSN.
10036 We assume REG is a reload reg, and therefore does
10037 not live past labels. It may live past calls or jumps though. */
10038 bool
10039 reg_unused_after (rtx reg, rtx_insn *insn)
10040 {
10041 enum rtx_code code;
10042 rtx set;
10043
10044 /* If the reg is set by this instruction, then it is safe for our
10045 case. Disregard the case where this is a store to memory, since
10046 we are checking a register used in the store address. */
10047 set = single_set (insn);
10048 if (set && !MEM_P (SET_DEST (set))
10049 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10050 return true;
10051
10052 while ((insn = NEXT_INSN (insn)))
10053 {
10054 rtx set;
10055 if (!INSN_P (insn))
10056 continue;
10057
10058 code = GET_CODE (insn);
10059
10060 #if 0
10061 /* If this is a label that existed before reload, then the register
10062 is dead here. However, if this is a label added by reorg, then
10063 the register may still be live here. We can't tell the difference,
10064 so we just ignore labels completely. */
10065 if (code == CODE_LABEL)
10066 return 1;
10067 /* else */
10068 #endif
10069
10070 if (code == JUMP_INSN)
10071 return false;
10072
10073 /* If this is a sequence, we must handle them all at once.
10074 We could have for instance a call that sets the target register,
10075 and an insn in a delay slot that uses the register. In this case,
10076 we must return 0. */
10077 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
10078 {
10079 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
10080 int i;
10081 int retval = 0;
10082
10083 for (i = 0; i < seq->len (); i++)
10084 {
10085 rtx_insn *this_insn = seq->insn (i);
10086 rtx set = single_set (this_insn);
10087
10088 if (CALL_P (this_insn))
10089 code = CALL_INSN;
10090 else if (JUMP_P (this_insn))
10091 {
10092 if (INSN_ANNULLED_BRANCH_P (this_insn))
10093 return false;
10094 code = JUMP_INSN;
10095 }
10096
10097 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10098 return false;
10099 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10100 {
10101 if (!MEM_P (SET_DEST (set)))
10102 retval = true;
10103 else
10104 return false;
10105 }
10106 if (set == NULL_RTX
10107 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
10108 return false;
10109 }
10110 if (retval == 1)
10111 return true;
10112 else if (code == JUMP_INSN)
10113 return false;
10114 }
10115
10116 set = single_set (insn);
10117 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10118 return false;
10119 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10120 return !MEM_P (SET_DEST (set));
10121 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
10122 return false;
10123
10124 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
10125 return true;
10126 }
10127 return true;
10128 }
10129 \f
10130
10131 static GTY(()) rtx t_reg_rtx;
10132 rtx
10133 get_t_reg_rtx (void)
10134 {
10135 if (! t_reg_rtx)
10136 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
10137 return t_reg_rtx;
10138 }
10139
10140 static GTY(()) tree fpscr_values;
10141
10142 static void
10143 emit_fpu_switch (rtx scratch, int index)
10144 {
10145 rtx src;
10146
10147 if (fpscr_values == NULL)
10148 {
10149 tree t;
10150
10151 t = build_index_type (integer_one_node);
10152 t = build_array_type (integer_type_node, t);
10153 t = build_decl (BUILTINS_LOCATION,
10154 VAR_DECL, get_identifier ("__fpscr_values"), t);
10155 DECL_ARTIFICIAL (t) = 1;
10156 DECL_IGNORED_P (t) = 1;
10157 DECL_EXTERNAL (t) = 1;
10158 TREE_STATIC (t) = 1;
10159 TREE_PUBLIC (t) = 1;
10160 TREE_USED (t) = 1;
10161
10162 fpscr_values = t;
10163 }
10164
10165 src = DECL_RTL (fpscr_values);
10166 if (!can_create_pseudo_p ())
10167 {
10168 emit_move_insn (scratch, XEXP (src, 0));
10169 if (index != 0)
10170 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10171 src = adjust_automodify_address (src, SImode, scratch, index * 4);
10172 }
10173 else
10174 src = adjust_address (src, SImode, index * 4);
10175
10176 emit_insn (gen_lds_fpscr (src));
10177 }
10178 \f
10179 static rtx get_free_reg (HARD_REG_SET);
10180
10181 /* This function returns a register to use to load the address to load
10182 the fpscr from. Currently it always returns r1 or r7, but when we are
10183 able to use pseudo registers after combine, or have a better mechanism
10184 for choosing a register, it should be done here. */
10185 /* REGS_LIVE is the liveness information for the point for which we
10186 need this allocation. In some bare-bones exit blocks, r1 is live at the
10187 start. We can even have all of r0..r3 being live:
10188 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10189 The INSN before which new insns are placed will clobber the register
10190 we return. If a basic block consists only of setting the return value
10191 register to a pseudo and using that register, the return value is not
10192 live before or after this block, yet we'll insert our insns right in
10193 the middle. */
10194 static rtx
10195 get_free_reg (HARD_REG_SET regs_live)
10196 {
10197 if (! TEST_HARD_REG_BIT (regs_live, 1))
10198 return gen_rtx_REG (Pmode, 1);
10199
10200 /* Hard reg 1 is live; since this is a target with small register classes,
10201 there shouldn't be anything but a jump before the function end. */
10202 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10203 return gen_rtx_REG (Pmode, 7);
10204 }
10205
10206 /* This function will set the fpscr from memory.
10207 MODE is the mode we are setting it to. */
10208 void
10209 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10210 {
10211 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10212 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10213 rtx addr_reg;
10214
10215 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10216 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10217 }
10218
10219 /* Is the given character a logical line separator for the assembler? */
10220 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10221 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10222 #endif
10223
10224 static bool
10225 sequence_insn_p (rtx_insn *insn)
10226 {
10227 rtx_insn *prev, *next;
10228
10229 prev = PREV_INSN (insn);
10230 if (prev == NULL)
10231 return false;
10232
10233 next = NEXT_INSN (prev);
10234 if (next == NULL)
10235 return false;
10236
10237 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10238 }
10239
10240 int
10241 sh_insn_length_adjustment (rtx_insn *insn)
10242 {
10243 /* Instructions with unfilled delay slots take up an extra two bytes for
10244 the nop in the delay slot. */
10245 if (((NONJUMP_INSN_P (insn)
10246 && GET_CODE (PATTERN (insn)) != USE
10247 && GET_CODE (PATTERN (insn)) != CLOBBER)
10248 || CALL_P (insn) || JUMP_P (insn))
10249 && ! sequence_insn_p (insn)
10250 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10251 return 2;
10252
10253 /* Increase the insn length of a cbranch without a delay slot insn to
10254 force a delay slot which will be stuffed with a nop. */
10255 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
10256 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
10257 && ! sequence_insn_p (insn))
10258 return 2;
10259
10260 /* sh-dsp parallel processing insns take four bytes instead of two. */
10261
10262 if (NONJUMP_INSN_P (insn))
10263 {
10264 int sum = 0;
10265 rtx body = PATTERN (insn);
10266 const char *templ;
10267 char c;
10268 bool maybe_label = true;
10269
10270 if (GET_CODE (body) == ASM_INPUT)
10271 templ = XSTR (body, 0);
10272 else if (asm_noperands (body) >= 0)
10273 templ
10274 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10275 else
10276 return 0;
10277 do
10278 {
10279 int ppi_adjust = 0;
10280
10281 do
10282 c = *templ++;
10283 while (c == ' ' || c == '\t');
10284 /* all sh-dsp parallel-processing insns start with p.
10285 The only non-ppi sh insn starting with p is pref.
10286 The only ppi starting with pr is prnd. */
10287 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10288 ppi_adjust = 2;
10289 /* The repeat pseudo-insn expands to three insns, a total of
10290 six bytes in size. */
10291 else if ((c == 'r' || c == 'R')
10292 && ! strncasecmp ("epeat", templ, 5))
10293 ppi_adjust = 4;
10294 while (c && c != '\n'
10295 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10296 {
10297 /* If this is a label, it is obviously not a ppi insn. */
10298 if (c == ':' && maybe_label)
10299 {
10300 ppi_adjust = 0;
10301 break;
10302 }
10303 else if (c == '\'' || c == '"')
10304 maybe_label = false;
10305 c = *templ++;
10306 }
10307 sum += ppi_adjust;
10308 maybe_label = c != ':';
10309 }
10310 while (c);
10311 return sum;
10312 }
10313 return 0;
10314 }
10315 \f
10316 /* Return TRUE for a valid displacement for the REG+disp addressing
10317 with MODE. */
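/* For example (illustrative, ignoring SH2A's large displacements): valid
   SImode displacements are 0, 4, 8, ... 60; HImode 0, 2, ... 30; and
   QImode 0 .. 15. */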
10318 bool
10319 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
10320 bool allow_zero)
10321 {
10322 if (! CONST_INT_P (op))
10323 return false;
10324
10325 if (TARGET_SHMEDIA)
10326 {
10327 int size;
10328
10329 /* Check if this is the address of an unaligned load / store. */
10330 if (mode == VOIDmode)
10331 return satisfies_constraint_I06 (op);
10332
10333 size = GET_MODE_SIZE (mode);
10334 return (!(INTVAL (op) & (size - 1))
10335 && INTVAL (op) >= -512 * size
10336 && INTVAL (op) < 512 * size);
10337 }
10338 else
10339 {
10340 const HOST_WIDE_INT offset = INTVAL (op);
10341 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10342 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10343
10344 /* If the mode does not support any displacement always return false.
10345 Even though an index of '0' is actually always valid, it will cause
10346 troubles when e.g. a DFmode move is split into two SFmode moves,
10347 where one SFmode move will have index '0' and the other move will
10348 have index '4'. */
10349 if (!allow_zero && max_disp < 1)
10350 return false;
10351
10352 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10353 }
10354 }
10355
10356 /* Recognize an RTL expression that is a valid memory address for
10357 an instruction.
10358 The MODE argument is the machine mode for the MEM expression
10359 that wants to use this address.
10360 Allow REG
10361 REG+disp
10362 REG+r0
10363 REG++
10364 --REG
10365 GBR
10366 GBR+disp */
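/* In SH assembler syntax these correspond roughly to @Rn, @(disp,Rn),
   @(R0,Rn), @Rn+, @-Rn and @(disp,GBR); a bare GBR base is the disp = 0
   case (illustrative mapping). */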
10367 static bool
10368 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10369 {
10370 if (! ALLOW_INDEXED_ADDRESS
10371 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
10372 return false;
10373
10374 if (REG_P (x) && REGNO (x) == GBR_REG)
10375 return true;
10376
10377 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10378 return true;
10379 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10380 && ! TARGET_SHMEDIA
10381 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10382 return true;
10383 else if (GET_CODE (x) == PLUS)
10384 {
10385 rtx xop0 = XEXP (x, 0);
10386 rtx xop1 = XEXP (x, 1);
10387
10388 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10389 return gbr_displacement (xop1, mode);
10390
10391 if (GET_MODE_SIZE (mode) <= 8
10392 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10393 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10394 return true;
10395
10396 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10397 || ((xop0 == stack_pointer_rtx
10398 || xop0 == hard_frame_pointer_rtx)
10399 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10400 || ((xop1 == stack_pointer_rtx
10401 || xop1 == hard_frame_pointer_rtx)
10402 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10403 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10404 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10405 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10406 && TARGET_FMOVD && mode == DFmode)))
10407 {
10408 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10409 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10410 return true;
10411 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10412 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10413 return true;
10414 }
10415 }
10416
10417 return false;
10418 }
10419 \f
10420 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10421 isn't protected by a PIC unspec. */
10422 bool
10423 nonpic_symbol_mentioned_p (rtx x)
10424 {
10425 const char *fmt;
10426 int i;
10427
10428 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10429 || GET_CODE (x) == PC)
10430 return true;
10431
10432 /* We don't want to look into the possible MEM location of a
10433 CONST_DOUBLE, since we're not going to use it, in general. */
10434 if (GET_CODE (x) == CONST_DOUBLE)
10435 return false;
10436
10437 if (GET_CODE (x) == UNSPEC
10438 && (XINT (x, 1) == UNSPEC_PIC
10439 || XINT (x, 1) == UNSPEC_GOT
10440 || XINT (x, 1) == UNSPEC_GOTOFF
10441 || XINT (x, 1) == UNSPEC_GOTPLT
10442 || XINT (x, 1) == UNSPEC_GOTTPOFF
10443 || XINT (x, 1) == UNSPEC_DTPOFF
10444 || XINT (x, 1) == UNSPEC_TPOFF
10445 || XINT (x, 1) == UNSPEC_PLT
10446 || XINT (x, 1) == UNSPEC_PCREL
10447 || XINT (x, 1) == UNSPEC_SYMOFF
10448 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10449 return false;
10450
10451 fmt = GET_RTX_FORMAT (GET_CODE (x));
10452 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10453 {
10454 if (fmt[i] == 'E')
10455 {
10456 int j;
10457 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10458 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10459 return true;
10460 }
10461 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10462 return true;
10463 }
10464
10465 return false;
10466 }
10467
10468 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10469 @GOTOFF in `reg'. */
10470 rtx
10471 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
10472 rtx reg)
10473 {
10474 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10475 return orig;
10476
10477 if (GET_CODE (orig) == LABEL_REF
10478 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10479 {
10480 if (reg == NULL_RTX)
10481 reg = gen_reg_rtx (Pmode);
10482
10483 emit_insn (gen_symGOTOFF2reg (reg, orig));
10484 return reg;
10485 }
10486 else if (GET_CODE (orig) == SYMBOL_REF)
10487 {
10488 if (reg == NULL_RTX)
10489 reg = gen_reg_rtx (Pmode);
10490
10491 emit_insn (gen_symGOT2reg (reg, orig));
10492 return reg;
10493 }
10494 return orig;
10495 }
10496
10497 /* Given a (logical) mode size and an offset in bytes, try to find the
10498 appropriate displacement value for a mov insn. On SH the displacements
10499 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10500 15 bytes in QImode. To compensate for this we create a new base address by
10501 adding an adjustment value to it.
10502
10503 If the originally requested offset is greater than 127 we prefer using
10504 values 124..127 over 128..131 to increase opportunities to use the
10505 add #imm, Rn insn.
10506
10507 In some cases it is possible that a requested offset might seem unaligned
10508 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10509 This is compensated by adjusting the base address so that the effective
10510 address of the displacement move insn will be aligned.
10511
10512 This is not the best possible way of rebasing the base address, as it
10513 does not look at other present displacement addressings around it.
10514 In some cases this can create more base address adjustments than would
10515 actually be necessary. */
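/* Worked example (derived from the code below): for an SImode access at
   offset 68 the maximum displacement is 60, so the base is adjusted by 64
   and the remaining mov displacement is 4, i.e. the access effectively
   becomes @(4, base + 64). */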
10516 struct disp_adjust
10517 {
10518 rtx offset_adjust;
10519 rtx mov_disp;
10520 };
10521
10522 static struct disp_adjust
10523 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
10524 {
10525 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10526
10527 /* Do not try to use SH2A's large displacements here, because this would
10528 effectively disable the small displacement insns. */
10529 const int mode_sz = GET_MODE_SIZE (mode);
10530 const int mov_insn_sz = mov_insn_size (mode, false);
10531 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10532 const int max_disp_next = max_disp + mov_insn_sz;
10533 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10534 HOST_WIDE_INT offset_adjust;
10535
10536 /* In some cases this actually does happen and we must check for it. */
10537 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10538 return res;
10539
10540 /* Keeps the previous behavior for QImode displacement addressing.
10541 This just decides how the offset is re-based. Removing this special
10542 case will result in slightly bigger code on average, but it's not that
10543 bad actually. */
10544 if (mov_insn_sz == 1)
10545 align_modifier = 0;
10546
10547 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10548
10549 if (mode_sz + offset - offset_adjust <= max_disp_next)
10550 {
10551 res.offset_adjust = GEN_INT (offset_adjust);
10552 res.mov_disp = GEN_INT (offset - offset_adjust);
10553 }
10554
10555 return res;
10556 }
10557
10558 /* Try to modify an illegitimate address and make it legitimate.
10559 If we find one, return the new, valid address.
10560 Otherwise, return the original address. */
10561 static rtx
10562 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
10563 {
10564 if (flag_pic)
10565 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10566
10567 if (TARGET_SHMEDIA)
10568 return x;
10569
10570 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10571 || (TARGET_SH2E && mode == SFmode))
10572 return x;
10573
10574 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10575 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10576 {
10577 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10578 INTVAL (XEXP (x, 1)));
10579
10580 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10581 {
10582 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10583 adj.offset_adjust, NULL_RTX, 0,
10584 OPTAB_LIB_WIDEN);
10585 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10586 }
10587 }
10588 return x;
10589 }
10590
10591 /* Attempt to replace *p, which is an address that needs reloading, with
10592 a valid memory address for an operand of mode MODE.
10593 Like for sh_legitimize_address, for the SH we try to get a normal form
10594 of the address. That will allow inheritance of the address reloads. */
10595 bool
10596 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
10597 int itype)
10598 {
10599 enum reload_type type = (enum reload_type) itype;
10600 const int mode_sz = GET_MODE_SIZE (mode);
10601
10602 if (sh_lra_p ())
10603 return false;
10604
10605 if (! ALLOW_INDEXED_ADDRESS
10606 && GET_CODE (*p) == PLUS
10607 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1)))
10608 {
10609 *p = copy_rtx (*p);
10610 push_reload (*p, NULL_RTX, p, NULL,
10611 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10612 return true;
10613 }
10614
10615 if (! ALLOW_INDEXED_ADDRESS
10616 && GET_CODE (*p) == PLUS
10617 && GET_CODE (XEXP (*p, 0)) == PLUS)
10618 {
10619 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0),
10620 XEXP (XEXP (*p, 0), 1));
10621 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1));
10622 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10623 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10624 return true;
10625 }
10626
10627 if (TARGET_SHMEDIA)
10628 return false;
10629
10630 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10631 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10632 && (ALLOW_INDEXED_ADDRESS
10633 || XEXP (*p, 0) == stack_pointer_rtx
10634 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10635 {
10636 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10637 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10638
10639 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10640 {
10641 push_reload (*p, NULL_RTX, p, NULL,
10642 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10643 return true;
10644 }
10645
10646 if (TARGET_SH2E && mode == SFmode)
10647 {
10648 *p = copy_rtx (*p);
10649 push_reload (*p, NULL_RTX, p, NULL,
10650 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10651 return true;
10652 }
10653
10654 /* FIXME: Do not allow legitimizing QImode and HImode displacement
10655 moves, because reload then has a problem figuring out the constraint
10656 that the move insn's target/source reg must be R0.
10657 Or maybe some handling in sh_secondary_reload is wrong for this
10658 to work properly? */
10659 if ((mode_sz == 4 || mode_sz == 8)
10660 && ! (TARGET_SH4 && mode == DFmode)
10661 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10662 {
10663 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10664 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10665 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10666 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10667 return true;
10668 }
10669 }
10670
10671 /* We must re-recognize what we created before. */
10672 if (GET_CODE (*p) == PLUS
10673 && (mode_sz == 4 || mode_sz == 8)
10674 && GET_CODE (XEXP (*p, 0)) == PLUS
10675 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10676 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10677 && CONST_INT_P (XEXP (*p, 1))
10678 && ! (TARGET_SH2E && mode == SFmode))
10679 {
10680 /* Because this address is so complex, we know it must have
10681 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10682 it is already unshared, and needs no further unsharing. */
10683 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10684 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10685 return true;
10686 }
10687
10688 return false;
10689 }
10690
10691 /* In the name of slightly smaller debug output, and to cater to
10692 general assembler lossage, recognize various UNSPEC sequences
10693 and turn them back into a direct symbol reference. */
10694 static rtx
10695 sh_delegitimize_address (rtx orig_x)
10696 {
10697 rtx x, y;
10698
10699 orig_x = delegitimize_mem_from_attrs (orig_x);
10700
10701 x = orig_x;
10702 if (MEM_P (x))
10703 x = XEXP (x, 0);
10704 if (GET_CODE (x) == CONST)
10705 {
10706 y = XEXP (x, 0);
10707 if (GET_CODE (y) == UNSPEC)
10708 {
10709 if (XINT (y, 1) == UNSPEC_GOT
10710 || XINT (y, 1) == UNSPEC_GOTOFF
10711 || XINT (y, 1) == UNSPEC_SYMOFF)
10712 return XVECEXP (y, 0, 0);
10713 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10714 {
10715 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10716 {
10717 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10718
10719 if (GET_CODE (symplt) == UNSPEC
10720 && (XINT (symplt, 1) == UNSPEC_PLT
10721 || XINT (symplt, 1) == UNSPEC_PCREL))
10722 return XVECEXP (symplt, 0, 0);
10723 }
10724 }
10725 else if (TARGET_SHMEDIA
10726 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10727 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10728 {
10729 rtx offset = XVECEXP (y, 0, 1);
10730
10731 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10732 if (MEM_P (orig_x))
10733 x = replace_equiv_address_nv (orig_x, x);
10734 return x;
10735 }
10736 }
10737 }
10738
10739 return orig_x;
10740 }
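/* For illustration: a PIC reference that was legitimized earlier as

     (const (unspec [(symbol_ref ("foo"))] UNSPEC_GOTOFF))

   is turned back into the plain (symbol_ref ("foo")) here, which keeps
   debug info and RTL dumps readable.  */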
10741
10742 /* Mark the use of a constant in the literal table. If the constant
10743 has multiple labels, make it unique. */
10744 static rtx
10745 mark_constant_pool_use (rtx x)
10746 {
10747 rtx_insn *insn, *lab;
10748 rtx pattern;
10749
10750 if (x == NULL_RTX)
10751 return x;
10752
10753 switch (GET_CODE (x))
10754 {
10755 case LABEL_REF:
10756 x = XEXP (x, 0);
10757 case CODE_LABEL:
10758 break;
10759 default:
10760 return x;
10761 }
10762
10763 /* Get the first label in the list of labels for the same constant
10764 and delete the other labels in the list. */
10765 lab = as_a <rtx_insn *> (x);
10766 for (insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
10767 {
10768 if (!LABEL_P (insn)
10769 || LABEL_REFS (insn) != NEXT_INSN (insn))
10770 break;
10771 lab = insn;
10772 }
10773
10774 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10775 as_a<rtx_insn *> (insn)->set_deleted ();
10776
10777 /* Mark constants in a window. */
10778 for (insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; insn = NEXT_INSN (insn))
10779 {
10780 if (!NONJUMP_INSN_P (insn))
10781 continue;
10782
10783 pattern = PATTERN (insn);
10784 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10785 continue;
10786
10787 switch (XINT (pattern, 1))
10788 {
10789 case UNSPECV_CONST2:
10790 case UNSPECV_CONST4:
10791 case UNSPECV_CONST8:
10792 XVECEXP (pattern, 0, 1) = const1_rtx;
10793 break;
10794 case UNSPECV_WINDOW_END:
10795 if (XVECEXP (pattern, 0, 0) == x)
10796 return lab;
10797 break;
10798 case UNSPECV_CONST_END:
10799 return lab;
10800 default:
10801 break;
10802 }
10803 }
10804
10805 return lab;
10806 }
10807 \f
10808 /* Return true if it's possible to redirect BRANCH1 to the destination
10809 of an unconditional jump BRANCH2. We only want to do this if the
10810 resulting branch will have a short displacement. */
10811 static bool
10812 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
10813 {
10814 /* Don't follow if BRANCH2 may be a jump crossing between
10815 hot and cold partitions. */
10816 if (TARGET_SH1
10817 && flag_reorder_blocks_and_partition
10818 && simplejump_p (branch2)
10819 && CROSSING_JUMP_P (branch2))
10820 return false;
10821
10822 if (flag_expensive_optimizations && simplejump_p (branch2))
10823 {
10824 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10825 rtx_insn *insn;
10826 int distance;
10827
10828 for (distance = 0, insn = NEXT_INSN (branch1);
10829 insn && distance < 256;
10830 insn = PREV_INSN (insn))
10831 {
10832 if (insn == dest)
10833 return true;
10834 else
10835 distance += get_attr_length (insn);
10836 }
10837 for (distance = 0, insn = NEXT_INSN (branch1);
10838 insn && distance < 256;
10839 insn = NEXT_INSN (insn))
10840 {
10841 if (insn == dest)
10842 return true;
10843 else
10844 distance += get_attr_length (insn);
10845 }
10846 }
10847 return false;
10848 }
10849
10850 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10851 bool
10852 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10853 unsigned int new_reg)
10854 {
10855 /* Interrupt functions can only use registers that have already been
10856 saved by the prologue, even if they would normally be
10857 call-clobbered. */
10858 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10859 return false;
10860
10861 return true;
10862 }
10863
10864 /* Function to update the integer COST
10865 based on the relationship between INSN that is dependent on
10866 DEP_INSN through the dependence LINK. The default is to make no
10867 adjustment to COST. This can be used for example to specify to
10868 the scheduler that an output- or anti-dependence does not incur
10869 the same cost as a data-dependence. The return value should be
10870 the new value for COST. */
10871 static int
10872 sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
10873 rtx_insn *dep_insn, int cost)
10874 {
10875 rtx reg, use_pat;
10876
10877 if (TARGET_SHMEDIA)
10878 {
10879 /* On SHmedia, if the dependence is an anti-dependence or
10880 output-dependence, there is no cost. */
10881 if (REG_NOTE_KIND (link) != 0)
10882 {
10883 /* However, dependencies between target register loads and
10884 uses of the register in a subsequent block that are separated
10885 by a conditional branch are not modelled - we have to make do with
10886 the anti-dependency between the target register load and the
10887 conditional branch that ends the current block. */
10888 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10889 && GET_CODE (PATTERN (dep_insn)) == SET
10890 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10891 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10892 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10893 {
10894 int orig_cost = cost;
10895 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10896 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10897 ? insn : JUMP_LABEL (insn));
10898 /* On the likely path, the branch costs 1, on the unlikely path,
10899 it costs 3. */
10900 cost--;
10901 do
10902 target = next_active_insn (target);
10903 while (target && ! flow_dependent_p (target, dep_insn)
10904 && --cost > 0);
10905 /* If two branches are executed in immediate succession, with the
10906 first branch properly predicted, this causes a stall at the
10907 second branch, hence we won't need the target for the
10908 second branch for two cycles after the launch of the first
10909 branch. */
10910 if (cost > orig_cost - 2)
10911 cost = orig_cost - 2;
10912 }
10913 else
10914 cost = 0;
10915 }
10916
10917 else if (get_attr_is_mac_media (insn)
10918 && get_attr_is_mac_media (dep_insn))
10919 cost = 1;
10920
10921 else if (! reload_completed
10922 && GET_CODE (PATTERN (insn)) == SET
10923 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10924 && GET_CODE (PATTERN (dep_insn)) == SET
10925 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10926 && cost < 4)
10927 cost = 4;
10928 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10929 that is needed at the target. */
10930 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10931 && ! flow_dependent_p (insn, dep_insn))
10932 cost--;
10933 }
10934 else if (REG_NOTE_KIND (link) == 0)
10935 {
10936 enum attr_type type;
10937 rtx dep_set;
10938
10939 if (recog_memoized (insn) < 0
10940 || recog_memoized (dep_insn) < 0)
10941 return cost;
10942
10943 dep_set = single_set (dep_insn);
10944
10945 /* The latency that we specify in the scheduling description refers
10946 to the actual output, not to an auto-increment register; for that,
10947 the latency is one. */
10948 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10949 {
10950 rtx set = single_set (insn);
10951
10952 if (set
10953 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10954 && (!MEM_P (SET_DEST (set))
10955 || !reg_mentioned_p (SET_DEST (dep_set),
10956 XEXP (SET_DEST (set), 0))))
10957 cost = 1;
10958 }
10959 /* The only input for a call that is timing-critical is the
10960 function's address. */
10961 if (CALL_P (insn))
10962 {
10963 rtx call = get_call_rtx_from (insn);
10964 if (call
10965 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10966 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10967 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10968 cost -= TARGET_SH4_300 ? 3 : 6;
10969 }
10970 /* Likewise, the most timing critical input for an sfuncs call
10971 is the function address. However, sfuncs typically start
10972 using their arguments pretty quickly.
10973 Assume a four cycle delay for SH4 before they are needed.
10974 Cached ST40-300 calls are quicker, so assume only a one
10975 cycle delay there.
10976 ??? Maybe we should encode the delays till input registers
10977 are needed by sfuncs into the sfunc call insn. */
10978 /* All sfunc calls are parallels with at least four components.
10979 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10980 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10981 && XVECLEN (PATTERN (insn), 0) >= 4
10982 && (reg = sfunc_uses_reg (insn)))
10983 {
10984 if (! reg_set_p (reg, dep_insn))
10985 cost -= TARGET_SH4_300 ? 1 : 4;
10986 }
10987 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10988 {
10989 enum attr_type dep_type = get_attr_type (dep_insn);
10990
10991 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10992 cost--;
10993 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10994 && (type = get_attr_type (insn)) != TYPE_CALL
10995 && type != TYPE_SFUNC)
10996 cost--;
10997 /* When the preceding instruction loads the shift amount of
10998 the following SHAD/SHLD, the latency of the load is increased
10999 by 1 cycle. */
11000 if (get_attr_type (insn) == TYPE_DYN_SHIFT
11001 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
11002 && reg_overlap_mentioned_p (SET_DEST (dep_set),
11003 XEXP (SET_SRC (single_set (insn)),
11004 1)))
11005 cost++;
11006 /* When an LS group instruction with a latency of less than
11007 3 cycles is followed by a double-precision floating-point
11008 instruction, FIPR, or FTRV, the latency of the first
11009 instruction is increased to 3 cycles. */
11010 else if (cost < 3
11011 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
11012 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
11013 cost = 3;
11014 /* The lsw register of a double-precision computation is ready one
11015 cycle earlier. */
11016 else if (reload_completed
11017 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
11018 && (use_pat = single_set (insn))
11019 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
11020 SET_SRC (use_pat)))
11021 cost -= 1;
11022
11023 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
11024 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
11025 cost -= 1;
11026 }
11027 else if (TARGET_SH4_300)
11028 {
11029 /* Stores need their input register two cycles later. */
11030 if (dep_set && cost >= 1
11031 && ((type = get_attr_type (insn)) == TYPE_STORE
11032 || type == TYPE_PSTORE
11033 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
11034 {
11035 rtx set = single_set (insn);
11036
11037 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
11038 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
11039 {
11040 cost -= 2;
11041 /* But don't reduce the cost below 1 if the address depends
11042 on a side effect of dep_insn. */
11043 if (cost < 1
11044 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
11045 cost = 1;
11046 }
11047 }
11048 }
11049 }
11050 /* An anti-dependence penalty of two applies if the first insn is a double
11051 precision fadd / fsub / fmul. */
11052 else if (!TARGET_SH4_300
11053 && REG_NOTE_KIND (link) == REG_DEP_ANTI
11054 && recog_memoized (dep_insn) >= 0
11055 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
11056 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
11057 /* A lot of alleged anti-flow dependences are fake,
11058 so check this one is real. */
11059 && flow_dependent_p (dep_insn, insn))
11060 cost = 2;
11061
11062 return cost;
11063 }
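/* A worked example of the auto-increment special case above (a sketch,
   assuming SH4-style scheduling):

     mov.l  @r1+,r2     ! dep_insn: load, post-increments r1
     add    r1,r3       ! insn: reads only the incremented r1

   The consumer never reads the loaded value r2, so its true dependence is
   only on the address-register side effect, and the cost is reduced to 1
   instead of the full load latency.  */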
11064
11065 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
11066 if DEP_INSN is anti-flow dependent on INSN. */
11067 static bool
11068 flow_dependent_p (rtx insn, rtx dep_insn)
11069 {
11070 rtx tmp = PATTERN (insn);
11071
11072 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
11073 return tmp == NULL_RTX;
11074 }
11075
11076 /* A helper function for flow_dependent_p called through note_stores. */
11077 static void
11078 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
11079 {
11080 rtx * pinsn = (rtx *) data;
11081
11082 if (*pinsn && reg_referenced_p (x, *pinsn))
11083 *pinsn = NULL_RTX;
11084 }
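/* Note on the mechanism: DATA initially points at INSN's pattern.  For each
   location X stored by DEP_INSN, note_stores invokes flow_dependent_p_1,
   which clears the pointer if INSN's pattern references X.  A NULL pointer
   on return therefore means that INSN reads something DEP_INSN writes,
   i.e. a true (flow) dependence exists.  */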
11085
11086 /* For use by sh_allocate_initial_value. Note that sh.md contains some
11087 'special function' patterns (type sfunc) that clobber pr, but that
11088 do not look like function calls to leaf_function_p. Hence we must
11089 do this extra check. */
11090 static int
11091 sh_pr_n_sets (void)
11092 {
11093 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11094 }
11095
11096 /* Return where to allocate pseudo for a given hard register initial
11097 value. */
11098 static rtx
11099 sh_allocate_initial_value (rtx hard_reg)
11100 {
11101 rtx x;
11102
11103 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
11104 {
11105 if (crtl->is_leaf
11106 && ! sh_pr_n_sets ()
11107 && ! (TARGET_SHCOMPACT
11108 && ((crtl->args.info.call_cookie
11109 & ~ CALL_COOKIE_RET_TRAMP (1))
11110 || crtl->saves_all_registers)))
11111 x = hard_reg;
11112 else
11113 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
11114 }
11115 else
11116 x = NULL_RTX;
11117
11118 return x;
11119 }
11120
11121 /* This function returns "2" to indicate dual issue for the SH4
11122 processor. To be used by the DFA pipeline description. */
11123 static int
11124 sh_issue_rate (void)
11125 {
11126 if (TARGET_SUPERSCALAR)
11127 return 2;
11128 else
11129 return 1;
11130 }
11131
11132 /* Functions for ready queue reordering for sched1. */
11133
11134 /* Get weight for mode for a set x. */
11135 static short
11136 find_set_regmode_weight (rtx x, machine_mode mode)
11137 {
11138 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11139 return 1;
11140 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
11141 {
11142 if (REG_P (SET_DEST (x)))
11143 {
11144 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11145 return 1;
11146 else
11147 return 0;
11148 }
11149 return 1;
11150 }
11151 return 0;
11152 }
11153
11154 /* Get regmode weight for insn. */
11155 static short
11156 find_insn_regmode_weight (rtx insn, machine_mode mode)
11157 {
11158 short reg_weight = 0;
11159 rtx x;
11160
11161 /* Increment weight for each register born here. */
11162 x = PATTERN (insn);
11163 reg_weight += find_set_regmode_weight (x, mode);
11164 if (GET_CODE (x) == PARALLEL)
11165 {
11166 int j;
11167 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11168 {
11169 x = XVECEXP (PATTERN (insn), 0, j);
11170 reg_weight += find_set_regmode_weight (x, mode);
11171 }
11172 }
11173 /* Decrement weight for each register that dies here. */
11174 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11175 {
11176 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11177 {
11178 rtx note = XEXP (x, 0);
11179 if (REG_P (note) && GET_MODE (note) == mode)
11180 reg_weight--;
11181 }
11182 }
11183 return reg_weight;
11184 }
11185
11186 /* Calculate regmode weights for all insns of a basic block. */
11187 static void
11188 find_regmode_weight (basic_block b, machine_mode mode)
11189 {
11190 rtx_insn *insn, *next_tail, *head, *tail;
11191
11192 get_ebb_head_tail (b, b, &head, &tail);
11193 next_tail = NEXT_INSN (tail);
11194
11195 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11196 {
11197 /* Handle register life information. */
11198 if (!INSN_P (insn))
11199 continue;
11200
11201 if (mode == SFmode)
11202 INSN_REGMODE_WEIGHT (insn, mode) =
11203 find_insn_regmode_weight (insn, mode)
11204 + 2 * find_insn_regmode_weight (insn, DFmode);
11205 else if (mode == SImode)
11206 INSN_REGMODE_WEIGHT (insn, mode) =
11207 find_insn_regmode_weight (insn, mode)
11208 + 2 * find_insn_regmode_weight (insn, DImode);
11209 }
11210 }
11211
11212 /* Comparison function for ready queue sorting. */
11213 static int
11214 rank_for_reorder (const void *x, const void *y)
11215 {
11216 rtx_insn *tmp = *(rtx_insn * const *) y;
11217 rtx_insn *tmp2 = *(rtx_insn * const *) x;
11218
11219 /* The insn in a schedule group should be issued first. */
11220 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11221 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11222
11223 /* If insns are equally good, sort by INSN_LUID (original insn order); this
11224 minimizes instruction movement, thus minimizing sched's effect on
11225 register pressure. */
11226 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11227 }
11228
11229 /* Resort the array A, in which only the element at index N - 1 may be out of order. */
11230 static void
11231 swap_reorder (rtx_insn **a, int n)
11232 {
11233 rtx_insn *insn = a[n - 1];
11234 int i = n - 2;
11235
11236 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11237 {
11238 a[i + 1] = a[i];
11239 i -= 1;
11240 }
11241 a[i + 1] = insn;
11242 }
11243
11244 /* Sort the ready list by ascending priority. */
11245 static void
11246 ready_reorder (rtx_insn **ready, int nready)
11247 {
11248 if (nready == 2)
11249 swap_reorder (ready, nready);
11250 else if (nready > 2)
11251 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
11252 }
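/* Note (a sketch of the intended effect): rank_for_reorder takes TMP from Y
   and TMP2 from X, so qsort leaves the ready vector ordered with the
   smallest INSN_LUID - and any schedule-group insn - at the end.  Since the
   scheduler issues insns from the end of the ready vector, this keeps the
   issue order close to the original insn order when high register pressure
   forces a reorder.  */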
11253
11254 /* Count life regions of r0 for a block. */
11255 static int
11256 find_r0_life_regions (basic_block b)
11257 {
11258 rtx_insn *end, *insn;
11259 rtx pset;
11260 rtx r0_reg;
11261 int live;
11262 int set;
11263 int death = 0;
11264
11265 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11266 {
11267 set = 1;
11268 live = 1;
11269 }
11270 else
11271 {
11272 set = 0;
11273 live = 0;
11274 }
11275
11276 insn = BB_HEAD (b);
11277 end = BB_END (b);
11278 r0_reg = gen_rtx_REG (SImode, R0_REG);
11279 while (1)
11280 {
11281 if (INSN_P (insn))
11282 {
11283 if (find_regno_note (insn, REG_DEAD, R0_REG))
11284 {
11285 death++;
11286 live = 0;
11287 }
11288 if (!live
11289 && (pset = single_set (insn))
11290 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11291 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11292 {
11293 set++;
11294 live = 1;
11295 }
11296 }
11297 if (insn == end)
11298 break;
11299 insn = NEXT_INSN (insn);
11300 }
11301 return set - death;
11302 }
11303
11304 /* Calculate regmode weights for all insns of all basic blocks. */
11305 static void
11306 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11307 int verbose ATTRIBUTE_UNUSED,
11308 int old_max_uid)
11309 {
11310 basic_block b;
11311
11312 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11313 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11314 r0_life_regions = 0;
11315
11316 FOR_EACH_BB_REVERSE_FN (b, cfun)
11317 {
11318 find_regmode_weight (b, SImode);
11319 find_regmode_weight (b, SFmode);
11320 if (!reload_completed)
11321 r0_life_regions += find_r0_life_regions (b);
11322 }
11323
11324 CURR_REGMODE_PRESSURE (SImode) = 0;
11325 CURR_REGMODE_PRESSURE (SFmode) = 0;
11326 }
11327
11328 /* Cleanup. */
11329 static void
11330 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11331 int verbose ATTRIBUTE_UNUSED)
11332 {
11333 if (regmode_weight[0])
11334 {
11335 free (regmode_weight[0]);
11336 regmode_weight[0] = NULL;
11337 }
11338 if (regmode_weight[1])
11339 {
11340 free (regmode_weight[1]);
11341 regmode_weight[1] = NULL;
11342 }
11343 }
11344
11345 /* The set of supported scalar modes differs from the default only in that
11346 TImode is not supported for 32-bit SHMEDIA. */
11347 static bool
11348 sh_scalar_mode_supported_p (machine_mode mode)
11349 {
11350 if (TARGET_SHMEDIA32 && mode == TImode)
11351 return false;
11352
11353 return default_scalar_mode_supported_p (mode);
11354 }
11355
11356 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11357 keep count of register pressures on SImode and SFmode. */
11358 static int
11359 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11360 int sched_verbose ATTRIBUTE_UNUSED,
11361 rtx_insn *insn,
11362 int can_issue_more)
11363 {
11364 if (GET_CODE (PATTERN (insn)) != USE
11365 && GET_CODE (PATTERN (insn)) != CLOBBER)
11366 cached_can_issue_more = can_issue_more - 1;
11367 else
11368 cached_can_issue_more = can_issue_more;
11369
11370 if (reload_completed)
11371 return cached_can_issue_more;
11372
11373 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11374 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11375
11376 return cached_can_issue_more;
11377 }
11378
11379 static void
11380 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11381 int verbose ATTRIBUTE_UNUSED,
11382 int veclen ATTRIBUTE_UNUSED)
11383 {
11384 CURR_REGMODE_PRESSURE (SImode) = 0;
11385 CURR_REGMODE_PRESSURE (SFmode) = 0;
11386 }
11387
11388 /* Some magic numbers. */
11389 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11390 functions that already have high pressure on r0. */
11391 #define R0_MAX_LIFE_REGIONS 2
11392 /* Register Pressure thresholds for SImode and SFmode registers. */
11393 #define SIMODE_MAX_WEIGHT 5
11394 #define SFMODE_MAX_WEIGHT 10
11395
11396 /* Return true if the pressure is high for MODE. */
11397 static bool
11398 high_pressure (machine_mode mode)
11399 {
11400 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11401 functions that already have high pressure on r0. */
11402 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11403 return true;
11404
11405 if (mode == SFmode)
11406 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11407 else
11408 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11409 }
11410
11411 /* Reorder ready queue if register pressure is high. */
11412 static int
11413 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11414 int sched_verbose ATTRIBUTE_UNUSED,
11415 rtx_insn **ready,
11416 int *n_readyp,
11417 int clock_var ATTRIBUTE_UNUSED)
11418 {
11419 if (reload_completed)
11420 return sh_issue_rate ();
11421
11422 if (high_pressure (SFmode) || high_pressure (SImode))
11423 {
11424 ready_reorder (ready, *n_readyp);
11425 }
11426
11427 return sh_issue_rate ();
11428 }
11429
11430 /* Skip cycles if the current register pressure is high. */
11431 static int
11432 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11433 int sched_verbose ATTRIBUTE_UNUSED,
11434 rtx_insn **ready ATTRIBUTE_UNUSED,
11435 int *n_readyp ATTRIBUTE_UNUSED,
11436 int clock_var ATTRIBUTE_UNUSED)
11437 {
11438 if (reload_completed)
11439 return cached_can_issue_more;
11440
11441 if (high_pressure(SFmode) || high_pressure (SImode))
11442 skip_cycles = 1;
11443
11444 return cached_can_issue_more;
11445 }
11446
11447 /* Skip cycles without sorting the ready queue. This will move insns from
11448 Q->R. If this is the last cycle we are skipping, allow sorting of the
11449 ready queue by sh_reorder. */
11450
11451 /* Generally, skipping this many cycles is sufficient for all insns to move
11452 from Q -> R. */
11453 #define MAX_SKIPS 8
11454
11455 static int
11456 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11457 int sched_verbose ATTRIBUTE_UNUSED,
11458 rtx_insn *insn ATTRIBUTE_UNUSED,
11459 int last_clock_var,
11460 int clock_var,
11461 int *sort_p)
11462 {
11463 if (reload_completed)
11464 return 0;
11465
11466 if (skip_cycles)
11467 {
11468 if ((clock_var - last_clock_var) < MAX_SKIPS)
11469 {
11470 *sort_p = 0;
11471 return 1;
11472 }
11473 /* If this is the last cycle we are skipping, allow reordering of R. */
11474 if ((clock_var - last_clock_var) == MAX_SKIPS)
11475 {
11476 *sort_p = 1;
11477 return 1;
11478 }
11479 }
11480
11481 skip_cycles = 0;
11482
11483 return 0;
11484 }
11485
11486 /* SHmedia requires registers for branches, so we can't generate new
11487 branches past reload. */
11488 static bool
11489 sh_cannot_modify_jumps_p (void)
11490 {
11491 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11492 }
11493
11494 static reg_class_t
11495 sh_target_reg_class (void)
11496 {
11497 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11498 }
11499
11500 static bool
11501 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11502 {
11503 if (! shmedia_space_reserved_for_target_registers)
11504 return 0;
11505 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11506 return 0;
11507
11508 HARD_REG_SET dummy;
11509 if (calc_live_regs (&dummy) >= 6 * 8)
11510 return 1;
11511 return 0;
11512 }
11513
11514 static bool
11515 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11516 {
11517 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11518 }
11519 \f
11520 /*
11521 On the SH1..SH4, the trampoline looks like
11522 2 0002 D202 mov.l l2,r2
11523 1 0000 D301 mov.l l1,r3
11524 3 0004 422B jmp @r2
11525 4 0006 0009 nop
11526 5 0008 00000000 l1: .long area
11527 6 000c 00000000 l2: .long function
11528
11529 SH5 (compact) uses r1 instead of r3 for the static chain. */
11530
11531
11532 /* Emit RTL insns to initialize the variable parts of a trampoline.
11533 FNADDR is an RTX for the address of the function's pure code.
11534 CXT is an RTX for the static chain value for the function. */
11535 static void
11536 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11537 {
11538 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11539 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11540
11541 if (TARGET_SHMEDIA64)
11542 {
11543 rtx tramp_templ;
11544 int fixed_len;
11545
11546 rtx movi1 = GEN_INT (0xcc000010);
11547 rtx shori1 = GEN_INT (0xc8000010);
11548 rtx src, dst;
11549
11550 /* The following trampoline works within a +- 128 KB range for cxt:
11551 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11552 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11553 gettr tr1,r1; blink tr0,r63 */
11554 /* Address rounding makes it hard to compute the exact bounds of the
11555 offset for this trampoline, but we have a rather generous offset
11556 range, so frame_offset should do fine as an upper bound. */
11557 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11558 {
11559 /* ??? could optimize this trampoline initialization
11560 by writing DImode words with two insns each. */
11561 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11562 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11563 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11564 insn = gen_rtx_AND (DImode, insn, mask);
11565 /* Or in ptb/u .,tr1 pattern */
11566 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11567 insn = force_operand (insn, NULL_RTX);
11568 insn = gen_lowpart (SImode, insn);
11569 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11570 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11571 insn = gen_rtx_AND (DImode, insn, mask);
11572 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11573 insn = gen_lowpart (SImode, insn);
11574 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11575 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11576 insn = gen_rtx_AND (DImode, insn, mask);
11577 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11578 insn = gen_lowpart (SImode, insn);
11579 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11580 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11581 insn = gen_rtx_AND (DImode, insn, mask);
11582 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11583 insn = gen_lowpart (SImode, insn);
11584 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11585 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11586 insn = gen_rtx_AND (DImode, insn, mask);
11587 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11588 insn = gen_lowpart (SImode, insn);
11589 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11590 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11591 GEN_INT (0x6bf10600));
11592 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11593 GEN_INT (0x4415fc10));
11594 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11595 GEN_INT (0x4401fff0));
11596 emit_insn (gen_ic_invalidate_line (tramp));
11597 return;
11598 }
11599 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11600 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11601
11602 tramp_templ = gen_datalabel_ref (tramp_templ);
11603 dst = tramp_mem;
11604 src = gen_const_mem (BLKmode, tramp_templ);
11605 set_mem_align (dst, 256);
11606 set_mem_align (src, 64);
11607 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11608
11609 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11610 emit_move_insn (adjust_address (tramp_mem, Pmode,
11611 fixed_len + GET_MODE_SIZE (Pmode)),
11612 cxt);
11613 emit_insn (gen_ic_invalidate_line (tramp));
11614 return;
11615 }
11616 else if (TARGET_SHMEDIA)
11617 {
11618 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11619 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11620 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11621 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11622 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11623 rotated right by 10, and the upper 16 bits of every 32 selected. */
11624 rtx movishori
11625 = force_reg (V2HImode, (simplify_gen_subreg
11626 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11627 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11628 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11629
11630 fnaddr = force_reg (SImode, fnaddr);
11631 cxt = force_reg (SImode, cxt);
11632 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11633 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11634 movishori));
11635 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11636 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11637 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11638 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11639 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11640 gen_rtx_SUBREG (V2HImode, cxt, 0),
11641 movishori));
11642 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11643 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11644 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11645 if (TARGET_LITTLE_ENDIAN)
11646 {
11647 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11648 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11649 }
11650 else
11651 {
11652 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11653 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11654 }
11655 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11656 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11657 emit_insn (gen_ic_invalidate_line (tramp));
11658 return;
11659 }
11660 else if (TARGET_SHCOMPACT)
11661 {
11662 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11663 return;
11664 }
11665 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11666 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11667 SImode));
11668 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11669 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11670 SImode));
11671 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11672 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11673 if (TARGET_HARD_SH4 || TARGET_SH5)
11674 {
11675 if (!TARGET_INLINE_IC_INVALIDATE
11676 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
11677 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11678 FUNCTION_ORDINARY),
11679 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11680 else
11681 emit_insn (gen_ic_invalidate_line (tramp));
11682 }
11683 }
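/* A note on the SH1..SH4 constants above (illustrative): the SImode value
   chosen for the current endianness packs the two 16-bit opcodes 0xd202
   (mov.l l2,r2) and 0xd301 (mov.l l1,r3) from the trampoline listing, and
   the second word packs 0x422b (jmp @r2) and 0x0009 (nop).  The cxt and
   fnaddr stores at offsets 8 and 12 fill the l1/l2 literal slots that those
   PC-relative loads read.  */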
11684
11685 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11686 static rtx
11687 sh_trampoline_adjust_address (rtx tramp)
11688 {
11689 if (TARGET_SHMEDIA)
11690 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11691 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11692 return tramp;
11693 }
11694
11695 /* FIXME: This is overly conservative. A SHcompact function that
11696 receives arguments ``by reference'' will have them stored in its
11697 own stack frame, so it must not pass pointers or references to
11698 these arguments to other functions by means of sibling calls. */
11699 /* If PIC, we cannot make sibling calls to global functions
11700 because the PLT requires r12 to be live. */
11701 static bool
11702 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11703 {
11704 return (1
11705 && (! TARGET_SHCOMPACT
11706 || crtl->args.info.stack_regs == 0)
11707 && ! sh_cfun_interrupt_handler_p ()
11708 && (! flag_pic
11709 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
11710 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11711 }
11712
11713 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
11714 void
11715 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
11716 {
11717 const_tree decl = SYMBOL_REF_DECL (sym);
11718 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
11719
11720 if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
11721 emit_insn (gen_sym_label2reg (reg, sym, lab));
11722 else if (sibcall_p)
11723 emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
11724 else
11725 emit_insn (gen_symPLT_label2reg (reg, sym, lab));
11726 }
11727 \f
11728 /* Machine specific built-in functions. */
11729
11730 struct builtin_description
11731 {
11732 bool (* const is_enabled) (void);
11733 const enum insn_code icode;
11734 const char *const name;
11735 int signature;
11736 tree fndecl;
11737 };
11738
11739 static bool
11740 shmedia_builtin_p (void)
11741 {
11742 return TARGET_SHMEDIA;
11743 }
11744
11745 /* This function is for built-ins that are not SHmedia-specific, such as
11746 the FPSCR get/set built-ins below. */
11747 static bool
11748 sh1_builtin_p (void)
11749 {
11750 return TARGET_SH1;
11751 }
11752
11753 /* Describe the number and signedness of arguments; arg[0] == result
11754 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11755 /* 9: 64-bit pointer, 10: 32-bit pointer */
11756 static const char signature_args[][4] =
11757 {
11758 #define SH_BLTIN_V2SI2 0
11759 { 4, 4 },
11760 #define SH_BLTIN_V4HI2 1
11761 { 4, 4 },
11762 #define SH_BLTIN_V2SI3 2
11763 { 4, 4, 4 },
11764 #define SH_BLTIN_V4HI3 3
11765 { 4, 4, 4 },
11766 #define SH_BLTIN_V8QI3 4
11767 { 4, 4, 4 },
11768 #define SH_BLTIN_MAC_HISI 5
11769 { 1, 4, 4, 1 },
11770 #define SH_BLTIN_SH_HI 6
11771 { 4, 4, 1 },
11772 #define SH_BLTIN_SH_SI 7
11773 { 4, 4, 1 },
11774 #define SH_BLTIN_V4HI2V2SI 8
11775 { 4, 4, 4 },
11776 #define SH_BLTIN_V4HI2V8QI 9
11777 { 4, 4, 4 },
11778 #define SH_BLTIN_SISF 10
11779 { 4, 2 },
11780 #define SH_BLTIN_LDUA_L 11
11781 { 2, 10 },
11782 #define SH_BLTIN_LDUA_Q 12
11783 { 1, 10 },
11784 #define SH_BLTIN_STUA_L 13
11785 { 0, 10, 2 },
11786 #define SH_BLTIN_STUA_Q 14
11787 { 0, 10, 1 },
11788 #define SH_BLTIN_LDUA_L64 15
11789 { 2, 9 },
11790 #define SH_BLTIN_LDUA_Q64 16
11791 { 1, 9 },
11792 #define SH_BLTIN_STUA_L64 17
11793 { 0, 9, 2 },
11794 #define SH_BLTIN_STUA_Q64 18
11795 { 0, 9, 1 },
11796 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11797 #define SH_BLTIN_2 19
11798 #define SH_BLTIN_SU 19
11799 { 1, 2 },
11800 #define SH_BLTIN_3 20
11801 #define SH_BLTIN_SUS 20
11802 { 2, 2, 1 },
11803 #define SH_BLTIN_PSSV 21
11804 { 0, 8, 2, 2 },
11805 #define SH_BLTIN_XXUU 22
11806 #define SH_BLTIN_UUUU 22
11807 { 1, 1, 1, 1 },
11808 #define SH_BLTIN_PV 23
11809 { 0, 8 },
11810 #define SH_BLTIN_VP 24
11811 { 8, 0 },
11812 #define SH_BLTIN_UV 25
11813 { 1, 0 },
11814 #define SH_BLTIN_VU 26
11815 { 0, 1 },
11816 };
11817 /* mcmv: operands considered unsigned. */
11818 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11819 /* mperm: control value considered unsigned int. */
11820 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11821 /* mshards_q: returns signed short. */
11822 /* nsb: takes long long arg, returns unsigned char. */
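/* For illustration: SH_BLTIN_SH_HI is { 4, 4, 1 }, i.e. a "don't care"
   result, a "don't care" first operand and an unsigned second operand; the
   concrete modes come from the insn pattern.  __builtin_sh_media_MPERM_W
   therefore ends up with a prototype roughly like

     v4hi __builtin_sh_media_MPERM_W (v4hi src, unsigned int control);

   where v4hi stands for the vector type that sh_init_builtins derives from
   the insn's V4HImode operands.  */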
11823 static struct builtin_description bdesc[] =
11824 {
11825 { shmedia_builtin_p,
11826 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11827 { shmedia_builtin_p,
11828 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11829 { shmedia_builtin_p,
11830 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11831 { shmedia_builtin_p,
11832 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11833 { shmedia_builtin_p,
11834 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11835 { shmedia_builtin_p,
11836 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11837 { shmedia_builtin_p,
11838 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11839 { shmedia_builtin_p,
11840 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11841 { shmedia_builtin_p,
11842 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11843 { shmedia_builtin_p,
11844 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11845 { shmedia_builtin_p,
11846 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11847 { shmedia_builtin_p,
11848 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11849 { shmedia_builtin_p,
11850 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11851 { shmedia_builtin_p,
11852 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11853 { shmedia_builtin_p,
11854 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11855 { shmedia_builtin_p,
11856 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11857 { shmedia_builtin_p,
11858 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11859 { shmedia_builtin_p,
11860 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11861 { shmedia_builtin_p,
11862 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11863 { shmedia_builtin_p,
11864 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11865 { shmedia_builtin_p,
11866 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11867 { shmedia_builtin_p,
11868 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11869 { shmedia_builtin_p,
11870 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11871 { shmedia_builtin_p,
11872 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11873 { shmedia_builtin_p,
11874 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11875 { shmedia_builtin_p,
11876 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11877 { shmedia_builtin_p,
11878 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11879 { shmedia_builtin_p,
11880 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11881 { shmedia_builtin_p,
11882 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11883 { shmedia_builtin_p,
11884 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11885 { shmedia_builtin_p,
11886 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11887 { shmedia_builtin_p,
11888 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11889 { shmedia_builtin_p,
11890 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11891 { shmedia_builtin_p,
11892 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11893 { shmedia_builtin_p,
11894 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11895 { shmedia_builtin_p,
11896 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11897 { shmedia_builtin_p,
11898 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11899 { shmedia_builtin_p,
11900 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11901 { shmedia_builtin_p,
11902 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11903 { shmedia_builtin_p,
11904 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11905 { shmedia_builtin_p,
11906 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11907 { shmedia_builtin_p,
11908 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11909 { shmedia_builtin_p,
11910 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11911 { shmedia_builtin_p,
11912 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11913 { shmedia_builtin_p,
11914 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11915 { shmedia_builtin_p,
11916 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11917 { shmedia_builtin_p,
11918 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11919 { shmedia_builtin_p,
11920 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11921 { shmedia_builtin_p,
11922 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11923 { shmedia_builtin_p,
11924 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11925 { shmedia_builtin_p,
11926 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11927 { shmedia_builtin_p,
11928 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11929 { shmedia_builtin_p,
11930 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11931 { shmedia_builtin_p,
11932 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11933 { shmedia_builtin_p,
11934 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11935 { shmedia_builtin_p,
11936 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11937 { shmedia_builtin_p,
11938 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11939 { shmedia_builtin_p,
11940 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11941 { shmedia_builtin_p,
11942 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11943 { shmedia_builtin_p,
11944 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11945 { shmedia_builtin_p,
11946 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11947 { shmedia_builtin_p,
11948 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11949 { shmedia_builtin_p,
11950 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11951 { shmedia_builtin_p,
11952 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11953 { shmedia_builtin_p,
11954 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11955 { shmedia_builtin_p,
11956 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11957 { shmedia_builtin_p,
11958 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11959 { shmedia_builtin_p,
11960 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11961 { shmedia_builtin_p,
11962 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11963 { shmedia_builtin_p,
11964 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11965 { shmedia_builtin_p,
11966 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11967 { shmedia_builtin_p,
11968 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11969 { shmedia_builtin_p,
11970 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11971 { shmedia_builtin_p,
11972 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11973 { shmedia_builtin_p,
11974 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11975 { shmedia_builtin_p,
11976 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11977 { shmedia_builtin_p,
11978 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11979 { shmedia_builtin_p,
11980 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11981 { shmedia_builtin_p,
11982 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11983 { shmedia_builtin_p,
11984 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11985 { shmedia_builtin_p,
11986 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11987 { shmedia_builtin_p,
11988 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11989 { shmedia_builtin_p,
11990 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11991
11992 { sh1_builtin_p,
11993 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
11994 { sh1_builtin_p,
11995 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
11996 };
11997
11998 static tree sh_builtin_get_fpscr;
11999 static tree sh_builtin_set_fpscr;
12000
12001 static void
12002 sh_init_builtins (void)
12003 {
12004 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
12005 memset (shared, 0, sizeof shared);
12006
12007 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
12008 {
12009 builtin_description* d = &bdesc[di];
12010
12011 if (!d->is_enabled ())
12012 continue;
12013
12014 tree type, arg_type = NULL_TREE;
12015 int signature = d->signature;
12016
12017 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
12018 type = shared[signature];
12019 else
12020 {
12021 int has_result = signature_args[signature][0] != 0;
12022 tree args[3];
12023
12024 if ((signature_args[signature][1] & 8)
12025 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
12026 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
12027 continue;
12028 if (! TARGET_FPU_ANY
12029 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
12030 continue;
12031 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
12032 args[i] = NULL_TREE;
12033 for (int i = 3; ; i--)
12034 {
12035 int arg = signature_args[signature][i];
12036 int opno = i - 1 + has_result;
12037
12038 if (arg & 8)
12039 arg_type = ptr_type_node;
12040 else if (arg)
12041 arg_type = (*lang_hooks.types.type_for_mode)
12042 (insn_data[d->icode].operand[opno].mode, (arg & 1));
12043 else if (i)
12044 continue;
12045 else
12046 arg_type = void_type_node;
12047 if (i == 0)
12048 break;
12049 args[i-1] = arg_type;
12050 }
12051 type = build_function_type_list (arg_type, args[0], args[1],
12052 args[2], NULL_TREE);
12053 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
12054 shared[signature] = type;
12055 }
12056 d->fndecl =
12057 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
12058 NULL, NULL_TREE);
12059 /* Record {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
12060 if (d->icode == CODE_FOR_sts_fpscr)
12061 sh_builtin_get_fpscr = d->fndecl;
12062 else if (d->icode == CODE_FOR_set_fpscr)
12063 sh_builtin_set_fpscr = d->fndecl;
12064 }
12065 }
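/* Illustrative usage (a sketch; MASK is any caller-chosen value): once the
   descriptors above are registered, SH1 user code can access FPSCR
   directly, e.g.

     unsigned int env = __builtin_sh_get_fpscr ();
     __builtin_sh_set_fpscr (env & MASK);

   sh_atomic_assign_expand_fenv below builds exactly this kind of sequence
   for the hold/clear/update parts of atomic FP environment handling.  */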
12066
12067 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
12068
12069 static void
12070 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12071 {
12072 const unsigned SH_FE_INVALID = 64;
12073 const unsigned SH_FE_DIVBYZERO = 32;
12074 const unsigned SH_FE_OVERFLOW = 16;
12075 const unsigned SH_FE_UNDERFLOW = 8;
12076 const unsigned SH_FE_INEXACT = 4;
12077 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
12078 | SH_FE_DIVBYZERO
12079 | SH_FE_OVERFLOW
12080 | SH_FE_UNDERFLOW
12081 | SH_FE_INEXACT);
12082 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
12083 tree fenv_var, mask, ld_fenv, masked_fenv;
12084 tree new_fenv_var, reload_fenv, restore_fnenv;
12085 tree update_call, atomic_feraiseexcept, hold_fnclex;
12086
12087 if (! TARGET_FPU_ANY)
12088 return;
12089
12090 /* Generate the equivalent of:
12091 unsigned int fenv_var;
12092 fenv_var = __builtin_sh_get_fpscr ();
12093
12094 unsigned int masked_fenv;
12095 masked_fenv = fenv_var & mask;
12096
12097 __builtin_sh_set_fpscr (masked_fenv); */
12098
12099 fenv_var = create_tmp_var (unsigned_type_node);
12100 mask = build_int_cst (unsigned_type_node,
12101 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
12102 | SH_FE_ALL_EXCEPT));
12103 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
12104 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
12105 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
12106 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12107 *hold = build2 (COMPOUND_EXPR, void_type_node,
12108 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
12109 hold_fnclex);
12110
12111 /* Store the value of masked_fenv to clear the exceptions:
12112 __builtin_sh_set_fpscr (masked_fenv); */
12113
12114 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12115
12116 /* Generate the equivalent of:
12117 unsigned int new_fenv_var;
12118 new_fenv_var = __builtin_sh_get_fpscr ();
12119
12120 __builtin_sh_set_fpscr (fenv_var);
12121
12122 __atomic_feraiseexcept (new_fenv_var); */
12123
12124 new_fenv_var = create_tmp_var (unsigned_type_node);
12125 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
12126 build_call_expr (sh_builtin_get_fpscr, 0));
12127 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
12128 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12129 update_call = build_call_expr (atomic_feraiseexcept, 1,
12130 fold_convert (integer_type_node,
12131 new_fenv_var));
12132 *update = build2 (COMPOUND_EXPR, void_type_node,
12133 build2 (COMPOUND_EXPR, void_type_node,
12134 reload_fenv, restore_fnenv), update_call);
12135 }
12136
12137 /* Implements target hook vector_mode_supported_p. */
12138 bool
12139 sh_vector_mode_supported_p (machine_mode mode)
12140 {
12141 if (TARGET_FPU_ANY
12142 && ((mode == V2SFmode)
12143 || (mode == V4SFmode)
12144 || (mode == V16SFmode)))
12145 return true;
12146
12147 else if (TARGET_SHMEDIA
12148 && ((mode == V8QImode)
12149 || (mode == V2HImode)
12150 || (mode == V4HImode)
12151 || (mode == V2SImode)))
12152 return true;
12153
12154 return false;
12155 }
12156
12157 bool
12158 sh_frame_pointer_required (void)
12159 {
12160 /* If needed override this in other tm.h files to cope with various OS
12161 lossage requiring a frame pointer. */
12162 if (SUBTARGET_FRAME_POINTER_REQUIRED)
12163 return true;
12164
12165 if (crtl->profile)
12166 return true;
12167
12168 return false;
12169 }
12170
12171 /* Implements target hook dwarf_calling_convention. Return an enum
12172 of dwarf_calling_convention. */
12173 int
12174 sh_dwarf_calling_convention (const_tree func)
12175 {
12176 if (sh_attr_renesas_p (func))
12177 return DW_CC_GNU_renesas_sh;
12178
12179 return DW_CC_normal;
12180 }
12181
12182 /* Returns the sh builtin decl for CODE. */
12183 static tree
12184 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
12185 {
12186 if (code >= ARRAY_SIZE (bdesc))
12187 return error_mark_node;
12188
12189 if (!bdesc[code].is_enabled ())
12190 return error_mark_node;
12191
12192 return bdesc[code].fndecl;
12193 }
12194
12195 /* Expand an expression EXP that calls a built-in function,
12196 with result going to TARGET if that's convenient
12197 (and in mode MODE if that's convenient).
12198 SUBTARGET may be used as the target for computing one of EXP's operands.
12199 IGNORE is nonzero if the value is to be ignored. */
12200 static rtx
12201 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12202 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
12203 {
12204 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12205 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12206 const struct builtin_description *d = &bdesc[fcode];
12207 enum insn_code icode = d->icode;
12208 int signature = d->signature;
12209 int nop = 0;
12210 rtx op[4];
12211
12212 if (signature_args[signature][0])
12213 {
12214 if (ignore)
12215 return NULL_RTX;
12216
12217 machine_mode tmode = insn_data[icode].operand[0].mode;
12218 if (! target || GET_MODE (target) != tmode
12219 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12220 target = gen_reg_rtx (tmode);
12221 op[nop++] = target;
12222 }
12223 else
12224 target = NULL_RTX;
12225
12226 for (int i = 1; i <= 3; i++, nop++)
12227 {
12228 tree arg;
12229 machine_mode opmode, argmode;
12230 tree optype;
12231
12232 if (! signature_args[signature][i])
12233 break;
12234 arg = CALL_EXPR_ARG (exp, i - 1);
12235 if (arg == error_mark_node)
12236 return const0_rtx;
12237 if (signature_args[signature][i] & 8)
12238 {
12239 opmode = ptr_mode;
12240 optype = ptr_type_node;
12241 }
12242 else
12243 {
12244 opmode = insn_data[icode].operand[nop].mode;
12245 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12246 }
12247 argmode = TYPE_MODE (TREE_TYPE (arg));
12248 if (argmode != opmode)
12249 arg = build1 (NOP_EXPR, optype, arg);
12250 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12251 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12252 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12253 }
12254
12255 rtx pat = NULL_RTX;
12256
12257 switch (nop)
12258 {
12259 case 1:
12260 pat = (*insn_data[d->icode].genfun) (op[0]);
12261 break;
12262 case 2:
12263 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12264 break;
12265 case 3:
12266 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12267 break;
12268 case 4:
12269 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12270 break;
12271 default:
12272 gcc_unreachable ();
12273 }
12274 if (! pat)
12275 return NULL_RTX;
12276 emit_insn (pat);
12277 return target;
12278 }
12279
12280 void
12281 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12282 {
12283 rtx sel0 = const0_rtx;
12284 rtx sel1 = const1_rtx;
12285 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12286 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12287
12288 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12289 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
12290 }
12291
12292 void
12293 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12294 {
12295 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12296
12297 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12298 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12299 }
12300
12301 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12302 We can allow any mode in any general register. The special registers
12303 only allow SImode. Don't allow any mode in the PR.
12304
12305 We cannot hold DCmode values in the XD registers because alter_reg
12306 handles subregs of them incorrectly. We could work around this by
12307 spacing the XD registers like the DR registers, but this would require
12308 additional memory in every compilation to hold larger register vectors.
12309 We could hold SFmode / SCmode values in XD registers, but that
12310 would require a tertiary reload when reloading from / to memory,
12311 and a secondary reload to reload from / to general regs; that
12312 seems to be a losing proposition.
12313
12314 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12315 it won't be ferried through GP registers first. */
12316 bool
12317 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
12318 {
12319 if (SPECIAL_REGISTER_P (regno))
12320 return mode == SImode;
12321
12322 if (regno == FPUL_REG)
12323 return (mode == SImode || mode == SFmode);
12324
12325 if (FP_REGISTER_P (regno) && mode == SFmode)
12326 return true;
12327
12328 if (mode == V2SFmode)
12329 {
12330 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12331 || GENERAL_REGISTER_P (regno)))
12332 return true;
12333 else
12334 return false;
12335 }
12336
12337 if (mode == V4SFmode)
12338 {
12339 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12340 || GENERAL_REGISTER_P (regno))
12341 return true;
12342 else
12343 return false;
12344 }
12345
12346 if (mode == V16SFmode)
12347 {
12348 if (TARGET_SHMEDIA)
12349 {
12350 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12351 return true;
12352 else
12353 return false;
12354 }
12355 else
12356 return regno == FIRST_XD_REG;
12357 }
12358
12359 if (FP_REGISTER_P (regno))
12360 {
12361 if (mode == SFmode
12362 || mode == SImode
12363 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12364 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12365 || mode == DCmode
12366 || (TARGET_SHMEDIA
12367 && (mode == DFmode || mode == DImode
12368 || mode == V2SFmode || mode == TImode)))
12369 && ((regno - FIRST_FP_REG) & 1) == 0)
12370 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12371 && ((regno - FIRST_FP_REG) & 3) == 0))
12372 return true;
12373 else
12374 return false;
12375 }
12376
12377 if (XD_REGISTER_P (regno))
12378 return mode == DFmode;
12379
12380 if (TARGET_REGISTER_P (regno))
12381 return (mode == DImode || mode == SImode || mode == PDImode);
12382
12383 if (regno == PR_REG)
12384 return mode == SImode;
12385
12386 if (regno == FPSCR_REG)
12387 return mode == SImode;
12388
12389 /* FIXME. This works around PR target/37633 for -O0. */
12390 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12391 {
12392 unsigned int n = GET_MODE_SIZE (mode) / 8;
12393
12394 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12395 && regno <= FIRST_GENERAL_REG + 14)
12396 return false;
12397 }
12398
12399 return true;
12400 }
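
/* Some illustrative consequences of the rules above (examples derived from
   the code, not an exhaustive list):
     - any general register can hold any mode;
     - FPUL only holds SImode or SFmode values;
     - DFmode values need an even-numbered FP register pair
       (double-precision capable targets);
     - TImode in FP registers needs a 4-register aligned group.  */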
12401
12402 /* Specify the modes required to caller save a given hard regno.
12403 choose_hard_reg_mode chooses mode based on HARD_REGNO_MODE_OK
12404 and returns an integer mode (SImode / DImode) for float regs when
12405 sh_hard_regno_mode_ok permits integer modes on them. That makes
12406 LRA's split process unhappy. See PR55212.
12407 */
12408 machine_mode
12409 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
12410 machine_mode mode)
12411 {
12412 if (FP_REGISTER_P (regno)
12413 && (mode == SFmode
12414 || mode == SCmode
12415 || ((mode == DFmode || mode == DCmode)
12416 && ((regno - FIRST_FP_REG) & 1) == 0)))
12417 return mode;
12418
12419 return choose_hard_reg_mode (regno, nregs, false);
12420 }
12421
12422 /* Return the class of registers for which a mode change from FROM to TO
12423 is invalid. */
12424 bool
12425 sh_cannot_change_mode_class (machine_mode from, machine_mode to,
12426 enum reg_class rclass)
12427 {
12428 /* We want to enable the use of SUBREGs as a means to
12429 VEC_SELECT a single element of a vector. */
12430
12431 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12432 This can be problematic when SFmode vector subregs need to be accessed
12433 on the stack with displacement addressing, as it happens with -O0.
12434 Thus we disallow the mode change for -O0. */
12435 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12436 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12437
12438 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12439 {
12440 if (TARGET_LITTLE_ENDIAN)
12441 {
12442 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12443 return reg_classes_intersect_p (DF_REGS, rclass);
12444 }
12445 else
12446 {
12447 if (GET_MODE_SIZE (from) < 8)
12448 return reg_classes_intersect_p (DF_REGS, rclass);
12449 }
12450 }
12451 return false;
12452 }
12453
12454 /* Return true if registers in machine mode MODE will likely be
12455 allocated to registers in small register classes. */
12456 bool
12457 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
12458 {
12459 return (! TARGET_SHMEDIA);
12460 }
12461
12462 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12463 that label is used. */
12464 void
12465 sh_mark_label (rtx address, int nuses)
12466 {
12467 if (GOTOFF_P (address))
12468 {
12469 /* Extract the label or symbol. */
12470 address = XEXP (address, 0);
12471 if (GET_CODE (address) == PLUS)
12472 address = XEXP (address, 0);
12473 address = XVECEXP (address, 0, 0);
12474 }
12475 if (GET_CODE (address) == LABEL_REF
12476 && LABEL_P (XEXP (address, 0)))
12477 LABEL_NUSES (XEXP (address, 0)) += nuses;
12478 }
12479
12480 /* Compute extra cost of moving data between one register class
12481 and another.
12482
12483 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12484 uses this information. Hence, the general register <-> floating point
12485 register information here is not used for SFmode. */
12486 static int
12487 sh_register_move_cost (machine_mode mode,
12488 reg_class_t srcclass, reg_class_t dstclass)
12489 {
12490 if (dstclass == T_REGS || dstclass == PR_REGS)
12491 return 10;
12492
12493 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12494 return 4;
12495
12496 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12497 && REGCLASS_HAS_FP_REG (srcclass)
12498 && REGCLASS_HAS_FP_REG (dstclass))
12499 return 4;
12500
12501 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12502 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12503
12504 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12505 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12506 return 9;
12507
12508 if ((REGCLASS_HAS_FP_REG (dstclass)
12509 && REGCLASS_HAS_GENERAL_REG (srcclass))
12510 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12511 && REGCLASS_HAS_FP_REG (srcclass)))
12512 {
12513 /* Discourage trying to use fp regs for a pointer. This also
12514 discourages fp regs with SImode because Pmode is an alias
12515 of SImode on this target. See PR target/48596. */
12516 int addend = (mode == Pmode) ? 40 : 0;
12517
12518 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12519 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12520 }
12521
12522 if ((dstclass == FPUL_REGS
12523 && REGCLASS_HAS_GENERAL_REG (srcclass))
12524 || (srcclass == FPUL_REGS
12525 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12526 return 5;
12527
12528 if ((dstclass == FPUL_REGS
12529 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12530 || (srcclass == FPUL_REGS
12531 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12532 return 7;
12533
12534 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12535 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12536 return 20;
12537
12538 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12539 if (TARGET_SHMEDIA
12540 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12541 {
12542 if (sh_gettrcost >= 0)
12543 return sh_gettrcost;
12544 else if (!TARGET_PT_FIXED)
12545 return 100;
12546 }
12547
12548 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12549 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12550 return 4;
12551
12552 if (TARGET_SHMEDIA
12553 || (TARGET_FMOVD
12554 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12555 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12556 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12557
12558 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
12559 }
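
/* Rough intuition for the numbers above (an illustrative note, not part of
   the original comments): moves between general and FP registers have no
   direct path on SH and are routed through FPUL, roughly

     lds    r1,fpul
     fsts   fpul,fr4    ! general reg -> FP reg

   so such moves are priced higher than moves within one register file.  */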
12560
12561 static rtx
12562 emit_load_ptr (rtx reg, rtx addr)
12563 {
12564 rtx mem = gen_const_mem (ptr_mode, addr);
12565
12566 if (Pmode != ptr_mode)
12567 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12568 return emit_move_insn (reg, mem);
12569 }
12570
12571 static void
12572 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12573 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12574 tree function)
12575 {
12576 CUMULATIVE_ARGS cum;
12577 int structure_value_byref = 0;
12578 rtx this_rtx, this_value, sibcall, funexp;
12579 rtx_insn *insns;
12580 tree funtype = TREE_TYPE (function);
12581 int simple_add = CONST_OK_FOR_ADD (delta);
12582 int did_load = 0;
12583 rtx scratch0, scratch1, scratch2;
12584 unsigned i;
12585
12586 reload_completed = 1;
12587 epilogue_completed = 1;
12588 crtl->uses_only_leaf_regs = 1;
12589
12590 emit_note (NOTE_INSN_PROLOGUE_END);
12591
12592 /* Find the "this" pointer. We have such a wide range of ABIs for the
12593 SH that it's best to do this completely machine independently.
12594 "this" is passed as first argument, unless a structure return pointer
12595 comes first, in which case "this" comes second. */
12596 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12597 #ifndef PCC_STATIC_STRUCT_RETURN
12598 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12599 structure_value_byref = 1;
12600 #endif /* not PCC_STATIC_STRUCT_RETURN */
12601 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12602 {
12603 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12604
12605 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12606 }
12607 this_rtx
12608 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12609
12610 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12611 static chain pointer (even if you can't have nested virtual functions
12612 right now, someone might implement them sometime), and the rest of the
12613 registers are used for argument passing, are callee-saved, or reserved. */
12614 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12615 -ffixed-reg has been used. */
12616 if (! call_used_regs[0] || fixed_regs[0])
12617 error ("r0 needs to be available as a call-clobbered register");
12618 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12619 if (! TARGET_SH5)
12620 {
12621 if (call_used_regs[1] && ! fixed_regs[1])
12622 scratch1 = gen_rtx_REG (ptr_mode, 1);
12623 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12624 to the location where struct values are returned. */
12625 if (call_used_regs[3] && ! fixed_regs[3])
12626 scratch2 = gen_rtx_REG (Pmode, 3);
12627 }
12628 else if (TARGET_SHMEDIA)
12629 {
12630 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12631 if (i != REGNO (scratch0) &&
12632 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12633 {
12634 scratch1 = gen_rtx_REG (ptr_mode, i);
12635 break;
12636 }
12637 if (scratch1 == scratch0)
12638 error ("need a second call-clobbered general purpose register");
12639 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12640 if (call_used_regs[i] && ! fixed_regs[i])
12641 {
12642 scratch2 = gen_rtx_REG (Pmode, i);
12643 break;
12644 }
12645 if (scratch2 == scratch0)
12646 error ("need a call-clobbered target register");
12647 }
12648
12649 this_value = plus_constant (Pmode, this_rtx, delta);
12650 if (vcall_offset
12651 && (simple_add || scratch0 != scratch1)
12652 && strict_memory_address_p (ptr_mode, this_value))
12653 {
12654 emit_load_ptr (scratch0, this_value);
12655 did_load = 1;
12656 }
12657
12658 if (!delta)
12659 ; /* Do nothing. */
12660 else if (simple_add)
12661 emit_move_insn (this_rtx, this_value);
12662 else
12663 {
12664 emit_move_insn (scratch1, GEN_INT (delta));
12665 emit_insn (gen_add2_insn (this_rtx, scratch1));
12666 }
12667
12668 if (vcall_offset)
12669 {
12670 rtx offset_addr;
12671
12672 if (!did_load)
12673 emit_load_ptr (scratch0, this_rtx);
12674
12675 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12676 if (strict_memory_address_p (ptr_mode, offset_addr))
12677 ; /* Do nothing. */
12678 else if (! TARGET_SH5 && scratch0 != scratch1)
12679 {
12680 /* scratch0 != scratch1, and we have indexed loads. Get a better
12681 schedule by loading the offset into r1 and using an indexed
12682 load - then the load of r1 can issue before the load from
12683 (this_rtx + delta) finishes. */
12684 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12685 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12686 }
12687 else if (CONST_OK_FOR_ADD (vcall_offset))
12688 {
12689 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12690 offset_addr = scratch0;
12691 }
12692 else if (scratch0 != scratch1)
12693 {
12694 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12695 emit_insn (gen_add2_insn (scratch0, scratch1));
12696 offset_addr = scratch0;
12697 }
12698 else
12699 gcc_unreachable (); /* FIXME */
12700 emit_load_ptr (scratch0, offset_addr);
12701
12702 if (Pmode != ptr_mode)
12703 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12704 emit_insn (gen_add2_insn (this_rtx, scratch0));
12705 }
12706
12707 /* Generate a tail call to the target function. */
12708 if (! TREE_USED (function))
12709 {
12710 assemble_external (function);
12711 TREE_USED (function) = 1;
12712 }
12713 funexp = XEXP (DECL_RTL (function), 0);
12714 /* If the function is overridden, so is the thunk, hence we don't
12715 need GOT addressing even if this is a public symbol. */
12716 #if 0
12717 if (TARGET_SH1 && ! flag_weak)
12718 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12719 else
12720 #endif
12721 if (TARGET_SH2 && flag_pic)
12722 {
12723 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12724 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12725 }
12726 else
12727 {
12728 if (TARGET_SHMEDIA && flag_pic)
12729 {
12730 funexp = gen_sym2PIC (funexp);
12731 PUT_MODE (funexp, Pmode);
12732 }
12733 emit_move_insn (scratch2, funexp);
12734 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12735 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12736 }
12737 sibcall = emit_call_insn (sibcall);
12738 SIBLING_CALL_P (sibcall) = 1;
12739 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12740 emit_barrier ();
12741
12742 /* Run just enough of rest_of_compilation to do scheduling and get
12743 the insns emitted. Note that use_thunk calls
12744 assemble_start_function and assemble_end_function. */
12745
12746 insns = get_insns ();
12747
12748 if (optimize > 0)
12749 {
12750 if (! cfun->cfg)
12751 init_flow (cfun);
12752 split_all_insns_noflow ();
12753 }
12754
12755 sh_reorg ();
12756 shorten_branches (insns);
12757 final_start_function (insns, file, 1);
12758 final (insns, file, 1);
12759 final_end_function ();
12760
12761 reload_completed = 0;
12762 epilogue_completed = 0;
12763 }
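
/* For illustration only (the exact sequence depends on the ABI, PIC and the
   available scratch registers): with a small DELTA, no VCALL_OFFSET and no
   PIC, the emitted thunk is essentially

     add     #8,r4      ! adjust the incoming "this" pointer
     mov.l   .L1,r3     ! load the target function address (.L1 hypothetical)
     jmp     @r3        ! tail call
     nop

   where r4 is the first argument register on SH.  */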
12764
12765 rtx
12766 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12767 {
12768 rtx sym;
12769
12770 /* If this is not an ordinary function, the name usually comes from a
12771 string literal or an sprintf buffer. Make sure we use the same
12772 string consistently, so that cse will be able to unify address loads. */
12773 if (kind != FUNCTION_ORDINARY)
12774 name = IDENTIFIER_POINTER (get_identifier (name));
12775 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12776 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12777 if (flag_pic)
12778 switch (kind)
12779 {
12780 case FUNCTION_ORDINARY:
12781 break;
12782 case SFUNC_GOT:
12783 {
12784 rtx reg = target ? target : gen_reg_rtx (Pmode);
12785
12786 emit_insn (gen_symGOT2reg (reg, sym));
12787 sym = reg;
12788 break;
12789 }
12790 case SFUNC_STATIC:
12791 {
12792 /* ??? To allow cse to work, we use GOTOFF relocations.
12793 We could add combiner patterns to transform this into
12794 straight pc-relative calls with sym2PIC / bsrf when
12795 label load and function call are still 1:1 and in the
12796 same basic block during combine. */
12797 rtx reg = target ? target : gen_reg_rtx (Pmode);
12798
12799 emit_insn (gen_symGOTOFF2reg (reg, sym));
12800 sym = reg;
12801 break;
12802 }
12803 }
12804 if (target && sym != target)
12805 {
12806 emit_move_insn (target, sym);
12807 return target;
12808 }
12809 return sym;
12810 }
12811
12812 /* Find the number of a general purpose register in S. */
12813 static int
12814 scavenge_reg (HARD_REG_SET *s)
12815 {
12816 int r;
12817 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12818 if (TEST_HARD_REG_BIT (*s, r))
12819 return r;
12820 return -1;
12821 }
12822
12823 rtx
12824 sh_get_pr_initial_val (void)
12825 {
12826 rtx val;
12827
12828 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12829 PR register on SHcompact, because it might be clobbered by the prologue.
12830 We check first if that is known to be the case. */
12831 if (TARGET_SHCOMPACT
12832 && ((crtl->args.info.call_cookie
12833 & ~ CALL_COOKIE_RET_TRAMP (1))
12834 || crtl->saves_all_registers))
12835 return gen_frame_mem (SImode, return_address_pointer_rtx);
12836
12837 /* If we haven't finished rtl generation, there might be a nonlocal label
12838 that we haven't seen yet.
12839 ??? get_hard_reg_initial_val fails if it is called after register
12840 allocation has started, unless it has been called before for the
12841 same register. And even then, we end up in trouble if we didn't use
12842 the register in the same basic block before. So call
12843 get_hard_reg_initial_val now and wrap it in an unspec if we might
12844 need to replace it. */
12845 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12846 combine can put the pseudo returned by get_hard_reg_initial_val into
12847 instructions that need a general purpose register, which will fail to
12848 be recognized when the pseudo becomes allocated to PR. */
12849 val
12850 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12851 if (TARGET_SH1)
12852 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12853 return val;
12854 }
12855
12856 bool
12857 sh_expand_t_scc (rtx operands[])
12858 {
12859 enum rtx_code code = GET_CODE (operands[1]);
12860 rtx target = operands[0];
12861 rtx op0 = operands[2];
12862 rtx op1 = operands[3];
12863 rtx result = target;
12864 HOST_WIDE_INT val;
12865
12866 if (!REG_P (op0) || REGNO (op0) != T_REG
12867 || !CONST_INT_P (op1))
12868 return false;
12869 if (!REG_P (result))
12870 result = gen_reg_rtx (SImode);
12871 val = INTVAL (op1);
12872 if ((code == EQ && val == 1) || (code == NE && val == 0))
12873 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12874 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12875 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12876 else if (code == EQ || code == NE)
12877 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12878 else
12879 return false;
12880 if (result != target)
12881 emit_move_insn (target, result);
12882 return true;
12883 }
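
/* Illustrative mapping (derived from the code above): for something like
   reg = (T == 1) the expansion is a single "movt Rn" insn; the inverted
   forms go through the movnegt expander, and comparisons of T against
   anything other than 0/1 fold to a constant 0 or 1.  */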
12884
12885 /* INSN is an sfunc; return the rtx that describes the address used. */
12886 static rtx
12887 extract_sfunc_addr (rtx insn)
12888 {
12889 rtx pattern, part = NULL_RTX;
12890 int len, i;
12891
12892 pattern = PATTERN (insn);
12893 len = XVECLEN (pattern, 0);
12894 for (i = 0; i < len; i++)
12895 {
12896 part = XVECEXP (pattern, 0, i);
12897 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12898 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12899 return XEXP (part, 0);
12900 }
12901 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12902 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12903 }
12904
12905 /* Verify that the register in use_sfunc_addr still agrees with the address
12906 used in the sfunc. This prevents fill_slots_from_thread from changing
12907 use_sfunc_addr.
12908 INSN is the use_sfunc_addr instruction, and REG is the register it
12909 guards. */
12910 bool
12911 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
12912 {
12913 /* Search for the sfunc. It should really come right after INSN. */
12914 while ((insn = NEXT_INSN (insn)))
12915 {
12916 if (LABEL_P (insn) || JUMP_P (insn))
12917 break;
12918 if (! INSN_P (insn))
12919 continue;
12920
12921 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
12922 insn = seq->insn (0);
12923 if (GET_CODE (PATTERN (insn)) != PARALLEL
12924 || get_attr_type (insn) != TYPE_SFUNC)
12925 continue;
12926 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12927 }
12928 gcc_unreachable ();
12929 }
12930
12931 /* This function returns a constant rtx that represents 2**15 / pi in
12932 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
12933 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
12934 static GTY(()) rtx sh_fsca_sf2int_rtx;
12935
12936 rtx
12937 sh_fsca_sf2int (void)
12938 {
12939 if (! sh_fsca_sf2int_rtx)
12940 {
12941 REAL_VALUE_TYPE rv;
12942
12943 real_from_string (&rv, "10430.378350470453");
12944 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12945 }
12946
12947 return sh_fsca_sf2int_rtx;
12948 }
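
/* A quick sanity check of the constant (illustrative arithmetic only):
   2**15 / pi = 32768 / 3.14159265358979... = 10430.378350470453, so an
   angle of 2*pi radians scales to 2*pi * 2**15/pi = 2**16 = 0x10000.
   The constant in sh_fsca_int2sf below is simply its reciprocal.  */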
12949
12950 /* This function returns a constant rtx that represents pi / 2**15 in
12951 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12952 of a full circle back to an SFmode value, i.e. 0x10000 maps
12953 to 2*pi. */
12954 static GTY(()) rtx sh_fsca_int2sf_rtx;
12955
12956 rtx
12957 sh_fsca_int2sf (void)
12958 {
12959 if (! sh_fsca_int2sf_rtx)
12960 {
12961 REAL_VALUE_TYPE rv;
12962
12963 real_from_string (&rv, "9.587379924285257e-5");
12964 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12965 }
12966
12967 return sh_fsca_int2sf_rtx;
12968 }
12969
12970 /* Initialize the CUMULATIVE_ARGS structure. */
12971 void
12972 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12973 tree fntype,
12974 rtx libname ATTRIBUTE_UNUSED,
12975 tree fndecl,
12976 signed int n_named_args,
12977 machine_mode mode)
12978 {
12979 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12980 pcum->free_single_fp_reg = 0;
12981 pcum->stack_regs = 0;
12982 pcum->byref_regs = 0;
12983 pcum->byref = 0;
12984 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12985
12986 /* XXX - Should we check TARGET_HITACHI here ??? */
12987 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12988
12989 if (fntype)
12990 {
12991 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12992 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12993 pcum->prototype_p = prototype_p (fntype);
12994 pcum->arg_count [(int) SH_ARG_INT]
12995 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12996
12997 pcum->call_cookie
12998 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12999 && pcum->arg_count [(int) SH_ARG_INT] == 0
13000 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
13001 ? int_size_in_bytes (TREE_TYPE (fntype))
13002 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
13003 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
13004 == FIRST_RET_REG));
13005 }
13006 else
13007 {
13008 pcum->arg_count [(int) SH_ARG_INT] = 0;
13009 pcum->prototype_p = FALSE;
13010 if (mode != VOIDmode)
13011 {
13012 pcum->call_cookie =
13013 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
13014 && GET_MODE_SIZE (mode) > 4
13015 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
13016
13017 /* If the default ABI is the Renesas ABI then all library
13018 calls must assume that the library will be using the
13019 Renesas ABI. So if the function would return its result
13020 in memory then we must force the address of this memory
13021 block onto the stack. Ideally we would like to call
13022 targetm.calls.return_in_memory() here but we do not have
13023 the TYPE or the FNDECL available so we synthesize the
13024 contents of that function as best we can. */
13025 pcum->force_mem =
13026 (TARGET_DEFAULT & MASK_HITACHI)
13027 && (mode == BLKmode
13028 || (GET_MODE_SIZE (mode) > 4
13029 && !(mode == DFmode
13030 && TARGET_FPU_DOUBLE)));
13031 }
13032 else
13033 {
13034 pcum->call_cookie = 0;
13035 pcum->force_mem = FALSE;
13036 }
13037 }
13038 }
13039
13040 rtx
13041 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
13042 {
13043 enum rtx_code code = TRUNCATE;
13044
13045 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
13046 {
13047 rtx inner = XEXP (x, 0);
13048 machine_mode inner_mode = GET_MODE (inner);
13049
13050 if (inner_mode == mode)
13051 return inner;
13052 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
13053 x = inner;
13054 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
13055 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
13056 {
13057 code = GET_CODE (x);
13058 x = inner;
13059 }
13060 }
13061 return gen_rtx_fmt_e (code, mode, x);
13062 }
13063
13064 /* Look through X cleaning up truncates of registers that span multiple
13065 actual hard registers. Return the number of changes made. */
13066 int
13067 shmedia_cleanup_truncate (rtx x)
13068 {
13069 int n_changes = 0;
13070 subrtx_var_iterator::array_type array;
13071 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
13072 {
13073 rtx x = *iter;
13074 if (GET_CODE (x) == TRUNCATE)
13075 {
13076 rtx reg = XEXP (x, 0);
13077 machine_mode reg_mode = GET_MODE (reg);
13078 if (REG_P (reg) && GET_MODE_SIZE (reg_mode) > 8)
13079 {
13080 int offset = subreg_lowpart_offset (DImode, reg_mode);
13081 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode, offset);
13082 n_changes += 1;
13083 iter.skip_subrtxes ();
13084 }
13085 }
13086 }
13087 return n_changes;
13088 }
13089
13090 /* Load and store depend on the highpart of the address. However,
13091 set_attr_alternative does not give well-defined results before reload,
13092 so we must look at the rtl ourselves to see if any of the feeding
13093 registers is used in a memref.
13094
13095 Return true iff INSN contains a MEM. */
13096 bool
13097 sh_contains_memref_p (rtx insn)
13098 {
13099 subrtx_iterator::array_type array;
13100 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13101 if (MEM_P (*iter))
13102 return true;
13103 return false;
13104 }
13105
13106 /* Return true iff INSN loads a banked register. */
13107 bool
13108 sh_loads_bankedreg_p (rtx insn)
13109 {
13110 if (GET_CODE (PATTERN (insn)) == SET)
13111 {
13112 rtx op = SET_DEST (PATTERN (insn));
13113 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
13114 return true;
13115 }
13116
13117 return false;
13118 }
13119
13120 /* FNADDR is the MEM expression from a call expander. Return an address
13121 to use in an SHmedia insn pattern. */
13122 rtx
13123 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
13124 {
13125 int is_sym;
13126
13127 fnaddr = XEXP (fnaddr, 0);
13128 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
13129 if (flag_pic && is_sym)
13130 {
13131 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13132 {
13133 rtx reg = gen_reg_rtx (Pmode);
13134
13135 /* We must not use GOTPLT for sibcalls, because PIC_REG
13136 must be restored before the PLT code gets to run. */
13137 if (is_sibcall)
13138 emit_insn (gen_symGOT2reg (reg, fnaddr));
13139 else
13140 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13141 fnaddr = reg;
13142 }
13143 else
13144 {
13145 fnaddr = gen_sym2PIC (fnaddr);
13146 PUT_MODE (fnaddr, Pmode);
13147 }
13148 }
13149 /* If ptabs might trap, make this visible to the rest of the compiler.
13150 We generally assume that symbols pertain to valid locations, but
13151 it is possible to generate invalid symbols with asm or linker tricks.
13152 In a list of functions where each returns its successor, an invalid
13153 symbol might denote an empty list. */
13154 if (!TARGET_PT_FIXED
13155 && (!is_sym || TARGET_INVALID_SYMBOLS)
13156 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13157 {
13158 rtx tr = gen_reg_rtx (PDImode);
13159
13160 emit_insn (gen_ptabs (tr, fnaddr));
13161 fnaddr = tr;
13162 }
13163 else if (! target_reg_operand (fnaddr, Pmode))
13164 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13165 return fnaddr;
13166 }
13167
13168 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13169 static reg_class_t
13170 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13171 {
13172 if (rclass == NO_REGS
13173 && TARGET_SHMEDIA
13174 && (CONST_DOUBLE_P (x)
13175 || GET_CODE (x) == SYMBOL_REF
13176 || PIC_ADDR_P (x)))
13177 return GENERAL_REGS;
13178
13179 return rclass;
13180 }
13181
13182 /* Implement TARGET_SECONDARY_RELOAD. */
13183 static reg_class_t
13184 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13185 machine_mode mode, secondary_reload_info *sri)
13186 {
13187 enum reg_class rclass = (enum reg_class) rclass_i;
13188
13189 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13190 && REG_P (XEXP (XEXP (x, 0), 0))
13191 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13192 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13193
13194 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13195 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13196
13197 if (REG_P (x) && REGNO (x) == GBR_REG)
13198 return NO_REGS;
13199
13200 if (in_p)
13201 {
13202 if (REGCLASS_HAS_FP_REG (rclass)
13203 && ! TARGET_SHMEDIA
13204 && immediate_operand ((x), mode)
13205 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
13206 switch (mode)
13207 {
13208 case SFmode:
13209 sri->icode = CODE_FOR_reload_insf__frn;
13210 return NO_REGS;
13211 case DFmode:
13212 sri->icode = CODE_FOR_reload_indf__frn;
13213 return NO_REGS;
13214 case SImode:
13215 /* ??? If we knew that we are in the appropriate mode -
13216 single precision - we could use a reload pattern directly. */
13217 return FPUL_REGS;
13218 default:
13219 abort ();
13220 }
13221 if (rclass == FPUL_REGS
13222 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13223 || REGNO (x) == T_REG))
13224 || GET_CODE (x) == PLUS))
13225 return GENERAL_REGS;
13226 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13227 {
13228 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13229 return GENERAL_REGS;
13230 else if (mode == SFmode)
13231 return FP_REGS;
13232 sri->icode = CODE_FOR_reload_insi__i_fpul;
13233 return NO_REGS;
13234 }
13235 if (rclass == FPSCR_REGS
13236 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13237 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13238 return GENERAL_REGS;
13239 if (REGCLASS_HAS_FP_REG (rclass)
13240 && TARGET_SHMEDIA
13241 && immediate_operand (x, mode)
13242 && x != CONST0_RTX (GET_MODE (x))
13243 && GET_MODE (x) != V4SFmode)
13244 return GENERAL_REGS;
13245 if ((mode == QImode || mode == HImode)
13246 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13247 {
13248 sri->icode = ((mode == QImode)
13249 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13250 return NO_REGS;
13251 }
13252 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13253 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13254 return TARGET_REGS;
13255 } /* end of input-only processing. */
13256
13257 if (((REGCLASS_HAS_FP_REG (rclass)
13258 && (REG_P (x)
13259 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13260 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13261 && TARGET_FMOVD))))
13262 || (REGCLASS_HAS_GENERAL_REG (rclass)
13263 && REG_P (x)
13264 && FP_REGISTER_P (REGNO (x))))
13265 && ! TARGET_SHMEDIA
13266 && (mode == SFmode || mode == SImode))
13267 return FPUL_REGS;
13268 if ((rclass == FPUL_REGS
13269 || (REGCLASS_HAS_FP_REG (rclass)
13270 && ! TARGET_SHMEDIA && mode == SImode))
13271 && (MEM_P (x)
13272 || (REG_P (x)
13273 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13274 || REGNO (x) == T_REG
13275 || system_reg_operand (x, VOIDmode)))))
13276 {
13277 if (rclass == FPUL_REGS)
13278 return GENERAL_REGS;
13279 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
13280 }
13281 if ((rclass == TARGET_REGS
13282 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13283 && !satisfies_constraint_Csy (x)
13284 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13285 return GENERAL_REGS;
13286 if ((rclass == MAC_REGS || rclass == PR_REGS)
13287 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13288 && rclass != REGNO_REG_CLASS (REGNO (x)))
13289 return GENERAL_REGS;
13290 if (rclass != GENERAL_REGS && REG_P (x)
13291 && TARGET_REGISTER_P (REGNO (x)))
13292 return GENERAL_REGS;
13293
13294 /* If we get here, fall back to loading the FPUL register through general regs.
13295 This case can happen when movsi_ie insn is picked initially to
13296 load/store the FPUL register from/to another register, and then the
13297 other register is allocated on the stack. */
13298 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13299 return GENERAL_REGS;
13300
13301 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13302 the other operand.
13303 On SH2A we could also just leave it alone here, which would result in a
13304 4 byte move insn being generated instead. However, for this to work
13305 the insns must have the appropriate alternatives. */
13306 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13307 && satisfies_constraint_Sdd (x)
13308 && sh_disp_addr_displacement (x)
13309 <= sh_max_mov_insn_displacement (mode, false))
13310 return R0_REGS;
13311
13312 /* When reload is trying to address a QImode or HImode subreg on the stack,
13313 force any subreg byte into R0_REGS, as this is going to become a
13314 displacement address.
13315 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13316 is on the stack, the memref to it might already require a displacement
13317 and that has to be added to the final address. At this point we don't
13318 know the cumulative displacement so we assume the worst case. */
13319 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13320 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13321 return R0_REGS;
13322
13323 return NO_REGS;
13324 }
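
/* Background note for the R0_REGS cases above (illustrative, summarizing
   the SH ISA constraint): the displacement forms of the byte/word moves
   only exist with R0 as the data register, e.g.

     mov.b   @(disp,Rn),R0
     mov.b   R0,@(disp,Rn)

   which is why QImode/HImode displacement accesses are forced into R0.  */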
13325
13326 /* Return true if SUBST can't safely replace its equivalent during RA. */
13327 static bool
13328 sh_cannot_substitute_mem_equiv_p (rtx)
13329 {
13330 if (TARGET_SHMEDIA)
13331 return false;
13332
13333 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
13334 uses R0 and may cause spill failure when R0 is already used.
13335 We have to return true for that case at least.
13336 Moreover, SH has strong R0 constraints and not enough hard registers
13337 to make the equiv substitution a win in size and speed on average
13338 working sets. The pseudos produced to hold the equiv values can't
13339 get good hard registers in bad cases and end up as memory
13340 save/restore insns, which makes the code worse. */
13341 return true;
13342 }
13343
13344 /* Return true if DISP can be legitimized. */
13345 static bool
13346 sh_legitimize_address_displacement (rtx *disp, rtx *offs,
13347 machine_mode mode)
13348 {
13349 if (TARGET_SHMEDIA)
13350 return false;
13351
13352 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
13353 || (TARGET_SH2E && mode == SFmode))
13354 return false;
13355
13356 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
13357 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
13358 {
13359 *disp = adj.mov_disp;
13360 *offs = adj.offset_adjust;
13361 return true;
13362 }
13363
13364 return false;
13365 }
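
/* Illustrative numbers (SH1/SH2 style encodings with the usual 4-bit scaled
   displacement): mov.b allows displacements 0..15, mov.w 0..30 and
   mov.l 0..60.  A larger displacement is handled by splitting it into an
   offset adjustment that is added to the base register plus a remaining
   in-range displacement for the move itself, which is what
   sh_find_mov_disp_adjust computes above.  */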
13366
13367 /* Return true if movsf insn should be split with an additional
13368 register. */
13369 bool
13370 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
13371 {
13372 /* op0 == op1 */
13373 if (rtx_equal_p (op0, op1))
13374 return true;
13375 /* fy, FQ, reg */
13376 if (GET_CODE (op1) == CONST_DOUBLE
13377 && ! satisfies_constraint_G (op1)
13378 && ! satisfies_constraint_H (op1)
13379 && REG_P (op0)
13380 && REG_P (op2))
13381 return true;
13382 /* f, r, y */
13383 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
13384 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
13385 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13386 return true;
13387 /* r, f, y */
13388 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
13389 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
13390 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13391 return true;
13392
13393 return false;
13394 }
13395
13396 static void
13397 sh_conditional_register_usage (void)
13398 {
13399 int regno;
13400 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13401 if (! VALID_REGISTER_P (regno))
13402 fixed_regs[regno] = call_used_regs[regno] = 1;
13403 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13404 if (TARGET_SH5)
13405 {
13406 call_used_regs[FIRST_GENERAL_REG + 8]
13407 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13408 call_really_used_regs[FIRST_GENERAL_REG + 8]
13409 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13410 }
13411 if (TARGET_SHMEDIA)
13412 {
13413 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13414 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13415 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13416 }
13417 if (flag_pic)
13418 {
13419 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13420 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13421 }
13422 /* Renesas saves and restores mac registers on call. */
13423 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13424 {
13425 call_really_used_regs[MACH_REG] = 0;
13426 call_really_used_regs[MACL_REG] = 0;
13427 }
13428
13429 if (TARGET_SHMEDIA)
13430 {
13431 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13432 if (! fixed_regs[regno] && call_really_used_regs[regno])
13433 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13434 }
13435 else
13436 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13437 if (! fixed_regs[regno] && call_really_used_regs[regno])
13438 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13439
13440 call_really_used_regs[FPSCR_MODES_REG] = 0;
13441 call_really_used_regs[FPSCR_STAT_REG] = 0;
13442 }
13443
13444 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13445
13446 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13447 static bool
13448 sh_legitimate_constant_p (machine_mode mode, rtx x)
13449 {
13450 return (TARGET_SHMEDIA
13451 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13452 || x == CONST0_RTX (mode)
13453 || !TARGET_SHMEDIA_FPU
13454 || TARGET_SHMEDIA64)
13455 : (GET_CODE (x) != CONST_DOUBLE
13456 || mode == DFmode || mode == SFmode
13457 || mode == DImode || GET_MODE (x) == VOIDmode));
13458 }
13459
13460 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13461
13462 static void
13463 sh_init_sync_libfuncs (void)
13464 {
13465 init_sync_libfuncs (UNITS_PER_WORD);
13466 }
13467
13468 /* Return true if it is appropriate to emit `ret' instructions in the
13469 body of a function. */
13470 bool
13471 sh_can_use_simple_return_p (void)
13472 {
13473 HARD_REG_SET live_regs_mask;
13474 int d;
13475
13476 /* Some targets require special return insns. */
13477 if (TARGET_SHMEDIA
13478 || (TARGET_SHCOMPACT
13479 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13480 return false;
13481
13482 if (! reload_completed || frame_pointer_needed)
13483 return false;
13484
13485 /* Moving the prologue around doesn't reduce the size. */
13486 if (optimize_function_for_size_p (cfun))
13487 return false;
13488
13489 /* Finally, allow for pr save. */
13490 d = calc_live_regs (&live_regs_mask);
13491
13492 if (rounded_frame_size (d) > 4)
13493 return false;
13494
13495 return true;
13496 }
13497
13498 /*------------------------------------------------------------------------------
13499 Address mode optimization support code
13500 */
13501
13502 typedef HOST_WIDE_INT disp_t;
13503 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13504 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13505 static const disp_t INVALID_DISP = MAX_DISP;
13506
13507 /* A memory reference which is described by a base register and a
13508 displacement. */
13509 class base_reg_disp
13510 {
13511 public:
13512 base_reg_disp (rtx br, disp_t d);
13513
13514 bool is_reg (void) const;
13515 bool is_disp (void) const;
13516 rtx reg (void) const;
13517 disp_t disp (void) const;
13518
13519 private:
13520 rtx reg_;
13521 disp_t disp_;
13522 };
13523
13524 inline
13525 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13526 : reg_ (br), disp_ (d)
13527 {
13528 }
13529
13530 inline bool
13531 base_reg_disp::is_reg (void) const
13532 {
13533 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13534 }
13535
13536 inline bool
13537 base_reg_disp::is_disp (void) const
13538 {
13539 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13540 }
13541
13542 inline rtx
13543 base_reg_disp::reg (void) const
13544 {
13545 return reg_;
13546 }
13547
13548 inline disp_t
13549 base_reg_disp::disp (void) const
13550 {
13551 return disp_;
13552 }
13553
13554 /* Find the base register and calculate the displacement for a given
13555 address rtx 'x'. */
13556 static base_reg_disp
13557 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
13558 rtx base_reg = NULL)
13559 {
13560 if (REG_P (x))
13561 {
13562 if (REGNO (x) == GBR_REG)
13563 return base_reg_disp (x, disp);
13564
13565 /* We've reached a hard-reg. This is probably the point where
13566 function args are copied to pseudos. Do not go any further and
13567 stick to the pseudo. If the original mem addr was in a hard reg
13568 from the beginning, it will become the base reg. */
13569 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13570 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13571
13572 /* Find the def of the reg and trace it. If there are more than one
13573 defs and they are not the same, assume it's not safe to proceed. */
13574 rtx_insn* last_i = NULL;
13575 rtx last_set = NULL;
13576 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
13577 d = DF_REF_NEXT_REG (d))
13578 {
13579 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
13580
13581 /* Accept multiple defs, as long as they are equal. */
13582 if (last_set == NULL || rtx_equal_p (last_set, set))
13583 {
13584 last_i = DF_REF_INSN (d);
13585 last_set = set;
13586 }
13587 else
13588 {
13589 last_i = NULL;
13590 last_set = NULL;
13591 break;
13592 }
13593 }
13594
13595 if (last_set != NULL && last_i != NULL)
13596 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
13597 XEXP (last_set, 0));
13598
13599 /* If we get here, no previous insn was found that sets the reg.
13600 The input reg is already the base reg. */
13601 return base_reg_disp (x, disp);
13602 }
13603
13604 else if (GET_CODE (x) == PLUS)
13605 {
13606 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13607 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13608
13609 /* Either left or right val must be a reg.
13610 We don't handle the case of 'reg + reg' here. */
13611 if (left_val.is_reg () && right_val.is_disp ())
13612 return base_reg_disp (left_val.reg (), left_val.disp ()
13613 + right_val.disp () + disp);
13614 else if (right_val.is_reg () && left_val.is_disp ())
13615 return base_reg_disp (right_val.reg (), right_val.disp ()
13616 + left_val.disp () + disp);
13617 else
13618 return base_reg_disp (base_reg, disp);
13619 }
13620
13621 else if (CONST_INT_P (x))
13622 return base_reg_disp (NULL, disp + INTVAL (x));
13623
13624 /* Didn't find anything useful. */
13625 return base_reg_disp (base_reg, disp);
13626 }
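
/* Example of the recursion above (illustrative): for a mem address
   (plus (reg 163) (const_int 4)) where reg 163 was set earlier from
   (plus (reg GBR) (const_int 20)), the result is base = GBR and disp = 24,
   which sh_find_equiv_gbr_addr below can then turn into a @(24,GBR)
   style address.  */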
13627
13628 /* Given an insn and a memory operand, try to find an equivalent GBR
13629 based memory address and return the corresponding new memory address.
13630 Return NULL_RTX if not found. */
13631 rtx
13632 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
13633 {
13634 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
13635 return NULL_RTX;
13636
13637 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13638 if (side_effects_p (XEXP (mem, 0)))
13639 return NULL_RTX;
13640
13641 /* When not optimizing there might be no dataflow available. */
13642 if (df == NULL)
13643 return NULL_RTX;
13644
13645 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13646
13647 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13648 {
13649 /* If GBR is marked as call clobbered we bail out if we see a call.
13650 FIXME: Actually should check if this mem refers to the gbr value
13651 before or after the call. If there is a store_gbr preceding this
13652 mem, it's safe to use GBR for this mem.
13653
13654 If GBR is not marked as call clobbered, but there is some other
13655 def than a call, it's probably a load_gbr upon which we also
13656 bail out to be on the safe side.
13657 FIXME: Should check if we have a use-after-def case, such as
13658 the call case above. */
13659 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
13660 d = DF_REF_NEXT_REG (d))
13661 {
13662 if (CALL_P (DF_REF_INSN (d)))
13663 {
13664 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
13665 return NULL_RTX;
13666 else
13667 continue;
13668 }
13669 else
13670 return NULL_RTX;
13671 }
13672
13673 rtx disp = GEN_INT (gbr_disp.disp ());
13674 if (gbr_displacement (disp, GET_MODE (mem)))
13675 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13676 }
13677
13678 return NULL_RTX;
13679 }
13680
13681 /*------------------------------------------------------------------------------
13682 Manual insn combine support code.
13683 */
13684
13685 /* Return true if the specified insn contains any UNSPECs or
13686 UNSPEC_VOLATILEs. */
13687 static bool
13688 sh_unspec_insn_p (rtx x)
13689 {
13690 subrtx_iterator::array_type array;
13691 FOR_EACH_SUBRTX (i, array, x, ALL)
13692 if (*i != NULL
13693 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
13694 return true;
13695
13696 return false;
13697 }
13698
13699 /* Return true if the register operands of the specified insn are modified
13700 between the specified from and to insns (exclusive of those two). */
13701 bool
13702 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
13703 const rtx_insn* from,
13704 const rtx_insn* to)
13705 {
13706 /* FIXME: Return true for multiple sets for now. */
13707 rtx s = single_set (operands_insn);
13708 if (s == NULL_RTX)
13709 return true;
13710
13711 subrtx_iterator::array_type array;
13712 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
13713 if (*i != NULL &&
13714 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
13715 return true;
13716
13717 return false;
13718 }
13719
13720 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
13721 negates the T bit and stores the result in the T bit. */
13722 bool
13723 sh_is_nott_insn (const rtx_insn* i)
13724 {
13725 return i != NULL && GET_CODE (PATTERN (i)) == SET
13726 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
13727 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
13728 }
13729
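/* Given an insn, check whether it's a 'movt' kind of insn, i.e. an insn
   that stores the T bit in a register, and return the destination
   register rtx, or null.  */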
13730 rtx
13731 sh_movt_set_dest (const rtx_insn* i)
13732 {
13733 if (i == NULL)
13734 return NULL;
13735
13736 const_rtx p = PATTERN (i);
13737 return GET_CODE (p) == SET
13738 && arith_reg_dest (XEXP (p, 0), SImode)
13739 && t_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
13740 }
13741
13742 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
13743 that stores the negated T bit in a register, and return the destination
13744 register rtx, or null. */
13745 rtx
13746 sh_movrt_set_dest (const rtx_insn* i)
13747 {
13748 if (i == NULL)
13749 return NULL;
13750
13751 const_rtx p = PATTERN (i);
13752
13753 /* The negc movrt replacement is inside a parallel. */
13754 if (GET_CODE (p) == PARALLEL)
13755 p = XVECEXP (p, 0, 0);
13756
13757 return GET_CODE (p) == SET
13758 && arith_reg_dest (XEXP (p, 0), SImode)
13759 && negt_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
13760 }
13761
13762 /* Given an insn and a reg number, tell whether the reg dies or is unused
13763 after the insn. */
13764 bool
13765 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
13766 {
13767 return find_regno_note (i, REG_DEAD, regno) != NULL
13768 || find_regno_note (i, REG_UNUSED, regno) != NULL;
13769 }
13770
13771 /* Given an insn and a reg number, remove reg dead or reg unused notes to
13772 mark it as being used after the insn. */
13773 void
13774 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
13775 {
13776 if (rtx n = find_regno_note (i, REG_DEAD, regno))
13777 remove_note (i, n);
13778 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
13779 remove_note (i, n);
13780 }
13781
13782 /* Given an insn check if it contains any post/pre inc/dec mem operands and
13783 add the REG_INC notes accordingly.
13784 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
13785 FIXME: This function is currently used by peephole2 patterns because
13786 the peephole2 pass does not preserve REG_INC notes. If the notes
13787 are dropped the following passes will do wrong things. */
13788 rtx_insn*
13789 sh_check_add_incdec_notes (rtx_insn* i)
13790 {
13791 struct for_each_inc_dec_clb
13792 {
13793 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
13794 rtx dest, rtx src ATTRIBUTE_UNUSED,
13795 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
13796 {
13797 gcc_assert (REG_P (dest));
13798
13799 rtx_insn* i = (rtx_insn*)arg;
13800 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
13801 add_reg_note (i, REG_INC, dest);
13802
13803 return 0;
13804 }
13805 };
13806
13807 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
13808 return i;
13809 }
13810
13811 /* Given a move insn destination and a source, make sure that the move source
13812 operand is not a post-inc mem load with the same address reg as the
13813 destination. Returns the modified source operand with the post-inc removed
13814 if necessary. */
13815 rtx
13816 sh_remove_overlapping_post_inc (rtx dst, rtx src)
13817 {
13818 if (!MEM_P (src))
13819 return src;
13820
13821 rtx addr = XEXP (src, 0);
13822
13823 if (GET_CODE (addr) == POST_INC
13824 && reg_overlap_mentioned_p (XEXP (addr, 0), dst))
13825 return replace_equiv_address (src, XEXP (addr, 0));
13826
13827 gcc_assert (GET_CODE (addr) != POST_MODIFY);
13828 return src;
13829 }
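
/* For example (illustrative): with dst = (reg r1) and
   src = (mem (post_inc (reg r1))), the function returns (mem (reg r1));
   keeping the post-increment would be pointless because the destination
   overwrites the address register anyway.  */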
13830
13831 /* Emit a move insn that is safe to be used in peephole patterns. */
13832 rtx_insn*
13833 sh_peephole_emit_move_insn (rtx dst, rtx src)
13834 {
13835 return sh_check_add_incdec_notes (
13836 emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src)));
13837 }
13838
13839 /* Given an op rtx and an insn, try to find out whether the result of the
13840 specified op consists only of logical operations on T bit stores. */
13841 bool
13842 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
13843 {
13844 if (!logical_operator (op, SImode))
13845 return false;
13846
13847 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13848 int op_is_t_count = 0;
13849
13850 for (int i = 0; i < 2; ++i)
13851 {
13852 if (t_reg_operand (ops[i], VOIDmode)
13853 || negt_reg_operand (ops[i], VOIDmode))
13854 op_is_t_count++;
13855
13856 else
13857 {
13858 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13859 prev_nonnote_insn_bb);
13860 if (op_set.set_src == NULL_RTX)
13861 continue;
13862
13863 if (t_reg_operand (op_set.set_src, VOIDmode)
13864 || negt_reg_operand (op_set.set_src, VOIDmode)
13865 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13866 op_is_t_count++;
13867 }
13868 }
13869
13870 return op_is_t_count == 2;
13871 }
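
/* Illustrative example: for op = (ior (reg A) (reg B)) the function above
   returns true if both A and B were set from the T bit, either directly or
   through another such logical expression found via their defining insns.  */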
13872
13873 /* Given the operand that is extended in a sign/zero extend insn, and the
13874 insn, try to figure out whether the sign/zero extension can be replaced
13875 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13876 NULL_RTX otherwise. */
13877 rtx
13878 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
13879 {
13880 if (REG_P (extended_op))
13881 extended_op = extended_op;
13882 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13883 extended_op = SUBREG_REG (extended_op);
13884 else
13885 return NULL_RTX;
13886
13887 /* Reg moves must be of the same mode. */
13888 if (GET_MODE (extended_op) != SImode)
13889 return NULL_RTX;
13890
13891 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13892 if (s.set_src == NULL_RTX)
13893 return NULL_RTX;
13894
13895 if (t_reg_operand (s.set_src, VOIDmode)
13896 || negt_reg_operand (s.set_src, VOIDmode))
13897 return extended_op;
13898
13899 /* If the zero extended reg was formed by a logical operation, check the
13900 operands of the logical operation. If both originated from T bit
13901 stores the zero extension can be eliminated. */
13902 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13903 return extended_op;
13904
13905 return NULL_RTX;
13906 }
13907
13908 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
13909 figure out whether it should be converted into a movt-xor sequence in
13910 the movrt_negc splitter.
13911 Returns true if insns have been modified and the splitter has succeeded. */
13912 bool
13913 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
13914 {
13915 /* In cases such as
13916 tst r4,r4
13917 mov #-1,r1
13918 negc r1,r1
13919 tst r4,r4
13920 we can replace the T bit clobbering negc with a movt-xor sequence and
13921 eliminate the redundant comparison.
13922 Because the xor insn depends on register allocation results, allow this
13923 only before reload. */
13924 if (!can_create_pseudo_p ())
13925 return false;
13926
13927 set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13928 prev_nonnote_insn_bb);
13929 set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13930 next_nonnote_insn_bb);
13931
13932 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
13933 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
13934 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
13935 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
13936 t_before_negc.insn,
13937 t_after_negc.insn)
13938 && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
13939 && !sh_unspec_insn_p (t_after_negc.insn)
13940 && !volatile_insn_p (PATTERN (t_after_negc.insn))
13941 && !side_effects_p (PATTERN (t_after_negc.insn))
13942 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
13943 {
13944 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
13945 set_insn_deleted (t_after_negc.insn);
13946 return true;
13947 }
13948 else
13949 return false;
13950 }
13951
13952 /* Given a reg and the current insn, see if the value of the reg originated
13953 from a sign or zero extension and return the discovered information. */
13954 sh_extending_set_of_reg
13955 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
13956 {
13957 if (reg == NULL)
13958 return sh_extending_set_of_reg (curr_insn);
13959
13960 if (SUBREG_P (reg))
13961 reg = SUBREG_REG (reg);
13962
13963 if (!REG_P (reg))
13964 return sh_extending_set_of_reg (curr_insn);
13965
13966 /* FIXME: Also search the predecessor basic blocks. It seems that checking
13967 only the adjacent predecessor blocks would cover most of the cases.
13968 Also try to look through the first extension that we hit. There are some
13969 cases where a zero_extend is followed by an (implicit) sign_extend, and it
13970 fails to see the sign_extend. */
13971 sh_extending_set_of_reg result =
13972 sh_find_set_of_reg (reg, curr_insn, prev_nonnote_insn_bb, true);
13973
13974 if (result.set_src != NULL)
13975 {
13976 if (GET_CODE (result.set_src) == SIGN_EXTEND
13977 || GET_CODE (result.set_src) == ZERO_EXTEND)
13978 {
13979 if (dump_file)
13980 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
13981 "explicitly sign/zero extended in insn %d\n",
13982 REGNO (reg), INSN_UID (result.insn));
13983 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
13984 result.ext_code = GET_CODE (result.set_src);
13985 }
13986 else if (MEM_P (result.set_src)
13987 && (GET_MODE (result.set_src) == QImode
13988 || GET_MODE (result.set_src) == HImode)
13989 && !sh_unspec_insn_p (result.insn))
13990 {
13991 /* On SH QIHImode memory loads always sign extend. However, in
13992 some cases where it seems that the higher bits are not
13993 interesting, the loads will not be expanded as sign extending
13994 insns, but as QIHImode loads into QIHImode regs. We report that
13995 the reg has been sign extended by the mem load. When it is used
13996 as such, we must convert the mem load into a sign extending insn,
13997 see also sh_extending_set_of_reg::use_as_extended_reg. */
13998 if (dump_file)
13999 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
14000 "implicitly sign extended in insn %d\n",
14001 REGNO (reg), INSN_UID (result.insn));
14002 result.from_mode = GET_MODE (result.set_src);
14003 result.ext_code = SIGN_EXTEND;
14004 }
14005 }
14006
14007 return result;
14008 }
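/* Illustrative examples of what the function above returns (insn and register
   numbers are hypothetical):
     (set (reg:SI 65) (sign_extend:SI (mem:QI (reg:SI 66))))
	-> from_mode = QImode, ext_code = SIGN_EXTEND  (explicit extension)
     (set (reg:HI 65) (mem:HI (reg:SI 66)))
	-> from_mode = HImode, ext_code = SIGN_EXTEND  (implicit extension,
	   because SH QImode/HImode memory loads sign extend, see above)
     (set (reg:SI 65) (plus:SI ...))
	-> ext_code stays UNKNOWN.  */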
14009
14010 /* Given a reg that is known to be sign or zero extended at some insn,
14011 take the appropriate measures so that the extended value can be used as
14012 a reg at the specified insn and return the resulting reg rtx. */
14013 rtx
14014 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
14015 {
14016 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
14017 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
14018 gcc_assert (from_mode == QImode || from_mode == HImode);
14019
14020 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
14021 {
14022 if (dump_file)
14023 fprintf (dump_file,
14024 "use_as_extended_reg: converting non-extending mem load in "
14025 "insn %d into sign-extending load\n", INSN_UID (insn));
14026
14027 rtx r = gen_reg_rtx (SImode);
14028 rtx_insn* i0;
14029 if (from_mode == QImode)
14030 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
14031 else if (from_mode == HImode)
14032 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
14033 else
14034 gcc_unreachable ();
14035
14036 emit_insn_after (
14037 gen_move_insn (XEXP (set_rtx, 0),
14038 gen_lowpart (GET_MODE (set_src), r)), i0);
14039 set_insn_deleted (insn);
14040 return r;
14041 }
14042 else
14043 {
14044 rtx extension_dst = XEXP (set_rtx, 0);
14045 if (GET_MODE (extension_dst) != SImode)
14046 extension_dst = simplify_gen_subreg (SImode, extension_dst,
14047 GET_MODE (extension_dst), 0);
14048 if (modified_between_p (extension_dst, insn, use_at_insn))
14049 {
14050 if (dump_file)
14051 fprintf (dump_file,
14052 "use_as_extended_reg: dest reg %d of extending insn %d is "
14053 "modified, inserting a reg-reg copy\n",
14054 REGNO (extension_dst), INSN_UID (insn));
14055
14056 rtx r = gen_reg_rtx (SImode);
14057 emit_insn_after (gen_move_insn (r, extension_dst), insn);
14058 return r;
14059 }
14060 else
14061 {
14062 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
14063 return extension_dst;
14064 }
14065 }
14066 }
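/* A sketch of the mem-load conversion done above (register numbers are
   hypothetical): a non-extending load such as
     (set (reg:QI 65) (mem:QI (reg:SI 66)))
   is rewritten into
     (set (reg:SI 67) (sign_extend:SI (mem:QI (reg:SI 66))))
     (set (reg:QI 65) (subreg:QI (reg:SI 67) 0))
   and the original load is deleted; reg 67 is returned as the extended value.
   (The subreg offset shown assumes the little endian lowpart layout.)  */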
14067
14068 bool
14069 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
14070 {
14071 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
14072 && (from_mode == QImode || from_mode == HImode)
14073 && set_src != NULL)
14074 return arith_reg_operand (XEXP (set_src, 0), from_mode);
14075 else
14076 return false;
14077 }
14078
14079 rtx
14080 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
14081 {
14082 gcc_assert (can_use_as_unextended_reg ());
14083
14084 rtx r = XEXP (set_src, 0);
14085 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
14086
14087 if (modified_between_p (r, insn, use_at_insn))
14088 {
14089 rtx r1 = gen_reg_rtx (SImode);
14090 emit_insn_after (gen_move_insn (r1, r0), insn);
14091 return r1;
14092 }
14093 else
14094 {
14095 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
14096 ? REGNO (SUBREG_REG (r))
14097 : REGNO (r));
14098 return r0;
14099 }
14100 }
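/* Illustrative example for the two functions above (register numbers are
   hypothetical): if the extending set was
     (set (reg:SI 65) (zero_extend:SI (reg:HI 66)))
   then can_use_as_unextended_reg returns true and use_as_unextended_reg yields
   (subreg:SI (reg:HI 66) 0), or a fresh SImode copy of that subreg emitted
   right after the extension if reg 66 is modified before the use insn.  */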
14101
14102 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
14103 perform the necessary checks on the operands and split it accordingly. */
14104 void
14105 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
14106 int subreg_offset, rtx operands[])
14107 {
14108 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
14109
14110 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
14111 curr_insn);
14112 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
14113 curr_insn);
14114
14115 /* If one of the operands is known to be zero extended, that's already
14116 sufficient to mask out the unwanted high bits. */
14117 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
14118 {
14119 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14120 operands[1]));
14121 return;
14122 }
14123 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
14124 {
14125 emit_insn (gen_tstsi_t (operands[0],
14126 eop1.use_as_extended_reg (curr_insn)));
14127 return;
14128 }
14129
14130 /* Neither of the operands seems to be zero extended.
14131 If both are sign extended it's OK, too. */
14132 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
14133 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
14134 {
14135 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14136 eop1.use_as_extended_reg (curr_insn)));
14137 return;
14138 }
14139
14140 /* Otherwise we have to insert a zero extension on one of the operands to
14141 mask out the unwanted high bits.
14142 Prefer the operand that has no known extension. */
14143 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
14144 std::swap (operands[0], operands[1]);
14145
14146 rtx tmp0 = gen_reg_rtx (SImode);
14147 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
14148 GET_MODE (operands[0]), subreg_offset);
14149 emit_insn (subreg_mode == QImode
14150 ? gen_zero_extendqisi2 (tmp0, tmp1)
14151 : gen_zero_extendhisi2 (tmp0, tmp1));
14152 emit_insn (gen_tstsi_t (tmp0, operands[1]));
14153 }
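/* Illustrative fallback output of the function above (register names are
   hypothetical): when testing the low byte of two SImode regs and neither
   operand has a known extension, the emitted sequence corresponds roughly to
     extu.b	r4,r1		! zero extend the chosen operand into a temp
     tst	r1,r5		! T = ((r1 & r5) == 0); the zero extended temp
				! masks out the unwanted high bits of r5
   whereas a known zero extended operand allows a single tst without the
   extra extu insn.  */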
14154
14155 /* A helper class to increment/decrement a counter variable each time a
14156 function is entered/left. */
14157 class scope_counter
14158 {
14159 public:
14160 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
14161
14162 ~scope_counter (void)
14163 {
14164 --m_counter;
14165 gcc_assert (m_counter >= 0);
14166 }
14167
14168 int count (void) const { return m_counter; }
14169
14170 private:
14171 int& m_counter;
14172 };
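/* Illustrative usage sketch of scope_counter (hypothetical function and helper;
   the real uses are sh_recog_treg_set_expr and sh_split_treg_set_expr below):

     static int my_reent_count = 0;

     static bool
     my_recursive_check (rtx op)
     {
       scope_counter depth (my_reent_count); // ++count here, --count on return
       if (depth.count () > 1)
	 return false;			     // refuse to nest deeper than once
       return do_the_real_check (op);	     // hypothetical helper
     }
*/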
14173
14174 /* Given an rtx x, determine whether the expression can be used to create
14175 an insn that calculates x and stores the result in the T bit.
14176 This is used by the 'treg_set_expr' predicate to construct insn sequences
14177 where T bit results are fed into other insns, such as addc, subc, negc
14178 insns.
14179
14180 FIXME: The patterns that expand 'treg_set_expr' operands tend to
14181 distinguish between 'positive' and 'negative' forms. For now this has to
14182 be done in the preparation code. We could also introduce
14183 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
14184 two different patterns for the 'positive' and 'negative' forms. However,
14185 the total number of lines of code seems to be about the same and the
14186 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
14187 recog function would need to look inside the expression by temporarily
14188 splitting it. */
14189 static int sh_recog_treg_set_expr_reent_count = 0;
14190
14191 bool
14192 sh_recog_treg_set_expr (rtx op, machine_mode mode)
14193 {
14194 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
14195
14196 /* Limit the recursion count to avoid nested expressions which we can't
14197 resolve to a single treg set insn. */
14198 if (recursion.count () > 1)
14199 return false;
14200
14201 /* Early accept known possible operands before doing recog. */
14202 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode)
14203 || negt_reg_operand (op, mode))
14204 return true;
14205
14206 /* Early reject impossible operands before doing recog.
14207 There are some (set ((t) (subreg ...))) patterns, but we must be careful
14208 not to allow any invalid reg-reg or mem-reg moves, or else other passes
14209 such as lower-subreg will bail out. Some insns such as SH4A movua are
14210 done with UNSPEC, so we must reject those, too, or else it would result
14211 in an invalid reg -> treg move. */
14212 if (CONST_INT_P (op) || register_operand (op, mode)
14213 || memory_operand (op, mode) || sh_unspec_insn_p (op))
14214 return false;
14215
14216 if (!can_create_pseudo_p ())
14217 return false;
14218
14219 /* expand_debug_locations may call this to compute rtx costs at
14220 a very early stage. In that case, don't make new insns here to
14221 avoid codegen differences with -g. */
14222 if (currently_expanding_to_rtl)
14223 return false;
14224
14225 /* We are going to invoke recog in a re-entrant way and thus
14226 have to capture its current state and restore it afterwards. */
14227 recog_data_d prev_recog_data = recog_data;
14228
14229 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
14230 SET_PREV_INSN (i) = NULL;
14231 SET_NEXT_INSN (i) = NULL;
14232
14233 /* If the comparison op doesn't have a result mode, set it to SImode. */
14234 machine_mode prev_op_mode = GET_MODE (op);
14235 if (COMPARISON_P (op) && prev_op_mode == VOIDmode)
14236 PUT_MODE (op, SImode);
14237
14238 int result = recog (PATTERN (i), i, 0);
14239
14240 /* It seems there is no insn like that. Create a negated version and
14241 try again. If we hit a negated form, we'll allow that and append a
14242 nott sequence when splitting out the insns. Insns that do the split
14243 can then remove the trailing nott if they know how to deal with it. */
14244 if (result < 0 && COMPARISON_P (op))
14245 {
14246 machine_mode cmp_mode = GET_MODE (XEXP (op, 0));
14247 if (cmp_mode == VOIDmode)
14248 cmp_mode = GET_MODE (XEXP (op, 1));
14249
14250 rtx_code prev_code = GET_CODE (op);
14251 PUT_CODE (op, reverse_condition (GET_CODE (op)));
14252 result = recog (PATTERN (i), i, 0);
14253 PUT_CODE (op, prev_code);
14254 }
14255
14256 PUT_MODE (op, prev_op_mode);
14257 recog_data = prev_recog_data;
14258 return result >= 0;
14259 }
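/* For illustration (hedged, not exhaustive): an operand such as
     (eq:SI (reg:SI 65) (const_int 0))
   is accepted because (set (reg:SI T_REG) (eq ...)) can be recognized as an
   existing T bit setting insn (a tst/cmp pattern), while something like
     (le:SI (reg:SI 65) (reg:SI 66))
   is only recognized via its reversed 'gt' form, which the splitter later
   handles by appending a nott.  Plain registers, constants and memory
   operands are rejected up front by the early checks above.  */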
14260
14261 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
14262 This can be used as a condition for insn/split patterns to allow certain
14263 T bit setting patterns to be matched only as subexpressions of other
14264 patterns. */
14265 bool
14266 sh_in_recog_treg_set_expr (void)
14267 {
14268 return sh_recog_treg_set_expr_reent_count > 0;
14269 }
14270
14271 /* Given an rtx x, which is assumed to be some expression that has been
14272 matched by the 'treg_set_expr' predicate before, split and emit the
14273 insns that are necessary to calculate the expression and store the result
14274 in the T bit.
14275 The splitting is done recursively, similar to 'try_split' in emit-rtl.c.
14276 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
14277 'delete_insn' which then causes the DF parts to bail out, because we
14278 currently are inside another gen_split* function and would invoke
14279 'try_split' in a reentrant way. */
14280 static std::pair<rtx_insn*, rtx_insn*>
14281 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
14282 {
14283 if (dump_file)
14284 {
14285 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
14286 print_rtl_single (dump_file, i);
14287 fprintf (dump_file, "\n");
14288 }
14289
14290 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
14291
14292 if (seq == NULL)
14293 return std::make_pair (i, i);
14294
14295 /* Avoid infinite splitter loops if any insn of the result matches
14296 the original pattern. */
14297 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
14298 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
14299 return std::make_pair (i, i);
14300
14301 unshare_all_rtl_in_chain (seq);
14302
14303 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
14304 a linked list, replace the single insn with the new insns. */
14305 rtx_insn* seqlast = seq;
14306 while (NEXT_INSN (seqlast) != NULL)
14307 seqlast = NEXT_INSN (seqlast);
14308
14309 if (rtx_insn* iprev = PREV_INSN (i))
14310 SET_NEXT_INSN (iprev) = seq;
14311 if (rtx_insn* inext = NEXT_INSN (i))
14312 SET_PREV_INSN (inext) = seqlast;
14313
14314 SET_PREV_INSN (seq) = PREV_INSN (i);
14315 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
14316
14317 SET_PREV_INSN (i) = NULL;
14318 SET_NEXT_INSN (i) = NULL;
14319
14320 /* Recursively split all insns. */
14321 for (i = seq; ; i = NEXT_INSN (i))
14322 {
14323 std::pair<rtx_insn*, rtx_insn*> ii =
14324 sh_try_split_insn_simple (i, curr_insn, n + 1);
14325 if (i == seq)
14326 seq = ii.first;
14327 if (i == seqlast)
14328 {
14329 seqlast = ii.second;
14330 break;
14331 }
14332 i = ii.second;
14333 }
14334
14335 return std::make_pair (seq, seqlast);
14336 }
14337
14338 sh_treg_insns
14339 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
14340 {
14341 if (t_reg_operand (x, VOIDmode))
14342 return sh_treg_insns ();
14343
14344 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
14345
14346 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
14347 SET_PREV_INSN (i) = NULL;
14348 SET_NEXT_INSN (i) = NULL;
14349
14350 if (dump_file)
14351 {
14352 fprintf (dump_file, "split_treg_set_expr insn:\n");
14353 print_rtl (dump_file, i);
14354 fprintf (dump_file, "\n");
14355 }
14356
14357 /* If the insn is not found, we will try a negated form and append
14358 a nott. */
14359 bool append_nott = false;
14360
14361 /* We are going to invoke recog/split_insns in a re-entrant way and thus
14362 have to capture its current state and restore it afterwards. */
14363 recog_data_d prev_recog_data = recog_data;
14364
14365 if (negt_reg_operand (x, GET_MODE (x)))
14366 {
14367 /* This is a normal movt followed by a nott. It will be converted
14368 into a movrt after initial expansion. */
14369 XEXP (PATTERN (i), 1) = get_t_reg_rtx ();
14370 append_nott = true;
14371 }
14372 else
14373 {
14374 /* If the comparison op doesn't have a mode set, set it to SImode. */
14375 if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode)
14376 PUT_MODE (x, SImode);
14377
14378 int insn_code = recog (PATTERN (i), i, 0);
14379
14380 if (insn_code < 0 && COMPARISON_P (x))
14381 {
14382 machine_mode cmp_mode = GET_MODE (XEXP (x, 0));
14383 if (cmp_mode == VOIDmode)
14384 cmp_mode = GET_MODE (XEXP (x, 1));
14385
14386 PUT_CODE (x, reverse_condition (GET_CODE (x)));
14387 insn_code = recog (PATTERN (i), i, 0);
14388 append_nott = true;
14389 }
14390
14391 gcc_assert (insn_code >= 0);
14392 }
14393
14394 /* Try to recursively split the insn. Some insns might refuse to split
14395 any further while we are in the treg_set_expr splitting phase. They
14396 will be emitted as part of the outer insn and then split again. */
14397 std::pair<rtx_insn*, rtx_insn*> insnlist =
14398 sh_try_split_insn_simple (i, curr_insn);
14399
14400 /* Restore recog state. */
14401 recog_data = prev_recog_data;
14402
14403 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
14404 ? insnlist.second
14405 : NULL;
14406 if (dump_file)
14407 {
14408 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
14409 print_rtl (dump_file, insnlist.first);
14410 fprintf (dump_file, "\n");
14411
14412 if (nott_insn != NULL)
14413 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
14414 }
14415
14416 emit_insn (insnlist.first);
14417
14418 if (nott_insn != NULL && append_nott)
14419 {
14420 if (dump_file)
14421 fprintf (dump_file, "removing trailing nott\n");
14422 remove_insn (nott_insn);
14423 nott_insn = NULL;
14424 append_nott = false;
14425 }
14426
14427 if (append_nott)
14428 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
14429
14430 rtx_insn* first_insn = get_insns ();
14431
14432 if (dump_file)
14433 {
14434 fprintf (dump_file, "resulting insns:\n");
14435 print_rtl (dump_file, first_insn);
14436 fprintf (dump_file, "\n");
14437 }
14438
14439 return sh_treg_insns (first_insn, nott_insn);
14440 }
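/* Illustrative outcome of the function above (register numbers hypothetical):
   splitting
     (set (reg:SI T_REG) (le:SI (reg:SI 65) (reg:SI 66)))
   finds no direct insn for 'le', so the reversed 'gt' comparison is recognized
   and a trailing nott is appended, i.e. roughly
     (set (reg:SI T_REG) (gt:SI (reg:SI 65) (reg:SI 66)))
     (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))	;; nott
   The returned sh_treg_insns records that trailing nott so the calling
   splitter can drop it when it knows how to absorb the negated T bit
   result.  */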
14441
14442 /*------------------------------------------------------------------------------
14443 Mode switching support code.
14444 */
14445
14446 static void
14447 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
14448 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14449 {
14450 if ((TARGET_SH4A_FP || TARGET_SH4_300)
14451 && prev_mode != FP_MODE_NONE && prev_mode != mode)
14452 {
14453 emit_insn (gen_toggle_pr ());
14454 if (TARGET_FMOVD)
14455 emit_insn (gen_toggle_sz ());
14456 }
14457 else if (mode != FP_MODE_NONE)
14458 {
14459 rtx tmp = gen_reg_rtx (SImode);
14460 emit_insn (gen_sts_fpscr (tmp));
14461 rtx i = NULL;
14462
14463 const unsigned HOST_WIDE_INT fpbits =
14464 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
14465
14466 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
14467 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14468 else if (mode == FP_MODE_SINGLE)
14469 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
14470 else if (mode == FP_MODE_DOUBLE)
14471 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14472 else
14473 gcc_unreachable ();
14474
14475 emit_insn (i);
14476 emit_insn (gen_lds_fpscr (tmp));
14477 }
14478 }
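/* Illustrative code for the non-toggling path above (register choice and the
   constant pool load are assumptions): switching to FP_MODE_DOUBLE when the
   previous mode is unknown emits roughly
     sts	fpscr,r1
     mov.l	.Lfpbits,r2	! FPSCR_PR, plus FPSCR_SZ with -mfmovd
     or	r2,r1
     lds	r1,fpscr
   while SH4A/SH4-300 targets with a known previous mode just toggle the PR
   (and, with -mfmovd, SZ) bits via gen_toggle_pr / gen_toggle_sz.  */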
14479
14480 static int
14481 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
14482 {
14483 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
14484 }
14485
14486 static int
14487 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
14488 {
14489 if (TARGET_HITACHI && recog_memoized (insn) >= 0
14490 && get_attr_fp_set (insn) != FP_SET_NONE)
14491 return (int) get_attr_fp_set (insn);
14492 else
14493 return mode;
14494 }
14495
14496 static int
14497 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
14498 {
14499 return NORMAL_MODE (entity);
14500 }
14501
14502 static int
14503 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
14504 {
14505 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
14506 }
14507
14508 static int
14509 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
14510 {
14511 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
14512 }
14513
14514 /*------------------------------------------------------------------------------
14515 Misc
14516 */
14517
14518 /* Return true if we use LRA instead of the reload pass. */
14519 bool
14520 sh_lra_p (void)
14521 {
14522 return sh_lra_flag;
14523 }
14524
14525 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
14526
14527 static bool
14528 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
14529 unsigned int align,
14530 enum by_pieces_operation op,
14531 bool speed_p)
14532 {
14533 switch (op)
14534 {
14535 case MOVE_BY_PIECES:
14536 return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
14537 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14538 case STORE_BY_PIECES:
14539 case SET_BY_PIECES:
14540 return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1)
14541 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14542 default:
14543 return default_use_by_pieces_infrastructure_p (size, align,
14544 op, speed_p);
14545 }
14546 }
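/* Worked example for the move_by_pieces decision above (assuming 4 byte
   pieces for concreteness): a 16 byte copy with 32 bit alignment needs 4
   piece moves.  When optimizing for speed the threshold is 16, so 4 < 16 and
   the inline by-pieces expansion is used; when optimizing for size the
   threshold drops to 2, so the generic fallback (e.g. a memcpy call or a
   small loop) is preferred instead.  */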
14547
14548 #include "gt-sh.h"