1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2015 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sstream>
23 #include <vector>
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "cfghooks.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "rtl.h"
33 #include "df.h"
34 #include "insn-config.h"
35 #include "alias.h"
36 #include "fold-const.h"
37 #include "stringpool.h"
38 #include "stor-layout.h"
39 #include "calls.h"
40 #include "varasm.h"
41 #include "flags.h"
42 #include "expmed.h"
43 #include "dojump.h"
44 #include "explow.h"
45 #include "emit-rtl.h"
46 #include "stmt.h"
47 #include "expr.h"
48 #include "insn-codes.h"
49 #include "optabs.h"
50 #include "reload.h"
51 #include "regs.h"
52 #include "output.h"
53 #include "insn-attr.h"
54 #include "diagnostic-core.h"
55 #include "recog.h"
56 #include "dwarf2.h"
57 #include "tm_p.h"
58 #include "target.h"
59 #include "langhooks.h"
60 #include "cfgrtl.h"
61 #include "cfganal.h"
62 #include "lcm.h"
63 #include "cfgbuild.h"
64 #include "cfgcleanup.h"
65 #include "intl.h"
66 #include "sched-int.h"
67 #include "params.h"
68 #include "internal-fn.h"
69 #include "gimple-fold.h"
70 #include "tree-eh.h"
71 #include "gimplify.h"
72 #include "cfgloop.h"
73 #include "alloc-pool.h"
74 #include "tm-constrs.h"
75 #include "opts.h"
76 #include "tree-pass.h"
77 #include "pass_manager.h"
78 #include "context.h"
79 #include "builtins.h"
80 #include "rtl-iter.h"
81
82 /* This file should be included last. */
83 #include "target-def.h"
84
85 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
86
87 /* These are some macros to abstract register modes. */
88 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
89 && ((HOST_WIDE_INT)(VALUE)) <= 511)
90
91 #define CONST_OK_FOR_ADD(size) \
92 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
93 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
94 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
95 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
96
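/* Editor's note: an illustrative sketch (not part of the original file) of
   how the macros above are typically used when emitting a stack adjustment.
   GEN_ADD3 picks gen_adddi3 on SHmedia64 and gen_addsi3 otherwise, and
   CONST_OK_FOR_ADD checks the immediate range of the add insn (signed
   10 bits on SHmedia, signed 8 bits otherwise).  The scratch register
   choice below is hypothetical.  */
#if 0
  if (CONST_OK_FOR_ADD (size))
    emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                         GEN_INT (size)));
  else
    {
      /* Out-of-range constants have to go through a scratch register.  */
      rtx scratch = gen_rtx_REG (Pmode, 3);  /* hypothetical scratch reg  */
      emit_insn (GEN_MOV (scratch, GEN_INT (size)));
      emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx, scratch));
    }
#endif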
97 /* Used to simplify the logic below. Find the attributes wherever
98 they may be. */
99 #define SH_ATTRIBUTES(decl) \
100 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
101 : DECL_ATTRIBUTES (decl) \
102 ? (DECL_ATTRIBUTES (decl)) \
103 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
104
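/* Editor's note: an illustrative example (not part of the original file).
   For a FUNCTION_DECL that carries no attributes of its own, SH_ATTRIBUTES
   falls back to the attributes of the decl's type, so e.g.

     tree attrs = SH_ATTRIBUTES (current_function_decl);
     if (lookup_attribute ("interrupt_handler", attrs) != NULL_TREE)
       ...

   still finds an "interrupt_handler" attribute that was attached to the
   function type rather than to the decl itself.  */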
105 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
106 int current_function_interrupt;
107
108 tree sh_deferred_function_attributes;
109 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
110
111 /* Global variables for machine-dependent things. */
112
113 /* Which cpu are we scheduling for. */
114 enum processor_type sh_cpu;
115
116 /* Definitions used in ready queue reordering for first scheduling pass. */
117
118 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
119 static short *regmode_weight[2];
120
121 /* Total SFmode and SImode weights of scheduled insns. */
122 static int curr_regmode_pressure[2];
123
124 /* Number of r0 life regions. */
125 static int r0_life_regions;
126
127 /* If true, skip cycles for Q -> R movement. */
128 static int skip_cycles = 0;
129
130 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
131 and returned from sh_reorder2. */
132 static short cached_can_issue_more;
133
134 /* Unique number for UNSPEC_BBR pattern. */
135 static unsigned int unspec_bbr_uid = 1;
136
137 /* Provides the class number of the smallest class containing
138 reg number. */
139 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
140 {
141 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
151 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
152 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
153 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
154 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
155 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
156 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
157   FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
167 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
168 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
169 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
170 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
171 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
172 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
173 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
174 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
175 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
176 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
177 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
178 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
179 GENERAL_REGS, GENERAL_REGS,
180 };
181
182 char sh_register_names[FIRST_PSEUDO_REGISTER] \
183 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
184
185 char sh_additional_register_names[ADDREGNAMES_SIZE] \
186 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
187 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
188
189 int assembler_dialect;
190
191 static bool shmedia_space_reserved_for_target_registers;
192
193 static void split_branches (rtx_insn *);
194 static int branch_dest (rtx);
195 static void print_slot (rtx_sequence *);
196 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
197 static void dump_table (rtx_insn *, rtx_insn *);
198 static bool broken_move (rtx_insn *);
199 static bool mova_p (rtx_insn *);
200 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
201 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
202 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
203 static void sh_reorg (void);
204 static void sh_option_override (void);
205 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
206 static rtx_insn *frame_insn (rtx);
207 static rtx push (int);
208 static void pop (int);
209 static void push_regs (HARD_REG_SET *, int);
210 static int calc_live_regs (HARD_REG_SET *);
211 static HOST_WIDE_INT rounded_frame_size (int);
212 static bool sh_frame_pointer_required (void);
213 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
214 static int sh_mode_needed (int, rtx_insn *);
215 static int sh_mode_after (int, int, rtx_insn *);
216 static int sh_mode_entry (int);
217 static int sh_mode_exit (int);
218 static int sh_mode_priority (int entity, int n);
219 static bool sh_lra_p (void);
220
221 static rtx mark_constant_pool_use (rtx);
222 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
223 int, bool *);
224 static tree sh_handle_resbank_handler_attribute (tree *, tree,
225 tree, int, bool *);
226 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
227 tree, int, bool *);
228 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
229 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
230 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
231 static void sh_print_operand (FILE *, rtx, int);
232 static void sh_print_operand_address (FILE *, rtx);
233 static bool sh_print_operand_punct_valid_p (unsigned char code);
234 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
235 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
236 static void sh_insert_attributes (tree, tree *);
237 static const char *sh_check_pch_target_flags (int);
238 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
239 static int sh_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
240 static int sh_issue_rate (void);
241 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
242 static short find_set_regmode_weight (rtx, machine_mode);
243 static short find_insn_regmode_weight (rtx, machine_mode);
244 static void find_regmode_weight (basic_block, machine_mode);
245 static int find_r0_life_regions (basic_block);
246 static void sh_md_init_global (FILE *, int, int);
247 static void sh_md_finish_global (FILE *, int);
248 static int rank_for_reorder (const void *, const void *);
249 static void swap_reorder (rtx_insn **, int);
250 static void ready_reorder (rtx_insn **, int);
251 static bool high_pressure (machine_mode);
252 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
253 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
254 static void sh_md_init (FILE *, int, int);
255 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
256
257 static bool sh_function_ok_for_sibcall (tree, tree);
258
259 static bool sh_cannot_modify_jumps_p (void);
260 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
261 static reg_class_t sh_target_reg_class (void);
262 static bool sh_optimize_target_register_callee_saved (bool);
263 static bool sh_ms_bitfield_layout_p (const_tree);
264
265 static void sh_init_builtins (void);
266 static tree sh_builtin_decl (unsigned, bool);
267 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
268 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
269 HOST_WIDE_INT, tree);
270 static void sh_file_start (void);
271 static bool flow_dependent_p (rtx, rtx);
272 static void flow_dependent_p_1 (rtx, const_rtx, void *);
273 static int shiftcosts (rtx);
274 static int and_xor_ior_costs (rtx, int);
275 static int addsubcosts (rtx);
276 static int multcosts (rtx);
277 static bool unspec_caller_rtx_p (rtx);
278 static bool sh_cannot_copy_insn_p (rtx_insn *);
279 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
280 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
281 static int sh_pr_n_sets (void);
282 static rtx sh_allocate_initial_value (rtx);
283 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
284 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
285 machine_mode,
286 struct secondary_reload_info *);
287 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
288 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
289 static rtx sh_delegitimize_address (rtx);
290 static bool sh_cannot_substitute_mem_equiv_p (rtx);
291 static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
292 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
293 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
294 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
295 static int scavenge_reg (HARD_REG_SET *s);
296 struct save_schedule_s;
297 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
298 struct save_schedule_s *, int);
299
300 static rtx sh_struct_value_rtx (tree, int);
301 static rtx sh_function_value (const_tree, const_tree, bool);
302 static bool sh_function_value_regno_p (const unsigned int);
303 static rtx sh_libcall_value (machine_mode, const_rtx);
304 static bool sh_return_in_memory (const_tree, const_tree);
305 static rtx sh_builtin_saveregs (void);
306 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
307 tree, int *, int);
308 static bool sh_strict_argument_naming (cumulative_args_t);
309 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
310 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
311 static tree sh_build_builtin_va_list (void);
312 static void sh_va_start (tree, rtx);
313 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
314 static bool sh_promote_prototypes (const_tree);
315 static machine_mode sh_promote_function_mode (const_tree type,
316 machine_mode,
317 int *punsignedp,
318 const_tree funtype,
319 int for_return);
320 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
321 const_tree, bool);
322 static bool sh_callee_copies (cumulative_args_t, machine_mode,
323 const_tree, bool);
324 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
325 tree, bool);
326 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
327 const_tree, bool);
328 static rtx sh_function_arg (cumulative_args_t, machine_mode,
329 const_tree, bool);
330 static bool sh_scalar_mode_supported_p (machine_mode);
331 static int sh_dwarf_calling_convention (const_tree);
332 static void sh_encode_section_info (tree, rtx, int);
333 static bool sh2a_function_vector_p (tree);
334 static void sh_trampoline_init (rtx, tree, rtx);
335 static rtx sh_trampoline_adjust_address (rtx);
336 static void sh_conditional_register_usage (void);
337 static bool sh_legitimate_constant_p (machine_mode, rtx);
338 static int mov_insn_size (machine_mode, bool);
339 static int mov_insn_alignment_mask (machine_mode, bool);
340 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
341 unsigned int,
342 enum by_pieces_operation,
343 bool);
344 static bool sequence_insn_p (rtx_insn *);
345 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
346 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
347 machine_mode, bool);
348 static bool sh_legitimate_combined_insn (rtx_insn* insn);
349
350 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
351
352 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
353 \f
354 static const struct attribute_spec sh_attribute_table[] =
355 {
356 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
357 affects_type_identity } */
358 { "interrupt_handler", 0, 0, true, false, false,
359 sh_handle_interrupt_handler_attribute, false },
360 { "sp_switch", 1, 1, true, false, false,
361 sh_handle_sp_switch_attribute, false },
362 { "trap_exit", 1, 1, true, false, false,
363 sh_handle_trap_exit_attribute, false },
364 { "renesas", 0, 0, false, true, false,
365 sh_handle_renesas_attribute, false },
366 { "trapa_handler", 0, 0, true, false, false,
367 sh_handle_interrupt_handler_attribute, false },
368 { "nosave_low_regs", 0, 0, true, false, false,
369 sh_handle_interrupt_handler_attribute, false },
370 { "resbank", 0, 0, true, false, false,
371 sh_handle_resbank_handler_attribute, false },
372 { "function_vector", 1, 1, true, false, false,
373 sh2a_handle_function_vector_handler_attribute, false },
374 { NULL, 0, 0, false, false, false, NULL, false }
375 };
376 \f
377 /* Initialize the GCC target structure. */
378 #undef TARGET_ATTRIBUTE_TABLE
379 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
380
381 /* The next two are used for debug info when compiling with -gdwarf. */
382 #undef TARGET_ASM_UNALIGNED_HI_OP
383 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
384 #undef TARGET_ASM_UNALIGNED_SI_OP
385 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
386
387 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
388 #undef TARGET_ASM_UNALIGNED_DI_OP
389 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
390 #undef TARGET_ASM_ALIGNED_DI_OP
391 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
392
393 #undef TARGET_OPTION_OVERRIDE
394 #define TARGET_OPTION_OVERRIDE sh_option_override
395
396 #undef TARGET_PRINT_OPERAND
397 #define TARGET_PRINT_OPERAND sh_print_operand
398 #undef TARGET_PRINT_OPERAND_ADDRESS
399 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
400 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
401 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
402 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
403 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
404
405 #undef TARGET_ASM_FUNCTION_EPILOGUE
406 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
407
408 #undef TARGET_ASM_OUTPUT_MI_THUNK
409 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
410
411 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
412 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
413 hook_bool_const_tree_hwi_hwi_const_tree_true
414
415 #undef TARGET_ASM_FILE_START
416 #define TARGET_ASM_FILE_START sh_file_start
417 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
418 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
419
420 #undef TARGET_REGISTER_MOVE_COST
421 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
422
423 #undef TARGET_INSERT_ATTRIBUTES
424 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
425
426 #undef TARGET_SCHED_ADJUST_COST
427 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
428
429 #undef TARGET_SCHED_ISSUE_RATE
430 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
431
432 /* The following hooks have been implemented to re-enable sched1. With
433 their help we limit the movement of insns in sched1 in order to
434 reduce the register pressure. The overall idea is to keep count of the
435 SImode and SFmode regs required by already scheduled insns. When these
436 counts cross certain threshold values, give priority to insns that free
437 registers. The insn that frees registers is most likely to be the insn
438 with the lowest LUID (original insn order); but such an insn might be in
439 the stalled queue (Q) instead of the ready queue (R). To solve this, we
440 skip up to a maximum of 8 cycles so that such insns may move from Q -> R.
441
442 The descriptions of the hooks are as follows:
443
444 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
445 scheduler; it is called inside the sched_init function just after the
446 call to find_insn_reg_weights. It is used to calculate the SImode
447 and SFmode weights of the insns of basic blocks, much like what
448 find_insn_reg_weights does.
449 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
450
451 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
452 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
453 (Q)->(R).
454
455 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
456 high, reorder the ready queue so that the insn with the lowest LUID will
457 be issued next.
458
459 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
460 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
461
462 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
463 can be returned from TARGET_SCHED_REORDER2.
464
465 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
466
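/* Editor's note: an illustrative sketch (not part of the original file) of
   how the pressure accounting described above fits together.  The threshold
   name is hypothetical; the actual hooks and helpers (INSN_REGMODE_WEIGHT,
   CURR_REGMODE_PRESSURE, ready_reorder) are defined further down in this
   file.  */
#if 0
  /* In the variable-issue hook, account for the SImode registers the
     just-issued insn requires ...  */
  CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);

  /* ... and in the reorder hooks, only touch the ready queue once the
     running pressure exceeds a threshold.  */
  if (CURR_REGMODE_PRESSURE (SImode) > SIMODE_PRESSURE_LIMIT /* hypothetical */)
    ready_reorder (ready, n_ready);
#endif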
467 #undef TARGET_SCHED_DFA_NEW_CYCLE
468 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
469
470 #undef TARGET_SCHED_INIT_GLOBAL
471 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
472
473 #undef TARGET_SCHED_FINISH_GLOBAL
474 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
475
476 #undef TARGET_SCHED_VARIABLE_ISSUE
477 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
478
479 #undef TARGET_SCHED_REORDER
480 #define TARGET_SCHED_REORDER sh_reorder
481
482 #undef TARGET_SCHED_REORDER2
483 #define TARGET_SCHED_REORDER2 sh_reorder2
484
485 #undef TARGET_SCHED_INIT
486 #define TARGET_SCHED_INIT sh_md_init
487
488 #undef TARGET_DELEGITIMIZE_ADDRESS
489 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
490
491 #undef TARGET_LEGITIMIZE_ADDRESS
492 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
493
494 #undef TARGET_CANNOT_MODIFY_JUMPS_P
495 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
496 #undef TARGET_CAN_FOLLOW_JUMP
497 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
498 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
499 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
500 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
501 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
502 sh_optimize_target_register_callee_saved
503
504 #undef TARGET_MS_BITFIELD_LAYOUT_P
505 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
506
507 #undef TARGET_INIT_BUILTINS
508 #define TARGET_INIT_BUILTINS sh_init_builtins
509 #undef TARGET_BUILTIN_DECL
510 #define TARGET_BUILTIN_DECL sh_builtin_decl
511 #undef TARGET_EXPAND_BUILTIN
512 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
513
514 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
515 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
516
517 #undef TARGET_CANNOT_COPY_INSN_P
518 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
519 #undef TARGET_RTX_COSTS
520 #define TARGET_RTX_COSTS sh_rtx_costs
521 #undef TARGET_ADDRESS_COST
522 #define TARGET_ADDRESS_COST sh_address_cost
523 #undef TARGET_ALLOCATE_INITIAL_VALUE
524 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
525
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
528
529 #undef TARGET_DWARF_REGISTER_SPAN
530 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
531
532 #ifdef HAVE_AS_TLS
533 #undef TARGET_HAVE_TLS
534 #define TARGET_HAVE_TLS true
535 #endif
536
537 #undef TARGET_PROMOTE_PROTOTYPES
538 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
541
542 #undef TARGET_FUNCTION_VALUE
543 #define TARGET_FUNCTION_VALUE sh_function_value
544 #undef TARGET_FUNCTION_VALUE_REGNO_P
545 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
546 #undef TARGET_LIBCALL_VALUE
547 #define TARGET_LIBCALL_VALUE sh_libcall_value
548 #undef TARGET_STRUCT_VALUE_RTX
549 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
550 #undef TARGET_RETURN_IN_MEMORY
551 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
552
553 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
554 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
555 #undef TARGET_SETUP_INCOMING_VARARGS
556 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
557 #undef TARGET_STRICT_ARGUMENT_NAMING
558 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
559 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
560 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
561 #undef TARGET_MUST_PASS_IN_STACK
562 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
563 #undef TARGET_PASS_BY_REFERENCE
564 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
565 #undef TARGET_CALLEE_COPIES
566 #define TARGET_CALLEE_COPIES sh_callee_copies
567 #undef TARGET_ARG_PARTIAL_BYTES
568 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
569 #undef TARGET_FUNCTION_ARG
570 #define TARGET_FUNCTION_ARG sh_function_arg
571 #undef TARGET_FUNCTION_ARG_ADVANCE
572 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
573
574 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
575 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
576
577 #undef TARGET_BUILD_BUILTIN_VA_LIST
578 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
579 #undef TARGET_EXPAND_BUILTIN_VA_START
580 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
581 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
582 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
583
584 #undef TARGET_SCALAR_MODE_SUPPORTED_P
585 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
586 #undef TARGET_VECTOR_MODE_SUPPORTED_P
587 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
588
589 #undef TARGET_CHECK_PCH_TARGET_FLAGS
590 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
591
592 #undef TARGET_DWARF_CALLING_CONVENTION
593 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
594
595 #undef TARGET_FRAME_POINTER_REQUIRED
596 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
597
598 #undef TARGET_MODE_EMIT
599 #define TARGET_MODE_EMIT sh_emit_mode_set
600
601 #undef TARGET_MODE_NEEDED
602 #define TARGET_MODE_NEEDED sh_mode_needed
603
604 #undef TARGET_MODE_AFTER
605 #define TARGET_MODE_AFTER sh_mode_after
606
607 #undef TARGET_MODE_ENTRY
608 #define TARGET_MODE_ENTRY sh_mode_entry
609
610 #undef TARGET_MODE_EXIT
611 #define TARGET_MODE_EXIT sh_mode_exit
612
613 #undef TARGET_MODE_PRIORITY
614 #define TARGET_MODE_PRIORITY sh_mode_priority
615
616 /* Return regmode weight for insn. */
617 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
618 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
619
620 /* Return current register pressure for regmode. */
621 #define CURR_REGMODE_PRESSURE(MODE)\
622 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
623
624 #undef TARGET_ENCODE_SECTION_INFO
625 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
626
627 #undef TARGET_LRA_P
628 #define TARGET_LRA_P sh_lra_p
629
630 #undef TARGET_SECONDARY_RELOAD
631 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
632
633 #undef TARGET_PREFERRED_RELOAD_CLASS
634 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
635
636 #undef TARGET_CONDITIONAL_REGISTER_USAGE
637 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
638
639 #undef TARGET_LEGITIMATE_ADDRESS_P
640 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
641
642 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
643 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
644
645 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
646 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
647 sh_legitimize_address_displacement
648
649 #undef TARGET_TRAMPOLINE_INIT
650 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
651 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
652 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
653
654 #undef TARGET_LEGITIMATE_CONSTANT_P
655 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
656
657 #undef TARGET_CANONICALIZE_COMPARISON
658 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
659
660 #undef TARGET_LEGITIMATE_COMBINED_INSN
661 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
662
663 #undef TARGET_FIXED_CONDITION_CODE_REGS
664 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
665
666 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
667 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
668 sh_use_by_pieces_infrastructure_p
669
670 /* Machine-specific symbol_ref flags. */
671 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
672
673 /* The tas.b instruction sets bit 7 of the byte, i.e. 0x80. This value
674 is used by the optabs.c atomic op expansion code as well as in sync.md. */
675 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
676 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
677
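/* Editor's note: an illustrative consequence (not part of the original
   file) of the non-standard trueval above.  Because tas.b stores 0x80
   rather than 1, the middle end uses 0x80 when turning the byte read back
   by an atomic test-and-set into a boolean result; from user code the
   builtin behaves as usual:

     static unsigned char lock;
     ...
     if (!__atomic_test_and_set (&lock, __ATOMIC_ACQUIRE))
       {
         ...  critical section  ...
         __atomic_clear (&lock, __ATOMIC_RELEASE);
       }
*/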
678 struct gcc_target targetm = TARGET_INITIALIZER;
679 \f
680
681 /* Information on the currently selected atomic model.
682 This is initialized in sh_option_override. */
683 static sh_atomic_model selected_atomic_model_;
684
685 const sh_atomic_model&
686 selected_atomic_model (void)
687 {
688 return selected_atomic_model_;
689 }
690
691 static sh_atomic_model
692 parse_validate_atomic_model_option (const char* str)
693 {
694 const char* model_names[sh_atomic_model::num_models];
695 model_names[sh_atomic_model::none] = "none";
696 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
697 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
698 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
699 model_names[sh_atomic_model::soft_imask] = "soft-imask";
700
701 const char* model_cdef_names[sh_atomic_model::num_models];
702 model_cdef_names[sh_atomic_model::none] = "NONE";
703 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
704 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
705 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
706 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
707
708 sh_atomic_model ret;
709 ret.type = sh_atomic_model::none;
710 ret.name = model_names[sh_atomic_model::none];
711 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
712 ret.strict = false;
713 ret.tcb_gbr_offset = -1;
714
715 /* Handle empty string as 'none'. */
716 if (str == NULL || *str == '\0')
717 return ret;
718
719 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
720
721 std::vector<std::string> tokens;
722 for (std::stringstream ss (str); ss.good (); )
723 {
724 tokens.push_back (std::string ());
725 std::getline (ss, tokens.back (), ',');
726 }
727
728 if (tokens.empty ())
729 err_ret ("invalid atomic model option");
730
731 /* The first token must be the atomic model name. */
732 {
733 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
734 if (tokens.front () == model_names[i])
735 {
736 ret.type = (sh_atomic_model::enum_type)i;
737 ret.name = model_names[i];
738 ret.cdef_name = model_cdef_names[i];
739 goto got_mode_name;
740 }
741
742 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
743 got_mode_name:;
744 }
745
746 /* Go through the remaining tokens. */
747 for (size_t i = 1; i < tokens.size (); ++i)
748 {
749 if (tokens[i] == "strict")
750 ret.strict = true;
751 else if (tokens[i].find ("gbr-offset=") == 0)
752 {
753 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
754 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
755 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
756 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
757 "option", offset_str.c_str ());
758 }
759 else
760 err_ret ("unknown parameter \"%s\" in atomic model option",
761 tokens[i].c_str ());
762 }
763
764 /* Check that the selection makes sense. */
765 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
766 err_ret ("atomic operations are not supported on SHmedia");
767
768 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
769 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
770 ret.name);
771
772 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
773 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
774
775 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
776 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
777
778 if (ret.type == sh_atomic_model::soft_tcb
779 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
780 || (ret.tcb_gbr_offset & 3) != 0))
781 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
782 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
783 ret.name);
784
785 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
786 err_ret ("cannot use atomic model %s in user mode", ret.name);
787
788 return ret;
789
790 #undef err_ret
791 }
792
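/* Editor's note: illustrative examples (not part of the original file) of
   option strings accepted by the parser above.  The first comma-separated
   token selects the model; "strict" and "gbr-offset=<n>" are the only
   recognized extra parameters:

     -matomic-model=soft-gusa
     -matomic-model=hard-llcs,strict
     -matomic-model=soft-tcb,gbr-offset=16

   For soft-tcb the gbr-offset parameter is mandatory and must be a
   multiple of 4 in the range 0-1020.  */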
793 /* Register SH specific RTL passes. */
794 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
795 const char* name);
796 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
797 const char* name);
798 static void
799 register_sh_passes (void)
800 {
801 if (!TARGET_SH1)
802 return;
803
804 /* Running the sh_treg_combine pass after ce1 generates better code when
805 comparisons are combined and reg-reg moves are introduced, because
806 reg-reg moves will be eliminated afterwards. However, there are quite
807 a few cases where combine will be unable to fold comparison-related insns,
808 so for now this is not done.
809 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
810 PASS_POS_INSERT_AFTER, "ce1", 1);
811 */
812
813 /* Run sh_treg_combine pass after combine but before register allocation. */
814 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
815 PASS_POS_INSERT_AFTER, "split1", 1);
816
817 /* Run sh_treg_combine pass after register allocation and basic block
818 reordering as this sometimes creates new opportunities. */
819 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
820 PASS_POS_INSERT_AFTER, "split4", 1);
821
822 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
823 is known after a conditional branch.
824 This must be done after basic blocks and branch conditions have
825 stabilized and won't be changed by further passes. */
826 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
827 PASS_POS_INSERT_BEFORE, "sched2", 1);
828 }
829
830 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
831 various options, and do some machine dependent initialization. */
832 static void
833 sh_option_override (void)
834 {
835 int regno;
836
837 SUBTARGET_OVERRIDE_OPTIONS;
838 if (optimize > 1 && !optimize_size)
839 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
840
841 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
842 TARGET_CBRANCHDI4 = 1;
843 TARGET_CMPEQDI_T = 0;
844
845 sh_cpu = PROCESSOR_SH1;
846 assembler_dialect = 0;
847 if (TARGET_SH2)
848 sh_cpu = PROCESSOR_SH2;
849 if (TARGET_SH2E)
850 sh_cpu = PROCESSOR_SH2E;
851 if (TARGET_SH2A)
852 sh_cpu = PROCESSOR_SH2A;
853 if (TARGET_SH3)
854 sh_cpu = PROCESSOR_SH3;
855 if (TARGET_SH3E)
856 sh_cpu = PROCESSOR_SH3E;
857 if (TARGET_SH4)
858 {
859 assembler_dialect = 1;
860 sh_cpu = PROCESSOR_SH4;
861 }
862 if (TARGET_SH4A)
863 {
864 assembler_dialect = 1;
865 sh_cpu = PROCESSOR_SH4A;
866 }
867 if (TARGET_SH5)
868 {
869 sh_cpu = PROCESSOR_SH5;
870 target_flags |= MASK_ALIGN_DOUBLE;
871 if (TARGET_SHMEDIA_FPU)
872 target_flags |= MASK_FMOVD;
873 if (TARGET_SHMEDIA)
874 {
875 /* There are no delay slots on SHmedia. */
876 flag_delayed_branch = 0;
877 /* Relaxation isn't yet supported for SHmedia. */
878 target_flags &= ~MASK_RELAX;
879 /* After reload, if-conversion does little good but can cause
880 ICEs:
881 - find_if_block doesn't do anything for SH because we don't
882 have conditional execution patterns. (We use conditional
883 move patterns, which are handled differently, and only
884 before reload).
885 - find_cond_trap doesn't do anything for the SH because we
886 don't have conditional traps.
887 - find_if_case_1 uses redirect_edge_and_branch_force in
888 the only path that does an optimization, and this causes
889 an ICE when branch targets are in registers.
890 - find_if_case_2 doesn't do anything for the SHmedia after
891 reload except when it can redirect a tablejump - and
892 that's rather rare. */
893 flag_if_conversion2 = 0;
894 if (! strcmp (sh_div_str, "call"))
895 sh_div_strategy = SH_DIV_CALL;
896 else if (! strcmp (sh_div_str, "call2"))
897 sh_div_strategy = SH_DIV_CALL2;
898 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
899 sh_div_strategy = SH_DIV_FP;
900 else if (! strcmp (sh_div_str, "inv"))
901 sh_div_strategy = SH_DIV_INV;
902 else if (! strcmp (sh_div_str, "inv:minlat"))
903 sh_div_strategy = SH_DIV_INV_MINLAT;
904 else if (! strcmp (sh_div_str, "inv20u"))
905 sh_div_strategy = SH_DIV_INV20U;
906 else if (! strcmp (sh_div_str, "inv20l"))
907 sh_div_strategy = SH_DIV_INV20L;
908 else if (! strcmp (sh_div_str, "inv:call2"))
909 sh_div_strategy = SH_DIV_INV_CALL2;
910 else if (! strcmp (sh_div_str, "inv:call"))
911 sh_div_strategy = SH_DIV_INV_CALL;
912 else if (! strcmp (sh_div_str, "inv:fp"))
913 {
914 if (TARGET_FPU_ANY)
915 sh_div_strategy = SH_DIV_INV_FP;
916 else
917 sh_div_strategy = SH_DIV_INV;
918 }
919 TARGET_CBRANCHDI4 = 0;
920 /* Assembler CFI isn't yet fully supported for SHmedia. */
921 flag_dwarf2_cfi_asm = 0;
922 }
923 }
924 else
925 {
926 /* Only the sh64-elf assembler fully supports .quad. */
927 targetm.asm_out.aligned_op.di = NULL;
928 targetm.asm_out.unaligned_op.di = NULL;
929 }
930
931 /* User/privileged mode is supported only on SH3*, SH4* and SH5*.
932 Disable it for everything else. */
933 if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
934 TARGET_USERMODE = false;
935
936 if (TARGET_SH1)
937 {
938 if (! strcmp (sh_div_str, "call-div1"))
939 sh_div_strategy = SH_DIV_CALL_DIV1;
940 else if (! strcmp (sh_div_str, "call-fp")
941 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
942 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
943 sh_div_strategy = SH_DIV_CALL_FP;
944 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
945 sh_div_strategy = SH_DIV_CALL_TABLE;
946 else
947 /* Pick one that makes most sense for the target in general.
948 It is not much good to use different functions depending
949 on -Os, since then we'll end up with two different functions
950 when some of the code is compiled for size, and some for
951 speed. */
952
953 /* SH4 tends to emphasize speed. */
954 if (TARGET_HARD_SH4)
955 sh_div_strategy = SH_DIV_CALL_TABLE;
956 /* These have their own way of doing things. */
957 else if (TARGET_SH2A)
958 sh_div_strategy = SH_DIV_INTRINSIC;
959 /* ??? Should we use the integer SHmedia function instead? */
960 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
961 sh_div_strategy = SH_DIV_CALL_FP;
962 /* SH1 .. SH3 cores often go into small-footprint systems, so
963 default to the smallest implementation available. */
964 else
965 sh_div_strategy = SH_DIV_CALL_DIV1;
966 }
967 if (!TARGET_SH1)
968 TARGET_PRETEND_CMOVE = 0;
969 if (sh_divsi3_libfunc[0])
970 ; /* User supplied - leave it alone. */
971 else if (TARGET_DIVIDE_CALL_FP)
972 sh_divsi3_libfunc = "__sdivsi3_i4";
973 else if (TARGET_DIVIDE_CALL_TABLE)
974 sh_divsi3_libfunc = "__sdivsi3_i4i";
975 else if (TARGET_SH5)
976 sh_divsi3_libfunc = "__sdivsi3_1";
977 else
978 sh_divsi3_libfunc = "__sdivsi3";
979
980 if (sh_branch_cost == -1)
981 {
982 /* The SH1 does not have delay slots, hence we get a pipeline stall
983 at every branch. The SH4 is superscalar, so the single delay slot
984 is not sufficient to keep both pipelines filled.
985 In any case, set the default branch cost to '2', as it results in
986 slightly smaller code overall and also enables some if-conversions
987 that are required for matching special T-bit-related insns. */
988 sh_branch_cost = 2;
989 }
990
991 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
992 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
993 TARGET_ZDCBRANCH = 1;
994
995 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
996 if (! VALID_REGISTER_P (regno))
997 sh_register_names[regno][0] = '\0';
998
999 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
1000 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
1001 sh_additional_register_names[regno][0] = '\0';
1002
1003 if ((flag_pic && ! TARGET_PREFERGOT)
1004 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
1005 flag_no_function_cse = 1;
1006
1007 if (targetm.small_register_classes_for_mode_p (VOIDmode))
1008 {
1009 /* Never run scheduling before reload, since that can
1010 break global alloc, and generates slower code anyway due
1011 to the pressure on R0. */
1012 /* Enable sched1 for SH4 if the user explicitly requests it.
1013 When sched1 is enabled, the ready queue will be reordered by
1014 the target hooks if pressure is high. We cannot do this for
1015 PIC, SH3 and lower, as they give spill failures for R0. */
1016 if (!TARGET_HARD_SH4 || flag_pic)
1017 flag_schedule_insns = 0;
1018 /* ??? Current exception handling places basic block boundaries
1019 after call_insns. This causes high pressure on R0 and gives
1020 spill failures for R0 in reload. See PR 22553 and the thread
1021 on gcc-patches
1022 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
1023 else if (flag_exceptions)
1024 {
1025 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
1026 warning (0, "ignoring -fschedule-insns because of exception "
1027 "handling bug");
1028 flag_schedule_insns = 0;
1029 }
1030 else if (flag_schedule_insns
1031 && !global_options_set.x_flag_schedule_insns)
1032 flag_schedule_insns = 0;
1033 }
1034
1035 /* Unwind info is not correct around the CFG unless either a frame pointer
1036 is present or TARGET_ACCUMULATE_OUTGOING_ARGS is set. Fixing this requires rewriting
1037 unwind info generation to be aware of the CFG and propagating states
1038 around edges. */
1039 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1040 || flag_exceptions || flag_non_call_exceptions)
1041 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
1042 {
1043 warning (0, "unwind tables currently require either a frame pointer "
1044 "or -maccumulate-outgoing-args for correctness");
1045 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1046 }
1047
1048 /* Adjust loop, jump and function alignment values (in bytes), if those
1049 were not specified by the user using -falign-loops, -falign-jumps
1050 and -falign-functions options.
1051 32 bit alignment is better for speed, because instructions can be
1052 fetched as a pair from a longword boundary. For size use 16 bit
1053 alignment to get more compact code.
1054 Aligning all jumps increases the code size, even if it might
1055 result in slightly faster code. Thus, it is set to the smallest
1056 alignment possible if not specified by the user. */
1057 if (align_loops == 0)
1058 {
1059 if (TARGET_SH5)
1060 align_loops = 8;
1061 else
1062 align_loops = optimize_size ? 2 : 4;
1063 }
1064
1065 if (align_jumps == 0)
1066 {
1067 if (TARGET_SHMEDIA)
1068 align_jumps = 1 << CACHE_LOG;
1069 else
1070 align_jumps = 2;
1071 }
1072 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1073 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1074
1075 if (align_functions == 0)
1076 {
1077 if (TARGET_SHMEDIA)
1078 align_functions = optimize_size
1079 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1080 else
1081 align_functions = optimize_size ? 2 : 4;
1082 }
1083
1084 /* The linker relaxation code breaks when a function contains
1085 alignments that are larger than that at the start of a
1086 compilation unit. */
1087 if (TARGET_RELAX)
1088 {
1089 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1090
1091 /* Also take possible .long constants / mova tables into account. */
1092 if (min_align < 4)
1093 min_align = 4;
1094 if (align_functions < min_align)
1095 align_functions = min_align;
1096 }
1097
1098 if (flag_unsafe_math_optimizations)
1099 {
1100 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1101 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1102 TARGET_FSCA = 1;
1103
1104 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1105 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1106 TARGET_FSRRA = 1;
1107 }
1108
1109 /* Allow fsrra insn only if -funsafe-math-optimizations and
1110 -ffinite-math-only are enabled. */
1111 TARGET_FSRRA = TARGET_FSRRA
1112 && flag_unsafe_math_optimizations
1113 && flag_finite_math_only;
1114
1115 /* If the -mieee option was not explicitly set by the user, turn it on
1116 unless -ffinite-math-only was specified. See also PR 33135. */
1117 if (! global_options_set.x_TARGET_IEEE)
1118 TARGET_IEEE = ! flag_finite_math_only;
1119
1120 if (sh_fixed_range_str)
1121 sh_fix_range (sh_fixed_range_str);
1122
1123 /* This target defaults to strict volatile bitfields. */
1124 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1125 flag_strict_volatile_bitfields = 1;
1126
1127 /* Parse atomic model option and make sure it is valid for the current
1128 target CPU. */
1129 selected_atomic_model_
1130 = parse_validate_atomic_model_option (sh_atomic_model_str);
1131
1132 register_sh_passes ();
1133 }
1134 \f
1135 /* Print the operand address in x to the stream. */
1136 static void
1137 sh_print_operand_address (FILE *stream, rtx x)
1138 {
1139 switch (GET_CODE (x))
1140 {
1141 case REG:
1142 case SUBREG:
1143 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1144 break;
1145
1146 case PLUS:
1147 {
1148 rtx base = XEXP (x, 0);
1149 rtx index = XEXP (x, 1);
1150
1151 switch (GET_CODE (index))
1152 {
1153 case CONST_INT:
1154 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1155 reg_names[true_regnum (base)]);
1156 break;
1157
1158 case REG:
1159 case SUBREG:
1160 {
1161 int base_num = true_regnum (base);
1162 int index_num = true_regnum (index);
1163
1164 fprintf (stream, "@(r0,%s)",
1165 reg_names[MAX (base_num, index_num)]);
1166 break;
1167 }
1168
1169 default:
1170 gcc_unreachable ();
1171 }
1172 }
1173 break;
1174
1175 case PRE_DEC:
1176 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1177 break;
1178
1179 case POST_INC:
1180 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1181 break;
1182
1183 default:
1184 x = mark_constant_pool_use (x);
1185 output_addr_const (stream, x);
1186 break;
1187 }
1188 }
1189
1190 /* Print operand x (an rtx) in assembler syntax to file stream
1191 according to modifier code.
1192
1193 '.' print a .s if insn needs delay slot
1194 ',' print LOCAL_LABEL_PREFIX
1195 '@' print trap, rte or rts depending upon pragma interruptness
1196 '#' output a nop if there is nothing to put in the delay slot
1197 ''' print likelihood suffix (/u for unlikely).
1198 '>' print branch target if -fverbose-asm
1199 'O' print a constant without the #
1200 'R' print the LSW of a dp value - changes if in little endian
1201 'S' print the MSW of a dp value - changes if in little endian
1202 'T' print the next word of a dp value - same as 'R' in big endian mode.
1203 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1204 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1205 'N' print 'r63' if the operand is (const_int 0).
1206 'd' print a V2SF reg as dN instead of fpN.
1207 'm' print a pair `base,offset' or `base,index', for LD and ST.
1208 'U' Likewise for {LD,ST}{HI,LO}.
1209 'V' print the position of a single bit set.
1210 'W' print the position of a single bit cleared.
1211 't' print a memory address which is a register.
1212 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1213 'o' output an operator. */
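/* Editor's note: an illustrative, simplified sketch (not copied from
   sh.md) of how some of the codes above are used from insn output
   templates rather than called directly: in a template such as
   "jmp  @%0%#" the '%#' emits a "nop" into an otherwise empty delay slot,
   "%'" appends the "/u" hint when the branch is predicted unlikely, and
   "%>" adds the branch target as a comment under -fverbose-asm.  */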
1214 static void
1215 sh_print_operand (FILE *stream, rtx x, int code)
1216 {
1217 int regno;
1218 machine_mode mode;
1219
1220 switch (code)
1221 {
1222 tree trapa_attr;
1223
1224 case '.':
1225 if (final_sequence
1226 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1227 && get_attr_length (final_sequence->insn (1)))
1228 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1229 break;
1230 case ',':
1231 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1232 break;
1233 case '@':
1234 trapa_attr = lookup_attribute ("trap_exit",
1235 DECL_ATTRIBUTES (current_function_decl));
1236 if (trapa_attr)
1237 fprintf (stream, "trapa #%ld",
1238 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1239 else if (sh_cfun_interrupt_handler_p ())
1240 {
1241 if (sh_cfun_resbank_handler_p ())
1242 fprintf (stream, "resbank\n");
1243 fprintf (stream, "rte");
1244 }
1245 else
1246 fprintf (stream, "rts");
1247 break;
1248 case '#':
1249 /* Output a nop if there's nothing in the delay slot. */
1250 if (dbr_sequence_length () == 0)
1251 fprintf (stream, "\n\tnop");
1252 break;
1253 case '\'':
1254 {
1255 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1256
1257 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1258 fputs ("/u", stream);
1259 break;
1260 }
1261 case '>':
1262 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1263 {
1264 fputs ("\t! target: ", stream);
1265 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1266 }
1267 break;
1268 case 'O':
1269 x = mark_constant_pool_use (x);
1270 output_addr_const (stream, x);
1271 break;
1272 /* N.B.: %R / %S / %T adjust memory addresses by four.
1273 For SHMEDIA, that means they can be used to access the first and
1274 second 32 bit part of a 64 bit (or larger) value that
1275 might be held in floating point registers or memory.
1276 While they can be used to access 64 bit parts of a larger value
1277 held in general purpose registers, that won't work with memory -
1278 neither for fp registers, since the frxx names are used. */
1279 case 'R':
1280 if (REG_P (x) || GET_CODE (x) == SUBREG)
1281 {
1282 regno = true_regnum (x);
1283 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1284 fputs (reg_names[regno], (stream));
1285 }
1286 else if (MEM_P (x))
1287 {
1288 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1289 sh_print_operand_address (stream, XEXP (x, 0));
1290 }
1291 else
1292 {
1293 rtx sub = NULL_RTX;
1294
1295 mode = GET_MODE (x);
1296 if (mode == VOIDmode)
1297 mode = DImode;
1298 if (GET_MODE_SIZE (mode) >= 8)
1299 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1300 if (sub)
1301 sh_print_operand (stream, sub, 0);
1302 else
1303 output_operand_lossage ("invalid operand to %%R");
1304 }
1305 break;
1306 case 'S':
1307 if (REG_P (x) || GET_CODE (x) == SUBREG)
1308 {
1309 regno = true_regnum (x);
1310 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1311 fputs (reg_names[regno], (stream));
1312 }
1313 else if (MEM_P (x))
1314 {
1315 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1316 sh_print_operand_address (stream, XEXP (x, 0));
1317 }
1318 else
1319 {
1320 rtx sub = NULL_RTX;
1321
1322 mode = GET_MODE (x);
1323 if (mode == VOIDmode)
1324 mode = DImode;
1325 if (GET_MODE_SIZE (mode) >= 8)
1326 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1327 if (sub)
1328 sh_print_operand (stream, sub, 0);
1329 else
1330 output_operand_lossage ("invalid operand to %%S");
1331 }
1332 break;
1333 case 'T':
1334 /* Next word of a double. */
1335 switch (GET_CODE (x))
1336 {
1337 case REG:
1338 fputs (reg_names[REGNO (x) + 1], (stream));
1339 break;
1340 case MEM:
1341 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1342 && GET_CODE (XEXP (x, 0)) != POST_INC)
1343 x = adjust_address (x, SImode, 4);
1344 sh_print_operand_address (stream, XEXP (x, 0));
1345 break;
1346 default:
1347 break;
1348 }
1349 break;
1350
1351 case 't':
1352 gcc_assert (MEM_P (x));
1353 x = XEXP (x, 0);
1354 switch (GET_CODE (x))
1355 {
1356 case REG:
1357 case SUBREG:
1358 sh_print_operand (stream, x, 0);
1359 break;
1360 default:
1361 break;
1362 }
1363 break;
1364
1365 case 'o':
1366 switch (GET_CODE (x))
1367 {
1368 case PLUS: fputs ("add", stream); break;
1369 case MINUS: fputs ("sub", stream); break;
1370 case MULT: fputs ("mul", stream); break;
1371 case DIV: fputs ("div", stream); break;
1372 case EQ: fputs ("eq", stream); break;
1373 case NE: fputs ("ne", stream); break;
1374 case GT: case LT: fputs ("gt", stream); break;
1375 case GE: case LE: fputs ("ge", stream); break;
1376 case GTU: case LTU: fputs ("gtu", stream); break;
1377 case GEU: case LEU: fputs ("geu", stream); break;
1378 default:
1379 break;
1380 }
1381 break;
1382 case 'M':
1383 if (TARGET_SHMEDIA)
1384 {
1385 if (MEM_P (x)
1386 && GET_CODE (XEXP (x, 0)) == PLUS
1387 && (REG_P (XEXP (XEXP (x, 0), 1))
1388 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1389 fputc ('x', stream);
1390 }
1391 else
1392 {
1393 if (MEM_P (x))
1394 {
1395 switch (GET_MODE (x))
1396 {
1397 case QImode: fputs (".b", stream); break;
1398 case HImode: fputs (".w", stream); break;
1399 case SImode: fputs (".l", stream); break;
1400 case SFmode: fputs (".s", stream); break;
1401 case DFmode: fputs (".d", stream); break;
1402 default: gcc_unreachable ();
1403 }
1404 }
1405 }
1406 break;
1407
1408 case 'm':
1409 gcc_assert (MEM_P (x));
1410 x = XEXP (x, 0);
1411 /* Fall through. */
1412 case 'U':
1413 switch (GET_CODE (x))
1414 {
1415 case REG:
1416 case SUBREG:
1417 sh_print_operand (stream, x, 0);
1418 fputs (", 0", stream);
1419 break;
1420
1421 case PLUS:
1422 sh_print_operand (stream, XEXP (x, 0), 0);
1423 fputs (", ", stream);
1424 sh_print_operand (stream, XEXP (x, 1), 0);
1425 break;
1426
1427 default:
1428 gcc_unreachable ();
1429 }
1430 break;
1431
1432 case 'V':
1433 {
1434 int num = exact_log2 (INTVAL (x));
1435 gcc_assert (num >= 0);
1436 fprintf (stream, "#%d", num);
1437 }
1438 break;
1439
1440 case 'W':
1441 {
1442 int num = exact_log2 (~INTVAL (x));
1443 gcc_assert (num >= 0);
1444 fprintf (stream, "#%d", num);
1445 }
1446 break;
1447
1448 case 'd':
1449 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1450
1451 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1452 break;
1453
1454 case 'N':
1455 if (x == CONST0_RTX (GET_MODE (x)))
1456 {
1457 fprintf ((stream), "r63");
1458 break;
1459 }
1460 goto default_output;
1461 case 'u':
1462 if (CONST_INT_P (x))
1463 {
1464 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1465 break;
1466 }
1467 /* Fall through. */
1468
1469 default_output:
1470 default:
1471 regno = 0;
1472 mode = GET_MODE (x);
1473
1474 switch (GET_CODE (x))
1475 {
1476 case TRUNCATE:
1477 {
1478 rtx inner = XEXP (x, 0);
1479 int offset = 0;
1480 machine_mode inner_mode;
1481
1482 /* We might see SUBREGs with vector mode registers inside. */
1483 if (GET_CODE (inner) == SUBREG
1484 && (GET_MODE_SIZE (GET_MODE (inner))
1485 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1486 && subreg_lowpart_p (inner))
1487 inner = SUBREG_REG (inner);
1488 if (CONST_INT_P (inner))
1489 {
1490 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1491 goto default_output;
1492 }
1493 inner_mode = GET_MODE (inner);
1494 if (GET_CODE (inner) == SUBREG
1495 && (GET_MODE_SIZE (GET_MODE (inner))
1496 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1497 && REG_P (SUBREG_REG (inner)))
1498 {
1499 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1500 GET_MODE (SUBREG_REG (inner)),
1501 SUBREG_BYTE (inner),
1502 GET_MODE (inner));
1503 inner = SUBREG_REG (inner);
1504 }
1505 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1506 abort ();
1507 /* Floating point register pairs are always big endian;
1508 general purpose registers are 64 bit wide. */
1509 regno = REGNO (inner);
1510 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1511 - HARD_REGNO_NREGS (regno, mode))
1512 + offset;
1513 x = inner;
1514 goto reg;
1515 }
1516 case SIGN_EXTEND:
1517 x = XEXP (x, 0);
1518 goto reg;
1519 /* FIXME: We need this on SHmedia32 because reload generates
1520 some sign-extended HI or QI loads into DImode registers
1521 but, because Pmode is SImode, the address ends up with a
1522 subreg:SI of the DImode register. Maybe reload should be
1523 fixed so as to apply alter_subreg to such loads? */
1524 case IF_THEN_ELSE:
1525 gcc_assert (trapping_target_operand (x, VOIDmode));
1526 x = XEXP (XEXP (x, 2), 0);
1527 goto default_output;
1528 case SUBREG:
1529 gcc_assert (SUBREG_BYTE (x) == 0
1530 && REG_P (SUBREG_REG (x)));
1531
1532 x = SUBREG_REG (x);
1533 /* Fall through. */
1534
1535 reg:
1536 case REG:
1537 regno += REGNO (x);
1538 if (FP_REGISTER_P (regno)
1539 && mode == V16SFmode)
1540 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1541 else if (FP_REGISTER_P (REGNO (x))
1542 && mode == V4SFmode)
1543 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1544 else if (REG_P (x)
1545 && mode == V2SFmode)
1546 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1547 else if (FP_REGISTER_P (REGNO (x))
1548 && GET_MODE_SIZE (mode) > 4)
1549 fprintf ((stream), "d%s", reg_names[regno] + 1);
1550 else
1551 fputs (reg_names[regno], (stream));
1552 break;
1553
1554 case MEM:
1555 output_address (XEXP (x, 0));
1556 break;
1557
1558 default:
1559 if (TARGET_SH1)
1560 fputc ('#', stream);
1561 output_addr_const (stream, x);
1562 break;
1563 }
1564 break;
1565 }
1566 }
1567
1568 static bool
1569 sh_print_operand_punct_valid_p (unsigned char code)
1570 {
1571 return (code == '.' || code == '#' || code == '@' || code == ','
1572 || code == '$' || code == '\'' || code == '>');
1573 }
1574
1575 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1576 static bool
1577 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1578 {
1579 if (GET_CODE (x) == UNSPEC)
1580 {
1581 switch (XINT (x, 1))
1582 {
1583 case UNSPEC_DATALABEL:
1584 fputs ("datalabel ", file);
1585 output_addr_const (file, XVECEXP (x, 0, 0));
1586 break;
1587 case UNSPEC_PIC:
1588 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1589 output_addr_const (file, XVECEXP (x, 0, 0));
1590 break;
1591 case UNSPEC_GOT:
1592 output_addr_const (file, XVECEXP (x, 0, 0));
1593 fputs ("@GOT", file);
1594 break;
1595 case UNSPEC_GOTOFF:
1596 output_addr_const (file, XVECEXP (x, 0, 0));
1597 fputs ("@GOTOFF", file);
1598 break;
1599 case UNSPEC_PLT:
1600 output_addr_const (file, XVECEXP (x, 0, 0));
1601 fputs ("@PLT", file);
1602 break;
1603 case UNSPEC_GOTPLT:
1604 output_addr_const (file, XVECEXP (x, 0, 0));
1605 fputs ("@GOTPLT", file);
1606 break;
1607 case UNSPEC_PCREL:
1608 output_addr_const (file, XVECEXP (x, 0, 0));
1609 fputs ("@PCREL", file);
1610 break;
1611 case UNSPEC_DTPOFF:
1612 output_addr_const (file, XVECEXP (x, 0, 0));
1613 fputs ("@DTPOFF", file);
1614 break;
1615 case UNSPEC_GOTTPOFF:
1616 output_addr_const (file, XVECEXP (x, 0, 0));
1617 fputs ("@GOTTPOFF", file);
1618 break;
1619 case UNSPEC_TPOFF:
1620 output_addr_const (file, XVECEXP (x, 0, 0));
1621 fputs ("@TPOFF", file);
1622 break;
1623 case UNSPEC_CALLER:
1624 {
1625 char name[32];
1626 /* LPCS stands for Label for PIC Call Site. */
1627 targetm.asm_out.generate_internal_label (name, "LPCS",
1628 INTVAL (XVECEXP (x, 0, 0)));
1629 assemble_name (file, name);
1630 }
1631 break;
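/* These print a 16-bit slice of a symbolic constant as an expression
   the assembler can evaluate, e.g. ((sym >> 16) & 65535).  */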
1632 case UNSPEC_EXTRACT_S16:
1633 case UNSPEC_EXTRACT_U16:
1634 {
1635 rtx val, shift;
1636
1637 val = XVECEXP (x, 0, 0);
1638 shift = XVECEXP (x, 0, 1);
1639 fputc ('(', file);
1640 if (shift != const0_rtx)
1641 fputc ('(', file);
1642 if (GET_CODE (val) == CONST
1643 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1644 {
1645 fputc ('(', file);
1646 output_addr_const (file, val);
1647 fputc (')', file);
1648 }
1649 else
1650 output_addr_const (file, val);
1651 if (shift != const0_rtx)
1652 {
1653 fputs (" >> ", file);
1654 output_addr_const (file, shift);
1655 fputc (')', file);
1656 }
1657 fputs (" & 65535)", file);
1658 }
1659 break;
1660 case UNSPEC_SYMOFF:
1661 output_addr_const (file, XVECEXP (x, 0, 0));
1662 fputc ('-', file);
1663 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1664 {
1665 fputc ('(', file);
1666 output_addr_const (file, XVECEXP (x, 0, 1));
1667 fputc (')', file);
1668 }
1669 else
1670 output_addr_const (file, XVECEXP (x, 0, 1));
1671 break;
1672 case UNSPEC_PCREL_SYMOFF:
1673 output_addr_const (file, XVECEXP (x, 0, 0));
1674 fputs ("-(", file);
1675 output_addr_const (file, XVECEXP (x, 0, 1));
1676 fputs ("-.)", file);
1677 break;
1678 default:
1679 return false;
1680 }
1681 return true;
1682 }
1683 else
1684 return false;
1685 }
1686 \f
1687 /* Encode symbol attributes of a SYMBOL_REF into its
1688 SYMBOL_REF_FLAGS. */
1689 static void
1690 sh_encode_section_info (tree decl, rtx rtl, int first)
1691 {
1692 default_encode_section_info (decl, rtl, first);
1693
1694 if (TREE_CODE (decl) == FUNCTION_DECL
1695 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1696 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1697 }
1698
1699 /* Prepare operands for a move define_expand; specifically, one of the
1700 operands must be in a register. */
1701 void
1702 prepare_move_operands (rtx operands[], machine_mode mode)
1703 {
1704 if ((mode == SImode || mode == DImode)
1705 && flag_pic
1706 && ! ((mode == Pmode || mode == ptr_mode)
1707 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1708 {
1709 rtx temp;
1710 if (SYMBOLIC_CONST_P (operands[1]))
1711 {
1712 if (MEM_P (operands[0]))
1713 operands[1] = force_reg (Pmode, operands[1]);
1714 else if (TARGET_SHMEDIA
1715 && GET_CODE (operands[1]) == LABEL_REF
1716 && target_reg_operand (operands[0], mode))
1717 /* It's ok. */;
1718 else
1719 {
1720 temp = (!can_create_pseudo_p ()
1721 ? operands[0]
1722 : gen_reg_rtx (Pmode));
1723 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1724 }
1725 }
1726 else if (GET_CODE (operands[1]) == CONST
1727 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1728 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1729 {
1730 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1731 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1732 mode, temp);
1733 operands[1] = expand_binop (mode, add_optab, temp,
1734 XEXP (XEXP (operands[1], 0), 1),
1735 (!can_create_pseudo_p ()
1736 ? temp
1737 : gen_reg_rtx (Pmode)),
1738 0, OPTAB_LIB_WIDEN);
1739 }
1740 }
1741
1742 if (! reload_in_progress && ! reload_completed)
1743 {
1744 /* Copy the source to a register if neither operand is a register. */
1745 if (! register_operand (operands[0], mode)
1746 && ! sh_register_operand (operands[1], mode))
1747 operands[1] = copy_to_mode_reg (mode, operands[1]);
1748
1749 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1750 {
1751 /* This is like change_address_1 (operands[0], mode, 0, 1),
1752 except that we can't use that function because it is static. */
1753 rtx new_rtx = change_address (operands[0], mode, 0);
1754 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1755 operands[0] = new_rtx;
1756 }
1757
1758 /* This case can happen while generating code to move the result
1759 of a library call to the target. Reject `st r0,@(rX,rY)' because
1760 reload will fail to find a spill register for rX, since r0 is already
1761 being used for the source. */
1762 else if (TARGET_SH1
1763 && refers_to_regno_p (R0_REG, operands[1])
1764 && MEM_P (operands[0])
1765 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1766 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1767 operands[1] = copy_to_mode_reg (mode, operands[1]);
1768
1769 /* When displacement addressing is used, RA will assign r0 to
1770 the pseudo register operand for the QI/HImode load/store.
1771 This tends to make a long live range for R0 and might cause
1772 anomalous register spills in some cases with LRA. See PR
1773 target/55212.
1774 We split such a load/store into two move insns via r0 so as to
1775 shorten the R0 live range. It will make some code worse but will
1776 win on average for LRA.
1777 Also, when base+index addressing is used and the index term is
1778 a subreg, LRA assumes that more hard registers are available
1779 in some situations. That is not the case for SH in the
1780 problematic case. We can pre-allocate R0 for that index term
1781 to avoid the issue. See PR target/66591. */
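/* For example, a QImode load from a displacement address into a pseudo P
       mov.b  @(disp,Rbase),P
   becomes
       mov.b  @(disp,Rbase),r0
       mov    r0,P
   confining the r0 requirement to a single insn.  */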
1782 else if (sh_lra_p ()
1783 && TARGET_SH1 && ! TARGET_SH2A
1784 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1785 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1786 {
1787 bool load_p = REG_P (operands[0]);
1788 rtx reg = operands[load_p ? 0 : 1];
1789 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1790
1791 if ((mode == QImode || mode == HImode)
1792 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1793 && GET_CODE (adr) == PLUS
1794 && REG_P (XEXP (adr, 0))
1795 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1796 && CONST_INT_P (XEXP (adr, 1))
1797 && INTVAL (XEXP (adr, 1)) != 0
1798 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1799 {
1800 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1801 emit_move_insn (r0_rtx, operands[1]);
1802 operands[1] = r0_rtx;
1803 }
1804 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1805 && GET_CODE (adr) == PLUS
1806 && REG_P (XEXP (adr, 0))
1807 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1808 && SUBREG_P (XEXP (adr, 1))
1809 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1810 {
1811 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1812 emit_move_insn (r0_rtx, XEXP (adr, 1));
1813 XEXP (adr, 1) = r0_rtx;
1814 }
1815 }
1816 }
1817
1818 if (mode == Pmode || mode == ptr_mode)
1819 {
1820 rtx op0, op1, opc;
1821 enum tls_model tls_kind;
1822
1823 op0 = operands[0];
1824 op1 = operands[1];
1825 if (GET_CODE (op1) == CONST
1826 && GET_CODE (XEXP (op1, 0)) == PLUS
1827 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1828 != TLS_MODEL_NONE))
1829 {
1830 opc = XEXP (XEXP (op1, 0), 1);
1831 op1 = XEXP (XEXP (op1, 0), 0);
1832 }
1833 else
1834 opc = NULL_RTX;
1835
1836 if (! reload_in_progress && ! reload_completed
1837 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1838 {
1839 rtx tga_op1, tga_ret, tmp, tmp2;
1840
1841 if (! flag_pic
1842 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1843 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1844 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1845 {
1846 static int got_labelno;
1847 /* Don't schedule insns for getting GOT address when
1848 the first scheduling is enabled, to avoid spill
1849 failures for R0. */
1850 if (flag_schedule_insns)
1851 emit_insn (gen_blockage ());
1852 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
1853 emit_use (gen_rtx_REG (SImode, PIC_REG));
1854 if (flag_schedule_insns)
1855 emit_insn (gen_blockage ());
1856 }
1857
1858 switch (tls_kind)
1859 {
1860 case TLS_MODEL_GLOBAL_DYNAMIC:
1861 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1862 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1863 tmp = gen_reg_rtx (Pmode);
1864 emit_move_insn (tmp, tga_ret);
1865 op1 = tmp;
1866 break;
1867
1868 case TLS_MODEL_LOCAL_DYNAMIC:
1869 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1870 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1871
1872 tmp = gen_reg_rtx (Pmode);
1873 emit_move_insn (tmp, tga_ret);
1874
1875 if (register_operand (op0, Pmode))
1876 tmp2 = op0;
1877 else
1878 tmp2 = gen_reg_rtx (Pmode);
1879
1880 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1881 op1 = tmp2;
1882 break;
1883
1884 case TLS_MODEL_INITIAL_EXEC:
1885 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1886 tmp = gen_sym2GOTTPOFF (op1);
1887 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1888 op1 = tga_op1;
1889 break;
1890
1891 case TLS_MODEL_LOCAL_EXEC:
1892 tmp2 = gen_reg_rtx (Pmode);
1893 emit_insn (gen_store_gbr (tmp2));
1894 tmp = gen_reg_rtx (Pmode);
1895 emit_insn (gen_symTPOFF2reg (tmp, op1));
1896
1897 if (register_operand (op0, Pmode))
1898 op1 = op0;
1899 else
1900 op1 = gen_reg_rtx (Pmode);
1901
1902 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1903 break;
1904
1905 default:
1906 gcc_unreachable ();
1907 }
1908 if (opc)
1909 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1910 operands[1] = op1;
1911 }
1912 }
1913 }
1914
1915 /* Implement the canonicalize_comparison target hook for the combine
1916 pass. For the target hook this function is invoked via
1917 sh_canonicalize_comparison. This function is also re-used to
1918 canonicalize comparisons in cbranch pattern expanders. */
1919 static void
1920 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1921 machine_mode mode,
1922 bool op0_preserve_value)
1923 {
1924 /* When invoked from within the combine pass the mode is not specified,
1925 so try to get it from one of the operands. */
1926 if (mode == VOIDmode)
1927 mode = GET_MODE (op0);
1928 if (mode == VOIDmode)
1929 mode = GET_MODE (op1);
1930
1931 // We need to have a mode to do something useful here.
1932 if (mode == VOIDmode)
1933 return;
1934
1935 // Currently, we don't deal with floats here.
1936 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1937 return;
1938
1939 // Make sure that the constant operand is the second operand.
1940 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1941 {
1942 if (op0_preserve_value)
1943 return;
1944
1945 std::swap (op0, op1);
1946 cmp = swap_condition (cmp);
1947 }
1948
1949 if (CONST_INT_P (op1))
1950 {
1951 /* Try to adjust the constant operand in such a way that available
1952 comparison insns can be utilized better and the constant can be
1953 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1954 constant pool. */
1955 const HOST_WIDE_INT val = INTVAL (op1);
1956
1957 /* x > -1 --> x >= 0
1958 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1959 x <= -1 --> x < 0
1960 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1961 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1962 {
1963 cmp = cmp == GT ? GE : LT;
1964 op1 = gen_int_mode (val + 1, mode);
1965 }
1966
1967 /* x >= 1 --> x > 0
1968 x >= 0x80 --> x > 0x7F
1969 x < 1 --> x <= 0
1970 x < 0x80 --> x <= 0x7F */
1971 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1972 {
1973 cmp = cmp == GE ? GT : LE;
1974 op1 = gen_int_mode (val - 1, mode);
1975 }
1976
1977 /* unsigned x >= 1 --> x != 0
1978 unsigned x < 1 --> x == 0 */
1979 else if (val == 1 && (cmp == GEU || cmp == LTU))
1980 {
1981 cmp = cmp == GEU ? NE : EQ;
1982 op1 = CONST0_RTX (mode);
1983 }
1984
1985 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1986 unsigned x < 0x80 --> unsigned x < 0x7F */
1987 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1988 {
1989 cmp = cmp == GEU ? GTU : LEU;
1990 op1 = gen_int_mode (val - 1, mode);
1991 }
1992
1993 /* unsigned x > 0 --> x != 0
1994 unsigned x <= 0 --> x == 0 */
1995 else if (val == 0 && (cmp == GTU || cmp == LEU))
1996 cmp = cmp == GTU ? NE : EQ;
1997
1998 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1999 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
2000 else if (mode == SImode && (cmp == GTU || cmp == LEU)
2001 && val == 0x7FFFFFFF)
2002 {
2003 cmp = cmp == GTU ? LT : GE;
2004 op1 = const0_rtx;
2005 }
2006
2007 /* unsigned x >= 0x80000000 --> signed x < 0
2008 unsigned x < 0x80000000 --> signed x >= 0 */
2009 else if (mode == SImode && (cmp == GEU || cmp == LTU)
2010 && (unsigned HOST_WIDE_INT)val
2011 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
2012 {
2013 cmp = cmp == GEU ? LT : GE;
2014 op1 = const0_rtx;
2015 }
2016 }
2017 }
2018
2019 /* This function implements the canonicalize_comparison target hook.
2020 This wrapper around the internally used sh_canonicalize_comparison
2021 function is needed to do the enum rtx_code <-> int conversion.
2022 Target hooks cannot use enum rtx_code in its definition. */
2023 static void
2024 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
2025 bool op0_preserve_value)
2026 {
2027 enum rtx_code tmp_code = (enum rtx_code)*code;
2028 sh_canonicalize_comparison (tmp_code, *op0, *op1,
2029 VOIDmode, op0_preserve_value);
2030 *code = (int)tmp_code;
2031 }
2032
2033 /* This function implements the legitimate_combined_insn target hook,
2034 which the combine pass uses to early reject combined insns, before
2035 it tries to recog the insn and determine its cost. */
2036 static bool
2037 sh_legitimate_combined_insn (rtx_insn* insn)
2038 {
2039 /* Reject combinations of memory loads and zero extensions, as these
2040 interfere with other combine patterns such as zero extracts and bit
2041 tests. The SH2A movu.{b|w} insns are formed later in the
2042 'sh_optimize_extu_exts' pass after combine/split1. */
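/* That is, reject combined insns of the shape
   (set (reg:SI ...) (zero_extend:SI (mem ...))).  */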
2043 rtx p = PATTERN (insn);
2044 if (GET_CODE (p) == SET
2045 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
2046 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
2047 && MEM_P (XEXP (XEXP (p, 1), 0)))
2048 return false;
2049
2050 return true;
2051 }
2052
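/* Record the (only) fixed condition code register, the T bit, in *P1;
   there is no second one, so *P2 is set to INVALID_REGNUM.  */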
2053 bool
2054 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
2055 {
2056 *p1 = T_REG;
2057 *p2 = INVALID_REGNUM;
2058 return true;
2059 }
2060
2061 enum rtx_code
2062 prepare_cbranch_operands (rtx *operands, machine_mode mode,
2063 enum rtx_code comparison)
2064 {
2065 /* The scratch reg is only available when this is invoked from within
2066 the cbranchdi4_i splitter, through expand_cbranchdi4. */
2067 rtx scratch = NULL_RTX;
2068
2069 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2070 comparison = GET_CODE (operands[0]);
2071 else
2072 scratch = operands[4];
2073
2074 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2075 mode, false);
2076
2077 /* Notice that this function is also invoked after reload by
2078 the cbranchdi4_i pattern, through expand_cbranchdi4. */
2079 rtx op1 = operands[1];
2080
2081 if (can_create_pseudo_p ())
2082 operands[1] = force_reg (mode, op1);
2083 /* When we are handling DImode comparisons, we want to keep constants so
2084 that we can optimize the component comparisons; however, memory loads
2085 are better issued as a whole so that they can be scheduled well.
2086 SImode equality comparisons allow I08 constants, but only when they
2087 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2088 into a register, that register might as well be r0, and we allow the
2089 constant. If it is already in a register, this is likely to be
2090 allocated to a different hard register, thus we load the constant into
2091 a register unless it is zero. */
2092 if (!REG_P (operands[2])
2093 && (!CONST_INT_P (operands[2])
2094 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2095 && ((comparison != EQ && comparison != NE)
2096 || (REG_P (op1) && REGNO (op1) != R0_REG)
2097 || !satisfies_constraint_I08 (operands[2])))))
2098 {
2099 if (scratch && GET_MODE (scratch) == mode)
2100 {
2101 emit_move_insn (scratch, operands[2]);
2102 operands[2] = scratch;
2103 }
2104 else if (can_create_pseudo_p ())
2105 operands[2] = force_reg (mode, operands[2]);
2106 }
2107 return comparison;
2108 }
2109
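/* Emit an SImode compare-and-branch: emit a comparison that sets the T bit
   and jump to operands[3] with bt/bf. Conditions without a direct
   comparison insn (NE, LT, LE, LTU, LEU) are reversed and branch on a
   false T bit instead.  */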
2110 void
2111 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2112 {
2113 rtx (*branch_expander) (rtx) = gen_branch_true;
2114 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2115 switch (comparison)
2116 {
2117 case NE: case LT: case LE: case LTU: case LEU:
2118 comparison = reverse_condition (comparison);
2119 branch_expander = gen_branch_false;
2120 default: ;
2121 }
2122 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2123 gen_rtx_fmt_ee (comparison, SImode,
2124 operands[1], operands[2])));
2125 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2126 if (probability >= 0)
2127 add_int_reg_note (jump, REG_BR_PROB, probability);
2128 }
2129
2130 /* ??? How should we distribute probabilities when more than one branch
2131 is generated? So far we only have some ad-hoc observations:
2132 - If the operands are random, they are likely to differ in both parts.
2133 - If comparing items in a hash chain, the operands are random or equal;
2134 operation should be EQ or NE.
2135 - If items are searched in an ordered tree from the root, we can expect
2136 the highpart to be unequal about half of the time; operation should be
2137 an inequality comparison, operands non-constant, and overall probability
2138 about 50%. Likewise for quicksort.
2139 - Range checks will often be made against constants. Even if we assume for
2140 simplicity an even distribution of the non-constant operand over a
2141 sub-range here, the same probability could be generated with differently
2142 wide sub-ranges - as long as the ratio of the part of the subrange that
2143 is before the threshold to the part that comes after the threshold stays
2144 the same. Thus, we can't really tell anything here;
2145 assuming random distribution is at least simple.
2146 */
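/* As an example of the decomposition done below, a signed DImode x < y,
   when the low word of y is not known to be zero, uses
       msw_taken = LT   (branch if the high words compare less),
       msw_skip  = GT   (skip the low-word test if they compare greater),
       lsw_taken = LTU  (otherwise compare the low words unsigned).  */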
2147 bool
2148 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2149 {
2150 enum rtx_code msw_taken, msw_skip, lsw_taken;
2151 rtx_code_label *skip_label = NULL;
2152 rtx op1h, op1l, op2h, op2l;
2153 int num_branches;
2154 int prob, rev_prob;
2155 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2156 rtx scratch = operands[4];
2157
2158 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2159 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2160 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2161 op1l = gen_lowpart (SImode, operands[1]);
2162 op2l = gen_lowpart (SImode, operands[2]);
2163 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2164 prob = split_branch_probability;
2165 rev_prob = REG_BR_PROB_BASE - prob;
2166 switch (comparison)
2167 {
2168 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2169 That costs 1 cycle more when the first branch can be predicted taken,
2170 but saves us mispredicts because only one branch needs prediction.
2171 It also enables generating the cmpeqdi_t-1 pattern. */
2172 case EQ:
2173 if (TARGET_CMPEQDI_T)
2174 {
2175 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2176 emit_jump_insn (gen_branch_true (operands[3]));
2177 return true;
2178 }
2179 msw_skip = NE;
2180 lsw_taken = EQ;
2181 if (prob >= 0)
2182 {
2183 // If we had more precision, we'd use rev_prob - (rev_prob >> 32).
2184 msw_skip_prob = rev_prob;
2185 if (REG_BR_PROB_BASE <= 65535)
2186 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2187 else
2188 {
2189 lsw_taken_prob
2190 = (prob
2191 ? (REG_BR_PROB_BASE
2192 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2193 / ((gcov_type) prob << 32)))
2194 : 0);
2195 }
2196 }
2197 break;
2198 case NE:
2199 if (TARGET_CMPEQDI_T)
2200 {
2201 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2202 emit_jump_insn (gen_branch_false (operands[3]));
2203 return true;
2204 }
2205 msw_taken = NE;
2206 msw_taken_prob = prob;
2207 lsw_taken = NE;
2208 lsw_taken_prob = 0;
2209 break;
2210 case GTU: case GT:
2211 msw_taken = comparison;
2212 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2213 break;
2214 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2215 msw_skip = swap_condition (msw_taken);
2216 lsw_taken = GTU;
2217 break;
2218 case GEU: case GE:
2219 if (op2l == CONST0_RTX (SImode))
2220 msw_taken = comparison;
2221 else
2222 {
2223 msw_taken = comparison == GE ? GT : GTU;
2224 msw_skip = swap_condition (msw_taken);
2225 lsw_taken = GEU;
2226 }
2227 break;
2228 case LTU: case LT:
2229 msw_taken = comparison;
2230 if (op2l == CONST0_RTX (SImode))
2231 break;
2232 msw_skip = swap_condition (msw_taken);
2233 lsw_taken = LTU;
2234 break;
2235 case LEU: case LE:
2236 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2237 msw_taken = comparison;
2238 else
2239 {
2240 lsw_taken = LEU;
2241 if (comparison == LE)
2242 msw_taken = LT;
2243 else if (op2h != CONST0_RTX (SImode))
2244 msw_taken = LTU;
2245 else
2246 {
2247 msw_skip = swap_condition (LTU);
2248 break;
2249 }
2250 msw_skip = swap_condition (msw_taken);
2251 }
2252 break;
2253 default: return false;
2254 }
2255 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2256 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2257 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2258 if (comparison != EQ && comparison != NE && num_branches > 1)
2259 {
2260 if (!CONSTANT_P (operands[2])
2261 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2262 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2263 {
2264 msw_taken_prob = prob / 2U;
2265 msw_skip_prob
2266 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2267 lsw_taken_prob = prob;
2268 }
2269 else
2270 {
2271 msw_taken_prob = prob;
2272 msw_skip_prob = REG_BR_PROB_BASE;
2273 /* ??? If we have a constant op2h, should we use that when
2274 calculating lsw_taken_prob? */
2275 lsw_taken_prob = prob;
2276 }
2277 }
2278 operands[1] = op1h;
2279 operands[2] = op2h;
2280 operands[4] = NULL_RTX;
2281 if (reload_completed
2282 && ! arith_reg_or_0_operand (op2h, SImode)
2283 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2284 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2285 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2286 {
2287 emit_move_insn (scratch, operands[2]);
2288 operands[2] = scratch;
2289 }
2290 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2291 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2292 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2293 {
2294 rtx taken_label = operands[3];
2295
2296 /* Operands were possibly modified, but msw_skip doesn't expect this.
2297 Always use the original ones. */
2298 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2299 {
2300 operands[1] = op1h;
2301 operands[2] = op2h;
2302 if (reload_completed
2303 && ! arith_reg_or_0_operand (op2h, SImode)
2304 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2305 {
2306 emit_move_insn (scratch, operands[2]);
2307 operands[2] = scratch;
2308 }
2309 }
2310
2311 operands[3] = skip_label = gen_label_rtx ();
2312 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2313 operands[3] = taken_label;
2314 }
2315 operands[1] = op1l;
2316 operands[2] = op2l;
2317 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2318 {
2319 if (reload_completed
2320 && ! arith_reg_or_0_operand (op2l, SImode)
2321 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2322 {
2323 emit_move_insn (scratch, operands[2]);
2324 operands[2] = scratch;
2325 }
2326 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2327 }
2328 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2329 emit_label (skip_label);
2330 return true;
2331 }
2332
2333 /* Given an operand, return 1 if the evaluated operand plugged into an
2334 if_then_else will result in a branch_true, 0 if branch_false, or
2335 -1 if neither applies. The truth table goes like this:
2336
2337 op | cmpval | code | result
2338 ---------+--------+---------+--------------------
2339 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2340 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2341 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2342 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2343 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2344 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2345 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2346 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
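/* For example, (eq (reg T) (const_int 0)) corresponds to the first row
   above and evaluates to 0, i.e. it acts like a branch_false.  */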
2347 int
2348 sh_eval_treg_value (rtx op)
2349 {
2350 if (t_reg_operand (op, GET_MODE (op)))
2351 return 1;
2352 if (negt_reg_operand (op, GET_MODE (op)))
2353 return 0;
2354
2355 rtx_code code = GET_CODE (op);
2356 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2357 return -1;
2358
2359 int cmpop = code == EQ ? 1 : 0;
2360 int cmpval = INTVAL (XEXP (op, 1));
2361 if (cmpval != 0 && cmpval != 1)
2362 return -1;
2363
2364 int t;
2365 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2366 t = 0;
2367 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2368 t = 1;
2369 else
2370 return -1;
2371
2372 return t ^ (cmpval == cmpop);
2373 }
2374
2375 /* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case
2376 of floating-point comparisons. */
2377 static void
2378 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2379 {
2380 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2381 && GET_CODE (insn) != PARALLEL)
2382 {
2383 insn = gen_rtx_PARALLEL (VOIDmode,
2384 gen_rtvec (3, insn,
2385 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2386 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2387 }
2388 emit_insn (insn);
2389 }
2390
2391 /* Prepare the operands for an scc instruction; make sure that the
2392 compare has been done and the result is in T_REG. */
2393 void
2394 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2395 {
2396 rtx t_reg = get_t_reg_rtx ();
2397 enum rtx_code oldcode = code;
2398 machine_mode mode;
2399
2400 /* First need a compare insn. */
2401 switch (code)
2402 {
2403 case NE:
2404 /* It isn't possible to handle this case. */
2405 gcc_unreachable ();
2406 case LT:
2407 code = GT;
2408 break;
2409 case LE:
2410 code = GE;
2411 break;
2412 case LTU:
2413 code = GTU;
2414 break;
2415 case LEU:
2416 code = GEU;
2417 break;
2418 default:
2419 break;
2420 }
2421 if (code != oldcode)
2422 std::swap (op0, op1);
2423
2424 mode = GET_MODE (op0);
2425 if (mode == VOIDmode)
2426 mode = GET_MODE (op1);
2427
2428 op0 = force_reg (mode, op0);
2429 if ((code != EQ && code != NE
2430 && (op1 != const0_rtx
2431 || code == GTU || code == GEU || code == LTU || code == LEU))
2432 || (mode == DImode && op1 != const0_rtx)
2433 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2434 op1 = force_reg (mode, op1);
2435
2436 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2437 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2438 mode);
2439 }
2440
2441 rtx
2442 sh_emit_cheap_store_flag (machine_mode mode, enum rtx_code code,
2443 rtx op0, rtx op1)
2444 {
2445 rtx target = gen_reg_rtx (SImode);
2446 rtx tmp;
2447
2448 gcc_assert (TARGET_SHMEDIA);
2449 switch (code)
2450 {
2451 case EQ:
2452 case GT:
2453 case LT:
2454 case UNORDERED:
2455 case GTU:
2456 case LTU:
2457 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2458 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2459 code = NE;
2460 break;
2461
2462 case NE:
2463 case GE:
2464 case LE:
2465 case ORDERED:
2466 case GEU:
2467 case LEU:
2468 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2469 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2470 code = EQ;
2471 break;
2472
2473 case UNEQ:
2474 case UNGE:
2475 case UNGT:
2476 case UNLE:
2477 case UNLT:
2478 case LTGT:
2479 return NULL_RTX;
2480
2481 default:
2482 gcc_unreachable ();
2483 }
2484
2485 if (mode == DImode)
2486 {
2487 rtx t2 = gen_reg_rtx (DImode);
2488 emit_insn (gen_extendsidi2 (t2, target));
2489 target = t2;
2490 }
2491
2492 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2493 }
2494
2495 /* Called from the md file, set up the operands of a compare instruction. */
2496 void
2497 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2498 {
2499 enum rtx_code code = GET_CODE (operands[0]);
2500 enum rtx_code branch_code;
2501 rtx op0 = operands[1];
2502 rtx op1 = operands[2];
2503 rtx insn;
2504 bool need_ccmpeq = false;
2505
2506 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2507 {
2508 op0 = force_reg (mode, op0);
2509 op1 = force_reg (mode, op1);
2510 }
2511 else
2512 {
2513 if (code != EQ || mode == DImode)
2514 {
2515 /* Force args into regs, since we can't use constants here. */
2516 op0 = force_reg (mode, op0);
2517 if (op1 != const0_rtx || code == GTU || code == GEU)
2518 op1 = force_reg (mode, op1);
2519 }
2520 }
2521
2522 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2523 {
2524 if (code == LT
2525 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2526 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2527 {
2528 std::swap (op0, op1);
2529 code = swap_condition (code);
2530 }
2531
2532 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2533 if (code == GE)
2534 {
2535 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2536 need_ccmpeq = true;
2537 code = GT;
2538 }
2539
2540 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2541 to EQ/GT respectively. */
2542 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2543 }
2544
2545 switch (code)
2546 {
2547 case EQ:
2548 case GT:
2549 case GE:
2550 case GTU:
2551 case GEU:
2552 branch_code = code;
2553 break;
2554 case NE:
2555 case LT:
2556 case LE:
2557 case LTU:
2558 case LEU:
2559 branch_code = reverse_condition (code);
2560 break;
2561 default:
2562 gcc_unreachable ();
2563 }
2564
2565 insn = gen_rtx_SET (get_t_reg_rtx (),
2566 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2567
2568 sh_emit_set_t_insn (insn, mode);
2569 if (need_ccmpeq)
2570 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2571
2572 if (branch_code == code)
2573 emit_jump_insn (gen_branch_true (operands[3]));
2574 else
2575 emit_jump_insn (gen_branch_false (operands[3]));
2576 }
2577
2578 void
2579 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2580 {
2581 enum rtx_code code = GET_CODE (operands[1]);
2582 rtx op0 = operands[2];
2583 rtx op1 = operands[3];
2584 rtx_code_label *lab = NULL;
2585 bool invert = false;
2586
2587 op0 = force_reg (mode, op0);
2588 if ((code != EQ && code != NE
2589 && (op1 != const0_rtx
2590 || code == GTU || code == GEU || code == LTU || code == LEU))
2591 || (mode == DImode && op1 != const0_rtx)
2592 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2593 op1 = force_reg (mode, op1);
2594
2595 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2596 {
2597 if (code == LT || code == LE)
2598 {
2599 std::swap (op0, op1);
2600 code = swap_condition (code);
2601 }
2602 if (code == GE)
2603 {
2604 if (TARGET_IEEE)
2605 {
2606 lab = gen_label_rtx ();
2607 sh_emit_scc_to_t (EQ, op0, op1);
2608 emit_jump_insn (gen_branch_true (lab));
2609 code = GT;
2610 }
2611 else
2612 {
2613 code = LT;
2614 invert = true;
2615 }
2616 }
2617 }
2618
2619 if (code == NE)
2620 {
2621 code = EQ;
2622 invert = true;
2623 }
2624
2625 sh_emit_scc_to_t (code, op0, op1);
2626 if (lab)
2627 emit_label (lab);
2628 if (invert)
2629 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2630 else
2631 emit_move_insn (operands[0], get_t_reg_rtx ());
2632 }
2633 \f
2634 /* Functions to output assembly code. */
2635
2636 /* Return a sequence of instructions to perform DI or DF move.
2637
2638 Since the SH cannot move a DI or DF in one instruction, we have
2639 to take care when we see overlapping source and dest registers. */
2640 const char *
2641 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2642 machine_mode mode)
2643 {
2644 rtx dst = operands[0];
2645 rtx src = operands[1];
2646
2647 if (MEM_P (dst)
2648 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2649 return "mov.l %T1,%0" "\n"
2650 " mov.l %1,%0";
2651
2652 if (register_operand (dst, mode)
2653 && register_operand (src, mode))
2654 {
2655 if (REGNO (src) == MACH_REG)
2656 return "sts mach,%S0" "\n"
2657 " sts macl,%R0";
2658
2659 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2660 when mov.d r1,r0 do r1->r0 then r2->r1. */
2661 if (REGNO (src) + 1 == REGNO (dst))
2662 return "mov %T1,%T0" "\n"
2663 " mov %1,%0";
2664 else
2665 return "mov %1,%0" "\n"
2666 " mov %T1,%T0";
2667 }
2668 else if (CONST_INT_P (src))
2669 {
2670 if (INTVAL (src) < 0)
2671 output_asm_insn ("mov #-1,%S0", operands);
2672 else
2673 output_asm_insn ("mov #0,%S0", operands);
2674
2675 return "mov %1,%R0";
2676 }
2677 else if (MEM_P (src))
2678 {
2679 int ptrreg = -1;
2680 int dreg = REGNO (dst);
2681 rtx inside = XEXP (src, 0);
2682
2683 switch (GET_CODE (inside))
2684 {
2685 case REG:
2686 ptrreg = REGNO (inside);
2687 break;
2688
2689 case SUBREG:
2690 ptrreg = subreg_regno (inside);
2691 break;
2692
2693 case PLUS:
2694 ptrreg = REGNO (XEXP (inside, 0));
2695 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2696 an offsettable address. Unfortunately, offsettable addresses use
2697 QImode to check the offset, and a QImode offsettable address
2698 requires r0 for the other operand, which is not currently
2699 supported, so we can't use the 'o' constraint.
2700 Thus we must check for and handle r0+REG addresses here.
2701 We punt for now, since this is likely very rare. */
2702 gcc_assert (!REG_P (XEXP (inside, 1)));
2703 break;
2704
2705 case LABEL_REF:
2706 return "mov.l %1,%0" "\n"
2707 " mov.l %1+4,%T0";
2708 case POST_INC:
2709 return "mov.l %1,%0" "\n"
2710 " mov.l %1,%T0";
2711 default:
2712 gcc_unreachable ();
2713 }
2714
2715 /* Work out the safe way to copy. Copy into the second half first. */
2716 if (dreg == ptrreg)
2717 return "mov.l %T1,%T0" "\n"
2718 " mov.l %1,%0";
2719 }
2720
2721 return "mov.l %1,%0" "\n"
2722 " mov.l %T1,%T0";
2723 }
2724
2725 /* Print an instruction which would have gone into a delay slot after
2726 another instruction, but couldn't because the other instruction expanded
2727 into a sequence where putting the slot insn at the end wouldn't work. */
2728 static void
2729 print_slot (rtx_sequence *seq)
2730 {
2731 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2732
2733 seq->insn (1)->set_deleted ();
2734 }
2735
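/* Output code for a jump whose target is too far away for a plain bra
   insn; the target offset or address is emitted as a constant after the
   jump and loaded into a scratch register for a braf or jmp.  */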
2736 const char *
2737 output_far_jump (rtx_insn *insn, rtx op)
2738 {
2739 struct { rtx lab, reg, op; } this_jmp;
2740 rtx_code_label *braf_base_lab = NULL;
2741 const char *jump;
2742 int far;
2743 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2744 rtx_insn *prev;
2745
2746 this_jmp.lab = gen_label_rtx ();
2747
2748 if (TARGET_SH2
2749 && offset >= -32764
2750 && offset - get_attr_length (insn) <= 32766
2751 && ! CROSSING_JUMP_P (insn))
2752 {
2753 far = 0;
2754 jump = "mov.w %O0,%1" "\n"
2755 " braf %1";
2756 }
2757 else
2758 {
2759 far = 1;
2760 if (flag_pic)
2761 {
2762 if (TARGET_SH2)
2763 jump = "mov.l %O0,%1" "\n"
2764 " braf %1";
2765 else
2766 jump = "mov.l r0,@-r15" "\n"
2767 " mova %O0,r0" "\n"
2768 " mov.l @r0,%1" "\n"
2769 " add r0,%1" "\n"
2770 " mov.l @r15+,r0" "\n"
2771 " jmp @%1";
2772 }
2773 else
2774 jump = "mov.l %O0,%1" "\n"
2775 " jmp @%1";
2776 }
2777 /* If we have a scratch register available, use it. */
2778 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2779 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2780 {
2781 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2782 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2783 jump = "mov.l r1,@-r15" "\n"
2784 " mova %O0,r0" "\n"
2785 " mov.l @r0,r1" "\n"
2786 " add r1,r0" "\n"
2787 " mov.l @r15+,r1" "\n"
2788 " jmp @%1";
2789 output_asm_insn (jump, &this_jmp.lab);
2790 if (dbr_sequence_length ())
2791 print_slot (final_sequence);
2792 else
2793 output_asm_insn ("nop", 0);
2794 }
2795 else
2796 {
2797 /* Output the delay slot insn first if any. */
2798 if (dbr_sequence_length ())
2799 print_slot (final_sequence);
2800
2801 this_jmp.reg = gen_rtx_REG (SImode, 13);
2802 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2803 Fortunately, MACL is fixed and call-clobbered, and we never
2804 need its value across jumps, so save r13 in it instead of on
2805 the stack. */
2806 if (TARGET_SH5)
2807 output_asm_insn ("lds r13,macl", 0);
2808 else
2809 output_asm_insn ("mov.l r13,@-r15", 0);
2810 output_asm_insn (jump, &this_jmp.lab);
2811 if (TARGET_SH5)
2812 output_asm_insn ("sts macl,r13", 0);
2813 else
2814 output_asm_insn ("mov.l @r15+,r13", 0);
2815 }
2816 if (far && flag_pic && TARGET_SH2)
2817 {
2818 braf_base_lab = gen_label_rtx ();
2819 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2820 CODE_LABEL_NUMBER (braf_base_lab));
2821 }
2822 if (far)
2823 output_asm_insn (".align 2", 0);
2824 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2825 this_jmp.op = op;
2826 if (far && flag_pic)
2827 {
2828 if (TARGET_SH2)
2829 this_jmp.lab = braf_base_lab;
2830 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2831 }
2832 else
2833 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2834 return "";
2835 }
2836
2837 /* Local label counter, used for constants in the pool and inside
2838 pattern branches. */
2839 static int lf = 100;
2840
2841 /* Output code for ordinary branches. */
2842 const char *
2843 output_branch (int logic, rtx_insn *insn, rtx *operands)
2844 {
2845 switch (get_attr_length (insn))
2846 {
2847 case 6:
2848 /* This can happen if filling the delay slot has caused a forward
2849 branch to exceed its range (we could reverse it, but only
2850 when we know we won't overextend other branches; this should
2851 best be handled by relaxation).
2852 It can also happen when other condbranches hoist delay slot insn
2853 from their destination, thus leading to code size increase.
2854 But the branch will still be in the range -4092..+4098 bytes. */
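/* In that case a branch-if-true that no longer reaches its target is
   emitted roughly as (the LF label number is illustrative)
       bf   .LF100
       bra  target
       nop
   .LF100:
   and if the original delay slot insn can be used, it is emitted in the
   delay slot of a bf.s (bf/s) instead.  */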
2855 if (! TARGET_RELAX)
2856 {
2857 int label = lf++;
2858 /* The call to print_slot will clobber the operands. */
2859 rtx op0 = operands[0];
2860
2861 /* If the instruction in the delay slot is annulled (true), then
2862 there is no delay slot where we can put it now. The only safe
2863 place for it is after the label. final will do that by default. */
2864
2865 if (final_sequence
2866 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2867 && get_attr_length (final_sequence->insn (1)))
2868 {
2869 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2870 ASSEMBLER_DIALECT ? "/" : ".", label);
2871 print_slot (final_sequence);
2872 }
2873 else
2874 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2875
2876 output_asm_insn ("bra\t%l0", &op0);
2877 fprintf (asm_out_file, "\tnop\n");
2878 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2879
2880 return "";
2881 }
2882 /* When relaxing, handle this like a short branch. The linker
2883 will fix it up if it still doesn't fit after relaxation. */
2884 case 2:
2885 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2886
2887 /* These are for SH2e, in which we have to account for the
2888 extra nop because of the hardware bug in annulled branches. */
2889 case 8:
2890 if (! TARGET_RELAX)
2891 {
2892 int label = lf++;
2893
2894 gcc_assert (!final_sequence
2895 || !(INSN_ANNULLED_BRANCH_P
2896 (XVECEXP (final_sequence, 0, 0))));
2897 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2898 logic ? "f" : "t",
2899 ASSEMBLER_DIALECT ? "/" : ".", label);
2900 fprintf (asm_out_file, "\tnop\n");
2901 output_asm_insn ("bra\t%l0", operands);
2902 fprintf (asm_out_file, "\tnop\n");
2903 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2904
2905 return "";
2906 }
2907 /* When relaxing, fall through. */
2908 case 4:
2909 {
2910 char buffer[10];
2911
2912 sprintf (buffer, "b%s%ss\t%%l0",
2913 logic ? "t" : "f",
2914 ASSEMBLER_DIALECT ? "/" : ".");
2915 output_asm_insn (buffer, &operands[0]);
2916 return "nop";
2917 }
2918
2919 default:
2920 /* There should be no longer branches now - that would
2921 indicate that something has destroyed the branches set
2922 up in machine_dependent_reorg. */
2923 gcc_unreachable ();
2924 }
2925 }
2926
2927 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2928 fill in operand 9 as a label to the successor insn.
2929 We try to use jump threading where possible.
2930 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2931 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2932 follow jmp and bt, if the address is in range. */
2933 const char *
2934 output_branchy_insn (enum rtx_code code, const char *templ,
2935 rtx_insn *insn, rtx *operands)
2936 {
2937 rtx_insn *next_insn = NEXT_INSN (insn);
2938
2939 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2940 {
2941 rtx src = SET_SRC (PATTERN (next_insn));
2942 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2943 {
2944 /* Following branch not taken */
2945 rtx_code_label *lab = gen_label_rtx ();
2946 emit_label_after (lab, next_insn);
2947 INSN_ADDRESSES_NEW (lab,
2948 INSN_ADDRESSES (INSN_UID (next_insn))
2949 + get_attr_length (next_insn));
2950 operands[9] = lab;
2951 return templ;
2952 }
2953 else
2954 {
2955 int offset = (branch_dest (next_insn)
2956 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2957 if (offset >= -252 && offset <= 258)
2958 {
2959 if (GET_CODE (src) == IF_THEN_ELSE)
2960 /* branch_true */
2961 src = XEXP (src, 1);
2962 operands[9] = src;
2963 return templ;
2964 }
2965 }
2966 }
2967 rtx_code_label *lab = gen_label_rtx ();
2968 emit_label_after (lab, insn);
2969 INSN_ADDRESSES_NEW (lab,
2970 INSN_ADDRESSES (INSN_UID (insn))
2971 + get_attr_length (insn));
2972 operands[9] = lab;
2973 return templ;
2974 }
2975
2976 const char *
2977 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2978 {
2979 return output_branchy_insn (NE, "bt %l9" "\n"
2980 " fcmp/eq %1,%0",
2981 insn, operands);
2982 }
2983 \f
2984 /* Output the start of the assembler file. */
2985 static void
2986 sh_file_start (void)
2987 {
2988 default_file_start ();
2989
2990 if (TARGET_ELF)
2991 /* We need to show the text section with the proper
2992 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2993 emits it without attributes, or else GAS
2994 will complain. We can teach GAS specifically about the
2995 default attributes for our choice of text section, but
2996 then we would have to change GAS again if/when we change
2997 the text section name. */
2998 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2999 else
3000 /* Switch to the data section so that the coffsem symbol
3001 isn't in the text section. */
3002 switch_to_section (data_section);
3003
3004 if (TARGET_LITTLE_ENDIAN)
3005 fputs ("\t.little\n", asm_out_file);
3006
3007 if (!TARGET_ELF)
3008 {
3009 if (TARGET_SHCOMPACT)
3010 fputs ("\t.mode\tSHcompact\n", asm_out_file);
3011 else if (TARGET_SHMEDIA)
3012 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
3013 TARGET_SHMEDIA64 ? 64 : 32);
3014 }
3015 }
3016 \f
3017 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
3018 static bool
3019 unspec_caller_rtx_p (rtx pat)
3020 {
3021 rtx base, offset;
3022 int i;
3023
3024 split_const (pat, &base, &offset);
3025 if (GET_CODE (base) == UNSPEC)
3026 {
3027 if (XINT (base, 1) == UNSPEC_CALLER)
3028 return true;
3029 for (i = 0; i < XVECLEN (base, 0); i++)
3030 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
3031 return true;
3032 }
3033 return false;
3034 }
3035
3036 /* Indicate that INSN cannot be duplicated. This is true for an insn
3037 that generates a unique label. */
3038 static bool
3039 sh_cannot_copy_insn_p (rtx_insn *insn)
3040 {
3041 rtx pat;
3042
3043 if (!reload_completed || !flag_pic)
3044 return false;
3045
3046 if (!NONJUMP_INSN_P (insn))
3047 return false;
3048 if (asm_noperands (insn) >= 0)
3049 return false;
3050
3051 pat = PATTERN (insn);
3052 if (GET_CODE (pat) != SET)
3053 return false;
3054 pat = SET_SRC (pat);
3055
3056 if (unspec_caller_rtx_p (pat))
3057 return true;
3058
3059 return false;
3060 }
3061 \f
3062 /* Number of instructions used to make an arithmetic right shift by N. */
3063 static const char ashiftrt_insns[] =
3064 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
3065
3066 /* Description of a logical left or right shift, when expanded to a sequence
3067 of 1/2/8/16 shifts.
3068 Notice that one bit right shifts clobber the T bit. One bit left shifts
3069 are done with an 'add Rn,Rn' insn and thus do not clobber the T bit. */
3070 enum
3071 {
3072 ASHL_CLOBBERS_T = 1 << 0,
3073 LSHR_CLOBBERS_T = 1 << 1
3074 };
3075
3076 struct ashl_lshr_sequence
3077 {
3078 char insn_count;
3079 signed char amount[6];
3080 char clobbers_t;
3081 };
3082
3083 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
3084 {
3085 { 0, { 0 }, 0 }, // 0
3086 { 1, { 1 }, LSHR_CLOBBERS_T },
3087 { 1, { 2 }, 0 },
3088 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3089 { 2, { 2, 2 }, 0 }, // 4
3090 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3091 { 3, { 2, 2, 2 }, 0 },
3092 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3093 { 1, { 8 }, 0 }, // 8
3094 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3095 { 2, { 8, 2 }, 0 },
3096 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3097 { 3, { 8, 2, 2 }, 0 }, // 12
3098 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3099 { 3, { 8, -2, 8 }, 0 },
3100 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3101 { 1, { 16 }, 0 }, // 16
3102 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3103 { 2, { 16, 2 }, 0 },
3104 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3105 { 3, { 16, 2, 2 }, 0 }, // 20
3106 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3107 { 3, { 16, -2, 8 }, 0 },
3108 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3109 { 2, { 16, 8 }, 0 }, // 24
3110 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3111 { 3, { 16, 8, 2 }, 0 },
3112 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3113 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3114 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3115 { 3, { 16, -2, 16 }, 0 },
3116
3117 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3118 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3119 However, the shift-and combiner code needs this entry here to be in
3120 terms of real shift insns. */
3121 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3122 };
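/* For example, entry 10 above, { 2, { 8, 2 }, 0 }, says that a shift by 10
   is done as a shift by 8 followed by a shift by 2, takes two insns and
   leaves the T bit untouched.  */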
3123
3124 /* Individual shift amounts for shift amounts < 16, where up to three of
3125 the highmost bits might be clobbered. This is typically used when
3126 combined with some kind of sign or zero extension. */
3127 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3128 {
3129 { 0, { 0 }, 0 }, // 0
3130 { 1, { 1 }, LSHR_CLOBBERS_T },
3131 { 1, { 2 }, 0 },
3132 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3133 { 2, { 2, 2 }, 0 }, // 4
3134 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3135 { 2, { 8, -2 }, 0 },
3136 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3137 { 1, { 8 }, 0 }, // 8
3138 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3139 { 2, { 8, 2 }, 0 },
3140 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3141 { 3, { 8, 2, 2 }, 0 }, // 12
3142 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3143 { 2, { 16, -2 }, 0 },
3144 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3145 { 1, { 16 }, 0 }, // 16
3146 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3147 { 2, { 16, 2 }, 0 },
3148 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3149 { 3, { 16, 2, 2 }, 0 }, // 20
3150 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3151 { 3, { 16, -2, 8 }, 0 },
3152 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3153 { 2, { 16, 8 }, 0 }, // 24
3154 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3155 { 3, { 16, 8, 2 }, 0 },
3156 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3157 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3158 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3159 { 3, { 16, -2, 16 }, 0 },
3160 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3161 };
3162
3163 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3164 will clobber the T bit. */
3165 bool
3166 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3167 {
3168 gcc_assert (CONST_INT_P (shift_amount));
3169
3170 const int shift_amount_i = INTVAL (shift_amount) & 31;
3171
3172 /* Special case for shift count of 31: use and-rotl sequence. */
3173 if (shift_amount_i == 31)
3174 return true;
3175
3176 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3177 & ASHL_CLOBBERS_T) != 0;
3178 }
3179
3180 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3181 instructions will clobber the T bit. */
3182 bool
3183 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3184 {
3185 gcc_assert (CONST_INT_P (shift_amount));
3186
3187 const int shift_amount_i = INTVAL (shift_amount) & 31;
3188
3189 /* Special case for shift count of 31: use shll-movt sequence. */
3190 if (shift_amount_i == 31)
3191 return true;
3192
3193 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3194 & LSHR_CLOBBERS_T) != 0;
3195 }
3196
3197 /* Return true if it is potentially beneficial to use a dynamic shift
3198 instruction (shad / shld) instead of a combination of 1/2/8/16
3199 shift instructions for the specified shift count.
3200 If dynamic shifts are not available, always return false. */
3201 bool
3202 sh_dynamicalize_shift_p (rtx count)
3203 {
3204 gcc_assert (CONST_INT_P (count));
3205
3206 const int shift_amount_i = INTVAL (count) & 31;
3207 int insn_count;
3208
3209 /* For left and right shifts, there are shorter 2 insn sequences for
3210 shift amounts of 31. */
3211 if (shift_amount_i == 31)
3212 insn_count = 2;
3213 else
3214 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3215
3216 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3217 }
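/* For example, assuming SH_DYNAMIC_SHIFT_COST is 1 on a core with dynamic
   shifts, a constant shift by 20 needs a 3 insn 1/2/8/16 sequence
   ({ 16, 2, 2 }), so loading the count into a register and using a single
   dynamic shift wins and the function returns true.  */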
3218
3219 /* Assuming we have a value that has been sign-extended by at least one bit,
3220 can we use the ext_ashl_lshr_seq with the last shift turned to an
3221 arithmetic shift to shift it by N without data loss, and quicker than by
3222 other means? */
3223 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3224
3225 /* Return the cost of a shift. */
3226 static inline int
3227 shiftcosts (rtx x)
3228 {
3229 int value;
3230
3231 if (TARGET_SHMEDIA)
3232 return 1;
3233
3234 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3235 {
3236 if (GET_MODE (x) == DImode
3237 && CONST_INT_P (XEXP (x, 1))
3238 && INTVAL (XEXP (x, 1)) == 1)
3239 return 2;
3240
3241 /* Everything else is invalid, because there is no pattern for it. */
3242 return -1;
3243 }
3244 /* If shifting by a non-constant, this will be expensive. */
3245 if (!CONST_INT_P (XEXP (x, 1)))
3246 return SH_DYNAMIC_SHIFT_COST;
3247
3248 /* Otherwise, return the true cost in instructions. Cope with out of range
3249 shift counts more or less arbitrarily. */
3250 value = INTVAL (XEXP (x, 1)) & 31;
3251
3252 if (GET_CODE (x) == ASHIFTRT)
3253 {
3254 int cost = ashiftrt_insns[value];
3255 /* If dynamic shifts are available and profitable in this case, then we
3256 put the constant in a reg and use shad. */
3257 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3258 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3259 return cost;
3260 }
3261 else
3262 return ashl_lshr_seq[value].insn_count;
3263 }
3264
3265 /* Return the cost of an AND/XOR/IOR operation. */
3266 static inline int
3267 and_xor_ior_costs (rtx x, int code)
3268 {
3269 /* On SH1-4 operations are at most SImode wide.
3270 Double the cost for modes > SImode. */
3271 const int cost_scale = !TARGET_SHMEDIA
3272 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3273 ? 2 : 1;
3274
3275 /* A logical operation with two registers is a single cycle
3276 instruction. */
3277 if (!CONST_INT_P (XEXP (x, 1)))
3278 return 1 * cost_scale;
3279
3280 int i = INTVAL (XEXP (x, 1));
3281
3282 if (TARGET_SHMEDIA)
3283 {
3284 if (satisfies_constraint_I10 (XEXP (x, 1))
3285 || satisfies_constraint_J16 (XEXP (x, 1)))
3286 return 1;
3287 else
3288 return 1 + rtx_cost (XEXP (x, 1), GET_MODE (x), AND, 1, !optimize_size);
3289 }
3290
3291 /* These constants are single cycle extu.[bw] instructions. */
3292 if ((i == 0xff || i == 0xffff) && code == AND)
3293 return 1 * cost_scale;
3294 /* Constants that can be used in an instruction as an immediate are
3295 a single cycle, but this requires r0, so make it a little more
3296 expensive. */
3297 if (CONST_OK_FOR_K08 (i))
3298 return 2 * cost_scale;
3299 /* Constants that can be loaded with a mov immediate need one more cycle.
3300 This case is probably unnecessary. */
3301 if (CONST_OK_FOR_I08 (i))
3302 return 2 * cost_scale;
3303 /* Any other constant requires an additional 2 cycle pc-relative load.
3304 This case is probably unnecessary. */
3305 return 3 * cost_scale;
3306 }
3307
3308 /* Return the cost of an addition or a subtraction. */
3309 static inline int
3310 addsubcosts (rtx x)
3311 {
3312 if (GET_MODE (x) == SImode)
3313 {
3314 /* The addc or subc patterns will eventually become one or two
3315 instructions. Below are some costs for some of the patterns
3316 which combine would reject because the costs of the individual
3317 insns in the patterns are lower.
3318
3319 FIXME: It would be much easier if we had something like insn cost
3320 attributes and the cost calculation machinery used those attributes
3321 in the first place. This would eliminate redundant recog-like C
3322 code to calculate costs of complex patterns. */
3323 rtx op0 = XEXP (x, 0);
3324 rtx op1 = XEXP (x, 1);
3325
3326 if (GET_CODE (x) == PLUS)
3327 {
3328 if (GET_CODE (op0) == AND
3329 && XEXP (op0, 1) == const1_rtx
3330 && (GET_CODE (op1) == PLUS
3331 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3332 return 1;
3333
3334 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3335 && GET_CODE (op1) == LSHIFTRT
3336 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3337 return 1;
3338 }
3339 /* Let's assume that adding the result of an insn that stores into
3340 the T bit is cheap. */
3341 if (treg_set_expr (op1, SImode))
3342 return 1;
3343 if (treg_set_expr (op0, SImode))
3344 return 1;
3345 }
3346
3347 /* On SH1-4 we have only max. SImode operations.
3348 Double the cost for modes > SImode. */
3349 const int cost_scale = !TARGET_SHMEDIA
3350 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3351 ? 2 : 1;
3352
3353 /* Adding a register is a single cycle insn. */
3354 if (REG_P (XEXP (x, 1))
3355 || GET_CODE (XEXP (x, 1)) == SUBREG)
3356 return 1 * cost_scale;
3357
3358 /* Likewise for small constants. */
3359 if (CONST_INT_P (XEXP (x, 1))
3360 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3361 return 1 * cost_scale;
3362
3363 if (TARGET_SHMEDIA)
3364 switch (GET_CODE (XEXP (x, 1)))
3365 {
3366 case CONST:
3367 case LABEL_REF:
3368 case SYMBOL_REF:
3369 return TARGET_SHMEDIA64 ? 5 : 3;
3370
3371 case CONST_INT:
3372 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3373 return 2;
3374 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3375 return 3;
3376 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3377 return 4;
3378
3379 /* Fall through. */
3380 default:
3381 return 5;
3382 }
3383
3384 /* Any other constant requires a 2 cycle pc-relative load plus an
3385 addition. */
3386 return 3 * cost_scale;
3387 }
3388
3389 /* Return the cost of a multiply. */
3390 static inline int
3391 multcosts (rtx x ATTRIBUTE_UNUSED)
3392 {
3393 if (sh_multcost >= 0)
3394 return sh_multcost;
3395 if (TARGET_SHMEDIA)
3396 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3397 accept constants. Ideally, we would use a cost of one or two and
3398 add the cost of the operand, but disregard the latter when inside loops
3399 and loop invariant code motion is still to follow.
3400 Using a multiply first and splitting it later if it's a loss
3401 doesn't work because of different sign / zero extension semantics
3402 of multiplies vs. shifts. */
3403 return optimize_size ? 2 : 3;
3404
3405 if (TARGET_SH2)
3406 {
3407 /* We have a mul insn, so we can never take more than the mul and the
3408 read of the mac reg, but count more because of the latency and extra
3409 reg usage. */
3410 if (optimize_size)
3411 return 2;
3412 return 3;
3413 }
3414
3415 /* If we're aiming at small code, then just count the number of
3416 insns in a multiply call sequence. */
3417 if (optimize_size)
3418 return 5;
3419
3420 /* Otherwise count all the insns in the routine we'd be calling too. */
3421 return 20;
3422 }
3423
3424 /* Compute a (partial) cost for rtx X. Return true if the complete
3425 cost has been computed, and false if subexpressions should be
3426 scanned. In either case, *TOTAL contains the cost result. */
3427 static bool
3428 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
3429 int opno ATTRIBUTE_UNUSED,
3430 int *total, bool speed ATTRIBUTE_UNUSED)
3431 {
3432 int code = GET_CODE (x);
3433
3434 switch (code)
3435 {
3436 /* The lower-subreg pass decides whether to split multi-word regs
3437 into individual regs by looking at the cost for a SET of certain
3438 modes with the following patterns:
3439 (set (reg) (reg))
3440 (set (reg) (const_int 0))
3441 On machines that support vector-move operations a multi-word move
3442 is the same cost as an individual reg move. On SH there is no
3443 vector-move, so we have to provide the correct cost in the number
3444 of move insns to load/store the reg of the mode in question. */
3445 case SET:
3446 if (register_operand (SET_DEST (x), VOIDmode)
3447 && (register_operand (SET_SRC (x), VOIDmode)
3448 || satisfies_constraint_Z (SET_SRC (x))))
3449 {
3450 const machine_mode mode = GET_MODE (SET_DEST (x));
3451 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3452 / mov_insn_size (mode, TARGET_SH2A));
3453 return true;
3454 }
3455 return false;
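/* For example, a DImode register-register copy is split into
   GET_MODE_SIZE / mov_insn_size = 8 / 4 = 2 SImode moves, so such a SET
   costs COSTS_N_INSNS (2), while an SImode copy costs COSTS_N_INSNS (1).  */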
3456
3457 /* The cost of a mem access is mainly the cost of the address mode. */
3458 case MEM:
3459 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3460 true);
3461 return true;
3462
3463 case IF_THEN_ELSE:
3464 /* This case is required for the if_then_else negc pattern. */
3465 if (treg_set_expr (XEXP (x, 0), SImode))
3466 {
3467 *total = COSTS_N_INSNS (1);
3468 return true;
3469 }
3470 else
3471 return false;
3472
3473 /* Zero extracts of single bits are usually combine patterns for the
3474 tst insns. */
3475 case ZERO_EXTRACT:
3476 if (GET_CODE (XEXP (x, 0)) == XOR
3477 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3478 && XEXP (x, 1) == const1_rtx
3479 && CONST_INT_P (XEXP (x, 2))
3480 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3481 /* Check that the xor constant overlaps with the extracted bit. */
3482 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3483 {
3484 *total = 1; //COSTS_N_INSNS (1);
3485 return true;
3486 }
3487 return false;
3488
3489 /* The cost of a sign or zero extend depends on whether the source is a
3490 reg or a mem. In case of a mem, take the address into account. */
3491 case SIGN_EXTEND:
3492 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3493 {
3494 *total = COSTS_N_INSNS (1);
3495 return true;
3496 }
3497 if (MEM_P (XEXP (x, 0)))
3498 {
3499 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3500 GET_MODE (XEXP (x, 0)),
3501 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3502 return true;
3503 }
3504 return false;
3505
3506 case ZERO_EXTEND:
3507 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3508 {
3509 *total = COSTS_N_INSNS (1);
3510 return true;
3511 }
3512 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3513 && (GET_MODE (XEXP (x, 0)) == QImode
3514 || GET_MODE (XEXP (x, 0)) == HImode))
3515 {
3516 /* Handle SH2A's movu.b and movu.w insn. */
3517 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3518 GET_MODE (XEXP (x, 0)),
3519 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3520 return true;
3521 }
3522 return false;
3523
3524 /* mems for SFmode and DFmode can be inside a parallel due to
3525 the way the fpscr is handled. */
3526 case PARALLEL:
3527 for (int i = 0; i < XVECLEN (x, 0); i++)
3528 {
3529 rtx xx = XVECEXP (x, 0, i);
3530 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3531 {
3532 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3533 GET_MODE (XEXP (xx, 0)),
3534 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3535 return true;
3536 }
3537 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3538 {
3539 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3540 GET_MODE (XEXP (xx, 1)),
3541 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3542 return true;
3543 }
3544 }
3545
3546 if (sh_1el_vec (x, VOIDmode))
3547 *total = outer_code != SET;
3548 else if (sh_rep_vec (x, VOIDmode))
3549 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3550 + (outer_code != SET));
3551 else
3552 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3553 return true;
3554
3555 case CONST_INT:
3556 if (TARGET_SHMEDIA)
3557 {
3558 if (INTVAL (x) == 0)
3559 *total = 0;
3560 else if (outer_code == AND && and_operand ((x), DImode))
3561 *total = 0;
3562 else if ((outer_code == IOR || outer_code == XOR
3563 || outer_code == PLUS)
3564 && CONST_OK_FOR_I10 (INTVAL (x)))
3565 *total = 0;
3566 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3567 *total = COSTS_N_INSNS (outer_code != SET);
3568 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3569 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3570 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3571 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3572 else
3573 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3574 return true;
3575 }
3576 if (CONST_OK_FOR_I08 (INTVAL (x)))
3577 *total = 0;
3578 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3579 && CONST_OK_FOR_K08 (INTVAL (x)))
3580 *total = 1;
3581 /* prepare_cmp_insn will force costly constants into registers before
3582 the cbranch[sd]i4 patterns can see them, so preserve potentially
3583 interesting ones not covered by I08 above. */
3584 else if (outer_code == COMPARE
3585 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3586 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3587 || INTVAL (x) == 0x7fffffff
3588 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3589 *total = 1;
3590 else
3591 *total = 8;
3592 return true;
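/* A few data points for the non-SHmedia path above: a constant such as 92
   satisfies I08 and is free; 0xFF as an AND/IOR/XOR operand satisfies K08
   and costs 1; an arbitrary constant such as 0x12345 gets the default cost
   of 8, since it will have to come from the constant pool.  */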
3593
3594 case EQ:
3595 /* An and with a constant compared against zero is
3596 most likely going to be a TST #imm, R0 instruction.
3597 Notice that this does not catch the zero_extract variants from
3598 the md file. */
3599 if (XEXP (x, 1) == const0_rtx
3600 && (GET_CODE (XEXP (x, 0)) == AND
3601 || (SUBREG_P (XEXP (x, 0))
3602 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND)))
3603 {
3604 *total = 1;
3605 return true;
3606 }
3607
3608 else if (XEXP (x, 1) == const0_rtx
3609 && GET_CODE (XEXP (x, 0)) == AND
3610 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3611 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3612 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3613 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3614 {
3615 *total = 1;
3616 return true;
3617 }
3618 else
3619 return false;
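/* An illustrative shape for the first test above is
     (eq (and (reg:SI 4) (const_int 64)) (const_int 0))
   which usually ends up as a single tst instruction, hence the cost of 1.  */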
3620
3621 case SMIN:
3622 case SMAX:
3623 /* This is most likely a clips.b or clips.w insn that is being made up
3624 by combine. */
3625 if (TARGET_SH2A
3626 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3627 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3628 && REG_P (XEXP (XEXP (x, 0), 0))
3629 && CONST_INT_P (XEXP (x, 1)))
3630 {
3631 *total = COSTS_N_INSNS (1);
3632 return true;
3633 }
3634 else
3635 return false;
3636
3637 case CONST:
3638 case LABEL_REF:
3639 case SYMBOL_REF:
3640 if (TARGET_SHMEDIA64)
3641 *total = COSTS_N_INSNS (4);
3642 else if (TARGET_SHMEDIA32)
3643 *total = COSTS_N_INSNS (2);
3644 else
3645 *total = 5;
3646 return true;
3647
3648 case CONST_DOUBLE:
3649 if (TARGET_SHMEDIA)
3650 *total = COSTS_N_INSNS (4);
3651 /* prepare_cmp_insn will force costly constants into registers before
3652 the cbranchdi4 pattern can see them, so preserve potentially
3653 interesting ones. */
3654 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3655 *total = 1;
3656 else
3657 *total = 10;
3658 return true;
3659
3660 case CONST_VECTOR:
3661 /* FIXME: This looks broken. Only the last statement has any effect.
3662 Probably this could be folded with the PARALLEL case? */
3663 if (x == CONST0_RTX (GET_MODE (x)))
3664 *total = 0;
3665 else if (sh_1el_vec (x, VOIDmode))
3666 *total = outer_code != SET;
3667 if (sh_rep_vec (x, VOIDmode))
3668 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3669 + (outer_code != SET));
3670 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3671 return true;
3672
3673 case PLUS:
3674 case MINUS:
3675 *total = COSTS_N_INSNS (addsubcosts (x));
3676 return true;
3677
3678 case AND:
3679 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3680 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3681 {
3682 *total = COSTS_N_INSNS (1);
3683 return true;
3684 }
3685 /* Fall through. */
3686
3687 case XOR:
3688 case IOR:
3689 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3690 return true;
3691
3692 case MULT:
3693 *total = COSTS_N_INSNS (multcosts (x));
3694 return true;
3695
3696 case LT:
3697 case GE:
3698 /* div0s sign comparison. */
3699 if (GET_CODE (XEXP (x, 0)) == XOR
3700 && REG_P ((XEXP (XEXP (x, 0), 0)))
3701 && REG_P ((XEXP (XEXP (x, 0), 1)))
3702 && satisfies_constraint_Z (XEXP (x, 1)))
3703 {
3704 *total = COSTS_N_INSNS (1);
3705 return true;
3706 }
3707 else
3708 return false;
3709
3710 case LSHIFTRT:
3711 /* div0s sign comparison. */
3712 if (GET_CODE (XEXP (x, 0)) == XOR
3713 && REG_P ((XEXP (XEXP (x, 0), 0)))
3714 && REG_P ((XEXP (XEXP (x, 0), 1)))
3715 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3716 {
3717 *total = COSTS_N_INSNS (1);
3718 return true;
3719 }
3720 /* Fall through to shiftcosts. */
3721 case ASHIFT:
3722 case ASHIFTRT:
3723 {
3724 int cost = shiftcosts (x);
3725 if (cost < 0)
3726 return false;
3727 *total = COSTS_N_INSNS (cost);
3728 return true;
3729 }
3730
3731 case DIV:
3732 case UDIV:
3733 case MOD:
3734 case UMOD:
3735 *total = COSTS_N_INSNS (20);
3736 return true;
3737
3738 case FLOAT:
3739 case FIX:
3740 *total = 100;
3741 return true;
3742
3743 default:
3744 return false;
3745 }
3746 }
3747
3748 /* Determine the size of the fundamental move insn that will be used
3749 for the specified mode. */
3750 static inline int
3751 mov_insn_size (machine_mode mode, bool consider_sh2a)
3752 {
3753 const int mode_sz = GET_MODE_SIZE (mode);
3754
3755 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3756 || (TARGET_FMOVD && mode == DFmode))
3757 return mode_sz;
3758 else
3759 {
3760 /* The max. available mode for actual move insns is SImode.
3761 Larger accesses will be split into multiple loads/stores. */
3762 const int max_mov_sz = GET_MODE_SIZE (SImode);
3763 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3764 }
3765 }
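/* For instance, without FMOVD a DFmode or DImode value is moved in two
   SImode-sized pieces, so this returns 4 for those modes; with TARGET_FMOVD
   (or TARGET_SH2A_DOUBLE when CONSIDER_SH2A) a DFmode value is moved in a
   single 8 byte access and this returns 8.  */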
3766
3767 /* Determine the maximum possible displacement for a move insn for the
3768 specified mode. */
3769 int
3770 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3771 {
3772 /* The 4 byte displacement move insns are the same as the 2 byte
3773 versions but take a 12 bit displacement. All we need to do is to
3774 scale the max. displacement value accordingly. */
3775 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3776
3777 /* SH2A supports FPU move insns with 12 bit displacements.
3778 Other variants do not support any kind of displacement for
3779 FPU move insns. */
3780 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3781 return 0;
3782 else
3783 {
3784 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3785 const int mode_sz = GET_MODE_SIZE (mode);
3786 int r = 15 * mov_insn_sz * disp_scale;
3787
3788 /* If the mov insn will be split into multiple loads/stores, the
3789 maximum possible displacement is a bit smaller. */
3790 if (mode_sz > mov_insn_sz)
3791 r -= mode_sz - mov_insn_sz;
3792 return r;
3793 }
3794 }
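/* Worked examples of the formula above: QImode gives 15 * 1 = 15 bytes,
   SImode gives 15 * 4 = 60 and DImode gives 15 * 4 - (8 - 4) = 56.
   With CONSIDER_SH2A the 12 bit forms scale this by 4095 / 15 = 273,
   e.g. 15 * 4 * 273 = 16380 for SImode.  */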
3795
3796 /* Determine the alignment mask for a move insn of the
3797 specified mode. */
3798 static inline int
3799 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3800 {
3801 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3802 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3803 }
3804
3805 /* Return the displacement value of a displacement address. */
3806 HOST_WIDE_INT
3807 sh_disp_addr_displacement (rtx x)
3808 {
3809 gcc_assert (satisfies_constraint_Sdd (x));
3810 return INTVAL (XEXP (XEXP (x, 0), 1));
3811 }
3812
3813 /* Compute the cost of an address. */
3814 static int
3815 sh_address_cost (rtx x, machine_mode mode,
3816 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3817 {
3818 /* 'GBR + 0'. Account one more because of R0 restriction. */
3819 if (REG_P (x) && REGNO (x) == GBR_REG)
3820 return 2;
3821
3822 /* Simple reg, post-inc, pre-dec addressing. */
3823 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3824 return 1;
3825
3826 /* 'reg + disp' addressing. */
3827 if (GET_CODE (x) == PLUS
3828 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3829 {
3830 /* 'GBR + disp'. Account one more because of R0 restriction. */
3831 if (REGNO (XEXP (x, 0)) == GBR_REG
3832 && gbr_displacement (XEXP (x, 1), mode))
3833 return 2;
3834
3835 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3836
3837 if (offset == 0)
3838 return 1;
3839
3840 /* The displacement would fit into a 2 byte move insn.
3841 HImode and QImode loads/stores with displacement put pressure on
3842 R0 which will most likely require another reg copy. Thus account
3843 a higher cost for that. */
3844 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3845 return (mode == HImode || mode == QImode) ? 2 : 1;
3846
3847 /* The displacement would fit into a 4 byte move insn (SH2A). */
3848 if (TARGET_SH2A
3849 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3850 return 2;
3851
3852 /* The displacement is probably out of range and will require extra
3853 calculations. */
3854 return 3;
3855 }
3856
3857 /* 'reg + reg' addressing. Account a slightly higher cost because of
3858 increased pressure on R0. */
3859 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3860 && ! TARGET_SHMEDIA)
3861 return 3;
3862
3863 /* Not sure what it is - probably expensive. */
3864 return 10;
3865 }
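/* A few data points for the costs above (non-SHmedia): @rn and @rn+ cost 1;
   @(8,rn) costs 1 for SImode but 2 for QImode/HImode because of the R0
   restriction; @(r0,rn) costs 3; anything unrecognized defaults to 10.  */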
3866
3867 /* Code to expand a shift. */
3868 static void
3869 gen_ashift (int type, int n, rtx reg)
3870 {
3871 rtx n_rtx;
3872
3873 /* Negative values here come from the shift_amounts array. */
3874 if (n < 0)
3875 {
3876 if (type == ASHIFT)
3877 type = LSHIFTRT;
3878 else
3879 type = ASHIFT;
3880 n = -n;
3881 }
3882
3883 n_rtx = GEN_INT (n);
3884 gcc_assert (satisfies_constraint_P27 (n_rtx));
3885
3886 switch (type)
3887 {
3888 case ASHIFTRT:
3889 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3890 break;
3891 case LSHIFTRT:
3892 if (n == 1)
3893 emit_insn (gen_shlr (reg, reg));
3894 else
3895 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3896 break;
3897 case ASHIFT:
3898 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3899 break;
3900 default:
3901 gcc_unreachable ();
3902 }
3903 }
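/* E.g. gen_ashift (ASHIFT, -2, reg) is turned into a logical right shift
   by 2; such negative amounts come from the shift sequence tables, which
   sometimes overshoot with a large left shift and then correct with a
   small right shift.  */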
3904
3905 /* Code to expand a HImode shift. */
3906 static void
3907 gen_ashift_hi (int type, int n, rtx reg)
3908 {
3909 /* Negative values here come from the shift_amounts array. */
3910 if (n < 0)
3911 {
3912 if (type == ASHIFT)
3913 type = LSHIFTRT;
3914 else
3915 type = ASHIFT;
3916 n = -n;
3917 }
3918
3919 switch (type)
3920 {
3921 case ASHIFTRT:
3922 case LSHIFTRT:
3923 /* We don't have HImode right shift operations because using the
3924 ordinary 32 bit shift instructions for that doesn't generate proper
3925 zero/sign extension.
3926 gen_ashift_hi is only called in contexts where we know that the
3927 sign extension works out correctly. */
3928 {
3929 int offset = 0;
3930 if (GET_CODE (reg) == SUBREG)
3931 {
3932 offset = SUBREG_BYTE (reg);
3933 reg = SUBREG_REG (reg);
3934 }
3935 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3936 break;
3937 }
3938 case ASHIFT:
3939 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3940 break;
3941 }
3942 }
3943
3944 /* Output RTL to split a constant shift into its component SH constant
3945 shift instructions. */
3946 void
3947 gen_shifty_op (int code, rtx *operands)
3948 {
3949 int value = INTVAL (operands[2]);
3950 int max, i;
3951
3952 /* Truncate the shift count in case it is out of bounds. */
3953 value = value & 31;
3954
3955 if (value == 31)
3956 {
3957 if (code == LSHIFTRT)
3958 {
3959 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3960 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3961 return;
3962 }
3963 else if (code == ASHIFT)
3964 {
3965 /* There is a two instruction sequence for 31 bit left shifts,
3966 but it requires r0. */
3967 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3968 {
3969 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3970 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3971 return;
3972 }
3973 }
3974 }
3975 else if (value == 0)
3976 {
3977 /* This can happen even when optimizing, if there were subregs before
3978 reload. Don't output a nop here, as this is never optimized away;
3979 use a no-op move instead. */
3980 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3981 return;
3982 }
3983
3984 max = ashl_lshr_seq[value].insn_count;
3985 for (i = 0; i < max; i++)
3986 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3987 }
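/* For example, a logical right shift by 31 becomes `rotl rn; movt rn' as
   above, while other counts are expanded via ashl_lshr_seq, so a left shift
   by 10 typically becomes shll2 followed by shll8; the exact sequences
   depend on the generated shift tables.  */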
3988
3989 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3990 don't matter. */
3991 void
3992 gen_shifty_hi_op (int code, rtx *operands)
3993 {
3994 int value = INTVAL (operands[2]);
3995 int max, i;
3996 void (*gen_fun) (int, int, rtx);
3997
3998 /* This operation is used by and_shl for SImode values with a few
3999 high bits known to be cleared. */
4000 value &= 31;
4001 if (value == 0)
4002 {
4003 emit_insn (gen_nop ());
4004 return;
4005 }
4006
4007 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
4008 if (code == ASHIFT)
4009 {
4010 max = ext_ashl_lshr_seq[value].insn_count;
4011 for (i = 0; i < max; i++)
4012 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4013 }
4014 else
4015 /* When shifting right, emit the shifts in reverse order, so that
4016 solitary negative values come first. */
4017 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
4018 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
4019 }
4020
4021 /* Output RTL for an arithmetic right shift.
4022 ??? Rewrite to use super-optimizer sequences. */
4023 bool
4024 expand_ashiftrt (rtx *operands)
4025 {
4026 rtx wrk;
4027 char func[18];
4028 int value;
4029
4030 if (TARGET_DYNSHIFT)
4031 {
4032 if (!CONST_INT_P (operands[2]))
4033 {
4034 rtx count = copy_to_mode_reg (SImode, operands[2]);
4035 emit_insn (gen_negsi2 (count, count));
4036 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4037 return true;
4038 }
4039 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
4040 > 1 + SH_DYNAMIC_SHIFT_COST)
4041 {
4042 rtx count
4043 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
4044 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
4045 return true;
4046 }
4047 }
4048 if (!CONST_INT_P (operands[2]))
4049 return false;
4050
4051 value = INTVAL (operands[2]) & 31;
4052
4053 if (value == 31)
4054 {
4055 /* If we are called from abs expansion, arrange things so that we
4056 can use a single MT instruction that doesn't clobber the source,
4057 if LICM can hoist out the load of the constant zero. */
4058 if (currently_expanding_to_rtl)
4059 {
4060 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
4061 operands[1]));
4062 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
4063 return true;
4064 }
4065 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
4066 return true;
4067 }
4068 else if (value >= 16 && value <= 19)
4069 {
4070 wrk = gen_reg_rtx (SImode);
4071 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
4072 value -= 16;
4073 while (value--)
4074 gen_ashift (ASHIFTRT, 1, wrk);
4075 emit_move_insn (operands[0], wrk);
4076 return true;
4077 }
4078 /* Expand a short sequence inline; for longer ones, call a magic routine. */
4079 else if (value <= 5)
4080 {
4081 wrk = gen_reg_rtx (SImode);
4082 emit_move_insn (wrk, operands[1]);
4083 while (value--)
4084 gen_ashift (ASHIFTRT, 1, wrk);
4085 emit_move_insn (operands[0], wrk);
4086 return true;
4087 }
4088
4089 wrk = gen_reg_rtx (Pmode);
4090
4091 /* Load the value into an arg reg and call a helper. */
4092 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
4093 sprintf (func, "__ashiftrt_r4_%d", value);
4094 function_symbol (wrk, func, SFUNC_STATIC);
4095 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
4096 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
4097 return true;
4098 }
4099
4100 /* Try to find a good way to implement the combiner pattern
4101 [(set (match_operand:SI 0 "register_operand" "r")
4102 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4103 (match_operand:SI 2 "const_int_operand" "n"))
4104 (match_operand:SI 3 "const_int_operand" "n"))) .
4105 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
4106 return 0 for simple right / left or left/right shift combination.
4107 return 1 for a combination of shifts with zero_extend.
4108 return 2 for a combination of shifts with an AND that needs r0.
4109 return 3 for a combination of shifts with an AND that needs an extra
4110 scratch register, when the three highmost bits of the AND mask are clear.
4111 return 4 for a combination of shifts with an AND that needs an extra
4112 scratch register, when any of the three highmost bits of the AND mask
4113 is set.
4114 If ATTRP is set, store an initial right shift width in ATTRP[0],
4115 and the instruction length in ATTRP[1] . These values are not valid
4116 when returning 0.
4117 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
4118 shift_amounts for the last shift value that is to be used before the
4119 sign extend. */
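/* Illustrative example: for LEFT_RTX = 2 and MASK_RTX = 0x3FC the mask
   shifted right by LEFT is 0xFF, so a zero extend of the low byte followed
   by a 2 bit left shift is the likely winner, i.e. a return value of 1;
   the exact choice depends on the insn_count tables.  */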
4120 int
4121 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
4122 {
4123 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
4124 int left = INTVAL (left_rtx), right;
4125 int best = 0;
4126 int cost, best_cost = 10000;
4127 int best_right = 0, best_len = 0;
4128 int i;
4129 int can_ext;
4130
4131 if (left < 0 || left > 31)
4132 return 0;
4133 if (CONST_INT_P (mask_rtx))
4134 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
4135 else
4136 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
4137 /* Can this be expressed as a right shift / left shift pair? */
4138 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
4139 right = exact_log2 (lsb);
4140 mask2 = ~(mask + lsb - 1);
4141 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
4142 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
4143 if (! mask2)
4144 best_cost = ashl_lshr_seq[right].insn_count
4145 + ashl_lshr_seq[right + left].insn_count;
4146 /* mask has no trailing zeroes <==> ! right */
4147 else if (! right && mask2 == ~(lsb2 - 1))
4148 {
4149 int late_right = exact_log2 (lsb2);
4150 best_cost = ashl_lshr_seq[left + late_right].insn_count
4151 + ashl_lshr_seq[late_right].insn_count;
4152 }
4153 /* Try to use zero extend. */
4154 if (mask2 == ~(lsb2 - 1))
4155 {
4156 int width, first;
4157
4158 for (width = 8; width <= 16; width += 8)
4159 {
4160 /* Can we zero-extend right away? */
4161 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4162 {
4163 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4164 + ext_ashl_lshr_seq[left + right].insn_count;
4165 if (cost < best_cost)
4166 {
4167 best = 1;
4168 best_cost = cost;
4169 best_right = right;
4170 best_len = cost;
4171 if (attrp)
4172 attrp[2] = -1;
4173 }
4174 continue;
4175 }
4176 /* ??? Could try to put zero extend into initial right shift,
4177 or even shift a bit left before the right shift. */
4178 /* Determine value of first part of left shift, to get to the
4179 zero extend cut-off point. */
4180 first = width - exact_log2 (lsb2) + right;
4181 if (first >= 0 && right + left - first >= 0)
4182 {
4183 cost = ext_ashl_lshr_seq[right].insn_count
4184 + ext_ashl_lshr_seq[first].insn_count + 1
4185 + ext_ashl_lshr_seq[right + left - first].insn_count;
4186
4187 if (cost < best_cost)
4188 {
4189 best = 1;
4190 best_cost = cost;
4191 best_right = right;
4192 best_len = cost;
4193 if (attrp)
4194 attrp[2] = first;
4195 }
4196 }
4197 }
4198 }
4199 /* Try to use r0 AND pattern */
4200 for (i = 0; i <= 2; i++)
4201 {
4202 if (i > right)
4203 break;
4204 if (! CONST_OK_FOR_K08 (mask >> i))
4205 continue;
4206 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4207 if (cost < best_cost)
4208 {
4209 best = 2;
4210 best_cost = cost;
4211 best_right = i;
4212 best_len = cost - 1;
4213 }
4214 }
4215 /* Try to use a scratch register to hold the AND operand. */
4216 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4217 for (i = 0; i <= 2; i++)
4218 {
4219 if (i > right)
4220 break;
4221 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4222 + (can_ext
4223 ? ext_ashl_lshr_seq
4224 : ashl_lshr_seq)[left + i].insn_count;
4225 if (cost < best_cost)
4226 {
4227 best = 4 - can_ext;
4228 best_cost = cost;
4229 best_right = i;
4230 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4231 }
4232 }
4233
4234 if (attrp)
4235 {
4236 attrp[0] = best_right;
4237 attrp[1] = best_len;
4238 }
4239 return best;
4240 }
4241
4242 /* This is used in length attributes of the unnamed instructions
4243 corresponding to shl_and_kind return values of 1 and 2. */
4244 int
4245 shl_and_length (rtx insn)
4246 {
4247 rtx set_src, left_rtx, mask_rtx;
4248 int attributes[3];
4249
4250 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4251 left_rtx = XEXP (XEXP (set_src, 0), 1);
4252 mask_rtx = XEXP (set_src, 1);
4253 shl_and_kind (left_rtx, mask_rtx, attributes);
4254 return attributes[1];
4255 }
4256
4257 /* This is used in length attribute of the and_shl_scratch instruction. */
4258 int
4259 shl_and_scr_length (rtx insn)
4260 {
4261 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4262 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4263 rtx op = XEXP (set_src, 0);
4264 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4265 op = XEXP (XEXP (op, 0), 0);
4266 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4267 }
4268
4269 /* Generate rtl for instructions for which shl_and_kind advised a particular
4270 method of generating them, i.e. returned nonzero. */
4271 bool
4272 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4273 {
4274 int attributes[3];
4275 unsigned HOST_WIDE_INT mask;
4276 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4277 int right, total_shift;
4278 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4279
4280 right = attributes[0];
4281 total_shift = INTVAL (left_rtx) + right;
4282 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4283 switch (kind)
4284 {
4285 default:
4286 return true;
4287 case 1:
4288 {
4289 int first = attributes[2];
4290 rtx operands[3];
4291
4292 if (first < 0)
4293 {
4294 emit_insn ((mask << right) <= 0xff
4295 ? gen_zero_extendqisi2 (dest,
4296 gen_lowpart (QImode, source))
4297 : gen_zero_extendhisi2 (dest,
4298 gen_lowpart (HImode, source)));
4299 source = dest;
4300 }
4301 if (source != dest)
4302 emit_insn (gen_movsi (dest, source));
4303 operands[0] = dest;
4304 if (right)
4305 {
4306 operands[2] = GEN_INT (right);
4307 gen_shifty_hi_op (LSHIFTRT, operands);
4308 }
4309 if (first > 0)
4310 {
4311 operands[2] = GEN_INT (first);
4312 gen_shifty_hi_op (ASHIFT, operands);
4313 total_shift -= first;
4314 mask <<= first;
4315 }
4316 if (first >= 0)
4317 emit_insn (mask <= 0xff
4318 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4319 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4320 if (total_shift > 0)
4321 {
4322 operands[2] = GEN_INT (total_shift);
4323 gen_shifty_hi_op (ASHIFT, operands);
4324 }
4325 break;
4326 }
4327 case 4:
4328 shift_gen_fun = gen_shifty_op;
4329 case 3:
4330 /* If the topmost bit that matters is set, set the topmost bits
4331 that don't matter. This way, we might be able to get a shorter
4332 signed constant. */
4333 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4334 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4335 case 2:
4336 /* Don't expand fine-grained when combining, because that will
4337 make the pattern fail. */
4338 if (currently_expanding_to_rtl
4339 || reload_in_progress || reload_completed)
4340 {
4341 rtx operands[3];
4342
4343 /* Cases 3 and 4 should be handled by this split
4344 only while combining. */
4345 gcc_assert (kind <= 2);
4346 if (right)
4347 {
4348 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4349 source = dest;
4350 }
4351 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4352 if (total_shift)
4353 {
4354 operands[0] = dest;
4355 operands[1] = dest;
4356 operands[2] = GEN_INT (total_shift);
4357 shift_gen_fun (ASHIFT, operands);
4358 }
4359 break;
4360 }
4361 else
4362 {
4363 int neg = 0;
4364 if (kind != 4 && total_shift < 16)
4365 {
4366 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4367 if (neg > 0)
4368 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4369 else
4370 neg = 0;
4371 }
4372 emit_insn (gen_and_shl_scratch (dest, source,
4373 GEN_INT (right),
4374 GEN_INT (mask),
4375 GEN_INT (total_shift + neg),
4376 GEN_INT (neg)));
4377 emit_insn (gen_movsi (dest, dest));
4378 break;
4379 }
4380 }
4381 return false;
4382 }
4383
4384 /* Try to find a good way to implement the combiner pattern
4385 [(set (match_operand:SI 0 "register_operand" "=r")
4386 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4387 (match_operand:SI 2 "const_int_operand" "n")
4388 (match_operand:SI 3 "const_int_operand" "n")
4389 (const_int 0)))
4390 (clobber (reg:SI T_REG))]
4391 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4392 return 0 for simple left / right shift combination.
4393 return 1 for left shift / 8 bit sign extend / left shift.
4394 return 2 for left shift / 16 bit sign extend / left shift.
4395 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4396 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4397 return 5 for left shift / 16 bit sign extend / right shift
4398 return 6 for < 8 bit sign extend / left shift.
4399 return 7 for < 8 bit sign extend / left shift / single right shift.
4400 If COSTP is nonzero, assign the calculated cost to *COSTP. */
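/* Illustrative example: with LEFT = 2 and SIZE = 10 we have INSIZE = 8,
   so an 8 bit sign extend followed by a 2 bit left shift (kind 1) is a
   plausible cheapest choice; the exact selection depends on the shift
   tables and SH_DYNAMIC_SHIFT_COST.  */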
4401 int
4402 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4403 {
4404 int left, size, insize, ext;
4405 int cost = 0, best_cost;
4406 int kind;
4407
4408 left = INTVAL (left_rtx);
4409 size = INTVAL (size_rtx);
4410 insize = size - left;
4411 gcc_assert (insize > 0);
4412 /* Default to left / right shift. */
4413 kind = 0;
4414 best_cost = ashl_lshr_seq[32 - insize].insn_count
4415 + ashl_lshr_seq[32 - size].insn_count;
4416 if (size <= 16)
4417 {
4418 /* 16 bit shift / sign extend / 16 bit shift */
4419 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4420 + ashl_lshr_seq[16 - size].insn_count;
4421 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4422 below, by alternative 3 or something even better. */
4423 if (cost < best_cost)
4424 {
4425 kind = 5;
4426 best_cost = cost;
4427 }
4428 }
4429 /* Try a plain sign extend between two shifts. */
4430 for (ext = 16; ext >= insize; ext -= 8)
4431 {
4432 if (ext <= size)
4433 {
4434 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4435 + ashl_lshr_seq[size - ext].insn_count;
4436 if (cost < best_cost)
4437 {
4438 kind = ext / (unsigned) 8;
4439 best_cost = cost;
4440 }
4441 }
4442 /* Check if we can do a sloppy shift with a final signed shift
4443 restoring the sign. */
4444 if (EXT_SHIFT_SIGNED (size - ext))
4445 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4446 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4447 /* If not, maybe it's still cheaper to do the second shift sloppy,
4448 and do a final sign extend? */
4449 else if (size <= 16)
4450 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4451 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4452 + 1;
4453 else
4454 continue;
4455 if (cost < best_cost)
4456 {
4457 kind = ext / (unsigned) 8 + 2;
4458 best_cost = cost;
4459 }
4460 }
4461 /* Check if we can sign extend in r0 */
4462 if (insize < 8)
4463 {
4464 cost = 3 + ashl_lshr_seq[left].insn_count;
4465 if (cost < best_cost)
4466 {
4467 kind = 6;
4468 best_cost = cost;
4469 }
4470 /* Try the same with a final signed shift. */
4471 if (left < 31)
4472 {
4473 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4474 if (cost < best_cost)
4475 {
4476 kind = 7;
4477 best_cost = cost;
4478 }
4479 }
4480 }
4481 if (TARGET_DYNSHIFT)
4482 {
4483 /* Try to use a dynamic shift. */
4484 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4485 if (cost < best_cost)
4486 {
4487 kind = 0;
4488 best_cost = cost;
4489 }
4490 }
4491 if (costp)
4492 *costp = cost;
4493 return kind;
4494 }
4495
4496 /* Function to be used in the length attribute of the instructions
4497 implementing this pattern. */
4498 int
4499 shl_sext_length (rtx insn)
4500 {
4501 rtx set_src, left_rtx, size_rtx;
4502 int cost;
4503
4504 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4505 left_rtx = XEXP (XEXP (set_src, 0), 1);
4506 size_rtx = XEXP (set_src, 1);
4507 shl_sext_kind (left_rtx, size_rtx, &cost);
4508 return cost;
4509 }
4510
4511 /* Generate rtl for this pattern */
4512 bool
4513 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4514 {
4515 int kind;
4516 int left, size, insize, cost;
4517 rtx operands[3];
4518
4519 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4520 left = INTVAL (left_rtx);
4521 size = INTVAL (size_rtx);
4522 insize = size - left;
4523 switch (kind)
4524 {
4525 case 1:
4526 case 2:
4527 case 3:
4528 case 4:
4529 {
4530 int ext = kind & 1 ? 8 : 16;
4531 int shift2 = size - ext;
4532
4533 /* Don't expand fine-grained when combining, because that will
4534 make the pattern fail. */
4535 if (! currently_expanding_to_rtl
4536 && ! reload_in_progress && ! reload_completed)
4537 {
4538 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4539 emit_insn (gen_movsi (dest, source));
4540 break;
4541 }
4542 if (dest != source)
4543 emit_insn (gen_movsi (dest, source));
4544 operands[0] = dest;
4545 if (ext - insize)
4546 {
4547 operands[2] = GEN_INT (ext - insize);
4548 gen_shifty_hi_op (ASHIFT, operands);
4549 }
4550 emit_insn (kind & 1
4551 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4552 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4553 if (kind <= 2)
4554 {
4555 if (shift2)
4556 {
4557 operands[2] = GEN_INT (shift2);
4558 gen_shifty_op (ASHIFT, operands);
4559 }
4560 }
4561 else
4562 {
4563 if (shift2 > 0)
4564 {
4565 if (EXT_SHIFT_SIGNED (shift2))
4566 {
4567 operands[2] = GEN_INT (shift2 + 1);
4568 gen_shifty_op (ASHIFT, operands);
4569 operands[2] = const1_rtx;
4570 gen_shifty_op (ASHIFTRT, operands);
4571 break;
4572 }
4573 operands[2] = GEN_INT (shift2);
4574 gen_shifty_hi_op (ASHIFT, operands);
4575 }
4576 else if (shift2)
4577 {
4578 operands[2] = GEN_INT (-shift2);
4579 gen_shifty_hi_op (LSHIFTRT, operands);
4580 }
4581 emit_insn (size <= 8
4582 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4583 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4584 }
4585 break;
4586 }
4587 case 5:
4588 {
4589 int i = 16 - size;
4590 if (! currently_expanding_to_rtl
4591 && ! reload_in_progress && ! reload_completed)
4592 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4593 else
4594 {
4595 operands[0] = dest;
4596 operands[2] = GEN_INT (16 - insize);
4597 gen_shifty_hi_op (ASHIFT, operands);
4598 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4599 }
4600 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4601 while (--i >= 0)
4602 gen_ashift (ASHIFTRT, 1, dest);
4603 break;
4604 }
4605 case 6:
4606 case 7:
4607 /* Don't expand fine-grained when combining, because that will
4608 make the pattern fail. */
4609 if (! currently_expanding_to_rtl
4610 && ! reload_in_progress && ! reload_completed)
4611 {
4612 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4613 emit_insn (gen_movsi (dest, source));
4614 break;
4615 }
4616 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4617 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4618 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4619 operands[0] = dest;
4620 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4621 gen_shifty_op (ASHIFT, operands);
4622 if (kind == 7)
4623 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4624 break;
4625 default:
4626 return true;
4627 }
4628 return false;
4629 }
4630
4631 /* Prefix a symbol_ref name with "datalabel". */
4632 rtx
4633 gen_datalabel_ref (rtx sym)
4634 {
4635 const char *str;
4636
4637 if (GET_CODE (sym) == LABEL_REF)
4638 return gen_rtx_CONST (GET_MODE (sym),
4639 gen_rtx_UNSPEC (GET_MODE (sym),
4640 gen_rtvec (1, sym),
4641 UNSPEC_DATALABEL));
4642
4643 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4644
4645 str = XSTR (sym, 0);
4646 /* Share all SYMBOL_REF strings with the same value - that is important
4647 for cse. */
4648 str = IDENTIFIER_POINTER (get_identifier (str));
4649 XSTR (sym, 0) = str;
4650
4651 return sym;
4652 }
4653
4654 \f
4655 typedef struct label_ref_list_d
4656 {
4657 rtx_code_label *label;
4658 struct label_ref_list_d *next;
4659 } *label_ref_list_t;
4660
4661 static object_allocator<label_ref_list_d> label_ref_list_d_pool
4662 ("label references list", 30);
4663
4664 /* The SH cannot load a large constant into a register; constants have to
4665 come from a pc relative load. The reference of a pc relative load
4666 instruction must be less than 1k in front of the instruction. This
4667 means that we often have to dump a constant inside a function, and
4668 generate code to branch around it.
4669
4670 It is important to minimize this, since the branches will slow things
4671 down and make things bigger.
4672
4673 Worst case code looks like:
4674
4675 mov.l L1,rn
4676 bra L2
4677 nop
4678 align
4679 L1: .long value
4680 L2:
4681 ..
4682
4683 mov.l L3,rn
4684 bra L4
4685 nop
4686 align
4687 L3: .long value
4688 L4:
4689 ..
4690
4691 We fix this by performing a scan before scheduling, which notices which
4692 instructions need to have their operands fetched from the constant table
4693 and builds the table.
4694
4695 The algorithm is:
4696
4697 scan, find an instruction which needs a pcrel move. Look forward, find the
4698 last barrier which is within MAX_COUNT bytes of the requirement.
4699 If there isn't one, make one. Process all the instructions between
4700 the found insn and the barrier.
4701
4702 In the above example, we can tell that L3 is within 1k of L1, so
4703 the first move can be shrunk from the 3 insn+constant sequence into
4704 just 1 insn, and the constant moved to L3 to make:
4705
4706 mov.l L1,rn
4707 ..
4708 mov.l L3,rn
4709 bra L4
4710 nop
4711 align
4712 L3:.long value
4713 L4:.long value
4714
4715 Then the second move becomes the target for the shortening process. */
4716
4717 typedef struct
4718 {
4719 rtx value; /* Value in table. */
4720 rtx_code_label *label; /* Label of value. */
4721 label_ref_list_t wend; /* End of window. */
4722 machine_mode mode; /* Mode of value. */
4723
4724 /* True if this constant is accessed as part of a post-increment
4725 sequence. Note that HImode constants are never accessed in this way. */
4726 bool part_of_sequence_p;
4727 } pool_node;
4728
4729 /* The maximum number of constants that can fit into one pool, since
4730 constants in the range 0..510 are at least 2 bytes long, and in the
4731 range from there to 1018 at least 4 bytes. */
4732
4733 #define MAX_POOL_SIZE 372
4734 static pool_node pool_vector[MAX_POOL_SIZE];
4735 static int pool_size;
4736 static rtx_code_label *pool_window_label;
4737 static int pool_window_last;
4738
4739 static int max_labelno_before_reorg;
4740
4741 /* ??? If we need a constant in HImode which is the truncated value of a
4742 constant we need in SImode, we could combine the two entries thus saving
4743 two bytes. Is this common enough to be worth the effort of implementing
4744 it? */
4745
4746 /* ??? This stuff should be done at the same time that we shorten branches.
4747 As it is now, we must assume that all branches are the maximum size, and
4748 this causes us to almost always output constant pools sooner than
4749 necessary. */
4750
4751 /* Add a constant to the pool and return its label. */
4752 static rtx_code_label *
4753 add_constant (rtx x, machine_mode mode, rtx last_value)
4754 {
4755 int i;
4756 rtx_code_label *lab, *new_rtx;
4757 label_ref_list_t ref, newref;
4758
4759 /* First see if we've already got it. */
4760 for (i = 0; i < pool_size; i++)
4761 {
4762 if (x->code == pool_vector[i].value->code
4763 && mode == pool_vector[i].mode)
4764 {
4765 if (x->code == CODE_LABEL)
4766 {
4767 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4768 continue;
4769 }
4770 if (rtx_equal_p (x, pool_vector[i].value))
4771 {
4772 lab = new_rtx = 0;
4773 if (! last_value
4774 || ! i
4775 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4776 {
4777 new_rtx = gen_label_rtx ();
4778 LABEL_REFS (new_rtx) = pool_vector[i].label;
4779 pool_vector[i].label = lab = new_rtx;
4780 }
4781 if (lab && pool_window_label)
4782 {
4783 newref = label_ref_list_d_pool.allocate ();
4784 newref->label = pool_window_label;
4785 ref = pool_vector[pool_window_last].wend;
4786 newref->next = ref;
4787 pool_vector[pool_window_last].wend = newref;
4788 }
4789 if (new_rtx)
4790 pool_window_label = new_rtx;
4791 pool_window_last = i;
4792 return lab;
4793 }
4794 }
4795 }
4796
4797 /* Need a new one. */
4798 pool_vector[pool_size].value = x;
4799 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4800 {
4801 lab = 0;
4802 pool_vector[pool_size - 1].part_of_sequence_p = true;
4803 }
4804 else
4805 lab = gen_label_rtx ();
4806 pool_vector[pool_size].mode = mode;
4807 pool_vector[pool_size].label = lab;
4808 pool_vector[pool_size].wend = NULL;
4809 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4810 if (lab && pool_window_label)
4811 {
4812 newref = label_ref_list_d_pool.allocate ();
4813 newref->label = pool_window_label;
4814 ref = pool_vector[pool_window_last].wend;
4815 newref->next = ref;
4816 pool_vector[pool_window_last].wend = newref;
4817 }
4818 if (lab)
4819 pool_window_label = lab;
4820 pool_window_last = pool_size;
4821 pool_size++;
4822 return lab;
4823 }
4824
4825 /* Output the literal table. START, if nonzero, is the first instruction
4826 this table is needed for, and also indicates that there is at least one
4827 casesi_worker_2 instruction; we have to emit the operand3 labels from
4828 these insns at a 4-byte aligned position. BARRIER is the barrier
4829 after which we are to place the table. */
4830 static void
4831 dump_table (rtx_insn *start, rtx_insn *barrier)
4832 {
4833 rtx_insn *scan = barrier;
4834 int i;
4835 bool need_align = true;
4836 rtx lab;
4837 label_ref_list_t ref;
4838 bool have_df = false;
4839
4840 /* Do two passes, first time dump out the HI sized constants. */
4841
4842 for (i = 0; i < pool_size; i++)
4843 {
4844 pool_node *p = &pool_vector[i];
4845
4846 if (p->mode == HImode)
4847 {
4848 if (need_align)
4849 {
4850 scan = emit_insn_after (gen_align_2 (), scan);
4851 need_align = false;
4852 }
4853 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4854 scan = emit_label_after (lab, scan);
4855 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4856 scan);
4857 for (ref = p->wend; ref; ref = ref->next)
4858 {
4859 lab = ref->label;
4860 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4861 }
4862 }
4863 else if (p->mode == DFmode)
4864 have_df = true;
4865 }
4866
4867 need_align = true;
4868
4869 if (start)
4870 {
4871 scan = emit_insn_after (gen_align_4 (), scan);
4872 need_align = false;
4873 for (; start != barrier; start = NEXT_INSN (start))
4874 if (NONJUMP_INSN_P (start)
4875 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4876 {
4877 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4878 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4879
4880 scan = emit_label_after (lab, scan);
4881 }
4882 }
4883 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4884 {
4885 rtx_insn *align_insn = NULL;
4886
4887 scan = emit_label_after (gen_label_rtx (), scan);
4888 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4889 need_align = false;
4890
4891 for (i = 0; i < pool_size; i++)
4892 {
4893 pool_node *p = &pool_vector[i];
4894
4895 switch (p->mode)
4896 {
4897 case HImode:
4898 break;
4899 case SImode:
4900 case SFmode:
4901 if (align_insn && !p->part_of_sequence_p)
4902 {
4903 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4904 emit_label_before (lab, align_insn);
4905 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4906 align_insn);
4907 for (ref = p->wend; ref; ref = ref->next)
4908 {
4909 lab = ref->label;
4910 emit_insn_before (gen_consttable_window_end (lab),
4911 align_insn);
4912 }
4913 delete_insn (align_insn);
4914 align_insn = NULL;
4915 continue;
4916 }
4917 else
4918 {
4919 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4920 scan = emit_label_after (lab, scan);
4921 scan = emit_insn_after (gen_consttable_4 (p->value,
4922 const0_rtx), scan);
4923 need_align = ! need_align;
4924 }
4925 break;
4926 case DFmode:
4927 if (need_align)
4928 {
4929 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4930 align_insn = scan;
4931 need_align = false;
4932 }
4933 case DImode:
4934 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4935 scan = emit_label_after (lab, scan);
4936 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4937 scan);
4938 break;
4939 default:
4940 gcc_unreachable ();
4941 }
4942
4943 if (p->mode != HImode)
4944 {
4945 for (ref = p->wend; ref; ref = ref->next)
4946 {
4947 lab = ref->label;
4948 scan = emit_insn_after (gen_consttable_window_end (lab),
4949 scan);
4950 }
4951 }
4952 }
4953
4954 pool_size = 0;
4955 }
4956
4957 for (i = 0; i < pool_size; i++)
4958 {
4959 pool_node *p = &pool_vector[i];
4960
4961 switch (p->mode)
4962 {
4963 case HImode:
4964 break;
4965 case SImode:
4966 case SFmode:
4967 if (need_align)
4968 {
4969 need_align = false;
4970 scan = emit_label_after (gen_label_rtx (), scan);
4971 scan = emit_insn_after (gen_align_4 (), scan);
4972 }
4973 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4974 scan = emit_label_after (lab, scan);
4975 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4976 scan);
4977 break;
4978 case DFmode:
4979 case DImode:
4980 if (need_align)
4981 {
4982 need_align = false;
4983 scan = emit_label_after (gen_label_rtx (), scan);
4984 scan = emit_insn_after (gen_align_4 (), scan);
4985 }
4986 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4987 scan = emit_label_after (lab, scan);
4988 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4989 scan);
4990 break;
4991 default:
4992 gcc_unreachable ();
4993 }
4994
4995 if (p->mode != HImode)
4996 {
4997 for (ref = p->wend; ref; ref = ref->next)
4998 {
4999 lab = ref->label;
5000 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
5001 }
5002 }
5003 }
5004
5005 scan = emit_insn_after (gen_consttable_end (), scan);
5006 scan = emit_barrier_after (scan);
5007 pool_size = 0;
5008 pool_window_label = NULL;
5009 pool_window_last = 0;
5010 }
5011
5012 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
5013
5014 /* Nonzero if the insn is a move instruction which needs to be fixed. */
5015
5016 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
5017 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
5018 need to fix it if the input value is CONST_OK_FOR_I08. */
5019 static bool
5020 broken_move (rtx_insn *insn)
5021 {
5022 if (NONJUMP_INSN_P (insn))
5023 {
5024 rtx pat = PATTERN (insn);
5025 if (GET_CODE (pat) == PARALLEL)
5026 pat = XVECEXP (pat, 0, 0);
5027 if (GET_CODE (pat) == SET
5028 /* We can load any 8-bit value if we don't care what the high
5029 order bits end up as. */
5030 && GET_MODE (SET_DEST (pat)) != QImode
5031 && (CONSTANT_P (SET_SRC (pat))
5032 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
5033 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
5034 /* Match mova_const. */
5035 || (GET_CODE (SET_SRC (pat)) == UNSPEC
5036 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
5037 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
5038 && ! (TARGET_SH2E
5039 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
5040 && (fp_zero_operand (SET_SRC (pat))
5041 || fp_one_operand (SET_SRC (pat)))
5042 /* In general we don't know the current setting of fpscr, so
5043 disable fldi.
5044 There is an exception if this was a register-register move
5045 before reload - and hence it was ascertained that we have
5046 single precision setting - and in a post-reload optimization
5047 we changed this to do a constant load. In that case
5048 we don't have an r0 clobber, hence we must use fldi. */
5049 && (TARGET_FMOVD
5050 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
5051 == SCRATCH))
5052 && REG_P (SET_DEST (pat))
5053 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
5054 && ! (TARGET_SH2A
5055 && GET_MODE (SET_DEST (pat)) == SImode
5056 && (satisfies_constraint_I20 (SET_SRC (pat))
5057 || satisfies_constraint_I28 (SET_SRC (pat))))
5058 && ! satisfies_constraint_I08 (SET_SRC (pat)))
5059 return true;
5060 }
5061
5062 return false;
5063 }
5064
5065 /* Return true if the specified insn is a mova insn. */
5066 static bool
5067 mova_p (rtx_insn *insn)
5068 {
5069 return (NONJUMP_INSN_P (insn)
5070 && GET_CODE (PATTERN (insn)) == SET
5071 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
5072 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
5073 /* Don't match mova_const. */
5074 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
5075 }
5076
5077 /* Fix up a mova from a switch that went out of range. */
5078 static void
5079 fixup_mova (rtx_insn *mova)
5080 {
5081 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
5082 if (! flag_pic)
5083 {
5084 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
5085 INSN_CODE (mova) = -1;
5086 }
5087 else
5088 {
5089 rtx_insn *worker = mova;
5090 rtx_code_label *lab = gen_label_rtx ();
5091 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
5092
5093 do
5094 {
5095 worker = NEXT_INSN (worker);
5096 gcc_assert (worker
5097 && !LABEL_P (worker)
5098 && !JUMP_P (worker));
5099 } while (NOTE_P (worker)
5100 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
5101 wpat = PATTERN (worker);
5102 wpat0 = XVECEXP (wpat, 0, 0);
5103 wpat1 = XVECEXP (wpat, 0, 1);
5104 wsrc = SET_SRC (wpat0);
5105 PATTERN (worker) = (gen_casesi_worker_2
5106 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
5107 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
5108 XEXP (wpat1, 0)));
5109 INSN_CODE (worker) = -1;
5110 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
5111 base = gen_rtx_LABEL_REF (Pmode, lab);
5112 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
5113 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
5114 INSN_CODE (mova) = -1;
5115 }
5116 }
5117
5118 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
5119 *num_mova, and check if the new mova is not nested within the first one.
5120 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
5121 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
5122 static int
5123 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
5124 {
5125 int n_addr = 0; /* Initialization to shut up spurious warning. */
5126 int f_target, n_target = 0; /* Likewise. */
5127
5128 if (optimize)
5129 {
5130 /* If NEW_MOVA has no address yet, it will be handled later. */
5131 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
5132 return -1;
5133
5134 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
5135 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
5136 if (n_addr > n_target || n_addr + 1022 < n_target)
5137 {
5138 /* Change the mova into a load.
5139 broken_move will then return true for it. */
5140 fixup_mova (new_mova);
5141 return 1;
5142 }
5143 }
5144 if (!(*num_mova)++)
5145 {
5146 *first_mova = new_mova;
5147 return 2;
5148 }
5149 if (!optimize
5150 || ((f_target
5151 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5152 >= n_target))
5153 return -1;
5154
5155 (*num_mova)--;
5156 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5157 > n_target - n_addr)
5158 {
5159 fixup_mova (*first_mova);
5160 return 0;
5161 }
5162 else
5163 {
5164 fixup_mova (new_mova);
5165 return 1;
5166 }
5167 }
5168
5169 /* Find the last barrier from insn FROM which is close enough to hold the
5170 constant pool. If we can't find one, then create one near the end of
5171 the range. */
5172 static rtx_insn *
5173 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
5174 {
5175 int count_si = 0;
5176 int count_hi = 0;
5177 int found_hi = 0;
5178 int found_si = 0;
5179 int found_di = 0;
5180 int hi_align = 2;
5181 int si_align = 2;
5182 int leading_mova = num_mova;
5183 rtx_insn *barrier_before_mova = NULL;
5184 rtx_insn *found_barrier = NULL;
5185 rtx_insn *good_barrier = NULL;
5186 int si_limit;
5187 int hi_limit;
5188 rtx_insn *orig = from;
5189 rtx_insn *last_got = NULL;
5190 rtx_insn *last_symoff = NULL;
5191
5192 /* For HImode: range is 510, add 4 because pc counts from address of
5193 second instruction after this one, subtract 2 for the jump instruction
5194 that we may need to emit before the table, subtract 2 for the instruction
5195 that fills the jump delay slot (in very rare cases, reorg will take an
5196 instruction from after the constant pool or will leave the delay slot
5197 empty). This gives 510.
5198 For SImode: range is 1020, add 4 because pc counts from address of
5199 second instruction after this one, subtract 2 in case pc is 2 byte
5200 aligned, subtract 2 for the jump instruction that we may need to emit
5201 before the table, subtract 2 for the instruction that fills the jump
5202 delay slot. This gives 1018. */
5203
5204 /* The branch will always be shortened now that the reference address for
5205 forward branches is the successor address, thus we need no longer make
5206 adjustments to the [sh]i_limit for -O0. */
5207
5208 si_limit = 1018;
5209 hi_limit = 510;
5210
5211 while (from && count_si < si_limit && count_hi < hi_limit)
5212 {
5213 int inc = get_attr_length (from);
5214 int new_align = 1;
5215
5216 /* If this is a label that existed at the time of the compute_alignments
5217 call, determine the alignment. N.B. When find_barrier recurses for
5218 an out-of-reach mova, we might see labels at the start of previously
5219 inserted constant tables. */
5220 if (LABEL_P (from)
5221 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5222 {
5223 if (optimize)
5224 new_align = 1 << label_to_alignment (from);
5225 else if (BARRIER_P (prev_nonnote_insn (from)))
5226 new_align = 1 << barrier_align (from);
5227 else
5228 new_align = 1;
5229 inc = 0;
5230 }
5231 /* In case we are scanning a constant table because of recursion, check
5232 for explicit alignments. If the table is long, we might be forced
5233 to emit the new table in front of it; the length of the alignment
5234 might be the last straw. */
5235 else if (NONJUMP_INSN_P (from)
5236 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5237 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5238 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5239 /* When we find the end of a constant table, paste the new constant
5240 at the end. That is better than putting it in front because
5241 this way, we don't need extra alignment for adding a 4-byte-aligned
5242 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5243 else if (NONJUMP_INSN_P (from)
5244 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5245 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5246 return from;
5247
5248 if (BARRIER_P (from))
5249 {
5250 rtx_insn *next;
5251
5252 found_barrier = from;
5253
5254 /* If we are at the end of the function, or in front of an alignment
5255 instruction, we need not insert an extra alignment. We prefer
5256 this kind of barrier. */
5257 if (barrier_align (from) > 2)
5258 good_barrier = from;
5259
5260 /* If we are at the end of a hot/cold block, dump the constants
5261 here. */
5262 next = NEXT_INSN (from);
5263 if (next
5264 && NOTE_P (next)
5265 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5266 break;
5267 }
5268
5269 if (broken_move (from))
5270 {
5271 rtx pat, src, dst;
5272 machine_mode mode;
5273
5274 pat = PATTERN (from);
5275 if (GET_CODE (pat) == PARALLEL)
5276 pat = XVECEXP (pat, 0, 0);
5277 src = SET_SRC (pat);
5278 dst = SET_DEST (pat);
5279 mode = GET_MODE (dst);
5280
5281 /* A GOT pc-relative setting comes in a pair of
5282 mova .L8,r0
5283 mov.l .L8,r12
5284 instructions (plus an add r0,r12).
5285 Remember if we see one without the other. */
5286 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5287 last_got = last_got ? NULL : from;
5288 else if (PIC_ADDR_P (src))
5289 last_got = last_got ? NULL : from;
5290
5291 /* We must explicitly check the mode, because sometimes the
5292 front end will generate code to load unsigned constants into
5293 HImode targets without properly sign extending them. */
5294 if (mode == HImode
5295 || (mode == SImode && satisfies_constraint_I16 (src)
5296 && REGNO (dst) != FPUL_REG))
5297 {
5298 found_hi += 2;
5299 /* We put the short constants before the long constants, so
5300 we must count the length of short constants in the range
5301 for the long constants. */
5302 /* ??? This isn't optimal, but is easy to do. */
5303 si_limit -= 2;
5304 }
5305 else
5306 {
5307 /* We dump DF/DI constants before SF/SI ones, because
5308 the limit is the same, but the alignment requirements
5309 are higher. We may waste up to 4 additional bytes
5310 for alignment, and the DF/DI constant may have
5311 another SF/SI constant placed before it. */
5312 if (TARGET_SHCOMPACT
5313 && ! found_di
5314 && (mode == DFmode || mode == DImode))
5315 {
5316 found_di = 1;
5317 si_limit -= 8;
5318 }
5319 while (si_align > 2 && found_si + si_align - 2 > count_si)
5320 si_align >>= 1;
5321 if (found_si > count_si)
5322 count_si = found_si;
5323 found_si += GET_MODE_SIZE (mode);
5324 if (num_mova)
5325 si_limit -= GET_MODE_SIZE (mode);
5326 }
5327 }
5328
5329 if (mova_p (from))
5330 {
5331 switch (untangle_mova (&num_mova, &mova, from))
5332 {
5333 case 1:
5334 if (flag_pic)
5335 {
5336 rtx src = SET_SRC (PATTERN (from));
5337 if (GET_CODE (src) == CONST
5338 && GET_CODE (XEXP (src, 0)) == UNSPEC
5339 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5340 last_symoff = from;
5341 }
5342 break;
5343 case 0: return find_barrier (0, 0, mova);
5344 case 2:
5345 {
5346 leading_mova = 0;
5347 barrier_before_mova
5348 = good_barrier ? good_barrier : found_barrier;
5349 }
5350 default: break;
5351 }
5352 if (found_si > count_si)
5353 count_si = found_si;
5354 }
5355 else if (JUMP_TABLE_DATA_P (from)
5356 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5357 {
5358 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5359 || (num_mova
5360 && (prev_nonnote_insn (from)
5361 == XEXP (MOVA_LABELREF (mova), 0))))
5362 num_mova--;
5363 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5364 {
5365 /* We have just passed the barrier in front of the
5366 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5367 the ADDR_DIFF_VEC is accessed as data, just like our pool
5368 constants, this is a good opportunity to accommodate what
5369 we have gathered so far.
5370 If we waited any longer, we could end up at a barrier in
5371 front of code, which gives worse cache usage for separated
5372 instruction / data caches. */
5373 good_barrier = found_barrier;
5374 break;
5375 }
5376 else
5377 {
5378 rtx body = PATTERN (from);
5379 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5380 }
5381 }
5382 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5383 else if (JUMP_P (from)
5384 && ! TARGET_SH2
5385 && ! optimize_size)
5386 new_align = 4;
5387
5388 /* There is a possibility that a bf is transformed into a bf/s by the
5389 delay slot scheduler. */
5390 if (JUMP_P (from)
5391 && get_attr_type (from) == TYPE_CBRANCH
5392 && ! sequence_insn_p (from))
5393 inc += 2;
5394
5395 if (found_si)
5396 {
5397 count_si += inc;
5398 if (new_align > si_align)
5399 {
5400 si_limit -= (count_si - 1) & (new_align - si_align);
5401 si_align = new_align;
5402 }
5403 count_si = (count_si + new_align - 1) & -new_align;
5404 }
5405 if (found_hi)
5406 {
5407 count_hi += inc;
5408 if (new_align > hi_align)
5409 {
5410 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5411 hi_align = new_align;
5412 }
5413 count_hi = (count_hi + new_align - 1) & -new_align;
5414 }
5415 from = NEXT_INSN (from);
5416 }
5417
5418 if (num_mova)
5419 {
5420 if (leading_mova)
5421 {
5422 /* Try as we might, the leading mova is out of range. Change
5423 it into a load (which will become a pcload) and retry. */
5424 fixup_mova (mova);
5425 return find_barrier (0, 0, mova);
5426 }
5427 else
5428 {
5429 /* Insert the constant pool table before the mova instruction,
5430 to prevent the mova label reference from going out of range. */
5431 from = mova;
5432 good_barrier = found_barrier = barrier_before_mova;
5433 }
5434 }
5435
5436 if (found_barrier)
5437 {
5438 if (good_barrier && next_real_insn (found_barrier))
5439 found_barrier = good_barrier;
5440 }
5441 else
5442 {
5443 /* We didn't find a barrier in time to dump our stuff,
5444 so we'll make one. */
5445 rtx_code_label *label = gen_label_rtx ();
5446
5447 /* Don't emit a constant table in the middle of insns for
5448 casesi_worker_2. This is a bit of overkill, but it is enough
5449 because casesi_worker_2 doesn't appear very frequently. */
5450 if (last_symoff)
5451 from = last_symoff;
5452
5453 /* If we exceeded the range, then we must back up over the last
5454 instruction we looked at. Otherwise, we just need to undo the
5455 NEXT_INSN at the end of the loop. */
5456 if (PREV_INSN (from) != orig
5457 && (count_hi > hi_limit || count_si > si_limit))
5458 from = PREV_INSN (PREV_INSN (from));
5459 else
5460 from = PREV_INSN (from);
5461
5462 /* Don't emit a constant table in the middle of global pointer setting,
5463 since that would move the addressing base GOT into another table.
5464 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5465 in the pool anyway, so just move up the whole constant pool.
5466
5467 However, avoid doing so when the last single GOT mov is the starting
5468 insn itself. Going back past the start insn would create a negative
5469 offset, causing errors. */
5470 if (last_got && last_got != orig)
5471 from = PREV_INSN (last_got);
5472
5473 /* Don't insert the constant pool table at the position which
5474 may be the landing pad. */
5475 if (flag_exceptions
5476 && CALL_P (from)
5477 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5478 from = PREV_INSN (from);
5479
5480 /* Walk back to be just before any jump or label.
5481 Putting it before a label reduces the number of times the branch
5482 around the constant pool table will be hit. Putting it before
5483 a jump makes it more likely that the bra delay slot will be
5484 filled. */
5485 while (NOTE_P (from) || JUMP_P (from)
5486 || LABEL_P (from))
5487 from = PREV_INSN (from);
5488
5489 /* Make sure we do not split between a call and its corresponding
5490 CALL_ARG_LOCATION note. */
5491 if (CALL_P (from))
5492 {
5493 rtx_insn *next = NEXT_INSN (from);
5494 if (next && NOTE_P (next)
5495 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5496 from = next;
5497 }
5498
5499 from = emit_jump_insn_after (gen_jump (label), from);
5500 JUMP_LABEL (from) = label;
5501 LABEL_NUSES (label) = 1;
5502 found_barrier = emit_barrier_after (from);
5503 emit_label_after (label, found_barrier);
5504 }
5505
5506 return found_barrier;
5507 }
5508
5509 /* If the instruction INSN is implemented by a special function, and we can
5510 positively find the register that is used to call the sfunc, and this
5511 register is not used anywhere else in this instruction - except as the
5512 destination of a set, return this register; else, return 0. */
5513 rtx
5514 sfunc_uses_reg (rtx_insn *insn)
5515 {
5516 int i;
5517 rtx pattern, part, reg_part, reg;
5518
5519 if (!NONJUMP_INSN_P (insn))
5520 return NULL_RTX;
5521 pattern = PATTERN (insn);
5522 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5523 return NULL_RTX;
5524
5525 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5526 {
5527 part = XVECEXP (pattern, 0, i);
5528 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5529 reg_part = part;
5530 }
5531 if (! reg_part)
5532 return NULL_RTX;
5533 reg = XEXP (reg_part, 0);
5534 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5535 {
5536 part = XVECEXP (pattern, 0, i);
5537 if (part == reg_part || GET_CODE (part) == CLOBBER)
5538 continue;
5539 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5540 && REG_P (SET_DEST (part)))
5541 ? SET_SRC (part) : part)))
5542 return NULL_RTX;
5543 }
5544 return reg;
5545 }
5546
5547 /* See if the only way in which INSN uses REG is by calling it, or by
5548 setting it while calling it. Set *SET to a SET rtx if the register
5549 is set by INSN. */
5550 static bool
5551 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5552 {
5553 rtx pattern, reg2;
5554
5555 *set = NULL_RTX;
5556
5557 reg2 = sfunc_uses_reg (insn);
5558 if (reg2 && REGNO (reg2) == REGNO (reg))
5559 {
5560 pattern = single_set (insn);
5561 if (pattern
5562 && REG_P (SET_DEST (pattern))
5563 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5564 *set = pattern;
5565 return false;
5566 }
5567 if (!CALL_P (insn))
5568 {
5569 /* We don't use rtx_equal_p because we don't care if the mode is
5570 different. */
5571 pattern = single_set (insn);
5572 if (pattern
5573 && REG_P (SET_DEST (pattern))
5574 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5575 {
5576 rtx par, part;
5577 int i;
5578
5579 *set = pattern;
5580 par = PATTERN (insn);
5581 if (GET_CODE (par) == PARALLEL)
5582 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5583 {
5584 part = XVECEXP (par, 0, i);
5585 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5586 return true;
5587 }
5588 return reg_mentioned_p (reg, SET_SRC (pattern));
5589 }
5590
5591 return true;
5592 }
5593
5594 pattern = PATTERN (insn);
5595
5596 if (GET_CODE (pattern) == PARALLEL)
5597 {
5598 int i;
5599
5600 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5601 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5602 return true;
5603 pattern = XVECEXP (pattern, 0, 0);
5604 }
5605
5606 if (GET_CODE (pattern) == SET)
5607 {
5608 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5609 {
5610 /* We don't use rtx_equal_p, because we don't care if the
5611 mode is different. */
5612 if (!REG_P (SET_DEST (pattern))
5613 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5614 return true;
5615
5616 *set = pattern;
5617 }
5618
5619 pattern = SET_SRC (pattern);
5620 }
5621
5622 if (GET_CODE (pattern) != CALL
5623 || !MEM_P (XEXP (pattern, 0))
5624 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5625 return true;
5626
5627 return false;
5628 }
5629
5630 /* Given X, a pattern of an insn or a part of it, return a mask of used
5631 general registers. Bits 0..15 mean that the respective registers
5632 are used as inputs in the instruction. Bits 16..31 mean that the
5633 registers 0..15, respectively, are used as outputs, or are clobbered.
5634 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
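/* For example (illustrative only), a pattern such as
     (set (reg:SI 3) (reg:SI 5))
   would yield 0x00080020: bit 5 marks r5 as an input, and bit 19 (3 + 16)
   marks r3 as an output.  */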
5635 int
5636 regs_used (rtx x, int is_dest)
5637 {
5638 enum rtx_code code;
5639 const char *fmt;
5640 int i, used = 0;
5641
5642 if (! x)
5643 return used;
5644 code = GET_CODE (x);
5645 switch (code)
5646 {
5647 case REG:
5648 if (REGNO (x) < 16)
5649 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5650 << (REGNO (x) + is_dest));
5651 return 0;
5652 case SUBREG:
5653 {
5654 rtx y = SUBREG_REG (x);
5655
5656 if (!REG_P (y))
5657 break;
5658 if (REGNO (y) < 16)
5659 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5660 << (REGNO (y) +
5661 subreg_regno_offset (REGNO (y),
5662 GET_MODE (y),
5663 SUBREG_BYTE (x),
5664 GET_MODE (x)) + is_dest));
5665 return 0;
5666 }
5667 case SET:
5668 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5669 case RETURN:
5670 /* If there was a return value, it must have been indicated with USE. */
5671 return 0x00ffff00;
5672 case CLOBBER:
5673 is_dest = 1;
5674 break;
5675 case MEM:
5676 is_dest = 0;
5677 break;
5678 case CALL:
5679 used |= 0x00ff00f0;
5680 break;
5681 default:
5682 break;
5683 }
5684
5685 fmt = GET_RTX_FORMAT (code);
5686
5687 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5688 {
5689 if (fmt[i] == 'E')
5690 {
5691 int j;
5692 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5693 used |= regs_used (XVECEXP (x, i, j), is_dest);
5694 }
5695 else if (fmt[i] == 'e')
5696 used |= regs_used (XEXP (x, i), is_dest);
5697 }
5698 return used;
5699 }
5700
5701 /* Create an instruction that prevents redirection of a conditional branch
5702 to the destination of the JUMP with address ADDR.
5703 If the branch needs to be implemented as an indirect jump, try to find
5704 a scratch register for it.
5705 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5706 If any preceding insn that doesn't fit into a delay slot is good enough,
5707 pass 1. Pass 2 if a definite blocking insn is needed.
5708 -1 is used internally to avoid deep recursion.
5709 If a blocking instruction is made or recognized, return it. */
5710 static rtx_insn *
5711 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5712 {
5713 int dead = 0;
5714 rtx_insn *prev = prev_nonnote_insn (jump);
5715 rtx dest;
5716
5717 /* First, check if we already have an instruction that satisfies our need. */
5718 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5719 {
5720 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5721 return prev;
5722 if (GET_CODE (PATTERN (prev)) == USE
5723 || GET_CODE (PATTERN (prev)) == CLOBBER
5724 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5725 prev = jump;
5726 else if ((need_block &= ~1) < 0)
5727 return prev;
5728 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5729 need_block = 0;
5730 }
5731 if (GET_CODE (PATTERN (jump)) == RETURN)
5732 {
5733 if (! need_block)
5734 return prev;
5735 /* Reorg even does nasty things with return insns that cause branches
5736 to go out of range - see find_end_label and callers. */
5737 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5738 }
5739 /* We can't use JUMP_LABEL here because it might be undefined
5740 when not optimizing. */
5741 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5742 /* If the branch is out of range, try to find a scratch register for it. */
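/* The single unsigned comparison below acts as a range check: it is true
   exactly when the displacement to DEST falls outside roughly
   [-4092, +4098], relying on unsigned wrap-around of the biased value.  */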
5743 if (optimize
5744 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5745 > 4092 + 4098))
5746 {
5747 rtx_insn *scan;
5748 /* Don't look for the stack pointer as a scratch register,
5749 it would cause trouble if an interrupt occurred. */
5750 unsigned attempt = 0x7fff, used;
5751 int jump_left = flag_expensive_optimizations + 1;
5752
5753 /* It is likely that the most recent eligible instruction is wanted for
5754 the delay slot. Therefore, find out which registers it uses, and
5755 try to avoid using them. */
5756
5757 for (scan = jump; (scan = PREV_INSN (scan)); )
5758 {
5759 enum rtx_code code;
5760
5761 if (scan->deleted ())
5762 continue;
5763 code = GET_CODE (scan);
5764 if (code == CODE_LABEL || code == JUMP_INSN)
5765 break;
5766 if (code == INSN
5767 && GET_CODE (PATTERN (scan)) != USE
5768 && GET_CODE (PATTERN (scan)) != CLOBBER
5769 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5770 {
5771 attempt &= ~regs_used (PATTERN (scan), 0);
5772 break;
5773 }
5774 }
5775 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5776 (scan = NEXT_INSN (scan)); )
5777 {
5778 enum rtx_code code;
5779
5780 if (scan->deleted ())
5781 continue;
5782 code = GET_CODE (scan);
5783 if (INSN_P (scan))
5784 {
5785 used |= regs_used (PATTERN (scan), 0);
5786 if (code == CALL_INSN)
5787 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5788 dead |= (used >> 16) & ~used;
5789 if (dead & attempt)
5790 {
5791 dead &= attempt;
5792 break;
5793 }
5794 if (code == JUMP_INSN)
5795 {
5796 if (jump_left-- && simplejump_p (scan))
5797 scan = JUMP_LABEL_AS_INSN (scan);
5798 else
5799 break;
5800 }
5801 }
5802 }
5803 /* Mask out the stack pointer again, in case it was
5804 the only 'free' register we have found. */
5805 dead &= 0x7fff;
5806 }
5807 /* If the immediate destination is still in range, check for possible
5808 threading with a jump beyond the delay slot insn.
5809 Don't check if we are called recursively; in that case the jump has been
5810 or will be checked in a different invocation. */
5811
5812 else if (optimize && need_block >= 0)
5813 {
5814 rtx_insn *next = next_active_insn (next_active_insn (dest));
5815 if (next && JUMP_P (next)
5816 && GET_CODE (PATTERN (next)) == SET
5817 && recog_memoized (next) == CODE_FOR_jump_compact)
5818 {
5819 dest = JUMP_LABEL (next);
5820 if (dest
5821 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5822 > 4092 + 4098))
5823 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5824 }
5825 }
5826
5827 if (dead)
5828 {
5829 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5830
5831 /* It would be nice if we could convert the jump into an indirect
5832 jump / far branch right now, thus exposing all the constituent
5833 instructions to further optimization. However, reorg uses
5834 simplejump_p to determine if there is an unconditional jump where
5835 it should try to schedule instructions from the target of the
5836 branch; simplejump_p fails for indirect jumps even if they have
5837 a JUMP_LABEL. */
5838 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5839 (reg, GEN_INT (unspec_bbr_uid++)),
5840 jump);
5841 /* ??? We would like this to have the scope of the jump, but that
5842 scope will change when a delay slot insn of an inner scope is added.
5843 Hence, after delay slot scheduling, we'll have to expect
5844 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5845 the jump. */
5846
5847 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5848 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5849 return insn;
5850 }
5851 else if (need_block)
5852 /* We can't use JUMP_LABEL here because it might be undefined
5853 when not optimizing. */
5854 return emit_insn_before (gen_block_branch_redirect
5855 (GEN_INT (unspec_bbr_uid++)),
5856 jump);
5857 return prev;
5858 }
5859
5860 #define CONDJUMP_MIN -252
5861 #define CONDJUMP_MAX 262
5862 struct far_branch
5863 {
5864 /* A label (to be placed) in front of the jump
5865 that jumps to our ultimate destination. */
5866 rtx_insn *near_label;
5867 /* Where we are going to insert it if we cannot move the jump any farther,
5868 or the jump itself if we have picked up an existing jump. */
5869 rtx_insn *insert_place;
5870 /* The ultimate destination. */
5871 rtx_insn *far_label;
5872 struct far_branch *prev;
5873 /* If the branch has already been created, its address;
5874 else the address of its first prospective user. */
5875 int address;
5876 };
5877
5878 static void gen_far_branch (struct far_branch *);
5879 enum mdep_reorg_phase_e mdep_reorg_phase;
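/* Expand the far branch described by BP.  Roughly (illustrative sketch
   only; label names are hypothetical), an out-of-range conditional branch
       bt   .Lfar
   is rewritten as
       bf   .Lskip      ! inverted condition, short branch
   .Lnear:              ! near_label; other branches to .Lfar can target it
       bra  .Lfar       ! unconditional branch that can reach the target
   .Lskip:  */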
5880 static void
5881 gen_far_branch (struct far_branch *bp)
5882 {
5883 rtx_insn *insn = bp->insert_place;
5884 rtx_jump_insn *jump;
5885 rtx_code_label *label = gen_label_rtx ();
5886 int ok;
5887
5888 emit_label_after (label, insn);
5889 if (bp->far_label)
5890 {
5891 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5892 LABEL_NUSES (bp->far_label)++;
5893 }
5894 else
5895 jump = emit_jump_insn_after (gen_return (), insn);
5896
5897 /* Emit a barrier so that reorg knows that any following instructions
5898 are not reachable via a fall-through path.
5899 But don't do this when not optimizing, since we wouldn't suppress the
5900 alignment for the barrier then, and could end up with out-of-range
5901 pc-relative loads. */
5902 if (optimize)
5903 emit_barrier_after (jump);
5904 emit_label_after (bp->near_label, insn);
5905
5906 if (bp->far_label)
5907 JUMP_LABEL (jump) = bp->far_label;
5908 else
5909 {
5910 rtx pat = PATTERN (jump);
5911 gcc_assert (ANY_RETURN_P (pat));
5912 JUMP_LABEL (jump) = pat;
5913 }
5914
5915 ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5916 gcc_assert (ok);
5917
5918 /* If we are branching around a jump (rather than a return), prevent
5919 reorg from using an insn from the jump target as the delay slot insn -
5920 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5921 and it could cause branches to go out of range. */
5922 if (bp->far_label)
5923 (emit_insn_after
5924 (gen_stuff_delay_slot
5925 (GEN_INT (unspec_bbr_uid++),
5926 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5927 insn));
5928 /* Prevent reorg from undoing our splits. */
5929 gen_block_redirect (jump, bp->address += 2, 2);
5930 }
5931
5932 /* Fix up ADDR_DIFF_VECs. */
5933 void
5934 fixup_addr_diff_vecs (rtx_insn *first)
5935 {
5936 rtx_insn *insn;
5937
5938 for (insn = first; insn; insn = NEXT_INSN (insn))
5939 {
5940 rtx vec_lab, pat, prevpat, x, braf_label;
5941 rtx_insn *prev;
5942
5943 if (! JUMP_TABLE_DATA_P (insn)
5944 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5945 continue;
5946 pat = PATTERN (insn);
5947 vec_lab = XEXP (XEXP (pat, 0), 0);
5948
5949 /* Search the matching casesi_jump_2. */
5950 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5951 {
5952 if (!JUMP_P (prev))
5953 continue;
5954 prevpat = PATTERN (prev);
5955 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5956 continue;
5957 x = XVECEXP (prevpat, 0, 1);
5958 if (GET_CODE (x) != USE)
5959 continue;
5960 x = XEXP (x, 0);
5961 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5962 break;
5963 }
5964 /* FIXME: This is a bug in the optimizer, but it seems harmless
5965 to just avoid panicking. */
5966 if (!prev)
5967 continue;
5968
5969 /* Emit the reference label of the braf where it belongs, right after
5970 the casesi_jump_2 (i.e. braf). */
5971 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5972 emit_label_after (braf_label, prev);
5973
5974 /* Fix up the ADDR_DIFF_VEC to be relative
5975 to the reference address of the braf. */
5976 XEXP (XEXP (pat, 0), 0) = braf_label;
5977 }
5978 }
5979
5980 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5981 a barrier. Return the base 2 logarithm of the desired alignment. */
5982 int
5983 barrier_align (rtx_insn *barrier_or_label)
5984 {
5985 rtx next, pat;
5986
5987 if (! barrier_or_label)
5988 return 0;
5989
5990 if (LABEL_P (barrier_or_label)
5991 && NEXT_INSN (barrier_or_label)
5992 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5993 return 2;
5994
5995 if (BARRIER_P (barrier_or_label)
5996 && PREV_INSN (barrier_or_label)
5997 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5998 {
5999 pat = PATTERN (PREV_INSN (barrier_or_label));
6000 /* If this is a very small table, we want to keep the alignment after
6001 the table to the minimum for proper code alignment. */
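/* The size threshold below is presumably a quarter of a cache line,
   assuming CACHE_LOG is the base-2 logarithm of the cache line size in
   bytes.  */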
6002 return ((optimize_size
6003 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
6004 <= (unsigned) 1 << (CACHE_LOG - 2)))
6005 ? 1 << TARGET_SHMEDIA : align_jumps_log);
6006 }
6007
6008 next = next_active_insn (barrier_or_label);
6009
6010 if (! next)
6011 return 0;
6012
6013 pat = PATTERN (next);
6014
6015 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
6016 /* This is a barrier in front of a constant table. */
6017 return 0;
6018
6019 if (optimize_size)
6020 return 0;
6021
6022 if (! TARGET_SH2 || ! optimize)
6023 return align_jumps_log;
6024
6025 /* When fixing up pcloads, a constant table might be inserted just before
6026 the basic block that ends with the barrier. Thus, we can't trust the
6027 instruction lengths before that. */
6028 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
6029 {
6030 /* Check if there is an immediately preceding branch to the insn beyond
6031 the barrier. We must weigh the cost of discarding useful information
6032 from the current cache line when executing this branch and there is
6033 an alignment, against that of fetching unneeded insns in front of the
6034 branch target when there is no alignment. */
6035
6036 /* There are two delay_slot cases to consider. One is the simple case
6037 where the preceding branch is to the insn beyond the barrier (simple
6038 delay slot filling), and the other is where the preceding branch has
6039 a delay slot that is a duplicate of the insn after the barrier
6040 (fill_eager_delay_slots) and the branch is to the insn after the insn
6041 after the barrier. */
6042
6043 int slot, credit;
6044 bool jump_to_next = false;
6045
6046 /* Skip to the insn before the JUMP_INSN before the barrier under
6047 investigation. */
6048 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
6049
6050 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
6051 credit >= 0 && prev && NONJUMP_INSN_P (prev);
6052 prev = prev_real_insn (prev))
6053 {
6054 jump_to_next = false;
6055 if (GET_CODE (PATTERN (prev)) == USE
6056 || GET_CODE (PATTERN (prev)) == CLOBBER)
6057 continue;
6058 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
6059 {
6060 prev = prev_seq->insn (1);
6061 if (INSN_UID (prev) == INSN_UID (next))
6062 {
6063 /* Delay slot was filled with insn at jump target. */
6064 jump_to_next = true;
6065 continue;
6066 }
6067 }
6068
6069 if (slot
6070 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
6071 slot = 0;
6072 credit -= get_attr_length (prev);
6073 }
6074 if (prev && jump_to_label_p (prev))
6075 {
6076 rtx_insn *x;
6077 if (jump_to_next
6078 || next_real_insn (JUMP_LABEL (prev)) == next
6079 /* If relax_delay_slots() decides NEXT was redundant
6080 with some previous instruction, it will have
6081 redirected PREV's jump to the following insn. */
6082 || JUMP_LABEL (prev) == next_nonnote_insn (next)
6083 /* There is no upper bound on redundant instructions
6084 that might have been skipped, but we must not put an
6085 alignment where none had been before. */
6086 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
6087 (INSN_P (x)
6088 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
6089 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
6090 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
6091 {
6092 rtx pat = PATTERN (prev);
6093 if (GET_CODE (pat) == PARALLEL)
6094 pat = XVECEXP (pat, 0, 0);
6095 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
6096 return 0;
6097 }
6098 }
6099 }
6100
6101 return align_jumps_log;
6102 }
6103
6104 /* If we are inside a phony loop, almost any kind of label can turn up as the
6105 first one in the loop. Aligning a braf label causes incorrect switch
6106 destination addresses; we can detect braf labels because they are
6107 followed by a BARRIER.
6108 Applying loop alignment to small constant or switch tables is a waste
6109 of space, so we suppress this too. */
6110 int
6111 sh_loop_align (rtx_insn *label)
6112 {
6113 rtx_insn *next = label;
6114
6115 if (! optimize || optimize_size)
6116 return 0;
6117
6118 do
6119 next = next_nonnote_insn (next);
6120 while (next && LABEL_P (next));
6121
6122 if (! next
6123 || ! INSN_P (next)
6124 || recog_memoized (next) == CODE_FOR_consttable_2)
6125 return 0;
6126
6127 return align_loops_log;
6128 }
6129
6130 /* Do a final pass over the function, just before delayed branch
6131 scheduling. */
6132 static void
6133 sh_reorg (void)
6134 {
6135 rtx_insn *first, *insn, *mova = NULL;
6136 int num_mova;
6137 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
6138 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
6139
6140 first = get_insns ();
6141 max_labelno_before_reorg = max_label_num ();
6142
6143 /* We must split call insns before introducing `mova's. If we're
6144 optimizing, they'll have already been split. Otherwise, make
6145 sure we don't split them too late. */
6146 if (! optimize)
6147 split_all_insns_noflow ();
6148
6149 if (TARGET_SHMEDIA)
6150 return;
6151
6152 /* If relaxing, generate pseudo-ops to associate function calls with
6153 the symbols they call. It does no harm to not generate these
6154 pseudo-ops. However, when we can generate them, it enables the
6155 linker to potentially relax the jsr to a bsr, and eliminate the
6156 register load and, possibly, the constant pool entry. */
6157
6158 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6159 if (TARGET_RELAX)
6160 {
6161 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6162 own purposes. This works because none of the remaining passes
6163 need to look at them.
6164
6165 ??? But it may break in the future. We should use a machine
6166 dependent REG_NOTE, or some other approach entirely. */
6167 for (insn = first; insn; insn = NEXT_INSN (insn))
6168 {
6169 if (INSN_P (insn))
6170 {
6171 rtx note;
6172
6173 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6174 NULL_RTX)) != 0)
6175 remove_note (insn, note);
6176 }
6177 }
6178
6179 for (insn = first; insn; insn = NEXT_INSN (insn))
6180 {
6181 rtx pattern, reg, set, dies;
6182 rtx_code_label *label;
6183 rtx_insn *link, *scan;
6184 int rescan = 0, foundinsn = 0;
6185
6186 if (CALL_P (insn))
6187 {
6188 pattern = PATTERN (insn);
6189
6190 if (GET_CODE (pattern) == PARALLEL)
6191 pattern = XVECEXP (pattern, 0, 0);
6192 if (GET_CODE (pattern) == SET)
6193 pattern = SET_SRC (pattern);
6194
6195 if (GET_CODE (pattern) != CALL
6196 || !MEM_P (XEXP (pattern, 0)))
6197 continue;
6198
6199 reg = XEXP (XEXP (pattern, 0), 0);
6200 }
6201 else
6202 {
6203 reg = sfunc_uses_reg (insn);
6204 if (! reg)
6205 continue;
6206 }
6207
6208 if (!REG_P (reg))
6209 continue;
6210
6211 /* Try scanning backward to find where the register is set. */
6212 link = NULL;
6213 for (scan = PREV_INSN (insn);
6214 scan && !LABEL_P (scan);
6215 scan = PREV_INSN (scan))
6216 {
6217 if (! INSN_P (scan))
6218 continue;
6219
6220 if (! reg_mentioned_p (reg, scan))
6221 continue;
6222
6223 if (noncall_uses_reg (reg, scan, &set))
6224 break;
6225
6226 if (set)
6227 {
6228 link = scan;
6229 break;
6230 }
6231 }
6232
6233 if (! link)
6234 continue;
6235
6236 /* The register is set at LINK. */
6237
6238 /* We can only optimize the function call if the register is
6239 being set to a symbol. In theory, we could sometimes
6240 optimize calls to a constant location, but the assembler
6241 and linker do not support that at present. */
6242 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6243 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6244 continue;
6245
6246 /* Scan forward from LINK to the place where REG dies, and
6247 make sure that the only insns which use REG are
6248 themselves function calls. */
6249
6250 /* ??? This doesn't work for call targets that were allocated
6251 by reload, since there may not be a REG_DEAD note for the
6252 register. */
6253
6254 dies = NULL_RTX;
6255 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6256 {
6257 rtx scanset;
6258
6259 /* Don't try to trace forward past a CODE_LABEL if we haven't
6260 seen INSN yet. Ordinarily, we will only find the setting insn
6261 if it is in the same basic block. However,
6262 cross-jumping can insert code labels in between the load and
6263 the call, and can result in situations where a single call
6264 insn may have two targets depending on where we came from. */
6265
6266 if (LABEL_P (scan) && ! foundinsn)
6267 break;
6268
6269 if (! INSN_P (scan))
6270 continue;
6271
6272 /* Don't try to trace forward past a JUMP. To optimize
6273 safely, we would have to check that all the
6274 instructions at the jump destination did not use REG. */
6275
6276 if (JUMP_P (scan))
6277 break;
6278
6279 if (! reg_mentioned_p (reg, scan))
6280 continue;
6281
6282 if (noncall_uses_reg (reg, scan, &scanset))
6283 break;
6284
6285 if (scan == insn)
6286 foundinsn = 1;
6287
6288 if (scan != insn
6289 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6290 {
6291 /* There is a function call to this register other
6292 than the one we are checking. If we optimize
6293 this call, we need to rescan again below. */
6294 rescan = 1;
6295 }
6296
6297 /* ??? We shouldn't have to worry about SCANSET here.
6298 We should just be able to check for a REG_DEAD note
6299 on a function call. However, the REG_DEAD notes are
6300 apparently not dependable around libcalls; c-torture
6301 execute/920501-2 is a test case. If SCANSET is set,
6302 then this insn sets the register, so it must have
6303 died earlier. Unfortunately, this will only handle
6304 the cases in which the register is, in fact, set in a
6305 later insn. */
6306
6307 /* ??? We shouldn't have to use FOUNDINSN here.
6308 This dates back to when we used LOG_LINKS to find
6309 the most recent insn which sets the register. */
6310
6311 if (foundinsn
6312 && (scanset
6313 || find_reg_note (scan, REG_DEAD, reg)))
6314 {
6315 dies = scan;
6316 break;
6317 }
6318 }
6319
6320 if (! dies)
6321 {
6322 /* Either there was a branch, or some insn used REG
6323 other than as a function call address. */
6324 continue;
6325 }
6326
6327 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6328 on the insn which sets the register, and on each call insn
6329 which uses the register. In final_prescan_insn we look for
6330 the REG_LABEL_OPERAND notes, and output the appropriate label
6331 or pseudo-op. */
6332
6333 label = gen_label_rtx ();
6334 add_reg_note (link, REG_LABEL_OPERAND, label);
6335 add_reg_note (insn, REG_LABEL_OPERAND, label);
6336 if (rescan)
6337 {
6338 scan = link;
6339 do
6340 {
6341 rtx reg2;
6342
6343 scan = NEXT_INSN (scan);
6344 if (scan != insn
6345 && ((CALL_P (scan)
6346 && reg_mentioned_p (reg, scan))
6347 || ((reg2 = sfunc_uses_reg (scan))
6348 && REGNO (reg2) == REGNO (reg))))
6349 add_reg_note (scan, REG_LABEL_OPERAND, label);
6350 }
6351 while (scan != dies);
6352 }
6353 }
6354 }
6355
6356 if (TARGET_SH2)
6357 fixup_addr_diff_vecs (first);
6358
6359 if (optimize)
6360 {
6361 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6362 shorten_branches (first);
6363 }
6364
6365 /* Scan the function looking for move instructions which have to be
6366 changed to pc-relative loads and insert the literal tables. */
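/* For instance (illustrative only; the label number is hypothetical), a
   load of a constant that does not fit the signed 8-bit immediate of
   "mov #imm,Rn", say 0x12345678 into r1, ends up as the pc-relative load
       mov.l   .L100,r1
   with ".L100: .long 0x12345678" emitted into a constant table placed
   after a nearby barrier.  */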
6367 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6368 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6369 {
6370 if (mova_p (insn))
6371 {
6372 /* ??? basic block reordering can move a switch table dispatch
6373 below the switch table. Check if that has happened.
6374 We only have the addresses available when optimizing; but then,
6375 this check shouldn't be needed when not optimizing. */
6376 if (!untangle_mova (&num_mova, &mova, insn))
6377 {
6378 insn = mova;
6379 num_mova = 0;
6380 }
6381 }
6382 else if (JUMP_TABLE_DATA_P (insn)
6383 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6384 && num_mova
6385 /* ??? loop invariant motion can also move a mova out of a
6386 loop. Since loop does this code motion anyway, maybe we
6387 should wrap UNSPEC_MOVA into a CONST, so that reload can
6388 move it back. */
6389 && ((num_mova > 1
6390 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6391 || (prev_nonnote_insn (insn)
6392 == XEXP (MOVA_LABELREF (mova), 0))))
6393 {
6394 rtx_insn *scan;
6395 int total;
6396
6397 num_mova--;
6398
6399 /* Some code might have been inserted between the mova and
6400 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6401 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6402 total += get_attr_length (scan);
6403
6404 /* range of mova is 1020, add 4 because pc counts from address of
6405 second instruction after this one, subtract 2 in case pc is 2
6406 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6407 cancels out with alignment effects of the mova itself. */
6408 if (total > 1022)
6409 {
6410 /* Change the mova into a load, and restart scanning
6411 there. broken_move will then return true for mova. */
6412 fixup_mova (mova);
6413 insn = mova;
6414 }
6415 }
6416 if (broken_move (insn)
6417 || (NONJUMP_INSN_P (insn)
6418 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6419 {
6420 rtx_insn *scan;
6421 /* Scan ahead looking for a barrier to stick the constant table
6422 behind. */
6423 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6424 rtx_insn *last_float_move = NULL;
6425 rtx last_float = 0, *last_float_addr = NULL;
6426 int need_aligned_label = 0;
6427
6428 if (num_mova && ! mova_p (mova))
6429 {
6430 /* find_barrier had to change the first mova into a
6431 pcload; thus, we have to start with this new pcload. */
6432 insn = mova;
6433 num_mova = 0;
6434 }
6435 /* Now find all the moves between the points and modify them. */
6436 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6437 {
6438 if (LABEL_P (scan))
6439 last_float = 0;
6440 if (NONJUMP_INSN_P (scan)
6441 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6442 need_aligned_label = 1;
6443 if (broken_move (scan))
6444 {
6445 rtx *patp = &PATTERN (scan), pat = *patp;
6446 rtx src, dst;
6447 rtx lab;
6448 rtx newsrc;
6449 machine_mode mode;
6450
6451 if (GET_CODE (pat) == PARALLEL)
6452 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6453 src = SET_SRC (pat);
6454 dst = SET_DEST (pat);
6455 mode = GET_MODE (dst);
6456
6457 if (mode == SImode && satisfies_constraint_I16 (src)
6458 && REGNO (dst) != FPUL_REG)
6459 {
6460 int offset = 0;
6461
6462 mode = HImode;
6463 while (GET_CODE (dst) == SUBREG)
6464 {
6465 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6466 GET_MODE (SUBREG_REG (dst)),
6467 SUBREG_BYTE (dst),
6468 GET_MODE (dst));
6469 dst = SUBREG_REG (dst);
6470 }
6471 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6472 }
6473 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6474 {
6475 /* This must be an insn that clobbers r0. */
6476 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6477 XVECLEN (PATTERN (scan), 0)
6478 - 1);
6479 rtx clobber = *clobberp;
6480
6481 gcc_assert (GET_CODE (clobber) == CLOBBER
6482 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6483
6484 if (last_float
6485 && reg_set_between_p (r0_rtx, last_float_move, scan))
6486 last_float = 0;
6487 if (last_float
6488 && TARGET_SHCOMPACT
6489 && GET_MODE_SIZE (mode) != 4
6490 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6491 last_float = 0;
6492 lab = add_constant (src, mode, last_float);
6493 if (lab)
6494 emit_insn_before (gen_mova (lab), scan);
6495 else
6496 {
6497 /* There will be a REG_UNUSED note for r0 on
6498 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6499 otherwise reorg:mark_target_live_regs will not
6500 consider r0 to be used, and we end up with a delay
6501 slot insn in front of SCAN that clobbers r0. */
6502 rtx note
6503 = find_regno_note (last_float_move, REG_UNUSED, 0);
6504
6505 /* If we are not optimizing, then there may not be
6506 a note. */
6507 if (note)
6508 PUT_REG_NOTE_KIND (note, REG_INC);
6509
6510 *last_float_addr = r0_inc_rtx;
6511 }
6512 last_float_move = scan;
6513 last_float = src;
6514 newsrc = gen_const_mem (mode,
6515 (((TARGET_SH4 && ! TARGET_FMOVD)
6516 || REGNO (dst) == FPUL_REG)
6517 ? r0_inc_rtx
6518 : r0_rtx));
6519 last_float_addr = &XEXP (newsrc, 0);
6520
6521 /* Remove the clobber of r0. */
6522 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6523 gen_rtx_SCRATCH (Pmode));
6524 }
6525 /* This is a mova needing a label. Create it. */
6526 else if (GET_CODE (src) == UNSPEC
6527 && XINT (src, 1) == UNSPEC_MOVA
6528 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6529 {
6530 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6531 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6532 newsrc = gen_rtx_UNSPEC (SImode,
6533 gen_rtvec (1, newsrc),
6534 UNSPEC_MOVA);
6535 }
6536 else if (GET_CODE (src) == UNSPEC_VOLATILE
6537 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6538 {
6539 newsrc = XVECEXP (src, 0, 0);
6540 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6541 INSN_CODE (scan) = -1;
6542 continue;
6543 }
6544 else
6545 {
6546 lab = add_constant (src, mode, 0);
6547 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6548 newsrc = gen_const_mem (mode, newsrc);
6549 }
6550 *patp = gen_rtx_SET (dst, newsrc);
6551 INSN_CODE (scan) = -1;
6552 }
6553 }
6554 dump_table (need_aligned_label ? insn : 0, barrier);
6555 insn = barrier;
6556 }
6557 }
6558 label_ref_list_d_pool.release ();
6559 for (insn = first; insn; insn = NEXT_INSN (insn))
6560 PUT_MODE (insn, VOIDmode);
6561
6562 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6563 INSN_ADDRESSES_FREE ();
6564 split_branches (first);
6565
6566 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6567 also has an effect on the register that holds the address of the sfunc.
6568 Insert an extra dummy insn in front of each sfunc that pretends to
6569 use this register. */
6570 if (flag_delayed_branch)
6571 {
6572 for (insn = first; insn; insn = NEXT_INSN (insn))
6573 {
6574 rtx reg = sfunc_uses_reg (insn);
6575
6576 if (! reg)
6577 continue;
6578 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6579 }
6580 }
6581 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6582 }
6583
6584 /* Return the UID of the insn that follows the specified label. */
6585 int
6586 get_dest_uid (rtx label, int max_uid)
6587 {
6588 rtx_insn *dest = next_real_insn (label);
6589 int dest_uid;
6590 if (! dest)
6591 /* This can happen for an undefined label. */
6592 return 0;
6593 dest_uid = INSN_UID (dest);
6594 /* If this is a newly created branch redirection blocking instruction,
6595 we cannot index the branch_uid or insn_addresses arrays with its
6596 uid. But then, we won't need to, because the actual destination is
6597 the following branch. */
6598 while (dest_uid >= max_uid)
6599 {
6600 dest = NEXT_INSN (dest);
6601 dest_uid = INSN_UID (dest);
6602 }
6603 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6604 return 0;
6605 return dest_uid;
6606 }
6607
6608 /* Split condbranches that are out of range. Also add clobbers for
6609 scratch registers that are needed in far jumps.
6610 We do this before delay slot scheduling, so that it can take our
6611 newly created instructions into account. It also allows us to
6612 find branches with common targets more easily. */
6613 static void
6614 split_branches (rtx_insn *first)
6615 {
6616 rtx_insn *insn;
6617 struct far_branch **uid_branch, *far_branch_list = 0;
6618 int max_uid = get_max_uid ();
6619 int ok;
6620
6621 /* Find out which branches are out of range. */
6622 shorten_branches (first);
6623
6624 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6625 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6626
6627 for (insn = first; insn; insn = NEXT_INSN (insn))
6628 if (! INSN_P (insn))
6629 continue;
6630 else if (insn->deleted ())
6631 {
6632 /* Shorten_branches would split this instruction again,
6633 so transform it into a note. */
6634 SET_INSN_DELETED (insn);
6635 }
6636 else if (JUMP_P (insn))
6637 {
6638 enum attr_type type = get_attr_type (insn);
6639 if (type == TYPE_CBRANCH)
6640 {
6641 rtx_insn *next, *beyond;
6642
6643 if (get_attr_length (insn) > 4)
6644 {
6645 rtx src = SET_SRC (PATTERN (insn));
6646 rtx olabel = XEXP (XEXP (src, 1), 0);
6647 int addr = INSN_ADDRESSES (INSN_UID (insn));
6648 rtx_insn *label = 0;
6649 int dest_uid = get_dest_uid (olabel, max_uid);
6650 struct far_branch *bp = uid_branch[dest_uid];
6651
6652 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6653 the label if the LABEL_NUSES count drops to zero. There is
6654 always a jump_optimize pass that sets these values, but it
6655 proceeds to delete unreferenced code, and then if not
6656 optimizing, to un-delete the deleted instructions, thus
6657 leaving labels with too-low use counts. */
6658 if (! optimize)
6659 {
6660 JUMP_LABEL (insn) = olabel;
6661 LABEL_NUSES (olabel)++;
6662 }
6663 if (! bp)
6664 {
6665 bp = (struct far_branch *) alloca (sizeof *bp);
6666 uid_branch[dest_uid] = bp;
6667 bp->prev = far_branch_list;
6668 far_branch_list = bp;
6669 bp->far_label = as_a <rtx_insn *> (
6670 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6671 0));
6672 LABEL_NUSES (bp->far_label)++;
6673 }
6674 else
6675 {
6676 label = bp->near_label;
6677 if (! label && bp->address - addr >= CONDJUMP_MIN)
6678 {
6679 rtx_insn *block = bp->insert_place;
6680
6681 if (GET_CODE (PATTERN (block)) == RETURN)
6682 block = PREV_INSN (block);
6683 else
6684 block = gen_block_redirect (block,
6685 bp->address, 2);
6686 label = emit_label_after (gen_label_rtx (),
6687 PREV_INSN (block));
6688 bp->near_label = label;
6689 }
6690 else if (label && ! NEXT_INSN (label))
6691 {
6692 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6693 bp->insert_place = insn;
6694 else
6695 gen_far_branch (bp);
6696 }
6697 }
6698 if (! label
6699 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6700 {
6701 bp->near_label = label = gen_label_rtx ();
6702 bp->insert_place = insn;
6703 bp->address = addr;
6704 }
6705 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6706 gcc_assert (ok);
6707 }
6708 else
6709 {
6710 /* get_attr_length (insn) == 2 */
6711 /* Check if we have a pattern where reorg wants to redirect
6712 the branch to a label from an unconditional branch that
6713 is too far away. */
6714 /* We can't use JUMP_LABEL here because it might be undefined
6715 when not optimizing. */
6716 /* A syntax error might cause beyond to be NULL_RTX. */
6717 beyond
6718 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6719 0));
6720
6721 if (beyond
6722 && (JUMP_P (beyond)
6723 || ((beyond = next_active_insn (beyond))
6724 && JUMP_P (beyond)))
6725 && GET_CODE (PATTERN (beyond)) == SET
6726 && recog_memoized (beyond) == CODE_FOR_jump_compact
6727 && ((INSN_ADDRESSES
6728 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6729 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6730 > 252 + 258 + 2))
6731 gen_block_redirect (beyond,
6732 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6733 }
6734
6735 next = next_active_insn (insn);
6736
6737 if (next
6738 && (JUMP_P (next)
6739 || ((next = next_active_insn (next))
6740 && JUMP_P (next)))
6741 && GET_CODE (PATTERN (next)) == SET
6742 && recog_memoized (next) == CODE_FOR_jump_compact
6743 && ((INSN_ADDRESSES
6744 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6745 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6746 > 252 + 258 + 2))
6747 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6748 }
6749 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6750 {
6751 int addr = INSN_ADDRESSES (INSN_UID (insn));
6752 rtx_insn *far_label = 0;
6753 int dest_uid = 0;
6754 struct far_branch *bp;
6755
6756 if (type == TYPE_JUMP)
6757 {
6758 if (CROSSING_JUMP_P (insn))
6759 {
6760 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6761 insn);
6762 continue;
6763 }
6764
6765 far_label = as_a <rtx_insn *> (
6766 XEXP (SET_SRC (PATTERN (insn)), 0));
6767 dest_uid = get_dest_uid (far_label, max_uid);
6768 if (! dest_uid)
6769 {
6770 /* Parse errors can lead to labels outside
6771 the insn stream. */
6772 if (! NEXT_INSN (far_label))
6773 continue;
6774
6775 if (! optimize)
6776 {
6777 JUMP_LABEL (insn) = far_label;
6778 LABEL_NUSES (far_label)++;
6779 }
6780 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6781 far_label = 0;
6782 }
6783 }
6784 bp = uid_branch[dest_uid];
6785 if (! bp)
6786 {
6787 bp = (struct far_branch *) alloca (sizeof *bp);
6788 uid_branch[dest_uid] = bp;
6789 bp->prev = far_branch_list;
6790 far_branch_list = bp;
6791 bp->near_label = 0;
6792 bp->far_label = far_label;
6793 if (far_label)
6794 LABEL_NUSES (far_label)++;
6795 }
6796 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6797 if (addr - bp->address <= CONDJUMP_MAX)
6798 emit_label_after (bp->near_label, PREV_INSN (insn));
6799 else
6800 {
6801 gen_far_branch (bp);
6802 bp->near_label = 0;
6803 }
6804 else
6805 bp->near_label = 0;
6806 bp->address = addr;
6807 bp->insert_place = insn;
6808 if (! far_label)
6809 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6810 else
6811 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6812 }
6813 }
6814 /* Generate all pending far branches,
6815 and free our references to the far labels. */
6816 while (far_branch_list)
6817 {
6818 if (far_branch_list->near_label
6819 && ! NEXT_INSN (far_branch_list->near_label))
6820 gen_far_branch (far_branch_list);
6821 if (optimize
6822 && far_branch_list->far_label
6823 && ! --LABEL_NUSES (far_branch_list->far_label))
6824 delete_insn (far_branch_list->far_label);
6825 far_branch_list = far_branch_list->prev;
6826 }
6827
6828 /* Instruction length information is no longer valid due to the new
6829 instructions that have been generated. */
6830 init_insn_lengths ();
6831 }
6832
6833 /* Dump out instruction addresses, which is useful for debugging the
6834 constant pool table stuff.
6835
6836 If relaxing, output the label and pseudo-ops used to link together
6837 calls and the instruction which set the registers.
6838
6839 ??? The addresses printed by this routine for insns are nonsense for
6840 insns which are inside of a sequence where none of the inner insns have
6841 variable length. This is because the second pass of shorten_branches
6842 does not bother to update them. */
6843 void
6844 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6845 int noperands ATTRIBUTE_UNUSED)
6846 {
6847 if (TARGET_DUMPISIZE)
6848 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6849
6850 if (TARGET_RELAX)
6851 {
6852 rtx note;
6853
6854 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6855 if (note)
6856 {
6857 rtx pattern;
6858
6859 pattern = PATTERN (insn);
6860 if (GET_CODE (pattern) == PARALLEL)
6861 pattern = XVECEXP (pattern, 0, 0);
6862 switch (GET_CODE (pattern))
6863 {
6864 case SET:
6865 if (GET_CODE (SET_SRC (pattern)) != CALL
6866 && get_attr_type (insn) != TYPE_SFUNC)
6867 {
6868 targetm.asm_out.internal_label
6869 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6870 break;
6871 }
6872 /* else FALLTHROUGH */
6873 case CALL:
6874 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6875 CODE_LABEL_NUMBER (XEXP (note, 0)));
6876 break;
6877
6878 default:
6879 gcc_unreachable ();
6880 }
6881 }
6882 }
6883 }
6884
6885 /* Dump out any constants accumulated in the final pass. These will
6886 only be labels. */
6887 const char *
6888 output_jump_label_table (void)
6889 {
6890 int i;
6891
6892 if (pool_size)
6893 {
6894 fprintf (asm_out_file, "\t.align 2\n");
6895 for (i = 0; i < pool_size; i++)
6896 {
6897 pool_node *p = &pool_vector[i];
6898
6899 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6900 CODE_LABEL_NUMBER (p->label));
6901 output_asm_insn (".long %O0", &p->value);
6902 }
6903 pool_size = 0;
6904 }
6905
6906 return "";
6907 }
6908 \f
6909 /* A full frame looks like:
6910
6911 arg-5
6912 arg-4
6913 [ if current_function_anonymous_args
6914 arg-3
6915 arg-2
6916 arg-1
6917 arg-0 ]
6918 saved-fp
6919 saved-r10
6920 saved-r11
6921 saved-r12
6922 saved-pr
6923 local-n
6924 ..
6925 local-1
6926 local-0 <- fp points here.
6927
6928 Number of bytes pushed for anonymous args, used to pass information
6929 between expand_prologue and expand_epilogue.
6930
6931 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6932 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6933 for an epilogue and a negative value means that it's for a sibcall
6934 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6935 all the registers that are about to be restored, and hence dead. */
6936 static void
6937 output_stack_adjust (int size, rtx reg, int epilogue_p,
6938 HARD_REG_SET *live_regs_mask, bool frame_p)
6939 {
6940 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6941 if (size)
6942 {
6943 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6944
6945 /* This test is bogus, as output_stack_adjust is used to re-align the
6946 stack. */
6947 #if 0
6948 gcc_assert (!(size % align));
6949 #endif
6950
6951 if (CONST_OK_FOR_ADD (size))
6952 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6953 /* Try to do it with two partial adjustments; however, we must make
6954 sure that the stack is properly aligned at all times, in case
6955 an interrupt occurs between the two partial adjustments. */
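/* Worked example (hypothetical numbers): with size = 160 and align = 8,
   a single add does not fit the signed 8-bit immediate used on
   non-SHmedia targets, but 160 / 2 & -8 = 80 and 160 - 80 = 80 both do,
   and the stack stays 8-byte aligned after each step.  */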
6956 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6957 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6958 {
6959 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6960 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6961 }
6962 else
6963 {
6964 rtx const_reg;
6965 rtx insn;
6966 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6967 int i;
6968
6969 /* If TEMP is invalid, we could temporarily save a general
6970 register to MACL. However, there is currently no need
6971 to handle this case, so just die when we see it. */
6972 if (epilogue_p < 0
6973 || current_function_interrupt
6974 || ! call_really_used_regs[temp] || fixed_regs[temp])
6975 temp = -1;
6976 if (temp < 0 && ! current_function_interrupt
6977 && (TARGET_SHMEDIA || epilogue_p >= 0))
6978 {
6979 HARD_REG_SET temps;
6980 COPY_HARD_REG_SET (temps, call_used_reg_set);
6981 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6982 if (epilogue_p > 0)
6983 {
6984 int nreg = 0;
6985 if (crtl->return_rtx)
6986 {
6987 machine_mode mode;
6988 mode = GET_MODE (crtl->return_rtx);
6989 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6990 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6991 }
6992 for (i = 0; i < nreg; i++)
6993 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6994 if (crtl->calls_eh_return)
6995 {
6996 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6997 for (i = 0; i <= 3; i++)
6998 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6999 }
7000 }
7001 if (TARGET_SHMEDIA && epilogue_p < 0)
7002 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
7003 CLEAR_HARD_REG_BIT (temps, i);
7004 if (epilogue_p <= 0)
7005 {
7006 for (i = FIRST_PARM_REG;
7007 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
7008 CLEAR_HARD_REG_BIT (temps, i);
7009 if (cfun->static_chain_decl != NULL)
7010 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
7011 }
7012 temp = scavenge_reg (&temps);
7013 }
7014 if (temp < 0 && live_regs_mask)
7015 {
7016 HARD_REG_SET temps;
7017
7018 COPY_HARD_REG_SET (temps, *live_regs_mask);
7019 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
7020 temp = scavenge_reg (&temps);
7021 }
7022 if (temp < 0)
7023 {
7024 rtx adj_reg, tmp_reg, mem;
7025
7026 /* If we reached here, the most likely case is the (sibcall)
7027 epilogue for non-SHmedia. Put a special push/pop sequence
7028 for such a case as a last resort. This looks lengthy, but it
7029 should not be a problem because the case is very
7030 rare. */
7031
7032 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
7033
7034
7035 /* ??? There is still the slight possibility that r4 or
7036 r5 have been reserved as fixed registers or assigned
7037 as global registers, and they change during an
7038 interrupt. There are possible ways to handle this:
7039
7040 - If we are adjusting the frame pointer (r14), we can do
7041 with a single temp register and an ordinary push / pop
7042 on the stack.
7043 - Grab any call-used or call-saved registers (i.e. not
7044 fixed or globals) for the temps we need. We might
7045 also grab r14 if we are adjusting the stack pointer.
7046 If we can't find enough available registers, issue
7047 a diagnostic and die - the user must have reserved
7048 way too many registers.
7049 But since all this is rather unlikely to happen and
7050 would require extra testing, we just die if r4 / r5
7051 are not available. */
7052 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
7053 && !global_regs[4] && !global_regs[5]);
7054
7055 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
7056 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
7057 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
7058 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
7059 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
7060 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7061 emit_move_insn (mem, tmp_reg);
7062 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
7063 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
7064 emit_move_insn (mem, tmp_reg);
7065 emit_move_insn (reg, adj_reg);
7066 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7067 emit_move_insn (adj_reg, mem);
7068 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
7069 emit_move_insn (tmp_reg, mem);
7070 /* Tell flow the insns that pop r4/r5 aren't dead. */
7071 emit_use (tmp_reg);
7072 emit_use (adj_reg);
7073 return;
7074 }
7075 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
7076
7077 /* If SIZE is negative, subtract the positive value.
7078 This sometimes allows a constant pool entry to be shared
7079 between prologue and epilogue code. */
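/* E.g. a prologue adjustment of -N loads the constant N and subtracts it,
   while the matching epilogue adjustment of +N loads the same N and adds
   it, so any constant pool entry used for N can be shared.  */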
7080 if (size < 0)
7081 {
7082 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
7083 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
7084 }
7085 else
7086 {
7087 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
7088 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
7089 }
7090 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7091 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
7092 GEN_INT (size))));
7093 }
7094 }
7095 }
7096
7097 /* Emit the specified insn and mark it as frame related.
7098 FIXME: Rename this to emit_frame_insn. */
7099 static rtx_insn *
7100 frame_insn (rtx x)
7101 {
7102 rtx_insn *insn = emit_insn (x);
7103 RTX_FRAME_RELATED_P (insn) = 1;
7104 return insn;
7105 }
7106
7107 /* Output RTL to push register RN onto the stack. */
7108 static rtx
7109 push (int rn)
7110 {
7111 rtx x;
7112 if (rn == FPUL_REG)
7113 x = gen_push_fpul ();
7114 else if (rn == FPSCR_REG)
7115 x = gen_push_fpscr ();
7116 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7117 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7118 {
7119 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7120 return NULL_RTX;
7121 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
7122 }
7123 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7124 x = gen_push_e (gen_rtx_REG (SFmode, rn));
7125 else
7126 x = gen_push (gen_rtx_REG (SImode, rn));
7127
7128 x = frame_insn (x);
7129 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7130 return x;
7131 }
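/* For a plain general register this typically expands to a pre-decrement
   store, conceptually (set (mem:SI (pre_dec:SI sp)) (reg:SI RN)), i.e.
   "mov.l rN,@-r15" in assembly; the REG_INC note added above records the
   implicit stack pointer modification.  (Illustrative sketch only.)  */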
7132
7133 /* Output RTL to pop register RN from the stack. */
7134 static void
7135 pop (int rn)
7136 {
7137 rtx x, sp_reg, reg;
7138 if (rn == FPUL_REG)
7139 x = gen_pop_fpul ();
7140 else if (rn == FPSCR_REG)
7141 x = gen_pop_fpscr ();
7142 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7143 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7144 {
7145 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7146 return;
7147 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
7148 }
7149 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7150 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7151 else
7152 x = gen_pop (gen_rtx_REG (SImode, rn));
7153
7154 x = emit_insn (x);
7155
7156 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7157 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7158 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7159 : SET_DEST (PATTERN (x)));
7160 add_reg_note (x, REG_CFA_RESTORE, reg);
7161 add_reg_note (x, REG_CFA_ADJUST_CFA,
7162 gen_rtx_SET (sp_reg,
7163 plus_constant (SImode, sp_reg,
7164 GET_MODE_SIZE (GET_MODE (reg)))));
7165 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7166 RTX_FRAME_RELATED_P (x) = 1;
7167 }
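/* The corresponding pop typically expands to a post-increment load,
   "mov.l @r15+,rN" in assembly.  The REG_CFA_RESTORE and
   REG_CFA_ADJUST_CFA notes attached above let the CFI machinery see both
   the restored register and the 4- or 8-byte stack pointer adjustment.
   (Illustrative sketch only.)  */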
7168
7169 /* Generate code to push the regs specified in the mask. */
7170 static void
7171 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7172 {
7173 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7174 int skip_fpscr = 0;
7175
7176 /* Push PR last; this gives better latencies after the prologue, and
7177 provides a candidate for the return delay slot when no general
7178 registers are pushed. */
7179 for (; i < FIRST_PSEUDO_REGISTER; i++)
7180 {
7181 /* If this is an interrupt handler, and the SZ bit varies,
7182 and we have to push any floating point register, we need
7183 to switch to the correct precision first. */
7184 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7185 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7186 {
7187 HARD_REG_SET unsaved;
7188
7189 push (FPSCR_REG);
7190 COMPL_HARD_REG_SET (unsaved, *mask);
7191 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7192 skip_fpscr = 1;
7193 }
7194 if (i != PR_REG
7195 && (i != FPSCR_REG || ! skip_fpscr)
7196 && TEST_HARD_REG_BIT (*mask, i))
7197 {
7198 /* If the ISR has the RESBANK attribute assigned, don't push any of
7199 the following registers: R0-R14, MACH, MACL and GBR. */
7200 if (! (sh_cfun_resbank_handler_p ()
7201 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7202 || i == MACH_REG
7203 || i == MACL_REG
7204 || i == GBR_REG)))
7205 push (i);
7206 }
7207 }
7208
7209 /* Push banked registers last to improve delay slot opportunities. */
7210 if (interrupt_handler)
7211 {
7212 bool use_movml = false;
7213
7214 if (TARGET_SH2A)
7215 {
7216 unsigned int count = 0;
7217
7218 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7219 if (TEST_HARD_REG_BIT (*mask, i))
7220 count++;
7221 else
7222 break;
7223
7224 /* Use movml when all banked registers are pushed. */
7225 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7226 use_movml = true;
7227 }
7228
7229 if (sh_cfun_resbank_handler_p ())
7230 ; /* Do nothing. */
7231 else if (use_movml)
7232 {
7233 rtx x, mem, reg, set;
7234 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7235
7236 /* We must avoid scheduling the multiple-store insn together with
7237 other insns. */
7238 emit_insn (gen_blockage ());
7239 x = gen_movml_push_banked (sp_reg);
7240 x = frame_insn (x);
7241 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7242 {
7243 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7244 reg = gen_rtx_REG (SImode, i);
7245 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7246 }
7247
7248 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
7249 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7250 emit_insn (gen_blockage ());
7251 }
7252 else
7253 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7254 if (TEST_HARD_REG_BIT (*mask, i))
7255 push (i);
7256 }
7257
7258 /* Don't push the PR register for an ISR with the RESBANK attribute assigned. */
7259 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7260 push (PR_REG);
7261 }
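/* As an illustration of the resulting save order for an ordinary
   (non-interrupt) function that needs r8, r9 and PR: the loop above pushes
   r8 and then r9, and PR is pushed last, so PR ends up closest to the new
   stack pointer and its restore is a natural candidate for the return
   delay slot.  (Example only; interrupt handlers also push banked
   registers and possibly FPSCR as described above.)  */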
7262
7263 /* Calculate how much extra space is needed to save all callee-saved
7264 target registers.
7265 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7266 static int
7267 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7268 {
7269 int reg;
7270 int stack_space = 0;
7271 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7272
7273 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7274 if ((! call_really_used_regs[reg] || interrupt_handler)
7275 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7276 /* Leave space to save this target register on the stack,
7277 in case target register allocation wants to use it. */
7278 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7279 return stack_space;
7280 }
7281
7282 /* Decide whether we should reserve space for callee-save target registers,
7283 in case target register allocation wants to use them. REGS_SAVED is
7284 the space, in bytes, that is already required for register saves.
7285 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7286 static int
7287 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7288 HARD_REG_SET *live_regs_mask)
7289 {
7290 if (optimize_size)
7291 return 0;
7292 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7293 }
7294
7295 /* Decide how much space to reserve for callee-save target registers
7296 in case target register allocation wants to use them.
7297 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7298 static int
7299 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7300 {
7301 if (shmedia_space_reserved_for_target_registers)
7302 return shmedia_target_regs_stack_space (live_regs_mask);
7303 else
7304 return 0;
7305 }
7306
7307 /* Work out the registers which need to be saved, both as a mask and a
7308 count of saved words. Return the count.
7309
7310 If doing a pragma interrupt function, then push all regs used by the
7311 function, and if we call another function (we can tell by looking at PR),
7312 make sure that all the regs it clobbers are safe too. */
7313 static int
7314 calc_live_regs (HARD_REG_SET *live_regs_mask)
7315 {
7316 unsigned int reg;
7317 int count;
7318 tree attrs;
7319 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7320 bool nosave_low_regs;
7321 int pr_live, has_call;
7322
7323 attrs = DECL_ATTRIBUTES (current_function_decl);
7324 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7325 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7326 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7327 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7328
7329 CLEAR_HARD_REG_SET (*live_regs_mask);
7330 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7331 && df_regs_ever_live_p (FPSCR_REG))
7332 target_flags &= ~MASK_FPU_SINGLE;
7333 /* If we can avoid a lot of saves by switching to double mode, do that. */
7334 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7335 && TARGET_FPU_SINGLE)
7336 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7337 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7338 && (! call_really_used_regs[reg]
7339 || interrupt_handler)
7340 && ++count > 2)
7341 {
7342 target_flags &= ~MASK_FPU_SINGLE;
7343 break;
7344 }
7345 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7346 knows how to use it. That means the pseudo originally allocated for
7347 the initial value can become the PR_MEDIA_REG hard register, as seen for
7348 execute/20010122-1.c:test9. */
7349 if (TARGET_SHMEDIA)
7350 /* ??? This function is called from initial_elimination_offset, hence we
7351 can't use the result of sh_media_register_for_return here. */
7352 pr_live = sh_pr_n_sets ();
7353 else
7354 {
7355 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7356 pr_live = (pr_initial
7357 ? (!REG_P (pr_initial)
7358 || REGNO (pr_initial) != (PR_REG))
7359 : df_regs_ever_live_p (PR_REG));
7360 /* For SHcompact, if not optimizing, we end up with a memory reference
7361 using the return address pointer for __builtin_return_address even
7362 though there is no actual need to put the PR register on the stack. */
7363 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7364 }
7365 /* Force PR to be live if the prologue has to call the SHmedia
7366 argument decoder or register saver. */
7367 if (TARGET_SHCOMPACT
7368 && ((crtl->args.info.call_cookie
7369 & ~ CALL_COOKIE_RET_TRAMP (1))
7370 || crtl->saves_all_registers))
7371 pr_live = 1;
7372 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7373 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7374 {
7375 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7376 ? pr_live
7377 : interrupt_handler
7378 ? (/* Need to save all the regs ever live. */
7379 (df_regs_ever_live_p (reg)
7380 || (call_really_used_regs[reg]
7381 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7382 || reg == PIC_OFFSET_TABLE_REGNUM)
7383 && has_call)
7384 || (TARGET_SHMEDIA && has_call
7385 && REGISTER_NATURAL_MODE (reg) == SImode
7386 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7387 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7388 && reg != RETURN_ADDRESS_POINTER_REGNUM
7389 && reg != T_REG && reg != GBR_REG
7390 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7391 /* Push fpscr only on targets which have an FPU. */
7392 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7393 : (/* Only push those regs which are used and need to be saved. */
7394 (TARGET_SHCOMPACT
7395 && flag_pic
7396 && crtl->args.info.call_cookie
7397 && reg == PIC_OFFSET_TABLE_REGNUM)
7398 || (df_regs_ever_live_p (reg)
7399 && ((!call_really_used_regs[reg]
7400 && !(reg != PIC_OFFSET_TABLE_REGNUM
7401 && fixed_regs[reg] && call_used_regs[reg]))
7402 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7403 || (crtl->calls_eh_return
7404 && (reg == EH_RETURN_DATA_REGNO (0)
7405 || reg == EH_RETURN_DATA_REGNO (1)
7406 || reg == EH_RETURN_DATA_REGNO (2)
7407 || reg == EH_RETURN_DATA_REGNO (3)))
7408 || ((reg == MACL_REG || reg == MACH_REG)
7409 && df_regs_ever_live_p (reg)
7410 && sh_cfun_attr_renesas_p ())
7411 ))
7412 {
7413 SET_HARD_REG_BIT (*live_regs_mask, reg);
7414 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7415
7416 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7417 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7418 {
7419 if (FP_REGISTER_P (reg))
7420 {
7421 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7422 {
7423 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7424 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7425 }
7426 }
7427 else if (XD_REGISTER_P (reg))
7428 {
7429 /* Must switch to double mode to access these registers. */
7430 target_flags &= ~MASK_FPU_SINGLE;
7431 }
7432 }
7433 }
7434 if (nosave_low_regs && reg == R8_REG)
7435 break;
7436 }
7437 /* If we have a target register optimization pass after prologue / epilogue
7438 threading, we need to assume all target registers will be live even if
7439 they aren't now. */
7440 if (flag_branch_target_load_optimize2
7441 && TARGET_SAVE_ALL_TARGET_REGS
7442 && shmedia_space_reserved_for_target_registers)
7443 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7444 if ((! call_really_used_regs[reg] || interrupt_handler)
7445 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7446 {
7447 SET_HARD_REG_BIT (*live_regs_mask, reg);
7448 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7449 }
7450 /* If this is an interrupt handler, we don't have any call-clobbered
7451 registers we can conveniently use for target register save/restore.
7452 Make sure we save at least one general purpose register when we need
7453 to save target registers. */
7454 if (interrupt_handler
7455 && hard_reg_set_intersect_p (*live_regs_mask,
7456 reg_class_contents[TARGET_REGS])
7457 && ! hard_reg_set_intersect_p (*live_regs_mask,
7458 reg_class_contents[GENERAL_REGS]))
7459 {
7460 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7461 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7462 }
7463
7464 return count;
7465 }
7466
7467 /* Code to generate prologue and epilogue sequences. */
7468
7469 /* PUSHED is the number of bytes that are being pushed on the
7470 stack for register saves. Return the frame size, padded
7471 appropriately so that the stack stays properly aligned. */
7472 static HOST_WIDE_INT
7473 rounded_frame_size (int pushed)
7474 {
7475 HOST_WIDE_INT size = get_frame_size ();
7476 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7477
7478 if (ACCUMULATE_OUTGOING_ARGS)
7479 size += crtl->outgoing_args_size;
7480
7481 return ((size + pushed + align - 1) & -align) - pushed;
7482 }
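/* A worked example of the rounding above (made-up numbers): with
   pushed = 12, get_frame_size () = 20, no outgoing args and an 8-byte
   STACK_BOUNDARY, align = 8 and the result is
   ((20 + 12 + 8 - 1) & -8) - 12 = 32 - 12 = 20, so the total of pushed
   registers plus frame stays a multiple of the stack alignment.  */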
7483
7484 /* Choose a call-clobbered target-branch register that remains
7485 unchanged along the whole function. We set it up as the return
7486 value in the prologue. */
7487 int
7488 sh_media_register_for_return (void)
7489 {
7490 int regno;
7491 int tr0_used;
7492
7493 if (! crtl->is_leaf)
7494 return -1;
7495 if (lookup_attribute ("interrupt_handler",
7496 DECL_ATTRIBUTES (current_function_decl)))
7497 return -1;
7498 if (sh_cfun_interrupt_handler_p ())
7499 return -1;
7500
7501 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7502
7503 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7504 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7505 return regno;
7506
7507 return -1;
7508 }
7509
7510 /* The maximum number of registers we need to save is:
7511 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7512 - 32 floating point registers (for each pair, we save none,
7513 one single precision value, or a double precision value).
7514 - 8 target registers
7515 - plus 1 entry for a delimiter. */
7516 #define MAX_SAVED_REGS (62+32+8)
7517
7518 typedef struct save_entry_s
7519 {
7520 unsigned char reg;
7521 unsigned char mode;
7522 short offset;
7523 } save_entry;
7524
7525 #define MAX_TEMPS 4
7526
7527 /* There will be a delimiter entry with VOIDmode both at the start and the
7528 end of a filled-in schedule. The end delimiter has the offset of the
7529 save with the smallest (i.e. most negative) offset. */
7530 typedef struct save_schedule_s
7531 {
7532 save_entry entries[MAX_SAVED_REGS + 2];
7533 int temps[MAX_TEMPS+1];
7534 } save_schedule;
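/* For illustration, a filled-in schedule for two saved registers might
   look like this (register names and offsets made up):
     entries[0]: reg = -1,  mode = VOIDmode, offset = OFFSET_BASE
     entries[1]: reg = r28, mode = DImode,   offset = OFFSET_BASE - 8
     entries[2]: reg = r29, mode = DImode,   offset = OFFSET_BASE - 16
     entries[3]: reg = -1,  mode = VOIDmode, offset = OFFSET_BASE - 16
   temps[] holds the scratch registers found by sh5_schedule_saves and is
   terminated by -1.  */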
7535
7536 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7537 use reverse order. Returns the last entry written to (not counting
7538 the delimiter). OFFSET_BASE is a number to be added to all offset
7539 entries. */
7540 static save_entry *
7541 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7542 int offset_base)
7543 {
7544 int align, i;
7545 save_entry *entry = schedule->entries;
7546 int tmpx = 0;
7547 int offset;
7548
7549 if (! current_function_interrupt)
7550 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7551 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7552 && ! FUNCTION_ARG_REGNO_P (i)
7553 && i != FIRST_RET_REG
7554 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7555 && ! (crtl->calls_eh_return
7556 && (i == EH_RETURN_STACKADJ_REGNO
7557 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7558 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7559 schedule->temps[tmpx++] = i;
7560 entry->reg = -1;
7561 entry->mode = VOIDmode;
7562 entry->offset = offset_base;
7563 entry++;
7564 /* We loop twice: first, we save 8-byte aligned registers at the
7565 higher addresses, which are known to be aligned. Then, we
7566 proceed to saving 32-bit registers that don't need 8-byte
7567 alignment.
7568 If this is an interrupt function, all registers that need saving
7569 need to be saved in full. Moreover, we need to postpone saving
7570 target registers until we have saved some general purpose registers
7571 that we can then use as scratch registers. */
7572 offset = offset_base;
7573 for (align = 1; align >= 0; align--)
7574 {
7575 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7576 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7577 {
7578 machine_mode mode = REGISTER_NATURAL_MODE (i);
7579 int reg = i;
7580
7581 if (current_function_interrupt)
7582 {
7583 if (TARGET_REGISTER_P (i))
7584 continue;
7585 if (GENERAL_REGISTER_P (i))
7586 mode = DImode;
7587 }
7588 if (mode == SFmode && (i % 2) == 1
7589 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7590 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7591 {
7592 mode = DFmode;
7593 i--;
7594 reg--;
7595 }
7596
7597 /* If we're doing the aligned pass and this is not aligned,
7598 or we're doing the unaligned pass and this is aligned,
7599 skip it. */
7600 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7601 != align)
7602 continue;
7603
7604 if (current_function_interrupt
7605 && GENERAL_REGISTER_P (i)
7606 && tmpx < MAX_TEMPS)
7607 schedule->temps[tmpx++] = i;
7608
7609 offset -= GET_MODE_SIZE (mode);
7610 entry->reg = i;
7611 entry->mode = mode;
7612 entry->offset = offset;
7613 entry++;
7614 }
7615 if (align && current_function_interrupt)
7616 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7617 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7618 {
7619 offset -= GET_MODE_SIZE (DImode);
7620 entry->reg = i;
7621 entry->mode = DImode;
7622 entry->offset = offset;
7623 entry++;
7624 }
7625 }
7626 entry->reg = -1;
7627 entry->mode = VOIDmode;
7628 entry->offset = offset;
7629 schedule->temps[tmpx] = -1;
7630 return entry - 1;
7631 }
7632
7633 /* Expand code for the function prologue. */
7634 void
7635 sh_expand_prologue (void)
7636 {
7637 HARD_REG_SET live_regs_mask;
7638 int d, i;
7639 int d_rounding = 0;
7640 int save_flags = target_flags;
7641 int pretend_args;
7642 int stack_usage;
7643 tree sp_switch_attr
7644 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7645
7646 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7647
7648 /* We have pretend args if we had an object sent partially in registers
7649 and partially on the stack, e.g. a large structure. */
7650 pretend_args = crtl->args.pretend_args_size;
7651 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7652 && (NPARM_REGS(SImode)
7653 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7654 pretend_args = 0;
7655
7656 output_stack_adjust (-pretend_args
7657 - crtl->args.info.stack_regs * 8,
7658 stack_pointer_rtx, 0, NULL, true);
7659 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7660
7661 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7662 /* We're going to use the PIC register to load the address of the
7663 incoming-argument decoder and/or of the return trampoline from
7664 the GOT, so make sure the PIC register is preserved and
7665 initialized. */
7666 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7667
7668 if (TARGET_SHCOMPACT
7669 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7670 {
7671 int reg;
7672
7673 /* First, make all registers with incoming arguments that will
7674 be pushed onto the stack live, so that register renaming
7675 doesn't overwrite them. */
7676 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7677 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7678 >= NPARM_REGS (SImode) - reg)
7679 for (; reg < NPARM_REGS (SImode); reg++)
7680 emit_insn (gen_shcompact_preserve_incoming_args
7681 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7682 else if (CALL_COOKIE_INT_REG_GET
7683 (crtl->args.info.call_cookie, reg) == 1)
7684 emit_insn (gen_shcompact_preserve_incoming_args
7685 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7686
7687 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7688 stack_pointer_rtx);
7689 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7690 GEN_INT (crtl->args.info.call_cookie));
7691 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7692 gen_rtx_REG (SImode, R0_REG));
7693 }
7694 else if (TARGET_SHMEDIA)
7695 {
7696 int tr = sh_media_register_for_return ();
7697
7698 if (tr >= 0)
7699 emit_move_insn (gen_rtx_REG (DImode, tr),
7700 gen_rtx_REG (DImode, PR_MEDIA_REG));
7701 }
7702
7703 /* Emit the code for SETUP_VARARGS. */
7704 if (cfun->stdarg)
7705 {
7706 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7707 {
7708 /* Push arg regs as if they'd been provided by the caller on the stack. */
7709 for (i = 0; i < NPARM_REGS(SImode); i++)
7710 {
7711 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7712
7713 if (i >= (NPARM_REGS(SImode)
7714 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7715 ))
7716 break;
7717 push (rn);
7718 stack_usage += GET_MODE_SIZE (SImode);
7719 }
7720 }
7721 }
7722
7723 /* If we're supposed to switch stacks at function entry, do so now. */
7724 if (sp_switch_attr)
7725 {
7726 rtx lab, newsrc;
7727 /* The argument specifies a variable holding the address of the
7728 stack the interrupt function should switch to/from at entry/exit. */
7729 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7730 const char *s
7731 = ggc_strdup (TREE_STRING_POINTER (arg));
7732 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7733
7734 lab = add_constant (sp_switch, SImode, 0);
7735 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7736
7737 emit_insn (gen_sp_switch_1 (newsrc));
7738 }
7739
7740 d = calc_live_regs (&live_regs_mask);
7741 /* ??? Maybe we could save some switching if we can move a mode switch
7742 that already happens to be at the function start into the prologue. */
7743 if (target_flags != save_flags && ! current_function_interrupt)
7744 emit_insn (gen_toggle_sz ());
7745
7746 if (TARGET_SH5)
7747 {
7748 int offset_base, offset;
7749 rtx r0 = NULL_RTX;
7750 int offset_in_r0 = -1;
7751 int sp_in_r0 = 0;
7752 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7753 int total_size, save_size;
7754 save_schedule schedule;
7755 save_entry *entry;
7756 int *tmp_pnt;
7757
7758 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7759 && ! current_function_interrupt)
7760 r0 = gen_rtx_REG (Pmode, R0_REG);
7761
7762 /* D is the actual number of bytes that we need for saving registers;
7763 however, in initial_elimination_offset we have committed to using
7764 an additional TREGS_SPACE bytes. In order to keep both the
7765 addresses of arguments supplied by the caller and the addresses of
7766 local variables valid, we must keep this gap. Place it between the
7767 incoming arguments and the actually saved registers in a bid to
7768 optimize locality of reference. */
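/* In other words (sketch): the first output_stack_adjust below covers
   save_size + d_rounding bytes, and the later one covers
   rounded_frame_size (d) - d_rounding bytes; the two together equal
   total_size, with the TREGS_SPACE gap sitting between the incoming
   arguments and the registers actually saved.  */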
7769 total_size = d + tregs_space;
7770 total_size += rounded_frame_size (total_size);
7771 save_size = total_size - rounded_frame_size (d);
7772 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7773 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7774 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7775
7776 /* If adjusting the stack in a single step costs nothing extra, do so.
7777 I.e. either if a single addi is enough, or we need a movi anyway,
7778 and we don't exceed the maximum offset range (the test for the
7779 latter is conservative for simplicity). */
7780 if (TARGET_SHMEDIA
7781 && (CONST_OK_FOR_I10 (-total_size)
7782 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7783 && total_size <= 2044)))
7784 d_rounding = total_size - save_size;
7785
7786 offset_base = d + d_rounding;
7787
7788 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7789 0, NULL, true);
7790 stack_usage += save_size + d_rounding;
7791
7792 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7793 tmp_pnt = schedule.temps;
7794 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7795 {
7796 machine_mode mode = (machine_mode) entry->mode;
7797 unsigned int reg = entry->reg;
7798 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7799 rtx orig_reg_rtx;
7800
7801 offset = entry->offset;
7802
7803 reg_rtx = gen_rtx_REG (mode, reg);
7804
7805 mem_rtx = gen_frame_mem (mode,
7806 gen_rtx_PLUS (Pmode,
7807 stack_pointer_rtx,
7808 GEN_INT (offset)));
7809
7810 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7811 {
7812 gcc_assert (r0);
7813 mem_rtx = NULL_RTX;
7814 }
7815
7816 if (HAVE_PRE_DECREMENT
7817 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7818 || mem_rtx == NULL_RTX
7819 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7820 {
7821 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7822
7823 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7824 pre_dec = NULL_RTX;
7825 else
7826 {
7827 mem_rtx = NULL_RTX;
7828 offset += GET_MODE_SIZE (mode);
7829 }
7830 }
7831
7832 if (mem_rtx != NULL_RTX)
7833 goto addr_ok;
7834
7835 if (offset_in_r0 == -1)
7836 {
7837 emit_move_insn (r0, GEN_INT (offset));
7838 offset_in_r0 = offset;
7839 }
7840 else if (offset != offset_in_r0)
7841 {
7842 emit_move_insn (r0,
7843 gen_rtx_PLUS
7844 (Pmode, r0,
7845 GEN_INT (offset - offset_in_r0)));
7846 offset_in_r0 += offset - offset_in_r0;
7847 }
7848
7849 if (pre_dec != NULL_RTX)
7850 {
7851 if (! sp_in_r0)
7852 {
7853 emit_move_insn (r0,
7854 gen_rtx_PLUS
7855 (Pmode, r0, stack_pointer_rtx));
7856 sp_in_r0 = 1;
7857 }
7858
7859 offset -= GET_MODE_SIZE (mode);
7860 offset_in_r0 -= GET_MODE_SIZE (mode);
7861
7862 mem_rtx = pre_dec;
7863 }
7864 else if (sp_in_r0)
7865 mem_rtx = gen_frame_mem (mode, r0);
7866 else
7867 mem_rtx = gen_frame_mem (mode,
7868 gen_rtx_PLUS (Pmode,
7869 stack_pointer_rtx,
7870 r0));
7871
7872 /* We must not use an r0-based address for target-branch
7873 registers or for special registers without pre-dec
7874 memory addresses, since we store their values in r0
7875 first. */
7876 gcc_assert (!TARGET_REGISTER_P (reg)
7877 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7878 || mem_rtx == pre_dec));
7879
7880 addr_ok:
7881 orig_reg_rtx = reg_rtx;
7882 if (TARGET_REGISTER_P (reg)
7883 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7884 && mem_rtx != pre_dec))
7885 {
7886 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7887
7888 emit_move_insn (tmp_reg, reg_rtx);
7889
7890 if (REGNO (tmp_reg) == R0_REG)
7891 {
7892 offset_in_r0 = -1;
7893 sp_in_r0 = 0;
7894 gcc_assert (!refers_to_regno_p (R0_REG, mem_rtx));
7895 }
7896
7897 if (*++tmp_pnt <= 0)
7898 tmp_pnt = schedule.temps;
7899
7900 reg_rtx = tmp_reg;
7901 }
7902 {
7903 rtx insn;
7904
7905 /* Mark as interesting for the DWARF CFI generator. */
7906 insn = emit_move_insn (mem_rtx, reg_rtx);
7907 RTX_FRAME_RELATED_P (insn) = 1;
7908 /* If we use an intermediate register for the save, we can't
7909 describe this exactly in CFI as a copy of the to-be-saved
7910 register into the temporary register followed by a store of the
7911 temporary register to the stack, because the temporary register
7912 can have a different natural size than the to-be-saved register.
7913 Thus, we gloss over the intermediate copy and pretend we do
7914 a direct save from the to-be-saved register. */
7915 if (REGNO (reg_rtx) != reg)
7916 {
7917 rtx set;
7918
7919 set = gen_rtx_SET (mem_rtx, orig_reg_rtx);
7920 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7921 }
7922
7923 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7924 {
7925 rtx reg_rtx = gen_rtx_REG (mode, reg);
7926 rtx set;
7927 rtx mem_rtx = gen_frame_mem (mode,
7928 gen_rtx_PLUS (Pmode,
7929 stack_pointer_rtx,
7930 GEN_INT (offset)));
7931
7932 set = gen_rtx_SET (mem_rtx, reg_rtx);
7933 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7934 }
7935 }
7936 }
7937
7938 gcc_assert (entry->offset == d_rounding);
7939 }
7940 else
7941 {
7942 push_regs (&live_regs_mask, current_function_interrupt);
7943 stack_usage += d;
7944 }
7945
7946 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7947 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7948
7949 if (SHMEDIA_REGS_STACK_ADJUST ())
7950 {
7951 /* This must NOT go through the PLT, otherwise mach and macl
7952 may be clobbered. */
7953 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7954 (TARGET_FPU_ANY
7955 ? "__GCC_push_shmedia_regs"
7956 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7957 emit_insn (gen_shmedia_save_restore_regs_compact
7958 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7959 }
7960
7961 if (target_flags != save_flags && ! current_function_interrupt)
7962 emit_insn (gen_toggle_sz ());
7963
7964 target_flags = save_flags;
7965
7966 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7967 stack_pointer_rtx, 0, NULL, true);
7968 stack_usage += rounded_frame_size (d) - d_rounding;
7969
7970 if (frame_pointer_needed)
7971 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7972
7973 if (TARGET_SHCOMPACT
7974 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7975 {
7976 /* This must NOT go through the PLT, otherwise mach and macl
7977 may be clobbered. */
7978 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7979 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7980 emit_insn (gen_shcompact_incoming_args ());
7981 }
7982
7983 /* If we are profiling, make sure no instructions are scheduled before
7984 the call to mcount. Similarly, if some call instructions are swapped
7985 before frame-related insns, it'll confuse the unwinder because
7986 currently SH has no unwind info for function epilogues. */
7987 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7988 emit_insn (gen_blockage ());
7989
7990 if (flag_stack_usage_info)
7991 current_function_static_stack_size = stack_usage;
7992 }
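/* Rough picture of the frame laid out above for the common non-SH5 case,
   from higher to lower addresses (simplified sketch):
       incoming arguments / pretend args
       registers saved by push_regs (PR pushed last)
       rounded_frame_size (d) bytes of locals and spills   <- sp (and fp,
                                                              if needed)
   Interrupt handlers and the TARGET_SH5 path differ in the details.  */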
7993
7994 /* Expand code for the function epilogue. */
7995 void
7996 sh_expand_epilogue (bool sibcall_p)
7997 {
7998 HARD_REG_SET live_regs_mask;
7999 int d, i;
8000 int d_rounding = 0;
8001
8002 int save_flags = target_flags;
8003 int frame_size, save_size;
8004 int fpscr_deferred = 0;
8005 int e = sibcall_p ? -1 : 1;
8006
8007 d = calc_live_regs (&live_regs_mask);
8008
8009 save_size = d;
8010 frame_size = rounded_frame_size (d);
8011
8012 if (TARGET_SH5)
8013 {
8014 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
8015 int total_size;
8016 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
8017 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8018 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
8019
8020 total_size = d + tregs_space;
8021 total_size += rounded_frame_size (total_size);
8022 save_size = total_size - frame_size;
8023
8024 /* If adjusting the stack in a single step costs nothing extra, do so.
8025 I.e. either if a single addi is enough, or we need a movi anyway,
8026 and we don't exceed the maximum offset range (the test for the
8027 latter is conservative for simplicity). */
8028 if (TARGET_SHMEDIA
8029 && ! frame_pointer_needed
8030 && (CONST_OK_FOR_I10 (total_size)
8031 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
8032 && total_size <= 2044)))
8033 d_rounding = frame_size;
8034
8035 frame_size -= d_rounding;
8036 }
8037
8038 if (frame_pointer_needed)
8039 {
8040 /* We must avoid scheduling the epilogue with previous basic blocks.
8041 See PR/18032 and PR/40313. */
8042 emit_insn (gen_blockage ());
8043 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
8044 &live_regs_mask, true);
8045
8046 /* We must avoid moving the stack pointer adjustment past code
8047 which reads from the local frame, else an interrupt could
8048 occur after the SP adjustment and clobber data in the local
8049 frame. */
8050 emit_insn (gen_blockage ());
8051 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
8052 }
8053 else if (frame_size)
8054 {
8055 /* We must avoid moving the stack pointer adjustment past code
8056 which reads from the local frame, else an interrupt could
8057 occur after the SP adjustment and clobber data in the local
8058 frame. */
8059 emit_insn (gen_blockage ());
8060 output_stack_adjust (frame_size, stack_pointer_rtx, e,
8061 &live_regs_mask, true);
8062 }
8063
8064 if (SHMEDIA_REGS_STACK_ADJUST ())
8065 {
8066 function_symbol (gen_rtx_REG (Pmode, R0_REG),
8067 (TARGET_FPU_ANY
8068 ? "__GCC_pop_shmedia_regs"
8069 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
8070 /* This must NOT go through the PLT, otherwise mach and macl
8071 may be clobbered. */
8072 emit_insn (gen_shmedia_save_restore_regs_compact
8073 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
8074 }
8075
8076 /* Pop all the registers. */
8077
8078 if (target_flags != save_flags && ! current_function_interrupt)
8079 emit_insn (gen_toggle_sz ());
8080 if (TARGET_SH5)
8081 {
8082 int offset_base, offset;
8083 int offset_in_r0 = -1;
8084 int sp_in_r0 = 0;
8085 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
8086 save_schedule schedule;
8087 save_entry *entry;
8088 int *tmp_pnt;
8089
8090 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
8091 offset_base = -entry[1].offset + d_rounding;
8092 tmp_pnt = schedule.temps;
8093 for (; entry->mode != VOIDmode; entry--)
8094 {
8095 machine_mode mode = (machine_mode) entry->mode;
8096 int reg = entry->reg;
8097 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
8098
8099 offset = offset_base + entry->offset;
8100 reg_rtx = gen_rtx_REG (mode, reg);
8101
8102 mem_rtx = gen_frame_mem (mode,
8103 gen_rtx_PLUS (Pmode,
8104 stack_pointer_rtx,
8105 GEN_INT (offset)));
8106
8107 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
8108 mem_rtx = NULL_RTX;
8109
8110 if (HAVE_POST_INCREMENT
8111 && (offset == offset_in_r0
8112 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
8113 && mem_rtx == NULL_RTX)
8114 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
8115 {
8116 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
8117
8118 if (!memory_address_p (mode, XEXP (post_inc, 0)))
8119 post_inc = NULL_RTX;
8120 else
8121 mem_rtx = NULL_RTX;
8122 }
8123
8124 if (mem_rtx != NULL_RTX)
8125 goto addr_ok;
8126
8127 if (offset_in_r0 == -1)
8128 {
8129 emit_move_insn (r0, GEN_INT (offset));
8130 offset_in_r0 = offset;
8131 }
8132 else if (offset != offset_in_r0)
8133 {
8134 emit_move_insn (r0,
8135 gen_rtx_PLUS
8136 (Pmode, r0,
8137 GEN_INT (offset - offset_in_r0)));
8138 offset_in_r0 += offset - offset_in_r0;
8139 }
8140
8141 if (post_inc != NULL_RTX)
8142 {
8143 if (! sp_in_r0)
8144 {
8145 emit_move_insn (r0,
8146 gen_rtx_PLUS
8147 (Pmode, r0, stack_pointer_rtx));
8148 sp_in_r0 = 1;
8149 }
8150
8151 mem_rtx = post_inc;
8152
8153 offset_in_r0 += GET_MODE_SIZE (mode);
8154 }
8155 else if (sp_in_r0)
8156 mem_rtx = gen_frame_mem (mode, r0);
8157 else
8158 mem_rtx = gen_frame_mem (mode,
8159 gen_rtx_PLUS (Pmode,
8160 stack_pointer_rtx,
8161 r0));
8162
8163 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8164 || mem_rtx == post_inc);
8165
8166 addr_ok:
8167 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8168 && mem_rtx != post_inc)
8169 {
8170 emit_move_insn (r0, mem_rtx);
8171 mem_rtx = r0;
8172 }
8173 else if (TARGET_REGISTER_P (reg))
8174 {
8175 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8176
8177 /* Give the scheduler a bit of freedom by using up to
8178 MAX_TEMPS registers in a round-robin fashion. */
8179 emit_move_insn (tmp_reg, mem_rtx);
8180 mem_rtx = tmp_reg;
8181 if (*++tmp_pnt < 0)
8182 tmp_pnt = schedule.temps;
8183 }
8184
8185 emit_move_insn (reg_rtx, mem_rtx);
8186 }
8187
8188 gcc_assert (entry->offset + offset_base == d + d_rounding);
8189 }
8190 else /* ! TARGET_SH5 */
8191 {
8192 int last_reg;
8193
8194 save_size = 0;
8195 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
8196 register. */
8197 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8198 && !sh_cfun_resbank_handler_p ())
8199 {
8200 if (!frame_pointer_needed)
8201 emit_insn (gen_blockage ());
8202 pop (PR_REG);
8203 }
8204
8205 /* Banked registers are popped first to avoid being scheduled in the
8206 delay slot. RTE switches banks before its delay slot instruction. */
8207 if (current_function_interrupt)
8208 {
8209 bool use_movml = false;
8210
8211 if (TARGET_SH2A)
8212 {
8213 unsigned int count = 0;
8214
8215 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8216 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8217 count++;
8218 else
8219 break;
8220
8221 /* Use movml when all banked registers are popped. */
8222 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8223 use_movml = true;
8224 }
8225
8226 if (sh_cfun_resbank_handler_p ())
8227 ; /* Do nothing. */
8228 else if (use_movml)
8229 {
8230 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8231
8232 /* We must avoid scheduling the multiple-load insn together with
8233 other insns. */
8234 emit_insn (gen_blockage ());
8235 emit_insn (gen_movml_pop_banked (sp_reg));
8236 emit_insn (gen_blockage ());
8237 }
8238 else
8239 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8240 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8241 pop (i);
8242
8243 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8244 }
8245 else
8246 last_reg = FIRST_PSEUDO_REGISTER;
8247
8248 for (i = 0; i < last_reg; i++)
8249 {
8250 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8251
8252 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8253 && hard_reg_set_intersect_p (live_regs_mask,
8254 reg_class_contents[DF_REGS]))
8255 fpscr_deferred = 1;
8256 /* For an ISR with the RESBANK attribute assigned, don't pop the
8257 following registers: R0-R14, MACH, MACL and GBR. */
8258 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8259 && ! (sh_cfun_resbank_handler_p ()
8260 && ((j >= FIRST_GENERAL_REG
8261 && j < LAST_GENERAL_REG)
8262 || j == MACH_REG
8263 || j == MACL_REG
8264 || j == GBR_REG)))
8265 pop (j);
8266
8267 if (j == FIRST_FP_REG && fpscr_deferred)
8268 pop (FPSCR_REG);
8269 }
8270 }
8271 if (target_flags != save_flags && ! current_function_interrupt)
8272 emit_insn (gen_toggle_sz ());
8273 target_flags = save_flags;
8274
8275 output_stack_adjust (crtl->args.pretend_args_size
8276 + save_size + d_rounding
8277 + crtl->args.info.stack_regs * 8,
8278 stack_pointer_rtx, e, NULL, true);
8279
8280 if (crtl->calls_eh_return)
8281 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8282 EH_RETURN_STACKADJ_RTX));
8283
8284 /* Switch back to the normal stack if necessary. */
8285 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8286 emit_insn (gen_sp_switch_2 ());
8287
8288 /* Tell flow the insn that pops PR isn't dead. */
8289 /* PR_REG will never be live in SHmedia mode, and we don't need to
8290 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8291 by the return pattern. */
8292 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8293 emit_use (gen_rtx_REG (SImode, PR_REG));
8294 }
8295
8296 /* Emit code to change the current function's return address to RA.
8297 TEMP is available as a scratch register, if needed. */
8298 void
8299 sh_set_return_address (rtx ra, rtx tmp)
8300 {
8301 HARD_REG_SET live_regs_mask;
8302 int d;
8303 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8304 int pr_offset;
8305
8306 d = calc_live_regs (&live_regs_mask);
8307
8308 /* If pr_reg isn't live, we can set it (or the register given in
8309 sh_media_register_for_return) directly. */
8310 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8311 {
8312 rtx rr;
8313
8314 if (TARGET_SHMEDIA)
8315 {
8316 int rr_regno = sh_media_register_for_return ();
8317
8318 if (rr_regno < 0)
8319 rr_regno = pr_reg;
8320
8321 rr = gen_rtx_REG (DImode, rr_regno);
8322 }
8323 else
8324 rr = gen_rtx_REG (SImode, pr_reg);
8325
8326 emit_insn (GEN_MOV (rr, ra));
8327 /* Tell flow the register for return isn't dead. */
8328 emit_use (rr);
8329 return;
8330 }
8331
8332 if (TARGET_SH5)
8333 {
8334 int offset;
8335 save_schedule schedule;
8336 save_entry *entry;
8337
8338 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8339 offset = entry[1].offset;
8340 for (; entry->mode != VOIDmode; entry--)
8341 if (entry->reg == pr_reg)
8342 goto found;
8343
8344 /* We couldn't find the PR register. */
8345 gcc_unreachable ();
8346
8347 found:
8348 offset = entry->offset - offset;
8349 pr_offset = (rounded_frame_size (d) + offset
8350 + SHMEDIA_REGS_STACK_ADJUST ());
8351 }
8352 else
8353 pr_offset = rounded_frame_size (d);
8354
8355 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8356
8357 if (frame_pointer_needed)
8358 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8359 else
8360 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8361
8362 tmp = gen_frame_mem (Pmode, tmp);
8363 emit_insn (GEN_MOV (tmp, ra));
8364 /* Tell flow this store isn't dead. */
8365 emit_use (tmp);
8366 }
8367
8368 /* Clear variables at function end. */
8369 static void
8370 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8371 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8372 {
8373 }
8374
8375 static rtx
8376 sh_builtin_saveregs (void)
8377 {
8378 /* First unnamed integer register. */
8379 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8380 /* Number of integer registers we need to save. */
8381 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8382 /* First unnamed SFmode float reg. */
8383 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8384 /* Number of SFmode float regs to save. */
8385 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8386 rtx regbuf, fpregs;
8387 int bufsize, regno;
8388 alias_set_type alias_set;
8389
8390 if (TARGET_SH5)
8391 {
8392 if (n_intregs)
8393 {
8394 int pushregs = n_intregs;
8395
8396 while (pushregs < NPARM_REGS (SImode) - 1
8397 && (CALL_COOKIE_INT_REG_GET
8398 (crtl->args.info.call_cookie,
8399 NPARM_REGS (SImode) - pushregs)
8400 == 1))
8401 {
8402 crtl->args.info.call_cookie
8403 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8404 - pushregs, 1);
8405 pushregs++;
8406 }
8407
8408 if (pushregs == NPARM_REGS (SImode))
8409 crtl->args.info.call_cookie
8410 |= (CALL_COOKIE_INT_REG (0, 1)
8411 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8412 else
8413 crtl->args.info.call_cookie
8414 |= CALL_COOKIE_STACKSEQ (pushregs);
8415
8416 crtl->args.pretend_args_size += 8 * n_intregs;
8417 }
8418 if (TARGET_SHCOMPACT)
8419 return const0_rtx;
8420 }
8421
8422 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8423 {
8424 error ("__builtin_saveregs not supported by this subtarget");
8425 return const0_rtx;
8426 }
8427
8428 if (TARGET_SHMEDIA)
8429 n_floatregs = 0;
8430
8431 /* Allocate block of memory for the regs. */
8432 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8433 Or can assign_stack_local accept a 0 SIZE argument? */
8434 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8435
8436 if (TARGET_SHMEDIA)
8437 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8438 else if (n_floatregs & 1)
8439 {
8440 rtx addr;
8441
8442 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8443 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8444 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8445 regbuf = change_address (regbuf, BLKmode, addr);
8446 }
8447 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8448 {
8449 rtx addr, mask;
8450
8451 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8452 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8453 XEXP (regbuf, 0), 4));
8454 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8455 emit_insn (gen_andsi3 (addr, addr, mask));
8456 regbuf = change_address (regbuf, BLKmode, addr);
8457 }
8458 else
8459 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8460 alias_set = get_varargs_alias_set ();
8461 set_mem_alias_set (regbuf, alias_set);
8462
8463 /* Save int args.
8464 This is optimized to only save the regs that are necessary. Explicitly
8465 named args need not be saved. */
8466 if (n_intregs > 0)
8467 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8468 adjust_address (regbuf, BLKmode,
8469 n_floatregs * UNITS_PER_WORD),
8470 n_intregs);
8471
8472 if (TARGET_SHMEDIA)
8473 /* Return the address of the regbuf. */
8474 return XEXP (regbuf, 0);
8475
8476 /* Save float args.
8477 This is optimized to only save the regs that are necessary. Explicitly
8478 named args need not be saved.
8479 We explicitly build a pointer to the buffer because it halves the insn
8480 count when not optimizing (otherwise the pointer is built for each reg
8481 saved).
8482 We emit the moves in reverse order so that we can use predecrement. */
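/* Concretely, the loop below walks the buffer from the top down: each
   iteration first decrements FPREGS by one or two words and then stores
   an SFmode or DFmode argument register through it, so a single pointer
   register serves the whole sequence.  (Descriptive sketch.)  */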
8483
8484 fpregs = copy_to_mode_reg (Pmode,
8485 plus_constant (Pmode, XEXP (regbuf, 0),
8486 n_floatregs * UNITS_PER_WORD));
8487 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8488 {
8489 rtx mem;
8490 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8491 {
8492 emit_insn (gen_addsi3 (fpregs, fpregs,
8493 GEN_INT (-2 * UNITS_PER_WORD)));
8494 mem = change_address (regbuf, DFmode, fpregs);
8495 emit_move_insn (mem,
8496 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8497 }
8498 regno = first_floatreg;
8499 if (regno & 1)
8500 {
8501 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8502 mem = change_address (regbuf, SFmode, fpregs);
8503 emit_move_insn (mem,
8504 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8505 + regno - SH_REG_MSW_OFFSET));
8506 }
8507 }
8508 else
8509 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8510 {
8511 rtx mem;
8512
8513 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8514 mem = change_address (regbuf, SFmode, fpregs);
8515 emit_move_insn (mem,
8516 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8517 }
8518
8519 /* Return the address of the regbuf. */
8520 return XEXP (regbuf, 0);
8521 }
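/* Sketch of the buffer built above for the non-SHmedia case:
     regbuf + 0 .. n_floatregs * UNITS_PER_WORD - 1    unnamed float args
                                                       (stored top-down)
     regbuf + n_floatregs * UNITS_PER_WORD ..          unnamed integer args
   The returned address is the start of the buffer; sh_va_start below uses
   it to initialise the __va_next_* fields.  (Illustrative layout.)  */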
8522
8523 /* Define the `__builtin_va_list' type for the ABI. */
8524 static tree
8525 sh_build_builtin_va_list (void)
8526 {
8527 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8528 tree record, type_decl;
8529
8530 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8531 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8532 return ptr_type_node;
8533
8534 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8535 type_decl = build_decl (BUILTINS_LOCATION,
8536 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8537
8538 f_next_o = build_decl (BUILTINS_LOCATION,
8539 FIELD_DECL, get_identifier ("__va_next_o"),
8540 ptr_type_node);
8541 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8542 FIELD_DECL,
8543 get_identifier ("__va_next_o_limit"),
8544 ptr_type_node);
8545 f_next_fp = build_decl (BUILTINS_LOCATION,
8546 FIELD_DECL, get_identifier ("__va_next_fp"),
8547 ptr_type_node);
8548 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8549 FIELD_DECL,
8550 get_identifier ("__va_next_fp_limit"),
8551 ptr_type_node);
8552 f_next_stack = build_decl (BUILTINS_LOCATION,
8553 FIELD_DECL, get_identifier ("__va_next_stack"),
8554 ptr_type_node);
8555
8556 DECL_FIELD_CONTEXT (f_next_o) = record;
8557 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8558 DECL_FIELD_CONTEXT (f_next_fp) = record;
8559 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8560 DECL_FIELD_CONTEXT (f_next_stack) = record;
8561
8562 TYPE_STUB_DECL (record) = type_decl;
8563 TYPE_NAME (record) = type_decl;
8564 TYPE_FIELDS (record) = f_next_o;
8565 DECL_CHAIN (f_next_o) = f_next_o_limit;
8566 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8567 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8568 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8569
8570 layout_type (record);
8571
8572 return record;
8573 }
8574
8575 /* Implement `va_start' for varargs and stdarg. */
8576 static void
8577 sh_va_start (tree valist, rtx nextarg)
8578 {
8579 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8580 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8581 tree t, u;
8582 int nfp, nint;
8583
8584 if (TARGET_SH5)
8585 {
8586 expand_builtin_saveregs ();
8587 std_expand_builtin_va_start (valist, nextarg);
8588 return;
8589 }
8590
8591 if ((! TARGET_SH2E && ! TARGET_SH4)
8592 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8593 {
8594 std_expand_builtin_va_start (valist, nextarg);
8595 return;
8596 }
8597
8598 f_next_o = TYPE_FIELDS (va_list_type_node);
8599 f_next_o_limit = DECL_CHAIN (f_next_o);
8600 f_next_fp = DECL_CHAIN (f_next_o_limit);
8601 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8602 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8603
8604 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8605 NULL_TREE);
8606 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8607 valist, f_next_o_limit, NULL_TREE);
8608 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8609 NULL_TREE);
8610 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8611 valist, f_next_fp_limit, NULL_TREE);
8612 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8613 valist, f_next_stack, NULL_TREE);
8614
8615 /* Call __builtin_saveregs. */
8616 u = make_tree (sizetype, expand_builtin_saveregs ());
8617 u = fold_convert (ptr_type_node, u);
8618 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8619 TREE_SIDE_EFFECTS (t) = 1;
8620 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8621
8622 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8623 if (nfp < 8)
8624 nfp = 8 - nfp;
8625 else
8626 nfp = 0;
8627 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8628 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8629 TREE_SIDE_EFFECTS (t) = 1;
8630 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8631
8632 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8633 TREE_SIDE_EFFECTS (t) = 1;
8634 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8635
8636 nint = crtl->args.info.arg_count[SH_ARG_INT];
8637 if (nint < 4)
8638 nint = 4 - nint;
8639 else
8640 nint = 0;
8641 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8642 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8643 TREE_SIDE_EFFECTS (t) = 1;
8644 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8645
8646 u = make_tree (ptr_type_node, nextarg);
8647 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8648 TREE_SIDE_EFFECTS (t) = 1;
8649 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8650 }
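/* Concretely, for a callee with one named integer argument and no named
   float arguments the assignments above amount to (sketch):
     __va_next_fp       = regbuf
     __va_next_fp_limit = regbuf + 8 * UNITS_PER_WORD
     __va_next_o        = __va_next_fp_limit
     __va_next_o_limit  = __va_next_o + 3 * UNITS_PER_WORD
     __va_next_stack    = address of the first stack-passed argument.  */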
8651
8652 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8653 member, return it. */
8654 static tree
8655 find_sole_member (tree type)
8656 {
8657 tree field, member = NULL_TREE;
8658
8659 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8660 {
8661 if (TREE_CODE (field) != FIELD_DECL)
8662 continue;
8663 if (!DECL_SIZE (field))
8664 return NULL_TREE;
8665 if (integer_zerop (DECL_SIZE (field)))
8666 continue;
8667 if (member)
8668 return NULL_TREE;
8669 member = field;
8670 }
8671 return member;
8672 }
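/* For example, for "struct { float x; }" this returns the FIELD_DECL of
   x, whereas "struct { float x; int y; }" (two nonzero-sized members) and
   a struct whose only field lacks a DECL_SIZE both yield NULL_TREE.
   (Illustrative cases.)  */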
8673
8674 /* Implement `va_arg'. */
8675 static tree
8676 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8677 gimple_seq *post_p ATTRIBUTE_UNUSED)
8678 {
8679 HOST_WIDE_INT size, rsize;
8680 tree tmp, pptr_type_node;
8681 tree addr, lab_over = NULL, result = NULL;
8682 bool pass_by_ref;
8683 tree eff_type;
8684
8685 if (!VOID_TYPE_P (type))
8686 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8687 else
8688 pass_by_ref = false;
8689
8690 if (pass_by_ref)
8691 type = build_pointer_type (type);
8692
8693 size = int_size_in_bytes (type);
8694 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8695 pptr_type_node = build_pointer_type (ptr_type_node);
8696
8697 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8698 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8699 {
8700 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8701 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8702 int pass_as_float;
8703 tree lab_false;
8704 tree member;
8705
8706 f_next_o = TYPE_FIELDS (va_list_type_node);
8707 f_next_o_limit = DECL_CHAIN (f_next_o);
8708 f_next_fp = DECL_CHAIN (f_next_o_limit);
8709 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8710 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8711
8712 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8713 NULL_TREE);
8714 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8715 valist, f_next_o_limit, NULL_TREE);
8716 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8717 valist, f_next_fp, NULL_TREE);
8718 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8719 valist, f_next_fp_limit, NULL_TREE);
8720 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8721 valist, f_next_stack, NULL_TREE);
8722
8723 /* Structures with a single member with a distinct mode are passed
8724 like their member. This is relevant if the latter has a REAL_TYPE
8725 or COMPLEX_TYPE type. */
8726 eff_type = type;
8727 while (TREE_CODE (eff_type) == RECORD_TYPE
8728 && (member = find_sole_member (eff_type))
8729 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8730 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8731 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8732 {
8733 tree field_type = TREE_TYPE (member);
8734
8735 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8736 eff_type = field_type;
8737 else
8738 {
8739 gcc_assert ((TYPE_ALIGN (eff_type)
8740 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8741 || (TYPE_ALIGN (eff_type)
8742 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8743 break;
8744 }
8745 }
8746
8747 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8748 {
8749 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8750 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8751 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8752 && size <= 16));
8753 }
8754 else
8755 {
8756 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8757 }
8758
8759 addr = create_tmp_var (pptr_type_node);
8760 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8761 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8762
8763 valist = build_simple_mem_ref (addr);
8764
8765 if (pass_as_float)
8766 {
8767 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
8768 tree cmp;
8769 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8770
8771 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8772 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8773
8774 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8775 tmp = next_fp_limit;
8776 if (size > 4 && !is_double)
8777 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8778 tmp = build2 (GE_EXPR, boolean_type_node,
8779 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8780 cmp = build3 (COND_EXPR, void_type_node, tmp,
8781 build1 (GOTO_EXPR, void_type_node,
8782 unshare_expr (lab_false)), NULL_TREE);
8783 if (!is_double)
8784 gimplify_and_add (cmp, pre_p);
8785
8786 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8787 || (is_double || size == 16))
8788 {
8789 tmp = fold_convert (sizetype, next_fp_tmp);
8790 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8791 size_int (UNITS_PER_WORD));
8792 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8793 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8794 }
8795 if (is_double)
8796 gimplify_and_add (cmp, pre_p);
8797
8798 #ifdef FUNCTION_ARG_SCmode_WART
8799 if (TYPE_MODE (eff_type) == SCmode
8800 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8801 {
8802 tree subtype = TREE_TYPE (eff_type);
8803 tree real, imag;
8804
8805 imag
8806 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8807 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8808
8809 real
8810 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8811 real = get_initialized_tmp_var (real, pre_p, NULL);
8812
8813 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8814 if (type != eff_type)
8815 result = build1 (VIEW_CONVERT_EXPR, type, result);
8816 result = get_initialized_tmp_var (result, pre_p, NULL);
8817 }
8818 #endif /* FUNCTION_ARG_SCmode_WART */
8819
8820 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8821 gimplify_and_add (tmp, pre_p);
8822
8823 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8824 gimplify_and_add (tmp, pre_p);
8825
8826 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8827 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8828 gimplify_assign (unshare_expr (next_fp_tmp),
8829 unshare_expr (valist), pre_p);
8830
8831 gimplify_assign (unshare_expr (valist),
8832 unshare_expr (next_fp_tmp), post_p);
8833 valist = next_fp_tmp;
8834 }
8835 else
8836 {
8837 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8838 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8839 unshare_expr (next_o_limit));
8840 tmp = build3 (COND_EXPR, void_type_node, tmp,
8841 build1 (GOTO_EXPR, void_type_node,
8842 unshare_expr (lab_false)),
8843 NULL_TREE);
8844 gimplify_and_add (tmp, pre_p);
8845
8846 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8847 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8848
8849 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8850 gimplify_and_add (tmp, pre_p);
8851
8852 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8853 gimplify_and_add (tmp, pre_p);
8854
8855 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8856 gimplify_assign (unshare_expr (next_o),
8857 unshare_expr (next_o_limit), pre_p);
8858
8859 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8860 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8861 }
8862
8863 if (!result)
8864 {
8865 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8866 gimplify_and_add (tmp, pre_p);
8867 }
8868 }
8869
8870 /* ??? In va-sh.h, there had been code to make values larger than
8871 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8872
8873 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8874 if (result)
8875 {
8876 gimplify_assign (result, tmp, pre_p);
8877 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8878 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8879 gimplify_and_add (tmp, pre_p);
8880 }
8881 else
8882 result = tmp;
8883
8884 if (pass_by_ref)
8885 result = build_va_arg_indirect_ref (result);
8886
8887 return result;
8888 }
8889
8890 /* 64 bit floating point memory transfers are paired single precision loads
8891 or stores. So DWARF information needs fixing in little endian (unless
8892 PR=SZ=1 in FPSCR). */
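/* Illustrative note (deduced from the code below, not an extra rule): on
   little endian targets a DFmode value living in FP register N is described
   to DWARF as the SFmode register pair (N + 1, N).  */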
8893 rtx
8894 sh_dwarf_register_span (rtx reg)
8895 {
8896 unsigned regno = REGNO (reg);
8897
8898 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8899 return NULL_RTX;
8900
8901 return
8902 gen_rtx_PARALLEL (VOIDmode,
8903 gen_rtvec (2,
8904 gen_rtx_REG (SFmode, regno + 1),
8905 gen_rtx_REG (SFmode, regno)));
8906 }
8907
8908 static machine_mode
8909 sh_promote_function_mode (const_tree type, machine_mode mode,
8910 int *punsignedp, const_tree funtype,
8911 int for_return)
8912 {
8913 if (sh_promote_prototypes (funtype))
8914 return promote_mode (type, mode, punsignedp);
8915 else
8916 return default_promote_function_mode (type, mode, punsignedp, funtype,
8917 for_return);
8918 }
8919
8920 static bool
8921 sh_promote_prototypes (const_tree type)
8922 {
8923 if (TARGET_HITACHI)
8924 return false;
8925 if (! type)
8926 return true;
8927 return ! sh_attr_renesas_p (type);
8928 }
8929
8930 /* Whether an argument must be passed by reference. On SHcompact, we
8931 pretend arguments wider than 32 bits that would have been passed in
8932 registers are passed by reference, so that an SHmedia trampoline
8933 loads them into the full 64-bit registers. */
8934 static int
8935 shcompact_byref (const CUMULATIVE_ARGS *cum, machine_mode mode,
8936 const_tree type, bool named)
8937 {
8938 unsigned HOST_WIDE_INT size;
8939
8940 if (type)
8941 size = int_size_in_bytes (type);
8942 else
8943 size = GET_MODE_SIZE (mode);
8944
8945 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8946 && (!named
8947 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8948 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8949 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8950 && size > 4
8951 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8952 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8953 return size;
8954 else
8955 return 0;
8956 }
8957
8958 static bool
8959 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8960 const_tree type, bool named)
8961 {
8962 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8963
8964 if (targetm.calls.must_pass_in_stack (mode, type))
8965 return true;
8966
8967 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8968 wants to know about pass-by-reference semantics for incoming
8969 arguments. */
8970 if (! cum)
8971 return false;
8972
8973 if (TARGET_SHCOMPACT)
8974 {
8975 cum->byref = shcompact_byref (cum, mode, type, named);
8976 return cum->byref != 0;
8977 }
8978
8979 return false;
8980 }
8981
8982 static bool
8983 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
8984 const_tree type, bool named ATTRIBUTE_UNUSED)
8985 {
8986 /* ??? How can it possibly be correct to return true only on the
8987 caller side of the equation? Is there someplace else in the
8988 sh backend that's magically producing the copies? */
8989 return (get_cumulative_args (cum)->outgoing
8990 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8991 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8992 }
8993
8994 /* Round a register number up to a proper boundary for an arg of mode
8995 MODE.
8996 The SH doesn't care about double alignment, so we only
8997 round doubles to even regs when explicitly asked to. */
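/* For instance (illustrative), when the alignment condition below holds,
   a register count of 3 is rounded to 3 + (3 & 1) == 4, while an even
   count such as 4 is left unchanged.  */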
8998 static int
8999 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
9000 {
9001 /* FIXME: This used to be a macro and has been copy pasted into this
9002 function as is. Make this more readable. */
9003 return
9004 (((TARGET_ALIGN_DOUBLE
9005 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9006 && (mode == DFmode || mode == DCmode)
9007 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
9008 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
9009 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
9010 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
9011 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
9012 }
9013
9014 /* Return true if arg of the specified mode should be passed in a register
9015 or false otherwise. */
9016 static bool
9017 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
9018 const_tree type)
9019 {
9020 /* FIXME: This used to be a macro and has been copy pasted into this
9021 function as is. Make this more readable. */
9022 return
9023 ((type == 0
9024 || (! TREE_ADDRESSABLE (type)
9025 && (! (TARGET_HITACHI || cum.renesas_abi)
9026 || ! (AGGREGATE_TYPE_P (type)
9027 || (!TARGET_FPU_ANY
9028 && (GET_MODE_CLASS (mode) == MODE_FLOAT
9029 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
9030 && ! cum.force_mem
9031 && (TARGET_SH2E
9032 ? ((mode) == BLKmode
9033 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
9034 + int_size_in_bytes (type))
9035 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
9036 : ((sh_round_reg (cum, mode)
9037 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
9038 <= NPARM_REGS (mode)))
9039 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
9040 }
9041
9042 static int
9043 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9044 tree type, bool named ATTRIBUTE_UNUSED)
9045 {
9046 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9047 int words = 0;
9048
9049 if (!TARGET_SH5
9050 && sh_pass_in_reg_p (*cum, mode, type)
9051 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
9052 && (sh_round_reg (*cum, mode)
9053 + (mode != BLKmode
9054 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
9055 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
9056 > NPARM_REGS (mode)))
9057 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
9058
9059 else if (!TARGET_SHCOMPACT
9060 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
9061 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
9062
9063 return words * UNITS_PER_WORD;
9064 }
9065
9066
9067 /* Define where to put the arguments to a function.
9068 Value is zero to push the argument on the stack,
9069 or a hard register in which to store the argument.
9070
9071 MODE is the argument's machine mode.
9072 TYPE is the data type of the argument (as a tree).
9073 This is null for libcalls where that information may
9074 not be available.
9075 CUM is a variable of type CUMULATIVE_ARGS which gives info about
9076 the preceding args and about the function being called.
9077 NAMED is nonzero if this argument is a named parameter
9078 (otherwise it is an extra parameter matching an ellipsis).
9079
9080 On SH the first args are normally in registers
9081 and the rest are pushed. Any arg that starts within the first
9082 NPARM_REGS words is at least partially passed in a register unless
9083 its data type forbids. */
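/* Illustrative example (assuming the plain SH ELF ABI, no Renesas/Hitachi
   attributes): for
       int f (int a, int b, long long c, int d);
   a, b and the two words of c occupy r4..r7, and d goes on the stack,
   since only the first NPARM_REGS (SImode) argument words are passed in
   integer registers.  */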
9084 static rtx
9085 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
9086 const_tree type, bool named)
9087 {
9088 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9089
9090 if (! TARGET_SH5 && mode == VOIDmode)
9091 return GEN_INT (ca->renesas_abi ? 1 : 0);
9092
9093 if (! TARGET_SH5
9094 && sh_pass_in_reg_p (*ca, mode, type)
9095 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
9096 {
9097 int regno;
9098
9099 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
9100 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
9101 {
9102 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
9103 gen_rtx_REG (SFmode,
9104 BASE_ARG_REG (mode)
9105 + (sh_round_reg (*ca, mode) ^ 1)),
9106 const0_rtx);
9107 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
9108 gen_rtx_REG (SFmode,
9109 BASE_ARG_REG (mode)
9110 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
9111 GEN_INT (4));
9112 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
9113 }
9114
9115 /* If the alignment of a DF value causes an SF register to be
9116 skipped, we will use that skipped register for the next SF
9117 value. */
9118 if ((TARGET_HITACHI || ca->renesas_abi)
9119 && ca->free_single_fp_reg
9120 && mode == SFmode)
9121 return gen_rtx_REG (mode, ca->free_single_fp_reg);
9122
9123 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
9124 ^ (mode == SFmode && TARGET_SH4
9125 && TARGET_LITTLE_ENDIAN
9126 && ! TARGET_HITACHI && ! ca->renesas_abi);
9127 return gen_rtx_REG (mode, regno);
9128
9129 }
9130
9131 if (TARGET_SH5)
9132 {
9133 if (mode == VOIDmode && TARGET_SHCOMPACT)
9134 return GEN_INT (ca->call_cookie);
9135
9136 /* The following test assumes unnamed arguments are promoted to
9137 DFmode. */
9138 if (mode == SFmode && ca->free_single_fp_reg)
9139 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
9140
9141 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
9142 && (named || ! ca->prototype_p)
9143 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
9144 {
9145 if (! ca->prototype_p && TARGET_SHMEDIA)
9146 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
9147
9148 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
9149 FIRST_FP_PARM_REG
9150 + ca->arg_count[(int) SH_ARG_FLOAT]);
9151 }
9152
9153 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9154 && (! TARGET_SHCOMPACT
9155 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9156 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9157 type, named))))
9158 {
9159 return gen_rtx_REG (mode, (FIRST_PARM_REG
9160 + ca->arg_count[(int) SH_ARG_INT]));
9161 }
9162
9163 return NULL_RTX;
9164 }
9165
9166 return NULL_RTX;
9167 }
9168
9169 /* Update the data in CUM to advance over an argument
9170 of mode MODE and data type TYPE.
9171 (TYPE is null for libcalls where that information may not be
9172 available.) */
9173 static void
9174 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
9175 const_tree type, bool named)
9176 {
9177 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9178
9179 if (ca->force_mem)
9180 ca->force_mem = 0;
9181 else if (TARGET_SH5)
9182 {
9183 const_tree type2 = (ca->byref && type
9184 ? TREE_TYPE (type)
9185 : type);
9186 machine_mode mode2 = (ca->byref && type
9187 ? TYPE_MODE (type2)
9188 : mode);
9189 int dwords = ((ca->byref
9190 ? ca->byref
9191 : mode2 == BLKmode
9192 ? int_size_in_bytes (type2)
9193 : GET_MODE_SIZE (mode2)) + 7) / 8;
9194 int numregs = MIN (dwords, NPARM_REGS (SImode)
9195 - ca->arg_count[(int) SH_ARG_INT]);
9196
9197 if (numregs)
9198 {
9199 ca->arg_count[(int) SH_ARG_INT] += numregs;
9200 if (TARGET_SHCOMPACT
9201 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9202 {
9203 ca->call_cookie
9204 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9205 - numregs, 1);
9206 /* N.B. We want this also for outgoing. */
9207 ca->stack_regs += numregs;
9208 }
9209 else if (ca->byref)
9210 {
9211 if (! ca->outgoing)
9212 ca->stack_regs += numregs;
9213 ca->byref_regs += numregs;
9214 ca->byref = 0;
9215 do
9216 ca->call_cookie
9217 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9218 - numregs, 2);
9219 while (--numregs);
9220 ca->call_cookie
9221 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9222 - 1, 1);
9223 }
9224 else if (dwords > numregs)
9225 {
9226 int pushregs = numregs;
9227
9228 if (TARGET_SHCOMPACT)
9229 ca->stack_regs += numregs;
9230 while (pushregs < NPARM_REGS (SImode) - 1
9231 && (CALL_COOKIE_INT_REG_GET
9232 (ca->call_cookie,
9233 NPARM_REGS (SImode) - pushregs)
9234 == 1))
9235 {
9236 ca->call_cookie
9237 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9238 - pushregs, 1);
9239 pushregs++;
9240 }
9241 if (numregs == NPARM_REGS (SImode))
9242 ca->call_cookie
9243 |= CALL_COOKIE_INT_REG (0, 1)
9244 | CALL_COOKIE_STACKSEQ (numregs - 1);
9245 else
9246 ca->call_cookie
9247 |= CALL_COOKIE_STACKSEQ (numregs);
9248 }
9249 }
9250 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9251 && (named || ! ca->prototype_p))
9252 {
9253 if (mode2 == SFmode && ca->free_single_fp_reg)
9254 ca->free_single_fp_reg = 0;
9255 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9256 < NPARM_REGS (SFmode))
9257 {
9258 int numfpregs
9259 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9260 NPARM_REGS (SFmode)
9261 - ca->arg_count[(int) SH_ARG_FLOAT]);
9262
9263 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9264
9265 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9266 {
9267 if (ca->outgoing && numregs > 0)
9268 do
9269 {
9270 ca->call_cookie
9271 |= (CALL_COOKIE_INT_REG
9272 (ca->arg_count[(int) SH_ARG_INT]
9273 - numregs + ((numfpregs - 2) / 2),
9274 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9275 - numfpregs) / 2));
9276 }
9277 while (numfpregs -= 2);
9278 }
9279 else if (mode2 == SFmode && (named)
9280 && (ca->arg_count[(int) SH_ARG_FLOAT]
9281 < NPARM_REGS (SFmode)))
9282 ca->free_single_fp_reg
9283 = FIRST_FP_PARM_REG - numfpregs
9284 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9285 }
9286 }
9287 return;
9288 }
9289
9290 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9291 {
9292 /* Note that we've used the skipped register. */
9293 if (mode == SFmode && ca->free_single_fp_reg)
9294 {
9295 ca->free_single_fp_reg = 0;
9296 return;
9297 }
9298 /* When we have a DF after an SF, there's an SF register that gets
9299 skipped in order to align the DF value. We note this skipped
9300 register, because the next SF value will use it, and not the
9301 SF that follows the DF. */
9302 if (mode == DFmode
9303 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9304 {
9305 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9306 + BASE_ARG_REG (mode));
9307 }
9308 }
9309
9310 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9311 || sh_pass_in_reg_p (*ca, mode, type))
9312 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9313 = (sh_round_reg (*ca, mode)
9314 + (mode == BLKmode
9315 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9316 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9317 }
9318
9319 /* The Renesas calling convention doesn't quite fit into this scheme since
9320 the address is passed like an invisible argument, but one that is always
9321 passed in memory. */
9322 static rtx
9323 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9324 {
9325 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9326 return NULL_RTX;
9327 return gen_rtx_REG (Pmode, 2);
9328 }
9329
9330 /* Worker function for TARGET_FUNCTION_VALUE.
9331
9332 For the SH, this is like LIBCALL_VALUE, except that we must change the
9333 mode like PROMOTE_MODE does.
9334 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9335 tested here has to be kept in sync with the one in
9336 explow.c:promote_mode. */
9337 static rtx
9338 sh_function_value (const_tree valtype,
9339 const_tree fn_decl_or_type,
9340 bool outgoing ATTRIBUTE_UNUSED)
9341 {
9342 if (fn_decl_or_type
9343 && !DECL_P (fn_decl_or_type))
9344 fn_decl_or_type = NULL;
9345
9346 return gen_rtx_REG (
9347 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9348 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9349 && (TREE_CODE (valtype) == INTEGER_TYPE
9350 || TREE_CODE (valtype) == ENUMERAL_TYPE
9351 || TREE_CODE (valtype) == BOOLEAN_TYPE
9352 || TREE_CODE (valtype) == REAL_TYPE
9353 || TREE_CODE (valtype) == OFFSET_TYPE))
9354 && sh_promote_prototypes (fn_decl_or_type)
9355 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9356 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9357 }
9358
9359 /* Worker function for TARGET_LIBCALL_VALUE. */
9360 static rtx
9361 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9362 {
9363 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9364 }
9365
9366 /* Return true if N is a possible register number of function value. */
9367 static bool
9368 sh_function_value_regno_p (const unsigned int regno)
9369 {
9370 return ((regno) == FIRST_RET_REG
9371 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9372 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9373 }
9374
9375 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9376 static bool
9377 sh_return_in_memory (const_tree type, const_tree fndecl)
9378 {
9379 if (TARGET_SH5)
9380 {
9381 if (TYPE_MODE (type) == BLKmode)
9382 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9383 else
9384 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9385 }
9386 else
9387 {
9388 return (TYPE_MODE (type) == BLKmode
9389 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9390 && TREE_CODE (type) == RECORD_TYPE));
9391 }
9392 }
9393
9394 /* We actually emit the code in sh_expand_prologue. We used to use
9395 a static variable to flag that we need to emit this code, but that
9396 doesn't work when inlining, when functions are deferred and then emitted
9397 later. Fortunately, we already have two flags that are part of struct
9398 function that tell if a function uses varargs or stdarg. */
9399 static void
9400 sh_setup_incoming_varargs (cumulative_args_t ca,
9401 machine_mode mode,
9402 tree type,
9403 int *pretend_arg_size,
9404 int second_time ATTRIBUTE_UNUSED)
9405 {
9406 gcc_assert (cfun->stdarg);
9407 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9408 {
9409 int named_parm_regs, anon_parm_regs;
9410
9411 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9412 + (mode == BLKmode
9413 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9414 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9415 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9416 if (anon_parm_regs > 0)
9417 *pretend_arg_size = anon_parm_regs * 4;
9418 }
9419 }
9420
9421 static bool
9422 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9423 {
9424 return TARGET_SH5;
9425 }
9426
9427 static bool
9428 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9429 {
9430 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9431
9432 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9433 }
9434
9435
9436 /* Define the offset between two registers, one to be eliminated, and
9437 the other its replacement, at the start of a routine. */
9438 int
9439 initial_elimination_offset (int from, int to)
9440 {
9441 int regs_saved;
9442 int regs_saved_rounding = 0;
9443 int total_saved_regs_space;
9444 int total_auto_space;
9445 int save_flags = target_flags;
9446 int copy_flags;
9447 HARD_REG_SET live_regs_mask;
9448
9449 shmedia_space_reserved_for_target_registers = false;
9450 regs_saved = calc_live_regs (&live_regs_mask);
9451 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9452
9453 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9454 {
9455 shmedia_space_reserved_for_target_registers = true;
9456 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9457 }
9458
9459 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9460 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9461 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9462
9463 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9464 copy_flags = target_flags;
9465 target_flags = save_flags;
9466
9467 total_saved_regs_space = regs_saved + regs_saved_rounding;
9468
9469 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9470 return total_saved_regs_space + total_auto_space
9471 + crtl->args.info.byref_regs * 8;
9472
9473 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9474 return total_saved_regs_space + total_auto_space
9475 + crtl->args.info.byref_regs * 8;
9476
9477 /* Initial gap between fp and sp is 0. */
9478 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9479 return 0;
9480
9481 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9482 return rounded_frame_size (0);
9483
9484 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9485 return rounded_frame_size (0);
9486
9487 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9488 && (to == HARD_FRAME_POINTER_REGNUM
9489 || to == STACK_POINTER_REGNUM));
9490 if (TARGET_SH5)
9491 {
9492 int n = total_saved_regs_space;
9493 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9494 save_schedule schedule;
9495 save_entry *entry;
9496
9497 n += total_auto_space;
9498
9499 /* If it wasn't saved, there's not much we can do. */
9500 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9501 return n;
9502
9503 target_flags = copy_flags;
9504
9505 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9506 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9507 if (entry->reg == pr_reg)
9508 {
9509 target_flags = save_flags;
9510 return entry->offset;
9511 }
9512 gcc_unreachable ();
9513 }
9514 else
9515 return total_auto_space;
9516 }
9517
9518 /* Parse the -mfixed-range= option string. */
9519 void
9520 sh_fix_range (const char *const_str)
9521 {
9522 int i, first, last;
9523 char *str, *dash, *comma;
9524
9525 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9526 REG2 are either register names or register numbers. The effect
9527 of this option is to mark the registers in the range from REG1 to
9528 REG2 as ``fixed'' so they won't be used by the compiler. */
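/* For example (illustrative), -mfixed-range=r10-r13 marks r10 through r13
   as fixed, so the compiler will not allocate them.  */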
9529
9530 i = strlen (const_str);
9531 str = (char *) alloca (i + 1);
9532 memcpy (str, const_str, i + 1);
9533
9534 while (1)
9535 {
9536 dash = strchr (str, '-');
9537 if (!dash)
9538 {
9539 warning (0, "value of -mfixed-range must have form REG1-REG2");
9540 return;
9541 }
9542 *dash = '\0';
9543 comma = strchr (dash + 1, ',');
9544 if (comma)
9545 *comma = '\0';
9546
9547 first = decode_reg_name (str);
9548 if (first < 0)
9549 {
9550 warning (0, "unknown register name: %s", str);
9551 return;
9552 }
9553
9554 last = decode_reg_name (dash + 1);
9555 if (last < 0)
9556 {
9557 warning (0, "unknown register name: %s", dash + 1);
9558 return;
9559 }
9560
9561 *dash = '-';
9562
9563 if (first > last)
9564 {
9565 warning (0, "%s-%s is an empty range", str, dash + 1);
9566 return;
9567 }
9568
9569 for (i = first; i <= last; ++i)
9570 fixed_regs[i] = call_used_regs[i] = 1;
9571
9572 if (!comma)
9573 break;
9574
9575 *comma = ',';
9576 str = comma + 1;
9577 }
9578 }
9579 \f
9580 /* Insert any deferred function attributes from earlier pragmas. */
9581 static void
9582 sh_insert_attributes (tree node, tree *attributes)
9583 {
9584 tree attrs;
9585
9586 if (TREE_CODE (node) != FUNCTION_DECL)
9587 return;
9588
9589 /* We are only interested in fields. */
9590 if (!DECL_P (node))
9591 return;
9592
9593 /* Append the attributes to the deferred attributes. */
9594 *sh_deferred_function_attributes_tail = *attributes;
9595 attrs = sh_deferred_function_attributes;
9596 if (!attrs)
9597 return;
9598
9599 /* Some attributes imply or require the interrupt attribute. */
9600 if (!lookup_attribute ("interrupt_handler", attrs)
9601 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9602 {
9603 /* If we have a trapa_handler, but no interrupt_handler attribute,
9604 insert an interrupt_handler attribute. */
9605 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9606 /* We can't use sh_pr_interrupt here because that's not in the
9607 java frontend. */
9608 attrs
9609 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9610 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9611 if the interrupt attribute is missing, we ignore the attribute
9612 and warn. */
9613 else if (lookup_attribute ("sp_switch", attrs)
9614 || lookup_attribute ("trap_exit", attrs)
9615 || lookup_attribute ("nosave_low_regs", attrs)
9616 || lookup_attribute ("resbank", attrs))
9617 {
9618 tree *tail;
9619
9620 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9621 {
9622 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9623 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9624 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9625 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9626 warning (OPT_Wattributes,
9627 "%qE attribute only applies to interrupt functions",
9628 TREE_PURPOSE (attrs));
9629 else
9630 {
9631 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9632 NULL_TREE);
9633 tail = &TREE_CHAIN (*tail);
9634 }
9635 }
9636 attrs = *attributes;
9637 }
9638 }
9639
9640 /* Install the processed list. */
9641 *attributes = attrs;
9642
9643 /* Clear deferred attributes. */
9644 sh_deferred_function_attributes = NULL_TREE;
9645 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9646
9647 return;
9648 }
9649
9650 /*------------------------------------------------------------------------------
9651 Target specific attributes
9652 Supported attributes are:
9653
9654 * interrupt_handler
9655 Specifies this function is an interrupt handler.
9656
9657 * trapa_handler
9658 Like interrupt_handler, but don't save all registers.
9659
9660 * sp_switch
9661 Specifies an alternate stack for an interrupt handler to run on.
9662
9663 * trap_exit
9664 Use a trapa to exit an interrupt function instead of rte.
9665
9666 * nosave_low_regs
9667 Don't save r0..r7 in an interrupt handler function.
9668 This is useful on SH3* and SH4*, which have a separate set of low
9669 regs for user and privileged modes.
9670 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9671 those that run with interrupts disabled and thus can't be
9672 interrupted themselves).
9673
9674 * renesas
9675 Use Renesas calling/layout conventions (functions and structures).
9676
9677 * resbank
9678 In case of an interrupt handler function, use a register bank to
9679 save registers R0-R14, MACH, MACL, GBR and PR.
9680 This is available only on SH2A targets.
9681
9682 * function_vector
9683 Declares a function to be called using the TBR relative addressing
9684 mode. Takes an argument that specifies the slot number in the table
9685 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
9686 */
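/* Illustrative usage (assumed declarations, matching the argument kinds
   checked by the handlers below):

     void isr (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (11)));

     void tbr_func (void)
       __attribute__ ((function_vector (18)));    (SH2A only)

   sp_switch takes a string constant; trap_exit and function_vector take
   integer constants, the latter limited to the range 0..255.  */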
9687
9688 /* Handle a 'resbank' attribute. */
9689 static tree
9690 sh_handle_resbank_handler_attribute (tree * node, tree name,
9691 tree args ATTRIBUTE_UNUSED,
9692 int flags ATTRIBUTE_UNUSED,
9693 bool * no_add_attrs)
9694 {
9695 if (!TARGET_SH2A)
9696 {
9697 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9698 name);
9699 *no_add_attrs = true;
9700 }
9701 if (TREE_CODE (*node) != FUNCTION_DECL)
9702 {
9703 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9704 name);
9705 *no_add_attrs = true;
9706 }
9707
9708 return NULL_TREE;
9709 }
9710
9711 /* Handle an "interrupt_handler" attribute; arguments as in
9712 struct attribute_spec.handler. */
9713 static tree
9714 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9715 tree args ATTRIBUTE_UNUSED,
9716 int flags ATTRIBUTE_UNUSED,
9717 bool *no_add_attrs)
9718 {
9719 if (TREE_CODE (*node) != FUNCTION_DECL)
9720 {
9721 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9722 name);
9723 *no_add_attrs = true;
9724 }
9725 else if (TARGET_SHCOMPACT)
9726 {
9727 error ("attribute interrupt_handler is not compatible with -m5-compact");
9728 *no_add_attrs = true;
9729 }
9730
9731 return NULL_TREE;
9732 }
9733
9734 /* Handle a 'function_vector' attribute; arguments as in
9735 struct attribute_spec.handler. */
9736 static tree
9737 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9738 tree args ATTRIBUTE_UNUSED,
9739 int flags ATTRIBUTE_UNUSED,
9740 bool * no_add_attrs)
9741 {
9742 if (!TARGET_SH2A)
9743 {
9744 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9745 name);
9746 *no_add_attrs = true;
9747 }
9748 else if (TREE_CODE (*node) != FUNCTION_DECL)
9749 {
9750 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9751 name);
9752 *no_add_attrs = true;
9753 }
9754 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9755 {
9756 /* The argument must be a constant integer. */
9757 warning (OPT_Wattributes,
9758 "%qE attribute argument not an integer constant",
9759 name);
9760 *no_add_attrs = true;
9761 }
9762 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9763 {
9764 /* The argument value must be between 0 and 255. */
9765 warning (OPT_Wattributes,
9766 "%qE attribute argument should be between 0 to 255",
9767 name);
9768 *no_add_attrs = true;
9769 }
9770 return NULL_TREE;
9771 }
9772
9773 /* Returns true if X refers to a function that has been assigned the
9774 attribute 'function_vector'. */
9775 bool
9776 sh2a_is_function_vector_call (rtx x)
9777 {
9778 if (GET_CODE (x) == SYMBOL_REF
9779 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9780 {
9781 tree tr = SYMBOL_REF_DECL (x);
9782
9783 if (sh2a_function_vector_p (tr))
9784 return true;
9785 }
9786
9787 return false;
9788 }
9789
9790 /* Returns the function vector number, if the attribute
9791 'function_vector' is assigned, otherwise returns zero. */
9792 int
9793 sh2a_get_function_vector_number (rtx x)
9794 {
9795 int num;
9796 tree list, t;
9797
9798 if ((GET_CODE (x) == SYMBOL_REF)
9799 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9800 {
9801 t = SYMBOL_REF_DECL (x);
9802
9803 if (TREE_CODE (t) != FUNCTION_DECL)
9804 return 0;
9805
9806 list = SH_ATTRIBUTES (t);
9807 while (list)
9808 {
9809 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9810 {
9811 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9812 return num;
9813 }
9814
9815 list = TREE_CHAIN (list);
9816 }
9817
9818 return 0;
9819 }
9820 else
9821 return 0;
9822 }
9823
9824 /* Handle an "sp_switch" attribute; arguments as in
9825 struct attribute_spec.handler. */
9826 static tree
9827 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9828 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9829 {
9830 if (TREE_CODE (*node) != FUNCTION_DECL)
9831 {
9832 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9833 name);
9834 *no_add_attrs = true;
9835 }
9836 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9837 {
9838 /* The argument must be a constant string. */
9839 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9840 name);
9841 *no_add_attrs = true;
9842 }
9843
9844 return NULL_TREE;
9845 }
9846
9847 /* Handle an "trap_exit" attribute; arguments as in
9848 struct attribute_spec.handler. */
9849 static tree
9850 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9851 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9852 {
9853 if (TREE_CODE (*node) != FUNCTION_DECL)
9854 {
9855 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9856 name);
9857 *no_add_attrs = true;
9858 }
9859 /* The argument specifies a trap number to be used in a trapa instruction
9860 at function exit (instead of an rte instruction). */
9861 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9862 {
9863 /* The argument must be a constant integer. */
9864 warning (OPT_Wattributes, "%qE attribute argument not an "
9865 "integer constant", name);
9866 *no_add_attrs = true;
9867 }
9868
9869 return NULL_TREE;
9870 }
9871
9872 static tree
9873 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9874 tree name ATTRIBUTE_UNUSED,
9875 tree args ATTRIBUTE_UNUSED,
9876 int flags ATTRIBUTE_UNUSED,
9877 bool *no_add_attrs ATTRIBUTE_UNUSED)
9878 {
9879 return NULL_TREE;
9880 }
9881
9882 /* True if __attribute__((renesas)) or -mrenesas. */
9883 bool
9884 sh_attr_renesas_p (const_tree td)
9885 {
9886 if (TARGET_HITACHI)
9887 return true;
9888 if (td == NULL_TREE)
9889 return false;
9890 if (DECL_P (td))
9891 td = TREE_TYPE (td);
9892 if (td == error_mark_node)
9893 return false;
9894 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9895 != NULL_TREE);
9896 }
9897
9898 /* True if __attribute__((renesas)) or -mrenesas, for the current
9899 function. */
9900 bool
9901 sh_cfun_attr_renesas_p (void)
9902 {
9903 return sh_attr_renesas_p (current_function_decl);
9904 }
9905
9906 /* Returns true if the current function has the "interrupt_handler"
9907 attribute set. */
9908 bool
9909 sh_cfun_interrupt_handler_p (void)
9910 {
9911 return (lookup_attribute ("interrupt_handler",
9912 DECL_ATTRIBUTES (current_function_decl))
9913 != NULL_TREE);
9914 }
9915
9916 /* Returns true if FUNC has been assigned the attribute
9917 "function_vector". */
9918 bool
9919 sh2a_function_vector_p (tree func)
9920 {
9921 tree list;
9922 if (TREE_CODE (func) != FUNCTION_DECL)
9923 return false;
9924
9925 list = SH_ATTRIBUTES (func);
9926 while (list)
9927 {
9928 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9929 return true;
9930
9931 list = TREE_CHAIN (list);
9932 }
9933 return false;
9934 }
9935
9936 /* Returns true if the current function has the "resbank" attribute set. */
9937 bool
9938 sh_cfun_resbank_handler_p (void)
9939 {
9940 return ((lookup_attribute ("resbank",
9941 DECL_ATTRIBUTES (current_function_decl))
9942 != NULL_TREE)
9943 && (lookup_attribute ("interrupt_handler",
9944 DECL_ATTRIBUTES (current_function_decl))
9945 != NULL_TREE) && TARGET_SH2A);
9946 }
9947
9948 /* Returns true if the current function has a "trap_exit" attribute set. */
9949 bool
9950 sh_cfun_trap_exit_p (void)
9951 {
9952 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9953 != NULL_TREE;
9954 }
9955
9956 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9957 static const char *
9958 sh_check_pch_target_flags (int old_flags)
9959 {
9960 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9961 | MASK_SH_E | MASK_HARD_SH4
9962 | MASK_FPU_SINGLE | MASK_SH4))
9963 return _("created and used with different architectures / ABIs");
9964 if ((old_flags ^ target_flags) & MASK_HITACHI)
9965 return _("created and used with different ABIs");
9966 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9967 return _("created and used with different endianness");
9968 return NULL;
9969 }
9970 \f
9971 /* Predicates used by the templates. */
9972
9973 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9974 Used only in general_movsrc_operand. */
9975 bool
9976 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
9977 {
9978 switch (REGNO (op))
9979 {
9980 case PR_REG:
9981 case MACL_REG:
9982 case MACH_REG:
9983 return true;
9984 }
9985 return false;
9986 }
9987
9988 /* Returns true if OP is a floating point value with value 0.0. */
9989 bool
9990 fp_zero_operand (rtx op)
9991 {
9992 REAL_VALUE_TYPE r;
9993
9994 if (GET_MODE (op) != SFmode)
9995 return false;
9996
9997 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9998 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9999 }
10000
10001 /* Returns true if OP is a floating point value with value 1.0. */
10002 bool
10003 fp_one_operand (rtx op)
10004 {
10005 REAL_VALUE_TYPE r;
10006
10007 if (GET_MODE (op) != SFmode)
10008 return false;
10009
10010 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
10011 return REAL_VALUES_EQUAL (r, dconst1);
10012 }
10013
10014 /* Return the TLS type for TLS symbols. */
10015 enum tls_model
10016 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
10017 {
10018 if (GET_CODE (op) != SYMBOL_REF)
10019 return TLS_MODEL_NONE;
10020 return SYMBOL_REF_TLS_MODEL (op);
10021 }
10022 \f
10023 /* Return the destination address of a branch. */
10024 static int
10025 branch_dest (rtx branch)
10026 {
10027 rtx dest = SET_SRC (PATTERN (branch));
10028 int dest_uid;
10029
10030 if (GET_CODE (dest) == IF_THEN_ELSE)
10031 dest = XEXP (dest, 1);
10032 dest = XEXP (dest, 0);
10033 dest_uid = INSN_UID (dest);
10034 return INSN_ADDRESSES (dest_uid);
10035 }
10036 \f
10037 /* Return nonzero if REG is not used after INSN.
10038 We assume REG is a reload reg, and therefore does
10039 not live past labels. It may live past calls or jumps though. */
10040 bool
10041 reg_unused_after (rtx reg, rtx_insn *insn)
10042 {
10043 enum rtx_code code;
10044 rtx set;
10045
10046 /* If the reg is set by this instruction, then it is safe for our
10047 case. Disregard the case where this is a store to memory, since
10048 we are checking a register used in the store address. */
10049 set = single_set (insn);
10050 if (set && !MEM_P (SET_DEST (set))
10051 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10052 return true;
10053
10054 while ((insn = NEXT_INSN (insn)))
10055 {
10056 rtx set;
10057 if (!INSN_P (insn))
10058 continue;
10059
10060 code = GET_CODE (insn);
10061
10062 #if 0
10063 /* If this is a label that existed before reload, then the register
10064 is dead here. However, if this is a label added by reorg, then
10065 the register may still be live here. We can't tell the difference,
10066 so we just ignore labels completely. */
10067 if (code == CODE_LABEL)
10068 return 1;
10069 /* else */
10070 #endif
10071
10072 if (code == JUMP_INSN)
10073 return false;
10074
10075 /* If this is a sequence, we must handle them all at once.
10076 We could have for instance a call that sets the target register,
10077 and an insn in a delay slot that uses the register. In this case,
10078 we must return 0. */
10079 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
10080 {
10081 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
10082 int i;
10083 int retval = 0;
10084
10085 for (i = 0; i < seq->len (); i++)
10086 {
10087 rtx_insn *this_insn = seq->insn (i);
10088 rtx set = single_set (this_insn);
10089
10090 if (CALL_P (this_insn))
10091 code = CALL_INSN;
10092 else if (JUMP_P (this_insn))
10093 {
10094 if (INSN_ANNULLED_BRANCH_P (this_insn))
10095 return false;
10096 code = JUMP_INSN;
10097 }
10098
10099 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10100 return false;
10101 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10102 {
10103 if (!MEM_P (SET_DEST (set)))
10104 retval = true;
10105 else
10106 return false;
10107 }
10108 if (set == NULL_RTX
10109 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
10110 return false;
10111 }
10112 if (retval == 1)
10113 return true;
10114 else if (code == JUMP_INSN)
10115 return false;
10116 }
10117
10118 set = single_set (insn);
10119 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
10120 return false;
10121 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
10122 return !MEM_P (SET_DEST (set));
10123 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
10124 return false;
10125
10126 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
10127 return true;
10128 }
10129 return true;
10130 }
10131 \f
10132
10133 static GTY(()) rtx t_reg_rtx;
10134 rtx
10135 get_t_reg_rtx (void)
10136 {
10137 if (! t_reg_rtx)
10138 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
10139 return t_reg_rtx;
10140 }
10141
10142 static GTY(()) tree fpscr_values;
10143
10144 static void
10145 emit_fpu_switch (rtx scratch, int index)
10146 {
10147 rtx src;
10148
10149 if (fpscr_values == NULL)
10150 {
10151 tree t;
10152
10153 t = build_index_type (integer_one_node);
10154 t = build_array_type (integer_type_node, t);
10155 t = build_decl (BUILTINS_LOCATION,
10156 VAR_DECL, get_identifier ("__fpscr_values"), t);
10157 DECL_ARTIFICIAL (t) = 1;
10158 DECL_IGNORED_P (t) = 1;
10159 DECL_EXTERNAL (t) = 1;
10160 TREE_STATIC (t) = 1;
10161 TREE_PUBLIC (t) = 1;
10162 TREE_USED (t) = 1;
10163
10164 fpscr_values = t;
10165 }
10166
10167 src = DECL_RTL (fpscr_values);
10168 if (!can_create_pseudo_p ())
10169 {
10170 emit_move_insn (scratch, XEXP (src, 0));
10171 if (index != 0)
10172 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10173 src = adjust_automodify_address (src, SImode, scratch, index * 4);
10174 }
10175 else
10176 src = adjust_address (src, SImode, index * 4);
10177
10178 emit_insn (gen_lds_fpscr (src));
10179 }
10180 \f
10181 static rtx get_free_reg (HARD_REG_SET);
10182
10183 /* This function returns a register to hold the address from which the
10184 fpscr is loaded. Currently it always returns r1 or r7, but when we are
10185 able to use pseudo registers after combine, or have a better mechanism
10186 for choosing a register, it should be done here. */
10187 /* REGS_LIVE is the liveness information for the point for which we
10188 need this allocation. In some bare-bones exit blocks, r1 is live at the
10189 start. We can even have all of r0..r3 being live:
10190 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10191 The insn before which the new insns are placed will clobber the register
10192 we return. If a basic block consists only of setting the return value
10193 register to a pseudo and using that register, the return value is not
10194 live before or after this block, yet we'll insert our insns right in
10195 the middle. */
10196 static rtx
10197 get_free_reg (HARD_REG_SET regs_live)
10198 {
10199 if (! TEST_HARD_REG_BIT (regs_live, 1))
10200 return gen_rtx_REG (Pmode, 1);
10201
10202 /* Hard reg 1 is live; since this is a small register classes target,
10203 there shouldn't be anything but a jump before the function end. */
10204 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10205 return gen_rtx_REG (Pmode, 7);
10206 }
10207
10208 /* This function will set the fpscr from memory.
10209 MODE is the mode we are setting it to. */
10210 void
10211 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10212 {
10213 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10214 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10215 rtx addr_reg;
10216
10217 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10218 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10219 }
10220
10221 /* Is the given character a logical line separator for the assembler? */
10222 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10223 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10224 #endif
10225
10226 static bool
10227 sequence_insn_p (rtx_insn *insn)
10228 {
10229 rtx_insn *prev, *next;
10230
10231 prev = PREV_INSN (insn);
10232 if (prev == NULL)
10233 return false;
10234
10235 next = NEXT_INSN (prev);
10236 if (next == NULL)
10237 return false;
10238
10239 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10240 }
10241
10242 int
10243 sh_insn_length_adjustment (rtx_insn *insn)
10244 {
10245 /* Instructions with unfilled delay slots take up an extra two bytes for
10246 the nop in the delay slot. */
10247 if (((NONJUMP_INSN_P (insn)
10248 && GET_CODE (PATTERN (insn)) != USE
10249 && GET_CODE (PATTERN (insn)) != CLOBBER)
10250 || CALL_P (insn) || JUMP_P (insn))
10251 && ! sequence_insn_p (insn)
10252 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10253 return 2;
10254
10255 /* Increase the insn length of a cbranch without a delay slot insn to
10256 force a delay slot which will be stuffed with a nop. */
10257 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
10258 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
10259 && ! sequence_insn_p (insn))
10260 return 2;
10261
10262 /* sh-dsp parallel processing insns take four bytes instead of two. */
10263
10264 if (NONJUMP_INSN_P (insn))
10265 {
10266 int sum = 0;
10267 rtx body = PATTERN (insn);
10268 const char *templ;
10269 char c;
10270 bool maybe_label = true;
10271
10272 if (GET_CODE (body) == ASM_INPUT)
10273 templ = XSTR (body, 0);
10274 else if (asm_noperands (body) >= 0)
10275 templ
10276 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10277 else
10278 return 0;
10279 do
10280 {
10281 int ppi_adjust = 0;
10282
10283 do
10284 c = *templ++;
10285 while (c == ' ' || c == '\t');
10286 /* all sh-dsp parallel-processing insns start with p.
10287 The only non-ppi sh insn starting with p is pref.
10288 The only ppi starting with pr is prnd. */
10289 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10290 ppi_adjust = 2;
10291 /* The repeat pseudo-insn expands to three insns, a total of
10292 six bytes in size. */
10293 else if ((c == 'r' || c == 'R')
10294 && ! strncasecmp ("epeat", templ, 5))
10295 ppi_adjust = 4;
10296 while (c && c != '\n'
10297 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10298 {
10299 /* If this is a label, it is obviously not a ppi insn. */
10300 if (c == ':' && maybe_label)
10301 {
10302 ppi_adjust = 0;
10303 break;
10304 }
10305 else if (c == '\'' || c == '"')
10306 maybe_label = false;
10307 c = *templ++;
10308 }
10309 sum += ppi_adjust;
10310 maybe_label = c != ':';
10311 }
10312 while (c);
10313 return sum;
10314 }
10315 return 0;
10316 }
10317 \f
10318 /* Return TRUE for a valid displacement for the REG+disp addressing
10319 with MODE. */
10320 bool
10321 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
10322 bool allow_zero)
10323 {
10324 if (! CONST_INT_P (op))
10325 return false;
10326
10327 if (TARGET_SHMEDIA)
10328 {
10329 int size;
10330
10331 /* Check if this is the address of an unaligned load / store. */
10332 if (mode == VOIDmode)
10333 return satisfies_constraint_I06 (op);
10334
10335 size = GET_MODE_SIZE (mode);
10336 return (!(INTVAL (op) & (size - 1))
10337 && INTVAL (op) >= -512 * size
10338 && INTVAL (op) < 512 * size);
10339 }
10340 else
10341 {
10342 const HOST_WIDE_INT offset = INTVAL (op);
10343 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10344 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10345
10346 /* If the mode does not support any displacement always return false.
10347 Even though an index of '0' is actually always valid, it will cause
10348 troubles when e.g. a DFmode move is split into two SFmode moves,
10349 where one SFmode move will have index '0' and the other move will
10350 have index '4'. */
10351 if (!allow_zero && max_disp < 1)
10352 return false;
10353
10354 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10355 }
10356 }
10357
10358 /* Recognize an RTL expression that is a valid memory address for
10359 an instruction.
10360 The MODE argument is the machine mode for the MEM expression
10361 that wants to use this address.
10362 Allow REG
10363 REG+disp
10364 REG+r0
10365 REG++
10366 --REG
10367 GBR
10368 GBR+disp */
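/* Illustrative SH assembly forms of the accepted addresses (assumed
   mnemonics, not generated by this function):
     REG        mov.l @r4,r1
     REG+disp   mov.l @(8,r4),r1
     REG+r0     mov.l @(r0,r4),r1
     REG++      mov.l @r4+,r1
     --REG      mov.l r1,@-r15
     GBR+disp   mov.l @(4,gbr),r0  */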
10369 static bool
10370 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10371 {
10372 if (! ALLOW_INDEXED_ADDRESS
10373 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
10374 return false;
10375
10376 if (REG_P (x) && REGNO (x) == GBR_REG)
10377 return true;
10378
10379 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10380 return true;
10381 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10382 && ! TARGET_SHMEDIA
10383 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10384 return true;
10385 else if (GET_CODE (x) == PLUS)
10386 {
10387 rtx xop0 = XEXP (x, 0);
10388 rtx xop1 = XEXP (x, 1);
10389
10390 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10391 return gbr_displacement (xop1, mode);
10392
10393 if (GET_MODE_SIZE (mode) <= 8
10394 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10395 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10396 return true;
10397
10398 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10399 || ((xop0 == stack_pointer_rtx
10400 || xop0 == hard_frame_pointer_rtx)
10401 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10402 || ((xop1 == stack_pointer_rtx
10403 || xop1 == hard_frame_pointer_rtx)
10404 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10405 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10406 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10407 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10408 && TARGET_FMOVD && mode == DFmode)))
10409 {
10410 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10411 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10412 return true;
10413 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10414 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10415 return true;
10416 }
10417 }
10418
10419 return false;
10420 }
10421 \f
10422 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10423 isn't protected by a PIC unspec. */
10424 bool
10425 nonpic_symbol_mentioned_p (rtx x)
10426 {
10427 const char *fmt;
10428 int i;
10429
10430 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10431 || GET_CODE (x) == PC)
10432 return true;
10433
10434 /* We don't want to look into the possible MEM location of a
10435 CONST_DOUBLE, since we're not going to use it, in general. */
10436 if (GET_CODE (x) == CONST_DOUBLE)
10437 return false;
10438
10439 if (GET_CODE (x) == UNSPEC
10440 && (XINT (x, 1) == UNSPEC_PIC
10441 || XINT (x, 1) == UNSPEC_GOT
10442 || XINT (x, 1) == UNSPEC_GOTOFF
10443 || XINT (x, 1) == UNSPEC_GOTPLT
10444 || XINT (x, 1) == UNSPEC_GOTTPOFF
10445 || XINT (x, 1) == UNSPEC_DTPOFF
10446 || XINT (x, 1) == UNSPEC_TPOFF
10447 || XINT (x, 1) == UNSPEC_PLT
10448 || XINT (x, 1) == UNSPEC_PCREL
10449 || XINT (x, 1) == UNSPEC_SYMOFF
10450 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10451 return false;
10452
10453 fmt = GET_RTX_FORMAT (GET_CODE (x));
10454 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10455 {
10456 if (fmt[i] == 'E')
10457 {
10458 int j;
10459 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10460 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10461 return true;
10462 }
10463 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10464 return true;
10465 }
10466
10467 return false;
10468 }
10469
10470 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10471 @GOTOFF in `reg'. */
10472 rtx
10473 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
10474 rtx reg)
10475 {
10476 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10477 return orig;
10478
10479 if (GET_CODE (orig) == LABEL_REF
10480 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10481 {
10482 if (reg == NULL_RTX)
10483 reg = gen_reg_rtx (Pmode);
10484
10485 emit_insn (gen_symGOTOFF2reg (reg, orig));
10486 return reg;
10487 }
10488 else if (GET_CODE (orig) == SYMBOL_REF)
10489 {
10490 if (reg == NULL_RTX)
10491 reg = gen_reg_rtx (Pmode);
10492
10493 emit_insn (gen_symGOT2reg (reg, orig));
10494 return reg;
10495 }
10496 return orig;
10497 }
10498
10499 /* Given a (logical) mode size and an offset in bytes, try to find the
10500 appropriate displacement value for a mov insn. On SH the displacements
10501 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10502 15 bytes in QImode. To compensate for this we create a new base address by
10503 adding an adjustment value to it.
10504
10505 If the originally requested offset is greater than 127 we prefer using
10506 values 124..127 over 128..131 to increase opportunities to use the
10507 add #imm, Rn insn.
10508
10509 In some cases it is possible that a requested offset might seem unaligned
10510 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10511 This is compensated by adjusting the base address so that the effective
10512 address of the displacement move insn will be aligned.
10513
10514 This is not the best possible way of rebasing the base address, as it
10515 does not look at other present displacement addressings around it.
10516 In some cases this can create more base address adjustments than would
10517 actually be necessary. */
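/* Worked example (illustrative): for an SImode access at offset 68 from a
   base register, max_disp is 60, so sh_find_mov_disp_adjust returns
   offset_adjust == 64 and mov_disp == 4; the base register is first
   advanced by 64 and the move then uses displacement 4.  */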
10518 struct disp_adjust
10519 {
10520 rtx offset_adjust;
10521 rtx mov_disp;
10522 };
10523
10524 static struct disp_adjust
10525 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
10526 {
10527 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10528
10529 /* Do not try to use SH2A's large displacements here, because this would
10530 effectively disable the small displacement insns. */
10531 const int mode_sz = GET_MODE_SIZE (mode);
10532 const int mov_insn_sz = mov_insn_size (mode, false);
10533 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10534 const int max_disp_next = max_disp + mov_insn_sz;
10535 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10536 HOST_WIDE_INT offset_adjust;
10537
10538 /* In some cases this actually does happen and we must check for it. */
10539 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10540 return res;
10541
10542 /* Keeps the previous behavior for QImode displacement addressing.
10543 This just decides how the offset is re-based. Removing this special
10544 case will result in slightly bigger code on average, but it's not that
10545 bad actually. */
10546 if (mov_insn_sz == 1)
10547 align_modifier = 0;
10548
10549 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10550
10551 if (mode_sz + offset - offset_adjust <= max_disp_next)
10552 {
10553 res.offset_adjust = GEN_INT (offset_adjust);
10554 res.mov_disp = GEN_INT (offset - offset_adjust);
10555 }
10556
10557 return res;
10558 }
10559
10560 /* Try to modify an illegitimate address and make it legitimate.
10561 If we find one, return the new, valid address.
10562 Otherwise, return the original address. */
10563 static rtx
10564 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
10565 {
10566 if (flag_pic)
10567 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10568
10569 if (TARGET_SHMEDIA)
10570 return x;
10571
10572 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10573 || (TARGET_SH2E && mode == SFmode))
10574 return x;
10575
10576 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10577 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10578 {
10579 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10580 INTVAL (XEXP (x, 1)));
10581
10582 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10583 {
10584 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10585 adj.offset_adjust, NULL_RTX, 0,
10586 OPTAB_LIB_WIDEN);
10587 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10588 }
10589 }
10590 return x;
10591 }
10592
10593 /* Attempt to replace *p, which is an address that needs reloading, with
10594 a valid memory address for an operand of mode MODE.
10595 Like for sh_legitimize_address, for the SH we try to get a normal form
10596 of the address. That will allow inheritance of the address reloads. */
10597 bool
10598 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
10599 int itype)
10600 {
10601 enum reload_type type = (enum reload_type) itype;
10602 const int mode_sz = GET_MODE_SIZE (mode);
10603
10604 if (sh_lra_p ())
10605 return false;
10606
10607 if (! ALLOW_INDEXED_ADDRESS
10608 && GET_CODE (*p) == PLUS
10609 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1)))
10610 {
10611 *p = copy_rtx (*p);
10612 push_reload (*p, NULL_RTX, p, NULL,
10613 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10614 return true;
10615 }
10616
10617 if (! ALLOW_INDEXED_ADDRESS
10618 && GET_CODE (*p) == PLUS
10619 && GET_CODE (XEXP (*p, 0)) == PLUS)
10620 {
10621 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0),
10622 XEXP (XEXP (*p, 0), 1));
10623 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1));
10624 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10625 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10626 return true;
10627 }
10628
10629 if (TARGET_SHMEDIA)
10630 return false;
10631
10632 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10633 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10634 && (ALLOW_INDEXED_ADDRESS
10635 || XEXP (*p, 0) == stack_pointer_rtx
10636 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10637 {
10638 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10639 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10640
10641 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10642 {
10643 push_reload (*p, NULL_RTX, p, NULL,
10644 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10645 return true;
10646 }
10647
10648 if (TARGET_SH2E && mode == SFmode)
10649 {
10650 *p = copy_rtx (*p);
10651 push_reload (*p, NULL_RTX, p, NULL,
10652 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10653 return true;
10654 }
10655
10656 /* FIXME: Do not legitimize QImode and HImode displacement moves here,
10657 because reload then has trouble figuring out the constraint that
10658 the move insn's target/source reg must be R0.
10659 Or maybe some handling is wrong in sh_secondary_reload for this
10660 to work properly? */
10661 if ((mode_sz == 4 || mode_sz == 8)
10662 && ! (TARGET_SH4 && mode == DFmode)
10663 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10664 {
10665 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10666 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10667 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10668 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10669 return true;
10670 }
10671 }
10672
10673 /* We must re-recognize what we created before. */
10674 if (GET_CODE (*p) == PLUS
10675 && (mode_sz == 4 || mode_sz == 8)
10676 && GET_CODE (XEXP (*p, 0)) == PLUS
10677 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10678 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10679 && CONST_INT_P (XEXP (*p, 1))
10680 && ! (TARGET_SH2E && mode == SFmode))
10681 {
10682 /* Because this address is so complex, we know it must have
10683 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10684 it is already unshared, and needs no further unsharing. */
10685 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10686 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10687 return true;
10688 }
10689
10690 return false;
10691 }
10692
10693 /* In the name of slightly smaller debug output, and to cater to
10694 general assembler lossage, recognize various UNSPEC sequences
10695 and turn them back into a direct symbol reference. */
10696 static rtx
10697 sh_delegitimize_address (rtx orig_x)
10698 {
10699 rtx x, y;
10700
10701 orig_x = delegitimize_mem_from_attrs (orig_x);
10702
10703 x = orig_x;
10704 if (MEM_P (x))
10705 x = XEXP (x, 0);
10706 if (GET_CODE (x) == CONST)
10707 {
10708 y = XEXP (x, 0);
10709 if (GET_CODE (y) == UNSPEC)
10710 {
10711 if (XINT (y, 1) == UNSPEC_GOT
10712 || XINT (y, 1) == UNSPEC_GOTOFF
10713 || XINT (y, 1) == UNSPEC_SYMOFF)
10714 return XVECEXP (y, 0, 0);
10715 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10716 {
10717 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10718 {
10719 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10720
10721 if (GET_CODE (symplt) == UNSPEC
10722 && (XINT (symplt, 1) == UNSPEC_PLT
10723 || XINT (symplt, 1) == UNSPEC_PCREL))
10724 return XVECEXP (symplt, 0, 0);
10725 }
10726 }
10727 else if (TARGET_SHMEDIA
10728 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10729 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10730 {
10731 rtx offset = XVECEXP (y, 0, 1);
10732
10733 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10734 if (MEM_P (orig_x))
10735 x = replace_equiv_address_nv (orig_x, x);
10736 return x;
10737 }
10738 }
10739 }
10740
10741 return orig_x;
10742 }
10743
10744 /* Mark the use of a constant in the literal table. If the constant
10745 has multiple labels, make it unique. */
10746 static rtx
10747 mark_constant_pool_use (rtx x)
10748 {
10749 rtx_insn *insn, *lab;
10750 rtx pattern;
10751
10752 if (x == NULL_RTX)
10753 return x;
10754
10755 switch (GET_CODE (x))
10756 {
10757 case LABEL_REF:
10758 x = XEXP (x, 0);
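/* Fall through.  */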
10759 case CODE_LABEL:
10760 break;
10761 default:
10762 return x;
10763 }
10764
10765 /* Get the first label in the list of labels for the same constant
10766 and delete the other labels in the list. */
10767 lab = as_a <rtx_insn *> (x);
10768 for (insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
10769 {
10770 if (!LABEL_P (insn)
10771 || LABEL_REFS (insn) != NEXT_INSN (insn))
10772 break;
10773 lab = insn;
10774 }
10775
10776 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10777 as_a<rtx_insn *> (insn)->set_deleted ();
10778
10779 /* Mark constants in a window. */
10780 for (insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; insn = NEXT_INSN (insn))
10781 {
10782 if (!NONJUMP_INSN_P (insn))
10783 continue;
10784
10785 pattern = PATTERN (insn);
10786 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10787 continue;
10788
10789 switch (XINT (pattern, 1))
10790 {
10791 case UNSPECV_CONST2:
10792 case UNSPECV_CONST4:
10793 case UNSPECV_CONST8:
10794 XVECEXP (pattern, 0, 1) = const1_rtx;
10795 break;
10796 case UNSPECV_WINDOW_END:
10797 if (XVECEXP (pattern, 0, 0) == x)
10798 return lab;
10799 break;
10800 case UNSPECV_CONST_END:
10801 return lab;
10802 default:
10803 break;
10804 }
10805 }
10806
10807 return lab;
10808 }
10809 \f
10810 /* Return true if it's possible to redirect BRANCH1 to the destination
10811 of an unconditional jump BRANCH2. We only want to do this if the
10812 resulting branch will have a short displacement. */
10813 static bool
10814 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
10815 {
10816 /* Don't follow if BRANCH2 may be a jump crossing between
10817 hot and cold partitions. */
10818 if (TARGET_SH1
10819 && flag_reorder_blocks_and_partition
10820 && simplejump_p (branch2)
10821 && CROSSING_JUMP_P (branch2))
10822 return false;
10823
10824 if (flag_expensive_optimizations && simplejump_p (branch2))
10825 {
10826 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10827 rtx_insn *insn;
10828 int distance;
10829
10830 for (distance = 0, insn = NEXT_INSN (branch1);
10831 insn && distance < 256;
10832 insn = PREV_INSN (insn))
10833 {
10834 if (insn == dest)
10835 return true;
10836 else
10837 distance += get_attr_length (insn);
10838 }
10839 for (distance = 0, insn = NEXT_INSN (branch1);
10840 insn && distance < 256;
10841 insn = NEXT_INSN (insn))
10842 {
10843 if (insn == dest)
10844 return true;
10845 else
10846 distance += get_attr_length (insn);
10847 }
10848 }
10849 return false;
10850 }
10851
10852 /* Return true if register OLD_REG can be renamed to register NEW_REG. */
10853 bool
10854 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10855 unsigned int new_reg)
10856 {
10857 /* Interrupt functions can only use registers that have already been
10858 saved by the prologue, even if they would normally be
10859 call-clobbered. */
10860 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10861 return false;
10862
10863 return true;
10864 }
10865
10866 /* Function to update the integer COST
10867 based on the relationship between INSN that is dependent on
10868 DEP_INSN through the dependence LINK. The default is to make no
10869 adjustment to COST. This can be used for example to specify to
10870 the scheduler that an output- or anti-dependence does not incur
10871 the same cost as a data-dependence. The return value should be
10872 the new value for COST. */
10873 static int
10874 sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
10875 rtx_insn *dep_insn, int cost)
10876 {
10877 rtx reg, use_pat;
10878
10879 if (TARGET_SHMEDIA)
10880 {
10881 /* On SHmedia, if the dependence is an anti-dependence or
10882 output-dependence, there is no cost. */
10883 if (REG_NOTE_KIND (link) != 0)
10884 {
10885 /* However, dependencies between target register loads and
10886 uses of the register in a subsequent block that are separated
10887 by a conditional branch are not modelled - we make do with
10888 the anti-dependency between the target register load and the
10889 conditional branch that ends the current block. */
10890 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10891 && GET_CODE (PATTERN (dep_insn)) == SET
10892 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10893 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10894 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10895 {
10896 int orig_cost = cost;
10897 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10898 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10899 ? insn : JUMP_LABEL (insn));
10900 /* On the likely path, the branch costs 1, on the unlikely path,
10901 it costs 3. */
10902 cost--;
10903 do
10904 target = next_active_insn (target);
10905 while (target && ! flow_dependent_p (target, dep_insn)
10906 && --cost > 0);
10907 /* If two branches are executed in immediate succession, with the
10908 first branch properly predicted, this causes a stall at the
10909 second branch, hence we won't need the target for the
10910 second branch for two cycles after the launch of the first
10911 branch. */
10912 if (cost > orig_cost - 2)
10913 cost = orig_cost - 2;
10914 }
10915 else
10916 cost = 0;
10917 }
10918
10919 else if (get_attr_is_mac_media (insn)
10920 && get_attr_is_mac_media (dep_insn))
10921 cost = 1;
10922
10923 else if (! reload_completed
10924 && GET_CODE (PATTERN (insn)) == SET
10925 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10926 && GET_CODE (PATTERN (dep_insn)) == SET
10927 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10928 && cost < 4)
10929 cost = 4;
10930 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10931 that is needed at the target. */
10932 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10933 && ! flow_dependent_p (insn, dep_insn))
10934 cost--;
10935 }
10936 else if (REG_NOTE_KIND (link) == 0)
10937 {
10938 enum attr_type type;
10939 rtx dep_set;
10940
10941 if (recog_memoized (insn) < 0
10942 || recog_memoized (dep_insn) < 0)
10943 return cost;
10944
10945 dep_set = single_set (dep_insn);
10946
10947 /* The latency that we specify in the scheduling description refers
10948 to the actual output, not to an auto-increment register; for that,
10949 the latency is one. */
10950 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10951 {
10952 rtx set = single_set (insn);
10953
10954 if (set
10955 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10956 && (!MEM_P (SET_DEST (set))
10957 || !reg_mentioned_p (SET_DEST (dep_set),
10958 XEXP (SET_DEST (set), 0))))
10959 cost = 1;
10960 }
10961 /* The only input for a call that is timing-critical is the
10962 function's address. */
10963 if (CALL_P (insn))
10964 {
10965 rtx call = get_call_rtx_from (insn);
10966 if (call
10967 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10968 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10969 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10970 cost -= TARGET_SH4_300 ? 3 : 6;
10971 }
10972 /* Likewise, the most timing-critical input for an sfunc call
10973 is the function address. However, sfuncs typically start
10974 using their arguments pretty quickly.
10975 Assume a four cycle delay for SH4 before they are needed.
10976 Cached ST40-300 calls are quicker, so assume only a one
10977 cycle delay there.
10978 ??? Maybe we should encode the delays till input registers
10979 are needed by sfuncs into the sfunc call insn. */
10980 /* All sfunc calls are parallels with at least four components.
10981 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10982 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10983 && XVECLEN (PATTERN (insn), 0) >= 4
10984 && (reg = sfunc_uses_reg (insn)))
10985 {
10986 if (! reg_set_p (reg, dep_insn))
10987 cost -= TARGET_SH4_300 ? 1 : 4;
10988 }
10989 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10990 {
10991 enum attr_type dep_type = get_attr_type (dep_insn);
10992
10993 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10994 cost--;
10995 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10996 && (type = get_attr_type (insn)) != TYPE_CALL
10997 && type != TYPE_SFUNC)
10998 cost--;
10999 /* When the preceding instruction loads the shift amount of
11000 the following SHAD/SHLD, the latency of the load is increased
11001 by 1 cycle. */
11002 if (get_attr_type (insn) == TYPE_DYN_SHIFT
11003 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
11004 && reg_overlap_mentioned_p (SET_DEST (dep_set),
11005 XEXP (SET_SRC (single_set (insn)),
11006 1)))
11007 cost++;
11008 /* When an LS group instruction with a latency of less than
11009 3 cycles is followed by a double-precision floating-point
11010 instruction, FIPR, or FTRV, the latency of the first
11011 instruction is increased to 3 cycles. */
11012 else if (cost < 3
11013 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
11014 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
11015 cost = 3;
11016 /* The lsw register of a double-precision computation is ready one
11017 cycle earlier. */
11018 else if (reload_completed
11019 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
11020 && (use_pat = single_set (insn))
11021 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
11022 SET_SRC (use_pat)))
11023 cost -= 1;
11024
11025 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
11026 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
11027 cost -= 1;
11028 }
11029 else if (TARGET_SH4_300)
11030 {
11031 /* Stores need their input register two cycles later. */
11032 if (dep_set && cost >= 1
11033 && ((type = get_attr_type (insn)) == TYPE_STORE
11034 || type == TYPE_PSTORE
11035 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
11036 {
11037 rtx set = single_set (insn);
11038
11039 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
11040 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
11041 {
11042 cost -= 2;
11043 /* But don't reduce the cost below 1 if the address depends
11044 on a side effect of dep_insn. */
11045 if (cost < 1
11046 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
11047 cost = 1;
11048 }
11049 }
11050 }
11051 }
11052 /* An anti-dependence penalty of two applies if the first insn is a double
11053 precision fadd / fsub / fmul. */
11054 else if (!TARGET_SH4_300
11055 && REG_NOTE_KIND (link) == REG_DEP_ANTI
11056 && recog_memoized (dep_insn) >= 0
11057 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
11058 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
11059 /* A lot of alleged anti-flow dependences are fake,
11060 so check this one is real. */
11061 && flow_dependent_p (dep_insn, insn))
11062 cost = 2;
11063
11064 return cost;
11065 }
11066
11067 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
11068 if DEP_INSN is anti-flow dependent on INSN. */
11069 static bool
11070 flow_dependent_p (rtx insn, rtx dep_insn)
11071 {
11072 rtx tmp = PATTERN (insn);
11073
11074 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
11075 return tmp == NULL_RTX;
11076 }
11077
11078 /* A helper function for flow_dependent_p called through note_stores. */
11079 static void
11080 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
11081 {
11082 rtx * pinsn = (rtx *) data;
11083
11084 if (*pinsn && reg_referenced_p (x, *pinsn))
11085 *pinsn = NULL_RTX;
11086 }
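
/* Informal note: flow_dependent_p points *pinsn at INSN's pattern and lets
   note_stores walk everything DEP_INSN sets; if a destination set by
   DEP_INSN is referenced in INSN's pattern, flow_dependent_p_1 clears the
   pointer, and the cleared pointer is reported as a flow dependence.  */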
11087
11088 /* For use by sh_allocate_initial_value. Note that sh.md contains some
11089 'special function' patterns (type sfunc) that clobber pr, but that
11090 do not look like function calls to leaf_function_p. Hence we must
11091 do this extra check. */
11092 static int
11093 sh_pr_n_sets (void)
11094 {
11095 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11096 }
11097
11098 /* Return where to allocate a pseudo for a given hard register's initial
11099 value. */
11100 static rtx
11101 sh_allocate_initial_value (rtx hard_reg)
11102 {
11103 rtx x;
11104
11105 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
11106 {
11107 if (crtl->is_leaf
11108 && ! sh_pr_n_sets ()
11109 && ! (TARGET_SHCOMPACT
11110 && ((crtl->args.info.call_cookie
11111 & ~ CALL_COOKIE_RET_TRAMP (1))
11112 || crtl->saves_all_registers)))
11113 x = hard_reg;
11114 else
11115 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
11116 }
11117 else
11118 x = NULL_RTX;
11119
11120 return x;
11121 }
11122
11123 /* Return 2 to indicate dual issue for superscalar targets such as the
11124 SH4; otherwise return 1. Used by the DFA pipeline description. */
11125 static int
11126 sh_issue_rate (void)
11127 {
11128 if (TARGET_SUPERSCALAR)
11129 return 2;
11130 else
11131 return 1;
11132 }
11133
11134 /* Functions for ready queue reordering for sched1. */
11135
11136 /* Get the regmode weight of a SET or CLOBBER X for mode MODE. */
11137 static short
11138 find_set_regmode_weight (rtx x, machine_mode mode)
11139 {
11140 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11141 return 1;
11142 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
11143 {
11144 if (REG_P (SET_DEST (x)))
11145 {
11146 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11147 return 1;
11148 else
11149 return 0;
11150 }
11151 return 1;
11152 }
11153 return 0;
11154 }
11155
11156 /* Get regmode weight for insn. */
11157 static short
11158 find_insn_regmode_weight (rtx insn, machine_mode mode)
11159 {
11160 short reg_weight = 0;
11161 rtx x;
11162
11163 /* Increment weight for each register born here. */
11164 x = PATTERN (insn);
11165 reg_weight += find_set_regmode_weight (x, mode);
11166 if (GET_CODE (x) == PARALLEL)
11167 {
11168 int j;
11169 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11170 {
11171 x = XVECEXP (PATTERN (insn), 0, j);
11172 reg_weight += find_set_regmode_weight (x, mode);
11173 }
11174 }
11175 /* Decrement weight for each register that dies here. */
11176 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11177 {
11178 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11179 {
11180 rtx note = XEXP (x, 0);
11181 if (REG_P (note) && GET_MODE (note) == mode)
11182 reg_weight--;
11183 }
11184 }
11185 return reg_weight;
11186 }
11187
11188 /* Calculate regmode weights for all insns of a basic block. */
11189 static void
11190 find_regmode_weight (basic_block b, machine_mode mode)
11191 {
11192 rtx_insn *insn, *next_tail, *head, *tail;
11193
11194 get_ebb_head_tail (b, b, &head, &tail);
11195 next_tail = NEXT_INSN (tail);
11196
11197 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11198 {
11199 /* Handle register life information. */
11200 if (!INSN_P (insn))
11201 continue;
11202
11203 if (mode == SFmode)
11204 INSN_REGMODE_WEIGHT (insn, mode) =
11205 find_insn_regmode_weight (insn, mode)
11206 + 2 * find_insn_regmode_weight (insn, DFmode);
11207 else if (mode == SImode)
11208 INSN_REGMODE_WEIGHT (insn, mode) =
11209 find_insn_regmode_weight (insn, mode)
11210 + 2 * find_insn_regmode_weight (insn, DImode);
11211 }
11212 }
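
/* Informal note: DFmode (resp. DImode) weights are counted twice towards
   the SFmode (resp. SImode) pressure because a double-width value occupies
   a register pair on SH.  */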
11213
11214 /* Comparison function for ready queue sorting. */
11215 static int
11216 rank_for_reorder (const void *x, const void *y)
11217 {
11218 rtx_insn *tmp = *(rtx_insn * const *) y;
11219 rtx_insn *tmp2 = *(rtx_insn * const *) x;
11220
11221 /* The insn in a schedule group should be issued first. */
11222 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11223 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11224
11225 /* If insns are equally good, sort by INSN_LUID (original insn order); this
11226 minimizes instruction movement, thus minimizing sched's effect on
11227 register pressure. */
11228 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11229 }
11230
11231 /* Resort the array A in which only the element at index N - 1 may be out of order. */
11232 static void
11233 swap_reorder (rtx_insn **a, int n)
11234 {
11235 rtx_insn *insn = a[n - 1];
11236 int i = n - 2;
11237
11238 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11239 {
11240 a[i + 1] = a[i];
11241 i -= 1;
11242 }
11243 a[i + 1] = insn;
11244 }
11245
11246 /* Sort the ready list by ascending priority. */
11247 static void
11248 ready_reorder (rtx_insn **ready, int nready)
11249 {
11250 if (nready == 2)
11251 swap_reorder (ready, nready);
11252 else if (nready > 2)
11253 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
11254 }
11255
11256 /* Count life regions of r0 for a block. */
11257 static int
11258 find_r0_life_regions (basic_block b)
11259 {
11260 rtx_insn *end, *insn;
11261 rtx pset;
11262 rtx r0_reg;
11263 int live;
11264 int set;
11265 int death = 0;
11266
11267 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11268 {
11269 set = 1;
11270 live = 1;
11271 }
11272 else
11273 {
11274 set = 0;
11275 live = 0;
11276 }
11277
11278 insn = BB_HEAD (b);
11279 end = BB_END (b);
11280 r0_reg = gen_rtx_REG (SImode, R0_REG);
11281 while (1)
11282 {
11283 if (INSN_P (insn))
11284 {
11285 if (find_regno_note (insn, REG_DEAD, R0_REG))
11286 {
11287 death++;
11288 live = 0;
11289 }
11290 if (!live
11291 && (pset = single_set (insn))
11292 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11293 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11294 {
11295 set++;
11296 live = 1;
11297 }
11298 }
11299 if (insn == end)
11300 break;
11301 insn = NEXT_INSN (insn);
11302 }
11303 return set - death;
11304 }
11305
11306 /* Calculate regmode weights for all insns of all basic blocks. */
11307 static void
11308 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11309 int verbose ATTRIBUTE_UNUSED,
11310 int old_max_uid)
11311 {
11312 basic_block b;
11313
11314 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11315 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11316 r0_life_regions = 0;
11317
11318 FOR_EACH_BB_REVERSE_FN (b, cfun)
11319 {
11320 find_regmode_weight (b, SImode);
11321 find_regmode_weight (b, SFmode);
11322 if (!reload_completed)
11323 r0_life_regions += find_r0_life_regions (b);
11324 }
11325
11326 CURR_REGMODE_PRESSURE (SImode) = 0;
11327 CURR_REGMODE_PRESSURE (SFmode) = 0;
11328 }
11329
11330 /* Cleanup. */
11331 static void
11332 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11333 int verbose ATTRIBUTE_UNUSED)
11334 {
11335 if (regmode_weight[0])
11336 {
11337 free (regmode_weight[0]);
11338 regmode_weight[0] = NULL;
11339 }
11340 if (regmode_weight[1])
11341 {
11342 free (regmode_weight[1]);
11343 regmode_weight[1] = NULL;
11344 }
11345 }
11346
11347 /* The scalar modes supported differ from the default version in TImode
11348 for 32-bit SHMEDIA. */
11349 static bool
11350 sh_scalar_mode_supported_p (machine_mode mode)
11351 {
11352 if (TARGET_SHMEDIA32 && mode == TImode)
11353 return false;
11354
11355 return default_scalar_mode_supported_p (mode);
11356 }
11357
11358 /* Cache can_issue_more so that we can return it from reorder2. Also,
11359 keep count of register pressure for SImode and SFmode. */
11360 static int
11361 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11362 int sched_verbose ATTRIBUTE_UNUSED,
11363 rtx_insn *insn,
11364 int can_issue_more)
11365 {
11366 if (GET_CODE (PATTERN (insn)) != USE
11367 && GET_CODE (PATTERN (insn)) != CLOBBER)
11368 cached_can_issue_more = can_issue_more - 1;
11369 else
11370 cached_can_issue_more = can_issue_more;
11371
11372 if (reload_completed)
11373 return cached_can_issue_more;
11374
11375 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11376 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11377
11378 return cached_can_issue_more;
11379 }
11380
11381 static void
11382 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11383 int verbose ATTRIBUTE_UNUSED,
11384 int veclen ATTRIBUTE_UNUSED)
11385 {
11386 CURR_REGMODE_PRESSURE (SImode) = 0;
11387 CURR_REGMODE_PRESSURE (SFmode) = 0;
11388 }
11389
11390 /* Some magic numbers. */
11391 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11392 functions that already have high pressure on r0. */
11393 #define R0_MAX_LIFE_REGIONS 2
11394 /* Register Pressure thresholds for SImode and SFmode registers. */
11395 #define SIMODE_MAX_WEIGHT 5
11396 #define SFMODE_MAX_WEIGHT 10
11397
11398 /* Return true if the pressure is high for MODE. */
11399 static bool
11400 high_pressure (machine_mode mode)
11401 {
11402 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11403 functions that already have high pressure on r0. */
11404 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11405 return true;
11406
11407 if (mode == SFmode)
11408 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11409 else
11410 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11411 }
11412
11413 /* Reorder ready queue if register pressure is high. */
11414 static int
11415 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11416 int sched_verbose ATTRIBUTE_UNUSED,
11417 rtx_insn **ready,
11418 int *n_readyp,
11419 int clock_var ATTRIBUTE_UNUSED)
11420 {
11421 if (reload_completed)
11422 return sh_issue_rate ();
11423
11424 if (high_pressure (SFmode) || high_pressure (SImode))
11425 {
11426 ready_reorder (ready, *n_readyp);
11427 }
11428
11429 return sh_issue_rate ();
11430 }
11431
11432 /* Skip cycles if the current register pressure is high. */
11433 static int
11434 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11435 int sched_verbose ATTRIBUTE_UNUSED,
11436 rtx_insn **ready ATTRIBUTE_UNUSED,
11437 int *n_readyp ATTRIBUTE_UNUSED,
11438 int clock_var ATTRIBUTE_UNUSED)
11439 {
11440 if (reload_completed)
11441 return cached_can_issue_more;
11442
11443 if (high_pressure(SFmode) || high_pressure (SImode))
11444 skip_cycles = 1;
11445
11446 return cached_can_issue_more;
11447 }
11448
11449 /* Skip cycles without sorting the ready queue. This will move insns from
11450 Q->R. If this is the last cycle we are skipping, allow sorting of the
11451 ready queue by sh_reorder. */
11452
11453 /* Generally, skipping this many cycles is sufficient for all insns to move
11454 from Q -> R. */
11455 #define MAX_SKIPS 8
11456
11457 static int
11458 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11459 int sched_verbose ATTRIBUTE_UNUSED,
11460 rtx_insn *insn ATTRIBUTE_UNUSED,
11461 int last_clock_var,
11462 int clock_var,
11463 int *sort_p)
11464 {
11465 if (reload_completed)
11466 return 0;
11467
11468 if (skip_cycles)
11469 {
11470 if ((clock_var - last_clock_var) < MAX_SKIPS)
11471 {
11472 *sort_p = 0;
11473 return 1;
11474 }
11475 /* If this is the last cycle we are skipping, allow reordering of R. */
11476 if ((clock_var - last_clock_var) == MAX_SKIPS)
11477 {
11478 *sort_p = 1;
11479 return 1;
11480 }
11481 }
11482
11483 skip_cycles = 0;
11484
11485 return 0;
11486 }
11487
11488 /* SHmedia requires registers for branches, so we can't generate new
11489 branches past reload. */
11490 static bool
11491 sh_cannot_modify_jumps_p (void)
11492 {
11493 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11494 }
11495
11496 static reg_class_t
11497 sh_target_reg_class (void)
11498 {
11499 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11500 }
11501
11502 static bool
11503 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11504 {
11505 if (! shmedia_space_reserved_for_target_registers)
11506 return 0;
11507 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11508 return 0;
11509
11510 HARD_REG_SET dummy;
11511 if (calc_live_regs (&dummy) >= 6 * 8)
11512 return 1;
11513 return 0;
11514 }
11515
11516 static bool
11517 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11518 {
11519 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11520 }
11521 \f
11522 /*
11523 On the SH1..SH4, the trampoline looks like
11524 2 0002 D202 mov.l l2,r2
11525 1 0000 D301 mov.l l1,r3
11526 3 0004 422B jmp @r2
11527 4 0006 0009 nop
11528 5 0008 00000000 l1: .long area
11529 6 000c 00000000 l2: .long function
11530
11531 SH5 (compact) uses r1 instead of r3 for the static chain. */
11532
11533
11534 /* Emit RTL insns to initialize the variable parts of a trampoline in
11535 TRAMP_MEM. FNDECL is the decl of the function to be called and
11536 CXT is an RTX for its static chain value. */
11537 static void
11538 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11539 {
11540 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11541 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11542
11543 if (TARGET_SHMEDIA64)
11544 {
11545 rtx tramp_templ;
11546 int fixed_len;
11547
11548 rtx movi1 = GEN_INT (0xcc000010);
11549 rtx shori1 = GEN_INT (0xc8000010);
11550 rtx src, dst;
11551
11552 /* The following trampoline works within a +- 128 KB range for cxt:
11553 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11554 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11555 gettr tr1,r1; blink tr0,r63 */
11556 /* Address rounding makes it hard to compute the exact bounds of the
11557 offset for this trampoline, but we have a rather generous offset
11558 range, so frame_offset should do fine as an upper bound. */
11559 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11560 {
11561 /* ??? could optimize this trampoline initialization
11562 by writing DImode words with two insns each. */
11563 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11564 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11565 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11566 insn = gen_rtx_AND (DImode, insn, mask);
11567 /* OR in the ptb/u .,tr1 opcode pattern. */
11568 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11569 insn = force_operand (insn, NULL_RTX);
11570 insn = gen_lowpart (SImode, insn);
11571 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11572 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11573 insn = gen_rtx_AND (DImode, insn, mask);
11574 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11575 insn = gen_lowpart (SImode, insn);
11576 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11577 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11578 insn = gen_rtx_AND (DImode, insn, mask);
11579 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11580 insn = gen_lowpart (SImode, insn);
11581 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11582 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11583 insn = gen_rtx_AND (DImode, insn, mask);
11584 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11585 insn = gen_lowpart (SImode, insn);
11586 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11587 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11588 insn = gen_rtx_AND (DImode, insn, mask);
11589 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11590 insn = gen_lowpart (SImode, insn);
11591 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11592 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11593 GEN_INT (0x6bf10600));
11594 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11595 GEN_INT (0x4415fc10));
11596 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11597 GEN_INT (0x4401fff0));
11598 emit_insn (gen_ic_invalidate_line (tramp));
11599 return;
11600 }
11601 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11602 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11603
11604 tramp_templ = gen_datalabel_ref (tramp_templ);
11605 dst = tramp_mem;
11606 src = gen_const_mem (BLKmode, tramp_templ);
11607 set_mem_align (dst, 256);
11608 set_mem_align (src, 64);
11609 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11610
11611 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11612 emit_move_insn (adjust_address (tramp_mem, Pmode,
11613 fixed_len + GET_MODE_SIZE (Pmode)),
11614 cxt);
11615 emit_insn (gen_ic_invalidate_line (tramp));
11616 return;
11617 }
11618 else if (TARGET_SHMEDIA)
11619 {
11620 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11621 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11622 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11623 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11624 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11625 rotated right by 10, with the higher 16 bits of every 32 selected. */
11626 rtx movishori
11627 = force_reg (V2HImode, (simplify_gen_subreg
11628 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11629 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11630 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11631
11632 fnaddr = force_reg (SImode, fnaddr);
11633 cxt = force_reg (SImode, cxt);
11634 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11635 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11636 movishori));
11637 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11638 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11639 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11640 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11641 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11642 gen_rtx_SUBREG (V2HImode, cxt, 0),
11643 movishori));
11644 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11645 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11646 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11647 if (TARGET_LITTLE_ENDIAN)
11648 {
11649 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11650 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11651 }
11652 else
11653 {
11654 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11655 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11656 }
11657 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11658 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11659 emit_insn (gen_ic_invalidate_line (tramp));
11660 return;
11661 }
11662 else if (TARGET_SHCOMPACT)
11663 {
11664 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11665 return;
11666 }
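/* SH1..SH4: store the fixed opcodes shown in the trampoline layout comment
   above (mov.l to r2/r3, jmp @r2, nop), packed two per SImode word with the
   halfword order depending on endianness, then the static chain (l1) and
   the function address (l2).  */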
11667 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11668 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11669 SImode));
11670 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11671 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11672 SImode));
11673 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11674 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11675 if (TARGET_HARD_SH4 || TARGET_SH5)
11676 {
11677 if (!TARGET_INLINE_IC_INVALIDATE
11678 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
11679 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11680 FUNCTION_ORDINARY),
11681 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11682 else
11683 emit_insn (gen_ic_invalidate_line (tramp));
11684 }
11685 }
11686
11687 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11688 static rtx
11689 sh_trampoline_adjust_address (rtx tramp)
11690 {
11691 if (TARGET_SHMEDIA)
11692 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11693 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11694 return tramp;
11695 }
11696
11697 /* FIXME: This is overly conservative. A SHcompact function that
11698 receives arguments ``by reference'' will have them stored in its
11699 own stack frame, so it must not pass pointers or references to
11700 these arguments to other functions by means of sibling calls. */
11701 /* If PIC, we cannot make sibling calls to global functions
11702 because the PLT requires r12 to be live. */
11703 static bool
11704 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11705 {
11706 return (1
11707 && (! TARGET_SHCOMPACT
11708 || crtl->args.info.stack_regs == 0)
11709 && ! sh_cfun_interrupt_handler_p ()
11710 && (! flag_pic
11711 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
11712 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11713 }
11714
11715 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
11716 void
11717 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
11718 {
11719 const_tree decl = SYMBOL_REF_DECL (sym);
11720 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
11721
11722 if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
11723 emit_insn (gen_sym_label2reg (reg, sym, lab));
11724 else if (sibcall_p)
11725 emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
11726 else
11727 emit_insn (gen_symPLT_label2reg (reg, sym, lab));
11728 }
11729 \f
11730 /* Machine specific built-in functions. */
11731
11732 struct builtin_description
11733 {
11734 bool (* const is_enabled) (void);
11735 const enum insn_code icode;
11736 const char *const name;
11737 int signature;
11738 tree fndecl;
11739 };
11740
11741 static bool
11742 shmedia_builtin_p (void)
11743 {
11744 return TARGET_SHMEDIA;
11745 }
11746
11747 /* Return true if the SH1 built-in functions (the fpscr get/set
11748 built-ins below) are available. */
11749 static bool
11750 sh1_builtin_p (void)
11751 {
11752 return TARGET_SH1;
11753 }
11754
11755 /* Describe number and signedness of arguments; arg[0] == result
11756 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11757 /* 9: 64-bit pointer, 10: 32-bit pointer */
11758 static const char signature_args[][4] =
11759 {
11760 #define SH_BLTIN_V2SI2 0
11761 { 4, 4 },
11762 #define SH_BLTIN_V4HI2 1
11763 { 4, 4 },
11764 #define SH_BLTIN_V2SI3 2
11765 { 4, 4, 4 },
11766 #define SH_BLTIN_V4HI3 3
11767 { 4, 4, 4 },
11768 #define SH_BLTIN_V8QI3 4
11769 { 4, 4, 4 },
11770 #define SH_BLTIN_MAC_HISI 5
11771 { 1, 4, 4, 1 },
11772 #define SH_BLTIN_SH_HI 6
11773 { 4, 4, 1 },
11774 #define SH_BLTIN_SH_SI 7
11775 { 4, 4, 1 },
11776 #define SH_BLTIN_V4HI2V2SI 8
11777 { 4, 4, 4 },
11778 #define SH_BLTIN_V4HI2V8QI 9
11779 { 4, 4, 4 },
11780 #define SH_BLTIN_SISF 10
11781 { 4, 2 },
11782 #define SH_BLTIN_LDUA_L 11
11783 { 2, 10 },
11784 #define SH_BLTIN_LDUA_Q 12
11785 { 1, 10 },
11786 #define SH_BLTIN_STUA_L 13
11787 { 0, 10, 2 },
11788 #define SH_BLTIN_STUA_Q 14
11789 { 0, 10, 1 },
11790 #define SH_BLTIN_LDUA_L64 15
11791 { 2, 9 },
11792 #define SH_BLTIN_LDUA_Q64 16
11793 { 1, 9 },
11794 #define SH_BLTIN_STUA_L64 17
11795 { 0, 9, 2 },
11796 #define SH_BLTIN_STUA_Q64 18
11797 { 0, 9, 1 },
11798 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11799 #define SH_BLTIN_2 19
11800 #define SH_BLTIN_SU 19
11801 { 1, 2 },
11802 #define SH_BLTIN_3 20
11803 #define SH_BLTIN_SUS 20
11804 { 2, 2, 1 },
11805 #define SH_BLTIN_PSSV 21
11806 { 0, 8, 2, 2 },
11807 #define SH_BLTIN_XXUU 22
11808 #define SH_BLTIN_UUUU 22
11809 { 1, 1, 1, 1 },
11810 #define SH_BLTIN_PV 23
11811 { 0, 8 },
11812 #define SH_BLTIN_VP 24
11813 { 8, 0 },
11814 #define SH_BLTIN_UV 25
11815 { 1, 0 },
11816 #define SH_BLTIN_VU 26
11817 { 0, 1 },
11818 };
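/* Illustrative reading of the table: SH_BLTIN_SH_HI is { 4, 4, 1 }, i.e.
   the result (arg[0]) and the first argument use the insn's own modes with
   signedness "don't care", while the second argument (a shift count or
   control value) is unsigned; a 0 entry would mean no (further) argument.  */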
11819 /* mcmv: operands considered unsigned. */
11820 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11821 /* mperm: control value considered unsigned int. */
11822 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11823 /* mshards_q: returns signed short. */
11824 /* nsb: takes long long arg, returns unsigned char. */
11825 static struct builtin_description bdesc[] =
11826 {
11827 { shmedia_builtin_p,
11828 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11829 { shmedia_builtin_p,
11830 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11831 { shmedia_builtin_p,
11832 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11833 { shmedia_builtin_p,
11834 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11835 { shmedia_builtin_p,
11836 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11837 { shmedia_builtin_p,
11838 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11839 { shmedia_builtin_p,
11840 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11841 { shmedia_builtin_p,
11842 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11843 { shmedia_builtin_p,
11844 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11845 { shmedia_builtin_p,
11846 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11847 { shmedia_builtin_p,
11848 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11849 { shmedia_builtin_p,
11850 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11851 { shmedia_builtin_p,
11852 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11853 { shmedia_builtin_p,
11854 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11855 { shmedia_builtin_p,
11856 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11857 { shmedia_builtin_p,
11858 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11859 { shmedia_builtin_p,
11860 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11861 { shmedia_builtin_p,
11862 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11863 { shmedia_builtin_p,
11864 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11865 { shmedia_builtin_p,
11866 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11867 { shmedia_builtin_p,
11868 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11869 { shmedia_builtin_p,
11870 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11871 { shmedia_builtin_p,
11872 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11873 { shmedia_builtin_p,
11874 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11875 { shmedia_builtin_p,
11876 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11877 { shmedia_builtin_p,
11878 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11879 { shmedia_builtin_p,
11880 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11881 { shmedia_builtin_p,
11882 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11883 { shmedia_builtin_p,
11884 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11885 { shmedia_builtin_p,
11886 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11887 { shmedia_builtin_p,
11888 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11889 { shmedia_builtin_p,
11890 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11891 { shmedia_builtin_p,
11892 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11893 { shmedia_builtin_p,
11894 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11895 { shmedia_builtin_p,
11896 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11897 { shmedia_builtin_p,
11898 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11899 { shmedia_builtin_p,
11900 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11901 { shmedia_builtin_p,
11902 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11903 { shmedia_builtin_p,
11904 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11905 { shmedia_builtin_p,
11906 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11907 { shmedia_builtin_p,
11908 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11909 { shmedia_builtin_p,
11910 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11911 { shmedia_builtin_p,
11912 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11913 { shmedia_builtin_p,
11914 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11915 { shmedia_builtin_p,
11916 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11917 { shmedia_builtin_p,
11918 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11919 { shmedia_builtin_p,
11920 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11921 { shmedia_builtin_p,
11922 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11923 { shmedia_builtin_p,
11924 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11925 { shmedia_builtin_p,
11926 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11927 { shmedia_builtin_p,
11928 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11929 { shmedia_builtin_p,
11930 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11931 { shmedia_builtin_p,
11932 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11933 { shmedia_builtin_p,
11934 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11935 { shmedia_builtin_p,
11936 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11937 { shmedia_builtin_p,
11938 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11939 { shmedia_builtin_p,
11940 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11941 { shmedia_builtin_p,
11942 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11943 { shmedia_builtin_p,
11944 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11945 { shmedia_builtin_p,
11946 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11947 { shmedia_builtin_p,
11948 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11949 { shmedia_builtin_p,
11950 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11951 { shmedia_builtin_p,
11952 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11953 { shmedia_builtin_p,
11954 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11955 { shmedia_builtin_p,
11956 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11957 { shmedia_builtin_p,
11958 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11959 { shmedia_builtin_p,
11960 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11961 { shmedia_builtin_p,
11962 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11963 { shmedia_builtin_p,
11964 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11965 { shmedia_builtin_p,
11966 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11967 { shmedia_builtin_p,
11968 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11969 { shmedia_builtin_p,
11970 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11971 { shmedia_builtin_p,
11972 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11973 { shmedia_builtin_p,
11974 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11975 { shmedia_builtin_p,
11976 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11977 { shmedia_builtin_p,
11978 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11979 { shmedia_builtin_p,
11980 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11981 { shmedia_builtin_p,
11982 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11983 { shmedia_builtin_p,
11984 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11985 { shmedia_builtin_p,
11986 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11987 { shmedia_builtin_p,
11988 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11989 { shmedia_builtin_p,
11990 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11991 { shmedia_builtin_p,
11992 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11993
11994 { sh1_builtin_p,
11995 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
11996 { sh1_builtin_p,
11997 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
11998 };
11999
12000 static tree sh_builtin_get_fpscr;
12001 static tree sh_builtin_set_fpscr;
12002
12003 static void
12004 sh_init_builtins (void)
12005 {
12006 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
12007 memset (shared, 0, sizeof shared);
12008
12009 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
12010 {
12011 builtin_description* d = &bdesc[di];
12012
12013 if (!d->is_enabled ())
12014 continue;
12015
12016 tree type, arg_type = NULL_TREE;
12017 int signature = d->signature;
12018
12019 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
12020 type = shared[signature];
12021 else
12022 {
12023 int has_result = signature_args[signature][0] != 0;
12024 tree args[3];
12025
12026 if ((signature_args[signature][1] & 8)
12027 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
12028 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
12029 continue;
12030 if (! TARGET_FPU_ANY
12031 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
12032 continue;
12033 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
12034 args[i] = NULL_TREE;
12035 for (int i = 3; ; i--)
12036 {
12037 int arg = signature_args[signature][i];
12038 int opno = i - 1 + has_result;
12039
12040 if (arg & 8)
12041 arg_type = ptr_type_node;
12042 else if (arg)
12043 arg_type = (*lang_hooks.types.type_for_mode)
12044 (insn_data[d->icode].operand[opno].mode, (arg & 1));
12045 else if (i)
12046 continue;
12047 else
12048 arg_type = void_type_node;
12049 if (i == 0)
12050 break;
12051 args[i-1] = arg_type;
12052 }
12053 type = build_function_type_list (arg_type, args[0], args[1],
12054 args[2], NULL_TREE);
12055 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
12056 shared[signature] = type;
12057 }
12058 d->fndecl =
12059 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
12060 NULL, NULL_TREE);
12061 /* Record the {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
12062 if (d->icode == CODE_FOR_sts_fpscr)
12063 sh_builtin_get_fpscr = d->fndecl;
12064 else if (d->icode == CODE_FOR_set_fpscr)
12065 sh_builtin_set_fpscr = d->fndecl;
12066 }
12067 }
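
/* Informal note: the function code passed to add_builtin_function above is
   the entry's index in bdesc (d - bdesc), so sh_builtin_decl and
   sh_expand_builtin below can map a DECL_FUNCTION_CODE straight back to
   its builtin_description.  */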
12068
12069 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
12070
12071 static void
12072 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12073 {
12074 const unsigned SH_FE_INVALID = 64;
12075 const unsigned SH_FE_DIVBYZERO = 32;
12076 const unsigned SH_FE_OVERFLOW = 16;
12077 const unsigned SH_FE_UNDERFLOW = 8;
12078 const unsigned SH_FE_INEXACT = 4;
12079 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
12080 | SH_FE_DIVBYZERO
12081 | SH_FE_OVERFLOW
12082 | SH_FE_UNDERFLOW
12083 | SH_FE_INEXACT);
12084 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
12085 tree fenv_var, mask, ld_fenv, masked_fenv;
12086 tree new_fenv_var, reload_fenv, restore_fnenv;
12087 tree update_call, atomic_feraiseexcept, hold_fnclex;
12088
12089 if (! TARGET_FPU_ANY)
12090 return;
12091
12092 /* Generate the equivalent of:
12093 unsigned int fenv_var;
12094 fenv_var = __builtin_sh_get_fpscr ();
12095
12096 unsigned int masked_fenv;
12097 masked_fenv = fenv_var & mask;
12098
12099 __builtin_sh_set_fpscr (masked_fenv); */
12100
12101 fenv_var = create_tmp_var (unsigned_type_node);
12102 mask = build_int_cst (unsigned_type_node,
12103 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
12104 | SH_FE_ALL_EXCEPT));
12105 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
12106 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
12107 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
12108 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12109 *hold = build2 (COMPOUND_EXPR, void_type_node,
12110 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
12111 hold_fnclex);
12112
12113 /* Store the value of masked_fenv to clear the exceptions:
12114 __builtin_sh_set_fpscr (masked_fenv); */
12115
12116 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
12117
12118 /* Generate the equivalent of:
12119 unsigned int new_fenv_var;
12120 new_fenv_var = __builtin_sh_get_fpscr ();
12121
12122 __builtin_sh_set_fpscr (fenv_var);
12123
12124 __atomic_feraiseexcept (new_fenv_var); */
12125
12126 new_fenv_var = create_tmp_var (unsigned_type_node);
12127 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
12128 build_call_expr (sh_builtin_get_fpscr, 0));
12129 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
12130 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12131 update_call = build_call_expr (atomic_feraiseexcept, 1,
12132 fold_convert (integer_type_node,
12133 new_fenv_var));
12134 *update = build2 (COMPOUND_EXPR, void_type_node,
12135 build2 (COMPOUND_EXPR, void_type_node,
12136 reload_fenv, restore_fnenv), update_call);
12137 }
12138
12139 /* Implements target hook vector_mode_supported_p. */
12140 bool
12141 sh_vector_mode_supported_p (machine_mode mode)
12142 {
12143 if (TARGET_FPU_ANY
12144 && ((mode == V2SFmode)
12145 || (mode == V4SFmode)
12146 || (mode == V16SFmode)))
12147 return true;
12148
12149 else if (TARGET_SHMEDIA
12150 && ((mode == V8QImode)
12151 || (mode == V2HImode)
12152 || (mode == V4HImode)
12153 || (mode == V2SImode)))
12154 return true;
12155
12156 return false;
12157 }
12158
12159 bool
12160 sh_frame_pointer_required (void)
12161 {
12162 /* If needed, override this in other tm.h files to cope with various OS
12163 lossage requiring a frame pointer. */
12164 if (SUBTARGET_FRAME_POINTER_REQUIRED)
12165 return true;
12166
12167 if (crtl->profile)
12168 return true;
12169
12170 return false;
12171 }
12172
12173 /* Implements target hook dwarf_calling_convention. Return an enum
12174 of dwarf_calling_convention. */
12175 int
12176 sh_dwarf_calling_convention (const_tree func)
12177 {
12178 if (sh_attr_renesas_p (func))
12179 return DW_CC_GNU_renesas_sh;
12180
12181 return DW_CC_normal;
12182 }
12183
12184 /* Returns the sh builtin decl for CODE. */
12185 static tree
12186 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
12187 {
12188 if (code >= ARRAY_SIZE (bdesc))
12189 return error_mark_node;
12190
12191 if (!bdesc[code].is_enabled ())
12192 return error_mark_node;
12193
12194 return bdesc[code].fndecl;
12195 }
12196
12197 /* Expand an expression EXP that calls a built-in function,
12198 with result going to TARGET if that's convenient
12199 (and in mode MODE if that's convenient).
12200 SUBTARGET may be used as the target for computing one of EXP's operands.
12201 IGNORE is nonzero if the value is to be ignored. */
12202 static rtx
12203 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12204 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
12205 {
12206 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12207 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12208 const struct builtin_description *d = &bdesc[fcode];
12209 enum insn_code icode = d->icode;
12210 int signature = d->signature;
12211 int nop = 0;
12212 rtx op[4];
12213
12214 if (signature_args[signature][0])
12215 {
12216 if (ignore)
12217 return NULL_RTX;
12218
12219 machine_mode tmode = insn_data[icode].operand[0].mode;
12220 if (! target || GET_MODE (target) != tmode
12221 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12222 target = gen_reg_rtx (tmode);
12223 op[nop++] = target;
12224 }
12225 else
12226 target = NULL_RTX;
12227
12228 for (int i = 1; i <= 3; i++, nop++)
12229 {
12230 tree arg;
12231 machine_mode opmode, argmode;
12232 tree optype;
12233
12234 if (! signature_args[signature][i])
12235 break;
12236 arg = CALL_EXPR_ARG (exp, i - 1);
12237 if (arg == error_mark_node)
12238 return const0_rtx;
12239 if (signature_args[signature][i] & 8)
12240 {
12241 opmode = ptr_mode;
12242 optype = ptr_type_node;
12243 }
12244 else
12245 {
12246 opmode = insn_data[icode].operand[nop].mode;
12247 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12248 }
12249 argmode = TYPE_MODE (TREE_TYPE (arg));
12250 if (argmode != opmode)
12251 arg = build1 (NOP_EXPR, optype, arg);
12252 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12253 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12254 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12255 }
12256
12257 rtx pat = NULL_RTX;
12258
12259 switch (nop)
12260 {
12261 case 1:
12262 pat = (*insn_data[d->icode].genfun) (op[0]);
12263 break;
12264 case 2:
12265 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12266 break;
12267 case 3:
12268 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12269 break;
12270 case 4:
12271 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12272 break;
12273 default:
12274 gcc_unreachable ();
12275 }
12276 if (! pat)
12277 return NULL_RTX;
12278 emit_insn (pat);
12279 return target;
12280 }
12281
12282 void
12283 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12284 {
12285 rtx sel0 = const0_rtx;
12286 rtx sel1 = const1_rtx;
12287 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12288 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12289
12290 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12291 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
12292 }
12293
12294 void
12295 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12296 {
12297 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12298
12299 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12300 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12301 }
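
/* Informal note: the V2SFmode expanders above operate elementwise - each
   emits two SFmode operations, one per vector element, via the unary_sf_op
   and binary_sf_op* patterns.  */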
12302
12303 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12304 We can allow any mode in any general register. The special registers
12305 and the PR register only allow SImode.
12306
12307 We cannot hold DCmode values in the XD registers because alter_reg
12308 handles subregs of them incorrectly. We could work around this by
12309 spacing the XD registers like the DR registers, but this would require
12310 additional memory in every compilation to hold larger register vectors.
12311 We could hold SFmode / SCmode values in XD registers, but that
12312 would require a tertiary reload when reloading from / to memory,
12313 and a secondary reload to reload from / to general regs; that
12314 seems to be a losing proposition.
12315
12316 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12317 it won't be ferried through GP registers first. */
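/* For example, with the checks below, on SH4 a DFmode value is allowed in an
   even-numbered FP register (a DR pair), in an XD register or in a general
   register, but not in an odd-numbered FP register.  */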
12318 bool
12319 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
12320 {
12321 if (SPECIAL_REGISTER_P (regno))
12322 return mode == SImode;
12323
12324 if (regno == FPUL_REG)
12325 return (mode == SImode || mode == SFmode);
12326
12327 if (FP_REGISTER_P (regno) && mode == SFmode)
12328 return true;
12329
12330 if (mode == V2SFmode)
12331 {
12332 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12333 || GENERAL_REGISTER_P (regno)))
12334 return true;
12335 else
12336 return false;
12337 }
12338
12339 if (mode == V4SFmode)
12340 {
12341 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12342 || GENERAL_REGISTER_P (regno))
12343 return true;
12344 else
12345 return false;
12346 }
12347
12348 if (mode == V16SFmode)
12349 {
12350 if (TARGET_SHMEDIA)
12351 {
12352 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12353 return true;
12354 else
12355 return false;
12356 }
12357 else
12358 return regno == FIRST_XD_REG;
12359 }
12360
12361 if (FP_REGISTER_P (regno))
12362 {
12363 if (mode == SFmode
12364 || mode == SImode
12365 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12366 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12367 || mode == DCmode
12368 || (TARGET_SHMEDIA
12369 && (mode == DFmode || mode == DImode
12370 || mode == V2SFmode || mode == TImode)))
12371 && ((regno - FIRST_FP_REG) & 1) == 0)
12372 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12373 && ((regno - FIRST_FP_REG) & 3) == 0))
12374 return true;
12375 else
12376 return false;
12377 }
12378
12379 if (XD_REGISTER_P (regno))
12380 return mode == DFmode;
12381
12382 if (TARGET_REGISTER_P (regno))
12383 return (mode == DImode || mode == SImode || mode == PDImode);
12384
12385 if (regno == PR_REG)
12386 return mode == SImode;
12387
12388 if (regno == FPSCR_REG)
12389 return mode == SImode;
12390
12391 /* FIXME. This works around PR target/37633 for -O0. */
12392 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12393 {
12394 unsigned int n = GET_MODE_SIZE (mode) / 8;
12395
12396 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12397 && regno <= FIRST_GENERAL_REG + 14)
12398 return false;
12399 }
12400
12401 return true;
12402 }
12403
12404 /* Specify the modes required to caller save a given hard regno.
12405 choose_hard_reg_mode chooses a mode based on HARD_REGNO_MODE_OK
12406 and returns an integer mode for float regs when sh_hard_regno_mode_ok
12407 permits integer modes on them. That makes LRA's split process
12408 unhappy. See PR55212.
12409 */
12410 machine_mode
12411 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
12412 machine_mode mode)
12413 {
12414 if (FP_REGISTER_P (regno)
12415 && (mode == SFmode
12416 || mode == SCmode
12417 || ((mode == DFmode || mode == DCmode)
12418 && ((regno - FIRST_FP_REG) & 1) == 0)))
12419 return mode;
12420
12421 return choose_hard_reg_mode (regno, nregs, false);
12422 }
12423
12424 /* Return the class of registers for which a mode change from FROM to TO
12425 is invalid. */
12426 bool
12427 sh_cannot_change_mode_class (machine_mode from, machine_mode to,
12428 enum reg_class rclass)
12429 {
12430 /* We want to enable the use of SUBREGs as a means to
12431 VEC_SELECT a single element of a vector. */
12432
12433 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12434 This can be problematic when SFmode vector subregs need to be accessed
12435 on the stack with displacement addressing, as it happens with -O0.
12436 Thus we disallow the mode change for -O0. */
12437 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12438 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12439
12440 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12441 {
12442 if (TARGET_LITTLE_ENDIAN)
12443 {
12444 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12445 return reg_classes_intersect_p (DF_REGS, rclass);
12446 }
12447 else
12448 {
12449 if (GET_MODE_SIZE (from) < 8)
12450 return reg_classes_intersect_p (DF_REGS, rclass);
12451 }
12452 }
12453 return false;
12454 }
12455
12456 /* Return true if registers in machine mode MODE will likely be
12457 allocated to registers in small register classes. */
12458 bool
12459 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
12460 {
12461 return (! TARGET_SHMEDIA);
12462 }
12463
12464 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12465 that label is used. */
12466 void
12467 sh_mark_label (rtx address, int nuses)
12468 {
12469 if (GOTOFF_P (address))
12470 {
12471 /* Extract the label or symbol. */
12472 address = XEXP (address, 0);
12473 if (GET_CODE (address) == PLUS)
12474 address = XEXP (address, 0);
12475 address = XVECEXP (address, 0, 0);
12476 }
12477 if (GET_CODE (address) == LABEL_REF
12478 && LABEL_P (XEXP (address, 0)))
12479 LABEL_NUSES (XEXP (address, 0)) += nuses;
12480 }
12481
12482 /* Compute extra cost of moving data between one register class
12483 and another.
12484
12485 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12486 uses this information. Hence, the general register <-> floating point
12487 register information here is not used for SFmode. */
12488 static int
12489 sh_register_move_cost (machine_mode mode,
12490 reg_class_t srcclass, reg_class_t dstclass)
12491 {
12492 if (dstclass == T_REGS || dstclass == PR_REGS)
12493 return 10;
12494
12495 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12496 return 4;
12497
12498 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12499 && REGCLASS_HAS_FP_REG (srcclass)
12500 && REGCLASS_HAS_FP_REG (dstclass))
12501 return 4;
12502
12503 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12504 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12505
12506 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12507 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12508 return 9;
12509
12510 if ((REGCLASS_HAS_FP_REG (dstclass)
12511 && REGCLASS_HAS_GENERAL_REG (srcclass))
12512 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12513 && REGCLASS_HAS_FP_REG (srcclass)))
12514 {
12515 /* Discourage trying to use fp regs for a pointer. This also
12516 discourages fp regs with SImode because Pmode is an alias
12517 of SImode on this target. See PR target/48596. */
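/* E.g. with the non-FMOVD base cost of 12, an SImode (== Pmode) move
   between a general and an FP register is costed (12 + 40) * 1 = 52
   instead of 12.  */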
12518 int addend = (mode == Pmode) ? 40 : 0;
12519
12520 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12521 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12522 }
12523
12524 if ((dstclass == FPUL_REGS
12525 && REGCLASS_HAS_GENERAL_REG (srcclass))
12526 || (srcclass == FPUL_REGS
12527 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12528 return 5;
12529
12530 if ((dstclass == FPUL_REGS
12531 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12532 || (srcclass == FPUL_REGS
12533 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12534 return 7;
12535
12536 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12537 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12538 return 20;
12539
12540 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12541 if (TARGET_SHMEDIA
12542 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12543 {
12544 if (sh_gettrcost >= 0)
12545 return sh_gettrcost;
12546 else if (!TARGET_PT_FIXED)
12547 return 100;
12548 }
12549
12550 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12551 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12552 return 4;
12553
12554 if (TARGET_SHMEDIA
12555 || (TARGET_FMOVD
12556 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12557 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12558 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12559
12560 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
12561 }
12562
12563 static rtx
12564 emit_load_ptr (rtx reg, rtx addr)
12565 {
12566 rtx mem = gen_const_mem (ptr_mode, addr);
12567
12568 if (Pmode != ptr_mode)
12569 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12570 return emit_move_insn (reg, mem);
12571 }
12572
12573 static void
12574 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12575 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12576 tree function)
12577 {
12578 CUMULATIVE_ARGS cum;
12579 int structure_value_byref = 0;
12580 rtx this_rtx, this_value, sibcall, funexp;
12581 rtx_insn *insns;
12582 tree funtype = TREE_TYPE (function);
12583 int simple_add = CONST_OK_FOR_ADD (delta);
12584 int did_load = 0;
12585 rtx scratch0, scratch1, scratch2;
12586 unsigned i;
12587
12588 reload_completed = 1;
12589 epilogue_completed = 1;
12590 crtl->uses_only_leaf_regs = 1;
12591
12592 emit_note (NOTE_INSN_PROLOGUE_END);
12593
12594 /* Find the "this" pointer. We have such a wide range of ABIs for the
12595 SH that it's best to do this completely machine independently.
12596 "this" is passed as first argument, unless a structure return pointer
12597 comes first, in which case "this" comes second. */
12598 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12599 #ifndef PCC_STATIC_STRUCT_RETURN
12600 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12601 structure_value_byref = 1;
12602 #endif /* not PCC_STATIC_STRUCT_RETURN */
12603 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12604 {
12605 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12606
12607 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12608 }
12609 this_rtx
12610 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12611
12612 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12613 static chain pointer (even if you can't have nested virtual functions
12614 right now, someone might implement them sometime), and the rest of the
12615 registers are used for argument passing, are callee-saved, or reserved. */
12616 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12617 -ffixed-reg has been used. */
12618 if (! call_used_regs[0] || fixed_regs[0])
12619 error ("r0 needs to be available as a call-clobbered register");
12620 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12621 if (! TARGET_SH5)
12622 {
12623 if (call_used_regs[1] && ! fixed_regs[1])
12624 scratch1 = gen_rtx_REG (ptr_mode, 1);
12625 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12626 to the location where struct values are returned, so use register 3. */
12627 if (call_used_regs[3] && ! fixed_regs[3])
12628 scratch2 = gen_rtx_REG (Pmode, 3);
12629 }
12630 else if (TARGET_SHMEDIA)
12631 {
12632 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12633 if (i != REGNO (scratch0) &&
12634 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12635 {
12636 scratch1 = gen_rtx_REG (ptr_mode, i);
12637 break;
12638 }
12639 if (scratch1 == scratch0)
12640 error ("need a second call-clobbered general purpose register");
12641 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12642 if (call_used_regs[i] && ! fixed_regs[i])
12643 {
12644 scratch2 = gen_rtx_REG (Pmode, i);
12645 break;
12646 }
12647 if (scratch2 == scratch0)
12648 error ("need a call-clobbered target register");
12649 }
12650
12651 this_value = plus_constant (Pmode, this_rtx, delta);
12652 if (vcall_offset
12653 && (simple_add || scratch0 != scratch1)
12654 && strict_memory_address_p (ptr_mode, this_value))
12655 {
12656 emit_load_ptr (scratch0, this_value);
12657 did_load = 1;
12658 }
12659
12660 if (!delta)
12661 ; /* Do nothing. */
12662 else if (simple_add)
12663 emit_move_insn (this_rtx, this_value);
12664 else
12665 {
12666 emit_move_insn (scratch1, GEN_INT (delta));
12667 emit_insn (gen_add2_insn (this_rtx, scratch1));
12668 }
12669
12670 if (vcall_offset)
12671 {
12672 rtx offset_addr;
12673
12674 if (!did_load)
12675 emit_load_ptr (scratch0, this_rtx);
12676
12677 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12678 if (strict_memory_address_p (ptr_mode, offset_addr))
12679 ; /* Do nothing. */
12680 else if (! TARGET_SH5 && scratch0 != scratch1)
12681 {
12682 /* scratch0 != scratch1, and we have indexed loads. Get better
12683 schedule by loading the offset into r1 and using an indexed
12684 load - then the load of r1 can issue before the load from
12685 (this_rtx + delta) finishes. */
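/* I.e. emit something like "mov #offset,r1" (or a constant pool load for
   larger offsets) followed by the r0-indexed load "mov.l @(r0,r1),r0".  */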
12686 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12687 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12688 }
12689 else if (CONST_OK_FOR_ADD (vcall_offset))
12690 {
12691 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12692 offset_addr = scratch0;
12693 }
12694 else if (scratch0 != scratch1)
12695 {
12696 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12697 emit_insn (gen_add2_insn (scratch0, scratch1));
12698 offset_addr = scratch0;
12699 }
12700 else
12701 gcc_unreachable (); /* FIXME */
12702 emit_load_ptr (scratch0, offset_addr);
12703
12704 if (Pmode != ptr_mode)
12705 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12706 emit_insn (gen_add2_insn (this_rtx, scratch0));
12707 }
12708
12709 /* Generate a tail call to the target function. */
12710 if (! TREE_USED (function))
12711 {
12712 assemble_external (function);
12713 TREE_USED (function) = 1;
12714 }
12715 funexp = XEXP (DECL_RTL (function), 0);
12716 /* If the function is overridden, so is the thunk, hence we don't
12717 need GOT addressing even if this is a public symbol. */
12718 #if 0
12719 if (TARGET_SH1 && ! flag_weak)
12720 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12721 else
12722 #endif
12723 if (TARGET_SH2 && flag_pic)
12724 {
12725 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12726 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12727 }
12728 else
12729 {
12730 if (TARGET_SHMEDIA && flag_pic)
12731 {
12732 funexp = gen_sym2PIC (funexp);
12733 PUT_MODE (funexp, Pmode);
12734 }
12735 emit_move_insn (scratch2, funexp);
12736 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12737 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12738 }
12739 sibcall = emit_call_insn (sibcall);
12740 SIBLING_CALL_P (sibcall) = 1;
12741 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12742 emit_barrier ();
12743
12744 /* Run just enough of rest_of_compilation to do scheduling and get
12745 the insns emitted. Note that use_thunk calls
12746 assemble_start_function and assemble_end_function. */
12747
12748 insns = get_insns ();
12749
12750 if (optimize > 0)
12751 {
12752 if (! cfun->cfg)
12753 init_flow (cfun);
12754 split_all_insns_noflow ();
12755 }
12756
12757 sh_reorg ();
12758 shorten_branches (insns);
12759 final_start_function (insns, file, 1);
12760 final (insns, file, 1);
12761 final_end_function ();
12762
12763 reload_completed = 0;
12764 epilogue_completed = 0;
12765 }
12766
12767 rtx
12768 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12769 {
12770 rtx sym;
12771
12772 /* If this is not an ordinary function, the name usually comes from a
12773 string literal or an sprintf buffer. Make sure we use the same
12774 string consistently, so that cse will be able to unify address loads. */
12775 if (kind != FUNCTION_ORDINARY)
12776 name = IDENTIFIER_POINTER (get_identifier (name));
12777 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12778 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12779 if (flag_pic)
12780 switch (kind)
12781 {
12782 case FUNCTION_ORDINARY:
12783 break;
12784 case SFUNC_GOT:
12785 {
12786 rtx reg = target ? target : gen_reg_rtx (Pmode);
12787
12788 emit_insn (gen_symGOT2reg (reg, sym));
12789 sym = reg;
12790 break;
12791 }
12792 case SFUNC_STATIC:
12793 {
12794 /* ??? To allow cse to work, we use GOTOFF relocations.
12795 We could add combiner patterns to transform this into
12796 straight pc-relative calls with sym2PIC / bsrf when
12797 label load and function call are still 1:1 and in the
12798 same basic block during combine. */
12799 rtx reg = target ? target : gen_reg_rtx (Pmode);
12800
12801 emit_insn (gen_symGOTOFF2reg (reg, sym));
12802 sym = reg;
12803 break;
12804 }
12805 }
12806 if (target && sym != target)
12807 {
12808 emit_move_insn (target, sym);
12809 return target;
12810 }
12811 return sym;
12812 }
12813
12814 /* Find the number of a general purpose register in S. */
12815 static int
12816 scavenge_reg (HARD_REG_SET *s)
12817 {
12818 int r;
12819 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12820 if (TEST_HARD_REG_BIT (*s, r))
12821 return r;
12822 return -1;
12823 }
12824
12825 rtx
12826 sh_get_pr_initial_val (void)
12827 {
12828 rtx val;
12829
12830 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12831 PR register on SHcompact, because it might be clobbered by the prologue.
12832 We check first if that is known to be the case. */
12833 if (TARGET_SHCOMPACT
12834 && ((crtl->args.info.call_cookie
12835 & ~ CALL_COOKIE_RET_TRAMP (1))
12836 || crtl->saves_all_registers))
12837 return gen_frame_mem (SImode, return_address_pointer_rtx);
12838
12839 /* If we haven't finished rtl generation, there might be a nonlocal label
12840 that we haven't seen yet.
12841 ??? get_hard_reg_initial_val fails if it is called after register
12842 allocation has started, unless it has been called before for the
12843 same register. And even then, we end up in trouble if we didn't use
12844 the register in the same basic block before. So call
12845 get_hard_reg_initial_val now and wrap it in an unspec if we might
12846 need to replace it. */
12847 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12848 combine can put the pseudo returned by get_hard_reg_initial_val into
12849 instructions that need a general purpose register, which will fail to
12850 be recognized when the pseudo becomes allocated to PR. */
12851 val
12852 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12853 if (TARGET_SH1)
12854 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12855 return val;
12856 }
12857
12858 bool
12859 sh_expand_t_scc (rtx operands[])
12860 {
12861 enum rtx_code code = GET_CODE (operands[1]);
12862 rtx target = operands[0];
12863 rtx op0 = operands[2];
12864 rtx op1 = operands[3];
12865 rtx result = target;
12866 HOST_WIDE_INT val;
12867
12868 if (!REG_P (op0) || REGNO (op0) != T_REG
12869 || !CONST_INT_P (op1))
12870 return false;
12871 if (!REG_P (result))
12872 result = gen_reg_rtx (SImode);
12873 val = INTVAL (op1);
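/* T is always 0 or 1, so (T == 1) and (T != 0) are just T itself,
   (T == 0) and (T != 1) are the negation of T, and comparing T against
   any other constant folds to 0 for EQ and 1 for NE.  */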
12874 if ((code == EQ && val == 1) || (code == NE && val == 0))
12875 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12876 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12877 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12878 else if (code == EQ || code == NE)
12879 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12880 else
12881 return false;
12882 if (result != target)
12883 emit_move_insn (target, result);
12884 return true;
12885 }
12886
12887 /* INSN is an sfunc; return the rtx that describes the address used. */
12888 static rtx
12889 extract_sfunc_addr (rtx insn)
12890 {
12891 rtx pattern, part = NULL_RTX;
12892 int len, i;
12893
12894 pattern = PATTERN (insn);
12895 len = XVECLEN (pattern, 0);
12896 for (i = 0; i < len; i++)
12897 {
12898 part = XVECEXP (pattern, 0, i);
12899 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12900 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12901 return XEXP (part, 0);
12902 }
12903 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12904 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12905 }
12906
12907 /* Verify that the register in use_sfunc_addr still agrees with the address
12908 used in the sfunc. This prevents fill_slots_from_thread from changing
12909 use_sfunc_addr.
12910 INSN is the use_sfunc_addr instruction, and REG is the register it
12911 guards. */
12912 bool
12913 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
12914 {
12915 /* Search for the sfunc. It should really come right after INSN. */
12916 while ((insn = NEXT_INSN (insn)))
12917 {
12918 if (LABEL_P (insn) || JUMP_P (insn))
12919 break;
12920 if (! INSN_P (insn))
12921 continue;
12922
12923 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
12924 insn = seq->insn (0);
12925 if (GET_CODE (PATTERN (insn)) != PARALLEL
12926 || get_attr_type (insn) != TYPE_SFUNC)
12927 continue;
12928 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12929 }
12930 gcc_unreachable ();
12931 }
12932
12933 /* This function returns a constant rtx that represents 2**15 / pi in
12934 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
12935 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
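/* 2**15 / pi = 32768 / 3.14159265... ~= 10430.378350470453, so multiplying
   an angle in radians by this constant maps 2*pi to 2**16 = 0x10000.  */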
12936 static GTY(()) rtx sh_fsca_sf2int_rtx;
12937
12938 rtx
12939 sh_fsca_sf2int (void)
12940 {
12941 if (! sh_fsca_sf2int_rtx)
12942 {
12943 REAL_VALUE_TYPE rv;
12944
12945 real_from_string (&rv, "10430.378350470453");
12946 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12947 }
12948
12949 return sh_fsca_sf2int_rtx;
12950 }
12951
12952 /* This function returns a constant rtx that represents pi / 2**15 in
12953 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12954 of a full circle back to an SFmode value, i.e. 0x10000 maps
12955 to 2*pi. */
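/* pi / 2**15 = 3.14159265... / 32768 ~= 9.587379924285257e-5, the reciprocal
   of the scale above; 0x10000 * (pi / 2**15) = 2*pi.  */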
12956 static GTY(()) rtx sh_fsca_int2sf_rtx;
12957
12958 rtx
12959 sh_fsca_int2sf (void)
12960 {
12961 if (! sh_fsca_int2sf_rtx)
12962 {
12963 REAL_VALUE_TYPE rv;
12964
12965 real_from_string (&rv, "9.587379924285257e-5");
12966 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12967 }
12968
12969 return sh_fsca_int2sf_rtx;
12970 }
12971
12972 /* Initialize the CUMULATIVE_ARGS structure. */
12973 void
12974 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12975 tree fntype,
12976 rtx libname ATTRIBUTE_UNUSED,
12977 tree fndecl,
12978 signed int n_named_args,
12979 machine_mode mode)
12980 {
12981 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12982 pcum->free_single_fp_reg = 0;
12983 pcum->stack_regs = 0;
12984 pcum->byref_regs = 0;
12985 pcum->byref = 0;
12986 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12987
12988 /* XXX - Should we check TARGET_HITACHI here ??? */
12989 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12990
12991 if (fntype)
12992 {
12993 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12994 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12995 pcum->prototype_p = prototype_p (fntype);
12996 pcum->arg_count [(int) SH_ARG_INT]
12997 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12998
12999 pcum->call_cookie
13000 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
13001 && pcum->arg_count [(int) SH_ARG_INT] == 0
13002 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
13003 ? int_size_in_bytes (TREE_TYPE (fntype))
13004 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
13005 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
13006 == FIRST_RET_REG));
13007 }
13008 else
13009 {
13010 pcum->arg_count [(int) SH_ARG_INT] = 0;
13011 pcum->prototype_p = FALSE;
13012 if (mode != VOIDmode)
13013 {
13014 pcum->call_cookie =
13015 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
13016 && GET_MODE_SIZE (mode) > 4
13017 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
13018
13019 /* If the default ABI is the Renesas ABI then all library
13020 calls must assume that the library will be using the
13021 Renesas ABI. So if the function would return its result
13022 in memory then we must force the address of this memory
13023 block onto the stack. Ideally we would like to call
13024 targetm.calls.return_in_memory() here but we do not have
13025 the TYPE or the FNDECL available so we synthesize the
13026 contents of that function as best we can. */
13027 pcum->force_mem =
13028 (TARGET_DEFAULT & MASK_HITACHI)
13029 && (mode == BLKmode
13030 || (GET_MODE_SIZE (mode) > 4
13031 && !(mode == DFmode
13032 && TARGET_FPU_DOUBLE)));
13033 }
13034 else
13035 {
13036 pcum->call_cookie = 0;
13037 pcum->force_mem = FALSE;
13038 }
13039 }
13040 }
13041
13042 rtx
13043 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
13044 {
13045 enum rtx_code code = TRUNCATE;
13046
13047 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
13048 {
13049 rtx inner = XEXP (x, 0);
13050 machine_mode inner_mode = GET_MODE (inner);
13051
13052 if (inner_mode == mode)
13053 return inner;
13054 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
13055 x = inner;
13056 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
13057 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
13058 {
13059 code = GET_CODE (x);
13060 x = inner;
13061 }
13062 }
13063 return gen_rtx_fmt_e (code, mode, x);
13064 }
13065
13066 /* Look through X cleaning up truncates of registers that span multiple
13067 actual hard registers. Return the number of changes made. */
13068 int
13069 shmedia_cleanup_truncate (rtx x)
13070 {
13071 int n_changes = 0;
13072 subrtx_var_iterator::array_type array;
13073 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
13074 {
13075 rtx x = *iter;
13076 if (GET_CODE (x) == TRUNCATE)
13077 {
13078 rtx reg = XEXP (x, 0);
13079 machine_mode reg_mode = GET_MODE (reg);
13080 if (REG_P (reg) && GET_MODE_SIZE (reg_mode) > 8)
13081 {
13082 int offset = subreg_lowpart_offset (DImode, reg_mode);
13083 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode, offset);
13084 n_changes += 1;
13085 iter.skip_subrtxes ();
13086 }
13087 }
13088 }
13089 return n_changes;
13090 }
13091
13092 /* Load and store depend on the highpart of the address. However,
13093 set_attr_alternative does not give well-defined results before reload,
13094 so we must look at the rtl ourselves to see if any of the feeding
13095 registers is used in a memref.
13096
13097 Return true iff INSN contains a MEM. */
13098 bool
13099 sh_contains_memref_p (rtx insn)
13100 {
13101 subrtx_iterator::array_type array;
13102 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13103 if (MEM_P (*iter))
13104 return true;
13105 return false;
13106 }
13107
13108 /* Return true iff INSN loads a banked register. */
13109 bool
13110 sh_loads_bankedreg_p (rtx insn)
13111 {
13112 if (GET_CODE (PATTERN (insn)) == SET)
13113 {
13114 rtx op = SET_DEST (PATTERN(insn));
13115 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
13116 return true;
13117 }
13118
13119 return false;
13120 }
13121
13122 /* FNADDR is the MEM expression from a call expander. Return an address
13123 to use in an SHmedia insn pattern. */
13124 rtx
13125 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
13126 {
13127 int is_sym;
13128
13129 fnaddr = XEXP (fnaddr, 0);
13130 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
13131 if (flag_pic && is_sym)
13132 {
13133 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13134 {
13135 rtx reg = gen_reg_rtx (Pmode);
13136
13137 /* We must not use GOTPLT for sibcalls, because PIC_REG
13138 must be restored before the PLT code gets to run. */
13139 if (is_sibcall)
13140 emit_insn (gen_symGOT2reg (reg, fnaddr));
13141 else
13142 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13143 fnaddr = reg;
13144 }
13145 else
13146 {
13147 fnaddr = gen_sym2PIC (fnaddr);
13148 PUT_MODE (fnaddr, Pmode);
13149 }
13150 }
13151 /* If ptabs might trap, make this visible to the rest of the compiler.
13152 We generally assume that symbols pertain to valid locations, but
13153 it is possible to generate invalid symbols with asm or linker tricks.
13154 In a list of functions where each returns its successor, an invalid
13155 symbol might denote an empty list. */
13156 if (!TARGET_PT_FIXED
13157 && (!is_sym || TARGET_INVALID_SYMBOLS)
13158 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13159 {
13160 rtx tr = gen_reg_rtx (PDImode);
13161
13162 emit_insn (gen_ptabs (tr, fnaddr));
13163 fnaddr = tr;
13164 }
13165 else if (! target_reg_operand (fnaddr, Pmode))
13166 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13167 return fnaddr;
13168 }
13169
13170 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13171 static reg_class_t
13172 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13173 {
13174 if (rclass == NO_REGS
13175 && TARGET_SHMEDIA
13176 && (CONST_DOUBLE_P (x)
13177 || GET_CODE (x) == SYMBOL_REF
13178 || PIC_ADDR_P (x)))
13179 return GENERAL_REGS;
13180
13181 return rclass;
13182 }
13183
13184 /* Implement TARGET_SECONDARY_RELOAD. */
13185 static reg_class_t
13186 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13187 machine_mode mode, secondary_reload_info *sri)
13188 {
13189 enum reg_class rclass = (enum reg_class) rclass_i;
13190
13191 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13192 && REG_P (XEXP (XEXP (x, 0), 0))
13193 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13194 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13195
13196 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13197 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13198
13199 if (REG_P (x) && REGNO (x) == GBR_REG)
13200 return NO_REGS;
13201
13202 if (in_p)
13203 {
13204 if (REGCLASS_HAS_FP_REG (rclass)
13205 && ! TARGET_SHMEDIA
13206 && immediate_operand ((x), mode)
13207 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
13208 switch (mode)
13209 {
13210 case SFmode:
13211 sri->icode = CODE_FOR_reload_insf__frn;
13212 return NO_REGS;
13213 case DFmode:
13214 sri->icode = CODE_FOR_reload_indf__frn;
13215 return NO_REGS;
13216 case SImode:
13217 /* ??? If we knew that we are in the appropriate mode -
13218 single precision - we could use a reload pattern directly. */
13219 return FPUL_REGS;
13220 default:
13221 abort ();
13222 }
13223 if (rclass == FPUL_REGS
13224 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13225 || REGNO (x) == T_REG))
13226 || GET_CODE (x) == PLUS))
13227 return GENERAL_REGS;
13228 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13229 {
13230 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13231 return GENERAL_REGS;
13232 else if (mode == SFmode)
13233 return FP_REGS;
13234 sri->icode = CODE_FOR_reload_insi__i_fpul;
13235 return NO_REGS;
13236 }
13237 if (rclass == FPSCR_REGS
13238 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13239 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13240 return GENERAL_REGS;
13241 if (REGCLASS_HAS_FP_REG (rclass)
13242 && TARGET_SHMEDIA
13243 && immediate_operand (x, mode)
13244 && x != CONST0_RTX (GET_MODE (x))
13245 && GET_MODE (x) != V4SFmode)
13246 return GENERAL_REGS;
13247 if ((mode == QImode || mode == HImode)
13248 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13249 {
13250 sri->icode = ((mode == QImode)
13251 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13252 return NO_REGS;
13253 }
13254 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13255 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13256 return TARGET_REGS;
13257 } /* end of input-only processing. */
13258
13259 if (((REGCLASS_HAS_FP_REG (rclass)
13260 && (REG_P (x)
13261 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13262 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13263 && TARGET_FMOVD))))
13264 || (REGCLASS_HAS_GENERAL_REG (rclass)
13265 && REG_P (x)
13266 && FP_REGISTER_P (REGNO (x))))
13267 && ! TARGET_SHMEDIA
13268 && (mode == SFmode || mode == SImode))
13269 return FPUL_REGS;
13270 if ((rclass == FPUL_REGS
13271 || (REGCLASS_HAS_FP_REG (rclass)
13272 && ! TARGET_SHMEDIA && mode == SImode))
13273 && (MEM_P (x)
13274 || (REG_P (x)
13275 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13276 || REGNO (x) == T_REG
13277 || system_reg_operand (x, VOIDmode)))))
13278 {
13279 if (rclass == FPUL_REGS)
13280 return GENERAL_REGS;
13281 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
13282 }
13283 if ((rclass == TARGET_REGS
13284 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13285 && !satisfies_constraint_Csy (x)
13286 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13287 return GENERAL_REGS;
13288 if ((rclass == MAC_REGS || rclass == PR_REGS)
13289 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13290 && rclass != REGNO_REG_CLASS (REGNO (x)))
13291 return GENERAL_REGS;
13292 if (rclass != GENERAL_REGS && REG_P (x)
13293 && TARGET_REGISTER_P (REGNO (x)))
13294 return GENERAL_REGS;
13295
13296 /* If we get here, fall back to loading the FPUL register through general
13297 registers. This case can happen when the movsi_ie insn is picked
13298 initially to load/store the FPUL register from/to another register,
13299 and then the other register is allocated on the stack. */
13300 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13301 return GENERAL_REGS;
13302
13303 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13304 the other operand.
13305 On SH2A could also just leave it alone here, which would result in a
13306 4 byte move insn being generated instead. However, for this to work
13307 the insns must have the appropriate alternatives. */
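/* E.g. "mov.b @(4,r5),r0" -- on non-SH2A targets the short displacement
   forms of mov.b / mov.w only allow R0 as the other operand.  */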
13308 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13309 && satisfies_constraint_Sdd (x)
13310 && sh_disp_addr_displacement (x)
13311 <= sh_max_mov_insn_displacement (mode, false))
13312 return R0_REGS;
13313
13314 /* When reload is trying to address a QImode or HImode subreg on the stack,
13315 force any subreg byte into R0_REGS, as this is going to become a
13316 displacement address.
13317 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13318 is on the stack, the memref to it might already require a displacement
13319 and that has to be added to the final address. At this point we don't
13320 know the cumulative displacement so we assume the worst case. */
13321 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13322 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13323 return R0_REGS;
13324
13325 return NO_REGS;
13326 }
13327
13328 /* Return true if SUBST can't safely replace its equivalent during RA. */
13329 static bool
13330 sh_cannot_substitute_mem_equiv_p (rtx)
13331 {
13332 if (TARGET_SHMEDIA)
13333 return false;
13334
13335 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
13336 uses R0 and may cause spill failure when R0 is already used.
13337 We have to return true for that case at least.
13338 Moreover, SH puts heavy pressure on R0 and does not have enough
13339 hard registers to make the equiv substitution a win for size or
13340 speed on average working sets. The pseudos produced to hold the
13341 equiv values can't get good hard registers in the bad cases and
13342 end up as memory save/restore insns, which make the code worse. */
13343 return true;
13344 }
13345
13346 /* Return true if DISP can be legitimized. */
13347 static bool
13348 sh_legitimize_address_displacement (rtx *disp, rtx *offs,
13349 machine_mode mode)
13350 {
13351 if (TARGET_SHMEDIA)
13352 return false;
13353
13354 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
13355 || (TARGET_SH2E && mode == SFmode))
13356 return false;
13357
13358 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
13359 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
13360 {
13361 *disp = adj.mov_disp;
13362 *offs = adj.offset_adjust;
13363 return true;
13364 }
13365
13366 return false;
13367 }
13368
13369 /* Return true if the movsf insn should be split with an additional
13370 register. */
13371 bool
13372 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
13373 {
13374 /* op0 == op1 */
13375 if (rtx_equal_p (op0, op1))
13376 return true;
13377 /* fy, FQ, reg */
13378 if (GET_CODE (op1) == CONST_DOUBLE
13379 && ! satisfies_constraint_G (op1)
13380 && ! satisfies_constraint_H (op1)
13381 && REG_P (op0)
13382 && REG_P (op2))
13383 return true;
13384 /* f, r, y */
13385 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
13386 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
13387 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13388 return true;
13389 /* r, f, y */
13390 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
13391 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
13392 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
13393 return true;
13394
13395 return false;
13396 }
13397
13398 static void
13399 sh_conditional_register_usage (void)
13400 {
13401 int regno;
13402 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13403 if (! VALID_REGISTER_P (regno))
13404 fixed_regs[regno] = call_used_regs[regno] = 1;
13405 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13406 if (TARGET_SH5)
13407 {
13408 call_used_regs[FIRST_GENERAL_REG + 8]
13409 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13410 call_really_used_regs[FIRST_GENERAL_REG + 8]
13411 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13412 }
13413 if (TARGET_SHMEDIA)
13414 {
13415 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13416 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13417 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13418 }
13419 if (flag_pic)
13420 {
13421 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13422 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13423 }
13424 /* Renesas saves and restores mac registers on call. */
13425 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13426 {
13427 call_really_used_regs[MACH_REG] = 0;
13428 call_really_used_regs[MACL_REG] = 0;
13429 }
13430
13431 if (TARGET_SHMEDIA)
13432 {
13433 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13434 if (! fixed_regs[regno] && call_really_used_regs[regno])
13435 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13436 }
13437 else
13438 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13439 if (! fixed_regs[regno] && call_really_used_regs[regno])
13440 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13441
13442 call_really_used_regs[FPSCR_MODES_REG] = 0;
13443 call_really_used_regs[FPSCR_STAT_REG] = 0;
13444 }
13445
13446 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13447
13448 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13449 static bool
13450 sh_legitimate_constant_p (machine_mode mode, rtx x)
13451 {
13452 return (TARGET_SHMEDIA
13453 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13454 || x == CONST0_RTX (mode)
13455 || !TARGET_SHMEDIA_FPU
13456 || TARGET_SHMEDIA64)
13457 : (GET_CODE (x) != CONST_DOUBLE
13458 || mode == DFmode || mode == SFmode
13459 || mode == DImode || GET_MODE (x) == VOIDmode));
13460 }
13461
13462 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13463
13464 static void
13465 sh_init_sync_libfuncs (void)
13466 {
13467 init_sync_libfuncs (UNITS_PER_WORD);
13468 }
13469
13470 /* Return true if it is appropriate to emit `ret' instructions in the
13471 body of a function. */
13472 bool
13473 sh_can_use_simple_return_p (void)
13474 {
13475 HARD_REG_SET live_regs_mask;
13476 int d;
13477
13478 /* Some targets require special return insns. */
13479 if (TARGET_SHMEDIA
13480 || (TARGET_SHCOMPACT
13481 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13482 return false;
13483
13484 if (! reload_completed || frame_pointer_needed)
13485 return false;
13486
13487 /* Moving the prologue around doesn't reduce the size. */
13488 if (optimize_function_for_size_p (cfun))
13489 return false;
13490
13491 /* Finally, allow for pr save. */
13492 d = calc_live_regs (&live_regs_mask);
13493
13494 if (rounded_frame_size (d) > 4)
13495 return false;
13496
13497 return true;
13498 }
13499
13500 /*------------------------------------------------------------------------------
13501 Address mode optimization support code
13502 */
13503
13504 typedef HOST_WIDE_INT disp_t;
13505 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13506 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13507 static const disp_t INVALID_DISP = MAX_DISP;
13508
13509 /* A memory reference which is described by a base register and a
13510 displacement. */
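/* For example, the address (plus (reg R) (const_int 20)) decomposes into
   reg_ = R, disp_ = 20, while a bare constant yields reg_ == NULL so that
   only is_disp () holds.  */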
13511 class base_reg_disp
13512 {
13513 public:
13514 base_reg_disp (rtx br, disp_t d);
13515
13516 bool is_reg (void) const;
13517 bool is_disp (void) const;
13518 rtx reg (void) const;
13519 disp_t disp (void) const;
13520
13521 private:
13522 rtx reg_;
13523 disp_t disp_;
13524 };
13525
13526 inline
13527 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13528 : reg_ (br), disp_ (d)
13529 {
13530 }
13531
13532 inline bool
13533 base_reg_disp::is_reg (void) const
13534 {
13535 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13536 }
13537
13538 inline bool
13539 base_reg_disp::is_disp (void) const
13540 {
13541 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13542 }
13543
13544 inline rtx
13545 base_reg_disp::reg (void) const
13546 {
13547 return reg_;
13548 }
13549
13550 inline disp_t
13551 base_reg_disp::disp (void) const
13552 {
13553 return disp_;
13554 }
13555
13556 /* Find the base register and calculate the displacement for a given
13557 address rtx 'x'. */
13558 static base_reg_disp
13559 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
13560 rtx base_reg = NULL)
13561 {
13562 if (REG_P (x))
13563 {
13564 if (REGNO (x) == GBR_REG)
13565 return base_reg_disp (x, disp);
13566
13567 /* We've reached a hard-reg. This is probably the point where
13568 function args are copied to pseudos. Do not go any further and
13569 stick to the pseudo. If the original mem addr was in a hard reg
13570 from the beginning, it will become the base reg. */
13571 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13572 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13573
13574 /* Find the def of the reg and trace it. If there are more than one
13575 defs and they are not the same, assume it's not safe to proceed. */
13576 rtx_insn* last_i = NULL;
13577 rtx last_set = NULL;
13578 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
13579 d = DF_REF_NEXT_REG (d))
13580 {
13581 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
13582
13583 /* Accept multiple defs, as long as they are equal. */
13584 if (last_set == NULL || rtx_equal_p (last_set, set))
13585 {
13586 last_i = DF_REF_INSN (d);
13587 last_set = set;
13588 }
13589 else
13590 {
13591 last_i = NULL;
13592 last_set = NULL;
13593 break;
13594 }
13595 }
13596
13597 if (last_set != NULL && last_i != NULL)
13598 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
13599 XEXP (last_set, 0));
13600
13601 /* If we get here, no previous insn was found that sets the reg.
13602 The input reg is already the base reg. */
13603 return base_reg_disp (x, disp);
13604 }
13605
13606 else if (GET_CODE (x) == PLUS)
13607 {
13608 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13609 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13610
13611 /* Either left or right val must be a reg.
13612 We don't handle the case of 'reg + reg' here. */
13613 if (left_val.is_reg () && right_val.is_disp ())
13614 return base_reg_disp (left_val.reg (), left_val.disp ()
13615 + right_val.disp () + disp);
13616 else if (right_val.is_reg () && left_val.is_disp ())
13617 return base_reg_disp (right_val.reg (), right_val.disp ()
13618 + left_val.disp () + disp);
13619 else
13620 return base_reg_disp (base_reg, disp);
13621 }
13622
13623 else if (CONST_INT_P (x))
13624 return base_reg_disp (NULL, disp + INTVAL (x));
13625
13626 /* Didn't find anything useful. */
13627 return base_reg_disp (base_reg, disp);
13628 }
13629
13630 /* Given an insn and a memory operand, try to find an equivalent GBR
13631 based memory address and return the corresponding new memory address.
13632 Return NULL_RTX if not found. */
13633 rtx
13634 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
13635 {
13636 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
13637 return NULL_RTX;
13638
13639 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13640 if (side_effects_p (XEXP (mem, 0)))
13641 return NULL_RTX;
13642
13643 /* When not optimizing there might be no dataflow available. */
13644 if (df == NULL)
13645 return NULL_RTX;
13646
13647 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13648
13649 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13650 {
13651 /* If GBR is marked as call clobbered we bail out if we see a call.
13652 FIXME: Actually should check if this mem refers to the gbr value
13653 before or after the call. If there is a store_gbr preceding this
13654 mem, it's safe to use GBR for this mem.
13655
13656 If GBR is not marked as call clobbered, but there is some other
13657 def than a call, it's probably a load_gbr upon which we also
13658 bail out to be on the safe side.
13659 FIXME: Should check if we have a use-after-def case, such as
13660 the call case above. */
13661 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
13662 d = DF_REF_NEXT_REG (d))
13663 {
13664 if (CALL_P (DF_REF_INSN (d)))
13665 {
13666 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
13667 return NULL_RTX;
13668 else
13669 continue;
13670 }
13671 else
13672 return NULL_RTX;
13673 }
13674
13675 rtx disp = GEN_INT (gbr_disp.disp ());
13676 if (gbr_displacement (disp, GET_MODE (mem)))
13677 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13678 }
13679
13680 return NULL_RTX;
13681 }
13682
13683 /*------------------------------------------------------------------------------
13684 Manual insn combine support code.
13685 */
13686
13687 /* Return true if the specified insn contains any UNSPECs or
13688 UNSPEC_VOLATILEs. */
13689 static bool
13690 sh_unspec_insn_p (rtx x)
13691 {
13692 subrtx_iterator::array_type array;
13693 FOR_EACH_SUBRTX (i, array, x, ALL)
13694 if (*i != NULL
13695 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
13696 return true;
13697
13698 return false;
13699 }
13700
13701 /* Return true if the register operands of the specified insn are modified
13702 between the specified from and to insns (exclusive of those two). */
13703 bool
13704 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
13705 const rtx_insn* from,
13706 const rtx_insn* to)
13707 {
13708 /* FIXME: Return true for multiple sets for now. */
13709 rtx s = single_set (operands_insn);
13710 if (s == NULL_RTX)
13711 return true;
13712
13713 subrtx_iterator::array_type array;
13714 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
13715 if (*i != NULL &&
13716 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
13717 return true;
13718
13719 return false;
13720 }
13721
13722 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
13723 negates the T bit and stores the result in the T bit. */
13724 bool
13725 sh_is_nott_insn (const rtx_insn* i)
13726 {
13727 return i != NULL && GET_CODE (PATTERN (i)) == SET
13728 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
13729 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
13730 }
13731
13732 rtx
13733 sh_movt_set_dest (const rtx_insn* i)
13734 {
13735 if (i == NULL)
13736 return NULL;
13737
13738 const_rtx p = PATTERN (i);
13739 return GET_CODE (p) == SET
13740 && arith_reg_dest (XEXP (p, 0), SImode)
13741 && t_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
13742 }
13743
13744 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
13745 that stores the negated T bit in a register, and return the destination
13746 register rtx, or null. */
13747 rtx
13748 sh_movrt_set_dest (const rtx_insn* i)
13749 {
13750 if (i == NULL)
13751 return NULL;
13752
13753 const_rtx p = PATTERN (i);
13754
13755 /* The negc movrt replacement is inside a parallel. */
13756 if (GET_CODE (p) == PARALLEL)
13757 p = XVECEXP (p, 0, 0);
13758
13759 return GET_CODE (p) == SET
13760 && arith_reg_dest (XEXP (p, 0), SImode)
13761 && negt_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
13762 }
13763
13764 /* Given an insn and a reg number, tell whether the reg dies or is unused
13765 after the insn. */
13766 bool
13767 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
13768 {
13769 return find_regno_note (i, REG_DEAD, regno) != NULL
13770 || find_regno_note (i, REG_UNUSED, regno) != NULL;
13771 }
13772
13773 /* Given an insn and a reg number, remove reg dead or reg unused notes to
13774 mark it as being used after the insn. */
13775 void
13776 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
13777 {
13778 if (rtx n = find_regno_note (i, REG_DEAD, regno))
13779 remove_note (i, n);
13780 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
13781 remove_note (i, n);
13782 }
13783
13784 /* Given an insn check if it contains any post/pre inc/dec mem operands and
13785 add the REG_INC notes accordingly.
13786 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
13787 FIXME: This function is currently used by peephole2 patterns because
13788 the peephole2 pass does not preserve REG_INC notes. If the notes
13789 are dropped the following passes will do wrong things. */
13790 rtx_insn*
13791 sh_check_add_incdec_notes (rtx_insn* i)
13792 {
13793 struct for_each_inc_dec_clb
13794 {
13795 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
13796 rtx dest, rtx src ATTRIBUTE_UNUSED,
13797 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
13798 {
13799 gcc_assert (REG_P (dest));
13800
13801 rtx_insn* i = (rtx_insn*)arg;
13802 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
13803 add_reg_note (i, REG_INC, dest);
13804
13805 return 0;
13806 }
13807 };
13808
13809 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
13810 return i;
13811 }
13812
13813 /* Given an op rtx and an insn, try to find out whether the result of the
13814 specified op consists only of logical operations on T bit stores. */
13815 bool
13816 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
13817 {
13818 if (!logical_operator (op, SImode))
13819 return false;
13820
13821 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13822 int op_is_t_count = 0;
13823
13824 for (int i = 0; i < 2; ++i)
13825 {
13826 if (t_reg_operand (ops[i], VOIDmode)
13827 || negt_reg_operand (ops[i], VOIDmode))
13828 op_is_t_count++;
13829
13830 else
13831 {
13832 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13833 prev_nonnote_insn_bb);
13834 if (op_set.set_src == NULL_RTX)
13835 continue;
13836
13837 if (t_reg_operand (op_set.set_src, VOIDmode)
13838 || negt_reg_operand (op_set.set_src, VOIDmode)
13839 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13840 op_is_t_count++;
13841 }
13842 }
13843
13844 return op_is_t_count == 2;
13845 }
13846
13847 /* Given the operand that is extended in a sign/zero extend insn, and the
13848 insn, try to figure out whether the sign/zero extension can be replaced
13849 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13850 NULL_RTX otherwise. */
13851 rtx
13852 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
13853 {
13854 if (REG_P (extended_op))
13855 ; /* A plain reg can be used as-is. */
13856 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13857 extended_op = SUBREG_REG (extended_op);
13858 else
13859 return NULL_RTX;
13860
13861 /* Reg moves must be of the same mode. */
13862 if (GET_MODE (extended_op) != SImode)
13863 return NULL_RTX;
13864
13865 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13866 if (s.set_src == NULL_RTX)
13867 return NULL_RTX;
13868
13869 if (t_reg_operand (s.set_src, VOIDmode)
13870 || negt_reg_operand (s.set_src, VOIDmode))
13871 return extended_op;
13872
13873 /* If the zero extended reg was formed by a logical operation, check the
13874 operands of the logical operation. If both originated from T bit
13875 stores the zero extension can be eliminated. */
13876 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13877 return extended_op;
13878
13879 return NULL_RTX;
13880 }
13881
13882 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
13883 figure out whether it should be converted into a movt-xor sequence in
13884 the movrt_negc splitter.
13885 Returns true if insns have been modified and the splitter has succeeded. */
13886 bool
13887 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
13888 {
13889 /* In cases such as
13890 tst r4,r4
13891 mov #-1,r1
13892 negc r1,r1
13893 tst r4,r4
13894 we can replace the T bit clobbering negc with a movt-xor sequence and
13895 eliminate the redundant comparison.
13896 Because the xor insn depends on register allocation results, allow this
13897 only before reload. */
13898 if (!can_create_pseudo_p ())
13899 return false;
13900
13901 set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13902 prev_nonnote_insn_bb);
13903 set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
13904 next_nonnote_insn_bb);
13905
13906 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
13907 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
13908 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
13909 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
13910 t_before_negc.insn,
13911 t_after_negc.insn)
13912 && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
13913 && !sh_unspec_insn_p (t_after_negc.insn)
13914 && !volatile_insn_p (PATTERN (t_after_negc.insn))
13915 && !side_effects_p (PATTERN (t_after_negc.insn))
13916 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
13917 {
13918 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
13919 set_insn_deleted (t_after_negc.insn);
13920 return true;
13921 }
13922 else
13923 return false;
13924 }
13925
13926 /* Given a reg and the current insn, see if the value of the reg originated
13927 from a sign or zero extension and return the discovered information. */
13928 sh_extending_set_of_reg
13929 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
13930 {
13931 if (reg == NULL)
13932 return sh_extending_set_of_reg (curr_insn);
13933
13934 if (SUBREG_P (reg))
13935 reg = SUBREG_REG (reg);
13936
13937 if (!REG_P (reg))
13938 return sh_extending_set_of_reg (curr_insn);
13939
13940 /* FIXME: Also search the predecessor basic blocks. It seems that checking
13941 only the adjacent predecessor blocks would cover most of the cases.
13942 Also try to look through the first extension that we hit. There are some
13943 cases where a zero_extend is followed by an (implicit) sign_extend, and it
13944 fails to see the sign_extend. */
13945 sh_extending_set_of_reg result =
13946 sh_find_set_of_reg (reg, curr_insn, prev_nonnote_insn_bb, true);
13947
13948 if (result.set_src != NULL)
13949 {
13950 if (GET_CODE (result.set_src) == SIGN_EXTEND
13951 || GET_CODE (result.set_src) == ZERO_EXTEND)
13952 {
13953 if (dump_file)
13954 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
13955 "explicitly sign/zero extended in insn %d\n",
13956 REGNO (reg), INSN_UID (result.insn));
13957 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
13958 result.ext_code = GET_CODE (result.set_src);
13959 }
13960 else if (MEM_P (result.set_src)
13961 && (GET_MODE (result.set_src) == QImode
13962 || GET_MODE (result.set_src) == HImode)
13963 && !sh_unspec_insn_p (result.insn))
13964 {
13965 /* On SH QIHImode memory loads always sign extend. However, in
13966 some cases where it seems that the higher bits are not
13967 interesting, the loads will not be expanded as sign extending
13968 insns, but as QIHImode loads into QIHImode regs. We report that
13969 the reg has been sign extended by the mem load. When it is used
13970 as such, we must convert the mem load into a sign extending insn,
13971 see also sh_extending_set_of_reg::use_as_extended_reg. */
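/* E.g. "mov.b @r4,r1" sign extends the loaded byte into all 32 bits
   of r1.  */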
13972 if (dump_file)
13973 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
13974 "implicitly sign extended in insn %d\n",
13975 REGNO (reg), INSN_UID (result.insn));
13976 result.from_mode = GET_MODE (result.set_src);
13977 result.ext_code = SIGN_EXTEND;
13978 }
13979 }
13980
13981 return result;
13982 }
13983
13984 /* Given a reg that is known to be sign or zero extended at some insn,
13985 take the appropriate measures so that the extended value can be used as
13986 a reg at the specified insn and return the resulting reg rtx. */
13987 rtx
13988 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
13989 {
13990 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
13991 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
13992 gcc_assert (from_mode == QImode || from_mode == HImode);
13993
13994 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
13995 {
13996 if (dump_file)
13997 fprintf (dump_file,
13998 "use_as_extended_reg: converting non-extending mem load in "
13999 "insn %d into sign-extending load\n", INSN_UID (insn));
14000
14001 rtx r = gen_reg_rtx (SImode);
14002 rtx_insn* i0;
14003 if (from_mode == QImode)
14004 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
14005 else if (from_mode == HImode)
14006 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
14007 else
14008 gcc_unreachable ();
14009
14010 emit_insn_after (
14011 gen_move_insn (XEXP (set_rtx, 0),
14012 gen_lowpart (GET_MODE (set_src), r)), i0);
14013 set_insn_deleted (insn);
14014 return r;
14015 }
14016 else
14017 {
14018 rtx extension_dst = XEXP (set_rtx, 0);
14019 if (GET_MODE (extension_dst) != SImode)
14020 extension_dst = simplify_gen_subreg (SImode, extension_dst,
14021 GET_MODE (extension_dst), 0);
14022 if (modified_between_p (extension_dst, insn, use_at_insn))
14023 {
14024 if (dump_file)
14025 fprintf (dump_file,
14026 "use_as_extended_reg: dest reg %d of extending insn %d is "
14027 "modified, inserting a reg-reg copy\n",
14028 REGNO (extension_dst), INSN_UID (insn));
14029
14030 rtx r = gen_reg_rtx (SImode);
14031 emit_insn_after (gen_move_insn (r, extension_dst), insn);
14032 return r;
14033 }
14034 else
14035 {
14036 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
14037 return extension_dst;
14038 }
14039 }
14040 }
14041
14042 bool
14043 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
14044 {
14045 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
14046 && (from_mode == QImode || from_mode == HImode)
14047 && set_src != NULL)
14048 return arith_reg_operand (XEXP (set_src, 0), from_mode);
14049 else
14050 return false;
14051 }
14052
14053 rtx
14054 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
14055 {
14056 gcc_assert (can_use_as_unextended_reg ());
14057
14058 rtx r = XEXP (set_src, 0);
14059 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
14060
14061 if (modified_between_p (r, insn, use_at_insn))
14062 {
14063 rtx r1 = gen_reg_rtx (SImode);
14064 emit_insn_after (gen_move_insn (r1, r0), insn);
14065 return r1;
14066 }
14067 else
14068 {
14069 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
14070 ? REGNO (SUBREG_REG (r))
14071 : REGNO (r));
14072 return r0;
14073 }
14074 }
14075
14076 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
14077 perform the necessary checks on the operands and split it accordingly. */
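/* Illustrative sketch (operands made up): when testing the QImode subreg
   of (reg:SI 4) against (reg:SI 5) and (reg:SI 4) is known to be zero
   extended from QImode, the split can emit a plain
     tstsi_t (reg:SI 4), (reg:SI 5)
   insn directly; otherwise a zero_extendqisi2 into a fresh reg is emitted
   first and that reg is used in the tstsi_t insn.  */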
14078 void
14079 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
14080 int subreg_offset, rtx operands[])
14081 {
14082 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
14083
14084 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
14085 curr_insn);
14086 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
14087 curr_insn);
14088
14089 /* If one of the operands is known to be zero extended, that's already
14090 sufficient to mask out the unwanted high bits. */
14091 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
14092 {
14093 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14094 operands[1]));
14095 return;
14096 }
14097 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
14098 {
14099 emit_insn (gen_tstsi_t (operands[0],
14100 eop1.use_as_extended_reg (curr_insn)));
14101 return;
14102 }
14103
14104 /* None of the operands seem to be zero extended.
14105 If both are sign extended it's OK, too: the upper bits of the SImode
AND are then just the AND of the two sign bits, so the SImode test is
zero exactly when the narrow test is zero. */
14106 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
14107 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
14108 {
14109 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
14110 eop1.use_as_extended_reg (curr_insn)));
14111 return;
14112 }
14113
14114 /* Otherwise we have to insert a zero extension on one of the operands to
14115 mask out the unwanted high bits.
14116 Prefer the operand that has no known extension. */
14117 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
14118 std::swap (operands[0], operands[1]);
14119
14120 rtx tmp0 = gen_reg_rtx (SImode);
14121 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
14122 GET_MODE (operands[0]), subreg_offset);
14123 emit_insn (subreg_mode == QImode
14124 ? gen_zero_extendqisi2 (tmp0, tmp1)
14125 : gen_zero_extendhisi2 (tmp0, tmp1));
14126 emit_insn (gen_tstsi_t (tmp0, operands[1]));
14127 }
14128
14129 /* A helper class to increment/decrement a counter variable each time a
14130 function is entered/left. */
14131 class scope_counter
14132 {
14133 public:
14134 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
14135
14136 ~scope_counter (void)
14137 {
14138 --m_counter;
14139 gcc_assert (m_counter >= 0);
14140 }
14141
14142 int count (void) const { return m_counter; }
14143
14144 private:
14145 int& m_counter;
14146 };
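/* Usage sketch (hypothetical example, not from the original sources):

     static int some_depth = 0;

     void some_func (void)
     {
       scope_counter guard (some_depth);	// some_depth incremented here.
       if (guard.count () > 1)
	 return;	// Re-entered; give up early.
       // ... some_depth is decremented when GUARD goes out of scope.
     }
*/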
14147
14148 /* Given an rtx x, determine whether the expression can be used to create
14149 an insn that calculates x and stores the result in the T bit.
14150 This is used by the 'treg_set_expr' predicate to construct insn sequences
14151 where T bit results are fed into other insns, such as addc, subc, negc
14152 insns.
14153
14154 FIXME: The patterns that expand 'treg_set_expr' operands tend to
14155 distinguish between 'positive' and 'negative' forms. For now this has to
14156 be done in the preparation code. We could also introduce
14157 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
14158 two different patterns for the 'positive' and 'negative' forms. However,
14159 the total number of lines of code seems to be about the same and the
14160 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
14161 recog function would need to look inside the expression by temporarily
14162 splitting it. */
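/* Illustrative example (sketch, operands made up): an expression such as
     (eq:SI (and:SI (reg:SI 4) (const_int 255)) (const_int 0))
   can be computed into the T bit by a single tst insn, so it would be
   accepted by sh_recog_treg_set_expr below and split out by
   sh_split_treg_set_expr when it shows up as an addc/subc/negc operand.  */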
14163 static int sh_recog_treg_set_expr_reent_count = 0;
14164
14165 bool
14166 sh_recog_treg_set_expr (rtx op, machine_mode mode)
14167 {
14168 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
14169
14170 /* Limit the recursion count to avoid nested expressions which we can't
14171 resolve to a single treg set insn. */
14172 if (recursion.count () > 1)
14173 return false;
14174
14175 /* Early accept known possible operands before doing recog. */
14176 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode))
14177 return true;
14178
14179 /* Early reject impossible operands before doing recog.
14180 There are some (set (t) (subreg ...)) patterns, but we must be careful
14181 not to allow any invalid reg-reg or mem-reg moves, or else other passes
14182 such as lower-subreg will bail out. Some insns such as SH4A movua are
14183 done with UNSPEC, so we must reject those, too, or else it would result
14184 in an invalid reg -> treg move. */
14185 if (register_operand (op, mode) || memory_operand (op, mode)
14186 || sh_unspec_insn_p (op))
14187 return false;
14188
14189 if (!can_create_pseudo_p ())
14190 return false;
14191
14192 /* expand_debug_locations may call this to compute rtx costs at
14193 a very early stage. In that case, don't make new insns here to
14194 avoid codegen differences with -g. */
14195 if (currently_expanding_to_rtl)
14196 return false;
14197
14198 /* We are going to invoke recog in a re-entrant way and thus
14199 have to capture its current state and restore it afterwards. */
14200 recog_data_d prev_recog_data = recog_data;
14201
14202 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
14203 SET_PREV_INSN (i) = NULL;
14204 SET_NEXT_INSN (i) = NULL;
14205
14206 int result = recog (PATTERN (i), i, 0);
14207
14208 /* It seems there is no insn like that. Create a simple negated
14209 version and try again. If we hit a negated form, we'll allow that
14210 and append a nott sequence when splitting out the insns. Insns that
14211 do the split can then remove the trailing nott if they know how to
14212 deal with it. */
14213 if (result < 0 && GET_CODE (op) == EQ)
14214 {
14215 PUT_CODE (op, NE);
14216 result = recog (PATTERN (i), i, 0);
14217 PUT_CODE (op, EQ);
14218 }
14219 if (result < 0 && GET_CODE (op) == NE)
14220 {
14221 PUT_CODE (op, EQ);
14222 result = recog (PATTERN (i), i, 0);
14223 PUT_CODE (op, NE);
14224 }
14225
14226 recog_data = prev_recog_data;
14227 return result >= 0;
14228 }
14229
14230 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
14231 This can be used as a condition for insn/split patterns to allow certain
14232 T bit setting patterns only to be matched as sub expressions of other
14233 patterns. */
14234 bool
14235 sh_in_recog_treg_set_expr (void)
14236 {
14237 return sh_recog_treg_set_expr_reent_count > 0;
14238 }
14239
14240 /* Given an rtx x, which is assumed to be some expression that has been
14241 matched by the 'treg_set_expr' predicate before, split and emit the
14242 insns that are necessary to calculate the expression and store the result
14243 in the T bit.
14244 The splitting is done recursively, similar to 'try_split' in emit-rtl.c.
14245 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
14246 'delete_insn' which then causes the DF parts to bail out, because we
14247 currently are inside another gen_split* function and would invoke
14248 'try_split' in a reentrant way. */
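/* Sketch of the chain surgery done below (summary, not from the original
   comments): an insn I in the chain P -> I -> N whose pattern splits into
   S1..S2 is spliced out so that the chain becomes P -> S1 -> ... -> S2 -> N,
   and the resulting insns are then split again recursively until nothing
   splits any further.  */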
14249 static std::pair<rtx_insn*, rtx_insn*>
14250 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
14251 {
14252 if (dump_file)
14253 {
14254 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
14255 print_rtl_single (dump_file, i);
14256 fprintf (dump_file, "\n");
14257 }
14258
14259 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
14260
14261 if (seq == NULL)
14262 return std::make_pair (i, i);
14263
14264 /* Avoid infinite splitter loops if any insn of the result matches
14265 the original pattern. */
14266 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
14267 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
14268 return std::make_pair (i, i);
14269
14270 unshare_all_rtl_in_chain (seq);
14271
14272 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
14273 a linked list, replace the single insn with the new insns. */
14274 rtx_insn* seqlast = seq;
14275 while (NEXT_INSN (seqlast) != NULL)
14276 seqlast = NEXT_INSN (seqlast);
14277
14278 if (rtx_insn* iprev = PREV_INSN (i))
14279 SET_NEXT_INSN (iprev) = seq;
14280 if (rtx_insn* inext = NEXT_INSN (i))
14281 SET_PREV_INSN (inext) = seqlast;
14282
14283 SET_PREV_INSN (seq) = PREV_INSN (i);
14284 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
14285
14286 SET_PREV_INSN (i) = NULL;
14287 SET_NEXT_INSN (i) = NULL;
14288
14289 /* Recursively split all insns. */
14290 for (i = seq; ; i = NEXT_INSN (i))
14291 {
14292 std::pair<rtx_insn*, rtx_insn*> ii =
14293 sh_try_split_insn_simple (i, curr_insn, n + 1);
14294 if (i == seq)
14295 seq = ii.first;
14296 if (i == seqlast)
14297 {
14298 seqlast = ii.second;
14299 break;
14300 }
14301 i = ii.first;
14302 }
14303
14304 return std::make_pair (seq, seqlast);
14305 }
14306
14307 sh_treg_insns
14308 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
14309 {
14310 if (t_reg_operand (x, VOIDmode))
14311 return sh_treg_insns ();
14312
14313 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
14314
14315 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
14316 SET_PREV_INSN (i) = NULL;
14317 SET_NEXT_INSN (i) = NULL;
14318
14319 if (dump_file)
14320 {
14321 fprintf (dump_file, "split_treg_set_expr insn:\n");
14322 print_rtl (dump_file, i);
14323 fprintf (dump_file, "\n");
14324 }
14325
14326 /* We are going to invoke recog/split_insns in a re-entrant way and thus
14327 have to capture its current state and restore it afterwards. */
14328 recog_data_d prev_recog_data = recog_data;
14329
14330 int insn_code = recog (PATTERN (i), i, 0);
14331
14332 /* If the insn was not found, see if we matched the negated form before
14333 and append a nott. */
14334 bool append_nott = false;
14335
14336 if (insn_code < 0 && GET_CODE (x) == EQ)
14337 {
14338 PUT_CODE (x, NE);
14339 insn_code = recog (PATTERN (i), i, 0);
14340 if (insn_code >= 0)
14341 append_nott = true;
14342 else
14343 PUT_CODE (x, EQ);
14344 }
14345 if (insn_code < 0 && GET_CODE (x) == NE)
14346 {
14347 PUT_CODE (x, EQ);
14348 insn_code = recog (PATTERN (i), i, 0);
14349 if (insn_code >= 0)
14350 append_nott = true;
14351 else
14352 PUT_CODE (x, NE);
14353 }
14354
14355 gcc_assert (insn_code >= 0);
14356
14357 /* Try to recursively split the insn. Some insns might refuse to split
14358 any further while we are in the treg_set_expr splitting phase. They
14359 will be emitted as part of the outer insn and then split again. */
14360 std::pair<rtx_insn*, rtx_insn*> insnlist =
14361 sh_try_split_insn_simple (i, curr_insn);
14362
14363 /* Restore recog state. */
14364 recog_data = prev_recog_data;
14365
14366 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
14367 ? insnlist.second
14368 : NULL;
14369 if (dump_file)
14370 {
14371 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
14372 print_rtl (dump_file, insnlist.first);
14373 fprintf (dump_file, "\n");
14374
14375 if (nott_insn != NULL)
14376 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
14377 }
14378
14379 emit_insn (insnlist.first);
14380
14381 if (nott_insn != NULL && append_nott)
14382 {
14383 if (dump_file)
14384 fprintf (dump_file, "removing trailing nott\n");
14385 remove_insn (nott_insn);
14386 nott_insn = NULL;
14387 append_nott = false;
14388 }
14389
14390 if (append_nott)
14391 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
14392
14393 rtx_insn* first_insn = get_insns ();
14394
14395 if (dump_file)
14396 {
14397 fprintf (dump_file, "resulting insns:\n");
14398 print_rtl (dump_file, first_insn);
14399 fprintf (dump_file, "\n");
14400 }
14401
14402 return sh_treg_insns (first_insn, nott_insn);
14403 }
14404
14405 /*------------------------------------------------------------------------------
14406 Mode switching support code.
14407 */
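/* Sketch of the behavior implemented below (summary, not from the original
   comments): on TARGET_SH4A_FP / TARGET_SH4_300 a switch between single and
   double precision just toggles the PR (and, with TARGET_FMOVD, SZ) FPSCR
   bits via gen_toggle_pr / gen_toggle_sz.  Otherwise FPSCR is read with
   gen_sts_fpscr, the FPSCR_PR / FPSCR_SZ bits are adjusted with
   xor/and/ior, and the result is written back with gen_lds_fpscr.  */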
14408
14409 static void
14410 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
14411 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14412 {
14413 if ((TARGET_SH4A_FP || TARGET_SH4_300)
14414 && prev_mode != FP_MODE_NONE && prev_mode != mode)
14415 {
14416 emit_insn (gen_toggle_pr ());
14417 if (TARGET_FMOVD)
14418 emit_insn (gen_toggle_sz ());
14419 }
14420 else if (mode != FP_MODE_NONE)
14421 {
14422 rtx tmp = gen_reg_rtx (SImode);
14423 emit_insn (gen_sts_fpscr (tmp));
14424 rtx i = NULL;
14425
14426 const unsigned HOST_WIDE_INT fpbits =
14427 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
14428
14429 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
14430 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14431 else if (mode == FP_MODE_SINGLE)
14432 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
14433 else if (mode == FP_MODE_DOUBLE)
14434 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
14435 else
14436 gcc_unreachable ();
14437
14438 emit_insn (i);
14439 emit_insn (gen_lds_fpscr (tmp));
14440 }
14441 }
14442
14443 static int
14444 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
14445 {
14446 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
14447 }
14448
14449 static int
14450 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
14451 {
14452 if (TARGET_HITACHI && recog_memoized (insn) >= 0
14453 && get_attr_fp_set (insn) != FP_SET_NONE)
14454 return (int) get_attr_fp_set (insn);
14455 else
14456 return mode;
14457 }
14458
14459 static int
14460 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
14461 {
14462 return NORMAL_MODE (entity);
14463 }
14464
14465 static int
14466 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
14467 {
14468 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
14469 }
14470
14471 static int
14472 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
14473 {
14474 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
14475 }
14476
14477 /*------------------------------------------------------------------------------
14478 Misc
14479 */
14480
14481 /* Return true if we use LRA instead of reload pass. */
14482 static bool
14483 sh_lra_p (void)
14484 {
14485 return sh_lra_flag;
14486 }
14487
14488 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
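/* Worked example (illustrative, derived from the cutoffs below): with
   speed_p and at least 32-bit alignment, a block move (or store/set) is
   done by pieces when it is estimated to take fewer than 16 insns; when
   optimizing for size, or with smaller alignment, the cutoff drops to
   2 insns.  */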
14489
14490 static bool
14491 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
14492 unsigned int align,
14493 enum by_pieces_operation op,
14494 bool speed_p)
14495 {
14496 switch (op)
14497 {
14498 case MOVE_BY_PIECES:
14499 return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
14500 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14501 case STORE_BY_PIECES:
14502 case SET_BY_PIECES:
14503 return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1)
14504 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
14505 default:
14506 return default_use_by_pieces_infrastructure_p (size, align,
14507 op, speed_p);
14508 }
14509 }
14510
14511 #include "gt-sh.h"